Merge branch 'main' into lmukhopadhyay-SNOW-1856438-cortex-funcs-apply
sfc-gh-lmukhopadhyay committed Jan 27, 2025
2 parents 582623d + f01dae3 commit 90acd1e
Showing 197 changed files with 8,398 additions and 5,765 deletions.
31 changes: 21 additions & 10 deletions .github/workflows/daily_modin_precommit.yml
@@ -3,8 +3,8 @@
name: Daily Snowpark pandas API test
on:
schedule:
# 9 AM UTC
- cron: "0 9 * * *"
# 8 AM UTC
- cron: "0 8 * * *"
workflow_dispatch:
inputs:
logLevel:
@@ -171,14 +171,25 @@ jobs:
# Specify SNOWFLAKE_IS_PYTHON_RUNTIME_TEST: 1 when adding >= python3.11 with no server-side support
# For example, see https://github.com/snowflakedb/snowpark-python/pull/681
shell: bash
- name: Run Snowpark pandas API tests (excluding doctests)
- if: ${{ contains('macos', matrix.os.download_name) }}
name: Run Snowpark pandas API tests (excluding doctests)
run: python -m tox -e "py${PYTHON_VERSION/\./}-snowparkpandasdailynotdoctest-modin-ci"
env:
PYTHON_VERSION: ${{ matrix.python-version }}
cloud_provider: ${{ matrix.cloud-provider }}
PYTEST_ADDOPTS: --color=yes --tb=short
TOX_PARALLEL_NO_SPINNER: 1
shell: bash
- if: ${{ !contains('macos', matrix.os.download_name) }}
name: Run Snowpark pandas API tests (excluding doctests)
# Use regular github setup for large windows and linux instance
run: python -m tox -e "py${PYTHON_VERSION/\./}-snowparkpandasnotdoctest-modin-ci"
env:
PYTHON_VERSION: ${{ matrix.python-version }}
cloud_provider: ${{ matrix.cloud-provider }}
PYTEST_ADDOPTS: --color=yes --tb=short
TOX_PARALLEL_NO_SPINNER: 1
shell: bash
- name: Combine coverages
run: python -m tox -e coverage --skip-missing-interpreters false
shell: bash
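
A note on the two `if`-guarded steps above: in GitHub Actions, `contains('macos', matrix.os.download_name)` is true when `download_name` is a substring of `'macos'` (here, exactly `macos`), so the macOS runners get the `snowparkpandasdailynotdoctest` tox environment while the negated condition routes the large Windows and Linux runners to `snowparkpandasnotdoctest`. The bash expansion `${PYTHON_VERSION/\./}` deletes the first `.` in the version string to build the tox environment name; a rough Python equivalent, with an illustrative version value:

```python
# Rough Python equivalent of the bash expansion ${PYTHON_VERSION/\./},
# which removes the first "." so that "3.11" becomes "311".
python_version = "3.11"  # illustrative value
tox_env = f"py{python_version.replace('.', '', 1)}-snowparkpandasnotdoctest-modin-ci"
print(tox_env)  # py311-snowparkpandasnotdoctest-modin-ci
```
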
@@ -248,7 +259,7 @@ jobs:
# For example, see https://github.com/snowflakedb/snowpark-python/pull/681
shell: bash
- name: Run Snowpark pandas API tests (excluding doctests)
run: python -m tox -e "modin_pandas_version-py${PYTHON_VERSION/\./}-snowparkpandasdailynotdoctest-modin-ci"
run: python -m tox -e "modin_pandas_version-py${PYTHON_VERSION/\./}-snowparkpandasnotdoctest-modin-ci"
env:
MODIN_PANDAS_PATCH_VERSION: ${{ matrix.pandas-version }}
PYTHON_VERSION: ${{ matrix.python-version }}
@@ -265,10 +276,10 @@
fail-fast: false
matrix:
os:
- image_name: macos-latest
download_name: macos # it includes doctest
python-version: ["3.9", "3.10", "3.11"]
cloud-provider: [aws]
- image_name: windows-latest-64-cores
download_name: windows
python-version: ["3.11"]
cloud-provider: [azure]
steps:
- name: Checkout Code
uses: actions/checkout@v4
@@ -309,7 +320,7 @@ jobs:
TOX_PARALLEL_NO_SPINNER: 1
shell: bash
- name: Run Snowpark pandas API tests (excluding doctests)
run: python -m tox -e "py${PYTHON_VERSION/\./}-snowparkpandasdailynotdoctest-modin-ci"
run: python -m tox -e "py${PYTHON_VERSION/\./}-snowparkpandasnotdoctest-modin-ci"
env:
PYTHON_VERSION: ${{ matrix.python-version }}
cloud_provider: ${{ matrix.cloud-provider }}
@@ -339,7 +350,7 @@ jobs:
os:
- image_name: ubuntu-latest-64-cores
download_name: linux
python-version: ["3.9", "3.10", "3.11"]
python-version: ["3.10"]
cloud-provider: [aws]
steps:
- name: Checkout Code
8 changes: 3 additions & 5 deletions .github/workflows/daily_precommit.yml
@@ -3,8 +3,8 @@
name: Daily precommit test
on:
schedule:
# 10 AM UTC
- cron: "0 10 * * *"
# 9 AM UTC
- cron: "0 9 * * *"

workflow_dispatch:
inputs:
@@ -373,9 +373,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os:
- image_name: macos-latest
download_name: macos # it includes doctest
os: [macos-latest, windows-latest, ubuntu-latest]
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
cloud-provider: [aws]
steps:
63 changes: 62 additions & 1 deletion .github/workflows/precommit.yml
@@ -134,6 +134,25 @@ jobs:
- os: windows-latest-64-cores
python-version: "3.11"
cloud-provider: azure
# limit python 3.12 to one os per csp
- os: macos-latest
python-version: "3.12"
cloud-provider: aws
- os: macos-latest
python-version: "3.12"
cloud-provider: gcp
- os: ubuntu-latest-64-cores
python-version: "3.12"
cloud-provider: azure
- os: ubuntu-latest-64-cores
python-version: "3.12"
cloud-provider: gcp
- os: windows-latest-64-cores
python-version: "3.12"
cloud-provider: azure
- os: windows-latest-64-cores
python-version: "3.12"
cloud-provider: aws
steps:
- name: Checkout Code
uses: actions/checkout@v4
@@ -223,6 +242,32 @@ jobs:
os: [macos-latest, windows-latest, ubuntu-latest]
python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
cloud-provider: [aws]
exclude:
# only run each version on one os
# daily-precommit does full matrix
# macos runs 3.8 and 3.10
- os: macos-latest
python-version: "3.12"
- os: macos-latest
python-version: "3.11"
- os: macos-latest
python-version: "3.9"
# windows runs 3.9 and 3.11
- os: windows-latest
python-version: "3.12"
- os: windows-latest
python-version: "3.10"
- os: windows-latest
python-version: "3.8"
# ubuntu runs 3.12
- os: ubuntu-latest
python-version: "3.11"
- os: ubuntu-latest
python-version: "3.10"
- os: ubuntu-latest
python-version: "3.9"
- os: ubuntu-latest
python-version: "3.8"
steps:
- name: Checkout Code
uses: actions/checkout@v4
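
The exclude blocks in this workflow (the Python 3.12 one in the previous hunk, the per-OS one above, and the cloud-provider pairing below) all resolve the same way: GitHub Actions builds the full matrix cross-product and drops every combination that matches an exclude entry. A minimal sketch of that resolution, as a check on the per-OS comments above:

```python
# Resolves the per-OS exclude block above: full cross-product minus the
# excluded combinations. Prints macos 3.8/3.10, windows 3.9/3.11, ubuntu 3.12.
from itertools import product

oses = ["macos-latest", "windows-latest", "ubuntu-latest"]
versions = ["3.8", "3.9", "3.10", "3.11", "3.12"]
excluded = {
    ("macos-latest", "3.12"), ("macos-latest", "3.11"), ("macos-latest", "3.9"),
    ("windows-latest", "3.12"), ("windows-latest", "3.10"), ("windows-latest", "3.8"),
    ("ubuntu-latest", "3.11"), ("ubuntu-latest", "3.10"),
    ("ubuntu-latest", "3.9"), ("ubuntu-latest", "3.8"),
}

for os_name, py in product(oses, versions):
    if (os_name, py) not in excluded:
        print(os_name, py)
```
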
@@ -413,7 +458,23 @@ jobs:
matrix:
os: [ ubuntu-latest ]
python-version: [ "3.9", "3.11", "3.12" ]
cloud-provider: [ aws ]
cloud-provider: [aws, gcp, azure]
exclude:
# Run 3.9 on aws
- python-version: "3.11"
cloud-provider: aws
- python-version: "3.12"
cloud-provider: aws
# Run 3.11 on gcp
- python-version: "3.9"
cloud-provider: gcp
- python-version: "3.12"
cloud-provider: gcp
# Run 3.12 on azure
- python-version: "3.9"
cloud-provider: azure
- python-version: "3.11"
cloud-provider: azure
steps:
- name: Checkout Code
uses: actions/checkout@v4
11 changes: 11 additions & 0 deletions CHANGELOG.md
@@ -41,17 +41,26 @@
- `regr_sxy`
- `regr_syy`
- `try_to_binary`
- Added support for specifying a schema string (including implicit struct syntax) when calling `DataFrame.create_dataframe`.
- Added support for `DataFrameWriter.insert_into/insertInto`. This method also supports local testing mode.
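
As a hedged sketch of the new writer method (an existing `session` object is assumed, and appending to an existing table whose schema matches the dataframe is an assumption based on this entry):

```python
# Hypothetical usage of DataFrameWriter.insert_into; `session` is an
# existing snowflake.snowpark.Session (creation elided), and the target
# table is assumed to already exist with a matching schema.
df = session.create_dataframe([(1, "a"), (2, "b")], schema=["id", "name"])
df.write.insert_into("existing_table")  # appends the rows to the table
```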

#### Experimental Features

- Added `Catalog` class to manage Snowflake objects. It can be accessed via `Session.catalog`.
- Added support for querying a JSON element of a VARIANT column in `functions.col` and `functions.column` with the optional keyword argument `json_element`.
- Allow a user-provided schema when reading a JSON file on a stage.
- Added support for specifying a schema string (including implicit struct syntax) when calling `DataFrame.create_dataframe`.
  - `snowflake.core` is a dependency required for this feature.
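
A hedged sketch of the experimental additions above (the schema-string grammar and the `Catalog` surface are assumptions based on these entries; an existing `session` is assumed):

```python
# Catalog access; requires the snowflake.core dependency noted above.
catalog = session.catalog

# Creating a dataframe from a schema string with implicit struct syntax;
# the "name: type" grammar shown here is an assumption.
df = session.create_dataframe(
    [[1, "Alice"], [2, "Bob"]],
    schema="id: int, name: string",
)
```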

#### Improvements

- Updated README.md to include instructions on how to verify package signatures using `cosign`.
- Added an option `keep_column_order` for keeping original column order in `DataFrame.with_column` and `DataFrame.with_columns`.
- Added options to column casts that allow renaming or adding fields in StructType columns.
- Added support for `contains_null` parameter to ArrayType.
- Added support for creating a temporary view via `DataFrame.create_or_replace_temp_view` from a DataFrame created by reading a file from a stage.
- Added support for `value_contains_null` parameter to MapType.
- Added `interactive` to telemetry that indicates whether the current environment is an interactive one.
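
For the new type parameters above, a hedged sketch (the parameter names come from these entries; passing them as keywords is an assumption about the constructors):

```python
from snowflake.snowpark.types import ArrayType, IntegerType, MapType, StringType

# An array whose elements may be NULL, and a map whose values may be NULL.
array_type = ArrayType(IntegerType(), contains_null=True)
map_type = MapType(StringType(), IntegerType(), value_contains_null=True)
```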

#### Bug Fixes

@@ -60,6 +69,7 @@
- Fixed a bug in function `date_format` that caused an error when the input column was of date or timestamp type.
- Fixed a bug in dataframe where a null value could be inserted into a non-nullable column.
- Fixed a bug in `replace` and `lit` which raised a type hint assertion error when passing `Column` expression objects.
- Fixed a bug in `pandas_udf` and `pandas_udtf` where the `session` parameter was erroneously ignored.
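
To illustrate the `pandas_udf` fix, a hedged sketch (`my_session` is a placeholder for an existing `Session`; the `session` keyword is taken from the entry above):

```python
from snowflake.snowpark.functions import pandas_udf
from snowflake.snowpark.types import IntegerType, PandasSeriesType

# After the fix, the explicitly passed session is used to register the UDF
# instead of being silently ignored.
add_one = pandas_udf(
    lambda s: s + 1,
    return_type=PandasSeriesType(IntegerType()),
    input_types=[PandasSeriesType(IntegerType())],
    session=my_session,  # placeholder for an existing Session
)
```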

### Snowpark pandas API Updates

@@ -89,6 +99,7 @@
- Added support for `expand=True` in `Series.str.split`.
- Added support for `DataFrame.pop` and `Series.pop`.
- Added support for `first` and `last` in `DataFrameGroupBy.agg` and `SeriesGroupBy.agg`.
- Added support for `Index.drop_duplicates`.
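
A hedged sketch of the newly supported `Index.drop_duplicates` (an active Snowpark session is assumed; the behavior mirrors the pandas API):

```python
import modin.pandas as pd
import snowflake.snowpark.modin.plugin  # noqa: F401 (enables the Snowpark pandas backend)

idx = pd.Index([1, 1, 2, 3, 3])
print(idx.drop_duplicates())  # Index([1, 2, 3], dtype='int64')
```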

#### Bug Fixes

1 change: 1 addition & 0 deletions docs/source/modin/indexing.rst
@@ -53,6 +53,7 @@ Index
Index.argmin
Index.argmax
Index.copy
Index.drop_duplicates
Index.equals
Index.identical
Index.is_boolean
2 changes: 1 addition & 1 deletion docs/source/modin/supported/index_supported.rst
@@ -79,7 +79,7 @@ Methods
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
| ``drop`` | N | | |
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
| ``drop_duplicates`` | N | | |
| ``drop_duplicates`` | Y | | |
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
| ``duplicated`` | N | | |
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
2 changes: 2 additions & 0 deletions docs/source/snowpark/io.rst
@@ -44,6 +44,8 @@ Input/Output
DataFrameWriter.save
DataFrameWriter.saveAsTable
DataFrameWriter.save_as_table
DataFrameWriter.insertInto
DataFrameWriter.insert_into
FileOperation.get
FileOperation.get_stream
FileOperation.put
5 changes: 4 additions & 1 deletion recipe/meta.yaml
@@ -18,6 +18,10 @@ build:
- SNOWFLAKE_IS_PYTHON_RUNTIME_TEST=1
{% if noarch_build %}
noarch: python
string: "py39_0" # [py==39]
string: "py310_0" # [py==310]
string: "py311_0" # [py==311]
string: "py312_0" # [py==312]
{% endif %}

requirements:
@@ -43,7 +47,6 @@ requirements:
- protobuf >=3.20,<6
- python-dateutil
- tzlocal
- snowflake.core >=1.0.0,<2

test:
imports:
37 changes: 26 additions & 11 deletions scripts/copy-remote-ast.sh
@@ -1,27 +1,42 @@
#!/bin/bash
set -euxo pipefail

#!/usr/bin/env bash
# This script assumes the target Cloud Workspace specified as the command-line argument has the build target.
# To make sure this is the case, run bazel build //Snowpark/ast:ast_proto and bazel build //Snowpark/unparser.
# To make sure this is the case, run bazel build //Snowpark/ast:ast_proto && bazel build //Snowpark/unparser && bazel run //Snowpark/unparser.
# The bazel build commands will create the proto and unparser.jar files, whereas bazel run will create the run-files directory.

# N.B. The calling environment further requires:
# export MONOREPO_DIR=$TMPDIR

set -euxo pipefail

if [ "$#" -ne 1 ]; then
echo "Wrong number of parameters, usage: ./copy-remote-ast.sh <workspace id>"
exit 1
fi

MONOREPO_DIR=${MONOREPO_DIR:-$TMPDIR}

# To allow this script to run from any subdirectory within snowpark-python, we use git rev-parse.
SNOWPARK_ROOT=$(git rev-parse --show-toplevel)

if [ ! -d "$TMPDIR" ]; then
echo "TMPDIR not defined"
if [ ! -d "$MONOREPO_DIR" ]; then
echo "MONOREPO_DIR not defined"
exit 1
fi

scp $1:~/Snowflake/trunk/bazel-bin/Snowpark/ast/ast.proto $SNOWPARK_ROOT/src/snowflake/snowpark/_internal/proto/ast.proto
# Quick way to determine what ~ is on the server, made explicit to avoid confusion.
REMOTE_HOME=$(ssh $1 'echo "$HOME"')

# Run bazel build remotely.
# Adding _deploy to a bazel JVM target builds a fat jar,
# For the unparser this target is //Snowpark/unparser:unparser_deploy.jar.
ssh $1 'cd ~/Snowflake/trunk && bazel build //Snowpark/ast:ast_proto && bazel build //Snowpark/unparser:unparser_deploy.jar'

mkdir -p $TMPDIR/bazel-bin/Snowpark/unparser/unparser.runfiles
scp -r $1:~/Snowflake/trunk/bazel-bin/Snowpark/unparser/unparser.runfiles/ $TMPDIR/bazel-bin/Snowpark/unparser/unparser.runfiles/
# (1) Copy over ast.proto file (required by python -x tox -e protoc).
scp $1:"$REMOTE_HOME/Snowflake/trunk/bazel-bin/Snowpark/ast/ast.proto" $SNOWPARK_ROOT/src/snowflake/snowpark/_internal/proto/ast.proto

scp $1:~/Snowflake/trunk/bazel-bin/Snowpark/unparser/unparser-lib.jar $TMPDIR/bazel-bin/Snowpark/unparser/
scp $1:~/Snowflake/trunk/bazel-bin/Snowpark/unparser/unparser.jar $TMPDIR/bazel-bin/Snowpark/unparser/
# (2) Copy over fat unparser_deploy.jar and rename to unparser.jar.
mkdir -p $MONOREPO_DIR/bazel-bin/Snowpark/unparser/
scp $1:$REMOTE_HOME/Snowflake/trunk/bazel-bin/Snowpark/unparser/unparser_deploy.jar $MONOREPO_DIR/bazel-bin/Snowpark/unparser/unparser.jar

pushd $SNOWPARK_ROOT
python -m tox -e protoc
Binary file modified scripts/parameters_qa.py.gpg
2 changes: 1 addition & 1 deletion setup.py
@@ -29,7 +29,6 @@
"protobuf>=3.20, <6", # Snowpark IR
"python-dateutil", # Snowpark IR
"tzlocal", # Snowpark IR
"snowflake.core>=1.0.0, <2", # Catalog
]
REQUIRED_PYTHON_VERSION = ">=3.8, <3.13"

@@ -60,6 +59,7 @@
"decorator", # sql counter check
"lxml", # used in read_xml tests
"tox", # used for setting up testing environments
"snowflake.core>=1.0.0, <2", # Catalog
]

# read the version
7 changes: 4 additions & 3 deletions src/snowflake/snowpark/_internal/analyzer/analyzer.py
@@ -4,7 +4,7 @@
#
import uuid
from collections import Counter, defaultdict
from typing import TYPE_CHECKING, DefaultDict, Dict, List, Optional, Union
from typing import TYPE_CHECKING, DefaultDict, Dict, List, Union

from snowflake.connector import IntegrityError

@@ -168,7 +168,7 @@ def __init__(self, session: "snowflake.snowpark.session.Session") -> None:
self.plan_builder = SnowflakePlanBuilder(self.session)
self.generated_alias_maps = {}
self.subquery_plans = []
self.alias_maps_to_use: Optional[Dict[uuid.UUID, str]] = None
self.alias_maps_to_use: Dict[uuid.UUID, str] = {}

def analyze(
self,
@@ -368,7 +368,6 @@ def analyze(
return expr.sql

if isinstance(expr, Attribute):
assert self.alias_maps_to_use is not None
name = self.alias_maps_to_use.get(expr.expr_id, expr.name)
return quote_name(name)

@@ -661,6 +660,8 @@ def unary_expression_extractor(
),
expr.to,
expr.try_,
expr.is_rename,
expr.is_add,
)
else:
return unary_expression(