Skip to content

Commit

Permalink
use fakesnow for better transpilation
Browse files Browse the repository at this point in the history
  • Loading branch information
buremba committed Aug 4, 2024
1 parent c683c28 commit 147731f
Show file tree
Hide file tree
Showing 12 changed files with 463 additions and 337 deletions.
1 change: 1 addition & 0 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ jobs:
pip:
name: Release PyPI
runs-on: ubuntu-latest
environment: deploy
steps:
- name: Check out the repository
uses: actions/checkout@v3
Expand Down
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -103,4 +103,5 @@ celerybeat.pid
.metabase/*
.clickhouse/*
.DS_STORE
.certs/*
.certs/*
.rill/*
34 changes: 0 additions & 34 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -143,40 +143,6 @@ It gives you free https connection to your local server and it's the default hos
For Catalog, [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/create-iceberg-table-snowflake) and [Object Store](https://docs.snowflake.com/en/sql-reference/sql/create-iceberg-table-iceberg-files) catalogs are supported at the moment.
For the data lake, S3 and GCS are supported.

## Can't query all Snowflake types locally

The table below lists Snowflake data types and indicates whether each one is currently supported when running queries locally.

| Snowflake Data Type | Supported |
| --- |--------------------------------|
| NUMBER | ✓ |
| DECIMAL | ✓ |
| INT | ✓ |
| BIGINT | ✓ |
| SMALLINT | ✓ |
| TINYINT | ✓ |
| FLOAT | ✓ |
| DOUBLE | ✓ |
| VARCHAR | ✓ |
| CHAR | ✓ |
| STRING | ✓ |
| TEXT | ✓ |
| BOOLEAN | ✓ |
| DATE | ✓ |
| DATETIME | ✓ |
| TIME | ✓ |
| TIMESTAMP | ✓ |
| TIMESTAMP_LTZ | ✗ ¹ |
| TIMESTAMP_NTZ | ✗ ¹ |
| TIMESTAMP_TZ | ✗ ¹ |
| VARIANT | ✓ |
| OBJECT | ✓ |
| ARRAY | ✓ |
| GEOGRAPHY | ✗ ¹ |
| VECTOR | ✗ ¹ |

¹: No Support in DuckDB yet.

## Can't query native Snowflake tables locally

UniverSQL doesn't support querying native Snowflake tables as they're not accessible from outside of Snowflake. If you try to query a Snowflake table directly, it will return an error.
Expand Down
180 changes: 103 additions & 77 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ uvicorn = "^0.30.1"
snowflake-connector-python = {extras = ["pandas", "secure-local-storage"], version = "^3.11.0"}
eval-type-backport = "^0.2.0"
pip-system-certs = "^4.0"
sqlglot = {extras = ["rs"], version = "^25.5.1"}
chdb = "^1.4.1"
fsspec = "^2024.6.1"
gcsfs = "^2024.6.1"
Expand All @@ -29,6 +28,7 @@ pyiceberg = "^0.7.0"
sqlalchemy = "^2.0.31"
fastapi-utils = "^0.7.0"

fakesnow = "^0.9.20"
[tool.poetry.dev-dependencies]
pylint = ">=2.11.1"

Expand Down
55 changes: 41 additions & 14 deletions tests/sqlglot_tests.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
import time
import pyarrow as pa

import duckdb
import sqlglot
from fakesnow.fakes import FakeSnowflakeCursor, FakeSnowflakeConnection

from universql.warehouse.duckdb import fix_snowflake_to_duckdb_types

# queries = sqlglot.parse("""
# SET tables = (SHOW TABLES);
Expand All @@ -8,17 +14,38 @@
# """, read="snowflake")


# Parse a Snowflake session-variable script and transpile it to DuckDB.
query = sqlglot.parse_one("""
SET stmt = $$
SELECT PI();
$$;
SELECT *, 1 FROM $stmt;
""", dialect="snowflake")

# Render the parsed AST as DuckDB SQL.
zsql = query.sql(dialect="duckdb")

# Execute the transpiled SQL. Bug fixes: the original ran
# `duckdb.sql(query.sql())` (default dialect, ignoring the DuckDB
# rendering computed above) and then `print(sql)` on an undefined name.
query = duckdb.sql(zsql)

print(zsql)
# query = sqlglot.parse_one("""
# SET stmt = $$
# SELECT PI();
# $$;
#
# SELECT *, 1 FROM $stmt;
# """, dialect="snowflake")

# Arrow struct layout with epoch / fraction / timezone components —
# apparently mirrors Snowflake's timestamp struct encoding (TODO confirm).
epoch_field = pa.field("epoch", nullable=False, type=pa.int64())
fraction_field = pa.field("fraction", nullable=False, type=pa.int32())
timezone_field = pa.field("timezone", nullable=False, type=pa.int32())
fields = [epoch_field, fraction_field, timezone_field]
pa_type = pa.struct(fields)

# Build a three-row sample struct array to confirm the layout is valid.
pa.StructArray.from_arrays(
    arrays=[
        pa.array([1, 2, 3], type=pa.int64()),
        pa.array([1, 2, 3], type=pa.int32()),
        pa.array([1, 2, 3], type=pa.int32()),
    ],
    fields=fields,
)

# Sample query exercising types that need rewriting for DuckDB
# (TIMESTAMPLTZ / TIMESTAMPTZ / VARIANT are Snowflake-flavored).
# Fix: removed the trailing comma after the last SELECT item — it is a
# syntax error in Snowflake and non-portable across dialects.
query = """
SELECT
    CAST('2023-01-01 10:34:56 +00:00' AS TIMESTAMPLTZ) AS sample_timestamp_ltz,
    CAST('2023-01-01 11:34:56' AS TIMESTAMP) AS sample_timestamp_ntz,
    CAST('2023-01-01 12:34:56 +00:00' AS TIMESTAMPTZ) AS sample_timestamp_tz,
    CAST(JSON('{"key":"value"}') /* Semi-structured data types */ AS VARIANT) AS sample_variant
"""

# Time the creation of ten file-backed DuckDB databases, each holding
# one small two-column table.
start = time.time()
for db_index in range(10):
    connection = duckdb.connect(f"ali/{db_index}")
    connection.execute("CREATE TABLE test (a int, b int)")
print(time.time() - start)

# Rewrite the Snowflake-specific types in the sample query into DuckDB
# equivalents, then execute the rewritten SQL and display the result.
parsed = sqlglot.parse_one(query, dialect="duckdb")
rewritten = parsed.transform(fix_snowflake_to_duckdb_types)
query = rewritten.sql(dialect="duckdb", pretty=True)
print(query)
response = duckdb.sql(query)
print(response.show())
3 changes: 1 addition & 2 deletions universql/catalog/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from snowflake.connector.options import pyarrow

from universql import util
from universql.lake.cloud import CACHE_DIRECTORY_KEY
from universql.util import Catalog


Expand All @@ -25,7 +24,7 @@ def get_catalog(context: dict, query_id: str, credentials: dict):

class Cursor(ABC):
@abstractmethod
def execute(self, ast: sqlglot.exp.Expression) -> None:
def execute(self, ast: typing.Optional[sqlglot.exp.Expression], raw_query : str) -> None:
    """Run a query against the underlying engine.

    Args:
        ast: Parsed SQLGlot expression for the query, or ``None`` when no
            parsed form is available — presumably implementations then fall
            back to *raw_query* (TODO confirm against implementors).
        raw_query: The original SQL text as received from the client.
    """
    pass

@abstractmethod
Expand Down
Loading

0 comments on commit 147731f

Please sign in to comment.