Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -179,8 +179,19 @@ def import_array_stream(
) -> typing.Any:
return polars.from_arrow(handle)

def import_schema(self, handle: _lib.ArrowSchemaHandle) -> typing.Any:
raise _lib.NotSupportedError("Polars does not support __arrow_c_schema__")
def import_schema(self, handle: _lib.ArrowSchemaHandle) -> polars.Schema:
# The version that Polars added support to initialize a schema via the
# __arrow_c_schema__ interface
required_version = (1, 32, 2)
polars_version = polars.__version__
if tuple(int(v) for v in polars_version.split(".")) >= required_version:
return polars.Schema(handle)
Comment on lines +183 to +188
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alternatively, we could skip the version check and just return polars.Schema(handle) immediately.

On Polars versions older than the required one, that would raise the error below. IMO giving the user a nicer error message is preferable, though.

Traceback (most recent call last):
  File "/home/henry/development/arrow-adbc/python/adbc_driver_manager/try_import_schema.py", line 11, in <module>
    schema = conn.adbc_get_table_schema("test_table_schema")
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/henry/development/arrow-adbc/python/adbc_driver_manager/adbc_driver_manager/dbapi.py", line 521, in adbc_get_table_schema
    return self._backend.import_schema(handle)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/henry/development/arrow-adbc/python/adbc_driver_manager/adbc_driver_manager/_dbapi_backend.py", line 183, in import_schema
    return polars.Schema(handle)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/home/henry/.cache/uv/archive-v0/SC8MnmcDLgKYq5uEoGc4z/lib/python3.11/site-packages/polars/schema.py", line 102, in __init__
    for name, tp in input:
TypeError: 'adbc_driver_manager._lib.ArrowSchemaHandle' object is not iterable

msg = (
"Initializing Polars Schema from __arrow_c_schema__ interface requires "
f"version {'.'.join(str(m) for m in required_version)} or higher. "
f"Found {polars_version!r}"
)
raise _lib.NotSupportedError(msg)

_ALL_BACKENDS.append(_PolarsBackend())
except ImportError:
Expand Down
5 changes: 5 additions & 0 deletions python/adbc_driver_manager/adbc_driver_manager/_lib.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -145,16 +145,21 @@ class ArrowArrayHandle:
address: int
is_valid: bool
def release(self) -> None: ...
def __arrow_c_array__(
self, requested_schema: object | None
) -> tuple[object, object]: ...

class ArrowArrayStreamHandle:
address: int
is_valid: bool
def release(self) -> None: ...
def __arrow_c_stream__(self, requested_schema: object | None) -> object: ...

class ArrowSchemaHandle:
address: int
is_valid: bool
def release(self) -> None: ...
def __arrow_c_schema__(self) -> object: ...
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This stops the type checker from complaining about polars.Schema(handle).
I figured it would be worth adding the other __arrow_c_... methods to the stubs too.


class DataError(DatabaseError): ...
class DatabaseError(Error): ...
Expand Down
20 changes: 12 additions & 8 deletions python/adbc_driver_manager/adbc_driver_manager/dbapi.py
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, I think this works but would be inconvenient for people (I'm guessing this will require type assertions all over the place). But I'm not sure of a better way to do this for now. (IIRC associated types are unsupported? Else we could template Cursor on the backend type and make the return type dependent on the backend type's associated type.)

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I agree it will be inconvenient. FWIW, I think it also works if the user manually adds a type hint to the variable at assignment.

Else we could template Cursor on the backend type and make the return type dependent on the backend type's associated type.

I'll have a crack at something along those lines.

Original file line number Diff line number Diff line change
Expand Up @@ -491,7 +491,7 @@ def adbc_get_table_schema(
*,
catalog_filter: Optional[str] = None,
db_schema_filter: Optional[str] = None,
) -> "pyarrow.Schema":
) -> "pyarrow.Schema | polars.Schema":
"""
Get the Arrow schema of a table by name.

Expand Down Expand Up @@ -1027,16 +1027,16 @@ def adbc_execute_partitions(
self,
operation,
parameters=None,
) -> Tuple[List[bytes], "pyarrow.Schema"]:
) -> Tuple[List[bytes], "pyarrow.Schema | polars.Schema"]:
"""
Execute a query and get the partitions of a distributed result set.

Returns
-------
partitions : list of byte
partitions : list of bytes
A list of partition descriptors, which can be read with
read_partition.
schema : pyarrow.Schema or None
schema : pyarrow.Schema, polars.Schema or None
The schema of the result set. May be None if incremental query
execution is enabled and the server has not returned a schema.

Expand All @@ -1055,13 +1055,15 @@ def adbc_execute_partitions(
schema = None
return partitions, schema

def adbc_execute_schema(self, operation, parameters=None) -> "pyarrow.Schema":
def adbc_execute_schema(
self, operation, parameters=None
) -> "pyarrow.Schema | polars.Schema":
"""
Get the schema of the result set of a query without executing it.

Returns
-------
pyarrow.Schema
pyarrow.Schema or polars.Schema
The schema of the result set.

Notes
Expand All @@ -1073,7 +1075,9 @@ def adbc_execute_schema(self, operation, parameters=None) -> "pyarrow.Schema":
schema = _blocking_call(self._stmt.execute_schema, (), {}, self._stmt.cancel)
return self._conn._backend.import_schema(schema)

def adbc_prepare(self, operation: Union[bytes, str]) -> Optional["pyarrow.Schema"]:
def adbc_prepare(
self, operation: Union[bytes, str]
) -> Optional["pyarrow.Schema | polars.Schema"]:
"""
Prepare a query without executing it.

Expand All @@ -1083,7 +1087,7 @@ def adbc_prepare(self, operation: Union[bytes, str]) -> Optional["pyarrow.Schema

Returns
-------
pyarrow.Schema or None
pyarrow.Schema, polars.Schema or None
The schema of the bind parameters, or None if the schema
could not be determined.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -295,15 +295,15 @@ def test_query_double_capsule(sqlite: dbapi.Connection) -> None:
polars.from_arrow(capsule)


@pytest.mark.xfail(raises=dbapi.NotSupportedError)
def test_get_table_schema(sqlite: dbapi.Connection) -> None:
with sqlite.cursor() as cursor:
cursor.execute("CREATE TABLE test_table_schema (a INT, b STRING)")
cursor.execute("INSERT INTO test_table_schema VALUES (1, 'hello')")

schema = sqlite.adbc_get_table_schema("test_table_schema")
assert schema == polars.Schema(
[
("a", polars.Int32),
("a", polars.Int64),
("b", polars.String),
]
)
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@


class DatabaseOptions(enum.Enum):
"""Database options specific to the Flight SQL driver."""
"""Database options specific to the Snowflake driver."""

ACCOUNT = "adbc.snowflake.sql.account"
APPLICATION_NAME = "adbc.snowflake.sql.client_option.app_name"
Expand Down
Loading