
chore: refactor IsNullOp and NotNullOp logic to make scalar ops generation easier #1822

Open
wants to merge 14 commits into main
8 changes: 6 additions & 2 deletions bigframes/core/array_value.py
@@ -37,10 +37,10 @@
from bigframes.core.window_spec import WindowSpec
import bigframes.dtypes
import bigframes.exceptions as bfe
import bigframes.operations as ops
import bigframes.operations.aggregations as agg_ops

if typing.TYPE_CHECKING:
# Avoid circular imports.
import bigframes.operations.aggregations as agg_ops
from bigframes.session import Session

ORDER_ID_COLUMN = "bigframes_ordering_id"
@@ -185,6 +185,8 @@ def get_column_type(self, key: str) -> bigframes.dtypes.Dtype:

def row_count(self) -> ArrayValue:
"""Get number of rows in ArrayValue as a single-entry ArrayValue."""
import bigframes.operations.aggregations as agg_ops # Avoid circular imports.
Contributor:
I've noticed a potential circular import in the new code and have a few questions. First, what is causing this to occur? Second, I'm concerned this might negatively impact code maintainability and readability. Finally, what would be the best way to refactor the code to resolve this circular dependency?
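
For context, the workaround this diff applies is to defer the import into the function body so it only executes at call time. A minimal standalone sketch of that pattern, with hypothetical module names (pkg/alpha.py and pkg.beta are illustrative, not part of this PR):

# pkg/alpha.py -- hypothetical module illustrating the deferred-import workaround.


def row_total(rows):
    # Importing inside the function body means pkg.beta is only loaded when
    # row_total() is first called, not while pkg.alpha itself is importing,
    # which breaks an import cycle between the two modules.
    from pkg import beta  # Avoid circular imports.

    return beta.count(rows)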


return ArrayValue(
nodes.AggregateNode(
child=self.node,
@@ -200,6 +202,8 @@ def row_count(self) -> ArrayValue:
# Operations
def filter_by_id(self, predicate_id: str, keep_null: bool = False) -> ArrayValue:
"""Filter the table on a given expression, the predicate must be a boolean series aligned with the table expression."""
import bigframes.operations as ops # Avoid circular imports.

predicate: ex.Expression = ex.deref(predicate_id)
if keep_null:
predicate = ops.fillna_op.as_expr(predicate, ex.const(True))
2 changes: 1 addition & 1 deletion bigframes/core/compile/__init__.py
@@ -14,8 +14,8 @@
from __future__ import annotations

from bigframes.core.compile.api import test_only_ibis_inferred_schema
from bigframes.core.compile.compiler import compile_sql
from bigframes.core.compile.configs import CompileRequest, CompileResult
from bigframes.core.compile.ibis_compiler.ibis_compiler import compile_sql

__all__ = [
"test_only_ibis_inferred_schema",
2 changes: 1 addition & 1 deletion bigframes/core/compile/aggregate_compiler.py
@@ -27,8 +27,8 @@
import pandas as pd

from bigframes.core.compile import constants as compiler_constants
import bigframes.core.compile.ibis_compiler.scalar_op_compiler as scalar_compilers
import bigframes.core.compile.ibis_types as compile_ibis_types
import bigframes.core.compile.scalar_op_compiler as scalar_compilers
import bigframes.core.expression as ex
import bigframes.core.window_spec as window_spec
import bigframes.operations.aggregations as agg_ops
6 changes: 3 additions & 3 deletions bigframes/core/compile/api.py
@@ -16,7 +16,7 @@
from typing import TYPE_CHECKING

from bigframes.core import rewrite
from bigframes.core.compile import compiler
from bigframes.core.compile.ibis_compiler import ibis_compiler

if TYPE_CHECKING:
import bigframes.core.nodes
@@ -26,9 +26,9 @@ def test_only_ibis_inferred_schema(node: bigframes.core.nodes.BigFrameNode):
"""Use only for testing paths to ensure ibis inferred schema does not diverge from bigframes inferred schema."""
import bigframes.core.schema

node = compiler._replace_unsupported_ops(node)
node = ibis_compiler._replace_unsupported_ops(node)
node = rewrite.bake_order(node)
ir = compiler.compile_node(node)
ir = ibis_compiler.compile_node(node)
items = tuple(
bigframes.core.schema.SchemaItem(name, ir.get_column_type(ibis_id))
for name, ibis_id in zip(node.schema.names, ir.column_ids)
7 changes: 4 additions & 3 deletions bigframes/core/compile/compiled.py
@@ -32,9 +32,8 @@
from bigframes.core import utils
import bigframes.core.compile.aggregate_compiler as agg_compiler
import bigframes.core.compile.googlesql
import bigframes.core.compile.ibis_compiler.scalar_op_compiler as op_compilers
import bigframes.core.compile.ibis_types
import bigframes.core.compile.scalar_op_compiler as op_compilers
import bigframes.core.compile.scalar_op_compiler as scalar_op_compiler
import bigframes.core.expression as ex
from bigframes.core.ordering import OrderingExpression
import bigframes.core.sql
@@ -679,13 +678,15 @@ def _join_condition(


def _as_groupable(value: ibis_types.Value):
from bigframes.core.compile.ibis_compiler import scalar_op_registry

# Some types need to be converted to another type to enable groupby
if value.type().is_float64():
return value.cast(ibis_dtypes.str)
elif value.type().is_geospatial():
return typing.cast(ibis_types.GeoSpatialColumn, value).as_binary()
elif value.type().is_json():
return scalar_op_compiler.to_json_string(value)
return scalar_op_registry.to_json_string(value)
else:
return value

25 changes: 25 additions & 0 deletions bigframes/core/compile/ibis_compiler/__init__.py
@@ -0,0 +1,25 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Compiler for BigFrames expression to Ibis expression.

Make sure to import all ibis_compiler implementations here so that they get
registered.
"""

from __future__ import annotations

import bigframes.core.compile.ibis_compiler.operations.generic_ops.isnull_op # noqa: F401
import bigframes.core.compile.ibis_compiler.operations.generic_ops.notnull_op # noqa: F401
import bigframes.core.compile.ibis_compiler.scalar_op_registry # noqa: F401
@@ -29,7 +29,6 @@
import bigframes.core.compile.concat as concat_impl
import bigframes.core.compile.configs as configs
import bigframes.core.compile.explode
import bigframes.core.compile.scalar_op_compiler as compile_scalar
import bigframes.core.nodes as nodes
import bigframes.core.ordering as bf_ordering
import bigframes.core.rewrite as rewrites
@@ -178,6 +177,8 @@ def compile_readlocal(node: nodes.ReadLocalNode, *args):

@_compile_node.register
def compile_readtable(node: nodes.ReadTableNode, *args):
from bigframes.core.compile.ibis_compiler import scalar_op_registry

ibis_table = _table_to_ibis(
node.source, scan_cols=[col.source_id for col in node.scan_list.items]
)
@@ -188,7 +189,7 @@ def compile_readtable(node: nodes.ReadTableNode, *args):
scan_item.dtype == dtypes.JSON_DTYPE
and ibis_table[scan_item.source_id].type() == ibis_dtypes.string
):
json_column = compile_scalar.parse_json(
json_column = scalar_op_registry.parse_json(
ibis_table[scan_item.source_id]
).name(scan_item.source_id)
ibis_table = ibis_table.mutate(json_column)
@@ -11,3 +11,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Operation implementations for the Ibis-based compiler.

This directory structure should reflect the same layout as the
`bigframes/operations` directory where the operations are defined.

Prefer one file per op to keep file sizes manageable for text editors and LLMs.
"""
@@ -0,0 +1,25 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

from bigframes_vendored.ibis.expr import types as ibis_types

from bigframes.core.compile.ibis_compiler import scalar_op_compiler
from bigframes.operations.generic_ops import isnull_op


@scalar_op_compiler.scalar_op_compiler.register_unary_op(isnull_op.isnull_op)
def _ibis_isnull_op_impl(x: ibis_types.Value):
    return x.isnull()
@@ -0,0 +1,52 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import dataclasses
from typing import ClassVar

# Imports for Ibis compilation
from bigframes_vendored.ibis.expr import types as ibis_types

# Direct imports from bigframes
from bigframes import dtypes
from bigframes.core.compile.ibis_compiler import scalar_op_compiler
from bigframes.operations import base_ops


@dataclasses.dataclass(frozen=True)
class NotNullOp(base_ops.UnaryOp):
    name: ClassVar[str] = "notnull"

    def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
        return dtypes.BOOL_DTYPE


notnull_op = NotNullOp()


def _ibis_notnull_op_impl(x: ibis_types.Value):
    return x.notnull()


scalar_op_compiler.scalar_op_compiler.register_unary_op(notnull_op)(
    _ibis_notnull_op_impl
)


__all__ = [
"NotNullOp",
"notnull_op",
]
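
To show how the refactored op plugs into expression building, here is a small sketch mirroring the ops.fillna_op.as_expr(...) call in array_value.py earlier in this diff. The module path and column id are assumptions for illustration:

import bigframes.core.expression as ex

# Assumed location: alongside isnull_op under bigframes/operations/generic_ops,
# as suggested by the isnull_op import earlier in this PR.
from bigframes.operations.generic_ops import notnull_op

# Build a boolean predicate marking non-null values of a column; "col_0" is illustrative.
predicate = notnull_op.notnull_op.as_expr(ex.deref("col_0"))

# Per the definition above, output_type(...) always reports BOOL_DTYPE, so the
# resulting expression is typed as a boolean regardless of the input column's dtype.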