From 8bde85d85470c197e791bfcfd65f884ab09b950d Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Wed, 19 Mar 2025 14:46:06 +0000 Subject: [PATCH 1/6] feat: updates time partitioning --- sqlalchemy_bigquery/base.py | 47 ++++++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/sqlalchemy_bigquery/base.py b/sqlalchemy_bigquery/base.py index 0204bc92..dd3c2aab 100644 --- a/sqlalchemy_bigquery/base.py +++ b/sqlalchemy_bigquery/base.py @@ -832,14 +832,11 @@ def _process_time_partitioning( function returns: "PARTITION BY TIMESTAMP_TRUNC(event_timestamp, DAY)". - Current inputs allowed by BQ and covered by this function include: + Current inputs allowed by BQ AND covered by this function include: * _PARTITIONDATE * DATETIME_TRUNC(, DAY/HOUR/MONTH/YEAR) * TIMESTAMP_TRUNC(, DAY/HOUR/MONTH/YEAR) * DATE_TRUNC(, MONTH/YEAR) - - Additional options allowed by BQ but not explicitly covered by this - function include: * DATE(_PARTITIONTIME) * DATE() * DATE() @@ -847,12 +844,40 @@ def _process_time_partitioning( """ sqltypes = { - "_PARTITIONDATE": ("_PARTITIONDATE", None), - "TIMESTAMP": ("TIMESTAMP_TRUNC", {"DAY", "HOUR", "MONTH", "YEAR"}), - "DATETIME": ("DATETIME_TRUNC", {"DAY", "HOUR", "MONTH", "YEAR"}), - "DATE": ("DATE_TRUNC", {"MONTH", "YEAR"}), + # column_type | truncation func OR default value | partitioning_period(s) + + "_PARTITIONDATE": ("_PARTITIONDATE", None), # default value, no period + "_PARTITIONTIME": ("DATE", None), # trunc_fn, no period + "DATE": { + "no_period": (None, None), # date_column, no trunc_fn, no period + "period": ("DATE_TRUNC", {"MONTH", "YEAR"}), # date_column, trunc_fn, period(s) + }, + "DATETIME": { + "no_period": ("DATE", None), # datetime_column, trunc_fn, no period + "period": ("DATETIME_TRUNC", {"DAY", "HOUR", "MONTH", "YEAR"}), # datetime_column, trunc_fn, period(s) + }, + "TIMESTAMP": { + "no_period": ("DATE", None), # timestamp_column, trunc_fn, no period + "period": ("TIMESTAMP_TRUNC", {"DAY", "HOUR", "MONTH", "YEAR"}), # timestamp_column, trunc_fn, period(s) + }, } + def parse_sqltypes(coltype, partitioning_period): + """Returns the default value OR the truncation function to be used + and the allowed partitioning periods. + """ + + if coltype in {"_PARTITIONDATE", "_PARTITIONTIME"}: + return sqltypes[coltype] + + # by this point, value must be a nested dict + if partitioning_period is None: + # use "no_period" key + return sqltypes[coltype]["no_period"] + else: + # use "period" key + return sqltypes[coltype]["period"] + # Extract field (i.e or _PARTITIONDATE) # AND extract the name of the column_type (i.e. "TIMESTAMP", "DATE", # "DATETIME", "_PARTITIONDATE") @@ -870,14 +895,14 @@ def _process_time_partitioning( # immediately overwritten by python-bigquery to a default of DAY. partitioning_period = time_partitioning.type_ - # Extract the truncation_function (i.e. DATE_TRUNC) + # Extract the default value or truncation_function (i.e. DATE_TRUNC()) # and the set of allowable partition_periods # that can be used in that function - trunc_fn, allowed_partitions = sqltypes[column_type] + trunc_fn, allowed_partitions = parse_sqltypes(column_type, time_partitioning) # Create output: # Special Case: _PARTITIONDATE does NOT use a function or partitioning_period - if trunc_fn == "_PARTITIONDATE": + if trunc_fn is None or trunc_fn in {"_PARTITIONDATE"}: return f"PARTITION BY {field}" # Special Case: BigQuery will not accept DAY as partitioning_period for From d68cf1740fe692e1c484875a4989885b12a397e5 Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Wed, 19 Mar 2025 14:50:12 +0000 Subject: [PATCH 2/6] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20po?= =?UTF-8?q?st-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- sqlalchemy_bigquery/base.py | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/sqlalchemy_bigquery/base.py b/sqlalchemy_bigquery/base.py index dd3c2aab..cdd9aeb3 100644 --- a/sqlalchemy_bigquery/base.py +++ b/sqlalchemy_bigquery/base.py @@ -845,20 +845,28 @@ def _process_time_partitioning( sqltypes = { # column_type | truncation func OR default value | partitioning_period(s) - - "_PARTITIONDATE": ("_PARTITIONDATE", None), # default value, no period - "_PARTITIONTIME": ("DATE", None), # trunc_fn, no period + "_PARTITIONDATE": ("_PARTITIONDATE", None), # default value, no period + "_PARTITIONTIME": ("DATE", None), # trunc_fn, no period "DATE": { - "no_period": (None, None), # date_column, no trunc_fn, no period - "period": ("DATE_TRUNC", {"MONTH", "YEAR"}), # date_column, trunc_fn, period(s) - }, + "no_period": (None, None), # date_column, no trunc_fn, no period + "period": ( + "DATE_TRUNC", + {"MONTH", "YEAR"}, + ), # date_column, trunc_fn, period(s) + }, "DATETIME": { - "no_period": ("DATE", None), # datetime_column, trunc_fn, no period - "period": ("DATETIME_TRUNC", {"DAY", "HOUR", "MONTH", "YEAR"}), # datetime_column, trunc_fn, period(s) - }, + "no_period": ("DATE", None), # datetime_column, trunc_fn, no period + "period": ( + "DATETIME_TRUNC", + {"DAY", "HOUR", "MONTH", "YEAR"}, + ), # datetime_column, trunc_fn, period(s) + }, "TIMESTAMP": { - "no_period": ("DATE", None), # timestamp_column, trunc_fn, no period - "period": ("TIMESTAMP_TRUNC", {"DAY", "HOUR", "MONTH", "YEAR"}), # timestamp_column, trunc_fn, period(s) + "no_period": ("DATE", None), # timestamp_column, trunc_fn, no period + "period": ( + "TIMESTAMP_TRUNC", + {"DAY", "HOUR", "MONTH", "YEAR"}, + ), # timestamp_column, trunc_fn, period(s) }, } @@ -866,10 +874,10 @@ def parse_sqltypes(coltype, partitioning_period): """Returns the default value OR the truncation function to be used and the allowed partitioning periods. """ - + if coltype in {"_PARTITIONDATE", "_PARTITIONTIME"}: return sqltypes[coltype] - + # by this point, value must be a nested dict if partitioning_period is None: # use "no_period" key From 3954351c329ab9abaea842c251b4f46943822f5c Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Thu, 20 Mar 2025 15:18:32 +0000 Subject: [PATCH 3/6] Updates function argument: partitioning_period --- sqlalchemy_bigquery/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlalchemy_bigquery/base.py b/sqlalchemy_bigquery/base.py index cdd9aeb3..472c9dd4 100644 --- a/sqlalchemy_bigquery/base.py +++ b/sqlalchemy_bigquery/base.py @@ -906,7 +906,7 @@ def parse_sqltypes(coltype, partitioning_period): # Extract the default value or truncation_function (i.e. DATE_TRUNC()) # and the set of allowable partition_periods # that can be used in that function - trunc_fn, allowed_partitions = parse_sqltypes(column_type, time_partitioning) + trunc_fn, allowed_partitions = parse_sqltypes(column_type, partitioning_period) # Create output: # Special Case: _PARTITIONDATE does NOT use a function or partitioning_period From 73eb01ff57fe6381163680c1e6bb11106b5409c8 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Mon, 31 Mar 2025 18:07:29 +0000 Subject: [PATCH 4/6] updates sqltypes dicts --- sqlalchemy_bigquery/base.py | 47 +++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/sqlalchemy_bigquery/base.py b/sqlalchemy_bigquery/base.py index 472c9dd4..b5743237 100644 --- a/sqlalchemy_bigquery/base.py +++ b/sqlalchemy_bigquery/base.py @@ -870,22 +870,6 @@ def _process_time_partitioning( }, } - def parse_sqltypes(coltype, partitioning_period): - """Returns the default value OR the truncation function to be used - and the allowed partitioning periods. - """ - - if coltype in {"_PARTITIONDATE", "_PARTITIONTIME"}: - return sqltypes[coltype] - - # by this point, value must be a nested dict - if partitioning_period is None: - # use "no_period" key - return sqltypes[coltype]["no_period"] - else: - # use "period" key - return sqltypes[coltype]["period"] - # Extract field (i.e or _PARTITIONDATE) # AND extract the name of the column_type (i.e. "TIMESTAMP", "DATE", # "DATETIME", "_PARTITIONDATE") @@ -903,14 +887,37 @@ def parse_sqltypes(coltype, partitioning_period): # immediately overwritten by python-bigquery to a default of DAY. partitioning_period = time_partitioning.type_ + # TODO: move dict outside the function or to top of function + sqltypes_w_no_partitioning_period = { + # Keys are columns, values are functions + "_PARTITIONDATE": None, + "_PARTITIONTIME": "DATE", # + "DATE": None, # 'DATE' is a not a function + "DATETIME": "DATE", # + "TIMESTAMP": "DATE", # + } + + # TODO: move dict outside the function or to top of function + sqltypes_w_partitioning_period = { + # Keys are columns, values are (functions, {allowed_partioning_periods}) + "DATE": ("DATE_TRUNC", {"MONTH", "YEAR"}), + "DATETIME": ("DATETIME_TRUNC", {"DAY", "HOUR", "MONTH", "YEAR"}), + "TIMESTAMP": ("TIMESTAMP_TRUNC", {"DAY", "HOUR", "MONTH", "YEAR"}), + } + # Extract the default value or truncation_function (i.e. DATE_TRUNC()) # and the set of allowable partition_periods # that can be used in that function - trunc_fn, allowed_partitions = parse_sqltypes(column_type, partitioning_period) + if partitioning_period is None: + # do stuff via swnpp + function = sqltypes_w_no_partitioning_period[column_type] + else: + # do different stuff via swpp + function, allowed_partitions = sqltypes_w_partitioning_period[column_type, partitioning_period] # Create output: # Special Case: _PARTITIONDATE does NOT use a function or partitioning_period - if trunc_fn is None or trunc_fn in {"_PARTITIONDATE"}: + if function is None: return f"PARTITION BY {field}" # Special Case: BigQuery will not accept DAY as partitioning_period for @@ -919,7 +926,7 @@ def parse_sqltypes(coltype, partitioning_period): # is DAY. This case overwrites that to avoid making a breaking change in # python-bigquery. # https://github.com/googleapis/python-bigquery/blob/a4d9534a900f13ae7355904cda05097d781f27e3/google/cloud/bigquery/table.py#L2916 - if trunc_fn == "DATE_TRUNC" and partitioning_period == "DAY": + if function == "DATE_TRUNC" and partitioning_period == "DAY": raise ValueError( "The TimePartitioning.type_ must be one of: " f"{allowed_partitions}, received {partitioning_period}." @@ -935,7 +942,7 @@ def parse_sqltypes(coltype, partitioning_period): f"{allowed_partitions}, received {partitioning_period}." ) - return f"PARTITION BY {trunc_fn}({field}, {partitioning_period})" + return f"PARTITION BY {function}({field}, {partitioning_period})" def _process_range_partitioning( self, table: Table, range_partitioning: RangePartitioning From 89548bc8857192d582a4159e9fe56d872b06735e Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Tue, 1 Apr 2025 12:48:58 +0000 Subject: [PATCH 5/6] Updates processing_time_partitioning() method --- sqlalchemy_bigquery/base.py | 70 +++++++++++++------------------------ 1 file changed, 25 insertions(+), 45 deletions(-) diff --git a/sqlalchemy_bigquery/base.py b/sqlalchemy_bigquery/base.py index b5743237..d2c75580 100644 --- a/sqlalchemy_bigquery/base.py +++ b/sqlalchemy_bigquery/base.py @@ -843,51 +843,31 @@ def _process_time_partitioning( * DATE column """ - sqltypes = { - # column_type | truncation func OR default value | partitioning_period(s) - "_PARTITIONDATE": ("_PARTITIONDATE", None), # default value, no period - "_PARTITIONTIME": ("DATE", None), # trunc_fn, no period - "DATE": { - "no_period": (None, None), # date_column, no trunc_fn, no period - "period": ( - "DATE_TRUNC", - {"MONTH", "YEAR"}, - ), # date_column, trunc_fn, period(s) - }, - "DATETIME": { - "no_period": ("DATE", None), # datetime_column, trunc_fn, no period - "period": ( - "DATETIME_TRUNC", - {"DAY", "HOUR", "MONTH", "YEAR"}, - ), # datetime_column, trunc_fn, period(s) - }, - "TIMESTAMP": { - "no_period": ("DATE", None), # timestamp_column, trunc_fn, no period - "period": ( - "TIMESTAMP_TRUNC", - {"DAY", "HOUR", "MONTH", "YEAR"}, - ), # timestamp_column, trunc_fn, period(s) - }, - } - - # Extract field (i.e or _PARTITIONDATE) - # AND extract the name of the column_type (i.e. "TIMESTAMP", "DATE", + # Extract field if given (i.e ) or _PARTITIONDATE if not given + # AND extract the name of the column_type (i.e. is it a "TIMESTAMP", "DATE", # "DATETIME", "_PARTITIONDATE") + # Also extract the time_partitioning.type_ (i.e. the truncation granularity: + # HOUR, DAY, MONTH, YEAR) if time_partitioning.field is not None: field = time_partitioning.field column_type = table.columns[field].type.__visit_name__.upper() + # Extract time_partitioning.type_ (DAY, HOUR, MONTH, YEAR) + # i.e. generates one partition per type (1/DAY, 1/HOUR) + # NOTE: if time_partitioning.type_ == None, the python-bigquery library + # will eventually overwrite it with a default of DAY. + partitioning_period = time_partitioning.type_ + else: + # If no field is given, default to "_PARTITIONDATE" as the + # field to partition on. In addition, to normalize the processing in + # the remainder of this function, set column_type and partitioning_period + # as shown below. field = "_PARTITIONDATE" column_type = "_PARTITIONDATE" + partitioning_period = None - # Extract time_partitioning.type_ (DAY, HOUR, MONTH, YEAR) - # i.e. generates one partition per type (1/DAY, 1/HOUR) - # NOTE: if time_partitioning.type_ == None, it gets - # immediately overwritten by python-bigquery to a default of DAY. - partitioning_period = time_partitioning.type_ - - # TODO: move dict outside the function or to top of function + # TODO: move this dictionary outside the function or to top of function sqltypes_w_no_partitioning_period = { # Keys are columns, values are functions "_PARTITIONDATE": None, @@ -897,32 +877,32 @@ def _process_time_partitioning( "TIMESTAMP": "DATE", # } - # TODO: move dict outside the function or to top of function + # TODO: move this dictionary outside the function or to top of function sqltypes_w_partitioning_period = { # Keys are columns, values are (functions, {allowed_partioning_periods}) + #"_PARTITIONDATE": ("_PARTITIONDATE", {}), "DATE": ("DATE_TRUNC", {"MONTH", "YEAR"}), "DATETIME": ("DATETIME_TRUNC", {"DAY", "HOUR", "MONTH", "YEAR"}), "TIMESTAMP": ("TIMESTAMP_TRUNC", {"DAY", "HOUR", "MONTH", "YEAR"}), } - # Extract the default value or truncation_function (i.e. DATE_TRUNC()) - # and the set of allowable partition_periods + # Extract truncation_function (i.e. DATE_TRUNC()) or a default value if + # a truncation_function is not used (i.e. for _PARTITIONDATE) + # Also extract the set of allowable partition_periods # that can be used in that function if partitioning_period is None: - # do stuff via swnpp function = sqltypes_w_no_partitioning_period[column_type] else: - # do different stuff via swpp - function, allowed_partitions = sqltypes_w_partitioning_period[column_type, partitioning_period] + function, allowed_partitions = sqltypes_w_partitioning_period[column_type] # Create output: - # Special Case: _PARTITIONDATE does NOT use a function or partitioning_period + # Special Case 1: _PARTITIONDATE does NOT use a function or partitioning_period if function is None: return f"PARTITION BY {field}" - # Special Case: BigQuery will not accept DAY as partitioning_period for + # Special Case 2: BigQuery will not accept DAY as partitioning_period for # DATE_TRUNC. - # However, the default argument in python-bigquery for TimePartioning + # However, the default argument in python-bigquery for TimePartitioning # is DAY. This case overwrites that to avoid making a breaking change in # python-bigquery. # https://github.com/googleapis/python-bigquery/blob/a4d9534a900f13ae7355904cda05097d781f27e3/google/cloud/bigquery/table.py#L2916 From ebc810cc1b296ce3725b1b096466956f70dffc86 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Wed, 9 Apr 2025 12:23:50 +0000 Subject: [PATCH 6/6] updates linting on base.py --- sqlalchemy_bigquery/base.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sqlalchemy_bigquery/base.py b/sqlalchemy_bigquery/base.py index d2c75580..257c5684 100644 --- a/sqlalchemy_bigquery/base.py +++ b/sqlalchemy_bigquery/base.py @@ -859,7 +859,7 @@ def _process_time_partitioning( partitioning_period = time_partitioning.type_ else: - # If no field is given, default to "_PARTITIONDATE" as the + # If no field is given, default to "_PARTITIONDATE" as the # field to partition on. In addition, to normalize the processing in # the remainder of this function, set column_type and partitioning_period # as shown below. @@ -871,16 +871,16 @@ def _process_time_partitioning( sqltypes_w_no_partitioning_period = { # Keys are columns, values are functions "_PARTITIONDATE": None, - "_PARTITIONTIME": "DATE", # - "DATE": None, # 'DATE' is a not a function - "DATETIME": "DATE", # - "TIMESTAMP": "DATE", # + "_PARTITIONTIME": "DATE", # + "DATE": None, # 'DATE' is a not a function + "DATETIME": "DATE", # + "TIMESTAMP": "DATE", # } # TODO: move this dictionary outside the function or to top of function sqltypes_w_partitioning_period = { # Keys are columns, values are (functions, {allowed_partioning_periods}) - #"_PARTITIONDATE": ("_PARTITIONDATE", {}), + # "_PARTITIONDATE": ("_PARTITIONDATE", {}), "DATE": ("DATE_TRUNC", {"MONTH", "YEAR"}), "DATETIME": ("DATETIME_TRUNC", {"DAY", "HOUR", "MONTH", "YEAR"}), "TIMESTAMP": ("TIMESTAMP_TRUNC", {"DAY", "HOUR", "MONTH", "YEAR"}), @@ -893,7 +893,7 @@ def _process_time_partitioning( if partitioning_period is None: function = sqltypes_w_no_partitioning_period[column_type] else: - function, allowed_partitions = sqltypes_w_partitioning_period[column_type] + function, allowed_partitions = sqltypes_w_partitioning_period[column_type] # Create output: # Special Case 1: _PARTITIONDATE does NOT use a function or partitioning_period