From 4c8d937734a32334e8786fbd0c9e5f9314490b6c Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Fri, 18 Jul 2025 03:53:17 +0000 Subject: [PATCH 1/4] docs: add code snippets for session and IO public docs --- .../snippets/load_data_from_bigquery_test.py | 24 --- samples/snippets/load_data_from_csv_test.py | 25 --- samples/snippets/sessions_and_io_test.py | 167 ++++++++++++++++++ 3 files changed, 167 insertions(+), 49 deletions(-) delete mode 100644 samples/snippets/load_data_from_bigquery_test.py delete mode 100644 samples/snippets/load_data_from_csv_test.py create mode 100644 samples/snippets/sessions_and_io_test.py diff --git a/samples/snippets/load_data_from_bigquery_test.py b/samples/snippets/load_data_from_bigquery_test.py deleted file mode 100644 index 4523eece97..0000000000 --- a/samples/snippets/load_data_from_bigquery_test.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -def test_bigquery_dataframes_load_data_from_bigquery() -> None: - # [START bigquery_dataframes_load_data_from_bigquery] - # Create a DataFrame from a BigQuery table: - import bigframes.pandas as bpd - - query_or_table = "bigquery-public-data.ml_datasets.penguins" - bq_df = bpd.read_gbq(query_or_table) - # [END bigquery_dataframes_load_data_from_bigquery] - assert bq_df is not None diff --git a/samples/snippets/load_data_from_csv_test.py b/samples/snippets/load_data_from_csv_test.py deleted file mode 100644 index cc96b92fb8..0000000000 --- a/samples/snippets/load_data_from_csv_test.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -def test_bigquery_dataframes_load_data_from_csv() -> None: - # [START bigquery_dataframes_load_data_from_csv] - import bigframes.pandas as bpd - - filepath_or_buffer = "gs://cloud-samples-data/bigquery/us-states/us-states.csv" - df_from_gcs = bpd.read_csv(filepath_or_buffer) - # Display the first few rows of the DataFrame: - df_from_gcs.head() - # [END bigquery_dataframes_load_data_from_csv] - assert df_from_gcs is not None diff --git a/samples/snippets/sessions_and_io_test.py b/samples/snippets/sessions_and_io_test.py new file mode 100644 index 0000000000..47969268f8 --- /dev/null +++ b/samples/snippets/sessions_and_io_test.py @@ -0,0 +1,167 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +def test_sessions_and_io(project_id, dataset_id) -> None: + YOUR_PROJECT_ID = project_id + YOUR_LOCATION = "us" + + # [START bigquery_dataframes_create_and_use_session_instance] + import bigframes + import bigframes.pandas as bpd + + # Create session object + context = bigframes.BigQueryOptions( + project=YOUR_PROJECT_ID, + location=YOUR_LOCATION, + ) + session = bigframes.Session(context) + + # Load a BigQuery table into a dataframe + df1 = session.read_gbq("bigquery-public-data.ml_datasets.penguins") + + # Create a dataframe with local data: + df2 = bpd.DataFrame({"my_col": [1, 2, 3]}, session=session) + # [END bigquery_dataframes_create_and_use_session_instance] + assert df1 is not None + assert df2 is not None + + # [START bigquery_dataframes_combine_data_from_multiple_sessions_raise_error] + import bigframes + import bigframes.pandas as bpd + + context = bigframes.BigQueryOptions(location=YOUR_LOCATION, project=YOUR_PROJECT_ID) + + session1 = bigframes.Session(context) + session2 = bigframes.Session(context) + + series1 = bpd.Series([1, 2, 3, 4, 5], session=session1) + series2 = bpd.Series([1, 2, 3, 4, 5], session=session2) + + try: + series1 + series2 + except ValueError as e: + print(e) # Error message: Cannot use combine sources from multiple sessions + # [END bigquery_dataframes_combine_data_from_multiple_sessions_raise_error] + + # [START bigquery_dataframes_set_options_for_global_session] + import bigframes.pandas as bpd + + # Set project ID for the global session + bpd.options.bigquery.project = YOUR_PROJECT_ID + # Update the global default session location + bpd.options.bigquery.location = YOUR_LOCATION + # [END bigquery_dataframes_set_options_for_global_session] + + # [START bigquery_dataframes_global_session_is_the_default_session] + # The following two statements are essentially the same + df = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins") + df = bpd.get_global_session().read_gbq("bigquery-public-data.ml_datasets.penguins") + # [END 
bigquery_dataframes_global_session_is_the_default_session] + assert df is not None + + # [START bigquery_dataframes_create_dataframe_from_py_and_np] + import numpy as np + + import bigframes.pandas as bpd + + s = bpd.Series([1, 2, 3]) + + # Create a dataframe with Python dict + df = bpd.DataFrame( + { + "col_1": [1, 2, 3], + "col_2": [4, 5, 6], + } + ) + + # Create a series with Numpy + s = bpd.Series(np.arange(10)) + # [END bigquery_dataframes_create_dataframe_from_py_and_np] + assert s is not None + + # [START bigquery_dataframes_create_dataframe_from_pandas] + import numpy as np + import pandas as pd + + import bigframes.pandas as bpd + + pd_df = pd.DataFrame(np.random.randn(4, 2)) + + # Convert Pandas dataframe to BigQuery DataFrame with read_pandas() + df_1 = bpd.read_pandas(pd_df) + # Convert Pandas dataframe to BigQuery DataFrame with the dataframe constructor + df_2 = bpd.DataFrame(pd_df) + # [END bigquery_dataframes_create_dataframe_from_pandas] + + # [START bigquery_dataframes_convert_bq_dataframe_to_pandas] + import bigframes.pandas as bpd + + bf_df = bpd.DataFrame({"my_col": [1, 2, 3]}) + # Returns a Pandas Dataframe + bf_df.to_pandas() + + bf_s = bpd.Series([1, 2, 3]) + # Returns a Pandas Series + bf_s.to_pandas() + # [END bigquery_dataframes_convert_bq_dataframe_to_pandas] + assert bf_s.to_pandas is not None + + # [START bigquery_dataframes_to_pandas_dry_run] + import bigframes.pandas as bpd + + df = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins") + + # Returns a Pandas series with dry run stats + df.to_pandas(dry_run=True) + # [END bigquery_dataframes_to_pandas_dry_run] + assert df.to_pandas(dry_run=True) is not None + + # [START bigquery_dataframes_load_data_from_csv] + import bigframes.pandas as bpd + + # Read a CSV file from GCS + df = bpd.read_csv("gs://cloud-samples-data/bigquery/us-states/us-states.csv") + # [END bigquery_dataframes_load_data_from_csv] + assert df is not None + + # [START bigquery_dataframes_load_data_from_bigquery] + 
import bigframes.pandas as bpd + + df = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins") + # [END bigquery_dataframes_load_data_from_bigquery] + assert df is not None + + # [START bigquery_dataframes_read_from_sql_query] + import bigframes.pandas as bpd + + sql = """ + SELECT species, island, body_mass_g + FROM bigquery-public-data.ml_datasets.penguins + WHERE sex = 'MALE' + """ + + df = bpd.read_gbq(sql) + # [END bigquery_dataframes_read_from_sql_query] + assert df is not None + + table_name = "snippets-session-and-io-test" + + # [START bigquery_dataframes_dataframe_to_bigquery_table] + import bigframes.pandas as bpd + + df = bpd.DataFrame({"my_col": [1, 2, 3]}) + + df.to_gbq(f"{project_id}.{dataset_id}.{table_name}") + # [END bigquery_dataframes_dataframe_to_bigquery_table] From 187e2790958127670bb34ab604d527bf8aa398a6 Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Fri, 18 Jul 2025 04:00:12 +0000 Subject: [PATCH 2/4] fix lint --- samples/snippets/sessions_and_io_test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/samples/snippets/sessions_and_io_test.py b/samples/snippets/sessions_and_io_test.py index 47969268f8..0cbdecd0e7 100644 --- a/samples/snippets/sessions_and_io_test.py +++ b/samples/snippets/sessions_and_io_test.py @@ -13,7 +13,7 @@ # limitations under the License. 
-def test_sessions_and_io(project_id, dataset_id) -> None: +def test_sessions_and_io(project_id: str, dataset_id: str) -> None: YOUR_PROJECT_ID = project_id YOUR_LOCATION = "us" @@ -104,6 +104,8 @@ def test_sessions_and_io(project_id, dataset_id) -> None: # Convert Pandas dataframe to BigQuery DataFrame with the dataframe constructor df_2 = bpd.DataFrame(pd_df) # [END bigquery_dataframes_create_dataframe_from_pandas] + assert df_1 is not None + assert df_2 is not None # [START bigquery_dataframes_convert_bq_dataframe_to_pandas] import bigframes.pandas as bpd From 204842d5885cfb1a47fd3f2ee9ce70eed648ea6f Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Mon, 21 Jul 2025 17:08:26 +0000 Subject: [PATCH 3/4] undo test file deletion --- .../snippets/load_data_from_bigquery_test.py | 24 ++++++++++++++++++ samples/snippets/load_data_from_csv_test.py | 25 +++++++++++++++++++ samples/snippets/sessions_and_io_test.py | 8 +++--- 3 files changed, 53 insertions(+), 4 deletions(-) create mode 100644 samples/snippets/load_data_from_bigquery_test.py create mode 100644 samples/snippets/load_data_from_csv_test.py diff --git a/samples/snippets/load_data_from_bigquery_test.py b/samples/snippets/load_data_from_bigquery_test.py new file mode 100644 index 0000000000..4523eece97 --- /dev/null +++ b/samples/snippets/load_data_from_bigquery_test.py @@ -0,0 +1,24 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +def test_bigquery_dataframes_load_data_from_bigquery() -> None: + # [START bigquery_dataframes_load_data_from_bigquery] + # Create a DataFrame from a BigQuery table: + import bigframes.pandas as bpd + + query_or_table = "bigquery-public-data.ml_datasets.penguins" + bq_df = bpd.read_gbq(query_or_table) + # [END bigquery_dataframes_load_data_from_bigquery] + assert bq_df is not None diff --git a/samples/snippets/load_data_from_csv_test.py b/samples/snippets/load_data_from_csv_test.py new file mode 100644 index 0000000000..cc96b92fb8 --- /dev/null +++ b/samples/snippets/load_data_from_csv_test.py @@ -0,0 +1,25 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +def test_bigquery_dataframes_load_data_from_csv() -> None: + # [START bigquery_dataframes_load_data_from_csv] + import bigframes.pandas as bpd + + filepath_or_buffer = "gs://cloud-samples-data/bigquery/us-states/us-states.csv" + df_from_gcs = bpd.read_csv(filepath_or_buffer) + # Display the first few rows of the DataFrame: + df_from_gcs.head() + # [END bigquery_dataframes_load_data_from_csv] + assert df_from_gcs is not None diff --git a/samples/snippets/sessions_and_io_test.py b/samples/snippets/sessions_and_io_test.py index 0cbdecd0e7..ee5d83ee89 100644 --- a/samples/snippets/sessions_and_io_test.py +++ b/samples/snippets/sessions_and_io_test.py @@ -130,19 +130,19 @@ def test_sessions_and_io(project_id: str, dataset_id: str) -> None: # [END bigquery_dataframes_to_pandas_dry_run] assert df.to_pandas(dry_run=True) is not None - # [START bigquery_dataframes_load_data_from_csv] + # [START bigquery_dataframes_read_data_from_csv] import bigframes.pandas as bpd # Read a CSV file from GCS df = bpd.read_csv("gs://cloud-samples-data/bigquery/us-states/us-states.csv") - # [END bigquery_dataframes_load_data_from_csv] + # [END bigquery_dataframes_read_data_from_csv] assert df is not None - # [START bigquery_dataframes_load_data_from_bigquery] + # [START bigquery_dataframes_read_data_from_bigquery_table] import bigframes.pandas as bpd df = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins") - # [END bigquery_dataframes_load_data_from_bigquery] + # [END bigquery_dataframes_read_data_from_bigquery_table] assert df is not None # [START bigquery_dataframes_read_from_sql_query] From bc2e377083a45d6ea23e617748d420b9f015a94a Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Mon, 21 Jul 2025 17:48:49 +0000 Subject: [PATCH 4/4] fix typo --- samples/snippets/sessions_and_io_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/sessions_and_io_test.py b/samples/snippets/sessions_and_io_test.py index ee5d83ee89..98c2c71424 100644 --- 
a/samples/snippets/sessions_and_io_test.py +++ b/samples/snippets/sessions_and_io_test.py @@ -118,7 +118,7 @@ def test_sessions_and_io(project_id: str, dataset_id: str) -> None: # Returns a Pandas Series bf_s.to_pandas() # [END bigquery_dataframes_convert_bq_dataframe_to_pandas] - assert bf_s.to_pandas is not None + assert bf_s.to_pandas() is not None # [START bigquery_dataframes_to_pandas_dry_run] import bigframes.pandas as bpd