From c4c7f4a32fac8fab1b75c6255b5c32e40bc95ca5 Mon Sep 17 00:00:00 2001 From: Hussainparbtani <43732117+Hussainparbtani@users.noreply.github.com> Date: Sat, 8 Feb 2025 20:23:10 +0000 Subject: [PATCH 01/11] added attractions to pipeline --- steps/01_setup_snowflake.sql | 4 ++-- steps/03_harmonize_data.py | 29 ++++++++++++++++++++++++++++- steps/04_orchestrate_jobs.sql | 19 +++++++++++++++---- 3 files changed, 45 insertions(+), 7 deletions(-) diff --git a/steps/01_setup_snowflake.sql b/steps/01_setup_snowflake.sql index 7e1bb6a6..4f2e769c 100644 --- a/steps/01_setup_snowflake.sql +++ b/steps/01_setup_snowflake.sql @@ -13,14 +13,14 @@ CREATE OR ALTER DATABASE QUICKSTART_COMMON; -- API integration is needed for GitHub integration CREATE OR REPLACE API INTEGRATION git_api_integration API_PROVIDER = git_https_api - API_ALLOWED_PREFIXES = ('https://github.com/') -- INSERT YOUR GITHUB USERNAME HERE + API_ALLOWED_PREFIXES = ('https://github.com/Hussainparbtani') -- INSERT YOUR GITHUB USERNAME HERE ENABLED = TRUE; -- Git repository object is similar to external stage CREATE OR REPLACE GIT REPOSITORY quickstart_common.public.quickstart_repo API_INTEGRATION = git_api_integration - ORIGIN = ''; -- INSERT URL OF FORKED REPO HERE + ORIGIN = 'https://github.com/Hussainparbtani/sfguide-getting-started-with-snowflake-devops.git'; -- INSERT URL OF FORKED REPO HERE CREATE OR ALTER DATABASE QUICKSTART_PROD; diff --git a/steps/03_harmonize_data.py b/steps/03_harmonize_data.py index 83b62d48..d21ee2d8 100644 --- a/steps/03_harmonize_data.py +++ b/steps/03_harmonize_data.py @@ -231,7 +231,34 @@ def main(df): group by city.geo_id, city.geo_name, city.total_population """, ), - # Placeholder: Add new view definition here + View( + name="attractions", + columns=[ + ViewColumn(name="geo_id"), + ViewColumn(name="geo_name"), + ViewColumn(name="aquarium_cnt"), + ViewColumn(name="zoo_cnt"), + ViewColumn(name="korean_restaurant_cnt"), + ], + query=""" + select + city.geo_id, + city.geo_name, + count(case when category_main = 'Aquarium' THEN 1 END) aquarium_cnt, + count(case when category_main = 'Zoo' THEN 1 END) zoo_cnt, + count(case when category_main = 'Korean Restaurant' THEN 1 END) korean_restaurant_cnt, + from us_addresses__poi.cybersyn.point_of_interest_index poi + join us_addresses__poi.cybersyn.point_of_interest_addresses_relationships poi_add + on poi_add.poi_id = poi.poi_id + join us_addresses__poi.cybersyn.us_addresses address + on address.address_id = poi_add.address_id + join major_us_cities city on city.geo_id = address.id_city + where true + and category_main in ('Aquarium', 'Zoo', 'Korean Restaurant') + and id_country = 'country/USA' + group by city.geo_id, city.geo_name + """, + ), ] diff --git a/steps/04_orchestrate_jobs.sql b/steps/04_orchestrate_jobs.sql index e5e1d120..1f68597f 100644 --- a/steps/04_orchestrate_jobs.sql +++ b/steps/04_orchestrate_jobs.sql @@ -12,7 +12,9 @@ create or alter table vacation_spots ( , avg_relative_humidity_pct float , avg_cloud_cover_pct float , precipitation_probability_pct float - -- STEP 5: INSERT CHANGES HERE + , aquarium_cnt int + , zoo_cnt int + , korean_restaurant_cnt int ) data_retention_time_in_days = 1; @@ -26,6 +28,7 @@ create or alter task vacation_spots_update from silver.flights_from_home flight join silver.weather_joined_with_major_cities city on city.geo_name = flight.arrival_city -- STEP 5: INSERT CHANGES HERE + join silver.attractions att on att.geo_name = city.geo_name ) as harmonized_vacation_spots ON vacation_spots.city = harmonized_vacation_spots.arrival_city and vacation_spots.airport = harmonized_vacation_spots.arrival_airport WHEN MATCHED THEN UPDATE SET @@ -36,6 +39,9 @@ create or alter task vacation_spots_update , vacation_spots.avg_cloud_cover_pct = harmonized_vacation_spots.avg_cloud_cover_pct , vacation_spots.precipitation_probability_pct = harmonized_vacation_spots.precipitation_probability_pct -- STEP 5: INSERT CHANGES HERE + , vacation_spots.aquarium_cnt = harmonized_vacation_spots.aquarium_cnt + , vacation_spots.zoo_cnt = harmonized_vacation_spots.zoo_cnt + , vacation_spots.korean_restaurant_cnt = harmonized_vacation_spots.korean_restaurant_cnt WHEN NOT MATCHED THEN INSERT VALUES ( harmonized_vacation_spots.arrival_city @@ -47,6 +53,9 @@ create or alter task vacation_spots_update , harmonized_vacation_spots.avg_cloud_cover_pct , harmonized_vacation_spots.precipitation_probability_pct -- STEP 5: INSERT CHANGES HERE + , harmonized_vacation_spots.aquarium_cnt + , harmonized_vacation_spots.zoo_cnt + , harmonized_vacation_spots.korean_restaurant_cnt ); @@ -64,13 +73,15 @@ create or alter task email_notification and punctual_pct >= 50 and avg_temperature_air_f >= 70 -- STEP 5: INSERT CHANGES HERE + and korean_restaurant_cnt > 0 + and (zoo_cnt > 0 or aquarium_cnt > 0) limit 10); if (:options = '[]') then CALL SYSTEM$SEND_EMAIL( 'email_integration', - '', -- INSERT YOUR EMAIL HERE + 'hussain.parbtani@thebridge.com', -- INSERT YOUR EMAIL HERE 'New data successfully processed: No suitable vacation spots found.', 'The query did not return any results. Consider adjusting your filters.'); end if; @@ -83,14 +94,14 @@ create or alter task email_notification CALL SYSTEM$SEND_EMAIL( 'email_integration', - '', -- INSERT YOUR EMAIL HERE + 'hussain.parbtani@thebridge.com', -- INSERT YOUR EMAIL HERE 'New data successfully processed: The perfect place for your summer vacation has been found.', :response); exception when EXPRESSION_ERROR then CALL SYSTEM$SEND_EMAIL( 'email_integration', - '', -- INSERT YOUR EMAIL HERE + ' Date: Sun, 9 Feb 2025 00:33:37 +0000 Subject: [PATCH 02/11] parameterize pipeline --- steps/01_setup_snowflake.sql | 2 +- steps/03_harmonize_data.py | 2 +- steps/04_orchestrate_jobs.sql | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/steps/01_setup_snowflake.sql b/steps/01_setup_snowflake.sql index 4f2e769c..a656bb8d 100644 --- a/steps/01_setup_snowflake.sql +++ b/steps/01_setup_snowflake.sql @@ -7,7 +7,7 @@ CREATE OR ALTER WAREHOUSE QUICKSTART_WH -- Separate database for git repository -CREATE OR ALTER DATABASE QUICKSTART_COMMON; +CREATE OR ALTER DATABASE QUICKSTART_{{environment}}; -- API integration is needed for GitHub integration diff --git a/steps/03_harmonize_data.py b/steps/03_harmonize_data.py index d21ee2d8..abeec037 100644 --- a/steps/03_harmonize_data.py +++ b/steps/03_harmonize_data.py @@ -266,7 +266,7 @@ def main(df): root = Root(Session.builder.getOrCreate()) # create views in Snowflake -silver_schema = root.databases["quickstart_prod"].schemas["silver"] +silver_schema = silver_schema = root.databases[f"quickstart_{os.environ['environment']}"].schemas["silver"] silver_schema.user_defined_functions.create( map_city_to_airport, mode=CreateMode.or_replace ) diff --git a/steps/04_orchestrate_jobs.sql b/steps/04_orchestrate_jobs.sql index 1f68597f..9011c344 100644 --- a/steps/04_orchestrate_jobs.sql +++ b/steps/04_orchestrate_jobs.sql @@ -1,5 +1,5 @@ use role accountadmin; -use schema quickstart_prod.gold; +use schema use schema quickstart_{{environment}}.gold; -- declarative target table of pipeline @@ -15,7 +15,7 @@ create or alter table vacation_spots ( , aquarium_cnt int , zoo_cnt int , korean_restaurant_cnt int -) data_retention_time_in_days = 1; +) data_retention_time_in_days = {{retention_time}}; -- task to merge pipeline results into target table From 69ff763df18a0fd7100a80dbf9348006eb34a8e7 Mon Sep 17 00:00:00 2001 From: Hussainparbtani <43732117+Hussainparbtani@users.noreply.github.com> Date: Sun, 9 Feb 2025 00:40:54 +0000 Subject: [PATCH 03/11] testing minor change --- steps/01_setup_snowflake.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/steps/01_setup_snowflake.sql b/steps/01_setup_snowflake.sql index a656bb8d..ffe6d7a5 100644 --- a/steps/01_setup_snowflake.sql +++ b/steps/01_setup_snowflake.sql @@ -1,5 +1,6 @@ USE ROLE ACCOUNTADMIN; + CREATE OR ALTER WAREHOUSE QUICKSTART_WH WAREHOUSE_SIZE = XSMALL AUTO_SUSPEND = 300 From 16dbd1289b7bb94488e89471be3286aecb2fcfde Mon Sep 17 00:00:00 2001 From: Hussainparbtani <43732117+Hussainparbtani@users.noreply.github.com> Date: Sun, 9 Feb 2025 05:11:43 +0000 Subject: [PATCH 04/11] adding private key passphrase as a env variable --- .github/workflows/deploy_pipeline.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/deploy_pipeline.yml b/.github/workflows/deploy_pipeline.yml index 3ef0f13c..9ff592b2 100644 --- a/.github/workflows/deploy_pipeline.yml +++ b/.github/workflows/deploy_pipeline.yml @@ -21,6 +21,7 @@ jobs: # Read connection secrets SNOWFLAKE_CONNECTIONS_DEFAULT_ACCOUNT: ${{ secrets.SNOWFLAKE_ACCOUNT }} SNOWFLAKE_CONNECTIONS_DEFAULT_USER: ${{ secrets.SNOWFLAKE_USER }} + PRIVATE_KEY_PASSPHRASE: ${{ secrets.PRIVATE_KEY_PASSPHRASE }} steps: # Checkout step is necessary if you want to use a config file from your repo From a96f1f4c84029253dfc3bc8e4ad1568fc47bbf00 Mon Sep 17 00:00:00 2001 From: Hussainparbtani <43732117+Hussainparbtani@users.noreply.github.com> Date: Sun, 9 Feb 2025 05:18:48 +0000 Subject: [PATCH 05/11] fixing error in use schema syntax --- steps/04_orchestrate_jobs.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/steps/04_orchestrate_jobs.sql b/steps/04_orchestrate_jobs.sql index 9011c344..b9db4284 100644 --- a/steps/04_orchestrate_jobs.sql +++ b/steps/04_orchestrate_jobs.sql @@ -1,5 +1,5 @@ use role accountadmin; -use schema use schema quickstart_{{environment}}.gold; +use schema quickstart_{{environment}}.gold; -- declarative target table of pipeline From bea5609fb9647021fd4e55ad2b8938a79d7e6d9f Mon Sep 17 00:00:00 2001 From: Hussainparbtani <43732117+Hussainparbtani@users.noreply.github.com> Date: Thu, 13 Feb 2025 20:40:40 +0000 Subject: [PATCH 06/11] add new column in vacation_spots --- steps/04_orchestrate_jobs.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/steps/04_orchestrate_jobs.sql b/steps/04_orchestrate_jobs.sql index b9db4284..7f5695ab 100644 --- a/steps/04_orchestrate_jobs.sql +++ b/steps/04_orchestrate_jobs.sql @@ -15,6 +15,7 @@ create or alter table vacation_spots ( , aquarium_cnt int , zoo_cnt int , korean_restaurant_cnt int + , new_column string ) data_retention_time_in_days = {{retention_time}}; From 65b8534cdd5ff007cbec7a830c91dbbe42dfa57b Mon Sep 17 00:00:00 2001 From: Hussainparbtani <43732117+Hussainparbtani@users.noreply.github.com> Date: Thu, 13 Feb 2025 20:53:51 +0000 Subject: [PATCH 07/11] updating task logic --- steps/04_orchestrate_jobs.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/steps/04_orchestrate_jobs.sql b/steps/04_orchestrate_jobs.sql index 7f5695ab..4f084abc 100644 --- a/steps/04_orchestrate_jobs.sql +++ b/steps/04_orchestrate_jobs.sql @@ -43,6 +43,7 @@ create or alter task vacation_spots_update , vacation_spots.aquarium_cnt = harmonized_vacation_spots.aquarium_cnt , vacation_spots.zoo_cnt = harmonized_vacation_spots.zoo_cnt , vacation_spots.korean_restaurant_cnt = harmonized_vacation_spots.korean_restaurant_cnt + , vacation_spots.new_column = {{environment}} WHEN NOT MATCHED THEN INSERT VALUES ( harmonized_vacation_spots.arrival_city @@ -57,6 +58,7 @@ create or alter task vacation_spots_update , harmonized_vacation_spots.aquarium_cnt , harmonized_vacation_spots.zoo_cnt , harmonized_vacation_spots.korean_restaurant_cnt + ,vacation_spots.new_column = {{environment}} ); From 632838bcfe82ab2e6618796657ee920514869987 Mon Sep 17 00:00:00 2001 From: Hussainparbtani <43732117+Hussainparbtani@users.noreply.github.com> Date: Thu, 13 Feb 2025 21:00:56 +0000 Subject: [PATCH 08/11] fixing insert error --- steps/04_orchestrate_jobs.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/steps/04_orchestrate_jobs.sql b/steps/04_orchestrate_jobs.sql index 4f084abc..9fb15c7b 100644 --- a/steps/04_orchestrate_jobs.sql +++ b/steps/04_orchestrate_jobs.sql @@ -58,7 +58,7 @@ create or alter task vacation_spots_update , harmonized_vacation_spots.aquarium_cnt , harmonized_vacation_spots.zoo_cnt , harmonized_vacation_spots.korean_restaurant_cnt - ,vacation_spots.new_column = {{environment}} + , {{environment}} ); From ec73e6ecd724460e2f5ccce8e6fb177c8562077e Mon Sep 17 00:00:00 2001 From: Hussainparbtani <43732117+Hussainparbtani@users.noreply.github.com> Date: Thu, 13 Feb 2025 21:08:07 +0000 Subject: [PATCH 09/11] trying again to fix error --- steps/04_orchestrate_jobs.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/steps/04_orchestrate_jobs.sql b/steps/04_orchestrate_jobs.sql index 9fb15c7b..ed800a0c 100644 --- a/steps/04_orchestrate_jobs.sql +++ b/steps/04_orchestrate_jobs.sql @@ -43,7 +43,7 @@ create or alter task vacation_spots_update , vacation_spots.aquarium_cnt = harmonized_vacation_spots.aquarium_cnt , vacation_spots.zoo_cnt = harmonized_vacation_spots.zoo_cnt , vacation_spots.korean_restaurant_cnt = harmonized_vacation_spots.korean_restaurant_cnt - , vacation_spots.new_column = {{environment}} + , vacation_spots.new_column = '{{environment}}' WHEN NOT MATCHED THEN INSERT VALUES ( harmonized_vacation_spots.arrival_city @@ -58,7 +58,7 @@ create or alter task vacation_spots_update , harmonized_vacation_spots.aquarium_cnt , harmonized_vacation_spots.zoo_cnt , harmonized_vacation_spots.korean_restaurant_cnt - , {{environment}} + , '{{environment}}' ); From df78cdf3881a66c3fdc903b0cf397aeb6f7e361e Mon Sep 17 00:00:00 2001 From: Hussainparbtani <43732117+Hussainparbtani@users.noreply.github.com> Date: Thu, 13 Feb 2025 21:39:05 +0000 Subject: [PATCH 10/11] added new column 2 --- steps/04_orchestrate_jobs.sql | 3 +++ 1 file changed, 3 insertions(+) diff --git a/steps/04_orchestrate_jobs.sql b/steps/04_orchestrate_jobs.sql index ed800a0c..62939d3a 100644 --- a/steps/04_orchestrate_jobs.sql +++ b/steps/04_orchestrate_jobs.sql @@ -16,6 +16,7 @@ create or alter table vacation_spots ( , zoo_cnt int , korean_restaurant_cnt int , new_column string + , new_column2 string ) data_retention_time_in_days = {{retention_time}}; @@ -44,6 +45,7 @@ create or alter task vacation_spots_update , vacation_spots.zoo_cnt = harmonized_vacation_spots.zoo_cnt , vacation_spots.korean_restaurant_cnt = harmonized_vacation_spots.korean_restaurant_cnt , vacation_spots.new_column = '{{environment}}' + , vacation_spots.new_column2 = '{{environment}}' WHEN NOT MATCHED THEN INSERT VALUES ( harmonized_vacation_spots.arrival_city @@ -59,6 +61,7 @@ create or alter task vacation_spots_update , harmonized_vacation_spots.zoo_cnt , harmonized_vacation_spots.korean_restaurant_cnt , '{{environment}}' + , '{{environment}}' ); From ba0cf3d18c195055c9fb5f79a223a8bdf62b0337 Mon Sep 17 00:00:00 2001 From: Hussainparbtani <43732117+Hussainparbtani@users.noreply.github.com> Date: Fri, 21 Feb 2025 20:42:46 +0000 Subject: [PATCH 11/11] adding a column3 --- steps/04_orchestrate_jobs.sql | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/steps/04_orchestrate_jobs.sql b/steps/04_orchestrate_jobs.sql index 62939d3a..094c497a 100644 --- a/steps/04_orchestrate_jobs.sql +++ b/steps/04_orchestrate_jobs.sql @@ -8,7 +8,8 @@ create or alter table vacation_spots ( , airport varchar , co2_emissions_kg_per_person float , punctual_pct float - , avg_temperature_air_f float + , avg_temperature_air_f float + , new_column3 string , avg_relative_humidity_pct float , avg_cloud_cover_pct float , precipitation_probability_pct float @@ -17,6 +18,7 @@ create or alter table vacation_spots ( , korean_restaurant_cnt int , new_column string , new_column2 string + , new_column3 string ) data_retention_time_in_days = {{retention_time}}; @@ -46,6 +48,7 @@ create or alter task vacation_spots_update , vacation_spots.korean_restaurant_cnt = harmonized_vacation_spots.korean_restaurant_cnt , vacation_spots.new_column = '{{environment}}' , vacation_spots.new_column2 = '{{environment}}' + , vacation_spots.new_column3 = '{{environment}}' WHEN NOT MATCHED THEN INSERT VALUES ( harmonized_vacation_spots.arrival_city @@ -62,6 +65,7 @@ create or alter task vacation_spots_update , harmonized_vacation_spots.korean_restaurant_cnt , '{{environment}}' , '{{environment}}' + ,'{{environment}}' );