diff --git a/sql/moz-fx-data-shared-prod/firefox_desktop_derived/newtab_content_items_daily_combined_v1/metadata.yaml b/sql/moz-fx-data-shared-prod/firefox_desktop_derived/newtab_content_items_daily_combined_v1/metadata.yaml new file mode 100644 index 00000000000..e270c971375 --- /dev/null +++ b/sql/moz-fx-data-shared-prod/firefox_desktop_derived/newtab_content_items_daily_combined_v1/metadata.yaml @@ -0,0 +1,7 @@ +friendly_name: Newtab Content Items Daily Combined +description: |- + A view of the combined (Newtab + Newtab-Content) daily aggregation of newtab content actions on content/items, + joined with the latest corpus item details from the corpus_items_current table so that the most current values for + the corpus item are available. +owners: +- lmcfall@mozilla.com diff --git a/sql/moz-fx-data-shared-prod/firefox_desktop_derived/newtab_content_items_daily_combined_v1/view.sql b/sql/moz-fx-data-shared-prod/firefox_desktop_derived/newtab_content_items_daily_combined_v1/view.sql new file mode 100644 index 00000000000..1027ba4a8cf --- /dev/null +++ b/sql/moz-fx-data-shared-prod/firefox_desktop_derived/newtab_content_items_daily_combined_v1/view.sql @@ -0,0 +1,11 @@ +CREATE OR REPLACE VIEW + `moz-fx-data-shared-prod.firefox_desktop_derived.newtab_content_items_daily_combined_v1` +AS +SELECT + content.*, + corpus_items.* EXCEPT (corpus_item_id, row_num) +FROM + `moz-fx-data-shared-prod.firefox_desktop_derived.newtab_content_items_daily_v1` AS content +LEFT OUTER JOIN + `moz-fx-data-shared-prod.snowflake_migration_derived.corpus_items_current_v1` AS corpus_items + ON content.corpus_item_id = corpus_items.corpus_item_id diff --git a/sql/moz-fx-data-shared-prod/firefox_desktop_derived/newtab_content_items_daily_v1/metadata.yaml b/sql/moz-fx-data-shared-prod/firefox_desktop_derived/newtab_content_items_daily_v1/metadata.yaml new file mode 100644 index 00000000000..d27ffcc11c7 --- /dev/null +++ b/sql/moz-fx-data-shared-prod/firefox_desktop_derived/newtab_content_items_daily_v1/metadata.yaml @@ -0,0 +1,33 @@ +friendly_name: Newtab Content Items Daily +description: |- + A daily aggregation of newtab content actions on content/items (example: impressions, clicks, dismissals) + for Firefox desktop, partitioned by day. +owners: +- lmcfall@mozilla.com +labels: + application: newtab + incremental: true + schedule: daily + dag: bqetl_newtab + owner1: lmcfall + table_type: aggregate +scheduling: + dag_name: bqetl_newtab +bigquery: + time_partitioning: + type: day + field: submission_date + require_partition_filter: true + expiration_days: null + range_partitioning: null + clustering: + fields: + - channel + - country +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozsoc-ml/service + - workgroup:mozsoc-ml/developers +references: {} +require_column_descriptions: false diff --git a/sql/moz-fx-data-shared-prod/firefox_desktop_derived/newtab_content_items_daily_v1/query.sql b/sql/moz-fx-data-shared-prod/firefox_desktop_derived/newtab_content_items_daily_v1/query.sql new file mode 100644 index 00000000000..6eb73db77cf --- /dev/null +++ b/sql/moz-fx-data-shared-prod/firefox_desktop_derived/newtab_content_items_daily_v1/query.sql @@ -0,0 +1,194 @@ +WITH newtab_events_unnested AS ( + SELECT + DATE(submission_timestamp) AS submission_date, + mozfun.norm.browser_version_info(client_info.app_display_version).major_version AS app_version, + normalized_channel AS channel, + metrics.string.newtab_locale AS locale, + normalized_country_code AS country, + metrics.string.newtab_content_surface_id AS newtab_content_surface_id, + timestamp AS event_timestamp, + category AS event_category, + name AS event_name, + extra AS event_details, + FROM + `moz-fx-data-shared-prod.firefox_desktop_stable.newtab_v1`, + UNNEST(events) + WHERE + DATE(submission_timestamp) = @submission_date + AND category IN ('pocket') + AND name IN ('impression', 'click', 'dismiss') + AND mozfun.norm.browser_version_info( + client_info.app_display_version + ).major_version >= 121 -- the [Pocket team started using Glean](https://github.com/Pocket/dbt-snowflake/pull/459) from this version on. This prevents duplicates for previous releases. +), +newtab_flattened_events AS ( + SELECT + submission_date, + SAFE_CAST(app_version AS INT64) AS app_version, + channel, + locale, + country, + newtab_content_surface_id, + event_category, + event_name, + mozfun.map.get_key(event_details, 'corpus_item_id') AS corpus_item_id, + SAFE_CAST(mozfun.map.get_key(event_details, 'position') AS INT64) AS position, + SAFE_CAST(mozfun.map.get_key(event_details, 'is_sponsored') AS BOOLEAN) AS is_sponsored, + SAFE_CAST( + mozfun.map.get_key(event_details, 'is_section_followed') AS BOOLEAN + ) AS is_section_followed, + mozfun.map.get_key(event_details, 'matches_selected_topic') AS matches_selected_topic, + SAFE_CAST(mozfun.map.get_key(event_details, 'received_rank') AS INT64) AS received_rank, + mozfun.map.get_key(event_details, 'section') AS section, + SAFE_CAST(mozfun.map.get_key(event_details, 'section_position') AS INT64) AS section_position, + mozfun.map.get_key(event_details, 'topic') AS topic, + IFNULL(mozfun.map.get_key(event_details, 'content_redacted'), 'false') AS content_redacted, + NULL AS newtab_content_ping_version + FROM + newtab_events_unnested +), +newtab_daily_agg AS ( + SELECT + submission_date, + app_version, + channel, + country, + IFNULL( + newtab_content_surface_id, + mozfun.newtab.scheduled_surface_id_v1(country, locale) + ) AS newtab_content_surface_id, + corpus_item_id, + position, + is_sponsored, + is_section_followed, + matches_selected_topic, + received_rank, + section, + section_position, + topic, + content_redacted, + newtab_content_ping_version, + COUNTIF(event_name = 'impression') AS impression_count, + COUNTIF(event_name = 'click') AS click_count, + COUNTIF(event_name = 'dismiss') AS dismiss_count + FROM + newtab_flattened_events + WHERE + -- Filters out non-redacted events. Redacted events will be counted in the newtab_content ping data. + content_redacted = 'false' + GROUP BY + submission_date, + app_version, + channel, + country, + newtab_content_surface_id, + corpus_item_id, + position, + is_sponsored, + is_section_followed, + matches_selected_topic, + received_rank, + section, + section_position, + topic, + content_redacted, + newtab_content_ping_version +), +newtab_content_events_unnested AS ( + SELECT + DATE(submission_timestamp) AS submission_date, + normalized_channel AS channel, + IFNULL(metrics.string.newtab_content_country, normalized_country_code) AS country, + metrics.string.newtab_content_surface_id AS newtab_content_surface_id, + timestamp AS event_timestamp, + category AS event_category, + name AS event_name, + extra AS event_details, + metrics.quantity.newtab_content_ping_version AS newtab_content_ping_version + FROM + `moz-fx-data-shared-prod.firefox_desktop.newtab_content`, + UNNEST(events) + WHERE + DATE(submission_timestamp) = @submission_date + AND category IN ('newtab_content') + AND name IN ('impression', 'click', 'dismiss') +), +newtab_content_flattened_events AS ( + SELECT + submission_date, + NULL AS app_version, + channel, + country, + newtab_content_surface_id, + event_category, + event_name, + mozfun.map.get_key(event_details, 'corpus_item_id') AS corpus_item_id, + SAFE_CAST(mozfun.map.get_key(event_details, 'position') AS INT64) AS position, + SAFE_CAST(mozfun.map.get_key(event_details, 'is_sponsored') AS BOOLEAN) AS is_sponsored, + SAFE_CAST( + mozfun.map.get_key(event_details, 'is_section_followed') AS BOOLEAN + ) AS is_section_followed, + mozfun.map.get_key(event_details, 'matches_selected_topic') AS matches_selected_topic, + SAFE_CAST(mozfun.map.get_key(event_details, 'received_rank') AS INT64) AS received_rank, + mozfun.map.get_key(event_details, 'section') AS section, + SAFE_CAST(mozfun.map.get_key(event_details, 'section_position') AS INT64) AS section_position, + mozfun.map.get_key(event_details, 'topic') AS topic, + CAST(NULL AS STRING) AS content_redacted, + newtab_content_ping_version + FROM + newtab_content_events_unnested +), +newtab_content_daily_agg AS ( + SELECT + submission_date, + app_version, + channel, + country, + newtab_content_surface_id, + corpus_item_id, + position, + is_sponsored, + is_section_followed, + matches_selected_topic, + received_rank, + section, + section_position, + topic, + content_redacted, + newtab_content_ping_version, + COUNTIF(event_name = 'impression') AS impression_count, + COUNTIF(event_name = 'click') AS click_count, + COUNTIF(event_name = 'dismiss') AS dismiss_count + FROM + newtab_content_flattened_events + WHERE + -- Only including events from pings with a version + -- to ensure all events coming from this CTE are from the Newtab-Content ping + newtab_content_ping_version IS NOT NULL + GROUP BY + submission_date, + app_version, + channel, + country, + newtab_content_surface_id, + corpus_item_id, + position, + is_sponsored, + is_section_followed, + matches_selected_topic, + received_rank, + section, + section_position, + topic, + content_redacted, + newtab_content_ping_version +) +SELECT + * +FROM + newtab_daily_agg +UNION ALL +SELECT + * +FROM + newtab_content_daily_agg diff --git a/sql/moz-fx-data-shared-prod/firefox_desktop_derived/newtab_content_items_daily_v1/schema.yaml b/sql/moz-fx-data-shared-prod/firefox_desktop_derived/newtab_content_items_daily_v1/schema.yaml new file mode 100644 index 00000000000..7e9849829ec --- /dev/null +++ b/sql/moz-fx-data-shared-prod/firefox_desktop_derived/newtab_content_items_daily_v1/schema.yaml @@ -0,0 +1,75 @@ +fields: +- name: submission_date + type: DATE + mode: NULLABLE + description: Date when client action took place +- name: channel + type: STRING + mode: NULLABLE +- name: country + type: STRING + mode: NULLABLE +- name: newtab_content_surface_id + type: STRING + mode: NULLABLE +- name: corpus_item_id + type: STRING + mode: NULLABLE +- name: position + type: INTEGER + mode: NULLABLE + description: The position (0-index) of the pocket tile. +- name: is_sponsored + type: BOOLEAN + mode: NULLABLE + description: Whether the pocket tile was sponsored (has an ad callback). +- name: is_section_followed + type: BOOLEAN + mode: NULLABLE + description: If click belongs in a section, if that section is followed +- name: matches_selected_topic + type: STRING + mode: NULLABLE + description: > + Returns value based on if a the topic of the pocket recommendation + matches one of the user-selected topic categories +- name: received_rank + type: INTEGER + mode: NULLABLE + description: > + The rank or order of the recommendation at the time it was sent to + the client. +- name: section + type: STRING + mode: NULLABLE + description: If click belongs in a section, the name of the section +- name: section_position + type: INTEGER + mode: NULLABLE + description: If click belongs in a section, the numberic position of the section +- name: topic + type: STRING + mode: NULLABLE + description: The topic of the recommendation. Like "entertainment". +- name: content_redacted + type: STRING + mode: NULLABLE + description: Are content details sent separately in the newtab_content ping +- name: newtab_content_ping_version + type: INTEGER + mode: NULLABLE +- name: impression_count + type: INTEGER + mode: NULLABLE + description: Count of articles impressed on Newtab +- name: click_count + type: INTEGER + mode: NULLABLE + description: Count of articles clicked on Newtab +- name: dismiss_count + type: INTEGER + mode: NULLABLE + description: Count of articles dismissed on Newtab +- name: app_version + type: INTEGER + mode: NULLABLE diff --git a/sql/moz-fx-data-shared-prod/snowflake_migration_derived/corpus_items_current_v1/view.sql b/sql/moz-fx-data-shared-prod/snowflake_migration_derived/corpus_items_current_v1/view.sql index ebc273d6f50..632b63616b3 100644 --- a/sql/moz-fx-data-shared-prod/snowflake_migration_derived/corpus_items_current_v1/view.sql +++ b/sql/moz-fx-data-shared-prod/snowflake_migration_derived/corpus_items_current_v1/view.sql @@ -5,7 +5,7 @@ WITH corpus_item_id_row_nums AS ( SELECT approved_corpus_item_external_id AS corpus_item_id, title, - url, + url AS recommendation_url, authors, publisher, reviewed_corpus_item_updated_at AS corpus_item_updated_at,