Skip to content
This repository has been archived by the owner on Dec 18, 2024. It is now read-only.

Commit

Permalink
Merge branch 'main' into more-fixes-to-improve-run
Browse files Browse the repository at this point in the history
  • Loading branch information
tunetheweb committed Mar 11, 2024
2 parents 686a0cd + de42ed9 commit 6a45c64
Show file tree
Hide file tree
Showing 9 changed files with 667 additions and 3 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/deploy-dataflow-flex-template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ on:
- "cloudbuild.yaml"
- "Dockerfile"
- "flex_template_metadata_*.json"
- "requirements.txt"
- "requirements*.txt"

jobs:
deploy:
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
*.pyc
/.vscode
.coverage
.tool-versions

# Ignore generated credentials from google-github-actions/auth
gha-creds-*.json
2 changes: 1 addition & 1 deletion data-pipeline.workflows.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ main:
- project: ${sys.get_env("GOOGLE_CLOUD_PROJECT_ID")}
- region: "us-west1"
- flexTemplateRepo: "data-pipeline"
- flexTemplateBuildTag: "2024-03-01_07-51-29"
- flexTemplateBuildTag: "2024-03-11_01-04-46"
- flexTemplateBasePath: ${"gs://" + project + "/dataflow/templates/" + flexTemplateRepo}
- flexTemplateTemp: ${"gs://" + project + "-staging/dataflow"}

Expand Down
309 changes: 309 additions & 0 deletions modules/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,312 @@ class MaxContentSize(Enum):

# limit response bodies to 20MB
RESPONSE_BODIES = 20 * 1000000


TECHNOLOGY_QUERY_ID_KEYS = {
    # These query types all share one composite key; the comprehension
    # builds a separate list object for each of them.
    **{
        query_type: ["date", "technology", "geo", "rank"]
        for query_type in (
            "adoption",
            "lighthouse",
            "core_web_vitals",
            "page_weight",
        )
    },
    "technologies": ["client", "technology", "category"],
    "categories": ["category"],
}
"""For each query type, the list of fields that together uniquely identify a row."""

# editorconfig-checker-disable
# Each value below is a BigQuery SQL template with a `{date}` placeholder.
# Literal braces inside the embedded JavaScript UDF bodies are doubled
# (`{{` / `}}`) so that they survive the formatting step that fills in `{date}`.
# NOTE(review): the doubling implies `str.format`-style substitution by the
# caller — confirm against the call site before changing the escaping.
TECHNOLOGY_QUERIES = {
# adoption: per-client `origins` counts for each (date, app, rank, geo) group;
# the UDF turns the aggregated records into an object keyed by client name.
"adoption": """
CREATE TEMPORARY FUNCTION GET_ADOPTION(
records ARRAY<STRUCT<
client STRING,
origins INT64
>>
) RETURNS STRUCT<
desktop INT64,
mobile INT64
> LANGUAGE js AS '''
return Object.fromEntries(records.map(({{client, origins}}) => {{
return [client, origins];
}}));
''';
SELECT
STRING(DATE(date)) as date,
app AS technology,
rank,
geo,
GET_ADOPTION(ARRAY_AGG(STRUCT(
client,
origins
))) AS adoption
FROM
`httparchive.core_web_vitals.technologies`
WHERE date = '{date}'
GROUP BY date, app, rank, geo
""",
# lighthouse: median Lighthouse scores; the UDF emits one entry per metric
# name (accessibility, best_practices, performance, pwa, seo), each holding a
# `median_score` struct under the record's client name.
"lighthouse": """
CREATE TEMPORARY FUNCTION GET_LIGHTHOUSE(
records ARRAY<STRUCT<
client STRING,
median_lighthouse_score_accessibility NUMERIC,
median_lighthouse_score_best_practices NUMERIC,
median_lighthouse_score_performance NUMERIC,
median_lighthouse_score_pwa NUMERIC,
median_lighthouse_score_seo NUMERIC
>>
) RETURNS ARRAY<STRUCT<
name STRING,
desktop STRUCT<
median_score NUMERIC
>,
mobile STRUCT<
median_score NUMERIC
>
>> LANGUAGE js AS '''
const METRIC_MAP = {{
accessibility: 'median_lighthouse_score_accessibility',
best_practices: 'median_lighthouse_score_best_practices',
performance: 'median_lighthouse_score_performance',
pwa: 'median_lighthouse_score_pwa',
seo: 'median_lighthouse_score_seo',
}};
// Initialize the Lighthouse map.
const lighthouse = Object.fromEntries(Object.keys(METRIC_MAP).map(metricName => {{
return [metricName, {{name: metricName}}];
}}));
// Populate each client record.
records.forEach(record => {{
Object.entries(METRIC_MAP).forEach(([metricName, median_score]) => {{
lighthouse[metricName][record.client] = {{median_score: record[median_score]}};
}});
}});
return Object.values(lighthouse);
''';
SELECT
STRING(DATE(date)) as date,
app AS technology,
rank,
geo,
GET_LIGHTHOUSE(ARRAY_AGG(STRUCT(
client,
median_lighthouse_score_accessibility,
median_lighthouse_score_best_practices,
median_lighthouse_score_performance,
median_lighthouse_score_pwa,
median_lighthouse_score_seo
))) AS lighthouse
FROM
`httparchive.core_web_vitals.technologies`
WHERE date = '{date}'
GROUP BY date, app, rank, geo
""",
# core_web_vitals: for each metric in METRIC_MAP (overall, LCP, CLS, FID, FCP,
# TTFB, INP) the UDF pairs a "good" origin count with a "tested" origin count
# under the record's client name. The result column is named `vitals`.
"core_web_vitals": """
CREATE TEMPORARY FUNCTION GET_VITALS(
records ARRAY<STRUCT<
client STRING,
origins_with_good_fid INT64,
origins_with_good_cls INT64,
origins_with_good_lcp INT64,
origins_with_good_fcp INT64,
origins_with_good_ttfb INT64,
origins_with_good_inp INT64,
origins_with_any_fid INT64,
origins_with_any_cls INT64,
origins_with_any_lcp INT64,
origins_with_any_fcp INT64,
origins_with_any_ttfb INT64,
origins_with_any_inp INT64,
origins_with_good_cwv INT64,
origins_eligible_for_cwv INT64
>>
) RETURNS ARRAY<STRUCT<
name STRING,
desktop STRUCT<
good_number INT64,
tested INT64
>,
mobile STRUCT<
good_number INT64,
tested INT64
>
>> LANGUAGE js AS '''
const METRIC_MAP = {{
overall: ['origins_with_good_cwv', 'origins_eligible_for_cwv'],
LCP: ['origins_with_good_lcp', 'origins_with_any_lcp'],
CLS: ['origins_with_good_cls', 'origins_with_any_cls'],
FID: ['origins_with_good_fid', 'origins_with_any_fid'],
FCP: ['origins_with_good_fcp', 'origins_with_any_fcp'],
TTFB: ['origins_with_good_ttfb', 'origins_with_any_ttfb'],
INP: ['origins_with_good_inp', 'origins_with_any_inp']
}};
// Initialize the vitals map.
const vitals = Object.fromEntries(Object.keys(METRIC_MAP).map(metricName => {{
return [metricName, {{name: metricName}}];
}}));
// Populate each client record.
records.forEach(record => {{
Object.entries(METRIC_MAP).forEach(([metricName, [good_number, tested]]) => {{
vitals[metricName][record.client] = {{good_number: record[good_number], tested: record[tested]}};
}});
}});
return Object.values(vitals);
''';
SELECT
STRING(DATE(date)) as date,
app AS technology,
rank,
geo,
GET_VITALS(ARRAY_AGG(STRUCT(
client,
origins_with_good_fid,
origins_with_good_cls,
origins_with_good_lcp,
origins_with_good_fcp,
origins_with_good_ttfb,
origins_with_good_inp,
origins_with_any_fid,
origins_with_any_cls,
origins_with_any_lcp,
origins_with_any_fcp,
origins_with_any_ttfb,
origins_with_any_inp,
origins_with_good_cwv,
origins_eligible_for_cwv
))) AS vitals
FROM
`httparchive.core_web_vitals.technologies`
WHERE date = '{date}'
GROUP BY date, app, rank, geo
""",
# technologies: joins the technologies table to technology_descriptions and
# keeps only the geo = 'ALL' / rank = 'ALL' rows, ordered by origins descending.
# Plain SQL with no JS UDF, so this template contains no doubled braces.
"technologies": """
SELECT
client,
app AS technology,
description,
category,
SPLIT(category, ",") AS category_obj,
NULL AS similar_technologies,
origins
FROM
`httparchive.core_web_vitals.technologies`
JOIN
`httparchive.core_web_vitals.technology_descriptions`
ON
app = technology
WHERE date = '{date}' AND geo = 'ALL' AND rank = 'ALL'
ORDER BY origins DESC
""",
# page_weight: the UDF emits one entry per metric in METRICS (total, js,
# images), each holding a `median_bytes` struct under the record's client name.
# NOTE(review): the SELECT passes STRUCT fields named median_bytes_total /
# median_bytes_js / median_bytes_image, while the UDF signature declares them
# as total / js / images and the JS reads `record[metricName]`. This looks like
# it relies on BigQuery matching struct fields by position and exposing the
# declared names to JavaScript — confirm before renaming either side.
"page_weight": """
CREATE TEMPORARY FUNCTION GET_PAGE_WEIGHT(
records ARRAY<STRUCT<
client STRING,
total INT64,
js INT64,
images INT64
>>
) RETURNS ARRAY<STRUCT<
name STRING,
mobile STRUCT<
median_bytes INT64
>,
desktop STRUCT<
median_bytes INT64
>
>> LANGUAGE js AS '''
const METRICS = ['total', 'js', 'images'];
// Initialize the page weight map.
const pageWeight = Object.fromEntries(METRICS.map(metricName => {{
return [metricName, {{name: metricName}}];
}}));
// Populate each client record.
records.forEach(record => {{
METRICS.forEach(metricName => {{
pageWeight[metricName][record.client] = {{median_bytes: record[metricName]}};
}});
}});
return Object.values(pageWeight);
''';
SELECT
STRING(DATE(date)) as date,
app AS technology,
rank,
geo,
GET_PAGE_WEIGHT(ARRAY_AGG(STRUCT(
client,
median_bytes_total,
median_bytes_js,
median_bytes_image
))) AS pageWeight
FROM
`httparchive.core_web_vitals.technologies`
WHERE date = '{date}'
GROUP BY date, app, rank, geo
""",
# categories: counts distinct root_page values per category, and per
# (category, technology), from `httparchive.all.pages` for client = 'mobile'
# only. Each category's technologies are then aggregated in descending order
# of their own counts. `{date}` appears twice, once in each CTE.
"categories": """
WITH categories AS (
SELECT
category,
COUNT(DISTINCT root_page) AS origins
FROM
`httparchive.all.pages`,
UNNEST(technologies) AS t,
UNNEST(t.categories) AS category
WHERE
date = '{date}' AND
client = 'mobile'
GROUP BY
category
),
technologies AS (
SELECT
category,
technology,
COUNT(DISTINCT root_page) AS origins
FROM
`httparchive.all.pages`,
UNNEST(technologies) AS t,
UNNEST(t.categories) AS category
WHERE
date = '{date}' AND
client = 'mobile'
GROUP BY
category,
technology
)
SELECT
category,
categories.origins,
ARRAY_AGG(technology ORDER BY technologies.origins DESC) AS technologies
FROM
categories
JOIN
technologies
USING
(category)
GROUP BY
category,
categories.origins
ORDER BY
categories.origins DESC
"""
}
"""Mapping of query types to BigQuery SQL queries.
The queries are formatted with the `date` parameter.
Queries containing javascript UDFs require additional curly braces to escape the braces in the UDF.
"""
# editorconfig-checker-enable
Loading

0 comments on commit 6a45c64

Please sign in to comment.