Skip to content

Commit 42208c4

Browse files
blink_features.usage with ranks and partitioned (#39)
* direct and partitioned
* partition filter optional
* description
1 parent ccb2e0d commit 42208c4

File tree

2 files changed

+44
-57
lines changed

2 files changed

+44
-57
lines changed

definitions/output/blink_features/features.js

-37
This file was deleted.

definitions/output/blink_features/usage.js

+44 -20
Original file line number | Diff line number | Diff line change
@@ -2,14 +2,36 @@ publish('usage', {
22
schema: 'blink_features',
33
type: 'incremental',
44
protected: true,
5+
bigquery: {
6+
partitionBy: 'date',
7+
clusterBy: ['client', 'rank', 'feature']
8+
},
9+
description: 'Used in https://lookerstudio.google.com/u/0/reporting/1M8kXOqPkwYNKjJhtag_nvDNJCpvmw_ri/page/tc5b, embedded in https://chromestatus.com/metrics/feature/timeline/popularity/2203',
510
tags: ['crawl_complete', 'blink_report']
611
}).preOps(ctx => `
712
DELETE FROM ${ctx.self()}
8-
WHERE yyyymmdd = REPLACE('${constants.currentMonth}', '-', '');
13+
WHERE date = '${constants.currentMonth}';
914
`).query(ctx => `
15+
WITH pages AS (
1016
SELECT
11-
REPLACE(CAST(date AS STRING), '-', '') AS yyyymmdd,
17+
date,
1218
client,
19+
rank,
20+
page,
21+
features
22+
FROM ${ctx.ref('crawl', 'pages')}
23+
WHERE
24+
date = '${constants.currentMonth}' AND
25+
is_root_page = TRUE
26+
${constants.devRankFilter}
27+
), ranks AS (
28+
SELECT DISTINCT rank FROM pages
29+
)
30+
31+
SELECT
32+
date,
33+
client,
34+
rank,
1335
id,
1436
feature,
1537
type,
@@ -19,20 +41,22 @@ SELECT
1941
sample_urls
2042
FROM (
2143
SELECT
22-
yyyymmdd AS date,
44+
date,
2345
client,
24-
id,
25-
feature,
26-
type,
27-
COUNT(DISTINCT url) AS num_urls,
28-
ARRAY_AGG(url ORDER BY rank, url LIMIT 100) AS sample_urls
29-
FROM ${ctx.ref('blink_features', 'features')}
30-
WHERE
31-
yyyymmdd = '${constants.currentMonth}'
32-
${constants.devRankFilter}
46+
ranks.rank,
47+
feature.id,
48+
feature.feature,
49+
feature.type,
50+
COUNT(DISTINCT page) AS num_urls,
51+
ARRAY_AGG(page ORDER BY pages.rank, page LIMIT 100) AS sample_urls
52+
FROM pages
53+
CROSS JOIN UNNEST(features) AS feature
54+
FULL OUTER JOIN ranks
55+
ON pages.rank <= ranks.rank
3356
GROUP BY
34-
yyyymmdd,
57+
date,
3558
client,
59+
ranks.rank,
3660
id,
3761
feature,
3862
type
@@ -41,15 +65,15 @@ JOIN (
4165
SELECT
4266
date,
4367
client,
68+
ranks.rank,
4469
COUNT(DISTINCT page) AS total_urls
45-
FROM ${ctx.ref('crawl', 'pages')}
46-
WHERE
47-
date = '${constants.currentMonth}' AND
48-
is_root_page = TRUE
49-
${constants.devRankFilter}
70+
FROM pages
71+
FULL OUTER JOIN ranks
72+
ON pages.rank <= ranks.rank
5073
GROUP BY
5174
date,
52-
client
75+
client,
76+
ranks.rank
5377
)
54-
USING (date, client)
78+
USING (date, client, rank)
5579
`)

0 commit comments

Comments
 (0)