Skip to content

Commit 88387c6

Browse files
Tech Report: Technologies total origins from crawl (#47)
* full adoption
* sql fix
* Update definitions/output/reports/cwv_tech_technologies.js

  Co-authored-by: Barry Pollard <[email protected]>
* Update definitions/output/reports/cwv_tech_technologies.js

  Co-authored-by: Barry Pollard <[email protected]>
* Update definitions/output/reports/cwv_tech_technologies.js

  Co-authored-by: Barry Pollard <[email protected]>
* review
* cleanup
* sorted arrays

---------

Co-authored-by: Barry Pollard <[email protected]>
1 parent de144b0 commit 88387c6

File tree

1 file changed

+63
-9
lines changed

1 file changed

+63
-9
lines changed

definitions/output/reports/cwv_tech_technologies.js

Lines changed: 63 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,19 +6,73 @@ publish('cwv_tech_technologies', {
66
tags: ['crux_ready']
77
}).query(ctx => `
88
/* {"dataform_trigger": "report_cwv_tech_complete", "name": "technologies", "type": "dict"} */
9+
WITH pages AS (
10+
SELECT DISTINCT
11+
client,
12+
root_page,
13+
tech.technology
14+
FROM ${ctx.ref('crawl', 'pages')},
15+
UNNEST(technologies) AS tech
16+
WHERE
17+
date = '${pastMonth}'
18+
${constants.devRankFilter}
19+
),
20+
21+
tech_origins AS (
22+
SELECT
23+
client,
24+
technology,
25+
COUNT(DISTINCT root_page) AS origins
26+
FROM pages
27+
GROUP BY
28+
client,
29+
technology
30+
),
31+
32+
technologies AS (
33+
SELECT
34+
name AS technology,
35+
description,
36+
STRING_AGG(DISTINCT category, ', ' ORDER BY category ASC) AS category,
37+
categories AS category_obj,
38+
NULL AS similar_technologies
39+
FROM ${ctx.ref('wappalyzer', 'technologies')},
40+
UNNEST(categories) AS category
41+
GROUP BY
42+
technology,
43+
description,
44+
categories
45+
),
46+
47+
total_pages AS (
48+
SELECT
49+
client,
50+
COUNT(DISTINCT root_page) AS origins
51+
FROM pages
52+
GROUP BY client
53+
)
54+
955
SELECT
1056
client,
11-
app AS technology,
57+
technology,
1258
description,
1359
category,
14-
SPLIT(category, ",") AS category_obj,
60+
category_obj,
61+
similar_technologies,
62+
origins
63+
FROM tech_origins
64+
INNER JOIN technologies
65+
USING(technology)
66+
67+
UNION ALL
68+
69+
SELECT
70+
client,
71+
'ALL' AS technology,
72+
NULL AS description,
73+
NULL AS category,
74+
NULL AS category_obj,
1575
NULL AS similar_technologies,
1676
origins
17-
FROM ${ctx.ref('core_web_vitals', 'technologies')}
18-
LEFT JOIN ${ctx.ref('wappalyzer', 'technologies')}
19-
ON app = name
20-
WHERE date = '${pastMonth}' AND
21-
geo = 'ALL' AND
22-
rank = 'ALL'
23-
ORDER BY origins DESC
77+
FROM total_pages
2478
`)

0 commit comments

Comments
 (0)