-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhive_transform.hql
50 lines (46 loc) · 1.61 KB
/
hive_transform.hql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
DROP TABLE collisions_ext;
DROP TABLE zipcode_ext;
DROP TABLE manhattan_street;
CREATE EXTERNAL TABLE IF NOT EXISTS collisions_ext(
street STRING,
zipcode STRING,
person_type STRING,
damage_type STRING,
count INT)
COMMENT 'collisions'
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
STORED AS SEQUENCEFILE
LOCATION '/user/motor_vehicle_collisions/output_mr';
CREATE EXTERNAL TABLE IF NOT EXISTS zipcode_ext(
zipcode STRING,
borough STRING)
COMMENT 'zipcode'
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
STORED AS TEXTFILE
LOCATION '/user/motor_vehicle_collisions/input/datasource_zip_codes'
TBLPROPERTIES("skip.header.line.count"="1");
CREATE EXTERNAL TABLE IF NOT EXISTS manhattan_street_ext(
street STRING,
person_type STRING,
killed INT,
injured INT)
COMMENT 'manhattan_street'
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.JsonSerDe'
STORED AS TEXTFILE
LOCATION '/user/motor_vehicle_collisions/output';
INSERT OVERWRITE TABLE manhattan_street_ext
SELECT street, person_type, killed, injured
FROM (
SELECT street, person_type, SUM(killed) AS killed, SUM(injured) AS injured,
RANK () OVER (PARTITION BY person_type ORDER BY SUM(killed) + SUM(injured) DESC) AS rank_in_person_type
FROM (SELECT zipcode, street, person_type,
CASE WHEN damage_type = 'INJURED' THEN count ELSE 0 END injured,
CASE WHEN damage_type = 'KILLED' THEN count ELSE 0 END killed
FROM collisions_ext) c
JOIN zipcode_ext z ON (z.zipcode = c.zipcode)
WHERE z.borough = 'MANHATTAN'
GROUP BY c.street, c.person_type
) r
WHERE r.rank_in_person_type <= 3;