-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathmain.py
146 lines (126 loc) · 4.23 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#!/usr/bin/python
# coding=utf-8
import requests
import time
import json
import ast
import os
import utils
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from lxml import etree
# Subway map landing page: downloaded to discover the list of cities and also
# opened in the Selenium-driven browser.
PAGE_URL = 'http://map.amap.com/subway/index.html?&1100'
# Prefix of the per-city data endpoint; "<id>_drw_<name>.json" is appended to it.
DATA_URL = 'http://map.amap.com/service/subway?srhdata='
# Browser-like User-Agent handed to fetchAllCity() for the landing-page request.
HEADER = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36"}
def fetchAllCity(url, header):
    """Download a page and list the city links found in it.

    Args:
        url: Address of the page to download.
        header: Dict of HTTP request headers (e.g. a User-Agent).

    Returns:
        A list of dicts, one per ``<a>`` element whose ``class`` attribute
        contains ``city``, with the keys ``id`` (the ``id`` attribute),
        ``name`` (the ``cityname`` attribute) and ``text`` (the link text).
    """
    # The second positional argument of requests.get() is ``params`` (the
    # query string), so the headers have to be passed by keyword; otherwise
    # they end up in the URL and are never sent as HTTP headers.
    r = requests.get(url, headers=header)
    element = etree.HTML(r.content)
    options = element.xpath("//a[contains(@class, 'city')]")
    return [
        {
            'id': option.get('id'),
            'name': option.get('cityname'),
            'text': option.text,
        }
        for option in options
    ]
def parseAllCityData(cities):
    """Collect the formatted subway data of every city using a single browser.

    Args:
        cities: Iterable of city dicts; each one is passed to parseCityData().

    Returns:
        A list holding the result of parseCityData() for each city, in the
        order the cities were given.
    """
    # Start a Chrome browser
    browser = webdriver.Chrome()
    try:
        browser.get(PAGE_URL)
        return [parseCityData(city, browser) for city in cities]
    finally:
        # Always shut the browser and its driver process down, even when
        # scraping one of the cities raises; it was previously left running.
        browser.quit()
def saveData(citiesData):
    """Write each city's data to ``./data/<name>.json``.

    The ``./data/`` directory is created when it does not exist yet, and an
    existing file with the same name is overwritten.

    Args:
        citiesData: Iterable of JSON-serialisable dicts, each with a ``name``
            key that is used as the file name.
    """
    path = './data/'
    if not os.path.exists(path):
        os.mkdir(path)
    for cityData in citiesData:
        # The context manager flushes and closes every file deterministically;
        # the handles were previously opened and never closed.
        with open(path + cityData['name'] + '.json', 'w') as f:
            # dict -> JSON text
            f.write(json.dumps(cityData))
def parseCityData(city, browser):
    """Build one city's formatted record from its API data and its rendered page.

    The data endpoint is queried first, then the browser page is scraped;
    both results are handed to formatCityData().
    """
    return formatCityData(
        parseCityDataFromApi(city),
        parseCityDataFromDom(city, browser),
    )
def parseCityDataFromApi(city):
    """Fetch the raw subway description of one city from the data endpoint.

    Args:
        city: Dict with the ``id`` and ``name`` keys used to build the URL.

    Returns:
        The decoded JSON document of the response.

    Raises:
        ValueError: If the response body is not valid JSON.
    """
    url = DATA_URL + "{}_drw_{}.json".format(city['id'], city['name'])
    print(url)
    r = requests.get(url)
    # SECURITY: the body used to be passed to eval(), which executes whatever
    # the remote server (or anyone tampering with the plain-HTTP response)
    # sends, and which cannot read the JSON literals true/false/null.
    # Parse the response strictly as JSON data instead.
    return json.loads(r.text)
def parseCityDataFromDom(city, browser):
    """Read the station-name label positions of a city from the rendered page.

    Selects the city in the page's city menu, waits for the page to update
    and reads the label coordinates out of the resulting markup.

    Args:
        city: Dict whose ``id`` is the element id of the city's menu entry.
        browser: Selenium WebDriver; expected to already show the subway page.

    Returns:
        ``{'st': {station_id: {'lp': {'x': int, 'y': int}}}}`` where
        ``station_id`` is the second ``-``-separated field of the label
        element's ``id`` attribute.
    """
    # Local name chosen so the builtin ``id`` is not shadowed.
    cityId = city['id']
    # The city entry is hidden until the menu is hovered. Waiting for it to
    # become visible and clicking it required a manual hover, so the hover is
    # automated with an action chain instead.
    # find_element(By..., ...) is used because the find_element_by_* helpers
    # no longer exist in current Selenium releases.
    menu = browser.find_element(By.CSS_SELECTOR, ".more-city")
    el = browser.find_element(By.ID, cityId)
    # Hover the hidden menu, then click the city entry
    webdriver.common.action_chains.ActionChains(browser).move_to_element(menu).click(el).perform()
    # Wait for the AJAX request to finish loading
    time.sleep(2)
    element = etree.HTML(browser.page_source)
    stationNames = element.xpath("//g[@id='g-station-name']/g/text")
    result = {
        'st': {}
    }
    for station in stationNames:
        sid = station.get('id').split('-')[1]
        # Coordinates of the station-name label
        result['st'][sid] = {
            'lp': {
                'x': int(station.get('x')),
                'y': int(station.get('y')),
            }
        }
    return result
def formatCityData(apiData, domData):
    """Merge a city's API description with the label positions from the DOM.

    Args:
        apiData: Dict with the city name under ``s`` and its lines under ``l``.
        domData: Dict whose ``st`` entry maps a station id to a dict holding
            that station's label position under ``lp``.

    Returns:
        ``{'name': ..., 'l': [...]}`` with one entry per line. Each entry
        carries the line's name, its converted points and label points, its
        ``la`` value and the stations whose ``su`` flag is ``'1'``.
    """
    data = {
        'name': apiData['s'],
        'l': [],
    }
    for line in apiData['l']:
        # Convert the raw coordinate values of the line label and the line path
        lineLabelPoints = [utils.getP(raw) for raw in line['lp']]
        linePoints = [utils.getP(raw) for raw in line['c']]
        entry = {
            'name': line['ln'],
            'p': linePoints,
            'labelp': lineLabelPoints,
            'st': [],
            'la': line['la']
        }
        for stop in line['st']:
            # Only stations that are open
            if stop['su'] != '1':
                continue
            stationLabel = domData['st'][stop['si']]['lp']
            entry['st'].append({
                'name': stop['n'],
                'p': utils.getP(stop['p']),
                'labelp': stationLabel,
                'strans': bool(int(stop['t'])),
                'ldir': utils.getLdir(stop['lg']),
            })
        data['l'].append(entry)
    return data
def main():
    """Scrape the city list, gather every city's data and write it to disk."""
    cities = fetchAllCity(PAGE_URL, HEADER)
    print(cities)
    saveData(parseAllCityData(cities))
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()