-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcrawl_rev.py
60 lines (51 loc) · 2.13 KB
/
crawl_rev.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import requests
from bs4 import BeautifulSoup
import time
import datetime
import xmltodict
import json
def get_nexttrain(station_ID, *, timeout=10):
    """Call the ``GetNextTrain2`` SOAP operation for one station.

    Args:
        station_ID: Value placed in the ``<stnid>`` element of the request.
        timeout: Seconds to wait for the server, forwarded to
            ``requests.post``. Without it a stalled connection would block
            the caller forever.

    Returns:
        A ``BeautifulSoup`` document (``xml`` parser) built from the
        response body.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
    """
    # Local import keeps this function self-contained with respect to the
    # file's import list.
    from xml.sax.saxutils import escape

    headers = {
        'Content-Type': 'text/xml',
        'SOAPAction': 'http://tempuri.org/GetNextTrain2',
    }
    # Escape the id so characters such as "&" or "<" cannot break the envelope.
    data = """<?xml version="1.0" encoding="utf-8"?>
<soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<soap:Body>
<GetNextTrain2 xmlns="http://tempuri.org/">
<stnid>{}</stnid>
</GetNextTrain2>
</soap:Body>
</soap:Envelope>""".format(escape(str(station_ID)))
    res = requests.post(
        "http://ws.metro.taipei/trtcappweb/Traintime.asmx",
        headers=headers,
        data=data,
        timeout=timeout,
    )
    # Fail loudly on an HTTP error instead of parsing an error page as data.
    res.raise_for_status()
    return BeautifulSoup(res.text, 'xml')
def car_stat(trip_no, *, timeout=10):
    """Call the ``GetCartDetailbyTripID`` SOAP operation for one trip.

    Args:
        trip_no: Value placed in the ``<strCW>`` element of the request.
        timeout: Seconds to wait for the server, forwarded to
            ``requests.post``. Without it a stalled connection would block
            the caller forever.

    Returns:
        A ``BeautifulSoup`` document (``xml`` parser) built from the
        response body.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
    """
    # Local import keeps this function self-contained with respect to the
    # file's import list.
    from xml.sax.saxutils import escape

    headers = {
        'Content-Type': 'text/xml',
        'SOAPAction': 'http://tempuri.org/GetCartDetailbyTripID',
    }
    # The trip number is escaped so characters such as "&" or "<" cannot
    # break the envelope.
    data = """<?xml version="1.0" encoding="utf-8"?>
<soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
<soap:Body>
<GetCartDetailbyTripID xmlns="http://tempuri.org/">
<strCW>{}</strCW>
</GetCartDetailbyTripID>
</soap:Body>
</soap:Envelope>""".format(escape(str(trip_no)))
    res = requests.post(
        "http://ws.metro.taipei/trtcappweb/CartWeight.asmx",
        headers=headers,
        data=data,
        timeout=timeout,
    )
    # Fail loudly on an HTTP error instead of parsing an error page as data.
    res.raise_for_status()
    return BeautifulSoup(res.text, 'xml')
# Crawl state: `out` maps each StationID seen so far to its parsed <Detail>
# record; `trip_no` is the trip number being followed.
out = {}
trip_no = ""

# Phase 1: poll station "097" once per second until the second <Detail>
# element of the next-train response has a non-empty `tripno` attribute.
while True:
    # Fetch once per poll and reuse the result. Issuing a separate request
    # for the print, the test and the assignment could observe different
    # responses, so the value stored might not be the value that was tested.
    detail = get_nexttrain("097").find_all("Detail")[1]
    print(detail)
    if detail['tripno'] != "":
        trip_no = detail['tripno']
        break
    time.sleep(1)
print()
print(trip_no)

# Phase 2: poll the car detail of that trip once per second, forever. Each
# time the reported StationID changes, record the parsed <Detail> and rewrite
# the JSON snapshot in a file named after the trip number.
station = ""
while True:
    res = xmltodict.parse(str(car_stat(trip_no).find("Detail")))["Detail"]
    # NOTE(review): the reviewed copy had lost its indentation; recording,
    # printing and writing are assumed to happen only on a station change.
    # Confirm against the original file.
    if station != res["@StationID"]:
        station = res["@StationID"]
        out[station] = res
        print(station)
        # The context manager closes the file even if the write raises.
        with open(str(trip_no), "w+") as f:
            f.write(json.dumps(out))
    time.sleep(1)