-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtechstack.py
39 lines (32 loc) · 1.36 KB
/
techstack.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
from Wappalyzer import Wappalyzer, WebPage
from requests.exceptions import ConnectionError,ReadTimeout,InvalidURL
from pandas import DataFrame,read_csv
from multiprocessing import Pool
from os.path import join
import warnings
warnings.filterwarnings('ignore')
def techstack(filepath, processes=4):
    """Fingerprint every live host from ``probing.csv`` and save the result.

    Reads the live URLs from ``<filepath>/probing.csv``, analyses them in a
    pool of worker processes, and writes the collected rows to
    ``<filepath>/techstack.csv``.

    Args:
        filepath: Directory that contains ``probing.csv``; ``techstack.csv``
            is written into the same directory.
        processes: Number of worker processes in the pool. Defaults to 4,
            the value that was previously hard-coded.
    """
    urls = readstack(join(filepath, 'probing.csv'))
    # Pool.map blocks until every URL has been processed and returns the
    # results in the same order as ``urls``.
    with Pool(processes) as pool:
        data = pool.map(gettechstackdata, urls)
    writetechstack(data, join(filepath, 'techstack.csv'))
    print("\nThe data has been saved to techstack.csv")
def writetechstack(data, outfile):
    """Write the collected tech-stack rows to *outfile* as CSV (no index column).

    Args:
        data: Rows to write, normally a list of dicts. ``None`` entries in a
            list or tuple are skipped: ``gettechstackdata`` returns ``None``
            for hosts it could not fetch, and ``DataFrame`` cannot build a
            sensible table from a sequence that mixes dicts with ``None``.
            Any other input type is handed to ``DataFrame`` unchanged.
        outfile: Path of the CSV file to create or overwrite.
    """
    if isinstance(data, (list, tuple)):
        data = [row for row in data if row is not None]
    DataFrame(data).to_csv(outfile, index=False)
def readstack(inpfile):
    """Return the ``urls`` value of every row whose ``status`` is ``Live``.

    Args:
        inpfile: Path to a CSV file that has ``urls`` and ``status`` columns.

    Returns:
        list: The ``urls`` entries of the matching rows, in file order.
    """
    probes = read_csv(inpfile)
    live_rows = probes.query("status=='Live'")
    live_urls = live_rows["urls"]
    return live_urls.tolist()
def gettechstackdata(url):
    """Detect the technologies served at *url* using Wappalyzer.

    Args:
        url: Host name (``example.com``) or full URL. ``http://`` is
            prepended only when no HTTP(S) scheme is already present, so a
            full URL is not turned into ``http://https://...``.

    Returns:
        dict | None: ``{"url": url, "versions": "<tech>[ <version>],..."}``
        where every detected technology is listed by name, followed by its
        first reported version when one is available. ``None`` when the
        page could not be fetched (connection error, read timeout or
        invalid URL).
    """
    # str() keeps non-string cells (e.g. NaN from the CSV) behaving as they
    # did with the original f-string interpolation.
    target = str(url)
    if not target.lower().startswith(("http://", "https://")):
        target = f"http://{target}"
    try:
        webpage = WebPage.new_from_url(target, timeout=2.50)
        wappalyzer = Wappalyzer.latest()
        detected = wappalyzer.analyze_with_versions_and_categories(webpage)
    except (ConnectionError, ReadTimeout, InvalidURL):
        # Best effort: a host that cannot be fetched produces no row.
        return None
    technologies = [
        f"{name} {info['versions'][0]}" if info["versions"] else name
        for name, info in detected.items()
    ]
    return {"url": url, "versions": ",".join(technologies)}