-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrack_web_update.py
36 lines (30 loc) · 1.1 KB
/
track_web_update.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# Python script to track changes in a particular section of a website.
import requests
from bs4 import BeautifulSoup
# Page to monitor. The dedicated notices page is kept as an alternative target:
# website_link = "http://www.ietdavv.edu.in/index.php/notices"
website_link = "http://www.ietdavv.edu.in/index.php"

print("connecting .... ")
# Without a timeout, requests waits forever on an unresponsive server.
res = requests.get(website_link, timeout=30)
print("connected.")
# The status code is only reported, not enforced: the page is saved and
# analysed whatever the server answered.
print(res.status_code)

print("Writing to file ...")
print(res.headers)

print("Performing Analysis")
soup = BeautifulSoup(res.text, 'html.parser')

# Open the output only after parsing succeeded, so a parse failure does not
# leave a truncated web.html behind. The context manager closes the handle
# even if the write raises, and the explicit encoding avoids
# UnicodeEncodeError on platforms whose default encoding is not UTF-8.
with open("web.html", "w", encoding="utf-8") as html_file:
    html_file.write(soup.prettify())
print("Wrote to file.")

# Look up the first <div> whose CSS class is "mod_placehere_leading" and
# print the text of every link inside it.
latest_section = soup.find("div", "mod_placehere_leading")
if latest_section is None:
    # find() returns None when nothing matches (e.g. an error page or a
    # changed layout); report it instead of crashing with AttributeError.
    print("No 'mod_placehere_leading' section found in the page.")
else:
    for notice in latest_section.find_all("a"):
        # .string is None for links that contain nested markup.
        print(notice.string)