-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmaster.py
41 lines (35 loc) · 1.38 KB
/
master.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import scraper, queue, threading, time
class Master:
    """Schedule scraper workers and track which items were already queued.

    The master keeps a set of every item it has ever scheduled
    (``checked``), a queue of items still waiting for a scraper
    (``not_seen``) and a queue on which scrapers hand back their results
    (``scrap_res_queue``). Items are presumably URLs, but the class only
    requires them to be hashable.
    """

    def __init__(self):
        # Every item that has been scheduled at least once.
        self.checked = set()
        # Items waiting to be handed to a scraper.
        self.not_seen = queue.Queue()
        # Result batches (iterables of items) produced by scrapers.
        self.scrap_res_queue = queue.Queue()
        # Every scraper object started so far.
        self.scrapers = []
        # Maximum number of threads allowed besides the master thread.
        self.SCRAPER_LIMIT = 5
        # Not used inside this class; kept for external users.
        self.free_scrapers = queue.Queue()
        # queue.Queue does its own locking; this lock is kept for
        # backward compatibility with code that may rely on it.
        self.queueLock = threading.Lock()

    def update_schedule(self):
        """Take one result batch, if any, and schedule its unseen items.

        At most one batch is removed from ``scrap_res_queue`` per call.
        Each item of the batch that is not yet in ``checked`` is put on
        ``not_seen`` and recorded in ``checked``. Returns ``None``.
        """
        try:
            # Public, thread-safe API instead of the private ``_get`` hook;
            # ``with`` guarantees the lock is released even on error.
            with self.queueLock:
                batch = self.scrap_res_queue.get_nowait()
        except queue.Empty:
            return
        for item in batch:
            if item not in self.checked:
                self.not_seen.put(item)
                self.checked.add(item)

    def _start_next_scraper(self):
        """Start a scraper for the next pending item; no-op if none is pending."""
        try:
            with self.queueLock:
                target = self.not_seen.get_nowait()
        except queue.Empty:
            return
        # Built outside the lock so a failing constructor cannot leave the
        # lock held. The third argument is the current thread count, as in
        # the original call.
        new_scraper = scraper.Scraper(
            target, self.scrap_res_queue, threading.active_count()
        )
        new_scraper.start()
        self.scrapers.append(new_scraper)

    def crawl(self, seed):
        """Run the scheduling loop starting from ``seed``.

        The seed is marked as checked and queued, then the method loops
        forever: once per second it starts at most one new scraper (while
        the thread limit allows) and processes at most one result batch.
        This method never returns.
        """
        print('Wywolano z seed =', seed)
        self.checked.add(seed)
        self.not_seen.put(seed)
        while True:
            # active_count() also counts this master thread, hence the + 1.
            if threading.active_count() < self.SCRAPER_LIMIT + 1:
                self._start_next_scraper()
            time.sleep(1)
            self.update_schedule()