1313ConnectionError = requests_html .requests .ConnectionError
1414
1515
class UnauthenticatedError(ConnectionError):
    """Signal that the session used for a request was refused.

    API functions raise this error when they detect that the active session
    is no longer accepted, so that the caller can react by requesting a new
    login instead of treating the response as valid data.
    """
18+
19+
1620class Constants ():
1721 """URLs and HTML component names for data acquisition.
1822
@@ -24,6 +28,7 @@ class Constants():
2428 item_class = 'userVotesPage__result'
2529 rating_source = 'userVotes'
2630 rating_stype = 'application/json'
31+ no_access_class = 'noResultsPlaceholder'
2732 movie_count_span = 'blockHeader__titleInfoCount'
2833 series_count_span = 'blockHeader__titleInfoCount'
2934 game_count_span = 'blockHeader__titleInfoCount'
@@ -70,9 +75,17 @@ def login(username, password):
7075 return (True , session )
7176
def enforceSession(fun):
    """Decorator to mark API functions that require an authenticated session.

    This safeguards the calls to ensure they do not fail due to a lack of
    authentication with Filmweb. To achieve this goal, two checks are made:
    * before calling the decorated function, checkSession() is consulted; if
      it reports failure, the decorated function is not called at all and
      None is returned,
    * the call itself is guarded against UnauthenticatedError; when it is
      raised, a login is requested through requestSession() and the function
      is called one more time. If no session exists after that request, None
      is returned. An UnauthenticatedError raised by the second call is not
      handled here and propagates to the caller.
    Additionally, the session cookie values are compared before and after the
    call, and the isDirty flag is set on the instance if they differ.

    Because it assumes that the first argument of the wrapped function is
    a bound FilmwebAPI instance ("self"), it shall only be used with FilmwebAPI
    methods.
    Related discussions:
    https://stackoverflow.com/q/21382801/6919631
    https://stackoverflow.com/q/11058686/6919631
    The bottom line is that it should NEVER be called directly.
    """
    # Local import keeps this block self-contained (no file-level change).
    import functools

    # wraps() preserves the decorated method's __name__ and __doc__, which a
    # plain inner function would otherwise replace with "wrapper" and None.
    @functools.wraps(fun)
    def wrapper(*args, **kwargs):
        # Extract the bound FilmwebAPI instance
        self = args[0]
        # First check: for presence of a live session
        if not self.checkSession():
            return None
        # Snapshot taken before the call so that a re-login performed below
        # is also detected as a cookie change.
        old_cookies = set(self.session.cookies.values())
        # Second check: whether the call failed due to lack of authentication
        try:
            result = fun(*args, **kwargs)
        except UnauthenticatedError:
            # Request login and call again
            print('Session was stale! Requesting login...')
            self.requestSession()
            if not self.session:
                return None
            result = fun(*args, **kwargs)
        # Session change detection
        new_cookies = set(self.session.cookies.values())
        if old_cookies != new_cookies:
            self.isDirty = True
        # Finally the produced data is returned
        return result
    return wrapper
101123
102124 def __init__ (self , login_handler , username :str = '' ):
@@ -156,11 +178,7 @@ def __cacheParsingRules(self, itemtype:str):
156178 self .parsingRules [itemtype ] = pTree
157179
158180 def checkSession (self ):
159- """Check if there exists a live session and acquire a new one if not.
160- #TODO: now with improved session handling we need something smarter
161- (cause we'll nearly always have a session, except it might sometimes get stale
162- resulting in an acquisition failure)
163- """
181+ """Check if there exists a session instance and acquire a new one if not."""
164182 session_requested = False
165183 if not self .session :
166184 self .requestSession ()
@@ -254,7 +272,13 @@ def getItemsPage(self, itemtype:str, page:int=1):
254272
255273 @enforceSession
256274 def fetchPage (self , url ):
257- """Fetch the page and return its BeautifulSoup representation."""
275+ """Fetch the page and return its BeautifulSoup representation.
276+
277+ ConnectionError is raised in case of any failure to get HTML data or page
278+ status being not-ok after get.
279+ UnauthenticatedError is raised if the response contains a span indicating
280+ that the session used to obtain it is no longer valid.
281+ """
258282 try :
259283 page = self .session .get (url )
260284 except :
@@ -264,7 +288,13 @@ def fetchPage(self, url):
264288 print ("FETCH ERROR {}" .format (status ))
265289 raise ConnectionError
266290 else :
267- return BS (page .html .html , 'lxml' )
291+ bspage = BS (page .html .html , 'lxml' )
292+ # If a request required an active session but the one we had happened to be
293+ # stale, this magical span will be found in the page data:
294+ span = bspage .find ('span' , attrs = {'class' : self .constants .no_access_class })
295+ if span :
296+ raise UnauthenticatedError
297+ return bspage
268298
269299 def parsePage (self , page , itemtype :str ):
270300 """Parse items and ratings, returning constructed Item objects."""
0 commit comments