Skip to content

Commit 497dadf

Browse files
committed
API cleanup: objectified Constants, docstrings
Instead of a somewhat clumsy class-in-a-class, have Constants as a wholly independent class with a user-specific instance constructed by the API object. This simplifies calls to getUser<>Page.
1 parent e1decca commit 497dadf

File tree

3 files changed

+130
-105
lines changed

3 files changed

+130
-105
lines changed

filmatyk/database.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,10 @@
33
from math import ceil
44

55
import containers
6+
from filmweb import ConnectionError, FilmwebAPI
67

78
class Database(object):
8-
def __init__(self, itemtype:str, api:object, callback):
9+
def __init__(self, itemtype:str, api:FilmwebAPI, callback:callable):
910
self.itemtype = itemtype
1011
self.callback = callback
1112
self.items = []
@@ -27,7 +28,7 @@ def __iter__(self):
2728

2829
# Serialization-deserialization
2930
@staticmethod
30-
def restoreFromString(itemtype:str, string:str, api:object, callback):
31+
def restoreFromString(itemtype:str, string:str, api:FilmwebAPI, callback:callable):
3132
newDatabase = Database(itemtype, api, callback)
3233
if not string:
3334
# simply return a raw, empty DB
@@ -47,7 +48,7 @@ def softUpdate(self):
4748
try:
4849
# in case there are network problems
4950
first_request = self.api.getNumOf(self.itemtype)
50-
except self.api.ConnectionError:
51+
except ConnectionError:
5152
self.callback(-1, abort=True) #hide the progress bar
5253
return None
5354
if first_request is None:

filmatyk/filmweb.py

Lines changed: 122 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -7,60 +7,77 @@
77

88
import containers
99

10-
class FilmwebAPI(object):
11-
class Constants(object):
12-
login_path = 'https://ssl.filmweb.pl/j_login'
13-
base_path = 'https://www.filmweb.pl'
14-
auth_error = 'błędny e-mail lub hasło' #TODO: be a bit more intelligent here
15-
main_class = 'userVotesPage__results'
16-
item_class = 'userVotesPage__result'
17-
rating_source = 'userVotes'
18-
rating_stype = 'application/json'
19-
m_count_span = 'blockHeader__titleInfoCount'
20-
s_count_span = 'blockHeader__titleInfoCount'
21-
g_count_span = 'blockHeader__titleInfoCount'
22-
@classmethod
23-
def getUserPage(self, username):
24-
return self.base_path + '/user/' + username
25-
@classmethod
26-
def getUserMoviePage(self, username, page=1):
27-
userpage = self.getUserPage(username)
28-
return userpage + '/films?page={}'.format(page)
29-
@classmethod
30-
def getUserSeriesPage(self, username, page=1):
31-
userpage = self.getUserPage(username)
32-
return userpage + '/serials?page={}'.format(page)
33-
@classmethod
34-
def getUserGamePage(self, username, page=1):
35-
userpage = self.getUserPage(username)
36-
return userpage + '/games?page={}'.format(page)
37-
38-
ConnectionError = requests_html.requests.ConnectionError
3910

11+
ConnectionError = requests_html.requests.ConnectionError
12+
13+
class Constants():
14+
"""URLs and HTML component names for data acquisition.
15+
16+
Create an instance for a given user to generate their specific URLs.
17+
"""
18+
login_path = 'https://ssl.filmweb.pl/j_login'
19+
base_path = 'https://www.filmweb.pl'
20+
main_class = 'userVotesPage__results'
21+
item_class = 'userVotesPage__result'
22+
rating_source = 'userVotes'
23+
rating_stype = 'application/json'
24+
movie_count_span = 'blockHeader__titleInfoCount'
25+
series_count_span = 'blockHeader__titleInfoCount'
26+
game_count_span = 'blockHeader__titleInfoCount'
27+
28+
def __init__(self, username):
29+
self.username = username
30+
self.userpage = self.getUserPage()
31+
32+
def getUserPage(self):
33+
return self.base_path + '/user/' + self.username
34+
35+
def getUserMoviePage(self, page=1):
36+
return self.userpage + '/films?page={}'.format(page)
37+
38+
def getUserSeriesPage(self, page=1):
39+
return self.userpage + '/serials?page={}'.format(page)
40+
41+
def getUserGamePage(self, page=1):
42+
return self.userpage + '/games?page={}'.format(page)
43+
44+
45+
class FilmwebAPI():
46+
"""HTML-based API for acquiring data from Filmweb."""
4047
@staticmethod
4148
def login(username, password):
49+
"""Attempt to acquire an authenticated user session."""
4250
session = requests_html.HTMLSession()
51+
auth_package = {
52+
'j_username': username,
53+
'j_password': password,
54+
}
55+
# Catch connection errors
4356
try:
44-
log = session.post(
45-
FilmwebAPI.Constants.login_path,
46-
data={'j_username': username, 'j_password': password}
47-
)
48-
except FilmwebAPI.ConnectionError:
49-
return (False, True) # login error, connection error
50-
if FilmwebAPI.Constants.auth_error in log.text:
57+
log = session.post(Constants.login_path, data=auth_package)
58+
except ConnectionError:
59+
return (False, True)
60+
# Catch authentication errors
61+
if len(session.cookies) == 0:
5162
print('BŁĄD LOGOWANIA')
5263
return (False, False)
5364
else:
5465
return (True, session)
5566

56-
#@staticmethod -- but not actually a static method, see:
57-
# https://stackoverflow.com/q/21382801/6919631
58-
# https://stackoverflow.com/q/11058686/6919631
59-
#in short: THIS METHOD SHOULD NEVER BE CALLED DIRECTLY
6067
def enforceSession(fun):
61-
#this decorator makes the function being called cause the caller
62-
#(assumed to be the first object in the argument list) to call the
63-
#session check method (which in turn can request a new session)
68+
"""Decorator to mark API functions that require a live session.
69+
70+
It will perform a session check before calling the actual function.
71+
Because it assumes that the first argument of the wrapped function is
72+
a bound FilmwebAPI instance ("self"), it shall only be used with FilmwebAPI
73+
methods.
74+
Because it is meant to be used as a class-level function decorator, it has
75+
no real "self" argument. It is effectively something like a static method.
76+
See the following links for more info:
77+
https://stackoverflow.com/q/21382801/6919631
78+
https://stackoverflow.com/q/11058686/6919631
79+
The bottom line is that it should NEVER be called directly.
80+
"""
6481
def wrapper(*args, **kwargs):
6582
self = args[0]
6683
if self.checkSession():
@@ -69,43 +86,48 @@ def wrapper(*args, **kwargs):
6986
return None
7087
return wrapper
7188

72-
def __init__(self, callback, username:str=''):
89+
def __init__(self, login_handler, username:str=''):
7390
self.username = username
74-
self.requestLogin = callback
91+
self.constants = Constants(username)
92+
self.login_handler = login_handler
7593
self.session = None
7694
self.parsingRules = {}
77-
self.__cacheAllParsingRules()
95+
for container in containers.classByString.keys():
96+
self.__cacheParsingRules(container)
7897
# bind specific methods and constants to their item types
7998
self.urlGenerationMethods = {
80-
'Movie': self.Constants.getUserMoviePage,
81-
'Series': self.Constants.getUserSeriesPage,
82-
'Game': self.Constants.getUserGamePage
99+
'Movie': self.constants.getUserMoviePage,
100+
'Series': self.constants.getUserSeriesPage,
101+
'Game': self.constants.getUserGamePage
83102
}
84103
self.countSpanClasses = {
85-
'Movie': self.Constants.m_count_span,
86-
'Series': self.Constants.s_count_span,
87-
'Game': self.Constants.g_count_span
104+
'Movie': self.constants.movie_count_span,
105+
'Series': self.constants.series_count_span,
106+
'Game': self.constants.game_count_span
88107
}
89108

90-
def __cacheAllParsingRules(self):
91-
for container in containers.classByString.keys():
92-
self.__cacheParsingRules(container)
93-
94109
def __cacheParsingRules(self, itemtype:str):
95-
#get all the blueprints of a given class
110+
"""Converts parsing rules for a given type into a neater representation.
111+
112+
The rules for each Blueprint are expressed in a human-readable and human-
113+
writable form that makes them easy to modify if need be, but not very
114+
convenient for the parser. This method groups rules in a parser-friendly
115+
representation that makes its job easier.
116+
"""
117+
# Get all the blueprints of a given class
96118
rawRules = {}
97119
for key, val in containers.classByString[itemtype].blueprints.items():
98120
rawRules[key] = val.getParsing()
99-
#convert them to a parsing tree
121+
# Convert them to a parsing tree
100122
pTree = {}
101123
classes = set(rule['tag'] for rule in rawRules.values() if rule is not None)
102124
for c in classes:
103125
pTree[c] = {}
104126
for key, rule in rawRules.items():
105-
#ignore any non-parsable fields
127+
# Ignore any non-parsable fields
106128
if rule is None:
107129
continue
108-
#process only the rules of class c
130+
# Process only the rules of class c
109131
if rule['tag'] != c:
110132
continue
111133
pClass = rule['class']
@@ -116,44 +138,49 @@ def __cacheParsingRules(self, itemtype:str):
116138
'attr': rule['attr'] if 'attr' in rule.keys() else None,
117139
'type': rule['type'] if 'type' in rule.keys() else None
118140
}
119-
#store the result
141+
# Bind the result to a type name
120142
self.parsingRules[itemtype] = pTree
121143

122144
def checkSession(self):
123-
#check if there is a session and acquire one if not
145+
"""Check if there exists a live session and acquire a new one if not.
146+
#TODO: now with improved session handling we need something smarter
147+
(cause we'll nearly always have a session, except it might sometimes get stale
148+
resulting in an acquisition failure)
149+
"""
124150
if not self.session:
125151
self.requestSession()
126-
#check again - in case the user cancelled a login
152+
# Check again - in case the user cancelled a login
127153
if not self.session:
128154
return False
129-
#at this point everything is definitely safe
155+
# At this point everything is definitely safe
130156
return True
131157

132158
def requestSession(self):
133-
#call the GUI callback for login handling
134-
#it will halt execution until the user logs in or cancels
135-
session, username = self.requestLogin(self.username)
136-
#if the session wasn't acquired, don't care about username
137-
#but for good session, check if it agrees with the existing one (or set it)
159+
"""Call the GUI to handle a login and bind a session object to self."""
160+
# This pauses execution until the user logs in or cancels
161+
session, username = self.login_handler(self.username)
138162
if session:
163+
# Set the username in case it's a first run (it will be empty)
139164
if not self.username:
140165
self.username = username
141166
else:
167+
# If it's not the first log in, make sure the user has logged as the
168+
# same user. If the GUI is to be trusted, it shouldn't be possible, but
169+
# we can still check in case of an accident during external usage.
170+
# Returned value isn't important. *NOT* setting self.session is.
142171
if username != self.username:
143-
#this should never happen, if GUI is to be trusted
144-
#returning is not as important as *not* setting self.session
145172
return None
146173
self.session = session
147174

148175
@enforceSession
149176
def getNumOf(self, itemtype:str):
150-
#return a tuple: (number of rated movies, number of movies per page)
151-
try:
152-
getURL = self.urlGenerationMethods[itemtype]
153-
spanClass = self.countSpanClasses[itemtype]
154-
except KeyError:
155-
return 0, 0 # should never happen though
156-
url = getURL(self.username)
177+
"""Return the number of items of a given type that the user has rated.
178+
179+
Returns a tuple: (number of rated items, number of items per page).
180+
"""
181+
getURL = self.urlGenerationMethods[itemtype]
182+
spanClass = self.countSpanClasses[itemtype]
183+
url = getURL()
157184
page = self.fetchPage(url)
158185
# TODO: in principle, this page could be cached for some small time
159186
#the number of user's movies is inside a span of a specific class
@@ -169,21 +196,21 @@ def getNumOf(self, itemtype:str):
169196
for div in page.body.find_all('div'):
170197
if not div.has_attr('data-id') or not div.has_attr('class'):
171198
continue
172-
if not self.Constants.item_class in div.attrs['class']:
199+
if not self.constants.item_class in div.attrs['class']:
173200
continue
174201
per_page += 1
175202
return items, per_page
176203

177204
@enforceSession
178205
def getItemsPage(self, itemtype:str, page:int=1):
179-
# Get the URL of a page containing items of requested type, fetch and parse
180-
# it and return data (as list of objects). Instead of several ifs, retrieve
181-
# the proper methods from a dict prepared during init.
182-
try:
183-
getURL = self.urlGenerationMethods[itemtype]
184-
except KeyError:
185-
return [] # should never happen though
186-
url = getURL(self.username, page)
206+
"""Acquire items of a given type from a given page number.
207+
208+
The user's ratings are displayed on pages. This fetches a page by number,
209+
parses it and returns a list of Item-based objects. URL is delivered by a
210+
cached dict, binding URL-generating functions to respective item types.
211+
"""
212+
getURL = self.urlGenerationMethods[itemtype]
213+
url = getURL(page)
187214
page = self.fetchPage(url)
188215
data = self.parsePage(page, itemtype)
189216
return data
@@ -216,11 +243,11 @@ def parsePage(self, page, itemtype:str):
216243

217244
def extractDataSource(self, page):
218245
"""Extract the div that holds all the data."""
219-
return page.find('div', attrs={'class': self.Constants.main_class})
246+
return page.find('div', attrs={'class': self.constants.main_class})
220247

221248
def extractItems(self, div):
222249
"""From the main div, extract all divs holding item details."""
223-
sub_divs = div.find_all('div', attrs={'class': self.Constants.item_class})
250+
sub_divs = div.find_all('div', attrs={'class': self.constants.item_class})
224251
sub_divs = [div for div in sub_divs if div.has_attr('data-id')]
225252
return sub_divs
226253

@@ -229,8 +256,8 @@ def extractRatings(self, div):
229256
230257
They're held in a specific span as <script> contents.
231258
"""
232-
span = div.find('span', attrs={'data-source': self.Constants.rating_source})
233-
scripts = span.find_all('script', attrs={'type': self.Constants.rating_stype})
259+
span = div.find('span', attrs={'data-source': self.constants.rating_source})
260+
scripts = span.find_all('script', attrs={'type': self.constants.rating_stype})
234261
ratings = [script.getText() for script in scripts]
235262
return ratings
236263

@@ -278,19 +305,19 @@ def parseRating(self, text):
278305
Item's addRating method.
279306
"""
280307
origDict = json.loads(text)
281-
#ensure all date keys are present
308+
# Ensure all date keys are present
282309
try:
283310
date_ = origDict['d']
284311
except KeyError:
285-
# once I've seen a bugged span in which this key was not present
312+
# I've seen a bugged span once, which didn't have this key
286313
date_ = {'y':2000, 'm':1, 'd':1}
287314
if 'm' not in date_.keys():
288315
date_['m'] = 1
289316
if 'd' not in date_.keys():
290317
date_['d'] = 1
291-
# unescape HTML-coded characters from the comment
318+
# Unescape HTML-coded characters from the comment
292319
comment = html.unescape(origDict['c'] if 'c' in origDict.keys() else '')
293-
# translate that dict to more readable standard
320+
# Translate that dict to a more readable standard
294321
iid = origDict['eId']
295322
isFaved = origDict['f'] if 'f' in origDict.keys() else 0
296323
ratingDict = {

0 commit comments

Comments
 (0)