55import containers
66from filmweb import ConnectionError , FilmwebAPI
77
8+
89class Database (object ):
910 def __init__ (self , itemtype :str , api :FilmwebAPI , callback :callable ):
1011 self .itemtype = itemtype
@@ -22,6 +23,7 @@ def getItemByID(self, id:int):
2223 for item in self .items :
2324 if item .getRawProperty ('id' ) == id :
2425 return item
26+ return None
2527
2628 def __iter__ (self ):
2729 return self .items .__iter__ ()
@@ -43,53 +45,194 @@ def storeToString(self):
4345
4446 # Data acquisition
def softUpdate(self):
    """Quickly pull the most recent changes from Filmweb.

    The algorithm allows detecting additions and removals of items by
    comparing the total item count in the local and remote databases and
    keeping track of the items that differ between the two.

    It solves two fundamental problems, both related to the fact that
    getting to know the full state of the remote database is a very time-
    consuming operation (it can only be fetched in chunks of n items,
    usually n=25). The first problem is to determine how many pages to read
    from the remote database. The second problem is detecting when an item
    has been deleted remotely.

    The solution can be described in the following steps:
    * compare the item counts between the databases - an update is to be
      made only if there is a difference,
    * fetch a new chunk of the remote database (a page),
    * detect which items have been added or changed with respect to the
      local database (this makes use of the HashedItem class),
    * identify the last non-changed remote item and find its local
      counterpart,
    * split the local database into two parts:
      * a "changed" part comprises all items up to and including this last
        non-changed item - all these items are a potentially obsolete state
        of the database, and they are simply replaced with the currently
        held remote items,
      * an "unchanged" part comprises all other items - nothing about their
        remote counterparts is known at this time. Items whose IDs have
        already been fetched remotely are left out of this part, so that no
        item is counted (or stored) twice.
    * check whether merging the currently held remote items with the
      possibly up-to-date unchanged part of the local database can satisfy
      the general condition (that the local and remote databases should
      count the same).
    At some point either the counts will even out, or all of the remote
    items will be loaded. In either case the update completes.

    The problem this algorithm solves has one special form that is
    impossible to overcome: when a symmetric change has occurred past a
    certain page. In this case, any count-based algorithm will stop at the
    first chance it can get (when it notices a count balance), ignoring any
    additions and removals that may happen on further pages, if they
    balance out.
    Example:
      total change: length += 3
      page 1: 4 additions, 1 removal
      page 2: 1 addition, 1 removal
    The algorithm will reach balance after page 1 and not move on to page 2.

    Returns True in case of success, False if it aborted before completion
    (connection problems, failed login, an unexpectedly empty page) or if
    there was nothing to download. self.items is only replaced on success.
    """
    # Display the progress bar
    self.callback(0)
    # Ask the API how many items there should be (abort on network problems)
    try:
        num_request = self.api.getNumOf(self.itemtype)
    except ConnectionError:
        self.callback(-1, abort=True)
        return False
    # Exit if the user failed to log in
    if num_request is None:
        self.callback(-1, abort=True)
        return False
    # Workload estimation
    local_count = len(self.items)
    remote_count, items_per_page = num_request
    still_need = remote_count - local_count
    # Exit if nothing to download
    if not remote_count or not items_per_page or not still_need:
        self.callback(-1)
        return False
    # Convert the existing database to a hashed format
    local_hashed = [HashedItem(item) for item in self.items]
    local_hashed_dict = {item.id: item for item in local_hashed}
    # Prepare and run the main loop
    remote_page_no = 0
    remote_items = []
    remote_ids = set()
    local_unchanged = []
    while still_need:
        # Fetch a page (abort on network problems, same as the first request)
        remote_page_no += 1
        try:
            fetched_page = self.api.getItemsPage(
                self.itemtype, page=remote_page_no
            )
        except ConnectionError:
            self.callback(-1, abort=True)
            return False
        # Represent the page in the hashed form
        fetched_items = [HashedItem(item) for item in (fetched_page or [])]
        # An empty page means the remote ran out of items before the
        # announced count was reached. Fetching further pages could never
        # terminate, so abort and leave the local database untouched.
        if not fetched_items:
            self.callback(-1, abort=True)
            return False
        # Detect additions and changes among the new items
        for item in fetched_items:
            local_item = local_hashed_dict.get(item.id)
            if local_item is None:
                # This ID was not among known items - a simple addition
                item.added = True
                item.changed = True
            else:
                # A known ID - compare the data to detect a change
                item.added = False
                item.changed = item.hash != local_item.hash
                # Store its local counterpart for a safe update
                item.local_item = local_item
        # Join the new items with the previously acquired ones
        remote_items.extend(fetched_items)
        remote_ids.update(item.id for item in fetched_items)
        # One edge case is that all of the remote items have been acquired.
        # This would happen when updating the Database for the first time.
        # (">=" also covers the remote growing while it is being read.)
        if len(remote_items) >= remote_count:
            local_unchanged = []
            break
        # If the last remote item has been changed, it is difficult to
        # figure out how the currently known remote items relate to the
        # local database. In such a case another page is fetched, allowing
        # a better view.
        if remote_items[-1].changed:
            continue
        # Otherwise, locate the item in the local Database and split it.
        # Everything up to and including that position is the "changed"
        # part, superseded by remote_items; the rest is kept, except for
        # items already present among the fetched remote ones.
        last_unchanged_pos = local_hashed.index(remote_items[-1].id) + 1
        local_unchanged = [
            item for item in local_hashed[last_unchanged_pos:]
            if item.id not in remote_ids
        ]
        # Check if the databases would balance out if merged right now.
        still_need = remote_count - (len(remote_items) + len(local_unchanged))
    # At this point the database can be reconstructed from the two parts.
    new_items = []
    # First, incorporate the changes from the remotely acquired items
    for item in remote_items:
        if item.local_item:
            # If the item had a local counterpart, do not throw it away but
            # update it with the remotely acquired data (allows preserving
            # any local data that does not originate at the remote).
            local_item = item.local_item.parent
            local_item.update(item.parent)
            new_items.append(local_item)
        else:
            new_items.append(item.parent)
    # Then add the rest of the unchanged items.
    new_items.extend(item.parent for item in local_unchanged)
    self.items = new_items
    # Finalize - notify the GUI and potential caller.
    self.callback(-1)
    self.isDirty = True
    return True
87179
def hardUpdate(self):
    """Drop all the Items and reload all the data.

    This uses softUpdate under the hood. The current items are backed up
    first and put back if softUpdate reports a failure (returns a falsy
    value) or raises, so no data is lost. An exception raised by softUpdate
    is re-raised once the backup has been restored.
    """
    old_items = self.items
    self.items = []
    try:
        updated = self.softUpdate()
    except BaseException:
        # Restore the backup before propagating, so that an interrupted
        # update does not leave the database empty.
        self.items = old_items
        raise
    if not updated:
        self.items = old_items
190+
191+
class HashedItem:
    """A hashed representation of an Item that allows detecting changes.

    Computing a standard hash of the Item's UserData makes it possible to
    detect when an Item was not just added or removed but also whether it
    has changed with respect to the locally stored version of that Item.
    Flags indicating whether an item was added or changed are helpful in the
    process of performing an update.

    In theory, the Item class itself could implement the hashing
    functionality, but doing this in a separate technical class also allows
    storing the flags, which would only clutter the base class.

    Some caveats:
    * HashedItem also maintains a reference to the original item that it has
      been created from. This is convenient during the update operation, as
      it allows operating directly on the list of HashedItems instead of
      having to ensure that each list-changing operation happens both on the
      list of hashes and the list of the original items.
    * When used to hash a remotely acquired item, the corresponding local
      version of that item can be attached to the HashedItem. This saves an
      additional search operation later in the update process.
    * HashedItem can be equality-compared with not only instances of the
      same type, but also ints. This makes it possible to search for an
      integer ID in a list of HashedItems.
    * The data hash relies on the built-in hash() of a string, which Python
      may salt per interpreter process - only compare hashes computed
      within the same run.
    """
    # Names of the item properties that take part in the data hash.
    hash_data = ['rating', 'comment', 'dateOf']

    def __init__(self, item: "containers.Item"):
        self.parent = item
        self.id = item.getRawProperty('id')
        self.hash = self.computeHash(item)
        # Flags used to compare remote items with the local ones
        self.added = None
        self.changed = None
        self.local_item = None

    def computeHash(self, item: "containers.Item"):
        """Summarize UserData of an Item by a simple hash function.

        Each property listed in hash_data is read via item[prop] and
        converted with str(), so non-string values do not break the join.
        """
        userDataString = '#'.join(str(item[prop]) for prop in self.hash_data)
        return hash(userDataString)

    def __eq__(self, other):
        """Compare with an int by ID; otherwise defer to object equality."""
        if isinstance(other, int):
            return self.id == other
        return super().__eq__(other)

    def __hash__(self):
        # Defining __eq__ alone would make instances unhashable. Hashing by
        # ID keeps the hash consistent with the equality to ints above.
        return hash(self.id)
0 commit comments