Skip to content

Commit

Permalink
Merge pull request #692 from BLKSerene/dev
Browse files Browse the repository at this point in the history
Fix notifications that newer versions of corpora are available
  • Loading branch information
wannaphong authored Aug 25, 2022
2 parents 976eb28 + 739f04d commit 9124ab3
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 8 deletions.
19 changes: 12 additions & 7 deletions pythainlp/corpus/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,10 +399,11 @@ def download(
return False
corpus_versions = corpus["versions"][version]
file_name = corpus_versions["filename"]
- found = ''
+ found = ""
for i, item in local_db["_default"].items():
- if item["name"] == name and item["version"] == version:
- # Record corpus no. if found
+ # Do not check version here
+ if item["name"] == name:
+ # Record corpus no. if found in local database
found = i
break

Expand Down Expand Up @@ -445,7 +446,9 @@ def download(
# This awkward behavior is for backward-compatibility with
# database files generated previously using TinyDB
if local_db["_default"]:
- corpus_no = max((int(no) for no in local_db["_default"])) + 1
+ corpus_no = max((
+ int(no) for no in local_db["_default"]
+ )) + 1
else:
corpus_no = 1
local_db["_default"][str(corpus_no)] = {
Expand All @@ -458,8 +461,10 @@ def download(

with open(corpus_db_path(), "w", encoding="utf-8") as f:
json.dump(local_db, f, ensure_ascii=False)
+ # Check if versions match if the corpus is found in local database
+ # but a re-download is not forced
else:
- current_ver = local_db['_default'][found]["version"]
+ current_ver = local_db["_default"][found]["version"]

if current_ver == version:
# Already has the same version
Expand Down Expand Up @@ -519,8 +524,8 @@ def remove(name: str) -> bool:
os.remove(path)
for i, corpus in db["_default"].copy().items():
if corpus["name"] == name:
- del db['_default'][i]
- with open(corpus_db_path(), 'w', encoding='utf-8') as f:
+ del db["_default"][i]
+ with open(corpus_db_path(), "w", encoding="utf-8") as f:
json.dump(db, f, ensure_ascii=False)
return True

Expand Down
5 changes: 4 additions & 1 deletion tests/test_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,10 @@ def test_corpus(self):
self.assertIsNotNone(download(name="test", version="0.0.9"))
self.assertIsNotNone(download(name="test", version="0.0.10"))
with self.assertRaises(Exception) as context:
- self.assertIsNotNone(download(name="test", version="0.0.11"))
+ # Force re-downloading since the corpus already exists
+ self.assertIsNotNone(download(
+ name="test", version="0.0.11", force=True
+ ))
self.assertTrue(
"Hash does not match expected."
in
Expand Down

0 comments on commit 9124ab3

Please sign in to comment.