Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
6be7217
Update photodownloader.py
Tadusko Oct 31, 2024
d5d840e
Added tags and license to the schema
Tadusko Oct 31, 2024
d4725d5
Also queries tags and license
Tadusko Oct 31, 2024
130064e
Use psycopg2-binary to not build from source
Tadusko Nov 1, 2024
31f4ab9
Catch KeyError when 'photos' is missing from results dict
Tadusko Nov 3, 2024
1802b9d
Check that response isn't empty.
Tadusko Nov 4, 2024
e7e055e
Rest of the additions.
Tadusko Mar 31, 2025
0b32215
Return the number of workers to cpu_count + 1
Tadusko Mar 31, 2025
73b19e9
linted
christophfink Mar 31, 2025
6f3a6ae
Merge remote-tracking branch 'upstream/main'
christophfink Apr 14, 2025
f332d7e
pydocstyle, changed local def
christophfink Apr 14, 2025
05f097d
move nul-cleaner to sqlalchemy validator
christophfink Apr 14, 2025
366051b
major refactoring, WIP
christophfink Apr 14, 2025
d9034cf
cleaning up a bit
christophfink Apr 14, 2025
b87321a
database schema updates (maybe incomplete?)
christophfink Apr 14, 2025
c7f0195
clean redundant licenses, lint throughout
christophfink Apr 14, 2025
1400c32
...
christophfink Apr 14, 2025
4ac47bd
progress, remaining issues:
christophfink Apr 15, 2025
0ac70a0
linted
christophfink Apr 15, 2025
4725b32
Merge remote-tracking branch 'upstream'
christophfink Apr 15, 2025
f46b5ff
typos
christophfink Apr 15, 2025
475261f
do not break out if key missing, but move on to next page
christophfink Apr 15, 2025
08ff0ea
avoid MAGIC NUMBERS
christophfink Apr 15, 2025
3b58815
cleaner string formatting
christophfink Apr 15, 2025
4fa31a0
Merge remote-tracking branch 'upstream/main'
christophfink Apr 15, 2025
7d3a594
refactoring
christophfink Apr 15, 2025
4f5cae7
api parameter
christophfink Apr 15, 2025
8d1a4b3
database concurrency issues
christophfink Apr 15, 2025
03d6fbd
Merge remote-tracking branch 'upstream/main'
christophfink Apr 15, 2025
3c9acbe
update photos that lack tags, accuracy, or license
christophfink Apr 15, 2025
20e5712
prepare for v0.3.0
christophfink Apr 15, 2025
f26ddfc
linted
christophfink Apr 15, 2025
4341592
v0.3.0.dev0
christophfink Apr 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 31 additions & 2 deletions flickrhistory/databaseobjects.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,11 @@ class FlickrPhoto(Base):
user_id = sqlalchemy.Column(sqlalchemy.BigInteger, nullable=False)
user_farm = sqlalchemy.Column(sqlalchemy.SmallInteger, nullable=False)

# New fields for tags, license, and geo accuracy
tags = sqlalchemy.Column(sqlalchemy.Text)
license = sqlalchemy.Column(sqlalchemy.Integer)
geo_accuracy = sqlalchemy.Column(sqlalchemy.Integer)

user = sqlalchemy.orm.relationship("FlickrUser", back_populates="photos")

__table_args__ = (
Expand All @@ -181,6 +186,15 @@ class FlickrPhoto(Base):
@classmethod
def from_raw_api_data_flickrphotossearch(cls, data):
"""Initialise a new FlickrPhoto with a flickr.photos.search data dict."""

# Helper function to clean NUL characters
def clean_string(input_string):
    """Return input_string with NUL characters removed.

    Values that are not ``str`` instances are returned unchanged.
    """
    if not isinstance(input_string, str):
        return input_string
    return input_string.replace("\x00", "")

# the API does not always return all fields
# we need to figure out which ones we can use

Expand Down Expand Up @@ -209,12 +223,12 @@ def from_raw_api_data_flickrphotossearch(cls, data):
pass

try:
photo_data["title"] = data["title"]
photo_data["title"] = clean_string(data["title"])
except KeyError:
pass

try:
photo_data["description"] = data["description"]["_content"]
photo_data["description"] = clean_string(data["description"]["_content"])
except KeyError:
pass

Expand Down Expand Up @@ -254,6 +268,21 @@ def from_raw_api_data_flickrphotossearch(cls, data):
):
pass

try:
photo_data["tags"] = clean_string(data["tags"])
except KeyError:
pass

try:
photo_data["license"] = int(data["license"])
except (ValueError, KeyError):
pass

try:
photo_data["geo_accuracy"] = int(data["accuracy"])
except (ValueError, KeyError):
pass

# finally, the user
# (let’s just delegate that to the FlickrUser constructor)
photo_data["user"] = FlickrUser.from_raw_api_data_flickrphotossearch(data)
Expand Down
18 changes: 15 additions & 3 deletions flickrhistory/photodownloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,15 @@ def photos(self):
"per_page": 500,
"has_geo": 1,
"extras": ", ".join(
["description", "date_upload", "date_taken", "geo", "owner_name"]
[
"description",
"date_upload",
"date_taken",
"geo",
"owner_name",
"tags",
"license",
]
),
"min_upload_date": self._timespan.start.timestamp(),
"max_upload_date": self._timespan.end.timestamp(),
Expand Down Expand Up @@ -81,17 +89,21 @@ def photos(self):
# unsuccessful and start over
raise ApiResponseError() from exception

# Check that both 'photos' and its nested 'photo' key are present to avoid a KeyError
if "photos" not in results or "photo" not in results["photos"]:
break

try:
num_photos = int(results["photos"]["total"])
except TypeError:
num_photos = 0

if num_photos > 4000 and self._timespan.duration > datetime.timedelta(
if num_photos > 3000 and self._timespan.duration > datetime.timedelta(
seconds=1
):
raise DownloadBatchIsTooLargeError(
(
"More than 4000 rows returned ({:d}), "
"More than 3000 rows returned ({:d}), "
+ "please specify a shorter time span."
).format(num_photos)
)
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ install_requires =
blessed
GeoAlchemy2
PyYAML
psycopg2
psycopg2-binary
Comment thread
christophfink marked this conversation as resolved.
Outdated
requests
SQLAlchemy >= 1.4.0b1
urllib3
Expand Down