-
Notifications
You must be signed in to change notification settings - Fork 178
Use caching for the etag using stored hash if available #182
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
67849af
17e7424
794c9d4
00a02fb
006e7bc
ed52375
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -27,6 +27,7 @@ | |
| __all__ = ("PyMongo", "ASCENDING", "DESCENDING", "BSONObjectIdConverter", "BSONProvider") | ||
|
|
||
| import hashlib | ||
| from collections import OrderedDict | ||
| from mimetypes import guess_type | ||
| from typing import Any | ||
|
|
||
|
|
@@ -66,6 +67,8 @@ def __init__( | |
| ) -> None: | ||
| self.cx: MongoClient | None = None | ||
| self.db: Database | None = None | ||
| self._hash_cache = OrderedDict() | ||
| self._hash_limit = 128 | ||
|
|
||
| if app is not None: | ||
| self.init_app(app, uri, *args, **kwargs) | ||
|
|
@@ -183,12 +186,21 @@ def get_upload(filename): | |
| response.headers["Content-Disposition"] = f"attachment; filename={filename}" | ||
| response.content_length = fileobj.length | ||
| response.last_modified = fileobj.upload_date | ||
| # Compute the sha1 sum of the file for the etag. | ||
| pos = fileobj.tell() | ||
| raw_data = fileobj.read() | ||
| fileobj.seek(pos) | ||
| digest = hashlib.sha1(raw_data).hexdigest() | ||
| response.set_etag(digest) | ||
|
|
||
| # GridFS does not manage its own checksum, so we manage our own using its | ||
| # metadata storage, to be used for the etag. | ||
| sha1_sum = self._hash_cache.get(str(fileobj._id)) | ||
| if sha1_sum is None: | ||
| # Compute the checksum of the file for the etag. | ||
| pos = fileobj.tell() | ||
| raw_data = fileobj.read() | ||
| fileobj.seek(pos) | ||
| sha1_sum = hashlib.sha1(raw_data).hexdigest() | ||
| while len(self._hash_cache) >= self._hash_limit: | ||
| self._hash_cache.popitem() | ||
| self._hash_cache[str(fileobj._id)] = sha1_sum | ||
| response.set_etag(sha1_sum) | ||
|
||
|
|
||
| response.cache_control.max_age = cache_for | ||
| response.cache_control.public = True | ||
| response.make_conditional(request) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is problematic because although
_idis unique,_ids can be reused over time by deleting the old file and uploading a new one with the same_id. In that case this cache becomes out of date.