Skip to content

Commit

Permalink
send file: change cache control behaviour
Browse files Browse the repository at this point in the history
If files are restricted, the Cache-Control header is now set to "no-cache" so that cached copies are always revalidated with the server before being served.

partly closes inveniosoftware/invenio-communities#718
  • Loading branch information
nico committed Aug 10, 2022
1 parent f9c7266 commit d9dded1
Show file tree
Hide file tree
Showing 3 changed files with 95 additions and 62 deletions.
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = None
language = "en"

# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
Expand Down
132 changes: 73 additions & 59 deletions invenio_files_rest/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,20 +21,18 @@
from werkzeug.urls import url_quote
from werkzeug.wsgi import FileWrapper

MIMETYPE_TEXTFILES = {
'readme'
}
MIMETYPE_TEXTFILES = {"readme"}

MIMETYPE_WHITELIST = {
'audio/mpeg',
'audio/ogg',
'audio/wav',
'audio/webm',
'image/gif',
'image/jpeg',
'image/png',
'image/tiff',
'text/plain',
"audio/mpeg",
"audio/ogg",
"audio/wav",
"audio/webm",
"image/gif",
"image/jpeg",
"image/png",
"image/tiff",
"text/plain",
}
"""List of whitelisted MIME types.
Expand All @@ -45,14 +43,14 @@
"""

MIMETYPE_PLAINTEXT = {
'application/javascript',
'application/json',
'application/xhtml+xml',
'application/xml',
'text/css',
'text/csv',
'text/html',
'image/svg+xml',
"application/javascript",
"application/json",
"application/xhtml+xml",
"application/xml",
"text/css",
"text/csv",
"text/html",
"image/svg+xml",
}


Expand All @@ -61,9 +59,20 @@ def chunk_size_or_default(chunk_size):
return chunk_size or 5 * 1024 * 1024 # 5MiB


def send_stream(stream, filename, size, mtime, mimetype=None, restricted=True,
as_attachment=False, etag=None, content_md5=None,
chunk_size=None, conditional=True, trusted=False):
def send_stream(
stream,
filename,
size,
mtime,
mimetype=None,
restricted=True,
as_attachment=False,
etag=None,
content_md5=None,
chunk_size=None,
conditional=True,
trusted=False,
):
"""Send the contents of a file to the client.
.. warning::
Expand Down Expand Up @@ -106,46 +115,47 @@ def send_stream(stream, filename, size, mtime, mimetype=None, restricted=True,
if mimetype is None and filename:
mimetype = mimetypes.guess_type(filename)[0]
if mimetype is None:
mimetype = 'application/octet-stream'
mimetype = "application/octet-stream"

# Construct headers
headers = Headers()
headers['Content-Length'] = size
headers["Content-Length"] = size
if content_md5:
headers['Content-MD5'] = content_md5
headers["Content-MD5"] = content_md5

if not trusted:
# Sanitize MIME type
mimetype = sanitize_mimetype(mimetype, filename=filename)
# See https://www.owasp.org/index.php/OWASP_Secure_Headers_Project
# Prevent JavaScript execution
headers['Content-Security-Policy'] = "default-src 'none';"
headers["Content-Security-Policy"] = "default-src 'none';"
# Prevent MIME type sniffing for browser.
headers['X-Content-Type-Options'] = 'nosniff'
headers["X-Content-Type-Options"] = "nosniff"
# Prevent opening of downloaded file by IE
headers['X-Download-Options'] = 'noopen'
headers["X-Download-Options"] = "noopen"
# Prevent cross domain requests from Flash/Acrobat.
headers['X-Permitted-Cross-Domain-Policies'] = 'none'
headers["X-Permitted-Cross-Domain-Policies"] = "none"
# Prevent files from being embedded in frame, iframe and object tags.
headers['X-Frame-Options'] = 'deny'
headers["X-Frame-Options"] = "deny"
# Enable XSS protection (IE, Chrome, Safari)
headers['X-XSS-Protection'] = '1; mode=block'
headers["X-XSS-Protection"] = "1; mode=block"

# Force Content-Disposition for application/octet-stream to prevent
# Content-Type sniffing.
if as_attachment or mimetype == 'application/octet-stream':
if as_attachment or mimetype == "application/octet-stream":
# See https://github.com/pallets/flask/commit/0049922f2e690a6d
try:
filenames = {'filename': filename.encode('latin-1')}
filenames = {"filename": filename.encode("latin-1")}
except UnicodeEncodeError:
filenames = {'filename*': "UTF-8''%s" % url_quote(filename)}
encoded_filename = (unicodedata.normalize('NFKD', filename)
.encode('latin-1', 'ignore'))
filenames = {"filename*": "UTF-8''%s" % url_quote(filename)}
encoded_filename = unicodedata.normalize("NFKD", filename).encode(
"latin-1", "ignore"
)
if encoded_filename:
filenames['filename'] = encoded_filename
headers.add('Content-Disposition', 'attachment', **filenames)
filenames["filename"] = encoded_filename
headers.add("Content-Disposition", "attachment", **filenames)
else:
headers.add('Content-Disposition', 'inline')
headers.add("Content-Disposition", "inline")

# Construct response object.
rv = current_app.response_class(
Expand All @@ -166,10 +176,15 @@ def send_stream(stream, filename, size, mtime, mimetype=None, restricted=True,
# Set cache-control
if not restricted:
rv.cache_control.public = True
# See flask config variable "SEND_FILE_MAX_AGE_DEFAULT"
# https://flask.palletsprojects.com/en/2.1.x/api/#flask.Flask.get_send_file_max_age
cache_timeout = current_app.get_send_file_max_age(filename)

if cache_timeout is not None:
rv.cache_control.max_age = cache_timeout
rv.expires = int(time() + cache_timeout)
else:
rv.cache_control.no_cache = True

if conditional:
rv = rv.make_conditional(request)
Expand All @@ -183,11 +198,12 @@ def sanitize_mimetype(mimetype, filename=None):
if mimetype in MIMETYPE_WHITELIST:
return mimetype
# Rewrite HTML, JavaScript, CSS etc to text/plain.
if mimetype in MIMETYPE_PLAINTEXT or \
(filename and filename.lower() in MIMETYPE_TEXTFILES):
return 'text/plain'
if mimetype in MIMETYPE_PLAINTEXT or (
filename and filename.lower() in MIMETYPE_TEXTFILES
):
return "text/plain"
# Default
return 'application/octet-stream'
return "application/octet-stream"


def make_path(base_uri, path, filename, path_dimensions, split_length):
Expand Down Expand Up @@ -217,11 +233,12 @@ def compute_md5_checksum(stream, **kwargs):
:param stream: The input stream.
:returns: The MD5 checksum.
"""
return compute_checksum(stream, 'md5', hashlib.md5(), **kwargs)
return compute_checksum(stream, "md5", hashlib.md5(), **kwargs)


def compute_checksum(stream, algo, message_digest, chunk_size=None,
progress_callback=None):
def compute_checksum(
stream, algo, message_digest, chunk_size=None, progress_callback=None
):
"""Get helper method to compute checksum from a stream.
:param stream: File-like object.
Expand All @@ -248,8 +265,7 @@ def compute_checksum(stream, algo, message_digest, chunk_size=None,
return "{0}:{1}".format(algo, message_digest.hexdigest())


def populate_from_path(bucket, source, checksum=True, key_prefix='',
chunk_size=None):
def populate_from_path(bucket, source, checksum=True, key_prefix="", chunk_size=None):
"""Populate a ``bucket`` from all files in path.
:param bucket: The bucket (instance or id) to create the object in.
Expand All @@ -272,15 +288,14 @@ def create_file(key, path):

if checksum:
file_checksum = compute_md5_checksum(
open(path, 'rb'), chunk_size=chunk_size)
open(path, "rb"), chunk_size=chunk_size
)
file_instance = FileInstance.query.filter_by(
checksum=file_checksum, size=os.path.getsize(path)
).first()
if file_instance:
return ObjectVersion.create(
bucket, key, _file_id=file_instance.id
)
return ObjectVersion.create(bucket, key, stream=open(path, 'rb'))
return ObjectVersion.create(bucket, key, _file_id=file_instance.id)
return ObjectVersion.create(bucket, key, stream=open(path, "rb"))

if os.path.isfile(source):
yield create_file(os.path.basename(source), source)
Expand All @@ -290,15 +305,14 @@ def create_file(key, path):
filename = os.path.join(root, name)
assert filename.startswith(source)
parts = [p for p in filename[len(source):].split(os.sep) if p]
yield create_file('/'.join(parts), os.path.join(root, name))
yield create_file("/".join(parts), os.path.join(root, name))


def create_file_streaming_redirect_response(obj):
"""Redirect response generating function."""
warnings.warn('This streaming does not support multiple storage backends.')
warnings.warn("This streaming does not support multiple storage backends.")
response = make_response()
redirect_url_base = '/user_files/'
redirect_url_base = "/user_files/"
redirect_url_key = urlsplit(obj.file.uri).path
response.headers['X-Accel-Redirect'] = redirect_url_base + \
redirect_url_key[1:]
response.headers["X-Accel-Redirect"] = redirect_url_base + redirect_url_key[1:]
return response
23 changes: 21 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,26 @@ def base_app():
Babel(app_)
Menu(app_)

def delete_user_from_cache(exception):
    """Drop the cached logged-in user from ``flask.g`` at request teardown.

    Since Flask-Login 0.6.2 the current user is stored on ``flask.g``.
    ``flask.g`` belongs to the application context, which is normally
    created per request. ``pytest-flask`` pushes an application context
    when each test starts, so requests issued during the test detect the
    already-active context and do not pop it when they tear down. As a
    result the logged-in user stays cached for the whole test.

    This teardown handler removes the cached user after every request,
    forcing it to be loaded again on the next one.

    :param exception: Value passed by Flask to teardown handlers; unused.
    """
    from flask import g as app_globals

    # Nothing was cached during this request, so there is nothing to clear.
    if "_login_user" not in app_globals:
        return

    del app_globals._login_user

app_.teardown_request(delete_user_from_cache)

return app_


Expand All @@ -108,8 +128,7 @@ def app(base_app):
InvenioFilesREST(base_app)
base_app.register_blueprint(blueprint)

with base_app.app_context():
yield base_app
yield base_app


@pytest.yield_fixture()
Expand Down

0 comments on commit d9dded1

Please sign in to comment.