Skip to content

Commit

Permalink
send file: change cache control behaviour
Browse files Browse the repository at this point in the history
If a file is restricted, the Cache-Control header is now set to "no-cache", so that the resource is always revalidated with the server (checked to be fresh) before a cached copy is served.

partly closes inveniosoftware/invenio-communities#718
  • Loading branch information
nico committed Jul 25, 2022
1 parent f9c7266 commit 2974dbc
Showing 1 changed file with 73 additions and 59 deletions.
132 changes: 73 additions & 59 deletions invenio_files_rest/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,20 +21,18 @@
from werkzeug.urls import url_quote
from werkzeug.wsgi import FileWrapper

MIMETYPE_TEXTFILES = {
'readme'
}
MIMETYPE_TEXTFILES = {"readme"}

MIMETYPE_WHITELIST = {
'audio/mpeg',
'audio/ogg',
'audio/wav',
'audio/webm',
'image/gif',
'image/jpeg',
'image/png',
'image/tiff',
'text/plain',
"audio/mpeg",
"audio/ogg",
"audio/wav",
"audio/webm",
"image/gif",
"image/jpeg",
"image/png",
"image/tiff",
"text/plain",
}
"""List of whitelisted MIME types.
Expand All @@ -45,14 +43,14 @@
"""

MIMETYPE_PLAINTEXT = {
'application/javascript',
'application/json',
'application/xhtml+xml',
'application/xml',
'text/css',
'text/csv',
'text/html',
'image/svg+xml',
"application/javascript",
"application/json",
"application/xhtml+xml",
"application/xml",
"text/css",
"text/csv",
"text/html",
"image/svg+xml",
}


Expand All @@ -61,9 +59,20 @@ def chunk_size_or_default(chunk_size):
return chunk_size or 5 * 1024 * 1024 # 5MiB


def send_stream(stream, filename, size, mtime, mimetype=None, restricted=True,
as_attachment=False, etag=None, content_md5=None,
chunk_size=None, conditional=True, trusted=False):
def send_stream(
stream,
filename,
size,
mtime,
mimetype=None,
restricted=True,
as_attachment=False,
etag=None,
content_md5=None,
chunk_size=None,
conditional=True,
trusted=False,
):
"""Send the contents of a file to the client.
.. warning::
Expand Down Expand Up @@ -106,46 +115,47 @@ def send_stream(stream, filename, size, mtime, mimetype=None, restricted=True,
if mimetype is None and filename:
mimetype = mimetypes.guess_type(filename)[0]
if mimetype is None:
mimetype = 'application/octet-stream'
mimetype = "application/octet-stream"

# Construct headers
headers = Headers()
headers['Content-Length'] = size
headers["Content-Length"] = size
if content_md5:
headers['Content-MD5'] = content_md5
headers["Content-MD5"] = content_md5

if not trusted:
# Sanitize MIME type
mimetype = sanitize_mimetype(mimetype, filename=filename)
# See https://www.owasp.org/index.php/OWASP_Secure_Headers_Project
# Prevent JavaScript execution
headers['Content-Security-Policy'] = "default-src 'none';"
headers["Content-Security-Policy"] = "default-src 'none';"
# Prevent MIME type sniffing for browser.
headers['X-Content-Type-Options'] = 'nosniff'
headers["X-Content-Type-Options"] = "nosniff"
# Prevent opening of downloaded file by IE
headers['X-Download-Options'] = 'noopen'
headers["X-Download-Options"] = "noopen"
# Prevent cross domain requests from Flash/Acrobat.
headers['X-Permitted-Cross-Domain-Policies'] = 'none'
headers["X-Permitted-Cross-Domain-Policies"] = "none"
# Prevent files from being embedded in frame, iframe and object tags.
headers['X-Frame-Options'] = 'deny'
headers["X-Frame-Options"] = "deny"
# Enable XSS protection (IE, Chrome, Safari)
headers['X-XSS-Protection'] = '1; mode=block'
headers["X-XSS-Protection"] = "1; mode=block"

# Force Content-Disposition for application/octet-stream to prevent
# Content-Type sniffing.
if as_attachment or mimetype == 'application/octet-stream':
if as_attachment or mimetype == "application/octet-stream":
# See https://github.com/pallets/flask/commit/0049922f2e690a6d
try:
filenames = {'filename': filename.encode('latin-1')}
filenames = {"filename": filename.encode("latin-1")}
except UnicodeEncodeError:
filenames = {'filename*': "UTF-8''%s" % url_quote(filename)}
encoded_filename = (unicodedata.normalize('NFKD', filename)
.encode('latin-1', 'ignore'))
filenames = {"filename*": "UTF-8''%s" % url_quote(filename)}
encoded_filename = unicodedata.normalize("NFKD", filename).encode(
"latin-1", "ignore"
)
if encoded_filename:
filenames['filename'] = encoded_filename
headers.add('Content-Disposition', 'attachment', **filenames)
filenames["filename"] = encoded_filename
headers.add("Content-Disposition", "attachment", **filenames)
else:
headers.add('Content-Disposition', 'inline')
headers.add("Content-Disposition", "inline")

# Construct response object.
rv = current_app.response_class(
Expand All @@ -166,10 +176,15 @@ def send_stream(stream, filename, size, mtime, mimetype=None, restricted=True,
# Set cache-control
if not restricted:
rv.cache_control.public = True
# See flask config variable "SEND_FILE_MAX_AGE_DEFAULT"
# https://flask.palletsprojects.com/en/2.1.x/api/#flask.Flask.get_send_file_max_age
cache_timeout = current_app.get_send_file_max_age(filename)

if cache_timeout is not None:
rv.cache_control.max_age = cache_timeout
rv.expires = int(time() + cache_timeout)
else:
rv.cache_control.no_cache = True

if conditional:
rv = rv.make_conditional(request)
Expand All @@ -183,11 +198,12 @@ def sanitize_mimetype(mimetype, filename=None):
if mimetype in MIMETYPE_WHITELIST:
return mimetype
# Rewrite HTML, JavaScript, CSS etc to text/plain.
if mimetype in MIMETYPE_PLAINTEXT or \
(filename and filename.lower() in MIMETYPE_TEXTFILES):
return 'text/plain'
if mimetype in MIMETYPE_PLAINTEXT or (
filename and filename.lower() in MIMETYPE_TEXTFILES
):
return "text/plain"
# Default
return 'application/octet-stream'
return "application/octet-stream"


def make_path(base_uri, path, filename, path_dimensions, split_length):
Expand Down Expand Up @@ -217,11 +233,12 @@ def compute_md5_checksum(stream, **kwargs):
:param stream: The input stream.
:returns: The MD5 checksum.
"""
return compute_checksum(stream, 'md5', hashlib.md5(), **kwargs)
return compute_checksum(stream, "md5", hashlib.md5(), **kwargs)


def compute_checksum(stream, algo, message_digest, chunk_size=None,
progress_callback=None):
def compute_checksum(
stream, algo, message_digest, chunk_size=None, progress_callback=None
):
"""Get helper method to compute checksum from a stream.
:param stream: File-like object.
Expand All @@ -248,8 +265,7 @@ def compute_checksum(stream, algo, message_digest, chunk_size=None,
return "{0}:{1}".format(algo, message_digest.hexdigest())


def populate_from_path(bucket, source, checksum=True, key_prefix='',
chunk_size=None):
def populate_from_path(bucket, source, checksum=True, key_prefix="", chunk_size=None):
"""Populate a ``bucket`` from all files in path.
:param bucket: The bucket (instance or id) to create the object in.
Expand All @@ -272,15 +288,14 @@ def create_file(key, path):

if checksum:
file_checksum = compute_md5_checksum(
open(path, 'rb'), chunk_size=chunk_size)
open(path, "rb"), chunk_size=chunk_size
)
file_instance = FileInstance.query.filter_by(
checksum=file_checksum, size=os.path.getsize(path)
).first()
if file_instance:
return ObjectVersion.create(
bucket, key, _file_id=file_instance.id
)
return ObjectVersion.create(bucket, key, stream=open(path, 'rb'))
return ObjectVersion.create(bucket, key, _file_id=file_instance.id)
return ObjectVersion.create(bucket, key, stream=open(path, "rb"))

if os.path.isfile(source):
yield create_file(os.path.basename(source), source)
Expand All @@ -290,15 +305,14 @@ def create_file(key, path):
filename = os.path.join(root, name)
assert filename.startswith(source)
parts = [p for p in filename[len(source):].split(os.sep) if p]
yield create_file('/'.join(parts), os.path.join(root, name))
yield create_file("/".join(parts), os.path.join(root, name))


def create_file_streaming_redirect_response(obj):
"""Redirect response generating function."""
warnings.warn('This streaming does not support multiple storage backends.')
warnings.warn("This streaming does not support multiple storage backends.")
response = make_response()
redirect_url_base = '/user_files/'
redirect_url_base = "/user_files/"
redirect_url_key = urlsplit(obj.file.uri).path
response.headers['X-Accel-Redirect'] = redirect_url_base + \
redirect_url_key[1:]
response.headers["X-Accel-Redirect"] = redirect_url_base + redirect_url_key[1:]
return response

0 comments on commit 2974dbc

Please sign in to comment.