Skip to content

Add ability to limit amount uploaded by a user #18527

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/18527.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add ability to limit amount uploaded by a user in a given time period.
17 changes: 17 additions & 0 deletions docs/usage/configuration/config_documentation.md
Original file line number Diff line number Diff line change
Expand Up @@ -2058,6 +2058,23 @@ Example configuration:
max_upload_size: 60M
```
---
### `media_upload_limits`

*(array)* A list of media upload limits defining how much data a given user can upload in a given time period.

An empty list means no limits are applied.

Defaults to `[]`.

Example configuration:
```yaml
media_upload_limits:
- time_period: 1h
max_size: 100M
- time_period: 1w
max_size: 500M
```
---
### `max_image_pixels`

*(byte size)* Maximum number of pixels that will be thumbnailed. Defaults to `"32M"`.
Expand Down
24 changes: 24 additions & 0 deletions schema/synapse-config.schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2300,6 +2300,30 @@ properties:
default: 50M
examples:
- 60M
media_upload_limits:
type: array
description: >-
A list of media upload limits defining how much data a given user can
upload in a given time period.


An empty list means no limits are applied.
default: []
items:
time_period:
type: "#/$defs/duration"
description: >-
The time period over which the limit applies. Required.
max_size:
type: "#/$defs/bytes"
description: >-
Amount of data that can be uploaded in the time period by the user.
Required.
examples:
- - time_period: 1h
max_size: 100M
- time_period: 1w
max_size: 500M
max_image_pixels:
$ref: "#/$defs/bytes"
description: Maximum number of pixels that will be thumbnailed.
Expand Down
16 changes: 16 additions & 0 deletions synapse/config/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,15 @@ def parse_thumbnail_requirements(
}


@attr.s(auto_attribs=True, slots=True, frozen=True)
class MediaUploadLimit:
"""A limit on the amount of data a user can upload in a given time
period."""

max_bytes: int
time_period_ms: int


class ContentRepositoryConfig(Config):
section = "media"

Expand Down Expand Up @@ -274,6 +283,13 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None:

self.enable_authenticated_media = config.get("enable_authenticated_media", True)

self.media_upload_limits: List[MediaUploadLimit] = []
for limit_config in config.get("media_upload_limits", []):
time_period_ms = self.parse_duration(limit_config["time_period"])
max_bytes = self.parse_size(limit_config["max_size"])

self.media_upload_limits.append(MediaUploadLimit(max_bytes, time_period_ms))

def generate_config_section(self, data_dir_path: str, **kwargs: Any) -> str:
assert data_dir_path is not None
media_store = os.path.join(data_dir_path, "media_store")
Expand Down
2 changes: 1 addition & 1 deletion synapse/handlers/sso.py
Original file line number Diff line number Diff line change
Expand Up @@ -824,7 +824,7 @@ def is_allowed_mime_type(content_type: str) -> bool:
return True

# store it in media repository
avatar_mxc_url = await self._media_repo.create_content(
avatar_mxc_url = await self._media_repo.create_or_update_content(
media_type=headers[b"Content-Type"][0].decode("utf-8"),
upload_name=upload_name,
content=picture,
Expand Down
128 changes: 66 additions & 62 deletions synapse/media/media_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,13 @@ def __init__(self, hs: "HomeServer"):
else:
self.url_previewer = None

# We get the media upload limits and sort them in descending order of
# time period, so that we can apply some optimizations.
self.media_upload_limits = hs.config.media.media_upload_limits
self.media_upload_limits.sort(
key=lambda limit: limit.time_period_ms, reverse=True
)

def _start_update_recently_accessed(self) -> Deferred:
return run_as_background_process(
"update_recently_accessed_media", self._update_recently_accessed
Expand Down Expand Up @@ -285,80 +292,37 @@ async def verify_can_upload(self, media_id: str, auth_user: UserID) -> None:
raise NotFoundError("Media ID has expired")

@trace
async def update_content(
self,
media_id: str,
media_type: str,
upload_name: Optional[str],
content: IO,
content_length: int,
auth_user: UserID,
) -> None:
"""Update the content of the given media ID.

Args:
media_id: The media ID to replace.
media_type: The content type of the file.
upload_name: The name of the file, if provided.
content: A file like object that is the content to store
content_length: The length of the content
auth_user: The user_id of the uploader
"""
file_info = FileInfo(server_name=None, file_id=media_id)
sha256reader = SHA256TransparentIOReader(content)
# This implements all of IO as it has a passthrough
fname = await self.media_storage.store_file(sha256reader.wrap(), file_info)
sha256 = sha256reader.hexdigest()
should_quarantine = await self.store.get_is_hash_quarantined(sha256)
logger.info("Stored local media in file %r", fname)

if should_quarantine:
logger.warn(
"Media has been automatically quarantined as it matched existing quarantined media"
)

await self.store.update_local_media(
media_id=media_id,
media_type=media_type,
upload_name=upload_name,
media_length=content_length,
user_id=auth_user,
sha256=sha256,
quarantined_by="system" if should_quarantine else None,
)

try:
await self._generate_thumbnails(None, media_id, media_id, media_type)
except Exception as e:
logger.info("Failed to generate thumbnails: %s", e)

@trace
async def create_content(
async def create_or_update_content(
self,
media_type: str,
upload_name: Optional[str],
content: IO,
content_length: int,
auth_user: UserID,
media_id: Optional[str] = None,
) -> MXCUri:
"""Store uploaded content for a local user and return the mxc URL
"""Create or update the content of the given media ID.

Args:
media_type: The content type of the file.
upload_name: The name of the file, if provided.
content: A file like object that is the content to store
content_length: The length of the content
auth_user: The user_id of the uploader
media_id: The media ID to update if provided, otherwise creates
new media ID.

Returns:
The mxc url of the stored content
"""

media_id = random_string(24)
is_new_media = media_id is None
if media_id is None:
media_id = random_string(24)

file_info = FileInfo(server_name=None, file_id=media_id)
# This implements all of IO as it has a passthrough
sha256reader = SHA256TransparentIOReader(content)
# This implements all of IO as it has a passthrough
fname = await self.media_storage.store_file(sha256reader.wrap(), file_info)
sha256 = sha256reader.hexdigest()
should_quarantine = await self.store.get_is_hash_quarantined(sha256)
Expand All @@ -370,16 +334,56 @@ async def create_content(
"Media has been automatically quarantined as it matched existing quarantined media"
)

await self.store.store_local_media(
media_id=media_id,
media_type=media_type,
time_now_ms=self.clock.time_msec(),
upload_name=upload_name,
media_length=content_length,
user_id=auth_user,
sha256=sha256,
quarantined_by="system" if should_quarantine else None,
)
# Check that the user has not exceeded any of the media upload limits.

# This is the total size of media uploaded by the user in the last
# `time_period_ms` milliseconds, or None if we haven't checked yet.
uploaded_media_size: Optional[int] = None

# Note: the media upload limits are sorted so larger time periods are
# first.
for limit in self.media_upload_limits:
# We only need to check the amount of media uploaded by the user in
# this latest (smaller) time period if the amount of media uploaded
# in a previous (larger) time period is above the limit.
#
# This optimization means that in the common case where the user
# hasn't uploaded much media, we only need to query the database
# once.
if (
uploaded_media_size is None
or uploaded_media_size + content_length > limit.max_bytes
):
uploaded_media_size = await self.store.get_media_uploaded_size_for_user(
user_id=auth_user.to_string(), time_period_ms=limit.time_period_ms
)

if uploaded_media_size + content_length > limit.max_bytes:
raise SynapseError(
400, "Media upload limit exceeded", Codes.RESOURCE_LIMIT_EXCEEDED
)

if is_new_media:
await self.store.store_local_media(
media_id=media_id,
media_type=media_type,
time_now_ms=self.clock.time_msec(),
upload_name=upload_name,
media_length=content_length,
user_id=auth_user,
sha256=sha256,
quarantined_by="system" if should_quarantine else None,
)
else:
await self.store.update_local_media(
media_id=media_id,
media_type=media_type,
upload_name=upload_name,
media_length=content_length,
user_id=auth_user,
sha256=sha256,
quarantined_by="system" if should_quarantine else None,
)

try:
await self._generate_thumbnails(None, media_id, media_id, media_type)
Expand Down
6 changes: 3 additions & 3 deletions synapse/rest/media/upload_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ async def on_POST(self, request: SynapseRequest) -> None:

try:
content: IO = request.content # type: ignore
content_uri = await self.media_repo.create_content(
content_uri = await self.media_repo.create_or_update_content(
media_type, upload_name, content, content_length, requester.user
)
except SpamMediaException:
Expand Down Expand Up @@ -170,13 +170,13 @@ async def on_PUT(

try:
content: IO = request.content # type: ignore
await self.media_repo.update_content(
media_id,
await self.media_repo.create_or_update_content(
media_type,
upload_name,
content,
content_length,
requester.user,
media_id=media_id,
)
except SpamMediaException:
# For uploading of media we want to respond with a 400, instead of
Expand Down
36 changes: 36 additions & 0 deletions synapse/storage/databases/main/media_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -1034,3 +1034,39 @@ def get_matching_media_txn(
"local_media_repository",
sha256,
)

async def get_media_uploaded_size_for_user(
self, user_id: str, time_period_ms: int
) -> int:
"""Get the total size of media uploaded by a user in the last
time_period_ms milliseconds.

Args:
user_id: The user ID to check.
time_period_ms: The time period in milliseconds to consider.

Returns:
The total size of media uploaded by the user in bytes.
"""

sql = """
SELECT COALESCE(SUM(media_length), 0)
FROM local_media_repository
WHERE user_id = ? AND created_ts > ?
"""

def _get_media_uploaded_size_for_user_txn(
txn: LoggingTransaction,
) -> int:
# Calculate the timestamp for the start of the time period
start_ts = self._clock.time_msec() - time_period_ms
txn.execute(sql, (user_id, start_ts))
row = txn.fetchone()
if row is None:
return 0
return row[0]

return await self.db_pool.runInteraction(
"get_media_uploaded_size_for_user",
_get_media_uploaded_size_for_user_txn,
)
10 changes: 5 additions & 5 deletions tests/federation/test_federation_media.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
def test_file_download(self) -> None:
content = io.BytesIO(b"file_to_stream")
content_uri = self.get_success(
self.media_repo.create_content(
self.media_repo.create_or_update_content(
"text/plain",
"test_upload",
content,
Expand Down Expand Up @@ -110,7 +110,7 @@ def test_file_download(self) -> None:

content = io.BytesIO(SMALL_PNG)
content_uri = self.get_success(
self.media_repo.create_content(
self.media_repo.create_or_update_content(
"image/png",
"test_png_upload",
content,
Expand Down Expand Up @@ -152,7 +152,7 @@ def test_federation_etag(self) -> None:

content = io.BytesIO(b"file_to_stream")
content_uri = self.get_success(
self.media_repo.create_content(
self.media_repo.create_or_update_content(
"text/plain",
"test_upload",
content,
Expand Down Expand Up @@ -215,7 +215,7 @@ def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
def test_thumbnail_download_scaled(self) -> None:
content = io.BytesIO(small_png.data)
content_uri = self.get_success(
self.media_repo.create_content(
self.media_repo.create_or_update_content(
"image/png",
"test_png_thumbnail",
content,
Expand Down Expand Up @@ -255,7 +255,7 @@ def test_thumbnail_download_scaled(self) -> None:
def test_thumbnail_download_cropped(self) -> None:
content = io.BytesIO(small_png.data)
content_uri = self.get_success(
self.media_repo.create_content(
self.media_repo.create_or_update_content(
"image/png",
"test_png_thumbnail",
content,
Expand Down
2 changes: 1 addition & 1 deletion tests/media/test_media_retention.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def _create_media_and_set_attributes(
# If the meda
random_content = bytes(random_string(24), "utf-8")
mxc_uri: MXCUri = self.get_success(
media_repository.create_content(
media_repository.create_or_update_content(
media_type="text/plain",
upload_name=None,
content=io.BytesIO(random_content),
Expand Down
Loading
Loading