Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Add config option to prevent media downloads from listed domains. #15197

Merged
merged 17 commits into from
May 9, 2023
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/15197.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add config option to prevent media downloads from listed domains.
turt2live marked this conversation as resolved.
Show resolved Hide resolved
22 changes: 22 additions & 0 deletions docs/usage/configuration/config_documentation.md
Original file line number Diff line number Diff line change
Expand Up @@ -1764,6 +1764,28 @@ Example configuration:
max_image_pixels: 35M
```
---
### `prevent_downloads_from`
turt2live marked this conversation as resolved.
Show resolved Hide resolved

A list of domains from which users will be unable to download media. If
media from these domains is already cached, it will not be deleted, but
it will no longer be downloadable. This option does not affect admin APIs
trying to download/operate on media.
turt2live marked this conversation as resolved.
Show resolved Hide resolved

This will not prevent the listed domains from accessing media themselves.
It simply prevents users on this server from downloading media originating
from the listed servers.

This will have no effect on media originating from the local server.
turt2live marked this conversation as resolved.
Show resolved Hide resolved

Defaults to an empty list (nothing blocked).

Example configuration:
```yaml
prevent_downloads_from:
- evil.example.org
- evil2.example.org
```
---
### `dynamic_thumbnails`

Whether to generate new thumbnails on the fly to precisely match
Expand Down
2 changes: 2 additions & 0 deletions synapse/config/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,8 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None:
self.max_image_pixels = self.parse_size(config.get("max_image_pixels", "32M"))
self.max_spider_size = self.parse_size(config.get("max_spider_size", "10M"))

self.prevent_downloads_from = config.get("prevent_downloads_from", [])
clokep marked this conversation as resolved.
Show resolved Hide resolved

self.media_store_path = self.ensure_directory(
config.get("media_store_path", "media_store")
)
Expand Down
9 changes: 9 additions & 0 deletions synapse/media/media_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ def __init__(self, hs: "HomeServer"):
self.federation_domain_whitelist = (
hs.config.federation.federation_domain_whitelist
)
self.prevent_downloads_from = hs.config.media.prevent_downloads_from

# List of StorageProviders where we should search for media and
# potentially upload to.
Expand Down Expand Up @@ -276,6 +277,14 @@ async def get_remote_media(
):
raise FederationDeniedError(server_name)
clokep marked this conversation as resolved.
Show resolved Hide resolved

# Don't let users download media from domains listed in the config, even
# if we might have the media to serve. This is Trust & Safety tooling to
# block some servers' media from being accessible to local users.
# See `prevent_downloads_from` config docs for more info.
if server_name in self.prevent_downloads_from:
respond_404(request)
erikjohnston marked this conversation as resolved.
Show resolved Hide resolved
return

self.mark_recently_accessed(server_name, media_id)

# We linearize here to ensure that we don't try and download remote
Expand Down
9 changes: 9 additions & 0 deletions synapse/rest/media/thumbnail_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ def __init__(
self.media_storage = media_storage
self.dynamic_thumbnails = hs.config.media.dynamic_thumbnails
self.server_name = hs.hostname
self.prevent_downloads_from = hs.config.media.prevent_downloads_from

async def _async_render_GET(self, request: SynapseRequest) -> None:
set_cors_headers(request)
Expand All @@ -82,6 +83,14 @@ async def _async_render_GET(self, request: SynapseRequest) -> None:
)
self.media_repo.mark_recently_accessed(None, media_id)
else:
# Don't let users download media from domains listed in the config, even
# if we might have the media to serve. This is Trust & Safety tooling to
# block some servers' media from being accessible to local users.
turt2live marked this conversation as resolved.
Show resolved Hide resolved
# See `prevent_downloads_from` config docs for more info.
if server_name in self.prevent_downloads_from:
respond_404(request)
return

if self.dynamic_thumbnails:
await self._select_or_generate_remote_thumbnail(
request, server_name, media_id, width, height, method, m_type
Expand Down
145 changes: 145 additions & 0 deletions tests/rest/media/test_domain_blocking.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
# Copyright 2023 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from binascii import unhexlify
from typing import Dict

from twisted.test.proto_helpers import MemoryReactor
from twisted.web.resource import Resource

from synapse.media._base import FileInfo
from synapse.server import HomeServer
from synapse.util import Clock

from tests import unittest
from tests.unittest import override_config

# A tiny (1x1 pixel) PNG used as the payload of the injected remote media.
# The hex is grouped by PNG chunk so the structure is easy to eyeball.
SMALL_PNG = unhexlify(
    # PNG file signature
    b"89504e470d0a1a0a"
    # IHDR chunk: width 1, height 1
    b"0000000d49484452000000010000000108060000001f15c489"
    # IDAT chunk: compressed pixel data
    b"0000000a49444154789c63000100000500010d0a2db4"
    # IEND chunk
    b"0000000049454e44ae426082"
)
turt2live marked this conversation as resolved.
Show resolved Hide resolved


class MediaDomainBlockingTests(unittest.HomeserverTestCase):
    """Tests for the `prevent_downloads_from` media config option.

    A piece of remote media is injected into the local cache during `prepare`.
    Each test then requests it (or a thumbnail of it) with the origin server
    either listed or not listed in `prevent_downloads_from`, and checks the
    HTTP status code of the response.
    """

    # The media ID is arbitrary; it only has to match between the injected
    # media and the request URLs below.
    remote_media_id = "doesnotmatter"
    remote_server_name = "evil.com"

    def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
        """Store a small PNG as cached remote media from `remote_server_name`."""
        self.store = hs.get_datastores().main

        # Inject a piece of media. We'll use this to ensure we're returning a sane
        # response when we're not supposed to block it, distinguishing a media block
        # from a regular 404.
        file_id = "abcdefg12345"
        file_info = FileInfo(server_name=self.remote_server_name, file_id=file_id)
        with hs.get_media_repository().media_storage.store_into_file(file_info) as (
            f,
            _fname,
            finish,
        ):
            f.write(SMALL_PNG)
            self.get_success(finish())

        self.get_success(
            self.store.store_cached_remote_media(
                origin=self.remote_server_name,
                media_id=self.remote_media_id,
                media_type="image/png",
                # Record the real size of the payload written above so the
                # stored metadata agrees with the file on disk.
                media_length=len(SMALL_PNG),
                time_now_ms=clock.time_msec(),
                upload_name="test.png",
                filesystem_id=file_id,
            )
        )

    def create_resource_dict(self) -> Dict[str, Resource]:
        """Return the resource tree to serve, which must include media."""
        # We need to manually set the resource tree to include media, the
        # default only does `/_matrix/client` APIs.
        return {"/_matrix/media": self.hs.get_media_repository_resource()}

    @override_config(
        {
            # Disable downloads from the domain we'll be trying to download from.
            # Should result in a 404.
            "prevent_downloads_from": [remote_server_name]
        }
    )
    def test_cannot_download_blocked_media(self) -> None:
        """
        Tests to ensure that remote media which is blocked cannot be downloaded.
        """
        response = self.make_request(
            "GET",
            f"/_matrix/media/v3/download/{self.remote_server_name}/{self.remote_media_id}",
            shorthand=False,
        )
        self.assertEqual(response.code, 404)

    @override_config(
        {
            # Disable downloads from a domain we won't be requesting downloads from.
            # This proves we haven't broken anything.
            "prevent_downloads_from": ["not-listed.com"]
        }
    )
    def test_remote_media_normally_unblocked(self) -> None:
        """
        Tests to ensure that remote media can still be downloaded when its
        origin is not one of the blocked domains.
        """
        response = self.make_request(
            "GET",
            f"/_matrix/media/v3/download/{self.remote_server_name}/{self.remote_media_id}",
            shorthand=False,
        )
        self.assertEqual(response.code, 200)

    @override_config(
        {
            # Disable downloads from the domain we'll be trying to download from.
            # Should result in a 404.
            "prevent_downloads_from": [remote_server_name],
            "dynamic_thumbnails": True,
        }
    )
    def test_cannot_download_blocked_media_thumbnail(self) -> None:
        """
        Same test as test_cannot_download_blocked_media but for thumbnails.
        """
        # The thumbnail dimensions travel in the query string; a GET request
        # needs no body.
        response = self.make_request(
            "GET",
            f"/_matrix/media/v3/thumbnail/{self.remote_server_name}/{self.remote_media_id}?width=100&height=100",
            shorthand=False,
        )
        self.assertEqual(response.code, 404)

    @override_config(
        {
            # Disable downloads from a domain we won't be requesting downloads from.
            # This proves we haven't broken anything.
            "prevent_downloads_from": ["not-listed.com"],
            "dynamic_thumbnails": True,
        }
    )
    def test_remote_media_thumbnail_normally_unblocked(self) -> None:
        """
        Same test as test_remote_media_normally_unblocked but for thumbnails.
        """
        response = self.make_request(
            "GET",
            f"/_matrix/media/v3/thumbnail/{self.remote_server_name}/{self.remote_media_id}?width=100&height=100",
            shorthand=False,
        )
        self.assertEqual(response.code, 200)