Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Commit

Permalink
Do not break URL previews if an image is unreachable. (#12950)
Browse files Browse the repository at this point in the history
Avoid breaking a URL preview completely if the chosen image 404s
or is unreachable for some other reason (e.g. DNS).
  • Loading branch information
clokep authored Jun 6, 2022
1 parent 1acc897 commit 148fe58
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 6 deletions.
1 change: 1 addition & 0 deletions changelog.d/12950.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix a long-standing bug where a URL preview would break if the image failed to download.
23 changes: 17 additions & 6 deletions synapse/rest/media/v1/preview_url_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -586,20 +586,33 @@ async def _precache_image_url(
og: The Open Graph dictionary. This is modified with image information.
"""
# If there's no image or it is blank, there's nothing to do.
if "og:image" not in og or not og["og:image"]:
if "og:image" not in og:
return

# Remove the raw image URL, this will be replaced with an MXC URL, if successful.
image_url = og.pop("og:image")
if not image_url:
return

# The image URL from the HTML might be relative to the previewed page,
# convert it to an URL which can be requested directly.
image_url = og["og:image"]
url_parts = urlparse(image_url)
if url_parts.scheme != "data":
image_url = urljoin(media_info.uri, image_url)

# FIXME: it might be cleaner to use the same flow as the main /preview_url
# request itself and benefit from the same caching etc. But for now we
# just rely on the caching on the master request to speed things up.
image_info = await self._handle_url(image_url, user, allow_data_urls=True)
try:
image_info = await self._handle_url(image_url, user, allow_data_urls=True)
except Exception as e:
# Pre-caching the image failed, don't block the entire URL preview.
logger.warning(
"Pre-caching image failed during URL preview: %s errored with %s",
image_url,
e,
)
return

if _is_media(image_info.media_type):
# TODO: make sure we don't choke on white-on-transparent images
Expand All @@ -611,13 +624,11 @@ async def _precache_image_url(
og["og:image:width"] = dims["width"]
og["og:image:height"] = dims["height"]
else:
logger.warning("Couldn't get dims for %s", og["og:image"])
logger.warning("Couldn't get dims for %s", image_url)

og["og:image"] = f"mxc://{self.server_name}/{image_info.filesystem_id}"
og["og:image:type"] = image_info.media_type
og["matrix:image:size"] = image_info.media_length
else:
del og["og:image"]

async def _handle_oembed_response(
self, url: str, media_info: MediaInfo, expiration_ms: int
Expand Down
35 changes: 35 additions & 0 deletions tests/rest/media/v1/test_url_preview.py
Original file line number Diff line number Diff line change
Expand Up @@ -656,6 +656,41 @@ def test_accept_language_config_option(self) -> None:
server.data,
)

def test_nonexistent_image(self) -> None:
"""If the preview image doesn't exist, ensure some data is returned."""
self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]

end_content = (
b"""<html><body><img src="http://cdn.matrix.org/foo.jpg"></body></html>"""
)

channel = self.make_request(
"GET",
"preview_url?url=http://matrix.org",
shorthand=False,
await_result=False,
)
self.pump()

client = self.reactor.tcpClients[0][2].buildProtocol(None)
server = AccumulatingProtocol()
server.makeConnection(FakeTransport(client, self.reactor))
client.makeConnection(FakeTransport(server, self.reactor))
client.dataReceived(
(
b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
b'Content-Type: text/html; charset="utf8"\r\n\r\n'
)
% (len(end_content),)
+ end_content
)

self.pump()
self.assertEqual(channel.code, 200)

# The image should not be in the result.
self.assertNotIn("og:image", channel.json_body)

def test_data_url(self) -> None:
"""
Requesting to preview a data URL is not supported.
Expand Down

0 comments on commit 148fe58

Please sign in to comment.