Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Commit

Permalink
Add an admin api to delete local media. (#8519)
Browse files Browse the repository at this point in the history
Related to: #6459, #3479

Add `DELETE /_synapse/admin/v1/media/<server_name>/<media_id>` to delete
a single file from server.
  • Loading branch information
dklimpel authored Oct 26, 2020
1 parent f6a3859 commit 49d72de
Show file tree
Hide file tree
Showing 7 changed files with 868 additions and 3 deletions.
1 change: 1 addition & 0 deletions changelog.d/8519.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add an admin api to delete a single file or files were not used for a defined time from server. Contributed by @dklimpel.
79 changes: 79 additions & 0 deletions docs/admin_api/media_admin_api.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,3 +100,82 @@ Response:
"num_quarantined": 10 # The number of media items successfully quarantined
}
```

# Delete local media
This API deletes the *local* media from the disk of your own server.
This includes any local thumbnails and copies of media downloaded from
remote homeservers.
This API will not affect media that has been uploaded to external
media repositories (e.g https://github.com/turt2live/matrix-media-repo/).
See also [purge_remote_media.rst](purge_remote_media.rst).

## Delete a specific local media
Delete a specific `media_id`.

Request:

```
DELETE /_synapse/admin/v1/media/<server_name>/<media_id>
{}
```

URL Parameters

* `server_name`: string - The name of your local server (e.g `matrix.org`)
* `media_id`: string - The ID of the media (e.g `abcdefghijklmnopqrstuvwx`)

Response:

```json
{
"deleted_media": [
"abcdefghijklmnopqrstuvwx"
],
"total": 1
}
```

The following fields are returned in the JSON response body:

* `deleted_media`: an array of strings - List of deleted `media_id`
* `total`: integer - Total number of deleted `media_id`

## Delete local media by date or size

Request:

```
POST /_synapse/admin/v1/media/<server_name>/delete?before_ts=<before_ts>
{}
```

URL Parameters

* `server_name`: string - The name of your local server (e.g `matrix.org`).
* `before_ts`: string representing a positive integer - Unix timestamp in ms.
Files that were last used before this timestamp will be deleted. It is the timestamp of
last access and not the timestamp creation.
* `size_gt`: Optional - string representing a positive integer - Size of the media in bytes.
Files that are larger will be deleted. Defaults to `0`.
* `keep_profiles`: Optional - string representing a boolean - Switch to also delete files
that are still used in image data (e.g user profile, room avatar).
If `false` these files will be deleted. Defaults to `true`.

Response:

```json
{
"deleted_media": [
"abcdefghijklmnopqrstuvwx",
"abcdefghijklmnopqrstuvwz"
],
"total": 2
}
```

The following fields are returned in the JSON response body:

* `deleted_media`: an array of strings - List of deleted `media_id`
* `total`: integer - Total number of deleted `media_id`
81 changes: 79 additions & 2 deletions synapse/rest/admin/media.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@

import logging

from synapse.api.errors import AuthError
from synapse.http.servlet import RestServlet, parse_integer
from synapse.api.errors import AuthError, Codes, NotFoundError, SynapseError
from synapse.http.servlet import RestServlet, parse_boolean, parse_integer
from synapse.rest.admin._base import (
admin_patterns,
assert_requester_is_admin,
assert_user_is_admin,
historical_admin_path_patterns,
Expand Down Expand Up @@ -150,6 +151,80 @@ async def on_POST(self, request):
return 200, ret


class DeleteMediaByID(RestServlet):
"""Delete local media by a given ID. Removes it from this server.
"""

PATTERNS = admin_patterns("/media/(?P<server_name>[^/]+)/(?P<media_id>[^/]+)")

def __init__(self, hs):
self.store = hs.get_datastore()
self.auth = hs.get_auth()
self.server_name = hs.hostname
self.media_repository = hs.get_media_repository()

async def on_DELETE(self, request, server_name: str, media_id: str):
await assert_requester_is_admin(self.auth, request)

if self.server_name != server_name:
raise SynapseError(400, "Can only delete local media")

if await self.store.get_local_media(media_id) is None:
raise NotFoundError("Unknown media")

logging.info("Deleting local media by ID: %s", media_id)

deleted_media, total = await self.media_repository.delete_local_media(media_id)
return 200, {"deleted_media": deleted_media, "total": total}


class DeleteMediaByDateSize(RestServlet):
"""Delete local media and local copies of remote media by
timestamp and size.
"""

PATTERNS = admin_patterns("/media/(?P<server_name>[^/]+)/delete")

def __init__(self, hs):
self.store = hs.get_datastore()
self.auth = hs.get_auth()
self.server_name = hs.hostname
self.media_repository = hs.get_media_repository()

async def on_POST(self, request, server_name: str):
await assert_requester_is_admin(self.auth, request)

before_ts = parse_integer(request, "before_ts", required=True)
size_gt = parse_integer(request, "size_gt", default=0)
keep_profiles = parse_boolean(request, "keep_profiles", default=True)

if before_ts < 0:
raise SynapseError(
400,
"Query parameter before_ts must be a string representing a positive integer.",
errcode=Codes.INVALID_PARAM,
)
if size_gt < 0:
raise SynapseError(
400,
"Query parameter size_gt must be a string representing a positive integer.",
errcode=Codes.INVALID_PARAM,
)

if self.server_name != server_name:
raise SynapseError(400, "Can only delete local media")

logging.info(
"Deleting local media by timestamp: %s, size larger than: %s, keep profile media: %s"
% (before_ts, size_gt, keep_profiles)
)

deleted_media, total = await self.media_repository.delete_old_local_media(
before_ts, size_gt, keep_profiles
)
return 200, {"deleted_media": deleted_media, "total": total}


def register_servlets_for_media_repo(hs, http_server):
"""
Media repo specific APIs.
Expand All @@ -159,3 +234,5 @@ def register_servlets_for_media_repo(hs, http_server):
QuarantineMediaByID(hs).register(http_server)
QuarantineMediaByUser(hs).register(http_server)
ListMediaInRoom(hs).register(http_server)
DeleteMediaByID(hs).register(http_server)
DeleteMediaByDateSize(hs).register(http_server)
17 changes: 17 additions & 0 deletions synapse/rest/media/v1/filepath.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,23 @@ def local_media_thumbnail_rel(self, media_id, width, height, content_type, metho

local_media_thumbnail = _wrap_in_base_path(local_media_thumbnail_rel)

def local_media_thumbnail_dir(self, media_id: str) -> str:
"""
Retrieve the local store path of thumbnails of a given media_id
Args:
media_id: The media ID to query.
Returns:
Path of local_thumbnails from media_id
"""
return os.path.join(
self.base_path,
"local_thumbnails",
media_id[0:2],
media_id[2:4],
media_id[4:],
)

def remote_media_filepath_rel(self, server_name, file_id):
return os.path.join(
"remote_content", server_name, file_id[0:2], file_id[2:4], file_id[4:]
Expand Down
72 changes: 71 additions & 1 deletion synapse/rest/media/v1/media_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import logging
import os
import shutil
from typing import IO, Dict, Optional, Tuple
from typing import IO, Dict, List, Optional, Tuple

import twisted.internet.error
import twisted.web.http
Expand Down Expand Up @@ -767,6 +767,76 @@ async def delete_old_remote_media(self, before_ts):

return {"deleted": deleted}

async def delete_local_media(self, media_id: str) -> Tuple[List[str], int]:
"""
Delete the given local or remote media ID from this server
Args:
media_id: The media ID to delete.
Returns:
A tuple of (list of deleted media IDs, total deleted media IDs).
"""
return await self._remove_local_media_from_disk([media_id])

async def delete_old_local_media(
self, before_ts: int, size_gt: int = 0, keep_profiles: bool = True,
) -> Tuple[List[str], int]:
"""
Delete local or remote media from this server by size and timestamp. Removes
media files, any thumbnails and cached URLs.
Args:
before_ts: Unix timestamp in ms.
Files that were last used before this timestamp will be deleted
size_gt: Size of the media in bytes. Files that are larger will be deleted
keep_profiles: Switch to delete also files that are still used in image data
(e.g user profile, room avatar)
If false these files will be deleted
Returns:
A tuple of (list of deleted media IDs, total deleted media IDs).
"""
old_media = await self.store.get_local_media_before(
before_ts, size_gt, keep_profiles,
)
return await self._remove_local_media_from_disk(old_media)

async def _remove_local_media_from_disk(
self, media_ids: List[str]
) -> Tuple[List[str], int]:
"""
Delete local or remote media from this server. Removes media files,
any thumbnails and cached URLs.
Args:
media_ids: List of media_id to delete
Returns:
A tuple of (list of deleted media IDs, total deleted media IDs).
"""
removed_media = []
for media_id in media_ids:
logger.info("Deleting media with ID '%s'", media_id)
full_path = self.filepaths.local_media_filepath(media_id)
try:
os.remove(full_path)
except OSError as e:
logger.warning("Failed to remove file: %r: %s", full_path, e)
if e.errno == errno.ENOENT:
pass
else:
continue

thumbnail_dir = self.filepaths.local_media_thumbnail_dir(media_id)
shutil.rmtree(thumbnail_dir, ignore_errors=True)

await self.store.delete_remote_media(self.server_name, media_id)

await self.store.delete_url_cache((media_id,))
await self.store.delete_url_cache_media((media_id,))

removed_media.append(media_id)

return removed_media, len(removed_media)


class MediaRepositoryResource(Resource):
"""File uploading and downloading.
Expand Down
53 changes: 53 additions & 0 deletions synapse/storage/databases/main/media_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):

def __init__(self, database: DatabasePool, db_conn, hs):
super().__init__(database, db_conn, hs)
self.server_name = hs.hostname

async def get_local_media(self, media_id: str) -> Optional[Dict[str, Any]]:
"""Get the metadata for a local piece of media
Expand All @@ -115,6 +116,58 @@ async def get_local_media(self, media_id: str) -> Optional[Dict[str, Any]]:
desc="get_local_media",
)

async def get_local_media_before(
self, before_ts: int, size_gt: int, keep_profiles: bool,
) -> Optional[List[str]]:

# to find files that have never been accessed (last_access_ts IS NULL)
# compare with `created_ts`
sql = """
SELECT media_id
FROM local_media_repository AS lmr
WHERE
( last_access_ts < ?
OR ( created_ts < ? AND last_access_ts IS NULL ) )
AND media_length > ?
"""

if keep_profiles:
sql_keep = """
AND (
NOT EXISTS
(SELECT 1
FROM profiles
WHERE profiles.avatar_url = '{media_prefix}' || lmr.media_id)
AND NOT EXISTS
(SELECT 1
FROM groups
WHERE groups.avatar_url = '{media_prefix}' || lmr.media_id)
AND NOT EXISTS
(SELECT 1
FROM room_memberships
WHERE room_memberships.avatar_url = '{media_prefix}' || lmr.media_id)
AND NOT EXISTS
(SELECT 1
FROM user_directory
WHERE user_directory.avatar_url = '{media_prefix}' || lmr.media_id)
AND NOT EXISTS
(SELECT 1
FROM room_stats_state
WHERE room_stats_state.avatar = '{media_prefix}' || lmr.media_id)
)
""".format(
media_prefix="mxc://%s/" % (self.server_name,),
)
sql += sql_keep

def _get_local_media_before_txn(txn):
txn.execute(sql, (before_ts, before_ts, size_gt))
return [row[0] for row in txn]

return await self.db_pool.runInteraction(
"get_local_media_before", _get_local_media_before_txn
)

async def store_local_media(
self,
media_id,
Expand Down
Loading

0 comments on commit 49d72de

Please sign in to comment.