From 4fa2ff5d44f7c66c8a3f16ce9a544fd9631070b7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 29 Nov 2022 10:36:41 +0000 Subject: [PATCH 1/5] POC delete stale non-e2e devices for users (#14038) This should help reduce the number of devices e.g. simple bots the repeatedly login rack up. We only delete non-e2e devices as they should be safe to delete, whereas if we delete e2e devices for a user we may accidentally break their ability to receive e2e keys for a message. Co-authored-by: Patrick Cloke Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> --- changelog.d/14038.misc | 1 + synapse/handlers/device.py | 13 ++++- synapse/storage/databases/main/devices.py | 67 ++++++++++++++++++++++- tests/handlers/test_device.py | 2 +- tests/storage/test_client_ips.py | 4 +- 5 files changed, 83 insertions(+), 4 deletions(-) create mode 100644 changelog.d/14038.misc diff --git a/changelog.d/14038.misc b/changelog.d/14038.misc new file mode 100644 index 000000000000..f9bfc581ad53 --- /dev/null +++ b/changelog.d/14038.misc @@ -0,0 +1 @@ +Prune user's old devices on login if they have too many. diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index b1e55e1b9e4a..7c4dd8cf5a3d 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -421,6 +421,9 @@ async def check_device_registered( self._check_device_name_length(initial_device_display_name) + # Prune the user's device list if they already have a lot of devices. + await self._prune_too_many_devices(user_id) + if device_id is not None: new_device = await self.store.store_device( user_id=user_id, @@ -452,6 +455,14 @@ async def check_device_registered( raise errors.StoreError(500, "Couldn't generate a device ID.") + async def _prune_too_many_devices(self, user_id: str) -> None: + """Delete any excess old devices this user may have.""" + device_ids = await self.store.check_too_many_devices_for_user(user_id) + if not device_ids: + return + + await self.delete_devices(user_id, device_ids) + async def _delete_stale_devices(self) -> None: """Background task that deletes devices which haven't been accessed for more than a configured time period. @@ -481,7 +492,7 @@ async def delete_all_devices_for_user( device_ids = [d for d in device_ids if d != except_device_id] await self.delete_devices(user_id, device_ids) - async def delete_devices(self, user_id: str, device_ids: List[str]) -> None: + async def delete_devices(self, user_id: str, device_ids: Collection[str]) -> None: """Delete several devices Args: diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 8ba995df3b61..edbd1f9f2292 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -1538,6 +1538,70 @@ def _txn(txn: LoggingTransaction) -> int: return rows + async def check_too_many_devices_for_user(self, user_id: str) -> Collection[str]: + """Check if the user has a lot of devices, and if so return the set of + devices we can prune. + + This does *not* return hidden devices or devices with E2E keys. + """ + + num_devices = await self.db_pool.simple_select_one_onecol( + table="devices", + keyvalues={"user_id": user_id, "hidden": False}, + retcol="COALESCE(COUNT(*), 0)", + desc="count_devices", + ) + + # We let users have up to ten devices without pruning. + if num_devices <= 10: + return () + + # We prune everything older than N days. + max_last_seen = self._clock.time_msec() - 14 * 24 * 60 * 60 * 1000 + + if num_devices > 50: + # If the user has more than 50 devices, then we chose a last seen + # that ensures we keep at most 50 devices. + sql = """ + SELECT last_seen FROM devices + WHERE + user_id = ? + AND NOT hidden + AND last_seen IS NOT NULL + AND key_json IS NULL + ORDER BY last_seen DESC + LIMIT 1 + OFFSET 50 + """ + + rows = await self.db_pool.execute( + "check_too_many_devices_for_user_last_seen", None, sql, (user_id,) + ) + if rows: + max_last_seen = max(rows[0][0], max_last_seen) + + # Now fetch the devices to delete. + sql = """ + SELECT DISTINCT device_id FROM devices + LEFT JOIN e2e_device_keys_json USING (user_id, device_id) + WHERE + user_id = ? + AND NOT hidden + AND last_seen < ? + AND key_json IS NULL + """ + + def check_too_many_devices_for_user_txn( + txn: LoggingTransaction, + ) -> Collection[str]: + txn.execute(sql, (user_id, max_last_seen)) + return {device_id for device_id, in txn} + + return await self.db_pool.runInteraction( + "check_too_many_devices_for_user", + check_too_many_devices_for_user_txn, + ) + class DeviceStore(DeviceWorkerStore, DeviceBackgroundUpdateStore): # Because we have write access, this will be a StreamIdGenerator @@ -1596,6 +1660,7 @@ async def store_device( values={}, insertion_values={ "display_name": initial_device_display_name, + "last_seen": self._clock.time_msec(), "hidden": False, }, desc="store_device", @@ -1641,7 +1706,7 @@ async def store_device( ) raise StoreError(500, "Problem storing device.") - async def delete_devices(self, user_id: str, device_ids: List[str]) -> None: + async def delete_devices(self, user_id: str, device_ids: Collection[str]) -> None: """Deletes several devices. Args: diff --git a/tests/handlers/test_device.py b/tests/handlers/test_device.py index ce7525e29c0a..a456bffd632f 100644 --- a/tests/handlers/test_device.py +++ b/tests/handlers/test_device.py @@ -115,7 +115,7 @@ def test_get_devices_by_user(self) -> None: "device_id": "xyz", "display_name": "display 0", "last_seen_ip": None, - "last_seen_ts": None, + "last_seen_ts": 1000000, }, device_map["xyz"], ) diff --git a/tests/storage/test_client_ips.py b/tests/storage/test_client_ips.py index 49ad3c132425..a9af1babedf4 100644 --- a/tests/storage/test_client_ips.py +++ b/tests/storage/test_client_ips.py @@ -169,6 +169,8 @@ def test_get_last_client_ip_by_device(self, after_persisting: bool): ) ) + last_seen = self.clock.time_msec() + if after_persisting: # Trigger the storage loop self.reactor.advance(10) @@ -189,7 +191,7 @@ def test_get_last_client_ip_by_device(self, after_persisting: bool): "device_id": device_id, "ip": None, "user_agent": None, - "last_seen": None, + "last_seen": last_seen, }, ], ) From 36269c72e8973a5066d3cf07c3e021d0ffe879ac Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 29 Nov 2022 13:05:07 +0000 Subject: [PATCH 2/5] Fix `UndefinedColumn: column "key_json" does not exist` errors when handling users with more than 50 non-E2E devices (#14580) --- synapse/storage/databases/main/devices.py | 1 + 1 file changed, 1 insertion(+) diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index edbd1f9f2292..1e2425e6ed19 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -1564,6 +1564,7 @@ async def check_too_many_devices_for_user(self, user_id: str) -> Collection[str] # that ensures we keep at most 50 devices. sql = """ SELECT last_seen FROM devices + LEFT JOIN e2e_device_keys_json USING (user_id, device_id) WHERE user_id = ? AND NOT hidden From b10aa92610a36101c0b2eb3f5cfa5d796d7cb648 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 2 Dec 2022 11:12:16 +0000 Subject: [PATCH 3/5] Only delete N devices at a time --- synapse/handlers/device.py | 20 +++++++++++++++++++- synapse/storage/databases/main/devices.py | 9 +++++---- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index 7c4dd8cf5a3d..e1ab289b8a3f 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -52,6 +52,7 @@ from synapse.util.async_helpers import Linearizer from synapse.util.caches.expiringcache import ExpiringCache from synapse.util.cancellation import cancellable +from synapse.util.iterutils import batch_iter from synapse.util.metrics import measure_func from synapse.util.retryutils import NotRetryingDestination @@ -461,7 +462,24 @@ async def _prune_too_many_devices(self, user_id: str) -> None: if not device_ids: return - await self.delete_devices(user_id, device_ids) + # We don't want to block and try and delete tonnes of devices at once, + # so we cap the number of devices we delete synchronously. + first_batch, remaining_device_ids = device_ids[:10], device_ids[10:] + await self.delete_devices(user_id, first_batch) + + if not remaining_device_ids: + return + + # Now spawn a background loop that deletes the rest. + async def _prune_too_many_devices_loop() -> None: + for batch in batch_iter(remaining_device_ids, 10): + await self.delete_devices(user_id, batch) + + await self.clock.sleep(1) + + run_as_background_process( + "_prune_too_many_devices_loop", _prune_too_many_devices_loop + ) async def _delete_stale_devices(self) -> None: """Background task that deletes devices which haven't been accessed for more than diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 1e2425e6ed19..ae9e340ac309 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -1538,7 +1538,7 @@ def _txn(txn: LoggingTransaction) -> int: return rows - async def check_too_many_devices_for_user(self, user_id: str) -> Collection[str]: + async def check_too_many_devices_for_user(self, user_id: str) -> List[str]: """Check if the user has a lot of devices, and if so return the set of devices we can prune. @@ -1554,7 +1554,7 @@ async def check_too_many_devices_for_user(self, user_id: str) -> Collection[str] # We let users have up to ten devices without pruning. if num_devices <= 10: - return () + return [] # We prune everything older than N days. max_last_seen = self._clock.time_msec() - 14 * 24 * 60 * 60 * 1000 @@ -1590,13 +1590,14 @@ async def check_too_many_devices_for_user(self, user_id: str) -> Collection[str] AND NOT hidden AND last_seen < ? AND key_json IS NULL + ORDER BY last_seen """ def check_too_many_devices_for_user_txn( txn: LoggingTransaction, - ) -> Collection[str]: + ) -> List[str]: txn.execute(sql, (user_id, max_last_seen)) - return {device_id for device_id, in txn} + return [device_id for device_id, in txn] return await self.db_pool.runInteraction( "check_too_many_devices_for_user", From c9db5a1f4f202b9903dd519a49fb019978524253 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 2 Dec 2022 11:17:22 +0000 Subject: [PATCH 4/5] Add cache to delete devices to linearize deletes --- synapse/storage/databases/main/devices.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index ae9e340ac309..66d4d626b824 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -1708,7 +1708,15 @@ async def store_device( ) raise StoreError(500, "Problem storing device.") - async def delete_devices(self, user_id: str, device_ids: Collection[str]) -> None: + @cached(max_entries=0) + async def delete_device(self, user_id: str, device_id: str) -> None: + raise NotImplementedError() + + # Note: sometimes deleting rows out of `device_inbox` can take a long time, + # so we use a cache so that we deduplicate in flight requests to delete + # devices. + @cachedList(cached_method_name="delete_device", list_name="device_ids") + async def delete_devices(self, user_id: str, device_ids: Collection[str]) -> dict: """Deletes several devices. Args: @@ -1745,6 +1753,8 @@ def _delete_devices_txn(txn: LoggingTransaction) -> None: for device_id in device_ids: self.device_id_exists_cache.invalidate((user_id, device_id)) + return {} + async def update_device( self, user_id: str, device_id: str, new_display_name: Optional[str] = None ) -> None: From 838380c275c18c5ae293c82812d23752ce0fe819 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 2 Dec 2022 11:21:30 +0000 Subject: [PATCH 5/5] Newsfile --- changelog.d/{14038.misc => 14595.misc} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename changelog.d/{14038.misc => 14595.misc} (100%) diff --git a/changelog.d/14038.misc b/changelog.d/14595.misc similarity index 100% rename from changelog.d/14038.misc rename to changelog.d/14595.misc