Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Fix leaking per-room profiles for local users when rebuilding directory #10761

1 change: 1 addition & 0 deletions changelog.d/10761.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix leaking per-room nicknames and avatars to the user directory for local users when the user directory is rebuilt.
10 changes: 9 additions & 1 deletion docs/user_directory.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,15 @@ flush the current tables and regenerate the directory.
Data model
----------

There are five relevant tables:
There are five relevant tables that collectively form the "user directory".
Three of them track a master list of all the users we could search for.
The last two (collectively called the "search tables") track who can
see who.

From all of these tables we exclude three types of local user:
- "support users"
DMRobertson marked this conversation as resolved.
Show resolved Hide resolved
- appservice users
- deactivated users

* `user_directory`. This contains the user_id, display name and avatar we'll
return when you search the directory.
Expand Down
41 changes: 13 additions & 28 deletions synapse/handlers/user_directory.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,13 +114,8 @@ async def handle_local_profile_change(
"""
# FIXME(#3714): We should probably do this in the same worker as all
# the other changes.

# Support users are for diagnostics and should not appear in the user directory.
is_support = await self.store.is_support_user(user_id)
# When change profile information of deactivated user it should not appear in the user directory.
is_deactivated = await self.store.get_user_deactivated_status(user_id)

if not (is_support or is_deactivated):
excluded = await self.store.exclude_from_user_dir(user_id)
if not excluded:
await self.store.update_profile_in_user_dir(
user_id, profile.display_name, profile.avatar_url
)
Expand Down Expand Up @@ -201,7 +196,7 @@ async def _handle_delta(self, delta: Dict[str, Any]) -> None:
room_id, prev_event_id, event_id, typ
)
elif typ == EventTypes.Member:
if await self._user_omitted_from_directory(state_key):
if await self.store.exclude_from_user_dir(state_key):
return

joined = await self._get_key_change(
Expand Down Expand Up @@ -258,16 +253,6 @@ async def _handle_delta(self, delta: Dict[str, Any]) -> None:
else:
logger.debug("Ignoring irrelevant type: %r", typ)

async def _user_omitted_from_directory(self, user_id: str) -> bool:
"""We want to ignore events from "hidden" users who shouldn't be exposed
to real users."""
if await self.store.is_support_user(user_id):
return True
if self.store.get_if_app_services_interested_in_user(user_id):
return True

return False

async def _handle_room_publicity_change(
self,
room_id: str,
Expand Down Expand Up @@ -340,34 +325,34 @@ async def _handle_room_publicity_change(

async def _track_user_joined_room(self, room_id: str, user_id: str) -> None:
"""Someone's just joined a room. Add to `users_in_public_rooms` and
`users_who_share_private_rooms` if necessary."""
`users_who_share_private_rooms` if necessary.

The caller is responsible for ensuring that the given user may be
included in the user directory.
(See UserDirectoryStore.exclude_from_user_dir)
"""
is_public = await self.store.is_room_world_readable_or_publicly_joinable(
room_id
)
if is_public:
await self.store.add_users_in_public_rooms(room_id, (user_id,))
else:
other_users_in_room = await self.store.get_users_in_room(room_id)
users_in_room = await self.store.get_users_in_room(room_id)
other_users_in_room = [
other_user
for other_user in other_users_in_room
if not await self._user_omitted_from_directory(other_user)
user
for user in users_in_room
if user != user_id and not await self.store.exclude_from_user_dir(user)
]

to_insert = set()

# First, if they're our user then we need to update for every user
if self.is_mine_id(user_id):
for other_user_id in other_users_in_room:
if user_id == other_user_id:
continue

to_insert.add((user_id, other_user_id))

# Next we need to update for every local user in the room
for other_user_id in other_users_in_room:
if user_id == other_user_id:
continue
if self.is_mine_id(other_user_id):
to_insert.add((other_user_id, user_id))

Expand Down
74 changes: 45 additions & 29 deletions synapse/storage/databases/main/user_directory.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@

from synapse.api.constants import EventTypes, HistoryVisibility, JoinRules
from synapse.storage.database import DatabasePool, LoggingTransaction
from synapse.storage.databases.main.appservice import ApplicationServiceWorkerStore
from synapse.storage.databases.main.registration import RegistrationWorkerStore
from synapse.storage.databases.main.state import StateFilter
from synapse.storage.databases.main.state_deltas import StateDeltasStore
from synapse.storage.engines import PostgresEngine, Sqlite3Engine
Expand All @@ -28,7 +30,6 @@

logger = logging.getLogger(__name__)


TEMP_TABLE = "_temp_populate_user_directory"


Expand All @@ -37,7 +38,6 @@ class ProgressDict(TypedDict):


class UserDirectoryBackgroundUpdateStore(StateDeltasStore):

# How many records do we calculate before sending it to
# add_users_who_share_private_rooms?
SHARE_PRIVATE_WORKING_SET = 500
Expand Down Expand Up @@ -249,40 +249,34 @@ async def _populate_user_directory_process_single_room(self, room_id: str) -> No
if not is_in_room:
return

is_public = await self.is_room_world_readable_or_publicly_joinable(room_id)
# TODO: this will leak per-room profiles to the user directory.
# TODO: get_users_in_room_with_profiles returns per-room profiles. Leak!
users_with_profile = await self.get_users_in_room_with_profiles(room_id)

# Update each remote user in the user directory.
# (Entries for local users are managed by the UserDirectoryHandler
# and do not require us to peek at room state/events.)
users_with_profile = {
user_id: profile
for user_id, profile in users_with_profile.items()
if not await self.exclude_from_user_dir(user_id)
}

# Upsert a user_directory record for each remote user we see.
# (Local users are processed in `_populate_user_directory_users`.)
for user_id, profile in users_with_profile.items():
if self.hs.is_mine_id(user_id):
continue
await self.update_profile_in_user_dir(
user_id, profile.display_name, profile.avatar_url
)

to_insert = set()

# Now update the room sharing tables to include this room.
is_public = await self.is_room_world_readable_or_publicly_joinable(room_id)
if is_public:
for user_id in users_with_profile:
if self.get_if_app_services_interested_in_user(user_id):
continue

to_insert.add(user_id)

if to_insert:
await self.add_users_in_public_rooms(room_id, to_insert)
to_insert.clear()
if users_with_profile:
await self.add_users_in_public_rooms(room_id, users_with_profile.keys())
else:
to_insert = set()
for user_id in users_with_profile:
if not self.hs.is_mine_id(user_id):
continue

if self.get_if_app_services_interested_in_user(user_id):
continue

for other_user_id in users_with_profile:
if user_id == other_user_id:
continue
Expand All @@ -298,7 +292,6 @@ async def _populate_user_directory_process_single_room(self, room_id: str) -> No

if to_insert:
await self.add_users_who_share_private_room(room_id, to_insert)
to_insert.clear()

async def _populate_user_directory_process_users(
self, progress: ProgressDict, batch_size: int
Expand Down Expand Up @@ -343,10 +336,11 @@ def _get_next_batch(txn):
)

for user_id in users_to_work_on:
profile = await self.get_profileinfo(get_localpart_from_id(user_id))
await self.update_profile_in_user_dir(
user_id, profile.display_name, profile.avatar_url
)
if not await self.exclude_from_user_dir(user_id):
profile = await self.get_profileinfo(get_localpart_from_id(user_id))
DMRobertson marked this conversation as resolved.
Show resolved Hide resolved
await self.update_profile_in_user_dir(
user_id, profile.display_name, profile.avatar_url
)

# We've finished processing a user. Delete it from the table.
await self.db_pool.simple_delete_one(
Expand Down Expand Up @@ -521,17 +515,39 @@ async def get_user_in_directory(self, user_id: str) -> Optional[Dict[str, Any]]:
desc="get_user_in_directory",
)

async def update_user_directory_stream_pos(self, stream_id: int) -> None:
async def update_user_directory_stream_pos(self, stream_id: Optional[int]) -> None:
await self.db_pool.simple_update_one(
table="user_directory_stream_pos",
keyvalues={},
updatevalues={"stream_id": stream_id},
desc="update_user_directory_stream_pos",
)

async def exclude_from_user_dir(self, user_id: str) -> bool:
DMRobertson marked this conversation as resolved.
Show resolved Hide resolved
"""Certain classes of local user are omitted from the user directory.
Is this user one of them?
"""
if self.hs.is_mine_id(user_id):
# Support users are for diagnostics and should not appear in the user directory.
if await self.is_support_user(user_id):
return True

# Deactivated users aren't contactable, so should not appear in the user directory.
if await self.get_user_deactivated_status(user_id):
return True

# App service users aren't usually contactable, so exclude them.
if self.get_if_app_services_interested_in_user(user_id):
return True

return False

class UserDirectoryStore(UserDirectoryBackgroundUpdateStore):

class UserDirectoryStore(
UserDirectoryBackgroundUpdateStore,
ApplicationServiceWorkerStore,
RegistrationWorkerStore,
Comment on lines +544 to +546
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where did this come from/Can you explain? (sounds sarcastic — it's not. I just wonder if this is what we're meant to do. I guess the store is one of those big mixins we were talking about [finally clicked in my head]. I don't know enough about Python multi-inheritance rules — do we do this anywhere else? Will it cause problems?)

Copy link
Contributor Author

@DMRobertson DMRobertson Sep 7, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess the store is one of those big mixins we were talking about [finally clicked in my head].

DataStore is a class with a lot of parents; dunno if we'd consider that a mixin. (I think of a mixin as something that's designed to be used with multiple inheritance, but isn't really a standalone Thing/Entity by itself.)

But yeah, the DataStore inherits from 9001 other Stores but... they're not all independent. Sometimes they call each other's methods.

Being honest, my motivation is

I just wonder if this is what we're meant to do.

erm, pass. I think the status quo is "yeah it sucks, and @erikjohnston has a plan how we could do things better one day"

I don't know enough about Python multi-inheritance rules — do we do this anywhere else? Will it cause problems?)

I had to be careful to order the parent classes, or else Python will fail to create a class because it can't build a consistent MRO.

Will it cause problems?

Was hoping to rely on unit tests to figure that out. Would appreciate input from the rest of the team here.

Having looked over it, I think this these classes get pulled in by the UserDirectoryBackgroundUpdateStore, not the UserDirectoryStore.

):
# How many records do we calculate before sending it to
# add_users_who_share_private_rooms?
SHARE_PRIVATE_WORKING_SET = 500
Expand Down
Loading