Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Commit

Permalink
Add some miscellaneous comments around sync (#13474)
Browse files Browse the repository at this point in the history
Add some miscellaneous comments to document sync, especially around
`compute_state_delta`.

Signed-off-by: Sean Quah <seanq@matrix.org>

Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
  • Loading branch information
squahtx and richvdh authored Aug 10, 2022
1 parent 1b09b08 commit 51c01d4
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 40 deletions.
1 change: 1 addition & 0 deletions changelog.d/13474.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add some miscellaneous comments to document sync, especially around `compute_state_delta`.
116 changes: 78 additions & 38 deletions synapse/handlers/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,17 @@
# limitations under the License.
import itertools
import logging
from typing import TYPE_CHECKING, Any, Dict, FrozenSet, List, Optional, Set, Tuple
from typing import (
TYPE_CHECKING,
Any,
Dict,
FrozenSet,
List,
Optional,
Sequence,
Set,
Tuple,
)

import attr
from prometheus_client import Counter
Expand Down Expand Up @@ -89,7 +99,7 @@ class SyncConfig:
@attr.s(slots=True, frozen=True, auto_attribs=True)
class TimelineBatch:
prev_batch: StreamToken
events: List[EventBase]
events: Sequence[EventBase]
limited: bool
# A mapping of event ID to the bundled aggregations for the above events.
# This is only calculated if limited is true.
Expand Down Expand Up @@ -852,24 +862,35 @@ async def compute_state_delta(
now_token: StreamToken,
full_state: bool,
) -> MutableStateMap[EventBase]:
"""Works out the difference in state between the start of the timeline
and the previous sync.
"""Works out the difference in state between the end of the previous sync and
the start of the timeline.
Args:
room_id:
batch: The timeline batch for the room that will be sent to the user.
sync_config:
since_token: Token of the end of the previous batch. May be None.
since_token: Token of the end of the previous batch. May be `None`.
now_token: Token of the end of the current batch.
full_state: Whether to force returning the full state.
`lazy_load_members` still applies when `full_state` is `True`.
Returns:
The state to return in the sync response for the room.
Clients will overlay this onto the state at the end of the previous sync to
arrive at the state at the start of the timeline.
Clients will then overlay state events in the timeline to arrive at the
state at the end of the timeline, in preparation for the next sync.
"""
# TODO(mjark) Check if the state events were received by the server
# after the previous sync, since we need to include those state
# updates even if they occurred logically before the previous event.
# TODO(mjark) Check for new redactions in the state events.

with Measure(self.clock, "compute_state_delta"):

# The memberships needed for events in the timeline.
# Only calculated when `lazy_load_members` is on.
members_to_fetch = None

lazy_load_members = sync_config.filter_collection.lazy_load_members()
Expand Down Expand Up @@ -897,38 +918,46 @@ async def compute_state_delta(
else:
state_filter = StateFilter.all()

# The contribution to the room state from state events in the timeline.
# Only contains the last event for any given state key.
timeline_state = {
(event.type, event.state_key): event.event_id
for event in batch.events
if event.is_state()
}

# Now calculate the state to return in the sync response for the room.
# This is more or less the change in state between the end of the previous
# sync's timeline and the start of the current sync's timeline.
# See the docstring above for details.
state_ids: StateMap[str]

if full_state:
if batch:
current_state_ids = (
state_at_timeline_end = (
await self._state_storage_controller.get_state_ids_for_event(
batch.events[-1].event_id, state_filter=state_filter
)
)

state_ids = (
state_at_timeline_start = (
await self._state_storage_controller.get_state_ids_for_event(
batch.events[0].event_id, state_filter=state_filter
)
)

else:
current_state_ids = await self.get_state_at(
state_at_timeline_end = await self.get_state_at(
room_id, stream_position=now_token, state_filter=state_filter
)

state_ids = current_state_ids
state_at_timeline_start = state_at_timeline_end

state_ids = _calculate_state(
timeline_contains=timeline_state,
timeline_start=state_ids,
previous={},
current=current_state_ids,
timeline_start=state_at_timeline_start,
timeline_end=state_at_timeline_end,
previous_timeline_end={},
lazy_load_members=lazy_load_members,
)
elif batch.limited:
Expand Down Expand Up @@ -968,24 +997,23 @@ async def compute_state_delta(
)

if batch:
current_state_ids = (
state_at_timeline_end = (
await self._state_storage_controller.get_state_ids_for_event(
batch.events[-1].event_id, state_filter=state_filter
)
)
else:
# Its not clear how we get here, but empirically we do
# (#5407). Logging has been added elsewhere to try and
# figure out where this state comes from.
current_state_ids = await self.get_state_at(
# We can get here if the user has ignored the senders of all
# the recent events.
state_at_timeline_end = await self.get_state_at(
room_id, stream_position=now_token, state_filter=state_filter
)

state_ids = _calculate_state(
timeline_contains=timeline_state,
timeline_start=state_at_timeline_start,
previous=state_at_previous_sync,
current=current_state_ids,
timeline_end=state_at_timeline_end,
previous_timeline_end=state_at_previous_sync,
# we have to include LL members in case LL initial sync missed them
lazy_load_members=lazy_load_members,
)
Expand All @@ -1010,6 +1038,13 @@ async def compute_state_delta(
),
)

# At this point, if `lazy_load_members` is enabled, `state_ids` includes
# the memberships of all event senders in the timeline. This is because we
# may not have sent the memberships in a previous sync.

# When `include_redundant_members` is on, we send all the lazy-loaded
# memberships of event senders. Otherwise we make an effort to limit the set
# of memberships we send to those that we have not already sent to this client.
if lazy_load_members and not include_redundant_members:
cache_key = (sync_config.user.to_string(), sync_config.device_id)
cache = self.get_lazy_loaded_members_cache(cache_key)
Expand Down Expand Up @@ -2216,54 +2251,59 @@ def _action_has_highlight(actions: List[JsonDict]) -> bool:
def _calculate_state(
timeline_contains: StateMap[str],
timeline_start: StateMap[str],
previous: StateMap[str],
current: StateMap[str],
timeline_end: StateMap[str],
previous_timeline_end: StateMap[str],
lazy_load_members: bool,
) -> StateMap[str]:
"""Works out what state to include in a sync response.
Args:
timeline_contains: state in the timeline
timeline_start: state at the start of the timeline
previous: state at the end of the previous sync (or empty dict
timeline_end: state at the end of the timeline
previous_timeline_end: state at the end of the previous sync (or empty dict
if this is an initial sync)
current: state at the end of the timeline
lazy_load_members: whether to return members from timeline_start
or not. assumes that timeline_start has already been filtered to
include only the members the client needs to know about.
"""
event_id_to_key = {
e: key
for key, e in itertools.chain(
event_id_to_state_key = {
event_id: state_key
for state_key, event_id in itertools.chain(
timeline_contains.items(),
previous.items(),
timeline_start.items(),
current.items(),
timeline_end.items(),
previous_timeline_end.items(),
)
}

c_ids = set(current.values())
ts_ids = set(timeline_start.values())
p_ids = set(previous.values())
tc_ids = set(timeline_contains.values())
timeline_end_ids = set(timeline_end.values())
timeline_start_ids = set(timeline_start.values())
previous_timeline_end_ids = set(previous_timeline_end.values())
timeline_contains_ids = set(timeline_contains.values())

# If we are lazyloading room members, we explicitly add the membership events
# for the senders in the timeline into the state block returned by /sync,
# as we may not have sent them to the client before. We find these membership
# events by filtering them out of timeline_start, which has already been filtered
# to only include membership events for the senders in the timeline.
# In practice, we can do this by removing them from the p_ids list,
# which is the list of relevant state we know we have already sent to the client.
# In practice, we can do this by removing them from the previous_timeline_end_ids
# list, which is the list of relevant state we know we have already sent to the
# client.
# see https://github.com/matrix-org/synapse/pull/2970/files/efcdacad7d1b7f52f879179701c7e0d9b763511f#r204732809

if lazy_load_members:
p_ids.difference_update(
previous_timeline_end_ids.difference_update(
e for t, e in timeline_start.items() if t[0] == EventTypes.Member
)

state_ids = ((c_ids | ts_ids) - p_ids) - tc_ids
state_ids = (
(timeline_end_ids | timeline_start_ids)
- previous_timeline_end_ids
- timeline_contains_ids
)

return {event_id_to_key[e]: e for e in state_ids}
return {event_id_to_state_key[e]: e for e in state_ids}


@attr.s(slots=True, auto_attribs=True)
Expand Down
4 changes: 2 additions & 2 deletions synapse/visibility.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ async def filter_events_for_client(
* the user is not currently a member of the room, and:
* the user has not been a member of the room since the given
events
always_include_ids: set of event ids to specifically
include (unless sender is ignored)
always_include_ids: set of event ids to specifically include, if present
in events (unless sender is ignored)
filter_send_to_client: Whether we're checking an event that's going to be
sent to a client. This might not always be the case since this function can
also be called to check whether a user can see the state at a given point.
Expand Down

0 comments on commit 51c01d4

Please sign in to comment.