From f7630387937e5ca139bc2e56d594067d5308067b Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 28 Mar 2022 23:28:55 -0500 Subject: [PATCH 1/6] Explain how to decipher live and historic pagination tokens Fix https://github.com/matrix-org/synapse/issues/12305 --- synapse/types.py | 88 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 80 insertions(+), 8 deletions(-) diff --git a/synapse/types.py b/synapse/types.py index 5ce2a5b0a5ee..15112d4bd151 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -421,22 +421,92 @@ class RoomStreamToken: s0 s1 | | - [0] V [1] V [2] + [0] ▼ [1] ▼ [2] Tokens can either be a point in the live event stream or a cursor going through historic events. - When traversing the live event stream events are ordered by when they + When traversing the live event stream, events are ordered by when they arrived at the homeserver. - When traversing historic events the events are ordered by their depth in - the event graph "topological_ordering" and then by when they arrived at the - homeserver "stream_ordering". + When traversing historic events, events are first ordered by their + "depth" ("topological_ordering" in the event graph) and tie-broken by + "stream_ordering" (when the event arrived at the homeserver). + + --- Live tokens start with an "s" followed by the "stream_ordering" id of the - event it comes after. Historic tokens start with a "t" followed by the - "topological_ordering" id of the event it comes after, followed by "-", - followed by the "stream_ordering" id of the event it comes after. + event that comes after. The rest of the keys are joined together by + underscores in the following order and represent the position of various + data. + + ex. `s2633508_17_338_6732159_1082514_541479_274711_265584_1` + 1. `room_key`: `s2633508` -> `2633508` `stream_ordering` + 2. `presence_key`: `17` + 3. `typing_key`: `338` + 4. `receipt_key`: `6732159` + 5. `account_data_key`: `1082514` + 6. 
`push_rules_key`: `541479` + 7. `to_device_key`: `274711` + 8. `device_list_key`: `265584` + 9. `groups_key`: `1` + + For example, you can see how many of these keys correspond to the various + fields in the "/sync" response: + ``` + { + "next_batch": "s12_4_0_1_1_1_1_4_1", + "presence": { + "events": [] + }, + "device_lists": { + "changed": [] + }, + "rooms": { + "join": { + "!QrZlfIDQLNLdZHqTnt:hs1": { + "timeline": { + "events": [], + "prev_batch": "s10_4_0_1_1_1_1_4_1", + "limited": false + }, + "state": { + "events": [] + }, + "account_data": { + "events": [] + }, + "ephemeral": { + "events": [] + } + } + } + } + } + ``` + + --- + + Historic tokens start with a "t" followed by the "depth" + ("topological_ordering" in the event graph) of the event that comes after, + followed by "-", followed by the "stream_ordering" id of the event it comes + after along with rest of the same keys from the live tokens. + + You will see this type of token when using the "/messages" endpoint. + + ex. `t426-2633508_17_338_6732159_1082514_541479_274711_265584_1` + 1. `topological_ordering`: t426 -> `426` `depth` + 2. `room_key`: `2633508` `stream_ordering` + 3. `presence_key`: `17` + 4. `typing_key`: `338` + 5. `receipt_key`: `6732159` + 6. `account_data_key`: `1082514` + 7. `push_rules_key`: `541479` + 8. `to_device_key`: `274711` + 9. `device_list_key`: `265584` + 10. `groups_key`: `1` + + --- There is also a third mode for live tokens where the token starts with "m", which is sometimes used when using sharded event persisters. In this case @@ -460,6 +530,8 @@ class RoomStreamToken: commonly used instance names) are at positions 58 and 59 respectively, and all other instances are at position 56. + --- + Note: The `RoomStreamToken` cannot have both a topological part and an instance map. 
From 0a5bec2ca0ebcce8513d26c7f33b54721fdfeeee Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 28 Mar 2022 23:35:47 -0500 Subject: [PATCH 2/6] Add changelog --- changelog.d/12317.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/12317.misc diff --git a/changelog.d/12317.misc b/changelog.d/12317.misc new file mode 100644 index 000000000000..1dfee496d81f --- /dev/null +++ b/changelog.d/12317.misc @@ -0,0 +1 @@ +Update docstrings to explain how to decipher live and historic pagination tokens. From 1488f7073c4d47d3154efc84f5243f2b0e58b080 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 1 Apr 2022 17:32:51 -0500 Subject: [PATCH 3/6] Split key breakdown info out to StreamToken Addressing review from: - https://github.com/matrix-org/synapse/pull/12317#discussion_r838601965 - https://github.com/matrix-org/synapse/pull/12317#discussion_r838504262 --- synapse/types.py | 145 +++++++++++++++++++++++------------------------ 1 file changed, 71 insertions(+), 74 deletions(-) diff --git a/synapse/types.py b/synapse/types.py index 15112d4bd151..a51fd6c01c0b 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -426,85 +426,32 @@ class RoomStreamToken: Tokens can either be a point in the live event stream or a cursor going through historic events. - When traversing the live event stream, events are ordered by when they - arrived at the homeserver. + When traversing the live event stream, events are ordered by + `stream_ordering` (when they arrived at the homeserver). - When traversing historic events, events are first ordered by their - "depth" ("topological_ordering" in the event graph) and tie-broken by - "stream_ordering" (when the event arrived at the homeserver). + When traversing historic events, events are first ordered by their `depth` + (`topological_ordering` in the event graph) and tie-broken by + `stream_ordering` (when the event arrived at the homeserver). 
--- - Live tokens start with an "s" followed by the "stream_ordering" id of the - event that comes after. The rest of the keys are joined together by - underscores in the following order and represent the position of various - data. + Live tokens start with an "s" followed by the `stream_ordering` of the event + that comes before the position of the token. Said another way: + `stream_ordering` uniquely identifies a persisted event. The live token + means "the position just after the event identified by `stream_ordering`". + An example token is: - ex. `s2633508_17_338_6732159_1082514_541479_274711_265584_1` - 1. `room_key`: `s2633508` -> `2633508` `stream_ordering` - 2. `presence_key`: `17` - 3. `typing_key`: `338` - 4. `receipt_key`: `6732159` - 5. `account_data_key`: `1082514` - 6. `push_rules_key`: `541479` - 7. `to_device_key`: `274711` - 8. `device_list_key`: `265584` - 9. `groups_key`: `1` - - For example, you can see how many of these keys correspond to the various - fields in the "/sync" response: - ``` - { - "next_batch": "s12_4_0_1_1_1_1_4_1", - "presence": { - "events": [] - }, - "device_lists": { - "changed": [] - }, - "rooms": { - "join": { - "!QrZlfIDQLNLdZHqTnt:hs1": { - "timeline": { - "events": [], - "prev_batch": "s10_4_0_1_1_1_1_4_1", - "limited": false - }, - "state": { - "events": [] - }, - "account_data": { - "events": [] - }, - "ephemeral": { - "events": [] - } - } - } - } - } - ``` + s2633508 --- - Historic tokens start with a "t" followed by the "depth" - ("topological_ordering" in the event graph) of the event that comes after, - followed by "-", followed by the "stream_ordering" id of the event it comes - after along with rest of the same keys from the live tokens. - - You will see this type of token when using the "/messages" endpoint. - - ex. `t426-2633508_17_338_6732159_1082514_541479_274711_265584_1` - 1. `topological_ordering`: t426 -> `426` `depth` - 2. `room_key`: `2633508` `stream_ordering` - 3. `presence_key`: `17` - 4. 
`typing_key`: `338` - 5. `receipt_key`: `6732159` - 6. `account_data_key`: `1082514` - 7. `push_rules_key`: `541479` - 8. `to_device_key`: `274711` - 9. `device_list_key`: `265584` - 10. `groups_key`: `1` + Historic tokens start with a "t" followed by the `depth` + (`topological_ordering` in the event graph) of the event that comes before + the position of the token, followed by "-", followed by the + `stream_ordering` of the event it comes after along with rest of the same + keys from the live tokens. An example token is: + + t426-2633508 --- @@ -530,11 +477,11 @@ class RoomStreamToken: commonly used instance names) are at positions 58 and 59 respectively, and all other instances are at position 56. - --- - Note: The `RoomStreamToken` cannot have both a topological part and an instance map. + --- + For caching purposes, `RoomStreamToken`s and by extension, all their attributes, must be hashable. """ @@ -671,7 +618,57 @@ async def to_string(self, store: "DataStore") -> str: @attr.s(slots=True, frozen=True, auto_attribs=True) class StreamToken: - """A collection of positions within multiple streams. + """A collection of keys joined together by underscores in the following + order and represent the position in their respective streams. + + ex. `s2633508_17_338_6732159_1082514_541479_274711_265584_1` + 1. `room_key`: `s2633508` which is a `RoomStreamToken` + - `RoomStreamToken`s can also look like `t426-2633508` or `m56~2.58~3.59` + - See the docstring for `RoomStreamToken` for more details. + 2. `presence_key`: `17` + 3. `typing_key`: `338` + 4. `receipt_key`: `6732159` + 5. `account_data_key`: `1082514` + 6. `push_rules_key`: `541479` + 7. `to_device_key`: `274711` + 8. `device_list_key`: `265584` + 9. 
`groups_key`: `1` + + You can see how many of these keys correspond to the various + fields in a "/sync" response: + ```json + { + "next_batch": "s12_4_0_1_1_1_1_4_1", + "presence": { + "events": [] + }, + "device_lists": { + "changed": [] + }, + "rooms": { + "join": { + "!QrZlfIDQLNLdZHqTnt:hs1": { + "timeline": { + "events": [], + "prev_batch": "s10_4_0_1_1_1_1_4_1", + "limited": false + }, + "state": { + "events": [] + }, + "account_data": { + "events": [] + }, + "ephemeral": { + "events": [] + } + } + } + } + } + ``` + + --- For caching purposes, `StreamToken`s and by extension, all their attributes, must be hashable. From 2757be736efb14e8b123c7283f2da53be46bdb55 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 1 Apr 2022 17:39:04 -0500 Subject: [PATCH 4/6] Add cross-reference to point people from RoomStreamToken to StreamToken for more info --- synapse/types.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/synapse/types.py b/synapse/types.py index a51fd6c01c0b..b4319c8a7193 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -433,6 +433,11 @@ class RoomStreamToken: (`topological_ordering` in the event graph) and tie-broken by `stream_ordering` (when the event arrived at the homeserver). + If you're looking for more info about what a token with all of the + underscores means, ex. + `s2633508_17_338_6732159_1082514_541479_274711_265584_1`, see the docstring + for `StreamToken` below. 
+ --- Live tokens start with an "s" followed by the `stream_ordering` of the event From 5b2d216794f9d68ac211508a91ddd4dd66e6b777 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 4 Apr 2022 14:45:06 -0500 Subject: [PATCH 5/6] Update synapse/types.py Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> --- synapse/types.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/types.py b/synapse/types.py index b4319c8a7193..0d612cc61cda 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -453,8 +453,8 @@ class RoomStreamToken: Historic tokens start with a "t" followed by the `depth` (`topological_ordering` in the event graph) of the event that comes before the position of the token, followed by "-", followed by the - `stream_ordering` of the event it comes after along with rest of the same - keys from the live tokens. An example token is: + `stream_ordering` of the event that comes before the position of the token. + An example token is: t426-2633508 From 99732cbf8588e8bed2428a354aecb70bb205dd24 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 4 Apr 2022 14:47:21 -0500 Subject: [PATCH 6/6] Update synapse/types.py Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> --- synapse/types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/types.py b/synapse/types.py index 0d612cc61cda..91b7338f05a0 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -624,7 +624,7 @@ async def to_string(self, store: "DataStore") -> str: @attr.s(slots=True, frozen=True, auto_attribs=True) class StreamToken: """A collection of keys joined together by underscores in the following - order and represent the position in their respective streams. + order and which represent the position in their respective streams. ex. `s2633508_17_338_6732159_1082514_541479_274711_265584_1` 1. `room_key`: `s2633508` which is a `RoomStreamToken`