Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Draft: /messages investigation scratch pad1 #13440

Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
522c29b
Instrument /messages for understandable traces
MadLittleMods Jul 23, 2022
b6a18d2
Trace in Complement
MadLittleMods Aug 3, 2022
2504bc6
Merge branch 'madlittlemods/instrument-messages-tracing' into madlitt…
MadLittleMods Aug 3, 2022
9cd6320
Fix imports after OTEL changes
MadLittleMods Aug 3, 2022
c3f3e59
Merge branch 'madlittlemods/11850-migrate-to-opentelemetry' into madl…
MadLittleMods Aug 3, 2022
9f69182
Move Twisted git install where it was before
MadLittleMods Aug 3, 2022
2f75287
Fix @tag_args being one-off (ahead)
MadLittleMods Aug 3, 2022
fdce1c2
Allow @trace and @tag_args to be used together
MadLittleMods Aug 4, 2022
a7eabb7
Trace more
MadLittleMods Aug 4, 2022
13855c5
More tracing for federated side
MadLittleMods Aug 6, 2022
552b7f1
More tracing for federation
MadLittleMods Aug 6, 2022
c51883e
Add length to the list of events
MadLittleMods Aug 6, 2022
ee465f9
Fix some lints (mistakes) and better trace when fetching events
MadLittleMods Aug 6, 2022
aa5e925
Only set attribute if going forward
MadLittleMods Aug 9, 2022
2a467fd
Merge branch 'madlittlemods/11850-migrate-to-opentelemetry' into madl…
MadLittleMods Aug 9, 2022
597c3f2
Trace some results
MadLittleMods Aug 9, 2022
f4ec9d1
Instrument FederationStateIdsServlet
MadLittleMods Aug 10, 2022
898ba0e
More tracing
MadLittleMods Aug 11, 2022
53b8453
Refactor from feedback
MadLittleMods Aug 15, 2022
0f2bfa4
Merge branch 'madlittlemods/11850-migrate-to-opentelemetry' into madl…
MadLittleMods Aug 18, 2022
db04b16
Some cleanup
MadLittleMods Aug 18, 2022
4168ba5
Remove debug logs
MadLittleMods Aug 19, 2022
05e5113
Merge branch 'madlittlemods/11850-migrate-to-opentelemetry' into madl…
MadLittleMods Sep 20, 2022
d8899e4
Merge branch 'madlittlemods/11850-migrate-to-opentelemetry' into madl…
MadLittleMods Sep 26, 2022
04de9ea
Merge branch 'madlittlemods/11850-migrate-to-opentelemetry' into madl…
MadLittleMods Nov 18, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions docker/conf/homeserver.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -184,3 +184,21 @@ trusted_key_servers:

password_config:
MadLittleMods marked this conversation as resolved.
Show resolved Hide resolved
enabled: true


# foo
tracing:
enabled: true
sample_rate: 1
jaeger_exporter_config:
agent_host_name: host.docker.internal
agent_port: 6831
# Split UDP packets (UDP_PACKET_MAX_LENGTH is set to 65k in OpenTelemetry)
udp_split_oversized_batches: true
# If you define a collector, it will communicate directly to the collector,
# bypassing the agent
#
# It does not seem like the agent can keep up with the massive UDP load
# (1065 spans in one trace) so lets just use the HTTP collector endpoint
# instead which seems to work.
Comment on lines +203 to +205
Copy link
Contributor Author

@MadLittleMods MadLittleMods Aug 3, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder why this is the case? I was seeing this same behavior with the Jaeger opentracing stuff. Is the UDP connection being over saturated? Can the Jaeger agent in Docker not keep up? We see some spans come over but never the main servlet overarching one that is probably the last to be exported.

But using the HTTP Jaeger collector endpoint seems to work fine for getting the whole trace.

collector_endpoint: "http://host.docker.internal:14268/api/traces?format=jaeger.thrift"
8 changes: 7 additions & 1 deletion synapse/api/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,12 @@
from synapse.appservice import ApplicationService
from synapse.http import get_request_user_agent
from synapse.http.site import SynapseRequest
from synapse.logging.tracing import force_tracing, get_active_span, start_active_span
from synapse.logging.tracing import (
force_tracing,
get_active_span,
start_active_span,
trace,
)
from synapse.storage.databases.main.registration import TokenLookupResult
from synapse.types import Requester, UserID, create_requester

Expand Down Expand Up @@ -566,6 +571,7 @@ def get_access_token_from_request(request: Request) -> str:

return query_params[0].decode("ascii")

@trace
async def check_user_in_room_or_world_readable(
self, room_id: str, user_id: str, allow_departed_users: bool = False
) -> Tuple[str, Optional[str]]:
Expand Down
2 changes: 2 additions & 0 deletions synapse/federation/federation_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
)
from synapse.federation.transport.client import SendJoinResponse
from synapse.http.types import QueryParams
from synapse.logging.tracing import trace
from synapse.types import JsonDict, UserID, get_domain_from_id
from synapse.util.async_helpers import concurrently_execute
from synapse.util.caches.expiringcache import ExpiringCache
Expand Down Expand Up @@ -233,6 +234,7 @@ async def claim_client_keys(
destination, content, timeout
)

@trace
async def backfill(
self, dest: str, room_id: str, limit: int, extremities: Collection[str]
) -> Optional[List[EventBase]]:
Expand Down
2 changes: 2 additions & 0 deletions synapse/handlers/federation.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
from synapse.federation.federation_client import InvalidResponseError
from synapse.http.servlet import assert_params_in_dict
from synapse.logging.context import nested_logging_context
from synapse.logging.tracing import trace
from synapse.metrics.background_process_metrics import run_as_background_process
from synapse.module_api import NOT_SPAM
from synapse.replication.http.federation import (
Expand Down Expand Up @@ -180,6 +181,7 @@ def __init__(self, hs: "HomeServer"):
"resume_sync_partial_state_room", self._resume_sync_partial_state_room
)

@trace
async def maybe_backfill(
self, room_id: str, current_depth: int, limit: int
) -> bool:
Expand Down
5 changes: 5 additions & 0 deletions synapse/handlers/federation_event.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
from synapse.events.snapshot import EventContext
from synapse.federation.federation_client import InvalidResponseError
from synapse.logging.context import nested_logging_context
from synapse.logging.tracing import trace
from synapse.metrics.background_process_metrics import run_as_background_process
from synapse.replication.http.devices import ReplicationUserDevicesResyncRestServlet
from synapse.replication.http.federation import (
Expand Down Expand Up @@ -566,6 +567,7 @@ async def update_state_for_partial_state_event(
event.event_id
)

@trace
async def backfill(
self, dest: str, room_id: str, limit: int, extremities: Collection[str]
) -> None:
Expand Down Expand Up @@ -610,6 +612,7 @@ async def backfill(
backfilled=True,
)

@trace
async def _get_missing_events_for_pdu(
self, origin: str, pdu: EventBase, prevs: Set[str], min_depth: int
) -> None:
Expand Down Expand Up @@ -710,6 +713,7 @@ async def _get_missing_events_for_pdu(
logger.info("Got %d prev_events", len(missing_events))
await self._process_pulled_events(origin, missing_events, backfilled=False)

@trace
async def _process_pulled_events(
self, origin: str, events: Iterable[EventBase], backfilled: bool
) -> None:
Expand Down Expand Up @@ -748,6 +752,7 @@ async def _process_pulled_events(
with nested_logging_context(ev.event_id):
await self._process_pulled_event(origin, ev, backfilled=backfilled)

@trace
async def _process_pulled_event(
self, origin: str, event: EventBase, backfilled: bool
) -> None:
Expand Down
2 changes: 2 additions & 0 deletions synapse/handlers/pagination.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from synapse.api.filtering import Filter
from synapse.events.utils import SerializeEventConfig
from synapse.handlers.room import ShutdownRoomResponse
from synapse.logging.tracing import trace
from synapse.metrics.background_process_metrics import run_as_background_process
from synapse.storage.state import StateFilter
from synapse.streams.config import PaginationConfig
Expand Down Expand Up @@ -416,6 +417,7 @@ async def purge_room(self, room_id: str, force: bool = False) -> None:

await self._storage_controllers.purge_events.purge_room(room_id)

@trace
async def get_messages(
self,
requester: Requester,
Expand Down
2 changes: 2 additions & 0 deletions synapse/handlers/relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from synapse.api.constants import RelationTypes
from synapse.api.errors import SynapseError
from synapse.events import EventBase, relation_from_event
from synapse.logging.tracing import trace
from synapse.storage.databases.main.relations import _RelatedEvent
from synapse.types import JsonDict, Requester, StreamToken, UserID
from synapse.visibility import filter_events_for_client
Expand Down Expand Up @@ -361,6 +362,7 @@ async def _get_threads_for_events(

return results

@trace
async def get_bundled_aggregations(
self, events: Iterable[EventBase], user_id: str
) -> Dict[str, BundledAggregations]:
Expand Down
2 changes: 1 addition & 1 deletion synapse/storage/controllers/persist_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -781,7 +781,7 @@ async def _calculate_new_extremities(
stale_forward_extremities_counter.observe(len(stale))

return result

async def _get_new_state_after_events(
self,
room_id: str,
Expand Down
5 changes: 5 additions & 0 deletions synapse/storage/controllers/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

from synapse.api.constants import EventTypes
from synapse.events import EventBase
from synapse.logging.tracing import trace
from synapse.storage.state import StateFilter
from synapse.storage.util.partial_state_events_tracker import (
PartialCurrentStateTracker,
Expand Down Expand Up @@ -179,6 +180,7 @@ def _get_state_groups_from_groups(

return self.stores.state._get_state_groups_from_groups(groups, state_filter)

@trace
async def get_state_for_events(
self, event_ids: Collection[str], state_filter: Optional[StateFilter] = None
) -> Dict[str, StateMap[EventBase]]:
Expand Down Expand Up @@ -225,6 +227,7 @@ async def get_state_for_events(

return {event: event_to_state[event] for event in event_ids}

@trace
async def get_state_ids_for_events(
self,
event_ids: Collection[str],
Expand Down Expand Up @@ -287,6 +290,7 @@ async def get_state_for_event(
)
return state_map[event_id]

@trace
async def get_state_ids_for_event(
self, event_id: str, state_filter: Optional[StateFilter] = None
) -> StateMap[str]:
Expand Down Expand Up @@ -327,6 +331,7 @@ def get_state_for_groups(
groups, state_filter or StateFilter.all()
)

@trace
async def get_state_group_for_events(
self,
event_ids: Collection[str],
Expand Down
2 changes: 2 additions & 0 deletions synapse/storage/databases/main/stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
from synapse.api.filtering import Filter
from synapse.events import EventBase
from synapse.logging.context import make_deferred_yieldable, run_in_background
from synapse.logging.tracing import trace
from synapse.storage._base import SQLBaseStore
from synapse.storage.database import (
DatabasePool,
Expand Down Expand Up @@ -1346,6 +1347,7 @@ def _paginate_room_events_txn(

return rows, next_token

@trace
async def paginate_room_events(
self,
room_id: str,
Expand Down
2 changes: 2 additions & 0 deletions synapse/visibility.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from synapse.events import EventBase
from synapse.events.snapshot import EventContext
from synapse.events.utils import prune_event
from synapse.logging.tracing import trace
from synapse.storage.controllers import StorageControllers
from synapse.storage.databases.main import DataStore
from synapse.storage.state import StateFilter
Expand Down Expand Up @@ -51,6 +52,7 @@
_HISTORY_VIS_KEY: Final[Tuple[str, str]] = (EventTypes.RoomHistoryVisibility, "")


@trace
async def filter_events_for_client(
storage: StorageControllers,
user_id: str,
Expand Down