From 13b335a7102bbbd6b305bc91ad2a7063789e6ffa Mon Sep 17 00:00:00 2001 From: Simonas Kazlauskas Date: Sat, 30 Apr 2022 21:49:00 +0300 Subject: [PATCH] Remove ability to disable the client at runtime One of the requirements we would need to maintain is that the profiler is started and shut down in the same thread in all cases. This seems like a hard problem, so let's just not solve it for now. I don't think people will have significant issues with the profiler just being there until the process terminates (not to mention that the kernel is probably much better at cleaning up everything than we are anyway). --- tracing-tracy/src/lib.rs | 6 +- tracy-client/benches/state.rs | 19 +--- tracy-client/src/frame.rs | 26 +++-- tracy-client/src/lib.rs | 10 +- tracy-client/src/plot.rs | 6 +- tracy-client/src/span.rs | 11 +- tracy-client/src/state.rs | 194 ++++++++++++---------------------- tracy-client/tests/loom.rs | 102 ++++++++++-------- tracy-client/tests/tests.rs | 10 +- 9 files changed, 169 insertions(+), 215 deletions(-) diff --git a/tracing-tracy/src/lib.rs b/tracing-tracy/src/lib.rs index c4abf80..47b61ff 100644 --- a/tracing-tracy/src/lib.rs +++ b/tracing-tracy/src/lib.rs @@ -16,7 +16,7 @@ //! * Events show up as messages in Tracy, however Tracy can struggle with large numbers of //! messages; //! * Some additional functionality such as plotting and memory allocation profiling is only -//! available as part of the [tracy-client](tracy_client) crate. +//! available as part of the [tracy-client](client) crate. //! //! # Examples //! 
@@ -86,7 +86,7 @@ impl TracyLayer { Self { fmt: DefaultFields::default(), stack_depth: 64, - client: Client::enable(), + client: Client::start(), } } } @@ -185,7 +185,7 @@ where }; TRACY_SPAN_STACK.with(|s| { s.borrow_mut().push_back(( - self.client.span_alloc( + self.client.clone().span_alloc( self.truncate_to_length( &name, file, diff --git a/tracy-client/benches/state.rs b/tracy-client/benches/state.rs index 7ef024f..d0d3c46 100644 --- a/tracy-client/benches/state.rs +++ b/tracy-client/benches/state.rs @@ -12,21 +12,10 @@ fn bench_clone(c: &mut Criterion) { c.bench_function("clone", |b| b.iter(|| clients.push(client.clone()))); } -fn bench_start_stop(c: &mut Criterion) { - c.bench_function("start_stop", |b| b.iter(|| Client::start())); +fn bench_running(c: &mut Criterion) { + let _client = Client::start(); + c.bench_function("running", |b| b.iter(|| Client::running())); } -fn bench_counting(c: &mut Criterion) { - let client = Client::start(); - c.bench_function("counting", |b| b.iter(|| Client::start())); - drop(client); -} - -criterion_group!( - benches, - bench_start, - bench_clone, - bench_start_stop, - bench_counting, -); +criterion_group!(benches, bench_start, bench_clone, bench_running,); criterion_main!(benches); diff --git a/tracy-client/src/frame.rs b/tracy-client/src/frame.rs index 955a5dd..7a6a2a0 100644 --- a/tracy-client/src/frame.rs +++ b/tracy-client/src/frame.rs @@ -19,14 +19,14 @@ impl Client { /// /// In a traditional rendering scenarios a frame mark should be inserted after a buffer swap. /// - /// ```no_run + /// ``` /// use tracy_client::Client; /// # fn swap_buffers() {} - /// let client = tracy_client::Client::enable(); + /// # let client = tracy_client::Client::start(); /// // loop { /// // ... 
/// swap_buffers(); - /// client.frame_mark(); + /// Client::running().expect("client must be running").frame_mark(); /// // } /// ``` pub fn frame_mark(&self) { @@ -43,14 +43,16 @@ impl Client { /// Much like with the primary frame mark, the secondary (named) frame mark should be inserted /// after some continuously repeating operation finishes one iteration of its processing. /// - /// ```no_run - /// use tracy_client::{Client, frame_name}; + /// ``` + /// use tracy_client::frame_name; /// # fn physics_tick() {} - /// let client = tracy_client::Client::enable(); + /// # let client = tracy_client::Client::start(); /// // loop { /// // ... /// physics_tick(); - /// client.secondary_frame_mark(frame_name!("physics")); + /// tracy_client::Client::running() + /// .expect("client must be running") + /// .secondary_frame_mark(frame_name!("physics")); /// // } /// ``` pub fn secondary_frame_mark(&self, name: FrameName) { @@ -67,10 +69,12 @@ impl Client { /// /// # Examples /// - /// ```no_run - /// use tracy_client::{Client, frame_name}; - /// let client = tracy_client::Client::enable(); - /// client.non_continuous_frame(frame_name!("a frame")); + /// ``` + /// use tracy_client::frame_name; + /// # let client = tracy_client::Client::start(); + /// tracy_client::Client::running() + /// .expect("client must be running") + /// .non_continuous_frame(frame_name!("a frame")); /// ``` pub fn non_continuous_frame(&self, name: FrameName) -> Frame { #[cfg(feature = "enable")] diff --git a/tracy-client/src/lib.rs b/tracy-client/src/lib.rs index ada0d31..dbfc699 100644 --- a/tracy-client/src/lib.rs +++ b/tracy-client/src/lib.rs @@ -95,6 +95,9 @@ pub mod internal { /// still hasn't been delivered to the profiler application. pub struct Client(()); +/// Instrumentation methods for outputting events occurring at a specific instant. +/// +/// Data provided by this instrumentation can largely be considered to be equivalent to logs. impl Client { /// Output a message. 
/// @@ -120,7 +123,9 @@ impl Client { sys::___tracy_emit_messageC(message.as_ptr().cast(), message.len(), rgba >> 8, depth) } } +} +impl Client { /// Set the current thread name to the provided value. pub fn set_thread_name(&self, name: &str) { #[cfg(feature = "enable")] @@ -136,8 +141,8 @@ impl Client { /// /// See documentation for [`std::alloc`](std::alloc) for more information about global allocators. /// -/// Note that to use this wrapper correctly you must ensure that the client is enabled before the -/// first allocation occurs. The client must not not be disabled if this wrapper is used. +/// Note that this wrapper will start up the client on the first allocation, if not enabled +/// already. /// /// # Examples /// @@ -160,6 +165,7 @@ impl ProfiledAllocator { fn emit_alloc(&self, ptr: *mut u8, size: usize) { #[cfg(feature = "enable")] unsafe { + Client::start(); if self.1 == 0 { sys::___tracy_emit_memory_alloc(ptr.cast(), size, 1); } else { diff --git a/tracy-client/src/plot.rs b/tracy-client/src/plot.rs index 5fca73b..97026ed 100644 --- a/tracy-client/src/plot.rs +++ b/tracy-client/src/plot.rs @@ -13,8 +13,10 @@ impl Client { /// # Examples /// /// ``` - /// let client = tracy_client::Client::enable(); - /// client.plot(tracy_client::plot_name!("temperature"), 37.0); + /// # let client = tracy_client::Client::start(); + /// tracy_client::Client::running() + /// .expect("client must be running") + /// .plot(tracy_client::plot_name!("temperature"), 37.0); /// ``` pub fn plot(&self, plot_name: PlotName, value: f64) { #[cfg(feature = "enable")] diff --git a/tracy-client/src/span.rs b/tracy-client/src/span.rs index 0215e44..0582701 100644 --- a/tracy-client/src/span.rs +++ b/tracy-client/src/span.rs @@ -18,7 +18,6 @@ pub struct Span { /// A statically allocated location information for a span. /// /// Construct with the [`span_location!`](crate::span_location) macro. 
-#[cfg(feature = "enable")] pub struct SpanLocation { #[cfg(feature = "enable")] pub(crate) _function_name: CString, @@ -41,6 +40,8 @@ impl Client { /// `callstack_depth` specifies the maximum number of stack frames the client should collect. /// On some systems this value may be clamped to a maximum value supported by the target. /// + /// The [`span!`](crate::span!) macro is a convenience wrapper over this method. + /// /// # Example /// /// In the following example the span is created with the location at which the @@ -48,7 +49,7 @@ impl Client { /// /// ```rust /// use tracy_client::{Client, span_location}; - /// let client = Client::enable(); + /// let client = Client::start(); /// { /// let _span = client.span(span_location!("sleeping"), 100); /// std::thread::sleep(std::time::Duration::from_millis(100)); @@ -92,7 +93,7 @@ impl Client { /// /// ```rust /// use tracy_client::Client; - /// let client = Client::enable(); + /// let client = Client::start(); /// { /// let _span = client.span_alloc("hello", "my_function", "hello.rs", 42, 100); /// std::thread::sleep(std::time::Duration::from_millis(100)); @@ -221,7 +222,8 @@ macro_rules! span_location { /// Begin a span region, which will be terminated once `_span` goes out of scope: /// /// ``` -/// use tracy_client::span; +/// use tracy_client::{Client, span}; +/// # let _client = tracy_client::Client::start(); /// let _span = span!("some span"); /// ``` /// @@ -229,6 +231,7 @@ macro_rules! span_location { /// /// ``` /// use tracy_client::span; +/// # let _client = tracy_client::Client::start(); /// let _span = span!("some span", 32); /// ``` #[macro_export] diff --git a/tracy-client/src/state.rs b/tracy-client/src/state.rs index 119ef96..3e0f55c 100644 --- a/tracy-client/src/state.rs +++ b/tracy-client/src/state.rs @@ -7,8 +7,8 @@ use std::sync::atomic::Ordering; /// just as many times without any reprecursions. At the very least this could significantly /// help tests. 
/// -/// In order to do this we want to track 4 states that construct a following finite state -/// machine +/// We can also try to implement something like this ourselves. To do this we'd want to track 4 +/// states that construct a following finite state machine: /// /// ```text /// 0 = disabled -> 1 = enabling @@ -19,38 +19,30 @@ use std::sync::atomic::Ordering; /// And also include a reference count somewhere in there. Something we can throw in a static /// would be ideal. /// -/// Sadly, I am not aware of any libraries which would make this easier, so rolling out our own -/// it is then! +/// Alas, Tracy's extensive use of thread-local storage presents us with another problem – we must +/// start up and shut down the client within the same thread. A most straightforward soution for +/// that would be to run a separate thread that would be dedicated entirely to just starting up and +/// shutting down the profiler. /// -/// Oh and it'll definitely involve spinning, getting some vertigo medication is advised. -/// -/// <...ommitted...> -/// -/// We use a single Atomic to store this information. The 2 top-most bits will represent -/// the state bits and the rest will act as a counter. +/// All that seems like a major pain to implement, and so we’ll punt on disabling entirely until +/// somebody comes with a good use-case warranting that sort of complexity. #[cfg(feature = "enable")] #[cfg(not(loom))] -static ENABLE_STATE: std::sync::atomic::AtomicUsize = std::sync::atomic::AtomicUsize::new(0); +static CLIENT_STATE: std::sync::atomic::AtomicUsize = std::sync::atomic::AtomicUsize::new(0); #[cfg(loom)] loom::lazy_static! 
{ - static ref ENABLE_STATE: loom::sync::atomic::AtomicUsize = + static ref CLIENT_STATE: loom::sync::atomic::AtomicUsize = loom::sync::atomic::AtomicUsize::new(0); } #[cfg(feature = "enable")] -const REFCOUNT_MASK: usize = usize::max_value() >> 2; -#[cfg(feature = "enable")] -const STATE_STEP: usize = REFCOUNT_MASK + 1; // Move forward by 1 step in the FSM +const STATE_STEP: usize = 1; // Move forward by 1 step in the FSM #[cfg(feature = "enable")] const STATE_DISABLED: usize = 0; #[cfg(feature = "enable")] const STATE_ENABLING: usize = STATE_DISABLED + STATE_STEP; #[cfg(feature = "enable")] const STATE_ENABLED: usize = STATE_ENABLING + STATE_STEP; -#[cfg(feature = "enable")] -const STATE_DISABLING: usize = STATE_ENABLED + STATE_STEP; -#[cfg(feature = "enable")] -const STATE_MASK: usize = STATE_DISABLING; #[cfg(feature = "enable")] #[inline(always)] @@ -65,74 +57,67 @@ fn spin_loop() { impl Client { /// Start the client. /// - /// The client must be started with this function before any instrumentation may be invoked. - /// This function can be called multiple times to obtain multiple `Client` values. Doing so - /// will increase a counter indicating number of active client values. + /// The client must be started with this function before any instrumentation is invoked + /// anywhere in the process. This function can be called multiple times to obtain multiple + /// `Client` values. /// /// The underying client implementation will be started up only if it wasn't already running /// yet. /// - /// Note that for heavily contended invocations of this method and [`Client::close`], the - /// implementation is biased towards keeping the client running. + /// Note that there currently isn't a mechanism to stop the client once it has been started. /// /// # Example /// /// ```rust - /// let client = tracy_client::Client::enable(); + /// // fn main() { + /// let _client = tracy_client::Client::start(); /// // ... 
- /// drop(client); + /// // } /// ``` pub fn start() -> Self { #[cfg(feature = "enable")] { - let old_state = loop { - let result = - ENABLE_STATE.fetch_update(Ordering::Relaxed, Ordering::Relaxed, |state| { - // Here we want to increment the reference count, and also apply the - // tansition from state 0 (disabled) to state 1 (enabling), if possible. - match state & STATE_MASK { - STATE_DISABLED => Some(STATE_ENABLING + 1), - STATE_ENABLED | STATE_ENABLING => Some(state + 1), - // Wait for the ongoing disable to complete. - STATE_DISABLING => None, - _ => unreachable!(), + let mut old_state = CLIENT_STATE.load(Ordering::Relaxed); + loop { + match old_state { + STATE_ENABLED => return Client(()), + STATE_ENABLING => { + while !Self::is_running() { + spin_loop(); } - }); - if let Ok(result) = result { - break result; - } - spin_loop(); - }; - match old_state & STATE_MASK { - STATE_DISABLED => { - unsafe { - // SAFE: This function must not be called if the profiler has already - // been enabled. While in practice calling this function multiple times - // will only serve to trigger an assertion, we cannot exactly rely on this, - // since it is an undocumented behaviour and the upstream might very well - // just decide to invoke UB instead. In the case there are multiple copies - // of `tracy-client` this invariant is not actually maintained , but - // otherwise this is sound due to the `ENABLE_STATE` that we manage. - // - // TODO: we _could_ define `ENABLE_STATE` in the `-sys` crate... - sys::___tracy_startup_profiler(); - // `STATE_DISABLED` became `STATE_ENABLING` in the `fetch_update` loop - // above. Now that the profile has been started, step the state forward - // once more so that it becomes a `STATE_ENABLED`. - ENABLE_STATE.fetch_add(STATE_STEP, Ordering::Release); - Client(()) + return Client(()); } - } - STATE_ENABLING => { - // Something else is already enabling the profiler. Wait for that to finish. 
- while ENABLE_STATE.load(Ordering::Relaxed) & STATE_MASK != STATE_ENABLED { - spin_loop(); + STATE_DISABLED => { + let result = CLIENT_STATE.compare_exchange_weak( + old_state, + STATE_ENABLING, + Ordering::Relaxed, + Ordering::Relaxed, + ); + if let Err(next_old_state) = result { + old_state = next_old_state; + continue; + } else { + unsafe { + // SAFE: This function must not be called if the profiler has + // already been enabled. While in practice calling this function + // multiple times will only serve to trigger an assertion, we + // cannot exactly rely on this, since it is an undocumented + // behaviour and the upstream might very well just decide to invoke + // UB instead. In the case there are multiple copies of + // `tracy-client` this invariant is not actually maintained, but + // otherwise this is sound due to the `ENABLE_STATE` that we + // manage. + // + // TODO: we _could_ define `ENABLE_STATE` in the `-sys` crate... + sys::___tracy_startup_profiler(); + CLIENT_STATE.store(STATE_ENABLED, Ordering::Release); + return Client(()); + } + } } - Client(()) + _ => unreachable!(), } - STATE_ENABLED => Client(()), - // The `fetch_update` loop cannot terminate in a DISABLING state. - _ => unreachable!(), } } #[cfg(not(feature = "enable"))] @@ -140,72 +125,35 @@ impl Client { } /// Obtain a client handle, but only if the client is already running. + #[inline(always)] pub fn running() -> Option { - ENABLE_STATE - .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |state| { - match state & STATE_MASK { - STATE_ENABLED => Some(state + 1), - STATE_DISABLING | STATE_ENABLING | STATE_DISABLED => None, - _ => unreachable!(), - } - }) - .ok() - .map(|_| Client(())) + if Self::is_running() { + Some(Client(())) + } else { + None + } } /// Is the client already running? 
+ #[inline(always)] pub fn is_running() -> bool { - ENABLE_STATE.load(Ordering::Relaxed) & STATE_MASK == STATE_ENABLED + #[cfg(feature = "enable")] + return CLIENT_STATE.load(Ordering::Relaxed) == STATE_ENABLED; + #[cfg(not(feature = "enable"))] + return true; } } impl Clone for Client { /// A cheaper alternative to [`Client::start`] or [`Client::running`] when there is already a /// handle handy. + #[inline(always)] fn clone(&self) -> Self { - // We already know that the state is `ENABLED`, so simply increment the counter. - #[cfg(feature = "enable")] - ENABLE_STATE.fetch_add(1, Ordering::Relaxed); + // We already know that the state is `ENABLED`, no need to check. Client(()) } } -/// Relinquish this Client handle. -/// -/// If this is the last live handle, the client will be disabled. Once the client has been -/// disabled, no other calls to the instrumentation APIs may be made. Note that unloading the -/// client will also discard any data collected up to that point. -/// -/// When using threads, especially detached ones, consider never calling `close` on at least -/// one of the handles, or at least use a Client handle for each thread that may be detached. -impl Drop for Client { - fn drop(&mut self) { - #[cfg(feature = "enable")] - { - let mut disable = false; - let _ = ENABLE_STATE.fetch_update(Ordering::Relaxed, Ordering::Relaxed, |state| { - // This is only reachable if the state is currently `STATE_ENABLED` and there's - // at least 1 remaining reference count. - Some(if state & REFCOUNT_MASK == 1 { - disable = true; - STATE_DISABLING - } else { - disable = false; - state - 1 - }) - }); - if disable { - unsafe { - // SAFE: we made sure to ensure there won't be any more instrumentations - // from at least this library. 
- sys::___tracy_shutdown_profiler(); - } - ENABLE_STATE.store(0, Ordering::Release); - } - } - } -} - #[cfg(all(test, feature = "enable"))] mod test { use super::*; @@ -215,11 +163,5 @@ mod test { assert_eq!(0, STATE_DISABLED); assert_eq!(STATE_DISABLED.wrapping_add(STATE_STEP), STATE_ENABLING); assert_eq!(STATE_ENABLING.wrapping_add(STATE_STEP), STATE_ENABLED); - assert_eq!(STATE_ENABLED.wrapping_add(STATE_STEP), STATE_DISABLING); - assert_eq!(STATE_DISABLING.wrapping_add(STATE_STEP), STATE_DISABLED); - assert_eq!( - (STATE_DISABLED | STATE_ENABLING | STATE_ENABLED | STATE_DISABLING) & REFCOUNT_MASK, - 0 - ); } } diff --git a/tracy-client/tests/loom.rs b/tracy-client/tests/loom.rs index 2490c7d..ef1e0a8 100644 --- a/tracy-client/tests/loom.rs +++ b/tracy-client/tests/loom.rs @@ -1,60 +1,76 @@ #[cfg(loom)] -use loom::thread; -#[cfg(not(loom))] -use std::thread; -use tracy_client::Client; +mod loom { -fn model(f: F) -where - F: Fn() + Sync + Send + 'static, -{ - #[cfg(not(loom))] - { - f() - } - #[cfg(loom)] + use loom::thread; + use tracy_client::Client; + + fn model(f: F) + where + F: Fn() + Sync + Send + 'static, { - let mut builder = loom::model::Builder::new(); - builder.preemption_bound = Some(3); - builder.check(f) + #[cfg(not(loom))] + { + f() + } + #[cfg(loom)] + { + let mut builder = loom::model::Builder::new(); + builder.preemption_bound = Some(3); + builder.check(f) + } } -} -fn main() { - model(|| { - let client = Client::start(); - assert!(Client::is_running()); - drop(client); - }); + fn main() { + model(|| { + let client = Client::start(); + assert!(Client::is_running()); + drop(client); + unsafe { + ___tracy_shutdown_profiler(); + } + }); - model(|| { - let t1 = thread::spawn(|| { + model(|| { + let t1 = thread::spawn(|| { + let client = Client::start(); + assert!(Client::is_running()); + drop(client); + }); let client = Client::start(); assert!(Client::is_running()); drop(client); + t1.join().unwrap(); + unsafe { + ___tracy_shutdown_profiler(); + } 
}); - let client = Client::start(); - assert!(Client::is_running()); - drop(client); - t1.join().unwrap(); - }); - model(|| { - let t1 = thread::spawn(move || { + + model(|| { + let t1 = thread::spawn(move || { + let client = Client::start(); + assert!(Client::is_running()); + let client2 = client.clone(); + assert!(Client::is_running()); + drop(client); + assert!(Client::is_running()); + drop(client2); + }); let client = Client::start(); assert!(Client::is_running()); let client2 = client.clone(); assert!(Client::is_running()); - drop(client); - assert!(Client::is_running()); drop(client2); + assert!(Client::is_running()); + drop(client); + t1.join().unwrap(); + unsafe { + ___tracy_shutdown_profiler(); + } }); - let client = Client::start(); - assert!(Client::is_running()); - let client2 = client.clone(); - assert!(Client::is_running()); - drop(client2); - assert!(Client::is_running()); - drop(client); - t1.join().unwrap(); - }); + } +} + +fn main() { + #[cfg(loom)] + loom::main(); } diff --git a/tracy-client/tests/tests.rs b/tracy-client/tests/tests.rs index 65f6944..5be8973 100644 --- a/tracy-client/tests/tests.rs +++ b/tracy-client/tests/tests.rs @@ -14,13 +14,6 @@ fn basic_zone() { } } -fn span_macro_no_client() { - let r = std::panic::catch_unwind(|| { - let _span = span!("sleep a little"); - }); - assert!(r.is_err()); -} - fn alloc_zone() { let client = Client::start(); let span = client.span_alloc("alloc_zone", "alloc_zone", file!(), line!(), 100); @@ -95,14 +88,13 @@ fn main() { #[cfg(not(loom))] { basic_zone(); - span_macro_no_client(); alloc_zone(); finish_frameset(); finish_secondary_frameset(); non_continuous_frameset(); plot_something(); - allocations(); message(); + allocations(); tls_confusion(); std::thread::spawn(|| { let _client = Client::start();