Skip to content

Commit

Permalink
codegen: format Python before writing to file (#2694)
Browse files Browse the repository at this point in the history
Formatting each Python file individually solves two problems:
* python files changing back and forth while codegen is being run
* modfiied timestamps being updated unnecessarily

It is also more functional and clean overall

---

I also introduced a simple way to run the codegen: `cargo codegen`. It
completely ignores the source hashes though, so beware of that. It has
some nice features though::

* It supports logging (`build.rs` swallows all output)
* It supports `--profile` to get a flamegraph of the build

### Checklist
* [x] I have read and agree to [Contributor
Guide](https://github.com/rerun-io/rerun/blob/main/CONTRIBUTING.md) and
the [Code of
Conduct](https://github.com/rerun-io/rerun/blob/main/CODE_OF_CONDUCT.md)
* [x] I've included a screenshot or gif (if applicable)
* [x] I have tested [demo.rerun.io](https://demo.rerun.io/pr/2694) (if
applicable)

- [PR Build Summary](https://build.rerun.io/pr/2694)
- [Docs
preview](https://rerun.io/preview/pr%3Aemilk%2Fcodegen-cleanup/docs)
- [Examples
preview](https://rerun.io/preview/pr%3Aemilk%2Fcodegen-cleanup/examples)
  • Loading branch information
emilk authored Jul 14, 2023
1 parent f61971c commit fcad6c4
Show file tree
Hide file tree
Showing 125 changed files with 458 additions and 337 deletions.
4 changes: 4 additions & 0 deletions .cargo/config.toml
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
[alias]
# `cargo rerun` is short a convenient shorthand, skipping the web viewer.
rerun = "run --package rerun-cli --no-default-features --features native_viewer --"

# `cargo rerun-web` is short a convenient shorthand for building & starting the web viewer.
rerun-web = "run --package rerun-cli --no-default-features --features web_viewer -- --web-viewer"

# Run the codegen. Pass --profile
codegen = "run --package re_types_builder --"

# To easily run examples on the web, see https://github.com/rukai/cargo-run-wasm.
# Temporary solution while we wait for our own xtasks!
run-wasm = "run --release --package run_wasm --"
Expand Down
8 changes: 5 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ polars-core = "0.29"
polars-lazy = "0.29"
polars-ops = "0.29"
puffin = "0.15"
puffin_http = "0.12"
rand = { version = "0.8", default-features = false }
rand_distr = { version = "0.4", default-features = false }
rayon = "1.7"
Expand Down
2 changes: 1 addition & 1 deletion crates/re_build_tools/src/hashing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ fn encode_hex(bytes: &[u8]) -> String {
s
}

/// Walks the directory at `path` in filename order.
/// Recursively walks the directory at `path` in filename order.
///
/// If `extensions` is specified, only files with the right extensions will be iterated.
/// Specified extensions should _not_ include the leading dot, e.g. `fbs` rather than `.fbs`.
Expand Down
12 changes: 12 additions & 0 deletions crates/re_tracing/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,17 @@ version.workspace = true
all-features = true


[features]
default = []

## Enable to easily host a puffin server. For binaries.
server = ["dep:puffin_http", "dep:re_log", "dep:rfd"]


[target.'cfg(not(target_arch = "wasm32"))'.dependencies]
puffin.workspace = true

# Optional dependencies:
puffin_http = { workspace = true, optional = true }
re_log = { workspace = true, optional = true }
rfd = { workspace = true, optional = true }
8 changes: 8 additions & 0 deletions crates/re_tracing/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
//! Helpers for tracing/spans/flamegraphs and such.
#[cfg(not(target_arch = "wasm32"))]
#[cfg(feature = "server")]
mod server;

#[cfg(not(target_arch = "wasm32"))]
#[cfg(feature = "server")]
pub use server::Profiler;

pub mod reexports {
#[cfg(not(target_arch = "wasm32"))]
pub use puffin;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@ pub struct Profiler {
server: Option<puffin_http::Server>,
}

impl Drop for Profiler {
fn drop(&mut self) {
// Commit the last stuff:
puffin::GlobalProfiler::lock().new_frame();
}
}

impl Profiler {
pub fn start(&mut self) {
puffin::set_scopes_on(true);
Expand All @@ -26,10 +33,7 @@ impl Profiler {
Some(puffin_server)
}
Err(err) => {
re_log::warn!(
"Failed to start puffin profiling server: {}",
re_error::format(&err)
);
re_log::warn!("Failed to start puffin profiling server: {err}");
None
}
};
Expand Down
1 change: 0 additions & 1 deletion crates/re_types/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,3 @@ re_types_builder.workspace = true

# External
rayon.workspace = true
xshell = "0.2"
75 changes: 7 additions & 68 deletions crates/re_types/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,12 @@
use std::path::PathBuf;

use xshell::{cmd, Shell};

use re_build_tools::{
compute_crate_hash, compute_dir_hash, compute_strings_hash, is_tracked_env_var_set, iter_dir,
read_versioning_hash, rerun_if_changed, rerun_if_changed_or_doesnt_exist,
write_versioning_hash,
};

// NOTE: Don't need to add extra context to xshell invocations, it does so on its own.

// ---

const SOURCE_HASH_PATH: &str = "./source_hash.txt";
Expand All @@ -21,7 +17,6 @@ const DOC_EXAMPLES_DIR_PATH: &str = "../../docs/code-examples";
const CPP_OUTPUT_DIR_PATH: &str = "../../rerun_cpp/src";
const RUST_OUTPUT_DIR_PATH: &str = ".";
const PYTHON_OUTPUT_DIR_PATH: &str = "../../rerun_py/rerun_sdk/rerun/_rerun2";
const PYTHON_PYPROJECT_PATH: &str = "../../rerun_py/pyproject.toml";

// located in PYTHON_OUTPUT_DIR_PATH
const ARCHETYPE_OVERRIDES_SUB_DIR_PATH: &str = "archetypes/_overrides";
Expand Down Expand Up @@ -109,75 +104,19 @@ fn main() {
join3(
|| re_types_builder::generate_cpp_code(CPP_OUTPUT_DIR_PATH, &objects, &arrow_registry),
|| re_types_builder::generate_rust_code(RUST_OUTPUT_DIR_PATH, &objects, &arrow_registry),
|| generate_and_format_python_code(&objects, &arrow_registry),
|| {
re_types_builder::generate_python_code(
PYTHON_OUTPUT_DIR_PATH,
&objects,
&arrow_registry,
);
},
);

write_versioning_hash(SOURCE_HASH_PATH, new_hash);
}

fn generate_and_format_python_code(
objects: &re_types_builder::Objects,
arrow_registry: &re_types_builder::ArrowRegistry,
) {
re_types_builder::generate_python_code(PYTHON_OUTPUT_DIR_PATH, objects, arrow_registry);

// TODO(emilk): format the python code _before_ writing them to file instead,
// just like we do with C++ and Rust.
// This should be doable py piping the code of each file to black/ruff via stdin.
// Why? Right now the python code is written once, then changed, which means
// it is in flux while building, which creates weird phantom git diffs for a few seconds,
// and also update the modified file stamps.

// NOTE: This requires both `black` and `ruff` to be in $PATH, but only for contributors,
// not end users.
// Even for contributors, `black` and `ruff` won't be needed unless they edit some of the
// .fbs files... and even then, this won't crash if they are missing, it will just fail to pass
// the CI!
//
// The order below is important and sadly we need to call black twice. Ruff does not yet
// fix line-length (See: https://github.com/astral-sh/ruff/issues/1904).
//
// 1) Call black, which among others things fixes line-length
// 2) Call ruff, which requires line-lengths to be correct
// 3) Call black again to cleanup some whitespace issues ruff might introduce

let sh = Shell::new().unwrap();
call_black(&sh);
call_ruff(&sh);
call_black(&sh);
}

// Do 3 things in parallel
fn join3(a: impl FnOnce() + Send, b: impl FnOnce() + Send, c: impl FnOnce() + Send) {
rayon::join(a, || rayon::join(b, c));
}

fn call_black(sh: &Shell) {
// NOTE: We're purposefully ignoring the error here.
//
// If the user doesn't have `black` in their $PATH, there's still no good reason to fail
// the build.
//
// The CI will catch the unformatted files at PR time and complain appropriately anyhow.
cmd!(
sh,
"black --config {PYTHON_PYPROJECT_PATH} {PYTHON_OUTPUT_DIR_PATH}"
)
.run()
.ok();
}

fn call_ruff(sh: &Shell) {
// NOTE: We're purposefully ignoring the error here.
//
// If the user doesn't have `ruff` in their $PATH, there's still no good reason to fail
// the build.
//
// The CI will catch the unformatted files at PR time and complain appropriately anyhow.
cmd!(
sh,
"ruff --config {PYTHON_PYPROJECT_PATH} --fix {PYTHON_OUTPUT_DIR_PATH}"
)
.run()
.ok();
}
2 changes: 1 addition & 1 deletion crates/re_types/source_hash.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# This is a sha256 hash for all direct and indirect dependencies of this crate's build script.
# It can be safely removed at anytime to force the build script to run again.
# Check out build.rs to see how it's computed.
114ba22844997f4dc1a9fc81ad83419d80cf124c412777c44e1b9b71b3e50510
d5ccdc80e148c8058d3885418f9c701fbae4a5fbb92defb9bd625ad27ed97a25
2 changes: 2 additions & 0 deletions crates/re_types_builder/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ all-features = true


[dependencies]
re_log.workspace = true
re_tracing = { workspace = true, features = ["server"] }

# External
anyhow.workspace = true
Expand Down
51 changes: 51 additions & 0 deletions crates/re_types_builder/src/bin/build_re_types.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
//! Helper binary for running the codegen manually. Useful during development!
const DEFINITIONS_DIR_PATH: &str = "crates/re_types/definitions";
const ENTRYPOINT_PATH: &str = "crates/re_types/definitions/rerun/archetypes.fbs";
const CPP_OUTPUT_DIR_PATH: &str = "rerun_cpp/src";
const RUST_OUTPUT_DIR_PATH: &str = "crates/re_types/.";
const PYTHON_OUTPUT_DIR_PATH: &str = "rerun_py/rerun_sdk/rerun/_rerun2";

fn main() {
re_log::setup_native_logging();

let mut profiler = re_tracing::Profiler::default();

for arg in std::env::args().skip(1) {
match arg.as_str() {
"--help" => {
println!("Usage: [--help] [--profile]");
return;
}
"--profile" => profiler.start(),
_ => {
eprintln!("Unknown argument: {arg:?}");
return;
}
}
}

re_log::info!("Running codegen…");
let (objects, arrow_registry) =
re_types_builder::generate_lang_agnostic(DEFINITIONS_DIR_PATH, ENTRYPOINT_PATH);

re_tracing::profile_scope!("Language-specific code-gen");
join3(
|| re_types_builder::generate_cpp_code(CPP_OUTPUT_DIR_PATH, &objects, &arrow_registry),
|| re_types_builder::generate_rust_code(RUST_OUTPUT_DIR_PATH, &objects, &arrow_registry),
|| {
re_types_builder::generate_python_code(
PYTHON_OUTPUT_DIR_PATH,
&objects,
&arrow_registry,
);
},
);

re_log::info!("Done.");
}

// Do 3 things in parallel
fn join3(a: impl FnOnce() + Send, b: impl FnOnce() + Send, c: impl FnOnce() + Send) {
rayon::join(a, || rayon::join(b, c));
}
19 changes: 19 additions & 0 deletions crates/re_types_builder/src/codegen/common.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
//! Helpers common to all codegen passes.
use std::collections::BTreeSet;

use camino::Utf8PathBuf;

use crate::Docs;

/// Retrieves the global and tagged documentation from a [`Docs`] object.
Expand Down Expand Up @@ -59,3 +63,18 @@ impl StringExt for String {
self
}
}

/// Remove all files in the given folder that are not in the given set.
pub fn remove_old_files_from_folder(folder_path: Utf8PathBuf, filepaths: &BTreeSet<Utf8PathBuf>) {
re_tracing::profile_function!();
for entry in std::fs::read_dir(folder_path).unwrap().flatten() {
let filepath = Utf8PathBuf::try_from(entry.path()).unwrap();
if filepath.as_str().ends_with("_ext.rs") {
continue;
}
if !filepaths.contains(&filepath) {
re_log::info!("Removing {filepath:?}");
std::fs::remove_file(filepath).ok();
}
}
}
8 changes: 1 addition & 7 deletions crates/re_types_builder/src/codegen/cpp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,7 @@ impl CppCodeGenerator {
filepaths.insert(filepath);
}

// Clean up old files:
for entry in std::fs::read_dir(folder_path).unwrap().flatten() {
let filepath = Utf8PathBuf::try_from(entry.path()).unwrap();
if !filepaths.contains(&filepath) {
std::fs::remove_file(filepath).ok();
}
}
super::common::remove_old_files_from_folder(folder_path, &filepaths);

filepaths
}
Expand Down
Loading

1 comment on commit fcad6c4

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Performance Alert ⚠️

Possible performance regression was detected for benchmark 'Rust Benchmark'.
Benchmark result of this commit is worse than the previous benchmark result exceeding threshold 1.25.

Benchmark suite Current: fcad6c4 Previous: f61971c Ratio
batch_points_arrow/encode_log_msg 90881 ns/iter (± 136) 48628 ns/iter (± 95) 1.87

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.