From 8101cdd33f8873b54af0262d3ab08bfa7ddd5d65 Mon Sep 17 00:00:00 2001 From: konstin Date: Wed, 21 Aug 2024 16:15:42 +0200 Subject: [PATCH] For a path dep such as the root project, uv can read metadata statically from `pyproject.toml` or dynamically from the build backend. Python's `packaging` [sorts](https://github.com/pypa/packaging/blob/cc938f984bbbe43c5734b9656c9837ab3a28191f/src/packaging/specifiers.py#L777) specifiers before emitting them, so all build backends built on top of it - such as hatchling - will change the specifier order compared to pyproject.toml. The core metadata spec does say "If a field is not marked as Dynamic, then the value of the field in any wheel built from the sdist MUST match the value in the sdist", but it doesn't specify if "match" means string equivalent or semantically equivalent, so it's arguable whether that is spec-conformant. This change means that the specifiers have a different ordering when coming from the build backend than when read statically from pyproject.toml. Previously, we tried to read path dep metadata in order: * From the (built wheel) cache (`packaging` order) * From pyproject.toml (verbatim specifier) * From a fresh build (`packaging` order) This behaviour is unstable: On the first run, the cache is cold, so we read the verbatim specifier from `pyproject.toml`, then we build and store the metadata in the cache. On the second run, we read the `packaging` sorted specifier from the cache. Reproducer: ```shell rm -rf newproj uv init -q --no-config newproj cd newproj/ uv add -q "anyio>=4,<5" cat uv.lock | grep "requires-dist" uv sync -q cat uv.lock | grep "requires-dist" cd .. ``` ``` requires-dist = [{ name = "anyio", specifier = ">=4,<5" }] requires-dist = [{ name = "anyio", specifier = "<5,>=4" }] ``` A project either has static metadata, so we can read from pyproject.toml, or it doesn't, and we always read from the build through `packaging`. 
We can use this to stabilize the behavior by slightly switching the order. * From pyproject.toml (verbatim specifier) * From the (built wheel) cache (`packaging` order) * From a fresh build (`packaging` order) Potentially, we still want to sort the specifiers we get anyway; after all, there is no guarantee that the specifiers from a build backend are deterministic. But our metadata reading behavior should be independent of the cache state, hence changing the order in this PR. Fixes #6316 --- crates/uv-distribution/src/source/mod.rs | 138 +++++++++++++++-------- crates/uv/tests/sync.rs | 35 ++++++ 2 files changed, 129 insertions(+), 44 deletions(-) diff --git a/crates/uv-distribution/src/source/mod.rs b/crates/uv-distribution/src/source/mod.rs index a128aec519412..7672848c795cf 100644 --- a/crates/uv-distribution/src/source/mod.rs +++ b/crates/uv-distribution/src/source/mod.rs @@ -535,6 +535,17 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { // Scope all operations to the revision. Within the revision, there's no need to check for // freshness, since entries have to be fresher than the revision itself. let cache_shard = cache_shard.shard(revision.id()); + let source_dist_entry = cache_shard.entry(filename); + + // If the metadata is static, return it. + if let Some(metadata) = + Self::read_static_metadata(source, source_dist_entry.path(), subdirectory).await? + { + return Ok(ArchiveMetadata { + metadata: Metadata::from_metadata23(metadata), + hashes: revision.into_hashes(), + }); + } // If the cache contains compatible metadata, return it. let metadata_entry = cache_shard.entry(METADATA); @@ -547,8 +558,6 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { } // Otherwise, we either need to build the metadata or the wheel. - let source_dist_entry = cache_shard.entry(filename); - // If the backend supports `prepare_metadata_for_build_wheel`, use it. 
if let Some(metadata) = self .build_metadata(source, source_dist_entry.path(), subdirectory) @@ -764,6 +773,17 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { // Scope all operations to the revision. Within the revision, there's no need to check for // freshness, since entries have to be fresher than the revision itself. let cache_shard = cache_shard.shard(revision.id()); + let source_entry = cache_shard.entry("source"); + + // If the metadata is static, return it. + if let Some(metadata) = + Self::read_static_metadata(source, source_entry.path(), None).await? + { + return Ok(ArchiveMetadata { + metadata: Metadata::from_metadata23(metadata), + hashes: revision.into_hashes(), + }); + } // If the cache contains compatible metadata, return it. let metadata_entry = cache_shard.entry(METADATA); @@ -775,8 +795,6 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { }); } - let source_entry = cache_shard.entry("source"); - // If the backend supports `prepare_metadata_for_build_wheel`, use it. if let Some(metadata) = self .build_metadata(source, source_entry.path(), None) @@ -984,6 +1002,20 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { // freshness, since entries have to be fresher than the revision itself. let cache_shard = cache_shard.shard(revision.id()); + if let Some(metadata) = + Self::read_static_metadata(source, &resource.install_path, None).await? + { + return Ok(ArchiveMetadata::from( + Metadata::from_workspace( + metadata, + resource.install_path.as_ref(), + resource.lock_path.as_ref(), + self.build_context.sources(), + ) + .await?, + )); + } + // If the cache contains compatible metadata, return it. let metadata_entry = cache_shard.entry(METADATA); if let Some(metadata) = read_cached_metadata(&metadata_entry).await? 
{ @@ -1210,8 +1242,24 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { let _lock = lock_shard(&cache_shard).await?; + let path = if let Some(subdirectory) = resource.subdirectory { + Cow::Owned(fetch.path().join(subdirectory)) + } else { + Cow::Borrowed(fetch.path()) + }; + + if let Some(metadata) = + Self::read_static_metadata(source, fetch.path(), resource.subdirectory).await? + { + return Ok(ArchiveMetadata::from( + Metadata::from_workspace(metadata, &path, &path, self.build_context.sources()) + .await?, + )); + } + // If the cache contains compatible metadata, return it. let metadata_entry = cache_shard.entry(METADATA); + if self .build_context .cache() @@ -1248,12 +1296,6 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { .await .map_err(Error::CacheWrite)?; - let path = if let Some(subdirectory) = resource.subdirectory { + Cow::Owned(fetch.path().join(subdirectory)) - } else { - Cow::Borrowed(fetch.path()) - }; - return Ok(ArchiveMetadata::from( Metadata::from_workspace(metadata, &path, &path, self.build_context.sources()) .await?, @@ -1471,6 +1513,47 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { ) -> Result<Option<Metadata23>, Error> { debug!("Preparing metadata for: {source}"); + // Setup the builder. + let mut builder = self + .build_context + .setup_build( + source_root, + subdirectory, + &source.to_string(), + source.as_dist(), + if source.is_editable() { + BuildKind::Editable + } else { + BuildKind::Wheel + }, + ) + .await + .map_err(Error::Build)?; + + // Build the metadata. + let dist_info = builder.metadata().await.map_err(Error::Build)?; + let Some(dist_info) = dist_info else { + return Ok(None); + }; + + // Read the metadata from disk. + debug!("Prepared metadata for: {source}"); + let content = fs::read(dist_info.join("METADATA")) + .await + .map_err(Error::CacheRead)?; + let metadata = Metadata23::parse_metadata(&content)?; + + // Validate the metadata. 
+ validate(source, &metadata)?; + + Ok(Some(metadata)) + } + + async fn read_static_metadata( + source: &BuildableSource<'_>, + source_root: &Path, + subdirectory: Option<&Path>, + ) -> Result<Option<Metadata23>, Error> { // Attempt to read static metadata from the `PKG-INFO` file. match read_pkg_info(source_root, subdirectory).await { Ok(metadata) => { @@ -1521,40 +1604,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { Err(err) => return Err(err), } - // Setup the builder. - let mut builder = self - .build_context - .setup_build( - source_root, - subdirectory, - &source.to_string(), - source.as_dist(), - if source.is_editable() { - BuildKind::Editable - } else { - BuildKind::Wheel - }, - ) - .await - .map_err(Error::Build)?; - - // Build the metadata. - let dist_info = builder.metadata().await.map_err(Error::Build)?; - let Some(dist_info) = dist_info else { - return Ok(None); - }; - - // Read the metadata from disk. - debug!("Prepared metadata for: {source}"); - let content = fs::read(dist_info.join("METADATA")) - .await - .map_err(Error::CacheRead)?; - let metadata = Metadata23::parse_metadata(&content)?; - - // Validate the metadata. - validate(source, &metadata)?; - - Ok(Some(metadata)) + Ok(None) } /// Returns a GET [`reqwest::Request`] for the given URL. diff --git a/crates/uv/tests/sync.rs b/crates/uv/tests/sync.rs index 54c76c224fcc6..ef266683169fc 100644 --- a/crates/uv/tests/sync.rs +++ b/crates/uv/tests/sync.rs @@ -791,3 +791,38 @@ fn sync_environment() -> Result<()> { Ok(()) } + +/// Regression test for <https://github.com/astral-sh/uv/issues/6316>. +/// +/// Previously, we would read metadata statically from pyproject.toml and write that to `uv.lock`. In +/// this sync pass, we had also built the project with hatchling, which sorts specifiers by python +/// string sort through packaging. On the second run, we read the cache that now has the hatchling +/// sorting, changing the lockfile. 
+#[test] +fn read_metadata_statically_over_the_cache() -> Result<()> { + let context = TestContext::new("3.12"); + + let pyproject_toml = context.temp_dir.child("pyproject.toml"); + pyproject_toml.write_str( + r#" + [project] + name = "project" + version = "0.1.0" + requires-python = ">=3.12" + # Python string sorting is the other way round. + dependencies = ["anyio>=4,<5"] + "#, + )?; + + context.sync().assert().success(); + let lock1 = fs_err::read_to_string(context.temp_dir.join("uv.lock"))?; + // Assert we're reading static metadata. + assert!(lock1.contains(">=4,<5")); + assert!(!lock1.contains("<5,>=4")); + context.sync().assert().success(); + let lock2 = fs_err::read_to_string(context.temp_dir.join("uv.lock"))?; + // Assert stability. + assert_eq!(lock1, lock2); + + Ok(()) +}