Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf(cache): single cache file for remote modules #24983

Merged
merged 8 commits into from
Aug 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ chrono = { version = "0.4", default-features = false, features = ["std", "serde"
console_static_text = "=0.8.1"
data-encoding = "2.3.3"
data-url = "=0.3.0"
deno_cache_dir = "=0.10.2"
deno_cache_dir = "=0.11.0"
deno_package_json = { version = "=0.1.1", default-features = false }
dlopen2 = "0.6.1"
ecb = "=0.1.2"
Expand Down
8 changes: 2 additions & 6 deletions cli/cache/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,8 @@ pub const CACHE_PERM: u32 = 0o644;
pub struct RealDenoCacheEnv;

impl deno_cache_dir::DenoCacheEnv for RealDenoCacheEnv {
fn read_file_bytes(&self, path: &Path) -> std::io::Result<Option<Vec<u8>>> {
match std::fs::read(path) {
Ok(s) => Ok(Some(s)),
Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
Err(err) => Err(err),
}
fn read_file_bytes(&self, path: &Path) -> std::io::Result<Vec<u8>> {
std::fs::read(path)
}

fn atomic_write_file(
Expand Down
7 changes: 5 additions & 2 deletions cli/factory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -304,8 +304,11 @@ impl CliFactory {
let global_cache = self.global_http_cache()?.clone();
match self.cli_options()?.vendor_dir_path() {
Some(local_path) => {
let local_cache =
LocalHttpCache::new(local_path.clone(), global_cache);
let local_cache = LocalHttpCache::new(
local_path.clone(),
global_cache,
deno_cache_dir::GlobalToLocalCopy::Allow,
);
Ok(Arc::new(local_cache))
}
None => Ok(global_cache),
Expand Down
111 changes: 58 additions & 53 deletions cli/file_fetcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ use crate::http_util::HttpClientProvider;
use crate::util::progress_bar::ProgressBar;

use deno_ast::MediaType;
use deno_core::anyhow::bail;
use deno_core::anyhow::Context;
use deno_core::error::custom_error;
use deno_core::error::generic_error;
Expand Down Expand Up @@ -52,6 +51,25 @@ pub enum FileOrRedirect {
Redirect(ModuleSpecifier),
}

impl FileOrRedirect {
fn from_deno_cache_entry(
specifier: &ModuleSpecifier,
cache_entry: deno_cache_dir::CacheEntry,
) -> Result<Self, AnyError> {
if let Some(redirect_to) = cache_entry.metadata.headers.get("location") {
let redirect =
deno_core::resolve_import(redirect_to, specifier.as_str())?;
Ok(FileOrRedirect::Redirect(redirect))
} else {
Ok(FileOrRedirect::File(File {
specifier: specifier.clone(),
maybe_headers: Some(cache_entry.metadata.headers),
source: Arc::from(cache_entry.content),
}))
}
}
}

/// A structure representing a source file.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct File {
Expand Down Expand Up @@ -238,45 +256,32 @@ impl FileFetcher {
);

let cache_key = self.http_cache.cache_item_key(specifier)?; // compute this once
let Some(headers) = self.http_cache.read_headers(&cache_key)? else {
return Ok(None);
};
if let Some(redirect_to) = headers.get("location") {
let redirect =
deno_core::resolve_import(redirect_to, specifier.as_str())?;
return Ok(Some(FileOrRedirect::Redirect(redirect)));
}
let result = self.http_cache.read_file_bytes(
let result = self.http_cache.get(
&cache_key,
maybe_checksum
.as_ref()
.map(|c| deno_cache_dir::Checksum::new(c.as_str())),
deno_cache_dir::GlobalToLocalCopy::Allow,
);
let bytes = match result {
Ok(Some(bytes)) => bytes,
Ok(None) => return Ok(None),
match result {
Ok(Some(cache_data)) => Ok(Some(FileOrRedirect::from_deno_cache_entry(
specifier, cache_data,
)?)),
Ok(None) => Ok(None),
Err(err) => match err {
deno_cache_dir::CacheReadFileError::Io(err) => return Err(err.into()),
deno_cache_dir::CacheReadFileError::Io(err) => Err(err.into()),
deno_cache_dir::CacheReadFileError::ChecksumIntegrity(err) => {
// convert to the equivalent deno_graph error so that it
// enhances it if this is passed to deno_graph
return Err(
Err(
deno_graph::source::ChecksumIntegrityError {
actual: err.actual,
expected: err.expected,
}
.into(),
);
)
}
},
};

Ok(Some(FileOrRedirect::File(File {
specifier: specifier.clone(),
maybe_headers: Some(headers),
source: Arc::from(bytes),
})))
}
}

/// Convert a data URL into a file, resulting in an error if the URL is
Expand Down Expand Up @@ -363,12 +368,30 @@ impl FileFetcher {
);
}

let maybe_etag = self
let maybe_etag_cache_entry = self
.http_cache
.cache_item_key(specifier)
.ok()
.and_then(|key| self.http_cache.read_headers(&key).ok().flatten())
.and_then(|headers| headers.get("etag").cloned());
.and_then(|key| {
self
.http_cache
.get(
&key,
maybe_checksum
.as_ref()
.map(|c| deno_cache_dir::Checksum::new(c.as_str())),
)
.ok()
.flatten()
})
.and_then(|cache_entry| {
cache_entry
.metadata
.headers
.get("etag")
.cloned()
.map(|etag| (cache_entry, etag))
});
let maybe_auth_token = self.auth_tokens.get(specifier);

async fn handle_request_or_server_error(
Expand All @@ -390,7 +413,6 @@ impl FileFetcher {
}
}

let mut maybe_etag = maybe_etag;
let mut retried = false; // retry intermittent failures
let result = loop {
let result = match self
Expand All @@ -399,31 +421,17 @@ impl FileFetcher {
.fetch_no_follow(FetchOnceArgs {
url: specifier.clone(),
maybe_accept: maybe_accept.map(ToOwned::to_owned),
maybe_etag: maybe_etag.clone(),
maybe_etag: maybe_etag_cache_entry
.as_ref()
.map(|(_, etag)| etag.clone()),
maybe_auth_token: maybe_auth_token.clone(),
maybe_progress_guard: maybe_progress_guard.as_ref(),
})
.await?
{
FetchOnceResult::NotModified => {
let file_or_redirect =
self.fetch_cached_no_follow(specifier, maybe_checksum)?;
match file_or_redirect {
Some(file_or_redirect) => Ok(file_or_redirect),
None => {
// Someone may have deleted the body from the cache since
// it's currently stored in a separate file from the headers,
// so delete the etag and try again
if maybe_etag.is_some() {
debug!("Cache body not found. Trying again without etag.");
maybe_etag = None;
continue;
} else {
// should never happen
bail!("Your deno cache directory is in an unrecoverable state. Please delete it and try again.")
}
}
}
let (cache_entry, _) = maybe_etag_cache_entry.unwrap();
FileOrRedirect::from_deno_cache_entry(specifier, cache_entry)
}
FetchOnceResult::Redirect(redirect_url, headers) => {
self.http_cache.set(specifier, headers, &[])?;
Expand Down Expand Up @@ -1480,13 +1488,10 @@ mod tests {
let cache_key = file_fetcher.http_cache.cache_item_key(url).unwrap();
let bytes = file_fetcher
.http_cache
.read_file_bytes(
&cache_key,
None,
deno_cache_dir::GlobalToLocalCopy::Allow,
)
.get(&cache_key, None)
.unwrap()
.unwrap();
.unwrap()
.content;
String::from_utf8(bytes).unwrap()
}

Expand Down
10 changes: 0 additions & 10 deletions cli/lsp/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,6 @@ use std::path::Path;
use std::sync::Arc;
use std::time::SystemTime;

/// In the LSP, we disallow the cache from automatically copying from
/// the global cache to the local cache for technical reasons.
///
/// 1. We need to verify the checksums from the lockfile are correct when
/// moving from the global to the local cache.
/// 2. We need to verify the checksums for JSR https specifiers match what
/// is found in the package's manifest.
pub const LSP_DISALLOW_GLOBAL_TO_LOCAL_COPY: deno_cache_dir::GlobalToLocalCopy =
deno_cache_dir::GlobalToLocalCopy::Disallow;

pub fn calculate_fs_version(
cache: &LspCache,
specifier: &ModuleSpecifier,
Expand Down
12 changes: 4 additions & 8 deletions cli/lsp/documents.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

use super::cache::calculate_fs_version;
use super::cache::LspCache;
use super::cache::LSP_DISALLOW_GLOBAL_TO_LOCAL_COPY;
use super::config::Config;
use super::resolver::LspResolver;
use super::testing::TestCollector;
Expand Down Expand Up @@ -872,22 +871,19 @@ impl FileSystemDocuments {
} else {
let http_cache = cache.for_specifier(file_referrer);
let cache_key = http_cache.cache_item_key(specifier).ok()?;
let bytes = http_cache
.read_file_bytes(&cache_key, None, LSP_DISALLOW_GLOBAL_TO_LOCAL_COPY)
.ok()??;
let specifier_headers = http_cache.read_headers(&cache_key).ok()??;
let cached_file = http_cache.get(&cache_key, None).ok()??;
let (_, maybe_charset) =
deno_graph::source::resolve_media_type_and_charset_from_headers(
specifier,
Some(&specifier_headers),
Some(&cached_file.metadata.headers),
);
let content = deno_graph::source::decode_owned_source(
specifier,
bytes,
cached_file.content,
maybe_charset,
)
.ok()?;
let maybe_headers = Some(specifier_headers);
let maybe_headers = Some(cached_file.metadata.headers);
Document::new(
specifier.clone(),
content.into(),
Expand Down
7 changes: 2 additions & 5 deletions cli/lsp/jsr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -258,12 +258,9 @@ fn read_cached_url(
cache: &Arc<dyn HttpCache>,
) -> Option<Vec<u8>> {
cache
.read_file_bytes(
&cache.cache_item_key(url).ok()?,
None,
deno_cache_dir::GlobalToLocalCopy::Disallow,
)
.get(&cache.cache_item_key(url).ok()?, None)
.ok()?
.map(|f| f.content)
}

#[derive(Debug)]
Expand Down
25 changes: 19 additions & 6 deletions tests/integration/jsr_tests.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.

use deno_cache_dir::HttpCache;
use deno_core::serde_json;
use deno_core::serde_json::json;
use deno_core::serde_json::Value;
use deno_lockfile::Lockfile;
Expand Down Expand Up @@ -184,13 +186,24 @@ fn reload_info_not_found_cache_but_exists_remote() {
let specifier =
Url::parse(&format!("http://127.0.0.1:4250/{}/meta.json", package))
.unwrap();
let registry_json_path = deno_dir
.path()
.join("deps")
.join(deno_cache_dir::url_to_filename(&specifier).unwrap());
let mut registry_json = registry_json_path.read_json_value();
let cache = deno_cache_dir::GlobalHttpCache::new(
deno_dir.path().join("deps").to_path_buf(),
deno_cache_dir::TestRealDenoCacheEnv,
);
let entry = cache
.get(&cache.cache_item_key(&specifier).unwrap(), None)
.unwrap()
.unwrap();
let mut registry_json: serde_json::Value =
serde_json::from_slice(&entry.content).unwrap();
remove_version(&mut registry_json, version);
registry_json_path.write_json(&registry_json);
cache
.set(
&specifier,
entry.metadata.headers.clone(),
registry_json.to_string().as_bytes(),
)
.unwrap();
}

// This tests that when a local machine doesn't have a version
Expand Down
33 changes: 0 additions & 33 deletions tests/integration/run_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4972,39 +4972,6 @@ console.log(add(3, 4));
output.assert_matches_text("[WILDCARD]5\n7\n");
}

#[test]
fn run_etag_delete_source_cache() {
let test_context = TestContextBuilder::new()
.use_temp_cwd()
.use_http_server()
.build();
test_context
.temp_dir()
.write("main.ts", "import 'http://localhost:4545/etag_script.ts'");
test_context
.new_command()
.args("cache main.ts")
.run()
.skip_output_check();

// The cache is currently stored unideally in two files where one file has the headers
// and the other contains the body. An issue can happen with the etag header where the
// headers file exists, but the body was deleted. We need to get the cache to gracefully
// handle this scenario.
let deno_dir = test_context.deno_dir().path();
let etag_script_path = deno_dir.join("deps/http/localhost_PORT4545/26110db7d42c9bad32386735cbc05c301f83e4393963deb8da14fec3b4202a13");
assert!(etag_script_path.exists());
etag_script_path.remove_file();

test_context
.new_command()
.args("cache --reload --log-level=debug main.ts")
.run()
.assert_matches_text(
"[WILDCARD]Cache body not found. Trying again without etag.[WILDCARD]",
);
}
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Deleted this test because it's not possible for the two files to get out of sync because there's only one file now.


#[test]
fn code_cache_test() {
let test_context = TestContextBuilder::new().use_temp_cwd().build();
Expand Down