Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: remove RawDataUrl and extract to deno_media_type #564

Merged
merged 2 commits into from
Jan 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ async-trait = "0.1.68"
capacity_builder = "0.5.0"
data-url = "0.3.0"
deno_ast = { version = "0.44.0", features = ["dep_analysis", "emit"] }
deno_media_type = "0.2.3"
deno_media_type = { version = "0.2.4", features = ["decoding", "data_url", "module_specifier"] }
deno_unsync.workspace = true
deno_path_util = "0.3.0"
deno_semver = "0.7.1"
Expand Down
16 changes: 11 additions & 5 deletions src/graph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ use crate::source::*;
use deno_ast::dep::DynamicDependencyKind;
use deno_ast::dep::ImportAttributes;
use deno_ast::dep::StaticDependencyKind;
use deno_ast::encoding::detect_charset;
use deno_ast::LineAndColumnIndex;
use deno_ast::MediaType;
use deno_ast::ParseDiagnostic;
Expand Down Expand Up @@ -2377,10 +2378,12 @@ pub(crate) async fn parse_module_source_and_info(
Some("json")
))
{
return match crate::source::decode_source(
&opts.specifier,
let charset = maybe_charset.unwrap_or_else(|| {
detect_charset(&opts.specifier, opts.content.as_ref())
});
return match deno_media_type::encoding::decode_arc_source(
charset,
opts.content,
maybe_charset,
) {
Ok(text) => Ok(ModuleSourceAndInfo::Json {
specifier: opts.specifier,
Expand Down Expand Up @@ -5395,10 +5398,13 @@ impl<'a> NpmSpecifierResolver<'a> {

fn new_source_with_text(
specifier: &ModuleSpecifier,
text: Arc<[u8]>,
bytes: Arc<[u8]>,
maybe_charset: Option<&str>,
) -> Result<Arc<str>, Box<ModuleError>> {
crate::source::decode_source(specifier, text, maybe_charset).map_err(|err| {
let charset = maybe_charset.unwrap_or_else(|| {
deno_media_type::encoding::detect_charset(specifier, bytes.as_ref())
});
deno_media_type::encoding::decode_arc_source(charset, bytes).map_err(|err| {
Box::new(ModuleError::LoadingErr(
specifier.clone(),
None,
Expand Down
1 change: 0 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ pub mod symbols;
mod fast_check;
pub mod packages;
pub mod source;
mod text_encoding;

use source::FileSystem;
use source::JsrUrlProvider;
Expand Down
256 changes: 3 additions & 253 deletions src/source/mod.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
// Copyright 2018-2024 the Deno authors. MIT license.

use std::borrow::Cow;
use std::collections::HashMap;
use std::fmt;
use std::path::Path;
use std::path::PathBuf;
use std::sync::Arc;

use async_trait::async_trait;
use data_url::DataUrl;
use deno_ast::data_url::RawDataUrl;
use deno_ast::MediaType;
use deno_ast::ModuleSpecifier;
use deno_error::JsErrorClass;
Expand All @@ -30,7 +29,6 @@ use crate::graph::Range;
use crate::module_specifier::resolve_import;
use crate::packages::JsrPackageInfo;
use crate::packages::JsrPackageVersionInfo;
use crate::text_encoding;
use crate::ModuleInfo;
use crate::NpmLoadError;
use crate::SpecifierError;
Expand Down Expand Up @@ -545,75 +543,15 @@ pub fn load_data_url(
specifier: &ModuleSpecifier,
) -> Result<Option<LoadResponse>, std::io::Error> {
let data_url = RawDataUrl::parse(specifier)?;
let (bytes, headers) = data_url.into_bytes_and_headers();
let (bytes, mime_type) = data_url.into_bytes_and_mime_type();
let headers = HashMap::from([("content-type".to_string(), mime_type)]);
Ok(Some(LoadResponse::Module {
specifier: specifier.clone(),
maybe_headers: Some(headers),
content: Arc::from(bytes),
}))
}

#[derive(Debug, Clone)]
pub struct RawDataUrl {
pub mime_type: String,
pub bytes: Vec<u8>,
}

impl RawDataUrl {
pub fn parse(specifier: &ModuleSpecifier) -> Result<Self, std::io::Error> {
use std::io::Error;
use std::io::ErrorKind;

fn unable_to_decode() -> Error {
Error::new(ErrorKind::InvalidData, "Unable to decode data url.")
}

let url =
DataUrl::process(specifier.as_str()).map_err(|_| unable_to_decode())?;
let (bytes, _) = url.decode_to_vec().map_err(|_| unable_to_decode())?;
Ok(RawDataUrl {
mime_type: url.mime_type().to_string(),
bytes,
})
}

pub fn charset(&self) -> Option<&str> {
get_mime_type_charset(&self.mime_type)
}

pub fn media_type(&self) -> MediaType {
let mut content_types = self.mime_type.split(';');
let Some(content_type) = content_types.next() else {
return MediaType::Unknown;
};
MediaType::from_content_type(
// this data url will be ignored when resolving the MediaType
// as in this rare case the MediaType is determined solely based
// on the provided content type
&ModuleSpecifier::parse("data:image/png;base64,").unwrap(),
content_type,
)
}

pub fn decode(self) -> Result<String, std::io::Error> {
let charset = get_mime_type_charset(&self.mime_type).unwrap_or("utf-8");
decode_owned_source_with_charset(self.bytes, charset)
}

pub fn into_bytes_and_headers(self) -> (Vec<u8>, HashMap<String, String>) {
let headers = HashMap::from([("content-type".to_string(), self.mime_type)]);
(self.bytes, headers)
}
}

fn get_mime_type_charset(mime_type: &str) -> Option<&str> {
mime_type
.split(';')
.skip(1)
.map(str::trim)
.find_map(|s| s.strip_prefix("charset="))
}

/// An implementation of the loader attribute where the responses are provided
/// ahead of time. This is useful for testing or
#[derive(Default)]
Expand Down Expand Up @@ -832,100 +770,6 @@ pub fn resolve_media_type_and_charset_from_content_type<'a>(
}
}

/// Decodes the source bytes into a string handling any encoding rules
/// where the bytes may be from a remote module, file module, or other.
pub fn decode_owned_source(
specifier: &ModuleSpecifier,
bytes: Vec<u8>,
maybe_charset: Option<&str>,
) -> Result<String, std::io::Error> {
let charset = maybe_charset.unwrap_or_else(|| {
if specifier.scheme() == "file" {
text_encoding::detect_charset(&bytes)
} else {
"utf-8"
}
});
decode_owned_source_with_charset(bytes, charset)
}

/// Decodes the source bytes into a string handling any encoding rules
/// where the source is a `file:` specifier.
pub fn decode_owned_file_source(
bytes: Vec<u8>,
) -> Result<String, std::io::Error> {
let charset = text_encoding::detect_charset(&bytes);
decode_owned_source_with_charset(bytes, charset)
}

fn decode_owned_source_with_charset(
bytes: Vec<u8>,
charset: &str,
) -> Result<String, std::io::Error> {
match text_encoding::convert_to_utf8(&bytes, charset)? {
Cow::Borrowed(text) => {
if text.starts_with(text_encoding::BOM_CHAR) {
Ok(text[text_encoding::BOM_CHAR.len_utf8()..].to_string())
} else {
Ok(
// SAFETY: we know it's a valid utf-8 string at this point
unsafe { String::from_utf8_unchecked(bytes) },
)
}
}
Cow::Owned(mut text) => {
text_encoding::strip_bom_mut(&mut text);
Ok(text)
}
}
}

/// Decodes the source bytes into a string handling any encoding rules
/// for local vs remote files and dealing with the charset.
pub fn decode_source(
specifier: &ModuleSpecifier,
bytes: Arc<[u8]>,
maybe_charset: Option<&str>,
) -> Result<Arc<str>, std::io::Error> {
let charset = maybe_charset.unwrap_or_else(|| {
if specifier.scheme() == "file" {
text_encoding::detect_charset(bytes.as_ref())
} else {
"utf-8"
}
});
decode_with_charset(bytes, charset)
}

fn decode_with_charset(
bytes: Arc<[u8]>,
charset: &str,
) -> Result<Arc<str>, std::io::Error> {
let text = match text_encoding::convert_to_utf8(bytes.as_ref(), charset)? {
Cow::Borrowed(text) => {
if text.starts_with(text_encoding::BOM_CHAR) {
text[text_encoding::BOM_CHAR.len_utf8()..].to_string()
} else {
return Ok(
// SAFETY: we know it's a valid utf-8 string at this point
unsafe {
let raw_ptr = Arc::into_raw(bytes);
Arc::from_raw(std::mem::transmute::<*const [u8], *const str>(
raw_ptr,
))
},
);
}
}
Cow::Owned(mut text) => {
text_encoding::strip_bom_mut(&mut text);
text
}
};
let text: Arc<str> = Arc::from(text);
Ok(text)
}

#[cfg(test)]
pub mod tests {
use super::*;
Expand Down Expand Up @@ -1008,98 +852,4 @@ pub mod tests {
}
);
}

#[test]
fn test_parse_valid_data_url() {
let valid_data_url = "data:text/plain;base64,SGVsbG8sIFdvcmxkIQ==";
let specifier = ModuleSpecifier::parse(valid_data_url).unwrap();
let raw_data_url = RawDataUrl::parse(&specifier).unwrap();
assert_eq!(raw_data_url.mime_type, "text/plain");
assert_eq!(raw_data_url.bytes, b"Hello, World!");
}

#[test]
fn test_charset_with_valid_mime_type() {
let raw_data_url = RawDataUrl {
mime_type: "text/plain; charset=utf-8".to_string(),
bytes: vec![],
};
assert_eq!(raw_data_url.charset(), Some("utf-8"));
}

#[test]
fn test_charset_with_no_charset_in_mime_type() {
let raw_data_url = RawDataUrl {
mime_type: "text/plain".to_string(),
bytes: vec![],
};
assert_eq!(raw_data_url.charset(), None);
}

#[test]
fn test_media_type_with_known_type() {
let raw_data_url = RawDataUrl {
mime_type: "application/javascript;charset=utf-8".to_string(),
bytes: vec![],
};
assert_eq!(raw_data_url.media_type(), MediaType::JavaScript);
}

#[test]
fn test_media_type_with_unknown_type() {
let raw_data_url = RawDataUrl {
mime_type: "unknown/unknown".to_string(),
bytes: vec![],
};
assert_eq!(raw_data_url.media_type(), MediaType::Unknown);
}

#[test]
fn test_decode_with_valid_charset() {
let raw_data_url = RawDataUrl {
mime_type: "text/plain; charset=utf-8".to_string(),
bytes: "Hello, World!".as_bytes().to_vec(),
};
assert_eq!(raw_data_url.decode().unwrap(), "Hello, World!");
}

#[test]
fn test_decode_with_invalid_charset() {
let raw_data_url = RawDataUrl {
mime_type: "text/plain; charset=invalid-charset".to_string(),
bytes: vec![],
};
assert!(raw_data_url.decode().is_err());
}

#[test]
fn test_into_bytes_and_headers() {
let raw_data_url = RawDataUrl {
mime_type: "text/plain; charset=utf-8".to_string(),
bytes: "Hello, World!".as_bytes().to_vec(),
};
let (bytes, headers) = raw_data_url.into_bytes_and_headers();
assert_eq!(bytes, "Hello, World!".as_bytes());
assert_eq!(
headers.get("content-type").unwrap(),
"text/plain; charset=utf-8"
);
}

#[test]
fn test_decode_owned_with_bom() {
let text = decode_owned_file_source(
format!("{}{}", text_encoding::BOM_CHAR, "Hello").into_bytes(),
)
.unwrap();
assert_eq!(text, "Hello");
}

#[test]
fn test_decode_with_charset_with_bom() {
let bytes = format!("{}{}", text_encoding::BOM_CHAR, "Hello").into_bytes();
let charset = "utf-8";
let text = decode_with_charset(Arc::from(bytes), charset).unwrap();
assert_eq!(text.as_ref(), "Hello");
}
}
Loading
Loading