From 1e13aaf06719a2a73c7f446aa85c88a8b6c67884 Mon Sep 17 00:00:00 2001 From: crowlkats Date: Mon, 1 Aug 2022 15:58:04 +0200 Subject: [PATCH 1/4] make URLPatternComponentResult::groups values optional --- src/component.rs | 4 ++-- src/lib.rs | 6 +++--- src/matcher.rs | 7 +++++-- src/quirks.rs | 2 +- src/regexp.rs | 8 ++++---- src/testdata/urlpatterntestdata.json | 30 ++++++++++++++++++---------- 6 files changed, 35 insertions(+), 22 deletions(-) diff --git a/src/component.rs b/src/component.rs index 830647a..9bd30ae 100644 --- a/src/component.rs +++ b/src/component.rs @@ -67,13 +67,13 @@ impl Component { pub(crate) fn create_match_result( &self, input: String, - exec_result: Vec<&str>, + exec_result: Vec<Option<&str>>, ) -> crate::UrlPatternComponentResult { let groups = self .group_name_list .clone() .into_iter() - .zip(exec_result.into_iter().map(str::to_owned)) + .zip(exec_result.into_iter().map(|s| s.map(str::to_owned))) .collect(); crate::UrlPatternComponentResult { input, groups } } diff --git a/src/lib.rs b/src/lib.rs index e4fb5b4..6b55865 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -496,7 +496,7 @@ pub struct UrlPatternComponentResult { /// The matched input for this component. pub input: String, /// The values for all named groups in the pattern. 
- pub groups: std::collections::HashMap<String, String>, + pub groups: std::collections::HashMap<String, Option<String>>, } #[cfg(test)] @@ -525,7 +525,7 @@ mod tests { #[derive(Debug, Deserialize)] struct ComponentResult { input: String, - groups: HashMap<String, String>, + groups: HashMap<String, Option<String>>, } #[derive(Deserialize)] @@ -787,7 +787,7 @@ mod tests { if !exactly_empty_components .contains(&stringify!($component).to_owned()) { - groups.insert("0".to_owned(), "".to_owned()); + groups.insert("0".to_owned(), Some("".to_owned())); } UrlPatternComponentResult { input: "".to_owned(), diff --git a/src/matcher.rs b/src/matcher.rs index ff2ee24..8645ced 100644 --- a/src/matcher.rs +++ b/src/matcher.rs @@ -49,7 +49,10 @@ impl Matcher { } } - pub fn matches<'a>(&self, mut input: &'a str) -> Option<Vec<&'a str>> { + pub fn matches<'a>( + &self, + mut input: &'a str, + ) -> Option<Vec<Option<&'a str>>> { let prefix_len = self.prefix.len(); let suffix_len = self.suffix.len(); let input_len = input.len(); @@ -82,7 +85,7 @@ impl Matcher { return None; } } - Some(vec![input]) + Some(vec![Some(input)]) } InnerMatcher::RegExp { regexp, .. } => { regexp.as_ref().unwrap().matches(input) diff --git a/src/quirks.rs b/src/quirks.rs index df2405b..fd92e1b 100644 --- a/src/quirks.rs +++ b/src/quirks.rs @@ -176,7 +176,7 @@ impl RegExp for EcmaRegexp { Ok(EcmaRegexp(pattern.to_string())) } - fn matches<'a>(&self, text: &'a str) -> Option<Vec<&'a str>> { + fn matches<'a>(&self, text: &'a str) -> Option<Vec<Option<&'a str>>> { let regexp = regex::Regex::parse(&self.0).ok()?; regexp.matches(text) } diff --git a/src/regexp.rs b/src/regexp.rs index c9653f6..38aefc7 100644 --- a/src/regexp.rs +++ b/src/regexp.rs @@ -11,10 +11,10 @@ pub trait RegExp: Sized { /// of captures. The matches are returned in the order they appear in the /// regular expression. It is **not** prefixed with the full match. For groups /// that occur in the regular expression, but did not match, the corresponding - /// capture should be the empty string (""). + /// capture should be `None`. 
/// /// Returns `None` if the text does not match the regular expression. - fn matches<'a>(&self, text: &'a str) -> Option<Vec<&'a str>>; + fn matches<'a>(&self, text: &'a str) -> Option<Vec<Option<&'a str>>>; } impl RegExp for regex::Regex { @@ -26,13 +26,13 @@ impl RegExp for regex::Regex { regex::Regex::new(pattern).map_err(|_| ()) } - fn matches<'a>(&self, text: &'a str) -> Option<Vec<&'a str>> { + fn matches<'a>(&self, text: &'a str) -> Option<Vec<Option<&'a str>>> { let captures = self.captures(text)?; let captures = captures .iter() .skip(1) - .map(|c| c.map(|m| m.as_str()).unwrap_or("")) + .map(|c| c.map(|m| m.as_str())) .collect(); Some(captures) diff --git a/src/testdata/urlpatterntestdata.json b/src/testdata/urlpatterntestdata.json index 99044a8..e8e2209 100644 --- a/src/testdata/urlpatterntestdata.json +++ b/src/testdata/urlpatterntestdata.json @@ -354,8 +354,9 @@ { "pattern": [{ "pathname": "/foo/:bar?" }], "inputs": [{ "pathname": "/foo" }], + "//": "The `null` below is translated to undefined in the test harness.", "expected_match": { - "pathname": { "input": "/foo", "groups": { "bar": "" } } + "pathname": { "input": "/foo", "groups": { "bar": null } } } }, { @@ -419,8 +420,9 @@ { "pattern": [{ "pathname": "/foo/:bar*" }], "inputs": [{ "pathname": "/foo" }], + "//": "The `null` below is translated to undefined in the test harness.", "expected_match": { - "pathname": { "input": "/foo", "groups": { "bar": "" } } + "pathname": { "input": "/foo", "groups": { "bar": null } } } }, { @@ -473,15 +475,17 @@ "expected_obj": { "pathname": "/foo/*?" }, + "//": "The `null` below is translated to undefined in the test harness.", "expected_match": { - "pathname": { "input": "/foo", "groups": { "0": "" } } + "pathname": { "input": "/foo", "groups": { "0": null } } } }, { "pattern": [{ "pathname": "/foo/*?" 
}], "inputs": [{ "pathname": "/foo" }], + "//": "The `null` below is translated to undefined in the test harness.", "expected_match": { - "pathname": { "input": "/foo", "groups": { "0": "" } } + "pathname": { "input": "/foo", "groups": { "0": null } } } }, { @@ -657,15 +661,17 @@ "expected_obj": { "pathname": "/foo/**" }, + "//": "The `null` below is translated to undefined in the test harness.", "expected_match": { - "pathname": { "input": "/foo", "groups": { "0": "" } } + "pathname": { "input": "/foo", "groups": { "0": null } } } }, { "pattern": [{ "pathname": "/foo/**" }], "inputs": [{ "pathname": "/foo" }], + "//": "The `null` below is translated to undefined in the test harness.", "expected_match": { - "pathname": { "input": "/foo", "groups": { "0": "" } } + "pathname": { "input": "/foo", "groups": { "0": null } } } }, { @@ -1812,9 +1818,10 @@ "hostname": "(sub.)?example.com", "pathname": "/foo" }, + "//": "The `null` below is translated to undefined in the test harness.", "expected_match": { "protocol": { "input": "https", "groups": {} }, - "hostname": { "input": "example.com", "groups": { "0": "" } }, + "hostname": { "input": "example.com", "groups": { "0": null } }, "pathname": { "input": "/foo", "groups": {} } } }, @@ -1850,9 +1857,10 @@ "hostname": "(sub(?:.))?example.com", "pathname": "/foo" }, + "//": "The `null` below is translated to undefined in the test harness.", "expected_match": { "protocol": { "input": "https", "groups": {} }, - "hostname": { "input": "example.com", "groups": { "0": "" } }, + "hostname": { "input": "example.com", "groups": { "0": null } }, "pathname": { "input": "/foo", "groups": {} } } }, @@ -2299,9 +2307,10 @@ "protocol": "data", "pathname": "text/javascript,let x = 100/:tens?5;" }, + "//": "The `null` below is translated to undefined in the test harness.", "expected_match": { "protocol": { "input": "data", "groups": {} }, - "pathname": { "input": "text/javascript,let x = 100/5;", "groups": { "tens": "" } } + "pathname": { 
"input": "text/javascript,let x = 100/5;", "groups": { "tens": null } } } }, { @@ -2623,8 +2632,9 @@ "expected_obj": { "pathname": "*(.*)?" }, + "//": "The `null` below is translated to undefined in the test harness.", "expected_match": { - "pathname": { "input": "foobar", "groups": { "0": "foobar", "1": "" }} + "pathname": { "input": "foobar", "groups": { "0": "foobar", "1": null }} } }, { From 5ccc94931524c4db176485658e0ef48a744c87d4 Mon Sep 17 00:00:00 2001 From: crowlkats Date: Fri, 26 Jul 2024 00:28:10 +0200 Subject: [PATCH 2/4] update crates and tests --- Cargo.toml | 4 ++-- src/lib.rs | 2 +- src/testdata/urlpatterntestdata.json | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 49b62b0..348d554 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,8 +9,8 @@ license = "MIT" [dependencies] derive_more = "0.99.16" -url = "2.2.2" -regex = "1.4.3" +url = "2.5.2" +regex = "1.10.5" serde = { version = "1.0.127", features = ["derive"] } unic-ucd-ident = { version = "0.9.0", features = ["id"] } diff --git a/src/lib.rs b/src/lib.rs index 64627d5..c123e97 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -202,7 +202,7 @@ fn is_absolute_pathname( /// // Match the pattern against a URL. 
/// let url = "https://example.com/users/123".parse().unwrap(); /// let result = pattern.exec(UrlPatternMatchInput::Url(url)).unwrap().unwrap(); -/// assert_eq!(result.pathname.groups.get("id").unwrap(), "123"); +/// assert_eq!(result.pathname.groups.get("id").unwrap().as_ref().unwrap(), "123"); ///# } /// ``` #[derive(Debug)] diff --git a/src/testdata/urlpatterntestdata.json b/src/testdata/urlpatterntestdata.json index e8e2209..a5fd15b 100644 --- a/src/testdata/urlpatterntestdata.json +++ b/src/testdata/urlpatterntestdata.json @@ -2425,7 +2425,6 @@ "expected_obj": "error" }, { - "skip": "bug in rust-url: https://github.com/servo/rust-url/pull/718", "pattern": [{ "hostname": "bad|hostname" }], "expected_obj": "error" }, @@ -2627,6 +2626,7 @@ } }, { + "skip": "only works in ecmascript variety of regex", "pattern": [{ "pathname": "*{}**?" }], "inputs": [{ "pathname": "foobar" }], "expected_obj": { From 34326dbcccd74baa28db30bdd87d0a4959361baf Mon Sep 17 00:00:00 2001 From: crowlkats Date: Fri, 26 Jul 2024 11:46:26 +0200 Subject: [PATCH 3/4] downgrade url crate --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 72e0754..c902d5f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,7 @@ repository = "https://github.com/denoland/rust-urlpattern" license = "MIT" [dependencies] -url = "2.5.2" +url = "2.4.1" regex = "1.10.5" serde = { version = "1.0.127", features = ["derive"] } unic-ucd-ident = { version = "0.9.0", features = ["id"] } From 02515cc673a1b9395dcfebdbc3f5058222f618b4 Mon Sep 17 00:00:00 2001 From: Luca Casonato Date: Fri, 26 Jul 2024 11:57:40 +0200 Subject: [PATCH 4/4] clippy --- src/parser.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.rs b/src/parser.rs index 699d3ab..7c328c0 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -386,7 +386,7 @@ where if name_token.is_some() || regexp_or_wildcard_token.is_some() { let mut prefix = String::new(); if let Some(char_token) = 
char_token { - prefix = char_token.value.to_owned(); + char_token.value.clone_into(&mut prefix); } if !prefix.is_empty() && prefix != options.prefix_code_point { parser.pending_fixed_value.push_str(&prefix);