Treat url(<string>) as a normal function, per spec change.
Only unquoted URLs are special tokens now. Use `Parser::expect_url`.

This is a [breaking-change]. The version number was incremented accordingly.

This change will help with servo/servo#7767
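For callers, the practical migration is to stop matching on `Token::Url` and go through `Parser::expect_url`, which accepts both spellings. A minimal sketch of the new call pattern against the 0.4.0 API (the assertions mirror the new match arms in src/parser.rs below):

```rust
extern crate cssparser;
use cssparser::Parser;

fn main() {
    // An unquoted <url-token> parses directly...
    assert_eq!(Parser::new("url(foo.png)").expect_url().unwrap(), "foo.png");
    // ...while url("...") now tokenizes as a Function token whose nested
    // block holds a <string-token>; expect_url unwraps that too.
    assert_eq!(Parser::new("url(\"foo.png\")").expect_url().unwrap(), "foo.png");
}
```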
SimonSapin committed Oct 9, 2015
1 parent 920c23a commit 9201f47
Showing 5 changed files with 90 additions and 64 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
@@ -1,7 +1,7 @@
[package]

name = "cssparser"
version = "0.3.9"
version = "0.4.0"
authors = [ "Simon Sapin <simon.sapin@exyr.org>" ]

description = "Rust implementation of CSS Syntax Level 3"
20 changes: 13 additions & 7 deletions src/parser.rs
@@ -331,7 +331,7 @@ impl<'i, 't> Parser<'i, 't> {
/// This can help tell e.g. `color: green;` from `color: green 4px;`
#[inline]
pub fn parse_entirely<F, T>(&mut self, parse: F) -> Result<T, ()>
-where F: FnOnce(&mut Parser) -> Result<T, ()> {
+where F: FnOnce(&mut Parser<'i, 't>) -> Result<T, ()> {
let result = parse(self);
try!(self.expect_exhausted());
result
@@ -374,7 +374,7 @@ impl<'i, 't> Parser<'i, 't> {
/// The result is overridden to `Err(())` if the closure leaves some input before that point.
#[inline]
pub fn parse_nested_block<F, T>(&mut self, parse: F) -> Result <T, ()>
-where F: FnOnce(&mut Parser) -> Result<T, ()> {
+where F: for<'tt> FnOnce(&mut Parser<'i, 'tt>) -> Result<T, ()> {
let block_type = self.at_start_of.take().expect("\
A nested parser can only be created when a Function, \
ParenthesisBlock, SquareBracketBlock, or CurlyBracketBlock \
@@ -412,7 +412,7 @@ impl<'i, 't> Parser<'i, 't> {
#[inline]
pub fn parse_until_before<F, T>(&mut self, delimiters: Delimiters, parse: F)
-> Result <T, ()>
-where F: FnOnce(&mut Parser) -> Result<T, ()> {
+where F: for<'tt> FnOnce(&mut Parser<'i, 'tt>) -> Result<T, ()> {
let delimiters = self.stop_before | delimiters;
let result;
// Introduce a new scope to limit duration of nested_parser’s borrow
@@ -451,7 +451,7 @@ impl<'i, 't> Parser<'i, 't> {
#[inline]
pub fn parse_until_after<F, T>(&mut self, delimiters: Delimiters, parse: F)
-> Result <T, ()>
-where F: FnOnce(&mut Parser) -> Result<T, ()> {
+where F: for<'tt> FnOnce(&mut Parser<'i, 'tt>) -> Result<T, ()> {
let result = self.parse_until_before(delimiters, parse);
let next_byte = self.tokenizer.next_byte();
if next_byte.is_some() && !self.stop_before.contains(Delimiters::from_byte(next_byte)) {
@@ -481,7 +481,7 @@ impl<'i, 't> Parser<'i, 't> {

/// Parse a <ident-token> whose unescaped value is an ASCII-insensitive match for the given value.
#[inline]
-pub fn expect_ident_matching<'a>(&mut self, expected_value: &str) -> Result<(), ()> {
+pub fn expect_ident_matching(&mut self, expected_value: &str) -> Result<(), ()> {
match try!(self.next()) {
Token::Ident(ref value) if value.eq_ignore_ascii_case(expected_value) => Ok(()),
_ => Err(())
@@ -511,7 +511,10 @@ impl<'i, 't> Parser<'i, 't> {
#[inline]
pub fn expect_url(&mut self) -> Result<Cow<'i, str>, ()> {
match try!(self.next()) {
-Token::Url(value) => Ok(value),
+Token::UnquotedUrl(value) => Ok(value),
+Token::Function(ref name) if name.eq_ignore_ascii_case("url") => {
+self.parse_nested_block(|input| input.expect_string())
+},
_ => Err(())
}
}
@@ -520,8 +523,11 @@ impl<'i, 't> Parser<'i, 't> {
#[inline]
pub fn expect_url_or_string(&mut self) -> Result<Cow<'i, str>, ()> {
match try!(self.next()) {
-Token::Url(value) => Ok(value),
+Token::UnquotedUrl(value) => Ok(value),
Token::QuotedString(value) => Ok(value),
+Token::Function(ref name) if name.eq_ignore_ascii_case("url") => {
+self.parse_nested_block(|input| input.expect_string())
+},
_ => Err(())
}
}
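A note on the changed `where` clauses above: `for<'tt> FnOnce(&mut Parser<'i, 'tt>)` pins the input lifetime `'i` while leaving the closure generic over the nested parser's tokenizer borrow. That is what lets the new `expect_url` return a `Cow<'i, str>` borrowed from the original input even though the string is read inside `parse_nested_block`. A hedged sketch of a caller relying on this (`first_string_arg` is a hypothetical helper, not part of this commit):

```rust
extern crate cssparser;
use std::borrow::Cow;
use cssparser::Parser;

// Hypothetical helper: after a Function token has just been consumed,
// read a single <string-token> from inside its parentheses. The returned
// Cow borrows from the original input ('i), which only type-checks
// because the closure is higher-ranked over the nested tokenizer borrow.
fn first_string_arg<'i, 't>(input: &mut Parser<'i, 't>) -> Result<Cow<'i, str>, ()> {
    input.parse_nested_block(|nested| nested.expect_string())
}
```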
26 changes: 23 additions & 3 deletions src/serializer.rs
@@ -81,9 +81,9 @@ impl<'a> ToCss for Token<'a> {
try!(serialize_identifier(&**value, dest));
}
Token::QuotedString(ref value) => try!(serialize_string(&**value, dest)),
-Token::Url(ref value) => {
+Token::UnquotedUrl(ref value) => {
try!(dest.write_str("url("));
-try!(serialize_string(&**value, dest));
+try!(serialize_unquoted_url(&**value, dest));
try!(dest.write_str(")"));
},
Token::Delim(value) => try!(write!(dest, "{}", value)),
@@ -213,6 +213,26 @@ fn serialize_name<W>(value: &str, dest: &mut W) -> fmt::Result where W:fmt::Writ
}


+fn serialize_unquoted_url<W>(value: &str, dest: &mut W) -> fmt::Result where W:fmt::Write {
+let mut chunk_start = 0;
+for (i, b) in value.bytes().enumerate() {
+let hex = match b {
+b'\0' ... b' ' | b'\x7F' => true,
+b'(' | b')' | b'"' | b'\'' | b'\\' => false,
+_ => continue
+};
+try!(dest.write_str(&value[chunk_start..i]));
+if hex {
+try!(write!(dest, "\\{:X} ", b));
+} else {
+try!(write!(dest, "\\{}", b as char));
+}
+chunk_start = i + 1;
+}
+dest.write_str(&value[chunk_start..])
+}


/// Write a double-quoted CSS string token, escaping content as necessary.
pub fn serialize_string<W>(value: &str, dest: &mut W) -> fmt::Result where W: fmt::Write {
try!(dest.write_str("\""));
@@ -382,7 +402,7 @@ impl<'a> Token<'a> {
TokenSerializationType(match *self {
Token::Ident(_) => Ident,
Token::AtKeyword(_) | Token::Hash(_) | Token::IDHash(_) => AtKeywordOrHash,
-Token::Url(_) | Token::BadUrl => UrlOrBadUrl,
+Token::UnquotedUrl(_) | Token::BadUrl => UrlOrBadUrl,
Token::Delim('#') => DelimHash,
Token::Delim('@') => DelimAt,
Token::Delim('.') | Token::Delim('+') => DelimDotOrPlus,
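The new `serialize_unquoted_url` escapes two classes of bytes: controls, space, and DEL get hex escapes with a trailing space (so that a following hex digit is not absorbed into the escape), while `(`, `)`, `"`, `'`, and `\` get a plain backslash. A small illustrative round trip, assuming `ToCss` is reachable from the crate root as in the tests below:

```rust
extern crate cssparser;
use cssparser::{Token, ToCss};

fn main() {
    // The space becomes \20 (hex escape plus delimiting space);
    // the parentheses become \( and \).
    let token = Token::UnquotedUrl("a b(c).png".into());
    assert_eq!(token.to_css_string(), "url(a\\20 b\\(c\\).png)");
}
```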
63 changes: 32 additions & 31 deletions src/tests.rs
@@ -2,12 +2,11 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

-use std::borrow::Cow::Borrowed;
+use std::borrow::Cow::{self, Borrowed};
use std::fs::File;
use std::io::{self, Write};
use std::path::Path;
use std::process::Command;
-use std::mem;
use rustc_serialize::json::{self, Json, ToJson};
use tempdir::TempDir;

@@ -74,14 +73,8 @@ fn almost_equals(a: &Json, b: &Json) -> bool {
fn normalize(json: &mut Json) {
match *json {
Json::Array(ref mut list) => {
-match find_url(list) {
-Some(Ok(url)) => *list = vec!["url".to_json(), Json::String(url)],
-Some(Err(())) => *list = vec!["error".to_json(), "bad-url".to_json()],
-None => {
-for item in list.iter_mut() {
-normalize(item)
-}
-}
+for item in list.iter_mut() {
+normalize(item)
}
}
Json::String(ref mut s) => {
@@ -93,26 +86,6 @@
}
}

-fn find_url(list: &mut [Json]) -> Option<Result<String, ()>> {
-if list.len() < 2 ||
-list[0].as_string() != Some("function") ||
-list[1].as_string() != Some("url") {
-return None
-}
-
-let mut args = list[2..].iter_mut().filter(|a| a.as_string() != Some(" "));
-if let (Some(&mut Json::Array(ref mut arg)), None) = (args.next(), args.next()) {
-if arg.len() == 2 && arg[0].as_string() == Some("string") {
-if let &mut Json::String(ref mut value) = &mut arg[1] {
-return Some(Ok(mem::replace(value, String::new())))
-}
-}
-}
-
-Some(Err(()))
-}


fn assert_json_eq(results: json::Json, mut expected: json::Json, message: String) {
normalize(&mut expected);
if !almost_equals(&results, &expected) {
@@ -281,6 +254,34 @@ fn outer_block_end_consumed() {
assert_eq!(input.next(), Err(()));
}

+#[test]
+fn unquoted_url_escaping() {
+let token = Token::UnquotedUrl("\
+\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\
+\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f \
+!\"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]\
+^_`abcdefghijklmnopqrstuvwxyz{|}~\x7fé\
+".into());
+let serialized = token.to_css_string();
+assert_eq!(serialized, "\
+url(\
+\\1 \\2 \\3 \\4 \\5 \\6 \\7 \\8 \\9 \\A \\B \\C \\D \\E \\F \\10 \
+\\11 \\12 \\13 \\14 \\15 \\16 \\17 \\18 \\19 \\1A \\1B \\1C \\1D \\1E \\1F \\20 \
+!\\\"#$%&\\'\\(\\)*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]\
+^_`abcdefghijklmnopqrstuvwxyz{|}~\\7F é\
+)\
+");
+assert_eq!(Parser::new(&serialized).next(), Ok(token))
+}
+
+#[test]
+fn test_expect_url() {
+fn parse(s: &str) -> Result<Cow<str>, ()> {
+Parser::new(s).expect_url()
+}
+assert_eq!(parse("url()").unwrap(), "");
+}


fn run_color_tests<F: Fn(Result<Color, ()>) -> Json>(json_data: &str, to_json: F) {
run_json_tests(json_data, |input| {
@@ -606,7 +607,7 @@ fn one_component_value_to_json(token: Token, input: &mut Parser) -> Json {
Token::Hash(value) => JArray!["hash", value, "unrestricted"],
Token::IDHash(value) => JArray!["hash", value, "id"],
Token::QuotedString(value) => JArray!["string", value],
-Token::Url(value) => JArray!["url", value],
+Token::UnquotedUrl(value) => JArray!["url", value],
Token::Delim('\\') => "\\".to_json(),
Token::Delim(value) => value.to_string().to_json(),

43 changes: 21 additions & 22 deletions src/tokenizer.rs
@@ -48,7 +48,7 @@ pub enum Token<'a> {
/// A [`<url-token>`](https://drafts.csswg.org/css-syntax/#url-token-diagram) or `url( <string-token> )` function
///
/// The value does not include the `url(` `)` markers or the quotes.
-Url(Cow<'a, str>),
+UnquotedUrl(Cow<'a, str>),

/// A `<delim-token>`
Delim(char),
@@ -628,7 +628,7 @@ fn consume_ident_like<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
if !tokenizer.is_eof() && tokenizer.next_char() == '(' {
tokenizer.advance(1);
if value.eq_ignore_ascii_case("url") {
-consume_url(tokenizer)
+consume_unquoted_url(tokenizer).unwrap_or(Function(value))
} else {
if tokenizer.var_functions == VarFunctions::LookingForThem &&
value.eq_ignore_ascii_case("var") {
@@ -791,31 +791,30 @@ fn consume_numeric<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
}


-fn consume_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
-while !tokenizer.is_eof() {
-match tokenizer.next_char() {
-' ' | '\t' | '\n' | '\r' | '\x0C' => tokenizer.advance(1),
-'"' => return consume_quoted_url(tokenizer, false),
-'\'' => return consume_quoted_url(tokenizer, true),
-')' => { tokenizer.advance(1); break },
-_ => return consume_unquoted_url(tokenizer),
-}
-}
-return Url(Borrowed(""));

-fn consume_quoted_url<'a>(tokenizer: &mut Tokenizer<'a>, single_quote: bool) -> Token<'a> {
-match consume_quoted_string(tokenizer, single_quote) {
-Ok(value) => consume_url_end(tokenizer, value),
-Err(()) => consume_bad_url(tokenizer),
+fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>, ()> {
+for (offset, c) in tokenizer.input[tokenizer.position..].char_indices() {
+match c {
+' ' | '\t' | '\n' | '\r' | '\x0C' => {},
+'"' | '\'' => return Err(()), // Do not advance
+')' => {
+tokenizer.advance(offset + 1);
+return Ok(UnquotedUrl(Borrowed("")));
+}
+_ => {
+tokenizer.advance(offset);
+return Ok(consume_unquoted_url(tokenizer))
+}
+}
+}
+tokenizer.position = tokenizer.input.len();
+return Ok(UnquotedUrl(Borrowed("")));

fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
let start_pos = tokenizer.position();
let mut string;
loop {
if tokenizer.is_eof() {
-return Url(Borrowed(tokenizer.slice_from(start_pos)))
+return UnquotedUrl(Borrowed(tokenizer.slice_from(start_pos)))
}
match tokenizer.next_char() {
' ' | '\t' | '\n' | '\r' | '\x0C' => {
@@ -826,7 +825,7 @@ fn consume_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
')' => {
let value = tokenizer.slice_from(start_pos);
tokenizer.advance(1);
-return Url(Borrowed(value))
+return UnquotedUrl(Borrowed(value))
}
'\x01'...'\x08' | '\x0B' | '\x0E'...'\x1F' | '\x7F' // non-printable
| '"' | '\'' | '(' => {
@@ -861,7 +860,7 @@ fn consume_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
};
string.push(next_char)
}
-Url(Owned(string))
+UnquotedUrl(Owned(string))
}

fn consume_url_end<'a>(tokenizer: &mut Tokenizer<'a>, string: Cow<'a, str>) -> Token<'a> {
@@ -872,7 +871,7 @@ fn consume_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
_ => return consume_bad_url(tokenizer)
}
}
-Url(string)
+UnquotedUrl(string)
}

fn consume_bad_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
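The `Err(())` arm for `"` and `'` deliberately does not advance the tokenizer: `consume_ident_like` then falls back to `Function(value)`, and the quoted string is re-tokenized as an ordinary token inside the function's block. A hedged sketch of the resulting token streams (token shapes follow the enum above):

```rust
extern crate cssparser;
use cssparser::{Parser, Token};

fn main() {
    // Unquoted form: still a single <url-token>.
    assert_eq!(Parser::new("url(x)").next(), Ok(Token::UnquotedUrl("x".into())));
    // Quoted form: an ordinary function token; the <string-token> follows
    // inside its nested block rather than being folded into a url token.
    assert_eq!(Parser::new("url(\"x\")").next(), Ok(Token::Function("url".into())));
}
```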
