Auto merge of #90 - servo:url-functions, r=mbrubeck
Treat url(<string>) as a normal function, per spec change.

Only unquoted URLs are special tokens now. Use `Parser::expect_url`.
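
For example, a hypothetical call site (not part of this patch; it assumes only `cssparser` 0.4 with `Parser` in scope):

```rust
use cssparser::Parser;

// Hypothetical helper: with this change, `expect_url` accepts both an
// unquoted `url(foo.png)` token and a `url("foo.png")` function that
// contains a single quoted string.
fn parse_url_value(css: &str) -> Result<String, ()> {
    Parser::new(css).expect_url().map(|url| url.into_owned())
}
```

So `parse_url_value("url( 'foo.png' )")` returns `Ok("foo.png".to_string())`, matching the `test_expect_url` cases added below.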

This is a [breaking-change]. The version number was incremented accordingly.

This change will help with servo/servo#7767

This triggers rust-lang/rust#28934 and fails to build on the current Rust nightly, but works fine with the Rust version that Servo currently uses. Hopefully that rustc bug will be fixed before we need to upgrade Rust in Servo.
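
(The bug is triggered by the new higher-ranked closure bounds in src/parser.rs, of the form `for<'tt> FnOnce(&mut Parser<'i, 'tt>)`. As a standalone sketch of that pattern — simplified types, nothing from this crate:)

```rust
// A higher-ranked trait bound: the closure must accept a borrow with
// *any* lifetime the callee picks. This is what lets methods like
// `parse_nested_block` hand the closure a `Parser` whose tokenizer
// borrow is shorter-lived than the caller's.
fn with_scratch<F, T>(f: F) -> T
    where F: for<'a> FnOnce(&'a mut String) -> T
{
    let mut scratch = String::new();
    f(&mut scratch)
}
```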

r? @mbrubeck

bors-servo committed Oct 9, 2015
2 parents 920c23a + 3e2e0b5 commit ec47650
Showing 5 changed files with 98 additions and 64 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
@@ -1,7 +1,7 @@
[package]

name = "cssparser"
version = "0.3.9"
version = "0.4.0"
authors = [ "Simon Sapin <simon.sapin@exyr.org>" ]

description = "Rust implementation of CSS Syntax Level 3"
20 changes: 13 additions & 7 deletions src/parser.rs
@@ -331,7 +331,7 @@ impl<'i, 't> Parser<'i, 't> {
/// This can help tell e.g. `color: green;` from `color: green 4px;`
#[inline]
pub fn parse_entirely<F, T>(&mut self, parse: F) -> Result<T, ()>
-where F: FnOnce(&mut Parser) -> Result<T, ()> {
+where F: FnOnce(&mut Parser<'i, 't>) -> Result<T, ()> {
let result = parse(self);
try!(self.expect_exhausted());
result
@@ -374,7 +374,7 @@ impl<'i, 't> Parser<'i, 't> {
/// The result is overridden to `Err(())` if the closure leaves some input before that point.
#[inline]
pub fn parse_nested_block<F, T>(&mut self, parse: F) -> Result <T, ()>
-where F: FnOnce(&mut Parser) -> Result<T, ()> {
+where F: for<'tt> FnOnce(&mut Parser<'i, 'tt>) -> Result<T, ()> {
let block_type = self.at_start_of.take().expect("\
A nested parser can only be created when a Function, \
ParenthesisBlock, SquareBracketBlock, or CurlyBracketBlock \
@@ -412,7 +412,7 @@ impl<'i, 't> Parser<'i, 't> {
#[inline]
pub fn parse_until_before<F, T>(&mut self, delimiters: Delimiters, parse: F)
-> Result <T, ()>
-where F: FnOnce(&mut Parser) -> Result<T, ()> {
+where F: for<'tt> FnOnce(&mut Parser<'i, 'tt>) -> Result<T, ()> {
let delimiters = self.stop_before | delimiters;
let result;
// Introduce a new scope to limit duration of nested_parser’s borrow
@@ -451,7 +451,7 @@ impl<'i, 't> Parser<'i, 't> {
#[inline]
pub fn parse_until_after<F, T>(&mut self, delimiters: Delimiters, parse: F)
-> Result <T, ()>
-where F: FnOnce(&mut Parser) -> Result<T, ()> {
+where F: for<'tt> FnOnce(&mut Parser<'i, 'tt>) -> Result<T, ()> {
let result = self.parse_until_before(delimiters, parse);
let next_byte = self.tokenizer.next_byte();
if next_byte.is_some() && !self.stop_before.contains(Delimiters::from_byte(next_byte)) {
@@ -481,7 +481,7 @@ impl<'i, 't> Parser<'i, 't> {

/// Parse a <ident-token> whose unescaped value is an ASCII-insensitive match for the given value.
#[inline]
-pub fn expect_ident_matching<'a>(&mut self, expected_value: &str) -> Result<(), ()> {
+pub fn expect_ident_matching(&mut self, expected_value: &str) -> Result<(), ()> {
match try!(self.next()) {
Token::Ident(ref value) if value.eq_ignore_ascii_case(expected_value) => Ok(()),
_ => Err(())
@@ -511,7 +511,10 @@ impl<'i, 't> Parser<'i, 't> {
#[inline]
pub fn expect_url(&mut self) -> Result<Cow<'i, str>, ()> {
match try!(self.next()) {
-Token::Url(value) => Ok(value),
+Token::UnquotedUrl(value) => Ok(value),
+Token::Function(ref name) if name.eq_ignore_ascii_case("url") => {
+self.parse_nested_block(|input| input.expect_string())
+},
_ => Err(())
}
}
@@ -520,8 +523,11 @@ impl<'i, 't> Parser<'i, 't> {
#[inline]
pub fn expect_url_or_string(&mut self) -> Result<Cow<'i, str>, ()> {
match try!(self.next()) {
-Token::Url(value) => Ok(value),
+Token::UnquotedUrl(value) => Ok(value),
Token::QuotedString(value) => Ok(value),
+Token::Function(ref name) if name.eq_ignore_ascii_case("url") => {
+self.parse_nested_block(|input| input.expect_string())
+},
_ => Err(())
}
}
26 changes: 23 additions & 3 deletions src/serializer.rs
@@ -81,9 +81,9 @@ impl<'a> ToCss for Token<'a> {
try!(serialize_identifier(&**value, dest));
}
Token::QuotedString(ref value) => try!(serialize_string(&**value, dest)),
-Token::Url(ref value) => {
+Token::UnquotedUrl(ref value) => {
try!(dest.write_str("url("));
-try!(serialize_string(&**value, dest));
+try!(serialize_unquoted_url(&**value, dest));
try!(dest.write_str(")"));
},
Token::Delim(value) => try!(write!(dest, "{}", value)),
@@ -213,6 +213,26 @@ fn serialize_name<W>(value: &str, dest: &mut W) -> fmt::Result where W:fmt::Write {
}


+fn serialize_unquoted_url<W>(value: &str, dest: &mut W) -> fmt::Result where W:fmt::Write {
+let mut chunk_start = 0;
+for (i, b) in value.bytes().enumerate() {
+let hex = match b {
+b'\0' ... b' ' | b'\x7F' => true,
+b'(' | b')' | b'"' | b'\'' | b'\\' => false,
+_ => continue
+};
+try!(dest.write_str(&value[chunk_start..i]));
+if hex {
+try!(write!(dest, "\\{:X} ", b));
+} else {
+try!(write!(dest, "\\{}", b as char));
+}
+chunk_start = i + 1;
+}
+dest.write_str(&value[chunk_start..])
+}


/// Write a double-quoted CSS string token, escaping content as necessary.
pub fn serialize_string<W>(value: &str, dest: &mut W) -> fmt::Result where W: fmt::Write {
try!(dest.write_str("\""));
@@ -382,7 +402,7 @@ impl<'a> Token<'a> {
TokenSerializationType(match *self {
Token::Ident(_) => Ident,
Token::AtKeyword(_) | Token::Hash(_) | Token::IDHash(_) => AtKeywordOrHash,
-Token::Url(_) | Token::BadUrl => UrlOrBadUrl,
+Token::UnquotedUrl(_) | Token::BadUrl => UrlOrBadUrl,
Token::Delim('#') => DelimHash,
Token::Delim('@') => DelimAt,
Token::Delim('.') | Token::Delim('+') => DelimDotOrPlus,
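
A quick illustration of the escaping rules implemented by `serialize_unquoted_url` above (a sketch, assuming `Token` and `ToCss` are importable from the crate root as in the tests below):

```rust
use cssparser::{Token, ToCss};

#[test]
fn unquoted_url_escaping_sketch() {
    // Whitespace and other non-printables are hex-escaped, with a
    // trailing space terminating the escape; `(`, `)`, quotes, and
    // `\` get a simple backslash escape instead.
    let token = Token::UnquotedUrl("a b(c)".into());
    assert_eq!(token.to_css_string(), "url(a\\20 b\\(c\\))");
}
```
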
71 changes: 40 additions & 31 deletions src/tests.rs
@@ -2,12 +2,11 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

-use std::borrow::Cow::Borrowed;
+use std::borrow::Cow::{self, Borrowed};
use std::fs::File;
use std::io::{self, Write};
use std::path::Path;
use std::process::Command;
-use std::mem;
use rustc_serialize::json::{self, Json, ToJson};
use tempdir::TempDir;

@@ -74,14 +73,8 @@ fn almost_equals(a: &Json, b: &Json) -> bool {
fn normalize(json: &mut Json) {
match *json {
Json::Array(ref mut list) => {
-match find_url(list) {
-Some(Ok(url)) => *list = vec!["url".to_json(), Json::String(url)],
-Some(Err(())) => *list = vec!["error".to_json(), "bad-url".to_json()],
-None => {
-for item in list.iter_mut() {
-normalize(item)
-}
-}
+for item in list.iter_mut() {
+normalize(item)
+}
}
Json::String(ref mut s) => {
@@ -93,26 +86,6 @@
}
}

-fn find_url(list: &mut [Json]) -> Option<Result<String, ()>> {
-if list.len() < 2 ||
-list[0].as_string() != Some("function") ||
-list[1].as_string() != Some("url") {
-return None
-}
-
-let mut args = list[2..].iter_mut().filter(|a| a.as_string() != Some(" "));
-if let (Some(&mut Json::Array(ref mut arg)), None) = (args.next(), args.next()) {
-if arg.len() == 2 && arg[0].as_string() == Some("string") {
-if let &mut Json::String(ref mut value) = &mut arg[1] {
-return Some(Ok(mem::replace(value, String::new())))
-}
-}
-}
-
-Some(Err(()))
-}


fn assert_json_eq(results: json::Json, mut expected: json::Json, message: String) {
normalize(&mut expected);
if !almost_equals(&results, &expected) {
@@ -281,6 +254,42 @@ fn outer_block_end_consumed() {
assert_eq!(input.next(), Err(()));
}

+#[test]
+fn unquoted_url_escaping() {
+let token = Token::UnquotedUrl("\
+\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\
+\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f \
+!\"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]\
+^_`abcdefghijklmnopqrstuvwxyz{|}~\x7fé\
+".into());
+let serialized = token.to_css_string();
+assert_eq!(serialized, "\
+url(\
+\\1 \\2 \\3 \\4 \\5 \\6 \\7 \\8 \\9 \\A \\B \\C \\D \\E \\F \\10 \
+\\11 \\12 \\13 \\14 \\15 \\16 \\17 \\18 \\19 \\1A \\1B \\1C \\1D \\1E \\1F \\20 \
+!\\\"#$%&\\'\\(\\)*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]\
+^_`abcdefghijklmnopqrstuvwxyz{|}~\\7F é\
+)\
+");
+assert_eq!(Parser::new(&serialized).next(), Ok(token))
+}

+#[test]
+fn test_expect_url() {
+fn parse(s: &str) -> Result<Cow<str>, ()> {
+Parser::new(s).expect_url()
+}
+assert_eq!(parse("url()").unwrap(), "");
+assert_eq!(parse("url( ").unwrap(), "");
+assert_eq!(parse("url( abc").unwrap(), "abc");
+assert_eq!(parse("url( abc \t)").unwrap(), "abc");
+assert_eq!(parse("url( 'abc' \t)").unwrap(), "abc");
+assert_eq!(parse("url(abc more stuff)"), Err(()));
+// The grammar at https://drafts.csswg.org/css-values/#urls plans for `<url-modifier>*`
+// at the position of "more stuff", but no such modifier is defined yet.
+assert_eq!(parse("url('abc' more stuff)"), Err(()));
+}


fn run_color_tests<F: Fn(Result<Color, ()>) -> Json>(json_data: &str, to_json: F) {
run_json_tests(json_data, |input| {
@@ -606,7 +615,7 @@ fn one_component_value_to_json(token: Token, input: &mut Parser) -> Json {
Token::Hash(value) => JArray!["hash", value, "unrestricted"],
Token::IDHash(value) => JArray!["hash", value, "id"],
Token::QuotedString(value) => JArray!["string", value],
-Token::Url(value) => JArray!["url", value],
+Token::UnquotedUrl(value) => JArray!["url", value],
Token::Delim('\\') => "\\".to_json(),
Token::Delim(value) => value.to_string().to_json(),

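
To summarize the tokenizer change below: only the unquoted form still produces a single token, while `url(` followed by a quoted string now yields an ordinary function token. A hedged sketch (same assumptions as the example above):

```rust
use cssparser::{Parser, Token};

#[test]
fn url_tokenization_sketch() {
    // Unquoted form: still a single <url-token>.
    assert_eq!(Parser::new("url(foo)").next(),
               Ok(Token::UnquotedUrl("foo".into())));
    // Quoted form: an ordinary function token; the string inside is
    // reached by parsing the nested block (e.g. via `expect_url`).
    assert_eq!(Parser::new("url('foo')").next(),
               Ok(Token::Function("url".into())));
}
```
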
43 changes: 21 additions & 22 deletions src/tokenizer.rs
@@ -48,7 +48,7 @@ pub enum Token<'a> {
/// A [`<url-token>`](https://drafts.csswg.org/css-syntax/#url-token-diagram) or `url( <string-token> )` function
///
/// The value does not include the `url(` `)` markers or the quotes.
-Url(Cow<'a, str>),
+UnquotedUrl(Cow<'a, str>),

/// A `<delim-token>`
Delim(char),
@@ -628,7 +628,7 @@ fn consume_ident_like<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
if !tokenizer.is_eof() && tokenizer.next_char() == '(' {
tokenizer.advance(1);
if value.eq_ignore_ascii_case("url") {
-consume_url(tokenizer)
+consume_unquoted_url(tokenizer).unwrap_or(Function(value))
} else {
if tokenizer.var_functions == VarFunctions::LookingForThem &&
value.eq_ignore_ascii_case("var") {
@@ -791,31 +791,30 @@ fn consume_numeric<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
}


-fn consume_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
-while !tokenizer.is_eof() {
-match tokenizer.next_char() {
-' ' | '\t' | '\n' | '\r' | '\x0C' => tokenizer.advance(1),
-'"' => return consume_quoted_url(tokenizer, false),
-'\'' => return consume_quoted_url(tokenizer, true),
-')' => { tokenizer.advance(1); break },
-_ => return consume_unquoted_url(tokenizer),
-}
-}
-return Url(Borrowed(""));
-
-fn consume_quoted_url<'a>(tokenizer: &mut Tokenizer<'a>, single_quote: bool) -> Token<'a> {
-match consume_quoted_string(tokenizer, single_quote) {
-Ok(value) => consume_url_end(tokenizer, value),
-Err(()) => consume_bad_url(tokenizer),
+fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>, ()> {
+for (offset, c) in tokenizer.input[tokenizer.position..].char_indices() {
+match c {
+' ' | '\t' | '\n' | '\r' | '\x0C' => {},
+'"' | '\'' => return Err(()), // Do not advance
+')' => {
+tokenizer.advance(offset + 1);
+return Ok(UnquotedUrl(Borrowed("")));
+}
+_ => {
+tokenizer.advance(offset);
+return Ok(consume_unquoted_url(tokenizer))
+}
+}
+}
+tokenizer.position = tokenizer.input.len();
+return Ok(UnquotedUrl(Borrowed("")));

fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
let start_pos = tokenizer.position();
let mut string;
loop {
if tokenizer.is_eof() {
-return Url(Borrowed(tokenizer.slice_from(start_pos)))
+return UnquotedUrl(Borrowed(tokenizer.slice_from(start_pos)))
}
match tokenizer.next_char() {
' ' | '\t' | '\n' | '\r' | '\x0C' => {
@@ -826,7 +825,7 @@ fn consume_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
')' => {
let value = tokenizer.slice_from(start_pos);
tokenizer.advance(1);
-return Url(Borrowed(value))
+return UnquotedUrl(Borrowed(value))
}
'\x01'...'\x08' | '\x0B' | '\x0E'...'\x1F' | '\x7F' // non-printable
| '"' | '\'' | '(' => {
@@ -861,7 +860,7 @@ fn consume_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
};
string.push(next_char)
}
-Url(Owned(string))
+UnquotedUrl(Owned(string))
}

fn consume_url_end<'a>(tokenizer: &mut Tokenizer<'a>, string: Cow<'a, str>) -> Token<'a> {
Expand All @@ -872,7 +871,7 @@ fn consume_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
_ => return consume_bad_url(tokenizer)
}
}
-Url(string)
+UnquotedUrl(string)
}

fn consume_bad_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
