Skip to content

Commit

Permalink
LibURL+LibWeb: Make URL::basic_parse return an Optional<URL>
Browse files Browse the repository at this point in the history
URL::basic_parse has a subtle bug where the resulting URL is not marked
as valid when a state override is provided and the URL parser returns
early with a valid URL.

This has not surfaced as a problem so far, as the only users of the
state override API provide an already valid URL buffer and also ignore
the result of basic parsing with a state override.

However, this bug surfaces when implementing the URL pattern spec, which
does the following as part of URL canonicalization:
 * Provides a dummy URL record
 * Basic URL parses that URL with state override
 * Checks the result of the URL parser to validate the URL

While we could set URL validity on every early return of the URL parser
during state override, there has been a long-standing FIXME in the code
to try to remove the awkward validity state of the URL class. So this
commit makes the first stage of this change by migrating the basic
parser API to return Optional, which also happens to eliminate this
subtle issue.
  • Loading branch information
shannonbooth authored and trflynn89 committed Jan 11, 2025
1 parent b6ec055 commit 5bed8f4
Show file tree
Hide file tree
Showing 9 changed files with 56 additions and 56 deletions.
2 changes: 1 addition & 1 deletion Libraries/LibURL/Parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -708,7 +708,7 @@ String Parser::percent_encode_after_encoding(TextCodec::Encoder& encoder, String
}

// https://url.spec.whatwg.org/#concept-basic-url-parser
URL Parser::basic_parse(StringView raw_input, Optional<URL const&> base_url, URL* url, Optional<State> state_override, Optional<StringView> encoding)
Optional<URL> Parser::basic_parse(StringView raw_input, Optional<URL const&> base_url, URL* url, Optional<State> state_override, Optional<StringView> encoding)
{
dbgln_if(URL_PARSER_DEBUG, "URL::Parser::basic_parse: Parsing '{}'", raw_input);

Expand Down
2 changes: 1 addition & 1 deletion Libraries/LibURL/Parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ class Parser {
}

// https://url.spec.whatwg.org/#concept-basic-url-parser
static URL basic_parse(StringView input, Optional<URL const&> base_url = {}, URL* url = nullptr, Optional<State> state_override = {}, Optional<StringView> encoding = {});
static Optional<URL> basic_parse(StringView input, Optional<URL const&> base_url = {}, URL* url = nullptr, Optional<State> state_override = {}, Optional<StringView> encoding = {});

// https://url.spec.whatwg.org/#string-percent-encode-after-encoding
static String percent_encode_after_encoding(TextCodec::Encoder&, StringView input, PercentEncodeSet percent_encode_set, bool space_as_plus = false);
Expand Down
14 changes: 9 additions & 5 deletions Libraries/LibURL/URL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ namespace URL {

// FIXME: It could make sense to force users of URL to use URL::Parser::basic_parse() explicitly instead of using a constructor.
URL::URL(StringView string)
: URL(Parser::basic_parse(string))
: URL(Parser::basic_parse(string).value_or(URL {}))
{
if constexpr (URL_PARSER_DEBUG) {
if (m_data->valid)
Expand All @@ -38,7 +38,11 @@ URL URL::complete_url(StringView relative_url) const
if (!is_valid())
return {};

return Parser::basic_parse(relative_url, *this);
auto result = Parser::basic_parse(relative_url, *this);
if (!result.has_value())
return {};

return result.release_value();
}

ByteString URL::path_segment_at_index(size_t index) const
Expand Down Expand Up @@ -367,12 +371,12 @@ Origin URL::origin() const
auto path_url = Parser::basic_parse(serialize_path());

// 3. If pathURL is failure, then return a new opaque origin.
if (!path_url.is_valid())
if (!path_url.has_value())
return Origin {};

// 4. If pathURL’s scheme is "http", "https", or "file", then return pathURL’s origin.
if (path_url.scheme().is_one_of("http"sv, "https"sv, "file"sv))
return path_url.origin();
if (path_url->scheme().is_one_of("http"sv, "https"sv, "file"sv))
return path_url->origin();

// 5. Return a new opaque origin.
return Origin {};
Expand Down
4 changes: 2 additions & 2 deletions Libraries/LibWeb/CSS/Fetch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,14 @@ void fetch_a_style_resource(String const& url_value, CSSStyleSheet const& sheet,

// 3. Let parsedUrl be the result of the URL parser steps with urlValue’s url and base. If the algorithm returns an error, return.
auto parsed_url = URL::Parser::basic_parse(url_value, base);
if (!parsed_url.is_valid())
if (!parsed_url.has_value())
return;

// 4. Let req be a new request whose url is parsedUrl, whose destination is destination, mode is corsMode,
// origin is environmentSettings’s origin, credentials mode is "same-origin", use-url-credentials flag is set,
// client is environmentSettings, and whose referrer is environmentSettings’s API base URL.
auto request = Fetch::Infrastructure::Request::create(vm);
request->set_url(parsed_url);
request->set_url(parsed_url.release_value());
request->set_destination(destination);
request->set_mode(cors_mode == CorsMode::Cors ? Fetch::Infrastructure::Request::Mode::CORS : Fetch::Infrastructure::Request::Mode::NoCORS);
request->set_origin(environment_settings.origin());
Expand Down
30 changes: 13 additions & 17 deletions Libraries/LibWeb/DOMURL/DOMURL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,6 @@ GC::Ref<DOMURL> DOMURL::create(JS::Realm& realm, URL::URL url, GC::Ref<URLSearch
// https://url.spec.whatwg.org/#api-url-parser
static Optional<URL::URL> parse_api_url(String const& url, Optional<String> const& base)
{
// FIXME: We somewhat awkwardly have two failure states encapsulated in the return type (and convert between them in the steps),
// ideally we'd get rid of URL's valid flag

// 1. Let parsedBase be null.
Optional<URL::URL> parsed_base;

Expand All @@ -40,15 +37,14 @@ static Optional<URL::URL> parse_api_url(String const& url, Optional<String> cons
auto parsed_base_url = URL::Parser::basic_parse(*base);

// 2. If parsedBase is failure, then return failure.
if (!parsed_base_url.is_valid())
if (!parsed_base_url.has_value())
return {};

parsed_base = parsed_base_url;
}

// 3. Return the result of running the basic URL parser on url with parsedBase.
auto parsed = URL::Parser::basic_parse(url, parsed_base);
return parsed.is_valid() ? parsed : Optional<URL::URL> {};
return URL::Parser::basic_parse(url, parsed_base);
}

// https://url.spec.whatwg.org/#url-initialize
Expand Down Expand Up @@ -183,17 +179,17 @@ String DOMURL::to_json() const
}

// https://url.spec.whatwg.org/#ref-for-dom-url-href②
WebIDL::ExceptionOr<void> DOMURL::set_href(String const& href)
WebIDL::ExceptionOr<void> DOMURL::set_href(String const& value)
{
// 1. Let parsedURL be the result of running the basic URL parser on the given value.
URL::URL parsed_url = href;
auto parsed_url = URL::Parser::basic_parse(value);

// 2. If parsedURL is failure, then throw a TypeError.
if (!parsed_url.is_valid())
if (!parsed_url.has_value())
return WebIDL::SimpleException { WebIDL::SimpleExceptionType::TypeError, "Invalid URL"sv };

// 3. Set this’s URL to parsedURL.
m_url = move(parsed_url);
m_url = parsed_url.release_value();

// 4. Empty this’s query object’s list.
m_query->m_list.clear();
Expand Down Expand Up @@ -509,25 +505,25 @@ URL::URL parse(StringView input, Optional<URL::URL const&> base_url, Optional<St
auto url = URL::Parser::basic_parse(input, base_url, {}, {}, encoding);

// 2. If url is failure, return failure.
if (!url.is_valid())
return {};
if (!url.has_value())
return {}; // FIXME: Migrate this API to return an OptionalNone on failure.

// 3. If url’s scheme is not "blob", return url.
if (url.scheme() != "blob")
return url;
if (url->scheme() != "blob")
return url.release_value();

// 4. Set url’s blob URL entry to the result of resolving the blob URL url, if that did not return failure, and null otherwise.
auto blob_url_entry = FileAPI::resolve_a_blob_url(url);
auto blob_url_entry = FileAPI::resolve_a_blob_url(*url);
if (blob_url_entry.has_value()) {
url.set_blob_url_entry(URL::BlobURLEntry {
url->set_blob_url_entry(URL::BlobURLEntry {
.type = blob_url_entry->object->type(),
.byte_buffer = MUST(ByteBuffer::copy(blob_url_entry->object->raw_bytes())),
.environment_origin = blob_url_entry->environment->origin(),
});
}

// 5. Return url
return url;
return url.release_value();
}

}
2 changes: 1 addition & 1 deletion Libraries/LibWeb/HTML/Location.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ WebIDL::ExceptionOr<void> Location::set_protocol(String const& value)
auto possible_failure = URL::Parser::basic_parse(value, {}, &copy_url, URL::Parser::State::SchemeStart);

// 5. If possibleFailure is failure, then throw a "SyntaxError" DOMException.
if (!possible_failure.is_valid())
if (!possible_failure.has_value())
return WebIDL::SyntaxError::create(realm(), MUST(String::formatted("Failed to set protocol. '{}' is an invalid protocol", value)));

// 6. if copyURL's scheme is not an HTTP(S) scheme, then terminate these steps.
Expand Down
6 changes: 3 additions & 3 deletions Libraries/LibWeb/HTML/NavigatorBeacon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ WebIDL::ExceptionOr<bool> NavigatorBeaconMixin::send_beacon(String const& url, O

// 3. Set parsedUrl to the result of the URL parser steps with url and base. If the algorithm returns an error, or if parsedUrl's scheme is not "http" or "https", throw a "TypeError" exception and terminate these steps.
auto parsed_url = URL::Parser::basic_parse(url, base_url);
if (!parsed_url.is_valid())
if (!parsed_url.has_value())
return WebIDL::SimpleException { WebIDL::SimpleExceptionType::TypeError, MUST(String::formatted("Beacon URL {} is invalid.", url)) };
if (parsed_url.scheme() != "http" && parsed_url.scheme() != "https")
if (parsed_url->scheme() != "http" && parsed_url->scheme() != "https")
return WebIDL::SimpleException { WebIDL::SimpleExceptionType::TypeError, MUST(String::formatted("Beacon URL {} must be either http:// or https://.", url)) };

// 4. Let headerList be an empty list.
Expand Down Expand Up @@ -76,7 +76,7 @@ WebIDL::ExceptionOr<bool> NavigatorBeaconMixin::send_beacon(String const& url, O
auto req = Fetch::Infrastructure::Request::create(vm);
req->set_method(MUST(ByteBuffer::copy("POST"sv.bytes()))); // method: POST
req->set_client(&relevant_settings_object); // client: this's relevant settings object
req->set_url_list({ parsed_url }); // url: parsedUrl
req->set_url_list({ parsed_url.release_value() }); // url: parsedUrl
req->set_header_list(header_list); // header list: headerList
req->set_origin(origin); // origin: origin
req->set_keepalive(true); // keepalive: true
Expand Down
46 changes: 23 additions & 23 deletions Tests/LibURL/TestURL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -342,36 +342,36 @@ TEST_CASE(unicode)
TEST_CASE(query_with_non_ascii)
{
{
URL::URL url = URL::Parser::basic_parse("http://example.com/?utf8=✓"sv);
EXPECT(url.is_valid());
EXPECT_EQ(url.serialize_path(), "/"sv);
EXPECT_EQ(url.query(), "utf8=%E2%9C%93");
EXPECT(!url.fragment().has_value());
Optional<URL::URL> url = URL::Parser::basic_parse("http://example.com/?utf8=✓"sv);
EXPECT(url.has_value());
EXPECT_EQ(url->serialize_path(), "/"sv);
EXPECT_EQ(url->query(), "utf8=%E2%9C%93");
EXPECT(!url->fragment().has_value());
}
{
URL::URL url = URL::Parser::basic_parse("http://example.com/?shift_jis=✓"sv, {}, nullptr, {}, "shift_jis"sv);
EXPECT(url.is_valid());
EXPECT_EQ(url.serialize_path(), "/"sv);
EXPECT_EQ(url.query(), "shift_jis=%26%2310003%3B");
EXPECT(!url.fragment().has_value());
Optional<URL::URL> url = URL::Parser::basic_parse("http://example.com/?shift_jis=✓"sv, {}, nullptr, {}, "shift_jis"sv);
EXPECT(url.has_value());
EXPECT_EQ(url->serialize_path(), "/"sv);
EXPECT_EQ(url->query(), "shift_jis=%26%2310003%3B");
EXPECT(!url->fragment().has_value());
}
}

TEST_CASE(fragment_with_non_ascii)
{
{
URL::URL url = URL::Parser::basic_parse("http://example.com/#✓"sv);
EXPECT(url.is_valid());
EXPECT_EQ(url.serialize_path(), "/"sv);
EXPECT(!url.query().has_value());
EXPECT_EQ(url.fragment(), "%E2%9C%93");
Optional<URL::URL> url = URL::Parser::basic_parse("http://example.com/#✓"sv);
EXPECT(url.has_value());
EXPECT_EQ(url->serialize_path(), "/"sv);
EXPECT(!url->query().has_value());
EXPECT_EQ(url->fragment(), "%E2%9C%93");
}
{
URL::URL url = URL::Parser::basic_parse("http://example.com/#✓"sv, {}, nullptr, {}, "shift_jis"sv);
EXPECT(url.is_valid());
EXPECT_EQ(url.serialize_path(), "/"sv);
EXPECT(!url.query().has_value());
EXPECT_EQ(url.fragment(), "%E2%9C%93");
Optional<URL::URL> url = URL::Parser::basic_parse("http://example.com/#✓"sv, {}, nullptr, {}, "shift_jis"sv);
EXPECT(url.has_value());
EXPECT_EQ(url->serialize_path(), "/"sv);
EXPECT(!url->query().has_value());
EXPECT_EQ(url->fragment(), "%E2%9C%93");
}
}

Expand All @@ -392,9 +392,9 @@ TEST_CASE(complete_file_url_with_base)
TEST_CASE(empty_url_with_base_url)
{
URL::URL base_url { "https://foo.com/"sv };
URL::URL parsed_url = URL::Parser::basic_parse(""sv, base_url);
EXPECT_EQ(parsed_url.is_valid(), true);
EXPECT(base_url.equals(parsed_url));
Optional<URL::URL> parsed_url = URL::Parser::basic_parse(""sv, base_url);
EXPECT_EQ(parsed_url.has_value(), true);
EXPECT(base_url.equals(*parsed_url));
}

TEST_CASE(google_street_view)
Expand Down
6 changes: 3 additions & 3 deletions Utilities/xml.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -365,13 +365,13 @@ static auto parse(StringView contents)
.resolve_external_resource = [&](XML::SystemID const& system_id, Optional<XML::PublicID> const&) -> ErrorOr<Variant<ByteString, Vector<XML::MarkupDeclaration>>> {
auto base = URL::create_with_file_scheme(s_path);
auto url = URL::Parser::basic_parse(system_id.system_literal, base);
if (!url.is_valid())
if (!url.has_value())
return Error::from_string_literal("Invalid URL");

if (url.scheme() != "file")
if (url->scheme() != "file")
return Error::from_string_literal("NYI: Nonlocal entity");

auto file = TRY(Core::File::open(URL::percent_decode(url.serialize_path()), Core::File::OpenMode::Read));
auto file = TRY(Core::File::open(URL::percent_decode(url->serialize_path()), Core::File::OpenMode::Read));
return ByteString::copy(TRY(file->read_until_eof()));
},
},
Expand Down

0 comments on commit 5bed8f4

Please sign in to comment.