Skip to content

Commit

Permalink
add unit tests from pdfium and work on edge cases
Browse files Browse the repository at this point in the history
  • Loading branch information
ryzokuken committed Jan 10, 2025
1 parent fc90838 commit 169b949
Showing 1 changed file with 81 additions and 20 deletions.
101 changes: 81 additions & 20 deletions test/unit/autolinker_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,12 @@ describe("autolinker", function () {
});

it("should correctly find simple valid URLs", function () {
const matches = Autolinker.findLinks(
"http://subdomain.example.com/path/to/page?query=param\nwww.example.com/path/to/resource\nhttp://example.com/path?query=value#fragment"
);
const links = [
"http://subdomain.example.com/path/to/page?query=param",
"www.example.com/path/to/resource",
"http://example.com/path?query=value#fragment",
];
const matches = Autolinker.findLinks(links.join("\n"));
expect(matches.length).toEqual(3);
expect(matches[0].url).toEqual(
"http://subdomain.example.com/path/to/page?query=param"
Expand All @@ -36,19 +39,43 @@ describe("autolinker", function () {
});

// Verifies that each `mailto:` address, given one per input line, comes back
// from Autolinker.findLinks as its own match with the URL text unchanged.
// NOTE(review): this text looks like a rendered diff whose +/- markers were
// lost. The inline-string findLinks call is presumably the removed version and
// the `emails` array form its replacement, so only one `const matches` should
// exist in the real spec file - confirm against the repository.
it("should correctly find emails", function () {
const matches = Autolinker.findLinks(
"mailto:username@example.com\nmailto:someone@subdomain.example.com"
);
const emails = [
"mailto:username@example.com",
"mailto:someone@subdomain.example.com",
// Disabled cases below: bare addresses with no `mailto:` prefix. Presumably
// findLinks does not handle them yet - TODO confirm before enabling, and
// raise the expected match count to cover them.
// "peter@abc.d",
// "red.teddy.b@abc.com",
// "abc_@gmail.com", // '_' is ok before '@'.
// "dummy-hi@gmail.com", // '-' is ok in user name.
// "a..df@gmail.com", // Stop at consecutive '.'.
// ".john@yahoo.com", // Remove leading '.'.
// "abc@xyz.org?/", // Trim ending invalid chars.
// "fan{abc@xyz.org", // Trim beginning invalid chars.
// "fan@g.com..", // Trim the ending periods.
// "CAP.cap@Gmail.Com", // Keep the original case.
];
const matches = Autolinker.findLinks(emails.join("\n"));
// Only the two active `mailto:` entries are expected to match.
expect(matches.length).toEqual(2);
expect(matches[0].url).toEqual("mailto:username@example.com");
expect(matches[1].url).toEqual("mailto:someone@subdomain.example.com");
// Assertions for the disabled bare-address cases; they expect a `mailto:`
// prefix to be added to the reported URL.
// expect(matches[2].url).toEqual("mailto:peter@abc.d");
// expect(matches[3].url).toEqual("mailto:red.teddy.b@abc.com");
});

it("should correctly handle complex or edge cases", function () {
const matches = Autolinker.findLinks(
"https://example.com/path/to/page?query=param&another=val#section\nwww.example.com/resource/(parentheses)-allowed/\nhttp://example.com/path_with_underscores\nhttp://www.example.com:8080/port/test\nhttps://example.com/encoded%20spaces%20in%20path\nmailto:hello+world@example.com"
);
expect(matches.length).toEqual(6);
const links = [
"https://example.com/path/to/page?query=param&another=val#section",
"www.example.com/resource/(parentheses)-allowed/",
"http://example.com/path_with_underscores",
"http://www.example.com:8080/port/test",
"https://example.com/encoded%20spaces%20in%20path",
"mailto:hello+world@example.com",
"www.abc.com/#%%^&&*(",
"www.a.com/#a=@?q=rr&r=y",
"http://a.com/1/2/3/4\\5\\6",
"http://www.example.com/foo;bar",
];
const matches = Autolinker.findLinks(links.join("\n"));
expect(matches.length).toEqual(10);
expect(matches[0].url).toEqual(
"https://example.com/path/to/page?query=param&another=val#section"
);
Expand All @@ -61,19 +88,43 @@ describe("autolinker", function () {
"https://example.com/encoded%20spaces%20in%20path"
);
expect(matches[5].url).toEqual("mailto:hello+world@example.com");
// expect(matches[6].url).toEqual("http://www.abc.com/#%%^&&*("); TODO: Fix error in regex to get this right.
expect(matches[7].url).toEqual("http://www.a.com/#a=@?q=rr&r=y");
expect(matches[8].url).toEqual("http://a.com/1/2/3/4/5/6");
expect(matches[9].url).toEqual("http://www.example.com/foo;bar");
});

// Negative test: every input line is text that must NOT be reported as a link,
// so the spec passes only when findLinks returns zero matches for the whole
// newline-joined input.
// NOTE(review): this text looks like a rendered diff whose +/- markers were
// lost. The inline-string findLinks call is presumably the removed version and
// the `links` array form its replacement - confirm against the repository.
it("shouldn't find false positives", function () {
const matches = Autolinker.findLinks(
"not a valid URL\nhtp://misspelled-protocol.com\nexample.com (missing protocol)\nhttps://[::1] (IPv6 loopback)\nhttp:// (just protocol)"
);
const links = [
"not a valid URL",
"htp://misspelled-protocol.com",
"example.com (missing protocol)",
"https://[::1] (IPv6 loopback)",
"http:// (just protocol)",
"", // Blank.
"http", // No colon.
"www.", // Missing domain.
"https-and-www", // Dash not colon.
"http:/abc.com", // Missing slash.
"http://((()),", // Only invalid chars in host name.
"ftp://example.com", // Ftp scheme is not supported.
"http:example.com", // Missing slashes.
// NOTE(review): the next three inputs have no ':' after "http", so they may
// be rejected for the missing colon before any IPv6 parsing happens. If the
// intent is to exercise bracketed-host handling, "http://[" forms might be
// needed - TODO confirm against the upstream cases these were taken from.
"http//[example.com", // Invalid IPv6 address.
"http//[00:00:00:00:00:00", // Invalid IPv6 address.
"http//[]", // Empty IPv6 address.
"abc.example.com", // URL without scheme.
];
const matches = Autolinker.findLinks(links.join("\n"));
// A single match anywhere in the list fails the test; the assertion does not
// identify which entry was wrongly linked.
expect(matches.length).toEqual(0);
});

it("should correctly find links among mixed content", function () {
const matches = Autolinker.findLinks(
"Here's a URL: https://example.com and an email: mailto:test@example.com\nwww.example.com and more text\nCheck this: http://example.com/path?query=1 and this mailto:info@domain.com"
);
const links = [
"Here's a URL: https://example.com and an email: mailto:test@example.com",
"www.example.com and more text",
"Check this: http://example.com/path?query=1 and this mailto:info@domain.com",
];
const matches = Autolinker.findLinks(links.join("\n"));
expect(matches.length).toEqual(5);
expect(matches[0].url).toEqual("https://example.com/");
expect(matches[1].url).toEqual("mailto:test@example.com");
Expand All @@ -83,15 +134,25 @@ describe("autolinker", function () {
});

// Checks links containing non-ASCII or reserved characters. The assertions
// expect the reported URL to be normalized rather than echoed: "£" in the
// query becomes "%C2%A3", the non-ASCII host label becomes "xn--0zwm56d", and
// the scheme-less "www." inputs come back with "http://" and a trailing "/".
// NOTE(review): this text looks like a rendered diff whose +/- markers were
// lost. The inline-string findLinks call and the `toEqual(4)` count are
// presumably the removed version, replaced by the `links` array and
// `toEqual(6)` - confirm against the repository.
it("should correctly work with special characters", function () {
const matches = Autolinker.findLinks(
"https://example.com/path/to/page?query=value&symbol=£\nmailto:user.name+alias@example-domain.com\nhttp://example.com/@user\nhttps://example.com/path#@anchor"
);
expect(matches.length).toEqual(4);
const links = [
"https://example.com/path/to/page?query=value&symbol=£",
"mailto:user.name+alias@example-domain.com",
"http://example.com/@user",
"https://example.com/path#@anchor",
"www.测试.net",
// Same host written with ideographic full stops ("。") as separators; the
// assertion below expects the same URL as the ASCII-dot form.
"www.测试。net。",
"www.测试.net;",
];
const matches = Autolinker.findLinks(links.join("\n"));
// NOTE(review): seven inputs but six expected matches, and the matches[6]
// assertion is disabled - presumably the trailing-';' case is not matched
// yet. TODO confirm which input is being dropped.
expect(matches.length).toEqual(6);
expect(matches[0].url).toEqual(
"https://example.com/path/to/page?query=value&symbol=%C2%A3"
);
expect(matches[1].url).toEqual("mailto:user.name+alias@example-domain.com");
expect(matches[2].url).toEqual("http://example.com/@user");
expect(matches[3].url).toEqual("https://example.com/path#@anchor");
expect(matches[4].url).toEqual("http://www.xn--0zwm56d.net/");
expect(matches[5].url).toEqual("http://www.xn--0zwm56d.net/");
// expect(matches[6].url).toEqual("http://www.xn--0zwm56d.net/");
});
});

0 comments on commit 169b949

Please sign in to comment.