diff --git a/src/node_i18n.cc b/src/node_i18n.cc index dc50f9995a695e..f35bf2685592a4 100644 --- a/src/node_i18n.cc +++ b/src/node_i18n.cc @@ -450,6 +450,9 @@ int32_t ToUnicode(MaybeStackBuffer* buf, &info, &status); + // Do not check info.errors like we do with ToASCII since ToUnicode always + // returns a string, despite any possible errors that may have occurred. + if (status == U_BUFFER_OVERFLOW_ERROR) { status = U_ZERO_ERROR; buf->AllocateSufficientStorage(len); @@ -477,9 +480,18 @@ int32_t ToUnicode(MaybeStackBuffer* buf, int32_t ToASCII(MaybeStackBuffer* buf, const char* input, size_t length, - bool lenient) { + enum idna_mode mode) { UErrorCode status = U_ZERO_ERROR; - uint32_t options = UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_CHECK_BIDI; + uint32_t options = // CheckHyphens = false; handled later + UIDNA_CHECK_BIDI | // CheckBidi = true + UIDNA_CHECK_CONTEXTJ | // CheckJoiners = true + UIDNA_NONTRANSITIONAL_TO_ASCII; // Nontransitional_Processing + if (mode == IDNA_STRICT) { + options |= UIDNA_USE_STD3_RULES; // UseSTD3ASCIIRules = beStrict + // VerifyDnsLength = beStrict; + // handled later + } + UIDNA* uidna = uidna_openUTS46(options, &status); if (U_FAILURE(status)) return -1; @@ -501,21 +513,17 @@ int32_t ToASCII(MaybeStackBuffer* buf, &status); } - // The WHATWG URL "domain to ASCII" algorithm explicitly sets the - // VerifyDnsLength flag to false, which disables the domain name length - // verification step in ToASCII (as specified by UTS #46). Unfortunately, - // ICU4C's IDNA module does not support disabling this flag through `options`, - // so just filter out the errors that may be caused by the verification step - // afterwards. - info.errors &= ~UIDNA_ERROR_EMPTY_LABEL; - info.errors &= ~UIDNA_ERROR_LABEL_TOO_LONG; - info.errors &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG; - - // These error conditions are mandated unconditionally by UTS #46 version - // 9.0.0 (rev. 17), but were found to be incompatible with actual domain - // names in the wild. 
As such, in the current UTS #46 draft (rev. 18) these - // checks are made optional depending on the CheckHyphens flag, which will be - // disabled in WHATWG URL's "domain to ASCII" algorithm soon. + // In UTS #46 which specifies ToASCII, certain error conditions are + // configurable through options, and the WHATWG URL Standard promptly elects + // to disable some of them to accommodate real-world use cases. + // Unfortunately, ICU4C's IDNA module does not support disabling some of + // these options through `options` above, and thus continues throwing + // unnecessary errors. To counter this situation, we just filter out the + // errors that may have happened afterwards, before deciding whether to + // return an error from this function. + + // CheckHyphens = false + // (Specified in the current UTS #46 draft rev. 18.) // Refs: // - https://github.com/whatwg/url/issues/53 // - https://github.com/whatwg/url/pull/309 @@ -526,7 +534,14 @@ int32_t ToASCII(MaybeStackBuffer* buf, info.errors &= ~UIDNA_ERROR_LEADING_HYPHEN; info.errors &= ~UIDNA_ERROR_TRAILING_HYPHEN; - if (U_FAILURE(status) || (!lenient && info.errors != 0)) { + if (mode != IDNA_STRICT) { + // VerifyDnsLength = beStrict + info.errors &= ~UIDNA_ERROR_EMPTY_LABEL; + info.errors &= ~UIDNA_ERROR_LABEL_TOO_LONG; + info.errors &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG; + } + + if (U_FAILURE(status) || (mode != IDNA_LENIENT && info.errors != 0)) { len = -1; buf->SetLength(0); } else { @@ -564,9 +579,10 @@ static void ToASCII(const FunctionCallbackInfo& args) { Utf8Value val(env->isolate(), args[0]); // optional arg bool lenient = args[1]->BooleanValue(env->context()).FromJust(); + enum idna_mode mode = lenient ? 
IDNA_LENIENT : IDNA_DEFAULT; MaybeStackBuffer buf; - int32_t len = ToASCII(&buf, *val, val.length(), lenient); + int32_t len = ToASCII(&buf, *val, val.length(), mode); if (len < 0) { return env->ThrowError("Cannot convert name to ASCII"); diff --git a/src/node_i18n.h b/src/node_i18n.h index cc1f3e6ea53569..adf9feb414df5c 100644 --- a/src/node_i18n.h +++ b/src/node_i18n.h @@ -37,10 +37,26 @@ namespace i18n { bool InitializeICUDirectory(const std::string& path); +enum idna_mode { + // Default mode for maximum compatibility. + IDNA_DEFAULT, + // Ignore all errors in IDNA conversion, if possible. + IDNA_LENIENT, + // Enforce STD3 rules (UseSTD3ASCIIRules) and DNS length restrictions + // (VerifyDnsLength). Corresponds to `beStrict` flag in the "domain to ASCII" + // algorithm. + IDNA_STRICT +}; + +// Implements the WHATWG URL Standard "domain to ASCII" algorithm. +// https://url.spec.whatwg.org/#concept-domain-to-ascii int32_t ToASCII(MaybeStackBuffer* buf, const char* input, size_t length, - bool lenient = false); + enum idna_mode mode = IDNA_DEFAULT); + +// Implements the WHATWG URL Standard "domain to Unicode" algorithm. 
+// https://url.spec.whatwg.org/#concept-domain-to-unicode int32_t ToUnicode(MaybeStackBuffer* buf, const char* input, size_t length); diff --git a/test/fixtures/url-idna.js b/test/fixtures/url-idna.js index cbfe702e9372bf..4b8f5a48cc9646 100644 --- a/test/fixtures/url-idna.js +++ b/test/fixtures/url-idna.js @@ -1,223 +1,215 @@ 'use strict'; // Credit for list: http://www.i18nguy.com/markup/idna-examples.html -module.exports = { - valid: [ - { ascii: 'xn--mgbaal8b0b9b2b.icom.museum', - unicode: 'افغانستا.icom.museum' - }, - { - ascii: 'xn--lgbbat1ad8j.icom.museum', - unicode: 'الجزائر.icom.museum' - }, - { - ascii: 'xn--sterreich-z7a.icom.museum', - unicode: 'österreich.icom.museum' - }, - { - ascii: 'xn--54b6eqazv8bc7e.icom.museum', - unicode: 'বাংলাদেশ.icom.museum' - }, - { - ascii: 'xn--80abmy0agn7e.icom.museum', - unicode: 'беларусь.icom.museum' - }, - { - ascii: 'xn--belgi-rsa.icom.museum', - unicode: 'belgië.icom.museum' - }, - { - ascii: 'xn--80abgvm6a7d2b.icom.museum', - unicode: 'българия.icom.museum' - }, - { - ascii: 'xn--mgbfqim.icom.museum', - unicode: 'تشادر.icom.museum' - }, - { - ascii: 'xn--fiqs8s.icom.museum', - unicode: '中国.icom.museum' - }, - { - ascii: 'xn--mgbu4chg.icom.museum', - unicode: 'القمر.icom.museum' - }, - { - ascii: 'xn--vxakcego.icom.museum', - unicode: 'κυπρος.icom.museum' - }, - { - ascii: 'xn--eskrepublika-ebb62d.icom.museum', - unicode: 'českárepublika.icom.museum' - }, - { - ascii: 'xn--wgbh1c.icom.museum', - unicode: 'مصر.icom.museum' - }, - { - ascii: 'xn--hxakic4aa.icom.museum', - unicode: 'ελλάδα.icom.museum' - }, - { - ascii: 'xn--magyarorszg-t7a.icom.museum', - unicode: 'magyarország.icom.museum' - }, - { - ascii: 'xn--sland-ysa.icom.museum', - unicode: 'ísland.icom.museum' - }, - { - ascii: 'xn--h2brj9c.icom.museum', - unicode: 'भारत.icom.museum' - }, - { - ascii: 'xn--mgba3a4fra.icom.museum', - unicode: 'ايران.icom.museum' - }, - { - ascii: 'xn--ire-9la.icom.museum', - unicode: 'éire.icom.museum' - }, - { - ascii: 
'xn--4dbklr2c8d.xn--4dbrk0ce.museum', - unicode: 'איקו״ם.ישראל.museum' - }, - { - ascii: 'xn--wgv71a.icom.museum', - unicode: '日本.icom.museum' - }, - { - ascii: 'xn--igbhzh7gpa.icom.museum', - unicode: 'الأردن.icom.museum' - }, - { - ascii: 'xn--80aaa0a6awh12ed.icom.museum', - unicode: 'қазақстан.icom.museum' - }, - { - ascii: 'xn--3e0b707e.icom.museum', - unicode: '한국.icom.museum' - }, - { - ascii: 'xn--80afmksoji0fc.icom.museum', - unicode: 'кыргызстан.icom.museum' - }, - { - ascii: 'xn--q7ce6a.icom.museum', - unicode: 'ລາວ.icom.museum' - }, - { - ascii: 'xn--mgbb7fjb.icom.museum', - unicode: 'لبنان.icom.museum' - }, - { - ascii: 'xn--80aaldqjmmi6x.icom.museum', - unicode: 'македонија.icom.museum' - }, - { - ascii: 'xn--mgbah1a3hjkrd.icom.museum', - unicode: 'موريتانيا.icom.museum' - }, - { - ascii: 'xn--mxico-bsa.icom.museum', - unicode: 'méxico.icom.museum' - }, - { - ascii: 'xn--c1aqabffc0aq.icom.museum', - unicode: 'монголулс.icom.museum' - }, - { - ascii: 'xn--mgbc0a9azcg.icom.museum', - unicode: 'المغرب.icom.museum' - }, - { - ascii: 'xn--l2bey1c2b.icom.museum', - unicode: 'नेपाल.icom.museum' - }, - { - ascii: 'xn--mgb9awbf.icom.museum', - unicode: 'عمان.icom.museum' - }, - { - ascii: 'xn--wgbl6a.icom.museum', - unicode: 'قطر.icom.museum' - }, - { - ascii: 'xn--romnia-yta.icom.museum', - unicode: 'românia.icom.museum' - }, - { - ascii: 'xn--h1alffa9f.xn--h1aegh.museum', - unicode: 'россия.иком.museum' - }, - { - ascii: 'xn--80aaabm1ab4blmeec9e7n.xn--h1aegh.museum', - unicode: 'србијаицрнагора.иком.museum' - }, - { - ascii: 'xn--xkc2al3hye2a.icom.museum', - unicode: 'இலங்கை.icom.museum' - }, - { - ascii: 'xn--espaa-rta.icom.museum', - unicode: 'españa.icom.museum' - }, - { - ascii: 'xn--o3cw4h.icom.museum', - unicode: 'ไทย.icom.museum' - }, - { - ascii: 'xn--pgbs0dh.icom.museum', - unicode: 'تونس.icom.museum' - }, - { - ascii: 'xn--trkiye-3ya.icom.museum', - unicode: 'türkiye.icom.museum' - }, - { - ascii: 'xn--80aaxgrpt.icom.museum', - unicode: 
'украина.icom.museum' - }, - { - ascii: 'xn--vitnam-jk8b.icom.museum', - unicode: 'việtnam.icom.museum' - }, - // long label - { - ascii: `${'a'.repeat(64)}.com`, - unicode: `${'a'.repeat(64)}.com`, - }, - // long URL - { - ascii: `${`${'a'.repeat(64)}.`.repeat(4)}com`, - unicode: `${`${'a'.repeat(64)}.`.repeat(4)}com` - }, - // URLs with hyphen - { - ascii: 'r4---sn-a5mlrn7s.gevideo.com', - unicode: 'r4---sn-a5mlrn7s.gevideo.com' - }, - { - ascii: '-sn-a5mlrn7s.gevideo.com', - unicode: '-sn-a5mlrn7s.gevideo.com' - }, - { - ascii: 'sn-a5mlrn7s-.gevideo.com', - unicode: 'sn-a5mlrn7s-.gevideo.com' - }, - { - ascii: '-sn-a5mlrn7s-.gevideo.com', - unicode: '-sn-a5mlrn7s-.gevideo.com' - }, - { - ascii: '-sn--a5mlrn7s-.gevideo.com', - unicode: '-sn--a5mlrn7s-.gevideo.com' - } - ], - invalid: [ - // invalid character - '\ufffd.com', - // invalid bi-directional character - 'تشادرlatin.icom.museum' - ] -} +module.exports = [ + { ascii: 'xn--mgbaal8b0b9b2b.icom.museum', + unicode: 'افغانستا.icom.museum' + }, + { + ascii: 'xn--lgbbat1ad8j.icom.museum', + unicode: 'الجزائر.icom.museum' + }, + { + ascii: 'xn--sterreich-z7a.icom.museum', + unicode: 'österreich.icom.museum' + }, + { + ascii: 'xn--54b6eqazv8bc7e.icom.museum', + unicode: 'বাংলাদেশ.icom.museum' + }, + { + ascii: 'xn--80abmy0agn7e.icom.museum', + unicode: 'беларусь.icom.museum' + }, + { + ascii: 'xn--belgi-rsa.icom.museum', + unicode: 'belgië.icom.museum' + }, + { + ascii: 'xn--80abgvm6a7d2b.icom.museum', + unicode: 'българия.icom.museum' + }, + { + ascii: 'xn--mgbfqim.icom.museum', + unicode: 'تشادر.icom.museum' + }, + { + ascii: 'xn--fiqs8s.icom.museum', + unicode: '中国.icom.museum' + }, + { + ascii: 'xn--mgbu4chg.icom.museum', + unicode: 'القمر.icom.museum' + }, + { + ascii: 'xn--vxakcego.icom.museum', + unicode: 'κυπρος.icom.museum' + }, + { + ascii: 'xn--eskrepublika-ebb62d.icom.museum', + unicode: 'českárepublika.icom.museum' + }, + { + ascii: 'xn--wgbh1c.icom.museum', + unicode: 'مصر.icom.museum' + }, + { + 
ascii: 'xn--hxakic4aa.icom.museum', + unicode: 'ελλάδα.icom.museum' + }, + { + ascii: 'xn--magyarorszg-t7a.icom.museum', + unicode: 'magyarország.icom.museum' + }, + { + ascii: 'xn--sland-ysa.icom.museum', + unicode: 'ísland.icom.museum' + }, + { + ascii: 'xn--h2brj9c.icom.museum', + unicode: 'भारत.icom.museum' + }, + { + ascii: 'xn--mgba3a4fra.icom.museum', + unicode: 'ايران.icom.museum' + }, + { + ascii: 'xn--ire-9la.icom.museum', + unicode: 'éire.icom.museum' + }, + { + ascii: 'xn--4dbklr2c8d.xn--4dbrk0ce.museum', + unicode: 'איקו״ם.ישראל.museum' + }, + { + ascii: 'xn--wgv71a.icom.museum', + unicode: '日本.icom.museum' + }, + { + ascii: 'xn--igbhzh7gpa.icom.museum', + unicode: 'الأردن.icom.museum' + }, + { + ascii: 'xn--80aaa0a6awh12ed.icom.museum', + unicode: 'қазақстан.icom.museum' + }, + { + ascii: 'xn--3e0b707e.icom.museum', + unicode: '한국.icom.museum' + }, + { + ascii: 'xn--80afmksoji0fc.icom.museum', + unicode: 'кыргызстан.icom.museum' + }, + { + ascii: 'xn--q7ce6a.icom.museum', + unicode: 'ລາວ.icom.museum' + }, + { + ascii: 'xn--mgbb7fjb.icom.museum', + unicode: 'لبنان.icom.museum' + }, + { + ascii: 'xn--80aaldqjmmi6x.icom.museum', + unicode: 'македонија.icom.museum' + }, + { + ascii: 'xn--mgbah1a3hjkrd.icom.museum', + unicode: 'موريتانيا.icom.museum' + }, + { + ascii: 'xn--mxico-bsa.icom.museum', + unicode: 'méxico.icom.museum' + }, + { + ascii: 'xn--c1aqabffc0aq.icom.museum', + unicode: 'монголулс.icom.museum' + }, + { + ascii: 'xn--mgbc0a9azcg.icom.museum', + unicode: 'المغرب.icom.museum' + }, + { + ascii: 'xn--l2bey1c2b.icom.museum', + unicode: 'नेपाल.icom.museum' + }, + { + ascii: 'xn--mgb9awbf.icom.museum', + unicode: 'عمان.icom.museum' + }, + { + ascii: 'xn--wgbl6a.icom.museum', + unicode: 'قطر.icom.museum' + }, + { + ascii: 'xn--romnia-yta.icom.museum', + unicode: 'românia.icom.museum' + }, + { + ascii: 'xn--h1alffa9f.xn--h1aegh.museum', + unicode: 'россия.иком.museum' + }, + { + ascii: 'xn--80aaabm1ab4blmeec9e7n.xn--h1aegh.museum', + unicode: 
'србијаицрнагора.иком.museum' + }, + { + ascii: 'xn--xkc2al3hye2a.icom.museum', + unicode: 'இலங்கை.icom.museum' + }, + { + ascii: 'xn--espaa-rta.icom.museum', + unicode: 'españa.icom.museum' + }, + { + ascii: 'xn--o3cw4h.icom.museum', + unicode: 'ไทย.icom.museum' + }, + { + ascii: 'xn--pgbs0dh.icom.museum', + unicode: 'تونس.icom.museum' + }, + { + ascii: 'xn--trkiye-3ya.icom.museum', + unicode: 'türkiye.icom.museum' + }, + { + ascii: 'xn--80aaxgrpt.icom.museum', + unicode: 'украина.icom.museum' + }, + { + ascii: 'xn--vitnam-jk8b.icom.museum', + unicode: 'việtnam.icom.museum' + }, + // long label + { + ascii: `${'a'.repeat(64)}.com`, + unicode: `${'a'.repeat(64)}.com`, + }, + // long URL + { + ascii: `${`${'a'.repeat(64)}.`.repeat(4)}com`, + unicode: `${`${'a'.repeat(64)}.`.repeat(4)}com` + }, + // URLs with hyphen + { + ascii: 'r4---sn-a5mlrn7s.gevideo.com', + unicode: 'r4---sn-a5mlrn7s.gevideo.com' + }, + { + ascii: '-sn-a5mlrn7s.gevideo.com', + unicode: '-sn-a5mlrn7s.gevideo.com' + }, + { + ascii: 'sn-a5mlrn7s-.gevideo.com', + unicode: 'sn-a5mlrn7s-.gevideo.com' + }, + { + ascii: '-sn-a5mlrn7s-.gevideo.com', + unicode: '-sn-a5mlrn7s-.gevideo.com' + }, + { + ascii: '-sn--a5mlrn7s-.gevideo.com', + unicode: '-sn--a5mlrn7s-.gevideo.com' + } +]; diff --git a/test/fixtures/url-toascii.js b/test/fixtures/url-toascii.js new file mode 100644 index 00000000000000..ea5e0f22ba1b5f --- /dev/null +++ b/test/fixtures/url-toascii.js @@ -0,0 +1,156 @@ +'use strict'; + +/* WPT Refs: + https://github.com/w3c/web-platform-tests/blob/4839a0a804/url/toascii.json + License: http://www.w3.org/Consortium/Legal/2008/04-testsuite-copyright.html +*/ +module.exports = +[ + "This resource is focused on highlighting issues with UTS #46 ToASCII", + { + "comment": "Label with hyphens in 3rd and 4th position", + "input": "aa--", + "output": "aa--" + }, + { + "input": "a†--", + "output": "xn--a---kp0a" + }, + { + "input": "ab--c", + "output": "ab--c" + }, + { + "comment": "Label with leading 
hyphen", + "input": "-x", + "output": "-x" + }, + { + "input": "-†", + "output": "xn----xhn" + }, + { + "input": "-x.xn--nxa", + "output": "-x.xn--nxa" + }, + { + "input": "-x.β", + "output": "-x.xn--nxa" + }, + { + "comment": "Label with trailing hyphen", + "input": "x-.xn--nxa", + "output": "x-.xn--nxa" + }, + { + "input": "x-.β", + "output": "x-.xn--nxa" + }, + { + "comment": "Empty labels", + "input": "x..xn--nxa", + "output": "x..xn--nxa" + }, + { + "input": "x..β", + "output": "x..xn--nxa" + }, + { + "comment": "Invalid Punycode", + "input": "xn--a", + "output": null + }, + { + "input": "xn--a.xn--nxa", + "output": null + }, + { + "input": "xn--a.β", + "output": null + }, + { + "comment": "Valid Punycode", + "input": "xn--nxa.xn--nxa", + "output": "xn--nxa.xn--nxa" + }, + { + "comment": "Mixed", + "input": "xn--nxa.β", + "output": "xn--nxa.xn--nxa" + }, + { + "input": "ab--c.xn--nxa", + "output": "ab--c.xn--nxa" + }, + { + "input": "ab--c.β", + "output": "ab--c.xn--nxa" + }, + { + "comment": "CheckJoiners is true", + "input": "\u200D.example", + "output": null + }, + { + "input": "xn--1ug.example", + "output": null + }, + { + "comment": "CheckBidi is true", + "input": "يa", + "output": null + }, + { + "input": "xn--a-yoc", + "output": null + }, + { + "comment": "processing_option is Nontransitional_Processing", + "input": "ශ්‍රී", + "output": "xn--10cl1a0b660p" + }, + { + "input": "نامه‌ای", + "output": "xn--mgba3gch31f060k" + }, + { + "comment": "U+FFFD", + "input": "\uFFFD.com", + "output": null + }, + { + "comment": "U+FFFD character encoded in Punycode", + "input": "xn--zn7c.com", + "output": null + }, + { + "comment": "Label longer than 63 code points", + "input": "x01234567890123456789012345678901234567890123456789012345678901x", + "output": "x01234567890123456789012345678901234567890123456789012345678901x" + }, + { + "input": "x01234567890123456789012345678901234567890123456789012345678901†", + "output": 
"xn--x01234567890123456789012345678901234567890123456789012345678901-6963b" + }, + { + "input": "x01234567890123456789012345678901234567890123456789012345678901x.xn--nxa", + "output": "x01234567890123456789012345678901234567890123456789012345678901x.xn--nxa" + }, + { + "input": "x01234567890123456789012345678901234567890123456789012345678901x.β", + "output": "x01234567890123456789012345678901234567890123456789012345678901x.xn--nxa" + }, + { + "comment": "Domain excluding TLD longer than 253 code points", + "input": "01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x", + "output": "01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x" + }, + { + "input": "01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.xn--nxa", + "output": "01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.xn--nxa" + }, + { + "input": "01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.β", + "output": 
"01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.xn--nxa" + } +] diff --git a/test/parallel/test-icu-punycode.js b/test/parallel/test-icu-punycode.js index ba2014bdc85a2f..13db2dd7bacede 100644 --- a/test/parallel/test-icu-punycode.js +++ b/test/parallel/test-icu-punycode.js @@ -10,9 +10,10 @@ const icu = process.binding('icu'); const assert = require('assert'); const tests = require('../fixtures/url-idna.js'); +const wptToASCIITests = require('../fixtures/url-toascii.js'); { - for (const [i, { ascii, unicode }] of tests.valid.entries()) { + for (const [i, { ascii, unicode }] of tests.entries()) { assert.strictEqual(ascii, icu.toASCII(unicode), `toASCII(${i + 1})`); assert.strictEqual(unicode, icu.toUnicode(ascii), `toUnicode(${i + 1})`); assert.strictEqual(ascii, icu.toASCII(icu.toUnicode(ascii)), @@ -23,13 +24,24 @@ const tests = require('../fixtures/url-idna.js'); } { - for (const [i, url] of tests.invalid.entries()) { - assert.throws(() => icu.toASCII(url), - /^Error: Cannot convert name to ASCII$/, - `ToASCII invalid case ${i + 1}`); - assert.doesNotThrow(() => icu.toASCII(url, true), - `ToASCII invalid case ${i + 1} in lenient mode`); - assert.doesNotThrow(() => icu.toUnicode(url), - `ToUnicode invalid case ${i + 1}`); + for (const [i, test] of wptToASCIITests.entries()) { + if (typeof test === 'string') + continue; // skip comments + const { comment, input, output } = test; + let caseComment = `case ${i + 1}`; + if (comment) + caseComment += ` (${comment})`; + if (output === null) { + assert.throws(() => icu.toASCII(input), + /^Error: Cannot convert name to ASCII$/, + `ToASCII ${caseComment}`); + assert.doesNotThrow(() => icu.toASCII(input, true), + `ToASCII ${caseComment} in lenient mode`); + } else { + assert.strictEqual(icu.toASCII(input), output, 
`ToASCII ${caseComment}`); + assert.strictEqual(icu.toASCII(input, true), output, + `ToASCII ${caseComment} in lenient mode`); + } + assert.doesNotThrow(() => icu.toUnicode(input), `ToUnicode ${caseComment}`); } } diff --git a/test/parallel/test-whatwg-url-domainto.js b/test/parallel/test-whatwg-url-domainto.js index 90d9ee4a8c4648..b399f24136e14b 100644 --- a/test/parallel/test-whatwg-url-domainto.js +++ b/test/parallel/test-whatwg-url-domainto.js @@ -11,6 +11,7 @@ const { domainToASCII, domainToUnicode } = require('url'); // Tests below are not from WPT. const tests = require('../fixtures/url-idna.js'); +const wptToASCIITests = require('../fixtures/url-toascii.js'); { const expectedError = common.expectsError( @@ -22,7 +23,7 @@ const tests = require('../fixtures/url-idna.js'); } { - for (const [i, { ascii, unicode }] of tests.valid.entries()) { + for (const [i, { ascii, unicode }] of tests.entries()) { assert.strictEqual(ascii, domainToASCII(unicode), `domainToASCII(${i + 1})`); assert.strictEqual(unicode, domainToUnicode(ascii), @@ -35,8 +36,20 @@ const tests = require('../fixtures/url-idna.js'); } { - for (const [i, url] of tests.invalid.entries()) { - assert.strictEqual(domainToASCII(url), '', `Invalid case ${i + 1}`); - assert.strictEqual(domainToUnicode(url), '', `Invalid case ${i + 1}`); + for (const [i, test] of wptToASCIITests.entries()) { + if (typeof test === 'string') + continue; // skip comments + const { comment, input, output } = test; + let caseComment = `Case ${i + 1}`; + if (comment) + caseComment += ` (${comment})`; + if (output === null) { + assert.strictEqual(domainToASCII(input), '', caseComment); + assert.strictEqual(domainToUnicode(input), '', caseComment); + } else { + assert.strictEqual(domainToASCII(input), output, caseComment); + const roundtripped = domainToASCII(domainToUnicode(input)); + assert.strictEqual(roundtripped, output, caseComment); + } } } diff --git a/test/parallel/test-whatwg-url-toascii.js 
b/test/parallel/test-whatwg-url-toascii.js new file mode 100644 index 00000000000000..bd986c96a47a84 --- /dev/null +++ b/test/parallel/test-whatwg-url-toascii.js @@ -0,0 +1,85 @@ +'use strict'; +const common = require('../common'); +const path = require('path'); +const { URL } = require('url'); +const { test, assert_equals, assert_throws } = require('../common/wpt'); + +if (!common.hasIntl) { + // A handful of the tests fail when ICU is not included. + common.skip('missing Intl'); + return; +} + +const request = { + response: require(path.join(common.fixturesDir, 'url-toascii')) +}; + +/* eslint-disable */ +/* WPT Refs: + https://github.com/w3c/web-platform-tests/blob/4839a0a804/url/toascii.window.js + License: http://www.w3.org/Consortium/Legal/2008/04-testsuite-copyright.html +*/ +// async_test(t => { +// const request = new XMLHttpRequest() +// request.open("GET", "toascii.json") +// request.send() +// request.responseType = "json" +// request.onload = t.step_func_done(() => { + runTests(request.response) +// }) +// }, "Loading data…") + +function makeURL(type, input) { + input = "https://" + input + "/x" + if(type === "url") { + return new URL(input) + } else { + const url = document.createElement(type) + url.href = input + return url + } +} + +function runTests(tests) { + for(var i = 0, l = tests.length; i < l; i++) { + let hostTest = tests[i] + if (typeof hostTest === "string") { + continue // skip comments + } + const typeName = { "url": "URL", "a": "", "area": "" } + // ;["url", "a", "area"].forEach((type) => { + ;["url"].forEach((type) => { + test(() => { + if(hostTest.output !== null) { + const url = makeURL("url", hostTest.input) + assert_equals(url.host, hostTest.output) + assert_equals(url.hostname, hostTest.output) + assert_equals(url.pathname, "/x") + assert_equals(url.href, "https://" + hostTest.output + "/x") + } else { + if(type === "url") { + assert_throws(new TypeError, () => makeURL("url", hostTest.input)) + } else { + const url = makeURL(type, 
hostTest.input) + assert_equals(url.host, "") + assert_equals(url.hostname, "") + assert_equals(url.pathname, "") + assert_equals(url.href, "https://" + hostTest.input + "/x") + } + } + }, hostTest.input + " (using " + typeName[type] + ")") + ;["host", "hostname"].forEach((val) => { + test(() => { + const url = makeURL(type, "x") + url[val] = hostTest.input + if(hostTest.output !== null) { + assert_equals(url[val], hostTest.output) + } else { + assert_equals(url[val], "x") + } + }, hostTest.input + " (using " + typeName[type] + "." + val + ")") + }) + }) + } +} +/* eslint-enable */