diff --git a/spec/std/punycode_spec.cr b/spec/std/punycode_spec.cr
new file mode 100644
index 000000000000..302e9361e1a0
--- /dev/null
+++ b/spec/std/punycode_spec.cr
@@ -0,0 +1,33 @@
+require "spec"
+require "punycode"
+
+describe Punycode do
+  [
+    {"3年B組金八先生", "3B-ww4c5e180e575a65lsy2b"},
+    {"安室奈美恵-with-SUPER-MONKEYS", "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n"},
+    {"Hello-Another-Way-それぞれの場所", "Hello-Another-Way--fc4qua05auwb3674vfr0b"},
+    {"ひとつ屋根の下2", "2-u9tlzr9756bt3uc0v"},
+    {"MajiでKoiする5秒前", "MajiKoi5-783gue6qz075azm5e"},
+    {"パフィーdeルンバ", "de-jg4avhby1noc0d"},
+    {"そのスピードで", "d9juau41awczczp"},
+    {"Hello-Another-Way-それぞれ", "Hello-Another-Way--fc4qua97gba"},
+  ].each do |example|
+    dec, enc = example
+
+    it "encodes #{dec} to #{enc}" do
+      Punycode.encode(dec).should eq enc
+    end
+
+    it "decodes #{enc} to #{dec}" do
+      Punycode.decode(enc).should eq dec
+    end
+  end
+
+  it "rejects characters that are not base-36 digits when decoding" do
+    expect_raises(Punycode::Error) { Punycode.decode("de-jg4avhby1noc[d") }
+  end
+
+  it "translate to ascii only host name" do
+    Punycode.to_ascii("test.テスト.テスト").should eq "test.xn--zckzah.xn--zckzah"
+  end
+end
diff --git a/src/punycode.cr b/src/punycode.cr
new file mode 100644
index 000000000000..ba81356764e7
--- /dev/null
+++ b/src/punycode.cr
@@ -0,0 +1,200 @@
+# Punycode (RFC 3492) encoding and decoding of Unicode strings, plus a helper
+# that converts internationalized host names to their ASCII-only form.
+module Punycode
+  # Raised on overflow while encoding and on malformed input while decoding.
+  class Error < Exception; end
+
+  # Bootstring parameter values (RFC 3492 section 5 lists these for Punycode).
+  BASE = 36
+  TMIN = 1
+  TMAX = 26
+  SKEW = 38
+  DAMP = 700
+  INITIAL_BIAS = 72
+  INITIAL_N = 128
+
+  DELIMITER = '-'
+
+  # Digit alphabet: the character at index `d` encodes the digit value `d`.
+  BASE36 = "abcdefghijklmnopqrstuvwxyz0123456789"
+
+  # Computes the next bias from *delta* and *numpoints*; *firsttime* selects
+  # the initial scaling of *delta* by DAMP instead of by 2.
+  private def self.adapt(delta, numpoints, firsttime)
+    delta /= firsttime ? DAMP : 2
+    delta += delta / numpoints
+    k = 0
+    while delta > ((BASE - TMIN) * TMAX) / 2
+      delta /= BASE - TMIN
+      k += BASE
+    end
+    k + (((BASE - TMIN + 1) * delta) / (delta + SKEW))
+  end
+
+  # Returns the Punycode encoding of *string* as a new String.
+  def self.encode(string : String)
+    encode string.chars
+  end
+
+  # Returns the Punycode encoding of the characters in *chars* as a new String.
+  def self.encode(chars)
+    String.build { |io| encode chars, io }
+  end
+
+  # Writes the Punycode encoding of *string* to *io*.
+  def self.encode(string : String, io)
+    encode string.chars, io
+  end
+
+  # Writes the Punycode encoding of the characters in *chars* to *io*.
+  #
+  # Characters below U+0080 are copied to *io* unchanged. If there are no other
+  # characters nothing more is written; otherwise a DELIMITER follows (only when
+  # at least one character was copied) and then the encoded deltas.
+  # Raises Error("overflow") when a delta would not fit in an Int32.
+  def self.encode(chars, io)
+    h = 0
+    all = true # stays true only while no character below U+0080 has been seen
+    others = [] of Char
+
+    chars.each do |c|
+      if c < '\u0080'
+        h += 1
+        io << c
+        all = false
+      else
+        others.push c
+      end
+    end
+
+    return if others.empty?
+    others.sort!
+    io << DELIMITER unless all
+
+    delta = 0_u32
+    n = INITIAL_N
+    bias = INITIAL_BIAS
+    firsttime = true
+    prev = nil
+
+    h += 1
+    # Visit the distinct non-basic characters in ascending order.
+    others.each do |m|
+      next if m == prev
+      prev = m
+
+      raise Error.new("overflow") if m.ord - n > (Int32::MAX - delta) / h
+      delta += (m.ord - n) * h
+      n = m.ord + 1
+
+      chars.each do |c|
+        if c < m
+          raise Error.new("overflow") if delta > Int32::MAX - 1
+          delta += 1
+        elsif c == m
+          # Emit delta as a variable-length integer using the BASE36 digits.
+          q = delta
+          k = BASE
+          loop do
+            t = k <= bias ? TMIN : k >= bias + TMAX ? TMAX : k - bias
+            break if q < t
+            io << BASE36[t + ((q - t) % (BASE - t))]
+            q = (q - t) / (BASE - t)
+            k += BASE
+          end
+          io << BASE36[q]
+
+          bias = adapt delta, h, firsttime
+          delta = 0
+          h += 1
+          firsttime = false
+        end
+      end
+      delta += 1
+    end
+  end
+
+  # Decodes the Punycode *string* and returns the result as a String.
+  #
+  # Everything before the last DELIMITER is taken over literally; the rest is
+  # read as digits: 'a'-'z' and 'A'-'Z' stand for 0-25, '0'-'9' for 26-35.
+  # Raises Error("invalid input") on any other character in the digit part, or
+  # when the input ends in the middle of a number.
+  def self.decode(string)
+    if delim = string.rindex(DELIMITER)
+      output = string[0...delim].chars
+      delim += 1
+    else
+      output = [] of Char
+      delim = 0
+    end
+
+    n = INITIAL_N
+    bias = INITIAL_BIAS
+    i = 0
+    init = true # true whenever the next digit starts a new number
+    w = oldi = k = 0
+
+    string[delim..-1].each_char do |c|
+      if init
+        w = 1
+        oldi = i
+        k = BASE
+        init = false
+      end
+
+      # The upper-case range stops at 'Z' so that the punctuation between 'Z'
+      # and 'a' ("[", "\", "]", "^", "_", "`") is rejected, not read as a digit.
+      digit = case c
+              when 'a'..'z' then c.ord - 0x61
+              when 'A'..'Z' then c.ord - 0x41
+              when '0'..'9' then c.ord - 0x30 + 26
+              else               raise Error.new("invalid input")
+              end
+
+      i += digit * w
+      t = k <= bias ? TMIN : k >= bias + TMAX ? TMAX : k - bias
+
+      if digit < t
+        # Last digit of the number: insert the decoded character.
+        outsize = output.size + 1
+        bias = adapt i - oldi, outsize, oldi == 0
+        n += i / outsize
+        i %= outsize
+        output.insert i, n.chr
+        i += 1
+        init = true
+      else
+        w *= BASE - t
+        k += BASE
+      end
+    end
+
+    raise Error.new "invalid input" unless init
+
+    output.join
+  end
+
+  # Returns *string* with every dot-separated part that contains non-ASCII
+  # characters replaced by "xn--" followed by its Punycode encoding.
+  # An ASCII-only *string* is returned as is.
+  def self.to_ascii(string)
+    return string if string.ascii_only?
+
+    String.build do |io|
+      first = true
+      string.split('.').each do |part|
+        io << "." unless first
+
+        if part.ascii_only?
+          io << part
+        else
+          io << "xn--"
+          encode part, io
+        end
+
+        first = false
+      end
+    end
+  end
+end
diff --git a/src/socket/ip_socket.cr b/src/socket/ip_socket.cr
index 989da4b6362e..49d82a82737f 100644
--- a/src/socket/ip_socket.cr
+++ b/src/socket/ip_socket.cr
@@ -1,3 +1,5 @@
+require "punycode"
+
 class IPSocket < Socket
   def local_address
     sockaddr = uninitialized LibC::SockAddrIn6
@@ -45,6 +47,12 @@ class IPSocket < Socket
   end
 
   def self.getaddrinfo(host, port, family, socktype, protocol = LibC::IPPROTO_IP, timeout = nil)
+    # RFC 3986 says:
+    # > When a non-ASCII registered name represents an internationalized domain name
+    # > intended for resolution via the DNS, the name must be transformed to the IDNA
+    # > encoding [RFC3490] prior to name lookup.
+    host = Punycode.to_ascii host
+
     hints = LibC::Addrinfo.new
     hints.family = (family || LibC::AF_UNSPEC).to_i32
     hints.socktype = socktype