Skip to content

Commit

Permalink
Add punycode support and integrate with DNS lookup
Browse files Browse the repository at this point in the history
  • Loading branch information
makenowjust committed May 2, 2016
1 parent 1ee669e commit 87efb3e
Show file tree
Hide file tree
Showing 3 changed files with 205 additions and 0 deletions.
29 changes: 29 additions & 0 deletions spec/std/punycode_spec.cr
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
require "spec"
require "punycode"

# Specs for Punycode: encode/decode round trips over a fixed set of samples,
# plus host name translation via `Punycode.to_ascii`.
describe Punycode do
  # Each entry is {decoded text, expected Punycode form}.
  samples = [
    {"3年B組金八先生", "3B-ww4c5e180e575a65lsy2b"},
    {"安室奈美恵-with-SUPER-MONKEYS", "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n"},
    {"Hello-Another-Way-それぞれの場所", "Hello-Another-Way--fc4qua05auwb3674vfr0b"},
    {"ひとつ屋根の下2", "2-u9tlzr9756bt3uc0v"},
    {"MajiでKoiする5秒前", "MajiKoi5-783gue6qz075azm5e"},
    {"パフィーdeルンバ", "de-jg4avhby1noc0d"},
    {"そのスピードで", "d9juau41awczczp"},
    {"Hello-Another-Way-それぞれ", "Hello-Another-Way--fc4qua97gba"},
  ]

  # Generate one encode spec and one decode spec per sample.
  samples.each do |sample|
    plain = sample[0]
    encoded = sample[1]

    it "encodes #{plain} to #{encoded}" do
      Punycode.encode(plain).should eq(encoded)
    end

    it "decodes #{encoded} to #{plain}" do
      Punycode.decode(encoded).should eq(plain)
    end
  end

  it "translate to ascii only host name" do
    host = Punycode.to_ascii("test.テスト.テスト")
    host.should eq("test.xn--zckzah.xn--zckzah")
  end
end
168 changes: 168 additions & 0 deletions src/punycode.cr
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
# Punycode encoding and decoding (the Bootstring instance described by
# RFC 3492), plus a helper that rewrites non-ASCII host name labels into
# their `xn--` form before a DNS lookup.
#
# NOTE(review): the arithmetic below relies on `/` performing integer
# division on integer operands, as the results are used as string indices
# and code point offsets — confirm against the targeted Crystal version.
module Punycode
  # Raised when encoding would overflow or when decoding meets malformed input.
  class Error < Exception; end

  # Bootstring parameters; the values match the Punycode parameters of
  # RFC 3492, section 5.
  BASE         =  36
  TMIN         =   1
  TMAX         =  26
  SKEW         =  38
  DAMP         = 700
  INITIAL_BIAS =  72
  INITIAL_N    = 128

  # Separates the literal ASCII prefix from the encoded deltas.
  DELIMITER = '-'

  # Digit alphabet used for output: index 0..25 -> 'a'..'z', 26..35 -> '0'..'9'.
  BASE36 = "abcdefghijklmnopqrstuvwxyz0123456789"

  # Bias adaptation: computes the next bias from the delta that was just
  # encoded or decoded.
  #
  # *numpoints* is the number of code points handled so far (including the
  # current one); *firsttime* selects the stronger `DAMP` scaling used for
  # the very first delta.
  private def self.adapt(delta, numpoints, firsttime)
    delta /= firsttime ? DAMP : 2
    delta += delta / numpoints
    k = 0
    while delta > ((BASE - TMIN) * TMAX) / 2
      delta /= BASE - TMIN
      k += BASE
    end
    k + (((BASE - TMIN + 1) * delta) / (delta + SKEW))
  end

  # Threshold for the digit at position *k* under the current *bias*:
  # `k - bias` clamped to the range `TMIN..TMAX`.
  private def self.threshold(k, bias)
    if k <= bias
      TMIN
    elsif k >= bias + TMAX
      TMAX
    else
      k - bias
    end
  end

  # Maps a Punycode digit character to its value (0..35), or returns -1 when
  # *c* is not a digit. Letters are accepted in either case.
  private def self.decode_digit(c)
    if 'a' <= c && c <= 'z'
      c.ord - 0x61
    elsif 'A' <= c && c <= 'Z'
      # The upper bound must be 'Z': using 'z' here would also accept the
      # punctuation between 'Z' and 'a' ('[', '\\', ']', '^', '_', '`').
      c.ord - 0x41
    elsif '0' <= c && c <= '9'
      c.ord - 0x30 + 26
    else
      -1
    end
  end

  # Encodes *string* and returns the result as a new `String`.
  def self.encode(string : String)
    encode string.chars
  end

  # Encodes a collection of `Char`s and returns the result as a new `String`.
  def self.encode(chars)
    String.build { |io| encode chars, io }
  end

  # Encodes *string*, appending the result to *io*.
  def self.encode(string : String, io)
    encode string.chars, io
  end

  # Encodes *chars*, appending the result to *io*.
  #
  # Code points below U+0080 are copied through verbatim. If there are no
  # other code points nothing more is written (in particular no trailing
  # delimiter). Otherwise a delimiter follows the copied prefix (when that
  # prefix is non-empty) and then the variable-length deltas.
  #
  # Raises `Error` ("overflow") if a delta would exceed `Int32::MAX`.
  def self.encode(chars, io)
    # h counts the code points that have been emitted so far.
    h = 0
    others = [] of Char

    chars.each do |c|
      if c < '\u0080'
        h += 1
        io << c
      else
        others.push c
      end
    end

    return if others.empty?
    others.sort!
    io << DELIMITER if h > 0

    delta = 0_u32
    n = INITIAL_N
    bias = INITIAL_BIAS
    firsttime = true
    prev = nil

    h += 1
    # Walk the non-basic code points in ascending order, once per distinct value.
    others.each do |m|
      next if m == prev
      prev = m

      raise Error.new("overflow") if m.ord - n > (Int32::MAX - delta) / h
      delta += (m.ord - n) * h
      n = m.ord + 1

      chars.each do |c|
        if c < m
          raise Error.new("overflow") if delta > Int32::MAX - 1
          delta += 1
        elsif c == m
          # Emit delta as a generalized variable-length integer.
          q = delta
          k = BASE
          loop do
            t = threshold k, bias
            break if q < t
            io << BASE36[t + ((q - t) % (BASE - t))]
            q = (q - t) / (BASE - t)
            k += BASE
          end
          io << BASE36[q]

          bias = adapt delta, h, firsttime
          delta = 0
          h += 1
          firsttime = false
        end
      end
      delta += 1
    end
  end

  # Decodes the Punycode *string* and returns the decoded `String`.
  #
  # Everything before the last delimiter is taken as literal output; the
  # remainder is read as a sequence of digits.
  #
  # Raises `Error` ("invalid input") if a character after the last delimiter
  # is not a digit, or if the input ends in the middle of a delta.
  def self.decode(string)
    if delim = string.rindex(DELIMITER)
      output = string[0...delim].chars
      delim += 1
    else
      output = [] of Char
      delim = 0
    end

    n = INITIAL_N
    bias = INITIAL_BIAS
    i = 0
    # init is true whenever the next digit starts a new delta.
    init = true
    w = oldi = k = 0

    string[delim..-1].each_char do |c|
      if init
        w = 1
        oldi = i
        k = BASE
        init = false
      end

      digit = decode_digit c
      raise Error.new("invalid input") if digit == -1

      i += digit * w
      t = threshold k, bias

      if digit < t
        # Last digit of this delta: insert the decoded code point.
        outsize = output.size + 1
        bias = adapt i - oldi, outsize, oldi == 0
        n += i / outsize
        i %= outsize
        output.insert i, n.chr
        i += 1
        init = true
      else
        w *= BASE - t
        k += BASE
      end
    end

    raise Error.new "invalid input" unless init

    output.join
  end

  # Converts a host name to an ASCII-only form.
  #
  # ASCII-only input is returned unchanged. Otherwise the name is split on
  # '.', and every label containing non-ASCII characters is replaced by
  # "xn--" followed by its Punycode encoding; ASCII-only labels are kept as
  # they are. No other normalization is applied to the labels here.
  def self.to_ascii(string)
    return string if string.ascii_only?

    String.build do |io|
      first = true
      string.split('.').each do |part|
        io << "." unless first

        if part.ascii_only?
          io << part
        else
          io << "xn--"
          encode part, io
        end

        first = false
      end
    end
  end
end
8 changes: 8 additions & 0 deletions src/socket/ip_socket.cr
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
require "punycode"

class IPSocket < Socket
def local_address
sockaddr = uninitialized LibC::SockAddrIn6
Expand Down Expand Up @@ -45,6 +47,12 @@ class IPSocket < Socket
end

def self.getaddrinfo(host, port, family, socktype, protocol = LibC::IPPROTO_IP, timeout = nil)
# RFC 3986 says:
# > When a non-ASCII registered name represents an internationalized domain name
# > intended for resolution via the DNS, the name must be transformed to the IDNA
# > encoding [RFC3490] prior to name lookup.
host = Punycode.to_ascii host

hints = LibC::Addrinfo.new
hints.family = (family || LibC::AF_UNSPEC).to_i32
hints.socktype = socktype
Expand Down

0 comments on commit 87efb3e

Please sign in to comment.