-
-
Notifications
You must be signed in to change notification settings - Fork 1.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add punycode support and integrate with DNS lookup
- Loading branch information
1 parent
1ee669e
commit 87efb3e
Showing
3 changed files
with
205 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
require "spec" | ||
require "punycode" | ||
|
||
describe Punycode do
  # Each entry is {decoded text, expected Punycode form}; every entry is
  # checked in both directions (encode and decode).
  samples = [
    {"3年B組金八先生", "3B-ww4c5e180e575a65lsy2b"},
    {"安室奈美恵-with-SUPER-MONKEYS", "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n"},
    {"Hello-Another-Way-それぞれの場所", "Hello-Another-Way--fc4qua05auwb3674vfr0b"},
    {"ひとつ屋根の下2", "2-u9tlzr9756bt3uc0v"},
    {"MajiでKoiする5秒前", "MajiKoi5-783gue6qz075azm5e"},
    {"パフィーdeルンバ", "de-jg4avhby1noc0d"},
    {"そのスピードで", "d9juau41awczczp"},
    {"Hello-Another-Way-それぞれ", "Hello-Another-Way--fc4qua97gba"},
  ]

  samples.each do |sample|
    plain = sample[0]
    encoded = sample[1]

    it "encodes #{plain} to #{encoded}" do
      Punycode.encode(plain).should eq(encoded)
    end

    it "decodes #{encoded} to #{plain}" do
      Punycode.decode(encoded).should eq(plain)
    end
  end

  # Only the non-ASCII labels of the host name get the "xn--" form.
  it "translate to ascii only host name" do
    ascii_host = Punycode.to_ascii("test.テスト.テスト")
    ascii_host.should eq("test.xn--zckzah.xn--zckzah")
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,168 @@ | ||
# Punycode encoding and decoding of Unicode strings, plus a helper that turns
# a dotted host name into its ASCII-only ("xn--") form.
#
# The numeric constants are the Bootstring parameters of the algorithm; their
# values are the ones RFC 3492 lists for Punycode.
#
# NOTE(review): `/` is used as integer division throughout this module (its
# results are used as string indices and code points). Confirm that the
# targeted Crystal version still yields an Int for `Int / Int`.
module Punycode
  # Raised when encoding would overflow and when decoding meets malformed
  # input (an invalid digit character or a truncated sequence).
  class Error < Exception; end

  BASE         =  36
  TMIN         =   1
  TMAX         =  26
  SKEW         =  38
  DAMP         = 700
  INITIAL_BIAS =  72
  INITIAL_N    = 128

  # Separates the literally copied ASCII characters from the encoded tail.
  DELIMITER = '-'

  # Digit alphabet: indices 0..25 are 'a'..'z', indices 26..35 are '0'..'9'.
  BASE36 = "abcdefghijklmnopqrstuvwxyz0123456789"

  # Returns the bias to use for the next delta.
  #
  # *delta* is the delta that was just processed, *numpoints* the number of
  # code points handled so far (including the current one) and *firsttime*
  # tells whether this is the first adaptation, in which case the delta is
  # damped by `DAMP` instead of being halved.
  private def self.adapt(delta, numpoints, firsttime)
    delta /= firsttime ? DAMP : 2
    delta += delta / numpoints
    k = 0
    while delta > ((BASE - TMIN) * TMAX) / 2
      delta /= BASE - TMIN
      k += BASE
    end
    k + (((BASE - TMIN + 1) * delta) / (delta + SKEW))
  end

  # Returns the digit threshold for position *k* under the current *bias*:
  # `k - bias` clamped to the range `TMIN..TMAX`.
  private def self.threshold(k, bias)
    if k <= bias
      TMIN
    elsif k >= bias + TMAX
      TMAX
    else
      k - bias
    end
  end

  # Returns the numeric value (0..35) of the digit character *c*.
  #
  # Letters are accepted in either case ('a'/'A' => 0 ... 'z'/'Z' => 25) and
  # '0'..'9' map to 26..35. Any other character raises `Error`; in particular
  # the ASCII characters that sit between 'Z' and 'a' are rejected.
  private def self.digit_value(c)
    if 'a' <= c && c <= 'z'
      c.ord - 0x61
    elsif 'A' <= c && c <= 'Z'
      c.ord - 0x41
    elsif '0' <= c && c <= '9'
      c.ord - 0x30 + 26
    else
      raise Error.new("invalid input")
    end
  end

  # Encodes *string* and returns the result as a new String.
  def self.encode(string : String)
    encode string.chars
  end

  # Encodes the characters in *chars* and returns the result as a new String.
  def self.encode(chars)
    String.build { |io| encode chars, io }
  end

  # Encodes *string*, writing the result to *io*.
  def self.encode(string : String, io)
    encode string.chars, io
  end

  # Encodes the characters in *chars*, writing the result to *io*.
  #
  # Characters below U+0080 are copied to *io* unchanged, in order. If there
  # are no other characters nothing more is written (no delimiter either).
  # Otherwise a `DELIMITER` follows the copied characters (it is omitted when
  # nothing was copied) and then the encoded digits for the remaining
  # characters.
  #
  # Raises `Error` ("overflow") when a delta would exceed `Int32::MAX`.
  def self.encode(chars, io)
    h = 0
    all = true # stays true while no ASCII character has been copied
    others = [] of Char

    chars.each do |c|
      if c < '\u0080'
        h += 1
        io << c
        all = false
      else
        others.push c
      end
    end

    return if others.empty?
    # Non-ASCII characters are handled in ascending code point order.
    others.sort!
    io << DELIMITER unless all

    delta = 0_u32
    n = INITIAL_N
    bias = INITIAL_BIAS
    firsttime = true
    prev = nil

    # From here on `h` is "number of handled code points + 1".
    h += 1
    others.each do |m|
      # `others` is sorted, so duplicates are adjacent; each distinct code
      # point is processed once and the inner loop covers all occurrences.
      next if m == prev
      prev = m

      raise Error.new("overflow") if m.ord - n > (Int32::MAX - delta) / h
      delta += (m.ord - n) * h
      n = m.ord + 1

      chars.each do |c|
        if c < m
          raise Error.new("overflow") if delta > Int32::MAX - 1
          delta += 1
        elsif c == m
          # Emit `delta` as a variable-length integer, least significant
          # digit first.
          q = delta
          k = BASE
          loop do
            t = threshold k, bias
            break if q < t
            io << BASE36[t + ((q - t) % (BASE - t))]
            q = (q - t) / (BASE - t)
            k += BASE
          end
          io << BASE36[q]

          bias = adapt delta, h, firsttime
          delta = 0
          h += 1
          firsttime = false
        end
      end
      delta += 1
    end
  end

  # Decodes the Punycode text *string* and returns the decoded String.
  #
  # Everything before the last `DELIMITER` is taken over literally; the rest
  # is read as a sequence of variable-length integers, each of which inserts
  # one character into the output.
  #
  # Raises `Error` ("invalid input") when a character of the encoded part is
  # not a letter or a decimal digit, or when the input ends in the middle of
  # an integer.
  def self.decode(string)
    if delim = string.rindex(DELIMITER)
      output = string[0...delim].chars
      delim += 1
    else
      output = [] of Char
      delim = 0
    end

    n = INITIAL_N
    bias = INITIAL_BIAS
    i = 0
    init = true # true whenever the next digit starts a new integer
    w = oldi = k = 0

    string[delim..-1].each_char do |c|
      if init
        w = 1
        oldi = i
        k = BASE
        init = false
      end

      digit = digit_value c

      i += digit * w
      t = threshold k, bias

      if digit < t
        # Last digit of this integer: split it into a code point increment
        # and an insertion position.
        outsize = output.size + 1
        bias = adapt i - oldi, outsize, oldi == 0
        n += i / outsize
        i %= outsize
        output.insert i, n.chr
        i += 1
        init = true
      else
        w *= BASE - t
        k += BASE
      end
    end

    # An integer was started but its final digit never arrived.
    raise Error.new "invalid input" unless init

    output.join
  end

  # Returns *string* with every '.'-separated part that contains non-ASCII
  # characters replaced by "xn--" followed by its encoded form. Parts that
  # are already ASCII are kept as they are, and a fully ASCII *string* is
  # returned unchanged.
  def self.to_ascii(string)
    return string if string.ascii_only?

    String.build do |io|
      string.split('.').each_with_index do |part, index|
        io << "." unless index == 0

        if part.ascii_only?
          io << part
        else
          io << "xn--"
          encode part, io
        end
      end
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters