1 module requests.idna.punycode.codec;
2 
3 /***********************************************************
4  * Adapted from https://gist.github.com/bnoordhuis/1035947 *
5  ***********************************************************/
6 
7 import std.stdio;
8 import std.uni;
9 import std.ascii;
10 import std.exception;
11 import std.typecons;
12 import std.algorithm;
13 import std.functional;
14 import std.array;
15 import std.format;
16 import std.conv;
17 import std.utf;
18 import std.exception;
19 import std.string;
20 
// Bootstring parameters used by the Punycode routines in this module.
enum base = 36;              // number of distinct digit values (a-z, then 0-9)
enum tmin = 1;               // smallest per-position threshold
enum tmax = 26;              // largest per-position threshold
enum skew = 38;              // skew term used when adapting the bias
enum damp = 700;             // divisor applied to the very first delta
enum initial_bias = 72;      // bias used before any adaptation happened
enum initial_n = 0x80;       // first non-ASCII code point
enum delimiter = 0x2D;       // '-' separates the basic prefix from the digits
enum unicode_max = 0x11_0000; // Unicode's upper bound + 1
28 
/// Thrown by the decoding routines when the input is not valid Punycode.
class DecodeException : Exception
{
    /// Forwards the message and the source location to `Exception`.
    this(string msg, string file = __FILE__, size_t line = __LINE__) @safe pure
    {
        super(msg, file, line);
    }
}
34 
/**
 * Finds the smallest value in `extended` that is not less than `n`.
 *
 * Params:
 *   extended = code points to search
 *   n        = inclusive lower bound
 *
 * Returns: the smallest element `>= n`. The function asserts that such an
 * element below `unicode_max` exists.
 */
size_t next_smallest_codepoint(in uint[] extended, size_t n) pure nothrow @safe @nogc {
    size_t best = unicode_max; // sentinel: Unicode's upper bound + 1

    foreach (candidate; extended) {
        // Skip anything below the bound or not better than what we have.
        if ( candidate < n || candidate >= best ) {
            continue;
        }
        best = candidate;
    }
    assert(best < unicode_max);
    return best;
}
46 
/**
 * Maps a digit value to its ASCII code unit.
 *
 * Values 0..25 become 'a'..'z' and values 26..35 become '0'..'9'.
 * The function asserts that `d` is below `base`.
 */
uint encode_digit(size_t d) pure nothrow @safe @nogc {
    assert(d < base);
    if ( d < 26 ) {
        return cast(uint)(d + 'a');
    }
    // 26 maps to '0', so shift by '0' - 26 (== 22).
    return cast(uint)(d + ('0' - 26));
}
51 
/**
 * Maps an ASCII code unit to its Punycode digit value.
 *
 * Letters are case-insensitive: 'a'..'z' and 'A'..'Z' give 0..25,
 * '0'..'9' give 26..35.
 *
 * Params:
 *   d = code unit to decode
 *
 * Returns: the digit value in the range 0..35.
 *
 * Throws: DecodeException if `d` is neither an ASCII letter nor a digit.
 */
uint decode_digit(uint d) pure @safe {
    if ( d >= '0' && d <= '9' ) {
        return d - '0' + 26; // '0'..'9' -> 26..35
    }
    if ( d >= 'A' && d <= 'Z' ) {
        return d - 'A'; // 'A'..'Z' -> 0..25
    }
    if ( d >= 'a' && d <= 'z' ) {
        return d - 'a'; // 'a'..'z' -> 0..25
    }
    throw new DecodeException("unexpected symbol %c while decoding".format(cast(dchar)d));
}
64 
/**
 * Computes the threshold for digit position `k` under the given `bias`.
 *
 * The result is `k - bias` clamped to the range `tmin`..`tmax`. The
 * comparisons are done against `bias + tmin` / `bias + tmax` so that the
 * unsigned subtraction is only evaluated when `k` is above `bias`.
 */
size_t threshold(size_t k, size_t bias) pure nothrow @safe @nogc {
    return k <= bias + tmin ? tmin
         : k >= bias + tmax ? tmax
         : k - bias;
}
74 
/**
 * Computes the next bias after a delta has been encoded or decoded.
 *
 * Params:
 *   delta    = the delta that was just processed
 *   n_points = number of code points handled so far (must be non-zero,
 *              it is used as a divisor)
 *   is_first = true for the very first delta, which is damped by `damp`
 *              instead of being halved
 *
 * Returns: the new bias.
 */
size_t adapt_bias(size_t delta, size_t n_points, bool is_first) pure nothrow @safe @nogc {
    immutable span  = base - tmin;         // 35
    immutable limit = (span * tmax) / 2;   // threshold=455

    delta /= is_first ? damp : 2;
    delta += delta / n_points;

    // Scale delta down until it fits under the limit, counting the steps.
    size_t k = 0;
    for (; delta > limit; k += base) {
        delta /= span;
    }

    immutable size_t numerator   = (span + 1) * delta;
    immutable size_t denominator = delta + skew;
    return k + numerator / denominator;
}
92 
/**
 * Encodes `delta` as a variable-length sequence of Punycode digits.
 *
 * Params:
 *   bias  = current bias, used to derive the threshold of every position
 *   delta = the value to encode
 *
 * Returns: the ASCII code units of the digits, least significant first.
 */
uint[] encode_int(size_t bias, size_t delta) pure nothrow @safe {
    uint[] digits;
    size_t rest = delta;

    for (size_t k = base; ; k += base) {
        immutable size_t t = threshold(k, bias);
        if ( rest < t ) {
            break; // what is left fits into the terminating digit
        }
        immutable size_t radix   = base - t;
        immutable size_t shifted = rest - t;
        digits ~= encode_digit(t + shifted % radix);
        rest = shifted / radix;
    }
    // The final digit is always below the threshold of its position.
    digits ~= encode_digit(rest);
    return digits;
}
112 
/**
 * Encodes a UTF-8 string into Punycode.
 *
 * Input consisting only of ASCII characters is returned unchanged (no
 * trailing delimiter is appended). Otherwise the ASCII characters are
 * copied first, followed by '-' when there is at least one of them, and
 * then the encoded deltas for the non-ASCII code points.
 *
 * Params:
 *   input = text to encode
 *
 * Returns: the Punycode form of `input`.
 */
string encode(string input) pure @safe nothrow {
    immutable uint[] codepoints = input.byUTF!dchar.map!(ch => cast(uint)ch).array;
    immutable uint[] non_basic = codepoints.filter!(not!isASCII).array;
    if ( !non_basic.length ) {
        return input;
    }
    immutable uint[] basic = codepoints.filter!isASCII.array;
    immutable size_t basic_count = basic.length;

    char[] output = basic.map!(ch => cast(char)ch).array;
    if ( output.length ) {
        output ~= '-';
    }

    size_t handled = basic_count;   // code points already written out
    size_t current = initial_n;     // code point being processed
    size_t bias = initial_bias;
    size_t delta = 0;

    for (; handled < codepoints.length; ++delta, ++current) {
        // Jump straight to the next code point that actually occurs.
        immutable size_t next = next_smallest_codepoint(non_basic, current);
        delta += (next - current) * (handled + 1);
        current = next;

        foreach (cp; codepoints) {
            if ( cp < current ) {
                // TODO check overflow
                ++delta;
                continue;
            }
            if ( cp != current ) {
                continue;
            }
            auto digits = encode_int(bias, delta);
            output ~= digits.map!(dg => cast(char)dg).array;
            bias = adapt_bias(delta, handled + 1, basic_count == handled);
            delta = 0;
            ++handled;
        }
    }
    return to!string(output);
}
155 
/**
 * Decodes a Punycode string into UTF-8.
 *
 * Everything before the last '-' is taken as the literal basic (ASCII)
 * prefix; the remainder is a sequence of variable-length integers, each
 * of which inserts one code point into the output.
 *
 * Params:
 *   input = Punycode text to decode
 *
 * Returns: the decoded string.
 *
 * Throws: DecodeException when the basic prefix contains a non-ASCII code
 * unit, when a digit is not an ASCII letter or digit, when the input ends
 * in the middle of an integer, or when a computation would overflow or
 * yield a code point outside the Unicode range.
 */
string decode(string input) pure @safe {
    import core.checkedint : addu, mulu;

    auto b = input.lastIndexOf('-') + 1;
    immutable uint[] source = input.byUTF!char.map!(c => cast(uint)c).array;

    uint[] output;
    if ( b > 0 ) {
        output = source[0..b-1].dup;
        // The literal prefix must consist of basic code points only;
        // otherwise raw UTF-8 bytes would be emitted as code points.
        foreach (c; output) {
            if ( !c.isASCII ) {
                throw new DecodeException("Trying to decode improper code %d".format(c));
            }
        }
    }

    size_t i = 0;
    size_t n = initial_n;
    size_t bias = initial_bias;

    while (b < source.length) {
        immutable size_t org_i = i;
        size_t k = base;
        size_t w = 1;

        // Read one variable-length integer, accumulating it into i.
        while ( true ) {
            if ( b >= source.length ) {
                throw new DecodeException("Got overflow decoding string %s".format(input));
            }
            immutable next_digit = source[b];
            if (!next_digit.isASCII ) {
                throw new DecodeException("Trying to decode improper code %d".format(next_digit));
            }
            immutable d = decode_digit(next_digit);
            b += 1;

            // i += d * w, rejecting wrap-around instead of decoding garbage.
            bool overflow = false;
            immutable size_t step = mulu(cast(size_t)d, w, overflow);
            i = addu(i, step, overflow);
            if ( overflow ) {
                throw new DecodeException("Got overflow decoding string %s".format(input));
            }

            immutable t = threshold(k, bias);
            if ( d < t ) {
                break;
            }
            w = mulu(w, cast(size_t)(base - t), overflow);
            if ( overflow ) {
                throw new DecodeException("Got overflow decoding string %s".format(input));
            }
            k += base;
        }
        immutable size_t x = 1 + output.length;
        bias = adapt_bias(i - org_i, x, org_i == 0);

        // n is always below unicode_max here, so the subtraction cannot
        // underflow; checking before the addition keeps n from wrapping
        // past the range test.
        immutable size_t advance = i / x;
        if ( advance >= unicode_max - n ) {
            throw new DecodeException("Got overflow decoding string %s".format(input));
        }
        n += advance;
        i %= x;
        output.insertInPlace(i, cast(uint)n);
        i += 1;
    }
    return output.map!(c => cast(dchar)c).toUTF8;
}
206 
// Round-trip checks against known plain/encoded pairs, plus malformed input.
unittest {
    // Each entry is [plain text, expected Punycode].
    immutable string[2][] vectors = [
        ["пpи-вeт", "p-e-gdd2a4b0a"],
        ["bücher", "bcher-kva"],
        ["а2б1¢𓃰", "21-6kcf07233afs7b"],
        ["例子", "fsqu00a"],
        ["उदाहरण", "p1b6ci4b4b3a"],
        ["παράδειγμα", "hxajbheg2az3al"],
        ["실례", "9n2bp8q"],
        ["例え", "r8jz45g"],
        ["உதாரணம்", "zkc6cc5bi7f6e"]
    ];
    foreach (vector; vectors) {
        immutable plain   = vector[0];
        immutable encoded = vector[1];
        assert(encode(plain) == encoded);
        assert(decode(encoded) == plain);
    }

    // Malformed inputs must be rejected with DecodeException.
    assertThrown!DecodeException(decode("99999999999")); // overflow
    assertThrown!DecodeException(decode("1𓃰2𓃰3𓃰")); // not a valid string to decode
    assertThrown!DecodeException(decode("ab+"));         // not a valid string to decode
}