module requests.idna.punycode.codec;

/***********************************************************
 * Adapted from https://gist.github.com/bnoordhuis/1035947 *
 ***********************************************************/

import std.stdio;
import std.uni;
import std.ascii;
import std.exception;
import std.typecons;
import std.algorithm;
import std.functional;
import std.array;
import std.format;
import std.conv;
import std.utf;
import std.exception;
import std.string;

/// Punycode parameters; the values are those listed in RFC 3492, section 5.
/// `unicode_max` is one past the highest code point accepted by this module.
enum    base = 36,
        tmin = 1, tmax = 26,
        skew = 38, damp = 700,
        initial_bias = 72,
        initial_n = 0x80,
        delimiter = 0x2D,
        unicode_max = 0x11_0000;

/// Thrown by `decode` and `decode_digit` when the input is not valid Punycode.
class DecodeException: Exception {
    this(string msg, string file = __FILE__, size_t line = __LINE__) pure @safe {
        super(msg, file, line);
    }
}

/**
 * Returns the smallest value in `extended` that is `>= n`.
 *
 * The caller must guarantee that such a value exists; otherwise the
 * assertion below fires (in builds where asserts are enabled).
 */
size_t next_smallest_codepoint(in uint[] extended, size_t n) pure nothrow @safe @nogc {
    size_t m = unicode_max; // Unicode's upper bound + 1

    foreach(c; extended) {
        if ( c >= n && c < m ) {
            m = c;
        }
    }
    assert(m < 0x110000);
    return m;
}

/**
 * Maps a digit value in `0 .. base` to its character code:
 * 0..25 become 'a'..'z' (97..122) and 26..35 become '0'..'9' (48..57).
 */
uint encode_digit(size_t d) pure nothrow @safe @nogc {
    assert(d < base);
    return cast(uint)(d + (d < 26 ? 97 : 22));
}

/**
 * Inverse of `encode_digit`; letters are accepted in either case.
 *
 * Throws: DecodeException if `d` is not an ASCII letter or digit.
 */
uint decode_digit(uint d) pure @safe {
    if ( d >= 48 && d <= 57 ) {
        return d - 22; // 0..9 -> 26..35
    }
    if ( d >= 65 && d <= 90 ) {
        return d - 65; // A..Z -> 0..25
    }
    if ( d >= 97 && d <= 122 ) {
        return d - 97; // a..z -> 0..25
    }
    throw new DecodeException("unexpected symbod %c while decoding".format(cast(dchar)d));
}

/// Returns `k - bias` clamped to the range `tmin .. tmax` (both inclusive).
size_t threshold(size_t k, size_t bias) pure nothrow @safe @nogc {
    if ( k <= bias + tmin ) {
        return tmin;
    }
    if ( k >= bias + tmax ) {
        return tmax;
    }
    return k - bias;
}

/**
 * Computes the bias to use for the next delta.
 *
 * Params:
 *   delta    = the delta that has just been encoded or decoded
 *   n_points = number of code points handled so far, including the current one
 *   is_first = true for the very first delta, which is damped by `damp` instead of 2
 */
size_t adapt_bias(size_t delta, size_t n_points, bool is_first) pure nothrow @safe @nogc {
    delta = delta / (is_first ? damp : 2);
    delta += delta / n_points;

    immutable s = (base - tmin);
    immutable t = (s * tmax) / 2; // threshold=455
    auto k = 0;

    while (delta > t) {
        delta = delta / s;
        k += base;
    }
    auto a = (base - tmin + 1) * delta;
    auto b = (delta + skew);

    return k + (a / b);
}

/**
 * Encodes `delta` as a variable-length sequence of digit character codes,
 * using the thresholds derived from `bias`.
 */
uint[] encode_int(size_t bias, size_t delta) pure nothrow @safe {
    uint[] result;

    size_t k = base;
    size_t q = delta;

    while ( true ) {
        immutable size_t t = threshold(k, bias);
        if ( q < t ) {
            // the remaining value fits into a single, terminating digit
            result ~= encode_digit(q);
            break;
        }
        auto c = t + ((q - t) % (base - t));
        q = (q - t) / (base - t);
        k += base;
        result ~= encode_digit(c);
    }
    return result;
}

/**
 * Punycode-encodes `input`.
 *
 * An input made only of ASCII characters is returned unchanged (no trailing
 * delimiter is appended). Otherwise the ASCII characters are emitted first,
 * followed by '-' when there is at least one of them, followed by the
 * encoded deltas of the non-ASCII code points.
 */
string encode(string input) pure @safe nothrow {
    immutable uint[] source = input.byUTF!dchar.map!(c => cast(uint)c).array;
    immutable uint[] extended = source.filter!(not!isASCII).array;
    if ( extended.length == 0 ) {
        return input;
    }
    immutable uint[] basic = source.filter!isASCII.array;
    auto b = basic.length;  // number of basic (ASCII) code points
    auto h = b;             // number of code points handled so far

    size_t n = initial_n;
    size_t bias = initial_bias;
    size_t delta = 0;

    char[] output = basic.map!(c => cast(char)c).array;

    if ( output.length ) {
        output ~= cast(uint)'-';
    }

    while ( h < source.length ) {
        // advance n to the next code point that still has to be encoded
        immutable size_t m = next_smallest_codepoint(extended, n);
        delta += (m - n) * (h + 1);
        n = m;
        foreach(c; source) {
            if ( c < n) {
                delta++;
                // TODO check overflow
            }
            if ( c == n ) {
                auto e = encode_int(bias, delta);
                output ~= e.map!(ch => cast(char)ch).array;
                bias = adapt_bias(delta, h + 1, b == h);
                delta = 0;
                h++;
            }
        }
        delta++;
        n++;
    }
    return to!string(output);
}

/**
 * Decodes the Punycode string `input`.
 *
 * Everything before the last '-' is copied to the output literally; the
 * rest is decoded as a sequence of generalized variable-length integers,
 * each of which inserts one code point into the output.
 *
 * Throws: DecodeException if
 *   - the literal part contains a non-ASCII code unit,
 *   - the encoded part contains anything but ASCII letters and digits,
 *   - the encoded part ends in the middle of an integer,
 *   - an intermediate value overflows `size_t`, or
 *   - a decoded code point would be `>= unicode_max`.
 */
string decode(string input) pure @safe {

    auto b = input.lastIndexOf('-') + 1;
    immutable uint[] source = input.byUTF!char.map!(c => cast(uint)c).array;
    uint[] output = b > 0 ? source[0..b-1].dup : [];

    // `source` holds UTF-8 code units, so a non-ASCII character in the literal
    // part would otherwise be copied byte by byte and come out as garbage.
    foreach(unit; output) {
        if ( unit >= initial_n ) {
            throw new DecodeException("Trying to decode improper code %d".format(unit));
        }
    }

    size_t i = 0;
    size_t n = initial_n;
    size_t bias = initial_bias;

    while (b < source.length) {
        immutable size_t org_i = i;
        size_t k = base;
        size_t w = 1;

        while ( true ) {
            if ( b >= source.length ) {
                throw new DecodeException("Got overflow decoding string %s".format(input));
            }
            immutable next_digit = source[b];
            if (!next_digit.isASCII ) {
                throw new DecodeException("Trying to decode improper code %d".format(next_digit));
            }
            immutable d = decode_digit(source[b]);
            b += 1;

            // refuse to let i + d * w wrap around (w is always >= 1)
            if ( d > (size_t.max - i) / w ) {
                throw new DecodeException("Got overflow decoding string %s".format(input));
            }
            i += d * w;

            immutable t = threshold(k, bias);
            if ( d < t ) {
                break;
            }
            // t <= tmax, hence radix >= base - tmax > 0
            immutable size_t radix = base - t;
            if ( w > size_t.max / radix ) {
                throw new DecodeException("Got overflow decoding string %s".format(input));
            }
            w *= radix;
            k += base;
        }
        size_t x = 1 + output.length;
        bias = adapt_bias(i - org_i, x, org_i == 0);

        // n < unicode_max holds here, so the subtraction cannot underflow;
        // testing before the addition keeps n from wrapping around.
        if ( i / x >= unicode_max - n ) {
            throw new DecodeException("Got overflow decoding string %s".format(input));
        }
        n += i / x;
        i %= x;
        output.insertInPlace(i, cast(uint)n);
        i += 1;
    }
    return output.map!(c => cast(dchar)c).toUTF8;
}

unittest {
    import std.algorithm.comparison;

    auto pairs = [
        ["пpи-вeт", "p-e-gdd2a4b0a"],
        ["bücher", "bcher-kva"],
        ["а2б1¢𓃰", "21-6kcf07233afs7b"],
        ["例子", "fsqu00a"],
        ["उदाहरण", "p1b6ci4b4b3a"],
        ["παράδειγμα", "hxajbheg2az3al"],
        ["실례", "9n2bp8q"],
        ["例え", "r8jz45g"],
        ["உதாரணம்", "zkc6cc5bi7f6e"]
    ];
    foreach(p; pairs) {
        assert(encode(p[0]) == p[1]);
        assert(decode(p[1]) == p[0]);
    }
    assertThrown!DecodeException(decode("99999999999"));  // overflow
    assertThrown!DecodeException(decode("1𓃰2𓃰3𓃰"));   // not a valid string to decode
    assertThrown!DecodeException(decode("ab+"));          // not a valid string to decode
    assertThrown!DecodeException(decode("é-"));           // non-ASCII before the delimiter
    assertThrown!DecodeException(decode("99999999999999999999a")); // accumulator overflow
}