/**
 * Helpers for converting internationalized domain names (IDN) to their
 * ASCII-compatible form: per-label validation, U-label to A-label conversion
 * (via requests.idna.punycode) and whole-domain encoding.
 */
module requests.idna;

import std.format;
import std.exception;
import std.uni;
import std.ascii;
import std.range;
import std.algorithm;
import std.regex;
import std.functional;

static import requests.idna.punycode;


private {
    /// Prefix prepended to every punycode-encoded label.
    static immutable _alabel_prefix = "xn--";
    /// Character class matching every code point accepted as a label separator.
    static immutable _unicode_dots_re = "[\\.\u002e\u3002\uff0e\uff61]";
}

/// Exception thrown by every validation and encoding routine in this module.
class IDNAException: Exception {
    this(string msg, string file = __FILE__, size_t line = __LINE__) pure @safe {
        super(msg, file, line);
    }
}

/// Returns: true when `label` is at most 63 code units long.
bool valid_label_length(string label) pure nothrow @nogc @safe {
    return label.length <= 63;
}

/**
 * Returns: true when `label` is at most 253 code units long, or at most 254
 * when `trailing_dot` is set.
 */
bool valid_string_length(string label, bool trailing_dot = false) pure nothrow @safe @nogc {
    return label.length <= (trailing_dot ? 254 : 253);
}

/**
 * Validates hyphen placement in a label.
 *
 * Params:
 *   label = label to check
 * Returns: true when the label passes.
 * Throws: IDNAException when the label is empty, starts or ends with '-',
 *         or has "--" as its third and fourth characters.
 */
bool check_hyphen_ok(string label) pure @safe {
    // Indexing an empty label would raise a RangeError; report it as a
    // regular validation failure instead.
    if ( label.length == 0 ) {
        throw new IDNAException("Empty label");
    }
    // '-' is ASCII, so comparing the first/last UTF-8 code unit is exact.
    if ( label[0] == '-' || label[$-1] == '-' ) {
        throw new IDNAException("Label can't start or ends with hyphen");
    }
    // The rule is about the 3rd and 4th *characters*; drop/take decode code
    // points, whereas slicing label[2..4] would look at raw UTF-8 bytes.
    if ( label.drop(2).take(2).equal("--") ) {
        throw new IDNAException("Label can't have hyphens in 3 and 4 positions");
    }
    return true;
}

/**
 * Verifies that a label is unchanged by std.uni.normalize (default form, NFC).
 *
 * Params:
 *   label = label to check
 * Returns: true when the label equals its normalized form.
 * Throws: IDNAException when normalization changes the label.
 */
bool check_nfc(string label) @safe {
    // Compare by value: normalize() may hand back a newly allocated but
    // identical string, so an identity test (`is`) rejects valid labels.
    if ( label != normalize(label) ) {
        throw new IDNAException("label %s is not normalized".format(label));
    }
    return true;
}

/**
 * Rejects labels whose first code point has a non-zero combining class.
 *
 * Params:
 *   label = label to check
 * Returns: true when the label passes.
 * Throws: IDNAException when the label is empty or begins with a combining
 *         character.
 */
bool check_initial_combiner(string label) pure @safe {
    // `front` is undefined on an empty string; fail with the module's own
    // exception type instead.
    if ( label.length == 0 ) {
        throw new IDNAException("Empty label");
    }
    if ( combiningClass(label.front) ) {
        throw new IDNAException("Label begins with an illegal combining character");
    }
    return true;
}

/**
 * Runs all label checks: non-empty, hyphen placement, normalization and
 * initial combining character.
 *
 * Returns: true when every check passes.
 * Throws: IDNAException on the first failed check.
 */
bool check_label(string label) @safe {

    if ( label.length == 0 ) {
        throw new IDNAException("Empty label");
    }
    check_hyphen_ok(label);
    check_nfc(label);
    check_initial_combiner(label);

    return true;
}

/**
 * Converts a U-label to an A-label: validates it, punycode-encodes it and
 * prepends "xn--".
 *
 * Throws: IDNAException when validation fails or the result exceeds the
 *         label length limit.
 */
string alabel(string label) @safe {
    check_label(label);
    auto result = _alabel_prefix ~ requests.idna.punycode.encode(label);
    if ( !valid_label_length(result) ) {
        throw new IDNAException("Label %s too long".format(result));
    }
    return result;
}

/**
 * Encodes a single label: labels made only of ASCII characters (including
 * the empty label) are returned untouched, anything else goes through alabel.
 */
string encode_label(string label) @safe {
    if ( label.count!(not!isASCII) == 0 )
        return label;
    return alabel(label);
}

/**
 * Encodes a domain name.
 *
 * A domain containing only ASCII characters is returned as is. Otherwise it
 * is lower-cased, split on the separators in `_unicode_dots_re`, each label is
 * passed through encode_label, and the results are joined with ".".
 *
 * Throws: IDNAException when a label is invalid or the encoded name is too
 *         long.
 */
string idn_encode(string domain) @safe {
    if ( domain.count!(not!isASCII) == 0 )
        return domain;
    auto src = domain.toLower;
    auto ulabels = src.splitter(regex(_unicode_dots_re));
    string encoded = ulabels.map!encode_label.join(".");
    // A name ending with the root dot is allowed one extra octet.
    if ( !valid_string_length(encoded, encoded.endsWith('.')) ) {
        throw new IDNAException("Encoded domain name is too long");
    }
    return encoded;
}

unittest {
    import std.stdio;
    import std.array;

    immutable tld_strings = [
        ["\u6d4b\u8bd5", "xn--0zwm56d"],
        ["\u092a\u0930\u0940\u0915\u094d\u0937\u093e", "xn--11b5bs3a9aj6g"],
        ["\ud55c\uad6d", "xn--3e0b707e"],
        ["\u0438\u0441\u043f\u044b\u0442\u0430\u043d\u0438\u0435", "xn--80akhbyknj4f"],
        ["\u0441\u0440\u0431", "xn--90a3ac"],
        ["\ud14c\uc2a4\ud2b8", "xn--9t4b11yi5a"],
        ["\u0b9a\u0bbf\u0b99\u0bcd\u0b95\u0baa\u0bcd\u0baa\u0bc2\u0bb0\u0bcd", "xn--clchc0ea0b2g2a9gcd"],
        ["\u05d8\u05e2\u05e1\u05d8", "xn--deba0ad"],
        ["\u4e2d\u56fd", "xn--fiqs8s"],
        ["\u4e2d\u570b", "xn--fiqz9s"],
        ["\u0c2d\u0c3e\u0c30\u0c24\u0c4d", "xn--fpcrj9c3d"],
        ["\u6e2c\u8a66", "xn--g6w251d"],
        ["\u0aad\u0abe\u0ab0\u0aa4", "xn--gecrj9c"],
        ["\u092d\u093e\u0930\u0924", "xn--h2brj9c"],
        ["\u0622\u0632\u0645\u0627\u06cc\u0634\u06cc", "xn--hgbk6aj7f53bba"],
        ["\u0baa\u0bb0\u0bbf\u0b9f\u0bcd\u0b9a\u0bc8", "xn--hlcj6aya9esc7a"],
        ["\u0443\u043a\u0440", "xn--j1amh"],
        ["\u9999\u6e2f", "xn--j6w193g"],
        ["\u03b4\u03bf\u03ba\u03b9\u03bc\u03ae", "xn--jxalpdlp"],
        ["\u0625\u062e\u062a\u0628\u0627\u0631", "xn--kgbechtv"],
        ["\u53f0\u6e7e", "xn--kprw13d"],
        ["\u53f0\u7063", "xn--kpry57d"],
        ["\u0627\u0644\u062c\u0632\u0627\u0626\u0631", "xn--lgbbat1ad8j"],
        ["\u0639\u0645\u0627\u0646", "xn--mgb9awbf"],
        ["\u0627\u06cc\u0631\u0627\u0646", "xn--mgba3a4f16a"],
        ["\u0627\u0645\u0627\u0631\u0627\u062a", "xn--mgbaam7a8h"],
        ["\u067e\u0627\u06a9\u0633\u062a\u0627\u0646", "xn--mgbai9azgqp6j"],
        ["\u0627\u0644\u0627\u0631\u062f\u0646", "xn--mgbayh7gpa"],
        ["\u0628\u06be\u0627\u0631\u062a", "xn--mgbbh1a71e"],
        ["\u0627\u0644\u0645\u063a\u0631\u0628", "xn--mgbc0a9azcg"],
        ["\u0627\u0644\u0633\u0639\u0648\u062f\u064a\u0629", "xn--mgberp4a5d4ar"],
        ["\u10d2\u10d4", "xn--node"],
        ["\u0e44\u0e17\u0e22", "xn--o3cw4h"],
        ["\u0633\u0648\u0631\u064a\u0629", "xn--ogbpf8fl"],
        ["\u0440\u0444", "xn--p1ai"],
        ["\u062a\u0648\u0646\u0633", "xn--pgbs0dh"],
        ["\u0645\u0635\u0631", "xn--wgbh1c"],
        ["\u0642\u0637\u0631", "xn--wgbl6a"],
        ["\u0b87\u0bb2\u0b99\u0bcd\u0b95\u0bc8", "xn--xkc2al3hye2a"],
        ["\u65b0\u52a0\u5761", "xn--yfro4i67o"],
        ["\u0641\u0644\u0633\u0637\u064a\u0646", "xn--ygbi2ammx"],
        ["\u30c6\u30b9\u30c8", "xn--zckzah"],
        ["\u049b\u0430\u0437", "xn--80ao21a"],
        ["\u0645\u0644\u064a\u0633\u064a\u0627", "xn--mgbx4cd0ab"],
        ["\u043c\u043e\u043d", "xn--l1acc"],
        ["\u0633\u0648\u062f\u0627\u0646", "xn--mgbpl2fh"]
        //
        // These strings were disabled because they did not pass the
        // normalization test.
        // NOTE(review): presumably that was caused by check_nfc comparing
        // string identity instead of contents; confirm the expected A-labels
        // before re-enabling them.
        //
        //["\u0dbd\u0d82\u0d9a\u0dcf", "xn--fzc2c9e2c"],
        //["\u09ad\u09be\u09b0\u09a4", "xn--45brj9c"],
        //["\u09ac\u09be\u0982\u09b2\u09be", "xn--54b7fta0cc"],
        //["\u0a2d\u0a3e\u0a30\u0a24", "xn--s9brj9c"],
        //["\u0b87\u0ba8\u0bcd\u0ba4\u0bbf\u0baf\u0bbe", "xn--xkc2dl3a5ee0h"],
    ];
    assert(valid_label_length("abc"));
    assert(!valid_label_length("a".replicate(64)));

    assert(valid_string_length("a".replicate(253)));
    assert(!valid_string_length("a".replicate(254)));
    assert(valid_string_length("a".replicate(254), true));

    assert(check_hyphen_ok("ab"));
    assertThrown!IDNAException(check_hyphen_ok("-abcd"));
    assertThrown!IDNAException(check_hyphen_ok("abcd-"));
    assertThrown!IDNAException(check_hyphen_ok("ab--cd"));
    // Empty input is a validation error, not a range violation.
    assertThrown!IDNAException(check_hyphen_ok(""));
    // Positions are counted in code points, not UTF-8 bytes.
    assert(check_hyphen_ok("\u00e9--a"));
    assertThrown!IDNAException(check_hyphen_ok("\u00e9a--b"));

    assert(check_nfc("привіт"));
    assert(check_nfc("\u03D3"));
    assertThrown!IDNAException(check_nfc("\u03D2\u0301"));
    // Already-normalized text must pass even when normalize() reallocates.
    assert(check_nfc("\u09ad\u09be\u09b0\u09a4"));

    assert(check_initial_combiner("n\u0303"));
    assertThrown!IDNAException(check_initial_combiner("\u0303n"));
    assertThrown!IDNAException(check_initial_combiner(""));

    foreach(p; tld_strings) {
        string u = p[0];
        string a = p[1];
        assert(alabel(u) == a);
    }
    assert(toLower("Тест") == "тест");
    assert(idn_encode("abc.de") == "abc.de");
    assert(idn_encode("тест") != "тест");
    assert(idn_encode("\u30c6\u30b9\u30c8.xn--zckzah") == "xn--zckzah.xn--zckzah");
    assert(idn_encode("\u30c6\u30b9\u30c8\uff0e\u30c6\u30b9\u30c8") == "xn--zckzah.xn--zckzah");
    assert(idn_encode("\u0521\u0525\u0523-\u0523\u0523-----\u0521\u0523\u0523\u0523.aa") == "xn---------90gglbagaar.aa");
}