]>
Commit | Line | Data |
---|---|---|
729e4ab9 A |
1 | /* |
2 | ******************************************************************************* | |
57a6839d | 3 | * Copyright (C) 2010-2014, International Business Machines |
729e4ab9 A |
4 | * Corporation and others. All Rights Reserved. |
5 | ******************************************************************************* | |
6 | * file name: uts46.cpp | |
7 | * encoding: US-ASCII | |
8 | * tab size: 8 (not used) | |
9 | * indentation:4 | |
10 | * | |
11 | * created on: 2010mar09 | |
12 | * created by: Markus W. Scherer | |
13 | */ | |
14 | ||
15 | #include "unicode/utypes.h" | |
16 | ||
17 | #if !UCONFIG_NO_IDNA | |
18 | ||
19 | #include "unicode/idna.h" | |
20 | #include "unicode/normalizer2.h" | |
4388f060 | 21 | #include "unicode/uscript.h" |
729e4ab9 | 22 | #include "unicode/ustring.h" |
4388f060 | 23 | #include "unicode/utf16.h" |
729e4ab9 A |
24 | #include "cmemory.h" |
25 | #include "cstring.h" | |
26 | #include "punycode.h" | |
4388f060 | 27 | #include "ubidi_props.h" |
729e4ab9 A |
28 | #include "ustr_imp.h" |
29 | ||
30 | #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) | |
31 | ||
32 | // Note about tests for UIDNA_ERROR_DOMAIN_NAME_TOO_LONG: | |
33 | // | |
34 | // The domain name length limit is 255 octets in an internal DNS representation | |
35 | // where the last ("root") label is the empty label | |
36 | // represented by length byte 0 alone. | |
37 | // In a conventional string, this translates to 253 characters, or 254 | |
38 | // if there is a trailing dot for the root label. | |
39 | ||
40 | U_NAMESPACE_BEGIN | |
41 | ||
42 | // Severe errors which usually result in a U+FFFD replacement character in the result string. | |
43 | const uint32_t severeErrors= | |
44 | UIDNA_ERROR_LEADING_COMBINING_MARK| | |
45 | UIDNA_ERROR_DISALLOWED| | |
46 | UIDNA_ERROR_PUNYCODE| | |
47 | UIDNA_ERROR_LABEL_HAS_DOT| | |
48 | UIDNA_ERROR_INVALID_ACE_LABEL; | |
49 | ||
50 | static inline UBool | |
51 | isASCIIString(const UnicodeString &dest) { | |
52 | const UChar *s=dest.getBuffer(); | |
53 | const UChar *limit=s+dest.length(); | |
54 | while(s<limit) { | |
55 | if(*s++>0x7f) { | |
56 | return FALSE; | |
57 | } | |
58 | } | |
59 | return TRUE; | |
60 | } | |
61 | ||
62 | static UBool | |
63 | isASCIIOkBiDi(const UChar *s, int32_t length); | |
64 | ||
65 | static UBool | |
66 | isASCIIOkBiDi(const char *s, int32_t length); | |
67 | ||
68 | // IDNA class default implementations -------------------------------------- *** | |
69 | ||
4388f060 A |
70 | IDNA::~IDNA() {} |
71 | ||
729e4ab9 A |
72 | void |
73 | IDNA::labelToASCII_UTF8(const StringPiece &label, ByteSink &dest, | |
74 | IDNAInfo &info, UErrorCode &errorCode) const { | |
75 | if(U_SUCCESS(errorCode)) { | |
76 | UnicodeString destString; | |
77 | labelToASCII(UnicodeString::fromUTF8(label), destString, | |
78 | info, errorCode).toUTF8(dest); | |
79 | } | |
80 | } | |
81 | ||
82 | void | |
83 | IDNA::labelToUnicodeUTF8(const StringPiece &label, ByteSink &dest, | |
84 | IDNAInfo &info, UErrorCode &errorCode) const { | |
85 | if(U_SUCCESS(errorCode)) { | |
86 | UnicodeString destString; | |
87 | labelToUnicode(UnicodeString::fromUTF8(label), destString, | |
88 | info, errorCode).toUTF8(dest); | |
89 | } | |
90 | } | |
91 | ||
92 | void | |
93 | IDNA::nameToASCII_UTF8(const StringPiece &name, ByteSink &dest, | |
94 | IDNAInfo &info, UErrorCode &errorCode) const { | |
95 | if(U_SUCCESS(errorCode)) { | |
96 | UnicodeString destString; | |
97 | nameToASCII(UnicodeString::fromUTF8(name), destString, | |
98 | info, errorCode).toUTF8(dest); | |
99 | } | |
100 | } | |
101 | ||
102 | void | |
103 | IDNA::nameToUnicodeUTF8(const StringPiece &name, ByteSink &dest, | |
104 | IDNAInfo &info, UErrorCode &errorCode) const { | |
105 | if(U_SUCCESS(errorCode)) { | |
106 | UnicodeString destString; | |
107 | nameToUnicode(UnicodeString::fromUTF8(name), destString, | |
108 | info, errorCode).toUTF8(dest); | |
109 | } | |
110 | } | |
111 | ||
729e4ab9 A |
112 | // UTS46 class declaration ------------------------------------------------- *** |
113 | ||
114 | class UTS46 : public IDNA { | |
115 | public: | |
116 | UTS46(uint32_t options, UErrorCode &errorCode); | |
117 | virtual ~UTS46(); | |
118 | ||
119 | virtual UnicodeString & | |
120 | labelToASCII(const UnicodeString &label, UnicodeString &dest, | |
121 | IDNAInfo &info, UErrorCode &errorCode) const; | |
122 | ||
123 | virtual UnicodeString & | |
124 | labelToUnicode(const UnicodeString &label, UnicodeString &dest, | |
125 | IDNAInfo &info, UErrorCode &errorCode) const; | |
126 | ||
127 | virtual UnicodeString & | |
128 | nameToASCII(const UnicodeString &name, UnicodeString &dest, | |
129 | IDNAInfo &info, UErrorCode &errorCode) const; | |
130 | ||
131 | virtual UnicodeString & | |
132 | nameToUnicode(const UnicodeString &name, UnicodeString &dest, | |
133 | IDNAInfo &info, UErrorCode &errorCode) const; | |
134 | ||
135 | virtual void | |
136 | labelToASCII_UTF8(const StringPiece &label, ByteSink &dest, | |
137 | IDNAInfo &info, UErrorCode &errorCode) const; | |
138 | ||
139 | virtual void | |
140 | labelToUnicodeUTF8(const StringPiece &label, ByteSink &dest, | |
141 | IDNAInfo &info, UErrorCode &errorCode) const; | |
142 | ||
143 | virtual void | |
144 | nameToASCII_UTF8(const StringPiece &name, ByteSink &dest, | |
145 | IDNAInfo &info, UErrorCode &errorCode) const; | |
146 | ||
147 | virtual void | |
148 | nameToUnicodeUTF8(const StringPiece &name, ByteSink &dest, | |
149 | IDNAInfo &info, UErrorCode &errorCode) const; | |
150 | ||
151 | private: | |
152 | UnicodeString & | |
153 | process(const UnicodeString &src, | |
154 | UBool isLabel, UBool toASCII, | |
155 | UnicodeString &dest, | |
156 | IDNAInfo &info, UErrorCode &errorCode) const; | |
157 | ||
158 | void | |
159 | processUTF8(const StringPiece &src, | |
160 | UBool isLabel, UBool toASCII, | |
161 | ByteSink &dest, | |
162 | IDNAInfo &info, UErrorCode &errorCode) const; | |
163 | ||
164 | UnicodeString & | |
165 | processUnicode(const UnicodeString &src, | |
166 | int32_t labelStart, int32_t mappingStart, | |
167 | UBool isLabel, UBool toASCII, | |
168 | UnicodeString &dest, | |
169 | IDNAInfo &info, UErrorCode &errorCode) const; | |
170 | ||
171 | // returns the new dest.length() | |
172 | int32_t | |
173 | mapDevChars(UnicodeString &dest, int32_t labelStart, int32_t mappingStart, | |
174 | UErrorCode &errorCode) const; | |
175 | ||
176 | // returns the new label length | |
177 | int32_t | |
178 | processLabel(UnicodeString &dest, | |
179 | int32_t labelStart, int32_t labelLength, | |
180 | UBool toASCII, | |
181 | IDNAInfo &info, UErrorCode &errorCode) const; | |
182 | int32_t | |
183 | markBadACELabel(UnicodeString &dest, | |
184 | int32_t labelStart, int32_t labelLength, | |
185 | UBool toASCII, IDNAInfo &info) const; | |
186 | ||
187 | void | |
188 | checkLabelBiDi(const UChar *label, int32_t labelLength, IDNAInfo &info) const; | |
189 | ||
190 | UBool | |
191 | isLabelOkContextJ(const UChar *label, int32_t labelLength) const; | |
192 | ||
4388f060 A |
193 | void |
194 | checkLabelContextO(const UChar *label, int32_t labelLength, IDNAInfo &info) const; | |
195 | ||
729e4ab9 A |
196 | const Normalizer2 &uts46Norm2; // uts46.nrm |
197 | uint32_t options; | |
198 | }; | |
199 | ||
200 | IDNA * | |
201 | IDNA::createUTS46Instance(uint32_t options, UErrorCode &errorCode) { | |
202 | if(U_SUCCESS(errorCode)) { | |
203 | IDNA *idna=new UTS46(options, errorCode); | |
204 | if(idna==NULL) { | |
205 | errorCode=U_MEMORY_ALLOCATION_ERROR; | |
206 | } else if(U_FAILURE(errorCode)) { | |
207 | delete idna; | |
208 | idna=NULL; | |
209 | } | |
210 | return idna; | |
211 | } else { | |
212 | return NULL; | |
213 | } | |
214 | } | |
215 | ||
216 | // UTS46 implementation ---------------------------------------------------- *** | |
217 | ||
218 | UTS46::UTS46(uint32_t opt, UErrorCode &errorCode) | |
219 | : uts46Norm2(*Normalizer2::getInstance(NULL, "uts46", UNORM2_COMPOSE, errorCode)), | |
220 | options(opt) {} | |
221 | ||
222 | UTS46::~UTS46() {} | |
223 | ||
224 | UnicodeString & | |
225 | UTS46::labelToASCII(const UnicodeString &label, UnicodeString &dest, | |
226 | IDNAInfo &info, UErrorCode &errorCode) const { | |
227 | return process(label, TRUE, TRUE, dest, info, errorCode); | |
228 | } | |
229 | ||
230 | UnicodeString & | |
231 | UTS46::labelToUnicode(const UnicodeString &label, UnicodeString &dest, | |
232 | IDNAInfo &info, UErrorCode &errorCode) const { | |
233 | return process(label, TRUE, FALSE, dest, info, errorCode); | |
234 | } | |
235 | ||
236 | UnicodeString & | |
237 | UTS46::nameToASCII(const UnicodeString &name, UnicodeString &dest, | |
238 | IDNAInfo &info, UErrorCode &errorCode) const { | |
239 | process(name, FALSE, TRUE, dest, info, errorCode); | |
240 | if( dest.length()>=254 && (info.errors&UIDNA_ERROR_DOMAIN_NAME_TOO_LONG)==0 && | |
241 | isASCIIString(dest) && | |
242 | (dest.length()>254 || dest[253]!=0x2e) | |
243 | ) { | |
244 | info.errors|=UIDNA_ERROR_DOMAIN_NAME_TOO_LONG; | |
245 | } | |
246 | return dest; | |
247 | } | |
248 | ||
249 | UnicodeString & | |
250 | UTS46::nameToUnicode(const UnicodeString &name, UnicodeString &dest, | |
251 | IDNAInfo &info, UErrorCode &errorCode) const { | |
252 | return process(name, FALSE, FALSE, dest, info, errorCode); | |
253 | } | |
254 | ||
255 | void | |
256 | UTS46::labelToASCII_UTF8(const StringPiece &label, ByteSink &dest, | |
257 | IDNAInfo &info, UErrorCode &errorCode) const { | |
258 | processUTF8(label, TRUE, TRUE, dest, info, errorCode); | |
259 | } | |
260 | ||
261 | void | |
262 | UTS46::labelToUnicodeUTF8(const StringPiece &label, ByteSink &dest, | |
263 | IDNAInfo &info, UErrorCode &errorCode) const { | |
264 | processUTF8(label, TRUE, FALSE, dest, info, errorCode); | |
265 | } | |
266 | ||
267 | void | |
268 | UTS46::nameToASCII_UTF8(const StringPiece &name, ByteSink &dest, | |
269 | IDNAInfo &info, UErrorCode &errorCode) const { | |
270 | processUTF8(name, FALSE, TRUE, dest, info, errorCode); | |
271 | } | |
272 | ||
273 | void | |
274 | UTS46::nameToUnicodeUTF8(const StringPiece &name, ByteSink &dest, | |
275 | IDNAInfo &info, UErrorCode &errorCode) const { | |
276 | processUTF8(name, FALSE, FALSE, dest, info, errorCode); | |
277 | } | |
278 | ||
// UTS #46 data for ASCII characters, indexed by code point 0x00..0x7F.
// The normalizer (using uts46.nrm) maps uppercase ASCII letters to lowercase
// and passes through all other ASCII characters.
// If UIDNA_USE_STD3_RULES is set, then non-LDH characters are disallowed
// using this data.
// The ASCII fastpath also uses this data.
// Values: -1=disallowed  0=valid  1=mapped (lowercase)
static const int8_t asciiData[128]={
    /* 0x00..0x0F */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x10..0x1F */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x20..0x2F -- 002D..002E; valid  # HYPHEN-MINUS..FULL STOP */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  0,  0, -1,
    /* 0x30..0x3F -- 0030..0039; valid  # DIGIT ZERO..DIGIT NINE */
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1, -1, -1, -1, -1, -1,
    /* 0x40..0x4F -- 0041..005A; mapped  # LATIN CAPITAL LETTER A..Z */
    -1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
    /* 0x50..0x5F */
     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, -1, -1, -1, -1, -1,
    /* 0x60..0x6F -- 0061..007A; valid  # LATIN SMALL LETTER A..Z */
    -1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    /* 0x70..0x7F */
     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1, -1, -1, -1, -1
};
300 | ||
// Shared UTF-16 implementation behind labelToASCII(), labelToUnicode(),
// nameToASCII() and nameToUnicode().
// After validating the arguments it clears dest and info, then copies src into
// dest on an ASCII fastpath: uppercase ASCII letters are lowercased, and
// hyphen, empty-label and too-long errors are recorded per label.
// The fastpath stops at the first character it cannot handle: a non-ASCII
// character, an asciiData-disallowed character when UIDNA_USE_STD3_RULES is set,
// a hyphen in the fourth position after another hyphen, or a dot when isLabel.
// The remainder is then handed to processUnicode().
// Sets U_ILLEGAL_ARGUMENT_ERROR if dest and src are the same object or src has
// no buffer, and U_MEMORY_ALLOCATION_ERROR if dest cannot provide a buffer.
// Returns dest.
301 | UnicodeString & | |
302 | UTS46::process(const UnicodeString &src, | |
303 | UBool isLabel, UBool toASCII, | |
304 | UnicodeString &dest, | |
305 | IDNAInfo &info, UErrorCode &errorCode) const { | |
306 | // uts46Norm2.normalize() would do all of this error checking and setup, | |
307 | // but with the ASCII fastpath we do not always call it, and do not | |
308 | // call it first. | |
309 | if(U_FAILURE(errorCode)) { | |
310 | dest.setToBogus(); | |
311 | return dest; | |
312 | } | |
313 | const UChar *srcArray=src.getBuffer(); | |
314 | if(&dest==&src || srcArray==NULL) { | |
315 | errorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
316 | dest.setToBogus(); | |
317 | return dest; | |
318 | } | |
319 | // Arguments are fine, reset output values. | |
320 | dest.remove(); | |
321 | info.reset(); | |
322 | int32_t srcLength=src.length(); | |
323 | if(srcLength==0) { | |
324 | if(toASCII) { | |
325 | info.errors|=UIDNA_ERROR_EMPTY_LABEL; | |
326 | } | |
327 | return dest; | |
328 | } | |
// Request a writable buffer for srcLength units; every exit path below hands it
// back with releaseBuffer(i), where i is the number of units written so far.
329 | UChar *destArray=dest.getBuffer(srcLength); | |
330 | if(destArray==NULL) { | |
331 | errorCode=U_MEMORY_ALLOCATION_ERROR; | |
332 | return dest; | |
333 | } | |
334 | // ASCII fastpath | |
335 | UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; | |
336 | int32_t labelStart=0; | |
337 | int32_t i; | |
338 | for(i=0;; ++i) { | |
// Reaching the end of src here means the whole input was handled by the fastpath.
339 | if(i==srcLength) { | |
340 | if(toASCII) { | |
341 | if((i-labelStart)>63) { | |
342 | info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; | |
343 | } | |
344 | // There is a trailing dot if labelStart==i. | |
345 | if(!isLabel && i>=254 && (i>254 || labelStart<i)) { | |
346 | info.errors|=UIDNA_ERROR_DOMAIN_NAME_TOO_LONG; | |
347 | } | |
348 | } | |
349 | info.errors|=info.labelErrors; | |
350 | dest.releaseBuffer(i); | |
351 | return dest; | |
352 | } | |
353 | UChar c=srcArray[i]; | |
354 | if(c>0x7f) { | |
355 | break; | |
356 | } | |
357 | int cData=asciiData[c]; | |
358 | if(cData>0) { | |
359 | destArray[i]=c+0x20; // Lowercase an uppercase ASCII letter. | |
360 | } else if(cData<0 && disallowNonLDHDot) { | |
361 | break; // Replacing with U+FFFD can be complicated for toASCII. | |
362 | } else { | |
363 | destArray[i]=c; | |
364 | if(c==0x2d) { // hyphen | |
365 | if(i==(labelStart+3) && srcArray[i-1]==0x2d) { | |
366 | // "??--..." is Punycode or forbidden. | |
367 | ++i; // '-' was copied to dest already | |
368 | break; | |
369 | } | |
370 | if(i==labelStart) { | |
371 | // label starts with "-" | |
372 | info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN; | |
373 | } | |
374 | if((i+1)==srcLength || srcArray[i+1]==0x2e) { | |
375 | // label ends with "-" | |
376 | info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN; | |
377 | } | |
378 | } else if(c==0x2e) { // dot | |
379 | if(isLabel) { | |
380 | // Replacing with U+FFFD can be complicated for toASCII. | |
381 | ++i; // '.' was copied to dest already | |
382 | break; | |
383 | } | |
384 | if(toASCII) { | |
57a6839d | 385 | if(i==labelStart) { |
729e4ab9 A |
386 | info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; |
387 | } else if((i-labelStart)>63) { | |
388 | info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; | |
389 | } | |
390 | } | |
// End of a label: fold its errors into the overall set and start the next label.
391 | info.errors|=info.labelErrors; | |
392 | info.labelErrors=0; | |
393 | labelStart=i+1; | |
394 | } | |
395 | } | |
396 | } | |
// The fastpath stopped early: dest holds the first i processed units, and
// processUnicode() continues from labelStart with mapping starting at i.
397 | info.errors|=info.labelErrors; | |
398 | dest.releaseBuffer(i); | |
399 | processUnicode(src, labelStart, i, isLabel, toASCII, dest, info, errorCode); | |
// Report a BiDi error if info.isOkBiDi was cleared, or if the labels before
// labelStart (handled only by the fastpath) fail the ASCII BiDi check.
400 | if( info.isBiDi && U_SUCCESS(errorCode) && (info.errors&severeErrors)==0 && | |
401 | (!info.isOkBiDi || (labelStart>0 && !isASCIIOkBiDi(dest.getBuffer(), labelStart))) | |
402 | ) { | |
403 | info.errors|=UIDNA_ERROR_BIDI; | |
404 | } | |
405 | return dest; | |
406 | } | |
407 | ||
// Shared UTF-8 implementation behind labelToASCII_UTF8(), labelToUnicodeUTF8(),
// nameToASCII_UTF8() and nameToUnicodeUTF8().
// Input of at most 256 bytes goes through a byte-wise ASCII fastpath that
// mirrors the one in process(). If the fastpath has to stop early, the labels
// already completed are appended to dest, and the current label plus the rest
// of src are converted to UTF-16 and finished by processUnicode().
// Longer input is converted to UTF-16 as a whole and passed to processUnicode().
// Sets U_ILLEGAL_ARGUMENT_ERROR if src has a NULL data pointer with a non-zero
// length. Output is written to dest, which is flushed on every path except the
// two early error returns.
408 | void | |
409 | UTS46::processUTF8(const StringPiece &src, | |
410 | UBool isLabel, UBool toASCII, | |
411 | ByteSink &dest, | |
412 | IDNAInfo &info, UErrorCode &errorCode) const { | |
413 | if(U_FAILURE(errorCode)) { | |
414 | return; | |
415 | } | |
416 | const char *srcArray=src.data(); | |
417 | int32_t srcLength=src.length(); | |
418 | if(srcArray==NULL && srcLength!=0) { | |
419 | errorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
420 | return; | |
421 | } | |
422 | // Arguments are fine, reset output values. | |
423 | info.reset(); | |
424 | if(srcLength==0) { | |
425 | if(toASCII) { | |
426 | info.errors|=UIDNA_ERROR_EMPTY_LABEL; | |
427 | } | |
428 | dest.Flush(); | |
429 | return; | |
430 | } | |
431 | UnicodeString destString; | |
432 | int32_t labelStart=0; | |
433 | if(srcLength<=256) { // length of stackArray[] | |
434 | // ASCII fastpath | |
435 | char stackArray[256]; | |
436 | int32_t destCapacity; | |
437 | char *destArray=dest.GetAppendBuffer(srcLength, srcLength+20, | |
438 | stackArray, LENGTHOF(stackArray), &destCapacity); | |
439 | UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; | |
440 | int32_t i; | |
441 | for(i=0;; ++i) { | |
// Reaching the end of src here means the whole input was handled by the fastpath.
442 | if(i==srcLength) { | |
443 | if(toASCII) { | |
444 | if((i-labelStart)>63) { | |
445 | info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; | |
446 | } | |
447 | // There is a trailing dot if labelStart==i. | |
448 | if(!isLabel && i>=254 && (i>254 || labelStart<i)) { | |
449 | info.errors|=UIDNA_ERROR_DOMAIN_NAME_TOO_LONG; | |
450 | } | |
451 | } | |
452 | info.errors|=info.labelErrors; | |
453 | dest.Append(destArray, i); | |
454 | dest.Flush(); | |
455 | return; | |
456 | } | |
457 | char c=srcArray[i]; | |
458 | if((int8_t)c<0) { // (uint8_t)c>0x7f | |
459 | break; | |
460 | } | |
461 | int cData=asciiData[(int)c]; // Cast: gcc warns about indexing with a char. | |
462 | if(cData>0) { | |
463 | destArray[i]=c+0x20; // Lowercase an uppercase ASCII letter. | |
464 | } else if(cData<0 && disallowNonLDHDot) { | |
465 | break; // Replacing with U+FFFD can be complicated for toASCII. | |
466 | } else { | |
467 | destArray[i]=c; | |
468 | if(c==0x2d) { // hyphen | |
469 | if(i==(labelStart+3) && srcArray[i-1]==0x2d) { | |
470 | // "??--..." is Punycode or forbidden. | |
// Unlike process(), i is not advanced here: the slow path below starts
// mapping at this character.
471 | break; | |
472 | } | |
473 | if(i==labelStart) { | |
474 | // label starts with "-" | |
475 | info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN; | |
476 | } | |
477 | if((i+1)==srcLength || srcArray[i+1]==0x2e) { | |
478 | // label ends with "-" | |
479 | info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN; | |
480 | } | |
481 | } else if(c==0x2e) { // dot | |
482 | if(isLabel) { | |
483 | break; // Replacing with U+FFFD can be complicated for toASCII. | |
484 | } | |
485 | if(toASCII) { | |
57a6839d | 486 | if(i==labelStart) { |
729e4ab9 A |
487 | info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; |
488 | } else if((i-labelStart)>63) { | |
489 | info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; | |
490 | } | |
491 | } | |
// End of a label: fold its errors into the overall set and start the next label.
492 | info.errors|=info.labelErrors; | |
493 | info.labelErrors=0; | |
494 | labelStart=i+1; | |
495 | } | |
496 | } | |
497 | } | |
498 | info.errors|=info.labelErrors; | |
499 | // Convert the processed ASCII prefix of the current label to UTF-16. | |
500 | int32_t mappingStart=i-labelStart; | |
501 | destString=UnicodeString::fromUTF8(StringPiece(destArray+labelStart, mappingStart)); | |
502 | // Output the previous ASCII labels and process the rest of src in UTF-16. | |
503 | dest.Append(destArray, labelStart); | |
504 | processUnicode(UnicodeString::fromUTF8(StringPiece(src, labelStart)), 0, mappingStart, | |
505 | isLabel, toASCII, | |
506 | destString, info, errorCode); | |
507 | } else { | |
508 | // src is too long for the ASCII fastpath implementation. | |
509 | processUnicode(UnicodeString::fromUTF8(src), 0, 0, | |
510 | isLabel, toASCII, | |
511 | destString, info, errorCode); | |
512 | } | |
513 | destString.toUTF8(dest); // calls dest.Flush() | |
// The total output length is the labelStart bytes already appended plus destString.
514 | if(toASCII && !isLabel) { | |
515 | // length==labelStart==254 means that there is a trailing dot (ok) and | |
516 | // destString is empty (do not index at 253-labelStart). | |
517 | int32_t length=labelStart+destString.length(); | |
518 | if( length>=254 && isASCIIString(destString) && | |
519 | (length>254 || | |
520 | (labelStart<254 && destString[253-labelStart]!=0x2e)) | |
521 | ) { | |
522 | info.errors|=UIDNA_ERROR_DOMAIN_NAME_TOO_LONG; | |
523 | } | |
524 | } | |
// Report a BiDi error if info.isOkBiDi was cleared, or if the first labelStart
// bytes of src (labels handled only by the fastpath) fail the ASCII BiDi check.
525 | if( info.isBiDi && U_SUCCESS(errorCode) && (info.errors&severeErrors)==0 && | |
526 | (!info.isOkBiDi || (labelStart>0 && !isASCIIOkBiDi(srcArray, labelStart))) | |
527 | ) { | |
528 | info.errors|=UIDNA_ERROR_BIDI; | |
529 | } | |
530 | } | |
531 | ||
// Finishes the work the ASCII fastpath could not do.
// First runs the uts46 normalizer: over all of src into dest if mappingStart==0,
// otherwise over src from mappingStart, appended to what dest already holds.
// Then walks dest from labelStart. Unless isLabel, each dot ends a label that is
// passed to processLabel(); the final label is passed after the loop.
// On the first deviation character (U+00DF, U+03C2, U+200C, U+200D) it sets
// info.isTransDiff. If the relevant UIDNA_NONTRANSITIONAL_* option is not set,
// it also calls mapDevChars() once to map all of them.
// Returns dest; returns early when errorCode reports a failure.
532 | UnicodeString & | |
533 | UTS46::processUnicode(const UnicodeString &src, | |
534 | int32_t labelStart, int32_t mappingStart, | |
535 | UBool isLabel, UBool toASCII, | |
536 | UnicodeString &dest, | |
537 | IDNAInfo &info, UErrorCode &errorCode) const { | |
538 | if(mappingStart==0) { | |
539 | uts46Norm2.normalize(src, dest, errorCode); | |
540 | } else { | |
541 | uts46Norm2.normalizeSecondAndAppend(dest, src.tempSubString(mappingStart), errorCode); | |
542 | } | |
543 | if(U_FAILURE(errorCode)) { | |
544 | return dest; | |
545 | } | |
// Deviation characters are mapped only when the NONTRANSITIONAL option for this
// direction is not set.
546 | UBool doMapDevChars= | |
547 | toASCII ? (options&UIDNA_NONTRANSITIONAL_TO_ASCII)==0 : | |
548 | (options&UIDNA_NONTRANSITIONAL_TO_UNICODE)==0; | |
549 | const UChar *destArray=dest.getBuffer(); | |
550 | int32_t destLength=dest.length(); | |
551 | int32_t labelLimit=labelStart; | |
552 | while(labelLimit<destLength) { | |
553 | UChar c=destArray[labelLimit]; | |
554 | if(c==0x2e && !isLabel) { | |
555 | int32_t labelLength=labelLimit-labelStart; | |
556 | int32_t newLength=processLabel(dest, labelStart, labelLength, | |
557 | toASCII, info, errorCode); | |
558 | info.errors|=info.labelErrors; | |
559 | info.labelErrors=0; | |
560 | if(U_FAILURE(errorCode)) { | |
561 | return dest; | |
562 | } | |
// The buffer pointer is fetched again after processLabel(), and the length and
// indices are adjusted by the label's change in length.
563 | destArray=dest.getBuffer(); | |
564 | destLength+=newLength-labelLength; | |
565 | labelLimit=labelStart+=newLength+1; | |
// The range test below is a quick pre-filter; the parenthesized part selects
// exactly U+00DF, U+03C2, U+200C and U+200D.
566 | } else if(0xdf<=c && c<=0x200d && (c==0xdf || c==0x3c2 || c>=0x200c)) { | |
567 | info.isTransDiff=TRUE; | |
568 | if(doMapDevChars) { | |
569 | destLength=mapDevChars(dest, labelStart, labelLimit, errorCode); | |
570 | if(U_FAILURE(errorCode)) { | |
571 | return dest; | |
572 | } | |
573 | destArray=dest.getBuffer(); | |
574 | // Do not increment labelLimit in case c was removed. | |
575 | // All deviation characters have been mapped, no need to check for them again. | |
576 | doMapDevChars=FALSE; | |
577 | } else { | |
578 | ++labelLimit; | |
579 | } | |
580 | } else { | |
581 | ++labelLimit; | |
582 | } | |
583 | } | |
584 | // Permit an empty label at the end (0<labelStart==labelLimit==destLength is ok) | |
585 | // but not an empty label elsewhere nor a completely empty domain name. | |
586 | // processLabel() sets UIDNA_ERROR_EMPTY_LABEL when labelLength==0. | |
587 | if(0==labelStart || labelStart<labelLimit) { | |
588 | processLabel(dest, labelStart, labelLimit-labelStart, | |
589 | toASCII, info, errorCode); | |
590 | info.errors|=info.labelErrors; | |
591 | } | |
592 | return dest; | |
593 | } | |
594 | ||
// Maps the deviation characters in dest in place, starting at mappingStart:
// U+00DF (sharp s) becomes "ss", U+03C2 (final sigma) becomes U+03C3, and
// U+200C/U+200D (ZWNJ/ZWJ) are removed.
// If anything was mapped, the text from labelStart to the end of dest is
// normalized again with the uts46 normalizer and replaced.
// Returns the new dest.length(). Sets U_MEMORY_ALLOCATION_ERROR if a writable
// buffer cannot be obtained.
// The only caller visible in this file, processUnicode(), invokes this with
// dest[mappingStart] being a deviation character.
595 | int32_t | |
596 | UTS46::mapDevChars(UnicodeString &dest, int32_t labelStart, int32_t mappingStart, | |
597 | UErrorCode &errorCode) const { | |
598 | int32_t length=dest.length(); | |
// Ask for one extra unit up front when the first character to map is a sharp s,
// since that mapping lengthens the string by one.
599 | UChar *s=dest.getBuffer(dest[mappingStart]==0xdf ? length+1 : length); | |
600 | if(s==NULL) { | |
601 | errorCode=U_MEMORY_ALLOCATION_ERROR; | |
602 | return length; | |
603 | } | |
604 | int32_t capacity=dest.getCapacity(); | |
605 | UBool didMapDevChars=FALSE; | |
// readIndex runs ahead of writeIndex after a removal and is caught up by an
// insertion; length tracks the final string length throughout.
606 | int32_t readIndex=mappingStart, writeIndex=mappingStart; | |
607 | do { | |
608 | UChar c=s[readIndex++]; | |
609 | switch(c) { | |
610 | case 0xdf: | |
611 | // Map sharp s to ss. | |
612 | didMapDevChars=TRUE; | |
613 | s[writeIndex++]=0x73; // Replace sharp s with first s. | |
614 | // Insert second s and account for possible buffer reallocation. | |
// writeIndex==readIndex means there is no gap left by earlier removals, so the
// tail has to be shifted right by one to make room.
615 | if(writeIndex==readIndex) { | |
616 | if(length==capacity) { | |
617 | dest.releaseBuffer(length); | |
618 | s=dest.getBuffer(length+1); | |
619 | if(s==NULL) { | |
620 | errorCode=U_MEMORY_ALLOCATION_ERROR; | |
621 | return length; | |
622 | } | |
623 | capacity=dest.getCapacity(); | |
624 | } | |
625 | u_memmove(s+writeIndex+1, s+writeIndex, length-writeIndex); | |
626 | ++readIndex; | |
627 | } | |
628 | s[writeIndex++]=0x73; | |
629 | ++length; | |
630 | break; | |
631 | case 0x3c2: // Map final sigma to nonfinal sigma. | |
632 | didMapDevChars=TRUE; | |
633 | s[writeIndex++]=0x3c3; | |
634 | break; | |
635 | case 0x200c: // Ignore/remove ZWNJ. | |
636 | case 0x200d: // Ignore/remove ZWJ. | |
637 | didMapDevChars=TRUE; | |
638 | --length; | |
639 | break; | |
640 | default: | |
641 | // Only really necessary if writeIndex was different from readIndex. | |
642 | s[writeIndex++]=c; | |
643 | break; | |
644 | } | |
645 | } while(writeIndex<length); | |
646 | dest.releaseBuffer(length); | |
647 | if(didMapDevChars) { | |
648 | // Mapping deviation characters might have resulted in an un-NFC string. | |
649 | // We could use either the NFC or the UTS #46 normalizer. | |
650 | // By using the UTS #46 normalizer again, we avoid having to load a second .nrm data file. | |
651 | UnicodeString normalized; | |
652 | uts46Norm2.normalize(dest.tempSubString(labelStart), normalized, errorCode); | |
653 | if(U_SUCCESS(errorCode)) { | |
654 | dest.replace(labelStart, 0x7fffffff, normalized); | |
655 | return dest.length(); | |
656 | } | |
657 | } | |
658 | return length; | |
659 | } | |
660 | ||
661 | // Some non-ASCII characters are equivalent to sequences with | |
662 | // non-LDH ASCII characters. To find them: | |
663 | // grep disallowed_STD3_valid IdnaMappingTable.txt (or uts46.txt) | |
664 | static inline UBool | |
665 | isNonASCIIDisallowedSTD3Valid(UChar32 c) { | |
666 | return c==0x2260 || c==0x226E || c==0x226F; | |
667 | } | |
668 | ||
669 | // Replace the label in dest with the label string, if the label was modified. | |
670 | // If &label==&dest then the label was modified in-place and labelLength | |
671 | // is the new label length, different from label.length(). | |
672 | // If &label!=&dest then labelLength==label.length(). | |
673 | // Returns labelLength (= the new label length). | |
674 | static int32_t | |
675 | replaceLabel(UnicodeString &dest, int32_t destLabelStart, int32_t destLabelLength, | |
676 | const UnicodeString &label, int32_t labelLength) { | |
677 | if(&label!=&dest) { | |
678 | dest.replace(destLabelStart, destLabelLength, label); | |
679 | } | |
680 | return labelLength; | |
681 | } | |
682 | ||
683 | int32_t | |
684 | UTS46::processLabel(UnicodeString &dest, | |
685 | int32_t labelStart, int32_t labelLength, | |
686 | UBool toASCII, | |
687 | IDNAInfo &info, UErrorCode &errorCode) const { | |
688 | UnicodeString fromPunycode; | |
689 | UnicodeString *labelString; | |
690 | const UChar *label=dest.getBuffer()+labelStart; | |
691 | int32_t destLabelStart=labelStart; | |
692 | int32_t destLabelLength=labelLength; | |
693 | UBool wasPunycode; | |
694 | if(labelLength>=4 && label[0]==0x78 && label[1]==0x6e && label[2]==0x2d && label[3]==0x2d) { | |
695 | // Label starts with "xn--", try to un-Punycode it. | |
696 | wasPunycode=TRUE; | |
697 | UChar *unicodeBuffer=fromPunycode.getBuffer(-1); // capacity==-1: most labels should fit | |
698 | if(unicodeBuffer==NULL) { | |
699 | // Should never occur if we used capacity==-1 which uses the internal buffer. | |
700 | errorCode=U_MEMORY_ALLOCATION_ERROR; | |
701 | return labelLength; | |
702 | } | |
703 | UErrorCode punycodeErrorCode=U_ZERO_ERROR; | |
704 | int32_t unicodeLength=u_strFromPunycode(label+4, labelLength-4, | |
705 | unicodeBuffer, fromPunycode.getCapacity(), | |
706 | NULL, &punycodeErrorCode); | |
707 | if(punycodeErrorCode==U_BUFFER_OVERFLOW_ERROR) { | |
708 | fromPunycode.releaseBuffer(0); | |
709 | unicodeBuffer=fromPunycode.getBuffer(unicodeLength); | |
710 | if(unicodeBuffer==NULL) { | |
711 | errorCode=U_MEMORY_ALLOCATION_ERROR; | |
712 | return labelLength; | |
713 | } | |
714 | punycodeErrorCode=U_ZERO_ERROR; | |
715 | unicodeLength=u_strFromPunycode(label+4, labelLength-4, | |
716 | unicodeBuffer, fromPunycode.getCapacity(), | |
717 | NULL, &punycodeErrorCode); | |
718 | } | |
719 | fromPunycode.releaseBuffer(unicodeLength); | |
720 | if(U_FAILURE(punycodeErrorCode)) { | |
721 | info.labelErrors|=UIDNA_ERROR_PUNYCODE; | |
722 | return markBadACELabel(dest, labelStart, labelLength, toASCII, info); | |
723 | } | |
724 | // Check for NFC, and for characters that are not | |
725 | // valid or deviation characters according to the normalizer. | |
726 | // If there is something wrong, then the string will change. | |
727 | // Note that the normalizer passes through non-LDH ASCII and deviation characters. | |
728 | // Deviation characters are ok in Punycode even in transitional processing. | |
729 | // In the code further below, if we find non-LDH ASCII and we have UIDNA_USE_STD3_RULES | |
730 | // then we will set UIDNA_ERROR_INVALID_ACE_LABEL there too. | |
731 | UBool isValid=uts46Norm2.isNormalized(fromPunycode, errorCode); | |
732 | if(U_FAILURE(errorCode)) { | |
733 | return labelLength; | |
734 | } | |
735 | if(!isValid) { | |
736 | info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL; | |
737 | return markBadACELabel(dest, labelStart, labelLength, toASCII, info); | |
738 | } | |
739 | labelString=&fromPunycode; | |
740 | label=fromPunycode.getBuffer(); | |
741 | labelStart=0; | |
742 | labelLength=fromPunycode.length(); | |
743 | } else { | |
744 | wasPunycode=FALSE; | |
745 | labelString=&dest; | |
746 | } | |
747 | // Validity check | |
748 | if(labelLength==0) { | |
749 | if(toASCII) { | |
750 | info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; | |
751 | } | |
752 | return replaceLabel(dest, destLabelStart, destLabelLength, *labelString, labelLength); | |
753 | } | |
754 | // labelLength>0 | |
755 | if(labelLength>=4 && label[2]==0x2d && label[3]==0x2d) { | |
756 | // label starts with "??--" | |
757 | info.labelErrors|=UIDNA_ERROR_HYPHEN_3_4; | |
758 | } | |
759 | if(label[0]==0x2d) { | |
760 | // label starts with "-" | |
761 | info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN; | |
762 | } | |
763 | if(label[labelLength-1]==0x2d) { | |
764 | // label ends with "-" | |
765 | info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN; | |
766 | } | |
767 | // If the label was not a Punycode label, then it was the result of | |
768 | // mapping, normalization and label segmentation. | |
769 | // If the label was in Punycode, then we mapped it again above | |
770 | // and checked its validity. | |
771 | // Now we handle the STD3 restriction to LDH characters (if set) | |
772 | // and we look for U+FFFD which indicates disallowed characters | |
773 | // in a non-Punycode label or U+FFFD itself in a Punycode label. | |
774 | // We also check for dots which can come from the input to a single-label function. | |
775 | // Ok to cast away const because we own the UnicodeString. | |
776 | UChar *s=(UChar *)label; | |
777 | const UChar *limit=label+labelLength; | |
778 | UChar oredChars=0; | |
779 | // If we enforce STD3 rules, then ASCII characters other than LDH and dot are disallowed. | |
780 | UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; | |
781 | do { | |
782 | UChar c=*s; | |
783 | if(c<=0x7f) { | |
784 | if(c==0x2e) { | |
785 | info.labelErrors|=UIDNA_ERROR_LABEL_HAS_DOT; | |
786 | *s=0xfffd; | |
787 | } else if(disallowNonLDHDot && asciiData[c]<0) { | |
788 | info.labelErrors|=UIDNA_ERROR_DISALLOWED; | |
789 | *s=0xfffd; | |
790 | } | |
791 | } else { | |
792 | oredChars|=c; | |
793 | if(disallowNonLDHDot && isNonASCIIDisallowedSTD3Valid(c)) { | |
794 | info.labelErrors|=UIDNA_ERROR_DISALLOWED; | |
795 | *s=0xfffd; | |
796 | } else if(c==0xfffd) { | |
797 | info.labelErrors|=UIDNA_ERROR_DISALLOWED; | |
798 | } | |
799 | } | |
800 | ++s; | |
801 | } while(s<limit); | |
802 | // Check for a leading combining mark after other validity checks | |
803 | // so that we don't report UIDNA_ERROR_DISALLOWED for the U+FFFD from here. | |
804 | UChar32 c; | |
805 | int32_t cpLength=0; | |
806 | // "Unsafe" is ok because unpaired surrogates were mapped to U+FFFD. | |
807 | U16_NEXT_UNSAFE(label, cpLength, c); | |
808 | if((U_GET_GC_MASK(c)&U_GC_M_MASK)!=0) { | |
809 | info.labelErrors|=UIDNA_ERROR_LEADING_COMBINING_MARK; | |
810 | labelString->replace(labelStart, cpLength, (UChar)0xfffd); | |
811 | label=labelString->getBuffer()+labelStart; | |
812 | labelLength+=1-cpLength; | |
813 | if(labelString==&dest) { | |
814 | destLabelLength=labelLength; | |
815 | } | |
816 | } | |
817 | if((info.labelErrors&severeErrors)==0) { | |
818 | // Do contextual checks only if we do not have U+FFFD from a severe error | |
819 | // because U+FFFD can make these checks fail. | |
820 | if((options&UIDNA_CHECK_BIDI)!=0 && (!info.isBiDi || info.isOkBiDi)) { | |
821 | checkLabelBiDi(label, labelLength, info); | |
822 | } | |
823 | if( (options&UIDNA_CHECK_CONTEXTJ)!=0 && (oredChars&0x200c)==0x200c && | |
824 | !isLabelOkContextJ(label, labelLength) | |
825 | ) { | |
826 | info.labelErrors|=UIDNA_ERROR_CONTEXTJ; | |
827 | } | |
4388f060 A |
828 | if((options&UIDNA_CHECK_CONTEXTO)!=0 && oredChars>=0xb7) { |
829 | checkLabelContextO(label, labelLength, info); | |
830 | } | |
729e4ab9 A |
831 | if(toASCII) { |
832 | if(wasPunycode) { | |
833 | // Leave a Punycode label unchanged if it has no severe errors. | |
834 | if(destLabelLength>63) { | |
835 | info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; | |
836 | } | |
837 | return destLabelLength; | |
838 | } else if(oredChars>=0x80) { | |
839 | // Contains non-ASCII characters. | |
840 | UnicodeString punycode; | |
841 | UChar *buffer=punycode.getBuffer(63); // 63==maximum DNS label length | |
842 | if(buffer==NULL) { | |
843 | errorCode=U_MEMORY_ALLOCATION_ERROR; | |
844 | return destLabelLength; | |
845 | } | |
846 | buffer[0]=0x78; // Write "xn--". | |
847 | buffer[1]=0x6e; | |
848 | buffer[2]=0x2d; | |
849 | buffer[3]=0x2d; | |
850 | int32_t punycodeLength=u_strToPunycode(label, labelLength, | |
851 | buffer+4, punycode.getCapacity()-4, | |
852 | NULL, &errorCode); | |
853 | if(errorCode==U_BUFFER_OVERFLOW_ERROR) { | |
854 | errorCode=U_ZERO_ERROR; | |
855 | punycode.releaseBuffer(4); | |
856 | buffer=punycode.getBuffer(4+punycodeLength); | |
857 | if(buffer==NULL) { | |
858 | errorCode=U_MEMORY_ALLOCATION_ERROR; | |
859 | return destLabelLength; | |
860 | } | |
861 | punycodeLength=u_strToPunycode(label, labelLength, | |
862 | buffer+4, punycode.getCapacity()-4, | |
863 | NULL, &errorCode); | |
864 | } | |
865 | punycodeLength+=4; | |
866 | punycode.releaseBuffer(punycodeLength); | |
867 | if(U_FAILURE(errorCode)) { | |
868 | return destLabelLength; | |
869 | } | |
870 | if(punycodeLength>63) { | |
871 | info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; | |
872 | } | |
873 | return replaceLabel(dest, destLabelStart, destLabelLength, | |
874 | punycode, punycodeLength); | |
875 | } else { | |
876 | // all-ASCII label | |
877 | if(labelLength>63) { | |
878 | info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; | |
879 | } | |
880 | } | |
881 | } | |
882 | } else { | |
883 | // If a Punycode label has severe errors, | |
884 | // then leave it but make sure it does not look valid. | |
885 | if(wasPunycode) { | |
886 | info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL; | |
887 | return markBadACELabel(dest, destLabelStart, destLabelLength, toASCII, info); | |
888 | } | |
889 | } | |
890 | return replaceLabel(dest, destLabelStart, destLabelLength, *labelString, labelLength); | |
891 | } | |
892 | ||
893 | // Make sure an ACE label does not look valid. | |
894 | // Append U+FFFD if the label has only LDH characters. | |
895 | // If UIDNA_USE_STD3_RULES, also replace disallowed ASCII characters with U+FFFD. | |
896 | int32_t | |
897 | UTS46::markBadACELabel(UnicodeString &dest, | |
898 | int32_t labelStart, int32_t labelLength, | |
899 | UBool toASCII, IDNAInfo &info) const { | |
900 | UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; | |
901 | UBool isASCII=TRUE; | |
902 | UBool onlyLDH=TRUE; | |
903 | const UChar *label=dest.getBuffer()+labelStart; | |
904 | // Ok to cast away const because we own the UnicodeString. | |
905 | UChar *s=(UChar *)label+4; // After the initial "xn--". | |
906 | const UChar *limit=label+labelLength; | |
907 | do { | |
908 | UChar c=*s; | |
909 | if(c<=0x7f) { | |
910 | if(c==0x2e) { | |
911 | info.labelErrors|=UIDNA_ERROR_LABEL_HAS_DOT; | |
912 | *s=0xfffd; | |
913 | isASCII=onlyLDH=FALSE; | |
914 | } else if(asciiData[c]<0) { | |
915 | onlyLDH=FALSE; | |
916 | if(disallowNonLDHDot) { | |
917 | *s=0xfffd; | |
918 | isASCII=FALSE; | |
919 | } | |
920 | } | |
921 | } else { | |
922 | isASCII=onlyLDH=FALSE; | |
923 | } | |
924 | } while(++s<limit); | |
925 | if(onlyLDH) { | |
926 | dest.insert(labelStart+labelLength, (UChar)0xfffd); | |
927 | ++labelLength; | |
928 | } else { | |
929 | if(toASCII && isASCII && labelLength>63) { | |
930 | info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; | |
931 | } | |
932 | } | |
933 | return labelLength; | |
934 | } | |
935 | ||
936 | const uint32_t L_MASK=U_MASK(U_LEFT_TO_RIGHT); | |
937 | const uint32_t R_AL_MASK=U_MASK(U_RIGHT_TO_LEFT)|U_MASK(U_RIGHT_TO_LEFT_ARABIC); | |
938 | const uint32_t L_R_AL_MASK=L_MASK|R_AL_MASK; | |
939 | ||
940 | const uint32_t R_AL_AN_MASK=R_AL_MASK|U_MASK(U_ARABIC_NUMBER); | |
941 | ||
942 | const uint32_t EN_AN_MASK=U_MASK(U_EUROPEAN_NUMBER)|U_MASK(U_ARABIC_NUMBER); | |
943 | const uint32_t R_AL_EN_AN_MASK=R_AL_MASK|EN_AN_MASK; | |
944 | const uint32_t L_EN_MASK=L_MASK|U_MASK(U_EUROPEAN_NUMBER); | |
945 | ||
946 | const uint32_t ES_CS_ET_ON_BN_NSM_MASK= | |
947 | U_MASK(U_EUROPEAN_NUMBER_SEPARATOR)| | |
948 | U_MASK(U_COMMON_NUMBER_SEPARATOR)| | |
949 | U_MASK(U_EUROPEAN_NUMBER_TERMINATOR)| | |
950 | U_MASK(U_OTHER_NEUTRAL)| | |
951 | U_MASK(U_BOUNDARY_NEUTRAL)| | |
952 | U_MASK(U_DIR_NON_SPACING_MARK); | |
953 | const uint32_t L_EN_ES_CS_ET_ON_BN_NSM_MASK=L_EN_MASK|ES_CS_ET_ON_BN_NSM_MASK; | |
954 | const uint32_t R_AL_AN_EN_ES_CS_ET_ON_BN_NSM_MASK=R_AL_MASK|EN_AN_MASK|ES_CS_ET_ON_BN_NSM_MASK; | |
955 | ||
956 | // We scan the whole label and check both for whether it contains RTL characters | |
957 | // and whether it passes the BiDi Rule. | |
958 | // In a BiDi domain name, all labels must pass the BiDi Rule, but we might find | |
959 | // that a domain name is a BiDi domain name (has an RTL label) only after | |
960 | // processing several earlier labels. | |
961 | void | |
962 | UTS46::checkLabelBiDi(const UChar *label, int32_t labelLength, IDNAInfo &info) const { | |
963 | // IDNA2008 BiDi rule | |
964 | // Get the directionality of the first character. | |
965 | UChar32 c; | |
966 | int32_t i=0; | |
967 | U16_NEXT_UNSAFE(label, i, c); | |
968 | uint32_t firstMask=U_MASK(u_charDirection(c)); | |
969 | // 1. The first character must be a character with BIDI property L, R | |
970 | // or AL. If it has the R or AL property, it is an RTL label; if it | |
971 | // has the L property, it is an LTR label. | |
972 | if((firstMask&~L_R_AL_MASK)!=0) { | |
973 | info.isOkBiDi=FALSE; | |
974 | } | |
975 | // Get the directionality of the last non-NSM character. | |
976 | uint32_t lastMask; | |
977 | for(;;) { | |
978 | if(i>=labelLength) { | |
979 | lastMask=firstMask; | |
980 | break; | |
981 | } | |
982 | U16_PREV_UNSAFE(label, labelLength, c); | |
983 | UCharDirection dir=u_charDirection(c); | |
984 | if(dir!=U_DIR_NON_SPACING_MARK) { | |
985 | lastMask=U_MASK(dir); | |
986 | break; | |
987 | } | |
988 | } | |
989 | // 3. In an RTL label, the end of the label must be a character with | |
990 | // BIDI property R, AL, EN or AN, followed by zero or more | |
991 | // characters with BIDI property NSM. | |
992 | // 6. In an LTR label, the end of the label must be a character with | |
993 | // BIDI property L or EN, followed by zero or more characters with | |
994 | // BIDI property NSM. | |
995 | if( (firstMask&L_MASK)!=0 ? | |
996 | (lastMask&~L_EN_MASK)!=0 : | |
997 | (lastMask&~R_AL_EN_AN_MASK)!=0 | |
998 | ) { | |
999 | info.isOkBiDi=FALSE; | |
1000 | } | |
1001 | // Get the directionalities of the intervening characters. | |
1002 | uint32_t mask=0; | |
1003 | while(i<labelLength) { | |
1004 | U16_NEXT_UNSAFE(label, i, c); | |
1005 | mask|=U_MASK(u_charDirection(c)); | |
1006 | } | |
1007 | if(firstMask&L_MASK) { | |
1008 | // 5. In an LTR label, only characters with the BIDI properties L, EN, | |
1009 | // ES, CS, ET, ON, BN and NSM are allowed. | |
1010 | if((mask&~L_EN_ES_CS_ET_ON_BN_NSM_MASK)!=0) { | |
1011 | info.isOkBiDi=FALSE; | |
1012 | } | |
1013 | } else { | |
1014 | // 2. In an RTL label, only characters with the BIDI properties R, AL, | |
1015 | // AN, EN, ES, CS, ET, ON, BN and NSM are allowed. | |
1016 | if((mask&~R_AL_AN_EN_ES_CS_ET_ON_BN_NSM_MASK)!=0) { | |
1017 | info.isOkBiDi=FALSE; | |
1018 | } | |
1019 | // 4. In an RTL label, if an EN is present, no AN may be present, and | |
1020 | // vice versa. | |
1021 | if((mask&EN_AN_MASK)==EN_AN_MASK) { | |
1022 | info.isOkBiDi=FALSE; | |
1023 | } | |
1024 | } | |
1025 | // An RTL label is a label that contains at least one character of type | |
1026 | // R, AL or AN. [...] | |
1027 | // A "BIDI domain name" is a domain name that contains at least one RTL | |
1028 | // label. [...] | |
1029 | // The following rule, consisting of six conditions, applies to labels | |
1030 | // in BIDI domain names. | |
1031 | if(((firstMask|mask|lastMask)&R_AL_AN_MASK)!=0) { | |
1032 | info.isBiDi=TRUE; | |
1033 | } | |
1034 | } | |
1035 | ||
1036 | // Special code for the ASCII prefix of a BiDi domain name. | |
1037 | // The ASCII prefix is all-LTR. | |
1038 | ||
1039 | // IDNA2008 BiDi rule, parts relevant to ASCII labels: | |
1040 | // 1. The first character must be a character with BIDI property L [...] | |
1041 | // 5. In an LTR label, only characters with the BIDI properties L, EN, | |
1042 | // ES, CS, ET, ON, BN and NSM are allowed. | |
1043 | // 6. In an LTR label, the end of the label must be a character with | |
1044 | // BIDI property L or EN [...] | |
1045 | ||
1046 | // UTF-16 version, called for mapped ASCII prefix. | |
1047 | // Cannot contain uppercase A-Z. | |
1048 | // s[length-1] must be the trailing dot. | |
1049 | static UBool | |
1050 | isASCIIOkBiDi(const UChar *s, int32_t length) { | |
1051 | int32_t labelStart=0; | |
1052 | for(int32_t i=0; i<length; ++i) { | |
1053 | UChar c=s[i]; | |
1054 | if(c==0x2e) { // dot | |
1055 | if(i>labelStart) { | |
1056 | c=s[i-1]; | |
1057 | if(!(0x61<=c && c<=0x7a) && !(0x30<=c && c<=0x39)) { | |
1058 | // Last character in the label is not an L or EN. | |
1059 | return FALSE; | |
1060 | } | |
1061 | } | |
1062 | labelStart=i+1; | |
1063 | } else if(i==labelStart) { | |
1064 | if(!(0x61<=c && c<=0x7a)) { | |
1065 | // First character in the label is not an L. | |
1066 | return FALSE; | |
1067 | } | |
1068 | } else { | |
1069 | if(c<=0x20 && (c>=0x1c || (9<=c && c<=0xd))) { | |
1070 | // Intermediate character in the label is a B, S or WS. | |
1071 | return FALSE; | |
1072 | } | |
1073 | } | |
1074 | } | |
1075 | return TRUE; | |
1076 | } | |
1077 | ||
1078 | // UTF-8 version, called for source ASCII prefix. | |
1079 | // Can contain uppercase A-Z. | |
1080 | // s[length-1] must be the trailing dot. | |
1081 | static UBool | |
1082 | isASCIIOkBiDi(const char *s, int32_t length) { | |
1083 | int32_t labelStart=0; | |
1084 | for(int32_t i=0; i<length; ++i) { | |
1085 | char c=s[i]; | |
1086 | if(c==0x2e) { // dot | |
1087 | if(i>labelStart) { | |
1088 | c=s[i-1]; | |
1089 | if(!(0x61<=c && c<=0x7a) && !(0x41<=c && c<=0x5a) && !(0x30<=c && c<=0x39)) { | |
1090 | // Last character in the label is not an L or EN. | |
1091 | return FALSE; | |
1092 | } | |
1093 | } | |
1094 | labelStart=i+1; | |
1095 | } else if(i==labelStart) { | |
1096 | if(!(0x61<=c && c<=0x7a) && !(0x41<=c && c<=0x5a)) { | |
1097 | // First character in the label is not an L. | |
1098 | return FALSE; | |
1099 | } | |
1100 | } else { | |
1101 | if(c<=0x20 && (c>=0x1c || (9<=c && c<=0xd))) { | |
1102 | // Intermediate character in the label is a B, S or WS. | |
1103 | return FALSE; | |
1104 | } | |
1105 | } | |
1106 | } | |
1107 | return TRUE; | |
1108 | } | |
1109 | ||
1110 | UBool | |
1111 | UTS46::isLabelOkContextJ(const UChar *label, int32_t labelLength) const { | |
4388f060 | 1112 | const UBiDiProps *bdp=ubidi_getSingleton(); |
729e4ab9 A |
1113 | // [IDNA2008-Tables] |
1114 | // 200C..200D ; CONTEXTJ # ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER | |
1115 | for(int32_t i=0; i<labelLength; ++i) { | |
1116 | if(label[i]==0x200c) { | |
1117 | // Appendix A.1. ZERO WIDTH NON-JOINER | |
1118 | // Rule Set: | |
1119 | // False; | |
1120 | // If Canonical_Combining_Class(Before(cp)) .eq. Virama Then True; | |
1121 | // If RegExpMatch((Joining_Type:{L,D})(Joining_Type:T)*\u200C | |
1122 | // (Joining_Type:T)*(Joining_Type:{R,D})) Then True; | |
1123 | if(i==0) { | |
1124 | return FALSE; | |
1125 | } | |
1126 | UChar32 c; | |
1127 | int32_t j=i; | |
1128 | U16_PREV_UNSAFE(label, j, c); | |
4388f060 | 1129 | if(uts46Norm2.getCombiningClass(c)==9) { |
729e4ab9 A |
1130 | continue; |
1131 | } | |
1132 | // check precontext (Joining_Type:{L,D})(Joining_Type:T)* | |
1133 | for(;;) { | |
4388f060 | 1134 | UJoiningType type=ubidi_getJoiningType(bdp, c); |
729e4ab9 A |
1135 | if(type==U_JT_TRANSPARENT) { |
1136 | if(j==0) { | |
1137 | return FALSE; | |
1138 | } | |
1139 | U16_PREV_UNSAFE(label, j, c); | |
1140 | } else if(type==U_JT_LEFT_JOINING || type==U_JT_DUAL_JOINING) { | |
1141 | break; // precontext fulfilled | |
1142 | } else { | |
1143 | return FALSE; | |
1144 | } | |
1145 | } | |
1146 | // check postcontext (Joining_Type:T)*(Joining_Type:{R,D}) | |
1147 | for(j=i+1;;) { | |
1148 | if(j==labelLength) { | |
1149 | return FALSE; | |
1150 | } | |
1151 | U16_NEXT_UNSAFE(label, j, c); | |
4388f060 | 1152 | UJoiningType type=ubidi_getJoiningType(bdp, c); |
729e4ab9 A |
1153 | if(type==U_JT_TRANSPARENT) { |
1154 | // just skip this character | |
1155 | } else if(type==U_JT_RIGHT_JOINING || type==U_JT_DUAL_JOINING) { | |
1156 | break; // postcontext fulfilled | |
1157 | } else { | |
1158 | return FALSE; | |
1159 | } | |
1160 | } | |
1161 | } else if(label[i]==0x200d) { | |
1162 | // Appendix A.2. ZERO WIDTH JOINER (U+200D) | |
1163 | // Rule Set: | |
1164 | // False; | |
1165 | // If Canonical_Combining_Class(Before(cp)) .eq. Virama Then True; | |
1166 | if(i==0) { | |
1167 | return FALSE; | |
1168 | } | |
1169 | UChar32 c; | |
1170 | int32_t j=i; | |
1171 | U16_PREV_UNSAFE(label, j, c); | |
4388f060 | 1172 | if(uts46Norm2.getCombiningClass(c)!=9) { |
729e4ab9 A |
1173 | return FALSE; |
1174 | } | |
1175 | } | |
1176 | } | |
1177 | return TRUE; | |
1178 | } | |
1179 | ||
4388f060 A |
1180 | void |
1181 | UTS46::checkLabelContextO(const UChar *label, int32_t labelLength, IDNAInfo &info) const { | |
1182 | int32_t labelEnd=labelLength-1; // inclusive | |
1183 | int32_t arabicDigits=0; // -1 for 066x, +1 for 06Fx | |
1184 | for(int32_t i=0; i<=labelEnd; ++i) { | |
1185 | UChar32 c=label[i]; | |
1186 | if(c<0xb7) { | |
1187 | // ASCII fastpath | |
1188 | } else if(c<=0x6f9) { | |
1189 | if(c==0xb7) { | |
1190 | // Appendix A.3. MIDDLE DOT (U+00B7) | |
1191 | // Rule Set: | |
1192 | // False; | |
1193 | // If Before(cp) .eq. U+006C And | |
1194 | // After(cp) .eq. U+006C Then True; | |
1195 | if(!(0<i && label[i-1]==0x6c && | |
1196 | i<labelEnd && label[i+1]==0x6c)) { | |
1197 | info.labelErrors|=UIDNA_ERROR_CONTEXTO_PUNCTUATION; | |
1198 | } | |
1199 | } else if(c==0x375) { | |
1200 | // Appendix A.4. GREEK LOWER NUMERAL SIGN (KERAIA) (U+0375) | |
1201 | // Rule Set: | |
1202 | // False; | |
1203 | // If Script(After(cp)) .eq. Greek Then True; | |
1204 | UScriptCode script=USCRIPT_INVALID_CODE; | |
1205 | if(i<labelEnd) { | |
1206 | UErrorCode errorCode=U_ZERO_ERROR; | |
1207 | int32_t j=i+1; | |
1208 | U16_NEXT(label, j, labelLength, c); | |
1209 | script=uscript_getScript(c, &errorCode); | |
1210 | } | |
1211 | if(script!=USCRIPT_GREEK) { | |
1212 | info.labelErrors|=UIDNA_ERROR_CONTEXTO_PUNCTUATION; | |
1213 | } | |
1214 | } else if(c==0x5f3 || c==0x5f4) { | |
1215 | // Appendix A.5. HEBREW PUNCTUATION GERESH (U+05F3) | |
1216 | // Rule Set: | |
1217 | // False; | |
1218 | // If Script(Before(cp)) .eq. Hebrew Then True; | |
1219 | // | |
1220 | // Appendix A.6. HEBREW PUNCTUATION GERSHAYIM (U+05F4) | |
1221 | // Rule Set: | |
1222 | // False; | |
1223 | // If Script(Before(cp)) .eq. Hebrew Then True; | |
1224 | UScriptCode script=USCRIPT_INVALID_CODE; | |
1225 | if(0<i) { | |
1226 | UErrorCode errorCode=U_ZERO_ERROR; | |
1227 | int32_t j=i; | |
1228 | U16_PREV(label, 0, j, c); | |
1229 | script=uscript_getScript(c, &errorCode); | |
1230 | } | |
1231 | if(script!=USCRIPT_HEBREW) { | |
1232 | info.labelErrors|=UIDNA_ERROR_CONTEXTO_PUNCTUATION; | |
1233 | } | |
1234 | } else if(0x660<=c /* && c<=0x6f9 */) { | |
1235 | // Appendix A.8. ARABIC-INDIC DIGITS (0660..0669) | |
1236 | // Rule Set: | |
1237 | // True; | |
1238 | // For All Characters: | |
1239 | // If cp .in. 06F0..06F9 Then False; | |
1240 | // End For; | |
1241 | // | |
1242 | // Appendix A.9. EXTENDED ARABIC-INDIC DIGITS (06F0..06F9) | |
1243 | // Rule Set: | |
1244 | // True; | |
1245 | // For All Characters: | |
1246 | // If cp .in. 0660..0669 Then False; | |
1247 | // End For; | |
1248 | if(c<=0x669) { | |
1249 | if(arabicDigits>0) { | |
1250 | info.labelErrors|=UIDNA_ERROR_CONTEXTO_DIGITS; | |
1251 | } | |
1252 | arabicDigits=-1; | |
1253 | } else if(0x6f0<=c) { | |
1254 | if(arabicDigits<0) { | |
1255 | info.labelErrors|=UIDNA_ERROR_CONTEXTO_DIGITS; | |
1256 | } | |
1257 | arabicDigits=1; | |
1258 | } | |
1259 | } | |
1260 | } else if(c==0x30fb) { | |
1261 | // Appendix A.7. KATAKANA MIDDLE DOT (U+30FB) | |
1262 | // Rule Set: | |
1263 | // False; | |
1264 | // For All Characters: | |
1265 | // If Script(cp) .in. {Hiragana, Katakana, Han} Then True; | |
1266 | // End For; | |
1267 | UErrorCode errorCode=U_ZERO_ERROR; | |
1268 | for(int j=0;;) { | |
1269 | if(j>labelEnd) { | |
1270 | info.labelErrors|=UIDNA_ERROR_CONTEXTO_PUNCTUATION; | |
1271 | break; | |
1272 | } | |
1273 | U16_NEXT(label, j, labelLength, c); | |
1274 | UScriptCode script=uscript_getScript(c, &errorCode); | |
1275 | if(script==USCRIPT_HIRAGANA || script==USCRIPT_KATAKANA || script==USCRIPT_HAN) { | |
1276 | break; | |
1277 | } | |
1278 | } | |
1279 | } | |
1280 | } | |
1281 | } | |
1282 | ||
729e4ab9 A |
1283 | U_NAMESPACE_END |
1284 | ||
1285 | // C API ------------------------------------------------------------------- *** | |
1286 | ||
1287 | U_NAMESPACE_USE | |
1288 | ||
51004dcb | 1289 | U_CAPI UIDNA * U_EXPORT2 |
729e4ab9 A |
1290 | uidna_openUTS46(uint32_t options, UErrorCode *pErrorCode) { |
1291 | return reinterpret_cast<UIDNA *>(IDNA::createUTS46Instance(options, *pErrorCode)); | |
1292 | } | |
1293 | ||
51004dcb | 1294 | U_CAPI void U_EXPORT2 |
729e4ab9 A |
1295 | uidna_close(UIDNA *idna) { |
1296 | delete reinterpret_cast<IDNA *>(idna); | |
1297 | } | |
1298 | ||
1299 | static UBool | |
1300 | checkArgs(const void *label, int32_t length, | |
1301 | void *dest, int32_t capacity, | |
1302 | UIDNAInfo *pInfo, UErrorCode *pErrorCode) { | |
1303 | if(U_FAILURE(*pErrorCode)) { | |
1304 | return FALSE; | |
1305 | } | |
1306 | // sizeof(UIDNAInfo)=16 in the first API version. | |
1307 | if(pInfo==NULL || pInfo->size<16) { | |
1308 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
1309 | return FALSE; | |
1310 | } | |
1311 | if( (label==NULL ? length!=0 : length<-1) || | |
1312 | (dest==NULL ? capacity!=0 : capacity<0) || | |
1313 | (dest==label && label!=NULL) | |
1314 | ) { | |
1315 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
1316 | return FALSE; | |
1317 | } | |
1318 | // Set all *pInfo bytes to 0 except for the size field itself. | |
1319 | uprv_memset(&pInfo->size+1, 0, pInfo->size-sizeof(pInfo->size)); | |
1320 | return TRUE; | |
1321 | } | |
1322 | ||
1323 | static void | |
1324 | idnaInfoToStruct(IDNAInfo &info, UIDNAInfo *pInfo) { | |
1325 | pInfo->isTransitionalDifferent=info.isTransitionalDifferent(); | |
1326 | pInfo->errors=info.getErrors(); | |
1327 | } | |
1328 | ||
51004dcb | 1329 | U_CAPI int32_t U_EXPORT2 |
729e4ab9 A |
1330 | uidna_labelToASCII(const UIDNA *idna, |
1331 | const UChar *label, int32_t length, | |
1332 | UChar *dest, int32_t capacity, | |
1333 | UIDNAInfo *pInfo, UErrorCode *pErrorCode) { | |
1334 | if(!checkArgs(label, length, dest, capacity, pInfo, pErrorCode)) { | |
1335 | return 0; | |
1336 | } | |
1337 | UnicodeString src((UBool)(length<0), label, length); | |
1338 | UnicodeString destString(dest, 0, capacity); | |
1339 | IDNAInfo info; | |
1340 | reinterpret_cast<const IDNA *>(idna)->labelToASCII(src, destString, info, *pErrorCode); | |
1341 | idnaInfoToStruct(info, pInfo); | |
1342 | return destString.extract(dest, capacity, *pErrorCode); | |
1343 | } | |
1344 | ||
51004dcb | 1345 | U_CAPI int32_t U_EXPORT2 |
729e4ab9 A |
1346 | uidna_labelToUnicode(const UIDNA *idna, |
1347 | const UChar *label, int32_t length, | |
1348 | UChar *dest, int32_t capacity, | |
1349 | UIDNAInfo *pInfo, UErrorCode *pErrorCode) { | |
1350 | if(!checkArgs(label, length, dest, capacity, pInfo, pErrorCode)) { | |
1351 | return 0; | |
1352 | } | |
1353 | UnicodeString src((UBool)(length<0), label, length); | |
1354 | UnicodeString destString(dest, 0, capacity); | |
1355 | IDNAInfo info; | |
1356 | reinterpret_cast<const IDNA *>(idna)->labelToUnicode(src, destString, info, *pErrorCode); | |
1357 | idnaInfoToStruct(info, pInfo); | |
1358 | return destString.extract(dest, capacity, *pErrorCode); | |
1359 | } | |
1360 | ||
51004dcb | 1361 | U_CAPI int32_t U_EXPORT2 |
729e4ab9 A |
1362 | uidna_nameToASCII(const UIDNA *idna, |
1363 | const UChar *name, int32_t length, | |
1364 | UChar *dest, int32_t capacity, | |
1365 | UIDNAInfo *pInfo, UErrorCode *pErrorCode) { | |
1366 | if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) { | |
1367 | return 0; | |
1368 | } | |
1369 | UnicodeString src((UBool)(length<0), name, length); | |
1370 | UnicodeString destString(dest, 0, capacity); | |
1371 | IDNAInfo info; | |
1372 | reinterpret_cast<const IDNA *>(idna)->nameToASCII(src, destString, info, *pErrorCode); | |
1373 | idnaInfoToStruct(info, pInfo); | |
1374 | return destString.extract(dest, capacity, *pErrorCode); | |
1375 | } | |
1376 | ||
51004dcb | 1377 | U_CAPI int32_t U_EXPORT2 |
729e4ab9 A |
1378 | uidna_nameToUnicode(const UIDNA *idna, |
1379 | const UChar *name, int32_t length, | |
1380 | UChar *dest, int32_t capacity, | |
1381 | UIDNAInfo *pInfo, UErrorCode *pErrorCode) { | |
1382 | if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) { | |
1383 | return 0; | |
1384 | } | |
1385 | UnicodeString src((UBool)(length<0), name, length); | |
1386 | UnicodeString destString(dest, 0, capacity); | |
1387 | IDNAInfo info; | |
1388 | reinterpret_cast<const IDNA *>(idna)->nameToUnicode(src, destString, info, *pErrorCode); | |
1389 | idnaInfoToStruct(info, pInfo); | |
1390 | return destString.extract(dest, capacity, *pErrorCode); | |
1391 | } | |
1392 | ||
51004dcb | 1393 | U_CAPI int32_t U_EXPORT2 |
729e4ab9 A |
1394 | uidna_labelToASCII_UTF8(const UIDNA *idna, |
1395 | const char *label, int32_t length, | |
1396 | char *dest, int32_t capacity, | |
1397 | UIDNAInfo *pInfo, UErrorCode *pErrorCode) { | |
1398 | if(!checkArgs(label, length, dest, capacity, pInfo, pErrorCode)) { | |
1399 | return 0; | |
1400 | } | |
1401 | StringPiece src(label, length<0 ? uprv_strlen(label) : length); | |
1402 | CheckedArrayByteSink sink(dest, capacity); | |
1403 | IDNAInfo info; | |
1404 | reinterpret_cast<const IDNA *>(idna)->labelToASCII_UTF8(src, sink, info, *pErrorCode); | |
1405 | idnaInfoToStruct(info, pInfo); | |
1406 | return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode); | |
1407 | } | |
1408 | ||
51004dcb | 1409 | U_CAPI int32_t U_EXPORT2 |
729e4ab9 A |
1410 | uidna_labelToUnicodeUTF8(const UIDNA *idna, |
1411 | const char *label, int32_t length, | |
1412 | char *dest, int32_t capacity, | |
1413 | UIDNAInfo *pInfo, UErrorCode *pErrorCode) { | |
1414 | if(!checkArgs(label, length, dest, capacity, pInfo, pErrorCode)) { | |
1415 | return 0; | |
1416 | } | |
1417 | StringPiece src(label, length<0 ? uprv_strlen(label) : length); | |
1418 | CheckedArrayByteSink sink(dest, capacity); | |
1419 | IDNAInfo info; | |
1420 | reinterpret_cast<const IDNA *>(idna)->labelToUnicodeUTF8(src, sink, info, *pErrorCode); | |
1421 | idnaInfoToStruct(info, pInfo); | |
1422 | return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode); | |
1423 | } | |
1424 | ||
51004dcb | 1425 | U_CAPI int32_t U_EXPORT2 |
729e4ab9 A |
1426 | uidna_nameToASCII_UTF8(const UIDNA *idna, |
1427 | const char *name, int32_t length, | |
1428 | char *dest, int32_t capacity, | |
1429 | UIDNAInfo *pInfo, UErrorCode *pErrorCode) { | |
1430 | if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) { | |
1431 | return 0; | |
1432 | } | |
1433 | StringPiece src(name, length<0 ? uprv_strlen(name) : length); | |
1434 | CheckedArrayByteSink sink(dest, capacity); | |
1435 | IDNAInfo info; | |
1436 | reinterpret_cast<const IDNA *>(idna)->nameToASCII_UTF8(src, sink, info, *pErrorCode); | |
1437 | idnaInfoToStruct(info, pInfo); | |
1438 | return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode); | |
1439 | } | |
1440 | ||
51004dcb | 1441 | U_CAPI int32_t U_EXPORT2 |
729e4ab9 A |
1442 | uidna_nameToUnicodeUTF8(const UIDNA *idna, |
1443 | const char *name, int32_t length, | |
1444 | char *dest, int32_t capacity, | |
1445 | UIDNAInfo *pInfo, UErrorCode *pErrorCode) { | |
1446 | if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) { | |
1447 | return 0; | |
1448 | } | |
1449 | StringPiece src(name, length<0 ? uprv_strlen(name) : length); | |
1450 | CheckedArrayByteSink sink(dest, capacity); | |
1451 | IDNAInfo info; | |
1452 | reinterpret_cast<const IDNA *>(idna)->nameToUnicodeUTF8(src, sink, info, *pErrorCode); | |
1453 | idnaInfoToStruct(info, pInfo); | |
1454 | return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode); | |
1455 | } | |
1456 | ||
1457 | #endif // UCONFIG_NO_IDNA |