]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
b75a7d8f A |
3 | /* |
4 | ******************************************************************************* | |
5 | * | |
b331163b | 6 | * Copyright (C) 2003-2014, International Business Machines |
b75a7d8f A |
7 | * Corporation and others. All Rights Reserved. |
8 | * | |
9 | ******************************************************************************* | |
10 | * file name: uidna.cpp | |
f3c0d7a5 | 11 | * encoding: UTF-8 |
b75a7d8f A |
12 | * tab size: 8 (not used) |
13 | * indentation:4 | |
14 | * | |
15 | * created on: 2003feb1 | |
16 | * created by: Ram Viswanadha | |
17 | */ | |
18 | ||
19 | #include "unicode/utypes.h" | |
20 | ||
21 | #if !UCONFIG_NO_IDNA | |
22 | ||
23 | #include "unicode/uidna.h" | |
24 | #include "unicode/ustring.h" | |
374ca955 | 25 | #include "unicode/usprep.h" |
b75a7d8f A |
26 | #include "punycode.h" |
27 | #include "ustr_imp.h" | |
28 | #include "cmemory.h" | |
46f4442e | 29 | #include "uassert.h" |
b75a7d8f A |
30 | #include "sprpimpl.h" |
31 | ||
32 | /* it is official IDNA ACE Prefix is "xn--" */ | |
33 | static const UChar ACE_PREFIX[] ={ 0x0078,0x006E,0x002d,0x002d } ; | |
34 | #define ACE_PREFIX_LENGTH 4 | |
35 | ||
/* A converted label must not be longer than MAX_LABEL_LENGTH code units. */
#define MAX_LABEL_LENGTH 63
/* Size of the stack buffers used while processing a single label. */
#define MAX_LABEL_BUFFER_SIZE 100

/* A converted domain name must not be longer than MAX_DOMAIN_NAME_LENGTH code units. */
#define MAX_DOMAIN_NAME_LENGTH 255
/*
 * Size of the stack buffers used for whole domain names.
 * The expansion is parenthesized so that it remains a single operand when
 * the macro is used inside a larger expression.
 */
#define MAX_IDN_BUFFER_SIZE (MAX_DOMAIN_NAME_LENGTH+1)

/* ASCII code points used by the helpers in this file. */
#define LOWER_CASE_DELTA 0x0020
#define HYPHEN 0x002D
#define FULL_STOP 0x002E
#define CAPITAL_A 0x0041
#define CAPITAL_Z 0x005A
49 | ||
b75a7d8f A |
50 | inline static UChar |
51 | toASCIILower(UChar ch){ | |
52 | if(CAPITAL_A <= ch && ch <= CAPITAL_Z){ | |
53 | return ch + LOWER_CASE_DELTA; | |
54 | } | |
55 | return ch; | |
56 | } | |
57 | ||
58 | inline static UBool | |
59 | startsWithPrefix(const UChar* src , int32_t srcLength){ | |
60 | UBool startsWithPrefix = TRUE; | |
61 | ||
62 | if(srcLength < ACE_PREFIX_LENGTH){ | |
63 | return FALSE; | |
64 | } | |
65 | ||
66 | for(int8_t i=0; i< ACE_PREFIX_LENGTH; i++){ | |
67 | if(toASCIILower(src[i]) != ACE_PREFIX[i]){ | |
68 | startsWithPrefix = FALSE; | |
69 | } | |
70 | } | |
71 | return startsWithPrefix; | |
72 | } | |
73 | ||
b75a7d8f A |
74 | |
75 | inline static int32_t | |
76 | compareCaseInsensitiveASCII(const UChar* s1, int32_t s1Len, | |
77 | const UChar* s2, int32_t s2Len){ | |
78 | ||
79 | int32_t minLength; | |
80 | int32_t lengthResult; | |
81 | ||
82 | // are we comparing different lengths? | |
83 | if(s1Len != s2Len) { | |
84 | if(s1Len < s2Len) { | |
85 | minLength = s1Len; | |
86 | lengthResult = -1; | |
87 | } else { | |
88 | minLength = s2Len; | |
89 | lengthResult = 1; | |
90 | } | |
91 | } else { | |
92 | // ok the lengths are equal | |
93 | minLength = s1Len; | |
94 | lengthResult = 0; | |
95 | } | |
96 | ||
97 | UChar c1,c2; | |
98 | int32_t rc; | |
99 | ||
100 | for(int32_t i =0;/* no condition */;i++) { | |
101 | ||
102 | /* If we reach the ends of both strings then they match */ | |
103 | if(i == minLength) { | |
104 | return lengthResult; | |
105 | } | |
106 | ||
107 | c1 = s1[i]; | |
108 | c2 = s2[i]; | |
109 | ||
110 | /* Case-insensitive comparison */ | |
111 | if(c1!=c2) { | |
112 | rc=(int32_t)toASCIILower(c1)-(int32_t)toASCIILower(c2); | |
113 | if(rc!=0) { | |
114 | lengthResult=rc; | |
115 | break; | |
116 | } | |
117 | } | |
118 | } | |
119 | return lengthResult; | |
120 | } | |
121 | ||
122 | ||
374ca955 A |
123 | /** |
124 | * Ascertain if the given code point is a label separator as | |
125 | * defined by the IDNA RFC | |
126 | * | |
127 | * @param ch The code point to be ascertained | |
128 | * @return true if the char is a label separator | |
73c04bcf | 129 | * @stable ICU 2.8 |
374ca955 A |
130 | */ |
131 | static inline UBool isLabelSeparator(UChar ch){ | |
132 | switch(ch){ | |
133 | case 0x002e: | |
134 | case 0x3002: | |
135 | case 0xFF0E: | |
136 | case 0xFF61: | |
137 | return TRUE; | |
138 | default: | |
139 | return FALSE; | |
b75a7d8f | 140 | } |
374ca955 A |
141 | } |
142 | ||
143 | // returns the length of the label excluding the separator | |
144 | // if *limit == separator then the length returned does not include | |
145 | // the separtor. | |
146 | static inline int32_t | |
46f4442e A |
147 | getNextSeparator(UChar *src, int32_t srcLength, |
148 | UChar **limit, UBool *done){ | |
374ca955 A |
149 | if(srcLength == -1){ |
150 | int32_t i; | |
151 | for(i=0 ; ;i++){ | |
152 | if(src[i] == 0){ | |
153 | *limit = src + i; // point to null | |
154 | *done = TRUE; | |
155 | return i; | |
156 | } | |
157 | if(isLabelSeparator(src[i])){ | |
158 | *limit = src + (i+1); // go past the delimiter | |
159 | return i; | |
160 | ||
161 | } | |
162 | } | |
163 | }else{ | |
164 | int32_t i; | |
165 | for(i=0;i<srcLength;i++){ | |
166 | if(isLabelSeparator(src[i])){ | |
167 | *limit = src + (i+1); // go past the delimiter | |
168 | return i; | |
169 | } | |
170 | } | |
171 | // we have not found the delimiter | |
172 | // if(i==srcLength) | |
173 | *limit = src+srcLength; | |
174 | *done = TRUE; | |
175 | ||
176 | return i; | |
b75a7d8f | 177 | } |
374ca955 A |
178 | } |
179 | static inline UBool isLDHChar(UChar ch){ | |
180 | // high runner case | |
181 | if(ch>0x007A){ | |
182 | return FALSE; | |
183 | } | |
184 | //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A] | |
185 | if( (ch==0x002D) || | |
186 | (0x0030 <= ch && ch <= 0x0039) || | |
187 | (0x0041 <= ch && ch <= 0x005A) || | |
188 | (0x0061 <= ch && ch <= 0x007A) | |
189 | ){ | |
190 | return TRUE; | |
191 | } | |
192 | return FALSE; | |
193 | } | |
194 | ||
195 | static int32_t | |
196 | _internal_toASCII(const UChar* src, int32_t srcLength, | |
197 | UChar* dest, int32_t destCapacity, | |
198 | int32_t options, | |
199 | UStringPrepProfile* nameprep, | |
200 | UParseError* parseError, | |
46f4442e A |
201 | UErrorCode* status) |
202 | { | |
374ca955 | 203 | |
46f4442e | 204 | // TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too. |
b75a7d8f A |
205 | UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE]; |
206 | //initialize pointers to stack buffers | |
207 | UChar *b1 = b1Stack, *b2 = b2Stack; | |
73c04bcf | 208 | int32_t b1Len=0, b2Len, |
b75a7d8f A |
209 | b1Capacity = MAX_LABEL_BUFFER_SIZE, |
210 | b2Capacity = MAX_LABEL_BUFFER_SIZE , | |
211 | reqLength=0; | |
212 | ||
374ca955 | 213 | int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0; |
b75a7d8f A |
214 | UBool* caseFlags = NULL; |
215 | ||
216 | // the source contains all ascii codepoints | |
217 | UBool srcIsASCII = TRUE; | |
218 | // assume the source contains all LDH codepoints | |
219 | UBool srcIsLDH = TRUE; | |
220 | ||
221 | int32_t j=0; | |
222 | ||
223 | //get the options | |
b75a7d8f | 224 | UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0); |
374ca955 | 225 | |
b75a7d8f | 226 | int32_t failPos = -1; |
b75a7d8f | 227 | |
73c04bcf A |
228 | if(srcLength == -1){ |
229 | srcLength = u_strlen(src); | |
230 | } | |
231 | ||
232 | if(srcLength > b1Capacity){ | |
233 | b1 = (UChar*) uprv_malloc(srcLength * U_SIZEOF_UCHAR); | |
b75a7d8f A |
234 | if(b1==NULL){ |
235 | *status = U_MEMORY_ALLOCATION_ERROR; | |
236 | goto CLEANUP; | |
237 | } | |
73c04bcf A |
238 | b1Capacity = srcLength; |
239 | } | |
b75a7d8f | 240 | |
73c04bcf A |
241 | // step 1 |
242 | for( j=0;j<srcLength;j++){ | |
243 | if(src[j] > 0x7F){ | |
244 | srcIsASCII = FALSE; | |
245 | } | |
246 | b1[b1Len++] = src[j]; | |
247 | } | |
248 | ||
249 | // step 2 is performed only if the source contains non ASCII | |
250 | if(srcIsASCII == FALSE){ | |
b75a7d8f | 251 | |
73c04bcf A |
252 | // step 2 |
253 | b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status); | |
254 | ||
255 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
256 | // redo processing of string | |
257 | // we do not have enough room so grow the buffer | |
258 | if(b1 != b1Stack){ | |
259 | uprv_free(b1); | |
260 | } | |
261 | b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
262 | if(b1==NULL){ | |
263 | *status = U_MEMORY_ALLOCATION_ERROR; | |
264 | goto CLEANUP; | |
265 | } | |
266 | ||
267 | *status = U_ZERO_ERROR; // reset error | |
268 | ||
269 | b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status); | |
270 | } | |
b75a7d8f A |
271 | } |
272 | // error bail out | |
273 | if(U_FAILURE(*status)){ | |
274 | goto CLEANUP; | |
275 | } | |
73c04bcf A |
276 | if(b1Len == 0){ |
277 | *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; | |
278 | goto CLEANUP; | |
279 | } | |
b75a7d8f | 280 | |
73c04bcf A |
281 | // for step 3 & 4 |
282 | srcIsASCII = TRUE; | |
b75a7d8f | 283 | for( j=0;j<b1Len;j++){ |
73c04bcf | 284 | // check if output of usprep_prepare is all ASCII |
b75a7d8f A |
285 | if(b1[j] > 0x7F){ |
286 | srcIsASCII = FALSE; | |
374ca955 | 287 | }else if(isLDHChar(b1[j])==FALSE){ // if the char is in ASCII range verify that it is an LDH character |
b75a7d8f A |
288 | srcIsLDH = FALSE; |
289 | failPos = j; | |
290 | } | |
291 | } | |
b75a7d8f A |
292 | if(useSTD3ASCIIRules == TRUE){ |
293 | // verify 3a and 3b | |
374ca955 A |
294 | // 3(a) Verify the absence of non-LDH ASCII code points; that is, the |
295 | // absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F. | |
296 | // 3(b) Verify the absence of leading and trailing hyphen-minus; that | |
297 | // is, the absence of U+002D at the beginning and end of the | |
298 | // sequence. | |
299 | if( srcIsLDH == FALSE /* source at this point should not contain anyLDH characters */ | |
b75a7d8f A |
300 | || b1[0] == HYPHEN || b1[b1Len-1] == HYPHEN){ |
301 | *status = U_IDNA_STD3_ASCII_RULES_ERROR; | |
302 | ||
303 | /* populate the parseError struct */ | |
304 | if(srcIsLDH==FALSE){ | |
305 | // failPos is always set the index of failure | |
306 | uprv_syntaxError(b1,failPos, b1Len,parseError); | |
307 | }else if(b1[0] == HYPHEN){ | |
308 | // fail position is 0 | |
309 | uprv_syntaxError(b1,0,b1Len,parseError); | |
310 | }else{ | |
311 | // the last index in the source is always length-1 | |
312 | uprv_syntaxError(b1, (b1Len>0) ? b1Len-1 : b1Len, b1Len,parseError); | |
313 | } | |
314 | ||
315 | goto CLEANUP; | |
316 | } | |
317 | } | |
73c04bcf | 318 | // Step 4: if the source is ASCII then proceed to step 8 |
b75a7d8f A |
319 | if(srcIsASCII){ |
320 | if(b1Len <= destCapacity){ | |
a62d09fc | 321 | u_memmove(dest, b1, b1Len); |
b75a7d8f A |
322 | reqLength = b1Len; |
323 | }else{ | |
324 | reqLength = b1Len; | |
325 | goto CLEANUP; | |
326 | } | |
327 | }else{ | |
328 | // step 5 : verify the sequence does not begin with ACE prefix | |
329 | if(!startsWithPrefix(b1,b1Len)){ | |
330 | ||
331 | //step 6: encode the sequence with punycode | |
332 | ||
333 | // do not preserve the case flags for now! | |
334 | // TODO: Preserve the case while implementing the RFE | |
335 | // caseFlags = (UBool*) uprv_malloc(b1Len * sizeof(UBool)); | |
336 | // uprv_memset(caseFlags,TRUE,b1Len); | |
337 | ||
338 | b2Len = u_strToPunycode(b1,b1Len,b2,b2Capacity,caseFlags, status); | |
339 | ||
340 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
341 | // redo processing of string | |
342 | /* we do not have enough room so grow the buffer*/ | |
343 | b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); | |
344 | if(b2 == NULL){ | |
345 | *status = U_MEMORY_ALLOCATION_ERROR; | |
346 | goto CLEANUP; | |
347 | } | |
348 | ||
349 | *status = U_ZERO_ERROR; // reset error | |
350 | ||
351 | b2Len = u_strToPunycode(b1,b1Len,b2,b2Len,caseFlags, status); | |
352 | } | |
353 | //error bail out | |
354 | if(U_FAILURE(*status)){ | |
355 | goto CLEANUP; | |
356 | } | |
357 | // TODO : Reconsider while implementing the case preserve RFE | |
358 | // convert all codepoints to lower case ASCII | |
359 | // toASCIILower(b2,b2Len); | |
360 | reqLength = b2Len+ACE_PREFIX_LENGTH; | |
361 | ||
362 | if(reqLength > destCapacity){ | |
363 | *status = U_BUFFER_OVERFLOW_ERROR; | |
364 | goto CLEANUP; | |
365 | } | |
366 | //Step 7: prepend the ACE prefix | |
a62d09fc | 367 | u_memcpy(dest, ACE_PREFIX, ACE_PREFIX_LENGTH); |
b75a7d8f | 368 | //Step 6: copy the contents in b2 into dest |
a62d09fc | 369 | u_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len); |
b75a7d8f A |
370 | |
371 | }else{ | |
372 | *status = U_IDNA_ACE_PREFIX_ERROR; | |
373 | //position of failure is 0 | |
374 | uprv_syntaxError(b1,0,b1Len,parseError); | |
375 | goto CLEANUP; | |
376 | } | |
377 | } | |
46f4442e | 378 | // step 8: verify the length of label |
b75a7d8f A |
379 | if(reqLength > MAX_LABEL_LENGTH){ |
380 | *status = U_IDNA_LABEL_TOO_LONG_ERROR; | |
381 | } | |
382 | ||
383 | CLEANUP: | |
384 | if(b1 != b1Stack){ | |
385 | uprv_free(b1); | |
386 | } | |
387 | if(b2 != b2Stack){ | |
388 | uprv_free(b2); | |
389 | } | |
390 | uprv_free(caseFlags); | |
391 | ||
b75a7d8f A |
392 | return u_terminateUChars(dest, destCapacity, reqLength, status); |
393 | } | |
394 | ||
374ca955 A |
395 | static int32_t |
396 | _internal_toUnicode(const UChar* src, int32_t srcLength, | |
397 | UChar* dest, int32_t destCapacity, | |
398 | int32_t options, | |
399 | UStringPrepProfile* nameprep, | |
400 | UParseError* parseError, | |
46f4442e A |
401 | UErrorCode* status) |
402 | { | |
b75a7d8f A |
403 | |
404 | //get the options | |
46f4442e | 405 | //UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0); |
374ca955 | 406 | int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0; |
46f4442e A |
407 | |
408 | // TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too. | |
b75a7d8f A |
409 | UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE]; |
410 | ||
411 | //initialize pointers to stack buffers | |
412 | UChar *b1 = b1Stack, *b2 = b2Stack, *b1Prime=NULL, *b3=b3Stack; | |
b331163b | 413 | int32_t b1Len = 0, b2Len, b1PrimeLen, b3Len, |
b75a7d8f A |
414 | b1Capacity = MAX_LABEL_BUFFER_SIZE, |
415 | b2Capacity = MAX_LABEL_BUFFER_SIZE, | |
416 | b3Capacity = MAX_LABEL_BUFFER_SIZE, | |
417 | reqLength=0; | |
374ca955 | 418 | |
b75a7d8f A |
419 | UBool* caseFlags = NULL; |
420 | ||
421 | UBool srcIsASCII = TRUE; | |
46f4442e A |
422 | /*UBool srcIsLDH = TRUE; |
423 | int32_t failPos =0;*/ | |
b75a7d8f | 424 | |
b75a7d8f A |
425 | // step 1: find out if all the codepoints in src are ASCII |
426 | if(srcLength==-1){ | |
427 | srcLength = 0; | |
428 | for(;src[srcLength]!=0;){ | |
429 | if(src[srcLength]> 0x7f){ | |
430 | srcIsASCII = FALSE; | |
46f4442e | 431 | }/*else if(isLDHChar(src[srcLength])==FALSE){ |
374ca955 A |
432 | // here we do not assemble surrogates |
433 | // since we know that LDH code points | |
434 | // are in the ASCII range only | |
b75a7d8f A |
435 | srcIsLDH = FALSE; |
436 | failPos = srcLength; | |
46f4442e | 437 | }*/ |
b75a7d8f A |
438 | srcLength++; |
439 | } | |
374ca955 | 440 | }else if(srcLength > 0){ |
b75a7d8f A |
441 | for(int32_t j=0; j<srcLength; j++){ |
442 | if(src[j]> 0x7f){ | |
443 | srcIsASCII = FALSE; | |
46f4442e | 444 | }/*else if(isLDHChar(src[j])==FALSE){ |
374ca955 A |
445 | // here we do not assemble surrogates |
446 | // since we know that LDH code points | |
447 | // are in the ASCII range only | |
b75a7d8f A |
448 | srcIsLDH = FALSE; |
449 | failPos = j; | |
46f4442e | 450 | }*/ |
b75a7d8f | 451 | } |
374ca955 A |
452 | }else{ |
453 | return 0; | |
b75a7d8f | 454 | } |
46f4442e | 455 | |
b75a7d8f A |
456 | if(srcIsASCII == FALSE){ |
457 | // step 2: process the string | |
374ca955 | 458 | b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status); |
b75a7d8f A |
459 | if(*status == U_BUFFER_OVERFLOW_ERROR){ |
460 | // redo processing of string | |
461 | /* we do not have enough room so grow the buffer*/ | |
462 | b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
463 | if(b1==NULL){ | |
464 | *status = U_MEMORY_ALLOCATION_ERROR; | |
465 | goto CLEANUP; | |
466 | } | |
467 | ||
468 | *status = U_ZERO_ERROR; // reset error | |
469 | ||
374ca955 | 470 | b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status); |
b75a7d8f A |
471 | } |
472 | //bail out on error | |
473 | if(U_FAILURE(*status)){ | |
474 | goto CLEANUP; | |
475 | } | |
476 | }else{ | |
477 | ||
478 | //just point src to b1 | |
479 | b1 = (UChar*) src; | |
480 | b1Len = srcLength; | |
481 | } | |
482 | ||
46f4442e A |
483 | // The RFC states that |
484 | // <quote> | |
485 | // ToUnicode never fails. If any step fails, then the original input | |
486 | // is returned immediately in that step. | |
487 | // </quote> | |
488 | ||
b75a7d8f | 489 | //step 3: verify ACE Prefix |
46f4442e A |
490 | if(startsWithPrefix(b1,b1Len)){ |
491 | ||
b75a7d8f A |
492 | //step 4: Remove the ACE Prefix |
493 | b1Prime = b1 + ACE_PREFIX_LENGTH; | |
494 | b1PrimeLen = b1Len - ACE_PREFIX_LENGTH; | |
495 | ||
496 | //step 5: Decode using punycode | |
497 | b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Capacity, caseFlags,status); | |
46f4442e | 498 | |
b75a7d8f A |
499 | if(*status == U_BUFFER_OVERFLOW_ERROR){ |
500 | // redo processing of string | |
501 | /* we do not have enough room so grow the buffer*/ | |
502 | b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); | |
503 | if(b2==NULL){ | |
504 | *status = U_MEMORY_ALLOCATION_ERROR; | |
505 | goto CLEANUP; | |
506 | } | |
507 | ||
508 | *status = U_ZERO_ERROR; // reset error | |
46f4442e | 509 | |
b75a7d8f | 510 | b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Len, caseFlags, status); |
b75a7d8f | 511 | } |
46f4442e A |
512 | |
513 | ||
b75a7d8f | 514 | //step 6:Apply toASCII |
46f4442e A |
515 | b3Len = uidna_toASCII(b2, b2Len, b3, b3Capacity, options, parseError, status); |
516 | ||
b75a7d8f A |
517 | if(*status == U_BUFFER_OVERFLOW_ERROR){ |
518 | // redo processing of string | |
519 | /* we do not have enough room so grow the buffer*/ | |
520 | b3 = (UChar*) uprv_malloc(b3Len * U_SIZEOF_UCHAR); | |
521 | if(b3==NULL){ | |
522 | *status = U_MEMORY_ALLOCATION_ERROR; | |
523 | goto CLEANUP; | |
524 | } | |
525 | ||
526 | *status = U_ZERO_ERROR; // reset error | |
46f4442e | 527 | |
b75a7d8f | 528 | b3Len = uidna_toASCII(b2,b2Len,b3,b3Len,options,parseError, status); |
46f4442e | 529 | |
b75a7d8f A |
530 | } |
531 | //bail out on error | |
532 | if(U_FAILURE(*status)){ | |
533 | goto CLEANUP; | |
534 | } | |
535 | ||
536 | //step 7: verify | |
537 | if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=0){ | |
46f4442e A |
538 | // Cause the original to be returned. |
539 | *status = U_IDNA_VERIFICATION_ERROR; | |
b75a7d8f A |
540 | goto CLEANUP; |
541 | } | |
542 | ||
543 | //step 8: return output of step 5 | |
544 | reqLength = b2Len; | |
545 | if(b2Len <= destCapacity) { | |
a62d09fc | 546 | u_memmove(dest, b2, b2Len); |
b75a7d8f | 547 | } |
46f4442e A |
548 | } |
549 | else{ | |
550 | // See the start of this if statement for why this is commented out. | |
b75a7d8f | 551 | // verify that STD3 ASCII rules are satisfied |
46f4442e A |
552 | /*if(useSTD3ASCIIRules == TRUE){ |
553 | if( srcIsLDH == FALSE // source contains some non-LDH characters | |
b75a7d8f A |
554 | || src[0] == HYPHEN || src[srcLength-1] == HYPHEN){ |
555 | *status = U_IDNA_STD3_ASCII_RULES_ERROR; | |
556 | ||
46f4442e | 557 | // populate the parseError struct |
b75a7d8f A |
558 | if(srcIsLDH==FALSE){ |
559 | // failPos is always set the index of failure | |
560 | uprv_syntaxError(src,failPos, srcLength,parseError); | |
561 | }else if(src[0] == HYPHEN){ | |
562 | // fail position is 0 | |
563 | uprv_syntaxError(src,0,srcLength,parseError); | |
564 | }else{ | |
565 | // the last index in the source is always length-1 | |
566 | uprv_syntaxError(src, (srcLength>0) ? srcLength-1 : srcLength, srcLength,parseError); | |
567 | } | |
568 | ||
569 | goto CLEANUP; | |
570 | } | |
46f4442e A |
571 | }*/ |
572 | // just return the source | |
b75a7d8f A |
573 | //copy the source to destination |
574 | if(srcLength <= destCapacity){ | |
a62d09fc | 575 | u_memmove(dest, src, srcLength); |
b75a7d8f A |
576 | } |
577 | reqLength = srcLength; | |
578 | } | |
579 | ||
46f4442e | 580 | |
b75a7d8f A |
581 | CLEANUP: |
582 | ||
583 | if(b1 != b1Stack && b1!=src){ | |
584 | uprv_free(b1); | |
585 | } | |
586 | if(b2 != b2Stack){ | |
587 | uprv_free(b2); | |
588 | } | |
589 | uprv_free(caseFlags); | |
46f4442e | 590 | |
b75a7d8f A |
591 | // The RFC states that |
592 | // <quote> | |
593 | // ToUnicode never fails. If any step fails, then the original input | |
594 | // is returned immediately in that step. | |
595 | // </quote> | |
596 | // So if any step fails lets copy source to destination | |
597 | if(U_FAILURE(*status)){ | |
598 | //copy the source to destination | |
599 | if(dest && srcLength <= destCapacity){ | |
46f4442e A |
600 | // srcLength should have already been set earlier. |
601 | U_ASSERT(srcLength >= 0); | |
a62d09fc | 602 | u_memmove(dest, src, srcLength); |
b75a7d8f A |
603 | } |
604 | reqLength = srcLength; | |
46f4442e | 605 | *status = U_ZERO_ERROR; |
b75a7d8f A |
606 | } |
607 | ||
608 | return u_terminateUChars(dest, destCapacity, reqLength, status); | |
609 | } | |
610 | ||
374ca955 A |
611 | U_CAPI int32_t U_EXPORT2 |
612 | uidna_toASCII(const UChar* src, int32_t srcLength, | |
613 | UChar* dest, int32_t destCapacity, | |
614 | int32_t options, | |
615 | UParseError* parseError, | |
616 | UErrorCode* status){ | |
617 | ||
618 | if(status == NULL || U_FAILURE(*status)){ | |
619 | return 0; | |
620 | } | |
621 | if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ | |
622 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
623 | return 0; | |
624 | } | |
b75a7d8f | 625 | |
729e4ab9 | 626 | UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status); |
374ca955 A |
627 | |
628 | if(U_FAILURE(*status)){ | |
629 | return -1; | |
b75a7d8f | 630 | } |
374ca955 A |
631 | |
632 | int32_t retLen = _internal_toASCII(src, srcLength, dest, destCapacity, options, nameprep, parseError, status); | |
633 | ||
634 | /* close the profile*/ | |
635 | usprep_close(nameprep); | |
636 | ||
637 | return retLen; | |
b75a7d8f A |
638 | } |
639 | ||
374ca955 A |
640 | U_CAPI int32_t U_EXPORT2 |
641 | uidna_toUnicode(const UChar* src, int32_t srcLength, | |
642 | UChar* dest, int32_t destCapacity, | |
643 | int32_t options, | |
644 | UParseError* parseError, | |
645 | UErrorCode* status){ | |
646 | ||
647 | if(status == NULL || U_FAILURE(*status)){ | |
648 | return 0; | |
649 | } | |
650 | if( (src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ | |
651 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
652 | return 0; | |
653 | } | |
46f4442e | 654 | |
729e4ab9 | 655 | UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status); |
374ca955 A |
656 | |
657 | if(U_FAILURE(*status)){ | |
658 | return -1; | |
659 | } | |
660 | ||
661 | int32_t retLen = _internal_toUnicode(src, srcLength, dest, destCapacity, options, nameprep, parseError, status); | |
662 | ||
663 | usprep_close(nameprep); | |
664 | ||
665 | return retLen; | |
666 | } | |
667 | ||
668 | ||
b75a7d8f A |
669 | U_CAPI int32_t U_EXPORT2 |
670 | uidna_IDNToASCII( const UChar *src, int32_t srcLength, | |
671 | UChar* dest, int32_t destCapacity, | |
672 | int32_t options, | |
673 | UParseError *parseError, | |
674 | UErrorCode *status){ | |
675 | ||
676 | if(status == NULL || U_FAILURE(*status)){ | |
677 | return 0; | |
678 | } | |
679 | if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ | |
680 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
681 | return 0; | |
682 | } | |
683 | ||
684 | int32_t reqLength = 0; | |
685 | ||
729e4ab9 | 686 | UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status); |
b75a7d8f A |
687 | |
688 | if(U_FAILURE(*status)){ | |
689 | return 0; | |
690 | } | |
691 | ||
692 | //initialize pointers | |
693 | UChar *delimiter = (UChar*)src; | |
694 | UChar *labelStart = (UChar*)src; | |
695 | UChar *currentDest = (UChar*) dest; | |
696 | int32_t remainingLen = srcLength; | |
697 | int32_t remainingDestCapacity = destCapacity; | |
698 | int32_t labelLen = 0, labelReqLength = 0; | |
699 | UBool done = FALSE; | |
700 | ||
701 | ||
702 | for(;;){ | |
703 | ||
374ca955 | 704 | labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done); |
73c04bcf A |
705 | labelReqLength = 0; |
706 | if(!(labelLen==0 && done)){// make sure this is not a root label separator. | |
b75a7d8f | 707 | |
73c04bcf A |
708 | labelReqLength = _internal_toASCII( labelStart, labelLen, |
709 | currentDest, remainingDestCapacity, | |
710 | options, nameprep, | |
711 | parseError, status); | |
712 | ||
713 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
714 | ||
715 | *status = U_ZERO_ERROR; // reset error | |
716 | remainingDestCapacity = 0; | |
717 | } | |
b75a7d8f A |
718 | } |
719 | ||
720 | ||
721 | if(U_FAILURE(*status)){ | |
722 | break; | |
723 | } | |
724 | ||
725 | reqLength +=labelReqLength; | |
726 | // adjust the destination pointer | |
727 | if(labelReqLength < remainingDestCapacity){ | |
728 | currentDest = currentDest + labelReqLength; | |
729 | remainingDestCapacity -= labelReqLength; | |
730 | }else{ | |
731 | // should never occur | |
732 | remainingDestCapacity = 0; | |
733 | } | |
46f4442e | 734 | |
b75a7d8f A |
735 | if(done == TRUE){ |
736 | break; | |
737 | } | |
738 | ||
739 | // add the label separator | |
740 | if(remainingDestCapacity > 0){ | |
741 | *currentDest++ = FULL_STOP; | |
742 | remainingDestCapacity--; | |
743 | } | |
46f4442e | 744 | reqLength++; |
b75a7d8f A |
745 | |
746 | labelStart = delimiter; | |
747 | if(remainingLen >0 ){ | |
73c04bcf | 748 | remainingLen = (int32_t)(srcLength - (delimiter - src)); |
b75a7d8f A |
749 | } |
750 | ||
751 | } | |
46f4442e A |
752 | |
753 | if(reqLength > MAX_DOMAIN_NAME_LENGTH){ | |
754 | *status = U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR; | |
755 | } | |
756 | ||
374ca955 | 757 | usprep_close(nameprep); |
b75a7d8f A |
758 | |
759 | return u_terminateUChars(dest, destCapacity, reqLength, status); | |
760 | } | |
761 | ||
762 | U_CAPI int32_t U_EXPORT2 | |
763 | uidna_IDNToUnicode( const UChar* src, int32_t srcLength, | |
764 | UChar* dest, int32_t destCapacity, | |
765 | int32_t options, | |
766 | UParseError* parseError, | |
767 | UErrorCode* status){ | |
768 | ||
769 | if(status == NULL || U_FAILURE(*status)){ | |
770 | return 0; | |
771 | } | |
772 | if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ | |
773 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
774 | return 0; | |
775 | } | |
776 | ||
777 | int32_t reqLength = 0; | |
778 | ||
729e4ab9 | 779 | UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status); |
b75a7d8f A |
780 | |
781 | if(U_FAILURE(*status)){ | |
782 | return 0; | |
783 | } | |
784 | ||
785 | //initialize pointers | |
786 | UChar *delimiter = (UChar*)src; | |
787 | UChar *labelStart = (UChar*)src; | |
788 | UChar *currentDest = (UChar*) dest; | |
789 | int32_t remainingLen = srcLength; | |
790 | int32_t remainingDestCapacity = destCapacity; | |
791 | int32_t labelLen = 0, labelReqLength = 0; | |
792 | UBool done = FALSE; | |
793 | ||
b75a7d8f A |
794 | for(;;){ |
795 | ||
374ca955 | 796 | labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done); |
b75a7d8f | 797 | |
46f4442e A |
798 | // The RFC states that |
799 | // <quote> | |
800 | // ToUnicode never fails. If any step fails, then the original input | |
801 | // is returned immediately in that step. | |
802 | // </quote> | |
803 | // _internal_toUnicode will copy the label. | |
804 | /*if(labelLen==0 && done==FALSE){ | |
73c04bcf | 805 | *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; |
46f4442e A |
806 | break; |
807 | }*/ | |
808 | ||
374ca955 A |
809 | labelReqLength = _internal_toUnicode(labelStart, labelLen, |
810 | currentDest, remainingDestCapacity, | |
811 | options, nameprep, | |
812 | parseError, status); | |
b75a7d8f A |
813 | |
814 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
b75a7d8f A |
815 | *status = U_ZERO_ERROR; // reset error |
816 | remainingDestCapacity = 0; | |
817 | } | |
818 | ||
b75a7d8f A |
819 | if(U_FAILURE(*status)){ |
820 | break; | |
821 | } | |
822 | ||
823 | reqLength +=labelReqLength; | |
824 | // adjust the destination pointer | |
825 | if(labelReqLength < remainingDestCapacity){ | |
826 | currentDest = currentDest + labelReqLength; | |
827 | remainingDestCapacity -= labelReqLength; | |
828 | }else{ | |
829 | // should never occur | |
830 | remainingDestCapacity = 0; | |
831 | } | |
832 | ||
833 | if(done == TRUE){ | |
834 | break; | |
835 | } | |
836 | ||
837 | // add the label separator | |
46f4442e | 838 | // Unlike the ToASCII operation we don't normalize the label separators |
b75a7d8f | 839 | if(remainingDestCapacity > 0){ |
46f4442e | 840 | *currentDest++ = *(labelStart + labelLen); |
b75a7d8f A |
841 | remainingDestCapacity--; |
842 | } | |
46f4442e | 843 | reqLength++; |
b75a7d8f A |
844 | |
845 | labelStart = delimiter; | |
846 | if(remainingLen >0 ){ | |
73c04bcf | 847 | remainingLen = (int32_t)(srcLength - (delimiter - src)); |
b75a7d8f A |
848 | } |
849 | ||
850 | } | |
46f4442e A |
851 | |
852 | if(reqLength > MAX_DOMAIN_NAME_LENGTH){ | |
853 | *status = U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR; | |
854 | } | |
855 | ||
374ca955 | 856 | usprep_close(nameprep); |
b75a7d8f A |
857 | |
858 | return u_terminateUChars(dest, destCapacity, reqLength, status); | |
859 | } | |
860 | ||
861 | U_CAPI int32_t U_EXPORT2 | |
862 | uidna_compare( const UChar *s1, int32_t length1, | |
863 | const UChar *s2, int32_t length2, | |
864 | int32_t options, | |
865 | UErrorCode* status){ | |
866 | ||
867 | if(status == NULL || U_FAILURE(*status)){ | |
868 | return -1; | |
869 | } | |
870 | ||
871 | UChar b1Stack[MAX_IDN_BUFFER_SIZE], b2Stack[MAX_IDN_BUFFER_SIZE]; | |
872 | UChar *b1 = b1Stack, *b2 = b2Stack; | |
873 | int32_t b1Len, b2Len, b1Capacity = MAX_IDN_BUFFER_SIZE, b2Capacity = MAX_IDN_BUFFER_SIZE; | |
874 | int32_t result=-1; | |
875 | ||
876 | UParseError parseError; | |
877 | ||
878 | b1Len = uidna_IDNToASCII(s1, length1, b1, b1Capacity, options, &parseError, status); | |
879 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
880 | // redo processing of string | |
881 | b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
882 | if(b1==NULL){ | |
883 | *status = U_MEMORY_ALLOCATION_ERROR; | |
884 | goto CLEANUP; | |
885 | } | |
886 | ||
887 | *status = U_ZERO_ERROR; // reset error | |
888 | ||
889 | b1Len = uidna_IDNToASCII(s1,length1,b1,b1Len, options, &parseError, status); | |
890 | ||
891 | } | |
892 | ||
893 | b2Len = uidna_IDNToASCII(s2,length2, b2,b2Capacity, options, &parseError, status); | |
894 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
895 | // redo processing of string | |
896 | b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); | |
897 | if(b2==NULL){ | |
898 | *status = U_MEMORY_ALLOCATION_ERROR; | |
899 | goto CLEANUP; | |
900 | } | |
901 | ||
902 | *status = U_ZERO_ERROR; // reset error | |
903 | ||
904 | b2Len = uidna_IDNToASCII(s2, length2, b2, b2Len, options, &parseError, status); | |
905 | ||
906 | } | |
907 | // when toASCII is applied all label separators are replaced with FULL_STOP | |
908 | result = compareCaseInsensitiveASCII(b1,b1Len,b2,b2Len); | |
909 | ||
910 | CLEANUP: | |
911 | if(b1 != b1Stack){ | |
912 | uprv_free(b1); | |
913 | } | |
914 | ||
915 | if(b2 != b2Stack){ | |
916 | uprv_free(b2); | |
917 | } | |
918 | ||
919 | return result; | |
920 | } | |
921 | ||
922 | #endif /* #if !UCONFIG_NO_IDNA */ |