]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | ******************************************************************************* | |
3 | * | |
b331163b | 4 | * Copyright (C) 2003-2014, International Business Machines |
b75a7d8f A |
5 | * Corporation and others. All Rights Reserved. |
6 | * | |
7 | ******************************************************************************* | |
8 | * file name: uidna.cpp | |
9 | * encoding: US-ASCII | |
10 | * tab size: 8 (not used) | |
11 | * indentation:4 | |
12 | * | |
13 | * created on: 2003feb1 | |
14 | * created by: Ram Viswanadha | |
15 | */ | |
16 | ||
17 | #include "unicode/utypes.h" | |
18 | ||
19 | #if !UCONFIG_NO_IDNA | |
20 | ||
21 | #include "unicode/uidna.h" | |
22 | #include "unicode/ustring.h" | |
374ca955 | 23 | #include "unicode/usprep.h" |
b75a7d8f A |
24 | #include "punycode.h" |
25 | #include "ustr_imp.h" | |
26 | #include "cmemory.h" | |
46f4442e | 27 | #include "uassert.h" |
b75a7d8f A |
28 | #include "sprpimpl.h" |
29 | ||
30 | /* it is official IDNA ACE Prefix is "xn--" */ | |
31 | static const UChar ACE_PREFIX[] ={ 0x0078,0x006E,0x002d,0x002d } ; | |
32 | #define ACE_PREFIX_LENGTH 4 | |
33 | ||
/* Maximum number of code units allowed in one label of the result. */
#define MAX_LABEL_LENGTH 63
/* Size of the per-label stack buffers; deliberately larger than MAX_LABEL_LENGTH. */
#define MAX_LABEL_BUFFER_SIZE 100

/* Maximum number of code units allowed in a whole domain name. */
#define MAX_DOMAIN_NAME_LENGTH 255
/*
 * Size of the per-domain stack buffers: one more than MAX_DOMAIN_NAME_LENGTH.
 * The expansion is parenthesized so that the macro stays a single operand
 * when it is used inside a larger expression.
 */
#define MAX_IDN_BUFFER_SIZE (MAX_DOMAIN_NAME_LENGTH+1)

/* Distance between an ASCII capital letter and its lowercase form. */
#define LOWER_CASE_DELTA 0x0020
#define HYPHEN      0x002D
#define FULL_STOP   0x002E
#define CAPITAL_A   0x0041
#define CAPITAL_Z   0x005A
47 | ||
b75a7d8f A |
48 | inline static UChar |
49 | toASCIILower(UChar ch){ | |
50 | if(CAPITAL_A <= ch && ch <= CAPITAL_Z){ | |
51 | return ch + LOWER_CASE_DELTA; | |
52 | } | |
53 | return ch; | |
54 | } | |
55 | ||
56 | inline static UBool | |
57 | startsWithPrefix(const UChar* src , int32_t srcLength){ | |
58 | UBool startsWithPrefix = TRUE; | |
59 | ||
60 | if(srcLength < ACE_PREFIX_LENGTH){ | |
61 | return FALSE; | |
62 | } | |
63 | ||
64 | for(int8_t i=0; i< ACE_PREFIX_LENGTH; i++){ | |
65 | if(toASCIILower(src[i]) != ACE_PREFIX[i]){ | |
66 | startsWithPrefix = FALSE; | |
67 | } | |
68 | } | |
69 | return startsWithPrefix; | |
70 | } | |
71 | ||
b75a7d8f A |
72 | |
73 | inline static int32_t | |
74 | compareCaseInsensitiveASCII(const UChar* s1, int32_t s1Len, | |
75 | const UChar* s2, int32_t s2Len){ | |
76 | ||
77 | int32_t minLength; | |
78 | int32_t lengthResult; | |
79 | ||
80 | // are we comparing different lengths? | |
81 | if(s1Len != s2Len) { | |
82 | if(s1Len < s2Len) { | |
83 | minLength = s1Len; | |
84 | lengthResult = -1; | |
85 | } else { | |
86 | minLength = s2Len; | |
87 | lengthResult = 1; | |
88 | } | |
89 | } else { | |
90 | // ok the lengths are equal | |
91 | minLength = s1Len; | |
92 | lengthResult = 0; | |
93 | } | |
94 | ||
95 | UChar c1,c2; | |
96 | int32_t rc; | |
97 | ||
98 | for(int32_t i =0;/* no condition */;i++) { | |
99 | ||
100 | /* If we reach the ends of both strings then they match */ | |
101 | if(i == minLength) { | |
102 | return lengthResult; | |
103 | } | |
104 | ||
105 | c1 = s1[i]; | |
106 | c2 = s2[i]; | |
107 | ||
108 | /* Case-insensitive comparison */ | |
109 | if(c1!=c2) { | |
110 | rc=(int32_t)toASCIILower(c1)-(int32_t)toASCIILower(c2); | |
111 | if(rc!=0) { | |
112 | lengthResult=rc; | |
113 | break; | |
114 | } | |
115 | } | |
116 | } | |
117 | return lengthResult; | |
118 | } | |
119 | ||
120 | ||
374ca955 A |
121 | /** |
122 | * Ascertain if the given code point is a label separator as | |
123 | * defined by the IDNA RFC | |
124 | * | |
125 | * @param ch The code point to be ascertained | |
126 | * @return true if the char is a label separator | |
73c04bcf | 127 | * @stable ICU 2.8 |
374ca955 A |
128 | */ |
129 | static inline UBool isLabelSeparator(UChar ch){ | |
130 | switch(ch){ | |
131 | case 0x002e: | |
132 | case 0x3002: | |
133 | case 0xFF0E: | |
134 | case 0xFF61: | |
135 | return TRUE; | |
136 | default: | |
137 | return FALSE; | |
b75a7d8f | 138 | } |
374ca955 A |
139 | } |
140 | ||
141 | // returns the length of the label excluding the separator | |
142 | // if *limit == separator then the length returned does not include | |
143 | // the separtor. | |
144 | static inline int32_t | |
46f4442e A |
145 | getNextSeparator(UChar *src, int32_t srcLength, |
146 | UChar **limit, UBool *done){ | |
374ca955 A |
147 | if(srcLength == -1){ |
148 | int32_t i; | |
149 | for(i=0 ; ;i++){ | |
150 | if(src[i] == 0){ | |
151 | *limit = src + i; // point to null | |
152 | *done = TRUE; | |
153 | return i; | |
154 | } | |
155 | if(isLabelSeparator(src[i])){ | |
156 | *limit = src + (i+1); // go past the delimiter | |
157 | return i; | |
158 | ||
159 | } | |
160 | } | |
161 | }else{ | |
162 | int32_t i; | |
163 | for(i=0;i<srcLength;i++){ | |
164 | if(isLabelSeparator(src[i])){ | |
165 | *limit = src + (i+1); // go past the delimiter | |
166 | return i; | |
167 | } | |
168 | } | |
169 | // we have not found the delimiter | |
170 | // if(i==srcLength) | |
171 | *limit = src+srcLength; | |
172 | *done = TRUE; | |
173 | ||
174 | return i; | |
b75a7d8f | 175 | } |
374ca955 A |
176 | } |
177 | static inline UBool isLDHChar(UChar ch){ | |
178 | // high runner case | |
179 | if(ch>0x007A){ | |
180 | return FALSE; | |
181 | } | |
182 | //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A] | |
183 | if( (ch==0x002D) || | |
184 | (0x0030 <= ch && ch <= 0x0039) || | |
185 | (0x0041 <= ch && ch <= 0x005A) || | |
186 | (0x0061 <= ch && ch <= 0x007A) | |
187 | ){ | |
188 | return TRUE; | |
189 | } | |
190 | return FALSE; | |
191 | } | |
192 | ||
193 | static int32_t | |
194 | _internal_toASCII(const UChar* src, int32_t srcLength, | |
195 | UChar* dest, int32_t destCapacity, | |
196 | int32_t options, | |
197 | UStringPrepProfile* nameprep, | |
198 | UParseError* parseError, | |
46f4442e A |
199 | UErrorCode* status) |
200 | { | |
374ca955 | 201 | |
46f4442e | 202 | // TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too. |
b75a7d8f A |
203 | UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE]; |
204 | //initialize pointers to stack buffers | |
205 | UChar *b1 = b1Stack, *b2 = b2Stack; | |
73c04bcf | 206 | int32_t b1Len=0, b2Len, |
b75a7d8f A |
207 | b1Capacity = MAX_LABEL_BUFFER_SIZE, |
208 | b2Capacity = MAX_LABEL_BUFFER_SIZE , | |
209 | reqLength=0; | |
210 | ||
374ca955 | 211 | int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0; |
b75a7d8f A |
212 | UBool* caseFlags = NULL; |
213 | ||
214 | // the source contains all ascii codepoints | |
215 | UBool srcIsASCII = TRUE; | |
216 | // assume the source contains all LDH codepoints | |
217 | UBool srcIsLDH = TRUE; | |
218 | ||
219 | int32_t j=0; | |
220 | ||
221 | //get the options | |
b75a7d8f | 222 | UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0); |
374ca955 | 223 | |
b75a7d8f | 224 | int32_t failPos = -1; |
b75a7d8f | 225 | |
73c04bcf A |
226 | if(srcLength == -1){ |
227 | srcLength = u_strlen(src); | |
228 | } | |
229 | ||
230 | if(srcLength > b1Capacity){ | |
231 | b1 = (UChar*) uprv_malloc(srcLength * U_SIZEOF_UCHAR); | |
b75a7d8f A |
232 | if(b1==NULL){ |
233 | *status = U_MEMORY_ALLOCATION_ERROR; | |
234 | goto CLEANUP; | |
235 | } | |
73c04bcf A |
236 | b1Capacity = srcLength; |
237 | } | |
b75a7d8f | 238 | |
73c04bcf A |
239 | // step 1 |
240 | for( j=0;j<srcLength;j++){ | |
241 | if(src[j] > 0x7F){ | |
242 | srcIsASCII = FALSE; | |
243 | } | |
244 | b1[b1Len++] = src[j]; | |
245 | } | |
246 | ||
247 | // step 2 is performed only if the source contains non ASCII | |
248 | if(srcIsASCII == FALSE){ | |
b75a7d8f | 249 | |
73c04bcf A |
250 | // step 2 |
251 | b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status); | |
252 | ||
253 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
254 | // redo processing of string | |
255 | // we do not have enough room so grow the buffer | |
256 | if(b1 != b1Stack){ | |
257 | uprv_free(b1); | |
258 | } | |
259 | b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
260 | if(b1==NULL){ | |
261 | *status = U_MEMORY_ALLOCATION_ERROR; | |
262 | goto CLEANUP; | |
263 | } | |
264 | ||
265 | *status = U_ZERO_ERROR; // reset error | |
266 | ||
267 | b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status); | |
268 | } | |
b75a7d8f A |
269 | } |
270 | // error bail out | |
271 | if(U_FAILURE(*status)){ | |
272 | goto CLEANUP; | |
273 | } | |
73c04bcf A |
274 | if(b1Len == 0){ |
275 | *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; | |
276 | goto CLEANUP; | |
277 | } | |
b75a7d8f | 278 | |
73c04bcf A |
279 | // for step 3 & 4 |
280 | srcIsASCII = TRUE; | |
b75a7d8f | 281 | for( j=0;j<b1Len;j++){ |
73c04bcf | 282 | // check if output of usprep_prepare is all ASCII |
b75a7d8f A |
283 | if(b1[j] > 0x7F){ |
284 | srcIsASCII = FALSE; | |
374ca955 | 285 | }else if(isLDHChar(b1[j])==FALSE){ // if the char is in ASCII range verify that it is an LDH character |
b75a7d8f A |
286 | srcIsLDH = FALSE; |
287 | failPos = j; | |
288 | } | |
289 | } | |
b75a7d8f A |
290 | if(useSTD3ASCIIRules == TRUE){ |
291 | // verify 3a and 3b | |
374ca955 A |
292 | // 3(a) Verify the absence of non-LDH ASCII code points; that is, the |
293 | // absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F. | |
294 | // 3(b) Verify the absence of leading and trailing hyphen-minus; that | |
295 | // is, the absence of U+002D at the beginning and end of the | |
296 | // sequence. | |
297 | if( srcIsLDH == FALSE /* source at this point should not contain anyLDH characters */ | |
b75a7d8f A |
298 | || b1[0] == HYPHEN || b1[b1Len-1] == HYPHEN){ |
299 | *status = U_IDNA_STD3_ASCII_RULES_ERROR; | |
300 | ||
301 | /* populate the parseError struct */ | |
302 | if(srcIsLDH==FALSE){ | |
303 | // failPos is always set the index of failure | |
304 | uprv_syntaxError(b1,failPos, b1Len,parseError); | |
305 | }else if(b1[0] == HYPHEN){ | |
306 | // fail position is 0 | |
307 | uprv_syntaxError(b1,0,b1Len,parseError); | |
308 | }else{ | |
309 | // the last index in the source is always length-1 | |
310 | uprv_syntaxError(b1, (b1Len>0) ? b1Len-1 : b1Len, b1Len,parseError); | |
311 | } | |
312 | ||
313 | goto CLEANUP; | |
314 | } | |
315 | } | |
73c04bcf | 316 | // Step 4: if the source is ASCII then proceed to step 8 |
b75a7d8f A |
317 | if(srcIsASCII){ |
318 | if(b1Len <= destCapacity){ | |
a62d09fc | 319 | u_memmove(dest, b1, b1Len); |
b75a7d8f A |
320 | reqLength = b1Len; |
321 | }else{ | |
322 | reqLength = b1Len; | |
323 | goto CLEANUP; | |
324 | } | |
325 | }else{ | |
326 | // step 5 : verify the sequence does not begin with ACE prefix | |
327 | if(!startsWithPrefix(b1,b1Len)){ | |
328 | ||
329 | //step 6: encode the sequence with punycode | |
330 | ||
331 | // do not preserve the case flags for now! | |
332 | // TODO: Preserve the case while implementing the RFE | |
333 | // caseFlags = (UBool*) uprv_malloc(b1Len * sizeof(UBool)); | |
334 | // uprv_memset(caseFlags,TRUE,b1Len); | |
335 | ||
336 | b2Len = u_strToPunycode(b1,b1Len,b2,b2Capacity,caseFlags, status); | |
337 | ||
338 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
339 | // redo processing of string | |
340 | /* we do not have enough room so grow the buffer*/ | |
341 | b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); | |
342 | if(b2 == NULL){ | |
343 | *status = U_MEMORY_ALLOCATION_ERROR; | |
344 | goto CLEANUP; | |
345 | } | |
346 | ||
347 | *status = U_ZERO_ERROR; // reset error | |
348 | ||
349 | b2Len = u_strToPunycode(b1,b1Len,b2,b2Len,caseFlags, status); | |
350 | } | |
351 | //error bail out | |
352 | if(U_FAILURE(*status)){ | |
353 | goto CLEANUP; | |
354 | } | |
355 | // TODO : Reconsider while implementing the case preserve RFE | |
356 | // convert all codepoints to lower case ASCII | |
357 | // toASCIILower(b2,b2Len); | |
358 | reqLength = b2Len+ACE_PREFIX_LENGTH; | |
359 | ||
360 | if(reqLength > destCapacity){ | |
361 | *status = U_BUFFER_OVERFLOW_ERROR; | |
362 | goto CLEANUP; | |
363 | } | |
364 | //Step 7: prepend the ACE prefix | |
a62d09fc | 365 | u_memcpy(dest, ACE_PREFIX, ACE_PREFIX_LENGTH); |
b75a7d8f | 366 | //Step 6: copy the contents in b2 into dest |
a62d09fc | 367 | u_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len); |
b75a7d8f A |
368 | |
369 | }else{ | |
370 | *status = U_IDNA_ACE_PREFIX_ERROR; | |
371 | //position of failure is 0 | |
372 | uprv_syntaxError(b1,0,b1Len,parseError); | |
373 | goto CLEANUP; | |
374 | } | |
375 | } | |
46f4442e | 376 | // step 8: verify the length of label |
b75a7d8f A |
377 | if(reqLength > MAX_LABEL_LENGTH){ |
378 | *status = U_IDNA_LABEL_TOO_LONG_ERROR; | |
379 | } | |
380 | ||
381 | CLEANUP: | |
382 | if(b1 != b1Stack){ | |
383 | uprv_free(b1); | |
384 | } | |
385 | if(b2 != b2Stack){ | |
386 | uprv_free(b2); | |
387 | } | |
388 | uprv_free(caseFlags); | |
389 | ||
b75a7d8f A |
390 | return u_terminateUChars(dest, destCapacity, reqLength, status); |
391 | } | |
392 | ||
374ca955 A |
393 | static int32_t |
394 | _internal_toUnicode(const UChar* src, int32_t srcLength, | |
395 | UChar* dest, int32_t destCapacity, | |
396 | int32_t options, | |
397 | UStringPrepProfile* nameprep, | |
398 | UParseError* parseError, | |
46f4442e A |
399 | UErrorCode* status) |
400 | { | |
b75a7d8f A |
401 | |
402 | //get the options | |
46f4442e | 403 | //UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0); |
374ca955 | 404 | int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0; |
46f4442e A |
405 | |
406 | // TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too. | |
b75a7d8f A |
407 | UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE]; |
408 | ||
409 | //initialize pointers to stack buffers | |
410 | UChar *b1 = b1Stack, *b2 = b2Stack, *b1Prime=NULL, *b3=b3Stack; | |
b331163b | 411 | int32_t b1Len = 0, b2Len, b1PrimeLen, b3Len, |
b75a7d8f A |
412 | b1Capacity = MAX_LABEL_BUFFER_SIZE, |
413 | b2Capacity = MAX_LABEL_BUFFER_SIZE, | |
414 | b3Capacity = MAX_LABEL_BUFFER_SIZE, | |
415 | reqLength=0; | |
374ca955 | 416 | |
b75a7d8f A |
417 | UBool* caseFlags = NULL; |
418 | ||
419 | UBool srcIsASCII = TRUE; | |
46f4442e A |
420 | /*UBool srcIsLDH = TRUE; |
421 | int32_t failPos =0;*/ | |
b75a7d8f | 422 | |
b75a7d8f A |
423 | // step 1: find out if all the codepoints in src are ASCII |
424 | if(srcLength==-1){ | |
425 | srcLength = 0; | |
426 | for(;src[srcLength]!=0;){ | |
427 | if(src[srcLength]> 0x7f){ | |
428 | srcIsASCII = FALSE; | |
46f4442e | 429 | }/*else if(isLDHChar(src[srcLength])==FALSE){ |
374ca955 A |
430 | // here we do not assemble surrogates |
431 | // since we know that LDH code points | |
432 | // are in the ASCII range only | |
b75a7d8f A |
433 | srcIsLDH = FALSE; |
434 | failPos = srcLength; | |
46f4442e | 435 | }*/ |
b75a7d8f A |
436 | srcLength++; |
437 | } | |
374ca955 | 438 | }else if(srcLength > 0){ |
b75a7d8f A |
439 | for(int32_t j=0; j<srcLength; j++){ |
440 | if(src[j]> 0x7f){ | |
441 | srcIsASCII = FALSE; | |
46f4442e | 442 | }/*else if(isLDHChar(src[j])==FALSE){ |
374ca955 A |
443 | // here we do not assemble surrogates |
444 | // since we know that LDH code points | |
445 | // are in the ASCII range only | |
b75a7d8f A |
446 | srcIsLDH = FALSE; |
447 | failPos = j; | |
46f4442e | 448 | }*/ |
b75a7d8f | 449 | } |
374ca955 A |
450 | }else{ |
451 | return 0; | |
b75a7d8f | 452 | } |
46f4442e | 453 | |
b75a7d8f A |
454 | if(srcIsASCII == FALSE){ |
455 | // step 2: process the string | |
374ca955 | 456 | b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status); |
b75a7d8f A |
457 | if(*status == U_BUFFER_OVERFLOW_ERROR){ |
458 | // redo processing of string | |
459 | /* we do not have enough room so grow the buffer*/ | |
460 | b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
461 | if(b1==NULL){ | |
462 | *status = U_MEMORY_ALLOCATION_ERROR; | |
463 | goto CLEANUP; | |
464 | } | |
465 | ||
466 | *status = U_ZERO_ERROR; // reset error | |
467 | ||
374ca955 | 468 | b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status); |
b75a7d8f A |
469 | } |
470 | //bail out on error | |
471 | if(U_FAILURE(*status)){ | |
472 | goto CLEANUP; | |
473 | } | |
474 | }else{ | |
475 | ||
476 | //just point src to b1 | |
477 | b1 = (UChar*) src; | |
478 | b1Len = srcLength; | |
479 | } | |
480 | ||
46f4442e A |
481 | // The RFC states that |
482 | // <quote> | |
483 | // ToUnicode never fails. If any step fails, then the original input | |
484 | // is returned immediately in that step. | |
485 | // </quote> | |
486 | ||
b75a7d8f | 487 | //step 3: verify ACE Prefix |
46f4442e A |
488 | if(startsWithPrefix(b1,b1Len)){ |
489 | ||
b75a7d8f A |
490 | //step 4: Remove the ACE Prefix |
491 | b1Prime = b1 + ACE_PREFIX_LENGTH; | |
492 | b1PrimeLen = b1Len - ACE_PREFIX_LENGTH; | |
493 | ||
494 | //step 5: Decode using punycode | |
495 | b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Capacity, caseFlags,status); | |
46f4442e | 496 | |
b75a7d8f A |
497 | if(*status == U_BUFFER_OVERFLOW_ERROR){ |
498 | // redo processing of string | |
499 | /* we do not have enough room so grow the buffer*/ | |
500 | b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); | |
501 | if(b2==NULL){ | |
502 | *status = U_MEMORY_ALLOCATION_ERROR; | |
503 | goto CLEANUP; | |
504 | } | |
505 | ||
506 | *status = U_ZERO_ERROR; // reset error | |
46f4442e | 507 | |
b75a7d8f | 508 | b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Len, caseFlags, status); |
b75a7d8f | 509 | } |
46f4442e A |
510 | |
511 | ||
b75a7d8f | 512 | //step 6:Apply toASCII |
46f4442e A |
513 | b3Len = uidna_toASCII(b2, b2Len, b3, b3Capacity, options, parseError, status); |
514 | ||
b75a7d8f A |
515 | if(*status == U_BUFFER_OVERFLOW_ERROR){ |
516 | // redo processing of string | |
517 | /* we do not have enough room so grow the buffer*/ | |
518 | b3 = (UChar*) uprv_malloc(b3Len * U_SIZEOF_UCHAR); | |
519 | if(b3==NULL){ | |
520 | *status = U_MEMORY_ALLOCATION_ERROR; | |
521 | goto CLEANUP; | |
522 | } | |
523 | ||
524 | *status = U_ZERO_ERROR; // reset error | |
46f4442e | 525 | |
b75a7d8f | 526 | b3Len = uidna_toASCII(b2,b2Len,b3,b3Len,options,parseError, status); |
46f4442e | 527 | |
b75a7d8f A |
528 | } |
529 | //bail out on error | |
530 | if(U_FAILURE(*status)){ | |
531 | goto CLEANUP; | |
532 | } | |
533 | ||
534 | //step 7: verify | |
535 | if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=0){ | |
46f4442e A |
536 | // Cause the original to be returned. |
537 | *status = U_IDNA_VERIFICATION_ERROR; | |
b75a7d8f A |
538 | goto CLEANUP; |
539 | } | |
540 | ||
541 | //step 8: return output of step 5 | |
542 | reqLength = b2Len; | |
543 | if(b2Len <= destCapacity) { | |
a62d09fc | 544 | u_memmove(dest, b2, b2Len); |
b75a7d8f | 545 | } |
46f4442e A |
546 | } |
547 | else{ | |
548 | // See the start of this if statement for why this is commented out. | |
b75a7d8f | 549 | // verify that STD3 ASCII rules are satisfied |
46f4442e A |
550 | /*if(useSTD3ASCIIRules == TRUE){ |
551 | if( srcIsLDH == FALSE // source contains some non-LDH characters | |
b75a7d8f A |
552 | || src[0] == HYPHEN || src[srcLength-1] == HYPHEN){ |
553 | *status = U_IDNA_STD3_ASCII_RULES_ERROR; | |
554 | ||
46f4442e | 555 | // populate the parseError struct |
b75a7d8f A |
556 | if(srcIsLDH==FALSE){ |
557 | // failPos is always set the index of failure | |
558 | uprv_syntaxError(src,failPos, srcLength,parseError); | |
559 | }else if(src[0] == HYPHEN){ | |
560 | // fail position is 0 | |
561 | uprv_syntaxError(src,0,srcLength,parseError); | |
562 | }else{ | |
563 | // the last index in the source is always length-1 | |
564 | uprv_syntaxError(src, (srcLength>0) ? srcLength-1 : srcLength, srcLength,parseError); | |
565 | } | |
566 | ||
567 | goto CLEANUP; | |
568 | } | |
46f4442e A |
569 | }*/ |
570 | // just return the source | |
b75a7d8f A |
571 | //copy the source to destination |
572 | if(srcLength <= destCapacity){ | |
a62d09fc | 573 | u_memmove(dest, src, srcLength); |
b75a7d8f A |
574 | } |
575 | reqLength = srcLength; | |
576 | } | |
577 | ||
46f4442e | 578 | |
b75a7d8f A |
579 | CLEANUP: |
580 | ||
581 | if(b1 != b1Stack && b1!=src){ | |
582 | uprv_free(b1); | |
583 | } | |
584 | if(b2 != b2Stack){ | |
585 | uprv_free(b2); | |
586 | } | |
587 | uprv_free(caseFlags); | |
46f4442e | 588 | |
b75a7d8f A |
589 | // The RFC states that |
590 | // <quote> | |
591 | // ToUnicode never fails. If any step fails, then the original input | |
592 | // is returned immediately in that step. | |
593 | // </quote> | |
594 | // So if any step fails lets copy source to destination | |
595 | if(U_FAILURE(*status)){ | |
596 | //copy the source to destination | |
597 | if(dest && srcLength <= destCapacity){ | |
46f4442e A |
598 | // srcLength should have already been set earlier. |
599 | U_ASSERT(srcLength >= 0); | |
a62d09fc | 600 | u_memmove(dest, src, srcLength); |
b75a7d8f A |
601 | } |
602 | reqLength = srcLength; | |
46f4442e | 603 | *status = U_ZERO_ERROR; |
b75a7d8f A |
604 | } |
605 | ||
606 | return u_terminateUChars(dest, destCapacity, reqLength, status); | |
607 | } | |
608 | ||
374ca955 A |
609 | U_CAPI int32_t U_EXPORT2 |
610 | uidna_toASCII(const UChar* src, int32_t srcLength, | |
611 | UChar* dest, int32_t destCapacity, | |
612 | int32_t options, | |
613 | UParseError* parseError, | |
614 | UErrorCode* status){ | |
615 | ||
616 | if(status == NULL || U_FAILURE(*status)){ | |
617 | return 0; | |
618 | } | |
619 | if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ | |
620 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
621 | return 0; | |
622 | } | |
b75a7d8f | 623 | |
729e4ab9 | 624 | UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status); |
374ca955 A |
625 | |
626 | if(U_FAILURE(*status)){ | |
627 | return -1; | |
b75a7d8f | 628 | } |
374ca955 A |
629 | |
630 | int32_t retLen = _internal_toASCII(src, srcLength, dest, destCapacity, options, nameprep, parseError, status); | |
631 | ||
632 | /* close the profile*/ | |
633 | usprep_close(nameprep); | |
634 | ||
635 | return retLen; | |
b75a7d8f A |
636 | } |
637 | ||
374ca955 A |
638 | U_CAPI int32_t U_EXPORT2 |
639 | uidna_toUnicode(const UChar* src, int32_t srcLength, | |
640 | UChar* dest, int32_t destCapacity, | |
641 | int32_t options, | |
642 | UParseError* parseError, | |
643 | UErrorCode* status){ | |
644 | ||
645 | if(status == NULL || U_FAILURE(*status)){ | |
646 | return 0; | |
647 | } | |
648 | if( (src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ | |
649 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
650 | return 0; | |
651 | } | |
46f4442e | 652 | |
729e4ab9 | 653 | UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status); |
374ca955 A |
654 | |
655 | if(U_FAILURE(*status)){ | |
656 | return -1; | |
657 | } | |
658 | ||
659 | int32_t retLen = _internal_toUnicode(src, srcLength, dest, destCapacity, options, nameprep, parseError, status); | |
660 | ||
661 | usprep_close(nameprep); | |
662 | ||
663 | return retLen; | |
664 | } | |
665 | ||
666 | ||
b75a7d8f A |
667 | U_CAPI int32_t U_EXPORT2 |
668 | uidna_IDNToASCII( const UChar *src, int32_t srcLength, | |
669 | UChar* dest, int32_t destCapacity, | |
670 | int32_t options, | |
671 | UParseError *parseError, | |
672 | UErrorCode *status){ | |
673 | ||
674 | if(status == NULL || U_FAILURE(*status)){ | |
675 | return 0; | |
676 | } | |
677 | if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ | |
678 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
679 | return 0; | |
680 | } | |
681 | ||
682 | int32_t reqLength = 0; | |
683 | ||
729e4ab9 | 684 | UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status); |
b75a7d8f A |
685 | |
686 | if(U_FAILURE(*status)){ | |
687 | return 0; | |
688 | } | |
689 | ||
690 | //initialize pointers | |
691 | UChar *delimiter = (UChar*)src; | |
692 | UChar *labelStart = (UChar*)src; | |
693 | UChar *currentDest = (UChar*) dest; | |
694 | int32_t remainingLen = srcLength; | |
695 | int32_t remainingDestCapacity = destCapacity; | |
696 | int32_t labelLen = 0, labelReqLength = 0; | |
697 | UBool done = FALSE; | |
698 | ||
699 | ||
700 | for(;;){ | |
701 | ||
374ca955 | 702 | labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done); |
73c04bcf A |
703 | labelReqLength = 0; |
704 | if(!(labelLen==0 && done)){// make sure this is not a root label separator. | |
b75a7d8f | 705 | |
73c04bcf A |
706 | labelReqLength = _internal_toASCII( labelStart, labelLen, |
707 | currentDest, remainingDestCapacity, | |
708 | options, nameprep, | |
709 | parseError, status); | |
710 | ||
711 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
712 | ||
713 | *status = U_ZERO_ERROR; // reset error | |
714 | remainingDestCapacity = 0; | |
715 | } | |
b75a7d8f A |
716 | } |
717 | ||
718 | ||
719 | if(U_FAILURE(*status)){ | |
720 | break; | |
721 | } | |
722 | ||
723 | reqLength +=labelReqLength; | |
724 | // adjust the destination pointer | |
725 | if(labelReqLength < remainingDestCapacity){ | |
726 | currentDest = currentDest + labelReqLength; | |
727 | remainingDestCapacity -= labelReqLength; | |
728 | }else{ | |
729 | // should never occur | |
730 | remainingDestCapacity = 0; | |
731 | } | |
46f4442e | 732 | |
b75a7d8f A |
733 | if(done == TRUE){ |
734 | break; | |
735 | } | |
736 | ||
737 | // add the label separator | |
738 | if(remainingDestCapacity > 0){ | |
739 | *currentDest++ = FULL_STOP; | |
740 | remainingDestCapacity--; | |
741 | } | |
46f4442e | 742 | reqLength++; |
b75a7d8f A |
743 | |
744 | labelStart = delimiter; | |
745 | if(remainingLen >0 ){ | |
73c04bcf | 746 | remainingLen = (int32_t)(srcLength - (delimiter - src)); |
b75a7d8f A |
747 | } |
748 | ||
749 | } | |
46f4442e A |
750 | |
751 | if(reqLength > MAX_DOMAIN_NAME_LENGTH){ | |
752 | *status = U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR; | |
753 | } | |
754 | ||
374ca955 | 755 | usprep_close(nameprep); |
b75a7d8f A |
756 | |
757 | return u_terminateUChars(dest, destCapacity, reqLength, status); | |
758 | } | |
759 | ||
760 | U_CAPI int32_t U_EXPORT2 | |
761 | uidna_IDNToUnicode( const UChar* src, int32_t srcLength, | |
762 | UChar* dest, int32_t destCapacity, | |
763 | int32_t options, | |
764 | UParseError* parseError, | |
765 | UErrorCode* status){ | |
766 | ||
767 | if(status == NULL || U_FAILURE(*status)){ | |
768 | return 0; | |
769 | } | |
770 | if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ | |
771 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
772 | return 0; | |
773 | } | |
774 | ||
775 | int32_t reqLength = 0; | |
776 | ||
729e4ab9 | 777 | UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status); |
b75a7d8f A |
778 | |
779 | if(U_FAILURE(*status)){ | |
780 | return 0; | |
781 | } | |
782 | ||
783 | //initialize pointers | |
784 | UChar *delimiter = (UChar*)src; | |
785 | UChar *labelStart = (UChar*)src; | |
786 | UChar *currentDest = (UChar*) dest; | |
787 | int32_t remainingLen = srcLength; | |
788 | int32_t remainingDestCapacity = destCapacity; | |
789 | int32_t labelLen = 0, labelReqLength = 0; | |
790 | UBool done = FALSE; | |
791 | ||
b75a7d8f A |
792 | for(;;){ |
793 | ||
374ca955 | 794 | labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done); |
b75a7d8f | 795 | |
46f4442e A |
796 | // The RFC states that |
797 | // <quote> | |
798 | // ToUnicode never fails. If any step fails, then the original input | |
799 | // is returned immediately in that step. | |
800 | // </quote> | |
801 | // _internal_toUnicode will copy the label. | |
802 | /*if(labelLen==0 && done==FALSE){ | |
73c04bcf | 803 | *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; |
46f4442e A |
804 | break; |
805 | }*/ | |
806 | ||
374ca955 A |
807 | labelReqLength = _internal_toUnicode(labelStart, labelLen, |
808 | currentDest, remainingDestCapacity, | |
809 | options, nameprep, | |
810 | parseError, status); | |
b75a7d8f A |
811 | |
812 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
b75a7d8f A |
813 | *status = U_ZERO_ERROR; // reset error |
814 | remainingDestCapacity = 0; | |
815 | } | |
816 | ||
b75a7d8f A |
817 | if(U_FAILURE(*status)){ |
818 | break; | |
819 | } | |
820 | ||
821 | reqLength +=labelReqLength; | |
822 | // adjust the destination pointer | |
823 | if(labelReqLength < remainingDestCapacity){ | |
824 | currentDest = currentDest + labelReqLength; | |
825 | remainingDestCapacity -= labelReqLength; | |
826 | }else{ | |
827 | // should never occur | |
828 | remainingDestCapacity = 0; | |
829 | } | |
830 | ||
831 | if(done == TRUE){ | |
832 | break; | |
833 | } | |
834 | ||
835 | // add the label separator | |
46f4442e | 836 | // Unlike the ToASCII operation we don't normalize the label separators |
b75a7d8f | 837 | if(remainingDestCapacity > 0){ |
46f4442e | 838 | *currentDest++ = *(labelStart + labelLen); |
b75a7d8f A |
839 | remainingDestCapacity--; |
840 | } | |
46f4442e | 841 | reqLength++; |
b75a7d8f A |
842 | |
843 | labelStart = delimiter; | |
844 | if(remainingLen >0 ){ | |
73c04bcf | 845 | remainingLen = (int32_t)(srcLength - (delimiter - src)); |
b75a7d8f A |
846 | } |
847 | ||
848 | } | |
46f4442e A |
849 | |
850 | if(reqLength > MAX_DOMAIN_NAME_LENGTH){ | |
851 | *status = U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR; | |
852 | } | |
853 | ||
374ca955 | 854 | usprep_close(nameprep); |
b75a7d8f A |
855 | |
856 | return u_terminateUChars(dest, destCapacity, reqLength, status); | |
857 | } | |
858 | ||
859 | U_CAPI int32_t U_EXPORT2 | |
860 | uidna_compare( const UChar *s1, int32_t length1, | |
861 | const UChar *s2, int32_t length2, | |
862 | int32_t options, | |
863 | UErrorCode* status){ | |
864 | ||
865 | if(status == NULL || U_FAILURE(*status)){ | |
866 | return -1; | |
867 | } | |
868 | ||
869 | UChar b1Stack[MAX_IDN_BUFFER_SIZE], b2Stack[MAX_IDN_BUFFER_SIZE]; | |
870 | UChar *b1 = b1Stack, *b2 = b2Stack; | |
871 | int32_t b1Len, b2Len, b1Capacity = MAX_IDN_BUFFER_SIZE, b2Capacity = MAX_IDN_BUFFER_SIZE; | |
872 | int32_t result=-1; | |
873 | ||
874 | UParseError parseError; | |
875 | ||
876 | b1Len = uidna_IDNToASCII(s1, length1, b1, b1Capacity, options, &parseError, status); | |
877 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
878 | // redo processing of string | |
879 | b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
880 | if(b1==NULL){ | |
881 | *status = U_MEMORY_ALLOCATION_ERROR; | |
882 | goto CLEANUP; | |
883 | } | |
884 | ||
885 | *status = U_ZERO_ERROR; // reset error | |
886 | ||
887 | b1Len = uidna_IDNToASCII(s1,length1,b1,b1Len, options, &parseError, status); | |
888 | ||
889 | } | |
890 | ||
891 | b2Len = uidna_IDNToASCII(s2,length2, b2,b2Capacity, options, &parseError, status); | |
892 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
893 | // redo processing of string | |
894 | b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); | |
895 | if(b2==NULL){ | |
896 | *status = U_MEMORY_ALLOCATION_ERROR; | |
897 | goto CLEANUP; | |
898 | } | |
899 | ||
900 | *status = U_ZERO_ERROR; // reset error | |
901 | ||
902 | b2Len = uidna_IDNToASCII(s2, length2, b2, b2Len, options, &parseError, status); | |
903 | ||
904 | } | |
905 | // when toASCII is applied all label separators are replaced with FULL_STOP | |
906 | result = compareCaseInsensitiveASCII(b1,b1Len,b2,b2Len); | |
907 | ||
908 | CLEANUP: | |
909 | if(b1 != b1Stack){ | |
910 | uprv_free(b1); | |
911 | } | |
912 | ||
913 | if(b2 != b2Stack){ | |
914 | uprv_free(b2); | |
915 | } | |
916 | ||
917 | return result; | |
918 | } | |
919 | ||
920 | #endif /* #if !UCONFIG_NO_IDNA */ |