]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | ******************************************************************************* | |
3 | * | |
729e4ab9 | 4 | * Copyright (C) 2003-2009, International Business Machines |
b75a7d8f A |
5 | * Corporation and others. All Rights Reserved. |
6 | * | |
7 | ******************************************************************************* | |
8 | * file name: uidna.cpp | |
9 | * encoding: US-ASCII | |
10 | * tab size: 8 (not used) | |
11 | * indentation:4 | |
12 | * | |
13 | * created on: 2003feb1 | |
14 | * created by: Ram Viswanadha | |
15 | */ | |
16 | ||
17 | #include "unicode/utypes.h" | |
18 | ||
19 | #if !UCONFIG_NO_IDNA | |
20 | ||
21 | #include "unicode/uidna.h" | |
22 | #include "unicode/ustring.h" | |
374ca955 | 23 | #include "unicode/usprep.h" |
b75a7d8f A |
24 | #include "punycode.h" |
25 | #include "ustr_imp.h" | |
26 | #include "cmemory.h" | |
46f4442e | 27 | #include "uassert.h" |
b75a7d8f A |
28 | #include "sprpimpl.h" |
29 | ||
30 | /* it is official IDNA ACE Prefix is "xn--" */ | |
31 | static const UChar ACE_PREFIX[] ={ 0x0078,0x006E,0x002d,0x002d } ; | |
32 | #define ACE_PREFIX_LENGTH 4 | |
33 | ||
/* Upper bound on the length of a single converted label. */
#define MAX_LABEL_LENGTH 63
/* Size of the per-label stack buffers; labels should not exceed MAX_LABEL_LENGTH. */
#define MAX_LABEL_BUFFER_SIZE 100

/* Upper bound on the length of a whole converted domain name. */
#define MAX_DOMAIN_NAME_LENGTH 255
/*
 * Size of the per-domain stack buffers: one more than MAX_DOMAIN_NAME_LENGTH.
 * The expansion is parenthesized so the macro behaves as a single value
 * inside larger expressions (e.g. MAX_IDN_BUFFER_SIZE * 2).
 */
#define MAX_IDN_BUFFER_SIZE (MAX_DOMAIN_NAME_LENGTH + 1)

/* Added to an ASCII capital letter to obtain its lowercase form. */
#define LOWER_CASE_DELTA 0x0020
#define HYPHEN      0x002D
#define FULL_STOP   0x002E
/* Bounds of the ASCII capital letter range. */
#define CAPITAL_A   0x0041
#define CAPITAL_Z   0x005A
47 | ||
b75a7d8f A |
48 | inline static UChar |
49 | toASCIILower(UChar ch){ | |
50 | if(CAPITAL_A <= ch && ch <= CAPITAL_Z){ | |
51 | return ch + LOWER_CASE_DELTA; | |
52 | } | |
53 | return ch; | |
54 | } | |
55 | ||
56 | inline static UBool | |
57 | startsWithPrefix(const UChar* src , int32_t srcLength){ | |
58 | UBool startsWithPrefix = TRUE; | |
59 | ||
60 | if(srcLength < ACE_PREFIX_LENGTH){ | |
61 | return FALSE; | |
62 | } | |
63 | ||
64 | for(int8_t i=0; i< ACE_PREFIX_LENGTH; i++){ | |
65 | if(toASCIILower(src[i]) != ACE_PREFIX[i]){ | |
66 | startsWithPrefix = FALSE; | |
67 | } | |
68 | } | |
69 | return startsWithPrefix; | |
70 | } | |
71 | ||
b75a7d8f A |
72 | |
73 | inline static int32_t | |
74 | compareCaseInsensitiveASCII(const UChar* s1, int32_t s1Len, | |
75 | const UChar* s2, int32_t s2Len){ | |
76 | ||
77 | int32_t minLength; | |
78 | int32_t lengthResult; | |
79 | ||
80 | // are we comparing different lengths? | |
81 | if(s1Len != s2Len) { | |
82 | if(s1Len < s2Len) { | |
83 | minLength = s1Len; | |
84 | lengthResult = -1; | |
85 | } else { | |
86 | minLength = s2Len; | |
87 | lengthResult = 1; | |
88 | } | |
89 | } else { | |
90 | // ok the lengths are equal | |
91 | minLength = s1Len; | |
92 | lengthResult = 0; | |
93 | } | |
94 | ||
95 | UChar c1,c2; | |
96 | int32_t rc; | |
97 | ||
98 | for(int32_t i =0;/* no condition */;i++) { | |
99 | ||
100 | /* If we reach the ends of both strings then they match */ | |
101 | if(i == minLength) { | |
102 | return lengthResult; | |
103 | } | |
104 | ||
105 | c1 = s1[i]; | |
106 | c2 = s2[i]; | |
107 | ||
108 | /* Case-insensitive comparison */ | |
109 | if(c1!=c2) { | |
110 | rc=(int32_t)toASCIILower(c1)-(int32_t)toASCIILower(c2); | |
111 | if(rc!=0) { | |
112 | lengthResult=rc; | |
113 | break; | |
114 | } | |
115 | } | |
116 | } | |
117 | return lengthResult; | |
118 | } | |
119 | ||
120 | ||
374ca955 A |
121 | /** |
122 | * Ascertain if the given code point is a label separator as | |
123 | * defined by the IDNA RFC | |
124 | * | |
125 | * @param ch The code point to be ascertained | |
126 | * @return true if the char is a label separator | |
73c04bcf | 127 | * @stable ICU 2.8 |
374ca955 A |
128 | */ |
129 | static inline UBool isLabelSeparator(UChar ch){ | |
130 | switch(ch){ | |
131 | case 0x002e: | |
132 | case 0x3002: | |
133 | case 0xFF0E: | |
134 | case 0xFF61: | |
135 | return TRUE; | |
136 | default: | |
137 | return FALSE; | |
b75a7d8f | 138 | } |
374ca955 A |
139 | } |
140 | ||
141 | // returns the length of the label excluding the separator | |
142 | // if *limit == separator then the length returned does not include | |
143 | // the separtor. | |
144 | static inline int32_t | |
46f4442e A |
145 | getNextSeparator(UChar *src, int32_t srcLength, |
146 | UChar **limit, UBool *done){ | |
374ca955 A |
147 | if(srcLength == -1){ |
148 | int32_t i; | |
149 | for(i=0 ; ;i++){ | |
150 | if(src[i] == 0){ | |
151 | *limit = src + i; // point to null | |
152 | *done = TRUE; | |
153 | return i; | |
154 | } | |
155 | if(isLabelSeparator(src[i])){ | |
156 | *limit = src + (i+1); // go past the delimiter | |
157 | return i; | |
158 | ||
159 | } | |
160 | } | |
161 | }else{ | |
162 | int32_t i; | |
163 | for(i=0;i<srcLength;i++){ | |
164 | if(isLabelSeparator(src[i])){ | |
165 | *limit = src + (i+1); // go past the delimiter | |
166 | return i; | |
167 | } | |
168 | } | |
169 | // we have not found the delimiter | |
170 | // if(i==srcLength) | |
171 | *limit = src+srcLength; | |
172 | *done = TRUE; | |
173 | ||
174 | return i; | |
b75a7d8f | 175 | } |
374ca955 A |
176 | } |
177 | static inline UBool isLDHChar(UChar ch){ | |
178 | // high runner case | |
179 | if(ch>0x007A){ | |
180 | return FALSE; | |
181 | } | |
182 | //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A] | |
183 | if( (ch==0x002D) || | |
184 | (0x0030 <= ch && ch <= 0x0039) || | |
185 | (0x0041 <= ch && ch <= 0x005A) || | |
186 | (0x0061 <= ch && ch <= 0x007A) | |
187 | ){ | |
188 | return TRUE; | |
189 | } | |
190 | return FALSE; | |
191 | } | |
192 | ||
193 | static int32_t | |
194 | _internal_toASCII(const UChar* src, int32_t srcLength, | |
195 | UChar* dest, int32_t destCapacity, | |
196 | int32_t options, | |
197 | UStringPrepProfile* nameprep, | |
198 | UParseError* parseError, | |
46f4442e A |
199 | UErrorCode* status) |
200 | { | |
374ca955 | 201 | |
46f4442e | 202 | // TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too. |
b75a7d8f A |
203 | UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE]; |
204 | //initialize pointers to stack buffers | |
205 | UChar *b1 = b1Stack, *b2 = b2Stack; | |
73c04bcf | 206 | int32_t b1Len=0, b2Len, |
b75a7d8f A |
207 | b1Capacity = MAX_LABEL_BUFFER_SIZE, |
208 | b2Capacity = MAX_LABEL_BUFFER_SIZE , | |
209 | reqLength=0; | |
210 | ||
374ca955 | 211 | int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0; |
b75a7d8f A |
212 | UBool* caseFlags = NULL; |
213 | ||
214 | // the source contains all ascii codepoints | |
215 | UBool srcIsASCII = TRUE; | |
216 | // assume the source contains all LDH codepoints | |
217 | UBool srcIsLDH = TRUE; | |
218 | ||
219 | int32_t j=0; | |
220 | ||
221 | //get the options | |
b75a7d8f | 222 | UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0); |
374ca955 | 223 | |
b75a7d8f | 224 | int32_t failPos = -1; |
b75a7d8f | 225 | |
73c04bcf A |
226 | if(srcLength == -1){ |
227 | srcLength = u_strlen(src); | |
228 | } | |
229 | ||
230 | if(srcLength > b1Capacity){ | |
231 | b1 = (UChar*) uprv_malloc(srcLength * U_SIZEOF_UCHAR); | |
b75a7d8f A |
232 | if(b1==NULL){ |
233 | *status = U_MEMORY_ALLOCATION_ERROR; | |
234 | goto CLEANUP; | |
235 | } | |
73c04bcf A |
236 | b1Capacity = srcLength; |
237 | } | |
b75a7d8f | 238 | |
73c04bcf A |
239 | // step 1 |
240 | for( j=0;j<srcLength;j++){ | |
241 | if(src[j] > 0x7F){ | |
242 | srcIsASCII = FALSE; | |
243 | } | |
244 | b1[b1Len++] = src[j]; | |
245 | } | |
246 | ||
247 | // step 2 is performed only if the source contains non ASCII | |
248 | if(srcIsASCII == FALSE){ | |
b75a7d8f | 249 | |
73c04bcf A |
250 | // step 2 |
251 | b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status); | |
252 | ||
253 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
254 | // redo processing of string | |
255 | // we do not have enough room so grow the buffer | |
256 | if(b1 != b1Stack){ | |
257 | uprv_free(b1); | |
258 | } | |
259 | b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
260 | if(b1==NULL){ | |
261 | *status = U_MEMORY_ALLOCATION_ERROR; | |
262 | goto CLEANUP; | |
263 | } | |
264 | ||
265 | *status = U_ZERO_ERROR; // reset error | |
266 | ||
267 | b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status); | |
268 | } | |
b75a7d8f A |
269 | } |
270 | // error bail out | |
271 | if(U_FAILURE(*status)){ | |
272 | goto CLEANUP; | |
273 | } | |
73c04bcf A |
274 | if(b1Len == 0){ |
275 | *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; | |
276 | goto CLEANUP; | |
277 | } | |
b75a7d8f | 278 | |
73c04bcf A |
279 | // for step 3 & 4 |
280 | srcIsASCII = TRUE; | |
b75a7d8f | 281 | for( j=0;j<b1Len;j++){ |
73c04bcf | 282 | // check if output of usprep_prepare is all ASCII |
b75a7d8f A |
283 | if(b1[j] > 0x7F){ |
284 | srcIsASCII = FALSE; | |
374ca955 | 285 | }else if(isLDHChar(b1[j])==FALSE){ // if the char is in ASCII range verify that it is an LDH character |
b75a7d8f A |
286 | srcIsLDH = FALSE; |
287 | failPos = j; | |
288 | } | |
289 | } | |
b75a7d8f A |
290 | if(useSTD3ASCIIRules == TRUE){ |
291 | // verify 3a and 3b | |
374ca955 A |
292 | // 3(a) Verify the absence of non-LDH ASCII code points; that is, the |
293 | // absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F. | |
294 | // 3(b) Verify the absence of leading and trailing hyphen-minus; that | |
295 | // is, the absence of U+002D at the beginning and end of the | |
296 | // sequence. | |
297 | if( srcIsLDH == FALSE /* source at this point should not contain anyLDH characters */ | |
b75a7d8f A |
298 | || b1[0] == HYPHEN || b1[b1Len-1] == HYPHEN){ |
299 | *status = U_IDNA_STD3_ASCII_RULES_ERROR; | |
300 | ||
301 | /* populate the parseError struct */ | |
302 | if(srcIsLDH==FALSE){ | |
303 | // failPos is always set the index of failure | |
304 | uprv_syntaxError(b1,failPos, b1Len,parseError); | |
305 | }else if(b1[0] == HYPHEN){ | |
306 | // fail position is 0 | |
307 | uprv_syntaxError(b1,0,b1Len,parseError); | |
308 | }else{ | |
309 | // the last index in the source is always length-1 | |
310 | uprv_syntaxError(b1, (b1Len>0) ? b1Len-1 : b1Len, b1Len,parseError); | |
311 | } | |
312 | ||
313 | goto CLEANUP; | |
314 | } | |
315 | } | |
73c04bcf | 316 | // Step 4: if the source is ASCII then proceed to step 8 |
b75a7d8f A |
317 | if(srcIsASCII){ |
318 | if(b1Len <= destCapacity){ | |
319 | uprv_memmove(dest, b1, b1Len * U_SIZEOF_UCHAR); | |
320 | reqLength = b1Len; | |
321 | }else{ | |
322 | reqLength = b1Len; | |
323 | goto CLEANUP; | |
324 | } | |
325 | }else{ | |
326 | // step 5 : verify the sequence does not begin with ACE prefix | |
327 | if(!startsWithPrefix(b1,b1Len)){ | |
328 | ||
329 | //step 6: encode the sequence with punycode | |
330 | ||
331 | // do not preserve the case flags for now! | |
332 | // TODO: Preserve the case while implementing the RFE | |
333 | // caseFlags = (UBool*) uprv_malloc(b1Len * sizeof(UBool)); | |
334 | // uprv_memset(caseFlags,TRUE,b1Len); | |
335 | ||
336 | b2Len = u_strToPunycode(b1,b1Len,b2,b2Capacity,caseFlags, status); | |
337 | ||
338 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
339 | // redo processing of string | |
340 | /* we do not have enough room so grow the buffer*/ | |
341 | b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); | |
342 | if(b2 == NULL){ | |
343 | *status = U_MEMORY_ALLOCATION_ERROR; | |
344 | goto CLEANUP; | |
345 | } | |
346 | ||
347 | *status = U_ZERO_ERROR; // reset error | |
348 | ||
349 | b2Len = u_strToPunycode(b1,b1Len,b2,b2Len,caseFlags, status); | |
350 | } | |
351 | //error bail out | |
352 | if(U_FAILURE(*status)){ | |
353 | goto CLEANUP; | |
354 | } | |
355 | // TODO : Reconsider while implementing the case preserve RFE | |
356 | // convert all codepoints to lower case ASCII | |
357 | // toASCIILower(b2,b2Len); | |
358 | reqLength = b2Len+ACE_PREFIX_LENGTH; | |
359 | ||
360 | if(reqLength > destCapacity){ | |
361 | *status = U_BUFFER_OVERFLOW_ERROR; | |
362 | goto CLEANUP; | |
363 | } | |
364 | //Step 7: prepend the ACE prefix | |
365 | uprv_memcpy(dest,ACE_PREFIX,ACE_PREFIX_LENGTH * U_SIZEOF_UCHAR); | |
366 | //Step 6: copy the contents in b2 into dest | |
367 | uprv_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len * U_SIZEOF_UCHAR); | |
368 | ||
369 | }else{ | |
370 | *status = U_IDNA_ACE_PREFIX_ERROR; | |
371 | //position of failure is 0 | |
372 | uprv_syntaxError(b1,0,b1Len,parseError); | |
373 | goto CLEANUP; | |
374 | } | |
375 | } | |
46f4442e | 376 | // step 8: verify the length of label |
b75a7d8f A |
377 | if(reqLength > MAX_LABEL_LENGTH){ |
378 | *status = U_IDNA_LABEL_TOO_LONG_ERROR; | |
379 | } | |
380 | ||
381 | CLEANUP: | |
382 | if(b1 != b1Stack){ | |
383 | uprv_free(b1); | |
384 | } | |
385 | if(b2 != b2Stack){ | |
386 | uprv_free(b2); | |
387 | } | |
388 | uprv_free(caseFlags); | |
389 | ||
b75a7d8f A |
390 | return u_terminateUChars(dest, destCapacity, reqLength, status); |
391 | } | |
392 | ||
374ca955 A |
393 | static int32_t |
394 | _internal_toUnicode(const UChar* src, int32_t srcLength, | |
395 | UChar* dest, int32_t destCapacity, | |
396 | int32_t options, | |
397 | UStringPrepProfile* nameprep, | |
398 | UParseError* parseError, | |
46f4442e A |
399 | UErrorCode* status) |
400 | { | |
b75a7d8f A |
401 | |
402 | //get the options | |
46f4442e | 403 | //UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0); |
374ca955 | 404 | int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0; |
46f4442e A |
405 | |
406 | // TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too. | |
b75a7d8f A |
407 | UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE]; |
408 | ||
409 | //initialize pointers to stack buffers | |
410 | UChar *b1 = b1Stack, *b2 = b2Stack, *b1Prime=NULL, *b3=b3Stack; | |
411 | int32_t b1Len, b2Len, b1PrimeLen, b3Len, | |
412 | b1Capacity = MAX_LABEL_BUFFER_SIZE, | |
413 | b2Capacity = MAX_LABEL_BUFFER_SIZE, | |
414 | b3Capacity = MAX_LABEL_BUFFER_SIZE, | |
415 | reqLength=0; | |
374ca955 | 416 | |
b75a7d8f A |
417 | b1Len = 0; |
418 | UBool* caseFlags = NULL; | |
419 | ||
420 | UBool srcIsASCII = TRUE; | |
46f4442e A |
421 | /*UBool srcIsLDH = TRUE; |
422 | int32_t failPos =0;*/ | |
b75a7d8f | 423 | |
b75a7d8f A |
424 | // step 1: find out if all the codepoints in src are ASCII |
425 | if(srcLength==-1){ | |
426 | srcLength = 0; | |
427 | for(;src[srcLength]!=0;){ | |
428 | if(src[srcLength]> 0x7f){ | |
429 | srcIsASCII = FALSE; | |
46f4442e | 430 | }/*else if(isLDHChar(src[srcLength])==FALSE){ |
374ca955 A |
431 | // here we do not assemble surrogates |
432 | // since we know that LDH code points | |
433 | // are in the ASCII range only | |
b75a7d8f A |
434 | srcIsLDH = FALSE; |
435 | failPos = srcLength; | |
46f4442e | 436 | }*/ |
b75a7d8f A |
437 | srcLength++; |
438 | } | |
374ca955 | 439 | }else if(srcLength > 0){ |
b75a7d8f A |
440 | for(int32_t j=0; j<srcLength; j++){ |
441 | if(src[j]> 0x7f){ | |
442 | srcIsASCII = FALSE; | |
46f4442e | 443 | }/*else if(isLDHChar(src[j])==FALSE){ |
374ca955 A |
444 | // here we do not assemble surrogates |
445 | // since we know that LDH code points | |
446 | // are in the ASCII range only | |
b75a7d8f A |
447 | srcIsLDH = FALSE; |
448 | failPos = j; | |
46f4442e | 449 | }*/ |
b75a7d8f | 450 | } |
374ca955 A |
451 | }else{ |
452 | return 0; | |
b75a7d8f | 453 | } |
46f4442e | 454 | |
b75a7d8f A |
455 | if(srcIsASCII == FALSE){ |
456 | // step 2: process the string | |
374ca955 | 457 | b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status); |
b75a7d8f A |
458 | if(*status == U_BUFFER_OVERFLOW_ERROR){ |
459 | // redo processing of string | |
460 | /* we do not have enough room so grow the buffer*/ | |
461 | b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
462 | if(b1==NULL){ | |
463 | *status = U_MEMORY_ALLOCATION_ERROR; | |
464 | goto CLEANUP; | |
465 | } | |
466 | ||
467 | *status = U_ZERO_ERROR; // reset error | |
468 | ||
374ca955 | 469 | b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status); |
b75a7d8f A |
470 | } |
471 | //bail out on error | |
472 | if(U_FAILURE(*status)){ | |
473 | goto CLEANUP; | |
474 | } | |
475 | }else{ | |
476 | ||
477 | //just point src to b1 | |
478 | b1 = (UChar*) src; | |
479 | b1Len = srcLength; | |
480 | } | |
481 | ||
46f4442e A |
482 | // The RFC states that |
483 | // <quote> | |
484 | // ToUnicode never fails. If any step fails, then the original input | |
485 | // is returned immediately in that step. | |
486 | // </quote> | |
487 | ||
b75a7d8f | 488 | //step 3: verify ACE Prefix |
46f4442e A |
489 | if(startsWithPrefix(b1,b1Len)){ |
490 | ||
b75a7d8f A |
491 | //step 4: Remove the ACE Prefix |
492 | b1Prime = b1 + ACE_PREFIX_LENGTH; | |
493 | b1PrimeLen = b1Len - ACE_PREFIX_LENGTH; | |
494 | ||
495 | //step 5: Decode using punycode | |
496 | b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Capacity, caseFlags,status); | |
46f4442e | 497 | |
b75a7d8f A |
498 | if(*status == U_BUFFER_OVERFLOW_ERROR){ |
499 | // redo processing of string | |
500 | /* we do not have enough room so grow the buffer*/ | |
501 | b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); | |
502 | if(b2==NULL){ | |
503 | *status = U_MEMORY_ALLOCATION_ERROR; | |
504 | goto CLEANUP; | |
505 | } | |
506 | ||
507 | *status = U_ZERO_ERROR; // reset error | |
46f4442e | 508 | |
b75a7d8f | 509 | b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Len, caseFlags, status); |
b75a7d8f | 510 | } |
46f4442e A |
511 | |
512 | ||
b75a7d8f | 513 | //step 6:Apply toASCII |
46f4442e A |
514 | b3Len = uidna_toASCII(b2, b2Len, b3, b3Capacity, options, parseError, status); |
515 | ||
b75a7d8f A |
516 | if(*status == U_BUFFER_OVERFLOW_ERROR){ |
517 | // redo processing of string | |
518 | /* we do not have enough room so grow the buffer*/ | |
519 | b3 = (UChar*) uprv_malloc(b3Len * U_SIZEOF_UCHAR); | |
520 | if(b3==NULL){ | |
521 | *status = U_MEMORY_ALLOCATION_ERROR; | |
522 | goto CLEANUP; | |
523 | } | |
524 | ||
525 | *status = U_ZERO_ERROR; // reset error | |
46f4442e | 526 | |
b75a7d8f | 527 | b3Len = uidna_toASCII(b2,b2Len,b3,b3Len,options,parseError, status); |
46f4442e | 528 | |
b75a7d8f A |
529 | } |
530 | //bail out on error | |
531 | if(U_FAILURE(*status)){ | |
532 | goto CLEANUP; | |
533 | } | |
534 | ||
535 | //step 7: verify | |
536 | if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=0){ | |
46f4442e A |
537 | // Cause the original to be returned. |
538 | *status = U_IDNA_VERIFICATION_ERROR; | |
b75a7d8f A |
539 | goto CLEANUP; |
540 | } | |
541 | ||
542 | //step 8: return output of step 5 | |
543 | reqLength = b2Len; | |
544 | if(b2Len <= destCapacity) { | |
545 | uprv_memmove(dest, b2, b2Len * U_SIZEOF_UCHAR); | |
546 | } | |
46f4442e A |
547 | } |
548 | else{ | |
549 | // See the start of this if statement for why this is commented out. | |
b75a7d8f | 550 | // verify that STD3 ASCII rules are satisfied |
46f4442e A |
551 | /*if(useSTD3ASCIIRules == TRUE){ |
552 | if( srcIsLDH == FALSE // source contains some non-LDH characters | |
b75a7d8f A |
553 | || src[0] == HYPHEN || src[srcLength-1] == HYPHEN){ |
554 | *status = U_IDNA_STD3_ASCII_RULES_ERROR; | |
555 | ||
46f4442e | 556 | // populate the parseError struct |
b75a7d8f A |
557 | if(srcIsLDH==FALSE){ |
558 | // failPos is always set the index of failure | |
559 | uprv_syntaxError(src,failPos, srcLength,parseError); | |
560 | }else if(src[0] == HYPHEN){ | |
561 | // fail position is 0 | |
562 | uprv_syntaxError(src,0,srcLength,parseError); | |
563 | }else{ | |
564 | // the last index in the source is always length-1 | |
565 | uprv_syntaxError(src, (srcLength>0) ? srcLength-1 : srcLength, srcLength,parseError); | |
566 | } | |
567 | ||
568 | goto CLEANUP; | |
569 | } | |
46f4442e A |
570 | }*/ |
571 | // just return the source | |
b75a7d8f A |
572 | //copy the source to destination |
573 | if(srcLength <= destCapacity){ | |
574 | uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR); | |
575 | } | |
576 | reqLength = srcLength; | |
577 | } | |
578 | ||
46f4442e | 579 | |
b75a7d8f A |
580 | CLEANUP: |
581 | ||
582 | if(b1 != b1Stack && b1!=src){ | |
583 | uprv_free(b1); | |
584 | } | |
585 | if(b2 != b2Stack){ | |
586 | uprv_free(b2); | |
587 | } | |
588 | uprv_free(caseFlags); | |
46f4442e | 589 | |
b75a7d8f A |
590 | // The RFC states that |
591 | // <quote> | |
592 | // ToUnicode never fails. If any step fails, then the original input | |
593 | // is returned immediately in that step. | |
594 | // </quote> | |
595 | // So if any step fails lets copy source to destination | |
596 | if(U_FAILURE(*status)){ | |
597 | //copy the source to destination | |
598 | if(dest && srcLength <= destCapacity){ | |
46f4442e A |
599 | // srcLength should have already been set earlier. |
600 | U_ASSERT(srcLength >= 0); | |
b75a7d8f | 601 | uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR); |
b75a7d8f A |
602 | } |
603 | reqLength = srcLength; | |
46f4442e | 604 | *status = U_ZERO_ERROR; |
b75a7d8f A |
605 | } |
606 | ||
607 | return u_terminateUChars(dest, destCapacity, reqLength, status); | |
608 | } | |
609 | ||
374ca955 A |
610 | U_CAPI int32_t U_EXPORT2 |
611 | uidna_toASCII(const UChar* src, int32_t srcLength, | |
612 | UChar* dest, int32_t destCapacity, | |
613 | int32_t options, | |
614 | UParseError* parseError, | |
615 | UErrorCode* status){ | |
616 | ||
617 | if(status == NULL || U_FAILURE(*status)){ | |
618 | return 0; | |
619 | } | |
620 | if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ | |
621 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
622 | return 0; | |
623 | } | |
b75a7d8f | 624 | |
729e4ab9 | 625 | UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status); |
374ca955 A |
626 | |
627 | if(U_FAILURE(*status)){ | |
628 | return -1; | |
b75a7d8f | 629 | } |
374ca955 A |
630 | |
631 | int32_t retLen = _internal_toASCII(src, srcLength, dest, destCapacity, options, nameprep, parseError, status); | |
632 | ||
633 | /* close the profile*/ | |
634 | usprep_close(nameprep); | |
635 | ||
636 | return retLen; | |
b75a7d8f A |
637 | } |
638 | ||
374ca955 A |
639 | U_CAPI int32_t U_EXPORT2 |
640 | uidna_toUnicode(const UChar* src, int32_t srcLength, | |
641 | UChar* dest, int32_t destCapacity, | |
642 | int32_t options, | |
643 | UParseError* parseError, | |
644 | UErrorCode* status){ | |
645 | ||
646 | if(status == NULL || U_FAILURE(*status)){ | |
647 | return 0; | |
648 | } | |
649 | if( (src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ | |
650 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
651 | return 0; | |
652 | } | |
46f4442e | 653 | |
729e4ab9 | 654 | UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status); |
374ca955 A |
655 | |
656 | if(U_FAILURE(*status)){ | |
657 | return -1; | |
658 | } | |
659 | ||
660 | int32_t retLen = _internal_toUnicode(src, srcLength, dest, destCapacity, options, nameprep, parseError, status); | |
661 | ||
662 | usprep_close(nameprep); | |
663 | ||
664 | return retLen; | |
665 | } | |
666 | ||
667 | ||
b75a7d8f A |
668 | U_CAPI int32_t U_EXPORT2 |
669 | uidna_IDNToASCII( const UChar *src, int32_t srcLength, | |
670 | UChar* dest, int32_t destCapacity, | |
671 | int32_t options, | |
672 | UParseError *parseError, | |
673 | UErrorCode *status){ | |
674 | ||
675 | if(status == NULL || U_FAILURE(*status)){ | |
676 | return 0; | |
677 | } | |
678 | if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ | |
679 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
680 | return 0; | |
681 | } | |
682 | ||
683 | int32_t reqLength = 0; | |
684 | ||
729e4ab9 | 685 | UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status); |
b75a7d8f A |
686 | |
687 | if(U_FAILURE(*status)){ | |
688 | return 0; | |
689 | } | |
690 | ||
691 | //initialize pointers | |
692 | UChar *delimiter = (UChar*)src; | |
693 | UChar *labelStart = (UChar*)src; | |
694 | UChar *currentDest = (UChar*) dest; | |
695 | int32_t remainingLen = srcLength; | |
696 | int32_t remainingDestCapacity = destCapacity; | |
697 | int32_t labelLen = 0, labelReqLength = 0; | |
698 | UBool done = FALSE; | |
699 | ||
700 | ||
701 | for(;;){ | |
702 | ||
374ca955 | 703 | labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done); |
73c04bcf A |
704 | labelReqLength = 0; |
705 | if(!(labelLen==0 && done)){// make sure this is not a root label separator. | |
b75a7d8f | 706 | |
73c04bcf A |
707 | labelReqLength = _internal_toASCII( labelStart, labelLen, |
708 | currentDest, remainingDestCapacity, | |
709 | options, nameprep, | |
710 | parseError, status); | |
711 | ||
712 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
713 | ||
714 | *status = U_ZERO_ERROR; // reset error | |
715 | remainingDestCapacity = 0; | |
716 | } | |
b75a7d8f A |
717 | } |
718 | ||
719 | ||
720 | if(U_FAILURE(*status)){ | |
721 | break; | |
722 | } | |
723 | ||
724 | reqLength +=labelReqLength; | |
725 | // adjust the destination pointer | |
726 | if(labelReqLength < remainingDestCapacity){ | |
727 | currentDest = currentDest + labelReqLength; | |
728 | remainingDestCapacity -= labelReqLength; | |
729 | }else{ | |
730 | // should never occur | |
731 | remainingDestCapacity = 0; | |
732 | } | |
46f4442e | 733 | |
b75a7d8f A |
734 | if(done == TRUE){ |
735 | break; | |
736 | } | |
737 | ||
738 | // add the label separator | |
739 | if(remainingDestCapacity > 0){ | |
740 | *currentDest++ = FULL_STOP; | |
741 | remainingDestCapacity--; | |
742 | } | |
46f4442e | 743 | reqLength++; |
b75a7d8f A |
744 | |
745 | labelStart = delimiter; | |
746 | if(remainingLen >0 ){ | |
73c04bcf | 747 | remainingLen = (int32_t)(srcLength - (delimiter - src)); |
b75a7d8f A |
748 | } |
749 | ||
750 | } | |
46f4442e A |
751 | |
752 | if(reqLength > MAX_DOMAIN_NAME_LENGTH){ | |
753 | *status = U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR; | |
754 | } | |
755 | ||
374ca955 | 756 | usprep_close(nameprep); |
b75a7d8f A |
757 | |
758 | return u_terminateUChars(dest, destCapacity, reqLength, status); | |
759 | } | |
760 | ||
761 | U_CAPI int32_t U_EXPORT2 | |
762 | uidna_IDNToUnicode( const UChar* src, int32_t srcLength, | |
763 | UChar* dest, int32_t destCapacity, | |
764 | int32_t options, | |
765 | UParseError* parseError, | |
766 | UErrorCode* status){ | |
767 | ||
768 | if(status == NULL || U_FAILURE(*status)){ | |
769 | return 0; | |
770 | } | |
771 | if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ | |
772 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
773 | return 0; | |
774 | } | |
775 | ||
776 | int32_t reqLength = 0; | |
777 | ||
729e4ab9 | 778 | UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status); |
b75a7d8f A |
779 | |
780 | if(U_FAILURE(*status)){ | |
781 | return 0; | |
782 | } | |
783 | ||
784 | //initialize pointers | |
785 | UChar *delimiter = (UChar*)src; | |
786 | UChar *labelStart = (UChar*)src; | |
787 | UChar *currentDest = (UChar*) dest; | |
788 | int32_t remainingLen = srcLength; | |
789 | int32_t remainingDestCapacity = destCapacity; | |
790 | int32_t labelLen = 0, labelReqLength = 0; | |
791 | UBool done = FALSE; | |
792 | ||
b75a7d8f A |
793 | for(;;){ |
794 | ||
374ca955 | 795 | labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done); |
b75a7d8f | 796 | |
46f4442e A |
797 | // The RFC states that |
798 | // <quote> | |
799 | // ToUnicode never fails. If any step fails, then the original input | |
800 | // is returned immediately in that step. | |
801 | // </quote> | |
802 | // _internal_toUnicode will copy the label. | |
803 | /*if(labelLen==0 && done==FALSE){ | |
73c04bcf | 804 | *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; |
46f4442e A |
805 | break; |
806 | }*/ | |
807 | ||
374ca955 A |
808 | labelReqLength = _internal_toUnicode(labelStart, labelLen, |
809 | currentDest, remainingDestCapacity, | |
810 | options, nameprep, | |
811 | parseError, status); | |
b75a7d8f A |
812 | |
813 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
b75a7d8f A |
814 | *status = U_ZERO_ERROR; // reset error |
815 | remainingDestCapacity = 0; | |
816 | } | |
817 | ||
b75a7d8f A |
818 | if(U_FAILURE(*status)){ |
819 | break; | |
820 | } | |
821 | ||
822 | reqLength +=labelReqLength; | |
823 | // adjust the destination pointer | |
824 | if(labelReqLength < remainingDestCapacity){ | |
825 | currentDest = currentDest + labelReqLength; | |
826 | remainingDestCapacity -= labelReqLength; | |
827 | }else{ | |
828 | // should never occur | |
829 | remainingDestCapacity = 0; | |
830 | } | |
831 | ||
832 | if(done == TRUE){ | |
833 | break; | |
834 | } | |
835 | ||
836 | // add the label separator | |
46f4442e | 837 | // Unlike the ToASCII operation we don't normalize the label separators |
b75a7d8f | 838 | if(remainingDestCapacity > 0){ |
46f4442e | 839 | *currentDest++ = *(labelStart + labelLen); |
b75a7d8f A |
840 | remainingDestCapacity--; |
841 | } | |
46f4442e | 842 | reqLength++; |
b75a7d8f A |
843 | |
844 | labelStart = delimiter; | |
845 | if(remainingLen >0 ){ | |
73c04bcf | 846 | remainingLen = (int32_t)(srcLength - (delimiter - src)); |
b75a7d8f A |
847 | } |
848 | ||
849 | } | |
46f4442e A |
850 | |
851 | if(reqLength > MAX_DOMAIN_NAME_LENGTH){ | |
852 | *status = U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR; | |
853 | } | |
854 | ||
374ca955 | 855 | usprep_close(nameprep); |
b75a7d8f A |
856 | |
857 | return u_terminateUChars(dest, destCapacity, reqLength, status); | |
858 | } | |
859 | ||
860 | U_CAPI int32_t U_EXPORT2 | |
861 | uidna_compare( const UChar *s1, int32_t length1, | |
862 | const UChar *s2, int32_t length2, | |
863 | int32_t options, | |
864 | UErrorCode* status){ | |
865 | ||
866 | if(status == NULL || U_FAILURE(*status)){ | |
867 | return -1; | |
868 | } | |
869 | ||
870 | UChar b1Stack[MAX_IDN_BUFFER_SIZE], b2Stack[MAX_IDN_BUFFER_SIZE]; | |
871 | UChar *b1 = b1Stack, *b2 = b2Stack; | |
872 | int32_t b1Len, b2Len, b1Capacity = MAX_IDN_BUFFER_SIZE, b2Capacity = MAX_IDN_BUFFER_SIZE; | |
873 | int32_t result=-1; | |
874 | ||
875 | UParseError parseError; | |
876 | ||
877 | b1Len = uidna_IDNToASCII(s1, length1, b1, b1Capacity, options, &parseError, status); | |
878 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
879 | // redo processing of string | |
880 | b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
881 | if(b1==NULL){ | |
882 | *status = U_MEMORY_ALLOCATION_ERROR; | |
883 | goto CLEANUP; | |
884 | } | |
885 | ||
886 | *status = U_ZERO_ERROR; // reset error | |
887 | ||
888 | b1Len = uidna_IDNToASCII(s1,length1,b1,b1Len, options, &parseError, status); | |
889 | ||
890 | } | |
891 | ||
892 | b2Len = uidna_IDNToASCII(s2,length2, b2,b2Capacity, options, &parseError, status); | |
893 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
894 | // redo processing of string | |
895 | b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); | |
896 | if(b2==NULL){ | |
897 | *status = U_MEMORY_ALLOCATION_ERROR; | |
898 | goto CLEANUP; | |
899 | } | |
900 | ||
901 | *status = U_ZERO_ERROR; // reset error | |
902 | ||
903 | b2Len = uidna_IDNToASCII(s2, length2, b2, b2Len, options, &parseError, status); | |
904 | ||
905 | } | |
906 | // when toASCII is applied all label separators are replaced with FULL_STOP | |
907 | result = compareCaseInsensitiveASCII(b1,b1Len,b2,b2Len); | |
908 | ||
909 | CLEANUP: | |
910 | if(b1 != b1Stack){ | |
911 | uprv_free(b1); | |
912 | } | |
913 | ||
914 | if(b2 != b2Stack){ | |
915 | uprv_free(b2); | |
916 | } | |
917 | ||
918 | return result; | |
919 | } | |
920 | ||
921 | #endif /* #if !UCONFIG_NO_IDNA */ |