]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | ******************************************************************************* | |
3 | * | |
73c04bcf | 4 | * Copyright (C) 2003-2007, International Business Machines |
b75a7d8f A |
5 | * Corporation and others. All Rights Reserved. |
6 | * | |
7 | ******************************************************************************* | |
8 | * file name: uidna.cpp | |
9 | * encoding: US-ASCII | |
10 | * tab size: 8 (not used) | |
11 | * indentation:4 | |
12 | * | |
13 | * created on: 2003feb1 | |
14 | * created by: Ram Viswanadha | |
15 | */ | |
16 | ||
17 | #include "unicode/utypes.h" | |
18 | ||
19 | #if !UCONFIG_NO_IDNA | |
20 | ||
21 | #include "unicode/uidna.h" | |
22 | #include "unicode/ustring.h" | |
374ca955 | 23 | #include "unicode/usprep.h" |
b75a7d8f A |
24 | #include "punycode.h" |
25 | #include "ustr_imp.h" | |
26 | #include "cmemory.h" | |
27 | #include "sprpimpl.h" | |
28 | ||
29 | /* The official IDNA ACE prefix is "xn--" (stored here in lower case). */ | |
30 | static const UChar ACE_PREFIX[] ={ 0x0078,0x006E,0x002d,0x002d } ; | |
31 | #define ACE_PREFIX_LENGTH 4 | |
32 | ||
33 | #define MAX_LABEL_LENGTH 63 | |
34 | #define HYPHEN 0x002D | |
35 | /* Stack buffer sizes: a converted label may not exceed MAX_LABEL_LENGTH, so the per-label buffers below leave headroom; the larger size is used for whole-name buffers in uidna_compare. */ | |
36 | #define MAX_LABEL_BUFFER_SIZE 100 | |
37 | #define MAX_IDN_BUFFER_SIZE 300 | |
38 | ||
39 | #define CAPITAL_A 0x0041 | |
40 | #define CAPITAL_Z 0x005A | |
41 | #define LOWER_CASE_DELTA 0x0020 | |
42 | #define FULL_STOP 0x002E | |
374ca955 | 43 | #define DATA_FILE_NAME "uidna" |
b75a7d8f A |
44 | |
45 | inline static UChar | |
46 | toASCIILower(UChar ch){ | |
47 | if(CAPITAL_A <= ch && ch <= CAPITAL_Z){ | |
48 | return ch + LOWER_CASE_DELTA; | |
49 | } | |
50 | return ch; | |
51 | } | |
52 | ||
53 | inline static UBool | |
54 | startsWithPrefix(const UChar* src , int32_t srcLength){ | |
55 | UBool startsWithPrefix = TRUE; | |
56 | ||
57 | if(srcLength < ACE_PREFIX_LENGTH){ | |
58 | return FALSE; | |
59 | } | |
60 | ||
61 | for(int8_t i=0; i< ACE_PREFIX_LENGTH; i++){ | |
62 | if(toASCIILower(src[i]) != ACE_PREFIX[i]){ | |
63 | startsWithPrefix = FALSE; | |
64 | } | |
65 | } | |
66 | return startsWithPrefix; | |
67 | } | |
68 | ||
b75a7d8f A |
69 | |
70 | inline static int32_t | |
71 | compareCaseInsensitiveASCII(const UChar* s1, int32_t s1Len, | |
72 | const UChar* s2, int32_t s2Len){ | |
73 | ||
74 | int32_t minLength; | |
75 | int32_t lengthResult; | |
76 | ||
77 | // are we comparing different lengths? | |
78 | if(s1Len != s2Len) { | |
79 | if(s1Len < s2Len) { | |
80 | minLength = s1Len; | |
81 | lengthResult = -1; | |
82 | } else { | |
83 | minLength = s2Len; | |
84 | lengthResult = 1; | |
85 | } | |
86 | } else { | |
87 | // ok the lengths are equal | |
88 | minLength = s1Len; | |
89 | lengthResult = 0; | |
90 | } | |
91 | ||
92 | UChar c1,c2; | |
93 | int32_t rc; | |
94 | ||
95 | for(int32_t i =0;/* no condition */;i++) { | |
96 | ||
97 | /* If we reach the ends of both strings then they match */ | |
98 | if(i == minLength) { | |
99 | return lengthResult; | |
100 | } | |
101 | ||
102 | c1 = s1[i]; | |
103 | c2 = s2[i]; | |
104 | ||
105 | /* Case-insensitive comparison */ | |
106 | if(c1!=c2) { | |
107 | rc=(int32_t)toASCIILower(c1)-(int32_t)toASCIILower(c2); | |
108 | if(rc!=0) { | |
109 | lengthResult=rc; | |
110 | break; | |
111 | } | |
112 | } | |
113 | } | |
114 | return lengthResult; | |
115 | } | |
116 | ||
117 | ||
374ca955 A |
118 | /** |
119 | * Ascertain if the given code point is a label separator as | |
120 | * defined by the IDNA RFC | |
121 | * | |
122 | * @param ch The code point to be ascertained | |
123 | * @return true if the char is a label separator | |
73c04bcf | 124 | * @stable ICU 2.8 |
374ca955 A |
125 | */ |
126 | static inline UBool isLabelSeparator(UChar ch){ | |
127 | switch(ch){ | |
128 | case 0x002e: | |
129 | case 0x3002: | |
130 | case 0xFF0E: | |
131 | case 0xFF61: | |
132 | return TRUE; | |
133 | default: | |
134 | return FALSE; | |
b75a7d8f | 135 | } |
374ca955 A |
136 | } |
137 | ||
138 | // returns the length of the label excluding the separator | |
139 | // if *limit == separator then the length returned does not include | |
140 | // the separtor. | |
141 | static inline int32_t | |
142 | getNextSeparator(UChar *src,int32_t srcLength, | |
143 | UChar **limit, | |
144 | UBool *done){ | |
145 | if(srcLength == -1){ | |
146 | int32_t i; | |
147 | for(i=0 ; ;i++){ | |
148 | if(src[i] == 0){ | |
149 | *limit = src + i; // point to null | |
150 | *done = TRUE; | |
151 | return i; | |
152 | } | |
153 | if(isLabelSeparator(src[i])){ | |
154 | *limit = src + (i+1); // go past the delimiter | |
155 | return i; | |
156 | ||
157 | } | |
158 | } | |
159 | }else{ | |
160 | int32_t i; | |
161 | for(i=0;i<srcLength;i++){ | |
162 | if(isLabelSeparator(src[i])){ | |
163 | *limit = src + (i+1); // go past the delimiter | |
164 | return i; | |
165 | } | |
166 | } | |
167 | // we have not found the delimiter | |
168 | // if(i==srcLength) | |
169 | *limit = src+srcLength; | |
170 | *done = TRUE; | |
171 | ||
172 | return i; | |
b75a7d8f | 173 | } |
374ca955 A |
174 | } |
175 | static inline UBool isLDHChar(UChar ch){ | |
176 | // high runner case | |
177 | if(ch>0x007A){ | |
178 | return FALSE; | |
179 | } | |
180 | //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A] | |
181 | if( (ch==0x002D) || | |
182 | (0x0030 <= ch && ch <= 0x0039) || | |
183 | (0x0041 <= ch && ch <= 0x005A) || | |
184 | (0x0061 <= ch && ch <= 0x007A) | |
185 | ){ | |
186 | return TRUE; | |
187 | } | |
188 | return FALSE; | |
189 | } | |
190 | ||
191 | static int32_t | |
192 | _internal_toASCII(const UChar* src, int32_t srcLength, | |
193 | UChar* dest, int32_t destCapacity, | |
194 | int32_t options, | |
195 | UStringPrepProfile* nameprep, | |
196 | UParseError* parseError, | |
197 | UErrorCode* status){ | |
198 | ||
b75a7d8f A |
199 | UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE]; |
200 | //initialize pointers to stack buffers | |
201 | UChar *b1 = b1Stack, *b2 = b2Stack; | |
73c04bcf | 202 | int32_t b1Len=0, b2Len, |
b75a7d8f A |
203 | b1Capacity = MAX_LABEL_BUFFER_SIZE, |
204 | b2Capacity = MAX_LABEL_BUFFER_SIZE , | |
205 | reqLength=0; | |
206 | ||
374ca955 | 207 | int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0; |
b75a7d8f A |
208 | UBool* caseFlags = NULL; |
209 | ||
210 | // the source contains all ascii codepoints | |
211 | UBool srcIsASCII = TRUE; | |
212 | // assume the source contains all LDH codepoints | |
213 | UBool srcIsLDH = TRUE; | |
214 | ||
215 | int32_t j=0; | |
216 | ||
217 | //get the options | |
b75a7d8f | 218 | UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0); |
374ca955 | 219 | |
b75a7d8f | 220 | int32_t failPos = -1; |
b75a7d8f | 221 | |
73c04bcf A |
222 | if(srcLength == -1){ |
223 | srcLength = u_strlen(src); | |
224 | } | |
225 | ||
226 | if(srcLength > b1Capacity){ | |
227 | b1 = (UChar*) uprv_malloc(srcLength * U_SIZEOF_UCHAR); | |
b75a7d8f A |
228 | if(b1==NULL){ |
229 | *status = U_MEMORY_ALLOCATION_ERROR; | |
230 | goto CLEANUP; | |
231 | } | |
73c04bcf A |
232 | b1Capacity = srcLength; |
233 | } | |
b75a7d8f | 234 | |
73c04bcf A |
235 | // step 1 |
236 | for( j=0;j<srcLength;j++){ | |
237 | if(src[j] > 0x7F){ | |
238 | srcIsASCII = FALSE; | |
239 | } | |
240 | b1[b1Len++] = src[j]; | |
241 | } | |
242 | ||
243 | // step 2 is performed only if the source contains non ASCII | |
244 | if(srcIsASCII == FALSE){ | |
b75a7d8f | 245 | |
73c04bcf A |
246 | // step 2 |
247 | b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status); | |
248 | ||
249 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
250 | // redo processing of string | |
251 | // we do not have enough room so grow the buffer | |
252 | if(b1 != b1Stack){ | |
253 | uprv_free(b1); | |
254 | } | |
255 | b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
256 | if(b1==NULL){ | |
257 | *status = U_MEMORY_ALLOCATION_ERROR; | |
258 | goto CLEANUP; | |
259 | } | |
260 | ||
261 | *status = U_ZERO_ERROR; // reset error | |
262 | ||
263 | b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status); | |
264 | } | |
b75a7d8f A |
265 | } |
266 | // error bail out | |
267 | if(U_FAILURE(*status)){ | |
268 | goto CLEANUP; | |
269 | } | |
73c04bcf A |
270 | if(b1Len == 0){ |
271 | *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; | |
272 | goto CLEANUP; | |
273 | } | |
b75a7d8f | 274 | |
73c04bcf A |
275 | // for step 3 & 4 |
276 | srcIsASCII = TRUE; | |
b75a7d8f | 277 | for( j=0;j<b1Len;j++){ |
73c04bcf | 278 | // check if output of usprep_prepare is all ASCII |
b75a7d8f A |
279 | if(b1[j] > 0x7F){ |
280 | srcIsASCII = FALSE; | |
374ca955 | 281 | }else if(isLDHChar(b1[j])==FALSE){ // if the char is in ASCII range verify that it is an LDH character |
b75a7d8f A |
282 | srcIsLDH = FALSE; |
283 | failPos = j; | |
284 | } | |
285 | } | |
b75a7d8f A |
286 | if(useSTD3ASCIIRules == TRUE){ |
287 | // verify 3a and 3b | |
374ca955 A |
288 | // 3(a) Verify the absence of non-LDH ASCII code points; that is, the |
289 | // absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F. | |
290 | // 3(b) Verify the absence of leading and trailing hyphen-minus; that | |
291 | // is, the absence of U+002D at the beginning and end of the | |
292 | // sequence. | |
293 | if( srcIsLDH == FALSE /* source at this point should not contain anyLDH characters */ | |
b75a7d8f A |
294 | || b1[0] == HYPHEN || b1[b1Len-1] == HYPHEN){ |
295 | *status = U_IDNA_STD3_ASCII_RULES_ERROR; | |
296 | ||
297 | /* populate the parseError struct */ | |
298 | if(srcIsLDH==FALSE){ | |
299 | // failPos is always set the index of failure | |
300 | uprv_syntaxError(b1,failPos, b1Len,parseError); | |
301 | }else if(b1[0] == HYPHEN){ | |
302 | // fail position is 0 | |
303 | uprv_syntaxError(b1,0,b1Len,parseError); | |
304 | }else{ | |
305 | // the last index in the source is always length-1 | |
306 | uprv_syntaxError(b1, (b1Len>0) ? b1Len-1 : b1Len, b1Len,parseError); | |
307 | } | |
308 | ||
309 | goto CLEANUP; | |
310 | } | |
311 | } | |
73c04bcf | 312 | // Step 4: if the source is ASCII then proceed to step 8 |
b75a7d8f A |
313 | if(srcIsASCII){ |
314 | if(b1Len <= destCapacity){ | |
315 | uprv_memmove(dest, b1, b1Len * U_SIZEOF_UCHAR); | |
316 | reqLength = b1Len; | |
317 | }else{ | |
318 | reqLength = b1Len; | |
319 | goto CLEANUP; | |
320 | } | |
321 | }else{ | |
322 | // step 5 : verify the sequence does not begin with ACE prefix | |
323 | if(!startsWithPrefix(b1,b1Len)){ | |
324 | ||
325 | //step 6: encode the sequence with punycode | |
326 | ||
327 | // do not preserve the case flags for now! | |
328 | // TODO: Preserve the case while implementing the RFE | |
329 | // caseFlags = (UBool*) uprv_malloc(b1Len * sizeof(UBool)); | |
330 | // uprv_memset(caseFlags,TRUE,b1Len); | |
331 | ||
332 | b2Len = u_strToPunycode(b1,b1Len,b2,b2Capacity,caseFlags, status); | |
333 | ||
334 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
335 | // redo processing of string | |
336 | /* we do not have enough room so grow the buffer*/ | |
337 | b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); | |
338 | if(b2 == NULL){ | |
339 | *status = U_MEMORY_ALLOCATION_ERROR; | |
340 | goto CLEANUP; | |
341 | } | |
342 | ||
343 | *status = U_ZERO_ERROR; // reset error | |
344 | ||
345 | b2Len = u_strToPunycode(b1,b1Len,b2,b2Len,caseFlags, status); | |
346 | } | |
347 | //error bail out | |
348 | if(U_FAILURE(*status)){ | |
349 | goto CLEANUP; | |
350 | } | |
351 | // TODO : Reconsider while implementing the case preserve RFE | |
352 | // convert all codepoints to lower case ASCII | |
353 | // toASCIILower(b2,b2Len); | |
354 | reqLength = b2Len+ACE_PREFIX_LENGTH; | |
355 | ||
356 | if(reqLength > destCapacity){ | |
357 | *status = U_BUFFER_OVERFLOW_ERROR; | |
358 | goto CLEANUP; | |
359 | } | |
360 | //Step 7: prepend the ACE prefix | |
361 | uprv_memcpy(dest,ACE_PREFIX,ACE_PREFIX_LENGTH * U_SIZEOF_UCHAR); | |
362 | //Step 6: copy the contents in b2 into dest | |
363 | uprv_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len * U_SIZEOF_UCHAR); | |
364 | ||
365 | }else{ | |
366 | *status = U_IDNA_ACE_PREFIX_ERROR; | |
367 | //position of failure is 0 | |
368 | uprv_syntaxError(b1,0,b1Len,parseError); | |
369 | goto CLEANUP; | |
370 | } | |
371 | } | |
73c04bcf | 372 | // step 8: verify the length of lable |
b75a7d8f A |
373 | if(reqLength > MAX_LABEL_LENGTH){ |
374 | *status = U_IDNA_LABEL_TOO_LONG_ERROR; | |
375 | } | |
376 | ||
377 | CLEANUP: | |
378 | if(b1 != b1Stack){ | |
379 | uprv_free(b1); | |
380 | } | |
381 | if(b2 != b2Stack){ | |
382 | uprv_free(b2); | |
383 | } | |
384 | uprv_free(caseFlags); | |
385 | ||
b75a7d8f A |
386 | return u_terminateUChars(dest, destCapacity, reqLength, status); |
387 | } | |
388 | ||
374ca955 A |
389 | static int32_t |
390 | _internal_toUnicode(const UChar* src, int32_t srcLength, | |
391 | UChar* dest, int32_t destCapacity, | |
392 | int32_t options, | |
393 | UStringPrepProfile* nameprep, | |
394 | UParseError* parseError, | |
395 | UErrorCode* status){ | |
b75a7d8f A |
396 | |
397 | //get the options | |
b75a7d8f | 398 | UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0); |
374ca955 | 399 | int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0; |
b75a7d8f A |
400 | |
401 | UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE]; | |
402 | ||
403 | //initialize pointers to stack buffers | |
404 | UChar *b1 = b1Stack, *b2 = b2Stack, *b1Prime=NULL, *b3=b3Stack; | |
405 | int32_t b1Len, b2Len, b1PrimeLen, b3Len, | |
406 | b1Capacity = MAX_LABEL_BUFFER_SIZE, | |
407 | b2Capacity = MAX_LABEL_BUFFER_SIZE, | |
408 | b3Capacity = MAX_LABEL_BUFFER_SIZE, | |
409 | reqLength=0; | |
374ca955 | 410 | |
b75a7d8f A |
411 | b1Len = 0; |
412 | UBool* caseFlags = NULL; | |
413 | ||
414 | UBool srcIsASCII = TRUE; | |
415 | UBool srcIsLDH = TRUE; | |
416 | int32_t failPos =0; | |
417 | ||
b75a7d8f A |
418 | // step 1: find out if all the codepoints in src are ASCII |
419 | if(srcLength==-1){ | |
420 | srcLength = 0; | |
421 | for(;src[srcLength]!=0;){ | |
422 | if(src[srcLength]> 0x7f){ | |
423 | srcIsASCII = FALSE; | |
374ca955 A |
424 | }else if(isLDHChar(src[srcLength])==FALSE){ |
425 | // here we do not assemble surrogates | |
426 | // since we know that LDH code points | |
427 | // are in the ASCII range only | |
b75a7d8f A |
428 | srcIsLDH = FALSE; |
429 | failPos = srcLength; | |
430 | } | |
431 | srcLength++; | |
432 | } | |
374ca955 | 433 | }else if(srcLength > 0){ |
b75a7d8f A |
434 | for(int32_t j=0; j<srcLength; j++){ |
435 | if(src[j]> 0x7f){ | |
436 | srcIsASCII = FALSE; | |
374ca955 A |
437 | }else if(isLDHChar(src[j])==FALSE){ |
438 | // here we do not assemble surrogates | |
439 | // since we know that LDH code points | |
440 | // are in the ASCII range only | |
b75a7d8f A |
441 | srcIsLDH = FALSE; |
442 | failPos = j; | |
443 | } | |
444 | } | |
374ca955 A |
445 | }else{ |
446 | return 0; | |
b75a7d8f A |
447 | } |
448 | ||
449 | if(srcIsASCII == FALSE){ | |
450 | // step 2: process the string | |
374ca955 | 451 | b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status); |
b75a7d8f A |
452 | if(*status == U_BUFFER_OVERFLOW_ERROR){ |
453 | // redo processing of string | |
454 | /* we do not have enough room so grow the buffer*/ | |
455 | b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
456 | if(b1==NULL){ | |
457 | *status = U_MEMORY_ALLOCATION_ERROR; | |
458 | goto CLEANUP; | |
459 | } | |
460 | ||
461 | *status = U_ZERO_ERROR; // reset error | |
462 | ||
374ca955 | 463 | b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status); |
b75a7d8f A |
464 | } |
465 | //bail out on error | |
466 | if(U_FAILURE(*status)){ | |
467 | goto CLEANUP; | |
468 | } | |
469 | }else{ | |
470 | ||
471 | //just point src to b1 | |
472 | b1 = (UChar*) src; | |
473 | b1Len = srcLength; | |
474 | } | |
475 | ||
476 | //step 3: verify ACE Prefix | |
477 | if(startsWithPrefix(src,srcLength)){ | |
73c04bcf | 478 | |
b75a7d8f A |
479 | //step 4: Remove the ACE Prefix |
480 | b1Prime = b1 + ACE_PREFIX_LENGTH; | |
481 | b1PrimeLen = b1Len - ACE_PREFIX_LENGTH; | |
482 | ||
483 | //step 5: Decode using punycode | |
484 | b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Capacity, caseFlags,status); | |
485 | ||
486 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
487 | // redo processing of string | |
488 | /* we do not have enough room so grow the buffer*/ | |
489 | b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); | |
490 | if(b2==NULL){ | |
491 | *status = U_MEMORY_ALLOCATION_ERROR; | |
492 | goto CLEANUP; | |
493 | } | |
494 | ||
495 | *status = U_ZERO_ERROR; // reset error | |
496 | ||
497 | b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Len, caseFlags, status); | |
498 | ||
499 | } | |
500 | ||
b75a7d8f A |
501 | //step 6:Apply toASCII |
502 | b3Len = uidna_toASCII(b2, b2Len, b3, b3Capacity,options,parseError, status); | |
73c04bcf | 503 | |
b75a7d8f A |
504 | if(*status == U_BUFFER_OVERFLOW_ERROR){ |
505 | // redo processing of string | |
506 | /* we do not have enough room so grow the buffer*/ | |
507 | b3 = (UChar*) uprv_malloc(b3Len * U_SIZEOF_UCHAR); | |
508 | if(b3==NULL){ | |
509 | *status = U_MEMORY_ALLOCATION_ERROR; | |
510 | goto CLEANUP; | |
511 | } | |
512 | ||
513 | *status = U_ZERO_ERROR; // reset error | |
514 | ||
515 | b3Len = uidna_toASCII(b2,b2Len,b3,b3Len,options,parseError, status); | |
516 | ||
517 | } | |
73c04bcf | 518 | |
b75a7d8f A |
519 | //bail out on error |
520 | if(U_FAILURE(*status)){ | |
521 | goto CLEANUP; | |
522 | } | |
523 | ||
524 | //step 7: verify | |
525 | if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=0){ | |
526 | *status = U_IDNA_VERIFICATION_ERROR; | |
527 | goto CLEANUP; | |
528 | } | |
529 | ||
530 | //step 8: return output of step 5 | |
531 | reqLength = b2Len; | |
532 | if(b2Len <= destCapacity) { | |
533 | uprv_memmove(dest, b2, b2Len * U_SIZEOF_UCHAR); | |
534 | } | |
535 | }else{ | |
536 | // verify that STD3 ASCII rules are satisfied | |
537 | if(useSTD3ASCIIRules == TRUE){ | |
538 | if( srcIsLDH == FALSE /* source contains some non-LDH characters */ | |
539 | || src[0] == HYPHEN || src[srcLength-1] == HYPHEN){ | |
540 | *status = U_IDNA_STD3_ASCII_RULES_ERROR; | |
541 | ||
542 | /* populate the parseError struct */ | |
543 | if(srcIsLDH==FALSE){ | |
544 | // failPos is always set the index of failure | |
545 | uprv_syntaxError(src,failPos, srcLength,parseError); | |
546 | }else if(src[0] == HYPHEN){ | |
547 | // fail position is 0 | |
548 | uprv_syntaxError(src,0,srcLength,parseError); | |
549 | }else{ | |
550 | // the last index in the source is always length-1 | |
551 | uprv_syntaxError(src, (srcLength>0) ? srcLength-1 : srcLength, srcLength,parseError); | |
552 | } | |
553 | ||
554 | goto CLEANUP; | |
555 | } | |
556 | } | |
557 | //copy the source to destination | |
558 | if(srcLength <= destCapacity){ | |
559 | uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR); | |
560 | } | |
561 | reqLength = srcLength; | |
562 | } | |
563 | ||
564 | CLEANUP: | |
565 | ||
566 | if(b1 != b1Stack && b1!=src){ | |
567 | uprv_free(b1); | |
568 | } | |
569 | if(b2 != b2Stack){ | |
570 | uprv_free(b2); | |
571 | } | |
572 | uprv_free(caseFlags); | |
573 | ||
374ca955 | 574 | |
b75a7d8f A |
575 | // The RFC states that |
576 | // <quote> | |
577 | // ToUnicode never fails. If any step fails, then the original input | |
578 | // is returned immediately in that step. | |
579 | // </quote> | |
580 | // So if any step fails lets copy source to destination | |
581 | if(U_FAILURE(*status)){ | |
582 | //copy the source to destination | |
583 | if(dest && srcLength <= destCapacity){ | |
584 | if(srcLength == -1) { | |
585 | uprv_memmove(dest,src,u_strlen(src)* U_SIZEOF_UCHAR); | |
586 | } else { | |
587 | uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR); | |
588 | } | |
589 | } | |
590 | reqLength = srcLength; | |
591 | } | |
592 | ||
593 | return u_terminateUChars(dest, destCapacity, reqLength, status); | |
594 | } | |
595 | ||
374ca955 A |
596 | U_CAPI int32_t U_EXPORT2 |
597 | uidna_toASCII(const UChar* src, int32_t srcLength, | |
598 | UChar* dest, int32_t destCapacity, | |
599 | int32_t options, | |
600 | UParseError* parseError, | |
601 | UErrorCode* status){ | |
602 | ||
603 | if(status == NULL || U_FAILURE(*status)){ | |
604 | return 0; | |
605 | } | |
606 | if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ | |
607 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
608 | return 0; | |
609 | } | |
b75a7d8f | 610 | |
374ca955 A |
611 | UStringPrepProfile* nameprep = usprep_open(NULL,DATA_FILE_NAME, status); |
612 | ||
613 | if(U_FAILURE(*status)){ | |
614 | return -1; | |
b75a7d8f | 615 | } |
374ca955 A |
616 | |
617 | int32_t retLen = _internal_toASCII(src, srcLength, dest, destCapacity, options, nameprep, parseError, status); | |
618 | ||
619 | /* close the profile*/ | |
620 | usprep_close(nameprep); | |
621 | ||
622 | return retLen; | |
b75a7d8f A |
623 | } |
624 | ||
374ca955 A |
625 | U_CAPI int32_t U_EXPORT2 |
626 | uidna_toUnicode(const UChar* src, int32_t srcLength, | |
627 | UChar* dest, int32_t destCapacity, | |
628 | int32_t options, | |
629 | UParseError* parseError, | |
630 | UErrorCode* status){ | |
631 | ||
632 | if(status == NULL || U_FAILURE(*status)){ | |
633 | return 0; | |
634 | } | |
635 | if( (src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ | |
636 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
637 | return 0; | |
638 | } | |
639 | ||
640 | UStringPrepProfile* nameprep = usprep_open(NULL, DATA_FILE_NAME, status); | |
641 | ||
642 | if(U_FAILURE(*status)){ | |
643 | return -1; | |
644 | } | |
645 | ||
646 | int32_t retLen = _internal_toUnicode(src, srcLength, dest, destCapacity, options, nameprep, parseError, status); | |
647 | ||
648 | usprep_close(nameprep); | |
649 | ||
650 | return retLen; | |
651 | } | |
652 | ||
653 | ||
b75a7d8f A |
654 | U_CAPI int32_t U_EXPORT2 |
655 | uidna_IDNToASCII( const UChar *src, int32_t srcLength, | |
656 | UChar* dest, int32_t destCapacity, | |
657 | int32_t options, | |
658 | UParseError *parseError, | |
659 | UErrorCode *status){ | |
660 | ||
661 | if(status == NULL || U_FAILURE(*status)){ | |
662 | return 0; | |
663 | } | |
664 | if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ | |
665 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
666 | return 0; | |
667 | } | |
668 | ||
669 | int32_t reqLength = 0; | |
670 | ||
374ca955 | 671 | UStringPrepProfile* nameprep = usprep_open(NULL, DATA_FILE_NAME, status); |
b75a7d8f A |
672 | |
673 | if(U_FAILURE(*status)){ | |
674 | return 0; | |
675 | } | |
676 | ||
677 | //initialize pointers | |
678 | UChar *delimiter = (UChar*)src; | |
679 | UChar *labelStart = (UChar*)src; | |
680 | UChar *currentDest = (UChar*) dest; | |
681 | int32_t remainingLen = srcLength; | |
682 | int32_t remainingDestCapacity = destCapacity; | |
683 | int32_t labelLen = 0, labelReqLength = 0; | |
684 | UBool done = FALSE; | |
685 | ||
686 | ||
687 | for(;;){ | |
688 | ||
374ca955 | 689 | labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done); |
73c04bcf A |
690 | labelReqLength = 0; |
691 | if(!(labelLen==0 && done)){// make sure this is not a root label separator. | |
b75a7d8f | 692 | |
73c04bcf A |
693 | labelReqLength = _internal_toASCII( labelStart, labelLen, |
694 | currentDest, remainingDestCapacity, | |
695 | options, nameprep, | |
696 | parseError, status); | |
697 | ||
698 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
699 | ||
700 | *status = U_ZERO_ERROR; // reset error | |
701 | remainingDestCapacity = 0; | |
702 | } | |
b75a7d8f A |
703 | } |
704 | ||
705 | ||
706 | if(U_FAILURE(*status)){ | |
707 | break; | |
708 | } | |
709 | ||
710 | reqLength +=labelReqLength; | |
711 | // adjust the destination pointer | |
712 | if(labelReqLength < remainingDestCapacity){ | |
713 | currentDest = currentDest + labelReqLength; | |
714 | remainingDestCapacity -= labelReqLength; | |
715 | }else{ | |
716 | // should never occur | |
717 | remainingDestCapacity = 0; | |
718 | } | |
719 | if(done == TRUE){ | |
720 | break; | |
721 | } | |
722 | ||
723 | // add the label separator | |
724 | if(remainingDestCapacity > 0){ | |
725 | *currentDest++ = FULL_STOP; | |
726 | remainingDestCapacity--; | |
727 | } | |
728 | reqLength++; | |
729 | ||
730 | labelStart = delimiter; | |
731 | if(remainingLen >0 ){ | |
73c04bcf | 732 | remainingLen = (int32_t)(srcLength - (delimiter - src)); |
b75a7d8f A |
733 | } |
734 | ||
735 | } | |
736 | ||
374ca955 | 737 | usprep_close(nameprep); |
b75a7d8f A |
738 | |
739 | return u_terminateUChars(dest, destCapacity, reqLength, status); | |
740 | } | |
741 | ||
742 | U_CAPI int32_t U_EXPORT2 | |
743 | uidna_IDNToUnicode( const UChar* src, int32_t srcLength, | |
744 | UChar* dest, int32_t destCapacity, | |
745 | int32_t options, | |
746 | UParseError* parseError, | |
747 | UErrorCode* status){ | |
748 | ||
749 | if(status == NULL || U_FAILURE(*status)){ | |
750 | return 0; | |
751 | } | |
752 | if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ | |
753 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
754 | return 0; | |
755 | } | |
756 | ||
757 | int32_t reqLength = 0; | |
758 | ||
374ca955 | 759 | UStringPrepProfile* nameprep = usprep_open(NULL, DATA_FILE_NAME, status); |
b75a7d8f A |
760 | |
761 | if(U_FAILURE(*status)){ | |
762 | return 0; | |
763 | } | |
764 | ||
765 | //initialize pointers | |
766 | UChar *delimiter = (UChar*)src; | |
767 | UChar *labelStart = (UChar*)src; | |
768 | UChar *currentDest = (UChar*) dest; | |
769 | int32_t remainingLen = srcLength; | |
770 | int32_t remainingDestCapacity = destCapacity; | |
771 | int32_t labelLen = 0, labelReqLength = 0; | |
772 | UBool done = FALSE; | |
773 | ||
774 | ||
775 | for(;;){ | |
776 | ||
374ca955 | 777 | labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done); |
b75a7d8f | 778 | |
73c04bcf A |
779 | if(labelLen==0 && done==FALSE){ |
780 | *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; | |
781 | } | |
374ca955 A |
782 | labelReqLength = _internal_toUnicode(labelStart, labelLen, |
783 | currentDest, remainingDestCapacity, | |
784 | options, nameprep, | |
785 | parseError, status); | |
b75a7d8f A |
786 | |
787 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
788 | ||
789 | *status = U_ZERO_ERROR; // reset error | |
790 | remainingDestCapacity = 0; | |
791 | } | |
792 | ||
793 | ||
794 | if(U_FAILURE(*status)){ | |
795 | break; | |
796 | } | |
797 | ||
798 | reqLength +=labelReqLength; | |
799 | // adjust the destination pointer | |
800 | if(labelReqLength < remainingDestCapacity){ | |
801 | currentDest = currentDest + labelReqLength; | |
802 | remainingDestCapacity -= labelReqLength; | |
803 | }else{ | |
804 | // should never occur | |
805 | remainingDestCapacity = 0; | |
806 | } | |
807 | ||
808 | if(done == TRUE){ | |
809 | break; | |
810 | } | |
811 | ||
812 | // add the label separator | |
813 | if(remainingDestCapacity > 0){ | |
814 | *currentDest++ = FULL_STOP; | |
815 | remainingDestCapacity--; | |
816 | } | |
817 | reqLength++; | |
818 | ||
819 | labelStart = delimiter; | |
820 | if(remainingLen >0 ){ | |
73c04bcf | 821 | remainingLen = (int32_t)(srcLength - (delimiter - src)); |
b75a7d8f A |
822 | } |
823 | ||
824 | } | |
825 | ||
374ca955 | 826 | usprep_close(nameprep); |
b75a7d8f A |
827 | |
828 | return u_terminateUChars(dest, destCapacity, reqLength, status); | |
829 | } | |
830 | ||
831 | U_CAPI int32_t U_EXPORT2 | |
832 | uidna_compare( const UChar *s1, int32_t length1, | |
833 | const UChar *s2, int32_t length2, | |
834 | int32_t options, | |
835 | UErrorCode* status){ | |
836 | ||
837 | if(status == NULL || U_FAILURE(*status)){ | |
838 | return -1; | |
839 | } | |
840 | ||
841 | UChar b1Stack[MAX_IDN_BUFFER_SIZE], b2Stack[MAX_IDN_BUFFER_SIZE]; | |
842 | UChar *b1 = b1Stack, *b2 = b2Stack; | |
843 | int32_t b1Len, b2Len, b1Capacity = MAX_IDN_BUFFER_SIZE, b2Capacity = MAX_IDN_BUFFER_SIZE; | |
844 | int32_t result=-1; | |
845 | ||
846 | UParseError parseError; | |
847 | ||
848 | b1Len = uidna_IDNToASCII(s1, length1, b1, b1Capacity, options, &parseError, status); | |
849 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
850 | // redo processing of string | |
851 | b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
852 | if(b1==NULL){ | |
853 | *status = U_MEMORY_ALLOCATION_ERROR; | |
854 | goto CLEANUP; | |
855 | } | |
856 | ||
857 | *status = U_ZERO_ERROR; // reset error | |
858 | ||
859 | b1Len = uidna_IDNToASCII(s1,length1,b1,b1Len, options, &parseError, status); | |
860 | ||
861 | } | |
862 | ||
863 | b2Len = uidna_IDNToASCII(s2,length2, b2,b2Capacity, options, &parseError, status); | |
864 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
865 | // redo processing of string | |
866 | b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); | |
867 | if(b2==NULL){ | |
868 | *status = U_MEMORY_ALLOCATION_ERROR; | |
869 | goto CLEANUP; | |
870 | } | |
871 | ||
872 | *status = U_ZERO_ERROR; // reset error | |
873 | ||
874 | b2Len = uidna_IDNToASCII(s2, length2, b2, b2Len, options, &parseError, status); | |
875 | ||
876 | } | |
877 | // when toASCII is applied all label separators are replaced with FULL_STOP | |
878 | result = compareCaseInsensitiveASCII(b1,b1Len,b2,b2Len); | |
879 | ||
880 | CLEANUP: | |
881 | if(b1 != b1Stack){ | |
882 | uprv_free(b1); | |
883 | } | |
884 | ||
885 | if(b2 != b2Stack){ | |
886 | uprv_free(b2); | |
887 | } | |
888 | ||
889 | return result; | |
890 | } | |
891 | ||
892 | #endif /* #if !UCONFIG_NO_IDNA */ |