]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/uidna.cpp
ICU-6.2.22.tar.gz
[apple/icu.git] / icuSources / common / uidna.cpp
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2003-2004, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: uidna.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2003feb1
14 * created by: Ram Viswanadha
15 */
16
17 #include "unicode/utypes.h"
18
19 #if !UCONFIG_NO_IDNA
20
21 #include "unicode/uidna.h"
22 #include "unicode/ustring.h"
23 #include "unicode/usprep.h"
24 #include "punycode.h"
25 #include "ustr_imp.h"
26 #include "cmemory.h"
27 #include "sprpimpl.h"
28
29 /* The official IDNA ACE prefix is "xn--" */
30 static const UChar ACE_PREFIX[] ={ 0x0078,0x006E,0x002d,0x002d } ;
31 #define ACE_PREFIX_LENGTH 4
32
33 #define MAX_LABEL_LENGTH 63
34 #define HYPHEN 0x002D
35 /* A label may be at most MAX_LABEL_LENGTH (63) code units long; the stack buffers below are sized larger than that */
36 #define MAX_LABEL_BUFFER_SIZE 100
37 #define MAX_IDN_BUFFER_SIZE 300
38
39 #define CAPITAL_A 0x0041
40 #define CAPITAL_Z 0x005A
41 #define LOWER_CASE_DELTA 0x0020
42 #define FULL_STOP 0x002E
43 #define DATA_FILE_NAME "uidna"
44
45 inline static UChar
46 toASCIILower(UChar ch){
47 if(CAPITAL_A <= ch && ch <= CAPITAL_Z){
48 return ch + LOWER_CASE_DELTA;
49 }
50 return ch;
51 }
52
53 inline static UBool
54 startsWithPrefix(const UChar* src , int32_t srcLength){
55 UBool startsWithPrefix = TRUE;
56
57 if(srcLength < ACE_PREFIX_LENGTH){
58 return FALSE;
59 }
60
61 for(int8_t i=0; i< ACE_PREFIX_LENGTH; i++){
62 if(toASCIILower(src[i]) != ACE_PREFIX[i]){
63 startsWithPrefix = FALSE;
64 }
65 }
66 return startsWithPrefix;
67 }
68
69 inline static void
70 toASCIILower(UChar* src, int32_t srcLen){
71 for(int32_t i=0; i<srcLen; i++){
72 src[i] = toASCIILower(src[i]);
73 }
74 }
75
76 inline static int32_t
77 compareCaseInsensitiveASCII(const UChar* s1, int32_t s1Len,
78 const UChar* s2, int32_t s2Len){
79
80 int32_t minLength;
81 int32_t lengthResult;
82
83 // are we comparing different lengths?
84 if(s1Len != s2Len) {
85 if(s1Len < s2Len) {
86 minLength = s1Len;
87 lengthResult = -1;
88 } else {
89 minLength = s2Len;
90 lengthResult = 1;
91 }
92 } else {
93 // ok the lengths are equal
94 minLength = s1Len;
95 lengthResult = 0;
96 }
97
98 UChar c1,c2;
99 int32_t rc;
100
101 for(int32_t i =0;/* no condition */;i++) {
102
103 /* If we reach the ends of both strings then they match */
104 if(i == minLength) {
105 return lengthResult;
106 }
107
108 c1 = s1[i];
109 c2 = s2[i];
110
111 /* Case-insensitive comparison */
112 if(c1!=c2) {
113 rc=(int32_t)toASCIILower(c1)-(int32_t)toASCIILower(c2);
114 if(rc!=0) {
115 lengthResult=rc;
116 break;
117 }
118 }
119 }
120 return lengthResult;
121 }
122
123
124 /**
125 * Ascertain if the given code point is a label separator as
126 * defined by the IDNA RFC
127 *
128 * @param ch The code point to be ascertained
129 * @return true if the char is a label separator
130 * @draft ICU 2.8
131 */
132 static inline UBool isLabelSeparator(UChar ch){
133 switch(ch){
134 case 0x002e:
135 case 0x3002:
136 case 0xFF0E:
137 case 0xFF61:
138 return TRUE;
139 default:
140 return FALSE;
141 }
142 }
143
144 // returns the length of the label excluding the separator
145 // if *limit == separator then the length returned does not include
146 // the separtor.
147 static inline int32_t
148 getNextSeparator(UChar *src,int32_t srcLength,
149 UChar **limit,
150 UBool *done){
151 if(srcLength == -1){
152 int32_t i;
153 for(i=0 ; ;i++){
154 if(src[i] == 0){
155 *limit = src + i; // point to null
156 *done = TRUE;
157 return i;
158 }
159 if(isLabelSeparator(src[i])){
160 *limit = src + (i+1); // go past the delimiter
161 return i;
162
163 }
164 }
165 }else{
166 int32_t i;
167 for(i=0;i<srcLength;i++){
168 if(isLabelSeparator(src[i])){
169 *limit = src + (i+1); // go past the delimiter
170 return i;
171 }
172 }
173 // we have not found the delimiter
174 // if(i==srcLength)
175 *limit = src+srcLength;
176 *done = TRUE;
177
178 return i;
179 }
180 }
181 static inline UBool isLDHChar(UChar ch){
182 // high runner case
183 if(ch>0x007A){
184 return FALSE;
185 }
186 //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
187 if( (ch==0x002D) ||
188 (0x0030 <= ch && ch <= 0x0039) ||
189 (0x0041 <= ch && ch <= 0x005A) ||
190 (0x0061 <= ch && ch <= 0x007A)
191 ){
192 return TRUE;
193 }
194 return FALSE;
195 }
196
197 static int32_t
198 _internal_toASCII(const UChar* src, int32_t srcLength,
199 UChar* dest, int32_t destCapacity,
200 int32_t options,
201 UStringPrepProfile* nameprep,
202 UParseError* parseError,
203 UErrorCode* status){
204
205 UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE];
206 //initialize pointers to stack buffers
207 UChar *b1 = b1Stack, *b2 = b2Stack;
208 int32_t b1Len, b2Len,
209 b1Capacity = MAX_LABEL_BUFFER_SIZE,
210 b2Capacity = MAX_LABEL_BUFFER_SIZE ,
211 reqLength=0;
212
213 int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0;
214 UBool* caseFlags = NULL;
215
216 // the source contains all ascii codepoints
217 UBool srcIsASCII = TRUE;
218 // assume the source contains all LDH codepoints
219 UBool srcIsLDH = TRUE;
220
221 int32_t j=0;
222
223 //get the options
224 UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0);
225
226 int32_t failPos = -1;
227
228 // step 2
229 b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status);
230
231 if(*status == U_BUFFER_OVERFLOW_ERROR){
232 // redo processing of string
233 // we do not have enough room so grow the buffer
234 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
235 if(b1==NULL){
236 *status = U_MEMORY_ALLOCATION_ERROR;
237 goto CLEANUP;
238 }
239
240 *status = U_ZERO_ERROR; // reset error
241
242 b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status);
243 }
244 // error bail out
245 if(U_FAILURE(*status)){
246 goto CLEANUP;
247 }
248
249 // step 3 & 4
250 for( j=0;j<b1Len;j++){
251 if(b1[j] > 0x7F){
252 srcIsASCII = FALSE;
253 }else if(isLDHChar(b1[j])==FALSE){ // if the char is in ASCII range verify that it is an LDH character
254 srcIsLDH = FALSE;
255 failPos = j;
256 }
257 }
258
259 if(useSTD3ASCIIRules == TRUE){
260 // verify 3a and 3b
261 // 3(a) Verify the absence of non-LDH ASCII code points; that is, the
262 // absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F.
263 // 3(b) Verify the absence of leading and trailing hyphen-minus; that
264 // is, the absence of U+002D at the beginning and end of the
265 // sequence.
266 if( srcIsLDH == FALSE /* source at this point should not contain anyLDH characters */
267 || b1[0] == HYPHEN || b1[b1Len-1] == HYPHEN){
268 *status = U_IDNA_STD3_ASCII_RULES_ERROR;
269
270 /* populate the parseError struct */
271 if(srcIsLDH==FALSE){
272 // failPos is always set the index of failure
273 uprv_syntaxError(b1,failPos, b1Len,parseError);
274 }else if(b1[0] == HYPHEN){
275 // fail position is 0
276 uprv_syntaxError(b1,0,b1Len,parseError);
277 }else{
278 // the last index in the source is always length-1
279 uprv_syntaxError(b1, (b1Len>0) ? b1Len-1 : b1Len, b1Len,parseError);
280 }
281
282 goto CLEANUP;
283 }
284 }
285 if(srcIsASCII){
286 if(b1Len <= destCapacity){
287 uprv_memmove(dest, b1, b1Len * U_SIZEOF_UCHAR);
288 reqLength = b1Len;
289 }else{
290 reqLength = b1Len;
291 goto CLEANUP;
292 }
293 }else{
294 // step 5 : verify the sequence does not begin with ACE prefix
295 if(!startsWithPrefix(b1,b1Len)){
296
297 //step 6: encode the sequence with punycode
298
299 // do not preserve the case flags for now!
300 // TODO: Preserve the case while implementing the RFE
301 // caseFlags = (UBool*) uprv_malloc(b1Len * sizeof(UBool));
302 // uprv_memset(caseFlags,TRUE,b1Len);
303
304 b2Len = u_strToPunycode(b1,b1Len,b2,b2Capacity,caseFlags, status);
305
306 if(*status == U_BUFFER_OVERFLOW_ERROR){
307 // redo processing of string
308 /* we do not have enough room so grow the buffer*/
309 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
310 if(b2 == NULL){
311 *status = U_MEMORY_ALLOCATION_ERROR;
312 goto CLEANUP;
313 }
314
315 *status = U_ZERO_ERROR; // reset error
316
317 b2Len = u_strToPunycode(b1,b1Len,b2,b2Len,caseFlags, status);
318 }
319 //error bail out
320 if(U_FAILURE(*status)){
321 goto CLEANUP;
322 }
323 // TODO : Reconsider while implementing the case preserve RFE
324 // convert all codepoints to lower case ASCII
325 // toASCIILower(b2,b2Len);
326 reqLength = b2Len+ACE_PREFIX_LENGTH;
327
328 if(reqLength > destCapacity){
329 *status = U_BUFFER_OVERFLOW_ERROR;
330 goto CLEANUP;
331 }
332 //Step 7: prepend the ACE prefix
333 uprv_memcpy(dest,ACE_PREFIX,ACE_PREFIX_LENGTH * U_SIZEOF_UCHAR);
334 //Step 6: copy the contents in b2 into dest
335 uprv_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len * U_SIZEOF_UCHAR);
336
337 }else{
338 *status = U_IDNA_ACE_PREFIX_ERROR;
339 //position of failure is 0
340 uprv_syntaxError(b1,0,b1Len,parseError);
341 goto CLEANUP;
342 }
343 }
344
345 if(reqLength > MAX_LABEL_LENGTH){
346 *status = U_IDNA_LABEL_TOO_LONG_ERROR;
347 }
348
349 CLEANUP:
350 if(b1 != b1Stack){
351 uprv_free(b1);
352 }
353 if(b2 != b2Stack){
354 uprv_free(b2);
355 }
356 uprv_free(caseFlags);
357
358 return u_terminateUChars(dest, destCapacity, reqLength, status);
359 }
360
361 static int32_t
362 _internal_toUnicode(const UChar* src, int32_t srcLength,
363 UChar* dest, int32_t destCapacity,
364 int32_t options,
365 UStringPrepProfile* nameprep,
366 UParseError* parseError,
367 UErrorCode* status){
368
369 //get the options
370 UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0);
371 int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0;
372
373 UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE];
374
375 //initialize pointers to stack buffers
376 UChar *b1 = b1Stack, *b2 = b2Stack, *b1Prime=NULL, *b3=b3Stack;
377 int32_t b1Len, b2Len, b1PrimeLen, b3Len,
378 b1Capacity = MAX_LABEL_BUFFER_SIZE,
379 b2Capacity = MAX_LABEL_BUFFER_SIZE,
380 b3Capacity = MAX_LABEL_BUFFER_SIZE,
381 reqLength=0;
382
383 b1Len = 0;
384 UBool* caseFlags = NULL;
385
386 UBool srcIsASCII = TRUE;
387 UBool srcIsLDH = TRUE;
388 int32_t failPos =0;
389
390 // step 1: find out if all the codepoints in src are ASCII
391 if(srcLength==-1){
392 srcLength = 0;
393 for(;src[srcLength]!=0;){
394 if(src[srcLength]> 0x7f){
395 srcIsASCII = FALSE;
396 }else if(isLDHChar(src[srcLength])==FALSE){
397 // here we do not assemble surrogates
398 // since we know that LDH code points
399 // are in the ASCII range only
400 srcIsLDH = FALSE;
401 failPos = srcLength;
402 }
403 srcLength++;
404 }
405 }else if(srcLength > 0){
406 for(int32_t j=0; j<srcLength; j++){
407 if(src[j]> 0x7f){
408 srcIsASCII = FALSE;
409 }else if(isLDHChar(src[j])==FALSE){
410 // here we do not assemble surrogates
411 // since we know that LDH code points
412 // are in the ASCII range only
413 srcIsLDH = FALSE;
414 failPos = j;
415 }
416 }
417 }else{
418 return 0;
419 }
420
421 if(srcIsASCII == FALSE){
422 // step 2: process the string
423 b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status);
424 if(*status == U_BUFFER_OVERFLOW_ERROR){
425 // redo processing of string
426 /* we do not have enough room so grow the buffer*/
427 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
428 if(b1==NULL){
429 *status = U_MEMORY_ALLOCATION_ERROR;
430 goto CLEANUP;
431 }
432
433 *status = U_ZERO_ERROR; // reset error
434
435 b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status);
436 }
437 //bail out on error
438 if(U_FAILURE(*status)){
439 goto CLEANUP;
440 }
441 }else{
442
443 //just point src to b1
444 b1 = (UChar*) src;
445 b1Len = srcLength;
446 }
447
448 //step 3: verify ACE Prefix
449 if(startsWithPrefix(src,srcLength)){
450
451 //step 4: Remove the ACE Prefix
452 b1Prime = b1 + ACE_PREFIX_LENGTH;
453 b1PrimeLen = b1Len - ACE_PREFIX_LENGTH;
454
455 //step 5: Decode using punycode
456 b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Capacity, caseFlags,status);
457
458 if(*status == U_BUFFER_OVERFLOW_ERROR){
459 // redo processing of string
460 /* we do not have enough room so grow the buffer*/
461 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
462 if(b2==NULL){
463 *status = U_MEMORY_ALLOCATION_ERROR;
464 goto CLEANUP;
465 }
466
467 *status = U_ZERO_ERROR; // reset error
468
469 b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Len, caseFlags, status);
470
471 }
472
473
474 //step 6:Apply toASCII
475 b3Len = uidna_toASCII(b2, b2Len, b3, b3Capacity,options,parseError, status);
476
477 if(*status == U_BUFFER_OVERFLOW_ERROR){
478 // redo processing of string
479 /* we do not have enough room so grow the buffer*/
480 b3 = (UChar*) uprv_malloc(b3Len * U_SIZEOF_UCHAR);
481 if(b3==NULL){
482 *status = U_MEMORY_ALLOCATION_ERROR;
483 goto CLEANUP;
484 }
485
486 *status = U_ZERO_ERROR; // reset error
487
488 b3Len = uidna_toASCII(b2,b2Len,b3,b3Len,options,parseError, status);
489
490 }
491 //bail out on error
492 if(U_FAILURE(*status)){
493 goto CLEANUP;
494 }
495
496 //step 7: verify
497 if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=0){
498 *status = U_IDNA_VERIFICATION_ERROR;
499 goto CLEANUP;
500 }
501
502 //step 8: return output of step 5
503 reqLength = b2Len;
504 if(b2Len <= destCapacity) {
505 uprv_memmove(dest, b2, b2Len * U_SIZEOF_UCHAR);
506 }
507 }else{
508 // verify that STD3 ASCII rules are satisfied
509 if(useSTD3ASCIIRules == TRUE){
510 if( srcIsLDH == FALSE /* source contains some non-LDH characters */
511 || src[0] == HYPHEN || src[srcLength-1] == HYPHEN){
512 *status = U_IDNA_STD3_ASCII_RULES_ERROR;
513
514 /* populate the parseError struct */
515 if(srcIsLDH==FALSE){
516 // failPos is always set the index of failure
517 uprv_syntaxError(src,failPos, srcLength,parseError);
518 }else if(src[0] == HYPHEN){
519 // fail position is 0
520 uprv_syntaxError(src,0,srcLength,parseError);
521 }else{
522 // the last index in the source is always length-1
523 uprv_syntaxError(src, (srcLength>0) ? srcLength-1 : srcLength, srcLength,parseError);
524 }
525
526 goto CLEANUP;
527 }
528 }
529 //copy the source to destination
530 if(srcLength <= destCapacity){
531 uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR);
532 }
533 reqLength = srcLength;
534 }
535
536 CLEANUP:
537
538 if(b1 != b1Stack && b1!=src){
539 uprv_free(b1);
540 }
541 if(b2 != b2Stack){
542 uprv_free(b2);
543 }
544 uprv_free(caseFlags);
545
546
547 // The RFC states that
548 // <quote>
549 // ToUnicode never fails. If any step fails, then the original input
550 // is returned immediately in that step.
551 // </quote>
552 // So if any step fails lets copy source to destination
553 if(U_FAILURE(*status)){
554 //copy the source to destination
555 if(dest && srcLength <= destCapacity){
556 if(srcLength == -1) {
557 uprv_memmove(dest,src,u_strlen(src)* U_SIZEOF_UCHAR);
558 } else {
559 uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR);
560 }
561 }
562 reqLength = srcLength;
563 }
564
565 return u_terminateUChars(dest, destCapacity, reqLength, status);
566 }
567
568 U_CAPI int32_t U_EXPORT2
569 uidna_toASCII(const UChar* src, int32_t srcLength,
570 UChar* dest, int32_t destCapacity,
571 int32_t options,
572 UParseError* parseError,
573 UErrorCode* status){
574
575 if(status == NULL || U_FAILURE(*status)){
576 return 0;
577 }
578 if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
579 *status = U_ILLEGAL_ARGUMENT_ERROR;
580 return 0;
581 }
582
583 UStringPrepProfile* nameprep = usprep_open(NULL,DATA_FILE_NAME, status);
584
585 if(U_FAILURE(*status)){
586 return -1;
587 }
588
589 int32_t retLen = _internal_toASCII(src, srcLength, dest, destCapacity, options, nameprep, parseError, status);
590
591 /* close the profile*/
592 usprep_close(nameprep);
593
594 return retLen;
595 }
596
597 U_CAPI int32_t U_EXPORT2
598 uidna_toUnicode(const UChar* src, int32_t srcLength,
599 UChar* dest, int32_t destCapacity,
600 int32_t options,
601 UParseError* parseError,
602 UErrorCode* status){
603
604 if(status == NULL || U_FAILURE(*status)){
605 return 0;
606 }
607 if( (src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
608 *status = U_ILLEGAL_ARGUMENT_ERROR;
609 return 0;
610 }
611
612 UStringPrepProfile* nameprep = usprep_open(NULL, DATA_FILE_NAME, status);
613
614 if(U_FAILURE(*status)){
615 return -1;
616 }
617
618 int32_t retLen = _internal_toUnicode(src, srcLength, dest, destCapacity, options, nameprep, parseError, status);
619
620 usprep_close(nameprep);
621
622 return retLen;
623 }
624
625
626 U_CAPI int32_t U_EXPORT2
627 uidna_IDNToASCII( const UChar *src, int32_t srcLength,
628 UChar* dest, int32_t destCapacity,
629 int32_t options,
630 UParseError *parseError,
631 UErrorCode *status){
632
633 if(status == NULL || U_FAILURE(*status)){
634 return 0;
635 }
636 if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
637 *status = U_ILLEGAL_ARGUMENT_ERROR;
638 return 0;
639 }
640
641 int32_t reqLength = 0;
642
643 UStringPrepProfile* nameprep = usprep_open(NULL, DATA_FILE_NAME, status);
644
645 if(U_FAILURE(*status)){
646 return 0;
647 }
648
649 //initialize pointers
650 UChar *delimiter = (UChar*)src;
651 UChar *labelStart = (UChar*)src;
652 UChar *currentDest = (UChar*) dest;
653 int32_t remainingLen = srcLength;
654 int32_t remainingDestCapacity = destCapacity;
655 int32_t labelLen = 0, labelReqLength = 0;
656 UBool done = FALSE;
657
658
659 for(;;){
660
661 labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done);
662
663 labelReqLength = _internal_toASCII( labelStart, labelLen,
664 currentDest, remainingDestCapacity,
665 options, nameprep,
666 parseError, status);
667
668 if(*status == U_BUFFER_OVERFLOW_ERROR){
669
670 *status = U_ZERO_ERROR; // reset error
671 remainingDestCapacity = 0;
672 }
673
674
675 if(U_FAILURE(*status)){
676 break;
677 }
678
679 reqLength +=labelReqLength;
680 // adjust the destination pointer
681 if(labelReqLength < remainingDestCapacity){
682 currentDest = currentDest + labelReqLength;
683 remainingDestCapacity -= labelReqLength;
684 }else{
685 // should never occur
686 remainingDestCapacity = 0;
687 }
688 if(done == TRUE){
689 break;
690 }
691
692 // add the label separator
693 if(remainingDestCapacity > 0){
694 *currentDest++ = FULL_STOP;
695 remainingDestCapacity--;
696 }
697 reqLength++;
698
699 labelStart = delimiter;
700 if(remainingLen >0 ){
701 remainingLen = srcLength - (delimiter - src);
702 }
703
704 }
705
706 usprep_close(nameprep);
707
708 return u_terminateUChars(dest, destCapacity, reqLength, status);
709 }
710
711 U_CAPI int32_t U_EXPORT2
712 uidna_IDNToUnicode( const UChar* src, int32_t srcLength,
713 UChar* dest, int32_t destCapacity,
714 int32_t options,
715 UParseError* parseError,
716 UErrorCode* status){
717
718 if(status == NULL || U_FAILURE(*status)){
719 return 0;
720 }
721 if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
722 *status = U_ILLEGAL_ARGUMENT_ERROR;
723 return 0;
724 }
725
726 int32_t reqLength = 0;
727
728 UStringPrepProfile* nameprep = usprep_open(NULL, DATA_FILE_NAME, status);
729
730 if(U_FAILURE(*status)){
731 return 0;
732 }
733
734 //initialize pointers
735 UChar *delimiter = (UChar*)src;
736 UChar *labelStart = (UChar*)src;
737 UChar *currentDest = (UChar*) dest;
738 int32_t remainingLen = srcLength;
739 int32_t remainingDestCapacity = destCapacity;
740 int32_t labelLen = 0, labelReqLength = 0;
741 UBool done = FALSE;
742
743
744 for(;;){
745
746 labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done);
747
748 labelReqLength = _internal_toUnicode(labelStart, labelLen,
749 currentDest, remainingDestCapacity,
750 options, nameprep,
751 parseError, status);
752
753 if(*status == U_BUFFER_OVERFLOW_ERROR){
754
755 *status = U_ZERO_ERROR; // reset error
756 remainingDestCapacity = 0;
757 }
758
759
760 if(U_FAILURE(*status)){
761 break;
762 }
763
764 reqLength +=labelReqLength;
765 // adjust the destination pointer
766 if(labelReqLength < remainingDestCapacity){
767 currentDest = currentDest + labelReqLength;
768 remainingDestCapacity -= labelReqLength;
769 }else{
770 // should never occur
771 remainingDestCapacity = 0;
772 }
773
774 if(done == TRUE){
775 break;
776 }
777
778 // add the label separator
779 if(remainingDestCapacity > 0){
780 *currentDest++ = FULL_STOP;
781 remainingDestCapacity--;
782 }
783 reqLength++;
784
785 labelStart = delimiter;
786 if(remainingLen >0 ){
787 remainingLen = srcLength - (delimiter - src);
788 }
789
790 }
791
792 usprep_close(nameprep);
793
794 return u_terminateUChars(dest, destCapacity, reqLength, status);
795 }
796
797 U_CAPI int32_t U_EXPORT2
798 uidna_compare( const UChar *s1, int32_t length1,
799 const UChar *s2, int32_t length2,
800 int32_t options,
801 UErrorCode* status){
802
803 if(status == NULL || U_FAILURE(*status)){
804 return -1;
805 }
806
807 UChar b1Stack[MAX_IDN_BUFFER_SIZE], b2Stack[MAX_IDN_BUFFER_SIZE];
808 UChar *b1 = b1Stack, *b2 = b2Stack;
809 int32_t b1Len, b2Len, b1Capacity = MAX_IDN_BUFFER_SIZE, b2Capacity = MAX_IDN_BUFFER_SIZE;
810 int32_t result=-1;
811
812 UParseError parseError;
813
814 b1Len = uidna_IDNToASCII(s1, length1, b1, b1Capacity, options, &parseError, status);
815 if(*status == U_BUFFER_OVERFLOW_ERROR){
816 // redo processing of string
817 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
818 if(b1==NULL){
819 *status = U_MEMORY_ALLOCATION_ERROR;
820 goto CLEANUP;
821 }
822
823 *status = U_ZERO_ERROR; // reset error
824
825 b1Len = uidna_IDNToASCII(s1,length1,b1,b1Len, options, &parseError, status);
826
827 }
828
829 b2Len = uidna_IDNToASCII(s2,length2, b2,b2Capacity, options, &parseError, status);
830 if(*status == U_BUFFER_OVERFLOW_ERROR){
831 // redo processing of string
832 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
833 if(b2==NULL){
834 *status = U_MEMORY_ALLOCATION_ERROR;
835 goto CLEANUP;
836 }
837
838 *status = U_ZERO_ERROR; // reset error
839
840 b2Len = uidna_IDNToASCII(s2, length2, b2, b2Len, options, &parseError, status);
841
842 }
843 // when toASCII is applied all label separators are replaced with FULL_STOP
844 result = compareCaseInsensitiveASCII(b1,b1Len,b2,b2Len);
845
846 CLEANUP:
847 if(b1 != b1Stack){
848 uprv_free(b1);
849 }
850
851 if(b2 != b2Stack){
852 uprv_free(b2);
853 }
854
855 return result;
856 }
857
858 #endif /* #if !UCONFIG_NO_IDNA */