]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/intltest/idnaref.cpp
ICU-6.2.22.tar.gz
[apple/icu.git] / icuSources / test / intltest / idnaref.cpp
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2003-2004, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: idnaref.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2003feb1
14 * created by: Ram Viswanadha
15 */
16
17 #include "unicode/utypes.h"
18
19 #if !UCONFIG_NO_IDNA && !UCONFIG_NO_TRANSLITERATION
20 #include "idnaref.h"
21 #include "punyref.h"
22 #include "ustr_imp.h"
23 #include "cmemory.h"
24 #include "sprpimpl.h"
25 #include "nptrans.h"
26 #include "testidna.h"
27 #include "punycode.h"
28 #include "unicode/ustring.h"
29
30 /* The official IDNA ACE prefix is "xn--" */
31 static const UChar ACE_PREFIX[] ={ 0x0078,0x006E,0x002d,0x002d } ;
32 #define ACE_PREFIX_LENGTH 4
33
34 #define MAX_LABEL_LENGTH 63
35 #define HYPHEN 0x002D
36 /* The Max length of the labels should not be more than 64 */
37 #define MAX_LABEL_BUFFER_SIZE 100
38 #define MAX_IDN_BUFFER_SIZE 300
39
40 #define CAPITAL_A 0x0041
41 #define CAPITAL_Z 0x005A
42 #define LOWER_CASE_DELTA 0x0020
43 #define FULL_STOP 0x002E
44
45
46 inline static UBool
47 startsWithPrefix(const UChar* src , int32_t srcLength){
48 UBool startsWithPrefix = TRUE;
49
50 if(srcLength < ACE_PREFIX_LENGTH){
51 return FALSE;
52 }
53
54 for(int8_t i=0; i< ACE_PREFIX_LENGTH; i++){
55 if(u_tolower(src[i]) != ACE_PREFIX[i]){
56 startsWithPrefix = FALSE;
57 }
58 }
59 return startsWithPrefix;
60 }
61
62 inline static UChar
63 toASCIILower(UChar ch){
64 if(CAPITAL_A <= ch && ch <= CAPITAL_Z){
65 return ch + LOWER_CASE_DELTA;
66 }
67 return ch;
68 }
69
70 inline static int32_t
71 compareCaseInsensitiveASCII(const UChar* s1, int32_t s1Len,
72 const UChar* s2, int32_t s2Len){
73 if(s1Len != s2Len){
74 return (s1Len > s2Len) ? s1Len : s2Len;
75 }
76 UChar c1,c2;
77 int32_t rc;
78
79 for(int32_t i =0;/* no condition */;i++) {
80 /* If we reach the ends of both strings then they match */
81 if(i == s1Len) {
82 return 0;
83 }
84
85 c1 = s1[i];
86 c2 = s2[i];
87
88 /* Case-insensitive comparison */
89 if(c1!=c2) {
90 rc=(int32_t)toASCIILower(c1)-(int32_t)toASCIILower(c2);
91 if(rc!=0) {
92 return rc;
93 }
94 }
95 }
96
97 }
98
99 static UErrorCode getError(enum punycode_status status){
100 switch(status){
101 case punycode_success:
102 return U_ZERO_ERROR;
103 case punycode_bad_input: /* Input is invalid. */
104 return U_INVALID_CHAR_FOUND;
105 case punycode_big_output: /* Output would exceed the space provided. */
106 return U_BUFFER_OVERFLOW_ERROR;
107 case punycode_overflow : /* Input requires wider integers to process. */
108 return U_INDEX_OUTOFBOUNDS_ERROR;
109 default:
110 return U_INTERNAL_PROGRAM_ERROR;
111 }
112 }
113
114 static inline int32_t convertASCIIToUChars(const char* src,UChar* dest, int32_t length){
115 int i;
116 for(i=0;i<length;i++){
117 dest[i] = src[i];
118 }
119 return i;
120 }
121 static inline int32_t convertUCharsToASCII(const UChar* src,char* dest, int32_t length){
122 int i;
123 for(i=0;i<length;i++){
124 dest[i] = (char)src[i];
125 }
126 return i;
127 }
128 // wrapper around the reference Punycode implementation
129 static int32_t convertToPuny(const UChar* src, int32_t srcLength,
130 UChar* dest, int32_t destCapacity,
131 UErrorCode& status){
132 uint32_t b1Stack[MAX_LABEL_BUFFER_SIZE];
133 int32_t b1Len = 0, b1Capacity = MAX_LABEL_BUFFER_SIZE;
134 uint32_t* b1 = b1Stack;
135 char b2Stack[MAX_LABEL_BUFFER_SIZE];
136 char* b2 = b2Stack;
137 int32_t b2Len =MAX_LABEL_BUFFER_SIZE ;
138 punycode_status error;
139 unsigned char* caseFlags = NULL;
140
141 u_strToUTF32((UChar32*)b1,b1Capacity,&b1Len,src,srcLength,&status);
142 if(status == U_BUFFER_OVERFLOW_ERROR){
143 // redo processing of string
144 /* we do not have enough room so grow the buffer*/
145 b1 = (uint32_t*) uprv_malloc(b1Len * sizeof(uint32_t));
146 if(b1==NULL){
147 status = U_MEMORY_ALLOCATION_ERROR;
148 goto CLEANUP;
149 }
150
151 status = U_ZERO_ERROR; // reset error
152
153 u_strToUTF32((UChar32*)b1,b1Len,&b1Len,src,srcLength,&status);
154 }
155 if(U_FAILURE(status)){
156 goto CLEANUP;
157 }
158
159 //caseFlags = (unsigned char*) uprv_malloc(b1Len *sizeof(unsigned char));
160
161 error = punycode_encode(b1Len,b1,caseFlags, (uint32_t*)&b2Len, b2);
162 status = getError(error);
163
164 if(status == U_BUFFER_OVERFLOW_ERROR){
165 /* we do not have enough room so grow the buffer*/
166 b2 = (char*) uprv_malloc( b2Len * sizeof(char));
167 if(b2==NULL){
168 status = U_MEMORY_ALLOCATION_ERROR;
169 goto CLEANUP;
170 }
171
172 status = U_ZERO_ERROR; // reset error
173
174 punycode_status error = punycode_encode(b1Len,b1,caseFlags, (uint32_t*)&b2Len, b2);
175 status = getError(error);
176 }
177 if(U_FAILURE(status)){
178 goto CLEANUP;
179 }
180
181 if(b2Len < destCapacity){
182 convertASCIIToUChars(b2,dest,b2Len);
183 }else{
184 status =U_BUFFER_OVERFLOW_ERROR;
185 }
186
187 CLEANUP:
188 if(b1Stack != b1){
189 uprv_free(b1);
190 }
191 if(b2Stack != b2){
192 uprv_free(b2);
193 }
194 uprv_free(caseFlags);
195
196 return b2Len;
197 }
198
199 static int32_t convertFromPuny( const UChar* src, int32_t srcLength,
200 UChar* dest, int32_t destCapacity,
201 UErrorCode& status){
202 char b1Stack[MAX_LABEL_BUFFER_SIZE];
203 char* b1 = b1Stack;
204 int32_t destLen =0;
205
206 convertUCharsToASCII(src, b1,srcLength);
207
208 uint32_t b2Stack[MAX_LABEL_BUFFER_SIZE];
209 uint32_t* b2 = b2Stack;
210 int32_t b2Len =MAX_LABEL_BUFFER_SIZE;
211 unsigned char* caseFlags = NULL; //(unsigned char*) uprv_malloc(srcLength * sizeof(unsigned char*));
212 punycode_status error = punycode_decode(srcLength,b1,(uint32_t*)&b2Len,b2,caseFlags);
213 status = getError(error);
214 if(status == U_BUFFER_OVERFLOW_ERROR){
215 b2 = (uint32_t*) uprv_malloc(b2Len * sizeof(uint32_t));
216 if(b2 == NULL){
217 status = U_MEMORY_ALLOCATION_ERROR;
218 goto CLEANUP;
219 }
220 error = punycode_decode(srcLength,b1,(uint32_t*)&b2Len,b2,caseFlags);
221 status = getError(error);
222 }
223
224 if(U_FAILURE(status)){
225 goto CLEANUP;
226 }
227
228 u_strFromUTF32(dest,destCapacity,&destLen,(UChar32*)b2,b2Len,&status);
229
230 CLEANUP:
231 if(b1Stack != b1){
232 uprv_free(b1);
233 }
234 if(b2Stack != b2){
235 uprv_free(b2);
236 }
237 uprv_free(caseFlags);
238
239 return destLen;
240 }
241
242
243
244 U_CFUNC int32_t U_EXPORT2
245 idnaref_toASCII(const UChar* src, int32_t srcLength,
246 UChar* dest, int32_t destCapacity,
247 int32_t options,
248 UParseError* parseError,
249 UErrorCode* status){
250
251 if(status == NULL || U_FAILURE(*status)){
252 return 0;
253 }
254 if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
255 *status = U_ILLEGAL_ARGUMENT_ERROR;
256 return 0;
257 }
258 UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE];
259 //initialize pointers to stack buffers
260 UChar *b1 = b1Stack, *b2 = b2Stack;
261 int32_t b1Len, b2Len,
262 b1Capacity = MAX_LABEL_BUFFER_SIZE,
263 b2Capacity = MAX_LABEL_BUFFER_SIZE ,
264 reqLength=0;
265
266 //get the options
267 UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0);
268 UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0);
269
270 UBool* caseFlags = NULL;
271
272 // assume the source contains all ascii codepoints
273 UBool srcIsASCII = TRUE;
274 // assume the source contains all LDH codepoints
275 UBool srcIsLDH = TRUE;
276 int32_t j=0;
277 // UParseError parseError;
278 // step 2
279 NamePrepTransform* prep = TestIDNA::getInstance(*status);
280
281 if(U_FAILURE(*status)){
282 goto CLEANUP;
283 }
284
285 b1Len = prep->process(src,srcLength,b1, b1Capacity,allowUnassigned,parseError,*status);
286
287 if(*status == U_BUFFER_OVERFLOW_ERROR){
288 // redo processing of string
289 /* we do not have enough room so grow the buffer*/
290 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
291 if(b1==NULL){
292 *status = U_MEMORY_ALLOCATION_ERROR;
293 goto CLEANUP;
294 }
295
296 *status = U_ZERO_ERROR; // reset error
297
298 b1Len = prep->process(src,srcLength,b1, b1Len,allowUnassigned, parseError, *status);
299 }
300 // error bail out
301 if(U_FAILURE(*status)){
302 goto CLEANUP;
303 }
304
305 // step 3 & 4
306 for( j=0;j<b1Len;j++){
307 if(b1[j] > 0x7F){
308 srcIsASCII = FALSE;
309 }else if(prep->isLDHChar(b1[j])==FALSE){ // if the char is in ASCII range verify that it is an LDH character{
310 srcIsLDH = FALSE;
311 }
312 }
313
314 if(useSTD3ASCIIRules == TRUE){
315 // verify 3a and 3b
316 if( srcIsLDH == FALSE /* source contains some non-LDH characters */
317 || b1[0] == HYPHEN || b1[b1Len-1] == HYPHEN){
318 *status = U_IDNA_STD3_ASCII_RULES_ERROR;
319 goto CLEANUP;
320 }
321 }
322 if(srcIsASCII){
323 if(b1Len <= destCapacity){
324 uprv_memmove(dest, b1, b1Len * U_SIZEOF_UCHAR);
325 reqLength = b1Len;
326 }else{
327 reqLength = b1Len;
328 goto CLEANUP;
329 }
330 }else{
331 // step 5 : verify the sequence does not begin with ACE prefix
332 if(!startsWithPrefix(b1,b1Len)){
333
334 //step 6: encode the sequence with punycode
335 //caseFlags = (UBool*) uprv_malloc(b1Len * sizeof(UBool));
336
337 b2Len = convertToPuny(b1,b1Len, b2,b2Capacity,*status);
338 //b2Len = u_strToPunycode(b2,b2Capacity,b1,b1Len, caseFlags, status);
339 if(*status == U_BUFFER_OVERFLOW_ERROR){
340 // redo processing of string
341 /* we do not have enough room so grow the buffer*/
342 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
343 if(b2 == NULL){
344 *status = U_MEMORY_ALLOCATION_ERROR;
345 goto CLEANUP;
346 }
347
348 *status = U_ZERO_ERROR; // reset error
349
350 b2Len = convertToPuny(b1, b1Len, b2, b2Len, *status);
351 //b2Len = u_strToPunycode(b2,b2Len,b1,b1Len, caseFlags, status);
352
353 }
354 //error bail out
355 if(U_FAILURE(*status)){
356 goto CLEANUP;
357 }
358 reqLength = b2Len+ACE_PREFIX_LENGTH;
359
360 if(reqLength > destCapacity){
361 *status = U_BUFFER_OVERFLOW_ERROR;
362 goto CLEANUP;
363 }
364 //Step 7: prepend the ACE prefix
365 uprv_memcpy(dest,ACE_PREFIX,ACE_PREFIX_LENGTH * U_SIZEOF_UCHAR);
366 //Step 6: copy the contents in b2 into dest
367 uprv_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len * U_SIZEOF_UCHAR);
368
369 }else{
370 *status = U_IDNA_ACE_PREFIX_ERROR;
371 goto CLEANUP;
372 }
373 }
374
375 if(reqLength > MAX_LABEL_LENGTH){
376 *status = U_IDNA_LABEL_TOO_LONG_ERROR;
377 }
378
379 CLEANUP:
380 if(b1 != b1Stack){
381 uprv_free(b1);
382 }
383 if(b2 != b2Stack){
384 uprv_free(b2);
385 }
386 uprv_free(caseFlags);
387
388 // delete prep;
389
390 return u_terminateUChars(dest, destCapacity, reqLength, status);
391 }
392
393
394 U_CFUNC int32_t U_EXPORT2
395 idnaref_toUnicode(const UChar* src, int32_t srcLength,
396 UChar* dest, int32_t destCapacity,
397 int32_t options,
398 UParseError* parseError,
399 UErrorCode* status){
400
401 if(status == NULL || U_FAILURE(*status)){
402 return 0;
403 }
404 if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
405 *status = U_ILLEGAL_ARGUMENT_ERROR;
406 return 0;
407 }
408
409
410
411 UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE];
412
413 //initialize pointers to stack buffers
414 UChar *b1 = b1Stack, *b2 = b2Stack, *b1Prime=NULL, *b3=b3Stack;
415 int32_t b1Len, b2Len, b1PrimeLen, b3Len,
416 b1Capacity = MAX_LABEL_BUFFER_SIZE,
417 b2Capacity = MAX_LABEL_BUFFER_SIZE,
418 b3Capacity = MAX_LABEL_BUFFER_SIZE,
419 reqLength=0;
420 // UParseError parseError;
421
422 NamePrepTransform* prep = TestIDNA::getInstance(*status);
423 b1Len = 0;
424 UBool* caseFlags = NULL;
425
426 //get the options
427 UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0);
428 UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0);
429
430 UBool srcIsASCII = TRUE;
431 UBool srcIsLDH = TRUE;
432 int32_t failPos =0;
433
434 if(U_FAILURE(*status)){
435 goto CLEANUP;
436 }
437 // step 1: find out if all the codepoints in src are ASCII
438 if(srcLength==-1){
439 srcLength = 0;
440 for(;src[srcLength]!=0;){
441 if(src[srcLength]> 0x7f){
442 srcIsASCII = FALSE;
443 }if(prep->isLDHChar(src[srcLength])==FALSE){
444 // here we do not assemble surrogates
445 // since we know that LDH code points
446 // are in the ASCII range only
447 srcIsLDH = FALSE;
448 failPos = srcLength;
449 }
450 srcLength++;
451 }
452 }else{
453 for(int32_t j=0; j<srcLength; j++){
454 if(src[j]> 0x7f){
455 srcIsASCII = FALSE;
456 }else if(prep->isLDHChar(src[j])==FALSE){
457 // here we do not assemble surrogates
458 // since we know that LDH code points
459 // are in the ASCII range only
460 srcIsLDH = FALSE;
461 failPos = j;
462 }
463 }
464 }
465
466 if(srcIsASCII == FALSE){
467 // step 2: process the string
468 b1Len = prep->process(src,srcLength,b1,b1Capacity,allowUnassigned, parseError, *status);
469 if(*status == U_BUFFER_OVERFLOW_ERROR){
470 // redo processing of string
471 /* we do not have enough room so grow the buffer*/
472 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
473 if(b1==NULL){
474 *status = U_MEMORY_ALLOCATION_ERROR;
475 goto CLEANUP;
476 }
477
478 *status = U_ZERO_ERROR; // reset error
479
480 b1Len = prep->process(src,srcLength,b1, b1Len,allowUnassigned, parseError, *status);
481 }
482 //bail out on error
483 if(U_FAILURE(*status)){
484 goto CLEANUP;
485 }
486 }else{
487
488 // copy everything to b1
489 if(srcLength < b1Capacity){
490 uprv_memmove(b1,src, srcLength * U_SIZEOF_UCHAR);
491 }else{
492 /* we do not have enough room so grow the buffer*/
493 b1 = (UChar*) uprv_malloc(srcLength * U_SIZEOF_UCHAR);
494 if(b1==NULL){
495 *status = U_MEMORY_ALLOCATION_ERROR;
496 goto CLEANUP;
497 }
498 uprv_memmove(b1,src, srcLength * U_SIZEOF_UCHAR);
499 }
500 b1Len = srcLength;
501 }
502 //step 3: verify ACE Prefix
503 if(startsWithPrefix(src,srcLength)){
504
505 //step 4: Remove the ACE Prefix
506 b1Prime = b1 + ACE_PREFIX_LENGTH;
507 b1PrimeLen = b1Len - ACE_PREFIX_LENGTH;
508
509 //step 5: Decode using punycode
510 b2Len = convertFromPuny(b1Prime,b1PrimeLen, b2, b2Capacity, *status);
511 //b2Len = u_strFromPunycode(b2, b2Capacity,b1Prime,b1PrimeLen, caseFlags, status);
512
513 if(*status == U_BUFFER_OVERFLOW_ERROR){
514 // redo processing of string
515 /* we do not have enough room so grow the buffer*/
516 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
517 if(b2==NULL){
518 *status = U_MEMORY_ALLOCATION_ERROR;
519 goto CLEANUP;
520 }
521
522 *status = U_ZERO_ERROR; // reset error
523
524 b2Len = convertFromPuny(b1Prime,b1PrimeLen, b2, b2Len, *status);
525 //b2Len = u_strFromPunycode(b2, b2Len,b1Prime,b1PrimeLen,caseFlags, status);
526 }
527
528
529 //step 6:Apply toASCII
530 b3Len = idnaref_toASCII(b2,b2Len,b3,b3Capacity,options,parseError, status);
531
532 if(*status == U_BUFFER_OVERFLOW_ERROR){
533 // redo processing of string
534 /* we do not have enough room so grow the buffer*/
535 b3 = (UChar*) uprv_malloc(b3Len * U_SIZEOF_UCHAR);
536 if(b3==NULL){
537 *status = U_MEMORY_ALLOCATION_ERROR;
538 goto CLEANUP;
539 }
540
541 *status = U_ZERO_ERROR; // reset error
542
543 b3Len = idnaref_toASCII(b2,b2Len,b3,b3Len, options, parseError, status);
544
545 }
546 //bail out on error
547 if(U_FAILURE(*status)){
548 goto CLEANUP;
549 }
550
551 //step 7: verify
552 if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=0){
553 *status = U_IDNA_VERIFICATION_ERROR;
554 goto CLEANUP;
555 }
556
557 //step 8: return output of step 5
558 reqLength = b2Len;
559 if(b2Len <= destCapacity) {
560 uprv_memmove(dest, b2, b2Len * U_SIZEOF_UCHAR);
561 }
562 }else{
563 // verify that STD3 ASCII rules are satisfied
564 if(useSTD3ASCIIRules == TRUE){
565 if( srcIsLDH == FALSE /* source contains some non-LDH characters */
566 || src[0] == HYPHEN || src[srcLength-1] == HYPHEN){
567 *status = U_IDNA_STD3_ASCII_RULES_ERROR;
568
569 /* populate the parseError struct */
570 if(srcIsLDH==FALSE){
571 // failPos is always set the index of failure
572 uprv_syntaxError(src,failPos, srcLength,parseError);
573 }else if(src[0] == HYPHEN){
574 // fail position is 0
575 uprv_syntaxError(src,0,srcLength,parseError);
576 }else{
577 // the last index in the source is always length-1
578 uprv_syntaxError(src, (srcLength>0) ? srcLength-1 : srcLength, srcLength,parseError);
579 }
580
581 goto CLEANUP;
582 }
583 }
584 //copy the source to destination
585 if(srcLength <= destCapacity){
586 uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR);
587 }
588 reqLength = srcLength;
589 }
590
591 CLEANUP:
592
593 if(b1 != b1Stack){
594 uprv_free(b1);
595 }
596 if(b2 != b2Stack){
597 uprv_free(b2);
598 }
599 uprv_free(caseFlags);
600
601 // delete prep;
602
603 return u_terminateUChars(dest, destCapacity, reqLength, status);
604 }
605
606
607 static int32_t
608 getNextSeparator(UChar *src,int32_t srcLength,NamePrepTransform* prep,
609 UChar **limit,
610 UBool *done,
611 UErrorCode *status){
612 if(srcLength == -1){
613 int32_t i;
614 for(i=0 ; ;i++){
615 if(src[i] == 0){
616 *limit = src + i; // point to null
617 *done = TRUE;
618 return i;
619 }
620 if(prep->isLabelSeparator(src[i],*status)){
621 *limit = src + (i+1); // go past the delimiter
622 return i;
623
624 }
625 }
626 }else{
627 int32_t i;
628 for(i=0;i<srcLength;i++){
629 if(prep->isLabelSeparator(src[i],*status)){
630 *limit = src + (i+1); // go past the delimiter
631 return i;
632 }
633 }
634 // we have not found the delimiter
635 if(i==srcLength){
636 *limit = src+srcLength;
637 *done = TRUE;
638 }
639 return i;
640 }
641 }
642
643 U_CFUNC int32_t U_EXPORT2
644 idnaref_IDNToASCII( const UChar* src, int32_t srcLength,
645 UChar* dest, int32_t destCapacity,
646 int32_t options,
647 UParseError* parseError,
648 UErrorCode* status){
649
650 if(status == NULL || U_FAILURE(*status)){
651 return 0;
652 }
653 if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
654 *status = U_ILLEGAL_ARGUMENT_ERROR;
655 return 0;
656 }
657
658 int32_t reqLength = 0;
659 // UParseError parseError;
660
661 NamePrepTransform* prep = TestIDNA::getInstance(*status);
662
663 //initialize pointers to stack buffers
664 UChar b1Stack[MAX_LABEL_BUFFER_SIZE];
665 UChar *b1 = b1Stack;
666 int32_t b1Len, labelLen;
667 UChar* delimiter = (UChar*)src;
668 UChar* labelStart = (UChar*)src;
669 int32_t remainingLen = srcLength;
670 int32_t b1Capacity = MAX_LABEL_BUFFER_SIZE;
671
672 //get the options
673 // UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0);
674 // UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0);
675 UBool done = FALSE;
676
677 if(U_FAILURE(*status)){
678 goto CLEANUP;
679 }
680
681
682 if(srcLength == -1){
683 for(;;){
684
685 if(*delimiter == 0){
686 break;
687 }
688
689 labelLen = getNextSeparator(labelStart, -1, prep, &delimiter, &done, status);
690
691 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Capacity,
692 options, parseError, status);
693
694 if(*status == U_BUFFER_OVERFLOW_ERROR){
695 // redo processing of string
696 /* we do not have enough room so grow the buffer*/
697 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
698 if(b1==NULL){
699 *status = U_MEMORY_ALLOCATION_ERROR;
700 goto CLEANUP;
701 }
702
703 *status = U_ZERO_ERROR; // reset error
704
705 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Len,
706 options, parseError, status);
707
708 }
709
710 if(U_FAILURE(*status)){
711 goto CLEANUP;
712 }
713 int32_t tempLen = (reqLength + b1Len );
714 // copy to dest
715 if( tempLen< destCapacity){
716 uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR);
717 }
718
719 reqLength = tempLen;
720
721 // add the label separator
722 if(done == FALSE){
723 if(reqLength < destCapacity){
724 dest[reqLength] = FULL_STOP;
725 }
726 reqLength++;
727 }
728
729 labelStart = delimiter;
730 }
731 }else{
732 for(;;){
733
734 if(delimiter == src+srcLength){
735 break;
736 }
737
738 labelLen = getNextSeparator(labelStart, remainingLen, prep, &delimiter, &done, status);
739
740 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Capacity,
741 options,parseError, status);
742
743 if(*status == U_BUFFER_OVERFLOW_ERROR){
744 // redo processing of string
745 /* we do not have enough room so grow the buffer*/
746 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
747 if(b1==NULL){
748 *status = U_MEMORY_ALLOCATION_ERROR;
749 goto CLEANUP;
750 }
751
752 *status = U_ZERO_ERROR; // reset error
753
754 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Len,
755 options, parseError, status);
756
757 }
758
759 if(U_FAILURE(*status)){
760 goto CLEANUP;
761 }
762 int32_t tempLen = (reqLength + b1Len );
763 // copy to dest
764 if( tempLen< destCapacity){
765 uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR);
766 }
767
768 reqLength = tempLen;
769
770 // add the label separator
771 if(done == FALSE){
772 if(reqLength < destCapacity){
773 dest[reqLength] = FULL_STOP;
774 }
775 reqLength++;
776 }
777
778 labelStart = delimiter;
779 remainingLen = srcLength - (delimiter - src);
780 }
781 }
782
783
784 CLEANUP:
785
786 if(b1 != b1Stack){
787 uprv_free(b1);
788 }
789
790 // delete prep;
791
792 return u_terminateUChars(dest, destCapacity, reqLength, status);
793 }
794
795 U_CFUNC int32_t U_EXPORT2
796 idnaref_IDNToUnicode( const UChar* src, int32_t srcLength,
797 UChar* dest, int32_t destCapacity,
798 int32_t options,
799 UParseError* parseError,
800 UErrorCode* status){
801
802 if(status == NULL || U_FAILURE(*status)){
803 return 0;
804 }
805 if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
806 *status = U_ILLEGAL_ARGUMENT_ERROR;
807 return 0;
808 }
809
810 int32_t reqLength = 0;
811
812 UBool done = FALSE;
813
814 NamePrepTransform* prep = TestIDNA::getInstance(*status);
815
816 //initialize pointers to stack buffers
817 UChar b1Stack[MAX_LABEL_BUFFER_SIZE];
818 UChar *b1 = b1Stack;
819 int32_t b1Len, labelLen;
820 UChar* delimiter = (UChar*)src;
821 UChar* labelStart = (UChar*)src;
822 int32_t remainingLen = srcLength;
823 int32_t b1Capacity = MAX_LABEL_BUFFER_SIZE;
824
825 //get the options
826 // UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0);
827 // UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0);
828
829 if(U_FAILURE(*status)){
830 goto CLEANUP;
831 }
832
833 if(srcLength == -1){
834 for(;;){
835
836 if(*delimiter == 0){
837 break;
838 }
839
840 labelLen = getNextSeparator(labelStart, -1, prep, &delimiter, &done, status);
841
842 b1Len = idnaref_toUnicode(labelStart, labelLen, b1, b1Capacity,
843 options, parseError, status);
844
845 if(*status == U_BUFFER_OVERFLOW_ERROR){
846 // redo processing of string
847 /* we do not have enough room so grow the buffer*/
848 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
849 if(b1==NULL){
850 *status = U_MEMORY_ALLOCATION_ERROR;
851 goto CLEANUP;
852 }
853
854 *status = U_ZERO_ERROR; // reset error
855
856 b1Len = idnaref_toUnicode( labelStart, labelLen, b1, b1Len,
857 options, parseError, status);
858
859 }
860
861 if(U_FAILURE(*status)){
862 goto CLEANUP;
863 }
864 int32_t tempLen = (reqLength + b1Len );
865 // copy to dest
866 if( tempLen< destCapacity){
867 uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR);
868 }
869
870 reqLength = tempLen;
871 // add the label separator
872 if(done == FALSE){
873 if(reqLength < destCapacity){
874 dest[reqLength] = FULL_STOP;
875 }
876 reqLength++;
877 }
878
879 labelStart = delimiter;
880 }
881 }else{
882 for(;;){
883
884 if(delimiter == src+srcLength){
885 break;
886 }
887
888 labelLen = getNextSeparator(labelStart, remainingLen, prep, &delimiter, &done, status);
889
890 b1Len = idnaref_toUnicode( labelStart,labelLen, b1, b1Capacity,
891 options, parseError, status);
892
893 if(*status == U_BUFFER_OVERFLOW_ERROR){
894 // redo processing of string
895 /* we do not have enough room so grow the buffer*/
896 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
897 if(b1==NULL){
898 *status = U_MEMORY_ALLOCATION_ERROR;
899 goto CLEANUP;
900 }
901
902 *status = U_ZERO_ERROR; // reset error
903
904 b1Len = idnaref_toUnicode( labelStart, labelLen, b1, b1Len,
905 options, parseError, status);
906
907 }
908
909 if(U_FAILURE(*status)){
910 goto CLEANUP;
911 }
912 int32_t tempLen = (reqLength + b1Len );
913 // copy to dest
914 if( tempLen< destCapacity){
915 uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR);
916 }
917
918 reqLength = tempLen;
919
920 // add the label separator
921 if(done == FALSE){
922 if(reqLength < destCapacity){
923 dest[reqLength] = FULL_STOP;
924 }
925 reqLength++;
926 }
927
928 labelStart = delimiter;
929 remainingLen = srcLength - (delimiter - src);
930 }
931 }
932
933 CLEANUP:
934
935 if(b1 != b1Stack){
936 uprv_free(b1);
937 }
938
939 // delete prep;
940
941 return u_terminateUChars(dest, destCapacity, reqLength, status);
942 }
943
944 U_CFUNC int32_t U_EXPORT2
945 idnaref_compare( const UChar *s1, int32_t length1,
946 const UChar *s2, int32_t length2,
947 int32_t options,
948 UErrorCode* status){
949
950 if(status == NULL || U_FAILURE(*status)){
951 return -1;
952 }
953
954 UChar b1Stack[MAX_IDN_BUFFER_SIZE], b2Stack[MAX_IDN_BUFFER_SIZE];
955 UChar *b1 = b1Stack, *b2 = b2Stack;
956 int32_t b1Len, b2Len, b1Capacity = MAX_IDN_BUFFER_SIZE, b2Capacity = MAX_IDN_BUFFER_SIZE;
957 int32_t result = -1;
958
959 UParseError parseError;
960
961 b1Len = idnaref_IDNToASCII(s1, length1, b1, b1Capacity, options, &parseError, status);
962 if(*status == U_BUFFER_OVERFLOW_ERROR){
963 // redo processing of string
964 /* we do not have enough room so grow the buffer*/
965 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
966 if(b1==NULL){
967 *status = U_MEMORY_ALLOCATION_ERROR;
968 goto CLEANUP;
969 }
970
971 *status = U_ZERO_ERROR; // reset error
972
973 b1Len = idnaref_IDNToASCII(s1,length1,b1,b1Len, options, &parseError, status);
974
975 }
976
977 b2Len = idnaref_IDNToASCII(s2,length2,b2,b2Capacity,options, &parseError, status);
978 if(*status == U_BUFFER_OVERFLOW_ERROR){
979 // redo processing of string
980 /* we do not have enough room so grow the buffer*/
981 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
982 if(b2==NULL){
983 *status = U_MEMORY_ALLOCATION_ERROR;
984 goto CLEANUP;
985 }
986
987 *status = U_ZERO_ERROR; // reset error
988
989 b2Len = idnaref_IDNToASCII(s2,length2,b2,b2Len,options, &parseError, status);
990
991 }
992 // when toASCII is applied all label separators are replaced with FULL_STOP
993 result = compareCaseInsensitiveASCII(b1,b1Len,b2,b2Len);
994
995 CLEANUP:
996 if(b1 != b1Stack){
997 uprv_free(b1);
998 }
999
1000 if(b2 != b2Stack){
1001 uprv_free(b2);
1002 }
1003
1004 return result;
1005 }
1006 #endif /* #if !UCONFIG_NO_IDNA */