]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/intltest/idnaref.cpp
ICU-8.11.4.tar.gz
[apple/icu.git] / icuSources / test / intltest / idnaref.cpp
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2003-2007, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: idnaref.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2003feb1
14 * created by: Ram Viswanadha
15 */
16
17 #include "unicode/utypes.h"
18
19 #if !UCONFIG_NO_IDNA && !UCONFIG_NO_TRANSLITERATION
20 #include "idnaref.h"
21 #include "punyref.h"
22 #include "ustr_imp.h"
23 #include "cmemory.h"
24 #include "sprpimpl.h"
25 #include "nptrans.h"
26 #include "testidna.h"
27 #include "punycode.h"
28 #include "unicode/ustring.h"
29
30 /* The official IDNA ACE prefix is "xn--". */
31 static const UChar ACE_PREFIX[] ={ 0x0078,0x006E,0x002d,0x002d } ;
32 #define ACE_PREFIX_LENGTH 4
33
34 #define MAX_LABEL_LENGTH 63
35 #define HYPHEN 0x002D
36 /* The Max length of the labels should not be more than 64 */
37 #define MAX_LABEL_BUFFER_SIZE 100
38 #define MAX_IDN_BUFFER_SIZE 300
39
40 #define CAPITAL_A 0x0041
41 #define CAPITAL_Z 0x005A
42 #define LOWER_CASE_DELTA 0x0020
43 #define FULL_STOP 0x002E
44
45
46 inline static UBool
47 startsWithPrefix(const UChar* src , int32_t srcLength){
48 UBool startsWithPrefix = TRUE;
49
50 if(srcLength < ACE_PREFIX_LENGTH){
51 return FALSE;
52 }
53
54 for(int8_t i=0; i< ACE_PREFIX_LENGTH; i++){
55 if(u_tolower(src[i]) != ACE_PREFIX[i]){
56 startsWithPrefix = FALSE;
57 }
58 }
59 return startsWithPrefix;
60 }
61
62 inline static UChar
63 toASCIILower(UChar ch){
64 if(CAPITAL_A <= ch && ch <= CAPITAL_Z){
65 return ch + LOWER_CASE_DELTA;
66 }
67 return ch;
68 }
69
70 inline static int32_t
71 compareCaseInsensitiveASCII(const UChar* s1, int32_t s1Len,
72 const UChar* s2, int32_t s2Len){
73 if(s1Len != s2Len){
74 return (s1Len > s2Len) ? s1Len : s2Len;
75 }
76 UChar c1,c2;
77 int32_t rc;
78
79 for(int32_t i =0;/* no condition */;i++) {
80 /* If we reach the ends of both strings then they match */
81 if(i == s1Len) {
82 return 0;
83 }
84
85 c1 = s1[i];
86 c2 = s2[i];
87
88 /* Case-insensitive comparison */
89 if(c1!=c2) {
90 rc=(int32_t)toASCIILower(c1)-(int32_t)toASCIILower(c2);
91 if(rc!=0) {
92 return rc;
93 }
94 }
95 }
96
97 }
98
99 static UErrorCode getError(enum punycode_status status){
100 switch(status){
101 case punycode_success:
102 return U_ZERO_ERROR;
103 case punycode_bad_input: /* Input is invalid. */
104 return U_INVALID_CHAR_FOUND;
105 case punycode_big_output: /* Output would exceed the space provided. */
106 return U_BUFFER_OVERFLOW_ERROR;
107 case punycode_overflow : /* Input requires wider integers to process. */
108 return U_INDEX_OUTOFBOUNDS_ERROR;
109 default:
110 return U_INTERNAL_PROGRAM_ERROR;
111 }
112 }
113
114 static inline int32_t convertASCIIToUChars(const char* src,UChar* dest, int32_t length){
115 int i;
116 for(i=0;i<length;i++){
117 dest[i] = src[i];
118 }
119 return i;
120 }
121 static inline int32_t convertUCharsToASCII(const UChar* src,char* dest, int32_t length){
122 int i;
123 for(i=0;i<length;i++){
124 dest[i] = (char)src[i];
125 }
126 return i;
127 }
128 // wrapper around the reference Punycode implementation
129 static int32_t convertToPuny(const UChar* src, int32_t srcLength,
130 UChar* dest, int32_t destCapacity,
131 UErrorCode& status){
132 uint32_t b1Stack[MAX_LABEL_BUFFER_SIZE];
133 int32_t b1Len = 0, b1Capacity = MAX_LABEL_BUFFER_SIZE;
134 uint32_t* b1 = b1Stack;
135 char b2Stack[MAX_LABEL_BUFFER_SIZE];
136 char* b2 = b2Stack;
137 int32_t b2Len =MAX_LABEL_BUFFER_SIZE ;
138 punycode_status error;
139 unsigned char* caseFlags = NULL;
140
141 u_strToUTF32((UChar32*)b1,b1Capacity,&b1Len,src,srcLength,&status);
142 if(status == U_BUFFER_OVERFLOW_ERROR){
143 // redo processing of string
144 /* we do not have enough room so grow the buffer*/
145 b1 = (uint32_t*) uprv_malloc(b1Len * sizeof(uint32_t));
146 if(b1==NULL){
147 status = U_MEMORY_ALLOCATION_ERROR;
148 goto CLEANUP;
149 }
150
151 status = U_ZERO_ERROR; // reset error
152
153 u_strToUTF32((UChar32*)b1,b1Len,&b1Len,src,srcLength,&status);
154 }
155 if(U_FAILURE(status)){
156 goto CLEANUP;
157 }
158
159 //caseFlags = (unsigned char*) uprv_malloc(b1Len *sizeof(unsigned char));
160
161 error = punycode_encode(b1Len,b1,caseFlags, (uint32_t*)&b2Len, b2);
162 status = getError(error);
163
164 if(status == U_BUFFER_OVERFLOW_ERROR){
165 /* we do not have enough room so grow the buffer*/
166 b2 = (char*) uprv_malloc( b2Len * sizeof(char));
167 if(b2==NULL){
168 status = U_MEMORY_ALLOCATION_ERROR;
169 goto CLEANUP;
170 }
171
172 status = U_ZERO_ERROR; // reset error
173
174 punycode_status error = punycode_encode(b1Len,b1,caseFlags, (uint32_t*)&b2Len, b2);
175 status = getError(error);
176 }
177 if(U_FAILURE(status)){
178 goto CLEANUP;
179 }
180
181 if(b2Len < destCapacity){
182 convertASCIIToUChars(b2,dest,b2Len);
183 }else{
184 status =U_BUFFER_OVERFLOW_ERROR;
185 }
186
187 CLEANUP:
188 if(b1Stack != b1){
189 uprv_free(b1);
190 }
191 if(b2Stack != b2){
192 uprv_free(b2);
193 }
194 uprv_free(caseFlags);
195
196 return b2Len;
197 }
198
199 static int32_t convertFromPuny( const UChar* src, int32_t srcLength,
200 UChar* dest, int32_t destCapacity,
201 UErrorCode& status){
202 char b1Stack[MAX_LABEL_BUFFER_SIZE];
203 char* b1 = b1Stack;
204 int32_t destLen =0;
205
206 convertUCharsToASCII(src, b1,srcLength);
207
208 uint32_t b2Stack[MAX_LABEL_BUFFER_SIZE];
209 uint32_t* b2 = b2Stack;
210 int32_t b2Len =MAX_LABEL_BUFFER_SIZE;
211 unsigned char* caseFlags = NULL; //(unsigned char*) uprv_malloc(srcLength * sizeof(unsigned char*));
212 punycode_status error = punycode_decode(srcLength,b1,(uint32_t*)&b2Len,b2,caseFlags);
213 status = getError(error);
214 if(status == U_BUFFER_OVERFLOW_ERROR){
215 b2 = (uint32_t*) uprv_malloc(b2Len * sizeof(uint32_t));
216 if(b2 == NULL){
217 status = U_MEMORY_ALLOCATION_ERROR;
218 goto CLEANUP;
219 }
220 error = punycode_decode(srcLength,b1,(uint32_t*)&b2Len,b2,caseFlags);
221 status = getError(error);
222 }
223
224 if(U_FAILURE(status)){
225 goto CLEANUP;
226 }
227
228 u_strFromUTF32(dest,destCapacity,&destLen,(UChar32*)b2,b2Len,&status);
229
230 CLEANUP:
231 if(b1Stack != b1){
232 uprv_free(b1);
233 }
234 if(b2Stack != b2){
235 uprv_free(b2);
236 }
237 uprv_free(caseFlags);
238
239 return destLen;
240 }
241
242
243 U_CFUNC int32_t U_EXPORT2
244 idnaref_toASCII(const UChar* src, int32_t srcLength,
245 UChar* dest, int32_t destCapacity,
246 int32_t options,
247 UParseError* parseError,
248 UErrorCode* status){
249
250 if(status == NULL || U_FAILURE(*status)){
251 return 0;
252 }
253 if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
254 *status = U_ILLEGAL_ARGUMENT_ERROR;
255 return 0;
256 }
257 UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE];
258 //initialize pointers to stack buffers
259 UChar *b1 = b1Stack, *b2 = b2Stack;
260 int32_t b1Len=0, b2Len=0,
261 b1Capacity = MAX_LABEL_BUFFER_SIZE,
262 b2Capacity = MAX_LABEL_BUFFER_SIZE ,
263 reqLength=0;
264
265 //get the options
266 UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0);
267 UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0);
268
269 UBool* caseFlags = NULL;
270
271 // assume the source contains all ascii codepoints
272 UBool srcIsASCII = TRUE;
273 // assume the source contains all LDH codepoints
274 UBool srcIsLDH = TRUE;
275 int32_t j=0;
276
277 NamePrepTransform* prep = TestIDNA::getInstance(*status);
278
279 if(U_FAILURE(*status)){
280 goto CLEANUP;
281 }
282
283 if(srcLength == -1){
284 srcLength = u_strlen(src);
285 }
286 if(srcLength > b1Capacity){
287 b1 = (UChar*) uprv_malloc(srcLength * U_SIZEOF_UCHAR);
288 if(b1==NULL){
289 *status = U_MEMORY_ALLOCATION_ERROR;
290 goto CLEANUP;
291 }
292 b1Capacity = srcLength;
293 }
294 // step 1
295 for( j=0;j<srcLength;j++){
296 if(src[j] > 0x7F){
297 srcIsASCII = FALSE;
298 }
299 b1[b1Len++] = src[j];
300 }
301
302 // step 2
303 b1Len = prep->process(src,srcLength,b1, b1Capacity,allowUnassigned,parseError,*status);
304
305 if(*status == U_BUFFER_OVERFLOW_ERROR){
306 // redo processing of string
307 /* we do not have enough room so grow the buffer*/
308 if(b1 != b1Stack){
309 uprv_free(b1);
310 }
311 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
312 if(b1==NULL){
313 *status = U_MEMORY_ALLOCATION_ERROR;
314 goto CLEANUP;
315 }
316
317 *status = U_ZERO_ERROR; // reset error
318
319 b1Len = prep->process(src,srcLength,b1, b1Len,allowUnassigned, parseError, *status);
320 }
321 // error bail out
322 if(U_FAILURE(*status)){
323 goto CLEANUP;
324 }
325
326 if(b1Len == 0){
327 *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR;
328 goto CLEANUP;
329 }
330
331 srcIsASCII = TRUE;
332 // step 3 & 4
333 for( j=0;j<b1Len;j++){
334 if(b1[j] > 0x7F){// check if output of usprep_prepare is all ASCII
335 srcIsASCII = FALSE;
336 }else if(prep->isLDHChar(b1[j])==FALSE){ // if the char is in ASCII range verify that it is an LDH character{
337 srcIsLDH = FALSE;
338 }
339 }
340
341 if(useSTD3ASCIIRules == TRUE){
342 // verify 3a and 3b
343 if( srcIsLDH == FALSE /* source contains some non-LDH characters */
344 || b1[0] == HYPHEN || b1[b1Len-1] == HYPHEN){
345 *status = U_IDNA_STD3_ASCII_RULES_ERROR;
346 goto CLEANUP;
347 }
348 }
349 if(srcIsASCII){
350 if(b1Len <= destCapacity){
351 uprv_memmove(dest, b1, b1Len * U_SIZEOF_UCHAR);
352 reqLength = b1Len;
353 }else{
354 reqLength = b1Len;
355 goto CLEANUP;
356 }
357 }else{
358 // step 5 : verify the sequence does not begin with ACE prefix
359 if(!startsWithPrefix(b1,b1Len)){
360
361 //step 6: encode the sequence with punycode
362 //caseFlags = (UBool*) uprv_malloc(b1Len * sizeof(UBool));
363
364 b2Len = convertToPuny(b1,b1Len, b2,b2Capacity,*status);
365 //b2Len = u_strToPunycode(b2,b2Capacity,b1,b1Len, caseFlags, status);
366 if(*status == U_BUFFER_OVERFLOW_ERROR){
367 // redo processing of string
368 /* we do not have enough room so grow the buffer*/
369 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
370 if(b2 == NULL){
371 *status = U_MEMORY_ALLOCATION_ERROR;
372 goto CLEANUP;
373 }
374
375 *status = U_ZERO_ERROR; // reset error
376
377 b2Len = convertToPuny(b1, b1Len, b2, b2Len, *status);
378 //b2Len = u_strToPunycode(b2,b2Len,b1,b1Len, caseFlags, status);
379
380 }
381 //error bail out
382 if(U_FAILURE(*status)){
383 goto CLEANUP;
384 }
385 reqLength = b2Len+ACE_PREFIX_LENGTH;
386
387 if(reqLength > destCapacity){
388 *status = U_BUFFER_OVERFLOW_ERROR;
389 goto CLEANUP;
390 }
391 //Step 7: prepend the ACE prefix
392 uprv_memcpy(dest,ACE_PREFIX,ACE_PREFIX_LENGTH * U_SIZEOF_UCHAR);
393 //Step 6: copy the contents in b2 into dest
394 uprv_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len * U_SIZEOF_UCHAR);
395
396 }else{
397 *status = U_IDNA_ACE_PREFIX_ERROR;
398 goto CLEANUP;
399 }
400 }
401
402 if(reqLength > MAX_LABEL_LENGTH){
403 *status = U_IDNA_LABEL_TOO_LONG_ERROR;
404 }
405
406 CLEANUP:
407 if(b1 != b1Stack){
408 uprv_free(b1);
409 }
410 if(b2 != b2Stack){
411 uprv_free(b2);
412 }
413 uprv_free(caseFlags);
414
415 // delete prep;
416
417 return u_terminateUChars(dest, destCapacity, reqLength, status);
418 }
419
420
421 U_CFUNC int32_t U_EXPORT2
422 idnaref_toUnicode(const UChar* src, int32_t srcLength,
423 UChar* dest, int32_t destCapacity,
424 int32_t options,
425 UParseError* parseError,
426 UErrorCode* status){
427
428 if(status == NULL || U_FAILURE(*status)){
429 return 0;
430 }
431 if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
432 *status = U_ILLEGAL_ARGUMENT_ERROR;
433 return 0;
434 }
435
436
437
438 UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE];
439
440 //initialize pointers to stack buffers
441 UChar *b1 = b1Stack, *b2 = b2Stack, *b1Prime=NULL, *b3=b3Stack;
442 int32_t b1Len, b2Len, b1PrimeLen, b3Len,
443 b1Capacity = MAX_LABEL_BUFFER_SIZE,
444 b2Capacity = MAX_LABEL_BUFFER_SIZE,
445 b3Capacity = MAX_LABEL_BUFFER_SIZE,
446 reqLength=0;
447 // UParseError parseError;
448
449 NamePrepTransform* prep = TestIDNA::getInstance(*status);
450 b1Len = 0;
451 UBool* caseFlags = NULL;
452
453 //get the options
454 UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0);
455 UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0);
456
457 UBool srcIsASCII = TRUE;
458 UBool srcIsLDH = TRUE;
459 int32_t failPos =0;
460
461 if(U_FAILURE(*status)){
462 goto CLEANUP;
463 }
464 // step 1: find out if all the codepoints in src are ASCII
465 if(srcLength==-1){
466 srcLength = 0;
467 for(;src[srcLength]!=0;){
468 if(src[srcLength]> 0x7f){
469 srcIsASCII = FALSE;
470 }if(prep->isLDHChar(src[srcLength])==FALSE){
471 // here we do not assemble surrogates
472 // since we know that LDH code points
473 // are in the ASCII range only
474 srcIsLDH = FALSE;
475 failPos = srcLength;
476 }
477 srcLength++;
478 }
479 }else{
480 for(int32_t j=0; j<srcLength; j++){
481 if(src[j]> 0x7f){
482 srcIsASCII = FALSE;
483 }else if(prep->isLDHChar(src[j])==FALSE){
484 // here we do not assemble surrogates
485 // since we know that LDH code points
486 // are in the ASCII range only
487 srcIsLDH = FALSE;
488 failPos = j;
489 }
490 }
491 }
492
493 if(srcIsASCII == FALSE){
494 // step 2: process the string
495 b1Len = prep->process(src,srcLength,b1,b1Capacity,allowUnassigned, parseError, *status);
496 if(*status == U_BUFFER_OVERFLOW_ERROR){
497 // redo processing of string
498 /* we do not have enough room so grow the buffer*/
499 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
500 if(b1==NULL){
501 *status = U_MEMORY_ALLOCATION_ERROR;
502 goto CLEANUP;
503 }
504
505 *status = U_ZERO_ERROR; // reset error
506
507 b1Len = prep->process(src,srcLength,b1, b1Len,allowUnassigned, parseError, *status);
508 }
509 //bail out on error
510 if(U_FAILURE(*status)){
511 goto CLEANUP;
512 }
513 }else{
514
515 // copy everything to b1
516 if(srcLength < b1Capacity){
517 uprv_memmove(b1,src, srcLength * U_SIZEOF_UCHAR);
518 }else{
519 /* we do not have enough room so grow the buffer*/
520 b1 = (UChar*) uprv_malloc(srcLength * U_SIZEOF_UCHAR);
521 if(b1==NULL){
522 *status = U_MEMORY_ALLOCATION_ERROR;
523 goto CLEANUP;
524 }
525 uprv_memmove(b1,src, srcLength * U_SIZEOF_UCHAR);
526 }
527 b1Len = srcLength;
528 }
529 //step 3: verify ACE Prefix
530 if(startsWithPrefix(src,srcLength)){
531
532 //step 4: Remove the ACE Prefix
533 b1Prime = b1 + ACE_PREFIX_LENGTH;
534 b1PrimeLen = b1Len - ACE_PREFIX_LENGTH;
535
536 //step 5: Decode using punycode
537 b2Len = convertFromPuny(b1Prime,b1PrimeLen, b2, b2Capacity, *status);
538 //b2Len = u_strFromPunycode(b2, b2Capacity,b1Prime,b1PrimeLen, caseFlags, status);
539
540 if(*status == U_BUFFER_OVERFLOW_ERROR){
541 // redo processing of string
542 /* we do not have enough room so grow the buffer*/
543 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
544 if(b2==NULL){
545 *status = U_MEMORY_ALLOCATION_ERROR;
546 goto CLEANUP;
547 }
548
549 *status = U_ZERO_ERROR; // reset error
550
551 b2Len = convertFromPuny(b1Prime,b1PrimeLen, b2, b2Len, *status);
552 //b2Len = u_strFromPunycode(b2, b2Len,b1Prime,b1PrimeLen,caseFlags, status);
553 }
554
555
556 //step 6:Apply toASCII
557 b3Len = idnaref_toASCII(b2,b2Len,b3,b3Capacity,options,parseError, status);
558
559 if(*status == U_BUFFER_OVERFLOW_ERROR){
560 // redo processing of string
561 /* we do not have enough room so grow the buffer*/
562 b3 = (UChar*) uprv_malloc(b3Len * U_SIZEOF_UCHAR);
563 if(b3==NULL){
564 *status = U_MEMORY_ALLOCATION_ERROR;
565 goto CLEANUP;
566 }
567
568 *status = U_ZERO_ERROR; // reset error
569
570 b3Len = idnaref_toASCII(b2,b2Len,b3,b3Len, options, parseError, status);
571
572 }
573 //bail out on error
574 if(U_FAILURE(*status)){
575 goto CLEANUP;
576 }
577
578 //step 7: verify
579 if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=0){
580 *status = U_IDNA_VERIFICATION_ERROR;
581 goto CLEANUP;
582 }
583
584 //step 8: return output of step 5
585 reqLength = b2Len;
586 if(b2Len <= destCapacity) {
587 uprv_memmove(dest, b2, b2Len * U_SIZEOF_UCHAR);
588 }
589 }else{
590 // verify that STD3 ASCII rules are satisfied
591 if(useSTD3ASCIIRules == TRUE){
592 if( srcIsLDH == FALSE /* source contains some non-LDH characters */
593 || src[0] == HYPHEN || src[srcLength-1] == HYPHEN){
594 *status = U_IDNA_STD3_ASCII_RULES_ERROR;
595
596 /* populate the parseError struct */
597 if(srcIsLDH==FALSE){
598 // failPos is always set the index of failure
599 uprv_syntaxError(src,failPos, srcLength,parseError);
600 }else if(src[0] == HYPHEN){
601 // fail position is 0
602 uprv_syntaxError(src,0,srcLength,parseError);
603 }else{
604 // the last index in the source is always length-1
605 uprv_syntaxError(src, (srcLength>0) ? srcLength-1 : srcLength, srcLength,parseError);
606 }
607
608 goto CLEANUP;
609 }
610 }
611 //copy the source to destination
612 if(srcLength <= destCapacity){
613 uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR);
614 }
615 reqLength = srcLength;
616 }
617
618 CLEANUP:
619
620 if(b1 != b1Stack){
621 uprv_free(b1);
622 }
623 if(b2 != b2Stack){
624 uprv_free(b2);
625 }
626 uprv_free(caseFlags);
627
628 // delete prep;
629
630 return u_terminateUChars(dest, destCapacity, reqLength, status);
631 }
632
633
634 static int32_t
635 getNextSeparator(UChar *src,int32_t srcLength,NamePrepTransform* prep,
636 UChar **limit,
637 UBool *done,
638 UErrorCode *status){
639 if(srcLength == -1){
640 int32_t i;
641 for(i=0 ; ;i++){
642 if(src[i] == 0){
643 *limit = src + i; // point to null
644 *done = TRUE;
645 return i;
646 }
647 if(prep->isLabelSeparator(src[i],*status)){
648 *limit = src + (i+1); // go past the delimiter
649 return i;
650
651 }
652 }
653 }else{
654 int32_t i;
655 for(i=0;i<srcLength;i++){
656 if(prep->isLabelSeparator(src[i],*status)){
657 *limit = src + (i+1); // go past the delimiter
658 return i;
659 }
660 }
661 // we have not found the delimiter
662 if(i==srcLength){
663 *limit = src+srcLength;
664 *done = TRUE;
665 }
666 return i;
667 }
668 }
669
670 U_CFUNC int32_t U_EXPORT2
671 idnaref_IDNToASCII( const UChar* src, int32_t srcLength,
672 UChar* dest, int32_t destCapacity,
673 int32_t options,
674 UParseError* parseError,
675 UErrorCode* status){
676
677 if(status == NULL || U_FAILURE(*status)){
678 return 0;
679 }
680 if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
681 *status = U_ILLEGAL_ARGUMENT_ERROR;
682 return 0;
683 }
684
685 int32_t reqLength = 0;
686 // UParseError parseError;
687
688 NamePrepTransform* prep = TestIDNA::getInstance(*status);
689
690 //initialize pointers to stack buffers
691 UChar b1Stack[MAX_LABEL_BUFFER_SIZE];
692 UChar *b1 = b1Stack;
693 int32_t b1Len, labelLen;
694 UChar* delimiter = (UChar*)src;
695 UChar* labelStart = (UChar*)src;
696 int32_t remainingLen = srcLength;
697 int32_t b1Capacity = MAX_LABEL_BUFFER_SIZE;
698
699 //get the options
700 // UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0);
701 // UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0);
702 UBool done = FALSE;
703
704 if(U_FAILURE(*status)){
705 goto CLEANUP;
706 }
707
708
709 if(srcLength == -1){
710 for(;;){
711
712 if(*delimiter == 0){
713 break;
714 }
715
716 labelLen = getNextSeparator(labelStart, -1, prep, &delimiter, &done, status);
717 b1Len = 0;
718 if(!(labelLen==0 && done)){// make sure this is not a root label separator.
719
720 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Capacity,
721 options, parseError, status);
722
723 if(*status == U_BUFFER_OVERFLOW_ERROR){
724 // redo processing of string
725 /* we do not have enough room so grow the buffer*/
726 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
727 if(b1==NULL){
728 *status = U_MEMORY_ALLOCATION_ERROR;
729 goto CLEANUP;
730 }
731
732 *status = U_ZERO_ERROR; // reset error
733
734 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Len,
735 options, parseError, status);
736
737 }
738 }
739
740 if(U_FAILURE(*status)){
741 goto CLEANUP;
742 }
743 int32_t tempLen = (reqLength + b1Len );
744 // copy to dest
745 if( tempLen< destCapacity){
746 uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR);
747 }
748
749 reqLength = tempLen;
750
751 // add the label separator
752 if(done == FALSE){
753 if(reqLength < destCapacity){
754 dest[reqLength] = FULL_STOP;
755 }
756 reqLength++;
757 }
758
759 labelStart = delimiter;
760 }
761 }else{
762 for(;;){
763
764 if(delimiter == src+srcLength){
765 break;
766 }
767
768 labelLen = getNextSeparator(labelStart, remainingLen, prep, &delimiter, &done, status);
769
770 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Capacity,
771 options,parseError, status);
772
773 if(*status == U_BUFFER_OVERFLOW_ERROR){
774 // redo processing of string
775 /* we do not have enough room so grow the buffer*/
776 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
777 if(b1==NULL){
778 *status = U_MEMORY_ALLOCATION_ERROR;
779 goto CLEANUP;
780 }
781
782 *status = U_ZERO_ERROR; // reset error
783
784 b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Len,
785 options, parseError, status);
786
787 }
788
789 if(U_FAILURE(*status)){
790 goto CLEANUP;
791 }
792 int32_t tempLen = (reqLength + b1Len );
793 // copy to dest
794 if( tempLen< destCapacity){
795 uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR);
796 }
797
798 reqLength = tempLen;
799
800 // add the label separator
801 if(done == FALSE){
802 if(reqLength < destCapacity){
803 dest[reqLength] = FULL_STOP;
804 }
805 reqLength++;
806 }
807
808 labelStart = delimiter;
809 remainingLen = srcLength - (delimiter - src);
810 }
811 }
812
813
814 CLEANUP:
815
816 if(b1 != b1Stack){
817 uprv_free(b1);
818 }
819
820 // delete prep;
821
822 return u_terminateUChars(dest, destCapacity, reqLength, status);
823 }
824
825 U_CFUNC int32_t U_EXPORT2
826 idnaref_IDNToUnicode( const UChar* src, int32_t srcLength,
827 UChar* dest, int32_t destCapacity,
828 int32_t options,
829 UParseError* parseError,
830 UErrorCode* status){
831
832 if(status == NULL || U_FAILURE(*status)){
833 return 0;
834 }
835 if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
836 *status = U_ILLEGAL_ARGUMENT_ERROR;
837 return 0;
838 }
839
840 int32_t reqLength = 0;
841
842 UBool done = FALSE;
843
844 NamePrepTransform* prep = TestIDNA::getInstance(*status);
845
846 //initialize pointers to stack buffers
847 UChar b1Stack[MAX_LABEL_BUFFER_SIZE];
848 UChar *b1 = b1Stack;
849 int32_t b1Len, labelLen;
850 UChar* delimiter = (UChar*)src;
851 UChar* labelStart = (UChar*)src;
852 int32_t remainingLen = srcLength;
853 int32_t b1Capacity = MAX_LABEL_BUFFER_SIZE;
854
855 //get the options
856 // UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0);
857 // UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0);
858
859 if(U_FAILURE(*status)){
860 goto CLEANUP;
861 }
862
863 if(srcLength == -1){
864 for(;;){
865
866 if(*delimiter == 0){
867 break;
868 }
869
870 labelLen = getNextSeparator(labelStart, -1, prep, &delimiter, &done, status);
871
872 if(labelLen==0 && done==FALSE){
873 *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR;
874 }
875 b1Len = idnaref_toUnicode(labelStart, labelLen, b1, b1Capacity,
876 options, parseError, status);
877
878 if(*status == U_BUFFER_OVERFLOW_ERROR){
879 // redo processing of string
880 /* we do not have enough room so grow the buffer*/
881 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
882 if(b1==NULL){
883 *status = U_MEMORY_ALLOCATION_ERROR;
884 goto CLEANUP;
885 }
886
887 *status = U_ZERO_ERROR; // reset error
888
889 b1Len = idnaref_toUnicode( labelStart, labelLen, b1, b1Len,
890 options, parseError, status);
891
892 }
893
894 if(U_FAILURE(*status)){
895 goto CLEANUP;
896 }
897 int32_t tempLen = (reqLength + b1Len );
898 // copy to dest
899 if( tempLen< destCapacity){
900 uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR);
901 }
902
903 reqLength = tempLen;
904 // add the label separator
905 if(done == FALSE){
906 if(reqLength < destCapacity){
907 dest[reqLength] = FULL_STOP;
908 }
909 reqLength++;
910 }
911
912 labelStart = delimiter;
913 }
914 }else{
915 for(;;){
916
917 if(delimiter == src+srcLength){
918 break;
919 }
920
921 labelLen = getNextSeparator(labelStart, remainingLen, prep, &delimiter, &done, status);
922
923 if(labelLen==0 && done==FALSE){
924 *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR;
925 }
926
927 b1Len = idnaref_toUnicode( labelStart,labelLen, b1, b1Capacity,
928 options, parseError, status);
929
930 if(*status == U_BUFFER_OVERFLOW_ERROR){
931 // redo processing of string
932 /* we do not have enough room so grow the buffer*/
933 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
934 if(b1==NULL){
935 *status = U_MEMORY_ALLOCATION_ERROR;
936 goto CLEANUP;
937 }
938
939 *status = U_ZERO_ERROR; // reset error
940
941 b1Len = idnaref_toUnicode( labelStart, labelLen, b1, b1Len,
942 options, parseError, status);
943
944 }
945
946 if(U_FAILURE(*status)){
947 goto CLEANUP;
948 }
949 int32_t tempLen = (reqLength + b1Len );
950 // copy to dest
951 if( tempLen< destCapacity){
952 uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR);
953 }
954
955 reqLength = tempLen;
956
957 // add the label separator
958 if(done == FALSE){
959 if(reqLength < destCapacity){
960 dest[reqLength] = FULL_STOP;
961 }
962 reqLength++;
963 }
964
965 labelStart = delimiter;
966 remainingLen = srcLength - (delimiter - src);
967 }
968 }
969
970 CLEANUP:
971
972 if(b1 != b1Stack){
973 uprv_free(b1);
974 }
975
976 // delete prep;
977
978 return u_terminateUChars(dest, destCapacity, reqLength, status);
979 }
980
981 U_CFUNC int32_t U_EXPORT2
982 idnaref_compare( const UChar *s1, int32_t length1,
983 const UChar *s2, int32_t length2,
984 int32_t options,
985 UErrorCode* status){
986
987 if(status == NULL || U_FAILURE(*status)){
988 return -1;
989 }
990
991 UChar b1Stack[MAX_IDN_BUFFER_SIZE], b2Stack[MAX_IDN_BUFFER_SIZE];
992 UChar *b1 = b1Stack, *b2 = b2Stack;
993 int32_t b1Len, b2Len, b1Capacity = MAX_IDN_BUFFER_SIZE, b2Capacity = MAX_IDN_BUFFER_SIZE;
994 int32_t result = -1;
995
996 UParseError parseError;
997
998 b1Len = idnaref_IDNToASCII(s1, length1, b1, b1Capacity, options, &parseError, status);
999 if(*status == U_BUFFER_OVERFLOW_ERROR){
1000 // redo processing of string
1001 /* we do not have enough room so grow the buffer*/
1002 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
1003 if(b1==NULL){
1004 *status = U_MEMORY_ALLOCATION_ERROR;
1005 goto CLEANUP;
1006 }
1007
1008 *status = U_ZERO_ERROR; // reset error
1009
1010 b1Len = idnaref_IDNToASCII(s1,length1,b1,b1Len, options, &parseError, status);
1011
1012 }
1013
1014 b2Len = idnaref_IDNToASCII(s2,length2,b2,b2Capacity,options, &parseError, status);
1015 if(*status == U_BUFFER_OVERFLOW_ERROR){
1016 // redo processing of string
1017 /* we do not have enough room so grow the buffer*/
1018 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
1019 if(b2==NULL){
1020 *status = U_MEMORY_ALLOCATION_ERROR;
1021 goto CLEANUP;
1022 }
1023
1024 *status = U_ZERO_ERROR; // reset error
1025
1026 b2Len = idnaref_IDNToASCII(s2,length2,b2,b2Len,options, &parseError, status);
1027
1028 }
1029 // when toASCII is applied all label separators are replaced with FULL_STOP
1030 result = compareCaseInsensitiveASCII(b1,b1Len,b2,b2Len);
1031
1032 CLEANUP:
1033 if(b1 != b1Stack){
1034 uprv_free(b1);
1035 }
1036
1037 if(b2 != b2Stack){
1038 uprv_free(b2);
1039 }
1040
1041 return result;
1042 }
1043 #endif /* #if !UCONFIG_NO_IDNA && !UCONFIG_NO_TRANSLITERATION */