]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/uidna.cpp
ICU-8.11.4.tar.gz
[apple/icu.git] / icuSources / common / uidna.cpp
CommitLineData
b75a7d8f
A
1/*
2 *******************************************************************************
3 *
73c04bcf 4 * Copyright (C) 2003-2007, International Business Machines
b75a7d8f
A
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: uidna.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2003feb1
14 * created by: Ram Viswanadha
15 */
16
17#include "unicode/utypes.h"
18
19#if !UCONFIG_NO_IDNA
20
21#include "unicode/uidna.h"
22#include "unicode/ustring.h"
374ca955 23#include "unicode/usprep.h"
b75a7d8f
A
24#include "punycode.h"
25#include "ustr_imp.h"
26#include "cmemory.h"
27#include "sprpimpl.h"
28
/* The official IDNA ACE prefix is "xn--" */
30static const UChar ACE_PREFIX[] ={ 0x0078,0x006E,0x002d,0x002d } ;
31#define ACE_PREFIX_LENGTH 4
32
33#define MAX_LABEL_LENGTH 63
34#define HYPHEN 0x002D
35/* The Max length of the labels should not be more than 64 */
36#define MAX_LABEL_BUFFER_SIZE 100
37#define MAX_IDN_BUFFER_SIZE 300
38
39#define CAPITAL_A 0x0041
40#define CAPITAL_Z 0x005A
41#define LOWER_CASE_DELTA 0x0020
42#define FULL_STOP 0x002E
374ca955 43#define DATA_FILE_NAME "uidna"
b75a7d8f
A
44
45inline static UChar
46toASCIILower(UChar ch){
47 if(CAPITAL_A <= ch && ch <= CAPITAL_Z){
48 return ch + LOWER_CASE_DELTA;
49 }
50 return ch;
51}
52
53inline static UBool
54startsWithPrefix(const UChar* src , int32_t srcLength){
55 UBool startsWithPrefix = TRUE;
56
57 if(srcLength < ACE_PREFIX_LENGTH){
58 return FALSE;
59 }
60
61 for(int8_t i=0; i< ACE_PREFIX_LENGTH; i++){
62 if(toASCIILower(src[i]) != ACE_PREFIX[i]){
63 startsWithPrefix = FALSE;
64 }
65 }
66 return startsWithPrefix;
67}
68
b75a7d8f
A
69
70inline static int32_t
71compareCaseInsensitiveASCII(const UChar* s1, int32_t s1Len,
72 const UChar* s2, int32_t s2Len){
73
74 int32_t minLength;
75 int32_t lengthResult;
76
77 // are we comparing different lengths?
78 if(s1Len != s2Len) {
79 if(s1Len < s2Len) {
80 minLength = s1Len;
81 lengthResult = -1;
82 } else {
83 minLength = s2Len;
84 lengthResult = 1;
85 }
86 } else {
87 // ok the lengths are equal
88 minLength = s1Len;
89 lengthResult = 0;
90 }
91
92 UChar c1,c2;
93 int32_t rc;
94
95 for(int32_t i =0;/* no condition */;i++) {
96
97 /* If we reach the ends of both strings then they match */
98 if(i == minLength) {
99 return lengthResult;
100 }
101
102 c1 = s1[i];
103 c2 = s2[i];
104
105 /* Case-insensitive comparison */
106 if(c1!=c2) {
107 rc=(int32_t)toASCIILower(c1)-(int32_t)toASCIILower(c2);
108 if(rc!=0) {
109 lengthResult=rc;
110 break;
111 }
112 }
113 }
114 return lengthResult;
115}
116
117
374ca955
A
118/**
119 * Ascertain if the given code point is a label separator as
120 * defined by the IDNA RFC
121 *
122 * @param ch The code point to be ascertained
123 * @return true if the char is a label separator
73c04bcf 124 * @stable ICU 2.8
374ca955
A
125 */
126static inline UBool isLabelSeparator(UChar ch){
127 switch(ch){
128 case 0x002e:
129 case 0x3002:
130 case 0xFF0E:
131 case 0xFF61:
132 return TRUE;
133 default:
134 return FALSE;
b75a7d8f 135 }
374ca955
A
136}
137
138// returns the length of the label excluding the separator
139// if *limit == separator then the length returned does not include
140// the separtor.
141static inline int32_t
142getNextSeparator(UChar *src,int32_t srcLength,
143 UChar **limit,
144 UBool *done){
145 if(srcLength == -1){
146 int32_t i;
147 for(i=0 ; ;i++){
148 if(src[i] == 0){
149 *limit = src + i; // point to null
150 *done = TRUE;
151 return i;
152 }
153 if(isLabelSeparator(src[i])){
154 *limit = src + (i+1); // go past the delimiter
155 return i;
156
157 }
158 }
159 }else{
160 int32_t i;
161 for(i=0;i<srcLength;i++){
162 if(isLabelSeparator(src[i])){
163 *limit = src + (i+1); // go past the delimiter
164 return i;
165 }
166 }
167 // we have not found the delimiter
168 // if(i==srcLength)
169 *limit = src+srcLength;
170 *done = TRUE;
171
172 return i;
b75a7d8f 173 }
374ca955
A
174}
175static inline UBool isLDHChar(UChar ch){
176 // high runner case
177 if(ch>0x007A){
178 return FALSE;
179 }
180 //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
181 if( (ch==0x002D) ||
182 (0x0030 <= ch && ch <= 0x0039) ||
183 (0x0041 <= ch && ch <= 0x005A) ||
184 (0x0061 <= ch && ch <= 0x007A)
185 ){
186 return TRUE;
187 }
188 return FALSE;
189}
190
191static int32_t
192_internal_toASCII(const UChar* src, int32_t srcLength,
193 UChar* dest, int32_t destCapacity,
194 int32_t options,
195 UStringPrepProfile* nameprep,
196 UParseError* parseError,
197 UErrorCode* status){
198
b75a7d8f
A
199 UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE];
200 //initialize pointers to stack buffers
201 UChar *b1 = b1Stack, *b2 = b2Stack;
73c04bcf 202 int32_t b1Len=0, b2Len,
b75a7d8f
A
203 b1Capacity = MAX_LABEL_BUFFER_SIZE,
204 b2Capacity = MAX_LABEL_BUFFER_SIZE ,
205 reqLength=0;
206
374ca955 207 int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0;
b75a7d8f
A
208 UBool* caseFlags = NULL;
209
210 // the source contains all ascii codepoints
211 UBool srcIsASCII = TRUE;
212 // assume the source contains all LDH codepoints
213 UBool srcIsLDH = TRUE;
214
215 int32_t j=0;
216
217 //get the options
b75a7d8f 218 UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0);
374ca955 219
b75a7d8f 220 int32_t failPos = -1;
b75a7d8f 221
73c04bcf
A
222 if(srcLength == -1){
223 srcLength = u_strlen(src);
224 }
225
226 if(srcLength > b1Capacity){
227 b1 = (UChar*) uprv_malloc(srcLength * U_SIZEOF_UCHAR);
b75a7d8f
A
228 if(b1==NULL){
229 *status = U_MEMORY_ALLOCATION_ERROR;
230 goto CLEANUP;
231 }
73c04bcf
A
232 b1Capacity = srcLength;
233 }
b75a7d8f 234
73c04bcf
A
235 // step 1
236 for( j=0;j<srcLength;j++){
237 if(src[j] > 0x7F){
238 srcIsASCII = FALSE;
239 }
240 b1[b1Len++] = src[j];
241 }
242
243 // step 2 is performed only if the source contains non ASCII
244 if(srcIsASCII == FALSE){
b75a7d8f 245
73c04bcf
A
246 // step 2
247 b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status);
248
249 if(*status == U_BUFFER_OVERFLOW_ERROR){
250 // redo processing of string
251 // we do not have enough room so grow the buffer
252 if(b1 != b1Stack){
253 uprv_free(b1);
254 }
255 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
256 if(b1==NULL){
257 *status = U_MEMORY_ALLOCATION_ERROR;
258 goto CLEANUP;
259 }
260
261 *status = U_ZERO_ERROR; // reset error
262
263 b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status);
264 }
b75a7d8f
A
265 }
266 // error bail out
267 if(U_FAILURE(*status)){
268 goto CLEANUP;
269 }
73c04bcf
A
270 if(b1Len == 0){
271 *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR;
272 goto CLEANUP;
273 }
b75a7d8f 274
73c04bcf
A
275 // for step 3 & 4
276 srcIsASCII = TRUE;
b75a7d8f 277 for( j=0;j<b1Len;j++){
73c04bcf 278 // check if output of usprep_prepare is all ASCII
b75a7d8f
A
279 if(b1[j] > 0x7F){
280 srcIsASCII = FALSE;
374ca955 281 }else if(isLDHChar(b1[j])==FALSE){ // if the char is in ASCII range verify that it is an LDH character
b75a7d8f
A
282 srcIsLDH = FALSE;
283 failPos = j;
284 }
285 }
b75a7d8f
A
286 if(useSTD3ASCIIRules == TRUE){
287 // verify 3a and 3b
374ca955
A
288 // 3(a) Verify the absence of non-LDH ASCII code points; that is, the
289 // absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F.
290 // 3(b) Verify the absence of leading and trailing hyphen-minus; that
291 // is, the absence of U+002D at the beginning and end of the
292 // sequence.
293 if( srcIsLDH == FALSE /* source at this point should not contain anyLDH characters */
b75a7d8f
A
294 || b1[0] == HYPHEN || b1[b1Len-1] == HYPHEN){
295 *status = U_IDNA_STD3_ASCII_RULES_ERROR;
296
297 /* populate the parseError struct */
298 if(srcIsLDH==FALSE){
299 // failPos is always set the index of failure
300 uprv_syntaxError(b1,failPos, b1Len,parseError);
301 }else if(b1[0] == HYPHEN){
302 // fail position is 0
303 uprv_syntaxError(b1,0,b1Len,parseError);
304 }else{
305 // the last index in the source is always length-1
306 uprv_syntaxError(b1, (b1Len>0) ? b1Len-1 : b1Len, b1Len,parseError);
307 }
308
309 goto CLEANUP;
310 }
311 }
73c04bcf 312 // Step 4: if the source is ASCII then proceed to step 8
b75a7d8f
A
313 if(srcIsASCII){
314 if(b1Len <= destCapacity){
315 uprv_memmove(dest, b1, b1Len * U_SIZEOF_UCHAR);
316 reqLength = b1Len;
317 }else{
318 reqLength = b1Len;
319 goto CLEANUP;
320 }
321 }else{
322 // step 5 : verify the sequence does not begin with ACE prefix
323 if(!startsWithPrefix(b1,b1Len)){
324
325 //step 6: encode the sequence with punycode
326
327 // do not preserve the case flags for now!
328 // TODO: Preserve the case while implementing the RFE
329 // caseFlags = (UBool*) uprv_malloc(b1Len * sizeof(UBool));
330 // uprv_memset(caseFlags,TRUE,b1Len);
331
332 b2Len = u_strToPunycode(b1,b1Len,b2,b2Capacity,caseFlags, status);
333
334 if(*status == U_BUFFER_OVERFLOW_ERROR){
335 // redo processing of string
336 /* we do not have enough room so grow the buffer*/
337 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
338 if(b2 == NULL){
339 *status = U_MEMORY_ALLOCATION_ERROR;
340 goto CLEANUP;
341 }
342
343 *status = U_ZERO_ERROR; // reset error
344
345 b2Len = u_strToPunycode(b1,b1Len,b2,b2Len,caseFlags, status);
346 }
347 //error bail out
348 if(U_FAILURE(*status)){
349 goto CLEANUP;
350 }
351 // TODO : Reconsider while implementing the case preserve RFE
352 // convert all codepoints to lower case ASCII
353 // toASCIILower(b2,b2Len);
354 reqLength = b2Len+ACE_PREFIX_LENGTH;
355
356 if(reqLength > destCapacity){
357 *status = U_BUFFER_OVERFLOW_ERROR;
358 goto CLEANUP;
359 }
360 //Step 7: prepend the ACE prefix
361 uprv_memcpy(dest,ACE_PREFIX,ACE_PREFIX_LENGTH * U_SIZEOF_UCHAR);
362 //Step 6: copy the contents in b2 into dest
363 uprv_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len * U_SIZEOF_UCHAR);
364
365 }else{
366 *status = U_IDNA_ACE_PREFIX_ERROR;
367 //position of failure is 0
368 uprv_syntaxError(b1,0,b1Len,parseError);
369 goto CLEANUP;
370 }
371 }
73c04bcf 372 // step 8: verify the length of lable
b75a7d8f
A
373 if(reqLength > MAX_LABEL_LENGTH){
374 *status = U_IDNA_LABEL_TOO_LONG_ERROR;
375 }
376
377CLEANUP:
378 if(b1 != b1Stack){
379 uprv_free(b1);
380 }
381 if(b2 != b2Stack){
382 uprv_free(b2);
383 }
384 uprv_free(caseFlags);
385
b75a7d8f
A
386 return u_terminateUChars(dest, destCapacity, reqLength, status);
387}
388
374ca955
A
389static int32_t
390_internal_toUnicode(const UChar* src, int32_t srcLength,
391 UChar* dest, int32_t destCapacity,
392 int32_t options,
393 UStringPrepProfile* nameprep,
394 UParseError* parseError,
395 UErrorCode* status){
b75a7d8f
A
396
397 //get the options
b75a7d8f 398 UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0);
374ca955 399 int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0;
b75a7d8f
A
400
401 UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE];
402
403 //initialize pointers to stack buffers
404 UChar *b1 = b1Stack, *b2 = b2Stack, *b1Prime=NULL, *b3=b3Stack;
405 int32_t b1Len, b2Len, b1PrimeLen, b3Len,
406 b1Capacity = MAX_LABEL_BUFFER_SIZE,
407 b2Capacity = MAX_LABEL_BUFFER_SIZE,
408 b3Capacity = MAX_LABEL_BUFFER_SIZE,
409 reqLength=0;
374ca955 410
b75a7d8f
A
411 b1Len = 0;
412 UBool* caseFlags = NULL;
413
414 UBool srcIsASCII = TRUE;
415 UBool srcIsLDH = TRUE;
416 int32_t failPos =0;
417
b75a7d8f
A
418 // step 1: find out if all the codepoints in src are ASCII
419 if(srcLength==-1){
420 srcLength = 0;
421 for(;src[srcLength]!=0;){
422 if(src[srcLength]> 0x7f){
423 srcIsASCII = FALSE;
374ca955
A
424 }else if(isLDHChar(src[srcLength])==FALSE){
425 // here we do not assemble surrogates
426 // since we know that LDH code points
427 // are in the ASCII range only
b75a7d8f
A
428 srcIsLDH = FALSE;
429 failPos = srcLength;
430 }
431 srcLength++;
432 }
374ca955 433 }else if(srcLength > 0){
b75a7d8f
A
434 for(int32_t j=0; j<srcLength; j++){
435 if(src[j]> 0x7f){
436 srcIsASCII = FALSE;
374ca955
A
437 }else if(isLDHChar(src[j])==FALSE){
438 // here we do not assemble surrogates
439 // since we know that LDH code points
440 // are in the ASCII range only
b75a7d8f
A
441 srcIsLDH = FALSE;
442 failPos = j;
443 }
444 }
374ca955
A
445 }else{
446 return 0;
b75a7d8f
A
447 }
448
449 if(srcIsASCII == FALSE){
450 // step 2: process the string
374ca955 451 b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status);
b75a7d8f
A
452 if(*status == U_BUFFER_OVERFLOW_ERROR){
453 // redo processing of string
454 /* we do not have enough room so grow the buffer*/
455 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
456 if(b1==NULL){
457 *status = U_MEMORY_ALLOCATION_ERROR;
458 goto CLEANUP;
459 }
460
461 *status = U_ZERO_ERROR; // reset error
462
374ca955 463 b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status);
b75a7d8f
A
464 }
465 //bail out on error
466 if(U_FAILURE(*status)){
467 goto CLEANUP;
468 }
469 }else{
470
471 //just point src to b1
472 b1 = (UChar*) src;
473 b1Len = srcLength;
474 }
475
476 //step 3: verify ACE Prefix
477 if(startsWithPrefix(src,srcLength)){
73c04bcf 478
b75a7d8f
A
479 //step 4: Remove the ACE Prefix
480 b1Prime = b1 + ACE_PREFIX_LENGTH;
481 b1PrimeLen = b1Len - ACE_PREFIX_LENGTH;
482
483 //step 5: Decode using punycode
484 b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Capacity, caseFlags,status);
485
486 if(*status == U_BUFFER_OVERFLOW_ERROR){
487 // redo processing of string
488 /* we do not have enough room so grow the buffer*/
489 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
490 if(b2==NULL){
491 *status = U_MEMORY_ALLOCATION_ERROR;
492 goto CLEANUP;
493 }
494
495 *status = U_ZERO_ERROR; // reset error
496
497 b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Len, caseFlags, status);
498
499 }
500
b75a7d8f
A
501 //step 6:Apply toASCII
502 b3Len = uidna_toASCII(b2, b2Len, b3, b3Capacity,options,parseError, status);
73c04bcf 503
b75a7d8f
A
504 if(*status == U_BUFFER_OVERFLOW_ERROR){
505 // redo processing of string
506 /* we do not have enough room so grow the buffer*/
507 b3 = (UChar*) uprv_malloc(b3Len * U_SIZEOF_UCHAR);
508 if(b3==NULL){
509 *status = U_MEMORY_ALLOCATION_ERROR;
510 goto CLEANUP;
511 }
512
513 *status = U_ZERO_ERROR; // reset error
514
515 b3Len = uidna_toASCII(b2,b2Len,b3,b3Len,options,parseError, status);
516
517 }
73c04bcf 518
b75a7d8f
A
519 //bail out on error
520 if(U_FAILURE(*status)){
521 goto CLEANUP;
522 }
523
524 //step 7: verify
525 if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=0){
526 *status = U_IDNA_VERIFICATION_ERROR;
527 goto CLEANUP;
528 }
529
530 //step 8: return output of step 5
531 reqLength = b2Len;
532 if(b2Len <= destCapacity) {
533 uprv_memmove(dest, b2, b2Len * U_SIZEOF_UCHAR);
534 }
535 }else{
536 // verify that STD3 ASCII rules are satisfied
537 if(useSTD3ASCIIRules == TRUE){
538 if( srcIsLDH == FALSE /* source contains some non-LDH characters */
539 || src[0] == HYPHEN || src[srcLength-1] == HYPHEN){
540 *status = U_IDNA_STD3_ASCII_RULES_ERROR;
541
542 /* populate the parseError struct */
543 if(srcIsLDH==FALSE){
544 // failPos is always set the index of failure
545 uprv_syntaxError(src,failPos, srcLength,parseError);
546 }else if(src[0] == HYPHEN){
547 // fail position is 0
548 uprv_syntaxError(src,0,srcLength,parseError);
549 }else{
550 // the last index in the source is always length-1
551 uprv_syntaxError(src, (srcLength>0) ? srcLength-1 : srcLength, srcLength,parseError);
552 }
553
554 goto CLEANUP;
555 }
556 }
557 //copy the source to destination
558 if(srcLength <= destCapacity){
559 uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR);
560 }
561 reqLength = srcLength;
562 }
563
564CLEANUP:
565
566 if(b1 != b1Stack && b1!=src){
567 uprv_free(b1);
568 }
569 if(b2 != b2Stack){
570 uprv_free(b2);
571 }
572 uprv_free(caseFlags);
573
374ca955 574
b75a7d8f
A
575 // The RFC states that
576 // <quote>
577 // ToUnicode never fails. If any step fails, then the original input
578 // is returned immediately in that step.
579 // </quote>
580 // So if any step fails lets copy source to destination
581 if(U_FAILURE(*status)){
582 //copy the source to destination
583 if(dest && srcLength <= destCapacity){
584 if(srcLength == -1) {
585 uprv_memmove(dest,src,u_strlen(src)* U_SIZEOF_UCHAR);
586 } else {
587 uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR);
588 }
589 }
590 reqLength = srcLength;
591 }
592
593 return u_terminateUChars(dest, destCapacity, reqLength, status);
594}
595
374ca955
A
596U_CAPI int32_t U_EXPORT2
597uidna_toASCII(const UChar* src, int32_t srcLength,
598 UChar* dest, int32_t destCapacity,
599 int32_t options,
600 UParseError* parseError,
601 UErrorCode* status){
602
603 if(status == NULL || U_FAILURE(*status)){
604 return 0;
605 }
606 if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
607 *status = U_ILLEGAL_ARGUMENT_ERROR;
608 return 0;
609 }
b75a7d8f 610
374ca955
A
611 UStringPrepProfile* nameprep = usprep_open(NULL,DATA_FILE_NAME, status);
612
613 if(U_FAILURE(*status)){
614 return -1;
b75a7d8f 615 }
374ca955
A
616
617 int32_t retLen = _internal_toASCII(src, srcLength, dest, destCapacity, options, nameprep, parseError, status);
618
619 /* close the profile*/
620 usprep_close(nameprep);
621
622 return retLen;
b75a7d8f
A
623}
624
374ca955
A
625U_CAPI int32_t U_EXPORT2
626uidna_toUnicode(const UChar* src, int32_t srcLength,
627 UChar* dest, int32_t destCapacity,
628 int32_t options,
629 UParseError* parseError,
630 UErrorCode* status){
631
632 if(status == NULL || U_FAILURE(*status)){
633 return 0;
634 }
635 if( (src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
636 *status = U_ILLEGAL_ARGUMENT_ERROR;
637 return 0;
638 }
639
640 UStringPrepProfile* nameprep = usprep_open(NULL, DATA_FILE_NAME, status);
641
642 if(U_FAILURE(*status)){
643 return -1;
644 }
645
646 int32_t retLen = _internal_toUnicode(src, srcLength, dest, destCapacity, options, nameprep, parseError, status);
647
648 usprep_close(nameprep);
649
650 return retLen;
651}
652
653
b75a7d8f
A
654U_CAPI int32_t U_EXPORT2
655uidna_IDNToASCII( const UChar *src, int32_t srcLength,
656 UChar* dest, int32_t destCapacity,
657 int32_t options,
658 UParseError *parseError,
659 UErrorCode *status){
660
661 if(status == NULL || U_FAILURE(*status)){
662 return 0;
663 }
664 if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
665 *status = U_ILLEGAL_ARGUMENT_ERROR;
666 return 0;
667 }
668
669 int32_t reqLength = 0;
670
374ca955 671 UStringPrepProfile* nameprep = usprep_open(NULL, DATA_FILE_NAME, status);
b75a7d8f
A
672
673 if(U_FAILURE(*status)){
674 return 0;
675 }
676
677 //initialize pointers
678 UChar *delimiter = (UChar*)src;
679 UChar *labelStart = (UChar*)src;
680 UChar *currentDest = (UChar*) dest;
681 int32_t remainingLen = srcLength;
682 int32_t remainingDestCapacity = destCapacity;
683 int32_t labelLen = 0, labelReqLength = 0;
684 UBool done = FALSE;
685
686
687 for(;;){
688
374ca955 689 labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done);
73c04bcf
A
690 labelReqLength = 0;
691 if(!(labelLen==0 && done)){// make sure this is not a root label separator.
b75a7d8f 692
73c04bcf
A
693 labelReqLength = _internal_toASCII( labelStart, labelLen,
694 currentDest, remainingDestCapacity,
695 options, nameprep,
696 parseError, status);
697
698 if(*status == U_BUFFER_OVERFLOW_ERROR){
699
700 *status = U_ZERO_ERROR; // reset error
701 remainingDestCapacity = 0;
702 }
b75a7d8f
A
703 }
704
705
706 if(U_FAILURE(*status)){
707 break;
708 }
709
710 reqLength +=labelReqLength;
711 // adjust the destination pointer
712 if(labelReqLength < remainingDestCapacity){
713 currentDest = currentDest + labelReqLength;
714 remainingDestCapacity -= labelReqLength;
715 }else{
716 // should never occur
717 remainingDestCapacity = 0;
718 }
719 if(done == TRUE){
720 break;
721 }
722
723 // add the label separator
724 if(remainingDestCapacity > 0){
725 *currentDest++ = FULL_STOP;
726 remainingDestCapacity--;
727 }
728 reqLength++;
729
730 labelStart = delimiter;
731 if(remainingLen >0 ){
73c04bcf 732 remainingLen = (int32_t)(srcLength - (delimiter - src));
b75a7d8f
A
733 }
734
735 }
736
374ca955 737 usprep_close(nameprep);
b75a7d8f
A
738
739 return u_terminateUChars(dest, destCapacity, reqLength, status);
740}
741
742U_CAPI int32_t U_EXPORT2
743uidna_IDNToUnicode( const UChar* src, int32_t srcLength,
744 UChar* dest, int32_t destCapacity,
745 int32_t options,
746 UParseError* parseError,
747 UErrorCode* status){
748
749 if(status == NULL || U_FAILURE(*status)){
750 return 0;
751 }
752 if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){
753 *status = U_ILLEGAL_ARGUMENT_ERROR;
754 return 0;
755 }
756
757 int32_t reqLength = 0;
758
374ca955 759 UStringPrepProfile* nameprep = usprep_open(NULL, DATA_FILE_NAME, status);
b75a7d8f
A
760
761 if(U_FAILURE(*status)){
762 return 0;
763 }
764
765 //initialize pointers
766 UChar *delimiter = (UChar*)src;
767 UChar *labelStart = (UChar*)src;
768 UChar *currentDest = (UChar*) dest;
769 int32_t remainingLen = srcLength;
770 int32_t remainingDestCapacity = destCapacity;
771 int32_t labelLen = 0, labelReqLength = 0;
772 UBool done = FALSE;
773
774
775 for(;;){
776
374ca955 777 labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done);
b75a7d8f 778
73c04bcf
A
779 if(labelLen==0 && done==FALSE){
780 *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR;
781 }
374ca955
A
782 labelReqLength = _internal_toUnicode(labelStart, labelLen,
783 currentDest, remainingDestCapacity,
784 options, nameprep,
785 parseError, status);
b75a7d8f
A
786
787 if(*status == U_BUFFER_OVERFLOW_ERROR){
788
789 *status = U_ZERO_ERROR; // reset error
790 remainingDestCapacity = 0;
791 }
792
793
794 if(U_FAILURE(*status)){
795 break;
796 }
797
798 reqLength +=labelReqLength;
799 // adjust the destination pointer
800 if(labelReqLength < remainingDestCapacity){
801 currentDest = currentDest + labelReqLength;
802 remainingDestCapacity -= labelReqLength;
803 }else{
804 // should never occur
805 remainingDestCapacity = 0;
806 }
807
808 if(done == TRUE){
809 break;
810 }
811
812 // add the label separator
813 if(remainingDestCapacity > 0){
814 *currentDest++ = FULL_STOP;
815 remainingDestCapacity--;
816 }
817 reqLength++;
818
819 labelStart = delimiter;
820 if(remainingLen >0 ){
73c04bcf 821 remainingLen = (int32_t)(srcLength - (delimiter - src));
b75a7d8f
A
822 }
823
824 }
825
374ca955 826 usprep_close(nameprep);
b75a7d8f
A
827
828 return u_terminateUChars(dest, destCapacity, reqLength, status);
829}
830
831U_CAPI int32_t U_EXPORT2
832uidna_compare( const UChar *s1, int32_t length1,
833 const UChar *s2, int32_t length2,
834 int32_t options,
835 UErrorCode* status){
836
837 if(status == NULL || U_FAILURE(*status)){
838 return -1;
839 }
840
841 UChar b1Stack[MAX_IDN_BUFFER_SIZE], b2Stack[MAX_IDN_BUFFER_SIZE];
842 UChar *b1 = b1Stack, *b2 = b2Stack;
843 int32_t b1Len, b2Len, b1Capacity = MAX_IDN_BUFFER_SIZE, b2Capacity = MAX_IDN_BUFFER_SIZE;
844 int32_t result=-1;
845
846 UParseError parseError;
847
848 b1Len = uidna_IDNToASCII(s1, length1, b1, b1Capacity, options, &parseError, status);
849 if(*status == U_BUFFER_OVERFLOW_ERROR){
850 // redo processing of string
851 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
852 if(b1==NULL){
853 *status = U_MEMORY_ALLOCATION_ERROR;
854 goto CLEANUP;
855 }
856
857 *status = U_ZERO_ERROR; // reset error
858
859 b1Len = uidna_IDNToASCII(s1,length1,b1,b1Len, options, &parseError, status);
860
861 }
862
863 b2Len = uidna_IDNToASCII(s2,length2, b2,b2Capacity, options, &parseError, status);
864 if(*status == U_BUFFER_OVERFLOW_ERROR){
865 // redo processing of string
866 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
867 if(b2==NULL){
868 *status = U_MEMORY_ALLOCATION_ERROR;
869 goto CLEANUP;
870 }
871
872 *status = U_ZERO_ERROR; // reset error
873
874 b2Len = uidna_IDNToASCII(s2, length2, b2, b2Len, options, &parseError, status);
875
876 }
877 // when toASCII is applied all label separators are replaced with FULL_STOP
878 result = compareCaseInsensitiveASCII(b1,b1Len,b2,b2Len);
879
880CLEANUP:
881 if(b1 != b1Stack){
882 uprv_free(b1);
883 }
884
885 if(b2 != b2Stack){
886 uprv_free(b2);
887 }
888
889 return result;
890}
891
892#endif /* #if !UCONFIG_NO_IDNA */