]>
Commit | Line | Data |
---|---|---|
1 | // © 2016 and later: Unicode, Inc. and others. | |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
3 | /* | |
4 | ******************************************************************************* | |
5 | * | |
6 | * Copyright (C) 2003-2011, International Business Machines | |
7 | * Corporation and others. All Rights Reserved. | |
8 | * | |
9 | ******************************************************************************* | |
10 | * file name: idnaref.cpp | |
11 | * encoding: UTF-8 | |
12 | * tab size: 8 (not used) | |
13 | * indentation:4 | |
14 | * | |
15 | * created on: 2003feb1 | |
16 | * created by: Ram Viswanadha | |
17 | */ | |
18 | ||
19 | #include "unicode/utypes.h" | |
20 | ||
21 | #if !UCONFIG_NO_IDNA && !UCONFIG_NO_TRANSLITERATION | |
22 | #include "idnaref.h" | |
23 | #include "punyref.h" | |
24 | #include "ustr_imp.h" | |
25 | #include "cmemory.h" | |
26 | #include "sprpimpl.h" | |
27 | #include "nptrans.h" | |
28 | #include "testidna.h" | |
29 | #include "punycode.h" | |
30 | #include "unicode/ustring.h" | |
31 | ||
32 | /* The official IDNA ACE prefix is "xn--" */ | |
33 | static const UChar ACE_PREFIX[] ={ 0x0078,0x006E,0x002d,0x002d } ; | |
34 | #define ACE_PREFIX_LENGTH 4 | |
35 | ||
36 | #define MAX_LABEL_LENGTH 63 | |
37 | #define HYPHEN 0x002D | |
38 | /* A finished label may hold at most MAX_LABEL_LENGTH (63) code units; the working buffers below are sized with headroom beyond that */ | |
39 | #define MAX_LABEL_BUFFER_SIZE 100 | |
40 | #define MAX_IDN_BUFFER_SIZE 300 | |
41 | ||
42 | #define CAPITAL_A 0x0041 | |
43 | #define CAPITAL_Z 0x005A | |
44 | #define LOWER_CASE_DELTA 0x0020 | |
45 | #define FULL_STOP 0x002E | |
46 | ||
47 | ||
48 | inline static UBool | |
49 | startsWithPrefix(const UChar* src , int32_t srcLength){ | |
50 | UBool startsWithPrefix = TRUE; | |
51 | ||
52 | if(srcLength < ACE_PREFIX_LENGTH){ | |
53 | return FALSE; | |
54 | } | |
55 | ||
56 | for(int8_t i=0; i< ACE_PREFIX_LENGTH; i++){ | |
57 | if(u_tolower(src[i]) != ACE_PREFIX[i]){ | |
58 | startsWithPrefix = FALSE; | |
59 | } | |
60 | } | |
61 | return startsWithPrefix; | |
62 | } | |
63 | ||
64 | inline static UChar | |
65 | toASCIILower(UChar ch){ | |
66 | if(CAPITAL_A <= ch && ch <= CAPITAL_Z){ | |
67 | return ch + LOWER_CASE_DELTA; | |
68 | } | |
69 | return ch; | |
70 | } | |
71 | ||
72 | inline static int32_t | |
73 | compareCaseInsensitiveASCII(const UChar* s1, int32_t s1Len, | |
74 | const UChar* s2, int32_t s2Len){ | |
75 | if(s1Len != s2Len){ | |
76 | return (s1Len > s2Len) ? s1Len : s2Len; | |
77 | } | |
78 | UChar c1,c2; | |
79 | int32_t rc; | |
80 | ||
81 | for(int32_t i =0;/* no condition */;i++) { | |
82 | /* If we reach the ends of both strings then they match */ | |
83 | if(i == s1Len) { | |
84 | return 0; | |
85 | } | |
86 | ||
87 | c1 = s1[i]; | |
88 | c2 = s2[i]; | |
89 | ||
90 | /* Case-insensitive comparison */ | |
91 | if(c1!=c2) { | |
92 | rc=(int32_t)toASCIILower(c1)-(int32_t)toASCIILower(c2); | |
93 | if(rc!=0) { | |
94 | return rc; | |
95 | } | |
96 | } | |
97 | } | |
98 | ||
99 | } | |
100 | ||
101 | static UErrorCode getError(enum punycode_status status){ | |
102 | switch(status){ | |
103 | case punycode_success: | |
104 | return U_ZERO_ERROR; | |
105 | case punycode_bad_input: /* Input is invalid. */ | |
106 | return U_INVALID_CHAR_FOUND; | |
107 | case punycode_big_output: /* Output would exceed the space provided. */ | |
108 | return U_BUFFER_OVERFLOW_ERROR; | |
109 | case punycode_overflow : /* Input requires wider integers to process. */ | |
110 | return U_INDEX_OUTOFBOUNDS_ERROR; | |
111 | default: | |
112 | return U_INTERNAL_PROGRAM_ERROR; | |
113 | } | |
114 | } | |
115 | ||
116 | static inline int32_t convertASCIIToUChars(const char* src,UChar* dest, int32_t length){ | |
117 | int i; | |
118 | for(i=0;i<length;i++){ | |
119 | dest[i] = src[i]; | |
120 | } | |
121 | return i; | |
122 | } | |
123 | static inline int32_t convertUCharsToASCII(const UChar* src,char* dest, int32_t length){ | |
124 | int i; | |
125 | for(i=0;i<length;i++){ | |
126 | dest[i] = (char)src[i]; | |
127 | } | |
128 | return i; | |
129 | } | |
130 | // wrapper around the reference Punycode implementation | |
131 | static int32_t convertToPuny(const UChar* src, int32_t srcLength, | |
132 | UChar* dest, int32_t destCapacity, | |
133 | UErrorCode& status){ | |
134 | uint32_t b1Stack[MAX_LABEL_BUFFER_SIZE]; | |
135 | int32_t b1Len = 0, b1Capacity = MAX_LABEL_BUFFER_SIZE; | |
136 | uint32_t* b1 = b1Stack; | |
137 | char b2Stack[MAX_LABEL_BUFFER_SIZE]; | |
138 | char* b2 = b2Stack; | |
139 | int32_t b2Len =MAX_LABEL_BUFFER_SIZE ; | |
140 | punycode_status error; | |
141 | unsigned char* caseFlags = NULL; | |
142 | ||
143 | u_strToUTF32((UChar32*)b1,b1Capacity,&b1Len,src,srcLength,&status); | |
144 | if(status == U_BUFFER_OVERFLOW_ERROR){ | |
145 | // redo processing of string | |
146 | /* we do not have enough room so grow the buffer*/ | |
147 | b1 = (uint32_t*) uprv_malloc(b1Len * sizeof(uint32_t)); | |
148 | if(b1==NULL){ | |
149 | status = U_MEMORY_ALLOCATION_ERROR; | |
150 | goto CLEANUP; | |
151 | } | |
152 | ||
153 | status = U_ZERO_ERROR; // reset error | |
154 | ||
155 | u_strToUTF32((UChar32*)b1,b1Len,&b1Len,src,srcLength,&status); | |
156 | } | |
157 | if(U_FAILURE(status)){ | |
158 | goto CLEANUP; | |
159 | } | |
160 | ||
161 | //caseFlags = (unsigned char*) uprv_malloc(b1Len *sizeof(unsigned char)); | |
162 | ||
163 | error = punycode_encode(b1Len,b1,caseFlags, (uint32_t*)&b2Len, b2); | |
164 | status = getError(error); | |
165 | ||
166 | if(status == U_BUFFER_OVERFLOW_ERROR){ | |
167 | /* we do not have enough room so grow the buffer*/ | |
168 | b2 = (char*) uprv_malloc( b2Len * sizeof(char)); | |
169 | if(b2==NULL){ | |
170 | status = U_MEMORY_ALLOCATION_ERROR; | |
171 | goto CLEANUP; | |
172 | } | |
173 | ||
174 | status = U_ZERO_ERROR; // reset error | |
175 | ||
176 | punycode_status error = punycode_encode(b1Len,b1,caseFlags, (uint32_t*)&b2Len, b2); | |
177 | status = getError(error); | |
178 | } | |
179 | if(U_FAILURE(status)){ | |
180 | goto CLEANUP; | |
181 | } | |
182 | ||
183 | if(b2Len < destCapacity){ | |
184 | convertASCIIToUChars(b2,dest,b2Len); | |
185 | }else{ | |
186 | status =U_BUFFER_OVERFLOW_ERROR; | |
187 | } | |
188 | ||
189 | CLEANUP: | |
190 | if(b1Stack != b1){ | |
191 | uprv_free(b1); | |
192 | } | |
193 | if(b2Stack != b2){ | |
194 | uprv_free(b2); | |
195 | } | |
196 | uprv_free(caseFlags); | |
197 | ||
198 | return b2Len; | |
199 | } | |
200 | ||
201 | static int32_t convertFromPuny( const UChar* src, int32_t srcLength, | |
202 | UChar* dest, int32_t destCapacity, | |
203 | UErrorCode& status){ | |
204 | char b1Stack[MAX_LABEL_BUFFER_SIZE]; | |
205 | char* b1 = b1Stack; | |
206 | int32_t destLen =0; | |
207 | ||
208 | convertUCharsToASCII(src, b1,srcLength); | |
209 | ||
210 | uint32_t b2Stack[MAX_LABEL_BUFFER_SIZE]; | |
211 | uint32_t* b2 = b2Stack; | |
212 | int32_t b2Len =MAX_LABEL_BUFFER_SIZE; | |
213 | unsigned char* caseFlags = NULL; //(unsigned char*) uprv_malloc(srcLength * sizeof(unsigned char*)); | |
214 | punycode_status error = punycode_decode(srcLength,b1,(uint32_t*)&b2Len,b2,caseFlags); | |
215 | status = getError(error); | |
216 | if(status == U_BUFFER_OVERFLOW_ERROR){ | |
217 | b2 = (uint32_t*) uprv_malloc(b2Len * sizeof(uint32_t)); | |
218 | if(b2 == NULL){ | |
219 | status = U_MEMORY_ALLOCATION_ERROR; | |
220 | goto CLEANUP; | |
221 | } | |
222 | error = punycode_decode(srcLength,b1,(uint32_t*)&b2Len,b2,caseFlags); | |
223 | status = getError(error); | |
224 | } | |
225 | ||
226 | if(U_FAILURE(status)){ | |
227 | goto CLEANUP; | |
228 | } | |
229 | ||
230 | u_strFromUTF32(dest,destCapacity,&destLen,(UChar32*)b2,b2Len,&status); | |
231 | ||
232 | CLEANUP: | |
233 | if(b1Stack != b1){ | |
234 | uprv_free(b1); | |
235 | } | |
236 | if(b2Stack != b2){ | |
237 | uprv_free(b2); | |
238 | } | |
239 | uprv_free(caseFlags); | |
240 | ||
241 | return destLen; | |
242 | } | |
243 | ||
244 | ||
245 | U_CFUNC int32_t U_EXPORT2 | |
246 | idnaref_toASCII(const UChar* src, int32_t srcLength, | |
247 | UChar* dest, int32_t destCapacity, | |
248 | int32_t options, | |
249 | UParseError* parseError, | |
250 | UErrorCode* status){ | |
251 | ||
252 | if(status == NULL || U_FAILURE(*status)){ | |
253 | return 0; | |
254 | } | |
255 | if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ | |
256 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
257 | return 0; | |
258 | } | |
259 | UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE]; | |
260 | //initialize pointers to stack buffers | |
261 | UChar *b1 = b1Stack, *b2 = b2Stack; | |
262 | int32_t b1Len=0, b2Len=0, | |
263 | b1Capacity = MAX_LABEL_BUFFER_SIZE, | |
264 | b2Capacity = MAX_LABEL_BUFFER_SIZE , | |
265 | reqLength=0; | |
266 | ||
267 | //get the options | |
268 | UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0); | |
269 | UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0); | |
270 | ||
271 | UBool* caseFlags = NULL; | |
272 | ||
273 | // assume the source contains all ascii codepoints | |
274 | UBool srcIsASCII = TRUE; | |
275 | // assume the source contains all LDH codepoints | |
276 | UBool srcIsLDH = TRUE; | |
277 | int32_t j=0; | |
278 | ||
279 | if(srcLength == -1){ | |
280 | srcLength = u_strlen(src); | |
281 | } | |
282 | ||
283 | // step 1 | |
284 | for( j=0;j<srcLength;j++){ | |
285 | if(src[j] > 0x7F){ | |
286 | srcIsASCII = FALSE; | |
287 | } | |
288 | b1[b1Len++] = src[j]; | |
289 | } | |
290 | ||
291 | NamePrepTransform* prep = TestIDNA::getInstance(*status); | |
292 | if(U_FAILURE(*status)){ | |
293 | goto CLEANUP; | |
294 | } | |
295 | ||
296 | // step 2 is performed only if the source contains non ASCII | |
297 | if (!srcIsASCII) { | |
298 | b1Len = prep->process(src,srcLength,b1, b1Capacity,allowUnassigned,parseError,*status); | |
299 | ||
300 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
301 | // redo processing of string | |
302 | /* we do not have enough room so grow the buffer*/ | |
303 | b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
304 | if(b1==NULL){ | |
305 | *status = U_MEMORY_ALLOCATION_ERROR; | |
306 | goto CLEANUP; | |
307 | } | |
308 | ||
309 | *status = U_ZERO_ERROR; // reset error | |
310 | ||
311 | b1Len = prep->process(src,srcLength,b1, b1Len,allowUnassigned, parseError, *status); | |
312 | } | |
313 | // error bail out | |
314 | if(U_FAILURE(*status)){ | |
315 | goto CLEANUP; | |
316 | } | |
317 | } | |
318 | ||
319 | if(b1Len == 0){ | |
320 | *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; | |
321 | goto CLEANUP; | |
322 | } | |
323 | ||
324 | srcIsASCII = TRUE; | |
325 | // step 3 & 4 | |
326 | for( j=0;j<b1Len;j++){ | |
327 | if(b1[j] > 0x7F){// check if output of usprep_prepare is all ASCII | |
328 | srcIsASCII = FALSE; | |
329 | }else if(prep->isLDHChar(b1[j])==FALSE){ // if the char is in ASCII range verify that it is an LDH character{ | |
330 | srcIsLDH = FALSE; | |
331 | } | |
332 | } | |
333 | ||
334 | if(useSTD3ASCIIRules == TRUE){ | |
335 | // verify 3a and 3b | |
336 | if( srcIsLDH == FALSE /* source contains some non-LDH characters */ | |
337 | || b1[0] == HYPHEN || b1[b1Len-1] == HYPHEN){ | |
338 | *status = U_IDNA_STD3_ASCII_RULES_ERROR; | |
339 | goto CLEANUP; | |
340 | } | |
341 | } | |
342 | if(srcIsASCII){ | |
343 | if(b1Len <= destCapacity){ | |
344 | u_memmove(dest, b1, b1Len); | |
345 | reqLength = b1Len; | |
346 | }else{ | |
347 | reqLength = b1Len; | |
348 | goto CLEANUP; | |
349 | } | |
350 | }else{ | |
351 | // step 5 : verify the sequence does not begin with ACE prefix | |
352 | if(!startsWithPrefix(b1,b1Len)){ | |
353 | ||
354 | //step 6: encode the sequence with punycode | |
355 | //caseFlags = (UBool*) uprv_malloc(b1Len * sizeof(UBool)); | |
356 | ||
357 | b2Len = convertToPuny(b1,b1Len, b2,b2Capacity,*status); | |
358 | //b2Len = u_strToPunycode(b2,b2Capacity,b1,b1Len, caseFlags, status); | |
359 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
360 | // redo processing of string | |
361 | /* we do not have enough room so grow the buffer*/ | |
362 | b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); | |
363 | if(b2 == NULL){ | |
364 | *status = U_MEMORY_ALLOCATION_ERROR; | |
365 | goto CLEANUP; | |
366 | } | |
367 | ||
368 | *status = U_ZERO_ERROR; // reset error | |
369 | ||
370 | b2Len = convertToPuny(b1, b1Len, b2, b2Len, *status); | |
371 | //b2Len = u_strToPunycode(b2,b2Len,b1,b1Len, caseFlags, status); | |
372 | ||
373 | } | |
374 | //error bail out | |
375 | if(U_FAILURE(*status)){ | |
376 | goto CLEANUP; | |
377 | } | |
378 | reqLength = b2Len+ACE_PREFIX_LENGTH; | |
379 | ||
380 | if(reqLength > destCapacity){ | |
381 | *status = U_BUFFER_OVERFLOW_ERROR; | |
382 | goto CLEANUP; | |
383 | } | |
384 | //Step 7: prepend the ACE prefix | |
385 | u_memcpy(dest, ACE_PREFIX, ACE_PREFIX_LENGTH); | |
386 | //Step 6: copy the contents in b2 into dest | |
387 | u_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len); | |
388 | ||
389 | }else{ | |
390 | *status = U_IDNA_ACE_PREFIX_ERROR; | |
391 | goto CLEANUP; | |
392 | } | |
393 | } | |
394 | ||
395 | if(reqLength > MAX_LABEL_LENGTH){ | |
396 | *status = U_IDNA_LABEL_TOO_LONG_ERROR; | |
397 | } | |
398 | ||
399 | CLEANUP: | |
400 | if(b1 != b1Stack){ | |
401 | uprv_free(b1); | |
402 | } | |
403 | if(b2 != b2Stack){ | |
404 | uprv_free(b2); | |
405 | } | |
406 | uprv_free(caseFlags); | |
407 | ||
408 | // delete prep; | |
409 | ||
410 | return u_terminateUChars(dest, destCapacity, reqLength, status); | |
411 | } | |
412 | ||
413 | ||
414 | U_CFUNC int32_t U_EXPORT2 | |
415 | idnaref_toUnicode(const UChar* src, int32_t srcLength, | |
416 | UChar* dest, int32_t destCapacity, | |
417 | int32_t options, | |
418 | UParseError* parseError, | |
419 | UErrorCode* status){ | |
420 | ||
421 | if(status == NULL || U_FAILURE(*status)){ | |
422 | return 0; | |
423 | } | |
424 | if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ | |
425 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
426 | return 0; | |
427 | } | |
428 | ||
429 | ||
430 | ||
431 | UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE]; | |
432 | ||
433 | //initialize pointers to stack buffers | |
434 | UChar *b1 = b1Stack, *b2 = b2Stack, *b1Prime=NULL, *b3=b3Stack; | |
435 | int32_t b1Len, b2Len, b1PrimeLen, b3Len, | |
436 | b1Capacity = MAX_LABEL_BUFFER_SIZE, | |
437 | b2Capacity = MAX_LABEL_BUFFER_SIZE, | |
438 | b3Capacity = MAX_LABEL_BUFFER_SIZE, | |
439 | reqLength=0; | |
440 | // UParseError parseError; | |
441 | ||
442 | NamePrepTransform* prep = TestIDNA::getInstance(*status); | |
443 | b1Len = 0; | |
444 | UBool* caseFlags = NULL; | |
445 | ||
446 | //get the options | |
447 | UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0); | |
448 | UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0); | |
449 | ||
450 | UBool srcIsASCII = TRUE; | |
451 | UBool srcIsLDH = TRUE; | |
452 | int32_t failPos =0; | |
453 | ||
454 | if(U_FAILURE(*status)){ | |
455 | goto CLEANUP; | |
456 | } | |
457 | // step 1: find out if all the codepoints in src are ASCII | |
458 | if(srcLength==-1){ | |
459 | srcLength = 0; | |
460 | for(;src[srcLength]!=0;){ | |
461 | if(src[srcLength]> 0x7f){ | |
462 | srcIsASCII = FALSE; | |
463 | }if(prep->isLDHChar(src[srcLength])==FALSE){ | |
464 | // here we do not assemble surrogates | |
465 | // since we know that LDH code points | |
466 | // are in the ASCII range only | |
467 | srcIsLDH = FALSE; | |
468 | failPos = srcLength; | |
469 | } | |
470 | srcLength++; | |
471 | } | |
472 | }else{ | |
473 | for(int32_t j=0; j<srcLength; j++){ | |
474 | if(src[j]> 0x7f){ | |
475 | srcIsASCII = FALSE; | |
476 | }else if(prep->isLDHChar(src[j])==FALSE){ | |
477 | // here we do not assemble surrogates | |
478 | // since we know that LDH code points | |
479 | // are in the ASCII range only | |
480 | srcIsLDH = FALSE; | |
481 | failPos = j; | |
482 | } | |
483 | } | |
484 | } | |
485 | ||
486 | if(srcIsASCII == FALSE){ | |
487 | // step 2: process the string | |
488 | b1Len = prep->process(src,srcLength,b1,b1Capacity,allowUnassigned, parseError, *status); | |
489 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
490 | // redo processing of string | |
491 | /* we do not have enough room so grow the buffer*/ | |
492 | b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
493 | if(b1==NULL){ | |
494 | *status = U_MEMORY_ALLOCATION_ERROR; | |
495 | goto CLEANUP; | |
496 | } | |
497 | ||
498 | *status = U_ZERO_ERROR; // reset error | |
499 | ||
500 | b1Len = prep->process(src,srcLength,b1, b1Len,allowUnassigned, parseError, *status); | |
501 | } | |
502 | //bail out on error | |
503 | if(U_FAILURE(*status)){ | |
504 | goto CLEANUP; | |
505 | } | |
506 | }else{ | |
507 | ||
508 | // copy everything to b1 | |
509 | if(srcLength < b1Capacity){ | |
510 | u_memmove(b1, src, srcLength); | |
511 | }else{ | |
512 | /* we do not have enough room so grow the buffer*/ | |
513 | b1 = (UChar*) uprv_malloc(srcLength * U_SIZEOF_UCHAR); | |
514 | if(b1==NULL){ | |
515 | *status = U_MEMORY_ALLOCATION_ERROR; | |
516 | goto CLEANUP; | |
517 | } | |
518 | u_memmove(b1, src, srcLength); | |
519 | } | |
520 | b1Len = srcLength; | |
521 | } | |
522 | //step 3: verify ACE Prefix | |
523 | if(startsWithPrefix(src,srcLength)){ | |
524 | ||
525 | //step 4: Remove the ACE Prefix | |
526 | b1Prime = b1 + ACE_PREFIX_LENGTH; | |
527 | b1PrimeLen = b1Len - ACE_PREFIX_LENGTH; | |
528 | ||
529 | //step 5: Decode using punycode | |
530 | b2Len = convertFromPuny(b1Prime,b1PrimeLen, b2, b2Capacity, *status); | |
531 | //b2Len = u_strFromPunycode(b2, b2Capacity,b1Prime,b1PrimeLen, caseFlags, status); | |
532 | ||
533 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
534 | // redo processing of string | |
535 | /* we do not have enough room so grow the buffer*/ | |
536 | b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); | |
537 | if(b2==NULL){ | |
538 | *status = U_MEMORY_ALLOCATION_ERROR; | |
539 | goto CLEANUP; | |
540 | } | |
541 | ||
542 | *status = U_ZERO_ERROR; // reset error | |
543 | ||
544 | b2Len = convertFromPuny(b1Prime,b1PrimeLen, b2, b2Len, *status); | |
545 | //b2Len = u_strFromPunycode(b2, b2Len,b1Prime,b1PrimeLen,caseFlags, status); | |
546 | } | |
547 | ||
548 | ||
549 | //step 6:Apply toASCII | |
550 | b3Len = idnaref_toASCII(b2,b2Len,b3,b3Capacity,options,parseError, status); | |
551 | ||
552 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
553 | // redo processing of string | |
554 | /* we do not have enough room so grow the buffer*/ | |
555 | b3 = (UChar*) uprv_malloc(b3Len * U_SIZEOF_UCHAR); | |
556 | if(b3==NULL){ | |
557 | *status = U_MEMORY_ALLOCATION_ERROR; | |
558 | goto CLEANUP; | |
559 | } | |
560 | ||
561 | *status = U_ZERO_ERROR; // reset error | |
562 | ||
563 | b3Len = idnaref_toASCII(b2,b2Len,b3,b3Len, options, parseError, status); | |
564 | ||
565 | } | |
566 | //bail out on error | |
567 | if(U_FAILURE(*status)){ | |
568 | goto CLEANUP; | |
569 | } | |
570 | ||
571 | //step 7: verify | |
572 | if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=0){ | |
573 | *status = U_IDNA_VERIFICATION_ERROR; | |
574 | goto CLEANUP; | |
575 | } | |
576 | ||
577 | //step 8: return output of step 5 | |
578 | reqLength = b2Len; | |
579 | if(b2Len <= destCapacity) { | |
580 | u_memmove(dest, b2, b2Len); | |
581 | } | |
582 | }else{ | |
583 | // verify that STD3 ASCII rules are satisfied | |
584 | if(useSTD3ASCIIRules == TRUE){ | |
585 | if( srcIsLDH == FALSE /* source contains some non-LDH characters */ | |
586 | || src[0] == HYPHEN || src[srcLength-1] == HYPHEN){ | |
587 | *status = U_IDNA_STD3_ASCII_RULES_ERROR; | |
588 | ||
589 | /* populate the parseError struct */ | |
590 | if(srcIsLDH==FALSE){ | |
591 | // failPos is always set the index of failure | |
592 | uprv_syntaxError(src,failPos, srcLength,parseError); | |
593 | }else if(src[0] == HYPHEN){ | |
594 | // fail position is 0 | |
595 | uprv_syntaxError(src,0,srcLength,parseError); | |
596 | }else{ | |
597 | // the last index in the source is always length-1 | |
598 | uprv_syntaxError(src, (srcLength>0) ? srcLength-1 : srcLength, srcLength,parseError); | |
599 | } | |
600 | ||
601 | goto CLEANUP; | |
602 | } | |
603 | } | |
604 | //copy the source to destination | |
605 | if(srcLength <= destCapacity){ | |
606 | u_memmove(dest, src, srcLength); | |
607 | } | |
608 | reqLength = srcLength; | |
609 | } | |
610 | ||
611 | CLEANUP: | |
612 | ||
613 | if(b1 != b1Stack){ | |
614 | uprv_free(b1); | |
615 | } | |
616 | if(b2 != b2Stack){ | |
617 | uprv_free(b2); | |
618 | } | |
619 | uprv_free(caseFlags); | |
620 | ||
621 | // The RFC states that | |
622 | // <quote> | |
623 | // ToUnicode never fails. If any step fails, then the original input | |
624 | // is returned immediately in that step. | |
625 | // </quote> | |
626 | // So if any step fails lets copy source to destination | |
627 | if(U_FAILURE(*status)){ | |
628 | //copy the source to destination | |
629 | if(dest && srcLength <= destCapacity){ | |
630 | if(srcLength == -1) { | |
631 | u_memmove(dest, src, u_strlen(src)); | |
632 | } else { | |
633 | u_memmove(dest, src, srcLength); | |
634 | } | |
635 | } | |
636 | reqLength = srcLength; | |
637 | *status = U_ZERO_ERROR; | |
638 | } | |
639 | return u_terminateUChars(dest, destCapacity, reqLength, status); | |
640 | } | |
641 | ||
642 | ||
643 | static int32_t | |
644 | getNextSeparator(UChar *src,int32_t srcLength,NamePrepTransform* prep, | |
645 | UChar **limit, | |
646 | UBool *done, | |
647 | UErrorCode *status){ | |
648 | if(srcLength == -1){ | |
649 | int32_t i; | |
650 | for(i=0 ; ;i++){ | |
651 | if(src[i] == 0){ | |
652 | *limit = src + i; // point to null | |
653 | *done = TRUE; | |
654 | return i; | |
655 | } | |
656 | if(prep->isLabelSeparator(src[i],*status)){ | |
657 | *limit = src + (i+1); // go past the delimiter | |
658 | return i; | |
659 | ||
660 | } | |
661 | } | |
662 | }else{ | |
663 | int32_t i; | |
664 | for(i=0;i<srcLength;i++){ | |
665 | if(prep->isLabelSeparator(src[i],*status)){ | |
666 | *limit = src + (i+1); // go past the delimiter | |
667 | return i; | |
668 | } | |
669 | } | |
670 | // we have not found the delimiter | |
671 | if(i==srcLength){ | |
672 | *limit = src+srcLength; | |
673 | *done = TRUE; | |
674 | } | |
675 | return i; | |
676 | } | |
677 | } | |
678 | ||
679 | U_CFUNC int32_t U_EXPORT2 | |
680 | idnaref_IDNToASCII( const UChar* src, int32_t srcLength, | |
681 | UChar* dest, int32_t destCapacity, | |
682 | int32_t options, | |
683 | UParseError* parseError, | |
684 | UErrorCode* status){ | |
685 | ||
686 | if(status == NULL || U_FAILURE(*status)){ | |
687 | return 0; | |
688 | } | |
689 | if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ | |
690 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
691 | return 0; | |
692 | } | |
693 | ||
694 | int32_t reqLength = 0; | |
695 | // UParseError parseError; | |
696 | ||
697 | NamePrepTransform* prep = TestIDNA::getInstance(*status); | |
698 | ||
699 | //initialize pointers to stack buffers | |
700 | UChar b1Stack[MAX_LABEL_BUFFER_SIZE]; | |
701 | UChar *b1 = b1Stack; | |
702 | int32_t b1Len, labelLen; | |
703 | UChar* delimiter = (UChar*)src; | |
704 | UChar* labelStart = (UChar*)src; | |
705 | int32_t remainingLen = srcLength; | |
706 | int32_t b1Capacity = MAX_LABEL_BUFFER_SIZE; | |
707 | ||
708 | //get the options | |
709 | // UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0); | |
710 | // UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0); | |
711 | UBool done = FALSE; | |
712 | ||
713 | if(U_FAILURE(*status)){ | |
714 | goto CLEANUP; | |
715 | } | |
716 | ||
717 | ||
718 | if(srcLength == -1){ | |
719 | for(;;){ | |
720 | ||
721 | if(*delimiter == 0){ | |
722 | break; | |
723 | } | |
724 | ||
725 | labelLen = getNextSeparator(labelStart, -1, prep, &delimiter, &done, status); | |
726 | b1Len = 0; | |
727 | if(!(labelLen==0 && done)){// make sure this is not a root label separator. | |
728 | ||
729 | b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Capacity, | |
730 | options, parseError, status); | |
731 | ||
732 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
733 | // redo processing of string | |
734 | /* we do not have enough room so grow the buffer*/ | |
735 | b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
736 | if(b1==NULL){ | |
737 | *status = U_MEMORY_ALLOCATION_ERROR; | |
738 | goto CLEANUP; | |
739 | } | |
740 | ||
741 | *status = U_ZERO_ERROR; // reset error | |
742 | ||
743 | b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Len, | |
744 | options, parseError, status); | |
745 | ||
746 | } | |
747 | } | |
748 | ||
749 | if(U_FAILURE(*status)){ | |
750 | goto CLEANUP; | |
751 | } | |
752 | int32_t tempLen = (reqLength + b1Len ); | |
753 | // copy to dest | |
754 | if( tempLen< destCapacity){ | |
755 | u_memmove(dest+reqLength, b1, b1Len); | |
756 | } | |
757 | ||
758 | reqLength = tempLen; | |
759 | ||
760 | // add the label separator | |
761 | if(done == FALSE){ | |
762 | if(reqLength < destCapacity){ | |
763 | dest[reqLength] = FULL_STOP; | |
764 | } | |
765 | reqLength++; | |
766 | } | |
767 | ||
768 | labelStart = delimiter; | |
769 | } | |
770 | }else{ | |
771 | for(;;){ | |
772 | ||
773 | if(delimiter == src+srcLength){ | |
774 | break; | |
775 | } | |
776 | ||
777 | labelLen = getNextSeparator(labelStart, remainingLen, prep, &delimiter, &done, status); | |
778 | ||
779 | b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Capacity, | |
780 | options,parseError, status); | |
781 | ||
782 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
783 | // redo processing of string | |
784 | /* we do not have enough room so grow the buffer*/ | |
785 | b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
786 | if(b1==NULL){ | |
787 | *status = U_MEMORY_ALLOCATION_ERROR; | |
788 | goto CLEANUP; | |
789 | } | |
790 | ||
791 | *status = U_ZERO_ERROR; // reset error | |
792 | ||
793 | b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Len, | |
794 | options, parseError, status); | |
795 | ||
796 | } | |
797 | ||
798 | if(U_FAILURE(*status)){ | |
799 | goto CLEANUP; | |
800 | } | |
801 | int32_t tempLen = (reqLength + b1Len ); | |
802 | // copy to dest | |
803 | if( tempLen< destCapacity){ | |
804 | u_memmove(dest+reqLength, b1, b1Len); | |
805 | } | |
806 | ||
807 | reqLength = tempLen; | |
808 | ||
809 | // add the label separator | |
810 | if(done == FALSE){ | |
811 | if(reqLength < destCapacity){ | |
812 | dest[reqLength] = FULL_STOP; | |
813 | } | |
814 | reqLength++; | |
815 | } | |
816 | ||
817 | labelStart = delimiter; | |
818 | remainingLen = static_cast<int32_t>(srcLength - (delimiter - src)); | |
819 | } | |
820 | } | |
821 | ||
822 | ||
823 | CLEANUP: | |
824 | ||
825 | if(b1 != b1Stack){ | |
826 | uprv_free(b1); | |
827 | } | |
828 | ||
829 | // delete prep; | |
830 | ||
831 | return u_terminateUChars(dest, destCapacity, reqLength, status); | |
832 | } | |
833 | ||
834 | U_CFUNC int32_t U_EXPORT2 | |
835 | idnaref_IDNToUnicode( const UChar* src, int32_t srcLength, | |
836 | UChar* dest, int32_t destCapacity, | |
837 | int32_t options, | |
838 | UParseError* parseError, | |
839 | UErrorCode* status){ | |
840 | ||
841 | if(status == NULL || U_FAILURE(*status)){ | |
842 | return 0; | |
843 | } | |
844 | if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ | |
845 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
846 | return 0; | |
847 | } | |
848 | ||
849 | int32_t reqLength = 0; | |
850 | ||
851 | UBool done = FALSE; | |
852 | ||
853 | NamePrepTransform* prep = TestIDNA::getInstance(*status); | |
854 | ||
855 | //initialize pointers to stack buffers | |
856 | UChar b1Stack[MAX_LABEL_BUFFER_SIZE]; | |
857 | UChar *b1 = b1Stack; | |
858 | int32_t b1Len, labelLen; | |
859 | UChar* delimiter = (UChar*)src; | |
860 | UChar* labelStart = (UChar*)src; | |
861 | int32_t remainingLen = srcLength; | |
862 | int32_t b1Capacity = MAX_LABEL_BUFFER_SIZE; | |
863 | ||
864 | //get the options | |
865 | // UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0); | |
866 | // UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0); | |
867 | ||
868 | if(U_FAILURE(*status)){ | |
869 | goto CLEANUP; | |
870 | } | |
871 | ||
872 | if(srcLength == -1){ | |
873 | for(;;){ | |
874 | ||
875 | if(*delimiter == 0){ | |
876 | break; | |
877 | } | |
878 | ||
879 | labelLen = getNextSeparator(labelStart, -1, prep, &delimiter, &done, status); | |
880 | ||
881 | if(labelLen==0 && done==FALSE){ | |
882 | *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; | |
883 | } | |
884 | b1Len = idnaref_toUnicode(labelStart, labelLen, b1, b1Capacity, | |
885 | options, parseError, status); | |
886 | ||
887 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
888 | // redo processing of string | |
889 | /* we do not have enough room so grow the buffer*/ | |
890 | b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
891 | if(b1==NULL){ | |
892 | *status = U_MEMORY_ALLOCATION_ERROR; | |
893 | goto CLEANUP; | |
894 | } | |
895 | ||
896 | *status = U_ZERO_ERROR; // reset error | |
897 | ||
898 | b1Len = idnaref_toUnicode( labelStart, labelLen, b1, b1Len, | |
899 | options, parseError, status); | |
900 | ||
901 | } | |
902 | ||
903 | if(U_FAILURE(*status)){ | |
904 | goto CLEANUP; | |
905 | } | |
906 | int32_t tempLen = (reqLength + b1Len ); | |
907 | // copy to dest | |
908 | if( tempLen< destCapacity){ | |
909 | u_memmove(dest+reqLength, b1, b1Len); | |
910 | } | |
911 | ||
912 | reqLength = tempLen; | |
913 | // add the label separator | |
914 | if(done == FALSE){ | |
915 | if(reqLength < destCapacity){ | |
916 | dest[reqLength] = FULL_STOP; | |
917 | } | |
918 | reqLength++; | |
919 | } | |
920 | ||
921 | labelStart = delimiter; | |
922 | } | |
923 | }else{ | |
924 | for(;;){ | |
925 | ||
926 | if(delimiter == src+srcLength){ | |
927 | break; | |
928 | } | |
929 | ||
930 | labelLen = getNextSeparator(labelStart, remainingLen, prep, &delimiter, &done, status); | |
931 | ||
932 | if(labelLen==0 && done==FALSE){ | |
933 | *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; | |
934 | } | |
935 | ||
936 | b1Len = idnaref_toUnicode( labelStart,labelLen, b1, b1Capacity, | |
937 | options, parseError, status); | |
938 | ||
939 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
940 | // redo processing of string | |
941 | /* we do not have enough room so grow the buffer*/ | |
942 | b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
943 | if(b1==NULL){ | |
944 | *status = U_MEMORY_ALLOCATION_ERROR; | |
945 | goto CLEANUP; | |
946 | } | |
947 | ||
948 | *status = U_ZERO_ERROR; // reset error | |
949 | ||
950 | b1Len = idnaref_toUnicode( labelStart, labelLen, b1, b1Len, | |
951 | options, parseError, status); | |
952 | ||
953 | } | |
954 | ||
955 | if(U_FAILURE(*status)){ | |
956 | goto CLEANUP; | |
957 | } | |
958 | int32_t tempLen = (reqLength + b1Len ); | |
959 | // copy to dest | |
960 | if( tempLen< destCapacity){ | |
961 | u_memmove(dest+reqLength, b1, b1Len); | |
962 | } | |
963 | ||
964 | reqLength = tempLen; | |
965 | ||
966 | // add the label separator | |
967 | if(done == FALSE){ | |
968 | if(reqLength < destCapacity){ | |
969 | dest[reqLength] = FULL_STOP; | |
970 | } | |
971 | reqLength++; | |
972 | } | |
973 | ||
974 | labelStart = delimiter; | |
975 | remainingLen = static_cast<int32_t>(srcLength - (delimiter - src)); | |
976 | } | |
977 | } | |
978 | ||
979 | CLEANUP: | |
980 | ||
981 | if(b1 != b1Stack){ | |
982 | uprv_free(b1); | |
983 | } | |
984 | ||
985 | // delete prep; | |
986 | ||
987 | return u_terminateUChars(dest, destCapacity, reqLength, status); | |
988 | } | |
989 | ||
990 | U_CFUNC int32_t U_EXPORT2 | |
991 | idnaref_compare( const UChar *s1, int32_t length1, | |
992 | const UChar *s2, int32_t length2, | |
993 | int32_t options, | |
994 | UErrorCode* status){ | |
995 | ||
996 | if(status == NULL || U_FAILURE(*status)){ | |
997 | return -1; | |
998 | } | |
999 | ||
1000 | UChar b1Stack[MAX_IDN_BUFFER_SIZE], b2Stack[MAX_IDN_BUFFER_SIZE]; | |
1001 | UChar *b1 = b1Stack, *b2 = b2Stack; | |
1002 | int32_t b1Len, b2Len, b1Capacity = MAX_IDN_BUFFER_SIZE, b2Capacity = MAX_IDN_BUFFER_SIZE; | |
1003 | int32_t result = -1; | |
1004 | ||
1005 | UParseError parseError; | |
1006 | ||
1007 | b1Len = idnaref_IDNToASCII(s1, length1, b1, b1Capacity, options, &parseError, status); | |
1008 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
1009 | // redo processing of string | |
1010 | /* we do not have enough room so grow the buffer*/ | |
1011 | b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
1012 | if(b1==NULL){ | |
1013 | *status = U_MEMORY_ALLOCATION_ERROR; | |
1014 | goto CLEANUP; | |
1015 | } | |
1016 | ||
1017 | *status = U_ZERO_ERROR; // reset error | |
1018 | ||
1019 | b1Len = idnaref_IDNToASCII(s1,length1,b1,b1Len, options, &parseError, status); | |
1020 | ||
1021 | } | |
1022 | ||
1023 | b2Len = idnaref_IDNToASCII(s2,length2,b2,b2Capacity,options, &parseError, status); | |
1024 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
1025 | // redo processing of string | |
1026 | /* we do not have enough room so grow the buffer*/ | |
1027 | b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); | |
1028 | if(b2==NULL){ | |
1029 | *status = U_MEMORY_ALLOCATION_ERROR; | |
1030 | goto CLEANUP; | |
1031 | } | |
1032 | ||
1033 | *status = U_ZERO_ERROR; // reset error | |
1034 | ||
1035 | b2Len = idnaref_IDNToASCII(s2,length2,b2,b2Len,options, &parseError, status); | |
1036 | ||
1037 | } | |
1038 | // when toASCII is applied all label separators are replaced with FULL_STOP | |
1039 | result = compareCaseInsensitiveASCII(b1,b1Len,b2,b2Len); | |
1040 | ||
1041 | CLEANUP: | |
1042 | if(b1 != b1Stack){ | |
1043 | uprv_free(b1); | |
1044 | } | |
1045 | ||
1046 | if(b2 != b2Stack){ | |
1047 | uprv_free(b2); | |
1048 | } | |
1049 | ||
1050 | return result; | |
1051 | } | |
1052 | #endif /* #if !UCONFIG_NO_IDNA && !UCONFIG_NO_TRANSLITERATION */ | |