]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | ******************************************************************************* | |
3 | * | |
4 | * Copyright (C) 2003-2011, International Business Machines | |
5 | * Corporation and others. All Rights Reserved. | |
6 | * | |
7 | ******************************************************************************* | |
8 | * file name: idnaref.cpp | |
9 | * encoding: US-ASCII | |
10 | * tab size: 8 (not used) | |
11 | * indentation:4 | |
12 | * | |
13 | * created on: 2003feb1 | |
14 | * created by: Ram Viswanadha | |
15 | */ | |
16 | ||
17 | #include "unicode/utypes.h" | |
18 | ||
19 | #if !UCONFIG_NO_IDNA && !UCONFIG_NO_TRANSLITERATION | |
20 | #include "idnaref.h" | |
21 | #include "punyref.h" | |
22 | #include "ustr_imp.h" | |
23 | #include "cmemory.h" | |
24 | #include "sprpimpl.h" | |
25 | #include "nptrans.h" | |
26 | #include "testidna.h" | |
27 | #include "punycode.h" | |
28 | #include "unicode/ustring.h" | |
29 | ||
30 | /* The official IDNA ACE prefix is "xn--" */ | |
31 | static const UChar ACE_PREFIX[] ={ 0x0078,0x006E,0x002d,0x002d } ; | |
32 | #define ACE_PREFIX_LENGTH 4 | |
33 | ||
34 | #define MAX_LABEL_LENGTH 63 | |
35 | #define HYPHEN 0x002D | |
36 | /* A label may not be longer than MAX_LABEL_LENGTH (63); the working buffers below are larger than that */ | |
37 | #define MAX_LABEL_BUFFER_SIZE 100 | |
38 | #define MAX_IDN_BUFFER_SIZE 300 | |
39 | ||
40 | #define CAPITAL_A 0x0041 | |
41 | #define CAPITAL_Z 0x005A | |
42 | #define LOWER_CASE_DELTA 0x0020 | |
43 | #define FULL_STOP 0x002E | |
44 | ||
45 | ||
46 | inline static UBool | |
47 | startsWithPrefix(const UChar* src , int32_t srcLength){ | |
48 | UBool startsWithPrefix = TRUE; | |
49 | ||
50 | if(srcLength < ACE_PREFIX_LENGTH){ | |
51 | return FALSE; | |
52 | } | |
53 | ||
54 | for(int8_t i=0; i< ACE_PREFIX_LENGTH; i++){ | |
55 | if(u_tolower(src[i]) != ACE_PREFIX[i]){ | |
56 | startsWithPrefix = FALSE; | |
57 | } | |
58 | } | |
59 | return startsWithPrefix; | |
60 | } | |
61 | ||
62 | inline static UChar | |
63 | toASCIILower(UChar ch){ | |
64 | if(CAPITAL_A <= ch && ch <= CAPITAL_Z){ | |
65 | return ch + LOWER_CASE_DELTA; | |
66 | } | |
67 | return ch; | |
68 | } | |
69 | ||
70 | inline static int32_t | |
71 | compareCaseInsensitiveASCII(const UChar* s1, int32_t s1Len, | |
72 | const UChar* s2, int32_t s2Len){ | |
73 | if(s1Len != s2Len){ | |
74 | return (s1Len > s2Len) ? s1Len : s2Len; | |
75 | } | |
76 | UChar c1,c2; | |
77 | int32_t rc; | |
78 | ||
79 | for(int32_t i =0;/* no condition */;i++) { | |
80 | /* If we reach the ends of both strings then they match */ | |
81 | if(i == s1Len) { | |
82 | return 0; | |
83 | } | |
84 | ||
85 | c1 = s1[i]; | |
86 | c2 = s2[i]; | |
87 | ||
88 | /* Case-insensitive comparison */ | |
89 | if(c1!=c2) { | |
90 | rc=(int32_t)toASCIILower(c1)-(int32_t)toASCIILower(c2); | |
91 | if(rc!=0) { | |
92 | return rc; | |
93 | } | |
94 | } | |
95 | } | |
96 | ||
97 | } | |
98 | ||
99 | static UErrorCode getError(enum punycode_status status){ | |
100 | switch(status){ | |
101 | case punycode_success: | |
102 | return U_ZERO_ERROR; | |
103 | case punycode_bad_input: /* Input is invalid. */ | |
104 | return U_INVALID_CHAR_FOUND; | |
105 | case punycode_big_output: /* Output would exceed the space provided. */ | |
106 | return U_BUFFER_OVERFLOW_ERROR; | |
107 | case punycode_overflow : /* Input requires wider integers to process. */ | |
108 | return U_INDEX_OUTOFBOUNDS_ERROR; | |
109 | default: | |
110 | return U_INTERNAL_PROGRAM_ERROR; | |
111 | } | |
112 | } | |
113 | ||
114 | static inline int32_t convertASCIIToUChars(const char* src,UChar* dest, int32_t length){ | |
115 | int i; | |
116 | for(i=0;i<length;i++){ | |
117 | dest[i] = src[i]; | |
118 | } | |
119 | return i; | |
120 | } | |
121 | static inline int32_t convertUCharsToASCII(const UChar* src,char* dest, int32_t length){ | |
122 | int i; | |
123 | for(i=0;i<length;i++){ | |
124 | dest[i] = (char)src[i]; | |
125 | } | |
126 | return i; | |
127 | } | |
128 | // wrapper around the reference Punycode implementation | |
129 | static int32_t convertToPuny(const UChar* src, int32_t srcLength, | |
130 | UChar* dest, int32_t destCapacity, | |
131 | UErrorCode& status){ | |
132 | uint32_t b1Stack[MAX_LABEL_BUFFER_SIZE]; | |
133 | int32_t b1Len = 0, b1Capacity = MAX_LABEL_BUFFER_SIZE; | |
134 | uint32_t* b1 = b1Stack; | |
135 | char b2Stack[MAX_LABEL_BUFFER_SIZE]; | |
136 | char* b2 = b2Stack; | |
137 | int32_t b2Len =MAX_LABEL_BUFFER_SIZE ; | |
138 | punycode_status error; | |
139 | unsigned char* caseFlags = NULL; | |
140 | ||
141 | u_strToUTF32((UChar32*)b1,b1Capacity,&b1Len,src,srcLength,&status); | |
142 | if(status == U_BUFFER_OVERFLOW_ERROR){ | |
143 | // redo processing of string | |
144 | /* we do not have enough room so grow the buffer*/ | |
145 | b1 = (uint32_t*) uprv_malloc(b1Len * sizeof(uint32_t)); | |
146 | if(b1==NULL){ | |
147 | status = U_MEMORY_ALLOCATION_ERROR; | |
148 | goto CLEANUP; | |
149 | } | |
150 | ||
151 | status = U_ZERO_ERROR; // reset error | |
152 | ||
153 | u_strToUTF32((UChar32*)b1,b1Len,&b1Len,src,srcLength,&status); | |
154 | } | |
155 | if(U_FAILURE(status)){ | |
156 | goto CLEANUP; | |
157 | } | |
158 | ||
159 | //caseFlags = (unsigned char*) uprv_malloc(b1Len *sizeof(unsigned char)); | |
160 | ||
161 | error = punycode_encode(b1Len,b1,caseFlags, (uint32_t*)&b2Len, b2); | |
162 | status = getError(error); | |
163 | ||
164 | if(status == U_BUFFER_OVERFLOW_ERROR){ | |
165 | /* we do not have enough room so grow the buffer*/ | |
166 | b2 = (char*) uprv_malloc( b2Len * sizeof(char)); | |
167 | if(b2==NULL){ | |
168 | status = U_MEMORY_ALLOCATION_ERROR; | |
169 | goto CLEANUP; | |
170 | } | |
171 | ||
172 | status = U_ZERO_ERROR; // reset error | |
173 | ||
174 | punycode_status error = punycode_encode(b1Len,b1,caseFlags, (uint32_t*)&b2Len, b2); | |
175 | status = getError(error); | |
176 | } | |
177 | if(U_FAILURE(status)){ | |
178 | goto CLEANUP; | |
179 | } | |
180 | ||
181 | if(b2Len < destCapacity){ | |
182 | convertASCIIToUChars(b2,dest,b2Len); | |
183 | }else{ | |
184 | status =U_BUFFER_OVERFLOW_ERROR; | |
185 | } | |
186 | ||
187 | CLEANUP: | |
188 | if(b1Stack != b1){ | |
189 | uprv_free(b1); | |
190 | } | |
191 | if(b2Stack != b2){ | |
192 | uprv_free(b2); | |
193 | } | |
194 | uprv_free(caseFlags); | |
195 | ||
196 | return b2Len; | |
197 | } | |
198 | ||
199 | static int32_t convertFromPuny( const UChar* src, int32_t srcLength, | |
200 | UChar* dest, int32_t destCapacity, | |
201 | UErrorCode& status){ | |
202 | char b1Stack[MAX_LABEL_BUFFER_SIZE]; | |
203 | char* b1 = b1Stack; | |
204 | int32_t destLen =0; | |
205 | ||
206 | convertUCharsToASCII(src, b1,srcLength); | |
207 | ||
208 | uint32_t b2Stack[MAX_LABEL_BUFFER_SIZE]; | |
209 | uint32_t* b2 = b2Stack; | |
210 | int32_t b2Len =MAX_LABEL_BUFFER_SIZE; | |
211 | unsigned char* caseFlags = NULL; //(unsigned char*) uprv_malloc(srcLength * sizeof(unsigned char*)); | |
212 | punycode_status error = punycode_decode(srcLength,b1,(uint32_t*)&b2Len,b2,caseFlags); | |
213 | status = getError(error); | |
214 | if(status == U_BUFFER_OVERFLOW_ERROR){ | |
215 | b2 = (uint32_t*) uprv_malloc(b2Len * sizeof(uint32_t)); | |
216 | if(b2 == NULL){ | |
217 | status = U_MEMORY_ALLOCATION_ERROR; | |
218 | goto CLEANUP; | |
219 | } | |
220 | error = punycode_decode(srcLength,b1,(uint32_t*)&b2Len,b2,caseFlags); | |
221 | status = getError(error); | |
222 | } | |
223 | ||
224 | if(U_FAILURE(status)){ | |
225 | goto CLEANUP; | |
226 | } | |
227 | ||
228 | u_strFromUTF32(dest,destCapacity,&destLen,(UChar32*)b2,b2Len,&status); | |
229 | ||
230 | CLEANUP: | |
231 | if(b1Stack != b1){ | |
232 | uprv_free(b1); | |
233 | } | |
234 | if(b2Stack != b2){ | |
235 | uprv_free(b2); | |
236 | } | |
237 | uprv_free(caseFlags); | |
238 | ||
239 | return destLen; | |
240 | } | |
241 | ||
242 | ||
243 | U_CFUNC int32_t U_EXPORT2 | |
244 | idnaref_toASCII(const UChar* src, int32_t srcLength, | |
245 | UChar* dest, int32_t destCapacity, | |
246 | int32_t options, | |
247 | UParseError* parseError, | |
248 | UErrorCode* status){ | |
249 | ||
250 | if(status == NULL || U_FAILURE(*status)){ | |
251 | return 0; | |
252 | } | |
253 | if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ | |
254 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
255 | return 0; | |
256 | } | |
257 | UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE]; | |
258 | //initialize pointers to stack buffers | |
259 | UChar *b1 = b1Stack, *b2 = b2Stack; | |
260 | int32_t b1Len=0, b2Len=0, | |
261 | b1Capacity = MAX_LABEL_BUFFER_SIZE, | |
262 | b2Capacity = MAX_LABEL_BUFFER_SIZE , | |
263 | reqLength=0; | |
264 | ||
265 | //get the options | |
266 | UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0); | |
267 | UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0); | |
268 | ||
269 | UBool* caseFlags = NULL; | |
270 | ||
271 | // assume the source contains all ascii codepoints | |
272 | UBool srcIsASCII = TRUE; | |
273 | // assume the source contains all LDH codepoints | |
274 | UBool srcIsLDH = TRUE; | |
275 | int32_t j=0; | |
276 | ||
277 | if(srcLength == -1){ | |
278 | srcLength = u_strlen(src); | |
279 | } | |
280 | ||
281 | // step 1 | |
282 | for( j=0;j<srcLength;j++){ | |
283 | if(src[j] > 0x7F){ | |
284 | srcIsASCII = FALSE; | |
285 | } | |
286 | b1[b1Len++] = src[j]; | |
287 | } | |
288 | ||
289 | NamePrepTransform* prep = TestIDNA::getInstance(*status); | |
290 | if(U_FAILURE(*status)){ | |
291 | goto CLEANUP; | |
292 | } | |
293 | ||
294 | // step 2 is performed only if the source contains non ASCII | |
295 | if (!srcIsASCII) { | |
296 | b1Len = prep->process(src,srcLength,b1, b1Capacity,allowUnassigned,parseError,*status); | |
297 | ||
298 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
299 | // redo processing of string | |
300 | /* we do not have enough room so grow the buffer*/ | |
301 | b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
302 | if(b1==NULL){ | |
303 | *status = U_MEMORY_ALLOCATION_ERROR; | |
304 | goto CLEANUP; | |
305 | } | |
306 | ||
307 | *status = U_ZERO_ERROR; // reset error | |
308 | ||
309 | b1Len = prep->process(src,srcLength,b1, b1Len,allowUnassigned, parseError, *status); | |
310 | } | |
311 | // error bail out | |
312 | if(U_FAILURE(*status)){ | |
313 | goto CLEANUP; | |
314 | } | |
315 | } | |
316 | ||
317 | if(b1Len == 0){ | |
318 | *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; | |
319 | goto CLEANUP; | |
320 | } | |
321 | ||
322 | srcIsASCII = TRUE; | |
323 | // step 3 & 4 | |
324 | for( j=0;j<b1Len;j++){ | |
325 | if(b1[j] > 0x7F){// check if output of usprep_prepare is all ASCII | |
326 | srcIsASCII = FALSE; | |
327 | }else if(prep->isLDHChar(b1[j])==FALSE){ // if the char is in ASCII range verify that it is an LDH character{ | |
328 | srcIsLDH = FALSE; | |
329 | } | |
330 | } | |
331 | ||
332 | if(useSTD3ASCIIRules == TRUE){ | |
333 | // verify 3a and 3b | |
334 | if( srcIsLDH == FALSE /* source contains some non-LDH characters */ | |
335 | || b1[0] == HYPHEN || b1[b1Len-1] == HYPHEN){ | |
336 | *status = U_IDNA_STD3_ASCII_RULES_ERROR; | |
337 | goto CLEANUP; | |
338 | } | |
339 | } | |
340 | if(srcIsASCII){ | |
341 | if(b1Len <= destCapacity){ | |
342 | uprv_memmove(dest, b1, b1Len * U_SIZEOF_UCHAR); | |
343 | reqLength = b1Len; | |
344 | }else{ | |
345 | reqLength = b1Len; | |
346 | goto CLEANUP; | |
347 | } | |
348 | }else{ | |
349 | // step 5 : verify the sequence does not begin with ACE prefix | |
350 | if(!startsWithPrefix(b1,b1Len)){ | |
351 | ||
352 | //step 6: encode the sequence with punycode | |
353 | //caseFlags = (UBool*) uprv_malloc(b1Len * sizeof(UBool)); | |
354 | ||
355 | b2Len = convertToPuny(b1,b1Len, b2,b2Capacity,*status); | |
356 | //b2Len = u_strToPunycode(b2,b2Capacity,b1,b1Len, caseFlags, status); | |
357 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
358 | // redo processing of string | |
359 | /* we do not have enough room so grow the buffer*/ | |
360 | b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); | |
361 | if(b2 == NULL){ | |
362 | *status = U_MEMORY_ALLOCATION_ERROR; | |
363 | goto CLEANUP; | |
364 | } | |
365 | ||
366 | *status = U_ZERO_ERROR; // reset error | |
367 | ||
368 | b2Len = convertToPuny(b1, b1Len, b2, b2Len, *status); | |
369 | //b2Len = u_strToPunycode(b2,b2Len,b1,b1Len, caseFlags, status); | |
370 | ||
371 | } | |
372 | //error bail out | |
373 | if(U_FAILURE(*status)){ | |
374 | goto CLEANUP; | |
375 | } | |
376 | reqLength = b2Len+ACE_PREFIX_LENGTH; | |
377 | ||
378 | if(reqLength > destCapacity){ | |
379 | *status = U_BUFFER_OVERFLOW_ERROR; | |
380 | goto CLEANUP; | |
381 | } | |
382 | //Step 7: prepend the ACE prefix | |
383 | uprv_memcpy(dest,ACE_PREFIX,ACE_PREFIX_LENGTH * U_SIZEOF_UCHAR); | |
384 | //Step 6: copy the contents in b2 into dest | |
385 | uprv_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len * U_SIZEOF_UCHAR); | |
386 | ||
387 | }else{ | |
388 | *status = U_IDNA_ACE_PREFIX_ERROR; | |
389 | goto CLEANUP; | |
390 | } | |
391 | } | |
392 | ||
393 | if(reqLength > MAX_LABEL_LENGTH){ | |
394 | *status = U_IDNA_LABEL_TOO_LONG_ERROR; | |
395 | } | |
396 | ||
397 | CLEANUP: | |
398 | if(b1 != b1Stack){ | |
399 | uprv_free(b1); | |
400 | } | |
401 | if(b2 != b2Stack){ | |
402 | uprv_free(b2); | |
403 | } | |
404 | uprv_free(caseFlags); | |
405 | ||
406 | // delete prep; | |
407 | ||
408 | return u_terminateUChars(dest, destCapacity, reqLength, status); | |
409 | } | |
410 | ||
411 | ||
412 | U_CFUNC int32_t U_EXPORT2 | |
413 | idnaref_toUnicode(const UChar* src, int32_t srcLength, | |
414 | UChar* dest, int32_t destCapacity, | |
415 | int32_t options, | |
416 | UParseError* parseError, | |
417 | UErrorCode* status){ | |
418 | ||
419 | if(status == NULL || U_FAILURE(*status)){ | |
420 | return 0; | |
421 | } | |
422 | if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ | |
423 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
424 | return 0; | |
425 | } | |
426 | ||
427 | ||
428 | ||
429 | UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE]; | |
430 | ||
431 | //initialize pointers to stack buffers | |
432 | UChar *b1 = b1Stack, *b2 = b2Stack, *b1Prime=NULL, *b3=b3Stack; | |
433 | int32_t b1Len, b2Len, b1PrimeLen, b3Len, | |
434 | b1Capacity = MAX_LABEL_BUFFER_SIZE, | |
435 | b2Capacity = MAX_LABEL_BUFFER_SIZE, | |
436 | b3Capacity = MAX_LABEL_BUFFER_SIZE, | |
437 | reqLength=0; | |
438 | // UParseError parseError; | |
439 | ||
440 | NamePrepTransform* prep = TestIDNA::getInstance(*status); | |
441 | b1Len = 0; | |
442 | UBool* caseFlags = NULL; | |
443 | ||
444 | //get the options | |
445 | UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0); | |
446 | UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0); | |
447 | ||
448 | UBool srcIsASCII = TRUE; | |
449 | UBool srcIsLDH = TRUE; | |
450 | int32_t failPos =0; | |
451 | ||
452 | if(U_FAILURE(*status)){ | |
453 | goto CLEANUP; | |
454 | } | |
455 | // step 1: find out if all the codepoints in src are ASCII | |
456 | if(srcLength==-1){ | |
457 | srcLength = 0; | |
458 | for(;src[srcLength]!=0;){ | |
459 | if(src[srcLength]> 0x7f){ | |
460 | srcIsASCII = FALSE; | |
461 | }if(prep->isLDHChar(src[srcLength])==FALSE){ | |
462 | // here we do not assemble surrogates | |
463 | // since we know that LDH code points | |
464 | // are in the ASCII range only | |
465 | srcIsLDH = FALSE; | |
466 | failPos = srcLength; | |
467 | } | |
468 | srcLength++; | |
469 | } | |
470 | }else{ | |
471 | for(int32_t j=0; j<srcLength; j++){ | |
472 | if(src[j]> 0x7f){ | |
473 | srcIsASCII = FALSE; | |
474 | }else if(prep->isLDHChar(src[j])==FALSE){ | |
475 | // here we do not assemble surrogates | |
476 | // since we know that LDH code points | |
477 | // are in the ASCII range only | |
478 | srcIsLDH = FALSE; | |
479 | failPos = j; | |
480 | } | |
481 | } | |
482 | } | |
483 | ||
484 | if(srcIsASCII == FALSE){ | |
485 | // step 2: process the string | |
486 | b1Len = prep->process(src,srcLength,b1,b1Capacity,allowUnassigned, parseError, *status); | |
487 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
488 | // redo processing of string | |
489 | /* we do not have enough room so grow the buffer*/ | |
490 | b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
491 | if(b1==NULL){ | |
492 | *status = U_MEMORY_ALLOCATION_ERROR; | |
493 | goto CLEANUP; | |
494 | } | |
495 | ||
496 | *status = U_ZERO_ERROR; // reset error | |
497 | ||
498 | b1Len = prep->process(src,srcLength,b1, b1Len,allowUnassigned, parseError, *status); | |
499 | } | |
500 | //bail out on error | |
501 | if(U_FAILURE(*status)){ | |
502 | goto CLEANUP; | |
503 | } | |
504 | }else{ | |
505 | ||
506 | // copy everything to b1 | |
507 | if(srcLength < b1Capacity){ | |
508 | uprv_memmove(b1,src, srcLength * U_SIZEOF_UCHAR); | |
509 | }else{ | |
510 | /* we do not have enough room so grow the buffer*/ | |
511 | b1 = (UChar*) uprv_malloc(srcLength * U_SIZEOF_UCHAR); | |
512 | if(b1==NULL){ | |
513 | *status = U_MEMORY_ALLOCATION_ERROR; | |
514 | goto CLEANUP; | |
515 | } | |
516 | uprv_memmove(b1,src, srcLength * U_SIZEOF_UCHAR); | |
517 | } | |
518 | b1Len = srcLength; | |
519 | } | |
520 | //step 3: verify ACE Prefix | |
521 | if(startsWithPrefix(src,srcLength)){ | |
522 | ||
523 | //step 4: Remove the ACE Prefix | |
524 | b1Prime = b1 + ACE_PREFIX_LENGTH; | |
525 | b1PrimeLen = b1Len - ACE_PREFIX_LENGTH; | |
526 | ||
527 | //step 5: Decode using punycode | |
528 | b2Len = convertFromPuny(b1Prime,b1PrimeLen, b2, b2Capacity, *status); | |
529 | //b2Len = u_strFromPunycode(b2, b2Capacity,b1Prime,b1PrimeLen, caseFlags, status); | |
530 | ||
531 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
532 | // redo processing of string | |
533 | /* we do not have enough room so grow the buffer*/ | |
534 | b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); | |
535 | if(b2==NULL){ | |
536 | *status = U_MEMORY_ALLOCATION_ERROR; | |
537 | goto CLEANUP; | |
538 | } | |
539 | ||
540 | *status = U_ZERO_ERROR; // reset error | |
541 | ||
542 | b2Len = convertFromPuny(b1Prime,b1PrimeLen, b2, b2Len, *status); | |
543 | //b2Len = u_strFromPunycode(b2, b2Len,b1Prime,b1PrimeLen,caseFlags, status); | |
544 | } | |
545 | ||
546 | ||
547 | //step 6:Apply toASCII | |
548 | b3Len = idnaref_toASCII(b2,b2Len,b3,b3Capacity,options,parseError, status); | |
549 | ||
550 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
551 | // redo processing of string | |
552 | /* we do not have enough room so grow the buffer*/ | |
553 | b3 = (UChar*) uprv_malloc(b3Len * U_SIZEOF_UCHAR); | |
554 | if(b3==NULL){ | |
555 | *status = U_MEMORY_ALLOCATION_ERROR; | |
556 | goto CLEANUP; | |
557 | } | |
558 | ||
559 | *status = U_ZERO_ERROR; // reset error | |
560 | ||
561 | b3Len = idnaref_toASCII(b2,b2Len,b3,b3Len, options, parseError, status); | |
562 | ||
563 | } | |
564 | //bail out on error | |
565 | if(U_FAILURE(*status)){ | |
566 | goto CLEANUP; | |
567 | } | |
568 | ||
569 | //step 7: verify | |
570 | if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=0){ | |
571 | *status = U_IDNA_VERIFICATION_ERROR; | |
572 | goto CLEANUP; | |
573 | } | |
574 | ||
575 | //step 8: return output of step 5 | |
576 | reqLength = b2Len; | |
577 | if(b2Len <= destCapacity) { | |
578 | uprv_memmove(dest, b2, b2Len * U_SIZEOF_UCHAR); | |
579 | } | |
580 | }else{ | |
581 | // verify that STD3 ASCII rules are satisfied | |
582 | if(useSTD3ASCIIRules == TRUE){ | |
583 | if( srcIsLDH == FALSE /* source contains some non-LDH characters */ | |
584 | || src[0] == HYPHEN || src[srcLength-1] == HYPHEN){ | |
585 | *status = U_IDNA_STD3_ASCII_RULES_ERROR; | |
586 | ||
587 | /* populate the parseError struct */ | |
588 | if(srcIsLDH==FALSE){ | |
589 | // failPos is always set the index of failure | |
590 | uprv_syntaxError(src,failPos, srcLength,parseError); | |
591 | }else if(src[0] == HYPHEN){ | |
592 | // fail position is 0 | |
593 | uprv_syntaxError(src,0,srcLength,parseError); | |
594 | }else{ | |
595 | // the last index in the source is always length-1 | |
596 | uprv_syntaxError(src, (srcLength>0) ? srcLength-1 : srcLength, srcLength,parseError); | |
597 | } | |
598 | ||
599 | goto CLEANUP; | |
600 | } | |
601 | } | |
602 | //copy the source to destination | |
603 | if(srcLength <= destCapacity){ | |
604 | uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR); | |
605 | } | |
606 | reqLength = srcLength; | |
607 | } | |
608 | ||
609 | CLEANUP: | |
610 | ||
611 | if(b1 != b1Stack){ | |
612 | uprv_free(b1); | |
613 | } | |
614 | if(b2 != b2Stack){ | |
615 | uprv_free(b2); | |
616 | } | |
617 | uprv_free(caseFlags); | |
618 | ||
619 | // The RFC states that | |
620 | // <quote> | |
621 | // ToUnicode never fails. If any step fails, then the original input | |
622 | // is returned immediately in that step. | |
623 | // </quote> | |
624 | // So if any step fails lets copy source to destination | |
625 | if(U_FAILURE(*status)){ | |
626 | //copy the source to destination | |
627 | if(dest && srcLength <= destCapacity){ | |
628 | if(srcLength == -1) { | |
629 | uprv_memmove(dest,src,u_strlen(src)* U_SIZEOF_UCHAR); | |
630 | } else { | |
631 | uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR); | |
632 | } | |
633 | } | |
634 | reqLength = srcLength; | |
635 | *status = U_ZERO_ERROR; | |
636 | } | |
637 | return u_terminateUChars(dest, destCapacity, reqLength, status); | |
638 | } | |
639 | ||
640 | ||
641 | static int32_t | |
642 | getNextSeparator(UChar *src,int32_t srcLength,NamePrepTransform* prep, | |
643 | UChar **limit, | |
644 | UBool *done, | |
645 | UErrorCode *status){ | |
646 | if(srcLength == -1){ | |
647 | int32_t i; | |
648 | for(i=0 ; ;i++){ | |
649 | if(src[i] == 0){ | |
650 | *limit = src + i; // point to null | |
651 | *done = TRUE; | |
652 | return i; | |
653 | } | |
654 | if(prep->isLabelSeparator(src[i],*status)){ | |
655 | *limit = src + (i+1); // go past the delimiter | |
656 | return i; | |
657 | ||
658 | } | |
659 | } | |
660 | }else{ | |
661 | int32_t i; | |
662 | for(i=0;i<srcLength;i++){ | |
663 | if(prep->isLabelSeparator(src[i],*status)){ | |
664 | *limit = src + (i+1); // go past the delimiter | |
665 | return i; | |
666 | } | |
667 | } | |
668 | // we have not found the delimiter | |
669 | if(i==srcLength){ | |
670 | *limit = src+srcLength; | |
671 | *done = TRUE; | |
672 | } | |
673 | return i; | |
674 | } | |
675 | } | |
676 | ||
677 | U_CFUNC int32_t U_EXPORT2 | |
678 | idnaref_IDNToASCII( const UChar* src, int32_t srcLength, | |
679 | UChar* dest, int32_t destCapacity, | |
680 | int32_t options, | |
681 | UParseError* parseError, | |
682 | UErrorCode* status){ | |
683 | ||
684 | if(status == NULL || U_FAILURE(*status)){ | |
685 | return 0; | |
686 | } | |
687 | if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ | |
688 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
689 | return 0; | |
690 | } | |
691 | ||
692 | int32_t reqLength = 0; | |
693 | // UParseError parseError; | |
694 | ||
695 | NamePrepTransform* prep = TestIDNA::getInstance(*status); | |
696 | ||
697 | //initialize pointers to stack buffers | |
698 | UChar b1Stack[MAX_LABEL_BUFFER_SIZE]; | |
699 | UChar *b1 = b1Stack; | |
700 | int32_t b1Len, labelLen; | |
701 | UChar* delimiter = (UChar*)src; | |
702 | UChar* labelStart = (UChar*)src; | |
703 | int32_t remainingLen = srcLength; | |
704 | int32_t b1Capacity = MAX_LABEL_BUFFER_SIZE; | |
705 | ||
706 | //get the options | |
707 | // UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0); | |
708 | // UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0); | |
709 | UBool done = FALSE; | |
710 | ||
711 | if(U_FAILURE(*status)){ | |
712 | goto CLEANUP; | |
713 | } | |
714 | ||
715 | ||
716 | if(srcLength == -1){ | |
717 | for(;;){ | |
718 | ||
719 | if(*delimiter == 0){ | |
720 | break; | |
721 | } | |
722 | ||
723 | labelLen = getNextSeparator(labelStart, -1, prep, &delimiter, &done, status); | |
724 | b1Len = 0; | |
725 | if(!(labelLen==0 && done)){// make sure this is not a root label separator. | |
726 | ||
727 | b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Capacity, | |
728 | options, parseError, status); | |
729 | ||
730 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
731 | // redo processing of string | |
732 | /* we do not have enough room so grow the buffer*/ | |
733 | b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
734 | if(b1==NULL){ | |
735 | *status = U_MEMORY_ALLOCATION_ERROR; | |
736 | goto CLEANUP; | |
737 | } | |
738 | ||
739 | *status = U_ZERO_ERROR; // reset error | |
740 | ||
741 | b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Len, | |
742 | options, parseError, status); | |
743 | ||
744 | } | |
745 | } | |
746 | ||
747 | if(U_FAILURE(*status)){ | |
748 | goto CLEANUP; | |
749 | } | |
750 | int32_t tempLen = (reqLength + b1Len ); | |
751 | // copy to dest | |
752 | if( tempLen< destCapacity){ | |
753 | uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR); | |
754 | } | |
755 | ||
756 | reqLength = tempLen; | |
757 | ||
758 | // add the label separator | |
759 | if(done == FALSE){ | |
760 | if(reqLength < destCapacity){ | |
761 | dest[reqLength] = FULL_STOP; | |
762 | } | |
763 | reqLength++; | |
764 | } | |
765 | ||
766 | labelStart = delimiter; | |
767 | } | |
768 | }else{ | |
769 | for(;;){ | |
770 | ||
771 | if(delimiter == src+srcLength){ | |
772 | break; | |
773 | } | |
774 | ||
775 | labelLen = getNextSeparator(labelStart, remainingLen, prep, &delimiter, &done, status); | |
776 | ||
777 | b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Capacity, | |
778 | options,parseError, status); | |
779 | ||
780 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
781 | // redo processing of string | |
782 | /* we do not have enough room so grow the buffer*/ | |
783 | b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
784 | if(b1==NULL){ | |
785 | *status = U_MEMORY_ALLOCATION_ERROR; | |
786 | goto CLEANUP; | |
787 | } | |
788 | ||
789 | *status = U_ZERO_ERROR; // reset error | |
790 | ||
791 | b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Len, | |
792 | options, parseError, status); | |
793 | ||
794 | } | |
795 | ||
796 | if(U_FAILURE(*status)){ | |
797 | goto CLEANUP; | |
798 | } | |
799 | int32_t tempLen = (reqLength + b1Len ); | |
800 | // copy to dest | |
801 | if( tempLen< destCapacity){ | |
802 | uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR); | |
803 | } | |
804 | ||
805 | reqLength = tempLen; | |
806 | ||
807 | // add the label separator | |
808 | if(done == FALSE){ | |
809 | if(reqLength < destCapacity){ | |
810 | dest[reqLength] = FULL_STOP; | |
811 | } | |
812 | reqLength++; | |
813 | } | |
814 | ||
815 | labelStart = delimiter; | |
816 | remainingLen = srcLength - (delimiter - src); | |
817 | } | |
818 | } | |
819 | ||
820 | ||
821 | CLEANUP: | |
822 | ||
823 | if(b1 != b1Stack){ | |
824 | uprv_free(b1); | |
825 | } | |
826 | ||
827 | // delete prep; | |
828 | ||
829 | return u_terminateUChars(dest, destCapacity, reqLength, status); | |
830 | } | |
831 | ||
832 | U_CFUNC int32_t U_EXPORT2 | |
833 | idnaref_IDNToUnicode( const UChar* src, int32_t srcLength, | |
834 | UChar* dest, int32_t destCapacity, | |
835 | int32_t options, | |
836 | UParseError* parseError, | |
837 | UErrorCode* status){ | |
838 | ||
839 | if(status == NULL || U_FAILURE(*status)){ | |
840 | return 0; | |
841 | } | |
842 | if((src == NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ | |
843 | *status = U_ILLEGAL_ARGUMENT_ERROR; | |
844 | return 0; | |
845 | } | |
846 | ||
847 | int32_t reqLength = 0; | |
848 | ||
849 | UBool done = FALSE; | |
850 | ||
851 | NamePrepTransform* prep = TestIDNA::getInstance(*status); | |
852 | ||
853 | //initialize pointers to stack buffers | |
854 | UChar b1Stack[MAX_LABEL_BUFFER_SIZE]; | |
855 | UChar *b1 = b1Stack; | |
856 | int32_t b1Len, labelLen; | |
857 | UChar* delimiter = (UChar*)src; | |
858 | UChar* labelStart = (UChar*)src; | |
859 | int32_t remainingLen = srcLength; | |
860 | int32_t b1Capacity = MAX_LABEL_BUFFER_SIZE; | |
861 | ||
862 | //get the options | |
863 | // UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0); | |
864 | // UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0); | |
865 | ||
866 | if(U_FAILURE(*status)){ | |
867 | goto CLEANUP; | |
868 | } | |
869 | ||
870 | if(srcLength == -1){ | |
871 | for(;;){ | |
872 | ||
873 | if(*delimiter == 0){ | |
874 | break; | |
875 | } | |
876 | ||
877 | labelLen = getNextSeparator(labelStart, -1, prep, &delimiter, &done, status); | |
878 | ||
879 | if(labelLen==0 && done==FALSE){ | |
880 | *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; | |
881 | } | |
882 | b1Len = idnaref_toUnicode(labelStart, labelLen, b1, b1Capacity, | |
883 | options, parseError, status); | |
884 | ||
885 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
886 | // redo processing of string | |
887 | /* we do not have enough room so grow the buffer*/ | |
888 | b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
889 | if(b1==NULL){ | |
890 | *status = U_MEMORY_ALLOCATION_ERROR; | |
891 | goto CLEANUP; | |
892 | } | |
893 | ||
894 | *status = U_ZERO_ERROR; // reset error | |
895 | ||
896 | b1Len = idnaref_toUnicode( labelStart, labelLen, b1, b1Len, | |
897 | options, parseError, status); | |
898 | ||
899 | } | |
900 | ||
901 | if(U_FAILURE(*status)){ | |
902 | goto CLEANUP; | |
903 | } | |
904 | int32_t tempLen = (reqLength + b1Len ); | |
905 | // copy to dest | |
906 | if( tempLen< destCapacity){ | |
907 | uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR); | |
908 | } | |
909 | ||
910 | reqLength = tempLen; | |
911 | // add the label separator | |
912 | if(done == FALSE){ | |
913 | if(reqLength < destCapacity){ | |
914 | dest[reqLength] = FULL_STOP; | |
915 | } | |
916 | reqLength++; | |
917 | } | |
918 | ||
919 | labelStart = delimiter; | |
920 | } | |
921 | }else{ | |
922 | for(;;){ | |
923 | ||
924 | if(delimiter == src+srcLength){ | |
925 | break; | |
926 | } | |
927 | ||
928 | labelLen = getNextSeparator(labelStart, remainingLen, prep, &delimiter, &done, status); | |
929 | ||
930 | if(labelLen==0 && done==FALSE){ | |
931 | *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; | |
932 | } | |
933 | ||
934 | b1Len = idnaref_toUnicode( labelStart,labelLen, b1, b1Capacity, | |
935 | options, parseError, status); | |
936 | ||
937 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
938 | // redo processing of string | |
939 | /* we do not have enough room so grow the buffer*/ | |
940 | b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
941 | if(b1==NULL){ | |
942 | *status = U_MEMORY_ALLOCATION_ERROR; | |
943 | goto CLEANUP; | |
944 | } | |
945 | ||
946 | *status = U_ZERO_ERROR; // reset error | |
947 | ||
948 | b1Len = idnaref_toUnicode( labelStart, labelLen, b1, b1Len, | |
949 | options, parseError, status); | |
950 | ||
951 | } | |
952 | ||
953 | if(U_FAILURE(*status)){ | |
954 | goto CLEANUP; | |
955 | } | |
956 | int32_t tempLen = (reqLength + b1Len ); | |
957 | // copy to dest | |
958 | if( tempLen< destCapacity){ | |
959 | uprv_memmove(dest+reqLength, b1, b1Len * U_SIZEOF_UCHAR); | |
960 | } | |
961 | ||
962 | reqLength = tempLen; | |
963 | ||
964 | // add the label separator | |
965 | if(done == FALSE){ | |
966 | if(reqLength < destCapacity){ | |
967 | dest[reqLength] = FULL_STOP; | |
968 | } | |
969 | reqLength++; | |
970 | } | |
971 | ||
972 | labelStart = delimiter; | |
973 | remainingLen = srcLength - (delimiter - src); | |
974 | } | |
975 | } | |
976 | ||
977 | CLEANUP: | |
978 | ||
979 | if(b1 != b1Stack){ | |
980 | uprv_free(b1); | |
981 | } | |
982 | ||
983 | // delete prep; | |
984 | ||
985 | return u_terminateUChars(dest, destCapacity, reqLength, status); | |
986 | } | |
987 | ||
988 | U_CFUNC int32_t U_EXPORT2 | |
989 | idnaref_compare( const UChar *s1, int32_t length1, | |
990 | const UChar *s2, int32_t length2, | |
991 | int32_t options, | |
992 | UErrorCode* status){ | |
993 | ||
994 | if(status == NULL || U_FAILURE(*status)){ | |
995 | return -1; | |
996 | } | |
997 | ||
998 | UChar b1Stack[MAX_IDN_BUFFER_SIZE], b2Stack[MAX_IDN_BUFFER_SIZE]; | |
999 | UChar *b1 = b1Stack, *b2 = b2Stack; | |
1000 | int32_t b1Len, b2Len, b1Capacity = MAX_IDN_BUFFER_SIZE, b2Capacity = MAX_IDN_BUFFER_SIZE; | |
1001 | int32_t result = -1; | |
1002 | ||
1003 | UParseError parseError; | |
1004 | ||
1005 | b1Len = idnaref_IDNToASCII(s1, length1, b1, b1Capacity, options, &parseError, status); | |
1006 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
1007 | // redo processing of string | |
1008 | /* we do not have enough room so grow the buffer*/ | |
1009 | b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); | |
1010 | if(b1==NULL){ | |
1011 | *status = U_MEMORY_ALLOCATION_ERROR; | |
1012 | goto CLEANUP; | |
1013 | } | |
1014 | ||
1015 | *status = U_ZERO_ERROR; // reset error | |
1016 | ||
1017 | b1Len = idnaref_IDNToASCII(s1,length1,b1,b1Len, options, &parseError, status); | |
1018 | ||
1019 | } | |
1020 | ||
1021 | b2Len = idnaref_IDNToASCII(s2,length2,b2,b2Capacity,options, &parseError, status); | |
1022 | if(*status == U_BUFFER_OVERFLOW_ERROR){ | |
1023 | // redo processing of string | |
1024 | /* we do not have enough room so grow the buffer*/ | |
1025 | b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); | |
1026 | if(b2==NULL){ | |
1027 | *status = U_MEMORY_ALLOCATION_ERROR; | |
1028 | goto CLEANUP; | |
1029 | } | |
1030 | ||
1031 | *status = U_ZERO_ERROR; // reset error | |
1032 | ||
1033 | b2Len = idnaref_IDNToASCII(s2,length2,b2,b2Len,options, &parseError, status); | |
1034 | ||
1035 | } | |
1036 | // when toASCII is applied all label separators are replaced with FULL_STOP | |
1037 | result = compareCaseInsensitiveASCII(b1,b1Len,b2,b2Len); | |
1038 | ||
1039 | CLEANUP: | |
1040 | if(b1 != b1Stack){ | |
1041 | uprv_free(b1); | |
1042 | } | |
1043 | ||
1044 | if(b2 != b2Stack){ | |
1045 | uprv_free(b2); | |
1046 | } | |
1047 | ||
1048 | return result; | |
1049 | } | |
1050 | #endif /* #if !UCONFIG_NO_IDNA */ |