/*
*******************************************************************************
*
- * Copyright (C) 2003, International Business Machines
+ * Copyright (C) 2003-2007, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
#if !UCONFIG_NO_IDNA && !UCONFIG_NO_TRANSLITERATION
#include "idnaref.h"
-#include "strprep.h"
#include "punyref.h"
#include "ustr_imp.h"
#include "cmemory.h"
#include "unicode/ustring.h"
/* the official IDNA ACE prefix is "xn--" */
-static const UChar ACE_PREFIX[] ={ 0x0058,0x004E,0x002d,0x002d } ;
+static const UChar ACE_PREFIX[] ={ 0x0078,0x006E,0x002d,0x002d } ;
#define ACE_PREFIX_LENGTH 4
#define MAX_LABEL_LENGTH 63
#define HYPHEN 0x002D
/* The Max length of the labels should not be more than 64 */
-#define MAX_LABEL_BUFFER_SIZE 100
+#define MAX_LABEL_BUFFER_SIZE 100
#define MAX_IDN_BUFFER_SIZE 300
#define CAPITAL_A 0x0041
#define FULL_STOP 0x002E
-NamePrepTransform* TestIDNA::prep = NULL;
-
-NamePrepTransform* TestIDNA::getInstance(UErrorCode& status){
- if(TestIDNA::prep == NULL){
- UParseError parseError;
- TestIDNA::prep = NamePrepTransform::createInstance(parseError, status);
- if(TestIDNA::prep ==NULL){
- //status = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
- }
- return TestIDNA::prep;
-
-}
-
-inline static UBool
+inline static UBool
startsWithPrefix(const UChar* src , int32_t srcLength){
UBool startsWithPrefix = TRUE;
}
for(int8_t i=0; i< ACE_PREFIX_LENGTH; i++){
- if(u_toupper(src[i]) != ACE_PREFIX[i]){
+ if(u_tolower(src[i]) != ACE_PREFIX[i]){
startsWithPrefix = FALSE;
}
}
return startsWithPrefix;
}
-inline static UChar
+inline static UChar
toASCIILower(UChar ch){
if(CAPITAL_A <= ch && ch <= CAPITAL_Z){
return ch + LOWER_CASE_DELTA;
}
inline static int32_t
-compareCaseInsensitiveASCII(const UChar* s1, int32_t s1Len,
+compareCaseInsensitiveASCII(const UChar* s1, int32_t s1Len,
const UChar* s2, int32_t s2Len){
if(s1Len != s2Len){
return (s1Len > s2Len) ? s1Len : s2Len;
if(i == s1Len) {
return 0;
}
-
+
c1 = s1[i];
c2 = s2[i];
-
+
/* Case-insensitive comparison */
if(c1!=c2) {
rc=(int32_t)toASCIILower(c1)-(int32_t)toASCIILower(c2);
}
}
}
-
+
}
static UErrorCode getError(enum punycode_status status){
}
return i;
}
-// wrapper around the reference Punycode implementation
-static int32_t convertToPuny(const UChar* src, int32_t srcLength,
- UChar* dest, int32_t destCapacity,
+// wrapper around the reference Punycode implementation
+static int32_t convertToPuny(const UChar* src, int32_t srcLength,
+ UChar* dest, int32_t destCapacity,
UErrorCode& status){
uint32_t b1Stack[MAX_LABEL_BUFFER_SIZE];
int32_t b1Len = 0, b1Capacity = MAX_LABEL_BUFFER_SIZE;
}
status = U_ZERO_ERROR; // reset error
-
+
u_strToUTF32((UChar32*)b1,b1Len,&b1Len,src,srcLength,&status);
}
if(U_FAILURE(status)){
error = punycode_encode(b1Len,b1,caseFlags, (uint32_t*)&b2Len, b2);
status = getError(error);
-
+
if(status == U_BUFFER_OVERFLOW_ERROR){
/* we do not have enough room so grow the buffer*/
b2 = (char*) uprv_malloc( b2Len * sizeof(char));
if(U_FAILURE(status)){
goto CLEANUP;
}
-
+
if(b2Len < destCapacity){
convertASCIIToUChars(b2,dest,b2Len);
}else{
}
static int32_t convertFromPuny( const UChar* src, int32_t srcLength,
- UChar* dest, int32_t destCapacity,
+ UChar* dest, int32_t destCapacity,
UErrorCode& status){
char b1Stack[MAX_LABEL_BUFFER_SIZE];
char* b1 = b1Stack;
}
uprv_free(caseFlags);
- return destLen;
+ return destLen;
}
-
-U_CFUNC int32_t
-idnaref_toASCII(const UChar* src, int32_t srcLength,
- UChar* dest, int32_t destCapacity,
+U_CFUNC int32_t U_EXPORT2
+idnaref_toASCII(const UChar* src, int32_t srcLength,
+ UChar* dest, int32_t destCapacity,
int32_t options,
- UParseError* parseError,
+ UParseError* parseError,
UErrorCode* status){
-
+
if(status == NULL || U_FAILURE(*status)){
return 0;
}
UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE];
//initialize pointers to stack buffers
UChar *b1 = b1Stack, *b2 = b2Stack;
- int32_t b1Len, b2Len,
- b1Capacity = MAX_LABEL_BUFFER_SIZE,
+ int32_t b1Len=0, b2Len=0,
+ b1Capacity = MAX_LABEL_BUFFER_SIZE,
b2Capacity = MAX_LABEL_BUFFER_SIZE ,
reqLength=0;
- //get the options
+ //get the options
UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0);
UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0);
UBool* caseFlags = NULL;
-
+
// assume the source contains only ASCII code points
UBool srcIsASCII = TRUE;
// assume the source contains only LDH code points
- UBool srcIsLDH = TRUE;
+ UBool srcIsLDH = TRUE;
int32_t j=0;
-// UParseError parseError;
+
+ if(srcLength == -1){
+ srcLength = u_strlen(src);
+ }
+
+ // step 1
+ for( j=0;j<srcLength;j++){
+ if(src[j] > 0x7F){
+ srcIsASCII = FALSE;
+ }
+ b1[b1Len++] = src[j];
+ }
// step 2
NamePrepTransform* prep = TestIDNA::getInstance(*status);
if(U_FAILURE(*status)){
goto CLEANUP;
}
-
+
b1Len = prep->process(src,srcLength,b1, b1Capacity,allowUnassigned,parseError,*status);
-
+
if(*status == U_BUFFER_OVERFLOW_ERROR){
// redo processing of string
/* we do not have enough room so grow the buffer*/
}
*status = U_ZERO_ERROR; // reset error
-
+
b1Len = prep->process(src,srcLength,b1, b1Len,allowUnassigned, parseError, *status);
}
// error bail out
goto CLEANUP;
}
+ if(b1Len == 0){
+ *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR;
+ goto CLEANUP;
+ }
+
+ srcIsASCII = TRUE;
// step 3 & 4
for( j=0;j<b1Len;j++){
- if(b1[j] > 0x7F) srcIsASCII = FALSE;
- srcIsLDH = prep->isLDHChar(b1[j]);
+ if(b1[j] > 0x7F){// check if output of usprep_prepare is all ASCII
+ srcIsASCII = FALSE;
+ }else if(prep->isLDHChar(b1[j])==FALSE){ // if the char is in ASCII range verify that it is an LDH character{
+ srcIsLDH = FALSE;
+ }
}
-
+
if(useSTD3ASCIIRules == TRUE){
// verify 3a and 3b
if( srcIsLDH == FALSE /* source contains some non-LDH characters */
uprv_memmove(dest, b1, b1Len * U_SIZEOF_UCHAR);
reqLength = b1Len;
}else{
- reqLength = b1Len;
+ reqLength = b1Len;
goto CLEANUP;
}
}else{
if(*status == U_BUFFER_OVERFLOW_ERROR){
// redo processing of string
/* we do not have enough room so grow the buffer*/
- b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
+ b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
if(b2 == NULL){
*status = U_MEMORY_ALLOCATION_ERROR;
goto CLEANUP;
}
*status = U_ZERO_ERROR; // reset error
-
+
b2Len = convertToPuny(b1, b1Len, b2, b2Len, *status);
//b2Len = u_strToPunycode(b2,b2Len,b1,b1Len, caseFlags, status);
uprv_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len * U_SIZEOF_UCHAR);
}else{
- *status = U_IDNA_ACE_PREFIX_ERROR;
+ *status = U_IDNA_ACE_PREFIX_ERROR;
goto CLEANUP;
}
}
uprv_free(b2);
}
uprv_free(caseFlags);
-
+
// delete prep;
return u_terminateUChars(dest, destCapacity, reqLength, status);
}
-U_CFUNC int32_t
+U_CFUNC int32_t U_EXPORT2
idnaref_toUnicode(const UChar* src, int32_t srcLength,
- UChar* dest, int32_t destCapacity,
+ UChar* dest, int32_t destCapacity,
int32_t options,
- UParseError* parseError,
+ UParseError* parseError,
UErrorCode* status){
if(status == NULL || U_FAILURE(*status)){
}
-
+
UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE];
//initialize pointers to stack buffers
UChar *b1 = b1Stack, *b2 = b2Stack, *b1Prime=NULL, *b3=b3Stack;
int32_t b1Len, b2Len, b1PrimeLen, b3Len,
- b1Capacity = MAX_LABEL_BUFFER_SIZE,
+ b1Capacity = MAX_LABEL_BUFFER_SIZE,
b2Capacity = MAX_LABEL_BUFFER_SIZE,
b3Capacity = MAX_LABEL_BUFFER_SIZE,
reqLength=0;
// UParseError parseError;
-
+
NamePrepTransform* prep = TestIDNA::getInstance(*status);
b1Len = 0;
UBool* caseFlags = NULL;
- //get the options
+ //get the options
UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0);
UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0);
if(U_FAILURE(*status)){
goto CLEANUP;
}
- // step 1: find out if all the codepoints in src are ASCII
+ // step 1: find out if all the codepoints in src are ASCII
if(srcLength==-1){
srcLength = 0;
for(;src[srcLength]!=0;){
if(src[srcLength]> 0x7f){
srcIsASCII = FALSE;
- }
- // here we do not assemble surrogates
- // since we know that LDH code points
- // are in the ASCII range only
- if(prep->isLDHChar(src[srcLength])==FALSE){
+ }if(prep->isLDHChar(src[srcLength])==FALSE){
+ // here we do not assemble surrogates
+ // since we know that LDH code points
+ // are in the ASCII range only
srcIsLDH = FALSE;
failPos = srcLength;
}
for(int32_t j=0; j<srcLength; j++){
if(src[j]> 0x7f){
srcIsASCII = FALSE;
- }
- // here we do not assemble surrogates
- // since we know that LDH code points
- // are in the ASCII range only
- if(prep->isLDHChar(src[j])==FALSE){
+ }else if(prep->isLDHChar(src[j])==FALSE){
+ // here we do not assemble surrogates
+ // since we know that LDH code points
+ // are in the ASCII range only
srcIsLDH = FALSE;
failPos = j;
}
}
*status = U_ZERO_ERROR; // reset error
-
+
b1Len = prep->process(src,srcLength,b1, b1Len,allowUnassigned, parseError, *status);
}
//bail out on error
}
*status = U_ZERO_ERROR; // reset error
-
+
b2Len = convertFromPuny(b1Prime,b1PrimeLen, b2, b2Len, *status);
//b2Len = u_strFromPunycode(b2, b2Len,b1Prime,b1PrimeLen,caseFlags, status);
}
-
-
+
+
//step 6:Apply toASCII
b3Len = idnaref_toASCII(b2,b2Len,b3,b3Capacity,options,parseError, status);
}
*status = U_ZERO_ERROR; // reset error
-
+
b3Len = idnaref_toASCII(b2,b2Len,b3,b3Len, options, parseError, status);
-
+
}
//bail out on error
if(U_FAILURE(*status)){
//step 7: verify
if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=0){
- *status = U_IDNA_VERIFICATION_ERROR;
+ *status = U_IDNA_VERIFICATION_ERROR;
goto CLEANUP;
}
// failPos is always set to the index of the failure
uprv_syntaxError(src,failPos, srcLength,parseError);
}else if(src[0] == HYPHEN){
- // fail position is 0
+ // fail position is 0
uprv_syntaxError(src,0,srcLength,parseError);
}else{
// the last index in the source is always length-1
uprv_free(b2);
}
uprv_free(caseFlags);
-
-// delete prep;
+ // The RFC states that
+ // <quote>
+ // ToUnicode never fails. If any step fails, then the original input
+ // is returned immediately in that step.
+ // </quote>
+ // So if any step fails, let's copy the source to the destination
+ if(U_FAILURE(*status)){
+ //copy the source to destination
+ if(dest && srcLength <= destCapacity){
+ if(srcLength == -1) {
+ uprv_memmove(dest,src,u_strlen(src)* U_SIZEOF_UCHAR);
+ } else {
+ uprv_memmove(dest,src,srcLength * U_SIZEOF_UCHAR);
+ }
+ }
+ reqLength = srcLength;
+ *status = U_ZERO_ERROR;
+ }
return u_terminateUChars(dest, destCapacity, reqLength, status);
}
if(prep->isLabelSeparator(src[i],*status)){
*limit = src + (i+1); // go past the delimiter
return i;
-
+
}
}
}else{
}
}
-U_CFUNC int32_t
+U_CFUNC int32_t U_EXPORT2
idnaref_IDNToASCII( const UChar* src, int32_t srcLength,
- UChar* dest, int32_t destCapacity,
- int32_t options,
- UParseError* parseError,
+ UChar* dest, int32_t destCapacity,
+ int32_t options,
+ UParseError* parseError,
UErrorCode* status){
if(status == NULL || U_FAILURE(*status)){
// UParseError parseError;
NamePrepTransform* prep = TestIDNA::getInstance(*status);
-
+
//initialize pointers to stack buffers
UChar b1Stack[MAX_LABEL_BUFFER_SIZE];
UChar *b1 = b1Stack;
UChar* labelStart = (UChar*)src;
int32_t remainingLen = srcLength;
int32_t b1Capacity = MAX_LABEL_BUFFER_SIZE;
-
- //get the options
+
+ //get the options
// UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0);
// UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0);
UBool done = FALSE;
if(srcLength == -1){
for(;;){
-
+
if(*delimiter == 0){
break;
}
labelLen = getNextSeparator(labelStart, -1, prep, &delimiter, &done, status);
-
- b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Capacity,
- options, parseError, status);
+ b1Len = 0;
+ if(!(labelLen==0 && done)){// make sure this is not a root label separator.
- if(*status == U_BUFFER_OVERFLOW_ERROR){
- // redo processing of string
- /* we do not have enough room so grow the buffer*/
- b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
- if(b1==NULL){
- *status = U_MEMORY_ALLOCATION_ERROR;
- goto CLEANUP;
- }
-
- *status = U_ZERO_ERROR; // reset error
-
- b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Len,
+ b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Capacity,
options, parseError, status);
-
+
+ if(*status == U_BUFFER_OVERFLOW_ERROR){
+ // redo processing of string
+ /* we do not have enough room so grow the buffer*/
+ b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
+ if(b1==NULL){
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ goto CLEANUP;
+ }
+
+ *status = U_ZERO_ERROR; // reset error
+
+ b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Len,
+ options, parseError, status);
+
+ }
}
-
+
if(U_FAILURE(*status)){
goto CLEANUP;
}
}
}else{
for(;;){
-
+
if(delimiter == src+srcLength){
break;
}
labelLen = getNextSeparator(labelStart, remainingLen, prep, &delimiter, &done, status);
-
+
b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Capacity,
options,parseError, status);
}
*status = U_ZERO_ERROR; // reset error
-
- b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Len,
+
+ b1Len = idnaref_toASCII(labelStart, labelLen, b1, b1Len,
options, parseError, status);
-
+
}
-
+
if(U_FAILURE(*status)){
goto CLEANUP;
}
CLEANUP:
-
+
if(b1 != b1Stack){
uprv_free(b1);
}
-
+
// delete prep;
return u_terminateUChars(dest, destCapacity, reqLength, status);
}
-U_CFUNC int32_t
+U_CFUNC int32_t U_EXPORT2
idnaref_IDNToUnicode( const UChar* src, int32_t srcLength,
- UChar* dest, int32_t destCapacity,
- int32_t options,
- UParseError* parseError,
+ UChar* dest, int32_t destCapacity,
+ int32_t options,
+ UParseError* parseError,
UErrorCode* status){
-
+
if(status == NULL || U_FAILURE(*status)){
return 0;
}
}
int32_t reqLength = 0;
-
+
UBool done = FALSE;
NamePrepTransform* prep = TestIDNA::getInstance(*status);
-
+
//initialize pointers to stack buffers
UChar b1Stack[MAX_LABEL_BUFFER_SIZE];
UChar *b1 = b1Stack;
UChar* labelStart = (UChar*)src;
int32_t remainingLen = srcLength;
int32_t b1Capacity = MAX_LABEL_BUFFER_SIZE;
-
- //get the options
+
+ //get the options
// UBool allowUnassigned = (UBool)((options & IDNAREF_ALLOW_UNASSIGNED) != 0);
// UBool useSTD3ASCIIRules = (UBool)((options & IDNAREF_USE_STD3_RULES) != 0);
-
- if(U_FAILURE(*status)){
+
+ if(U_FAILURE(*status)){
goto CLEANUP;
}
-
+
if(srcLength == -1){
for(;;){
-
+
if(*delimiter == 0){
break;
}
labelLen = getNextSeparator(labelStart, -1, prep, &delimiter, &done, status);
-
+
+ if(labelLen==0 && done==FALSE){
+ *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR;
+ }
b1Len = idnaref_toUnicode(labelStart, labelLen, b1, b1Capacity,
options, parseError, status);
}
*status = U_ZERO_ERROR; // reset error
-
- b1Len = idnaref_toUnicode( labelStart, labelLen, b1, b1Len,
+
+ b1Len = idnaref_toUnicode( labelStart, labelLen, b1, b1Len,
options, parseError, status);
-
+
}
-
+
if(U_FAILURE(*status)){
goto CLEANUP;
}
}
}else{
for(;;){
-
+
if(delimiter == src+srcLength){
break;
}
labelLen = getNextSeparator(labelStart, remainingLen, prep, &delimiter, &done, status);
-
+
+ if(labelLen==0 && done==FALSE){
+ *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR;
+ }
+
b1Len = idnaref_toUnicode( labelStart,labelLen, b1, b1Capacity,
options, parseError, status);
}
*status = U_ZERO_ERROR; // reset error
-
- b1Len = idnaref_toUnicode( labelStart, labelLen, b1, b1Len,
+
+ b1Len = idnaref_toUnicode( labelStart, labelLen, b1, b1Len,
options, parseError, status);
-
+
}
-
+
if(U_FAILURE(*status)){
goto CLEANUP;
}
}
CLEANUP:
-
+
if(b1 != b1Stack){
uprv_free(b1);
}
-
+
// delete prep;
-
+
return u_terminateUChars(dest, destCapacity, reqLength, status);
}
-U_CFUNC int32_t
+U_CFUNC int32_t U_EXPORT2
idnaref_compare( const UChar *s1, int32_t length1,
const UChar *s2, int32_t length2,
- int32_t options,
+ int32_t options,
UErrorCode* status){
if(status == NULL || U_FAILURE(*status)){
UChar *b1 = b1Stack, *b2 = b2Stack;
int32_t b1Len, b2Len, b1Capacity = MAX_IDN_BUFFER_SIZE, b2Capacity = MAX_IDN_BUFFER_SIZE;
int32_t result = -1;
-
- UParseError parseError;
+
+ UParseError parseError;
b1Len = idnaref_IDNToASCII(s1, length1, b1, b1Capacity, options, &parseError, status);
if(*status == U_BUFFER_OVERFLOW_ERROR){
}
*status = U_ZERO_ERROR; // reset error
-
+
b1Len = idnaref_IDNToASCII(s1,length1,b1,b1Len, options, &parseError, status);
-
+
}
b2Len = idnaref_IDNToASCII(s2,length2,b2,b2Capacity,options, &parseError, status);
}
*status = U_ZERO_ERROR; // reset error
-
+
b2Len = idnaref_IDNToASCII(s2,length2,b2,b2Len,options, &parseError, status);
-
+
}
// when toASCII is applied all label separators are replaced with FULL_STOP
result = compareCaseInsensitiveASCII(b1,b1Len,b2,b2Len);