1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
6 * Copyright (C) 2003-2013, International Business Machines
7 * Corporation and others. All Rights Reserved.
9 *******************************************************************************
10 * file name: testidn.cpp
12 * tab size: 8 (not used)
15 * created on: 2003-02-06
16 * created by: Ram Viswanadha
18 * This program reads the rfc3454_*.txt files,
19 * parses them, and extracts the data for Nameprep conformance.
20 * It then preprocesses it and writes a binary file for efficient use
21 * in various IDNA conversion processes.
24 #include "unicode/utypes.h"
26 #if !UCONFIG_NO_IDNA && !UCONFIG_NO_TRANSLITERATION
28 #define USPREP_TYPE_NAMES_ARRAY
30 #include "unicode/uchar.h"
31 #include "unicode/putil.h"
34 #include "unicode/udata.h"
35 #include "unicode/utf16.h"
/*
 * File-level option flags: beVerbose starts as FALSE, haveCopyright as TRUE.
 * NOTE(review): neither flag is read anywhere in the portion of the file
 * visible here -- confirm whether they are still needed.
 */
46 UBool beVerbose
=FALSE
, haveCopyright
=TRUE
;
48 /* prototypes --------------------------------------------------------------- */
/*
 * Forward declarations for the helpers defined further down in this file.
 * NOTE(review): the lines carrying the storage class / return type of each
 * prototype are not visible in this listing.
 */
/* Parses one data file and checks every line of it against the loaded profile. */
52 parseMappings(const char *filename
, UBool reportError
,TestIDNA
& test
, UErrorCode
*pErrorCode
);
/* Checks the expected mapping of a single code point. */
55 compareMapping(uint32_t codepoint
, uint32_t* mapping
, int32_t mapLength
,
56 UStringPrepType option
);
/* Checks the expected type of every code point in [start, end]. */
59 compareFlagsForRange(uint32_t start
, uint32_t end
,UStringPrepType option
);
/* Walks all code points 0..0x10FFFF and tallies what the trie returns. */
62 testAllCodepoints(TestIDNA
& test
);
/*
 * Test object used by compareMapping() and compareFlagsForRange() to report
 * failures (they call pTestIDNA->errln). Starts out NULL.
 * NOTE(review): the assignment that points this at the running test is not
 * visible in this listing.
 */
64 static TestIDNA
* pTestIDNA
=NULL
;
/*
 * Names of the data files to parse; testData() uses entry 0.
 * NOTE(review): the initializer list is not visible in this listing.
 */
66 static const char* fileNames
[] = {
/*
 * Views into the StringPrep profile opened by testData(): the trie, the
 * index table and the mapping table. All three are NULL until testData()
 * assigns them from the profile.
 */
69 static const UTrie
*idnTrie
= NULL
;
70 static const int32_t *indexes
= NULL
;
71 static const uint16_t *mappingData
= NULL
;
72 /* -------------------------------------------------------------------------- */
74 /* file definitions */
/* NOTE(review): DATA_TYPE is not referenced in the code visible here. */
75 #define DATA_TYPE "icu"
/* Sub-directory that testData() appends to the data directory path. */
77 #define SPREP_DIR "sprep"
/*
 * Entry point of the data-driven check.
 *
 * Opens the USPREP_RFC3491_NAMEPREP profile, publishes its trie, index table
 * and mapping table through the file-level pointers idnTrie / indexes /
 * mappingData, builds the path "<data dir>/sprep/<fileNames[0]>", runs
 * parseMappings() on that file and finally calls testAllCodepoints().
 *
 * @param test  Test object used for error reporting (errcheckln / errln).
 *
 * NOTE(review): the embedded line numbers skip, so some statements are not
 * visible in this listing -- e.g. the declaration of basename, the early
 * exits after a failure, and whatever releases filename. Confirm against the
 * full file.
 */
80 testData(TestIDNA
& test
) {
82 UErrorCode errorCode
=U_ZERO_ERROR
;
83 char *saveBasename
=NULL
;
/* the profile supplies the trie and tables every later check reads from */
85 LocalUStringPrepProfilePointer
profile(usprep_openByType(USPREP_RFC3491_NAMEPREP
, &errorCode
));
86 if(U_FAILURE(errorCode
)){
87 test
.errcheckln(errorCode
, "Failed to load IDNA data file. " + UnicodeString(u_errorName(errorCode
)));
/*
 * Path buffer sized as strlen(data directory) * 1024 bytes.
 * NOTE(review): the malloc result is not checked in the lines visible here.
 */
91 char* filename
= (char*) malloc(strlen(IntlTest::pathToDataDirectory())*1024);
92 //TODO get the srcDir dynamically
93 const char *srcDir
=IntlTest::pathToDataDirectory();
/* expose the profile's data to the file-level comparison helpers */
95 idnTrie
= &profile
->sprepTrie
;
96 indexes
= profile
->indexes
;
97 mappingData
= profile
->mappingData
;
102 /* prepare the filename beginning with the source dir */
/*
 * srcDir without any file separator: a separator goes to index 1 and srcDir
 * is copied behind it at offset 2 (the write to index 0 is not visible here);
 * otherwise srcDir is copied to the start of the buffer.
 */
103 if(uprv_strchr(srcDir
,U_FILE_SEP_CHAR
) == NULL
){
105 filename
[1] = U_FILE_SEP_CHAR
;
106 uprv_strcpy(filename
+2,srcDir
);
108 uprv_strcpy(filename
, srcDir
);
/* basename = position of the terminating NUL; append a separator if the path does not end with one */
110 basename
=filename
+uprv_strlen(filename
);
111 if(basename
>filename
&& *(basename
-1)!=U_FILE_SEP_CHAR
) {
112 *basename
++=U_FILE_SEP_CHAR
;
115 /* same trailing-separator check as above, repeated */
116 basename
=filename
+uprv_strlen(filename
);
117 if(basename
>filename
&& *(basename
-1)!=U_FILE_SEP_CHAR
) {
118 *basename
++=U_FILE_SEP_CHAR
;
121 /* first append the SPREP_DIR sub-directory and a separator */
122 saveBasename
= basename
;
123 (void)saveBasename
; // Suppress set but not used warning.
124 uprv_strcpy(basename
,SPREP_DIR
);
125 basename
= basename
+ uprv_strlen(SPREP_DIR
);
126 *basename
++=U_FILE_SEP_CHAR
;
128 /* process unassigned */
/* append the first data file name and check every line of that file */
129 uprv_strcpy(basename
,fileNames
[0]);
130 parseMappings(filename
,TRUE
, test
,&errorCode
);
131 if(U_FAILURE(errorCode
)) {
132 test
.errln( "Could not open file %s for reading \n", filename
);
/* independent sweep over the whole code point range */
136 testAllCodepoints(test
);
/*
 * Line callback that parseMappings() hands to u_parseDelimitedFile().
 *
 * Reads the type name from fields[2][0] and dispatches on it:
 *  - contains the USPREP_UNASSIGNED or USPREP_PROHIBITED type name: parses a
 *    code point range from field 0 and checks it with compareFlagsForRange();
 *  - contains the USPREP_MAP type name: parses the code point from field 0
 *    and the mapping string, then checks them with compareMapping().
 * Two paths set *pErrorCode to U_INVALID_FORMAT_ERROR; their conditions are
 * not visible in this listing.
 *
 * @param fields      Parsed fields of the current line; fields[i][0] is used
 *                    as the start of field i's text.
 * @param fieldCount  Not used in the lines visible here.
 * @param pErrorCode  In/out ICU error code.
 *
 * NOTE(review): the declarations of s, map, end, code and length are not
 * visible in this listing.
 */
144 static void U_CALLCONV
145 strprepProfileLineFn(void * /*context*/,
146 char *fields
[][2], int32_t fieldCount
,
147 UErrorCode
*pErrorCode
) {
/* scratch buffer for the parsed mapping: at most 40 code points */
148 uint32_t mapping
[40];
152 /*UBool* mapWithNorm = (UBool*) context;*/
153 const char* typeName
;
154 uint32_t rangeStart
=0,rangeEnd
=0;
/* s = field 0 with leading whitespace skipped */
157 s
= u_skipWhitespace(fields
[0][0]);
159 /* a special directive introduced in 4.2 */
164 *pErrorCode
= U_INVALID_FORMAT_ERROR
;
/* field 2 carries the type name that selects the check below */
168 typeName
= fields
[2][0];
171 if(uprv_strstr(typeName
, usprepTypeNames
[USPREP_UNASSIGNED
])!=NULL
){
173 u_parseCodePointRange(s
, &rangeStart
,&rangeEnd
, pErrorCode
);
175 /* verify the range against the trie */
176 compareFlagsForRange(rangeStart
,rangeEnd
,USPREP_UNASSIGNED
);
178 }else if(uprv_strstr(typeName
, usprepTypeNames
[USPREP_PROHIBITED
])!=NULL
){
180 u_parseCodePointRange(s
, &rangeStart
,&rangeEnd
, pErrorCode
);
182 /* verify the range against the trie */
183 compareFlagsForRange(rangeStart
,rangeEnd
,USPREP_PROHIBITED
);
185 }else if(uprv_strstr(typeName
, usprepTypeNames
[USPREP_MAP
])!=NULL
){
186 /* get the character code, field 0 */
187 code
=(uint32_t)uprv_strtoul(s
, &end
, 16);
189 /* parse the mapping string */
/* capacity is given in elements: sizeof(mapping)/4 for the uint32_t[40] buffer */
190 length
=u_parseCodePoints(map
, mapping
, sizeof(mapping
)/4, pErrorCode
);
192 /* verify the mapping against the trie and mapping table */
193 compareMapping(code
,mapping
, length
,USPREP_MAP
);
196 *pErrorCode
= U_INVALID_FORMAT_ERROR
;
/*
 * Runs u_parseDelimitedFile() over `filename` with ';' as the delimiter and
 * 3 fields per line, using strprepProfileLineFn as the per-line callback.
 *
 * @param filename     Path of the data file; also passed as callback context.
 * @param reportError  When FALSE, a U_FILE_ACCESS_ERROR is not reported;
 *                     every other failure is always reported.
 * @param test         Test object used for error reporting.
 * @param pErrorCode   In/out ICU error code; checked for NULL / prior failure
 *                     before parsing (the body of that guard is not visible
 *                     in this listing).
 *
 * NOTE(review): the declaration of `fields` is not visible in this listing.
 */
204 parseMappings(const char *filename
,UBool reportError
, TestIDNA
& test
, UErrorCode
*pErrorCode
) {
207 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
211 u_parseDelimitedFile(filename
, ';', fields
, 3, strprepProfileLineFn
, (void*)filename
, pErrorCode
);
213 //fprintf(stdout,"Number of code points that have mappings with length >1 : %i\n",len);
/* report unless this is a file-access failure the caller asked to ignore */
215 if(U_FAILURE(*pErrorCode
) && (reportError
|| *pErrorCode
!=U_FILE_ACCESS_ERROR
)) {
216 test
.errln( "testidn error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename
, u_errorName(*pErrorCode
));
/*
 * Decodes one 16-bit trie result into a UStringPrepType plus a payload.
 *
 * - A result at or above _SPREP_TYPE_THRESHOLD encodes the type directly as
 *   (result - _SPREP_TYPE_THRESHOLD).
 * - Otherwise the payload is written to `value` as the result shifted right
 *   by two bits, either unsigned or after reinterpreting it as int16_t.
 * - A shifted result equal to _SPREP_MAX_INDEX_VALUE yields USPREP_DELETE.
 * - One path yields USPREP_TYPE_LIMIT (see the original comment below).
 *
 * @param result   Raw value read from the trie.
 * @param value    Output: decoded payload.
 * @param isIndex  Output flag; its assignments are not visible in this listing.
 *
 * NOTE(review): several conditions and the return statement are missing from
 * this listing (the embedded line numbers skip), so which branch applies to
 * an index and which to a delta cannot be confirmed from here.
 */
221 static inline UStringPrepType
222 getValues(uint32_t result
, int32_t& value
, UBool
& isIndex
){
224 UStringPrepType type
;
228 * Initial value stored in the mapping table
229 * just return USPREP_TYPE_LIMIT .. so that
230 * the source codepoint is copied to the destination
232 type
= USPREP_TYPE_LIMIT
;
235 }else if(result
>= _SPREP_TYPE_THRESHOLD
){
236 type
= (UStringPrepType
) (result
- _SPREP_TYPE_THRESHOLD
);
242 /* ascertain if the value is index or delta */
245 value
= result
>> 2; //mask off the lower 2 bits and shift
/* here the low 16 bits are first reinterpreted as signed, then shifted */
249 value
= (int16_t)result
;
250 value
= (value
>> 2);
/* the maximum index value is reserved to mean "delete this code point" */
253 if((result
>>2) == _SPREP_MAX_INDEX_VALUE
){
254 type
= USPREP_DELETE
;
/*
 * Walks every code point from 0 to 0x10FFFF, reads its value from idnTrie,
 * decodes it with getValues() and inspects the resulting type
 * (USPREP_UNASSIGNED / USPREP_PROHIBITED / USPREP_MAP). One path reports an
 * unexpected trie value through test.errln. The counters are written out
 * with test.logln at the end.
 *
 * @param test  Test object used for logging and error reporting.
 *
 * NOTE(review): this listing omits lines. The declarations of i, result,
 * value, mapped and str are not visible, nor are the bodies of the type
 * branches and the condition guarding the errln call. The statements with
 * embedded line numbers 269-280 (a UTF-16 sample array and a
 * punycode_encode call) have no visible surrounding context -- confirm
 * whether they are live code or part of a disabled section.
 */
265 testAllCodepoints(TestIDNA
& test
){
269 0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774,
271 0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74
273 uint32_t in[19] = {0};
274 UErrorCode status = U_ZERO_ERROR;
275 int32_t inLength=0, outLength=100;
276 char output[100] = {0};
277 punycode_status error;
278 u_strToUTF32((UChar32*)in,19,&inLength,str,19,&status);
280 error= punycode_encode(inLength, in, NULL, (uint32_t*)&outLength, output);
/* counters reported at the end of the sweep */
287 int32_t unassigned
= 0;
288 int32_t prohibited
= 0;
289 int32_t mappedWithNorm
= 0;
291 int32_t noValueInTrie
= 0;
293 UStringPrepType type
;
295 UBool isIndex
= FALSE
;
/* full Unicode code point range, inclusive */
297 for(i
=0;i
<=0x10FFFF;i
++){
299 UTRIE_GET16(idnTrie
,i
, result
);
300 type
= getValues(result
,value
, isIndex
);
/* USPREP_TYPE_LIMIT is excluded from the type checks below */
301 if(type
!= USPREP_TYPE_LIMIT
){
302 if(type
== USPREP_UNASSIGNED
){
305 if(type
== USPREP_PROHIBITED
){
308 if(type
== USPREP_MAP
){
314 test
.errln("The return value for 0x%06X is wrong. %i\n",i
,result
);
319 test
.logln("Number of Unassinged code points : %i \n",unassigned
);
320 test
.logln("Number of Prohibited code points : %i \n",prohibited
);
321 test
.logln("Number of Mapped code points : %i \n",mapped
);
322 test
.logln("Number of Mapped with NFKC code points : %i \n",mappedWithNorm
);
323 test
.logln("Number of code points that have no value in Trie: %i \n",noValueInTrie
);
/*
 * Checks one expected mapping, as parsed from the data file, against what
 * the loaded profile holds for `codepoint`.
 *
 * Steps visible in this listing:
 *  1. Read the trie value for codepoint and decode it with getValues().
 *  2. Report if the decoded type differs from `type` and is not USPREP_DELETE.
 *  3. Determine the stored mapping length: for an index, from the index range
 *     it falls into (indexes[_SPREP_*_MAPPING_INDEX_START]) or, past the last
 *     range checked, from mappingData[index++]; on another path the length is
 *     0 for USPREP_DELETE and 1 otherwise.
 *  4. Compute realLength from the expected mapping, singling out code points
 *     above 0xFFFF, and compare it with the stored length.
 *  5. Compare the stored UTF-16 units with the expected code points; values
 *     above 0xFFFF are compared as a lead/trail surrogate pair.
 *  6. On another path, require codepoint - delta == (uint16_t)mapping[0]
 *     unless the type is USPREP_DELETE.
 *
 * @param codepoint  Source code point.
 * @param mapping    Expected mapping as code points.
 * @param mapLength  Number of entries in mapping.
 * @param type       Expected StringPrep type.
 *
 * Failures are reported through the file-level pTestIDNA.
 *
 * NOTE(review): this listing omits lines. The declarations of result,
 * isIndex and length, the index/delta branch conditions and most branch
 * bodies are not visible.
 */
329 compareMapping(uint32_t codepoint
, uint32_t* mapping
,int32_t mapLength
,
330 UStringPrepType type
){
332 UTRIE_GET16(idnTrie
,codepoint
, result
);
336 UStringPrepType retType
;
337 int32_t value
, index
=0, delta
=0;
339 retType
= getValues(result
,value
,isIndex
);
/* a deletion is tolerated in place of the expected type */
342 if(type
!= retType
&& retType
!= USPREP_DELETE
){
/*
 * NOTE(review): the message prints USPREP_MAP as "Expected" and `type` as
 * "Got", while the check above compares `type` with `retType` -- confirm the
 * intended arguments.
 */
344 pTestIDNA
->errln( "Did not get the assigned type for codepoint 0x%08X. Expected: %i Got: %i\n",codepoint
, USPREP_MAP
, type
);
/* classify the index by the one-, two- and three-UChar mapping ranges (branch bodies not visible here) */
350 if(index
>= indexes
[_SPREP_ONE_UCHAR_MAPPING_INDEX_START
] &&
351 index
< indexes
[_SPREP_TWO_UCHARS_MAPPING_INDEX_START
]){
353 }else if(index
>= indexes
[_SPREP_TWO_UCHARS_MAPPING_INDEX_START
] &&
354 index
< indexes
[_SPREP_THREE_UCHARS_MAPPING_INDEX_START
]){
356 }else if(index
>= indexes
[_SPREP_THREE_UCHARS_MAPPING_INDEX_START
] &&
357 index
< indexes
[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START
]){
/* here the length is stored in the table itself, in front of the mapped units */
360 length
= mappingData
[index
++];
/* a deletion maps to nothing, anything else on this path to a single unit */
364 length
= (retType
== USPREP_DELETE
)? 0 : 1;
367 int32_t realLength
=0;
368 /* figure out the real length */
369 for(int32_t j
=0; j
<mapLength
; j
++){
370 if(mapping
[j
] > 0xFFFF){
/*
 * NOTE(review): the check uses realLength, but the message prints mapLength
 * as "Expected" -- confirm which value should be shown.
 */
377 if(realLength
!= length
){
378 pTestIDNA
->errln( "Did not get the expected length. Expected: %i Got: %i\n", mapLength
, length
);
/* NOTE(review): the loop counter is int8_t while mapLength is int32_t. */
382 for(int8_t i
=0; i
< mapLength
; i
++){
/* BMP code point: one UTF-16 unit must match */
383 if(mapping
[i
] <= 0xFFFF){
384 if(mappingData
[index
+i
] != (uint16_t)mapping
[i
]){
385 pTestIDNA
->errln("Did not get the expected result. Expected: 0x%04X Got: 0x%04X \n", mapping
[i
], mappingData
[index
+i
]);
/*
 * Supplementary code point: compare both surrogates.
 * NOTE(review): in the lines visible here index+i is not adjusted after a
 * surrogate pair has used two units -- confirm against the full file.
 */
388 UChar lead
= U16_LEAD(mapping
[i
]);
389 UChar trail
= U16_TRAIL(mapping
[i
]);
390 if(mappingData
[index
+i
] != lead
||
391 mappingData
[index
+i
+1] != trail
){
392 pTestIDNA
->errln( "Did not get the expected result. Expected: 0x%04X 0x%04X Got: 0x%04X 0x%04X", lead
, trail
, mappingData
[index
+i
], mappingData
[index
+i
+1]);
/* delta form: the source code point minus delta must equal the single mapped unit */
397 if(retType
!=USPREP_DELETE
&& (codepoint
-delta
) != (uint16_t)mapping
[0]){
398 pTestIDNA
->errln("Did not get the expected result. Expected: 0x%04X Got: 0x%04X \n", mapping
[0],(codepoint
-delta
));
/*
 * Checks that every code point in [start, end] carries the expected type in
 * the loaded profile.
 *
 * For each code point the trie value is read and decoded with getValues().
 * A mismatch is reported through the file-level pTestIDNA, printing the
 * expected and returned type names from usprepTypeNames. For
 * USPREP_PROHIBITED, a result whose lowest bit is not set is reported too.
 *
 * @param start  First code point of the range.
 * @param end    Last code point of the range (inclusive: the loop runs while
 *               start < end+1).
 * @param type   Expected StringPrep type.
 *
 * NOTE(review): this listing omits lines. The declarations of result, value,
 * isIndex, index and __offset, the increment of start and the inner condition
 * in front of the first errln are not visible. The statements with embedded
 * line numbers 413-428 (a hand-expanded trie lookup for 0x2323E) have no
 * visible surrounding context -- confirm whether they are live code or part
 * of a disabled section.
 */
405 compareFlagsForRange(uint32_t start
, uint32_t end
,
406 UStringPrepType type
){
409 UStringPrepType retType
;
413 // supplementary code point
414 UChar __lead16=U16_LEAD(0x2323E);
417 // get data for lead surrogate
418 (result)=_UTRIE_GET_RAW((&idnTrie), index, 0, (__lead16));
419 __offset=(&idnTrie)->getFoldingOffset(result);
421 // get the real data from the folded lead/trail units
423 (result)=_UTRIE_GET_RAW((&idnTrie), index, __offset, (0x2323E)&0x3ff);
425 (result)=(uint32_t)((&idnTrie)->initialValue);
428 UTRIE_GET16(&idnTrie,0x2323E, result);
/* NOTE(review): end is uint32_t, so end+1 wraps to 0 when end is 0xFFFFFFFF. */
430 while(start
< end
+1){
431 UTRIE_GET16(idnTrie
,start
, result
);
432 retType
= getValues(result
,value
,isIndex
);
/*
 * NOTE(review): this uses '>' against _SPREP_TYPE_THRESHOLD, whereas
 * getValues() uses '>=' -- confirm the boundary value is handled as intended.
 */
433 if(result
> _SPREP_TYPE_THRESHOLD
){
435 pTestIDNA
->errln( "FAIL: Did not get the expected type for 0x%06X. Expected: %s Got: %s\n",start
,usprepTypeNames
[type
], usprepTypeNames
[retType
]);
/* a prohibited code point is expected to have the lowest bit of its trie value set */
438 if(type
== USPREP_PROHIBITED
&& ((result
& 0x01) != 0x01)){
439 pTestIDNA
->errln( "FAIL: Did not get the expected type for 0x%06X. Expected: %s Got: %s\n",start
,usprepTypeNames
[type
], usprepTypeNames
[retType
]);
449 #endif /* #if !UCONFIG_NO_IDNA && !UCONFIG_NO_TRANSLITERATION */
452 * Hey, Emacs, please set the following:
455 * indent-tabs-mode: nil