]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/intltest/testidn.cpp
ICU-8.11.4.tar.gz
[apple/icu.git] / icuSources / test / intltest / testidn.cpp
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2003-2006, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: testidn.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2003-02-06
14 * created by: Ram Viswanadha
15 *
16 * This program reads the rfc3454_*.txt files,
17 * parses them, and extracts the data for Nameprep conformance.
18 * It then preprocesses it and writes a binary file for efficient use
19 * in various IDNA conversion processes.
20 */
21
22 #include "unicode/utypes.h"
23
24 #if !UCONFIG_NO_IDNA && !UCONFIG_NO_TRANSLITERATION
25
26 #define USPREP_TYPE_NAMES_ARRAY
27
28 #include "unicode/uchar.h"
29 #include "unicode/putil.h"
30 #include "cmemory.h"
31 #include "cstring.h"
32 #include "unicode/udata.h"
33 #include "unewdata.h"
34 #include "uoptions.h"
35 #include "uparse.h"
36 #include "utrie.h"
37 #include "umutex.h"
38 #include "sprpimpl.h"
39 #include "testidna.h"
40 #include "punyref.h"
41 #include <stdlib.h>
42
/* Global flags; neither one is read or written anywhere else in the visible source. */
43 UBool beVerbose=FALSE, haveCopyright=TRUE;
44
45 /* prototypes --------------------------------------------------------------- */
46
47
/* Parses the given profile source file and runs the comparison callbacks on each line. */
48 static void
49 parseMappings(const char *filename, UBool reportError,TestIDNA& test, UErrorCode *pErrorCode);
50
/* Checks one code point's parsed mapping against the data of the loaded profile. */
51 static void
52 compareMapping(uint32_t codepoint, uint32_t* mapping, int32_t mapLength,
53 UStringPrepType option);
54
/* Checks every code point in [start, end] against the expected type. */
55 static void
56 compareFlagsForRange(uint32_t start, uint32_t end,UStringPrepType option);
57
/* Walks 0..0x10FFFF through the trie and logs per-type counts. */
58 static void
59 testAllCodepoints(TestIDNA& test);
60
/* Test object used by the compare functions for error output; set and reset in testData(). */
61 static TestIDNA* pTestIDNA =NULL;
62
/* Profile source file(s); only index 0 is used by testData(). */
63 static const char* fileNames[] = {
64 "NamePrepProfile.txt"
65 };
/* Profile opened in testData(), plus pointers to its trie, indexes and mapping data. */
66 static UStringPrepProfile *profile = NULL;
67 static const UTrie *idnTrie = NULL;
68 static const int32_t *indexes = NULL;
69 static const uint16_t *mappingData = NULL;
70 /* -------------------------------------------------------------------------- */
71
72 /* file definitions */
/* DATA_NAME is the name passed to usprep_open(); DATA_TYPE is not referenced in the visible source. */
73 #define DATA_NAME "uidna"
74 #define DATA_TYPE "icu"
75
/* Directory name appended to the data directory path before the profile file name. */
76 #define MISC_DIR "misc"
77
78 extern int
79 testData(TestIDNA& test) {
80 char* filename = (char*) malloc(strlen(IntlTest::pathToDataDirectory())*5555);
81 //TODO get the srcDir dynamically
82 const char *srcDir=IntlTest::pathToDataDirectory();
83 char *basename=NULL;
84 UErrorCode errorCode=U_ZERO_ERROR;
85 char *saveBasename =NULL;
86
87 profile = usprep_open(NULL, DATA_NAME, &errorCode);
88 if(U_FAILURE(errorCode)){
89 test.errln("Failed to load IDNA data file. " + UnicodeString(u_errorName(errorCode)));
90 return errorCode;
91 }
92
93 idnTrie = &profile->sprepTrie;
94 indexes = profile->indexes;
95 mappingData = profile->mappingData;
96
97 //initialize
98 pTestIDNA = &test;
99
100 /* prepare the filename beginning with the source dir */
101 if(uprv_strchr(srcDir,U_FILE_SEP_CHAR) == NULL){
102 filename[0] = 0x2E;
103 filename[1] = U_FILE_SEP_CHAR;
104 uprv_strcpy(filename+2,srcDir);
105 }else{
106 uprv_strcpy(filename, srcDir);
107 }
108 basename=filename+uprv_strlen(filename);
109 if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
110 *basename++=U_FILE_SEP_CHAR;
111 }
112
113 /* process unassigned */
114 basename=filename+uprv_strlen(filename);
115 if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
116 *basename++=U_FILE_SEP_CHAR;
117 }
118
119 /* first copy misc directory */
120 saveBasename = basename;
121 uprv_strcpy(basename,MISC_DIR);
122 basename = basename + uprv_strlen(MISC_DIR);
123 *basename++=U_FILE_SEP_CHAR;
124
125 /* process unassigned */
126 uprv_strcpy(basename,fileNames[0]);
127 parseMappings(filename,TRUE, test,&errorCode);
128 if(U_FAILURE(errorCode)) {
129 test.errln( "Could not open file %s for reading \n", filename);
130 return errorCode;
131 }
132
133 testAllCodepoints(test);
134
135 usprep_close(profile);
136 pTestIDNA = NULL;
137 free(filename);
138 return errorCode;
139 }
140 U_CDECL_BEGIN
141
142 static void U_CALLCONV
143 strprepProfileLineFn(void * /*context*/,
144 char *fields[][2], int32_t fieldCount,
145 UErrorCode *pErrorCode) {
146 uint32_t mapping[40];
147 char *end, *map;
148 uint32_t code;
149 int32_t length;
150 /*UBool* mapWithNorm = (UBool*) context;*/
151 const char* typeName;
152 uint32_t rangeStart=0,rangeEnd =0;
153
154 if(fieldCount != 3){
155 *pErrorCode = U_INVALID_FORMAT_ERROR;
156 return;
157 }
158
159 typeName = fields[2][0];
160 map = fields[1][0];
161
162 if(uprv_strstr(typeName, usprepTypeNames[USPREP_UNASSIGNED])!=NULL){
163
164 u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode);
165
166 /* store the range */
167 compareFlagsForRange(rangeStart,rangeEnd,USPREP_UNASSIGNED);
168
169 }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_PROHIBITED])!=NULL){
170
171 u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode);
172
173 /* store the range */
174 compareFlagsForRange(rangeStart,rangeEnd,USPREP_PROHIBITED);
175
176 }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_MAP])!=NULL){
177 /* get the character code, field 0 */
178 code=(uint32_t)uprv_strtoul(fields[0][0], &end, 16);
179
180 /* parse the mapping string */
181 length=u_parseCodePoints(map, mapping, sizeof(mapping)/4, pErrorCode);
182
183 /* store the mapping */
184 compareMapping(code,mapping, length,USPREP_MAP);
185
186 }else{
187 *pErrorCode = U_INVALID_FORMAT_ERROR;
188 }
189
190 }
191
192 U_CDECL_END
193
194 static void
195 parseMappings(const char *filename,UBool reportError, TestIDNA& test, UErrorCode *pErrorCode) {
196 char *fields[3][2];
197
198 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
199 return;
200 }
201
202 u_parseDelimitedFile(filename, ';', fields, 3, strprepProfileLineFn, (void*)filename, pErrorCode);
203
204 //fprintf(stdout,"Number of code points that have mappings with length >1 : %i\n",len);
205
206 if(U_FAILURE(*pErrorCode) && (reportError || *pErrorCode!=U_FILE_ACCESS_ERROR)) {
207 test.errln( "testidn error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename, u_errorName(*pErrorCode));
208 }
209 }
210
211
212 static inline UStringPrepType
213 getValues(uint32_t result, int32_t& value, UBool& isIndex){
214
215 UStringPrepType type;
216
217 if(result == 0){
218 /*
219 * Initial value stored in the mapping table
220 * just return USPREP_TYPE_LIMIT .. so that
221 * the source codepoint is copied to the destination
222 */
223 type = USPREP_TYPE_LIMIT;
224 isIndex =FALSE;
225 value = 0;
226 }else if(result >= _SPREP_TYPE_THRESHOLD){
227 type = (UStringPrepType) (result - _SPREP_TYPE_THRESHOLD);
228 isIndex =FALSE;
229 value = 0;
230 }else{
231 /* get the state */
232 type = USPREP_MAP;
233 /* ascertain if the value is index or delta */
234 if(result & 0x02){
235 isIndex = TRUE;
236 value = result >> 2; //mask off the lower 2 bits and shift
237
238 }else{
239 isIndex = FALSE;
240 value = (int16_t)result;
241 value = (value >> 2);
242
243 }
244 if((result>>2) == _SPREP_MAX_INDEX_VALUE){
245 type = USPREP_DELETE;
246 isIndex =FALSE;
247 value = 0;
248 }
249 }
250 return type;
251 }
252
253
254
255 static void
256 testAllCodepoints(TestIDNA& test){
257 /*
258 {
259 UChar str[19] = {
260 0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774,
261 0x070F,//prohibited
262 0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74
263 };
264 uint32_t in[19] = {0};
265 UErrorCode status = U_ZERO_ERROR;
266 int32_t inLength=0, outLength=100;
267 char output[100] = {0};
268 punycode_status error;
269 u_strToUTF32((UChar32*)in,19,&inLength,str,19,&status);
270
271 error= punycode_encode(inLength, in, NULL, (uint32_t*)&outLength, output);
272 printf(output);
273
274 }
275 */
276
277 uint32_t i = 0;
278 int32_t unassigned = 0;
279 int32_t prohibited = 0;
280 int32_t mappedWithNorm = 0;
281 int32_t mapped = 0;
282 int32_t noValueInTrie = 0;
283
284 UStringPrepType type;
285 int32_t value;
286 UBool isIndex = FALSE;
287
288 for(i=0;i<=0x10FFFF;i++){
289 uint32_t result = 0;
290 UTRIE_GET16(idnTrie,i, result);
291 type = getValues(result,value, isIndex);
292 if(type != USPREP_TYPE_LIMIT ){
293 if(type == USPREP_UNASSIGNED){
294 unassigned++;
295 }
296 if(type == USPREP_PROHIBITED){
297 prohibited++;
298 }
299 if(type == USPREP_MAP){
300 mapped++;
301 }
302 }else{
303 noValueInTrie++;
304 if(result > 0){
305 test.errln("The return value for 0x%06X is wrong. %i\n",i,result);
306 }
307 }
308 }
309
310 test.logln("Number of Unassinged code points : %i \n",unassigned);
311 test.logln("Number of Prohibited code points : %i \n",prohibited);
312 test.logln("Number of Mapped code points : %i \n",mapped);
313 test.logln("Number of Mapped with NFKC code points : %i \n",mappedWithNorm);
314 test.logln("Number of code points that have no value in Trie: %i \n",noValueInTrie);
315
316
317 }
318
319 static void
320 compareMapping(uint32_t codepoint, uint32_t* mapping,int32_t mapLength,
321 UStringPrepType type){
322 uint32_t result = 0;
323 UTRIE_GET16(idnTrie,codepoint, result);
324
325 int32_t length=0;
326 UBool isIndex;
327 UStringPrepType retType;
328 int32_t value, index=0, delta=0;
329
330 retType = getValues(result,value,isIndex);
331
332
333 if(type != retType && retType != USPREP_DELETE){
334
335 pTestIDNA->errln( "Did not get the assigned type for codepoint 0x%08X. Expected: %i Got: %i\n",codepoint, USPREP_MAP, type);
336
337 }
338
339 if(isIndex){
340 index = value;
341 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
342 index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
343 length = 1;
344 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
345 index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
346 length = 2;
347 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
348 index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
349 length = 3;
350 }else{
351 length = mappingData[index++];
352 }
353 }else{
354 delta = value;
355 length = (retType == USPREP_DELETE)? 0 : 1;
356 }
357
358 int32_t realLength =0;
359 /* figure out the real length */
360 for(int32_t j=0; j<mapLength; j++){
361 if(mapping[j] > 0xFFFF){
362 realLength +=2;
363 }else{
364 realLength++;
365 }
366 }
367
368 if(realLength != length){
369 pTestIDNA->errln( "Did not get the expected length. Expected: %i Got: %i\n", mapLength, length);
370 }
371
372 if(isIndex){
373 for(int8_t i =0; i< mapLength; i++){
374 if(mapping[i] <= 0xFFFF){
375 if(mappingData[index+i] != (uint16_t)mapping[i]){
376 pTestIDNA->errln("Did not get the expected result. Expected: 0x%04X Got: 0x%04X \n", mapping[i], mappingData[index+i]);
377 }
378 }else{
379 UChar lead = UTF16_LEAD(mapping[i]);
380 UChar trail = UTF16_TRAIL(mapping[i]);
381 if(mappingData[index+i] != lead ||
382 mappingData[index+i+1] != trail){
383 pTestIDNA->errln( "Did not get the expected result. Expected: 0x%04X 0x%04X Got: 0x%04X 0x%04X", lead, trail, mappingData[index+i], mappingData[index+i+1]);
384 }
385 }
386 }
387 }else{
388 if(retType!=USPREP_DELETE && (codepoint-delta) != (uint16_t)mapping[0]){
389 pTestIDNA->errln("Did not get the expected result. Expected: 0x%04X Got: 0x%04X \n", mapping[0],(codepoint-delta));
390 }
391 }
392
393 }
394
395 static void
396 compareFlagsForRange(uint32_t start, uint32_t end,
397 UStringPrepType type){
398
399 uint32_t result =0 ;
400 UStringPrepType retType;
401 UBool isIndex=FALSE;
402 int32_t value=0;
403 /*
404 // supplementary code point
405 UChar __lead16=UTF16_LEAD(0x2323E);
406 int32_t __offset;
407
408 // get data for lead surrogate
409 (result)=_UTRIE_GET_RAW((&idnTrie), index, 0, (__lead16));
410 __offset=(&idnTrie)->getFoldingOffset(result);
411
412 // get the real data from the folded lead/trail units
413 if(__offset>0) {
414 (result)=_UTRIE_GET_RAW((&idnTrie), index, __offset, (0x2323E)&0x3ff);
415 } else {
416 (result)=(uint32_t)((&idnTrie)->initialValue);
417 }
418
419 UTRIE_GET16(&idnTrie,0x2323E, result);
420 */
421 while(start < end+1){
422 UTRIE_GET16(idnTrie,start, result);
423 retType = getValues(result,value,isIndex);
424 if(result > _SPREP_TYPE_THRESHOLD){
425 if(retType != type){
426 pTestIDNA->errln( "FAIL: Did not get the expected type for 0x%06X. Expected: %s Got: %s\n",start,usprepTypeNames[type], usprepTypeNames[retType]);
427 }
428 }else{
429 if(type == USPREP_PROHIBITED && ((result & 0x01) != 0x01)){
430 pTestIDNA->errln( "FAIL: Did not get the expected type for 0x%06X. Expected: %s Got: %s\n",start,usprepTypeNames[type], usprepTypeNames[retType]);
431 }
432 }
433
434 start++;
435 }
436
437 }
438
439
440 #endif /* #if !UCONFIG_NO_IDNA */
441
442 /*
443 * Hey, Emacs, please set the following:
444 *
445 * Local Variables:
446 * indent-tabs-mode: nil
447 * End:
448 *
449 */