]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
374ca955 A |
3 | /* |
4 | ******************************************************************************* | |
5 | * | |
51004dcb | 6 | * Copyright (C) 2003-2012, International Business Machines |
374ca955 A |
7 | * Corporation and others. All Rights Reserved. |
8 | * | |
9 | ******************************************************************************* | |
10 | * file name: spreptst.c | |
f3c0d7a5 | 11 | * encoding: UTF-8 |
374ca955 A |
12 | * tab size: 8 (not used) |
13 | * indentation:4 | |
14 | * | |
15 | * created on: 2003jul11 | |
16 | * created by: Ram Viswanadha | |
17 | */ | |
18 | #define USPREP_TYPE_NAMES_ARRAY | |
19 | ||
20 | #include "unicode/utypes.h" | |
21 | ||
22 | #if !UCONFIG_NO_IDNA | |
23 | ||
24 | #include "unicode/ustring.h" | |
25 | #include "unicode/putil.h" | |
26 | #include "cintltst.h" | |
27 | #include "unicode/usprep.h" | |
4388f060 | 28 | #include "unicode/utf16.h" |
374ca955 A |
29 | #include "sprpimpl.h" |
30 | #include "uparse.h" | |
31 | #include "cmemory.h" | |
32 | #include "ustr_imp.h" | |
33 | #include "cstring.h" | |
34 | ||
35 | static void | |
36 | parseMappings(const char *filename, UStringPrepProfile* data, UBool reportError, UErrorCode *pErrorCode); | |
37 | ||
38 | static void | |
39 | compareMapping(UStringPrepProfile* data, uint32_t codepoint, uint32_t* mapping, int32_t mapLength, | |
40 | UStringPrepType option); | |
41 | ||
42 | static void | |
43 | compareFlagsForRange(UStringPrepProfile* data, uint32_t start, uint32_t end,UStringPrepType option); | |
44 | ||
45 | void | |
46 | doStringPrepTest(const char* binFileName, const char* txtFileName, int32_t options, UErrorCode* errorCode); | |
47 | ||
48 | static void U_CALLCONV | |
49 | strprepProfileLineFn(void *context, | |
50 | char *fields[][2], int32_t fieldCount, | |
51 | UErrorCode *pErrorCode) { | |
340931cb | 52 | (void)fieldCount; // suppress compiler warnings about unused variable |
374ca955 A |
53 | uint32_t mapping[40]; |
54 | char *end, *map; | |
55 | uint32_t code; | |
56 | int32_t length; | |
57 | UStringPrepProfile* data = (UStringPrepProfile*) context; | |
58 | const char* typeName; | |
59 | uint32_t rangeStart=0,rangeEnd =0; | |
60 | ||
61 | typeName = fields[2][0]; | |
62 | map = fields[1][0]; | |
63 | ||
64 | if(strstr(typeName, usprepTypeNames[USPREP_UNASSIGNED])!=NULL){ | |
65 | ||
66 | u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode); | |
67 | ||
68 | /* store the range */ | |
69 | compareFlagsForRange(data, rangeStart,rangeEnd,USPREP_UNASSIGNED); | |
70 | ||
71 | }else if(strstr(typeName, usprepTypeNames[USPREP_PROHIBITED])!=NULL){ | |
72 | ||
73 | u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode); | |
74 | ||
75 | /* store the range */ | |
76 | compareFlagsForRange(data, rangeStart,rangeEnd,USPREP_PROHIBITED); | |
77 | ||
78 | }else if(strstr(typeName, usprepTypeNames[USPREP_MAP])!=NULL){ | |
79 | /* get the character code, field 0 */ | |
80 | code=(uint32_t)uprv_strtoul(fields[0][0], &end, 16); | |
81 | ||
82 | /* parse the mapping string */ | |
83 | length=u_parseCodePoints(map, mapping, sizeof(mapping)/4, pErrorCode); | |
84 | ||
85 | /* compare the mapping */ | |
86 | compareMapping(data, code,mapping, length,USPREP_MAP); | |
87 | }else{ | |
88 | *pErrorCode = U_INVALID_FORMAT_ERROR; | |
89 | } | |
90 | ||
91 | } | |
92 | ||
93 | ||
94 | ||
95 | static void | |
96 | parseMappings(const char *filename, UStringPrepProfile* data, UBool reportError, UErrorCode *pErrorCode) { | |
97 | char *fields[3][2]; | |
98 | ||
99 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | |
100 | return; | |
101 | } | |
102 | ||
103 | u_parseDelimitedFile(filename, ';', fields, 3, strprepProfileLineFn, (void*)data, pErrorCode); | |
104 | ||
105 | /*fprintf(stdout,"Number of code points that have mappings with length >1 : %i\n",len);*/ | |
106 | ||
107 | if(U_FAILURE(*pErrorCode) && (reportError || *pErrorCode!=U_FILE_ACCESS_ERROR)) { | |
108 | log_err( "testidn error: u_parseDelimitedFile(\"%s\") failed - %s\n", filename, u_errorName(*pErrorCode)); | |
109 | } | |
110 | } | |
111 | ||
112 | ||
113 | static UStringPrepType | |
114 | getValues(uint32_t result, int32_t* value, UBool* isIndex){ | |
115 | ||
116 | UStringPrepType type; | |
117 | if(result == 0){ | |
118 | /* | |
119 | * Initial value stored in the mapping table | |
120 | * just return USPREP_TYPE_LIMIT .. so that | |
121 | * the source codepoint is copied to the destination | |
122 | */ | |
123 | type = USPREP_TYPE_LIMIT; | |
124 | }else if(result >= _SPREP_TYPE_THRESHOLD){ | |
125 | type = (UStringPrepType) (result - _SPREP_TYPE_THRESHOLD); | |
126 | }else{ | |
127 | /* get the type */ | |
128 | type = USPREP_MAP; | |
129 | /* ascertain if the value is index or delta */ | |
130 | if(result & 0x02){ | |
131 | *isIndex = TRUE; | |
132 | *value = result >> 2; | |
133 | ||
134 | }else{ | |
135 | *isIndex = FALSE; | |
136 | *value = (int16_t)result; | |
137 | *value = (*value >> 2); | |
138 | ||
139 | } | |
140 | if((result>>2) == _SPREP_MAX_INDEX_VALUE){ | |
141 | type = USPREP_DELETE; | |
142 | isIndex =FALSE; | |
143 | value = 0; | |
144 | } | |
145 | } | |
146 | return type; | |
147 | } | |
148 | ||
149 | static void | |
150 | compareMapping(UStringPrepProfile* data, uint32_t codepoint, uint32_t* mapping,int32_t mapLength, | |
151 | UStringPrepType type){ | |
152 | uint32_t result = 0; | |
153 | int32_t length=0; | |
154 | UBool isIndex = FALSE; | |
155 | UStringPrepType retType; | |
51004dcb | 156 | int32_t value=0, idx=0, delta=0; |
374ca955 A |
157 | int32_t* indexes = data->indexes; |
158 | UTrie trie = data->sprepTrie; | |
159 | const uint16_t* mappingData = data->mappingData; | |
160 | int32_t realLength =0; | |
161 | int32_t j=0; | |
162 | int8_t i=0; | |
163 | ||
164 | UTRIE_GET16(&trie, codepoint, result); | |
165 | retType = getValues(result,&value,&isIndex); | |
166 | ||
167 | ||
168 | if(type != retType && retType != USPREP_DELETE){ | |
169 | ||
170 | log_err( "Did not get the assigned type for codepoint 0x%08X. Expected: %i Got: %i\n",codepoint, USPREP_MAP, type); | |
171 | ||
172 | } | |
173 | ||
174 | if(isIndex){ | |
51004dcb A |
175 | idx = value; |
176 | if(idx >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] && | |
177 | idx < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){ | |
374ca955 | 178 | length = 1; |
51004dcb A |
179 | }else if(idx >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] && |
180 | idx < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){ | |
374ca955 | 181 | length = 2; |
51004dcb A |
182 | }else if(idx >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] && |
183 | idx < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){ | |
374ca955 A |
184 | length = 3; |
185 | }else{ | |
51004dcb | 186 | length = mappingData[idx++]; |
374ca955 A |
187 | } |
188 | }else{ | |
189 | delta = value; | |
190 | length = (retType == USPREP_DELETE)? 0 : 1; | |
191 | } | |
192 | ||
193 | /* figure out the real length */ | |
194 | for(j=0; j<mapLength; j++){ | |
195 | if(mapping[j] > 0xFFFF){ | |
196 | realLength +=2; | |
197 | }else{ | |
198 | realLength++; | |
199 | } | |
200 | } | |
201 | ||
202 | if(realLength != length){ | |
203 | log_err( "Did not get the expected length. Expected: %i Got: %i\n", mapLength, length); | |
204 | } | |
205 | ||
206 | if(isIndex){ | |
207 | for(i =0; i< mapLength; i++){ | |
208 | if(mapping[i] <= 0xFFFF){ | |
51004dcb A |
209 | if(mappingData[idx+i] != (uint16_t)mapping[i]){ |
210 | log_err("Did not get the expected result. Expected: 0x%04X Got: 0x%04X \n", mapping[i], mappingData[idx+i]); | |
374ca955 A |
211 | } |
212 | }else{ | |
4388f060 A |
213 | UChar lead = U16_LEAD(mapping[i]); |
214 | UChar trail = U16_TRAIL(mapping[i]); | |
51004dcb A |
215 | if(mappingData[idx+i] != lead || |
216 | mappingData[idx+i+1] != trail){ | |
217 | log_err( "Did not get the expected result. Expected: 0x%04X 0x%04X Got: 0x%04X 0x%04X\n", lead, trail, mappingData[idx+i], mappingData[idx+i+1]); | |
374ca955 A |
218 | } |
219 | } | |
220 | } | |
221 | }else{ | |
222 | if(retType!=USPREP_DELETE && (codepoint-delta) != (uint16_t)mapping[0]){ | |
223 | log_err("Did not get the expected result. Expected: 0x%04X Got: 0x%04X \n", mapping[0],(codepoint-delta)); | |
224 | } | |
225 | } | |
226 | ||
227 | } | |
228 | ||
229 | static void | |
230 | compareFlagsForRange(UStringPrepProfile* data, | |
231 | uint32_t start, uint32_t end, | |
232 | UStringPrepType type){ | |
233 | ||
234 | uint32_t result =0 ; | |
235 | UStringPrepType retType; | |
236 | UBool isIndex=FALSE; | |
237 | int32_t value=0; | |
238 | UTrie trie = data->sprepTrie; | |
239 | /* | |
240 | // supplementary code point | |
4388f060 | 241 | UChar __lead16=U16_LEAD(0x2323E); |
374ca955 A |
242 | int32_t __offset; |
243 | ||
244 | // get data for lead surrogate | |
245 | (result)=_UTRIE_GET_RAW((&idnTrie), index, 0, (__lead16)); | |
246 | __offset=(&idnTrie)->getFoldingOffset(result); | |
247 | ||
248 | // get the real data from the folded lead/trail units | |
249 | if(__offset>0) { | |
250 | (result)=_UTRIE_GET_RAW((&idnTrie), index, __offset, (0x2323E)&0x3ff); | |
251 | } else { | |
252 | (result)=(uint32_t)((&idnTrie)->initialValue); | |
253 | } | |
254 | ||
255 | UTRIE_GET16(&idnTrie,0x2323E, result); | |
256 | */ | |
257 | while(start < end+1){ | |
258 | UTRIE_GET16(&trie,start, result); | |
259 | retType = getValues(result, &value, &isIndex); | |
260 | if(result > _SPREP_TYPE_THRESHOLD){ | |
261 | if(retType != type){ | |
262 | log_err( "FAIL: Did not get the expected type for 0x%06X. Expected: %s Got: %s\n",start,usprepTypeNames[type], usprepTypeNames[retType]); | |
263 | } | |
264 | }else{ | |
265 | if(type == USPREP_PROHIBITED && ((result & 0x01) != 0x01)){ | |
266 | log_err( "FAIL: Did not get the expected type for 0x%06X. Expected: %s Got: %s\n",start,usprepTypeNames[type], usprepTypeNames[retType]); | |
267 | } | |
268 | } | |
269 | ||
270 | start++; | |
271 | } | |
272 | ||
273 | } | |
274 | ||
275 | void | |
276 | doStringPrepTest(const char* binFileName, const char* txtFileName, int32_t options, UErrorCode* errorCode){ | |
340931cb | 277 | (void)options; // suppress compiler warnings about unused variable |
374ca955 A |
278 | const char *testdatapath = loadTestData(errorCode); |
279 | const char *srcdatapath = NULL; | |
280 | const char *relativepath = NULL; | |
281 | char *filename = NULL; | |
282 | UStringPrepProfile* profile = NULL; | |
283 | ||
284 | #ifdef U_TOPSRCDIR | |
285 | srcdatapath = U_TOPSRCDIR; | |
286 | relativepath = U_FILE_SEP_STRING"test"U_FILE_SEP_STRING"testdata"U_FILE_SEP_STRING; | |
287 | #else | |
288 | srcdatapath = ctest_dataOutDir(); | |
289 | relativepath = ".."U_FILE_SEP_STRING".."U_FILE_SEP_STRING"test"U_FILE_SEP_STRING"testdata"U_FILE_SEP_STRING; | |
290 | #endif | |
291 | ||
374ca955 A |
292 | profile = usprep_open(testdatapath, binFileName, errorCode); |
293 | ||
46f4442e A |
294 | if(*errorCode == U_FILE_ACCESS_ERROR) { |
295 | log_data_err("Failed to load %s data file. Error: %s \n", binFileName, u_errorName(*errorCode)); | |
296 | return; | |
297 | } else if(U_FAILURE(*errorCode)){ | |
374ca955 A |
298 | log_err("Failed to load %s data file. Error: %s \n", binFileName, u_errorName(*errorCode)); |
299 | return; | |
300 | } | |
4388f060 | 301 | filename = (char*) malloc(strlen(srcdatapath)+strlen(relativepath)+strlen(txtFileName)+10 ); |
374ca955 A |
302 | /* open and load the txt file */ |
303 | strcpy(filename,srcdatapath); | |
304 | strcat(filename,relativepath); | |
305 | strcat(filename,txtFileName); | |
306 | ||
307 | parseMappings(filename,profile, TRUE,errorCode); | |
308 | ||
309 | free(filename); | |
310 | } | |
311 | #endif | |
312 | /* | |
313 | * Hey, Emacs, please set the following: | |
314 | * | |
315 | * Local Variables: | |
316 | * indent-tabs-mode: nil | |
317 | * End: | |
318 | * | |
319 | */ |