]>
Commit | Line | Data |
---|---|---|
729e4ab9 A |
1 | /* |
2 | ******************************************************************************* | |
3 | * | |
4 | * Copyright (C) 2009-2010, International Business Machines | |
5 | * Corporation and others. All Rights Reserved. | |
6 | * | |
7 | ******************************************************************************* | |
8 | * file name: bidiconf.cpp | |
9 | * encoding: US-ASCII | |
10 | * tab size: 8 (not used) | |
11 | * indentation:4 | |
12 | * | |
13 | * created on: 2009oct16 | |
14 | * created by: Markus W. Scherer | |
15 | * | |
16 | * BiDi conformance test, using the Unicode BidiTest.txt file. | |
17 | */ | |
18 | ||
19 | #include <stdio.h> | |
20 | #include <stdlib.h> | |
21 | #include <string.h> | |
22 | #include "unicode/utypes.h" | |
23 | #include "unicode/ubidi.h" | |
24 | #include "unicode/errorcode.h" | |
25 | #include "unicode/localpointer.h" | |
26 | #include "unicode/putil.h" | |
27 | #include "unicode/unistr.h" | |
28 | #include "intltest.h" | |
29 | #include "uparse.h" | |
30 | ||
31 | class BiDiConformanceTest : public IntlTest { | |
32 | public: | |
33 | BiDiConformanceTest() : | |
34 | directionBits(0), lineNumber(0), levelsCount(0), orderingCount(0), | |
35 | errorCount(0) {} | |
36 | ||
37 | void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL); | |
38 | ||
39 | void TestBidiTest(); | |
40 | private: | |
41 | char *getUnidataPath(char path[]); | |
42 | ||
43 | UBool parseLevels(const char *start); | |
44 | UBool parseOrdering(const char *start); | |
45 | UBool parseInputStringFromBiDiClasses(const char *&start); | |
46 | ||
47 | UBool checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount, | |
48 | const char *paraLevelName); | |
49 | UBool checkOrdering(UBiDi *ubidi, const char *paraLevelName); | |
50 | ||
51 | void printErrorLine(const char *paraLevelName); | |
52 | ||
53 | char line[10000]; | |
54 | UBiDiLevel levels[1000]; | |
55 | uint32_t directionBits; | |
56 | int32_t ordering[1000]; | |
57 | int32_t lineNumber; | |
58 | int32_t levelsCount; | |
59 | int32_t orderingCount; | |
60 | int32_t errorCount; | |
61 | UnicodeString inputString; | |
62 | }; | |
63 | ||
64 | extern IntlTest *createBiDiConformanceTest() { | |
65 | return new BiDiConformanceTest(); | |
66 | } | |
67 | ||
68 | void BiDiConformanceTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) { | |
69 | if(exec) { | |
70 | logln("TestSuite BiDiConformanceTest: "); | |
71 | } | |
72 | switch (index) { | |
73 | TESTCASE(0, TestBidiTest); | |
74 | default: | |
75 | name=""; | |
76 | break; // needed to end the loop | |
77 | } | |
78 | } | |
79 | ||
80 | // TODO: Move to a common place (IntlTest?) to avoid duplication with UnicodeTest (ucdtest.cpp). | |
81 | char *BiDiConformanceTest::getUnidataPath(char path[]) { | |
82 | IcuTestErrorCode errorCode(*this, "getUnidataPath"); | |
83 | const int kUnicodeDataTxtLength=15; // strlen("UnicodeData.txt") | |
84 | ||
85 | // Look inside ICU_DATA first. | |
86 | strcpy(path, pathToDataDirectory()); | |
87 | strcat(path, "unidata" U_FILE_SEP_STRING "UnicodeData.txt"); | |
88 | FILE *f=fopen(path, "r"); | |
89 | if(f!=NULL) { | |
90 | fclose(f); | |
91 | *(strchr(path, 0)-kUnicodeDataTxtLength)=0; // Remove the basename. | |
92 | return path; | |
93 | } | |
94 | ||
95 | // As a fallback, try to guess where the source data was located | |
96 | // at the time ICU was built, and look there. | |
97 | # ifdef U_TOPSRCDIR | |
98 | strcpy(path, U_TOPSRCDIR U_FILE_SEP_STRING "data"); | |
99 | # else | |
100 | strcpy(path, loadTestData(errorCode)); | |
101 | strcat(path, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." | |
102 | U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." | |
103 | U_FILE_SEP_STRING "data"); | |
104 | # endif | |
105 | strcat(path, U_FILE_SEP_STRING); | |
106 | strcat(path, "unidata" U_FILE_SEP_STRING "UnicodeData.txt"); | |
107 | f=fopen(path, "r"); | |
108 | if(f!=NULL) { | |
109 | fclose(f); | |
110 | *(strchr(path, 0)-kUnicodeDataTxtLength)=0; // Remove the basename. | |
111 | return path; | |
112 | } | |
113 | return NULL; | |
114 | } | |
115 | ||
// Defines the class LocalStdioFilePointer, which TestBidiTest() uses to hold
// the FILE * returned by fopen() (via isNull() and getAlias()).
// Presumably the wrapper calls fclose() on the FILE when it goes out of
// scope, as the macro arguments suggest -- confirm in unicode/localpointer.h.
116 | U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose); | |
117 | ||
118 | UBool BiDiConformanceTest::parseLevels(const char *start) { | |
119 | directionBits=0; | |
120 | levelsCount=0; | |
121 | while(*start!=0 && *(start=u_skipWhitespace(start))!=0) { | |
122 | if(*start=='x') { | |
123 | levels[levelsCount++]=UBIDI_DEFAULT_LTR; | |
124 | ++start; | |
125 | } else { | |
126 | char *end; | |
127 | uint32_t value=(uint32_t)strtoul(start, &end, 10); | |
128 | if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0) || value>(UBIDI_MAX_EXPLICIT_LEVEL+1)) { | |
129 | errln("@Levels: parse error at %s", start); | |
130 | return FALSE; | |
131 | } | |
132 | levels[levelsCount++]=(UBiDiLevel)value; | |
133 | directionBits|=(1<<(value&1)); | |
134 | start=end; | |
135 | } | |
136 | } | |
137 | return TRUE; | |
138 | } | |
139 | ||
140 | UBool BiDiConformanceTest::parseOrdering(const char *start) { | |
141 | orderingCount=0; | |
142 | while(*start!=0 && *(start=u_skipWhitespace(start))!=0) { | |
143 | char *end; | |
144 | uint32_t value=(uint32_t)strtoul(start, &end, 10); | |
145 | if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0) || value>=1000) { | |
146 | errln("@Reorder: parse error at %s", start); | |
147 | return FALSE; | |
148 | } | |
149 | ordering[orderingCount++]=(int32_t)value; | |
150 | start=end; | |
151 | } | |
152 | return TRUE; | |
153 | } | |
154 | ||
155 | static const UChar charFromBiDiClass[U_CHAR_DIRECTION_COUNT]={ | |
156 | 0x6c, // 'l' for L | |
157 | 0x52, // 'R' for R | |
158 | 0x33, // '3' for EN | |
159 | 0x2d, // '-' for ES | |
160 | 0x25, // '%' for ET | |
161 | 0x39, // '9' for AN | |
162 | 0x2c, // ',' for CS | |
163 | 0x2f, // '/' for B | |
164 | 0x5f, // '_' for S | |
165 | 0x20, // ' ' for WS | |
166 | 0x3d, // '=' for ON | |
167 | 0x65, // 'e' for LRE | |
168 | 0x6f, // 'o' for LRO | |
169 | 0x41, // 'A' for AL | |
170 | 0x45, // 'E' for RLE | |
171 | 0x4f, // 'O' for RLO | |
172 | 0x2a, // '*' for PDF | |
173 | 0x60, // '`' for NSM | |
174 | 0x7c // '|' for BN | |
175 | }; | |
176 | ||
177 | U_CDECL_BEGIN | |
178 | ||
179 | static UCharDirection U_CALLCONV | |
180 | biDiConfUBiDiClassCallback(const void * /*context*/, UChar32 c) { | |
181 | for(int i=0; i<U_CHAR_DIRECTION_COUNT; ++i) { | |
182 | if(c==charFromBiDiClass[i]) { | |
183 | return (UCharDirection)i; | |
184 | } | |
185 | } | |
186 | // Character not in our hardcoded table. | |
187 | // Should not occur during testing. | |
188 | return U_BIDI_CLASS_DEFAULT; | |
189 | } | |
190 | ||
191 | U_CDECL_END | |
192 | ||
193 | static const int8_t biDiClassNameLengths[U_CHAR_DIRECTION_COUNT+1]={ | |
194 | 1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 3, 3, 2, 3, 3, 3, 3, 2, 0 | |
195 | }; | |
196 | ||
197 | UBool BiDiConformanceTest::parseInputStringFromBiDiClasses(const char *&start) { | |
198 | inputString.remove(); | |
199 | /* | |
200 | * Lengthy but fast BiDi class parser. | |
201 | * A simple parser could terminate or extract the name string and use | |
202 | * int32_t biDiClassInt=u_getPropertyValueEnum(UCHAR_BIDI_CLASS, bidiClassString); | |
203 | * but that makes this test take significantly more time. | |
204 | */ | |
205 | while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') { | |
206 | UCharDirection biDiClass=U_CHAR_DIRECTION_COUNT; | |
207 | // Compare each character once until we have a match on | |
208 | // a complete, short BiDi class name. | |
209 | if(start[0]=='L') { | |
210 | if(start[1]=='R') { | |
211 | if(start[2]=='E') { | |
212 | biDiClass=U_LEFT_TO_RIGHT_EMBEDDING; | |
213 | } else if(start[2]=='O') { | |
214 | biDiClass=U_LEFT_TO_RIGHT_OVERRIDE; | |
215 | } | |
216 | } else { | |
217 | biDiClass=U_LEFT_TO_RIGHT; | |
218 | } | |
219 | } else if(start[0]=='R') { | |
220 | if(start[1]=='L') { | |
221 | if(start[2]=='E') { | |
222 | biDiClass=U_RIGHT_TO_LEFT_EMBEDDING; | |
223 | } else if(start[2]=='O') { | |
224 | biDiClass=U_RIGHT_TO_LEFT_OVERRIDE; | |
225 | } | |
226 | } else { | |
227 | biDiClass=U_RIGHT_TO_LEFT; | |
228 | } | |
229 | } else if(start[0]=='E') { | |
230 | if(start[1]=='N') { | |
231 | biDiClass=U_EUROPEAN_NUMBER; | |
232 | } else if(start[1]=='S') { | |
233 | biDiClass=U_EUROPEAN_NUMBER_SEPARATOR; | |
234 | } else if(start[1]=='T') { | |
235 | biDiClass=U_EUROPEAN_NUMBER_TERMINATOR; | |
236 | } | |
237 | } else if(start[0]=='A') { | |
238 | if(start[1]=='L') { | |
239 | biDiClass=U_RIGHT_TO_LEFT_ARABIC; | |
240 | } else if(start[1]=='N') { | |
241 | biDiClass=U_ARABIC_NUMBER; | |
242 | } | |
243 | } else if(start[0]=='C' && start[1]=='S') { | |
244 | biDiClass=U_COMMON_NUMBER_SEPARATOR; | |
245 | } else if(start[0]=='B') { | |
246 | if(start[1]=='N') { | |
247 | biDiClass=U_BOUNDARY_NEUTRAL; | |
248 | } else { | |
249 | biDiClass=U_BLOCK_SEPARATOR; | |
250 | } | |
251 | } else if(start[0]=='S') { | |
252 | biDiClass=U_SEGMENT_SEPARATOR; | |
253 | } else if(start[0]=='W' && start[1]=='S') { | |
254 | biDiClass=U_WHITE_SPACE_NEUTRAL; | |
255 | } else if(start[0]=='O' && start[1]=='N') { | |
256 | biDiClass=U_OTHER_NEUTRAL; | |
257 | } else if(start[0]=='P' && start[1]=='D' && start[2]=='F') { | |
258 | biDiClass=U_POP_DIRECTIONAL_FORMAT; | |
259 | } else if(start[0]=='N' && start[1]=='S' && start[2]=='M') { | |
260 | biDiClass=U_DIR_NON_SPACING_MARK; | |
261 | } | |
262 | // Now we verify that the class name is terminated properly, | |
263 | // and not just the start of a longer word. | |
264 | int8_t biDiClassNameLength=biDiClassNameLengths[biDiClass]; | |
265 | char c=start[biDiClassNameLength]; | |
266 | if(biDiClass==U_CHAR_DIRECTION_COUNT || (!U_IS_INV_WHITESPACE(c) && c!=';' && c!=0)) { | |
267 | errln("BiDi class string not recognized at %s", start); | |
268 | return FALSE; | |
269 | } | |
270 | inputString.append(charFromBiDiClass[biDiClass]); | |
271 | start+=biDiClassNameLength; | |
272 | } | |
273 | return TRUE; | |
274 | } | |
275 | ||
276 | void BiDiConformanceTest::TestBidiTest() { | |
277 | IcuTestErrorCode errorCode(*this, "TestBidiTest"); | |
278 | const char *sourceTestDataPath=getSourceTestData(errorCode); | |
279 | if(errorCode.logIfFailureAndReset("unable to find the source/test/testdata " | |
280 | "folder (getSourceTestData())")) { | |
281 | return; | |
282 | } | |
283 | char bidiTestPath[400]; | |
284 | strcpy(bidiTestPath, sourceTestDataPath); | |
285 | strcat(bidiTestPath, "BidiTest.txt"); | |
286 | LocalStdioFilePointer bidiTestFile(fopen(bidiTestPath, "r")); | |
287 | if(bidiTestFile.isNull()) { | |
288 | errln("unable to open %s", bidiTestPath); | |
289 | return; | |
290 | } | |
291 | LocalUBiDiPointer ubidi(ubidi_open()); | |
292 | ubidi_setClassCallback(ubidi.getAlias(), biDiConfUBiDiClassCallback, NULL, | |
293 | NULL, NULL, errorCode); | |
294 | if(errorCode.logIfFailureAndReset("ubidi_setClassCallback()")) { | |
295 | return; | |
296 | } | |
297 | lineNumber=0; | |
298 | levelsCount=0; | |
299 | orderingCount=0; | |
300 | errorCount=0; | |
301 | while(errorCount<10 && fgets(line, (int)sizeof(line), bidiTestFile.getAlias())!=NULL) { | |
302 | ++lineNumber; | |
303 | // Remove trailing comments and whitespace. | |
304 | char *commentStart=strchr(line, '#'); | |
305 | if(commentStart!=NULL) { | |
306 | *commentStart=0; | |
307 | } | |
308 | u_rtrim(line); | |
309 | const char *start=u_skipWhitespace(line); | |
310 | if(*start==0) { | |
311 | continue; // Skip empty and comment-only lines. | |
312 | } | |
313 | if(*start=='@') { | |
314 | ++start; | |
315 | if(0==strncmp(start, "Levels:", 7)) { | |
316 | if(!parseLevels(start+7)) { | |
317 | return; | |
318 | } | |
319 | } else if(0==strncmp(start, "Reorder:", 8)) { | |
320 | if(!parseOrdering(start+8)) { | |
321 | return; | |
322 | } | |
323 | } | |
324 | // Skip unknown @Xyz: ... | |
325 | } else { | |
326 | if(!parseInputStringFromBiDiClasses(start)) { | |
327 | return; | |
328 | } | |
329 | start=u_skipWhitespace(start); | |
330 | if(*start!=';') { | |
331 | errln("missing ; separator on input line %s", line); | |
332 | return; | |
333 | } | |
334 | start=u_skipWhitespace(start+1); | |
335 | char *end; | |
336 | uint32_t bitset=(uint32_t)strtoul(start, &end, 16); | |
337 | if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0)) { | |
338 | errln("input bitset parse error at %s", start); | |
339 | return; | |
340 | } | |
341 | // Loop over the bitset. | |
342 | static const UBiDiLevel paraLevels[]={ UBIDI_DEFAULT_LTR, 0, 1, UBIDI_DEFAULT_RTL }; | |
343 | static const char *const paraLevelNames[]={ "auto/LTR", "LTR", "RTL", "auto/RTL" }; | |
344 | for(int i=0; i<=3; ++i) { | |
345 | if(bitset&(1<<i)) { | |
346 | ubidi_setPara(ubidi.getAlias(), inputString.getBuffer(), inputString.length(), | |
347 | paraLevels[i], NULL, errorCode); | |
348 | const UBiDiLevel *actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCode); | |
349 | if(errorCode.logIfFailureAndReset("ubidi_setPara() or ubidi_getLevels()")) { | |
350 | errln("Input line %d: %s", (int)lineNumber, line); | |
351 | return; | |
352 | } | |
353 | if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()), | |
354 | paraLevelNames[i])) { | |
355 | // continue outerLoop; does not exist in C++ | |
356 | // so just break out of the inner loop. | |
357 | break; | |
358 | } | |
359 | if(!checkOrdering(ubidi.getAlias(), paraLevelNames[i])) { | |
360 | // continue outerLoop; does not exist in C++ | |
361 | // so just break out of the inner loop. | |
362 | break; | |
363 | } | |
364 | } | |
365 | } | |
366 | } | |
367 | } | |
368 | } | |
369 | ||
370 | static UChar printLevel(UBiDiLevel level) { | |
371 | if(level<UBIDI_DEFAULT_LTR) { | |
372 | return 0x30+level; | |
373 | } else { | |
374 | return 0x78; // 'x' | |
375 | } | |
376 | } | |
377 | ||
378 | static uint32_t getDirectionBits(const UBiDiLevel actualLevels[], int32_t actualCount) { | |
379 | uint32_t actualDirectionBits=0; | |
380 | for(int32_t i=0; i<actualCount; ++i) { | |
381 | actualDirectionBits|=(1<<(actualLevels[i]&1)); | |
382 | } | |
383 | return actualDirectionBits; | |
384 | } | |
385 | ||
386 | UBool BiDiConformanceTest::checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount, | |
387 | const char *paraLevelName) { | |
388 | UBool isOk=TRUE; | |
389 | if(levelsCount!=actualCount) { | |
390 | errln("Wrong number of level values; expected %d actual %d", | |
391 | (int)levelsCount, (int)actualCount); | |
392 | isOk=FALSE; | |
393 | } else { | |
394 | for(int32_t i=0; i<actualCount; ++i) { | |
395 | if(levels[i]!=actualLevels[i] && levels[i]<UBIDI_DEFAULT_LTR) { | |
396 | if(directionBits!=3 && directionBits==getDirectionBits(actualLevels, actualCount)) { | |
397 | // ICU used a shortcut: | |
398 | // Since the text is unidirectional, it did not store the resolved | |
399 | // levels but just returns all levels as the paragraph level 0 or 1. | |
400 | // The reordering result is the same, so this is fine. | |
401 | break; | |
402 | } else { | |
403 | errln("Wrong level value at index %d; expected %d actual %d", | |
404 | (int)i, levels[i], actualLevels[i]); | |
405 | isOk=FALSE; | |
406 | break; | |
407 | } | |
408 | } | |
409 | } | |
410 | } | |
411 | if(!isOk) { | |
412 | printErrorLine(paraLevelName); | |
413 | UnicodeString els("Expected levels: "); | |
414 | int32_t i; | |
415 | for(i=0; i<levelsCount; ++i) { | |
416 | els.append((UChar)0x20).append(printLevel(levels[i])); | |
417 | } | |
418 | UnicodeString als("Actual levels: "); | |
419 | for(i=0; i<actualCount; ++i) { | |
420 | als.append((UChar)0x20).append(printLevel(actualLevels[i])); | |
421 | } | |
422 | errln(els); | |
423 | errln(als); | |
424 | } | |
425 | return isOk; | |
426 | } | |
427 | ||
428 | // Note: ubidi_setReorderingOptions(ubidi, UBIDI_OPTION_REMOVE_CONTROLS); | |
429 | // does not work for custom BiDi class assignments | |
430 | // and anyway also removes LRM/RLM/ZWJ/ZWNJ which is not desirable here. | |
431 | // Therefore we just skip the indexes for BiDi controls while comparing | |
432 | // with the expected ordering that has them omitted. | |
433 | UBool BiDiConformanceTest::checkOrdering(UBiDi *ubidi, const char *paraLevelName) { | |
434 | UBool isOk=TRUE; | |
435 | IcuTestErrorCode errorCode(*this, "TestBidiTest/checkOrdering()"); | |
436 | int32_t resultLength=ubidi_getResultLength(ubidi); // visual length including BiDi controls | |
437 | int32_t i, visualIndex; | |
438 | // Note: It should be faster to call ubidi_countRuns()/ubidi_getVisualRun() | |
439 | // and loop over each run's indexes, but that seems unnecessary for this test code. | |
440 | for(i=visualIndex=0; i<resultLength; ++i) { | |
441 | int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode); | |
442 | if(errorCode.logIfFailureAndReset("ubidi_getLogicalIndex()")) { | |
443 | errln("Input line %d: %s", (int)lineNumber, line); | |
444 | return FALSE; | |
445 | } | |
446 | if(levels[logicalIndex]>=UBIDI_DEFAULT_LTR) { | |
447 | continue; // BiDi control, omitted from expected ordering. | |
448 | } | |
449 | if(visualIndex<orderingCount && logicalIndex!=ordering[visualIndex]) { | |
450 | errln("Wrong ordering value at visual index %d; expected %d actual %d", | |
451 | (int)visualIndex, ordering[visualIndex], logicalIndex); | |
452 | isOk=FALSE; | |
453 | break; | |
454 | } | |
455 | ++visualIndex; | |
456 | } | |
457 | // visualIndex is now the visual length minus the BiDi controls, | |
458 | // which should match the length of the BidiTest.txt ordering. | |
459 | if(isOk && orderingCount!=visualIndex) { | |
460 | errln("Wrong number of ordering values; expected %d actual %d", | |
461 | (int)orderingCount, (int)visualIndex); | |
462 | isOk=FALSE; | |
463 | } | |
464 | if(!isOk) { | |
465 | printErrorLine(paraLevelName); | |
466 | UnicodeString eord("Expected ordering: "); | |
467 | for(i=0; i<orderingCount; ++i) { | |
468 | eord.append((UChar)0x20).append((UChar)(0x30+ordering[i])); | |
469 | } | |
470 | UnicodeString aord("Actual ordering: "); | |
471 | for(i=0; i<resultLength; ++i) { | |
472 | int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode); | |
473 | if(levels[logicalIndex]<UBIDI_DEFAULT_LTR) { | |
474 | aord.append((UChar)0x20).append((UChar)(0x30+logicalIndex)); | |
475 | } | |
476 | } | |
477 | errln(eord); | |
478 | errln(aord); | |
479 | } | |
480 | return isOk; | |
481 | } | |
482 | ||
483 | void BiDiConformanceTest::printErrorLine(const char *paraLevelName) { | |
484 | ++errorCount; | |
485 | errln("Input line %5d: %s", (int)lineNumber, line); | |
486 | errln(UnicodeString("Input string: ")+inputString); | |
487 | errln("Para level: %s", paraLevelName); | |
488 | } |