]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/intltest/bidiconf.cpp
ICU-511.25.tar.gz
[apple/icu.git] / icuSources / test / intltest / bidiconf.cpp
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2009-2010, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: bidiconf.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2009oct16
14 * created by: Markus W. Scherer
15 *
16 * BiDi conformance test, using the Unicode BidiTest.txt file.
17 */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include "unicode/utypes.h"
23 #include "unicode/ubidi.h"
24 #include "unicode/errorcode.h"
25 #include "unicode/localpointer.h"
26 #include "unicode/putil.h"
27 #include "unicode/unistr.h"
28 #include "intltest.h"
29 #include "uparse.h"
30
31 class BiDiConformanceTest : public IntlTest {
32 public:
33 BiDiConformanceTest() :
34 directionBits(0), lineNumber(0), levelsCount(0), orderingCount(0),
35 errorCount(0) {}
36
37 void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
38
39 void TestBidiTest();
40 private:
41 char *getUnidataPath(char path[]);
42
43 UBool parseLevels(const char *start);
44 UBool parseOrdering(const char *start);
45 UBool parseInputStringFromBiDiClasses(const char *&start);
46
47 UBool checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount,
48 const char *paraLevelName);
49 UBool checkOrdering(UBiDi *ubidi, const char *paraLevelName);
50
51 void printErrorLine(const char *paraLevelName);
52
53 char line[10000];
54 UBiDiLevel levels[1000];
55 uint32_t directionBits;
56 int32_t ordering[1000];
57 int32_t lineNumber;
58 int32_t levelsCount;
59 int32_t orderingCount;
60 int32_t errorCount;
61 UnicodeString inputString;
62 };
63
64 extern IntlTest *createBiDiConformanceTest() {
65 return new BiDiConformanceTest();
66 }
67
// Test dispatcher: maps a test index to its name and, via the TESTCASE macro,
// to the test function. An unknown index sets name to the empty string.
// The par parameter is not used.
void BiDiConformanceTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
    if(exec) {
        logln("TestSuite BiDiConformanceTest: ");
    }
    switch (index) {
        // Index 0 is the only test in this suite.
        TESTCASE(0, TestBidiTest);
        default:
            name="";
            break; // needed to end the loop
    }
}
79
80 // TODO: Move to a common place (IntlTest?) to avoid duplication with UnicodeTest (ucdtest.cpp).
81 char *BiDiConformanceTest::getUnidataPath(char path[]) {
82 IcuTestErrorCode errorCode(*this, "getUnidataPath");
83 const int kUnicodeDataTxtLength=15; // strlen("UnicodeData.txt")
84
85 // Look inside ICU_DATA first.
86 strcpy(path, pathToDataDirectory());
87 strcat(path, "unidata" U_FILE_SEP_STRING "UnicodeData.txt");
88 FILE *f=fopen(path, "r");
89 if(f!=NULL) {
90 fclose(f);
91 *(strchr(path, 0)-kUnicodeDataTxtLength)=0; // Remove the basename.
92 return path;
93 }
94
95 // As a fallback, try to guess where the source data was located
96 // at the time ICU was built, and look there.
97 # ifdef U_TOPSRCDIR
98 strcpy(path, U_TOPSRCDIR U_FILE_SEP_STRING "data");
99 # else
100 strcpy(path, loadTestData(errorCode));
101 strcat(path, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".."
102 U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".."
103 U_FILE_SEP_STRING "data");
104 # endif
105 strcat(path, U_FILE_SEP_STRING);
106 strcat(path, "unidata" U_FILE_SEP_STRING "UnicodeData.txt");
107 f=fopen(path, "r");
108 if(f!=NULL) {
109 fclose(f);
110 *(strchr(path, 0)-kUnicodeDataTxtLength)=0; // Remove the basename.
111 return path;
112 }
113 return NULL;
114 }
115
// Defines LocalStdioFilePointer, a local-pointer class for a FILE *
// with fclose as its close function.
U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose);
117
118 UBool BiDiConformanceTest::parseLevels(const char *start) {
119 directionBits=0;
120 levelsCount=0;
121 while(*start!=0 && *(start=u_skipWhitespace(start))!=0) {
122 if(*start=='x') {
123 levels[levelsCount++]=UBIDI_DEFAULT_LTR;
124 ++start;
125 } else {
126 char *end;
127 uint32_t value=(uint32_t)strtoul(start, &end, 10);
128 if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0) || value>(UBIDI_MAX_EXPLICIT_LEVEL+1)) {
129 errln("@Levels: parse error at %s", start);
130 return FALSE;
131 }
132 levels[levelsCount++]=(UBiDiLevel)value;
133 directionBits|=(1<<(value&1));
134 start=end;
135 }
136 }
137 return TRUE;
138 }
139
140 UBool BiDiConformanceTest::parseOrdering(const char *start) {
141 orderingCount=0;
142 while(*start!=0 && *(start=u_skipWhitespace(start))!=0) {
143 char *end;
144 uint32_t value=(uint32_t)strtoul(start, &end, 10);
145 if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0) || value>=1000) {
146 errln("@Reorder: parse error at %s", start);
147 return FALSE;
148 }
149 ordering[orderingCount++]=(int32_t)value;
150 start=end;
151 }
152 return TRUE;
153 }
154
// Maps each BiDi class value (the array index) to the one character
// that stands for that class in the test input strings.
// biDiConfUBiDiClassCallback() performs the reverse mapping.
static const UChar charFromBiDiClass[U_CHAR_DIRECTION_COUNT]={
    0x6c,   // 'l' for L
    0x52,   // 'R' for R
    0x33,   // '3' for EN
    0x2d,   // '-' for ES
    0x25,   // '%' for ET
    0x39,   // '9' for AN
    0x2c,   // ',' for CS
    0x2f,   // '/' for B
    0x5f,   // '_' for S
    0x20,   // ' ' for WS
    0x3d,   // '=' for ON
    0x65,   // 'e' for LRE
    0x6f,   // 'o' for LRO
    0x41,   // 'A' for AL
    0x45,   // 'E' for RLE
    0x4f,   // 'O' for RLO
    0x2a,   // '*' for PDF
    0x60,   // '`' for NSM
    0x7c    // '|' for BN
};
176
177 U_CDECL_BEGIN
178
179 static UCharDirection U_CALLCONV
180 biDiConfUBiDiClassCallback(const void * /*context*/, UChar32 c) {
181 for(int i=0; i<U_CHAR_DIRECTION_COUNT; ++i) {
182 if(c==charFromBiDiClass[i]) {
183 return (UCharDirection)i;
184 }
185 }
186 // Character not in our hardcoded table.
187 // Should not occur during testing.
188 return U_BIDI_CLASS_DEFAULT;
189 }
190
191 U_CDECL_END
192
// Lengths of the short BiDi class names, indexed by BiDi class value
// in the same order as charFromBiDiClass[].
// The additional last entry (0) is for U_CHAR_DIRECTION_COUNT which
// parseInputStringFromBiDiClasses() uses for "no class recognized".
static const int8_t biDiClassNameLengths[U_CHAR_DIRECTION_COUNT+1]={
    1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 3, 3, 2, 3, 3, 3, 3, 2, 0
};
196
197 UBool BiDiConformanceTest::parseInputStringFromBiDiClasses(const char *&start) {
198 inputString.remove();
199 /*
200 * Lengthy but fast BiDi class parser.
201 * A simple parser could terminate or extract the name string and use
202 * int32_t biDiClassInt=u_getPropertyValueEnum(UCHAR_BIDI_CLASS, bidiClassString);
203 * but that makes this test take significantly more time.
204 */
205 while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') {
206 UCharDirection biDiClass=U_CHAR_DIRECTION_COUNT;
207 // Compare each character once until we have a match on
208 // a complete, short BiDi class name.
209 if(start[0]=='L') {
210 if(start[1]=='R') {
211 if(start[2]=='E') {
212 biDiClass=U_LEFT_TO_RIGHT_EMBEDDING;
213 } else if(start[2]=='O') {
214 biDiClass=U_LEFT_TO_RIGHT_OVERRIDE;
215 }
216 } else {
217 biDiClass=U_LEFT_TO_RIGHT;
218 }
219 } else if(start[0]=='R') {
220 if(start[1]=='L') {
221 if(start[2]=='E') {
222 biDiClass=U_RIGHT_TO_LEFT_EMBEDDING;
223 } else if(start[2]=='O') {
224 biDiClass=U_RIGHT_TO_LEFT_OVERRIDE;
225 }
226 } else {
227 biDiClass=U_RIGHT_TO_LEFT;
228 }
229 } else if(start[0]=='E') {
230 if(start[1]=='N') {
231 biDiClass=U_EUROPEAN_NUMBER;
232 } else if(start[1]=='S') {
233 biDiClass=U_EUROPEAN_NUMBER_SEPARATOR;
234 } else if(start[1]=='T') {
235 biDiClass=U_EUROPEAN_NUMBER_TERMINATOR;
236 }
237 } else if(start[0]=='A') {
238 if(start[1]=='L') {
239 biDiClass=U_RIGHT_TO_LEFT_ARABIC;
240 } else if(start[1]=='N') {
241 biDiClass=U_ARABIC_NUMBER;
242 }
243 } else if(start[0]=='C' && start[1]=='S') {
244 biDiClass=U_COMMON_NUMBER_SEPARATOR;
245 } else if(start[0]=='B') {
246 if(start[1]=='N') {
247 biDiClass=U_BOUNDARY_NEUTRAL;
248 } else {
249 biDiClass=U_BLOCK_SEPARATOR;
250 }
251 } else if(start[0]=='S') {
252 biDiClass=U_SEGMENT_SEPARATOR;
253 } else if(start[0]=='W' && start[1]=='S') {
254 biDiClass=U_WHITE_SPACE_NEUTRAL;
255 } else if(start[0]=='O' && start[1]=='N') {
256 biDiClass=U_OTHER_NEUTRAL;
257 } else if(start[0]=='P' && start[1]=='D' && start[2]=='F') {
258 biDiClass=U_POP_DIRECTIONAL_FORMAT;
259 } else if(start[0]=='N' && start[1]=='S' && start[2]=='M') {
260 biDiClass=U_DIR_NON_SPACING_MARK;
261 }
262 // Now we verify that the class name is terminated properly,
263 // and not just the start of a longer word.
264 int8_t biDiClassNameLength=biDiClassNameLengths[biDiClass];
265 char c=start[biDiClassNameLength];
266 if(biDiClass==U_CHAR_DIRECTION_COUNT || (!U_IS_INV_WHITESPACE(c) && c!=';' && c!=0)) {
267 errln("BiDi class string not recognized at %s", start);
268 return FALSE;
269 }
270 inputString.append(charFromBiDiClass[biDiClass]);
271 start+=biDiClassNameLength;
272 }
273 return TRUE;
274 }
275
276 void BiDiConformanceTest::TestBidiTest() {
277 IcuTestErrorCode errorCode(*this, "TestBidiTest");
278 const char *sourceTestDataPath=getSourceTestData(errorCode);
279 if(errorCode.logIfFailureAndReset("unable to find the source/test/testdata "
280 "folder (getSourceTestData())")) {
281 return;
282 }
283 char bidiTestPath[400];
284 strcpy(bidiTestPath, sourceTestDataPath);
285 strcat(bidiTestPath, "BidiTest.txt");
286 LocalStdioFilePointer bidiTestFile(fopen(bidiTestPath, "r"));
287 if(bidiTestFile.isNull()) {
288 errln("unable to open %s", bidiTestPath);
289 return;
290 }
291 LocalUBiDiPointer ubidi(ubidi_open());
292 ubidi_setClassCallback(ubidi.getAlias(), biDiConfUBiDiClassCallback, NULL,
293 NULL, NULL, errorCode);
294 if(errorCode.logIfFailureAndReset("ubidi_setClassCallback()")) {
295 return;
296 }
297 lineNumber=0;
298 levelsCount=0;
299 orderingCount=0;
300 errorCount=0;
301 while(errorCount<10 && fgets(line, (int)sizeof(line), bidiTestFile.getAlias())!=NULL) {
302 ++lineNumber;
303 // Remove trailing comments and whitespace.
304 char *commentStart=strchr(line, '#');
305 if(commentStart!=NULL) {
306 *commentStart=0;
307 }
308 u_rtrim(line);
309 const char *start=u_skipWhitespace(line);
310 if(*start==0) {
311 continue; // Skip empty and comment-only lines.
312 }
313 if(*start=='@') {
314 ++start;
315 if(0==strncmp(start, "Levels:", 7)) {
316 if(!parseLevels(start+7)) {
317 return;
318 }
319 } else if(0==strncmp(start, "Reorder:", 8)) {
320 if(!parseOrdering(start+8)) {
321 return;
322 }
323 }
324 // Skip unknown @Xyz: ...
325 } else {
326 if(!parseInputStringFromBiDiClasses(start)) {
327 return;
328 }
329 start=u_skipWhitespace(start);
330 if(*start!=';') {
331 errln("missing ; separator on input line %s", line);
332 return;
333 }
334 start=u_skipWhitespace(start+1);
335 char *end;
336 uint32_t bitset=(uint32_t)strtoul(start, &end, 16);
337 if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0)) {
338 errln("input bitset parse error at %s", start);
339 return;
340 }
341 // Loop over the bitset.
342 static const UBiDiLevel paraLevels[]={ UBIDI_DEFAULT_LTR, 0, 1, UBIDI_DEFAULT_RTL };
343 static const char *const paraLevelNames[]={ "auto/LTR", "LTR", "RTL", "auto/RTL" };
344 for(int i=0; i<=3; ++i) {
345 if(bitset&(1<<i)) {
346 ubidi_setPara(ubidi.getAlias(), inputString.getBuffer(), inputString.length(),
347 paraLevels[i], NULL, errorCode);
348 const UBiDiLevel *actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCode);
349 if(errorCode.logIfFailureAndReset("ubidi_setPara() or ubidi_getLevels()")) {
350 errln("Input line %d: %s", (int)lineNumber, line);
351 return;
352 }
353 if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()),
354 paraLevelNames[i])) {
355 // continue outerLoop; does not exist in C++
356 // so just break out of the inner loop.
357 break;
358 }
359 if(!checkOrdering(ubidi.getAlias(), paraLevelNames[i])) {
360 // continue outerLoop; does not exist in C++
361 // so just break out of the inner loop.
362 break;
363 }
364 }
365 }
366 }
367 }
368 }
369
370 static UChar printLevel(UBiDiLevel level) {
371 if(level<UBIDI_DEFAULT_LTR) {
372 return 0x30+level;
373 } else {
374 return 0x78; // 'x'
375 }
376 }
377
378 static uint32_t getDirectionBits(const UBiDiLevel actualLevels[], int32_t actualCount) {
379 uint32_t actualDirectionBits=0;
380 for(int32_t i=0; i<actualCount; ++i) {
381 actualDirectionBits|=(1<<(actualLevels[i]&1));
382 }
383 return actualDirectionBits;
384 }
385
386 UBool BiDiConformanceTest::checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount,
387 const char *paraLevelName) {
388 UBool isOk=TRUE;
389 if(levelsCount!=actualCount) {
390 errln("Wrong number of level values; expected %d actual %d",
391 (int)levelsCount, (int)actualCount);
392 isOk=FALSE;
393 } else {
394 for(int32_t i=0; i<actualCount; ++i) {
395 if(levels[i]!=actualLevels[i] && levels[i]<UBIDI_DEFAULT_LTR) {
396 if(directionBits!=3 && directionBits==getDirectionBits(actualLevels, actualCount)) {
397 // ICU used a shortcut:
398 // Since the text is unidirectional, it did not store the resolved
399 // levels but just returns all levels as the paragraph level 0 or 1.
400 // The reordering result is the same, so this is fine.
401 break;
402 } else {
403 errln("Wrong level value at index %d; expected %d actual %d",
404 (int)i, levels[i], actualLevels[i]);
405 isOk=FALSE;
406 break;
407 }
408 }
409 }
410 }
411 if(!isOk) {
412 printErrorLine(paraLevelName);
413 UnicodeString els("Expected levels: ");
414 int32_t i;
415 for(i=0; i<levelsCount; ++i) {
416 els.append((UChar)0x20).append(printLevel(levels[i]));
417 }
418 UnicodeString als("Actual levels: ");
419 for(i=0; i<actualCount; ++i) {
420 als.append((UChar)0x20).append(printLevel(actualLevels[i]));
421 }
422 errln(els);
423 errln(als);
424 }
425 return isOk;
426 }
427
428 // Note: ubidi_setReorderingOptions(ubidi, UBIDI_OPTION_REMOVE_CONTROLS);
429 // does not work for custom BiDi class assignments
430 // and anyway also removes LRM/RLM/ZWJ/ZWNJ which is not desirable here.
431 // Therefore we just skip the indexes for BiDi controls while comparing
432 // with the expected ordering that has them omitted.
433 UBool BiDiConformanceTest::checkOrdering(UBiDi *ubidi, const char *paraLevelName) {
434 UBool isOk=TRUE;
435 IcuTestErrorCode errorCode(*this, "TestBidiTest/checkOrdering()");
436 int32_t resultLength=ubidi_getResultLength(ubidi); // visual length including BiDi controls
437 int32_t i, visualIndex;
438 // Note: It should be faster to call ubidi_countRuns()/ubidi_getVisualRun()
439 // and loop over each run's indexes, but that seems unnecessary for this test code.
440 for(i=visualIndex=0; i<resultLength; ++i) {
441 int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode);
442 if(errorCode.logIfFailureAndReset("ubidi_getLogicalIndex()")) {
443 errln("Input line %d: %s", (int)lineNumber, line);
444 return FALSE;
445 }
446 if(levels[logicalIndex]>=UBIDI_DEFAULT_LTR) {
447 continue; // BiDi control, omitted from expected ordering.
448 }
449 if(visualIndex<orderingCount && logicalIndex!=ordering[visualIndex]) {
450 errln("Wrong ordering value at visual index %d; expected %d actual %d",
451 (int)visualIndex, ordering[visualIndex], logicalIndex);
452 isOk=FALSE;
453 break;
454 }
455 ++visualIndex;
456 }
457 // visualIndex is now the visual length minus the BiDi controls,
458 // which should match the length of the BidiTest.txt ordering.
459 if(isOk && orderingCount!=visualIndex) {
460 errln("Wrong number of ordering values; expected %d actual %d",
461 (int)orderingCount, (int)visualIndex);
462 isOk=FALSE;
463 }
464 if(!isOk) {
465 printErrorLine(paraLevelName);
466 UnicodeString eord("Expected ordering: ");
467 for(i=0; i<orderingCount; ++i) {
468 eord.append((UChar)0x20).append((UChar)(0x30+ordering[i]));
469 }
470 UnicodeString aord("Actual ordering: ");
471 for(i=0; i<resultLength; ++i) {
472 int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode);
473 if(levels[logicalIndex]<UBIDI_DEFAULT_LTR) {
474 aord.append((UChar)0x20).append((UChar)(0x30+logicalIndex));
475 }
476 }
477 errln(eord);
478 errln(aord);
479 }
480 return isOk;
481 }
482
483 void BiDiConformanceTest::printErrorLine(const char *paraLevelName) {
484 ++errorCount;
485 errln("Input line %5d: %s", (int)lineNumber, line);
486 errln(UnicodeString("Input string: ")+inputString);
487 errln("Para level: %s", paraLevelName);
488 }