2 *******************************************************************************
4 * Copyright (C) 2009-2010, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: bidiconf.cpp
10 * tab size: 8 (not used)
13 * created on: 2009oct16
14 * created by: Markus W. Scherer
16 * BiDi conformance test, using the Unicode BidiTest.txt file.
22 #include "unicode/utypes.h"
23 #include "unicode/ubidi.h"
24 #include "unicode/errorcode.h"
25 #include "unicode/localpointer.h"
26 #include "unicode/putil.h"
27 #include "unicode/unistr.h"
// Test class for the BiDi conformance test; derives from IntlTest.
// It keeps the expected results parsed from the test data (levels, ordering)
// together with the input string synthesized from BiDi class names.
// NOTE(review): access specifiers, some members and the end of the class are
// not visible in this part of the file -- confirm against the full declaration.
31 class BiDiConformanceTest
: public IntlTest
{
// Constructor: starts with all visible counters and bit sets at zero.
33 BiDiConformanceTest() :
34 directionBits(0), lineNumber(0), levelsCount(0), orderingCount(0),
// Test dispatch entry point; the definition registers TestBidiTest as case 0.
37 void runIndexedTest(int32_t index
, UBool exec
, const char *&name
, char *par
=NULL
);
// Builds a data directory path into the caller-supplied buffer.
41 char *getUnidataPath(char path
[]);
// Parsers for the "Levels:" and "Reorder:" lines and for data lines made of
// BiDi class names. The definitions report problems through errln().
43 UBool
parseLevels(const char *start
);
44 UBool
parseOrdering(const char *start
);
45 UBool
parseInputStringFromBiDiClasses(const char *&start
);
// Compare the results computed by ICU with the expected levels[] / ordering[].
47 UBool
checkLevels(const UBiDiLevel actualLevels
[], int32_t actualCount
,
48 const char *paraLevelName
);
49 UBool
checkOrdering(UBiDi
*ubidi
, const char *paraLevelName
);
// Emits the current input line, input string and paragraph level via errln().
51 void printErrorLine(const char *paraLevelName
);
// Expected levels as filled in by parseLevels(); fixed capacity of 1000.
54 UBiDiLevel levels
[1000];
// parseLevels() ORs in (1<<(level&1)) per numeric level:
// bit 0 = an even level was seen, bit 1 = an odd level was seen.
55 uint32_t directionBits
;
// Expected ordering values as filled in by parseOrdering(); fixed capacity of 1000.
56 int32_t ordering
[1000];
// Number of entries of ordering[] that are in use.
59 int32_t orderingCount
;
// Input text built by parseInputStringFromBiDiClasses(), one code unit per class name.
61 UnicodeString inputString
;
// Factory function: returns a newly allocated BiDiConformanceTest as an IntlTest pointer.
// NOTE(review): the object is created with new; presumably the caller owns
// and deletes it -- confirm at the call site.
64 extern IntlTest
*createBiDiConformanceTest() {
65 return new BiDiConformanceTest();
// Test dispatcher: logs the suite name and registers TestBidiTest as test case 0
// through the TESTCASE macro. The last parameter (par) is unused.
// NOTE(review): the if(exec)/switch scaffolding around these statements is not
// visible in this view.
68 void BiDiConformanceTest::runIndexedTest(int32_t index
, UBool exec
, const char *&name
, char * /*par*/) {
70 logln("TestSuite BiDiConformanceTest: ");
73 TESTCASE(0, TestBidiTest
);
76 break; // needed to end the loop
80 // TODO: Move to a common place (IntlTest?) to avoid duplication with UnicodeTest (ucdtest.cpp).
// Builds a path in the caller-supplied buffer by appending
// "unidata" U_FILE_SEP_STRING "UnicodeData.txt" to a candidate data directory,
// opening the file with fopen(), and then cutting the basename off again so
// that the path ends after the "unidata" directory separator.
// Candidates visible here: pathToDataDirectory(), U_TOPSRCDIR/data, and a
// location four levels above loadTestData().
// NOTE(review): path is filled with strcpy()/strcat() without any length
// limit; the caller must supply a buffer large enough for the longest candidate.
// NOTE(review): the conditions selecting between the candidates and the return
// statements are not visible in this view.
81 char *BiDiConformanceTest::getUnidataPath(char path
[]) {
82 IcuTestErrorCode
errorCode(*this, "getUnidataPath");
83 const int kUnicodeDataTxtLength
=15; // strlen("UnicodeData.txt")
85 // Look inside ICU_DATA first.
86 strcpy(path
, pathToDataDirectory());
87 strcat(path
, "unidata" U_FILE_SEP_STRING
"UnicodeData.txt");
88 FILE *f
=fopen(path
, "r");
// strchr(path, 0) points at the terminating NUL; stepping back by the basename
// length and writing 0 there truncates the string right after the separator.
91 *(strchr(path
, 0)-kUnicodeDataTxtLength
)=0; // Remove the basename.
95 // As a fallback, try to guess where the source data was located
96 // at the time ICU was built, and look there.
98 strcpy(path
, U_TOPSRCDIR U_FILE_SEP_STRING
"data");
100 strcpy(path
, loadTestData(errorCode
));
101 strcat(path
, U_FILE_SEP_STRING
".." U_FILE_SEP_STRING
".."
102 U_FILE_SEP_STRING
".." U_FILE_SEP_STRING
".."
103 U_FILE_SEP_STRING
"data");
105 strcat(path
, U_FILE_SEP_STRING
);
106 strcat(path
, "unidata" U_FILE_SEP_STRING
"UnicodeData.txt");
110 *(strchr(path
, 0)-kUnicodeDataTxtLength
)=0; // Remove the basename.
// Defines LocalStdioFilePointer from the ICU local-pointer macro, with FILE as
// the managed type and fclose as the close function. TestBidiTest() uses it to
// hold the opened BidiTest.txt file.
// NOTE(review): presumably fclose() runs when the wrapper is destroyed --
// confirm against the macro's documentation.
116 U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer
, FILE, fclose
);
// Parses the whitespace-separated level values that follow "Levels:" into
// levels[] and accumulates directionBits.
// start: NUL-terminated text after the "Levels:" prefix.
// A decimal value larger than UBIDI_MAX_EXPLICIT_LEVEL+1, or one that is not
// followed by whitespace or the end of the string, is reported through errln().
// NOTE(review): no bounds check of levelsCount against the 1000-entry levels[]
// array is visible here.
// NOTE(review): the resetting of the counters, the condition that selects the
// UBIDI_DEFAULT_LTR placeholder, and the return statements are not visible in this view.
118 UBool
BiDiConformanceTest::parseLevels(const char *start
) {
// Loop while there is non-whitespace text left.
121 while(*start
!=0 && *(start
=u_skipWhitespace(start
))!=0) {
// Placeholder entry: checkLevels() and checkOrdering() treat entries
// >= UBIDI_DEFAULT_LTR as positions that are not compared.
123 levels
[levelsCount
++]=UBIDI_DEFAULT_LTR
;
127 uint32_t value
=(uint32_t)strtoul(start
, &end
, 10);
// end<=start means that strtoul() consumed no digits.
128 if(end
<=start
|| (!U_IS_INV_WHITESPACE(*end
) && *end
!=0) || value
>(UBIDI_MAX_EXPLICIT_LEVEL
+1)) {
129 errln("@Levels: parse error at %s", start
);
132 levels
[levelsCount
++]=(UBiDiLevel
)value
;
// Bit 0 records an even level, bit 1 an odd level.
133 directionBits
|=(1<<(value
&1));
// Parses the whitespace-separated decimal index values that follow "Reorder:"
// into ordering[].
// start: NUL-terminated text after the "Reorder:" prefix.
// A value >= 1000, or one that is not followed by whitespace or the end of
// the string, is reported through errln().
// NOTE(review): no bounds check of orderingCount against the 1000-entry
// ordering[] array is visible here; the counter reset and the return
// statements are also not visible in this view.
140 UBool
BiDiConformanceTest::parseOrdering(const char *start
) {
// Loop while there is non-whitespace text left.
142 while(*start
!=0 && *(start
=u_skipWhitespace(start
))!=0) {
144 uint32_t value
=(uint32_t)strtoul(start
, &end
, 10);
// end<=start means that strtoul() consumed no digits.
145 if(end
<=start
|| (!U_IS_INV_WHITESPACE(*end
) && *end
!=0) || value
>=1000) {
146 errln("@Reorder: parse error at %s", start
);
149 ordering
[orderingCount
++]=(int32_t)value
;
// Table with one UChar per BiDi class, indexed by the class value.
// parseInputStringFromBiDiClasses() appends charFromBiDiClass[biDiClass] to the
// input string, and biDiConfUBiDiClassCallback() searches this table to map a
// character back to its class index.
// NOTE(review): the initializer values are not visible in this view.
155 static const UChar charFromBiDiClass
[U_CHAR_DIRECTION_COUNT
]={
// Class callback installed through ubidi_setClassCallback() in TestBidiTest().
// Does a linear search of charFromBiDiClass[] for c and returns the matching
// index as the character's BiDi class. The context argument is unused.
// Returns U_BIDI_CLASS_DEFAULT when c is not in the table.
179 static UCharDirection U_CALLCONV
180 biDiConfUBiDiClassCallback(const void * /*context*/, UChar32 c
) {
181 for(int i
=0; i
<U_CHAR_DIRECTION_COUNT
; ++i
) {
182 if(c
==charFromBiDiClass
[i
]) {
183 return (UCharDirection
)i
;
186 // Character not in our hardcoded table.
187 // Should not occur during testing.
188 return U_BIDI_CLASS_DEFAULT
;
// Length of the short name of each BiDi class, indexed by the class value.
// The table has one extra entry at index U_CHAR_DIRECTION_COUNT with length 0,
// so parseInputStringFromBiDiClasses() can index it safely even when no class
// name was recognized.
193 static const int8_t biDiClassNameLengths
[U_CHAR_DIRECTION_COUNT
+1]={
194 1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 3, 3, 2, 3, 3, 3, 3, 2, 0
// Parses a sequence of whitespace-separated BiDi class short names (such as
// LRE, EN or WS) and builds inputString with one representative character per name.
// start: in/out pointer into the line; parsing stops at ';' or the end of the
// string, and start is advanced past each recognized name.
// An unrecognized name, or a recognized name that is only the prefix of a
// longer word, is reported through errln().
// NOTE(review): several inner conditions and the return statements are not
// visible in this view.
197 UBool
BiDiConformanceTest::parseInputStringFromBiDiClasses(const char *&start
) {
198 inputString
.remove();
200 * Lengthy but fast BiDi class parser.
201 * A simple parser could terminate or extract the name string and use
202 * int32_t biDiClassInt=u_getPropertyValueEnum(UCHAR_BIDI_CLASS, bidiClassString);
203 * but that makes this test take significantly more time.
205 while(*start
!=0 && *(start
=u_skipWhitespace(start
))!=0 && *start
!=';') {
// U_CHAR_DIRECTION_COUNT serves as the "nothing recognized yet" value.
206 UCharDirection biDiClass
=U_CHAR_DIRECTION_COUNT
;
207 // Compare each character once until we have a match on
208 // a complete, short BiDi class name.
212 biDiClass
=U_LEFT_TO_RIGHT_EMBEDDING
;
213 } else if(start
[2]=='O') {
214 biDiClass
=U_LEFT_TO_RIGHT_OVERRIDE
;
217 biDiClass
=U_LEFT_TO_RIGHT
;
219 } else if(start
[0]=='R') {
222 biDiClass
=U_RIGHT_TO_LEFT_EMBEDDING
;
223 } else if(start
[2]=='O') {
224 biDiClass
=U_RIGHT_TO_LEFT_OVERRIDE
;
227 biDiClass
=U_RIGHT_TO_LEFT
;
229 } else if(start
[0]=='E') {
231 biDiClass
=U_EUROPEAN_NUMBER
;
232 } else if(start
[1]=='S') {
233 biDiClass
=U_EUROPEAN_NUMBER_SEPARATOR
;
234 } else if(start
[1]=='T') {
235 biDiClass
=U_EUROPEAN_NUMBER_TERMINATOR
;
237 } else if(start
[0]=='A') {
239 biDiClass
=U_RIGHT_TO_LEFT_ARABIC
;
240 } else if(start
[1]=='N') {
241 biDiClass
=U_ARABIC_NUMBER
;
243 } else if(start
[0]=='C' && start
[1]=='S') {
244 biDiClass
=U_COMMON_NUMBER_SEPARATOR
;
245 } else if(start
[0]=='B') {
247 biDiClass
=U_BOUNDARY_NEUTRAL
;
249 biDiClass
=U_BLOCK_SEPARATOR
;
251 } else if(start
[0]=='S') {
252 biDiClass
=U_SEGMENT_SEPARATOR
;
253 } else if(start
[0]=='W' && start
[1]=='S') {
254 biDiClass
=U_WHITE_SPACE_NEUTRAL
;
255 } else if(start
[0]=='O' && start
[1]=='N') {
256 biDiClass
=U_OTHER_NEUTRAL
;
257 } else if(start
[0]=='P' && start
[1]=='D' && start
[2]=='F') {
258 biDiClass
=U_POP_DIRECTIONAL_FORMAT
;
259 } else if(start
[0]=='N' && start
[1]=='S' && start
[2]=='M') {
260 biDiClass
=U_DIR_NON_SPACING_MARK
;
262 // Now we verify that the class name is terminated properly,
263 // and not just the start of a longer word.
// When nothing matched, biDiClass is still U_CHAR_DIRECTION_COUNT; the extra
// table entry at that index is 0, so c is start[0] in that case.
264 int8_t biDiClassNameLength
=biDiClassNameLengths
[biDiClass
];
265 char c
=start
[biDiClassNameLength
];
266 if(biDiClass
==U_CHAR_DIRECTION_COUNT
|| (!U_IS_INV_WHITESPACE(c
) && c
!=';' && c
!=0)) {
267 errln("BiDi class string not recognized at %s", start
);
// Append the representative character for this class and skip past its name.
270 inputString
.append(charFromBiDiClass
[biDiClass
]);
271 start
+=biDiClassNameLength
;
// Main test: opens BidiTest.txt in the source test data folder, installs the
// custom BiDi class callback, and processes the file line by line.
// "Levels:" and "Reorder:" lines update the expected results; data lines carry
// a sequence of BiDi class names, a ';' separator and a hexadecimal bitset.
// For each of the four paragraph levels in paraLevels[] the text is run through
// ubidi_setPara() and the result is verified with checkLevels() and checkOrdering().
// Reading stops once errorCount reaches 10.
// NOTE(review): bidiTestPath is a fixed 400-byte buffer filled with
// strcpy()/strcat() without a length check.
// NOTE(review): how the parsed bitset selects which paragraph levels are
// tested is not visible in this view.
276 void BiDiConformanceTest::TestBidiTest() {
277 IcuTestErrorCode
errorCode(*this, "TestBidiTest");
278 const char *sourceTestDataPath
=getSourceTestData(errorCode
);
279 if(errorCode
.logIfFailureAndReset("unable to find the source/test/testdata "
280 "folder (getSourceTestData())")) {
283 char bidiTestPath
[400];
284 strcpy(bidiTestPath
, sourceTestDataPath
);
285 strcat(bidiTestPath
, "BidiTest.txt");
286 LocalStdioFilePointer
bidiTestFile(fopen(bidiTestPath
, "r"));
287 if(bidiTestFile
.isNull()) {
288 errln("unable to open %s", bidiTestPath
);
// Make ICU take BiDi classes from biDiConfUBiDiClassCallback for this UBiDi object.
291 LocalUBiDiPointer
ubidi(ubidi_open());
292 ubidi_setClassCallback(ubidi
.getAlias(), biDiConfUBiDiClassCallback
, NULL
,
293 NULL
, NULL
, errorCode
);
294 if(errorCode
.logIfFailureAndReset("ubidi_setClassCallback()")) {
301 while(errorCount
<10 && fgets(line
, (int)sizeof(line
), bidiTestFile
.getAlias())!=NULL
) {
303 // Remove trailing comments and whitespace.
304 char *commentStart
=strchr(line
, '#');
305 if(commentStart
!=NULL
) {
309 const char *start
=u_skipWhitespace(line
);
311 continue; // Skip empty and comment-only lines.
// Directive lines: the numeric offsets skip the length of the matched keyword.
315 if(0==strncmp(start
, "Levels:", 7)) {
316 if(!parseLevels(start
+7)) {
319 } else if(0==strncmp(start
, "Reorder:", 8)) {
320 if(!parseOrdering(start
+8)) {
324 // Skip unknown @Xyz: ...
// Data line: class names, then ';', then the bitset.
326 if(!parseInputStringFromBiDiClasses(start
)) {
329 start
=u_skipWhitespace(start
);
331 errln("missing ; separator on input line %s", line
);
334 start
=u_skipWhitespace(start
+1);
// The bitset is parsed as a hexadecimal number.
336 uint32_t bitset
=(uint32_t)strtoul(start
, &end
, 16);
337 if(end
<=start
|| (!U_IS_INV_WHITESPACE(*end
) && *end
!=';' && *end
!=0)) {
338 errln("input bitset parse error at %s", start
);
341 // Loop over the bitset.
342 static const UBiDiLevel paraLevels
[]={ UBIDI_DEFAULT_LTR
, 0, 1, UBIDI_DEFAULT_RTL
};
343 static const char *const paraLevelNames
[]={ "auto/LTR", "LTR", "RTL", "auto/RTL" };
344 for(int i
=0; i
<=3; ++i
) {
346 ubidi_setPara(ubidi
.getAlias(), inputString
.getBuffer(), inputString
.length(),
347 paraLevels
[i
], NULL
, errorCode
);
348 const UBiDiLevel
*actualLevels
=ubidi_getLevels(ubidi
.getAlias(), errorCode
);
349 if(errorCode
.logIfFailureAndReset("ubidi_setPara() or ubidi_getLevels()")) {
350 errln("Input line %d: %s", (int)lineNumber
, line
);
353 if(!checkLevels(actualLevels
, ubidi_getProcessedLength(ubidi
.getAlias()),
354 paraLevelNames
[i
])) {
355 // continue outerLoop; does not exist in C++
356 // so just break out of the inner loop.
359 if(!checkOrdering(ubidi
.getAlias(), paraLevelNames
[i
])) {
360 // continue outerLoop; does not exist in C++
361 // so just break out of the inner loop.
// Maps a level to a single UChar for the "Expected levels" / "Actual levels"
// lines that checkLevels() prints.
// Levels below UBIDI_DEFAULT_LTR take the first branch.
// NOTE(review): the bodies of both branches are not visible in this view.
370 static UChar
printLevel(UBiDiLevel level
) {
371 if(level
<UBIDI_DEFAULT_LTR
) {
// Computes a two-bit summary of the directions present in actualLevels[0..actualCount-1]:
// bit 0 is set if any level is even, bit 1 if any level is odd.
// Returns 0 for an empty array and 3 when both even and odd levels occur.
378 static uint32_t getDirectionBits(const UBiDiLevel actualLevels
[], int32_t actualCount
) {
379 uint32_t actualDirectionBits
=0;
380 for(int32_t i
=0; i
<actualCount
; ++i
) {
// (level&1) is 0 for an even level and 1 for an odd one.
381 actualDirectionBits
|=(1<<(actualLevels
[i
]&1));
383 return actualDirectionBits
;
// Compares the levels computed by ICU with the expected levels[] parsed from
// the test file.
// actualLevels / actualCount: levels returned by ICU and their number.
// paraLevelName: display name of the paragraph level, used in error output.
// Reports a count mismatch and the first differing level through errln(), then
// prints the input context plus the expected and actual level sequences.
// NOTE(review): the isOk bookkeeping, the final errln() calls for the two
// built strings, and the return statements are not visible in this view.
386 UBool
BiDiConformanceTest::checkLevels(const UBiDiLevel actualLevels
[], int32_t actualCount
,
387 const char *paraLevelName
) {
389 if(levelsCount
!=actualCount
) {
390 errln("Wrong number of level values; expected %d actual %d",
391 (int)levelsCount
, (int)actualCount
);
394 for(int32_t i
=0; i
<actualCount
; ++i
) {
// Expected entries >= UBIDI_DEFAULT_LTR are placeholders and are never
// treated as a mismatch.
395 if(levels
[i
]!=actualLevels
[i
] && levels
[i
]<UBIDI_DEFAULT_LTR
) {
// directionBits!=3 means that the expected levels are all even or all odd.
396 if(directionBits
!=3 && directionBits
==getDirectionBits(actualLevels
, actualCount
)) {
397 // ICU used a shortcut:
398 // Since the text is unidirectional, it did not store the resolved
399 // levels but just returns all levels as the paragraph level 0 or 1.
400 // The reordering result is the same, so this is fine.
403 errln("Wrong level value at index %d; expected %d actual %d",
404 (int)i
, levels
[i
], actualLevels
[i
]);
// Diagnostic output: input context, then both level sequences, one
// printLevel() character per level separated by spaces.
412 printErrorLine(paraLevelName
);
413 UnicodeString
els("Expected levels: ");
415 for(i
=0; i
<levelsCount
; ++i
) {
416 els
.append((UChar
)0x20).append(printLevel(levels
[i
]));
418 UnicodeString
als("Actual levels: ");
419 for(i
=0; i
<actualCount
; ++i
) {
420 als
.append((UChar
)0x20).append(printLevel(actualLevels
[i
]));
428 // Note: ubidi_setReorderingOptions(ubidi, UBIDI_OPTION_REMOVE_CONTROLS);
429 // does not work for custom BiDi class assignments
430 // and anyway also removes LRM/RLM/ZWJ/ZWNJ which is not desirable here.
431 // Therefore we just skip the indexes for BiDi controls while comparing
432 // with the expected ordering that has them omitted.
// Compares ICU's visual-to-logical mapping with the expected ordering[] parsed
// from the test file.
// ubidi: UBiDi object on which the paragraph has already been set.
// paraLevelName: display name of the paragraph level, used in error output.
// Visual positions whose expected level is >= UBIDI_DEFAULT_LTR are skipped, so
// visualIndex counts only the positions that take part in the comparison.
// NOTE(review): the diagnostic strings append (UChar)(0x30+index), which is a
// decimal digit only for index values 0..9; larger indexes print as other characters.
// NOTE(review): the isOk declaration and updates, the final errln() calls for
// the two built strings, and the return statements are not visible in this view.
433 UBool
BiDiConformanceTest::checkOrdering(UBiDi
*ubidi
, const char *paraLevelName
) {
435 IcuTestErrorCode
errorCode(*this, "TestBidiTest/checkOrdering()");
436 int32_t resultLength
=ubidi_getResultLength(ubidi
); // visual length including BiDi controls
437 int32_t i
, visualIndex
;
438 // Note: It should be faster to call ubidi_countRuns()/ubidi_getVisualRun()
439 // and loop over each run's indexes, but that seems unnecessary for this test code.
440 for(i
=visualIndex
=0; i
<resultLength
; ++i
) {
441 int32_t logicalIndex
=ubidi_getLogicalIndex(ubidi
, i
, errorCode
);
442 if(errorCode
.logIfFailureAndReset("ubidi_getLogicalIndex()")) {
443 errln("Input line %d: %s", (int)lineNumber
, line
);
446 if(levels
[logicalIndex
]>=UBIDI_DEFAULT_LTR
) {
447 continue; // BiDi control, omitted from expected ordering.
// Only compare while expected values remain; a length mismatch is reported
// after the loop.
449 if(visualIndex
<orderingCount
&& logicalIndex
!=ordering
[visualIndex
]) {
450 errln("Wrong ordering value at visual index %d; expected %d actual %d",
451 (int)visualIndex
, ordering
[visualIndex
], logicalIndex
);
457 // visualIndex is now the visual length minus the BiDi controls,
458 // which should match the length of the BidiTest.txt ordering.
459 if(isOk
&& orderingCount
!=visualIndex
) {
460 errln("Wrong number of ordering values; expected %d actual %d",
461 (int)orderingCount
, (int)visualIndex
);
// Diagnostic output: input context, then the expected and actual orderings.
465 printErrorLine(paraLevelName
);
466 UnicodeString
eord("Expected ordering: ");
467 for(i
=0; i
<orderingCount
; ++i
) {
468 eord
.append((UChar
)0x20).append((UChar
)(0x30+ordering
[i
]));
470 UnicodeString
aord("Actual ordering: ");
471 for(i
=0; i
<resultLength
; ++i
) {
472 int32_t logicalIndex
=ubidi_getLogicalIndex(ubidi
, i
, errorCode
);
// Same filter as in the comparison loop: leave out the skipped positions.
473 if(levels
[logicalIndex
]<UBIDI_DEFAULT_LTR
) {
474 aord
.append((UChar
)0x20).append((UChar
)(0x30+logicalIndex
));
// Emits the context of a failure through errln(): the current input line
// number and text, the synthesized input string, and the paragraph level name.
// paraLevelName: display name of the paragraph level being tested.
// NOTE(review): any statement before the first errln() call is not visible in this view.
483 void BiDiConformanceTest::printErrorLine(const char *paraLevelName
) {
485 errln("Input line %5d: %s", (int)lineNumber
, line
);
486 errln(UnicodeString("Input string: ")+inputString
);
487 errln("Para level: %s", paraLevelName
);