]>
Commit | Line | Data |
---|---|---|
729e4ab9 A |
1 | /* |
2 | ******************************************************************************* | |
3 | * | |
b331163b | 4 | * Copyright (C) 2009-2014, International Business Machines |
729e4ab9 A |
5 | * Corporation and others. All Rights Reserved. |
6 | * | |
7 | ******************************************************************************* | |
8 | * file name: bidiconf.cpp | |
9 | * encoding: US-ASCII | |
10 | * tab size: 8 (not used) | |
11 | * indentation:4 | |
12 | * | |
13 | * created on: 2009oct16 | |
14 | * created by: Markus W. Scherer | |
15 | * | |
57a6839d | 16 | * BiDi conformance test, using the Unicode BidiTest.txt and BidiCharacterTest.txt files. |
729e4ab9 A |
17 | */ |
18 | ||
19 | #include <stdio.h> | |
20 | #include <stdlib.h> | |
21 | #include <string.h> | |
22 | #include "unicode/utypes.h" | |
23 | #include "unicode/ubidi.h" | |
24 | #include "unicode/errorcode.h" | |
25 | #include "unicode/localpointer.h" | |
26 | #include "unicode/putil.h" | |
27 | #include "unicode/unistr.h" | |
28 | #include "intltest.h" | |
29 | #include "uparse.h" | |
30 | ||
31 | class BiDiConformanceTest : public IntlTest { | |
32 | public: | |
33 | BiDiConformanceTest() : | |
34 | directionBits(0), lineNumber(0), levelsCount(0), orderingCount(0), | |
35 | errorCount(0) {} | |
36 | ||
37 | void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL); | |
38 | ||
39 | void TestBidiTest(); | |
57a6839d | 40 | void TestBidiCharacterTest(); |
729e4ab9 | 41 | private: |
57a6839d | 42 | UBool parseLevels(const char *&start); |
729e4ab9 A |
43 | UBool parseOrdering(const char *start); |
44 | UBool parseInputStringFromBiDiClasses(const char *&start); | |
45 | ||
57a6839d A |
46 | UBool checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount); |
47 | UBool checkOrdering(UBiDi *ubidi); | |
729e4ab9 | 48 | |
57a6839d | 49 | void printErrorLine(); |
729e4ab9 A |
50 | |
51 | char line[10000]; | |
52 | UBiDiLevel levels[1000]; | |
53 | uint32_t directionBits; | |
54 | int32_t ordering[1000]; | |
55 | int32_t lineNumber; | |
56 | int32_t levelsCount; | |
57 | int32_t orderingCount; | |
58 | int32_t errorCount; | |
59 | UnicodeString inputString; | |
57a6839d A |
60 | const char *paraLevelName; |
61 | char levelNameString[12]; | |
729e4ab9 A |
62 | }; |
63 | ||
64 | extern IntlTest *createBiDiConformanceTest() { | |
65 | return new BiDiConformanceTest(); | |
66 | } | |
67 | ||
68 | void BiDiConformanceTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) { | |
69 | if(exec) { | |
70 | logln("TestSuite BiDiConformanceTest: "); | |
71 | } | |
57a6839d A |
72 | TESTCASE_AUTO_BEGIN; |
73 | TESTCASE_AUTO(TestBidiTest); | |
74 | TESTCASE_AUTO(TestBidiCharacterTest); | |
75 | TESTCASE_AUTO_END; | |
729e4ab9 A |
76 | } |
77 | ||
729e4ab9 A |
// Defines the LocalStdioFilePointer type used below to hold the FILE *
// returned by fopen(); fclose is passed as the associated close function.
// NOTE(review): presumably the wrapper calls fclose() when it goes out of
// scope -- confirm against unicode/localpointer.h.
78 | U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose); |
79 | ||
57a6839d | 80 | UBool BiDiConformanceTest::parseLevels(const char *&start) { |
729e4ab9 A |
81 | directionBits=0; |
82 | levelsCount=0; | |
57a6839d | 83 | while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') { |
729e4ab9 A |
84 | if(*start=='x') { |
85 | levels[levelsCount++]=UBIDI_DEFAULT_LTR; | |
86 | ++start; | |
87 | } else { | |
88 | char *end; | |
89 | uint32_t value=(uint32_t)strtoul(start, &end, 10); | |
57a6839d A |
90 | if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0 && *end!=';') |
91 | || value>(UBIDI_MAX_EXPLICIT_LEVEL+1)) { | |
92 | errln("\nError on line %d: Levels parse error at %s", (int)lineNumber, start); | |
93 | printErrorLine(); | |
729e4ab9 A |
94 | return FALSE; |
95 | } | |
96 | levels[levelsCount++]=(UBiDiLevel)value; | |
97 | directionBits|=(1<<(value&1)); | |
98 | start=end; | |
99 | } | |
100 | } | |
101 | return TRUE; | |
102 | } | |
103 | ||
104 | UBool BiDiConformanceTest::parseOrdering(const char *start) { | |
105 | orderingCount=0; | |
57a6839d | 106 | while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') { |
729e4ab9 A |
107 | char *end; |
108 | uint32_t value=(uint32_t)strtoul(start, &end, 10); | |
57a6839d A |
109 | if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0 && *end!=';') || value>=1000) { |
110 | errln("\nError on line %d: Reorder parse error at %s", (int)lineNumber, start); | |
111 | printErrorLine(); | |
729e4ab9 A |
112 | return FALSE; |
113 | } | |
114 | ordering[orderingCount++]=(int32_t)value; | |
115 | start=end; | |
116 | } | |
117 | return TRUE; | |
118 | } | |
119 | ||
// Stand-in character for each BiDi class, indexed by UCharDirection value.
// parseInputStringFromBiDiClasses() appends charFromBiDiClass[biDiClass] to the
// input string, and biDiConfUBiDiClassCallback() maps such a character back to
// its index, so the entry order must follow the UCharDirection enum order.
// The values are written as hex code units rather than character literals.
120 | static const UChar charFromBiDiClass[U_CHAR_DIRECTION_COUNT]={ | |
121 | 0x6c, // 'l' for L | |
122 | 0x52, // 'R' for R | |
123 | 0x33, // '3' for EN | |
124 | 0x2d, // '-' for ES | |
125 | 0x25, // '%' for ET | |
126 | 0x39, // '9' for AN | |
127 | 0x2c, // ',' for CS | |
128 | 0x2f, // '/' for B | |
129 | 0x5f, // '_' for S | |
130 | 0x20, // ' ' for WS | |
131 | 0x3d, // '=' for ON | |
132 | 0x65, // 'e' for LRE | |
133 | 0x6f, // 'o' for LRO | |
134 | 0x41, // 'A' for AL | |
135 | 0x45, // 'E' for RLE | |
136 | 0x4f, // 'O' for RLO | |
137 | 0x2a, // '*' for PDF | |
138 | 0x60, // '`' for NSM | |
57a6839d A |
139 | 0x7c, // '|' for BN |
140 | // new in Unicode 6.3/ICU 52 | |
141 | 0x53, // 'S' for FSI | |
142 | 0x69, // 'i' for LRI | |
143 | 0x49, // 'I' for RLI | |
144 | 0x2e // '.' for PDI | |
729e4ab9 A |
145 | }; |
146 | ||
147 | U_CDECL_BEGIN | |
148 | ||
149 | static UCharDirection U_CALLCONV | |
150 | biDiConfUBiDiClassCallback(const void * /*context*/, UChar32 c) { | |
151 | for(int i=0; i<U_CHAR_DIRECTION_COUNT; ++i) { | |
152 | if(c==charFromBiDiClass[i]) { | |
153 | return (UCharDirection)i; | |
154 | } | |
155 | } | |
156 | // Character not in our hardcoded table. | |
157 | // Should not occur during testing. | |
158 | return U_BIDI_CLASS_DEFAULT; | |
159 | } | |
160 | ||
161 | U_CDECL_END | |
162 | ||
// Length of the short BiDi class name for each UCharDirection index
// (e.g. 1 for "L", 2 for "EN", 3 for "LRE").
// The extra last entry, at index U_CHAR_DIRECTION_COUNT, is 0: that index is
// what parseInputStringFromBiDiClasses() looks up when no class name matched.
163 | static const int8_t biDiClassNameLengths[U_CHAR_DIRECTION_COUNT+1]={ | |
57a6839d | 164 | 1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 3, 3, 2, 3, 3, 3, 3, 2, 3, 3, 3, 3, 0 |
729e4ab9 A |
165 | }; |
166 | ||
167 | UBool BiDiConformanceTest::parseInputStringFromBiDiClasses(const char *&start) { | |
168 | inputString.remove(); | |
169 | /* | |
170 | * Lengthy but fast BiDi class parser. | |
171 | * A simple parser could terminate or extract the name string and use | |
172 | * int32_t biDiClassInt=u_getPropertyValueEnum(UCHAR_BIDI_CLASS, bidiClassString); | |
173 | * but that makes this test take significantly more time. | |
174 | */ | |
175 | while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') { | |
176 | UCharDirection biDiClass=U_CHAR_DIRECTION_COUNT; | |
177 | // Compare each character once until we have a match on | |
178 | // a complete, short BiDi class name. | |
179 | if(start[0]=='L') { | |
180 | if(start[1]=='R') { | |
181 | if(start[2]=='E') { | |
182 | biDiClass=U_LEFT_TO_RIGHT_EMBEDDING; | |
57a6839d A |
183 | } else if(start[2]=='I') { |
184 | biDiClass=U_LEFT_TO_RIGHT_ISOLATE; | |
729e4ab9 A |
185 | } else if(start[2]=='O') { |
186 | biDiClass=U_LEFT_TO_RIGHT_OVERRIDE; | |
187 | } | |
188 | } else { | |
189 | biDiClass=U_LEFT_TO_RIGHT; | |
190 | } | |
191 | } else if(start[0]=='R') { | |
192 | if(start[1]=='L') { | |
193 | if(start[2]=='E') { | |
194 | biDiClass=U_RIGHT_TO_LEFT_EMBEDDING; | |
57a6839d A |
195 | } else if(start[2]=='I') { |
196 | biDiClass=U_RIGHT_TO_LEFT_ISOLATE; | |
729e4ab9 A |
197 | } else if(start[2]=='O') { |
198 | biDiClass=U_RIGHT_TO_LEFT_OVERRIDE; | |
199 | } | |
200 | } else { | |
201 | biDiClass=U_RIGHT_TO_LEFT; | |
202 | } | |
203 | } else if(start[0]=='E') { | |
204 | if(start[1]=='N') { | |
205 | biDiClass=U_EUROPEAN_NUMBER; | |
206 | } else if(start[1]=='S') { | |
207 | biDiClass=U_EUROPEAN_NUMBER_SEPARATOR; | |
208 | } else if(start[1]=='T') { | |
209 | biDiClass=U_EUROPEAN_NUMBER_TERMINATOR; | |
210 | } | |
211 | } else if(start[0]=='A') { | |
212 | if(start[1]=='L') { | |
213 | biDiClass=U_RIGHT_TO_LEFT_ARABIC; | |
214 | } else if(start[1]=='N') { | |
215 | biDiClass=U_ARABIC_NUMBER; | |
216 | } | |
217 | } else if(start[0]=='C' && start[1]=='S') { | |
218 | biDiClass=U_COMMON_NUMBER_SEPARATOR; | |
219 | } else if(start[0]=='B') { | |
220 | if(start[1]=='N') { | |
221 | biDiClass=U_BOUNDARY_NEUTRAL; | |
222 | } else { | |
223 | biDiClass=U_BLOCK_SEPARATOR; | |
224 | } | |
225 | } else if(start[0]=='S') { | |
226 | biDiClass=U_SEGMENT_SEPARATOR; | |
227 | } else if(start[0]=='W' && start[1]=='S') { | |
228 | biDiClass=U_WHITE_SPACE_NEUTRAL; | |
229 | } else if(start[0]=='O' && start[1]=='N') { | |
230 | biDiClass=U_OTHER_NEUTRAL; | |
57a6839d A |
231 | } else if(start[0]=='P' && start[1]=='D') { |
232 | if(start[2]=='F') { | |
233 | biDiClass=U_POP_DIRECTIONAL_FORMAT; | |
234 | } else if(start[2]=='I') { | |
235 | biDiClass=U_POP_DIRECTIONAL_ISOLATE; | |
236 | } | |
729e4ab9 A |
237 | } else if(start[0]=='N' && start[1]=='S' && start[2]=='M') { |
238 | biDiClass=U_DIR_NON_SPACING_MARK; | |
57a6839d A |
239 | } else if(start[0]=='F' && start[1]=='S' && start[2]=='I') { |
240 | biDiClass=U_FIRST_STRONG_ISOLATE; | |
729e4ab9 A |
241 | } |
242 | // Now we verify that the class name is terminated properly, | |
243 | // and not just the start of a longer word. | |
244 | int8_t biDiClassNameLength=biDiClassNameLengths[biDiClass]; | |
245 | char c=start[biDiClassNameLength]; | |
57a6839d A |
246 | if(biDiClass<U_CHAR_DIRECTION_COUNT && (U_IS_INV_WHITESPACE(c) || c==';' || c==0)) { |
247 | inputString.append(charFromBiDiClass[biDiClass]); | |
248 | start+=biDiClassNameLength; | |
249 | continue; | |
729e4ab9 | 250 | } |
57a6839d A |
251 | errln("\nError on line %d: BiDi class string not recognized at %s", (int)lineNumber, start); |
252 | printErrorLine(); | |
253 | return FALSE; | |
729e4ab9 A |
254 | } |
255 | return TRUE; | |
256 | } | |
257 | ||
258 | void BiDiConformanceTest::TestBidiTest() { | |
259 | IcuTestErrorCode errorCode(*this, "TestBidiTest"); | |
260 | const char *sourceTestDataPath=getSourceTestData(errorCode); | |
261 | if(errorCode.logIfFailureAndReset("unable to find the source/test/testdata " | |
262 | "folder (getSourceTestData())")) { | |
263 | return; | |
264 | } | |
265 | char bidiTestPath[400]; | |
266 | strcpy(bidiTestPath, sourceTestDataPath); | |
267 | strcat(bidiTestPath, "BidiTest.txt"); | |
268 | LocalStdioFilePointer bidiTestFile(fopen(bidiTestPath, "r")); | |
269 | if(bidiTestFile.isNull()) { | |
270 | errln("unable to open %s", bidiTestPath); | |
271 | return; | |
272 | } | |
273 | LocalUBiDiPointer ubidi(ubidi_open()); | |
274 | ubidi_setClassCallback(ubidi.getAlias(), biDiConfUBiDiClassCallback, NULL, | |
275 | NULL, NULL, errorCode); | |
276 | if(errorCode.logIfFailureAndReset("ubidi_setClassCallback()")) { | |
277 | return; | |
278 | } | |
279 | lineNumber=0; | |
280 | levelsCount=0; | |
281 | orderingCount=0; | |
282 | errorCount=0; | |
b331163b A |
283 | // paraLevelName must be initialized in case the first non-comment line is in error |
284 | paraLevelName="N/A"; | |
729e4ab9 A |
285 | while(errorCount<10 && fgets(line, (int)sizeof(line), bidiTestFile.getAlias())!=NULL) { |
286 | ++lineNumber; | |
287 | // Remove trailing comments and whitespace. | |
288 | char *commentStart=strchr(line, '#'); | |
289 | if(commentStart!=NULL) { | |
290 | *commentStart=0; | |
291 | } | |
292 | u_rtrim(line); | |
293 | const char *start=u_skipWhitespace(line); | |
294 | if(*start==0) { | |
295 | continue; // Skip empty and comment-only lines. | |
296 | } | |
297 | if(*start=='@') { | |
298 | ++start; | |
299 | if(0==strncmp(start, "Levels:", 7)) { | |
57a6839d A |
300 | start+=7; |
301 | if(!parseLevels(start)) { | |
729e4ab9 A |
302 | return; |
303 | } | |
304 | } else if(0==strncmp(start, "Reorder:", 8)) { | |
305 | if(!parseOrdering(start+8)) { | |
306 | return; | |
307 | } | |
308 | } | |
309 | // Skip unknown @Xyz: ... | |
310 | } else { | |
311 | if(!parseInputStringFromBiDiClasses(start)) { | |
312 | return; | |
313 | } | |
314 | start=u_skipWhitespace(start); | |
315 | if(*start!=';') { | |
316 | errln("missing ; separator on input line %s", line); | |
317 | return; | |
318 | } | |
319 | start=u_skipWhitespace(start+1); | |
320 | char *end; | |
321 | uint32_t bitset=(uint32_t)strtoul(start, &end, 16); | |
322 | if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0)) { | |
323 | errln("input bitset parse error at %s", start); | |
324 | return; | |
325 | } | |
326 | // Loop over the bitset. | |
327 | static const UBiDiLevel paraLevels[]={ UBIDI_DEFAULT_LTR, 0, 1, UBIDI_DEFAULT_RTL }; | |
328 | static const char *const paraLevelNames[]={ "auto/LTR", "LTR", "RTL", "auto/RTL" }; | |
329 | for(int i=0; i<=3; ++i) { | |
330 | if(bitset&(1<<i)) { | |
331 | ubidi_setPara(ubidi.getAlias(), inputString.getBuffer(), inputString.length(), | |
332 | paraLevels[i], NULL, errorCode); | |
333 | const UBiDiLevel *actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCode); | |
334 | if(errorCode.logIfFailureAndReset("ubidi_setPara() or ubidi_getLevels()")) { | |
335 | errln("Input line %d: %s", (int)lineNumber, line); | |
336 | return; | |
337 | } | |
57a6839d A |
338 | paraLevelName=paraLevelNames[i]; |
339 | if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()))) { | |
729e4ab9 A |
340 | // continue outerLoop; does not exist in C++ |
341 | // so just break out of the inner loop. | |
342 | break; | |
343 | } | |
57a6839d | 344 | if(!checkOrdering(ubidi.getAlias())) { |
729e4ab9 A |
345 | // continue outerLoop; does not exist in C++ |
346 | // so just break out of the inner loop. | |
347 | break; | |
348 | } | |
349 | } | |
350 | } | |
351 | } | |
352 | } | |
353 | } | |
354 | ||
57a6839d A |
355 | /* |
356 | ******************************************************************************* | |
357 | * | |
358 | * created on: 2013jul01 | |
359 | * created by: Matitiahu Allouche | |
360 | ||
361 | This function performs a conformance test for implementations of the | |
362 | Unicode Bidirectional Algorithm, specified in UAX #9: Unicode | |
363 | Bidirectional Algorithm, at http://www.unicode.org/unicode/reports/tr9/ | |
364 | ||
365 | Each test case is represented in a single line which is read from a file | |
366 | named BidiCharacterTest.txt. Empty, blank and comment lines may also appear |
367 | in this file. | |
368 | ||
369 | The format of the test data is specified below. Note that each test | |
370 | case constitutes a single line of text; reordering is applied within a | |
371 | single line and independently of a rendering engine, and rules L3 and L4 | |
372 | are out of scope. | |
373 | ||
374 | The number sign '#' is the comment character: everything is ignored from | |
375 | the occurrence of '#' until the end of the line. |
376 | Empty lines and lines containing only spaces and/or comments are ignored. | |
377 | ||
378 | Lines which represent test cases consist of 4 or 5 fields separated by a | |
379 | semicolon. Each field consists of tokens separated by whitespace (space | |
380 | or Tab). Whitespace before and after semicolons is optional. | |
381 | ||
382 | Field 0: A sequence of hexadecimal code point values separated by space | |
383 | ||
384 | Field 1: A value representing the paragraph direction, as follows: | |
385 | - 0 represents left-to-right | |
386 | - 1 represents right-to-left | |
387 | - 2 represents auto-LTR according to rules P2 and P3 of the algorithm | |
388 | - 3 represents auto-RTL according to rules P2 and P3 of the algorithm | |
389 | - a negative number whose absolute value is taken as paragraph level; | |
390 | this may be useful to test cases where the embedding level approaches | |
391 | or exceeds the maximum embedding level. | |
392 | ||
393 | Field 2: The resolved paragraph embedding level. If the input (field 0) | |
394 | includes more than one paragraph, this field represents the | |
395 | resolved level of the first paragraph. | |
396 | ||
397 | Field 3: An ordered list of resulting levels for each token in field 0 | |
398 | (each token represents one source character). | |
399 | The UBA does not assign levels to certain characters (e.g. LRO); | |
400 | characters removed in rule X9 are indicated with an 'x'. | |
401 | ||
402 | Field 4: An ordered list of indices showing the resulting visual ordering | |
403 | from left to right; characters with a resolved level of 'x' are | |
404 | skipped. The number are zero-based. Each index corresponds to | |
405 | a character in the reordered (visual) string. It represents the | |
406 | index of the source character in the input (field 0). | |
407 | This field is optional. When it is absent, the visual ordering | |
408 | is not verified. | |
409 | ||
410 | Examples: | |
411 | ||
412 | # This is a comment line. | |
413 | L L ON R ; 0 ; 0 ; 0 0 0 1 ; 0 1 2 3 | |
414 | L L ON R;0;0;0 0 0 1;0 1 2 3 | |
415 | ||
416 | # Note: in the next line, 'B' represents a block separator, not the letter 'B'. | |
417 | LRE A B C PDF;2;0;x 2 0 0 x;1 2 3 | |
418 | # Note: in the next line, 'b' represents the letter 'b', not a block separator. | |
419 | a b c 05d0 05d1 x ; 0 ; 0 ; 0 0 0 1 1 0 ; 0 1 2 4 3 5 | |
420 | ||
421 | a R R x ; 1 ; 1 ; 2 1 1 2 | |
422 | L L R R R B R R L L L B ON ON ; 3 ; 0 ; 0 0 1 1 1 0 1 1 2 2 2 1 1 1 | |
423 | ||
424 | * | |
425 | ******************************************************************************* | |
426 | */ | |
427 | void BiDiConformanceTest::TestBidiCharacterTest() { | |
428 | IcuTestErrorCode errorCode(*this, "TestBidiCharacterTest"); | |
429 | const char *sourceTestDataPath=getSourceTestData(errorCode); | |
430 | if(errorCode.logIfFailureAndReset("unable to find the source/test/testdata " | |
431 | "folder (getSourceTestData())")) { | |
432 | return; | |
433 | } | |
434 | char bidiTestPath[400]; | |
435 | strcpy(bidiTestPath, sourceTestDataPath); | |
436 | strcat(bidiTestPath, "BidiCharacterTest.txt"); | |
437 | LocalStdioFilePointer bidiTestFile(fopen(bidiTestPath, "r")); | |
438 | if(bidiTestFile.isNull()) { | |
439 | errln("unable to open %s", bidiTestPath); | |
440 | return; | |
441 | } | |
442 | LocalUBiDiPointer ubidi(ubidi_open()); | |
443 | lineNumber=0; | |
444 | levelsCount=0; | |
445 | orderingCount=0; | |
446 | errorCount=0; | |
447 | while(errorCount<20 && fgets(line, (int)sizeof(line), bidiTestFile.getAlias())!=NULL) { | |
448 | ++lineNumber; | |
449 | paraLevelName="N/A"; | |
450 | inputString="N/A"; | |
451 | // Remove trailing comments and whitespace. | |
452 | char *commentStart=strchr(line, '#'); | |
453 | if(commentStart!=NULL) { | |
454 | *commentStart=0; | |
455 | } | |
456 | u_rtrim(line); | |
457 | const char *start=u_skipWhitespace(line); | |
458 | if(*start==0) { | |
459 | continue; // Skip empty and comment-only lines. | |
460 | } | |
461 | // Parse the code point string in field 0. | |
462 | UChar *buffer=inputString.getBuffer(200); | |
463 | int32_t length=u_parseString(start, buffer, inputString.getCapacity(), NULL, errorCode); | |
464 | if(errorCode.logIfFailureAndReset("Invalid string in field 0")) { | |
465 | errln("Input line %d: %s", (int)lineNumber, line); | |
466 | inputString.remove(); | |
467 | continue; | |
468 | } | |
469 | inputString.releaseBuffer(length); | |
470 | start=strchr(start, ';'); | |
471 | if(start==NULL) { | |
472 | errorCount++; | |
473 | errln("\nError on line %d: Missing ; separator on line: %s", (int)lineNumber, line); | |
474 | continue; | |
475 | } | |
476 | start=u_skipWhitespace(start+1); | |
477 | char *end; | |
478 | int32_t paraDirection=(int32_t)strtol(start, &end, 10); | |
479 | UBiDiLevel paraLevel=UBIDI_MAX_EXPLICIT_LEVEL+2; | |
480 | if(paraDirection==0) { | |
481 | paraLevel=0; | |
482 | paraLevelName="LTR"; | |
483 | } | |
484 | else if(paraDirection==1) { | |
485 | paraLevel=1; | |
486 | paraLevelName="RTL"; | |
487 | } | |
488 | else if(paraDirection==2) { | |
489 | paraLevel=UBIDI_DEFAULT_LTR; | |
490 | paraLevelName="Auto/LTR"; | |
491 | } | |
492 | else if(paraDirection==3) { | |
493 | paraLevel=UBIDI_DEFAULT_RTL; | |
494 | paraLevelName="Auto/RTL"; | |
495 | } | |
496 | else if(paraDirection<0 && -paraDirection<=(UBIDI_MAX_EXPLICIT_LEVEL+1)) { | |
497 | paraLevel=(UBiDiLevel)(-paraDirection); | |
498 | sprintf(levelNameString, "%d", (int)paraLevel); | |
499 | paraLevelName=levelNameString; | |
500 | } | |
501 | if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) || | |
502 | paraLevel==(UBIDI_MAX_EXPLICIT_LEVEL+2)) { | |
503 | errln("\nError on line %d: Input paragraph direction incorrect at %s", (int)lineNumber, start); | |
504 | printErrorLine(); | |
505 | continue; | |
506 | } | |
507 | start=u_skipWhitespace(end); | |
508 | if(*start!=';') { | |
509 | errorCount++; | |
510 | errln("\nError on line %d: Missing ; separator on line: %s", (int)lineNumber, line); | |
511 | continue; | |
512 | } | |
513 | start++; | |
514 | uint32_t resolvedParaLevel=(uint32_t)strtoul(start, &end, 10); | |
515 | if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) || | |
516 | resolvedParaLevel>1) { | |
517 | errln("\nError on line %d: Resolved paragraph level incorrect at %s", (int)lineNumber, start); | |
518 | printErrorLine(); | |
519 | continue; | |
520 | } | |
521 | start=u_skipWhitespace(end); | |
522 | if(*start!=';') { | |
523 | errorCount++; | |
524 | errln("\nError on line %d: Missing ; separator on line: %s", (int)lineNumber, line); | |
525 | return; | |
526 | } | |
527 | start++; | |
528 | if(!parseLevels(start)) { | |
529 | continue; | |
530 | } | |
531 | start=u_skipWhitespace(start); | |
532 | if(*start==';') { | |
533 | if(!parseOrdering(start+1)) { | |
534 | continue; | |
535 | } | |
536 | } | |
537 | else | |
538 | orderingCount=-1; | |
539 | ||
540 | ubidi_setPara(ubidi.getAlias(), inputString.getBuffer(), inputString.length(), | |
541 | paraLevel, NULL, errorCode); | |
542 | const UBiDiLevel *actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCode); | |
543 | if(errorCode.logIfFailureAndReset("ubidi_setPara() or ubidi_getLevels()")) { | |
544 | errln("Input line %d: %s", (int)lineNumber, line); | |
545 | continue; | |
546 | } | |
547 | UBiDiLevel actualLevel; | |
548 | if((actualLevel=ubidi_getParaLevel(ubidi.getAlias()))!=resolvedParaLevel) { | |
549 | printErrorLine(); | |
550 | errln("\nError on line %d: Wrong resolved paragraph level; expected %d actual %d", | |
551 | (int)lineNumber, resolvedParaLevel, actualLevel); | |
552 | continue; | |
553 | } | |
554 | if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()))) { | |
555 | continue; | |
556 | } | |
557 | if(orderingCount>=0 && !checkOrdering(ubidi.getAlias())) { | |
558 | continue; | |
559 | } | |
560 | } | |
561 | } | |
562 | ||
729e4ab9 A |
563 | static UChar printLevel(UBiDiLevel level) { |
564 | if(level<UBIDI_DEFAULT_LTR) { | |
565 | return 0x30+level; | |
566 | } else { | |
567 | return 0x78; // 'x' | |
568 | } | |
569 | } | |
570 | ||
571 | static uint32_t getDirectionBits(const UBiDiLevel actualLevels[], int32_t actualCount) { | |
572 | uint32_t actualDirectionBits=0; | |
573 | for(int32_t i=0; i<actualCount; ++i) { | |
574 | actualDirectionBits|=(1<<(actualLevels[i]&1)); | |
575 | } | |
576 | return actualDirectionBits; | |
577 | } | |
578 | ||
57a6839d | 579 | UBool BiDiConformanceTest::checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount) { |
729e4ab9 A |
580 | UBool isOk=TRUE; |
581 | if(levelsCount!=actualCount) { | |
57a6839d A |
582 | errln("\nError on line %d: Wrong number of level values; expected %d actual %d", |
583 | (int)lineNumber, (int)levelsCount, (int)actualCount); | |
729e4ab9 A |
584 | isOk=FALSE; |
585 | } else { | |
586 | for(int32_t i=0; i<actualCount; ++i) { | |
587 | if(levels[i]!=actualLevels[i] && levels[i]<UBIDI_DEFAULT_LTR) { | |
588 | if(directionBits!=3 && directionBits==getDirectionBits(actualLevels, actualCount)) { | |
589 | // ICU used a shortcut: | |
590 | // Since the text is unidirectional, it did not store the resolved | |
591 | // levels but just returns all levels as the paragraph level 0 or 1. | |
592 | // The reordering result is the same, so this is fine. | |
593 | break; | |
594 | } else { | |
57a6839d A |
595 | errln("\nError on line %d: Wrong level value at index %d; expected %d actual %d", |
596 | (int)lineNumber, (int)i, levels[i], actualLevels[i]); | |
729e4ab9 A |
597 | isOk=FALSE; |
598 | break; | |
599 | } | |
600 | } | |
601 | } | |
602 | } | |
603 | if(!isOk) { | |
57a6839d | 604 | printErrorLine(); |
729e4ab9 A |
605 | UnicodeString els("Expected levels: "); |
606 | int32_t i; | |
607 | for(i=0; i<levelsCount; ++i) { | |
608 | els.append((UChar)0x20).append(printLevel(levels[i])); | |
609 | } | |
610 | UnicodeString als("Actual levels: "); | |
611 | for(i=0; i<actualCount; ++i) { | |
612 | als.append((UChar)0x20).append(printLevel(actualLevels[i])); | |
613 | } | |
614 | errln(els); | |
615 | errln(als); | |
616 | } | |
617 | return isOk; | |
618 | } | |
619 | ||
620 | // Note: ubidi_setReorderingOptions(ubidi, UBIDI_OPTION_REMOVE_CONTROLS); | |
621 | // does not work for custom BiDi class assignments | |
622 | // and anyway also removes LRM/RLM/ZWJ/ZWNJ which is not desirable here. | |
623 | // Therefore we just skip the indexes for BiDi controls while comparing | |
624 | // with the expected ordering that has them omitted. | |
57a6839d | 625 | UBool BiDiConformanceTest::checkOrdering(UBiDi *ubidi) { |
729e4ab9 | 626 | UBool isOk=TRUE; |
57a6839d | 627 | IcuTestErrorCode errorCode(*this, "checkOrdering()"); |
729e4ab9 A |
628 | int32_t resultLength=ubidi_getResultLength(ubidi); // visual length including BiDi controls |
629 | int32_t i, visualIndex; | |
630 | // Note: It should be faster to call ubidi_countRuns()/ubidi_getVisualRun() | |
631 | // and loop over each run's indexes, but that seems unnecessary for this test code. | |
632 | for(i=visualIndex=0; i<resultLength; ++i) { | |
633 | int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode); | |
634 | if(errorCode.logIfFailureAndReset("ubidi_getLogicalIndex()")) { | |
635 | errln("Input line %d: %s", (int)lineNumber, line); | |
636 | return FALSE; | |
637 | } | |
638 | if(levels[logicalIndex]>=UBIDI_DEFAULT_LTR) { | |
639 | continue; // BiDi control, omitted from expected ordering. | |
640 | } | |
641 | if(visualIndex<orderingCount && logicalIndex!=ordering[visualIndex]) { | |
57a6839d A |
642 | errln("\nError on line %d: Wrong ordering value at visual index %d; expected %d actual %d", |
643 | (int)lineNumber, (int)visualIndex, ordering[visualIndex], logicalIndex); | |
729e4ab9 A |
644 | isOk=FALSE; |
645 | break; | |
646 | } | |
647 | ++visualIndex; | |
648 | } | |
649 | // visualIndex is now the visual length minus the BiDi controls, | |
650 | // which should match the length of the BidiTest.txt ordering. | |
651 | if(isOk && orderingCount!=visualIndex) { | |
57a6839d A |
652 | errln("\nError on line %d: Wrong number of ordering values; expected %d actual %d", |
653 | (int)lineNumber, (int)orderingCount, (int)visualIndex); | |
729e4ab9 A |
654 | isOk=FALSE; |
655 | } | |
656 | if(!isOk) { | |
57a6839d | 657 | printErrorLine(); |
729e4ab9 A |
658 | UnicodeString eord("Expected ordering: "); |
659 | for(i=0; i<orderingCount; ++i) { | |
660 | eord.append((UChar)0x20).append((UChar)(0x30+ordering[i])); | |
661 | } | |
662 | UnicodeString aord("Actual ordering: "); | |
663 | for(i=0; i<resultLength; ++i) { | |
664 | int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode); | |
665 | if(levels[logicalIndex]<UBIDI_DEFAULT_LTR) { | |
666 | aord.append((UChar)0x20).append((UChar)(0x30+logicalIndex)); | |
667 | } | |
668 | } | |
669 | errln(eord); | |
670 | errln(aord); | |
671 | } | |
672 | return isOk; | |
673 | } | |
674 | ||
57a6839d | 675 | void BiDiConformanceTest::printErrorLine() { |
729e4ab9 A |
676 | ++errorCount; |
677 | errln("Input line %5d: %s", (int)lineNumber, line); | |
678 | errln(UnicodeString("Input string: ")+inputString); | |
679 | errln("Para level: %s", paraLevelName); | |
680 | } |