]>
Commit | Line | Data |
---|---|---|
729e4ab9 A |
1 | /* |
2 | ******************************************************************************* | |
3 | * | |
b331163b | 4 | * Copyright (C) 2009-2014, International Business Machines |
729e4ab9 A |
5 | * Corporation and others. All Rights Reserved. |
6 | * | |
7 | ******************************************************************************* | |
8 | * file name: bidiconf.cpp | |
9 | * encoding: US-ASCII | |
10 | * tab size: 8 (not used) | |
11 | * indentation:4 | |
12 | * | |
13 | * created on: 2009oct16 | |
14 | * created by: Markus W. Scherer | |
15 | * | |
57a6839d | 16 | * BiDi conformance test, using the Unicode BidiTest.txt and BidiCharacterTest.txt files. |
729e4ab9 A |
17 | */ |
18 | ||
19 | #include <stdio.h> | |
20 | #include <stdlib.h> | |
21 | #include <string.h> | |
22 | #include "unicode/utypes.h" | |
23 | #include "unicode/ubidi.h" | |
24 | #include "unicode/errorcode.h" | |
25 | #include "unicode/localpointer.h" | |
26 | #include "unicode/putil.h" | |
27 | #include "unicode/unistr.h" | |
28 | #include "intltest.h" | |
29 | #include "uparse.h" | |
30 | ||
31 | class BiDiConformanceTest : public IntlTest { | |
32 | public: | |
33 | BiDiConformanceTest() : | |
34 | directionBits(0), lineNumber(0), levelsCount(0), orderingCount(0), | |
35 | errorCount(0) {} | |
36 | ||
37 | void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL); | |
38 | ||
39 | void TestBidiTest(); | |
57a6839d | 40 | void TestBidiCharacterTest(); |
729e4ab9 | 41 | private: |
57a6839d | 42 | UBool parseLevels(const char *&start); |
729e4ab9 A |
43 | UBool parseOrdering(const char *start); |
44 | UBool parseInputStringFromBiDiClasses(const char *&start); | |
45 | ||
57a6839d A |
46 | UBool checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount); |
47 | UBool checkOrdering(UBiDi *ubidi); | |
729e4ab9 | 48 | |
57a6839d | 49 | void printErrorLine(); |
729e4ab9 A |
50 | |
51 | char line[10000]; | |
52 | UBiDiLevel levels[1000]; | |
53 | uint32_t directionBits; | |
54 | int32_t ordering[1000]; | |
55 | int32_t lineNumber; | |
56 | int32_t levelsCount; | |
57 | int32_t orderingCount; | |
58 | int32_t errorCount; | |
59 | UnicodeString inputString; | |
57a6839d A |
60 | const char *paraLevelName; |
61 | char levelNameString[12]; | |
729e4ab9 A |
62 | }; |
63 | ||
64 | extern IntlTest *createBiDiConformanceTest() { | |
65 | return new BiDiConformanceTest(); | |
66 | } | |
67 | ||
68 | void BiDiConformanceTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) { | |
69 | if(exec) { | |
70 | logln("TestSuite BiDiConformanceTest: "); | |
71 | } | |
57a6839d A |
72 | TESTCASE_AUTO_BEGIN; |
73 | TESTCASE_AUTO(TestBidiTest); | |
74 | TESTCASE_AUTO(TestBidiCharacterTest); | |
75 | TESTCASE_AUTO_END; | |
729e4ab9 A |
76 | } |
77 | ||
729e4ab9 A |
78 | U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose); |
79 | ||
57a6839d | 80 | UBool BiDiConformanceTest::parseLevels(const char *&start) { |
729e4ab9 A |
81 | directionBits=0; |
82 | levelsCount=0; | |
57a6839d | 83 | while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') { |
729e4ab9 A |
84 | if(*start=='x') { |
85 | levels[levelsCount++]=UBIDI_DEFAULT_LTR; | |
86 | ++start; | |
87 | } else { | |
88 | char *end; | |
89 | uint32_t value=(uint32_t)strtoul(start, &end, 10); | |
57a6839d A |
90 | if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0 && *end!=';') |
91 | || value>(UBIDI_MAX_EXPLICIT_LEVEL+1)) { | |
92 | errln("\nError on line %d: Levels parse error at %s", (int)lineNumber, start); | |
93 | printErrorLine(); | |
729e4ab9 A |
94 | return FALSE; |
95 | } | |
96 | levels[levelsCount++]=(UBiDiLevel)value; | |
97 | directionBits|=(1<<(value&1)); | |
98 | start=end; | |
99 | } | |
100 | } | |
101 | return TRUE; | |
102 | } | |
103 | ||
104 | UBool BiDiConformanceTest::parseOrdering(const char *start) { | |
105 | orderingCount=0; | |
57a6839d | 106 | while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') { |
729e4ab9 A |
107 | char *end; |
108 | uint32_t value=(uint32_t)strtoul(start, &end, 10); | |
57a6839d A |
109 | if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0 && *end!=';') || value>=1000) { |
110 | errln("\nError on line %d: Reorder parse error at %s", (int)lineNumber, start); | |
111 | printErrorLine(); | |
729e4ab9 A |
112 | return FALSE; |
113 | } | |
114 | ordering[orderingCount++]=(int32_t)value; | |
115 | start=end; | |
116 | } | |
117 | return TRUE; | |
118 | } | |
119 | ||
120 | static const UChar charFromBiDiClass[U_CHAR_DIRECTION_COUNT]={ | |
121 | 0x6c, // 'l' for L | |
122 | 0x52, // 'R' for R | |
123 | 0x33, // '3' for EN | |
124 | 0x2d, // '-' for ES | |
125 | 0x25, // '%' for ET | |
126 | 0x39, // '9' for AN | |
127 | 0x2c, // ',' for CS | |
128 | 0x2f, // '/' for B | |
129 | 0x5f, // '_' for S | |
130 | 0x20, // ' ' for WS | |
131 | 0x3d, // '=' for ON | |
132 | 0x65, // 'e' for LRE | |
133 | 0x6f, // 'o' for LRO | |
134 | 0x41, // 'A' for AL | |
135 | 0x45, // 'E' for RLE | |
136 | 0x4f, // 'O' for RLO | |
137 | 0x2a, // '*' for PDF | |
138 | 0x60, // '`' for NSM | |
57a6839d A |
139 | 0x7c, // '|' for BN |
140 | // new in Unicode 6.3/ICU 52 | |
141 | 0x53, // 'S' for FSI | |
142 | 0x69, // 'i' for LRI | |
143 | 0x49, // 'I' for RLI | |
144 | 0x2e // '.' for PDI | |
729e4ab9 A |
145 | }; |
146 | ||
147 | U_CDECL_BEGIN | |
148 | ||
149 | static UCharDirection U_CALLCONV | |
150 | biDiConfUBiDiClassCallback(const void * /*context*/, UChar32 c) { | |
151 | for(int i=0; i<U_CHAR_DIRECTION_COUNT; ++i) { | |
152 | if(c==charFromBiDiClass[i]) { | |
153 | return (UCharDirection)i; | |
154 | } | |
155 | } | |
156 | // Character not in our hardcoded table. | |
157 | // Should not occur during testing. | |
158 | return U_BIDI_CLASS_DEFAULT; | |
159 | } | |
160 | ||
161 | U_CDECL_END | |
162 | ||
163 | static const int8_t biDiClassNameLengths[U_CHAR_DIRECTION_COUNT+1]={ | |
57a6839d | 164 | 1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 3, 3, 2, 3, 3, 3, 3, 2, 3, 3, 3, 3, 0 |
729e4ab9 A |
165 | }; |
166 | ||
167 | UBool BiDiConformanceTest::parseInputStringFromBiDiClasses(const char *&start) { | |
168 | inputString.remove(); | |
169 | /* | |
170 | * Lengthy but fast BiDi class parser. | |
171 | * A simple parser could terminate or extract the name string and use | |
172 | * int32_t biDiClassInt=u_getPropertyValueEnum(UCHAR_BIDI_CLASS, bidiClassString); | |
173 | * but that makes this test take significantly more time. | |
174 | */ | |
175 | while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') { | |
176 | UCharDirection biDiClass=U_CHAR_DIRECTION_COUNT; | |
177 | // Compare each character once until we have a match on | |
178 | // a complete, short BiDi class name. | |
179 | if(start[0]=='L') { | |
180 | if(start[1]=='R') { | |
181 | if(start[2]=='E') { | |
182 | biDiClass=U_LEFT_TO_RIGHT_EMBEDDING; | |
57a6839d A |
183 | } else if(start[2]=='I') { |
184 | biDiClass=U_LEFT_TO_RIGHT_ISOLATE; | |
729e4ab9 A |
185 | } else if(start[2]=='O') { |
186 | biDiClass=U_LEFT_TO_RIGHT_OVERRIDE; | |
187 | } | |
188 | } else { | |
189 | biDiClass=U_LEFT_TO_RIGHT; | |
190 | } | |
191 | } else if(start[0]=='R') { | |
192 | if(start[1]=='L') { | |
193 | if(start[2]=='E') { | |
194 | biDiClass=U_RIGHT_TO_LEFT_EMBEDDING; | |
57a6839d A |
195 | } else if(start[2]=='I') { |
196 | biDiClass=U_RIGHT_TO_LEFT_ISOLATE; | |
729e4ab9 A |
197 | } else if(start[2]=='O') { |
198 | biDiClass=U_RIGHT_TO_LEFT_OVERRIDE; | |
199 | } | |
200 | } else { | |
201 | biDiClass=U_RIGHT_TO_LEFT; | |
202 | } | |
203 | } else if(start[0]=='E') { | |
204 | if(start[1]=='N') { | |
205 | biDiClass=U_EUROPEAN_NUMBER; | |
206 | } else if(start[1]=='S') { | |
207 | biDiClass=U_EUROPEAN_NUMBER_SEPARATOR; | |
208 | } else if(start[1]=='T') { | |
209 | biDiClass=U_EUROPEAN_NUMBER_TERMINATOR; | |
210 | } | |
211 | } else if(start[0]=='A') { | |
212 | if(start[1]=='L') { | |
213 | biDiClass=U_RIGHT_TO_LEFT_ARABIC; | |
214 | } else if(start[1]=='N') { | |
215 | biDiClass=U_ARABIC_NUMBER; | |
216 | } | |
217 | } else if(start[0]=='C' && start[1]=='S') { | |
218 | biDiClass=U_COMMON_NUMBER_SEPARATOR; | |
219 | } else if(start[0]=='B') { | |
220 | if(start[1]=='N') { | |
221 | biDiClass=U_BOUNDARY_NEUTRAL; | |
222 | } else { | |
223 | biDiClass=U_BLOCK_SEPARATOR; | |
224 | } | |
225 | } else if(start[0]=='S') { | |
226 | biDiClass=U_SEGMENT_SEPARATOR; | |
227 | } else if(start[0]=='W' && start[1]=='S') { | |
228 | biDiClass=U_WHITE_SPACE_NEUTRAL; | |
229 | } else if(start[0]=='O' && start[1]=='N') { | |
230 | biDiClass=U_OTHER_NEUTRAL; | |
57a6839d A |
231 | } else if(start[0]=='P' && start[1]=='D') { |
232 | if(start[2]=='F') { | |
233 | biDiClass=U_POP_DIRECTIONAL_FORMAT; | |
234 | } else if(start[2]=='I') { | |
235 | biDiClass=U_POP_DIRECTIONAL_ISOLATE; | |
236 | } | |
729e4ab9 A |
237 | } else if(start[0]=='N' && start[1]=='S' && start[2]=='M') { |
238 | biDiClass=U_DIR_NON_SPACING_MARK; | |
57a6839d A |
239 | } else if(start[0]=='F' && start[1]=='S' && start[2]=='I') { |
240 | biDiClass=U_FIRST_STRONG_ISOLATE; | |
729e4ab9 A |
241 | } |
242 | // Now we verify that the class name is terminated properly, | |
243 | // and not just the start of a longer word. | |
244 | int8_t biDiClassNameLength=biDiClassNameLengths[biDiClass]; | |
245 | char c=start[biDiClassNameLength]; | |
57a6839d A |
246 | if(biDiClass<U_CHAR_DIRECTION_COUNT && (U_IS_INV_WHITESPACE(c) || c==';' || c==0)) { |
247 | inputString.append(charFromBiDiClass[biDiClass]); | |
248 | start+=biDiClassNameLength; | |
249 | continue; | |
729e4ab9 | 250 | } |
57a6839d A |
251 | errln("\nError on line %d: BiDi class string not recognized at %s", (int)lineNumber, start); |
252 | printErrorLine(); | |
253 | return FALSE; | |
729e4ab9 A |
254 | } |
255 | return TRUE; | |
256 | } | |
257 | ||
258 | void BiDiConformanceTest::TestBidiTest() { | |
259 | IcuTestErrorCode errorCode(*this, "TestBidiTest"); | |
260 | const char *sourceTestDataPath=getSourceTestData(errorCode); | |
261 | if(errorCode.logIfFailureAndReset("unable to find the source/test/testdata " | |
262 | "folder (getSourceTestData())")) { | |
263 | return; | |
264 | } | |
265 | char bidiTestPath[400]; | |
266 | strcpy(bidiTestPath, sourceTestDataPath); | |
267 | strcat(bidiTestPath, "BidiTest.txt"); | |
268 | LocalStdioFilePointer bidiTestFile(fopen(bidiTestPath, "r")); | |
269 | if(bidiTestFile.isNull()) { | |
270 | errln("unable to open %s", bidiTestPath); | |
271 | return; | |
272 | } | |
273 | LocalUBiDiPointer ubidi(ubidi_open()); | |
274 | ubidi_setClassCallback(ubidi.getAlias(), biDiConfUBiDiClassCallback, NULL, | |
275 | NULL, NULL, errorCode); | |
276 | if(errorCode.logIfFailureAndReset("ubidi_setClassCallback()")) { | |
277 | return; | |
278 | } | |
279 | lineNumber=0; | |
280 | levelsCount=0; | |
281 | orderingCount=0; | |
282 | errorCount=0; | |
b331163b A |
283 | // paraLevelName must be initialized in case the first non-comment line is in error |
284 | paraLevelName="N/A"; | |
729e4ab9 A |
285 | while(errorCount<10 && fgets(line, (int)sizeof(line), bidiTestFile.getAlias())!=NULL) { |
286 | ++lineNumber; | |
287 | // Remove trailing comments and whitespace. | |
288 | char *commentStart=strchr(line, '#'); | |
289 | if(commentStart!=NULL) { | |
290 | *commentStart=0; | |
291 | } | |
292 | u_rtrim(line); | |
293 | const char *start=u_skipWhitespace(line); | |
294 | if(*start==0) { | |
295 | continue; // Skip empty and comment-only lines. | |
296 | } | |
297 | if(*start=='@') { | |
298 | ++start; | |
299 | if(0==strncmp(start, "Levels:", 7)) { | |
57a6839d A |
300 | start+=7; |
301 | if(!parseLevels(start)) { | |
729e4ab9 A |
302 | return; |
303 | } | |
304 | } else if(0==strncmp(start, "Reorder:", 8)) { | |
305 | if(!parseOrdering(start+8)) { | |
306 | return; | |
307 | } | |
308 | } | |
309 | // Skip unknown @Xyz: ... | |
310 | } else { | |
311 | if(!parseInputStringFromBiDiClasses(start)) { | |
312 | return; | |
313 | } | |
314 | start=u_skipWhitespace(start); | |
315 | if(*start!=';') { | |
316 | errln("missing ; separator on input line %s", line); | |
317 | return; | |
318 | } | |
319 | start=u_skipWhitespace(start+1); | |
320 | char *end; | |
321 | uint32_t bitset=(uint32_t)strtoul(start, &end, 16); | |
322 | if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0)) { | |
323 | errln("input bitset parse error at %s", start); | |
324 | return; | |
325 | } | |
326 | // Loop over the bitset. | |
327 | static const UBiDiLevel paraLevels[]={ UBIDI_DEFAULT_LTR, 0, 1, UBIDI_DEFAULT_RTL }; | |
328 | static const char *const paraLevelNames[]={ "auto/LTR", "LTR", "RTL", "auto/RTL" }; | |
329 | for(int i=0; i<=3; ++i) { | |
330 | if(bitset&(1<<i)) { | |
331 | ubidi_setPara(ubidi.getAlias(), inputString.getBuffer(), inputString.length(), | |
332 | paraLevels[i], NULL, errorCode); | |
333 | const UBiDiLevel *actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCode); | |
334 | if(errorCode.logIfFailureAndReset("ubidi_setPara() or ubidi_getLevels()")) { | |
335 | errln("Input line %d: %s", (int)lineNumber, line); | |
336 | return; | |
337 | } | |
57a6839d A |
338 | paraLevelName=paraLevelNames[i]; |
339 | if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()))) { | |
729e4ab9 A |
340 | // continue outerLoop; does not exist in C++ |
341 | // so just break out of the inner loop. | |
342 | break; | |
343 | } | |
57a6839d | 344 | if(!checkOrdering(ubidi.getAlias())) { |
729e4ab9 A |
345 | // continue outerLoop; does not exist in C++ |
346 | // so just break out of the inner loop. | |
347 | break; | |
348 | } | |
349 | } | |
350 | } | |
351 | } | |
352 | } | |
353 | } | |
354 | ||
57a6839d A |
355 | /* |
356 | ******************************************************************************* | |
357 | * | |
358 | * created on: 2013jul01 | |
359 | * created by: Matitiahu Allouche | |
360 | ||
361 | This function performs a conformance test for implementations of the | |
362 | Unicode Bidirectional Algorithm, specified in UAX #9: Unicode | |
363 | Bidirectional Algorithm, at http://www.unicode.org/unicode/reports/tr9/ | |
364 | ||
365 | Each test case is represented in a single line which is read from a file | |
366 | named BidiCharacterTest.txt. Empty, blank and comment lines may also appear |
367 | in this file. | |
368 | ||
369 | The format of the test data is specified below. Note that each test | |
370 | case constitutes a single line of text; reordering is applied within a | |
371 | single line and independently of a rendering engine, and rules L3 and L4 | |
372 | are out of scope. | |
373 | ||
374 | The number sign '#' is the comment character: everything is ignored from | |
375 | the occurrence of '#' until the end of the line. |
376 | Empty lines and lines containing only spaces and/or comments are ignored. | |
377 | ||
378 | Lines which represent test cases consist of 4 or 5 fields separated by a | |
379 | semicolon. Each field consists of tokens separated by whitespace (space | |
380 | or Tab). Whitespace before and after semicolons is optional. | |
381 | ||
382 | Field 0: A sequence of hexadecimal code point values separated by space | |
383 | ||
384 | Field 1: A value representing the paragraph direction, as follows: | |
385 | - 0 represents left-to-right | |
386 | - 1 represents right-to-left | |
387 | - 2 represents auto-LTR according to rules P2 and P3 of the algorithm | |
388 | - 3 represents auto-RTL according to rules P2 and P3 of the algorithm | |
389 | - a negative number whose absolute value is taken as paragraph level; | |
390 | this may be useful to test cases where the embedding level approaches | |
391 | or exceeds the maximum embedding level. | |
392 | ||
393 | Field 2: The resolved paragraph embedding level. If the input (field 0) | |
394 | includes more than one paragraph, this field represents the | |
395 | resolved level of the first paragraph. | |
396 | ||
397 | Field 3: An ordered list of resulting levels for each token in field 0 | |
398 | (each token represents one source character). | |
399 | The UBA does not assign levels to certain characters (e.g. LRO); | |
400 | characters removed in rule X9 are indicated with an 'x'. | |
401 | ||
402 | Field 4: An ordered list of indices showing the resulting visual ordering | |
403 | from left to right; characters with a resolved level of 'x' are | |
404 | skipped. The number are zero-based. Each index corresponds to | |
405 | a character in the reordered (visual) string. It represents the | |
406 | index of the source character in the input (field 0). | |
407 | This field is optional. When it is absent, the visual ordering | |
408 | is not verified. | |
409 | ||
410 | Examples: | |
411 | ||
412 | # This is a comment line. | |
413 | L L ON R ; 0 ; 0 ; 0 0 0 1 ; 0 1 2 3 | |
414 | L L ON R;0;0;0 0 0 1;0 1 2 3 | |
415 | ||
416 | # Note: in the next line, 'B' represents a block separator, not the letter 'B'. | |
417 | LRE A B C PDF;2;0;x 2 0 0 x;1 2 3 | |
418 | # Note: in the next line, 'b' represents the letter 'b', not a block separator. | |
419 | a b c 05d0 05d1 x ; 0 ; 0 ; 0 0 0 1 1 0 ; 0 1 2 4 3 5 | |
420 | ||
421 | a R R x ; 1 ; 1 ; 2 1 1 2 | |
422 | L L R R R B R R L L L B ON ON ; 3 ; 0 ; 0 0 1 1 1 0 1 1 2 2 2 1 1 1 | |
423 | ||
424 | * | |
425 | ******************************************************************************* | |
426 | */ | |
2ca993e8 A |
// Buffer capacities for the ubidi_setParaWithControls() part of TestBidiCharacterTest().
enum {
    kMaxUtxt = 32,  // max. number of non-control code units per test case
    kMaxUctl = 16   // max. number of separate control sequences per test case
};
428 | ||
57a6839d A |
429 | void BiDiConformanceTest::TestBidiCharacterTest() { |
430 | IcuTestErrorCode errorCode(*this, "TestBidiCharacterTest"); | |
431 | const char *sourceTestDataPath=getSourceTestData(errorCode); | |
432 | if(errorCode.logIfFailureAndReset("unable to find the source/test/testdata " | |
433 | "folder (getSourceTestData())")) { | |
434 | return; | |
435 | } | |
436 | char bidiTestPath[400]; | |
437 | strcpy(bidiTestPath, sourceTestDataPath); | |
438 | strcat(bidiTestPath, "BidiCharacterTest.txt"); | |
439 | LocalStdioFilePointer bidiTestFile(fopen(bidiTestPath, "r")); | |
440 | if(bidiTestFile.isNull()) { | |
441 | errln("unable to open %s", bidiTestPath); | |
442 | return; | |
443 | } | |
444 | LocalUBiDiPointer ubidi(ubidi_open()); | |
445 | lineNumber=0; | |
446 | levelsCount=0; | |
447 | orderingCount=0; | |
448 | errorCount=0; | |
449 | while(errorCount<20 && fgets(line, (int)sizeof(line), bidiTestFile.getAlias())!=NULL) { | |
450 | ++lineNumber; | |
451 | paraLevelName="N/A"; | |
452 | inputString="N/A"; | |
453 | // Remove trailing comments and whitespace. | |
454 | char *commentStart=strchr(line, '#'); | |
455 | if(commentStart!=NULL) { | |
456 | *commentStart=0; | |
457 | } | |
458 | u_rtrim(line); | |
459 | const char *start=u_skipWhitespace(line); | |
460 | if(*start==0) { | |
461 | continue; // Skip empty and comment-only lines. | |
462 | } | |
463 | // Parse the code point string in field 0. | |
464 | UChar *buffer=inputString.getBuffer(200); | |
465 | int32_t length=u_parseString(start, buffer, inputString.getCapacity(), NULL, errorCode); | |
466 | if(errorCode.logIfFailureAndReset("Invalid string in field 0")) { | |
467 | errln("Input line %d: %s", (int)lineNumber, line); | |
468 | inputString.remove(); | |
469 | continue; | |
470 | } | |
471 | inputString.releaseBuffer(length); | |
472 | start=strchr(start, ';'); | |
473 | if(start==NULL) { | |
474 | errorCount++; | |
475 | errln("\nError on line %d: Missing ; separator on line: %s", (int)lineNumber, line); | |
476 | continue; | |
477 | } | |
478 | start=u_skipWhitespace(start+1); | |
479 | char *end; | |
480 | int32_t paraDirection=(int32_t)strtol(start, &end, 10); | |
481 | UBiDiLevel paraLevel=UBIDI_MAX_EXPLICIT_LEVEL+2; | |
482 | if(paraDirection==0) { | |
483 | paraLevel=0; | |
484 | paraLevelName="LTR"; | |
485 | } | |
486 | else if(paraDirection==1) { | |
487 | paraLevel=1; | |
488 | paraLevelName="RTL"; | |
489 | } | |
490 | else if(paraDirection==2) { | |
491 | paraLevel=UBIDI_DEFAULT_LTR; | |
492 | paraLevelName="Auto/LTR"; | |
493 | } | |
494 | else if(paraDirection==3) { | |
495 | paraLevel=UBIDI_DEFAULT_RTL; | |
496 | paraLevelName="Auto/RTL"; | |
497 | } | |
498 | else if(paraDirection<0 && -paraDirection<=(UBIDI_MAX_EXPLICIT_LEVEL+1)) { | |
499 | paraLevel=(UBiDiLevel)(-paraDirection); | |
500 | sprintf(levelNameString, "%d", (int)paraLevel); | |
501 | paraLevelName=levelNameString; | |
502 | } | |
503 | if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) || | |
504 | paraLevel==(UBIDI_MAX_EXPLICIT_LEVEL+2)) { | |
505 | errln("\nError on line %d: Input paragraph direction incorrect at %s", (int)lineNumber, start); | |
506 | printErrorLine(); | |
507 | continue; | |
508 | } | |
509 | start=u_skipWhitespace(end); | |
510 | if(*start!=';') { | |
511 | errorCount++; | |
512 | errln("\nError on line %d: Missing ; separator on line: %s", (int)lineNumber, line); | |
513 | continue; | |
514 | } | |
515 | start++; | |
516 | uint32_t resolvedParaLevel=(uint32_t)strtoul(start, &end, 10); | |
517 | if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) || | |
518 | resolvedParaLevel>1) { | |
519 | errln("\nError on line %d: Resolved paragraph level incorrect at %s", (int)lineNumber, start); | |
520 | printErrorLine(); | |
521 | continue; | |
522 | } | |
523 | start=u_skipWhitespace(end); | |
524 | if(*start!=';') { | |
525 | errorCount++; | |
526 | errln("\nError on line %d: Missing ; separator on line: %s", (int)lineNumber, line); | |
527 | return; | |
528 | } | |
529 | start++; | |
530 | if(!parseLevels(start)) { | |
531 | continue; | |
532 | } | |
533 | start=u_skipWhitespace(start); | |
534 | if(*start==';') { | |
535 | if(!parseOrdering(start+1)) { | |
536 | continue; | |
537 | } | |
538 | } | |
539 | else | |
540 | orderingCount=-1; | |
541 | ||
542 | ubidi_setPara(ubidi.getAlias(), inputString.getBuffer(), inputString.length(), | |
543 | paraLevel, NULL, errorCode); | |
544 | const UBiDiLevel *actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCode); | |
545 | if(errorCode.logIfFailureAndReset("ubidi_setPara() or ubidi_getLevels()")) { | |
546 | errln("Input line %d: %s", (int)lineNumber, line); | |
547 | continue; | |
548 | } | |
549 | UBiDiLevel actualLevel; | |
550 | if((actualLevel=ubidi_getParaLevel(ubidi.getAlias()))!=resolvedParaLevel) { | |
551 | printErrorLine(); | |
552 | errln("\nError on line %d: Wrong resolved paragraph level; expected %d actual %d", | |
553 | (int)lineNumber, resolvedParaLevel, actualLevel); | |
554 | continue; | |
555 | } | |
556 | if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()))) { | |
557 | continue; | |
558 | } | |
559 | if(orderingCount>=0 && !checkOrdering(ubidi.getAlias())) { | |
560 | continue; | |
561 | } | |
2ca993e8 A |
562 | |
563 | // tests for ubidi_setParaWithControls | |
564 | // skip 2 tests known not to work (out of 91678 cases, though | |
565 | // only 86 of those tests use controls so 2.3% of those failing), | |
566 | // still investigating these | |
567 | if (lineNumber==210 || lineNumber==211) { | |
568 | continue; | |
569 | } | |
570 | ||
571 | const UChar* ubufPtr = inputString.getBuffer(); | |
572 | int32_t ubufIdx; | |
573 | UChar utxt[kMaxUtxt]; | |
574 | UBiDiLevel ulev[kMaxUtxt]; | |
575 | int32_t offsets[kMaxUctl]; | |
576 | UChar* uctlPtrs[kMaxUctl]; | |
577 | UChar uctl[kMaxUctl][5]; | |
578 | UChar *uctlPtr; | |
579 | int32_t utxtLen = 0, offsetsLen = 0, ctlLen = 0; | |
580 | UBool fail = FALSE; | |
581 | for (ubufIdx = 0; ubufIdx < inputString.length(); ubufIdx++) { | |
582 | UChar uc = ubufPtr[ubufIdx]; | |
583 | if ( (uc >=0x202A && uc<=0x202E) || (uc >=0x2066 && uc<=0x2069) ) { | |
584 | // have a bidi control | |
585 | if (ctlLen >= 4) { | |
586 | fail = TRUE; break; | |
587 | } | |
588 | if (ctlLen == 0) { | |
589 | // starting a new control sequence | |
590 | if (offsetsLen >= kMaxUctl) { | |
591 | fail = TRUE; break; | |
592 | } | |
593 | offsets[offsetsLen] = utxtLen; | |
594 | uctlPtr = &uctl[offsetsLen][0]; | |
595 | uctlPtrs[offsetsLen] = uctlPtr; | |
596 | offsetsLen++; | |
597 | } | |
598 | uctlPtr[ctlLen++] = uc; | |
599 | uctlPtr[ctlLen] = 0; | |
600 | } else { | |
601 | if (utxtLen >= kMaxUtxt) { | |
602 | fail = TRUE; break; | |
603 | } | |
604 | ctlLen = 0; | |
605 | utxt[utxtLen] = uc; | |
606 | levels[utxtLen] = levels[ubufIdx]; // will always have ubufIdx >= utxtLen so this is OK | |
607 | utxtLen++; | |
608 | } | |
609 | } | |
610 | levelsCount = utxtLen; | |
611 | if (fail) { | |
612 | logln("Skipping BidiCharacterTest unsuitable for ubidi_setParaWithControls: %d: %s", (int)lineNumber, line); | |
613 | continue; // can't use this test | |
614 | } | |
615 | if (offsetsLen > 0 && offsets[offsetsLen-1] >= utxtLen) { | |
616 | --offsetsLen; | |
617 | ubidi_setContext(ubidi.getAlias(), NULL, 0, uctlPtrs[offsetsLen], -1, errorCode); | |
618 | } else { | |
619 | ubidi_setContext(ubidi.getAlias(), NULL, 0, NULL, 0, errorCode); | |
620 | } | |
621 | ubidi_setParaWithControls(ubidi.getAlias(), utxt, utxtLen, paraLevel, | |
622 | offsets, offsetsLen, NULL, uctlPtrs, errorCode); | |
623 | actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCode); | |
624 | if(errorCode.logIfFailureAndReset("ubidi_setContext()/ubidi_setParaWithControls()/ubidi_getLevels()")) { | |
625 | errln("Input line %d: %s", (int)lineNumber, line); | |
626 | continue; | |
627 | } | |
628 | if((actualLevel=ubidi_getParaLevel(ubidi.getAlias()))!=resolvedParaLevel) { | |
629 | printErrorLine(); | |
630 | errln("\nError on line %d: Wrong resolved paragraph level from ubidi_setParaWithControls; expected %d actual %d", | |
631 | (int)lineNumber, resolvedParaLevel, actualLevel); | |
632 | continue; | |
633 | } | |
634 | if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()))) { | |
635 | continue; | |
636 | } | |
57a6839d A |
637 | } |
638 | } | |
639 | ||
729e4ab9 A |
640 | static UChar printLevel(UBiDiLevel level) { |
641 | if(level<UBIDI_DEFAULT_LTR) { | |
642 | return 0x30+level; | |
643 | } else { | |
644 | return 0x78; // 'x' | |
645 | } | |
646 | } | |
647 | ||
648 | static uint32_t getDirectionBits(const UBiDiLevel actualLevels[], int32_t actualCount) { | |
649 | uint32_t actualDirectionBits=0; | |
650 | for(int32_t i=0; i<actualCount; ++i) { | |
651 | actualDirectionBits|=(1<<(actualLevels[i]&1)); | |
652 | } | |
653 | return actualDirectionBits; | |
654 | } | |
655 | ||
57a6839d | 656 | UBool BiDiConformanceTest::checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount) { |
729e4ab9 A |
657 | UBool isOk=TRUE; |
658 | if(levelsCount!=actualCount) { | |
57a6839d A |
659 | errln("\nError on line %d: Wrong number of level values; expected %d actual %d", |
660 | (int)lineNumber, (int)levelsCount, (int)actualCount); | |
729e4ab9 A |
661 | isOk=FALSE; |
662 | } else { | |
663 | for(int32_t i=0; i<actualCount; ++i) { | |
664 | if(levels[i]!=actualLevels[i] && levels[i]<UBIDI_DEFAULT_LTR) { | |
665 | if(directionBits!=3 && directionBits==getDirectionBits(actualLevels, actualCount)) { | |
666 | // ICU used a shortcut: | |
667 | // Since the text is unidirectional, it did not store the resolved | |
668 | // levels but just returns all levels as the paragraph level 0 or 1. | |
669 | // The reordering result is the same, so this is fine. | |
670 | break; | |
671 | } else { | |
57a6839d A |
672 | errln("\nError on line %d: Wrong level value at index %d; expected %d actual %d", |
673 | (int)lineNumber, (int)i, levels[i], actualLevels[i]); | |
729e4ab9 A |
674 | isOk=FALSE; |
675 | break; | |
676 | } | |
677 | } | |
678 | } | |
679 | } | |
680 | if(!isOk) { | |
57a6839d | 681 | printErrorLine(); |
729e4ab9 A |
682 | UnicodeString els("Expected levels: "); |
683 | int32_t i; | |
684 | for(i=0; i<levelsCount; ++i) { | |
685 | els.append((UChar)0x20).append(printLevel(levels[i])); | |
686 | } | |
687 | UnicodeString als("Actual levels: "); | |
688 | for(i=0; i<actualCount; ++i) { | |
689 | als.append((UChar)0x20).append(printLevel(actualLevels[i])); | |
690 | } | |
691 | errln(els); | |
692 | errln(als); | |
693 | } | |
694 | return isOk; | |
695 | } | |
696 | ||
697 | // Note: ubidi_setReorderingOptions(ubidi, UBIDI_OPTION_REMOVE_CONTROLS); | |
698 | // does not work for custom BiDi class assignments | |
699 | // and anyway also removes LRM/RLM/ZWJ/ZWNJ which is not desirable here. | |
700 | // Therefore we just skip the indexes for BiDi controls while comparing | |
701 | // with the expected ordering that has them omitted. | |
57a6839d | 702 | UBool BiDiConformanceTest::checkOrdering(UBiDi *ubidi) { |
729e4ab9 | 703 | UBool isOk=TRUE; |
57a6839d | 704 | IcuTestErrorCode errorCode(*this, "checkOrdering()"); |
729e4ab9 A |
705 | int32_t resultLength=ubidi_getResultLength(ubidi); // visual length including BiDi controls |
706 | int32_t i, visualIndex; | |
707 | // Note: It should be faster to call ubidi_countRuns()/ubidi_getVisualRun() | |
708 | // and loop over each run's indexes, but that seems unnecessary for this test code. | |
709 | for(i=visualIndex=0; i<resultLength; ++i) { | |
710 | int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode); | |
711 | if(errorCode.logIfFailureAndReset("ubidi_getLogicalIndex()")) { | |
712 | errln("Input line %d: %s", (int)lineNumber, line); | |
713 | return FALSE; | |
714 | } | |
715 | if(levels[logicalIndex]>=UBIDI_DEFAULT_LTR) { | |
716 | continue; // BiDi control, omitted from expected ordering. | |
717 | } | |
718 | if(visualIndex<orderingCount && logicalIndex!=ordering[visualIndex]) { | |
57a6839d A |
719 | errln("\nError on line %d: Wrong ordering value at visual index %d; expected %d actual %d", |
720 | (int)lineNumber, (int)visualIndex, ordering[visualIndex], logicalIndex); | |
729e4ab9 A |
721 | isOk=FALSE; |
722 | break; | |
723 | } | |
724 | ++visualIndex; | |
725 | } | |
726 | // visualIndex is now the visual length minus the BiDi controls, | |
727 | // which should match the length of the BidiTest.txt ordering. | |
728 | if(isOk && orderingCount!=visualIndex) { | |
57a6839d A |
729 | errln("\nError on line %d: Wrong number of ordering values; expected %d actual %d", |
730 | (int)lineNumber, (int)orderingCount, (int)visualIndex); | |
729e4ab9 A |
731 | isOk=FALSE; |
732 | } | |
733 | if(!isOk) { | |
57a6839d | 734 | printErrorLine(); |
729e4ab9 A |
735 | UnicodeString eord("Expected ordering: "); |
736 | for(i=0; i<orderingCount; ++i) { | |
737 | eord.append((UChar)0x20).append((UChar)(0x30+ordering[i])); | |
738 | } | |
739 | UnicodeString aord("Actual ordering: "); | |
740 | for(i=0; i<resultLength; ++i) { | |
741 | int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode); | |
742 | if(levels[logicalIndex]<UBIDI_DEFAULT_LTR) { | |
743 | aord.append((UChar)0x20).append((UChar)(0x30+logicalIndex)); | |
744 | } | |
745 | } | |
746 | errln(eord); | |
747 | errln(aord); | |
748 | } | |
749 | return isOk; | |
750 | } | |
751 | ||
57a6839d | 752 | void BiDiConformanceTest::printErrorLine() { |
729e4ab9 A |
753 | ++errorCount; |
754 | errln("Input line %5d: %s", (int)lineNumber, line); | |
755 | errln(UnicodeString("Input string: ")+inputString); | |
756 | errln("Para level: %s", paraLevelName); | |
757 | } |