]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
729e4ab9 A |
3 | /* |
4 | ******************************************************************************* | |
5 | * | |
b331163b | 6 | * Copyright (C) 2009-2014, International Business Machines |
729e4ab9 A |
7 | * Corporation and others. All Rights Reserved. |
8 | * | |
9 | ******************************************************************************* | |
10 | * file name: bidiconf.cpp | |
f3c0d7a5 | 11 | * encoding: UTF-8 |
729e4ab9 A |
12 | * tab size: 8 (not used) |
13 | * indentation:4 | |
14 | * | |
15 | * created on: 2009oct16 | |
16 | * created by: Markus W. Scherer | |
17 | * | |
57a6839d | 18 | * BiDi conformance test, using the Unicode BidiTest.txt and BidiCharacterTest.txt files. |
729e4ab9 A |
19 | */ |
20 | ||
21 | #include <stdio.h> | |
22 | #include <stdlib.h> | |
23 | #include <string.h> | |
24 | #include "unicode/utypes.h" | |
25 | #include "unicode/ubidi.h" | |
26 | #include "unicode/errorcode.h" | |
27 | #include "unicode/localpointer.h" | |
28 | #include "unicode/putil.h" | |
29 | #include "unicode/unistr.h" | |
30 | #include "intltest.h" | |
31 | #include "uparse.h" | |
32 | ||
33 | class BiDiConformanceTest : public IntlTest { | |
34 | public: | |
35 | BiDiConformanceTest() : | |
36 | directionBits(0), lineNumber(0), levelsCount(0), orderingCount(0), | |
37 | errorCount(0) {} | |
38 | ||
39 | void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL); | |
40 | ||
41 | void TestBidiTest(); | |
57a6839d | 42 | void TestBidiCharacterTest(); |
729e4ab9 | 43 | private: |
57a6839d | 44 | UBool parseLevels(const char *&start); |
729e4ab9 A |
45 | UBool parseOrdering(const char *start); |
46 | UBool parseInputStringFromBiDiClasses(const char *&start); | |
47 | ||
57a6839d A |
48 | UBool checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount); |
49 | UBool checkOrdering(UBiDi *ubidi); | |
729e4ab9 | 50 | |
57a6839d | 51 | void printErrorLine(); |
729e4ab9 A |
52 | |
53 | char line[10000]; | |
54 | UBiDiLevel levels[1000]; | |
55 | uint32_t directionBits; | |
56 | int32_t ordering[1000]; | |
57 | int32_t lineNumber; | |
58 | int32_t levelsCount; | |
59 | int32_t orderingCount; | |
60 | int32_t errorCount; | |
61 | UnicodeString inputString; | |
57a6839d A |
62 | const char *paraLevelName; |
63 | char levelNameString[12]; | |
729e4ab9 A |
64 | }; |
65 | ||
66 | extern IntlTest *createBiDiConformanceTest() { | |
67 | return new BiDiConformanceTest(); | |
68 | } | |
69 | ||
70 | void BiDiConformanceTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) { | |
71 | if(exec) { | |
72 | logln("TestSuite BiDiConformanceTest: "); | |
73 | } | |
57a6839d A |
74 | TESTCASE_AUTO_BEGIN; |
75 | TESTCASE_AUTO(TestBidiTest); | |
76 | TESTCASE_AUTO(TestBidiCharacterTest); | |
77 | TESTCASE_AUTO_END; | |
729e4ab9 A |
78 | } |
79 | ||
729e4ab9 A |
// Defines LocalStdioFilePointer, the holder type used below for the FILE *
// returned by fopen() (the tests call isNull() and getAlias() on it).
// NOTE(review): presumably it calls fclose() on destruction, as the macro
// arguments suggest -- confirm against unicode/localpointer.h.
80 | U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose); |
81 | ||
57a6839d | 82 | UBool BiDiConformanceTest::parseLevels(const char *&start) { |
729e4ab9 A |
83 | directionBits=0; |
84 | levelsCount=0; | |
57a6839d | 85 | while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') { |
729e4ab9 A |
86 | if(*start=='x') { |
87 | levels[levelsCount++]=UBIDI_DEFAULT_LTR; | |
88 | ++start; | |
89 | } else { | |
90 | char *end; | |
91 | uint32_t value=(uint32_t)strtoul(start, &end, 10); | |
57a6839d A |
92 | if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0 && *end!=';') |
93 | || value>(UBIDI_MAX_EXPLICIT_LEVEL+1)) { | |
94 | errln("\nError on line %d: Levels parse error at %s", (int)lineNumber, start); | |
95 | printErrorLine(); | |
729e4ab9 A |
96 | return FALSE; |
97 | } | |
98 | levels[levelsCount++]=(UBiDiLevel)value; | |
99 | directionBits|=(1<<(value&1)); | |
100 | start=end; | |
101 | } | |
102 | } | |
103 | return TRUE; | |
104 | } | |
105 | ||
106 | UBool BiDiConformanceTest::parseOrdering(const char *start) { | |
107 | orderingCount=0; | |
57a6839d | 108 | while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') { |
729e4ab9 A |
109 | char *end; |
110 | uint32_t value=(uint32_t)strtoul(start, &end, 10); | |
57a6839d A |
111 | if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0 && *end!=';') || value>=1000) { |
112 | errln("\nError on line %d: Reorder parse error at %s", (int)lineNumber, start); | |
113 | printErrorLine(); | |
729e4ab9 A |
114 | return FALSE; |
115 | } | |
116 | ordering[orderingCount++]=(int32_t)value; | |
117 | start=end; | |
118 | } | |
119 | return TRUE; | |
120 | } | |
121 | ||
122 | static const UChar charFromBiDiClass[U_CHAR_DIRECTION_COUNT]={ | |
123 | 0x6c, // 'l' for L | |
124 | 0x52, // 'R' for R | |
125 | 0x33, // '3' for EN | |
126 | 0x2d, // '-' for ES | |
127 | 0x25, // '%' for ET | |
128 | 0x39, // '9' for AN | |
129 | 0x2c, // ',' for CS | |
130 | 0x2f, // '/' for B | |
131 | 0x5f, // '_' for S | |
132 | 0x20, // ' ' for WS | |
133 | 0x3d, // '=' for ON | |
134 | 0x65, // 'e' for LRE | |
135 | 0x6f, // 'o' for LRO | |
136 | 0x41, // 'A' for AL | |
137 | 0x45, // 'E' for RLE | |
138 | 0x4f, // 'O' for RLO | |
139 | 0x2a, // '*' for PDF | |
140 | 0x60, // '`' for NSM | |
57a6839d A |
141 | 0x7c, // '|' for BN |
142 | // new in Unicode 6.3/ICU 52 | |
143 | 0x53, // 'S' for FSI | |
144 | 0x69, // 'i' for LRI | |
145 | 0x49, // 'I' for RLI | |
146 | 0x2e // '.' for PDI | |
729e4ab9 A |
147 | }; |
148 | ||
149 | U_CDECL_BEGIN | |
150 | ||
151 | static UCharDirection U_CALLCONV | |
152 | biDiConfUBiDiClassCallback(const void * /*context*/, UChar32 c) { | |
153 | for(int i=0; i<U_CHAR_DIRECTION_COUNT; ++i) { | |
154 | if(c==charFromBiDiClass[i]) { | |
155 | return (UCharDirection)i; | |
156 | } | |
157 | } | |
158 | // Character not in our hardcoded table. | |
159 | // Should not occur during testing. | |
160 | return U_BIDI_CLASS_DEFAULT; | |
161 | } | |
162 | ||
163 | U_CDECL_END | |
164 | ||
165 | static const int8_t biDiClassNameLengths[U_CHAR_DIRECTION_COUNT+1]={ | |
57a6839d | 166 | 1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 3, 3, 2, 3, 3, 3, 3, 2, 3, 3, 3, 3, 0 |
729e4ab9 A |
167 | }; |
168 | ||
169 | UBool BiDiConformanceTest::parseInputStringFromBiDiClasses(const char *&start) { | |
170 | inputString.remove(); | |
171 | /* | |
172 | * Lengthy but fast BiDi class parser. | |
173 | * A simple parser could terminate or extract the name string and use | |
174 | * int32_t biDiClassInt=u_getPropertyValueEnum(UCHAR_BIDI_CLASS, bidiClassString); | |
175 | * but that makes this test take significantly more time. | |
176 | */ | |
177 | while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') { | |
178 | UCharDirection biDiClass=U_CHAR_DIRECTION_COUNT; | |
179 | // Compare each character once until we have a match on | |
180 | // a complete, short BiDi class name. | |
181 | if(start[0]=='L') { | |
182 | if(start[1]=='R') { | |
183 | if(start[2]=='E') { | |
184 | biDiClass=U_LEFT_TO_RIGHT_EMBEDDING; | |
57a6839d A |
185 | } else if(start[2]=='I') { |
186 | biDiClass=U_LEFT_TO_RIGHT_ISOLATE; | |
729e4ab9 A |
187 | } else if(start[2]=='O') { |
188 | biDiClass=U_LEFT_TO_RIGHT_OVERRIDE; | |
189 | } | |
190 | } else { | |
191 | biDiClass=U_LEFT_TO_RIGHT; | |
192 | } | |
193 | } else if(start[0]=='R') { | |
194 | if(start[1]=='L') { | |
195 | if(start[2]=='E') { | |
196 | biDiClass=U_RIGHT_TO_LEFT_EMBEDDING; | |
57a6839d A |
197 | } else if(start[2]=='I') { |
198 | biDiClass=U_RIGHT_TO_LEFT_ISOLATE; | |
729e4ab9 A |
199 | } else if(start[2]=='O') { |
200 | biDiClass=U_RIGHT_TO_LEFT_OVERRIDE; | |
201 | } | |
202 | } else { | |
203 | biDiClass=U_RIGHT_TO_LEFT; | |
204 | } | |
205 | } else if(start[0]=='E') { | |
206 | if(start[1]=='N') { | |
207 | biDiClass=U_EUROPEAN_NUMBER; | |
208 | } else if(start[1]=='S') { | |
209 | biDiClass=U_EUROPEAN_NUMBER_SEPARATOR; | |
210 | } else if(start[1]=='T') { | |
211 | biDiClass=U_EUROPEAN_NUMBER_TERMINATOR; | |
212 | } | |
213 | } else if(start[0]=='A') { | |
214 | if(start[1]=='L') { | |
215 | biDiClass=U_RIGHT_TO_LEFT_ARABIC; | |
216 | } else if(start[1]=='N') { | |
217 | biDiClass=U_ARABIC_NUMBER; | |
218 | } | |
219 | } else if(start[0]=='C' && start[1]=='S') { | |
220 | biDiClass=U_COMMON_NUMBER_SEPARATOR; | |
221 | } else if(start[0]=='B') { | |
222 | if(start[1]=='N') { | |
223 | biDiClass=U_BOUNDARY_NEUTRAL; | |
224 | } else { | |
225 | biDiClass=U_BLOCK_SEPARATOR; | |
226 | } | |
227 | } else if(start[0]=='S') { | |
228 | biDiClass=U_SEGMENT_SEPARATOR; | |
229 | } else if(start[0]=='W' && start[1]=='S') { | |
230 | biDiClass=U_WHITE_SPACE_NEUTRAL; | |
231 | } else if(start[0]=='O' && start[1]=='N') { | |
232 | biDiClass=U_OTHER_NEUTRAL; | |
57a6839d A |
233 | } else if(start[0]=='P' && start[1]=='D') { |
234 | if(start[2]=='F') { | |
235 | biDiClass=U_POP_DIRECTIONAL_FORMAT; | |
236 | } else if(start[2]=='I') { | |
237 | biDiClass=U_POP_DIRECTIONAL_ISOLATE; | |
238 | } | |
729e4ab9 A |
239 | } else if(start[0]=='N' && start[1]=='S' && start[2]=='M') { |
240 | biDiClass=U_DIR_NON_SPACING_MARK; | |
57a6839d A |
241 | } else if(start[0]=='F' && start[1]=='S' && start[2]=='I') { |
242 | biDiClass=U_FIRST_STRONG_ISOLATE; | |
729e4ab9 A |
243 | } |
244 | // Now we verify that the class name is terminated properly, | |
245 | // and not just the start of a longer word. | |
246 | int8_t biDiClassNameLength=biDiClassNameLengths[biDiClass]; | |
247 | char c=start[biDiClassNameLength]; | |
57a6839d A |
248 | if(biDiClass<U_CHAR_DIRECTION_COUNT && (U_IS_INV_WHITESPACE(c) || c==';' || c==0)) { |
249 | inputString.append(charFromBiDiClass[biDiClass]); | |
250 | start+=biDiClassNameLength; | |
251 | continue; | |
729e4ab9 | 252 | } |
57a6839d A |
253 | errln("\nError on line %d: BiDi class string not recognized at %s", (int)lineNumber, start); |
254 | printErrorLine(); | |
255 | return FALSE; | |
729e4ab9 A |
256 | } |
257 | return TRUE; | |
258 | } | |
259 | ||
260 | void BiDiConformanceTest::TestBidiTest() { | |
261 | IcuTestErrorCode errorCode(*this, "TestBidiTest"); | |
262 | const char *sourceTestDataPath=getSourceTestData(errorCode); | |
0f5d89e8 | 263 | if(errorCode.errIfFailureAndReset("unable to find the source/test/testdata " |
729e4ab9 A |
264 | "folder (getSourceTestData())")) { |
265 | return; | |
266 | } | |
267 | char bidiTestPath[400]; | |
268 | strcpy(bidiTestPath, sourceTestDataPath); | |
269 | strcat(bidiTestPath, "BidiTest.txt"); | |
270 | LocalStdioFilePointer bidiTestFile(fopen(bidiTestPath, "r")); | |
271 | if(bidiTestFile.isNull()) { | |
272 | errln("unable to open %s", bidiTestPath); | |
273 | return; | |
274 | } | |
275 | LocalUBiDiPointer ubidi(ubidi_open()); | |
276 | ubidi_setClassCallback(ubidi.getAlias(), biDiConfUBiDiClassCallback, NULL, | |
277 | NULL, NULL, errorCode); | |
0f5d89e8 | 278 | if(errorCode.errIfFailureAndReset("ubidi_setClassCallback()")) { |
729e4ab9 A |
279 | return; |
280 | } | |
281 | lineNumber=0; | |
282 | levelsCount=0; | |
283 | orderingCount=0; | |
284 | errorCount=0; | |
b331163b A |
285 | // paraLevelName must be initialized in case the first non-comment line is in error |
286 | paraLevelName="N/A"; | |
729e4ab9 A |
287 | while(errorCount<10 && fgets(line, (int)sizeof(line), bidiTestFile.getAlias())!=NULL) { |
288 | ++lineNumber; | |
289 | // Remove trailing comments and whitespace. | |
290 | char *commentStart=strchr(line, '#'); | |
291 | if(commentStart!=NULL) { | |
292 | *commentStart=0; | |
293 | } | |
294 | u_rtrim(line); | |
295 | const char *start=u_skipWhitespace(line); | |
296 | if(*start==0) { | |
297 | continue; // Skip empty and comment-only lines. | |
298 | } | |
299 | if(*start=='@') { | |
300 | ++start; | |
301 | if(0==strncmp(start, "Levels:", 7)) { | |
57a6839d A |
302 | start+=7; |
303 | if(!parseLevels(start)) { | |
729e4ab9 A |
304 | return; |
305 | } | |
306 | } else if(0==strncmp(start, "Reorder:", 8)) { | |
307 | if(!parseOrdering(start+8)) { | |
308 | return; | |
309 | } | |
310 | } | |
311 | // Skip unknown @Xyz: ... | |
312 | } else { | |
313 | if(!parseInputStringFromBiDiClasses(start)) { | |
314 | return; | |
315 | } | |
316 | start=u_skipWhitespace(start); | |
317 | if(*start!=';') { | |
318 | errln("missing ; separator on input line %s", line); | |
319 | return; | |
320 | } | |
321 | start=u_skipWhitespace(start+1); | |
322 | char *end; | |
323 | uint32_t bitset=(uint32_t)strtoul(start, &end, 16); | |
324 | if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0)) { | |
325 | errln("input bitset parse error at %s", start); | |
326 | return; | |
327 | } | |
328 | // Loop over the bitset. | |
329 | static const UBiDiLevel paraLevels[]={ UBIDI_DEFAULT_LTR, 0, 1, UBIDI_DEFAULT_RTL }; | |
330 | static const char *const paraLevelNames[]={ "auto/LTR", "LTR", "RTL", "auto/RTL" }; | |
331 | for(int i=0; i<=3; ++i) { | |
332 | if(bitset&(1<<i)) { | |
333 | ubidi_setPara(ubidi.getAlias(), inputString.getBuffer(), inputString.length(), | |
334 | paraLevels[i], NULL, errorCode); | |
335 | const UBiDiLevel *actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCode); | |
0f5d89e8 | 336 | if(errorCode.errIfFailureAndReset("ubidi_setPara() or ubidi_getLevels()")) { |
729e4ab9 A |
337 | errln("Input line %d: %s", (int)lineNumber, line); |
338 | return; | |
339 | } | |
57a6839d A |
340 | paraLevelName=paraLevelNames[i]; |
341 | if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()))) { | |
729e4ab9 A |
342 | // continue outerLoop; does not exist in C++ |
343 | // so just break out of the inner loop. | |
344 | break; | |
345 | } | |
57a6839d | 346 | if(!checkOrdering(ubidi.getAlias())) { |
729e4ab9 A |
347 | // continue outerLoop; does not exist in C++ |
348 | // so just break out of the inner loop. | |
349 | break; | |
350 | } | |
351 | } | |
352 | } | |
353 | } | |
354 | } | |
355 | } | |
356 | ||
57a6839d A |
357 | /* |
358 | ******************************************************************************* | |
359 | * | |
360 | * created on: 2013jul01 | |
361 | * created by: Matitiahu Allouche | |
362 | ||
363 | This function performs a conformance test for implementations of the | |
364 | Unicode Bidirectional Algorithm, specified in UAX #9: Unicode | |
365 | Bidirectional Algorithm, at http://www.unicode.org/unicode/reports/tr9/ | |
366 | ||
367 | Each test case is represented in a single line which is read from a file | |
368 | named BidiCharacterTest.txt. Empty, blank and comment lines may also appear | |
369 | in this file. | |
370 | ||
371 | The format of the test data is specified below. Note that each test | |
372 | case constitutes a single line of text; reordering is applied within a | |
373 | single line and independently of a rendering engine, and rules L3 and L4 | |
374 | are out of scope. | |
375 | ||
376 | The number sign '#' is the comment character: everything is ignored from | |
377 | the occurrence of '#' until the end of the line. | |
378 | Empty lines and lines containing only spaces and/or comments are ignored. | |
379 | ||
380 | Lines which represent test cases consist of 4 or 5 fields separated by a | |
381 | semicolon. Each field consists of tokens separated by whitespace (space | |
382 | or Tab). Whitespace before and after semicolons is optional. | |
383 | ||
384 | Field 0: A sequence of hexadecimal code point values separated by space | |
385 | ||
386 | Field 1: A value representing the paragraph direction, as follows: | |
387 | - 0 represents left-to-right | |
388 | - 1 represents right-to-left | |
389 | - 2 represents auto-LTR according to rules P2 and P3 of the algorithm | |
390 | - 3 represents auto-RTL according to rules P2 and P3 of the algorithm | |
391 | - a negative number whose absolute value is taken as paragraph level; | |
392 | this may be useful to test cases where the embedding level approaches | |
393 | or exceeds the maximum embedding level. | |
394 | ||
395 | Field 2: The resolved paragraph embedding level. If the input (field 0) | |
396 | includes more than one paragraph, this field represents the | |
397 | resolved level of the first paragraph. | |
398 | ||
399 | Field 3: An ordered list of resulting levels for each token in field 0 | |
400 | (each token represents one source character). | |
401 | The UBA does not assign levels to certain characters (e.g. LRO); | |
402 | characters removed in rule X9 are indicated with an 'x'. | |
403 | ||
404 | Field 4: An ordered list of indices showing the resulting visual ordering | |
405 | from left to right; characters with a resolved level of 'x' are | |
406 | skipped. The numbers are zero-based. Each index corresponds to | |
407 | a character in the reordered (visual) string. It represents the | |
408 | index of the source character in the input (field 0). | |
409 | This field is optional. When it is absent, the visual ordering | |
410 | is not verified. | |
411 | ||
412 | Examples: | |
413 | ||
414 | # This is a comment line. | |
415 | L L ON R ; 0 ; 0 ; 0 0 0 1 ; 0 1 2 3 | |
416 | L L ON R;0;0;0 0 0 1;0 1 2 3 | |
417 | ||
418 | # Note: in the next line, 'B' represents a block separator, not the letter 'B'. | |
419 | LRE A B C PDF;2;0;x 2 0 0 x;1 2 3 | |
420 | # Note: in the next line, 'b' represents the letter 'b', not a block separator. | |
421 | a b c 05d0 05d1 x ; 0 ; 0 ; 0 0 0 1 1 0 ; 0 1 2 4 3 5 | |
422 | ||
423 | a R R x ; 1 ; 1 ; 2 1 1 2 | |
424 | L L R R R B R R L L L B ON ON ; 3 ; 0 ; 0 0 1 1 1 0 1 1 2 2 2 1 1 1 | |
425 | ||
426 | * | |
427 | ******************************************************************************* | |
428 | */ | |
2ca993e8 A |
429 | enum { kMaxUtxt = 32, kMaxUctl = 16 }; |
430 | ||
57a6839d A |
431 | void BiDiConformanceTest::TestBidiCharacterTest() { |
432 | IcuTestErrorCode errorCode(*this, "TestBidiCharacterTest"); | |
433 | const char *sourceTestDataPath=getSourceTestData(errorCode); | |
0f5d89e8 | 434 | if(errorCode.errIfFailureAndReset("unable to find the source/test/testdata " |
57a6839d A |
435 | "folder (getSourceTestData())")) { |
436 | return; | |
437 | } | |
438 | char bidiTestPath[400]; | |
439 | strcpy(bidiTestPath, sourceTestDataPath); | |
440 | strcat(bidiTestPath, "BidiCharacterTest.txt"); | |
441 | LocalStdioFilePointer bidiTestFile(fopen(bidiTestPath, "r")); | |
442 | if(bidiTestFile.isNull()) { | |
443 | errln("unable to open %s", bidiTestPath); | |
444 | return; | |
445 | } | |
446 | LocalUBiDiPointer ubidi(ubidi_open()); | |
447 | lineNumber=0; | |
448 | levelsCount=0; | |
449 | orderingCount=0; | |
450 | errorCount=0; | |
451 | while(errorCount<20 && fgets(line, (int)sizeof(line), bidiTestFile.getAlias())!=NULL) { | |
452 | ++lineNumber; | |
453 | paraLevelName="N/A"; | |
454 | inputString="N/A"; | |
455 | // Remove trailing comments and whitespace. | |
456 | char *commentStart=strchr(line, '#'); | |
457 | if(commentStart!=NULL) { | |
458 | *commentStart=0; | |
459 | } | |
460 | u_rtrim(line); | |
461 | const char *start=u_skipWhitespace(line); | |
462 | if(*start==0) { | |
463 | continue; // Skip empty and comment-only lines. | |
464 | } | |
465 | // Parse the code point string in field 0. | |
466 | UChar *buffer=inputString.getBuffer(200); | |
467 | int32_t length=u_parseString(start, buffer, inputString.getCapacity(), NULL, errorCode); | |
0f5d89e8 | 468 | if(errorCode.errIfFailureAndReset("Invalid string in field 0")) { |
57a6839d A |
469 | errln("Input line %d: %s", (int)lineNumber, line); |
470 | inputString.remove(); | |
471 | continue; | |
472 | } | |
473 | inputString.releaseBuffer(length); | |
474 | start=strchr(start, ';'); | |
475 | if(start==NULL) { | |
476 | errorCount++; | |
477 | errln("\nError on line %d: Missing ; separator on line: %s", (int)lineNumber, line); | |
478 | continue; | |
479 | } | |
480 | start=u_skipWhitespace(start+1); | |
481 | char *end; | |
482 | int32_t paraDirection=(int32_t)strtol(start, &end, 10); | |
483 | UBiDiLevel paraLevel=UBIDI_MAX_EXPLICIT_LEVEL+2; | |
484 | if(paraDirection==0) { | |
485 | paraLevel=0; | |
486 | paraLevelName="LTR"; | |
487 | } | |
488 | else if(paraDirection==1) { | |
489 | paraLevel=1; | |
490 | paraLevelName="RTL"; | |
491 | } | |
492 | else if(paraDirection==2) { | |
493 | paraLevel=UBIDI_DEFAULT_LTR; | |
494 | paraLevelName="Auto/LTR"; | |
495 | } | |
496 | else if(paraDirection==3) { | |
497 | paraLevel=UBIDI_DEFAULT_RTL; | |
498 | paraLevelName="Auto/RTL"; | |
499 | } | |
500 | else if(paraDirection<0 && -paraDirection<=(UBIDI_MAX_EXPLICIT_LEVEL+1)) { | |
501 | paraLevel=(UBiDiLevel)(-paraDirection); | |
502 | sprintf(levelNameString, "%d", (int)paraLevel); | |
503 | paraLevelName=levelNameString; | |
504 | } | |
505 | if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) || | |
506 | paraLevel==(UBIDI_MAX_EXPLICIT_LEVEL+2)) { | |
507 | errln("\nError on line %d: Input paragraph direction incorrect at %s", (int)lineNumber, start); | |
508 | printErrorLine(); | |
509 | continue; | |
510 | } | |
511 | start=u_skipWhitespace(end); | |
512 | if(*start!=';') { | |
513 | errorCount++; | |
514 | errln("\nError on line %d: Missing ; separator on line: %s", (int)lineNumber, line); | |
515 | continue; | |
516 | } | |
517 | start++; | |
518 | uint32_t resolvedParaLevel=(uint32_t)strtoul(start, &end, 10); | |
519 | if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) || | |
520 | resolvedParaLevel>1) { | |
521 | errln("\nError on line %d: Resolved paragraph level incorrect at %s", (int)lineNumber, start); | |
522 | printErrorLine(); | |
523 | continue; | |
524 | } | |
525 | start=u_skipWhitespace(end); | |
526 | if(*start!=';') { | |
527 | errorCount++; | |
528 | errln("\nError on line %d: Missing ; separator on line: %s", (int)lineNumber, line); | |
529 | return; | |
530 | } | |
531 | start++; | |
532 | if(!parseLevels(start)) { | |
533 | continue; | |
534 | } | |
535 | start=u_skipWhitespace(start); | |
536 | if(*start==';') { | |
537 | if(!parseOrdering(start+1)) { | |
538 | continue; | |
539 | } | |
540 | } | |
541 | else | |
542 | orderingCount=-1; | |
543 | ||
544 | ubidi_setPara(ubidi.getAlias(), inputString.getBuffer(), inputString.length(), | |
545 | paraLevel, NULL, errorCode); | |
546 | const UBiDiLevel *actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCode); | |
0f5d89e8 | 547 | if(errorCode.errIfFailureAndReset("ubidi_setPara() or ubidi_getLevels()")) { |
57a6839d A |
548 | errln("Input line %d: %s", (int)lineNumber, line); |
549 | continue; | |
550 | } | |
551 | UBiDiLevel actualLevel; | |
552 | if((actualLevel=ubidi_getParaLevel(ubidi.getAlias()))!=resolvedParaLevel) { | |
553 | printErrorLine(); | |
554 | errln("\nError on line %d: Wrong resolved paragraph level; expected %d actual %d", | |
555 | (int)lineNumber, resolvedParaLevel, actualLevel); | |
556 | continue; | |
557 | } | |
558 | if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()))) { | |
559 | continue; | |
560 | } | |
561 | if(orderingCount>=0 && !checkOrdering(ubidi.getAlias())) { | |
562 | continue; | |
563 | } | |
2ca993e8 A |
564 | |
565 | // tests for ubidi_setParaWithControls | |
566 | // skip 2 tests known not to work (out of 91678 cases, though | |
567 | // only 86 of those tests use controls so 2.3% of those failing), | |
568 | // still investigating these | |
f3c0d7a5 | 569 | if (lineNumber>=212 && lineNumber<=213) { |
2ca993e8 A |
570 | continue; |
571 | } | |
572 | ||
573 | const UChar* ubufPtr = inputString.getBuffer(); | |
574 | int32_t ubufIdx; | |
575 | UChar utxt[kMaxUtxt]; | |
576 | UBiDiLevel ulev[kMaxUtxt]; | |
577 | int32_t offsets[kMaxUctl]; | |
578 | UChar* uctlPtrs[kMaxUctl]; | |
579 | UChar uctl[kMaxUctl][5]; | |
580 | UChar *uctlPtr; | |
581 | int32_t utxtLen = 0, offsetsLen = 0, ctlLen = 0; | |
582 | UBool fail = FALSE; | |
583 | for (ubufIdx = 0; ubufIdx < inputString.length(); ubufIdx++) { | |
584 | UChar uc = ubufPtr[ubufIdx]; | |
585 | if ( (uc >=0x202A && uc<=0x202E) || (uc >=0x2066 && uc<=0x2069) ) { | |
586 | // have a bidi control | |
587 | if (ctlLen >= 4) { | |
588 | fail = TRUE; break; | |
589 | } | |
590 | if (ctlLen == 0) { | |
591 | // starting a new control sequence | |
592 | if (offsetsLen >= kMaxUctl) { | |
593 | fail = TRUE; break; | |
594 | } | |
595 | offsets[offsetsLen] = utxtLen; | |
596 | uctlPtr = &uctl[offsetsLen][0]; | |
597 | uctlPtrs[offsetsLen] = uctlPtr; | |
598 | offsetsLen++; | |
599 | } | |
600 | uctlPtr[ctlLen++] = uc; | |
601 | uctlPtr[ctlLen] = 0; | |
602 | } else { | |
603 | if (utxtLen >= kMaxUtxt) { | |
604 | fail = TRUE; break; | |
605 | } | |
606 | ctlLen = 0; | |
607 | utxt[utxtLen] = uc; | |
608 | levels[utxtLen] = levels[ubufIdx]; // will always have ubufIdx >= utxtLen so this is OK | |
609 | utxtLen++; | |
610 | } | |
611 | } | |
612 | levelsCount = utxtLen; | |
613 | if (fail) { | |
614 | logln("Skipping BidiCharacterTest unsuitable for ubidi_setParaWithControls: %d: %s", (int)lineNumber, line); | |
615 | continue; // can't use this test | |
616 | } | |
617 | if (offsetsLen > 0 && offsets[offsetsLen-1] >= utxtLen) { | |
618 | --offsetsLen; | |
619 | ubidi_setContext(ubidi.getAlias(), NULL, 0, uctlPtrs[offsetsLen], -1, errorCode); | |
620 | } else { | |
621 | ubidi_setContext(ubidi.getAlias(), NULL, 0, NULL, 0, errorCode); | |
622 | } | |
623 | ubidi_setParaWithControls(ubidi.getAlias(), utxt, utxtLen, paraLevel, | |
624 | offsets, offsetsLen, NULL, uctlPtrs, errorCode); | |
625 | actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCode); | |
0f5d89e8 | 626 | if(errorCode.errIfFailureAndReset("ubidi_setContext()/ubidi_setParaWithControls()/ubidi_getLevels()")) { |
2ca993e8 A |
627 | errln("Input line %d: %s", (int)lineNumber, line); |
628 | continue; | |
629 | } | |
630 | if((actualLevel=ubidi_getParaLevel(ubidi.getAlias()))!=resolvedParaLevel) { | |
631 | printErrorLine(); | |
632 | errln("\nError on line %d: Wrong resolved paragraph level from ubidi_setParaWithControls; expected %d actual %d", | |
633 | (int)lineNumber, resolvedParaLevel, actualLevel); | |
634 | continue; | |
635 | } | |
636 | if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()))) { | |
637 | continue; | |
638 | } | |
57a6839d A |
639 | } |
640 | } | |
641 | ||
729e4ab9 A |
642 | static UChar printLevel(UBiDiLevel level) { |
643 | if(level<UBIDI_DEFAULT_LTR) { | |
644 | return 0x30+level; | |
645 | } else { | |
646 | return 0x78; // 'x' | |
647 | } | |
648 | } | |
649 | ||
650 | static uint32_t getDirectionBits(const UBiDiLevel actualLevels[], int32_t actualCount) { | |
651 | uint32_t actualDirectionBits=0; | |
652 | for(int32_t i=0; i<actualCount; ++i) { | |
653 | actualDirectionBits|=(1<<(actualLevels[i]&1)); | |
654 | } | |
655 | return actualDirectionBits; | |
656 | } | |
657 | ||
57a6839d | 658 | UBool BiDiConformanceTest::checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount) { |
729e4ab9 A |
659 | UBool isOk=TRUE; |
660 | if(levelsCount!=actualCount) { | |
57a6839d A |
661 | errln("\nError on line %d: Wrong number of level values; expected %d actual %d", |
662 | (int)lineNumber, (int)levelsCount, (int)actualCount); | |
729e4ab9 A |
663 | isOk=FALSE; |
664 | } else { | |
665 | for(int32_t i=0; i<actualCount; ++i) { | |
666 | if(levels[i]!=actualLevels[i] && levels[i]<UBIDI_DEFAULT_LTR) { | |
667 | if(directionBits!=3 && directionBits==getDirectionBits(actualLevels, actualCount)) { | |
668 | // ICU used a shortcut: | |
669 | // Since the text is unidirectional, it did not store the resolved | |
670 | // levels but just returns all levels as the paragraph level 0 or 1. | |
671 | // The reordering result is the same, so this is fine. | |
672 | break; | |
673 | } else { | |
57a6839d A |
674 | errln("\nError on line %d: Wrong level value at index %d; expected %d actual %d", |
675 | (int)lineNumber, (int)i, levels[i], actualLevels[i]); | |
729e4ab9 A |
676 | isOk=FALSE; |
677 | break; | |
678 | } | |
679 | } | |
680 | } | |
681 | } | |
682 | if(!isOk) { | |
57a6839d | 683 | printErrorLine(); |
729e4ab9 A |
684 | UnicodeString els("Expected levels: "); |
685 | int32_t i; | |
686 | for(i=0; i<levelsCount; ++i) { | |
687 | els.append((UChar)0x20).append(printLevel(levels[i])); | |
688 | } | |
689 | UnicodeString als("Actual levels: "); | |
690 | for(i=0; i<actualCount; ++i) { | |
691 | als.append((UChar)0x20).append(printLevel(actualLevels[i])); | |
692 | } | |
693 | errln(els); | |
694 | errln(als); | |
695 | } | |
696 | return isOk; | |
697 | } | |
698 | ||
699 | // Note: ubidi_setReorderingOptions(ubidi, UBIDI_OPTION_REMOVE_CONTROLS); | |
700 | // does not work for custom BiDi class assignments | |
701 | // and anyway also removes LRM/RLM/ZWJ/ZWNJ which is not desirable here. | |
702 | // Therefore we just skip the indexes for BiDi controls while comparing | |
703 | // with the expected ordering that has them omitted. | |
57a6839d | 704 | UBool BiDiConformanceTest::checkOrdering(UBiDi *ubidi) { |
729e4ab9 | 705 | UBool isOk=TRUE; |
57a6839d | 706 | IcuTestErrorCode errorCode(*this, "checkOrdering()"); |
729e4ab9 A |
707 | int32_t resultLength=ubidi_getResultLength(ubidi); // visual length including BiDi controls |
708 | int32_t i, visualIndex; | |
709 | // Note: It should be faster to call ubidi_countRuns()/ubidi_getVisualRun() | |
710 | // and loop over each run's indexes, but that seems unnecessary for this test code. | |
711 | for(i=visualIndex=0; i<resultLength; ++i) { | |
712 | int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode); | |
0f5d89e8 | 713 | if(errorCode.errIfFailureAndReset("ubidi_getLogicalIndex()")) { |
729e4ab9 A |
714 | errln("Input line %d: %s", (int)lineNumber, line); |
715 | return FALSE; | |
716 | } | |
717 | if(levels[logicalIndex]>=UBIDI_DEFAULT_LTR) { | |
718 | continue; // BiDi control, omitted from expected ordering. | |
719 | } | |
720 | if(visualIndex<orderingCount && logicalIndex!=ordering[visualIndex]) { | |
57a6839d A |
721 | errln("\nError on line %d: Wrong ordering value at visual index %d; expected %d actual %d", |
722 | (int)lineNumber, (int)visualIndex, ordering[visualIndex], logicalIndex); | |
729e4ab9 A |
723 | isOk=FALSE; |
724 | break; | |
725 | } | |
726 | ++visualIndex; | |
727 | } | |
728 | // visualIndex is now the visual length minus the BiDi controls, | |
729 | // which should match the length of the BidiTest.txt ordering. | |
730 | if(isOk && orderingCount!=visualIndex) { | |
57a6839d A |
731 | errln("\nError on line %d: Wrong number of ordering values; expected %d actual %d", |
732 | (int)lineNumber, (int)orderingCount, (int)visualIndex); | |
729e4ab9 A |
733 | isOk=FALSE; |
734 | } | |
735 | if(!isOk) { | |
57a6839d | 736 | printErrorLine(); |
729e4ab9 A |
737 | UnicodeString eord("Expected ordering: "); |
738 | for(i=0; i<orderingCount; ++i) { | |
739 | eord.append((UChar)0x20).append((UChar)(0x30+ordering[i])); | |
740 | } | |
741 | UnicodeString aord("Actual ordering: "); | |
742 | for(i=0; i<resultLength; ++i) { | |
743 | int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode); | |
744 | if(levels[logicalIndex]<UBIDI_DEFAULT_LTR) { | |
745 | aord.append((UChar)0x20).append((UChar)(0x30+logicalIndex)); | |
746 | } | |
747 | } | |
748 | errln(eord); | |
749 | errln(aord); | |
750 | } | |
751 | return isOk; | |
752 | } | |
753 | ||
57a6839d | 754 | void BiDiConformanceTest::printErrorLine() { |
729e4ab9 A |
755 | ++errorCount; |
756 | errln("Input line %5d: %s", (int)lineNumber, line); | |
757 | errln(UnicodeString("Input string: ")+inputString); | |
758 | errln("Para level: %s", paraLevelName); | |
759 | } |