]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/intltest/bidiconf.cpp
ICU-64232.0.1.tar.gz
[apple/icu.git] / icuSources / test / intltest / bidiconf.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
729e4ab9
A
3/*
4*******************************************************************************
5*
b331163b 6* Copyright (C) 2009-2014, International Business Machines
729e4ab9
A
7* Corporation and others. All Rights Reserved.
8*
9*******************************************************************************
10* file name: bidiconf.cpp
f3c0d7a5 11* encoding: UTF-8
729e4ab9
A
12* tab size: 8 (not used)
13* indentation:4
14*
15* created on: 2009oct16
16* created by: Markus W. Scherer
17*
57a6839d 18* BiDi conformance test, using the Unicode BidiTest.txt and BidiCharacterTest.txt files.
729e4ab9
A
19*/
20
21#include <stdio.h>
22#include <stdlib.h>
23#include <string.h>
24#include "unicode/utypes.h"
25#include "unicode/ubidi.h"
26#include "unicode/errorcode.h"
27#include "unicode/localpointer.h"
28#include "unicode/putil.h"
29#include "unicode/unistr.h"
30#include "intltest.h"
31#include "uparse.h"
32
33class BiDiConformanceTest : public IntlTest {
34public:
35 BiDiConformanceTest() :
36 directionBits(0), lineNumber(0), levelsCount(0), orderingCount(0),
37 errorCount(0) {}
38
39 void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
40
41 void TestBidiTest();
57a6839d 42 void TestBidiCharacterTest();
729e4ab9 43private:
57a6839d 44 UBool parseLevels(const char *&start);
729e4ab9
A
45 UBool parseOrdering(const char *start);
46 UBool parseInputStringFromBiDiClasses(const char *&start);
47
57a6839d
A
48 UBool checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount);
49 UBool checkOrdering(UBiDi *ubidi);
729e4ab9 50
57a6839d 51 void printErrorLine();
729e4ab9
A
52
53 char line[10000];
54 UBiDiLevel levels[1000];
55 uint32_t directionBits;
56 int32_t ordering[1000];
57 int32_t lineNumber;
58 int32_t levelsCount;
59 int32_t orderingCount;
60 int32_t errorCount;
61 UnicodeString inputString;
57a6839d
A
62 const char *paraLevelName;
63 char levelNameString[12];
729e4ab9
A
64};
65
66extern IntlTest *createBiDiConformanceTest() {
67 return new BiDiConformanceTest();
68}
69
70void BiDiConformanceTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
71 if(exec) {
72 logln("TestSuite BiDiConformanceTest: ");
73 }
57a6839d
A
74 TESTCASE_AUTO_BEGIN;
75 TESTCASE_AUTO(TestBidiTest);
76 TESTCASE_AUTO(TestBidiCharacterTest);
77 TESTCASE_AUTO_END;
729e4ab9
A
78}
79
729e4ab9
A
80U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose);
81
57a6839d 82UBool BiDiConformanceTest::parseLevels(const char *&start) {
729e4ab9
A
83 directionBits=0;
84 levelsCount=0;
57a6839d 85 while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') {
729e4ab9
A
86 if(*start=='x') {
87 levels[levelsCount++]=UBIDI_DEFAULT_LTR;
88 ++start;
89 } else {
90 char *end;
91 uint32_t value=(uint32_t)strtoul(start, &end, 10);
57a6839d
A
92 if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0 && *end!=';')
93 || value>(UBIDI_MAX_EXPLICIT_LEVEL+1)) {
94 errln("\nError on line %d: Levels parse error at %s", (int)lineNumber, start);
95 printErrorLine();
729e4ab9
A
96 return FALSE;
97 }
98 levels[levelsCount++]=(UBiDiLevel)value;
99 directionBits|=(1<<(value&1));
100 start=end;
101 }
102 }
103 return TRUE;
104}
105
106UBool BiDiConformanceTest::parseOrdering(const char *start) {
107 orderingCount=0;
57a6839d 108 while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') {
729e4ab9
A
109 char *end;
110 uint32_t value=(uint32_t)strtoul(start, &end, 10);
57a6839d
A
111 if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0 && *end!=';') || value>=1000) {
112 errln("\nError on line %d: Reorder parse error at %s", (int)lineNumber, start);
113 printErrorLine();
729e4ab9
A
114 return FALSE;
115 }
116 ordering[orderingCount++]=(int32_t)value;
117 start=end;
118 }
119 return TRUE;
120}
121
122static const UChar charFromBiDiClass[U_CHAR_DIRECTION_COUNT]={
123 0x6c, // 'l' for L
124 0x52, // 'R' for R
125 0x33, // '3' for EN
126 0x2d, // '-' for ES
127 0x25, // '%' for ET
128 0x39, // '9' for AN
129 0x2c, // ',' for CS
130 0x2f, // '/' for B
131 0x5f, // '_' for S
132 0x20, // ' ' for WS
133 0x3d, // '=' for ON
134 0x65, // 'e' for LRE
135 0x6f, // 'o' for LRO
136 0x41, // 'A' for AL
137 0x45, // 'E' for RLE
138 0x4f, // 'O' for RLO
139 0x2a, // '*' for PDF
140 0x60, // '`' for NSM
57a6839d
A
141 0x7c, // '|' for BN
142 // new in Unicode 6.3/ICU 52
143 0x53, // 'S' for FSI
144 0x69, // 'i' for LRI
145 0x49, // 'I' for RLI
146 0x2e // '.' for PDI
729e4ab9
A
147};
148
149U_CDECL_BEGIN
150
151static UCharDirection U_CALLCONV
152biDiConfUBiDiClassCallback(const void * /*context*/, UChar32 c) {
153 for(int i=0; i<U_CHAR_DIRECTION_COUNT; ++i) {
154 if(c==charFromBiDiClass[i]) {
155 return (UCharDirection)i;
156 }
157 }
158 // Character not in our hardcoded table.
159 // Should not occur during testing.
160 return U_BIDI_CLASS_DEFAULT;
161}
162
163U_CDECL_END
164
165static const int8_t biDiClassNameLengths[U_CHAR_DIRECTION_COUNT+1]={
57a6839d 166 1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 3, 3, 2, 3, 3, 3, 3, 2, 3, 3, 3, 3, 0
729e4ab9
A
167};
168
169UBool BiDiConformanceTest::parseInputStringFromBiDiClasses(const char *&start) {
170 inputString.remove();
171 /*
172 * Lengthy but fast BiDi class parser.
173 * A simple parser could terminate or extract the name string and use
174 * int32_t biDiClassInt=u_getPropertyValueEnum(UCHAR_BIDI_CLASS, bidiClassString);
175 * but that makes this test take significantly more time.
176 */
177 while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') {
178 UCharDirection biDiClass=U_CHAR_DIRECTION_COUNT;
179 // Compare each character once until we have a match on
180 // a complete, short BiDi class name.
181 if(start[0]=='L') {
182 if(start[1]=='R') {
183 if(start[2]=='E') {
184 biDiClass=U_LEFT_TO_RIGHT_EMBEDDING;
57a6839d
A
185 } else if(start[2]=='I') {
186 biDiClass=U_LEFT_TO_RIGHT_ISOLATE;
729e4ab9
A
187 } else if(start[2]=='O') {
188 biDiClass=U_LEFT_TO_RIGHT_OVERRIDE;
189 }
190 } else {
191 biDiClass=U_LEFT_TO_RIGHT;
192 }
193 } else if(start[0]=='R') {
194 if(start[1]=='L') {
195 if(start[2]=='E') {
196 biDiClass=U_RIGHT_TO_LEFT_EMBEDDING;
57a6839d
A
197 } else if(start[2]=='I') {
198 biDiClass=U_RIGHT_TO_LEFT_ISOLATE;
729e4ab9
A
199 } else if(start[2]=='O') {
200 biDiClass=U_RIGHT_TO_LEFT_OVERRIDE;
201 }
202 } else {
203 biDiClass=U_RIGHT_TO_LEFT;
204 }
205 } else if(start[0]=='E') {
206 if(start[1]=='N') {
207 biDiClass=U_EUROPEAN_NUMBER;
208 } else if(start[1]=='S') {
209 biDiClass=U_EUROPEAN_NUMBER_SEPARATOR;
210 } else if(start[1]=='T') {
211 biDiClass=U_EUROPEAN_NUMBER_TERMINATOR;
212 }
213 } else if(start[0]=='A') {
214 if(start[1]=='L') {
215 biDiClass=U_RIGHT_TO_LEFT_ARABIC;
216 } else if(start[1]=='N') {
217 biDiClass=U_ARABIC_NUMBER;
218 }
219 } else if(start[0]=='C' && start[1]=='S') {
220 biDiClass=U_COMMON_NUMBER_SEPARATOR;
221 } else if(start[0]=='B') {
222 if(start[1]=='N') {
223 biDiClass=U_BOUNDARY_NEUTRAL;
224 } else {
225 biDiClass=U_BLOCK_SEPARATOR;
226 }
227 } else if(start[0]=='S') {
228 biDiClass=U_SEGMENT_SEPARATOR;
229 } else if(start[0]=='W' && start[1]=='S') {
230 biDiClass=U_WHITE_SPACE_NEUTRAL;
231 } else if(start[0]=='O' && start[1]=='N') {
232 biDiClass=U_OTHER_NEUTRAL;
57a6839d
A
233 } else if(start[0]=='P' && start[1]=='D') {
234 if(start[2]=='F') {
235 biDiClass=U_POP_DIRECTIONAL_FORMAT;
236 } else if(start[2]=='I') {
237 biDiClass=U_POP_DIRECTIONAL_ISOLATE;
238 }
729e4ab9
A
239 } else if(start[0]=='N' && start[1]=='S' && start[2]=='M') {
240 biDiClass=U_DIR_NON_SPACING_MARK;
57a6839d
A
241 } else if(start[0]=='F' && start[1]=='S' && start[2]=='I') {
242 biDiClass=U_FIRST_STRONG_ISOLATE;
729e4ab9
A
243 }
244 // Now we verify that the class name is terminated properly,
245 // and not just the start of a longer word.
246 int8_t biDiClassNameLength=biDiClassNameLengths[biDiClass];
247 char c=start[biDiClassNameLength];
57a6839d
A
248 if(biDiClass<U_CHAR_DIRECTION_COUNT && (U_IS_INV_WHITESPACE(c) || c==';' || c==0)) {
249 inputString.append(charFromBiDiClass[biDiClass]);
250 start+=biDiClassNameLength;
251 continue;
729e4ab9 252 }
57a6839d
A
253 errln("\nError on line %d: BiDi class string not recognized at %s", (int)lineNumber, start);
254 printErrorLine();
255 return FALSE;
729e4ab9
A
256 }
257 return TRUE;
258}
259
260void BiDiConformanceTest::TestBidiTest() {
261 IcuTestErrorCode errorCode(*this, "TestBidiTest");
262 const char *sourceTestDataPath=getSourceTestData(errorCode);
0f5d89e8 263 if(errorCode.errIfFailureAndReset("unable to find the source/test/testdata "
729e4ab9
A
264 "folder (getSourceTestData())")) {
265 return;
266 }
267 char bidiTestPath[400];
268 strcpy(bidiTestPath, sourceTestDataPath);
269 strcat(bidiTestPath, "BidiTest.txt");
270 LocalStdioFilePointer bidiTestFile(fopen(bidiTestPath, "r"));
271 if(bidiTestFile.isNull()) {
272 errln("unable to open %s", bidiTestPath);
273 return;
274 }
275 LocalUBiDiPointer ubidi(ubidi_open());
276 ubidi_setClassCallback(ubidi.getAlias(), biDiConfUBiDiClassCallback, NULL,
277 NULL, NULL, errorCode);
0f5d89e8 278 if(errorCode.errIfFailureAndReset("ubidi_setClassCallback()")) {
729e4ab9
A
279 return;
280 }
281 lineNumber=0;
282 levelsCount=0;
283 orderingCount=0;
284 errorCount=0;
b331163b
A
285 // paraLevelName must be initialized in case the first non-comment line is in error
286 paraLevelName="N/A";
729e4ab9
A
287 while(errorCount<10 && fgets(line, (int)sizeof(line), bidiTestFile.getAlias())!=NULL) {
288 ++lineNumber;
289 // Remove trailing comments and whitespace.
290 char *commentStart=strchr(line, '#');
291 if(commentStart!=NULL) {
292 *commentStart=0;
293 }
294 u_rtrim(line);
295 const char *start=u_skipWhitespace(line);
296 if(*start==0) {
297 continue; // Skip empty and comment-only lines.
298 }
299 if(*start=='@') {
300 ++start;
301 if(0==strncmp(start, "Levels:", 7)) {
57a6839d
A
302 start+=7;
303 if(!parseLevels(start)) {
729e4ab9
A
304 return;
305 }
306 } else if(0==strncmp(start, "Reorder:", 8)) {
307 if(!parseOrdering(start+8)) {
308 return;
309 }
310 }
311 // Skip unknown @Xyz: ...
312 } else {
313 if(!parseInputStringFromBiDiClasses(start)) {
314 return;
315 }
316 start=u_skipWhitespace(start);
317 if(*start!=';') {
318 errln("missing ; separator on input line %s", line);
319 return;
320 }
321 start=u_skipWhitespace(start+1);
322 char *end;
323 uint32_t bitset=(uint32_t)strtoul(start, &end, 16);
324 if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0)) {
325 errln("input bitset parse error at %s", start);
326 return;
327 }
328 // Loop over the bitset.
329 static const UBiDiLevel paraLevels[]={ UBIDI_DEFAULT_LTR, 0, 1, UBIDI_DEFAULT_RTL };
330 static const char *const paraLevelNames[]={ "auto/LTR", "LTR", "RTL", "auto/RTL" };
331 for(int i=0; i<=3; ++i) {
332 if(bitset&(1<<i)) {
333 ubidi_setPara(ubidi.getAlias(), inputString.getBuffer(), inputString.length(),
334 paraLevels[i], NULL, errorCode);
335 const UBiDiLevel *actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCode);
0f5d89e8 336 if(errorCode.errIfFailureAndReset("ubidi_setPara() or ubidi_getLevels()")) {
729e4ab9
A
337 errln("Input line %d: %s", (int)lineNumber, line);
338 return;
339 }
57a6839d
A
340 paraLevelName=paraLevelNames[i];
341 if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()))) {
729e4ab9
A
342 // continue outerLoop; does not exist in C++
343 // so just break out of the inner loop.
344 break;
345 }
57a6839d 346 if(!checkOrdering(ubidi.getAlias())) {
729e4ab9
A
347 // continue outerLoop; does not exist in C++
348 // so just break out of the inner loop.
349 break;
350 }
351 }
352 }
353 }
354 }
355}
356
57a6839d
A
357/*
358*******************************************************************************
359*
360* created on: 2013jul01
361* created by: Matitiahu Allouche
362
363This function performs a conformance test for implementations of the
364Unicode Bidirectional Algorithm, specified in UAX #9: Unicode
365Bidirectional Algorithm, at http://www.unicode.org/unicode/reports/tr9/
366
367Each test case is represented in a single line which is read from a file
368named BidiCharacterTest.txt. Empty, blank and comment lines may also appear
369in this file.
370
371The format of the test data is specified below. Note that each test
372case constitutes a single line of text; reordering is applied within a
373single line and independently of a rendering engine, and rules L3 and L4
374are out of scope.
375
376The number sign '#' is the comment character: everything is ignored from
377the occurrence of '#' until the end of the line.
378Empty lines and lines containing only spaces and/or comments are ignored.
379
380Lines which represent test cases consist of 4 or 5 fields separated by a
381semicolon. Each field consists of tokens separated by whitespace (space
382or Tab). Whitespace before and after semicolons is optional.
383
384Field 0: A sequence of hexadecimal code point values separated by space
385
386Field 1: A value representing the paragraph direction, as follows:
387 - 0 represents left-to-right
388 - 1 represents right-to-left
389 - 2 represents auto-LTR according to rules P2 and P3 of the algorithm
390 - 3 represents auto-RTL according to rules P2 and P3 of the algorithm
391 - a negative number whose absolute value is taken as paragraph level;
392 this may be useful to test cases where the embedding level approaches
393 or exceeds the maximum embedding level.
394
395Field 2: The resolved paragraph embedding level. If the input (field 0)
396 includes more than one paragraph, this field represents the
397 resolved level of the first paragraph.
398
399Field 3: An ordered list of resulting levels for each token in field 0
400 (each token represents one source character).
401 The UBA does not assign levels to certain characters (e.g. LRO);
402 characters removed in rule X9 are indicated with an 'x'.
403
404Field 4: An ordered list of indices showing the resulting visual ordering
405 from left to right; characters with a resolved level of 'x' are
406 skipped. The numbers are zero-based. Each index corresponds to
407 a character in the reordered (visual) string. It represents the
408 index of the source character in the input (field 0).
409 This field is optional. When it is absent, the visual ordering
410 is not verified.
411
412Examples:
413
414# This is a comment line.
415L L ON R ; 0 ; 0 ; 0 0 0 1 ; 0 1 2 3
416L L ON R;0;0;0 0 0 1;0 1 2 3
417
418# Note: in the next line, 'B' represents a block separator, not the letter 'B'.
419LRE A B C PDF;2;0;x 2 0 0 x;1 2 3
420# Note: in the next line, 'b' represents the letter 'b', not a block separator.
421a b c 05d0 05d1 x ; 0 ; 0 ; 0 0 0 1 1 0 ; 0 1 2 4 3 5
422
423a R R x ; 1 ; 1 ; 2 1 1 2
424L L R R R B R R L L L B ON ON ; 3 ; 0 ; 0 0 1 1 1 0 1 1 2 2 2 1 1 1
425
426*
427*******************************************************************************
428*/
2ca993e8
A
// Fixed buffer capacities for the ubidi_setParaWithControls() part of
// TestBidiCharacterTest().
enum {
    kMaxUtxt = 32,  // capacity of the text buffer that holds the non-control characters
    kMaxUctl = 16   // capacity of the offsets/control-sequence arrays
};
430
57a6839d
A
431void BiDiConformanceTest::TestBidiCharacterTest() {
432 IcuTestErrorCode errorCode(*this, "TestBidiCharacterTest");
433 const char *sourceTestDataPath=getSourceTestData(errorCode);
0f5d89e8 434 if(errorCode.errIfFailureAndReset("unable to find the source/test/testdata "
57a6839d
A
435 "folder (getSourceTestData())")) {
436 return;
437 }
438 char bidiTestPath[400];
439 strcpy(bidiTestPath, sourceTestDataPath);
440 strcat(bidiTestPath, "BidiCharacterTest.txt");
441 LocalStdioFilePointer bidiTestFile(fopen(bidiTestPath, "r"));
442 if(bidiTestFile.isNull()) {
443 errln("unable to open %s", bidiTestPath);
444 return;
445 }
446 LocalUBiDiPointer ubidi(ubidi_open());
447 lineNumber=0;
448 levelsCount=0;
449 orderingCount=0;
450 errorCount=0;
451 while(errorCount<20 && fgets(line, (int)sizeof(line), bidiTestFile.getAlias())!=NULL) {
452 ++lineNumber;
453 paraLevelName="N/A";
454 inputString="N/A";
455 // Remove trailing comments and whitespace.
456 char *commentStart=strchr(line, '#');
457 if(commentStart!=NULL) {
458 *commentStart=0;
459 }
460 u_rtrim(line);
461 const char *start=u_skipWhitespace(line);
462 if(*start==0) {
463 continue; // Skip empty and comment-only lines.
464 }
465 // Parse the code point string in field 0.
466 UChar *buffer=inputString.getBuffer(200);
467 int32_t length=u_parseString(start, buffer, inputString.getCapacity(), NULL, errorCode);
0f5d89e8 468 if(errorCode.errIfFailureAndReset("Invalid string in field 0")) {
57a6839d
A
469 errln("Input line %d: %s", (int)lineNumber, line);
470 inputString.remove();
471 continue;
472 }
473 inputString.releaseBuffer(length);
474 start=strchr(start, ';');
475 if(start==NULL) {
476 errorCount++;
477 errln("\nError on line %d: Missing ; separator on line: %s", (int)lineNumber, line);
478 continue;
479 }
480 start=u_skipWhitespace(start+1);
481 char *end;
482 int32_t paraDirection=(int32_t)strtol(start, &end, 10);
483 UBiDiLevel paraLevel=UBIDI_MAX_EXPLICIT_LEVEL+2;
484 if(paraDirection==0) {
485 paraLevel=0;
486 paraLevelName="LTR";
487 }
488 else if(paraDirection==1) {
489 paraLevel=1;
490 paraLevelName="RTL";
491 }
492 else if(paraDirection==2) {
493 paraLevel=UBIDI_DEFAULT_LTR;
494 paraLevelName="Auto/LTR";
495 }
496 else if(paraDirection==3) {
497 paraLevel=UBIDI_DEFAULT_RTL;
498 paraLevelName="Auto/RTL";
499 }
500 else if(paraDirection<0 && -paraDirection<=(UBIDI_MAX_EXPLICIT_LEVEL+1)) {
501 paraLevel=(UBiDiLevel)(-paraDirection);
502 sprintf(levelNameString, "%d", (int)paraLevel);
503 paraLevelName=levelNameString;
504 }
505 if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) ||
506 paraLevel==(UBIDI_MAX_EXPLICIT_LEVEL+2)) {
507 errln("\nError on line %d: Input paragraph direction incorrect at %s", (int)lineNumber, start);
508 printErrorLine();
509 continue;
510 }
511 start=u_skipWhitespace(end);
512 if(*start!=';') {
513 errorCount++;
514 errln("\nError on line %d: Missing ; separator on line: %s", (int)lineNumber, line);
515 continue;
516 }
517 start++;
518 uint32_t resolvedParaLevel=(uint32_t)strtoul(start, &end, 10);
519 if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) ||
520 resolvedParaLevel>1) {
521 errln("\nError on line %d: Resolved paragraph level incorrect at %s", (int)lineNumber, start);
522 printErrorLine();
523 continue;
524 }
525 start=u_skipWhitespace(end);
526 if(*start!=';') {
527 errorCount++;
528 errln("\nError on line %d: Missing ; separator on line: %s", (int)lineNumber, line);
529 return;
530 }
531 start++;
532 if(!parseLevels(start)) {
533 continue;
534 }
535 start=u_skipWhitespace(start);
536 if(*start==';') {
537 if(!parseOrdering(start+1)) {
538 continue;
539 }
540 }
541 else
542 orderingCount=-1;
543
544 ubidi_setPara(ubidi.getAlias(), inputString.getBuffer(), inputString.length(),
545 paraLevel, NULL, errorCode);
546 const UBiDiLevel *actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCode);
0f5d89e8 547 if(errorCode.errIfFailureAndReset("ubidi_setPara() or ubidi_getLevels()")) {
57a6839d
A
548 errln("Input line %d: %s", (int)lineNumber, line);
549 continue;
550 }
551 UBiDiLevel actualLevel;
552 if((actualLevel=ubidi_getParaLevel(ubidi.getAlias()))!=resolvedParaLevel) {
553 printErrorLine();
554 errln("\nError on line %d: Wrong resolved paragraph level; expected %d actual %d",
555 (int)lineNumber, resolvedParaLevel, actualLevel);
556 continue;
557 }
558 if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()))) {
559 continue;
560 }
561 if(orderingCount>=0 && !checkOrdering(ubidi.getAlias())) {
562 continue;
563 }
2ca993e8
A
564
565 // tests for ubidi_setParaWithControls
566 // skip 2 tests known not to work (out of 91678 cases, though
567 // only 86 of those tests use controls so 2.3% of those failing),
568 // still investigating these
f3c0d7a5 569 if (lineNumber>=212 && lineNumber<=213) {
2ca993e8
A
570 continue;
571 }
572
573 const UChar* ubufPtr = inputString.getBuffer();
574 int32_t ubufIdx;
575 UChar utxt[kMaxUtxt];
576 UBiDiLevel ulev[kMaxUtxt];
577 int32_t offsets[kMaxUctl];
578 UChar* uctlPtrs[kMaxUctl];
579 UChar uctl[kMaxUctl][5];
580 UChar *uctlPtr;
581 int32_t utxtLen = 0, offsetsLen = 0, ctlLen = 0;
582 UBool fail = FALSE;
583 for (ubufIdx = 0; ubufIdx < inputString.length(); ubufIdx++) {
584 UChar uc = ubufPtr[ubufIdx];
585 if ( (uc >=0x202A && uc<=0x202E) || (uc >=0x2066 && uc<=0x2069) ) {
586 // have a bidi control
587 if (ctlLen >= 4) {
588 fail = TRUE; break;
589 }
590 if (ctlLen == 0) {
591 // starting a new control sequence
592 if (offsetsLen >= kMaxUctl) {
593 fail = TRUE; break;
594 }
595 offsets[offsetsLen] = utxtLen;
596 uctlPtr = &uctl[offsetsLen][0];
597 uctlPtrs[offsetsLen] = uctlPtr;
598 offsetsLen++;
599 }
600 uctlPtr[ctlLen++] = uc;
601 uctlPtr[ctlLen] = 0;
602 } else {
603 if (utxtLen >= kMaxUtxt) {
604 fail = TRUE; break;
605 }
606 ctlLen = 0;
607 utxt[utxtLen] = uc;
608 levels[utxtLen] = levels[ubufIdx]; // will always have ubufIdx >= utxtLen so this is OK
609 utxtLen++;
610 }
611 }
612 levelsCount = utxtLen;
613 if (fail) {
614 logln("Skipping BidiCharacterTest unsuitable for ubidi_setParaWithControls: %d: %s", (int)lineNumber, line);
615 continue; // can't use this test
616 }
617 if (offsetsLen > 0 && offsets[offsetsLen-1] >= utxtLen) {
618 --offsetsLen;
619 ubidi_setContext(ubidi.getAlias(), NULL, 0, uctlPtrs[offsetsLen], -1, errorCode);
620 } else {
621 ubidi_setContext(ubidi.getAlias(), NULL, 0, NULL, 0, errorCode);
622 }
623 ubidi_setParaWithControls(ubidi.getAlias(), utxt, utxtLen, paraLevel,
624 offsets, offsetsLen, NULL, uctlPtrs, errorCode);
625 actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCode);
0f5d89e8 626 if(errorCode.errIfFailureAndReset("ubidi_setContext()/ubidi_setParaWithControls()/ubidi_getLevels()")) {
2ca993e8
A
627 errln("Input line %d: %s", (int)lineNumber, line);
628 continue;
629 }
630 if((actualLevel=ubidi_getParaLevel(ubidi.getAlias()))!=resolvedParaLevel) {
631 printErrorLine();
632 errln("\nError on line %d: Wrong resolved paragraph level from ubidi_setParaWithControls; expected %d actual %d",
633 (int)lineNumber, resolvedParaLevel, actualLevel);
634 continue;
635 }
636 if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()))) {
637 continue;
638 }
57a6839d
A
639 }
640}
641
729e4ab9
A
642static UChar printLevel(UBiDiLevel level) {
643 if(level<UBIDI_DEFAULT_LTR) {
644 return 0x30+level;
645 } else {
646 return 0x78; // 'x'
647 }
648}
649
650static uint32_t getDirectionBits(const UBiDiLevel actualLevels[], int32_t actualCount) {
651 uint32_t actualDirectionBits=0;
652 for(int32_t i=0; i<actualCount; ++i) {
653 actualDirectionBits|=(1<<(actualLevels[i]&1));
654 }
655 return actualDirectionBits;
656}
657
57a6839d 658UBool BiDiConformanceTest::checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount) {
729e4ab9
A
659 UBool isOk=TRUE;
660 if(levelsCount!=actualCount) {
57a6839d
A
661 errln("\nError on line %d: Wrong number of level values; expected %d actual %d",
662 (int)lineNumber, (int)levelsCount, (int)actualCount);
729e4ab9
A
663 isOk=FALSE;
664 } else {
665 for(int32_t i=0; i<actualCount; ++i) {
666 if(levels[i]!=actualLevels[i] && levels[i]<UBIDI_DEFAULT_LTR) {
667 if(directionBits!=3 && directionBits==getDirectionBits(actualLevels, actualCount)) {
668 // ICU used a shortcut:
669 // Since the text is unidirectional, it did not store the resolved
670 // levels but just returns all levels as the paragraph level 0 or 1.
671 // The reordering result is the same, so this is fine.
672 break;
673 } else {
57a6839d
A
674 errln("\nError on line %d: Wrong level value at index %d; expected %d actual %d",
675 (int)lineNumber, (int)i, levels[i], actualLevels[i]);
729e4ab9
A
676 isOk=FALSE;
677 break;
678 }
679 }
680 }
681 }
682 if(!isOk) {
57a6839d 683 printErrorLine();
729e4ab9
A
684 UnicodeString els("Expected levels: ");
685 int32_t i;
686 for(i=0; i<levelsCount; ++i) {
687 els.append((UChar)0x20).append(printLevel(levels[i]));
688 }
689 UnicodeString als("Actual levels: ");
690 for(i=0; i<actualCount; ++i) {
691 als.append((UChar)0x20).append(printLevel(actualLevels[i]));
692 }
693 errln(els);
694 errln(als);
695 }
696 return isOk;
697}
698
699// Note: ubidi_setReorderingOptions(ubidi, UBIDI_OPTION_REMOVE_CONTROLS);
700// does not work for custom BiDi class assignments
701// and anyway also removes LRM/RLM/ZWJ/ZWNJ which is not desirable here.
702// Therefore we just skip the indexes for BiDi controls while comparing
703// with the expected ordering that has them omitted.
57a6839d 704UBool BiDiConformanceTest::checkOrdering(UBiDi *ubidi) {
729e4ab9 705 UBool isOk=TRUE;
57a6839d 706 IcuTestErrorCode errorCode(*this, "checkOrdering()");
729e4ab9
A
707 int32_t resultLength=ubidi_getResultLength(ubidi); // visual length including BiDi controls
708 int32_t i, visualIndex;
709 // Note: It should be faster to call ubidi_countRuns()/ubidi_getVisualRun()
710 // and loop over each run's indexes, but that seems unnecessary for this test code.
711 for(i=visualIndex=0; i<resultLength; ++i) {
712 int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode);
0f5d89e8 713 if(errorCode.errIfFailureAndReset("ubidi_getLogicalIndex()")) {
729e4ab9
A
714 errln("Input line %d: %s", (int)lineNumber, line);
715 return FALSE;
716 }
717 if(levels[logicalIndex]>=UBIDI_DEFAULT_LTR) {
718 continue; // BiDi control, omitted from expected ordering.
719 }
720 if(visualIndex<orderingCount && logicalIndex!=ordering[visualIndex]) {
57a6839d
A
721 errln("\nError on line %d: Wrong ordering value at visual index %d; expected %d actual %d",
722 (int)lineNumber, (int)visualIndex, ordering[visualIndex], logicalIndex);
729e4ab9
A
723 isOk=FALSE;
724 break;
725 }
726 ++visualIndex;
727 }
728 // visualIndex is now the visual length minus the BiDi controls,
729 // which should match the length of the BidiTest.txt ordering.
730 if(isOk && orderingCount!=visualIndex) {
57a6839d
A
731 errln("\nError on line %d: Wrong number of ordering values; expected %d actual %d",
732 (int)lineNumber, (int)orderingCount, (int)visualIndex);
729e4ab9
A
733 isOk=FALSE;
734 }
735 if(!isOk) {
57a6839d 736 printErrorLine();
729e4ab9
A
737 UnicodeString eord("Expected ordering: ");
738 for(i=0; i<orderingCount; ++i) {
739 eord.append((UChar)0x20).append((UChar)(0x30+ordering[i]));
740 }
741 UnicodeString aord("Actual ordering: ");
742 for(i=0; i<resultLength; ++i) {
743 int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode);
744 if(levels[logicalIndex]<UBIDI_DEFAULT_LTR) {
745 aord.append((UChar)0x20).append((UChar)(0x30+logicalIndex));
746 }
747 }
748 errln(eord);
749 errln(aord);
750 }
751 return isOk;
752}
753
57a6839d 754void BiDiConformanceTest::printErrorLine() {
729e4ab9
A
755 ++errorCount;
756 errln("Input line %5d: %s", (int)lineNumber, line);
757 errln(UnicodeString("Input string: ")+inputString);
758 errln("Para level: %s", paraLevelName);
759}