]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/intltest/bidiconf.cpp
ICU-57166.0.1.tar.gz
[apple/icu.git] / icuSources / test / intltest / bidiconf.cpp
CommitLineData
729e4ab9
A
1/*
2*******************************************************************************
3*
b331163b 4* Copyright (C) 2009-2014, International Business Machines
729e4ab9
A
5* Corporation and others. All Rights Reserved.
6*
7*******************************************************************************
8* file name: bidiconf.cpp
9* encoding: US-ASCII
10* tab size: 8 (not used)
11* indentation:4
12*
13* created on: 2009oct16
14* created by: Markus W. Scherer
15*
57a6839d 16* BiDi conformance test, using the Unicode BidiTest.txt and BidiCharacterTest.txt files.
729e4ab9
A
17*/
18
19#include <stdio.h>
20#include <stdlib.h>
21#include <string.h>
22#include "unicode/utypes.h"
23#include "unicode/ubidi.h"
24#include "unicode/errorcode.h"
25#include "unicode/localpointer.h"
26#include "unicode/putil.h"
27#include "unicode/unistr.h"
28#include "intltest.h"
29#include "uparse.h"
30
31class BiDiConformanceTest : public IntlTest {
32public:
33 BiDiConformanceTest() :
34 directionBits(0), lineNumber(0), levelsCount(0), orderingCount(0),
35 errorCount(0) {}
36
37 void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
38
39 void TestBidiTest();
57a6839d 40 void TestBidiCharacterTest();
729e4ab9 41private:
57a6839d 42 UBool parseLevels(const char *&start);
729e4ab9
A
43 UBool parseOrdering(const char *start);
44 UBool parseInputStringFromBiDiClasses(const char *&start);
45
57a6839d
A
46 UBool checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount);
47 UBool checkOrdering(UBiDi *ubidi);
729e4ab9 48
57a6839d 49 void printErrorLine();
729e4ab9
A
50
51 char line[10000];
52 UBiDiLevel levels[1000];
53 uint32_t directionBits;
54 int32_t ordering[1000];
55 int32_t lineNumber;
56 int32_t levelsCount;
57 int32_t orderingCount;
58 int32_t errorCount;
59 UnicodeString inputString;
57a6839d
A
60 const char *paraLevelName;
61 char levelNameString[12];
729e4ab9
A
62};
63
64extern IntlTest *createBiDiConformanceTest() {
65 return new BiDiConformanceTest();
66}
67
68void BiDiConformanceTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
69 if(exec) {
70 logln("TestSuite BiDiConformanceTest: ");
71 }
57a6839d
A
72 TESTCASE_AUTO_BEGIN;
73 TESTCASE_AUTO(TestBidiTest);
74 TESTCASE_AUTO(TestBidiCharacterTest);
75 TESTCASE_AUTO_END;
729e4ab9
A
76}
77
729e4ab9
A
78U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose);
79
57a6839d 80UBool BiDiConformanceTest::parseLevels(const char *&start) {
729e4ab9
A
81 directionBits=0;
82 levelsCount=0;
57a6839d 83 while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') {
729e4ab9
A
84 if(*start=='x') {
85 levels[levelsCount++]=UBIDI_DEFAULT_LTR;
86 ++start;
87 } else {
88 char *end;
89 uint32_t value=(uint32_t)strtoul(start, &end, 10);
57a6839d
A
90 if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0 && *end!=';')
91 || value>(UBIDI_MAX_EXPLICIT_LEVEL+1)) {
92 errln("\nError on line %d: Levels parse error at %s", (int)lineNumber, start);
93 printErrorLine();
729e4ab9
A
94 return FALSE;
95 }
96 levels[levelsCount++]=(UBiDiLevel)value;
97 directionBits|=(1<<(value&1));
98 start=end;
99 }
100 }
101 return TRUE;
102}
103
104UBool BiDiConformanceTest::parseOrdering(const char *start) {
105 orderingCount=0;
57a6839d 106 while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') {
729e4ab9
A
107 char *end;
108 uint32_t value=(uint32_t)strtoul(start, &end, 10);
57a6839d
A
109 if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=0 && *end!=';') || value>=1000) {
110 errln("\nError on line %d: Reorder parse error at %s", (int)lineNumber, start);
111 printErrorLine();
729e4ab9
A
112 return FALSE;
113 }
114 ordering[orderingCount++]=(int32_t)value;
115 start=end;
116 }
117 return TRUE;
118}
119
120static const UChar charFromBiDiClass[U_CHAR_DIRECTION_COUNT]={
121 0x6c, // 'l' for L
122 0x52, // 'R' for R
123 0x33, // '3' for EN
124 0x2d, // '-' for ES
125 0x25, // '%' for ET
126 0x39, // '9' for AN
127 0x2c, // ',' for CS
128 0x2f, // '/' for B
129 0x5f, // '_' for S
130 0x20, // ' ' for WS
131 0x3d, // '=' for ON
132 0x65, // 'e' for LRE
133 0x6f, // 'o' for LRO
134 0x41, // 'A' for AL
135 0x45, // 'E' for RLE
136 0x4f, // 'O' for RLO
137 0x2a, // '*' for PDF
138 0x60, // '`' for NSM
57a6839d
A
139 0x7c, // '|' for BN
140 // new in Unicode 6.3/ICU 52
141 0x53, // 'S' for FSI
142 0x69, // 'i' for LRI
143 0x49, // 'I' for RLI
144 0x2e // '.' for PDI
729e4ab9
A
145};
146
147U_CDECL_BEGIN
148
149static UCharDirection U_CALLCONV
150biDiConfUBiDiClassCallback(const void * /*context*/, UChar32 c) {
151 for(int i=0; i<U_CHAR_DIRECTION_COUNT; ++i) {
152 if(c==charFromBiDiClass[i]) {
153 return (UCharDirection)i;
154 }
155 }
156 // Character not in our hardcoded table.
157 // Should not occur during testing.
158 return U_BIDI_CLASS_DEFAULT;
159}
160
161U_CDECL_END
162
163static const int8_t biDiClassNameLengths[U_CHAR_DIRECTION_COUNT+1]={
57a6839d 164 1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 3, 3, 2, 3, 3, 3, 3, 2, 3, 3, 3, 3, 0
729e4ab9
A
165};
166
167UBool BiDiConformanceTest::parseInputStringFromBiDiClasses(const char *&start) {
168 inputString.remove();
169 /*
170 * Lengthy but fast BiDi class parser.
171 * A simple parser could terminate or extract the name string and use
172 * int32_t biDiClassInt=u_getPropertyValueEnum(UCHAR_BIDI_CLASS, bidiClassString);
173 * but that makes this test take significantly more time.
174 */
175 while(*start!=0 && *(start=u_skipWhitespace(start))!=0 && *start!=';') {
176 UCharDirection biDiClass=U_CHAR_DIRECTION_COUNT;
177 // Compare each character once until we have a match on
178 // a complete, short BiDi class name.
179 if(start[0]=='L') {
180 if(start[1]=='R') {
181 if(start[2]=='E') {
182 biDiClass=U_LEFT_TO_RIGHT_EMBEDDING;
57a6839d
A
183 } else if(start[2]=='I') {
184 biDiClass=U_LEFT_TO_RIGHT_ISOLATE;
729e4ab9
A
185 } else if(start[2]=='O') {
186 biDiClass=U_LEFT_TO_RIGHT_OVERRIDE;
187 }
188 } else {
189 biDiClass=U_LEFT_TO_RIGHT;
190 }
191 } else if(start[0]=='R') {
192 if(start[1]=='L') {
193 if(start[2]=='E') {
194 biDiClass=U_RIGHT_TO_LEFT_EMBEDDING;
57a6839d
A
195 } else if(start[2]=='I') {
196 biDiClass=U_RIGHT_TO_LEFT_ISOLATE;
729e4ab9
A
197 } else if(start[2]=='O') {
198 biDiClass=U_RIGHT_TO_LEFT_OVERRIDE;
199 }
200 } else {
201 biDiClass=U_RIGHT_TO_LEFT;
202 }
203 } else if(start[0]=='E') {
204 if(start[1]=='N') {
205 biDiClass=U_EUROPEAN_NUMBER;
206 } else if(start[1]=='S') {
207 biDiClass=U_EUROPEAN_NUMBER_SEPARATOR;
208 } else if(start[1]=='T') {
209 biDiClass=U_EUROPEAN_NUMBER_TERMINATOR;
210 }
211 } else if(start[0]=='A') {
212 if(start[1]=='L') {
213 biDiClass=U_RIGHT_TO_LEFT_ARABIC;
214 } else if(start[1]=='N') {
215 biDiClass=U_ARABIC_NUMBER;
216 }
217 } else if(start[0]=='C' && start[1]=='S') {
218 biDiClass=U_COMMON_NUMBER_SEPARATOR;
219 } else if(start[0]=='B') {
220 if(start[1]=='N') {
221 biDiClass=U_BOUNDARY_NEUTRAL;
222 } else {
223 biDiClass=U_BLOCK_SEPARATOR;
224 }
225 } else if(start[0]=='S') {
226 biDiClass=U_SEGMENT_SEPARATOR;
227 } else if(start[0]=='W' && start[1]=='S') {
228 biDiClass=U_WHITE_SPACE_NEUTRAL;
229 } else if(start[0]=='O' && start[1]=='N') {
230 biDiClass=U_OTHER_NEUTRAL;
57a6839d
A
231 } else if(start[0]=='P' && start[1]=='D') {
232 if(start[2]=='F') {
233 biDiClass=U_POP_DIRECTIONAL_FORMAT;
234 } else if(start[2]=='I') {
235 biDiClass=U_POP_DIRECTIONAL_ISOLATE;
236 }
729e4ab9
A
237 } else if(start[0]=='N' && start[1]=='S' && start[2]=='M') {
238 biDiClass=U_DIR_NON_SPACING_MARK;
57a6839d
A
239 } else if(start[0]=='F' && start[1]=='S' && start[2]=='I') {
240 biDiClass=U_FIRST_STRONG_ISOLATE;
729e4ab9
A
241 }
242 // Now we verify that the class name is terminated properly,
243 // and not just the start of a longer word.
244 int8_t biDiClassNameLength=biDiClassNameLengths[biDiClass];
245 char c=start[biDiClassNameLength];
57a6839d
A
246 if(biDiClass<U_CHAR_DIRECTION_COUNT && (U_IS_INV_WHITESPACE(c) || c==';' || c==0)) {
247 inputString.append(charFromBiDiClass[biDiClass]);
248 start+=biDiClassNameLength;
249 continue;
729e4ab9 250 }
57a6839d
A
251 errln("\nError on line %d: BiDi class string not recognized at %s", (int)lineNumber, start);
252 printErrorLine();
253 return FALSE;
729e4ab9
A
254 }
255 return TRUE;
256}
257
258void BiDiConformanceTest::TestBidiTest() {
259 IcuTestErrorCode errorCode(*this, "TestBidiTest");
260 const char *sourceTestDataPath=getSourceTestData(errorCode);
261 if(errorCode.logIfFailureAndReset("unable to find the source/test/testdata "
262 "folder (getSourceTestData())")) {
263 return;
264 }
265 char bidiTestPath[400];
266 strcpy(bidiTestPath, sourceTestDataPath);
267 strcat(bidiTestPath, "BidiTest.txt");
268 LocalStdioFilePointer bidiTestFile(fopen(bidiTestPath, "r"));
269 if(bidiTestFile.isNull()) {
270 errln("unable to open %s", bidiTestPath);
271 return;
272 }
273 LocalUBiDiPointer ubidi(ubidi_open());
274 ubidi_setClassCallback(ubidi.getAlias(), biDiConfUBiDiClassCallback, NULL,
275 NULL, NULL, errorCode);
276 if(errorCode.logIfFailureAndReset("ubidi_setClassCallback()")) {
277 return;
278 }
279 lineNumber=0;
280 levelsCount=0;
281 orderingCount=0;
282 errorCount=0;
b331163b
A
283 // paraLevelName must be initialized in case the first non-comment line is in error
284 paraLevelName="N/A";
729e4ab9
A
285 while(errorCount<10 && fgets(line, (int)sizeof(line), bidiTestFile.getAlias())!=NULL) {
286 ++lineNumber;
287 // Remove trailing comments and whitespace.
288 char *commentStart=strchr(line, '#');
289 if(commentStart!=NULL) {
290 *commentStart=0;
291 }
292 u_rtrim(line);
293 const char *start=u_skipWhitespace(line);
294 if(*start==0) {
295 continue; // Skip empty and comment-only lines.
296 }
297 if(*start=='@') {
298 ++start;
299 if(0==strncmp(start, "Levels:", 7)) {
57a6839d
A
300 start+=7;
301 if(!parseLevels(start)) {
729e4ab9
A
302 return;
303 }
304 } else if(0==strncmp(start, "Reorder:", 8)) {
305 if(!parseOrdering(start+8)) {
306 return;
307 }
308 }
309 // Skip unknown @Xyz: ...
310 } else {
311 if(!parseInputStringFromBiDiClasses(start)) {
312 return;
313 }
314 start=u_skipWhitespace(start);
315 if(*start!=';') {
316 errln("missing ; separator on input line %s", line);
317 return;
318 }
319 start=u_skipWhitespace(start+1);
320 char *end;
321 uint32_t bitset=(uint32_t)strtoul(start, &end, 16);
322 if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0)) {
323 errln("input bitset parse error at %s", start);
324 return;
325 }
326 // Loop over the bitset.
327 static const UBiDiLevel paraLevels[]={ UBIDI_DEFAULT_LTR, 0, 1, UBIDI_DEFAULT_RTL };
328 static const char *const paraLevelNames[]={ "auto/LTR", "LTR", "RTL", "auto/RTL" };
329 for(int i=0; i<=3; ++i) {
330 if(bitset&(1<<i)) {
331 ubidi_setPara(ubidi.getAlias(), inputString.getBuffer(), inputString.length(),
332 paraLevels[i], NULL, errorCode);
333 const UBiDiLevel *actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCode);
334 if(errorCode.logIfFailureAndReset("ubidi_setPara() or ubidi_getLevels()")) {
335 errln("Input line %d: %s", (int)lineNumber, line);
336 return;
337 }
57a6839d
A
338 paraLevelName=paraLevelNames[i];
339 if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()))) {
729e4ab9
A
340 // continue outerLoop; does not exist in C++
341 // so just break out of the inner loop.
342 break;
343 }
57a6839d 344 if(!checkOrdering(ubidi.getAlias())) {
729e4ab9
A
345 // continue outerLoop; does not exist in C++
346 // so just break out of the inner loop.
347 break;
348 }
349 }
350 }
351 }
352 }
353}
354
57a6839d
A
355/*
356*******************************************************************************
357*
358* created on: 2013jul01
359* created by: Matitiahu Allouche
360
361This function performs a conformance test for implementations of the
362Unicode Bidirectional Algorithm, specified in UAX #9: Unicode
363Bidirectional Algorithm, at http://www.unicode.org/unicode/reports/tr9/
364
Each test case is represented in a single line which is read from a file
named BidiCharacterTest.txt. Empty, blank and comment lines may also appear
in this file.
368
369The format of the test data is specified below. Note that each test
370case constitutes a single line of text; reordering is applied within a
371single line and independently of a rendering engine, and rules L3 and L4
372are out of scope.
373
The number sign '#' is the comment character: everything is ignored from
the occurrence of '#' until the end of the line.
Empty lines and lines containing only spaces and/or comments are ignored.
377
378Lines which represent test cases consist of 4 or 5 fields separated by a
379semicolon. Each field consists of tokens separated by whitespace (space
380or Tab). Whitespace before and after semicolons is optional.
381
382Field 0: A sequence of hexadecimal code point values separated by space
383
384Field 1: A value representing the paragraph direction, as follows:
385 - 0 represents left-to-right
386 - 1 represents right-to-left
387 - 2 represents auto-LTR according to rules P2 and P3 of the algorithm
388 - 3 represents auto-RTL according to rules P2 and P3 of the algorithm
389 - a negative number whose absolute value is taken as paragraph level;
390 this may be useful to test cases where the embedding level approaches
391 or exceeds the maximum embedding level.
392
393Field 2: The resolved paragraph embedding level. If the input (field 0)
394 includes more than one paragraph, this field represents the
395 resolved level of the first paragraph.
396
397Field 3: An ordered list of resulting levels for each token in field 0
398 (each token represents one source character).
399 The UBA does not assign levels to certain characters (e.g. LRO);
400 characters removed in rule X9 are indicated with an 'x'.
401
Field 4: An ordered list of indices showing the resulting visual ordering
         from left to right; characters with a resolved level of 'x' are
         skipped. The numbers are zero-based. Each index corresponds to
         a character in the reordered (visual) string. It represents the
         index of the source character in the input (field 0).
         This field is optional. When it is absent, the visual ordering
         is not verified.
409
410Examples:
411
412# This is a comment line.
413L L ON R ; 0 ; 0 ; 0 0 0 1 ; 0 1 2 3
414L L ON R;0;0;0 0 0 1;0 1 2 3
415
416# Note: in the next line, 'B' represents a block separator, not the letter 'B'.
417LRE A B C PDF;2;0;x 2 0 0 x;1 2 3
418# Note: in the next line, 'b' represents the letter 'b', not a block separator.
419a b c 05d0 05d1 x ; 0 ; 0 ; 0 0 0 1 1 0 ; 0 1 2 4 3 5
420
421a R R x ; 1 ; 1 ; 2 1 1 2
422L L R R R B R R L L L B ON ON ; 3 ; 0 ; 0 0 1 1 1 0 1 1 2 2 2 1 1 1
423
424*
425*******************************************************************************
426*/
2ca993e8
A
// Capacities of the local buffers that TestBidiCharacterTest() uses
// for its ubidi_setParaWithControls() checks.
enum {
    kMaxUtxt = 32,  // text characters that remain after taking out the controls
    kMaxUctl = 16   // separate runs of BiDi controls
};
428
57a6839d
A
429void BiDiConformanceTest::TestBidiCharacterTest() {
430 IcuTestErrorCode errorCode(*this, "TestBidiCharacterTest");
431 const char *sourceTestDataPath=getSourceTestData(errorCode);
432 if(errorCode.logIfFailureAndReset("unable to find the source/test/testdata "
433 "folder (getSourceTestData())")) {
434 return;
435 }
436 char bidiTestPath[400];
437 strcpy(bidiTestPath, sourceTestDataPath);
438 strcat(bidiTestPath, "BidiCharacterTest.txt");
439 LocalStdioFilePointer bidiTestFile(fopen(bidiTestPath, "r"));
440 if(bidiTestFile.isNull()) {
441 errln("unable to open %s", bidiTestPath);
442 return;
443 }
444 LocalUBiDiPointer ubidi(ubidi_open());
445 lineNumber=0;
446 levelsCount=0;
447 orderingCount=0;
448 errorCount=0;
449 while(errorCount<20 && fgets(line, (int)sizeof(line), bidiTestFile.getAlias())!=NULL) {
450 ++lineNumber;
451 paraLevelName="N/A";
452 inputString="N/A";
453 // Remove trailing comments and whitespace.
454 char *commentStart=strchr(line, '#');
455 if(commentStart!=NULL) {
456 *commentStart=0;
457 }
458 u_rtrim(line);
459 const char *start=u_skipWhitespace(line);
460 if(*start==0) {
461 continue; // Skip empty and comment-only lines.
462 }
463 // Parse the code point string in field 0.
464 UChar *buffer=inputString.getBuffer(200);
465 int32_t length=u_parseString(start, buffer, inputString.getCapacity(), NULL, errorCode);
466 if(errorCode.logIfFailureAndReset("Invalid string in field 0")) {
467 errln("Input line %d: %s", (int)lineNumber, line);
468 inputString.remove();
469 continue;
470 }
471 inputString.releaseBuffer(length);
472 start=strchr(start, ';');
473 if(start==NULL) {
474 errorCount++;
475 errln("\nError on line %d: Missing ; separator on line: %s", (int)lineNumber, line);
476 continue;
477 }
478 start=u_skipWhitespace(start+1);
479 char *end;
480 int32_t paraDirection=(int32_t)strtol(start, &end, 10);
481 UBiDiLevel paraLevel=UBIDI_MAX_EXPLICIT_LEVEL+2;
482 if(paraDirection==0) {
483 paraLevel=0;
484 paraLevelName="LTR";
485 }
486 else if(paraDirection==1) {
487 paraLevel=1;
488 paraLevelName="RTL";
489 }
490 else if(paraDirection==2) {
491 paraLevel=UBIDI_DEFAULT_LTR;
492 paraLevelName="Auto/LTR";
493 }
494 else if(paraDirection==3) {
495 paraLevel=UBIDI_DEFAULT_RTL;
496 paraLevelName="Auto/RTL";
497 }
498 else if(paraDirection<0 && -paraDirection<=(UBIDI_MAX_EXPLICIT_LEVEL+1)) {
499 paraLevel=(UBiDiLevel)(-paraDirection);
500 sprintf(levelNameString, "%d", (int)paraLevel);
501 paraLevelName=levelNameString;
502 }
503 if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) ||
504 paraLevel==(UBIDI_MAX_EXPLICIT_LEVEL+2)) {
505 errln("\nError on line %d: Input paragraph direction incorrect at %s", (int)lineNumber, start);
506 printErrorLine();
507 continue;
508 }
509 start=u_skipWhitespace(end);
510 if(*start!=';') {
511 errorCount++;
512 errln("\nError on line %d: Missing ; separator on line: %s", (int)lineNumber, line);
513 continue;
514 }
515 start++;
516 uint32_t resolvedParaLevel=(uint32_t)strtoul(start, &end, 10);
517 if(end<=start || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) ||
518 resolvedParaLevel>1) {
519 errln("\nError on line %d: Resolved paragraph level incorrect at %s", (int)lineNumber, start);
520 printErrorLine();
521 continue;
522 }
523 start=u_skipWhitespace(end);
524 if(*start!=';') {
525 errorCount++;
526 errln("\nError on line %d: Missing ; separator on line: %s", (int)lineNumber, line);
527 return;
528 }
529 start++;
530 if(!parseLevels(start)) {
531 continue;
532 }
533 start=u_skipWhitespace(start);
534 if(*start==';') {
535 if(!parseOrdering(start+1)) {
536 continue;
537 }
538 }
539 else
540 orderingCount=-1;
541
542 ubidi_setPara(ubidi.getAlias(), inputString.getBuffer(), inputString.length(),
543 paraLevel, NULL, errorCode);
544 const UBiDiLevel *actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCode);
545 if(errorCode.logIfFailureAndReset("ubidi_setPara() or ubidi_getLevels()")) {
546 errln("Input line %d: %s", (int)lineNumber, line);
547 continue;
548 }
549 UBiDiLevel actualLevel;
550 if((actualLevel=ubidi_getParaLevel(ubidi.getAlias()))!=resolvedParaLevel) {
551 printErrorLine();
552 errln("\nError on line %d: Wrong resolved paragraph level; expected %d actual %d",
553 (int)lineNumber, resolvedParaLevel, actualLevel);
554 continue;
555 }
556 if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()))) {
557 continue;
558 }
559 if(orderingCount>=0 && !checkOrdering(ubidi.getAlias())) {
560 continue;
561 }
2ca993e8
A
562
563 // tests for ubidi_setParaWithControls
564 // skip 2 tests known not to work (out of 91678 cases, though
565 // only 86 of those tests use controls so 2.3% of those failing),
566 // still investigating these
567 if (lineNumber==210 || lineNumber==211) {
568 continue;
569 }
570
571 const UChar* ubufPtr = inputString.getBuffer();
572 int32_t ubufIdx;
573 UChar utxt[kMaxUtxt];
574 UBiDiLevel ulev[kMaxUtxt];
575 int32_t offsets[kMaxUctl];
576 UChar* uctlPtrs[kMaxUctl];
577 UChar uctl[kMaxUctl][5];
578 UChar *uctlPtr;
579 int32_t utxtLen = 0, offsetsLen = 0, ctlLen = 0;
580 UBool fail = FALSE;
581 for (ubufIdx = 0; ubufIdx < inputString.length(); ubufIdx++) {
582 UChar uc = ubufPtr[ubufIdx];
583 if ( (uc >=0x202A && uc<=0x202E) || (uc >=0x2066 && uc<=0x2069) ) {
584 // have a bidi control
585 if (ctlLen >= 4) {
586 fail = TRUE; break;
587 }
588 if (ctlLen == 0) {
589 // starting a new control sequence
590 if (offsetsLen >= kMaxUctl) {
591 fail = TRUE; break;
592 }
593 offsets[offsetsLen] = utxtLen;
594 uctlPtr = &uctl[offsetsLen][0];
595 uctlPtrs[offsetsLen] = uctlPtr;
596 offsetsLen++;
597 }
598 uctlPtr[ctlLen++] = uc;
599 uctlPtr[ctlLen] = 0;
600 } else {
601 if (utxtLen >= kMaxUtxt) {
602 fail = TRUE; break;
603 }
604 ctlLen = 0;
605 utxt[utxtLen] = uc;
606 levels[utxtLen] = levels[ubufIdx]; // will always have ubufIdx >= utxtLen so this is OK
607 utxtLen++;
608 }
609 }
610 levelsCount = utxtLen;
611 if (fail) {
612 logln("Skipping BidiCharacterTest unsuitable for ubidi_setParaWithControls: %d: %s", (int)lineNumber, line);
613 continue; // can't use this test
614 }
615 if (offsetsLen > 0 && offsets[offsetsLen-1] >= utxtLen) {
616 --offsetsLen;
617 ubidi_setContext(ubidi.getAlias(), NULL, 0, uctlPtrs[offsetsLen], -1, errorCode);
618 } else {
619 ubidi_setContext(ubidi.getAlias(), NULL, 0, NULL, 0, errorCode);
620 }
621 ubidi_setParaWithControls(ubidi.getAlias(), utxt, utxtLen, paraLevel,
622 offsets, offsetsLen, NULL, uctlPtrs, errorCode);
623 actualLevels=ubidi_getLevels(ubidi.getAlias(), errorCode);
624 if(errorCode.logIfFailureAndReset("ubidi_setContext()/ubidi_setParaWithControls()/ubidi_getLevels()")) {
625 errln("Input line %d: %s", (int)lineNumber, line);
626 continue;
627 }
628 if((actualLevel=ubidi_getParaLevel(ubidi.getAlias()))!=resolvedParaLevel) {
629 printErrorLine();
630 errln("\nError on line %d: Wrong resolved paragraph level from ubidi_setParaWithControls; expected %d actual %d",
631 (int)lineNumber, resolvedParaLevel, actualLevel);
632 continue;
633 }
634 if(!checkLevels(actualLevels, ubidi_getProcessedLength(ubidi.getAlias()))) {
635 continue;
636 }
57a6839d
A
637 }
638}
639
729e4ab9
A
640static UChar printLevel(UBiDiLevel level) {
641 if(level<UBIDI_DEFAULT_LTR) {
642 return 0x30+level;
643 } else {
644 return 0x78; // 'x'
645 }
646}
647
648static uint32_t getDirectionBits(const UBiDiLevel actualLevels[], int32_t actualCount) {
649 uint32_t actualDirectionBits=0;
650 for(int32_t i=0; i<actualCount; ++i) {
651 actualDirectionBits|=(1<<(actualLevels[i]&1));
652 }
653 return actualDirectionBits;
654}
655
57a6839d 656UBool BiDiConformanceTest::checkLevels(const UBiDiLevel actualLevels[], int32_t actualCount) {
729e4ab9
A
657 UBool isOk=TRUE;
658 if(levelsCount!=actualCount) {
57a6839d
A
659 errln("\nError on line %d: Wrong number of level values; expected %d actual %d",
660 (int)lineNumber, (int)levelsCount, (int)actualCount);
729e4ab9
A
661 isOk=FALSE;
662 } else {
663 for(int32_t i=0; i<actualCount; ++i) {
664 if(levels[i]!=actualLevels[i] && levels[i]<UBIDI_DEFAULT_LTR) {
665 if(directionBits!=3 && directionBits==getDirectionBits(actualLevels, actualCount)) {
666 // ICU used a shortcut:
667 // Since the text is unidirectional, it did not store the resolved
668 // levels but just returns all levels as the paragraph level 0 or 1.
669 // The reordering result is the same, so this is fine.
670 break;
671 } else {
57a6839d
A
672 errln("\nError on line %d: Wrong level value at index %d; expected %d actual %d",
673 (int)lineNumber, (int)i, levels[i], actualLevels[i]);
729e4ab9
A
674 isOk=FALSE;
675 break;
676 }
677 }
678 }
679 }
680 if(!isOk) {
57a6839d 681 printErrorLine();
729e4ab9
A
682 UnicodeString els("Expected levels: ");
683 int32_t i;
684 for(i=0; i<levelsCount; ++i) {
685 els.append((UChar)0x20).append(printLevel(levels[i]));
686 }
687 UnicodeString als("Actual levels: ");
688 for(i=0; i<actualCount; ++i) {
689 als.append((UChar)0x20).append(printLevel(actualLevels[i]));
690 }
691 errln(els);
692 errln(als);
693 }
694 return isOk;
695}
696
697// Note: ubidi_setReorderingOptions(ubidi, UBIDI_OPTION_REMOVE_CONTROLS);
698// does not work for custom BiDi class assignments
699// and anyway also removes LRM/RLM/ZWJ/ZWNJ which is not desirable here.
700// Therefore we just skip the indexes for BiDi controls while comparing
701// with the expected ordering that has them omitted.
57a6839d 702UBool BiDiConformanceTest::checkOrdering(UBiDi *ubidi) {
729e4ab9 703 UBool isOk=TRUE;
57a6839d 704 IcuTestErrorCode errorCode(*this, "checkOrdering()");
729e4ab9
A
705 int32_t resultLength=ubidi_getResultLength(ubidi); // visual length including BiDi controls
706 int32_t i, visualIndex;
707 // Note: It should be faster to call ubidi_countRuns()/ubidi_getVisualRun()
708 // and loop over each run's indexes, but that seems unnecessary for this test code.
709 for(i=visualIndex=0; i<resultLength; ++i) {
710 int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode);
711 if(errorCode.logIfFailureAndReset("ubidi_getLogicalIndex()")) {
712 errln("Input line %d: %s", (int)lineNumber, line);
713 return FALSE;
714 }
715 if(levels[logicalIndex]>=UBIDI_DEFAULT_LTR) {
716 continue; // BiDi control, omitted from expected ordering.
717 }
718 if(visualIndex<orderingCount && logicalIndex!=ordering[visualIndex]) {
57a6839d
A
719 errln("\nError on line %d: Wrong ordering value at visual index %d; expected %d actual %d",
720 (int)lineNumber, (int)visualIndex, ordering[visualIndex], logicalIndex);
729e4ab9
A
721 isOk=FALSE;
722 break;
723 }
724 ++visualIndex;
725 }
726 // visualIndex is now the visual length minus the BiDi controls,
727 // which should match the length of the BidiTest.txt ordering.
728 if(isOk && orderingCount!=visualIndex) {
57a6839d
A
729 errln("\nError on line %d: Wrong number of ordering values; expected %d actual %d",
730 (int)lineNumber, (int)orderingCount, (int)visualIndex);
729e4ab9
A
731 isOk=FALSE;
732 }
733 if(!isOk) {
57a6839d 734 printErrorLine();
729e4ab9
A
735 UnicodeString eord("Expected ordering: ");
736 for(i=0; i<orderingCount; ++i) {
737 eord.append((UChar)0x20).append((UChar)(0x30+ordering[i]));
738 }
739 UnicodeString aord("Actual ordering: ");
740 for(i=0; i<resultLength; ++i) {
741 int32_t logicalIndex=ubidi_getLogicalIndex(ubidi, i, errorCode);
742 if(levels[logicalIndex]<UBIDI_DEFAULT_LTR) {
743 aord.append((UChar)0x20).append((UChar)(0x30+logicalIndex));
744 }
745 }
746 errln(eord);
747 errln(aord);
748 }
749 return isOk;
750}
751
57a6839d 752void BiDiConformanceTest::printErrorLine() {
729e4ab9
A
753 ++errorCount;
754 errln("Input line %5d: %s", (int)lineNumber, line);
755 errln(UnicodeString("Input string: ")+inputString);
756 errln("Para level: %s", paraLevelName);
757}