]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/ubidi.cpp
ICU-59180.0.1.tar.gz
[apple/icu.git] / icuSources / common / ubidi.cpp
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 *
6 * Copyright (C) 1999-2015, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 ******************************************************************************
10 * file name: ubidi.c
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 1999jul27
16 * created by: Markus W. Scherer, updated by Matitiahu Allouche
17 *
18 */
19
20 #include "cmemory.h"
21 #include "unicode/utypes.h"
22 #include "unicode/ustring.h"
23 #include "unicode/uchar.h"
24 #include "unicode/ubidi.h"
25 #include "unicode/utf16.h"
26 #include "ubidi_props.h"
27 #include "ubidiimp.h"
28 #include "uassert.h"
29
30 /*
31 * General implementation notes:
32 *
33 * Throughout the implementation, there are comments like (W2) that refer to
34 * rules of the BiDi algorithm, in this example to the second rule of the
35 * resolution of weak types.
36 *
37 * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
38 * character according to UTF-16, the second UChar gets the directional property of
39 * the entire character assigned, while the first one gets a BN, a boundary
40 * neutral, type, which is ignored by most of the algorithm according to
41 * rule (X9) and the implementation suggestions of the BiDi algorithm.
42 *
43 * Later, adjustWSLevels() will set the level for each BN to that of the
44 * following character (UChar), which results in surrogate pairs getting the
45 * same level on each of their surrogates.
46 *
47 * In a UTF-8 implementation, the same thing could be done: the last byte of
48 * a multi-byte sequence would get the "real" property, while all previous
49 * bytes of that sequence would get BN.
50 *
51 * It is not possible to assign all those parts of a character the same real
52 * property because this would fail in the resolution of weak types with rules
53 * that look at immediately surrounding types.
54 *
55 * As a related topic, this implementation does not remove Boundary Neutral
56 * types from the input, but ignores them wherever this is relevant.
57 * For example, the loop for the resolution of the weak types reads
58 * types until it finds a non-BN.
59 * Also, explicit embedding codes are neither changed into BN nor removed.
60 * They are only treated the same way real BNs are.
61 * As stated before, adjustWSLevels() takes care of them at the end.
62 * For the purpose of conformance, the levels of all these codes
63 * do not matter.
64 *
65 * Note that this implementation modifies the dirProps
66 * after the initial setup, when applying X5c (replace FSI by LRI or RLI),
67 * X6, N0 (replace paired brackets by L or R).
68 *
69 * In this implementation, the resolution of weak types (W1 to W6),
70 * neutrals (N1 and N2), and the assignment of the resolved level (In)
71 * are all done in one single loop, in resolveImplicitLevels().
72 * Changes of dirProp values are done on the fly, without writing
73 * them back to the dirProps array.
74 *
75 *
76 * This implementation contains code that allows bypassing steps of the
77 * algorithm that are not needed for the specific paragraph
78 * in order to speed up the most common cases considerably,
79 * like text that is entirely LTR, or RTL text without numbers.
80 *
81 * Most of this is done by setting a bit for each directional property
82 * in a flags variable and later checking for whether there are
83 * any LTR characters or any RTL characters, or both, whether
84 * there are any explicit embedding codes, etc.
85 *
86 * If the (Xn) steps are performed, then the flags are re-evaluated,
87 * because they will then not contain the embedding codes any more
88 * and will be adjusted for override codes, so that subsequently
89 * more bypassing may be possible than what the initial flags suggested.
90 *
91 * If the text is not mixed-directional, then the
92 * algorithm steps for the weak type resolution are not performed,
93 * and all levels are set to the paragraph level.
94 *
95 * If there are no explicit embedding codes, then the (Xn) steps
96 * are not performed.
97 *
98 * If embedding levels are supplied as a parameter, then all
99 * explicit embedding codes are ignored, and the (Xn) steps
100 * are not performed.
101 *
102 * White Space types could get the level of the run they belong to,
103 * and are checked with a test of (flags&MASK_EMBEDDING) to
104 * decide whether the paragraph direction should be included in
105 * the flags variable.
106 *
107 * If there are no White Space types in the paragraph, then
108 * (L1) is not necessary in adjustWSLevels().
109 */
110
111 /* to avoid some conditional statements, use tiny constant arrays */
112 static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) };
113 static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) };
114 static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) };
115
116 #define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
117 #define DIRPROP_FLAG_E(level) flagE[(level)&1]
118 #define DIRPROP_FLAG_O(level) flagO[(level)&1]
119
120 #define DIR_FROM_STRONG(strong) ((strong)==L ? L : R)
121
122 #define NO_OVERRIDE(level) ((level)&~UBIDI_LEVEL_OVERRIDE)
123
124 /* UBiDi object management -------------------------------------------------- */
125
126 U_CAPI UBiDi * U_EXPORT2
127 ubidi_open(void)
128 {
129 UErrorCode errorCode=U_ZERO_ERROR;
130 return ubidi_openSized(0, 0, &errorCode);
131 }
132
133 U_CAPI UBiDi * U_EXPORT2
134 ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) {
135 UBiDi *pBiDi;
136
137 /* check the argument values */
138 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
139 return NULL;
140 } else if(maxLength<0 || maxRunCount<0) {
141 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
142 return NULL; /* invalid arguments */
143 }
144
145 /* allocate memory for the object */
146 pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi));
147 if(pBiDi==NULL) {
148 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
149 return NULL;
150 }
151
152 /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */
153 uprv_memset(pBiDi, 0, sizeof(UBiDi));
154
155 /* get BiDi properties */
156 pBiDi->bdp=ubidi_getSingleton();
157
158 /* allocate memory for arrays as requested */
159 if(maxLength>0) {
160 if( !getInitialDirPropsMemory(pBiDi, maxLength) ||
161 !getInitialLevelsMemory(pBiDi, maxLength)
162 ) {
163 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
164 }
165 } else {
166 pBiDi->mayAllocateText=TRUE;
167 }
168
169 if(maxRunCount>0) {
170 if(maxRunCount==1) {
171 /* use simpleRuns[] */
172 pBiDi->runsSize=sizeof(Run);
173 } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) {
174 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
175 }
176 } else {
177 pBiDi->mayAllocateRuns=TRUE;
178 }
179
180 if(U_SUCCESS(*pErrorCode)) {
181 return pBiDi;
182 } else {
183 ubidi_close(pBiDi);
184 return NULL;
185 }
186 }
187
188 /*
189 * We are allowed to allocate memory if memory==NULL or
190 * mayAllocate==TRUE for each array that we need.
191 * We also try to grow memory as needed if we
192 * allocate it.
193 *
194 * Assume sizeNeeded>0.
195 * If *pMemory!=NULL, then assume *pSize>0.
196 *
197 * ### this realloc() may unnecessarily copy the old data,
198 * which we know we don't need any more;
199 * is this the best way to do this??
200 */
201 U_CFUNC UBool
202 ubidi_getMemory(BidiMemoryForAllocation *bidiMem, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded) {
203 void **pMemory = (void **)bidiMem;
204 /* check for existing memory */
205 if(*pMemory==NULL) {
206 /* we need to allocate memory */
207 if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=NULL) {
208 *pSize=sizeNeeded;
209 return TRUE;
210 } else {
211 return FALSE;
212 }
213 } else {
214 if(sizeNeeded<=*pSize) {
215 /* there is already enough memory */
216 return TRUE;
217 }
218 else if(!mayAllocate) {
219 /* not enough memory, and we must not allocate */
220 return FALSE;
221 } else {
222 /* we try to grow */
223 void *memory;
224 /* in most cases, we do not need the copy-old-data part of
225 * realloc, but it is needed when adding runs using getRunsMemory()
226 * in setParaRunsOnly()
227 */
228 if((memory=uprv_realloc(*pMemory, sizeNeeded))!=NULL) {
229 *pMemory=memory;
230 *pSize=sizeNeeded;
231 return TRUE;
232 } else {
233 /* we failed to grow */
234 return FALSE;
235 }
236 }
237 }
238 }
239
240 U_CAPI void U_EXPORT2
241 ubidi_close(UBiDi *pBiDi) {
242 if(pBiDi!=NULL) {
243 pBiDi->pParaBiDi=NULL; /* in case one tries to reuse this block */
244 if(pBiDi->dirInsertMemory!=NULL) {
245 uprv_free(pBiDi->dirInsertMemory);
246 }
247 if(pBiDi->dirPropsMemory!=NULL) {
248 uprv_free(pBiDi->dirPropsMemory);
249 }
250 if(pBiDi->levelsMemory!=NULL) {
251 uprv_free(pBiDi->levelsMemory);
252 }
253 if(pBiDi->openingsMemory!=NULL) {
254 uprv_free(pBiDi->openingsMemory);
255 }
256 if(pBiDi->parasMemory!=NULL) {
257 uprv_free(pBiDi->parasMemory);
258 }
259 if(pBiDi->runsMemory!=NULL) {
260 uprv_free(pBiDi->runsMemory);
261 }
262 if(pBiDi->isolatesMemory!=NULL) {
263 uprv_free(pBiDi->isolatesMemory);
264 }
265 if(pBiDi->insertPoints.points!=NULL) {
266 uprv_free(pBiDi->insertPoints.points);
267 }
268
269 uprv_free(pBiDi);
270 }
271 }
272
273 /* set to approximate "inverse BiDi" ---------------------------------------- */
274
275 U_CAPI void U_EXPORT2
276 ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) {
277 if(pBiDi!=NULL) {
278 pBiDi->isInverse=isInverse;
279 pBiDi->reorderingMode = isInverse ? UBIDI_REORDER_INVERSE_NUMBERS_AS_L
280 : UBIDI_REORDER_DEFAULT;
281 }
282 }
283
284 U_CAPI UBool U_EXPORT2
285 ubidi_isInverse(UBiDi *pBiDi) {
286 if(pBiDi!=NULL) {
287 return pBiDi->isInverse;
288 } else {
289 return FALSE;
290 }
291 }
292
293 /* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
294 * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre
295 * concept of RUNS_ONLY which is a double operation.
296 * It could be advantageous to divide this into 3 concepts:
297 * a) Operation: direct / inverse / RUNS_ONLY
298 * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R
299 * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL
300 * This would allow combinations not possible today like RUNS_ONLY with
301 * NUMBERS_SPECIAL.
302 * It would also allow setting INSERT_MARKS for the direct step of RUNS_ONLY and
303 * REMOVE_CONTROLS for the inverse step.
304 * Not all combinations would be supported, and probably not all make sense.
305 * We would need to document which ones are supported and what the
306 * fallbacks are for unsupported combinations.
307 */
308 U_CAPI void U_EXPORT2
309 ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) {
310 if ((pBiDi!=NULL) && (reorderingMode >= UBIDI_REORDER_DEFAULT)
311 && (reorderingMode < UBIDI_REORDER_COUNT)) {
312 pBiDi->reorderingMode = reorderingMode;
313 pBiDi->isInverse = (UBool)(reorderingMode == UBIDI_REORDER_INVERSE_NUMBERS_AS_L);
314 }
315 }
316
317 U_CAPI UBiDiReorderingMode U_EXPORT2
318 ubidi_getReorderingMode(UBiDi *pBiDi) {
319 if (pBiDi!=NULL) {
320 return pBiDi->reorderingMode;
321 } else {
322 return UBIDI_REORDER_DEFAULT;
323 }
324 }
325
326 U_CAPI void U_EXPORT2
327 ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions) {
328 if (reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
329 reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
330 }
331 if (pBiDi!=NULL) {
332 pBiDi->reorderingOptions=reorderingOptions;
333 }
334 }
335
336 U_CAPI uint32_t U_EXPORT2
337 ubidi_getReorderingOptions(UBiDi *pBiDi) {
338 if (pBiDi!=NULL) {
339 return pBiDi->reorderingOptions;
340 } else {
341 return 0;
342 }
343 }
344
345 U_CAPI UBiDiDirection U_EXPORT2
346 ubidi_getBaseDirection(const UChar *text,
347 int32_t length){
348
349 int32_t i;
350 UChar32 uchar;
351 UCharDirection dir;
352
353 if( text==NULL || length<-1 ){
354 return UBIDI_NEUTRAL;
355 }
356
357 if(length==-1) {
358 length=u_strlen(text);
359 }
360
361 for( i = 0 ; i < length; ) {
362 /* i is incremented by U16_NEXT */
363 U16_NEXT(text, i, length, uchar);
364 dir = u_charDirection(uchar);
365 if( dir == U_LEFT_TO_RIGHT )
366 return UBIDI_LTR;
367 if( dir == U_RIGHT_TO_LEFT || dir ==U_RIGHT_TO_LEFT_ARABIC )
368 return UBIDI_RTL;
369 }
370 return UBIDI_NEUTRAL;
371 }
372
373 /* perform (P2)..(P3) ------------------------------------------------------- */
374
375 /**
376 * Returns the directionality of the first strong character
377 * after the last B in prologue, if any.
378 * Requires prologue!=null.
379 */
380 static DirProp
381 firstL_R_AL(UBiDi *pBiDi) {
382 const UChar *text=pBiDi->prologue;
383 int32_t length=pBiDi->proLength;
384 int32_t i;
385 UChar32 uchar;
386 DirProp dirProp, result=ON;
387 for(i=0; i<length; ) {
388 /* i is incremented by U16_NEXT */
389 U16_NEXT(text, i, length, uchar);
390 dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
391 if(result==ON) {
392 if(dirProp==L || dirProp==R || dirProp==AL) {
393 result=dirProp;
394 }
395 } else {
396 if(dirProp==B) {
397 result=ON;
398 }
399 }
400 }
401 return result;
402 }
403
404 /*
405 * Check that there are enough entries in the array pointed to by pBiDi->paras
406 */
407 static UBool
408 checkParaCount(UBiDi *pBiDi) {
409 int32_t count=pBiDi->paraCount;
410 if(pBiDi->paras==pBiDi->simpleParas) {
411 if(count<=SIMPLE_PARAS_COUNT)
412 return TRUE;
413 if(!getInitialParasMemory(pBiDi, SIMPLE_PARAS_COUNT * 2))
414 return FALSE;
415 pBiDi->paras=pBiDi->parasMemory;
416 uprv_memcpy(pBiDi->parasMemory, pBiDi->simpleParas, SIMPLE_PARAS_COUNT * sizeof(Para));
417 return TRUE;
418 }
419 if(!getInitialParasMemory(pBiDi, count * 2))
420 return FALSE;
421 pBiDi->paras=pBiDi->parasMemory;
422 return TRUE;
423 }
424
425 /*
426 * Get the directional properties for the inserted bidi controls.
427 */
428
/*
 * Subset of the bidi classes that can be stored for inserted controls.
 * Every value fits in 4 bits; Insert_none (0) stands for all other classes.
 * The comment on each line names the corresponding standard class.
 */
enum {
    Insert_none  = 0,   /* all others */
    Insert_L     = 1,   /* L   = U_LEFT_TO_RIGHT */
    Insert_R     = 2,   /* R   = U_RIGHT_TO_LEFT */
    Insert_AL    = 3,   /* AL  = U_RIGHT_TO_LEFT_ARABIC */
    Insert_LRE   = 4,   /* LRE = U_LEFT_TO_RIGHT_EMBEDDING */
    Insert_LRO   = 5,   /* LRO = U_LEFT_TO_RIGHT_OVERRIDE */
    Insert_RLE   = 6,   /* RLE = U_RIGHT_TO_LEFT_EMBEDDING */
    Insert_RLO   = 7,   /* RLO = U_RIGHT_TO_LEFT_OVERRIDE */
    Insert_PDF   = 8,   /* PDF = U_POP_DIRECTIONAL_FORMAT */
    Insert_FSI   = 9,   /* FSI = U_FIRST_STRONG_ISOLATE */
    Insert_LRI   = 10,  /* LRI = U_LEFT_TO_RIGHT_ISOLATE */
    Insert_RLI   = 11,  /* RLI = U_RIGHT_TO_LEFT_ISOLATE */
    Insert_PDI   = 12,  /* PDI = U_POP_DIRECTIONAL_ISOLATE */
    Insert_B     = 13,  /* B   = U_BLOCK_SEPARATOR */
    Insert_S     = 14,  /* S   = U_SEGMENT_SEPARATOR */
    Insert_WS    = 15,  /* WS  = U_WHITE_SPACE_NEUTRAL */
    Insert_count = 16   /* number of Insert_xxx values */
};
449
450 /* map standard dir class to special 4-bit insert value (Insert_none as default) */
451 static const uint16_t insertDirFromStdDir[dirPropCount] = {
452 Insert_none, /* L= U_LEFT_TO_RIGHT */
453 Insert_none, /* R= U_RIGHT_TO_LEFT, */
454 Insert_none, /* EN= U_EUROPEAN_NUMBER */
455 Insert_none, /* ES= U_EUROPEAN_NUMBER_SEPARATOR */
456 Insert_none, /* ET= U_EUROPEAN_NUMBER_TERMINATOR */
457 Insert_none, /* AN= U_ARABIC_NUMBER */
458 Insert_none, /* CS= U_COMMON_NUMBER_SEPARATOR */
459 Insert_none, /* B= U_BLOCK_SEPARATOR */
460 Insert_none, /* S= U_SEGMENT_SEPARATOR */
461 Insert_none, /* WS= U_WHITE_SPACE_NEUTRAL */
462 Insert_none, /* ON= U_OTHER_NEUTRAL */
463 Insert_LRE, /* LRE=U_LEFT_TO_RIGHT_EMBEDDING */
464 Insert_LRO, /* LRO=U_LEFT_TO_RIGHT_OVERRIDE */
465 Insert_none, /* AL= U_RIGHT_TO_LEFT_ARABIC */
466 Insert_RLE, /* RLE=U_RIGHT_TO_LEFT_EMBEDDING */
467 Insert_RLO, /* RLO=U_RIGHT_TO_LEFT_OVERRIDE */
468 Insert_PDF, /* PDF=U_POP_DIRECTIONAL_FORMAT */
469 Insert_none, /* NSM=U_DIR_NON_SPACING_MARK */
470 Insert_none, /* BN= U_BOUNDARY_NEUTRAL */
471 Insert_FSI, /* FSI=U_FIRST_STRONG_ISOLATE */
472 Insert_LRI, /* LRI=U_LEFT_TO_RIGHT_ISOLATE */
473 Insert_RLI, /* RLI=U_RIGHT_TO_LEFT_ISOLATE */
474 Insert_PDI, /* PDI=U_POP_DIRECTIONAL_ISOLATE */
475 Insert_none, /* ENL */
476 Insert_none, /* ENR */
477 };
478
479 /* map special 4-bit insert direction class to standard dir class (ON as default) */
480 static const uint8_t stdDirFromInsertDir[Insert_count] = {
481 ON, /* Insert_none > ON */
482 L, /* Insert_L */
483 R, /* Insert_R */
484 AL, /* Insert_AL */
485 LRE, /* Insert_LRE */
486 LRO, /* Insert_LRO */
487 RLE, /* Insert_RLE */
488 RLO, /* Insert_RLO */
489 PDF, /* Insert_PDF */
490 FSI, /* Insert_FSI */
491 LRI, /* Insert_LRI */
492 RLI, /* Insert_RLI */
493 PDI, /* Insert_PDI */
494 B, /* Insert_B */
495 S, /* Insert_S */
496 WS, /* Insert_WS */
497 };
498
/* maximum number of characters accepted in one inserted control string */
enum {
    kMaxControlStringLen = 4
};
500
501 static UBool
502 getDirInsert(UBiDi *pBiDi,
503 const int32_t *offsets, int32_t offsetCount,
504 const int32_t *controlStringIndices,
505 const UChar * const * controlStrings) {
506 int32_t offset, offsetsIndex;
507 uint16_t *dirInsert = pBiDi->dirInsert;
508 /* initialize dirInsert */
509 for (offset = 0; offset < pBiDi->length; offset++) {
510 dirInsert[offset] = 0;
511 }
512 for (offsetsIndex = 0; offsetsIndex < offsetCount; offsetsIndex++) {
513 const UChar * controlString;
514 UChar uchar;
515 int32_t controlStringIndex, dirInsertIndex = 0;
516 uint16_t dirInsertValue = 0;
517 offset = offsets[offsetsIndex];
518 if (offset < 0 || offset >= pBiDi->length) {
519 return FALSE; /* param err in offsets array */
520 }
521 controlStringIndex = (controlStringIndices == NULL)? offsetsIndex: controlStringIndices[offsetsIndex];
522 controlString = controlStrings[controlStringIndex];
523 if (controlString == NULL) {
524 return FALSE; /* param err in controlStrings array */
525 }
526 while ((uchar = *controlString++) != 0) {
527 uint16_t insertValue = (U16_IS_SURROGATE(uchar))? Insert_none:
528 insertDirFromStdDir[(uint32_t)ubidi_getCustomizedClass(pBiDi, uchar)];
529 if (dirInsertIndex >= kMaxControlStringLen || insertValue == Insert_none) {
530 return FALSE; /* param err in controlStrings array */
531 }
532 dirInsertValue |= (insertValue << (4 * dirInsertIndex++));
533 }
534 dirInsert[offset] = dirInsertValue;
535 }
536 return TRUE;
537 }
538
539 /*
540 * Get the directional properties for the text, calculate the flags bit-set, and
541 * determine the paragraph level if necessary (in pBiDi->paras[i].level).
542 * FSI initiators are also resolved and their dirProp replaced with LRI or RLI.
543 * When encountering an FSI, it is initially replaced with an LRI, which is the
544 * default. Only if a strong R or AL is found within its scope will the LRI be
545 * replaced by an RLI.
546 */
547 static UBool
548 getDirProps(UBiDi *pBiDi) {
549 const UChar *text=pBiDi->text;
550 DirProp *dirProps=pBiDi->dirPropsMemory; /* pBiDi->dirProps is const */
551 uint16_t *dirInsert = pBiDi->dirInsert; /* may be NULL */
552
553 int32_t i=0, originalLength=pBiDi->originalLength;
554 Flags flags=0; /* collect all directionalities in the text */
555 UChar32 uchar;
556 DirProp dirProp=0, defaultParaLevel=0; /* initialize to avoid compiler warnings */
557 int32_t dirInsertValue;
558 int8_t dirInsertIndex; /* position within dirInsertValue, if any */
559 UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel);
560 /* for inverse BiDi, the default para level is set to RTL if there is a
561 strong R or AL character at either end of the text */
562 UBool isDefaultLevelInverse=isDefaultLevel && (UBool)
563 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
564 pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL);
565 int32_t lastArabicPos=-1;
566 int32_t controlCount=0;
567 UBool removeBiDiControls = (UBool)(pBiDi->reorderingOptions &
568 UBIDI_OPTION_REMOVE_CONTROLS);
569
570 enum State {
571 NOT_SEEKING_STRONG, /* 0: not contextual paraLevel, not after FSI */
572 SEEKING_STRONG_FOR_PARA, /* 1: looking for first strong char in para */
573 SEEKING_STRONG_FOR_FSI, /* 2: looking for first strong after FSI */
574 LOOKING_FOR_PDI /* 3: found strong after FSI, looking for PDI */
575 };
576 State state;
577 DirProp lastStrong=ON; /* for default level & inverse BiDi */
578 /* The following stacks are used to manage isolate sequences. Those
579 sequences may be nested, but obviously never more deeply than the
580 maximum explicit embedding level.
581 lastStack is the index of the last used entry in the stack. A value of -1
582 means that there is no open isolate sequence.
583 lastStack is reset to -1 on paragraph boundaries. */
584 /* The following stack contains the position of the initiator of
585 each open isolate sequence */
586 int32_t isolateStartStack[UBIDI_MAX_EXPLICIT_LEVEL+1];
587 int8_t isolateStartInsertIndex[UBIDI_MAX_EXPLICIT_LEVEL+1];
588 /* The following stack contains the last known state before
589 encountering the initiator of an isolate sequence */
590 State previousStateStack[UBIDI_MAX_EXPLICIT_LEVEL+1];
591 int32_t stackLast=-1;
592
593 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING)
594 pBiDi->length=0;
595 defaultParaLevel=pBiDi->paraLevel&1;
596 if(isDefaultLevel) {
597 pBiDi->paras[0].level=defaultParaLevel;
598 lastStrong=defaultParaLevel;
599 if(pBiDi->proLength>0 && /* there is a prologue */
600 (dirProp=firstL_R_AL(pBiDi))!=ON) { /* with a strong character */
601 if(dirProp==L)
602 pBiDi->paras[0].level=0; /* set the default para level */
603 else
604 pBiDi->paras[0].level=1; /* set the default para level */
605 state=NOT_SEEKING_STRONG;
606 } else {
607 state=SEEKING_STRONG_FOR_PARA;
608 }
609 } else {
610 pBiDi->paras[0].level=pBiDi->paraLevel;
611 state=NOT_SEEKING_STRONG;
612 }
613 /* count paragraphs and determine the paragraph level (P2..P3) */
614 /*
615 * see comment in ubidi.h:
616 * the UBIDI_DEFAULT_XXX values are designed so that
617 * their bit 0 alone yields the intended default
618 */
619 dirInsertValue = 0;
620 dirInsertIndex = -1; /* indicate that we have not checked dirInsert yet */
621 for( /* i=0 above */ ; i<originalLength; ) {
622 if (dirInsert != NULL && dirInsertIndex < 0) {
623 dirInsertValue = dirInsert[i];
624 }
625 if (dirInsertValue > 0) {
626 dirInsertIndex++;
627 dirProp = (DirProp)stdDirFromInsertDir[dirInsertValue & 0x000F];
628 dirInsertValue >>= 4;
629 flags|=DIRPROP_FLAG(dirProp);
630 uchar = 0;
631 } else {
632 dirInsertIndex = -1;
633 /* i is incremented by U16_NEXT */
634 U16_NEXT(text, i, originalLength, uchar);
635 flags|=DIRPROP_FLAG(dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar));
636 dirProps[i-1]=dirProp;
637 if(uchar>0xffff) { /* set the lead surrogate's property to BN */
638 flags|=DIRPROP_FLAG(BN);
639 dirProps[i-2]=BN;
640 }
641 }
642 if(removeBiDiControls && IS_BIDI_CONTROL_CHAR(uchar))
643 controlCount++;
644 if(dirProp==L) {
645 if(state==SEEKING_STRONG_FOR_PARA) {
646 pBiDi->paras[pBiDi->paraCount-1].level=0;
647 state=NOT_SEEKING_STRONG;
648 }
649 else if(state==SEEKING_STRONG_FOR_FSI) {
650 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
651 /* no need for next statement, already set by default */
652 /* dirProps[isolateStartStack[stackLast]]=LRI; */
653 flags|=DIRPROP_FLAG(LRI);
654 }
655 state=LOOKING_FOR_PDI;
656 }
657 lastStrong=L;
658 continue;
659 }
660 if(dirProp==R || dirProp==AL) {
661 if(state==SEEKING_STRONG_FOR_PARA) {
662 pBiDi->paras[pBiDi->paraCount-1].level=1;
663 state=NOT_SEEKING_STRONG;
664 }
665 else if(state==SEEKING_STRONG_FOR_FSI) {
666 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
667 if (isolateStartInsertIndex[stackLast] < 0) {
668 dirProps[isolateStartStack[stackLast]]=RLI;
669 } else {
670 dirInsert[stackLast] &= ~(0x000F << (4*isolateStartInsertIndex[stackLast]));
671 dirInsert[stackLast] |= (Insert_RLI << (4*isolateStartInsertIndex[stackLast]));
672 }
673 flags|=DIRPROP_FLAG(RLI);
674 }
675 state=LOOKING_FOR_PDI;
676 }
677 lastStrong=R;
678 if(dirProp==AL)
679 lastArabicPos=i-1;
680 continue;
681 }
682 if(dirProp>=FSI && dirProp<=RLI) { /* FSI, LRI or RLI */
683 stackLast++;
684 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
685 isolateStartStack[stackLast]= (dirInsertIndex < 0)? i-1: i /* we have not incremented with U16_NEXT yet */;
686 isolateStartInsertIndex[stackLast] = dirInsertIndex;
687 previousStateStack[stackLast]=state;
688 }
689 if(dirProp==FSI) {
690 if (dirInsertIndex < 0) {
691 dirProps[i-1]=LRI; /* default if no strong char */
692 } else {
693 dirInsert[i] &= ~(0x000F << (4*dirInsertIndex));
694 dirInsert[i] |= (Insert_LRI << (4*dirInsertIndex));
695 }
696 state=SEEKING_STRONG_FOR_FSI;
697 }
698 else
699 state=LOOKING_FOR_PDI;
700 continue;
701 }
702 if(dirProp==PDI) {
703 if(state==SEEKING_STRONG_FOR_FSI) {
704 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
705 /* no need for next statement, already set by default */
706 /* dirProps[isolateStartStack[stackLast]]=LRI; */
707 flags|=DIRPROP_FLAG(LRI);
708 }
709 }
710 if(stackLast>=0) {
711 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL)
712 state=previousStateStack[stackLast];
713 stackLast--;
714 }
715 continue;
716 }
717 if(dirProp==B) {
718 if(i<originalLength && uchar==CR && text[i]==LF) /* do nothing on the CR */
719 continue;
720 pBiDi->paras[pBiDi->paraCount-1].limit=i;
721 if(isDefaultLevelInverse && lastStrong==R)
722 pBiDi->paras[pBiDi->paraCount-1].level=1;
723 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
724 /* When streaming, we only process whole paragraphs
725 thus some updates are only done on paragraph boundaries */
726 pBiDi->length=i; /* i is index to next character */
727 pBiDi->controlCount=controlCount;
728 }
729 if(i<originalLength) { /* B not last char in text */
730 pBiDi->paraCount++;
731 if(checkParaCount(pBiDi)==FALSE) /* not enough memory for a new para entry */
732 return FALSE;
733 if(isDefaultLevel) {
734 pBiDi->paras[pBiDi->paraCount-1].level=defaultParaLevel;
735 state=SEEKING_STRONG_FOR_PARA;
736 lastStrong=defaultParaLevel;
737 } else {
738 pBiDi->paras[pBiDi->paraCount-1].level=pBiDi->paraLevel;
739 state=NOT_SEEKING_STRONG;
740 }
741 stackLast=-1;
742 }
743 continue;
744 }
745 }
746 /* Ignore still open isolate sequences with overflow */
747 if(stackLast>UBIDI_MAX_EXPLICIT_LEVEL) {
748 stackLast=UBIDI_MAX_EXPLICIT_LEVEL;
749 state=SEEKING_STRONG_FOR_FSI; /* to be on the safe side */
750 }
751 /* Resolve direction of still unresolved open FSI sequences */
752 while(stackLast>=0) {
753 if(state==SEEKING_STRONG_FOR_FSI) {
754 /* no need for next statement, already set by default */
755 /* dirProps[isolateStartStack[stackLast]]=LRI; */
756 flags|=DIRPROP_FLAG(LRI);
757 break;
758 }
759 state=previousStateStack[stackLast];
760 stackLast--;
761 }
762 /* When streaming, ignore text after the last paragraph separator */
763 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
764 if(pBiDi->length<originalLength)
765 pBiDi->paraCount--;
766 } else {
767 pBiDi->paras[pBiDi->paraCount-1].limit=originalLength;
768 pBiDi->controlCount=controlCount;
769 }
770 /* For inverse bidi, default para direction is RTL if there is
771 a strong R or AL at either end of the paragraph */
772 if(isDefaultLevelInverse && lastStrong==R) {
773 pBiDi->paras[pBiDi->paraCount-1].level=1;
774 }
775 if(isDefaultLevel) {
776 pBiDi->paraLevel=pBiDi->paras[0].level;
777 }
778 /* The following is needed to resolve the text direction for default level
779 paragraphs containing no strong character */
780 for(i=0; i<pBiDi->paraCount; i++)
781 flags|=DIRPROP_FLAG_LR(pBiDi->paras[i].level);
782
783 if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
784 flags|=DIRPROP_FLAG(L);
785 }
786 pBiDi->flags=flags;
787 pBiDi->lastArabicPos=lastArabicPos;
788 return TRUE;
789 }
790
791 /* determine the paragraph level at position index */
792 U_CFUNC UBiDiLevel
793 ubidi_getParaLevelAtIndex(const UBiDi *pBiDi, int32_t pindex) {
794 int32_t i;
795 for(i=0; i<pBiDi->paraCount; i++)
796 if(pindex<pBiDi->paras[i].limit)
797 break;
798 if(i>=pBiDi->paraCount)
799 i=pBiDi->paraCount-1;
800 return (UBiDiLevel)(pBiDi->paras[i].level);
801 }
802
803 /* Functions for handling paired brackets ----------------------------------- */
804
805 /* In the isoRuns array, the first entry is used for text outside of any
806 isolate sequence. Higher entries are used for each more deeply nested
807 isolate sequence. isoRunLast is the index of the last used entry. The
808 openings array is used to note the data of opening brackets not yet
809 matched by a closing bracket, or matched but still susceptible to change
810 level.
811 Each isoRun entry contains the index of the first and
812 one-after-last openings entries for pending opening brackets it
813 contains. The next openings entry to use is the one-after-last of the
814 most deeply nested isoRun entry.
815 isoRun entries also contain their current embedding level and the last
816 encountered strong character, since these will be needed to resolve
817 the level of paired brackets. */
818
819 static void
820 bracketInit(UBiDi *pBiDi, BracketData *bd) {
821 bd->pBiDi=pBiDi;
822 bd->isoRunLast=0;
823 bd->isoRuns[0].start=0;
824 bd->isoRuns[0].limit=0;
825 bd->isoRuns[0].level=GET_PARALEVEL(pBiDi, 0);
826 UBiDiLevel t = GET_PARALEVEL(pBiDi, 0) & 1;
827 bd->isoRuns[0].lastStrong = bd->isoRuns[0].lastBase = t;
828 bd->isoRuns[0].contextDir = (UBiDiDirection)t;
829 bd->isoRuns[0].contextPos=0;
830 if(pBiDi->openingsMemory) {
831 bd->openings=pBiDi->openingsMemory;
832 bd->openingsCount=pBiDi->openingsSize / sizeof(Opening);
833 } else {
834 bd->openings=bd->simpleOpenings;
835 bd->openingsCount=SIMPLE_OPENINGS_COUNT;
836 }
837 bd->isNumbersSpecial=bd->pBiDi->reorderingMode==UBIDI_REORDER_NUMBERS_SPECIAL ||
838 bd->pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL;
839 }
840
841 /* paragraph boundary */
842 static void
843 bracketProcessB(BracketData *bd, UBiDiLevel level) {
844 bd->isoRunLast=0;
845 bd->isoRuns[0].limit=0;
846 bd->isoRuns[0].level=level;
847 bd->isoRuns[0].lastStrong=bd->isoRuns[0].lastBase=level&1;
848 bd->isoRuns[0].contextDir=(UBiDiDirection)(level&1);
849 bd->isoRuns[0].contextPos=0;
850 }
851
852 /* LRE, LRO, RLE, RLO, PDF */
853 static void
854 bracketProcessBoundary(BracketData *bd, int32_t lastCcPos, DirProp lastCcDirProp,
855 UBiDiLevel contextLevel, UBiDiLevel embeddingLevel) {
856 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
857 if(DIRPROP_FLAG(lastCcDirProp)&MASK_ISO) /* after an isolate */
858 return;
859 if(NO_OVERRIDE(embeddingLevel)>NO_OVERRIDE(contextLevel)) /* not a PDF */
860 contextLevel=embeddingLevel;
861 pLastIsoRun->limit=pLastIsoRun->start;
862 pLastIsoRun->level=embeddingLevel;
863 pLastIsoRun->lastStrong=pLastIsoRun->lastBase=contextLevel&1;
864 pLastIsoRun->contextDir=(UBiDiDirection)(contextLevel&1);
865 pLastIsoRun->contextPos=(UBiDiDirection)lastCcPos;
866 }
867
868 /* LRI or RLI */
869 static void
870 bracketProcessLRI_RLI(BracketData *bd, UBiDiLevel level) {
871 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
872 int16_t lastLimit;
873 pLastIsoRun->lastBase=ON;
874 lastLimit=pLastIsoRun->limit;
875 bd->isoRunLast++;
876 pLastIsoRun++;
877 pLastIsoRun->start=pLastIsoRun->limit=lastLimit;
878 pLastIsoRun->level=level;
879 pLastIsoRun->lastStrong=pLastIsoRun->lastBase=level&1;
880 pLastIsoRun->contextDir=(UBiDiDirection)(level&1);
881 pLastIsoRun->contextPos=0;
882 }
883
884 /* PDI */
885 static void
886 bracketProcessPDI(BracketData *bd) {
887 IsoRun *pLastIsoRun;
888 bd->isoRunLast--;
889 pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
890 pLastIsoRun->lastBase=ON;
891 }
892
893 /* newly found opening bracket: create an openings entry */
894 static UBool /* return TRUE if success */
895 bracketAddOpening(BracketData *bd, UChar match, int32_t position) {
896 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
897 Opening *pOpening;
898 if(pLastIsoRun->limit>=bd->openingsCount) { /* no available new entry */
899 UBiDi *pBiDi=bd->pBiDi;
900 if(!getInitialOpeningsMemory(pBiDi, pLastIsoRun->limit * 2))
901 return FALSE;
902 if(bd->openings==bd->simpleOpenings)
903 uprv_memcpy(pBiDi->openingsMemory, bd->simpleOpenings,
904 SIMPLE_OPENINGS_COUNT * sizeof(Opening));
905 bd->openings=pBiDi->openingsMemory; /* may have changed */
906 bd->openingsCount=pBiDi->openingsSize / sizeof(Opening);
907 }
908 pOpening=&bd->openings[pLastIsoRun->limit];
909 pOpening->position=position;
910 pOpening->match=match;
911 pOpening->contextDir=pLastIsoRun->contextDir;
912 pOpening->contextPos=pLastIsoRun->contextPos;
913 pOpening->flags=0;
914 pLastIsoRun->limit++;
915 return TRUE;
916 }
917
918 /* change N0c1 to N0c2 when a preceding bracket is assigned the embedding level */
919 static void
920 fixN0c(BracketData *bd, int32_t openingIndex, int32_t newPropPosition, DirProp newProp) {
921 /* This function calls itself recursively */
922 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
923 Opening *qOpening;
924 DirProp *dirProps=bd->pBiDi->dirProps;
925 int32_t k, openingPosition, closingPosition;
926 for(k=openingIndex+1, qOpening=&bd->openings[k]; k<pLastIsoRun->limit; k++, qOpening++) {
927 if(qOpening->match>=0) /* not an N0c match */
928 continue;
929 if(newPropPosition<qOpening->contextPos)
930 break;
931 if(newPropPosition>=qOpening->position)
932 continue;
933 if(newProp==qOpening->contextDir)
934 break;
935 openingPosition=qOpening->position;
936 dirProps[openingPosition]=newProp;
937 closingPosition=-(qOpening->match);
938 dirProps[closingPosition]=newProp;
939 qOpening->match=0; /* prevent further changes */
940 fixN0c(bd, k, openingPosition, newProp);
941 fixN0c(bd, k, closingPosition, newProp);
942 }
943 }
944
945 /* process closing bracket */
946 static DirProp /* return L or R if N0b or N0c, ON if N0d */
947 bracketProcessClosing(BracketData *bd, int32_t openIdx, int32_t position) {
948 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
949 Opening *pOpening, *qOpening;
950 UBiDiDirection direction;
951 UBool stable;
952 DirProp newProp;
953 pOpening=&bd->openings[openIdx];
954 direction=(UBiDiDirection)(pLastIsoRun->level&1);
955 stable=TRUE; /* assume stable until proved otherwise */
956
957 /* The stable flag is set when brackets are paired and their
958 level is resolved and cannot be changed by what will be
959 found later in the source string.
960 An unstable match can occur only when applying N0c, where
961 the resolved level depends on the preceding context, and
962 this context may be affected by text occurring later.
963 Example: RTL paragraph containing: abc[(latin) HEBREW]
964 When the closing parenthesis is encountered, it appears
965 that N0c1 must be applied since 'abc' sets an opposite
966 direction context and both parentheses receive level 2.
967 However, when the closing square bracket is processed,
968 N0b applies because of 'HEBREW' being included within the
969 brackets, thus the square brackets are treated like R and
970 receive level 1. However, this changes the preceding
971 context of the opening parenthesis, and it now appears
972 that N0c2 must be applied to the parentheses rather than
973 N0c1. */
974
975 if((direction==0 && pOpening->flags&FOUND_L) ||
976 (direction==1 && pOpening->flags&FOUND_R)) { /* N0b */
977 newProp=direction;
978 }
979 else if(pOpening->flags&(FOUND_L|FOUND_R)) { /* N0c */
980 /* it is stable if there is no containing pair or in
981 conditions too complicated and not worth checking */
982 stable=(openIdx==pLastIsoRun->start);
983 if(direction!=pOpening->contextDir)
984 newProp=pOpening->contextDir; /* N0c1 */
985 else
986 newProp=direction; /* N0c2 */
987 } else {
988 /* forget this and any brackets nested within this pair */
989 pLastIsoRun->limit=openIdx;
990 return ON; /* N0d */
991 }
992 bd->pBiDi->dirProps[pOpening->position]=newProp;
993 bd->pBiDi->dirProps[position]=newProp;
994 /* Update nested N0c pairs that may be affected */
995 fixN0c(bd, openIdx, pOpening->position, newProp);
996 if(stable) {
997 pLastIsoRun->limit=openIdx; /* forget any brackets nested within this pair */
998 /* remove lower located synonyms if any */
999 while(pLastIsoRun->limit>pLastIsoRun->start &&
1000 bd->openings[pLastIsoRun->limit-1].position==pOpening->position)
1001 pLastIsoRun->limit--;
1002 } else {
1003 int32_t k;
1004 pOpening->match=-position;
1005 /* neutralize lower located synonyms if any */
1006 k=openIdx-1;
1007 while(k>=pLastIsoRun->start &&
1008 bd->openings[k].position==pOpening->position)
1009 bd->openings[k--].match=0;
1010 /* neutralize any unmatched opening between the current pair;
1011 this will also neutralize higher located synonyms if any */
1012 for(k=openIdx+1; k<pLastIsoRun->limit; k++) {
1013 qOpening=&bd->openings[k];
1014 if(qOpening->position>=position)
1015 break;
1016 if(qOpening->match>0)
1017 qOpening->match=0;
1018 }
1019 }
1020 return newProp;
1021 }
1022
1023 /* handle strong characters, digits and candidates for closing brackets */
1024 static UBool /* return TRUE if success */
1025 bracketProcessChar(BracketData *bd, int32_t position) {
1026 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
1027 DirProp *dirProps, dirProp, newProp;
1028 UBiDiLevel level;
1029 dirProps=bd->pBiDi->dirProps;
1030 dirProp=dirProps[position];
1031 if(dirProp==ON) {
1032 UChar c, match;
1033 int32_t idx;
1034 /* First see if it is a matching closing bracket. Hopefully, this is
1035 more efficient than checking if it is a closing bracket at all */
1036 c=bd->pBiDi->text[position];
1037 for(idx=pLastIsoRun->limit-1; idx>=pLastIsoRun->start; idx--) {
1038 if(bd->openings[idx].match!=c)
1039 continue;
1040 /* We have a match */
1041 newProp=bracketProcessClosing(bd, idx, position);
1042 if(newProp==ON) { /* N0d */
1043 c=0; /* prevent handling as an opening */
1044 break;
1045 }
1046 pLastIsoRun->lastBase=ON;
1047 pLastIsoRun->contextDir=(UBiDiDirection)newProp;
1048 pLastIsoRun->contextPos=position;
1049 level=bd->pBiDi->levels[position];
1050 if(level&UBIDI_LEVEL_OVERRIDE) { /* X4, X5 */
1051 uint16_t flag;
1052 int32_t i;
1053 newProp=level&1;
1054 pLastIsoRun->lastStrong=newProp;
1055 flag=DIRPROP_FLAG(newProp);
1056 for(i=pLastIsoRun->start; i<idx; i++)
1057 bd->openings[i].flags|=flag;
1058 /* matching brackets are not overridden by LRO/RLO */
1059 bd->pBiDi->levels[position]&=~UBIDI_LEVEL_OVERRIDE;
1060 }
1061 /* matching brackets are not overridden by LRO/RLO */
1062 bd->pBiDi->levels[bd->openings[idx].position]&=~UBIDI_LEVEL_OVERRIDE;
1063 return TRUE;
1064 }
1065 /* We get here only if the ON character is not a matching closing
1066 bracket or it is a case of N0d */
1067 /* Now see if it is an opening bracket */
1068 if(c)
1069 match=u_getBidiPairedBracket(c); /* get the matching char */
1070 else
1071 match=0;
1072 if(match!=c && /* has a matching char */
1073 ubidi_getPairedBracketType(bd->pBiDi->bdp, c)==U_BPT_OPEN) { /* opening bracket */
1074 /* special case: process synonyms
1075 create an opening entry for each synonym */
1076 if(match==0x232A) { /* RIGHT-POINTING ANGLE BRACKET */
1077 if(!bracketAddOpening(bd, 0x3009, position))
1078 return FALSE;
1079 }
1080 else if(match==0x3009) { /* RIGHT ANGLE BRACKET */
1081 if(!bracketAddOpening(bd, 0x232A, position))
1082 return FALSE;
1083 }
1084 if(!bracketAddOpening(bd, match, position))
1085 return FALSE;
1086 }
1087 }
1088 level=bd->pBiDi->levels[position];
1089 if(level&UBIDI_LEVEL_OVERRIDE) { /* X4, X5 */
1090 newProp=level&1;
1091 if(dirProp!=S && dirProp!=WS && dirProp!=ON)
1092 dirProps[position]=newProp;
1093 pLastIsoRun->lastBase=newProp;
1094 pLastIsoRun->lastStrong=newProp;
1095 pLastIsoRun->contextDir=(UBiDiDirection)newProp;
1096 pLastIsoRun->contextPos=position;
1097 }
1098 else if(dirProp<=R || dirProp==AL) {
1099 newProp=DIR_FROM_STRONG(dirProp);
1100 pLastIsoRun->lastBase=dirProp;
1101 pLastIsoRun->lastStrong=dirProp;
1102 pLastIsoRun->contextDir=(UBiDiDirection)newProp;
1103 pLastIsoRun->contextPos=position;
1104 }
1105 else if(dirProp==EN) {
1106 pLastIsoRun->lastBase=EN;
1107 if(pLastIsoRun->lastStrong==L) {
1108 newProp=L; /* W7 */
1109 if(!bd->isNumbersSpecial)
1110 dirProps[position]=ENL;
1111 pLastIsoRun->contextDir=(UBiDiDirection)L;
1112 pLastIsoRun->contextPos=position;
1113 }
1114 else {
1115 newProp=R; /* N0 */
1116 if(pLastIsoRun->lastStrong==AL)
1117 dirProps[position]=AN; /* W2 */
1118 else
1119 dirProps[position]=ENR;
1120 pLastIsoRun->contextDir=(UBiDiDirection)R;
1121 pLastIsoRun->contextPos=position;
1122 }
1123 }
1124 else if(dirProp==AN) {
1125 newProp=R; /* N0 */
1126 pLastIsoRun->lastBase=AN;
1127 pLastIsoRun->contextDir=(UBiDiDirection)R;
1128 pLastIsoRun->contextPos=position;
1129 }
1130 else if(dirProp==NSM) {
1131 /* if the last real char was ON, change NSM to ON so that it
1132 will stay ON even if the last real char is a bracket which
1133 may be changed to L or R */
1134 newProp=pLastIsoRun->lastBase;
1135 if(newProp==ON)
1136 dirProps[position]=newProp;
1137 }
1138 else {
1139 newProp=dirProp;
1140 pLastIsoRun->lastBase=dirProp;
1141 }
1142 if(newProp<=R || newProp==AL) {
1143 int32_t i;
1144 uint16_t flag=DIRPROP_FLAG(DIR_FROM_STRONG(newProp));
1145 for(i=pLastIsoRun->start; i<pLastIsoRun->limit; i++)
1146 if(position>bd->openings[i].position)
1147 bd->openings[i].flags|=flag;
1148 }
1149 return TRUE;
1150 }
1151
1152 /* perform (X1)..(X9) ------------------------------------------------------- */
1153
1154 /* determine if the text is mixed-directional or single-directional */
1155 static UBiDiDirection
1156 directionFromFlags(UBiDi *pBiDi) {
1157 Flags flags=pBiDi->flags;
1158 /* if the text contains AN and neutrals, then some neutrals may become RTL */
1159 if(!(flags&MASK_RTL || ((flags&DIRPROP_FLAG(AN)) && (flags&MASK_POSSIBLE_N)))) {
1160 return UBIDI_LTR;
1161 } else if(!(flags&MASK_LTR)) {
1162 return UBIDI_RTL;
1163 } else {
1164 return UBIDI_MIXED;
1165 }
1166 }
1167
1168 /*
1169 * Resolve the explicit levels as specified by explicit embedding codes.
1170 * Recalculate the flags to have them reflect the real properties
1171 * after taking the explicit embeddings into account.
1172 *
1173 * The BiDi algorithm is designed to result in the same behavior whether embedding
1174 * levels are externally specified (from "styled text", supposedly the preferred
1175 * method) or set by explicit embedding codes (LRx, RLx, PDF, FSI, PDI) in the plain text.
1176 * That is why (X9) instructs to remove all not-isolate explicit codes (and BN).
1177 * However, in a real implementation, the removal of these codes and their index
1178 * positions in the plain text is undesirable since it would result in
1179 * reallocated, reindexed text.
1180 * Instead, this implementation leaves the codes in there and just ignores them
1181 * in the subsequent processing.
1182 * In order to get the same reordering behavior, positions with a BN or a not-isolate
1183 * explicit embedding code just get the same level assigned as the last "real"
1184 * character.
1185 *
1186 * Some implementations, not this one, then overwrite some of these
1187 * directionality properties at "real" same-level-run boundaries by
1188 * L or R codes so that the resolution of weak types can be performed on the
1189 * entire paragraph at once instead of having to parse it once more and
1190 * perform that resolution on same-level-runs.
1191 * This limits the scope of the implicit rules in effectively
1192 * the same way as the run limits.
1193 *
1194 * Instead, this implementation does not modify these codes, except for
1195 * paired brackets whose properties (ON) may be replaced by L or R.
1196 * On one hand, the paragraph has to be scanned for same-level-runs, but
1197 * on the other hand, this saves another loop to reset these codes,
1198 * or saves making and modifying a copy of dirProps[].
1199 *
1200 *
1201 * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm.
1202 *
1203 *
1204 * Handling the stack of explicit levels (Xn):
1205 *
1206 * With the BiDi stack of explicit levels, as pushed with each
1207 * LRE, RLE, LRO, RLO, LRI, RLI and FSI and popped with each PDF and PDI,
1208 * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL.
1209 *
1210 * In order to have a correct push-pop semantics even in the case of overflows,
1211 * overflow counters and a valid isolate counter are used as described in UAX#9
1212 * section 3.3.2 "Explicit Levels and Directions".
1213 *
1214 * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
1215 *
1216 * Returns normally the direction; -1 if there was a memory shortage
1217 *
1218 */
1219 static UBiDiDirection
1220 resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
1221 DirProp *dirProps=pBiDi->dirProps;
1222 uint16_t *dirInsert = pBiDi->dirInsert; /* may be NULL */
1223 UBiDiLevel *levels=pBiDi->levels;
1224 const UChar *text=pBiDi->text;
1225
1226 int32_t i=0, length=pBiDi->length;
1227 Flags flags=pBiDi->flags; /* collect all directionalities in the text */
1228 DirProp dirProp;
1229 int32_t dirInsertValue;
1230 int8_t dirInsertIndex; /* position within dirInsertValue, if any */
1231 UBiDiLevel level=GET_PARALEVEL(pBiDi, 0);
1232 UBiDiDirection direction;
1233 pBiDi->isolateCount=0;
1234
1235 if(U_FAILURE(*pErrorCode)) { return UBIDI_LTR; }
1236
1237 /* determine if the text is mixed-directional or single-directional */
1238 direction=directionFromFlags(pBiDi);
1239
1240 /* we may not need to resolve any explicit levels */
1241 if((direction!=UBIDI_MIXED)) {
1242 /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
1243 return direction;
1244 }
1245 if(pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL) {
1246 /* inverse BiDi: mixed, but all characters are at the same embedding level */
1247 /* set all levels to the paragraph level */
1248 int32_t paraIndex, start, limit;
1249 for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) {
1250 if(paraIndex==0)
1251 start=0;
1252 else
1253 start=pBiDi->paras[paraIndex-1].limit;
1254 limit=pBiDi->paras[paraIndex].limit;
1255 level=pBiDi->paras[paraIndex].level;
1256 for(i=start; i<limit; i++)
1257 levels[i]=level;
1258 }
1259 return direction; /* no bracket matching for inverse BiDi */
1260 }
1261 if(!(flags&(MASK_EXPLICIT|MASK_ISO))) {
1262 /* no embeddings, set all levels to the paragraph level */
1263 /* we still have to perform bracket matching */
1264 int32_t paraIndex, start, limit;
1265 BracketData bracketData;
1266 bracketInit(pBiDi, &bracketData);
1267 for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) {
1268 if(paraIndex==0)
1269 start=0;
1270 else
1271 start=pBiDi->paras[paraIndex-1].limit;
1272 limit=pBiDi->paras[paraIndex].limit;
1273 level=pBiDi->paras[paraIndex].level;
1274 for(i=start; i<limit; i++) {
1275 levels[i]=level;
1276 dirProp=dirProps[i];
1277 if(dirProp==BN)
1278 continue;
1279 if(dirProp==B) {
1280 if((i+1)<length) {
1281 if(text[i]==CR && text[i+1]==LF)
1282 continue; /* skip CR when followed by LF */
1283 bracketProcessB(&bracketData, level);
1284 }
1285 continue;
1286 }
1287 if(!bracketProcessChar(&bracketData, i)) {
1288 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1289 return UBIDI_LTR;
1290 }
1291 }
1292 }
1293 return direction;
1294 }
1295 {
1296 /* continue to perform (Xn) */
1297
1298 /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
1299 /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */
1300 UBiDiLevel embeddingLevel=level, newLevel;
1301 UBiDiLevel previousLevel=level; /* previous level for regular (not CC) characters */
1302 int32_t lastCcPos=0; /* index of last effective LRx,RLx, PDx */
1303 DirProp lastCcDirProp=0; /* dirProp of last effective LRx,RLx, PDx */
1304
1305 /* The following stack remembers the embedding level and the ISOLATE flag of level runs.
1306 stackLast points to its current entry. */
1307 uint16_t stack[UBIDI_MAX_EXPLICIT_LEVEL+2]; /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL
1308 but we need one more entry as base */
1309 uint32_t stackLast=0;
1310 int32_t overflowIsolateCount=0;
1311 int32_t overflowEmbeddingCount=0;
1312 int32_t validIsolateCount=0;
1313 BracketData bracketData;
1314 bracketInit(pBiDi, &bracketData);
1315 stack[0]=level; /* initialize base entry to para level, no override, no isolate */
1316
1317 /* recalculate the flags */
1318 flags=0;
1319
1320 dirInsertValue = 0;
1321 dirInsertIndex = -1; /* indicate that we have not checked dirInsert yet */
1322 for(i=0; i<length; ) { /* now conditionally increment at end */
1323 if (dirInsert != NULL && dirInsertIndex < 0) {
1324 dirInsertValue = dirInsert[i];
1325 }
1326 if (dirInsertValue > 0) {
1327 dirInsertIndex++;
1328 dirProp = (DirProp)stdDirFromInsertDir[dirInsertValue & 0x000F];
1329 dirInsertValue >>= 4;
1330 } else {
1331 dirInsertIndex = -1;
1332 dirProp=dirProps[i];
1333 }
1334 switch(dirProp) {
1335 case LRE:
1336 case RLE:
1337 case LRO:
1338 case RLO:
1339 /* (X2, X3, X4, X5) */
1340 flags|=DIRPROP_FLAG(BN);
1341 levels[i]=previousLevel;
1342 if (dirProp==LRE || dirProp==LRO)
1343 /* least greater even level */
1344 newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1));
1345 else
1346 /* least greater odd level */
1347 newLevel=(UBiDiLevel)((NO_OVERRIDE(embeddingLevel)+1)|1);
1348 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 &&
1349 overflowEmbeddingCount==0) {
1350 lastCcPos=i;
1351 lastCcDirProp = dirProp;
1352 embeddingLevel=newLevel;
1353 if(dirProp==LRO || dirProp==RLO)
1354 embeddingLevel|=UBIDI_LEVEL_OVERRIDE;
1355 stackLast++;
1356 stack[stackLast]=embeddingLevel;
1357 /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE and RLE
1358 since this has already been done for newLevel which is
1359 the source for embeddingLevel.
1360 */
1361 } else {
1362 if(overflowIsolateCount==0)
1363 overflowEmbeddingCount++;
1364 }
1365 break;
1366 case PDF:
1367 /* (X7) */
1368 flags|=DIRPROP_FLAG(BN);
1369 levels[i]=previousLevel;
1370 /* handle all the overflow cases first */
1371 if(overflowIsolateCount) {
1372 break;
1373 }
1374 if(overflowEmbeddingCount) {
1375 overflowEmbeddingCount--;
1376 break;
1377 }
1378 if(stackLast>0 && stack[stackLast]<ISOLATE) { /* not an isolate entry */
1379 lastCcPos=i;
1380 lastCcDirProp = dirProp;
1381 stackLast--;
1382 embeddingLevel=(UBiDiLevel)stack[stackLast];
1383 }
1384 break;
1385 case LRI:
1386 case RLI:
1387 flags|=(DIRPROP_FLAG(ON)|DIRPROP_FLAG_LR(embeddingLevel));
1388 levels[i]=NO_OVERRIDE(embeddingLevel);
1389 if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
1390 bracketProcessBoundary(&bracketData, lastCcPos, lastCcDirProp,
1391 previousLevel, embeddingLevel);
1392 flags|=DIRPROP_FLAG_MULTI_RUNS;
1393 }
1394 previousLevel=embeddingLevel;
1395 /* (X5a, X5b) */
1396 if(dirProp==LRI)
1397 /* least greater even level */
1398 newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1));
1399 else
1400 /* least greater odd level */
1401 newLevel=(UBiDiLevel)((NO_OVERRIDE(embeddingLevel)+1)|1);
1402 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 &&
1403 overflowEmbeddingCount==0) {
1404 flags|=DIRPROP_FLAG(dirProp);
1405 lastCcPos=i;
1406 lastCcDirProp = dirProp;
1407 validIsolateCount++;
1408 if(validIsolateCount>pBiDi->isolateCount)
1409 pBiDi->isolateCount=validIsolateCount;
1410 embeddingLevel=newLevel;
1411 /* we can increment stackLast without checking because newLevel
1412 will exceed UBIDI_MAX_EXPLICIT_LEVEL before stackLast overflows */
1413 stackLast++;
1414 stack[stackLast]=embeddingLevel+ISOLATE;
1415 bracketProcessLRI_RLI(&bracketData, embeddingLevel);
1416 } else {
1417 /* make it WS so that it is handled by adjustWSLevels() */
1418 if (dirInsertIndex < 0) {
1419 dirProps[i]=WS;
1420 } else {
1421 dirInsert[i] &= ~(0x000F << (4*dirInsertIndex));
1422 dirInsert[i] |= (Insert_WS << (4*dirInsertIndex));
1423 }
1424 overflowIsolateCount++;
1425 }
1426 break;
1427 case PDI:
1428 if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
1429 bracketProcessBoundary(&bracketData, lastCcPos, lastCcDirProp,
1430 previousLevel, embeddingLevel);
1431 flags|=DIRPROP_FLAG_MULTI_RUNS;
1432 }
1433 /* (X6a) */
1434 if(overflowIsolateCount) {
1435 overflowIsolateCount--;
1436 /* make it WS so that it is handled by adjustWSLevels() */
1437 if (dirInsertIndex < 0) {
1438 dirProps[i]=WS;
1439 } else {
1440 dirInsert[i] &= ~(0x000F << (4*dirInsertIndex));
1441 dirInsert[i] |= (Insert_WS << (4*dirInsertIndex));
1442 }
1443 }
1444 else if(validIsolateCount) {
1445 flags|=DIRPROP_FLAG(PDI);
1446 lastCcPos=i;
1447 lastCcDirProp = dirProp;
1448 overflowEmbeddingCount=0;
1449 while(stack[stackLast]<ISOLATE) /* pop embedding entries */
1450 stackLast--; /* until the last isolate entry */
1451 stackLast--; /* pop also the last isolate entry */
1452 validIsolateCount--;
1453 bracketProcessPDI(&bracketData);
1454 } else
1455 /* make it WS so that it is handled by adjustWSLevels() */
1456 if (dirInsertIndex < 0) {
1457 dirProps[i]=WS;
1458 } else {
1459 dirInsert[i] &= ~(0x000F << (4*dirInsertIndex));
1460 dirInsert[i] |= (Insert_WS << (4*dirInsertIndex));
1461 }
1462 embeddingLevel=(UBiDiLevel)stack[stackLast]&~ISOLATE;
1463 flags|=(DIRPROP_FLAG(ON)|DIRPROP_FLAG_LR(embeddingLevel));
1464 previousLevel=embeddingLevel;
1465 levels[i]=NO_OVERRIDE(embeddingLevel);
1466 break;
1467 case B:
1468 flags|=DIRPROP_FLAG(B);
1469 levels[i]=GET_PARALEVEL(pBiDi, i);
1470 if((i+1)<length) {
1471 if(text[i]==CR && text[i+1]==LF)
1472 break; /* skip CR when followed by LF */
1473 overflowEmbeddingCount=overflowIsolateCount=0;
1474 validIsolateCount=0;
1475 stackLast=0;
1476 previousLevel=embeddingLevel=GET_PARALEVEL(pBiDi, i+1);
1477 stack[0]=embeddingLevel; /* initialize base entry to para level, no override, no isolate */
1478 bracketProcessB(&bracketData, embeddingLevel);
1479 }
1480 break;
1481 case BN:
1482 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
1483 /* they will get their levels set correctly in adjustWSLevels() */
1484 levels[i]=previousLevel;
1485 flags|=DIRPROP_FLAG(BN);
1486 break;
1487 default:
1488 /* all other types are normal characters and get the "real" level */
1489 if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
1490 bracketProcessBoundary(&bracketData, lastCcPos, lastCcDirProp,
1491 previousLevel, embeddingLevel);
1492 flags|=DIRPROP_FLAG_MULTI_RUNS;
1493 if(embeddingLevel&UBIDI_LEVEL_OVERRIDE)
1494 flags|=DIRPROP_FLAG_O(embeddingLevel);
1495 else
1496 flags|=DIRPROP_FLAG_E(embeddingLevel);
1497 }
1498 previousLevel=embeddingLevel;
1499 levels[i]=embeddingLevel;
1500 if(!bracketProcessChar(&bracketData, i))
1501 return (UBiDiDirection)-1;
1502 /* the dirProp may have been changed in bracketProcessChar() */
1503 flags|=DIRPROP_FLAG(dirProps[i]);
1504 break;
1505 }
1506 if (dirInsertIndex < 0) {
1507 ++i;
1508 }
1509 }
1510 if(flags&MASK_EMBEDDING)
1511 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
1512 if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B)))
1513 flags|=DIRPROP_FLAG(L);
1514 /* again, determine if the text is mixed-directional or single-directional */
1515 pBiDi->flags=flags;
1516 direction=directionFromFlags(pBiDi);
1517 }
1518 return direction;
1519 }
1520
1521 /*
1522 * Use a pre-specified embedding levels array:
1523 *
1524 * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
1525 * ignore all explicit codes (X9),
1526 * and check all the preset levels.
1527 *
1528 * Recalculate the flags to have them reflect the real properties
1529 * after taking the explicit embeddings into account.
1530 */
1531 static UBiDiDirection
1532 checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
1533 DirProp *dirProps=pBiDi->dirProps;
1534 UBiDiLevel *levels=pBiDi->levels;
1535 int32_t isolateCount=0;
1536
1537 int32_t length=pBiDi->length;
1538 Flags flags=0; /* collect all directionalities in the text */
1539 pBiDi->isolateCount=0;
1540
1541 int32_t currentParaIndex = 0;
1542 int32_t currentParaLimit = pBiDi->paras[0].limit;
1543 int32_t currentParaLevel = pBiDi->paraLevel;
1544
1545 for(int32_t i=0; i<length; ++i) {
1546 UBiDiLevel level=levels[i];
1547 DirProp dirProp=dirProps[i];
1548 if(dirProp==LRI || dirProp==RLI) {
1549 isolateCount++;
1550 if(isolateCount>pBiDi->isolateCount)
1551 pBiDi->isolateCount=isolateCount;
1552 }
1553 else if(dirProp==PDI)
1554 isolateCount--;
1555 else if(dirProp==B)
1556 isolateCount=0;
1557
1558 // optimized version of int32_t currentParaLevel = GET_PARALEVEL(pBiDi, i);
1559 if (pBiDi->defaultParaLevel != 0 &&
1560 i == currentParaLimit && (currentParaIndex + 1) < pBiDi->paraCount) {
1561 currentParaLevel = pBiDi->paras[++currentParaIndex].level;
1562 currentParaLimit = pBiDi->paras[currentParaIndex].limit;
1563 }
1564
1565 UBiDiLevel overrideFlag = level & UBIDI_LEVEL_OVERRIDE;
1566 level &= ~UBIDI_LEVEL_OVERRIDE;
1567 if (level < currentParaLevel || UBIDI_MAX_EXPLICIT_LEVEL < level) {
1568 if (level == 0) {
1569 if (dirProp == B) {
1570 // Paragraph separators are ok with explicit level 0.
1571 // Prevents reordering of paragraphs.
1572 } else {
1573 // Treat explicit level 0 as a wildcard for the paragraph level.
1574 // Avoid making the caller guess what the paragraph level would be.
1575 level = (UBiDiLevel)currentParaLevel;
1576 levels[i] = level | overrideFlag;
1577 }
1578 } else {
1579 // 1 <= level < currentParaLevel or UBIDI_MAX_EXPLICIT_LEVEL < level
1580 /* level out of bounds */
1581 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1582 return UBIDI_LTR;
1583 }
1584 }
1585 if (overrideFlag != 0) {
1586 /* keep the override flag in levels[i] but adjust the flags */
1587 flags|=DIRPROP_FLAG_O(level);
1588 } else {
1589 /* set the flags */
1590 flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp);
1591 }
1592 }
1593 if(flags&MASK_EMBEDDING)
1594 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
1595 /* determine if the text is mixed-directional or single-directional */
1596 pBiDi->flags=flags;
1597 return directionFromFlags(pBiDi);
1598 }
1599
1600 /******************************************************************
1601 The Properties state machine table
1602 *******************************************************************
1603
1604 All table cells are 8 bits:
1605 bits 0..4: next state
1606 bits 5..7: action to perform (if > 0)
1607
1608 Cells may be of format "n" where n represents the next state
1609 (except for the rightmost column).
1610 Cells may also be of format "s(x,y)" where x represents an action
1611 to perform and y represents the next state.
1612
1613 *******************************************************************
1614 Definitions and type for properties state table
1615 *******************************************************************
1616 */
/* Number of columns of the properties state table; the last one (Res)
   holds the reduced property assigned to a run. */
#define IMPTABPROPS_COLUMNS 16
#define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1)
/* A cell keeps the next state in bits 0..4 and the action in bits 5..7. */
#define GET_STATEPROPS(cell) ((cell)&0x1f)
#define GET_ACTIONPROPS(cell) ((cell)>>5)
/* Build a cell; both arguments are parenthesized so that any expression
   may safely be passed. */
#define s(action, newState) ((uint8_t)((newState)+((action)<<5)))
1622
/* dirProp regrouped: maps each directional property (index) to its column
   in the properties state table */
static const uint8_t groupProp[] =
{
    0,  /* L   */
    1,  /* R   */
    2,  /* EN  */
    7,  /* ES  */
    8,  /* ET  */
    3,  /* AN  */
    9,  /* CS  */
    6,  /* B   */
    5,  /* S   */
    4,  /* WS  */
    4,  /* ON  */
    10, /* LRE */
    10, /* LRO */
    12, /* AL  */
    10, /* RLE */
    10, /* RLO */
    10, /* PDF */
    11, /* NSM */
    10, /* BN  */
    4,  /* FSI */
    4,  /* LRI */
    4,  /* RLI */
    4,  /* PDI */
    13, /* ENL */
    14  /* ENR */
};

/* reduced dirProp */
enum {
    DirProp_L=0,
    DirProp_R=1,
    DirProp_EN=2,
    DirProp_AN=3,
    DirProp_ON=4,
    DirProp_S=5,
    DirProp_B=6
};
1629
1630 /******************************************************************
1631
1632 PROPERTIES STATE TABLE
1633
1634 In table impTabProps,
1635 - the ON column regroups ON and WS, FSI, RLI, LRI and PDI
1636 - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF
1637 - the Res column is the reduced property assigned to a run
1638
1639 Action 1: process current run1, init new run1
1640 2: init new run2
1641 3: process run1, process run2, init new run1
1642 4: process run1, set run1=run2, init new run2
1643
1644 Notes:
1645 1) This table is used in resolveImplicitLevels().
1646 2) This table triggers actions when there is a change in the Bidi
1647 property of incoming characters (action 1).
1648 3) Most such property sequences are processed immediately (in
1649 fact, passed to processPropertySeq().
1650 4) However, numbers are assembled as one sequence. This means
1651 that undefined situations (like CS following digits, until
1652 it is known if the next char will be a digit) are held until
1653 following chars define them.
1654 Example: digits followed by CS, then comes another CS or ON;
1655 the digits will be processed, then the CS assigned
1656 as the start of an ON sequence (action 3).
1657 5) There are cases where more than one sequence must be
1658 processed, for instance digits followed by CS followed by L:
1659 the digits must be processed as one sequence, and the CS
1660 must be processed as an ON sequence, all this before starting
1661 assembling chars for the opening L sequence.
1662
1663
1664 */
1665 static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] =
1666 {
1667 /* L , R , EN , AN , ON , S , B , ES , ET , CS , BN , NSM , AL , ENL , ENR , Res */
1668 /* 0 Init */ { 1 , 2 , 4 , 5 , 7 , 15 , 17 , 7 , 9 , 7 , 0 , 7 , 3 , 18 , 21 , DirProp_ON },
1669 /* 1 L */ { 1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 1 , 1 , s(1,3),s(1,18),s(1,21), DirProp_L },
1670 /* 2 R */ { s(1,1), 2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 2 , 2 , s(1,3),s(1,18),s(1,21), DirProp_R },
1671 /* 3 AL */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8), 3 , 3 , 3 ,s(1,18),s(1,21), DirProp_R },
1672 /* 4 EN */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10), 11 ,s(2,10), 4 , 4 , s(1,3), 18 , 21 , DirProp_EN },
1673 /* 5 AN */ { s(1,1), s(1,2), s(1,4), 5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12), 5 , 5 , s(1,3),s(1,18),s(1,21), DirProp_AN },
1674 /* 6 AL:EN/AN */ { s(1,1), s(1,2), 6 , 6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13), 6 , 6 , s(1,3), 18 , 21 , DirProp_AN },
1675 /* 7 ON */ { s(1,1), s(1,2), s(1,4), s(1,5), 7 ,s(1,15),s(1,17), 7 ,s(2,14), 7 , 7 , 7 , s(1,3),s(1,18),s(1,21), DirProp_ON },
1676 /* 8 AL:ON */ { s(1,1), s(1,2), s(1,6), s(1,6), 8 ,s(1,16),s(1,17), 8 , 8 , 8 , 8 , 8 , s(1,3),s(1,18),s(1,21), DirProp_ON },
1677 /* 9 ET */ { s(1,1), s(1,2), 4 , s(1,5), 7 ,s(1,15),s(1,17), 7 , 9 , 7 , 9 , 9 , s(1,3), 18 , 21 , DirProp_ON },
1678 /*10 EN+ES/CS */ { s(3,1), s(3,2), 4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 10 , s(4,7), s(3,3), 18 , 21 , DirProp_EN },
1679 /*11 EN+ET */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 11 , s(1,7), 11 , 11 , s(1,3), 18 , 21 , DirProp_EN },
1680 /*12 AN+CS */ { s(3,1), s(3,2), s(3,4), 5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 12 , s(4,7), s(3,3),s(3,18),s(3,21), DirProp_AN },
1681 /*13 AL:EN/AN+CS */ { s(3,1), s(3,2), 6 , 6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8), 13 , s(4,8), s(3,3), 18 , 21 , DirProp_AN },
1682 /*14 ON+ET */ { s(1,1), s(1,2), s(4,4), s(1,5), 7 ,s(1,15),s(1,17), 7 , 14 , 7 , 14 , 14 , s(1,3),s(4,18),s(4,21), DirProp_ON },
1683 /*15 S */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7), 15 ,s(1,17), s(1,7), s(1,9), s(1,7), 15 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_S },
1684 /*16 AL:S */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8), 16 ,s(1,17), s(1,8), s(1,8), s(1,8), 16 , s(1,8), s(1,3),s(1,18),s(1,21), DirProp_S },
1685 /*17 B */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15), 17 , s(1,7), s(1,9), s(1,7), 17 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_B },
1686 /*18 ENL */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,19), 20 ,s(2,19), 18 , 18 , s(1,3), 18 , 21 , DirProp_L },
1687 /*19 ENL+ES/CS */ { s(3,1), s(3,2), 18 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 19 , s(4,7), s(3,3), 18 , 21 , DirProp_L },
1688 /*20 ENL+ET */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 20 , s(1,7), 20 , 20 , s(1,3), 18 , 21 , DirProp_L },
1689 /*21 ENR */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,22), 23 ,s(2,22), 21 , 21 , s(1,3), 18 , 21 , DirProp_AN },
1690 /*22 ENR+ES/CS */ { s(3,1), s(3,2), 21 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 22 , s(4,7), s(3,3), 18 , 21 , DirProp_AN },
1691 /*23 ENR+ET */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 23 , s(1,7), 23 , 23 , s(1,3), 18 , 21 , DirProp_AN }
1692 };
1693
1694 /* we must undef macro s because the levels tables have a different
1695 * structure (4 bits for action and 4 bits for next state).
1696 */
1697 #undef s
1698
1699 /******************************************************************
1700 The levels state machine tables
1701 *******************************************************************
1702
1703 All table cells are 8 bits:
1704 bits 0..3: next state
1705 bits 4..7: action to perform (if > 0)
1706
1707 Cells may be of format "n" where n represents the next state
1708 (except for the rightmost column).
1709 Cells may also be of format "s(x,y)" where x represents an action
1710 to perform and y represents the next state.
1711
1712 This format limits each table to 16 states and to 15 actions.
1713
1714 *******************************************************************
1715 Definitions and type for levels state tables
1716 *******************************************************************
1717 */
/* Number of columns in a levels state table: one per property from
 * DirProp_L through DirProp_B, plus the trailing Res column.
 */
#define IMPTABLEVELS_COLUMNS (DirProp_B + 2)
/* Index of the Res column (the last column of each row). */
#define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1)
/* Low nibble of a cell: the next state. */
#define GET_STATE(cell) ((cell)&0x0f)
/* High nibble of a cell: the table-local action number (0 = none). */
#define GET_ACTION(cell) ((cell)>>4)
/* Build a cell from an action and a next state. Both arguments are
 * parenthesized so that an expression argument (e.g. s(a|b, n)) is
 * shifted/added as a whole instead of being split by operator precedence.
 */
#define s(action, newState) ((uint8_t)((newState)+((action)<<4)))
1723
/* ImpTab: a levels state table. Each row has IMPTABLEVELS_COLUMNS cells and
 * processPropertySeq() indexes it as [state][property].
 * ImpAct: maps the table-local action number found in a cell (see GET_ACTION)
 * to the action number used by the switch in processPropertySeq().
 */
1724 typedef uint8_t ImpTab[][IMPTABLEVELS_COLUMNS];
1725 typedef uint8_t ImpAct[];
1726
/* A pair of levels state tables with their action maps.
 * resolveImplicitLevels() picks entry [runLevel & 1] of both arrays, so
 * entry [0] serves even run levels and entry [1] odd run levels.
 * The pointers are stored as const void * and cast back to
 * const ImpTab * / const ImpAct * at the point of use.
 */
1727 /* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct,
1728 * instead of having a pair of ImpTab and a pair of ImpAct.
1729 */
1730 typedef struct ImpTabPair {
1731 const void * pImpTab[2];
1732 const void * pImpAct[2];
1733 } ImpTabPair;
1734
1735 /******************************************************************
1736
1737 LEVELS STATE TABLES
1738
1739 In all levels state tables,
1740 - state 0 is the initial state
1741 - the Res column is the increment to add to the text level
1742 for this property sequence.
1743
1744 The impAct arrays for each table of a pair map the local action
1745 numbers of the table to the total list of actions. For instance,
1746 action 2 in a given table corresponds to the action number which
1747 appears in entry [2] of the impAct array for that table.
1748 The first entry of all impAct arrays must be 0.
1749
1750 Action 1: init conditional sequence
1751 2: prepend conditional sequence to current sequence
1752 3: set ON sequence to new level - 1
1753 4: init EN/AN/ON sequence
1754 5: fix EN/AN/ON sequence followed by R
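1755 6: set previous level sequence to level 2

The list above is not exhaustive: the switch in processPropertySeq()
handles action numbers 1 through 14, and the comments on its cases
describe each action as implemented.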
1756
1757 Notes:
1758 1) These tables are used in processPropertySeq(). The input
1759 is property sequences as determined by resolveImplicitLevels.
1760 2) Most such property sequences are processed immediately
1761 (levels are assigned).
1762 3) However, some sequences cannot be assigned a final level till
1763 one or more following sequences are received. For instance,
1764 ON following an R sequence within an even-level paragraph.
1765 If the following sequence is R, the ON sequence will be
1766 assigned basic run level+1, and so will the R sequence.
1767 4) S is generally handled like ON, since its level will be fixed
1768 to paragraph level in adjustWSLevels().
1769
1770 */
1771
/* Levels state table listed as entry [0] of impTab_DEFAULT and of
 * impTab_INVERSE_LIKE_DIRECT (entry [0] is the one selected for an even
 * run level). Row = current state; columns L..B = property of the sequence
 * given to processPropertySeq(); Res = value added to the run level once
 * the row has become the new state. Cells written s(a,n) request
 * table-local action a and next state n; both pairs map actions through impAct0.
 */
1772 static const ImpTab impTabL_DEFAULT = /* Even paragraph level */
1773 /* In this table, conditional sequences receive the lower possible level
1774 until proven otherwise.
1775 */
1776 {
1777 /* L , R , EN , AN , ON , S , B , Res */
1778 /* 0 : init */ { 0 , 1 , 0 , 2 , 0 , 0 , 0 , 0 },
1779 /* 1 : R */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 1 },
1780 /* 2 : AN */ { 0 , 1 , 0 , 2 , s(1,5), s(1,5), 0 , 2 },
1781 /* 3 : R+EN/AN */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 2 },
1782 /* 4 : R+ON */ { 0 , s(2,1), s(3,3), s(3,3), 4 , 4 , 0 , 0 },
1783 /* 5 : AN+ON */ { 0 , s(2,1), 0 , s(3,2), 5 , 5 , 0 , 0 }
1784 };
/* Levels state table listed as entry [1] of impTab_DEFAULT and of
 * impTab_NUMBERS_SPECIAL (entry [1] is the one selected for an odd run
 * level). Same layout as the other ImpTab tables: row = state, columns
 * L..B = incoming property, Res = value added to the run level.
 * Only table-local actions 1 and 2 appear here.
 */
1785 static const ImpTab impTabR_DEFAULT = /* Odd paragraph level */
1786 /* In this table, conditional sequences receive the lower possible level
1787 until proven otherwise.
1788 */
1789 {
1790 /* L , R , EN , AN , ON , S , B , Res */
1791 /* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 },
1792 /* 1 : L */ { 1 , 0 , 1 , 3 , s(1,4), s(1,4), 0 , 1 },
1793 /* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 },
1794 /* 3 : L+AN */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 1 },
1795 /* 4 : L+ON */ { s(2,1), 0 , s(2,1), 3 , 4 , 4 , 0 , 0 },
1796 /* 5 : L+AN+ON */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 0 }
1797 };
/* impAct0 is the identity action map: table-local actions 0..4 are the
 * action numbers 0..4 of processPropertySeq().
 */
1798 static const ImpAct impAct0 = {0,1,2,3,4};
/* Pair { even-level table, odd-level table } with impAct0 for both. */
1799 static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT,
1800 &impTabR_DEFAULT},
1801 {&impAct0, &impAct0}};
1802
/* Levels state table listed as entry [0] of impTab_NUMBERS_SPECIAL and of
 * impTab_INVERSE_FOR_NUMBERS_SPECIAL. Both pairs map its actions through
 * impAct0, so s(4,2) in row 1 triggers action 4 of processPropertySeq().
 * Layout: row = state, columns L..B = incoming property, Res = value
 * added to the run level.
 */
1803 static const ImpTab impTabL_NUMBERS_SPECIAL = /* Even paragraph level */
1804 /* In this table, conditional sequences receive the lower possible level
1805 until proven otherwise.
1806 */
1807 {
1808 /* L , R , EN , AN , ON , S , B , Res */
1809 /* 0 : init */ { 0 , 2 , s(1,1), s(1,1), 0 , 0 , 0 , 0 },
1810 /* 1 : L+EN/AN */ { 0 , s(4,2), 1 , 1 , 0 , 0 , 0 , 0 },
1811 /* 2 : R */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 1 },
1812 /* 3 : R+ON */ { 0 , s(2,2), s(3,4), s(3,4), 3 , 3 , 0 , 0 },
1813 /* 4 : R+EN/AN */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 2 }
1814 };
/* Pair combining the NUMBERS_SPECIAL even-level table with the default
 * odd-level table; both use the identity action map impAct0.
 */
1815 static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL,
1816 &impTabR_DEFAULT},
1817 {&impAct0, &impAct0}};
1818
/* Levels state table listed as entry [0] (even run level) of
 * impTab_GROUP_NUMBERS_WITH_R; actions are mapped through impAct0.
 * Layout: row = state, columns L..B = incoming property, Res = value
 * added to the run level.
 */
1819 static const ImpTab impTabL_GROUP_NUMBERS_WITH_R =
1820 /* In this table, EN/AN+ON sequences receive levels as if associated with R
1821 until proven that there is L or sor/eor on both sides. AN is handled like EN.
1822 */
1823 {
1824 /* L , R , EN , AN , ON , S , B , Res */
1825 /* 0 init */ { 0 , 3 , s(1,1), s(1,1), 0 , 0 , 0 , 0 },
1826 /* 1 EN/AN */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 2 },
1827 /* 2 EN/AN+ON */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 1 },
1828 /* 3 R */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 1 },
1829 /* 4 R+ON */ { s(2,0), 3 , 5 , 5 , 4 , s(2,0), s(2,0), 1 },
1830 /* 5 R+EN/AN */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 2 }
1831 };
/* Levels state table listed as entry [1] (odd run level) of
 * impTab_GROUP_NUMBERS_WITH_R; actions are mapped through impAct0.
 * Layout: row = state, columns L..B = incoming property, Res = value
 * added to the run level.
 */
1832 static const ImpTab impTabR_GROUP_NUMBERS_WITH_R =
1833 /* In this table, EN/AN+ON sequences receive levels as if associated with R
1834 until proven that there is L on both sides. AN is handled like EN.
1835 */
1836 {
1837 /* L , R , EN , AN , ON , S , B , Res */
1838 /* 0 init */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 0 },
1839 /* 1 EN/AN */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 1 },
1840 /* 2 L */ { 2 , 0 , s(1,4), s(1,4), s(1,3), 0 , 0 , 1 },
1841 /* 3 L+ON */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 0 },
1842 /* 4 L+EN/AN */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 1 }
1843 };
/* Pair { even-level table, odd-level table } for GROUP_NUMBERS_WITH_R;
 * both tables use the identity action map impAct0.
 */
1844 static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R = {
1845 {&impTabL_GROUP_NUMBERS_WITH_R,
1846 &impTabR_GROUP_NUMBERS_WITH_R},
1847 {&impAct0, &impAct0}};
1848
1849
/* Levels state table listed as entry [0] (even run level) of
 * impTab_INVERSE_NUMBERS_AS_L; actions are mapped through impAct0.
 * Layout: row = state, columns L..B = incoming property, Res = value
 * added to the run level.
 */
1850 static const ImpTab impTabL_INVERSE_NUMBERS_AS_L =
1851 /* This table is identical to the Default LTR table except that EN and AN are
1852 handled like L.
1853 */
1854 {
1855 /* L , R , EN , AN , ON , S , B , Res */
1856 /* 0 : init */ { 0 , 1 , 0 , 0 , 0 , 0 , 0 , 0 },
1857 /* 1 : R */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 1 },
1858 /* 2 : AN */ { 0 , 1 , 0 , 0 , s(1,5), s(1,5), 0 , 2 },
1859 /* 3 : R+EN/AN */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 2 },
1860 /* 4 : R+ON */ { s(2,0), 1 , s(2,0), s(2,0), 4 , 4 , s(2,0), 1 },
1861 /* 5 : AN+ON */ { s(2,0), 1 , s(2,0), s(2,0), 5 , 5 , s(2,0), 1 }
1862 };
/* Levels state table listed as entry [1] (odd run level) of
 * impTab_INVERSE_NUMBERS_AS_L; actions are mapped through impAct0.
 * Layout: row = state, columns L..B = incoming property, Res = value
 * added to the run level.
 */
1863 static const ImpTab impTabR_INVERSE_NUMBERS_AS_L =
1864 /* This table is identical to the Default RTL table except that EN and AN are
1865 handled like L.
1866 */
1867 {
1868 /* L , R , EN , AN , ON , S , B , Res */
1869 /* 0 : init */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 0 },
1870 /* 1 : L */ { 1 , 0 , 1 , 1 , s(1,4), s(1,4), 0 , 1 },
1871 /* 2 : EN/AN */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 1 },
1872 /* 3 : L+AN */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 1 },
1873 /* 4 : L+ON */ { s(2,1), 0 , s(2,1), s(2,1), 4 , 4 , 0 , 0 },
1874 /* 5 : L+AN+ON */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 0 }
1875 };
/* Pair { even-level table, odd-level table } for INVERSE_NUMBERS_AS_L;
 * both tables use the identity action map impAct0.
 */
1876 static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L = {
1877 {&impTabL_INVERSE_NUMBERS_AS_L,
1878 &impTabR_INVERSE_NUMBERS_AS_L},
1879 {&impAct0, &impAct0}};
1880
/* Levels state table listed as entry [1] (odd run level) of
 * impTab_INVERSE_LIKE_DIRECT and impTab_INVERSE_FOR_NUMBERS_SPECIAL.
 * Both pairs map its actions through impAct1, so table-local action 2 is
 * action 13 and table-local action 3 is action 14 of processPropertySeq().
 * Layout: row = state, columns L..B = incoming property, Res = value
 * added to the run level.
 */
1881 static const ImpTab impTabR_INVERSE_LIKE_DIRECT = /* Odd paragraph level */
1882 /* In this table, conditional sequences receive the lower possible level
1883 until proven otherwise.
1884 */
1885 {
1886 /* L , R , EN , AN , ON , S , B , Res */
1887 /* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 },
1888 /* 1 : L */ { 1 , 0 , 1 , 2 , s(1,3), s(1,3), 0 , 1 },
1889 /* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 },
1890 /* 3 : L+ON */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 0 },
1891 /* 4 : L+ON+AN */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 3 },
1892 /* 5 : L+AN+ON */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 2 },
1893 /* 6 : L+ON+EN */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 1 }
1894 };
/* Action map: table-local actions 0,1,2,3 -> actions 0,1,13,14 of
 * processPropertySeq().
 */
1895 static const ImpAct impAct1 = {0,1,13,14};
1896 /* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc"
1897 */
/* Pair: default even-level table with impAct0, INVERSE_LIKE_DIRECT
 * odd-level table with impAct1.
 */
1898 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = {
1899 {&impTabL_DEFAULT,
1900 &impTabR_INVERSE_LIKE_DIRECT},
1901 {&impAct0, &impAct1}};
1902
/* Levels state table listed as entry [0] (even run level) of
 * impTab_INVERSE_LIKE_DIRECT_WITH_MARKS, where its actions are mapped
 * through impAct2: table-local actions 1..6 are actions 1,2,5,6,7,8 of
 * processPropertySeq().
 * Layout: row = state, columns L..B = incoming property, Res = value
 * added to the run level.
 */
1903 static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS =
1904 /* The case handled in this table is (visually): R EN L
1905 */
1906 {
1907 /* L , R , EN , AN , ON , S , B , Res */
1908 /* 0 : init */ { 0 , s(6,3), 0 , 1 , 0 , 0 , 0 , 0 },
1909 /* 1 : L+AN */ { 0 , s(6,3), 0 , 1 , s(1,2), s(3,0), 0 , 4 },
1910 /* 2 : L+AN+ON */ { s(2,0), s(6,3), s(2,0), 1 , 2 , s(3,0), s(2,0), 3 },
1911 /* 3 : R */ { 0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0), 0 , 3 },
1912 /* 4 : R+ON */ { s(3,0), s(4,3), s(5,5), s(5,6), 4 , s(3,0), s(3,0), 3 },
1913 /* 5 : R+EN */ { s(3,0), s(4,3), 5 , s(5,6), s(1,4), s(3,0), s(3,0), 4 },
1914 /* 6 : R+AN */ { s(3,0), s(4,3), s(5,5), 6 , s(1,4), s(3,0), s(3,0), 4 }
1915 };
/* Levels state table listed as entry [1] (odd run level) of
 * impTab_INVERSE_LIKE_DIRECT_WITH_MARKS and of
 * impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS. Both pairs map its
 * actions through impAct3: table-local actions 1..5 are actions
 * 1,9,10,11,12 of processPropertySeq().
 * Layout: row = state, columns L..B = incoming property, Res = value
 * added to the run level.
 */
1916 static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS =
1917 /* The cases handled in this table are (visually): R EN L
1918 R L AN L
1919 */
1920 {
1921 /* L , R , EN , AN , ON , S , B , Res */
1922 /* 0 : init */ { s(1,3), 0 , 1 , 1 , 0 , 0 , 0 , 0 },
1923 /* 1 : R+EN/AN */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 1 },
1924 /* 2 : R+EN/AN+ON */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 0 },
1925 /* 3 : L */ { 3 , 0 , 3 , s(3,6), s(1,4), s(4,0), 0 , 1 },
1926 /* 4 : L+ON */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 0 },
1927 /* 5 : L+ON+EN */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 1 },
1928 /* 6 : L+AN */ { s(5,3), s(4,0), 6 , 6 , 4 , s(4,0), s(4,0), 3 }
1929 };
/* Action map: table-local actions 0..6 -> actions 0,1,2,5,6,7,8. */
1930 static const ImpAct impAct2 = {0,1,2,5,6,7,8};
/* Action map: table-local actions 0..5 -> actions 0,1,9,10,11,12. */
1931 static const ImpAct impAct3 = {0,1,9,10,11,12};
/* Pair: even-level table with impAct2, odd-level table with impAct3. */
1932 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = {
1933 {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
1934 &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
1935 {&impAct2, &impAct3}};
1936
/* Pair: NUMBERS_SPECIAL even-level table with impAct0,
 * INVERSE_LIKE_DIRECT odd-level table with impAct1.
 */
1937 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = {
1938 {&impTabL_NUMBERS_SPECIAL,
1939 &impTabR_INVERSE_LIKE_DIRECT},
1940 {&impAct0, &impAct1}};
1941
/* Levels state table listed as entry [0] (even run level) of
 * impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS, where its actions are
 * mapped through impAct2: table-local actions 1..6 are actions
 * 1,2,5,6,7,8 of processPropertySeq().
 * Layout: row = state, columns L..B = incoming property, Res = value
 * added to the run level.
 */
1942 static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS =
1943 /* The case handled in this table is (visually): R EN L
1944 */
1945 {
1946 /* L , R , EN , AN , ON , S , B , Res */
1947 /* 0 : init */ { 0 , s(6,2), 1 , 1 , 0 , 0 , 0 , 0 },
1948 /* 1 : L+EN/AN */ { 0 , s(6,2), 1 , 1 , 0 , s(3,0), 0 , 4 },
1949 /* 2 : R */ { 0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0), 0 , 3 },
1950 /* 3 : R+ON */ { s(3,0), s(4,2), s(5,4), s(5,4), 3 , s(3,0), s(3,0), 3 },
1951 /* 4 : R+EN/AN */ { s(3,0), s(4,2), 4 , 4 , s(1,3), s(3,0), s(3,0), 4 }
1952 };
/* Pair: INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS even-level table with
 * impAct2, INVERSE_LIKE_DIRECT_WITH_MARKS odd-level table with impAct3.
 */
1953 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = {
1954 {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
1955 &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
1956 {&impAct2, &impAct3}};
1957
1958 #undef s
1959
/* Working state of the levels state machine for one run.
 * resolveImplicitLevels() initialises it (or restores startON/state from
 * the isolates stack) and processPropertySeq() updates it for every
 * property sequence.
 */
1960 typedef struct {
1961 const ImpTab * pImpTab; /* level table pointer */
1962 const ImpAct * pImpAct; /* action map array */
1963 int32_t startON; /* start of ON sequence */
1964 int32_t startL2EN; /* start of level 2 sequence */
1965 int32_t lastStrongRTL; /* index of last found R or AL */
1966 int32_t state; /* current state */
1967 int32_t runStart; /* start position of the run */
1968 UBiDiLevel runLevel; /* run level before implicit solving */
1969 } LevState;
1970
1971 /*------------------------------------------------------------------------*/
1972
1973 static void
1974 addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag)
1975 /* param pos: position where to insert
1976 param flag: one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER
1977 */
1978 {
1979 #define FIRSTALLOC 10
1980 Point point;
1981 InsertPoints * pInsertPoints=&(pBiDi->insertPoints);
1982
1983 if (pInsertPoints->capacity == 0)
1984 {
1985 pInsertPoints->points=static_cast<Point *>(uprv_malloc(sizeof(Point)*FIRSTALLOC));
1986 if (pInsertPoints->points == NULL)
1987 {
1988 pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
1989 return;
1990 }
1991 pInsertPoints->capacity=FIRSTALLOC;
1992 }
1993 if (pInsertPoints->size >= pInsertPoints->capacity) /* no room for new point */
1994 {
1995 Point * savePoints=pInsertPoints->points;
1996 pInsertPoints->points=static_cast<Point *>(uprv_realloc(pInsertPoints->points,
1997 pInsertPoints->capacity*2*sizeof(Point)));
1998 if (pInsertPoints->points == NULL)
1999 {
2000 pInsertPoints->points=savePoints;
2001 pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
2002 return;
2003 }
2004 else pInsertPoints->capacity*=2;
2005 }
2006 point.pos=pos;
2007 point.flag=flag;
2008 pInsertPoints->points[pInsertPoints->size]=point;
2009 pInsertPoints->size++;
2010 #undef FIRSTALLOC
2011 }
2012
2013 static void
2014 setLevelsOutsideIsolates(UBiDi *pBiDi, int32_t start, int32_t limit, UBiDiLevel level)
2015 {
2016 DirProp *dirProps=pBiDi->dirProps, dirProp;
2017 uint16_t *dirInsert = pBiDi->dirInsert; /* may be NULL */
2018 UBiDiLevel *levels=pBiDi->levels;
2019 int32_t dirInsertValue;
2020 int8_t dirInsertIndex; /* position within dirInsertValue, if any */
2021 int32_t isolateCount=0, k;
2022 dirInsertValue = 0;
2023 dirInsertIndex = -1; /* indicate that we have not checked dirInsert yet */
2024 for(k=start; k<limit; k++) {
2025 if (dirInsert != NULL && dirInsertIndex < 0) {
2026 dirInsertValue = dirInsert[k];
2027 }
2028 if (dirInsertValue > 0) {
2029 dirInsertIndex++;
2030 dirProp = (DirProp)stdDirFromInsertDir[dirInsertValue & 0x000F];
2031 dirInsertValue >>= 4;
2032 } else {
2033 dirInsertIndex = -1;
2034 dirProp=dirProps[k];
2035 }
2036 if(dirProp==PDI)
2037 isolateCount--;
2038 if(isolateCount==0)
2039 levels[k]=level;
2040 if(dirProp==LRI || dirProp==RLI)
2041 isolateCount++;
2042 }
2043 }
2044
2045 /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
2046
2047 /*
2048 * This implementation of the (Wn) rules applies all rules in one pass.
2049 * In order to do so, it needs a look-ahead of typically 1 character
2050 * (except for W5: sequences of ET) and keeps track of changes
2051 * in a rule Wp that affect a later Wq (p<q).
2052 *
2053 * The (Nn) and (In) rules are also performed in that same single loop,
2054 * but effectively one iteration behind for white space.
2055 *
2056 * Since all implicit rules are performed in one step, it is not necessary
2057 * to actually store the intermediate directional properties in dirProps[].
2058 */
2059
/*
 * Feed one property sequence to the levels state machine in *pLevState.
 *
 * The cell at [current state][_prop] of the active ImpTab gives the next
 * state (GET_STATE) and a table-local action (GET_ACTION). The action is
 * translated through the active ImpAct into one of the cases of the switch
 * below. Afterwards, if the Res value of the new state is non-zero, or an
 * action moved start back before its original value, levels[start..limit-1]
 * are set to runLevel + Res (skipping nested isolates when start lies
 * before the run start).
 *
 * pBiDi      supplies levels[], dirProps[], insertPoints and reorderingMode
 * pLevState  state of the machine; read and updated
 * _prop      column index into the levels table
 * start      first index of the sequence
 * limit      index just past the sequence (callers also pass start==limit)
 */
2060 static void
2061 processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop,
2062 int32_t start, int32_t limit) {
2063 uint8_t cell, oldStateSeq, actionSeq;
2064 const ImpTab * pImpTab=pLevState->pImpTab;
2065 const ImpAct * pImpAct=pLevState->pImpAct;
2066 UBiDiLevel * levels=pBiDi->levels;
2067 UBiDiLevel level, addLevel;
2068 InsertPoints * pInsertPoints;
2069 int32_t start0, k;
2070
2071 start0=start; /* save original start position */
2072 oldStateSeq=(uint8_t)pLevState->state;
2073 cell=(*pImpTab)[oldStateSeq][_prop];
2074 pLevState->state=GET_STATE(cell); /* isolate the new state */
2075 actionSeq=(*pImpAct)[GET_ACTION(cell)]; /* isolate the action */
/* addLevel is read from the row of the NEW state */
2076 addLevel=(*pImpTab)[pLevState->state][IMPTABLEVELS_RES];
2077
2078 if(actionSeq) {
2079 switch(actionSeq) {
2080 case 1: /* init ON seq */
2081 pLevState->startON=start0;
2082 break;
2083
2084 case 2: /* prepend ON seq to current seq */
2085 start=pLevState->startON;
2086 break;
2087
2088 case 3: /* EN/AN after R+ON */
2089 level=pLevState->runLevel+1;
2090 setLevelsOutsideIsolates(pBiDi, pLevState->startON, start0, level);
2091 break;
2092
2093 case 4: /* EN/AN before R for NUMBERS_SPECIAL */
2094 level=pLevState->runLevel+2;
2095 setLevelsOutsideIsolates(pBiDi, pLevState->startON, start0, level);
2096 break;
2097
2098 case 5: /* L or S after possible relevant EN/AN */
2099 /* check if we had EN after R/AL */
2100 if (pLevState->startL2EN >= 0) {
2101 addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
2102 }
2103 pLevState->startL2EN=-1; /* not within previous if since could also be -2 */
2104 /* check if we had any relevant EN/AN after R/AL */
2105 pInsertPoints=&(pBiDi->insertPoints);
2106 if ((pInsertPoints->capacity == 0) ||
2107 (pInsertPoints->size <= pInsertPoints->confirmed))
2108 {
2109 /* nothing, just clean up */
2110 pLevState->lastStrongRTL=-1;
2111 /* check if we have a pending conditional segment */
2112 level=(*pImpTab)[oldStateSeq][IMPTABLEVELS_RES];
2113 if ((level & 1) && (pLevState->startON > 0)) { /* after ON */
2114 start=pLevState->startON; /* reset to basic run level */
2115 }
2116 if (_prop == DirProp_S) /* add LRM before S */
2117 {
2118 addPoint(pBiDi, start0, LRM_BEFORE);
2119 pInsertPoints->confirmed=pInsertPoints->size;
2120 }
2121 break;
2122 }
2123 /* reset previous RTL cont to level for LTR text */
2124 for (k=pLevState->lastStrongRTL+1; k<start0; k++)
2125 {
2126 /* reset odd level, leave runLevel+2 as is */
2127 levels[k]=(levels[k] - 2) & ~1;
2128 }
2129 /* mark insert points as confirmed */
2130 pInsertPoints->confirmed=pInsertPoints->size;
2131 pLevState->lastStrongRTL=-1;
2132 if (_prop == DirProp_S) /* add LRM before S */
2133 {
2134 addPoint(pBiDi, start0, LRM_BEFORE);
2135 pInsertPoints->confirmed=pInsertPoints->size;
2136 }
2137 break;
2138
2139 case 6: /* R/AL after possible relevant EN/AN */
2140 /* just clean up */
2141 pInsertPoints=&(pBiDi->insertPoints);
2142 if (pInsertPoints->capacity > 0)
2143 /* remove all non confirmed insert points */
2144 pInsertPoints->size=pInsertPoints->confirmed;
2145 pLevState->startON=-1;
2146 pLevState->startL2EN=-1;
2147 pLevState->lastStrongRTL=limit - 1;
2148 break;
2149
2150 case 7: /* EN/AN after R/AL + possible cont */
2151 /* check for real AN */
2152 if ((_prop == DirProp_AN) && (pBiDi->dirProps[start0] == AN) &&
2153 (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))
2154 {
2155 /* real AN */
2156 if (pLevState->startL2EN == -1) /* if no relevant EN already found */
2157 {
2158 /* just note the rightmost digit as a strong RTL */
2159 pLevState->lastStrongRTL=limit - 1;
2160 break;
2161 }
2162 if (pLevState->startL2EN >= 0) /* after EN, no AN */
2163 {
2164 addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
2165 pLevState->startL2EN=-2;
2166 }
2167 /* note AN */
2168 addPoint(pBiDi, start0, LRM_BEFORE);
2169 break;
2170 }
2171 /* if first EN/AN after R/AL */
2172 if (pLevState->startL2EN == -1) {
2173 pLevState->startL2EN=start0;
2174 }
2175 break;
2176
2177 case 8: /* note location of latest R/AL */
2178 pLevState->lastStrongRTL=limit - 1;
2179 pLevState->startON=-1;
2180 break;
2181
2182 case 9: /* L after R+ON/EN/AN */
2183 /* include possible adjacent number on the left */
/* scan left for the nearest position with an odd level (empty loop body) */
2184 for (k=start0-1; k>=0 && !(levels[k]&1); k--);
2185 if(k>=0) {
2186 addPoint(pBiDi, k, RLM_BEFORE); /* add RLM before */
2187 pInsertPoints=&(pBiDi->insertPoints);
2188 pInsertPoints->confirmed=pInsertPoints->size; /* confirm it */
2189 }
2190 pLevState->startON=start0;
2191 break;
2192
2193 case 10: /* AN after L */
2194 /* AN numbers between L text on both sides may be trouble. */
2195 /* tentatively bracket with LRMs; will be confirmed if followed by L */
2196 addPoint(pBiDi, start0, LRM_BEFORE); /* add LRM before */
2197 addPoint(pBiDi, start0, LRM_AFTER); /* add LRM after */
2198 break;
2199
2200 case 11: /* R after L+ON/EN/AN */
2201 /* false alert, infirm LRMs around previous AN */
2202 pInsertPoints=&(pBiDi->insertPoints);
2203 pInsertPoints->size=pInsertPoints->confirmed;
2204 if (_prop == DirProp_S) /* add RLM before S */
2205 {
2206 addPoint(pBiDi, start0, RLM_BEFORE);
2207 pInsertPoints->confirmed=pInsertPoints->size;
2208 }
2209 break;
2210
2211 case 12: /* L after L+ON/AN */
2212 level=pLevState->runLevel + addLevel;
/* raise (never lower) the levels of the pending sequence */
2213 for(k=pLevState->startON; k<start0; k++) {
2214 if (levels[k]<level)
2215 levels[k]=level;
2216 }
2217 pInsertPoints=&(pBiDi->insertPoints);
2218 pInsertPoints->confirmed=pInsertPoints->size; /* confirm inserts */
2219 pLevState->startON=start0;
2220 break;
2221
2222 case 13: /* L after L+ON+EN/AN/ON */
2223 level=pLevState->runLevel;
/* NOTE(review): the two inner while loops decrement k without testing
 * it against startON or 0; presumably the levels written earlier
 * guarantee they stop in range -- confirm. */
2224 for(k=start0-1; k>=pLevState->startON; k--) {
2225 if(levels[k]==level+3) {
2226 while(levels[k]==level+3) {
2227 levels[k--]-=2;
2228 }
2229 while(levels[k]==level) {
2230 k--;
2231 }
2232 }
2233 if(levels[k]==level+2) {
2234 levels[k]=level;
2235 continue;
2236 }
2237 levels[k]=level+1;
2238 }
2239 break;
2240
2241 case 14: /* R after L+ON+EN/AN/ON */
2242 level=pLevState->runLevel+1;
2243 for(k=start0-1; k>=pLevState->startON; k--) {
2244 if(levels[k]>level) {
2245 levels[k]-=2;
2246 }
2247 }
2248 break;
2249
2250 default: /* we should never get here */
2251 U_ASSERT(FALSE);
2252 break;
2253 }
2254 }
/* assign the level of the new state to the (possibly extended) sequence */
2255 if((addLevel) || (start < start0)) {
2256 level=pLevState->runLevel + addLevel;
2257 if(start>=pLevState->runStart) {
2258 for(k=start; k<limit; k++) {
2259 levels[k]=level;
2260 }
2261 } else {
/* sequence reaches back before this run: skip nested isolates */
2262 setLevelsOutsideIsolates(pBiDi, start, limit, level);
2263 }
2264 }
2265 }
2266
2267 /**
2268 * Returns the directionality of the last strong character at the end of the prologue, if any.
2269 * Requires prologue!=null.
2270 */
2271 static DirProp
2272 lastL_R_AL(UBiDi *pBiDi) {
2273 const UChar *text=pBiDi->prologue;
2274 int32_t length=pBiDi->proLength;
2275 int32_t i;
2276 UChar32 uchar;
2277 DirProp dirProp;
2278 for(i=length; i>0; ) {
2279 /* i is decremented by U16_PREV */
2280 U16_PREV(text, 0, i, uchar);
2281 dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
2282 if(dirProp==L) {
2283 return DirProp_L;
2284 }
2285 if(dirProp==R || dirProp==AL) {
2286 return DirProp_R;
2287 }
2288 if(dirProp==B) {
2289 return DirProp_ON;
2290 }
2291 }
2292 return DirProp_ON;
2293 }
2294
2295 /**
2296 * Returns the directionality of the first strong character, or digit, in the epilogue, if any.
2297 * Requires epilogue!=null.
2298 */
2299 static DirProp
2300 firstL_R_AL_EN_AN(UBiDi *pBiDi) {
2301 const UChar *text=pBiDi->epilogue;
2302 int32_t length=pBiDi->epiLength;
2303 int32_t i;
2304 UChar32 uchar;
2305 DirProp dirProp;
2306 for(i=0; i<length; ) {
2307 /* i is incremented by U16_NEXT */
2308 U16_NEXT(text, i, length, uchar);
2309 dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
2310 if(dirProp==L) {
2311 return DirProp_L;
2312 }
2313 if(dirProp==R || dirProp==AL) {
2314 return DirProp_R;
2315 }
2316 if(dirProp==EN) {
2317 return DirProp_EN;
2318 }
2319 if(dirProp==AN) {
2320 return DirProp_AN;
2321 }
2322 }
2323 return DirProp_ON;
2324 }
2325
/*
 * Resolve the implicit levels of the run [start, limit).
 *
 * Each character's DirProp is mapped through groupProp[] and fed to the
 * properties state machine (impTabProps). The actions of that machine
 * delimit property sequences, which are passed to processPropertySeq()
 * for level assignment.
 *
 * sor  property fed to the levels machine before the first sequence; when
 *      start==0 and a prologue is set, it is replaced by the last strong
 *      type of the prologue (unless that is DirProp_ON)
 * eor  property fed at i==limit and in the final flush; for the final
 *      flush only, when limit==pBiDi->length and an epilogue is set, it is
 *      replaced by the first strong/number type of the epilogue (unless
 *      that is DirProp_ON)
 *
 * If the run starts with PDI (or with an inserted PDI found in dirInsert)
 * and the isolates stack is not empty, the machine state saved for the
 * matching isolate initiator is restored instead of being initialised.
 * If the run ends with LRI/RLI before the end of the text, the machine
 * state is saved on the isolates stack instead of being flushed.
 */
2326 static void
2327 resolveImplicitLevels(UBiDi *pBiDi,
2328 int32_t start, int32_t limit,
2329 DirProp sor, DirProp eor) {
2330 const DirProp *dirProps=pBiDi->dirProps;
2331 uint16_t *dirInsert = pBiDi->dirInsert; /* may be NULL */
2332 DirProp dirProp;
2333 int32_t dirInsertValue;
2334 int8_t dirInsertIndex; /* position within dirInsertValue, if any */
2335 LevState levState;
2336 int32_t i, start1, start2;
2337 uint16_t oldStateImp, stateImp, actionImp;
2338 uint8_t gprop, resProp, cell;
2339 UBool inverseRTL;
2340 DirProp nextStrongProp=R;
2341 int32_t nextStrongPos=-1;
2342
2343 /* check for RTL inverse BiDi mode */
2344 /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to
2345 * loop on the text characters from end to start.
2346 * This would need a different properties state table (at least different
2347 * actions) and different levels state tables (maybe very similar to the
2348 * LTR corresponding ones).
2349 */
2350 inverseRTL=(UBool)
2351 ((start<pBiDi->lastArabicPos) && (GET_PARALEVEL(pBiDi, start) & 1) &&
2352 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
2353 pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL));
2354
2355 /* initialize for property and levels state tables */
2356 levState.startL2EN=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
2357 levState.lastStrongRTL=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
2358 levState.runStart=start;
2359 levState.runLevel=pBiDi->levels[start];
/* entry [0] of the pair for an even run level, entry [1] for an odd one */
2360 levState.pImpTab=(const ImpTab*)((pBiDi->pImpTabPair)->pImpTab)[levState.runLevel&1];
2361 levState.pImpAct=(const ImpAct*)((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel&1];
2362 if(start==0 && pBiDi->proLength>0) {
2363 DirProp lastStrong=lastL_R_AL(pBiDi);
2364 if(lastStrong!=DirProp_ON) {
2365 sor=lastStrong;
2366 }
2367 }
2368 /* The isolates[] entries contain enough information to
2369 resume the bidi algorithm in the same state as it was
2370 when it was interrupted by an isolate sequence. */
/* look for an Insert_PDI field in dirInsert[start]; dirInsertValue stays
 * > 0 only if one is found */
2371 dirInsertValue = 0;
2372 if (dirInsert != NULL) {
2373 dirInsertValue = dirInsert[start];
2374 while (dirInsertValue > 0) {
2375 if ((dirInsertValue & 0x000F) == Insert_PDI) {
2376 break;
2377 }
2378 dirInsertValue >>= 4;
2379 }
2380 }
2381 if((dirProps[start]==PDI || dirInsertValue>0) && pBiDi->isolateCount >= 0) {
2382 levState.startON=pBiDi->isolates[pBiDi->isolateCount].startON;
2383 start1=pBiDi->isolates[pBiDi->isolateCount].start1;
2384 stateImp=pBiDi->isolates[pBiDi->isolateCount].stateImp;
2385 levState.state=pBiDi->isolates[pBiDi->isolateCount].state;
2386 pBiDi->isolateCount--;
2387 } else {
2388 levState.startON=-1;
2389 start1=start;
2390 if(dirProps[start]==NSM)
2391 stateImp = 1 + sor;
2392 else
2393 stateImp=0;
2394 levState.state=0;
2395 processPropertySeq(pBiDi, &levState, sor, start, start);
2396 }
2397 start2=start; /* to make Java compiler happy */
2398
/* i==limit is an extra iteration that feeds eor to close the last sequence */
2399 for(i=start; i<=limit; i++) {
2400 if(i>=limit) {
2401 int32_t k;
/* find the last property of the run that is not BN/explicit, also
 * considering properties inserted through dirInsert.
 * NOTE(review): dirProp is only assigned if the loop body runs or
 * k==start; presumably limit>start always holds -- confirm.
 * NOTE(review): if an inserted property is accepted at k==start+1,
 * the loop's k-- leaves k==start and the `if` below replaces dirProp
 * with dirProps[start] -- confirm this is intended. */
2402 dirInsertValue = 0;
2403 for(k=limit-1; k>start && dirInsertValue <= 0; k--) {
2404 dirProp = dirProps[k];
2405 if ((DIRPROP_FLAG(dirProp)&MASK_BN_EXPLICIT) == 0) {
2406 break;
2407 }
2408 dirProp = ON;
2409 if (dirInsert != NULL) {
2410 dirInsertValue = dirInsert[k];
2411 while (dirInsertValue > 0) {
2412 dirProp = (DirProp)stdDirFromInsertDir[dirInsertValue & 0x000F];
2413 if ((DIRPROP_FLAG(dirProp)&MASK_BN_EXPLICIT) == 0) {
2414 break;
2415 }
2416 dirInsertValue >>= 4;
2417 }
2418 }
2419 }
2420 if (k == start) {
2421 dirProp = dirProps[k];
2422 }
2423 if(dirProp==LRI || dirProp==RLI)
2424 break; /* no forced closing for sequence ending with LRI/RLI */
2425 gprop=eor;
2426 } else {
2427 DirProp prop, prop1;
2428 prop=dirProps[i];
2429 if(prop==B) {
2430 pBiDi->isolateCount=-1; /* current isolates stack entry == none */
2431 }
2432 if(inverseRTL) {
2433 if(prop==AL) {
2434 /* AL before EN does not make it AN */
2435 prop=R;
2436 } else if(prop==EN) {
/* the result of the forward search is cached in nextStrongProp /
 * nextStrongPos until i passes nextStrongPos */
2437 if(nextStrongPos<=i) {
2438 /* look for next strong char (L/R/AL) */
2439 int32_t j;
2440 nextStrongProp=R; /* set default */
2441 nextStrongPos=limit;
2442 for(j=i+1; j<limit; j++) {
2443 prop1=dirProps[j];
2444 if(prop1==L || prop1==R || prop1==AL) {
2445 nextStrongProp=prop1;
2446 nextStrongPos=j;
2447 break;
2448 }
2449 }
2450 }
2451 if(nextStrongProp==AL) {
2452 prop=AN;
2453 }
2454 }
2455 }
2456 gprop=groupProp[prop];
2457 }
/* advance the properties state machine */
2458 oldStateImp=stateImp;
2459 cell=impTabProps[oldStateImp][gprop];
2460 stateImp=GET_STATEPROPS(cell); /* isolate the new state */
2461 actionImp=GET_ACTIONPROPS(cell); /* isolate the action */
2462 if((i==limit) && (actionImp==0)) {
2463 /* there is an unprocessed sequence if its property == eor */
2464 actionImp=1; /* process the last sequence */
2465 }
2466 if(actionImp) {
/* the resolved property of a sequence comes from the OLD state's row */
2467 resProp=impTabProps[oldStateImp][IMPTABPROPS_RES];
2468 switch(actionImp) {
2469 case 1: /* process current seq1, init new seq1 */
2470 processPropertySeq(pBiDi, &levState, resProp, start1, i);
2471 start1=i;
2472 break;
2473 case 2: /* init new seq2 */
2474 start2=i;
2475 break;
2476 case 3: /* process seq1, process seq2, init new seq1 */
2477 processPropertySeq(pBiDi, &levState, resProp, start1, start2);
2478 processPropertySeq(pBiDi, &levState, DirProp_ON, start2, i);
2479 start1=i;
2480 break;
2481 case 4: /* process seq1, set seq1=seq2, init new seq2 */
2482 processPropertySeq(pBiDi, &levState, resProp, start1, start2);
2483 start1=start2;
2484 start2=i;
2485 break;
2486 default: /* we should never get here */
2487 U_ASSERT(FALSE);
2488 break;
2489 }
2490 }
2491 }
2492
2493 /* flush possible pending sequence, e.g. ON */
2494 if(limit==pBiDi->length && pBiDi->epiLength>0) {
2495 DirProp firstStrong=firstL_R_AL_EN_AN(pBiDi);
2496 if(firstStrong!=DirProp_ON) {
2497 eor=firstStrong;
2498 }
2499 }
2500
2501 /* look for the last char not a BN or LRE/RLE/LRO/RLO/PDF */
/* same backward scan as in the i>=limit branch of the loop above, with i
 * as the scan index; the same review notes apply */
2502 dirInsertValue = 0;
2503 for(i=limit-1; i>start && dirInsertValue <= 0; i--) {
2504 dirProp=dirProps[i];
2505 if ((DIRPROP_FLAG(dirProp)&MASK_BN_EXPLICIT) == 0) {
2506 break;
2507 }
2508 dirProp = ON;
2509 if (dirInsert != NULL) {
2510 dirInsertValue = dirInsert[i];
2511 while (dirInsertValue > 0) {
2512 dirProp = (DirProp)stdDirFromInsertDir[dirInsertValue & 0x000F];
2513 if ((DIRPROP_FLAG(dirProp)&MASK_BN_EXPLICIT) == 0) {
2514 break;
2515 }
2516 dirInsertValue >>= 4;
2517 }
2518 }
2519 }
2520 if (i == start) {
2521 dirProp=dirProps[i];
2522 }
2523 if((dirProp==LRI || dirProp==RLI) && limit<pBiDi->length) {
/* run ends with an isolate initiator: save the machine state so the run
 * starting with the matching PDI can resume from it */
2524 pBiDi->isolateCount++;
2525 pBiDi->isolates[pBiDi->isolateCount].stateImp=stateImp;
2526 pBiDi->isolates[pBiDi->isolateCount].state=levState.state;
2527 pBiDi->isolates[pBiDi->isolateCount].start1=start1;
2528 pBiDi->isolates[pBiDi->isolateCount].startON=levState.startON;
2529 }
2530 else
2531 processPropertySeq(pBiDi, &levState, eor, limit, limit);
2532 }
2533
2534 /* perform (L1) and (X9) ---------------------------------------------------- */
2535
2536 /*
2537 * Reset the embedding levels for some non-graphic characters (L1).
2538 * This function also sets appropriate levels for BN, and
2539 * explicit embedding types that are supposed to have been removed
2540 * from the paragraph in (X9).
2541 */
2542 static void
2543 adjustWSLevels(UBiDi *pBiDi) {
2544 const DirProp *dirProps=pBiDi->dirProps;
2545 UBiDiLevel *levels=pBiDi->levels;
2546 int32_t i;
2547
2548 if(pBiDi->flags&MASK_WS) {
2549 UBool orderParagraphsLTR=pBiDi->orderParagraphsLTR;
2550 Flags flag;
2551
2552 i=pBiDi->trailingWSStart;
2553 while(i>0) {
2554 /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
2555 while(i>0 && (flag=DIRPROP_FLAG(dirProps[--i]))&MASK_WS) {
2556 if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
2557 levels[i]=0;
2558 } else {
2559 levels[i]=GET_PARALEVEL(pBiDi, i);
2560 }
2561 }
2562
2563 /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
2564 /* here, i+1 is guaranteed to be <length */
2565 while(i>0) {
2566 flag=DIRPROP_FLAG(dirProps[--i]);
2567 if(flag&MASK_BN_EXPLICIT) {
2568 levels[i]=levels[i+1];
2569 } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
2570 levels[i]=0;
2571 break;
2572 } else if(flag&MASK_B_S) {
2573 levels[i]=GET_PARALEVEL(pBiDi, i);
2574 break;
2575 }
2576 }
2577 }
2578 }
2579 }
2580
2581 U_CAPI void U_EXPORT2
2582 ubidi_setContext(UBiDi *pBiDi,
2583 const UChar *prologue, int32_t proLength,
2584 const UChar *epilogue, int32_t epiLength,
2585 UErrorCode *pErrorCode) {
2586 /* check the argument values */
2587 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2588 if(pBiDi==NULL || proLength<-1 || epiLength<-1 ||
2589 (prologue==NULL && proLength!=0) || (epilogue==NULL && epiLength!=0)) {
2590 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2591 return;
2592 }
2593
2594 if(proLength==-1) {
2595 pBiDi->proLength=u_strlen(prologue);
2596 } else {
2597 pBiDi->proLength=proLength;
2598 }
2599 if(epiLength==-1) {
2600 pBiDi->epiLength=u_strlen(epilogue);
2601 } else {
2602 pBiDi->epiLength=epiLength;
2603 }
2604 pBiDi->prologue=prologue;
2605 pBiDi->epilogue=epilogue;
2606 }
2607
2608 static void
2609 setParaSuccess(UBiDi *pBiDi) {
2610 pBiDi->proLength=0; /* forget the last context */
2611 pBiDi->epiLength=0;
2612 pBiDi->pParaBiDi=pBiDi; /* mark successful setPara */
2613 }
2614
/* Smaller of two values. Arguments are evaluated more than once. */
#define BIDI_MIN(x, y)      ((y)>(x) ? (x) : (y))
/* Absolute value. The argument is evaluated more than once. */
#define BIDI_ABS(x)         ((x)<0 ? (-(x)) : (x))
2617
2618 static void
2619 setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length,
2620 UBiDiLevel paraLevel, UErrorCode *pErrorCode) {
2621 int32_t *runsOnlyMemory = NULL;
2622 int32_t *visualMap;
2623 UChar *visualText;
2624 int32_t saveLength, saveTrailingWSStart;
2625 const UBiDiLevel *levels;
2626 UBiDiLevel *saveLevels;
2627 UBiDiDirection saveDirection;
2628 UBool saveMayAllocateText;
2629 Run *runs;
2630 int32_t visualLength, i, j, visualStart, logicalStart,
2631 runCount, runLength, addedRuns, insertRemove,
2632 start, limit, step, indexOddBit, logicalPos,
2633 index0, index1;
2634 uint32_t saveOptions;
2635
2636 pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT;
2637 if(length==0) {
2638 ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
2639 goto cleanup3;
2640 }
2641 /* obtain memory for mapping table and visual text */
2642 runsOnlyMemory=static_cast<int32_t *>(uprv_malloc(length*(sizeof(int32_t)+sizeof(UChar)+sizeof(UBiDiLevel))));
2643 if(runsOnlyMemory==NULL) {
2644 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2645 goto cleanup3;
2646 }
2647 visualMap=runsOnlyMemory;
2648 visualText=(UChar *)&visualMap[length];
2649 saveLevels=(UBiDiLevel *)&visualText[length];
2650 saveOptions=pBiDi->reorderingOptions;
2651 if(saveOptions & UBIDI_OPTION_INSERT_MARKS) {
2652 pBiDi->reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
2653 pBiDi->reorderingOptions|=UBIDI_OPTION_REMOVE_CONTROLS;
2654 }
2655 paraLevel&=1; /* accept only 0 or 1 */
2656 ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
2657 if(U_FAILURE(*pErrorCode)) {
2658 goto cleanup3;
2659 }
2660 /* we cannot access directly pBiDi->levels since it is not yet set if
2661 * direction is not MIXED
2662 */
2663 levels=ubidi_getLevels(pBiDi, pErrorCode);
2664 uprv_memcpy(saveLevels, levels, (size_t)pBiDi->length*sizeof(UBiDiLevel));
2665 saveTrailingWSStart=pBiDi->trailingWSStart;
2666 saveLength=pBiDi->length;
2667 saveDirection=pBiDi->direction;
2668
2669 /* FOOD FOR THOUGHT: instead of writing the visual text, we could use
2670 * the visual map and the dirProps array to drive the second call
2671 * to ubidi_setPara (but must make provision for possible removal of
2672 * BiDi controls. Alternatively, only use the dirProps array via
2673 * customized classifier callback.
2674 */
2675 visualLength=ubidi_writeReordered(pBiDi, visualText, length,
2676 UBIDI_DO_MIRRORING, pErrorCode);
2677 ubidi_getVisualMap(pBiDi, visualMap, pErrorCode);
2678 if(U_FAILURE(*pErrorCode)) {
2679 goto cleanup2;
2680 }
2681 pBiDi->reorderingOptions=saveOptions;
2682
2683 pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_LIKE_DIRECT;
2684 paraLevel^=1;
2685 /* Because what we did with reorderingOptions, visualText may be shorter
2686 * than the original text. But we don't want the levels memory to be
2687 * reallocated shorter than the original length, since we need to restore
2688 * the levels as after the first call to ubidi_setpara() before returning.
2689 * We will force mayAllocateText to FALSE before the second call to
2690 * ubidi_setpara(), and will restore it afterwards.
2691 */
2692 saveMayAllocateText=pBiDi->mayAllocateText;
2693 pBiDi->mayAllocateText=FALSE;
2694 ubidi_setPara(pBiDi, visualText, visualLength, paraLevel, NULL, pErrorCode);
2695 pBiDi->mayAllocateText=saveMayAllocateText;
2696 ubidi_getRuns(pBiDi, pErrorCode);
2697 if(U_FAILURE(*pErrorCode)) {
2698 goto cleanup1;
2699 }
2700 /* check if some runs must be split, count how many splits */
2701 addedRuns=0;
2702 runCount=pBiDi->runCount;
2703 runs=pBiDi->runs;
2704 visualStart=0;
2705 for(i=0; i<runCount; i++, visualStart+=runLength) {
2706 runLength=runs[i].visualLimit-visualStart;
2707 if(runLength<2) {
2708 continue;
2709 }
2710 logicalStart=GET_INDEX(runs[i].logicalStart);
2711 for(j=logicalStart+1; j<logicalStart+runLength; j++) {
2712 index0=visualMap[j];
2713 index1=visualMap[j-1];
2714 if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
2715 addedRuns++;
2716 }
2717 }
2718 }
2719 if(addedRuns) {
2720 if(getRunsMemory(pBiDi, runCount+addedRuns)) {
2721 if(runCount==1) {
2722 /* because we switch from UBiDi.simpleRuns to UBiDi.runs */
2723 pBiDi->runsMemory[0]=runs[0];
2724 }
2725 runs=pBiDi->runs=pBiDi->runsMemory;
2726 pBiDi->runCount+=addedRuns;
2727 } else {
2728 goto cleanup1;
2729 }
2730 }
2731 /* split runs which are not consecutive in source text */
2732 for(i=runCount-1; i>=0; i--) {
2733 runLength= i==0 ? runs[0].visualLimit :
2734 runs[i].visualLimit-runs[i-1].visualLimit;
2735 logicalStart=runs[i].logicalStart;
2736 indexOddBit=GET_ODD_BIT(logicalStart);
2737 logicalStart=GET_INDEX(logicalStart);
2738 if(runLength<2) {
2739 if(addedRuns) {
2740 runs[i+addedRuns]=runs[i];
2741 }
2742 logicalPos=visualMap[logicalStart];
2743 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2744 saveLevels[logicalPos]^indexOddBit);
2745 continue;
2746 }
2747 if(indexOddBit) {
2748 start=logicalStart;
2749 limit=logicalStart+runLength-1;
2750 step=1;
2751 } else {
2752 start=logicalStart+runLength-1;
2753 limit=logicalStart;
2754 step=-1;
2755 }
2756 for(j=start; j!=limit; j+=step) {
2757 index0=visualMap[j];
2758 index1=visualMap[j+step];
2759 if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
2760 logicalPos=BIDI_MIN(visualMap[start], index0);
2761 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2762 saveLevels[logicalPos]^indexOddBit);
2763 runs[i+addedRuns].visualLimit=runs[i].visualLimit;
2764 runs[i].visualLimit-=BIDI_ABS(j-start)+1;
2765 insertRemove=runs[i].insertRemove&(LRM_AFTER|RLM_AFTER);
2766 runs[i+addedRuns].insertRemove=insertRemove;
2767 runs[i].insertRemove&=~insertRemove;
2768 start=j+step;
2769 addedRuns--;
2770 }
2771 }
2772 if(addedRuns) {
2773 runs[i+addedRuns]=runs[i];
2774 }
2775 logicalPos=BIDI_MIN(visualMap[start], visualMap[limit]);
2776 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2777 saveLevels[logicalPos]^indexOddBit);
2778 }
2779
2780 cleanup1:
2781 /* restore initial paraLevel */
2782 pBiDi->paraLevel^=1;
2783 cleanup2:
2784 /* restore real text */
2785 pBiDi->text=text;
2786 pBiDi->length=saveLength;
2787 pBiDi->originalLength=length;
2788 pBiDi->direction=saveDirection;
2789 /* the saved levels should never excess levelsSize, but we check anyway */
2790 if(saveLength>pBiDi->levelsSize) {
2791 saveLength=pBiDi->levelsSize;
2792 }
2793 uprv_memcpy(pBiDi->levels, saveLevels, (size_t)saveLength*sizeof(UBiDiLevel));
2794 pBiDi->trailingWSStart=saveTrailingWSStart;
2795 if(pBiDi->runCount>1) {
2796 pBiDi->direction=UBIDI_MIXED;
2797 }
2798 cleanup3:
2799 /* free memory for mapping table and visual text */
2800 uprv_free(runsOnlyMemory);
2801
2802 pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY;
2803 }
2804
2805 /* -------------------------------------------------------------------------- */
2806 /* internal proptotype */
2807
2808 static void
2809 ubidi_setParaInternal(UBiDi *pBiDi,
2810 const UChar *text, int32_t length,
2811 UBiDiLevel paraLevel,
2812 UBiDiLevel *embeddingLevels,
2813 const int32_t *offsets, int32_t offsetCount,
2814 const int32_t *controlStringIndices,
2815 const UChar * const * controlStrings,
2816 UErrorCode *pErrorCode);
2817
2818 /* ubidi_setPara ------------------------------------------------------------ */
2819
2820 U_CAPI void U_EXPORT2
2821 ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
2822 UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
2823 UErrorCode *pErrorCode) {
2824 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2825 ubidi_setParaInternal(pBiDi, text, length, paraLevel,
2826 embeddingLevels,
2827 NULL, 0, NULL, NULL,
2828 pErrorCode);
2829 }
2830
2831 /* ubidi_setParaWithControls ------------------------------------------------ */
2832
2833 U_CAPI void U_EXPORT2
2834 ubidi_setParaWithControls(UBiDi *pBiDi,
2835 const UChar *text, int32_t length,
2836 UBiDiLevel paraLevel,
2837 const int32_t *offsets, int32_t offsetCount,
2838 const int32_t *controlStringIndices,
2839 const UChar * const * controlStrings,
2840 UErrorCode *pErrorCode) {
2841 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2842 /* check the argument values that are not already checked in ubidi_setParaInternal */
2843 if ( offsetCount < 0 || (offsetCount > 0 && (offsets == NULL || controlStrings == NULL)) ) {
2844 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2845 return;
2846 }
2847 ubidi_setParaInternal(pBiDi, text, length, paraLevel,
2848 NULL,
2849 offsets, offsetCount, controlStringIndices, controlStrings,
2850 pErrorCode);
2851 }
2852
2853 /* ubidi_setParaInternal ---------------------------------------------------- */
2854
2855 void
2856 ubidi_setParaInternal(UBiDi *pBiDi,
2857 const UChar *text, int32_t length,
2858 UBiDiLevel paraLevel,
2859 UBiDiLevel *embeddingLevels,
2860 const int32_t *offsets, int32_t offsetCount,
2861 const int32_t *controlStringIndices,
2862 const UChar * const * controlStrings,
2863 UErrorCode *pErrorCode) {
2864 UBiDiDirection direction;
2865 DirProp *dirProps;
2866
2867 /* check the argument values (pErrorCode status alrecy checked before getting here) */
2868 if(pBiDi==NULL || text==NULL || length<-1 ||
2869 (paraLevel>UBIDI_MAX_EXPLICIT_LEVEL && paraLevel<UBIDI_DEFAULT_LTR)) {
2870 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2871 return;
2872 }
2873
2874 if(length==-1) {
2875 length=u_strlen(text);
2876 }
2877 if (offsetCount > 0 && pBiDi->reorderingMode > UBIDI_REORDER_GROUP_NUMBERS_WITH_R) {
2878 offsetCount = 0;
2879 }
2880
2881 /* special treatment for RUNS_ONLY mode */
2882 if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) {
2883 setParaRunsOnly(pBiDi, text, length, paraLevel, pErrorCode);
2884 return;
2885 }
2886
2887 /* initialize the UBiDi structure */
2888 pBiDi->pParaBiDi=NULL; /* mark unfinished setPara */
2889 pBiDi->text=text;
2890 pBiDi->length=pBiDi->originalLength=pBiDi->resultLength=length;
2891 pBiDi->paraLevel=paraLevel;
2892 pBiDi->direction=(UBiDiDirection)(paraLevel&1);
2893 pBiDi->paraCount=1;
2894
2895 pBiDi->dirInsert=NULL;
2896 pBiDi->dirProps=NULL;
2897 pBiDi->levels=NULL;
2898 pBiDi->runs=NULL;
2899 pBiDi->insertPoints.size=0; /* clean up from last call */
2900 pBiDi->insertPoints.confirmed=0; /* clean up from last call */
2901
2902 /*
2903 * Save the original paraLevel if contextual; otherwise, set to 0.
2904 */
2905 pBiDi->defaultParaLevel=IS_DEFAULT_LEVEL(paraLevel);
2906
2907 if(length==0) {
2908 /*
2909 * For an empty paragraph, create a UBiDi object with the paraLevel and
2910 * the flags and the direction set but without allocating zero-length arrays.
2911 * There is nothing more to do.
2912 */
2913 if(IS_DEFAULT_LEVEL(paraLevel)) {
2914 pBiDi->paraLevel&=1;
2915 pBiDi->defaultParaLevel=0;
2916 }
2917 pBiDi->flags=DIRPROP_FLAG_LR(paraLevel);
2918 pBiDi->runCount=0;
2919 pBiDi->paraCount=0;
2920 setParaSuccess(pBiDi); /* mark successful setPara */
2921 return;
2922 }
2923
2924 pBiDi->runCount=-1;
2925
2926 /* allocate paras memory */
2927 if(pBiDi->parasMemory)
2928 pBiDi->paras=pBiDi->parasMemory;
2929 else
2930 pBiDi->paras=pBiDi->simpleParas;
2931
2932 /*
2933 * Get the inserted directional properties
2934 * if necessary.
2935 */
2936 if (offsetCount > 0) {
2937 if(getDirInsertMemory(pBiDi, length)) {
2938 pBiDi->dirInsert=pBiDi->dirInsertMemory;
2939 if(!getDirInsert(pBiDi, offsets, offsetCount, controlStringIndices, controlStrings)) {
2940 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2941 return;
2942 }
2943 } else {
2944 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2945 return;
2946 }
2947 }
2948
2949 /*
2950 * Get the directional properties,
2951 * the flags bit-set, and
2952 * determine the paragraph level if necessary.
2953 */
2954 if(getDirPropsMemory(pBiDi, length)) {
2955 pBiDi->dirProps=pBiDi->dirPropsMemory;
2956 if(!getDirProps(pBiDi)) {
2957 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2958 return;
2959 }
2960 } else {
2961 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2962 return;
2963 }
2964 dirProps=pBiDi->dirProps;
2965 /* the processed length may have changed if UBIDI_OPTION_STREAMING */
2966 length= pBiDi->length;
2967 pBiDi->trailingWSStart=length; /* the levels[] will reflect the WS run */
2968
2969 /* are explicit levels specified? */
2970 if(embeddingLevels==NULL) {
2971 /* no: determine explicit levels according to the (Xn) rules */\
2972 if(getLevelsMemory(pBiDi, length)) {
2973 pBiDi->levels=pBiDi->levelsMemory;
2974 direction=resolveExplicitLevels(pBiDi, pErrorCode);
2975 if(U_FAILURE(*pErrorCode)) {
2976 return;
2977 }
2978 } else {
2979 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2980 return;
2981 }
2982 } else {
2983 /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */
2984 pBiDi->levels=embeddingLevels;
2985 direction=checkExplicitLevels(pBiDi, pErrorCode);
2986 if(U_FAILURE(*pErrorCode)) {
2987 return;
2988 }
2989 }
2990
2991 /* allocate isolate memory */
2992 if(pBiDi->isolateCount<=SIMPLE_ISOLATES_COUNT)
2993 pBiDi->isolates=pBiDi->simpleIsolates;
2994 else
2995 if((int32_t)(pBiDi->isolateCount*sizeof(Isolate))<=pBiDi->isolatesSize)
2996 pBiDi->isolates=pBiDi->isolatesMemory;
2997 else {
2998 if(getInitialIsolatesMemory(pBiDi, pBiDi->isolateCount)) {
2999 pBiDi->isolates=pBiDi->isolatesMemory;
3000 } else {
3001 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
3002 return;
3003 }
3004 }
3005 pBiDi->isolateCount=-1; /* current isolates stack entry == none */
3006
3007 /*
3008 * The steps after (X9) in the UBiDi algorithm are performed only if
3009 * the paragraph text has mixed directionality!
3010 */
3011 pBiDi->direction=direction;
3012 switch(direction) {
3013 case UBIDI_LTR:
3014 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
3015 pBiDi->trailingWSStart=0;
3016 break;
3017 case UBIDI_RTL:
3018 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
3019 pBiDi->trailingWSStart=0;
3020 break;
3021 default:
3022 /*
3023 * Choose the right implicit state table
3024 */
3025 switch(pBiDi->reorderingMode) {
3026 case UBIDI_REORDER_DEFAULT:
3027 pBiDi->pImpTabPair=&impTab_DEFAULT;
3028 break;
3029 case UBIDI_REORDER_NUMBERS_SPECIAL:
3030 pBiDi->pImpTabPair=&impTab_NUMBERS_SPECIAL;
3031 break;
3032 case UBIDI_REORDER_GROUP_NUMBERS_WITH_R:
3033 pBiDi->pImpTabPair=&impTab_GROUP_NUMBERS_WITH_R;
3034 break;
3035 case UBIDI_REORDER_INVERSE_NUMBERS_AS_L:
3036 pBiDi->pImpTabPair=&impTab_INVERSE_NUMBERS_AS_L;
3037 break;
3038 case UBIDI_REORDER_INVERSE_LIKE_DIRECT:
3039 if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
3040 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS;
3041 } else {
3042 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT;
3043 }
3044 break;
3045 case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL:
3046 if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
3047 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS;
3048 } else {
3049 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL;
3050 }
3051 break;
3052 default:
3053 /* we should never get here */
3054 U_ASSERT(FALSE);
3055 break;
3056 }
3057 /*
3058 * If there are no external levels specified and there
3059 * are no significant explicit level codes in the text,
3060 * then we can treat the entire paragraph as one run.
3061 * Otherwise, we need to perform the following rules on runs of
3062 * the text with the same embedding levels. (X10)
3063 * "Significant" explicit level codes are ones that actually
3064 * affect non-BN characters.
3065 * Examples for "insignificant" ones are empty embeddings
3066 * LRE-PDF, LRE-RLE-PDF-PDF, etc.
3067 */
3068 if(embeddingLevels==NULL && pBiDi->paraCount<=1 &&
3069 !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) {
3070 resolveImplicitLevels(pBiDi, 0, length,
3071 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, 0)),
3072 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, length-1)));
3073 } else {
3074 /* sor, eor: start and end types of same-level-run */
3075 UBiDiLevel *levels=pBiDi->levels;
3076 int32_t start, limit=0;
3077 UBiDiLevel level, nextLevel;
3078 DirProp sor, eor;
3079
3080 /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
3081 level=GET_PARALEVEL(pBiDi, 0);
3082 nextLevel=levels[0];
3083 if(level<nextLevel) {
3084 eor=GET_LR_FROM_LEVEL(nextLevel);
3085 } else {
3086 eor=GET_LR_FROM_LEVEL(level);
3087 }
3088
3089 do {
3090 /* determine start and limit of the run (end points just behind the run) */
3091
3092 /* the values for this run's start are the same as for the previous run's end */
3093 start=limit;
3094 level=nextLevel;
3095 if((start>0) && (dirProps[start-1]==B)) {
3096 /* except if this is a new paragraph, then set sor = para level */
3097 sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start));
3098 } else {
3099 sor=eor;
3100 }
3101
3102 /* search for the limit of this run */
3103 while((++limit<length) &&
3104 ((levels[limit]==level) ||
3105 (DIRPROP_FLAG(dirProps[limit])&MASK_BN_EXPLICIT))) {}
3106
3107 /* get the correct level of the next run */
3108 if(limit<length) {
3109 nextLevel=levels[limit];
3110 } else {
3111 nextLevel=GET_PARALEVEL(pBiDi, length-1);
3112 }
3113
3114 /* determine eor from max(level, nextLevel); sor is last run's eor */
3115 if(NO_OVERRIDE(level)<NO_OVERRIDE(nextLevel)) {
3116 eor=GET_LR_FROM_LEVEL(nextLevel);
3117 } else {
3118 eor=GET_LR_FROM_LEVEL(level);
3119 }
3120
3121 /* if the run consists of overridden directional types, then there
3122 are no implicit types to be resolved */
3123 if(!(level&UBIDI_LEVEL_OVERRIDE)) {
3124 resolveImplicitLevels(pBiDi, start, limit, sor, eor);
3125 } else {
3126 /* remove the UBIDI_LEVEL_OVERRIDE flags */
3127 do {
3128 levels[start++]&=~UBIDI_LEVEL_OVERRIDE;
3129 } while(start<limit);
3130 }
3131 } while(limit<length);
3132 }
3133 /* check if we got any memory shortage while adding insert points */
3134 if (U_FAILURE(pBiDi->insertPoints.errorCode))
3135 {
3136 *pErrorCode=pBiDi->insertPoints.errorCode;
3137 return;
3138 }
3139 /* reset the embedding levels for some non-graphic characters (L1), (X9) */
3140 adjustWSLevels(pBiDi);
3141 break;
3142 }
3143 /* add RLM for inverse Bidi with contextual orientation resolving
3144 * to RTL which would not round-trip otherwise
3145 */
3146 if((pBiDi->defaultParaLevel>0) &&
3147 (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) &&
3148 ((pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) ||
3149 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) {
3150 int32_t i, j, start, last;
3151 UBiDiLevel level;
3152 DirProp dirProp;
3153 for(i=0; i<pBiDi->paraCount; i++) {
3154 last=(pBiDi->paras[i].limit)-1;
3155 level=pBiDi->paras[i].level;
3156 if(level==0)
3157 continue; /* LTR paragraph */
3158 start= i==0 ? 0 : pBiDi->paras[i-1].limit;
3159 for(j=last; j>=start; j--) {
3160 dirProp=dirProps[j];
3161 if(dirProp==L) {
3162 if(j<last) {
3163 while(dirProps[last]==B) {
3164 last--;
3165 }
3166 }
3167 addPoint(pBiDi, last, RLM_BEFORE);
3168 break;
3169 }
3170 if(DIRPROP_FLAG(dirProp) & MASK_R_AL) {
3171 break;
3172 }
3173 }
3174 }
3175 }
3176
3177 if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
3178 pBiDi->resultLength -= pBiDi->controlCount;
3179 } else {
3180 pBiDi->resultLength += pBiDi->insertPoints.size;
3181 }
3182 setParaSuccess(pBiDi); /* mark successful setPara */
3183 }
3184
3185 /* -------------------------------------------------------------------------- */
3186
3187 U_CAPI void U_EXPORT2
3188 ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR) {
3189 if(pBiDi!=NULL) {
3190 pBiDi->orderParagraphsLTR=orderParagraphsLTR;
3191 }
3192 }
3193
3194 U_CAPI UBool U_EXPORT2
3195 ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) {
3196 if(pBiDi!=NULL) {
3197 return pBiDi->orderParagraphsLTR;
3198 } else {
3199 return FALSE;
3200 }
3201 }
3202
3203 U_CAPI UBiDiDirection U_EXPORT2
3204 ubidi_getDirection(const UBiDi *pBiDi) {
3205 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
3206 return pBiDi->direction;
3207 } else {
3208 return UBIDI_LTR;
3209 }
3210 }
3211
3212 U_CAPI const UChar * U_EXPORT2
3213 ubidi_getText(const UBiDi *pBiDi) {
3214 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
3215 return pBiDi->text;
3216 } else {
3217 return NULL;
3218 }
3219 }
3220
3221 U_CAPI int32_t U_EXPORT2
3222 ubidi_getLength(const UBiDi *pBiDi) {
3223 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
3224 return pBiDi->originalLength;
3225 } else {
3226 return 0;
3227 }
3228 }
3229
3230 U_CAPI int32_t U_EXPORT2
3231 ubidi_getProcessedLength(const UBiDi *pBiDi) {
3232 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
3233 return pBiDi->length;
3234 } else {
3235 return 0;
3236 }
3237 }
3238
3239 U_CAPI int32_t U_EXPORT2
3240 ubidi_getResultLength(const UBiDi *pBiDi) {
3241 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
3242 return pBiDi->resultLength;
3243 } else {
3244 return 0;
3245 }
3246 }
3247
3248 /* paragraphs API functions ------------------------------------------------- */
3249
3250 U_CAPI UBiDiLevel U_EXPORT2
3251 ubidi_getParaLevel(const UBiDi *pBiDi) {
3252 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
3253 return pBiDi->paraLevel;
3254 } else {
3255 return 0;
3256 }
3257 }
3258
3259 U_CAPI int32_t U_EXPORT2
3260 ubidi_countParagraphs(UBiDi *pBiDi) {
3261 if(!IS_VALID_PARA_OR_LINE(pBiDi)) {
3262 return 0;
3263 } else {
3264 return pBiDi->paraCount;
3265 }
3266 }
3267
3268 U_CAPI void U_EXPORT2
3269 ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex,
3270 int32_t *pParaStart, int32_t *pParaLimit,
3271 UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
3272 int32_t paraStart;
3273
3274 /* check the argument values */
3275 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
3276 RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode);
3277 RETURN_VOID_IF_BAD_RANGE(paraIndex, 0, pBiDi->paraCount, *pErrorCode);
3278
3279 pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */
3280 if(paraIndex) {
3281 paraStart=pBiDi->paras[paraIndex-1].limit;
3282 } else {
3283 paraStart=0;
3284 }
3285 if(pParaStart!=NULL) {
3286 *pParaStart=paraStart;
3287 }
3288 if(pParaLimit!=NULL) {
3289 *pParaLimit=pBiDi->paras[paraIndex].limit;
3290 }
3291 if(pParaLevel!=NULL) {
3292 *pParaLevel=GET_PARALEVEL(pBiDi, paraStart);
3293 }
3294 }
3295
3296 U_CAPI int32_t U_EXPORT2
3297 ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex,
3298 int32_t *pParaStart, int32_t *pParaLimit,
3299 UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
3300 int32_t paraIndex;
3301
3302 /* check the argument values */
3303 /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */
3304 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
3305 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
3306 pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */
3307 RETURN_IF_BAD_RANGE(charIndex, 0, pBiDi->length, *pErrorCode, -1);
3308
3309 for(paraIndex=0; charIndex>=pBiDi->paras[paraIndex].limit; paraIndex++);
3310 ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode);
3311 return paraIndex;
3312 }
3313
3314 U_CAPI void U_EXPORT2
3315 ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn,
3316 const void *newContext, UBiDiClassCallback **oldFn,
3317 const void **oldContext, UErrorCode *pErrorCode)
3318 {
3319 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
3320 if(pBiDi==NULL) {
3321 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
3322 return;
3323 }
3324 if( oldFn )
3325 {
3326 *oldFn = pBiDi->fnClassCallback;
3327 }
3328 if( oldContext )
3329 {
3330 *oldContext = pBiDi->coClassCallback;
3331 }
3332 pBiDi->fnClassCallback = newFn;
3333 pBiDi->coClassCallback = newContext;
3334 }
3335
3336 U_CAPI void U_EXPORT2
3337 ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context)
3338 {
3339 if(pBiDi==NULL) {
3340 return;
3341 }
3342 if( fn )
3343 {
3344 *fn = pBiDi->fnClassCallback;
3345 }
3346 if( context )
3347 {
3348 *context = pBiDi->coClassCallback;
3349 }
3350 }
3351
3352 U_CAPI UCharDirection U_EXPORT2
3353 ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c)
3354 {
3355 UCharDirection dir;
3356
3357 if( pBiDi->fnClassCallback == NULL ||
3358 (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT )
3359 {
3360 dir = ubidi_getClass(pBiDi->bdp, c);
3361 }
3362 if(dir >= U_CHAR_DIRECTION_COUNT) {
3363 dir = (UCharDirection)ON;
3364 }
3365 return dir;
3366 }