1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 ******************************************************************************
6 * Copyright (C) 1999-2015, International Business Machines
7 * Corporation and others. All Rights Reserved.
9 ******************************************************************************
12 * tab size: 8 (not used)
15 * created on: 1999jul27
16 * created by: Markus W. Scherer, updated by Matitiahu Allouche
21 #include "unicode/utypes.h"
22 #include "unicode/ustring.h"
23 #include "unicode/uchar.h"
24 #include "unicode/ubidi.h"
25 #include "unicode/utf16.h"
26 #include "ubidi_props.h"
31 * General implementation notes:
33 * Throughout the implementation, there are comments like (W2) that refer to
34 * rules of the BiDi algorithm, in this example to the second rule of the
35 * resolution of weak types.
37 * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
38 * character according to UTF-16, the second UChar gets the directional property of
39 * the entire character assigned, while the first one gets a BN, a boundary
40 * neutral, type, which is ignored by most of the algorithm according to
41 * rule (X9) and the implementation suggestions of the BiDi algorithm.
43 * Later, adjustWSLevels() will set the level for each BN to that of the
44 * following character (UChar), which results in surrogate pairs getting the
45 * same level on each of their surrogates.
47 * In a UTF-8 implementation, the same thing could be done: the last byte of
48 * a multi-byte sequence would get the "real" property, while all previous
49 * bytes of that sequence would get BN.
51 * It is not possible to assign all those parts of a character the same real
52 * property because this would fail in the resolution of weak types with rules
53 * that look at immediately surrounding types.
55 * As a related topic, this implementation does not remove Boundary Neutral
56 * types from the input, but ignores them wherever this is relevant.
57 * For example, the loop for the resolution of the weak types reads
58 * types until it finds a non-BN.
59 * Also, explicit embedding codes are neither changed into BN nor removed.
60 * They are only treated the same way real BNs are.
61 * As stated before, adjustWSLevels() takes care of them at the end.
62 * For the purpose of conformance, the levels of all these codes do not matter.
65 * Note that this implementation modifies the dirProps
66 * after the initial setup, when applying X5c (replace FSI by LRI or RLI),
67 * X6, N0 (replace paired brackets by L or R).
69 * In this implementation, the resolution of weak types (W1 to W6),
70 * neutrals (N1 and N2), and the assignment of the resolved level (In)
71 * are all done in one single loop, in resolveImplicitLevels().
72 * Changes of dirProp values are done on the fly, without writing
73 * them back to the dirProps array.
76 * This implementation contains code that allows to bypass steps of the
77 * algorithm that are not needed on the specific paragraph
78 * in order to speed up the most common cases considerably,
79 * like text that is entirely LTR, or RTL text without numbers.
81 * Most of this is done by setting a bit for each directional property
82 * in a flags variable and later checking for whether there are
83 * any LTR characters or any RTL characters, or both, whether
84 * there are any explicit embedding codes, etc.
86 * If the (Xn) steps are performed, then the flags are re-evaluated,
87 * because they will then not contain the embedding codes any more
88 * and will be adjusted for override codes, so that subsequently
89 * more bypassing may be possible than what the initial flags suggested.
91 * If the text is not mixed-directional, then the
92 * algorithm steps for the weak type resolution are not performed,
93 * and all levels are set to the paragraph level.
95 * If there are no explicit embedding codes, then the (Xn) steps are not performed.
98 * If embedding levels are supplied as a parameter, then all
99 * explicit embedding codes are ignored, and the (Xn) steps are not performed.
102 * White Space types could get the level of the run they belong to,
103 * and are checked with a test of (flags&MASK_EMBEDDING) to
104 * consider if the paragraph direction should be considered in
105 * the flags variable.
107 * If there are no White Space types in the paragraph, then
108 * (L1) is not necessary in adjustWSLevels().
111 /* to avoid some conditional statements, use tiny constant arrays */
112 static const Flags flagLR
[2]={ DIRPROP_FLAG(L
), DIRPROP_FLAG(R
) };
113 static const Flags flagE
[2]={ DIRPROP_FLAG(LRE
), DIRPROP_FLAG(RLE
) };
114 static const Flags flagO
[2]={ DIRPROP_FLAG(LRO
), DIRPROP_FLAG(RLO
) };
116 #define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
117 #define DIRPROP_FLAG_E(level) flagE[(level)&1]
118 #define DIRPROP_FLAG_O(level) flagO[(level)&1]
120 #define DIR_FROM_STRONG(strong) ((strong)==L ? L : R)
122 #define NO_OVERRIDE(level) ((level)&~UBIDI_LEVEL_OVERRIDE)
124 /* UBiDi object management -------------------------------------------------- */
126 U_CAPI UBiDi
* U_EXPORT2
129 UErrorCode errorCode
=U_ZERO_ERROR
;
130 return ubidi_openSized(0, 0, &errorCode
);
133 U_CAPI UBiDi
* U_EXPORT2
134 ubidi_openSized(int32_t maxLength
, int32_t maxRunCount
, UErrorCode
*pErrorCode
) {
137 /* check the argument values */
138 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
140 } else if(maxLength
<0 || maxRunCount
<0) {
141 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
142 return NULL
; /* invalid arguments */
145 /* allocate memory for the object */
146 pBiDi
=(UBiDi
*)uprv_malloc(sizeof(UBiDi
));
148 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
152 /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */
153 uprv_memset(pBiDi
, 0, sizeof(UBiDi
));
155 /* get BiDi properties */
156 pBiDi
->bdp
=ubidi_getSingleton();
158 /* allocate memory for arrays as requested */
160 if( !getInitialDirPropsMemory(pBiDi
, maxLength
) ||
161 !getInitialLevelsMemory(pBiDi
, maxLength
)
163 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
166 pBiDi
->mayAllocateText
=TRUE
;
171 /* use simpleRuns[] */
172 pBiDi
->runsSize
=sizeof(Run
);
173 } else if(!getInitialRunsMemory(pBiDi
, maxRunCount
)) {
174 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
177 pBiDi
->mayAllocateRuns
=TRUE
;
180 if(U_SUCCESS(*pErrorCode
)) {
189 * We are allowed to allocate memory if memory==NULL or
190 * mayAllocate==TRUE for each array that we need.
191 * We also try to grow memory as needed if we
194 * Assume sizeNeeded>0.
195 * If *pMemory!=NULL, then assume *pSize>0.
197 * ### this realloc() may unnecessarily copy the old data,
198 * which we know we don't need any more;
199 * is this the best way to do this??
202 ubidi_getMemory(BidiMemoryForAllocation
*bidiMem
, int32_t *pSize
, UBool mayAllocate
, int32_t sizeNeeded
) {
203 void **pMemory
= (void **)bidiMem
;
204 /* check for existing memory */
206 /* we need to allocate memory */
207 if(mayAllocate
&& (*pMemory
=uprv_malloc(sizeNeeded
))!=NULL
) {
214 if(sizeNeeded
<=*pSize
) {
215 /* there is already enough memory */
218 else if(!mayAllocate
) {
219 /* not enough memory, and we must not allocate */
224 /* in most cases, we do not need the copy-old-data part of
225 * realloc, but it is needed when adding runs using getRunsMemory()
226 * in setParaRunsOnly()
228 if((memory
=uprv_realloc(*pMemory
, sizeNeeded
))!=NULL
) {
233 /* we failed to grow */
240 U_CAPI
void U_EXPORT2
241 ubidi_close(UBiDi
*pBiDi
) {
243 pBiDi
->pParaBiDi
=NULL
; /* in case one tries to reuse this block */
244 if(pBiDi
->dirInsertMemory
!=NULL
) {
245 uprv_free(pBiDi
->dirInsertMemory
);
247 if(pBiDi
->dirPropsMemory
!=NULL
) {
248 uprv_free(pBiDi
->dirPropsMemory
);
250 if(pBiDi
->levelsMemory
!=NULL
) {
251 uprv_free(pBiDi
->levelsMemory
);
253 if(pBiDi
->openingsMemory
!=NULL
) {
254 uprv_free(pBiDi
->openingsMemory
);
256 if(pBiDi
->parasMemory
!=NULL
) {
257 uprv_free(pBiDi
->parasMemory
);
259 if(pBiDi
->runsMemory
!=NULL
) {
260 uprv_free(pBiDi
->runsMemory
);
262 if(pBiDi
->isolatesMemory
!=NULL
) {
263 uprv_free(pBiDi
->isolatesMemory
);
265 if(pBiDi
->insertPoints
.points
!=NULL
) {
266 uprv_free(pBiDi
->insertPoints
.points
);
273 /* set to approximate "inverse BiDi" ---------------------------------------- */
275 U_CAPI
void U_EXPORT2
276 ubidi_setInverse(UBiDi
*pBiDi
, UBool isInverse
) {
278 pBiDi
->isInverse
=isInverse
;
279 pBiDi
->reorderingMode
= isInverse
? UBIDI_REORDER_INVERSE_NUMBERS_AS_L
280 : UBIDI_REORDER_DEFAULT
;
284 U_CAPI UBool U_EXPORT2
285 ubidi_isInverse(UBiDi
*pBiDi
) {
287 return pBiDi
->isInverse
;
293 /* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
294 * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre
295 * concept of RUNS_ONLY which is a double operation.
296 * It could be advantageous to divide this into 3 concepts:
297 * a) Operation: direct / inverse / RUNS_ONLY
298 * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R
299 * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL
300 * This would allow combinations not possible today like RUNS_ONLY with NUMBERS_SPECIAL.
302 * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and
303 * REMOVE_CONTROLS for the inverse step.
304 * Not all combinations would be supported, and probably not all do make sense.
305 * This would need to document which ones are supported and what are the
306 * fallbacks for unsupported combinations.
308 U_CAPI
void U_EXPORT2
309 ubidi_setReorderingMode(UBiDi
*pBiDi
, UBiDiReorderingMode reorderingMode
) {
310 if ((pBiDi
!=NULL
) && (reorderingMode
>= UBIDI_REORDER_DEFAULT
)
311 && (reorderingMode
< UBIDI_REORDER_COUNT
)) {
312 pBiDi
->reorderingMode
= reorderingMode
;
313 pBiDi
->isInverse
= (UBool
)(reorderingMode
== UBIDI_REORDER_INVERSE_NUMBERS_AS_L
);
317 U_CAPI UBiDiReorderingMode U_EXPORT2
318 ubidi_getReorderingMode(UBiDi
*pBiDi
) {
320 return pBiDi
->reorderingMode
;
322 return UBIDI_REORDER_DEFAULT
;
326 U_CAPI
void U_EXPORT2
327 ubidi_setReorderingOptions(UBiDi
*pBiDi
, uint32_t reorderingOptions
) {
328 if (reorderingOptions
& UBIDI_OPTION_REMOVE_CONTROLS
) {
329 reorderingOptions
&=~UBIDI_OPTION_INSERT_MARKS
;
332 pBiDi
->reorderingOptions
=reorderingOptions
;
336 U_CAPI
uint32_t U_EXPORT2
337 ubidi_getReorderingOptions(UBiDi
*pBiDi
) {
339 return pBiDi
->reorderingOptions
;
345 U_CAPI UBiDiDirection U_EXPORT2
346 ubidi_getBaseDirection(const UChar
*text
,
353 if( text
==NULL
|| length
<-1 ){
354 return UBIDI_NEUTRAL
;
358 length
=u_strlen(text
);
361 for( i
= 0 ; i
< length
; ) {
362 /* i is incremented by U16_NEXT */
363 U16_NEXT(text
, i
, length
, uchar
);
364 dir
= u_charDirection(uchar
);
365 if( dir
== U_LEFT_TO_RIGHT
)
367 if( dir
== U_RIGHT_TO_LEFT
|| dir
==U_RIGHT_TO_LEFT_ARABIC
)
370 return UBIDI_NEUTRAL
;
373 /* perform (P2)..(P3) ------------------------------------------------------- */
376 * Returns the directionality of the first strong character
377 * after the last B in prologue, if any.
378 * Requires prologue!=null.
381 firstL_R_AL(UBiDi
*pBiDi
) {
382 const UChar
*text
=pBiDi
->prologue
;
383 int32_t length
=pBiDi
->proLength
;
386 DirProp dirProp
, result
=ON
;
387 for(i
=0; i
<length
; ) {
388 /* i is incremented by U16_NEXT */
389 U16_NEXT(text
, i
, length
, uchar
);
390 dirProp
=(DirProp
)ubidi_getCustomizedClass(pBiDi
, uchar
);
392 if(dirProp
==L
|| dirProp
==R
|| dirProp
==AL
) {
405 * Check that there are enough entries in the array pointed to by pBiDi->paras
408 checkParaCount(UBiDi
*pBiDi
) {
409 int32_t count
=pBiDi
->paraCount
;
410 if(pBiDi
->paras
==pBiDi
->simpleParas
) {
411 if(count
<=SIMPLE_PARAS_COUNT
)
413 if(!getInitialParasMemory(pBiDi
, SIMPLE_PARAS_COUNT
* 2))
415 pBiDi
->paras
=pBiDi
->parasMemory
;
416 uprv_memcpy(pBiDi
->parasMemory
, pBiDi
->simpleParas
, SIMPLE_PARAS_COUNT
* sizeof(Para
));
419 if(!getInitialParasMemory(pBiDi
, count
* 2))
421 pBiDi
->paras
=pBiDi
->parasMemory
;
426 * Get the directional properties for the inserted bidi controls.
/*
 * Subset of bidi properties used for inserted controls.  There are exactly
 * 16 values (0..15), so each one fits in 4 bits.  Insert_none (0) stands
 * for "every other class".
 */
enum {                  /* correspondence to standard class */
    Insert_none = 0,    /*  0 all others */
    Insert_L,           /*  1 L   = U_LEFT_TO_RIGHT */
    Insert_R,           /*  2 R   = U_RIGHT_TO_LEFT */
    Insert_AL,          /*  3 AL  = U_RIGHT_TO_LEFT_ARABIC */
    Insert_LRE,         /*  4 LRE = U_LEFT_TO_RIGHT_EMBEDDING */
    Insert_LRO,         /*  5 LRO = U_LEFT_TO_RIGHT_OVERRIDE */
    Insert_RLE,         /*  6 RLE = U_RIGHT_TO_LEFT_EMBEDDING */
    Insert_RLO,         /*  7 RLO = U_RIGHT_TO_LEFT_OVERRIDE */
    Insert_PDF,         /*  8 PDF = U_POP_DIRECTIONAL_FORMAT */
    Insert_FSI,         /*  9 FSI = U_FIRST_STRONG_ISOLATE */
    Insert_LRI,         /* 10 LRI = U_LEFT_TO_RIGHT_ISOLATE */
    Insert_RLI,         /* 11 RLI = U_RIGHT_TO_LEFT_ISOLATE */
    Insert_PDI,         /* 12 PDI = U_POP_DIRECTIONAL_ISOLATE */
    Insert_B,           /* 13 B   = U_BLOCK_SEPARATOR */
    Insert_S,           /* 14 S   = U_SEGMENT_SEPARATOR */
    Insert_WS,          /* 15 WS  = U_WHITE_SPACE_NEUTRAL */
    Insert_count        /* 16 */
};
450 /* map standard dir class to special 4-bit insert value (Insert_none as default) */
451 static const uint16_t insertDirFromStdDir
[dirPropCount
] = {
452 Insert_none
, /* L= U_LEFT_TO_RIGHT */
453 Insert_none
, /* R= U_RIGHT_TO_LEFT, */
454 Insert_none
, /* EN= U_EUROPEAN_NUMBER */
455 Insert_none
, /* ES= U_EUROPEAN_NUMBER_SEPARATOR */
456 Insert_none
, /* ET= U_EUROPEAN_NUMBER_TERMINATOR */
457 Insert_none
, /* AN= U_ARABIC_NUMBER */
458 Insert_none
, /* CS= U_COMMON_NUMBER_SEPARATOR */
459 Insert_none
, /* B= U_BLOCK_SEPARATOR */
460 Insert_none
, /* S= U_SEGMENT_SEPARATOR */
461 Insert_none
, /* WS= U_WHITE_SPACE_NEUTRAL */
462 Insert_none
, /* ON= U_OTHER_NEUTRAL */
463 Insert_LRE
, /* LRE=U_LEFT_TO_RIGHT_EMBEDDING */
464 Insert_LRO
, /* LRO=U_LEFT_TO_RIGHT_OVERRIDE */
465 Insert_none
, /* AL= U_RIGHT_TO_LEFT_ARABIC */
466 Insert_RLE
, /* RLE=U_RIGHT_TO_LEFT_EMBEDDING */
467 Insert_RLO
, /* RLO=U_RIGHT_TO_LEFT_OVERRIDE */
468 Insert_PDF
, /* PDF=U_POP_DIRECTIONAL_FORMAT */
469 Insert_none
, /* NSM=U_DIR_NON_SPACING_MARK */
470 Insert_none
, /* BN= U_BOUNDARY_NEUTRAL */
471 Insert_FSI
, /* FSI=U_FIRST_STRONG_ISOLATE */
472 Insert_LRI
, /* LRI=U_LEFT_TO_RIGHT_ISOLATE */
473 Insert_RLI
, /* RLI=U_RIGHT_TO_LEFT_ISOLATE */
474 Insert_PDI
, /* PDI=U_POP_DIRECTIONAL_ISOLATE */
475 Insert_none
, /* ENL */
476 Insert_none
, /* ENR */
479 /* map special 4-bit insert direction class to standard dir class (ON as default) */
480 static const uint8_t stdDirFromInsertDir
[Insert_count
] = {
481 ON
, /* Insert_none > ON */
485 LRE
, /* Insert_LRE */
486 LRO
, /* Insert_LRO */
487 RLE
, /* Insert_RLE */
488 RLO
, /* Insert_RLO */
489 PDF
, /* Insert_PDF */
490 FSI
, /* Insert_FSI */
491 LRI
, /* Insert_LRI */
492 RLI
, /* Insert_RLI */
493 PDI
, /* Insert_PDI */
499 enum { kMaxControlStringLen
= 4 };
502 getDirInsert(UBiDi
*pBiDi
,
503 const int32_t *offsets
, int32_t offsetCount
,
504 const int32_t *controlStringIndices
,
505 const UChar
* const * controlStrings
) {
506 int32_t offset
, offsetsIndex
;
507 uint16_t *dirInsert
= pBiDi
->dirInsert
;
508 /* initialize dirInsert */
509 for (offset
= 0; offset
< pBiDi
->length
; offset
++) {
510 dirInsert
[offset
] = 0;
512 for (offsetsIndex
= 0; offsetsIndex
< offsetCount
; offsetsIndex
++) {
513 const UChar
* controlString
;
515 int32_t controlStringIndex
, dirInsertIndex
= 0;
516 uint16_t dirInsertValue
= 0;
517 offset
= offsets
[offsetsIndex
];
518 if (offset
< 0 || offset
>= pBiDi
->length
) {
519 return FALSE
; /* param err in offsets array */
521 controlStringIndex
= (controlStringIndices
== NULL
)? offsetsIndex
: controlStringIndices
[offsetsIndex
];
522 controlString
= controlStrings
[controlStringIndex
];
523 if (controlString
== NULL
) {
524 return FALSE
; /* param err in controlStrings array */
526 while ((uchar
= *controlString
++) != 0) {
527 uint16_t insertValue
= (U16_IS_SURROGATE(uchar
))? Insert_none
:
528 insertDirFromStdDir
[(uint32_t)ubidi_getCustomizedClass(pBiDi
, uchar
)];
529 if (dirInsertIndex
>= kMaxControlStringLen
|| insertValue
== Insert_none
) {
530 return FALSE
; /* param err in controlStrings array */
532 dirInsertValue
|= (insertValue
<< (4 * dirInsertIndex
++));
534 dirInsert
[offset
] = dirInsertValue
;
540 * Get the directional properties for the text, calculate the flags bit-set, and
541 * determine the paragraph level if necessary (in pBiDi->paras[i].level).
542 * FSI initiators are also resolved and their dirProp replaced with LRI or RLI.
543 * When encountering an FSI, it is initially replaced with an LRI, which is the
544 * default. Only if a strong R or AL is found within its scope will the LRI be
545 * replaced by an RLI.
/* NOTE(review): the text of getDirProps() below is incomplete -- short
 * source lines (braces, "else", short statements and declarations) are
 * absent from this listing.  Only comments are added here; no code token
 * is changed. */
548 getDirProps(UBiDi
*pBiDi
) {
549 const UChar
*text
=pBiDi
->text
;
550 DirProp
*dirProps
=pBiDi
->dirPropsMemory
; /* pBiDi->dirProps is const */
551 uint16_t *dirInsert
= pBiDi
->dirInsert
; /* may be NULL */
553 int32_t i
=0, originalLength
=pBiDi
->originalLength
;
554 Flags flags
=0; /* collect all directionalities in the text */
556 DirProp dirProp
=0, defaultParaLevel
=0; /* initialize to avoid compiler warnings */
557 int32_t dirInsertValue
;
558 int8_t dirInsertIndex
; /* position within dirInsertValue, if any */
559 UBool isDefaultLevel
=IS_DEFAULT_LEVEL(pBiDi
->paraLevel
);
560 /* for inverse BiDi, the default para level is set to RTL if there is a
561 strong R or AL character at either end of the text */
562 UBool isDefaultLevelInverse
=isDefaultLevel
&& (UBool
)
563 (pBiDi
->reorderingMode
==UBIDI_REORDER_INVERSE_LIKE_DIRECT
||
564 pBiDi
->reorderingMode
==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
);
565 int32_t lastArabicPos
=-1;
566 int32_t controlCount
=0;
567 UBool removeBiDiControls
= (UBool
)(pBiDi
->reorderingOptions
&
568 UBIDI_OPTION_REMOVE_CONTROLS
);
571 NOT_SEEKING_STRONG
, /* 0: not contextual paraLevel, not after FSI */
572 SEEKING_STRONG_FOR_PARA
, /* 1: looking for first strong char in para */
573 SEEKING_STRONG_FOR_FSI
, /* 2: looking for first strong after FSI */
574 LOOKING_FOR_PDI
/* 3: found strong after FSI, looking for PDI */
577 DirProp lastStrong
=ON
; /* for default level & inverse BiDi */
578 /* The following stacks are used to manage isolate sequences. Those
579 sequences may be nested, but obviously never more deeply than the
580 maximum explicit embedding level.
581 lastStack is the index of the last used entry in the stack. A value of -1
582 means that there is no open isolate sequence.
583 lastStack is reset to -1 on paragraph boundaries. */
584 /* The following stack contains the position of the initiator of
585 each open isolate sequence */
586 int32_t isolateStartStack
[UBIDI_MAX_EXPLICIT_LEVEL
+1];
/* For each open isolate sequence: the dirInsertIndex in effect when its
 * initiator was seen.  The code below treats a negative value as "the
 * initiator is a character of the text" and otherwise as the nibble
 * position of an inserted control. */
587 int8_t isolateStartInsertIndex
[UBIDI_MAX_EXPLICIT_LEVEL
+1];
588 /* The following stack contains the last known state before
589 encountering the initiator of an isolate sequence */
590 State previousStateStack
[UBIDI_MAX_EXPLICIT_LEVEL
+1];
591 int32_t stackLast
=-1;
593 if(pBiDi
->reorderingOptions
& UBIDI_OPTION_STREAMING
)
595 defaultParaLevel
=pBiDi
->paraLevel
&1;
597 pBiDi
->paras
[0].level
=defaultParaLevel
;
598 lastStrong
=defaultParaLevel
;
599 if(pBiDi
->proLength
>0 && /* there is a prologue */
600 (dirProp
=firstL_R_AL(pBiDi
))!=ON
) { /* with a strong character */
602 pBiDi
->paras
[0].level
=0; /* set the default para level */
604 pBiDi
->paras
[0].level
=1; /* set the default para level */
605 state
=NOT_SEEKING_STRONG
;
607 state
=SEEKING_STRONG_FOR_PARA
;
610 pBiDi
->paras
[0].level
=pBiDi
->paraLevel
;
611 state
=NOT_SEEKING_STRONG
;
613 /* count paragraphs and determine the paragraph level (P2..P3) */
615 * see comment in ubidi.h:
616 * the UBIDI_DEFAULT_XXX values are designed so that
617 * their bit 0 alone yields the intended default
620 dirInsertIndex
= -1; /* indicate that we have not checked dirInsert yet */
/* Main scan.  The visible statements either take the next 4-bit code out
 * of dirInsertValue (loaded from dirInsert[i]) or read the next code point
 * with U16_NEXT; presumably these are the two arms of one if/else -- the
 * lines that would show it are absent here, TODO confirm. */
621 for( /* i=0 above */ ; i
<originalLength
; ) {
622 if (dirInsert
!= NULL
&& dirInsertIndex
< 0) {
623 dirInsertValue
= dirInsert
[i
];
625 if (dirInsertValue
> 0) {
627 dirProp
= (DirProp
)stdDirFromInsertDir
[dirInsertValue
& 0x000F];
628 dirInsertValue
>>= 4;
629 flags
|=DIRPROP_FLAG(dirProp
);
633 /* i is incremented by U16_NEXT */
634 U16_NEXT(text
, i
, originalLength
, uchar
);
635 flags
|=DIRPROP_FLAG(dirProp
=(DirProp
)ubidi_getCustomizedClass(pBiDi
, uchar
));
636 dirProps
[i
-1]=dirProp
;
637 if(uchar
>0xffff) { /* set the lead surrogate's property to BN */
638 flags
|=DIRPROP_FLAG(BN
);
642 if(removeBiDiControls
&& IS_BIDI_CONTROL_CHAR(uchar
))
645 if(state
==SEEKING_STRONG_FOR_PARA
) {
646 pBiDi
->paras
[pBiDi
->paraCount
-1].level
=0;
647 state
=NOT_SEEKING_STRONG
;
649 else if(state
==SEEKING_STRONG_FOR_FSI
) {
650 if(stackLast
<=UBIDI_MAX_EXPLICIT_LEVEL
) {
651 /* no need for next statement, already set by default */
652 /* dirProps[isolateStartStack[stackLast]]=LRI; */
653 flags
|=DIRPROP_FLAG(LRI
);
655 state
=LOOKING_FOR_PDI
;
660 if(dirProp
==R
|| dirProp
==AL
) {
661 if(state
==SEEKING_STRONG_FOR_PARA
) {
662 pBiDi
->paras
[pBiDi
->paraCount
-1].level
=1;
663 state
=NOT_SEEKING_STRONG
;
665 else if(state
==SEEKING_STRONG_FOR_FSI
) {
666 if(stackLast
<=UBIDI_MAX_EXPLICIT_LEVEL
) {
667 if (isolateStartInsertIndex
[stackLast
] < 0) {
668 dirProps
[isolateStartStack
[stackLast
]]=RLI
;
/* NOTE(review): dirInsert[] is indexed by text offset everywhere else in
 * this file (dirInsert[offset] in getDirInsert(), dirInsert[i] in this
 * function), but the two statements below index it with stackLast, which
 * is a depth in the isolate stack.  The offset of the pending FSI is
 * isolateStartStack[stackLast], so this looks like it should read
 * dirInsert[isolateStartStack[stackLast]] -- confirm and fix. */
670 dirInsert
[stackLast
] &= ~(0x000F << (4*isolateStartInsertIndex
[stackLast
]));
671 dirInsert
[stackLast
] |= (Insert_RLI
<< (4*isolateStartInsertIndex
[stackLast
]));
673 flags
|=DIRPROP_FLAG(RLI
);
675 state
=LOOKING_FOR_PDI
;
682 if(dirProp
>=FSI
&& dirProp
<=RLI
) { /* FSI, LRI or RLI */
684 if(stackLast
<=UBIDI_MAX_EXPLICIT_LEVEL
) {
685 isolateStartStack
[stackLast
]= (dirInsertIndex
< 0)? i
-1: i
/* we have not incremented with U16_NEXT yet */;
686 isolateStartInsertIndex
[stackLast
] = dirInsertIndex
;
687 previousStateStack
[stackLast
]=state
;
690 if (dirInsertIndex
< 0) {
691 dirProps
[i
-1]=LRI
; /* default if no strong char */
693 dirInsert
[i
] &= ~(0x000F << (4*dirInsertIndex
));
694 dirInsert
[i
] |= (Insert_LRI
<< (4*dirInsertIndex
));
696 state
=SEEKING_STRONG_FOR_FSI
;
699 state
=LOOKING_FOR_PDI
;
703 if(state
==SEEKING_STRONG_FOR_FSI
) {
704 if(stackLast
<=UBIDI_MAX_EXPLICIT_LEVEL
) {
705 /* no need for next statement, already set by default */
706 /* dirProps[isolateStartStack[stackLast]]=LRI; */
707 flags
|=DIRPROP_FLAG(LRI
);
711 if(stackLast
<=UBIDI_MAX_EXPLICIT_LEVEL
)
712 state
=previousStateStack
[stackLast
];
718 if(i
<originalLength
&& uchar
==CR
&& text
[i
]==LF
) /* do nothing on the CR */
720 pBiDi
->paras
[pBiDi
->paraCount
-1].limit
=i
;
721 if(isDefaultLevelInverse
&& lastStrong
==R
)
722 pBiDi
->paras
[pBiDi
->paraCount
-1].level
=1;
723 if(pBiDi
->reorderingOptions
& UBIDI_OPTION_STREAMING
) {
724 /* When streaming, we only process whole paragraphs
725 thus some updates are only done on paragraph boundaries */
726 pBiDi
->length
=i
; /* i is index to next character */
727 pBiDi
->controlCount
=controlCount
;
729 if(i
<originalLength
) { /* B not last char in text */
731 if(checkParaCount(pBiDi
)==FALSE
) /* not enough memory for a new para entry */
734 pBiDi
->paras
[pBiDi
->paraCount
-1].level
=defaultParaLevel
;
735 state
=SEEKING_STRONG_FOR_PARA
;
736 lastStrong
=defaultParaLevel
;
738 pBiDi
->paras
[pBiDi
->paraCount
-1].level
=pBiDi
->paraLevel
;
739 state
=NOT_SEEKING_STRONG
;
746 /* Ignore still open isolate sequences with overflow */
747 if(stackLast
>UBIDI_MAX_EXPLICIT_LEVEL
) {
748 stackLast
=UBIDI_MAX_EXPLICIT_LEVEL
;
749 state
=SEEKING_STRONG_FOR_FSI
; /* to be on the safe side */
751 /* Resolve direction of still unresolved open FSI sequences */
752 while(stackLast
>=0) {
753 if(state
==SEEKING_STRONG_FOR_FSI
) {
754 /* no need for next statement, already set by default */
755 /* dirProps[isolateStartStack[stackLast]]=LRI; */
756 flags
|=DIRPROP_FLAG(LRI
);
759 state
=previousStateStack
[stackLast
];
762 /* When streaming, ignore text after the last paragraph separator */
763 if(pBiDi
->reorderingOptions
& UBIDI_OPTION_STREAMING
) {
764 if(pBiDi
->length
<originalLength
)
767 pBiDi
->paras
[pBiDi
->paraCount
-1].limit
=originalLength
;
768 pBiDi
->controlCount
=controlCount
;
770 /* For inverse bidi, default para direction is RTL if there is
771 a strong R or AL at either end of the paragraph */
772 if(isDefaultLevelInverse
&& lastStrong
==R
) {
773 pBiDi
->paras
[pBiDi
->paraCount
-1].level
=1;
776 pBiDi
->paraLevel
=pBiDi
->paras
[0].level
;
778 /* The following is needed to resolve the text direction for default level
779 paragraphs containing no strong character */
780 for(i
=0; i
<pBiDi
->paraCount
; i
++)
781 flags
|=DIRPROP_FLAG_LR(pBiDi
->paras
[i
].level
);
783 if(pBiDi
->orderParagraphsLTR
&& (flags
&DIRPROP_FLAG(B
))) {
784 flags
|=DIRPROP_FLAG(L
);
787 pBiDi
->lastArabicPos
=lastArabicPos
;
791 /* determine the paragraph level at position index */
793 ubidi_getParaLevelAtIndex(const UBiDi
*pBiDi
, int32_t pindex
) {
795 for(i
=0; i
<pBiDi
->paraCount
; i
++)
796 if(pindex
<pBiDi
->paras
[i
].limit
)
798 if(i
>=pBiDi
->paraCount
)
799 i
=pBiDi
->paraCount
-1;
800 return (UBiDiLevel
)(pBiDi
->paras
[i
].level
);
803 /* Functions for handling paired brackets ----------------------------------- */
805 /* In the isoRuns array, the first entry is used for text outside of any
806 isolate sequence. Higher entries are used for each more deeply nested
807 isolate sequence. isoRunLast is the index of the last used entry. The
808 openings array is used to note the data of opening brackets not yet
809 matched by a closing bracket, or matched but still susceptible to change
811 Each isoRun entry contains the index of the first and
812 one-after-last openings entries for pending opening brackets it
813 contains. The next openings entry to use is the one-after-last of the
814 most deeply nested isoRun entry.
815 isoRun entries also contain their current embedding level and the last
816 encountered strong character, since these will be needed to resolve
817 the level of paired brackets. */
820 bracketInit(UBiDi
*pBiDi
, BracketData
*bd
) {
823 bd
->isoRuns
[0].start
=0;
824 bd
->isoRuns
[0].limit
=0;
825 bd
->isoRuns
[0].level
=GET_PARALEVEL(pBiDi
, 0);
826 UBiDiLevel t
= GET_PARALEVEL(pBiDi
, 0) & 1;
827 bd
->isoRuns
[0].lastStrong
= bd
->isoRuns
[0].lastBase
= t
;
828 bd
->isoRuns
[0].contextDir
= (UBiDiDirection
)t
;
829 bd
->isoRuns
[0].contextPos
=0;
830 if(pBiDi
->openingsMemory
) {
831 bd
->openings
=pBiDi
->openingsMemory
;
832 bd
->openingsCount
=pBiDi
->openingsSize
/ sizeof(Opening
);
834 bd
->openings
=bd
->simpleOpenings
;
835 bd
->openingsCount
=SIMPLE_OPENINGS_COUNT
;
837 bd
->isNumbersSpecial
=bd
->pBiDi
->reorderingMode
==UBIDI_REORDER_NUMBERS_SPECIAL
||
838 bd
->pBiDi
->reorderingMode
==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
;
841 /* paragraph boundary */
843 bracketProcessB(BracketData
*bd
, UBiDiLevel level
) {
845 bd
->isoRuns
[0].limit
=0;
846 bd
->isoRuns
[0].level
=level
;
847 bd
->isoRuns
[0].lastStrong
=bd
->isoRuns
[0].lastBase
=level
&1;
848 bd
->isoRuns
[0].contextDir
=(UBiDiDirection
)(level
&1);
849 bd
->isoRuns
[0].contextPos
=0;
852 /* LRE, LRO, RLE, RLO, PDF */
854 bracketProcessBoundary(BracketData
*bd
, int32_t lastCcPos
, DirProp lastCcDirProp
,
855 UBiDiLevel contextLevel
, UBiDiLevel embeddingLevel
) {
856 IsoRun
*pLastIsoRun
=&bd
->isoRuns
[bd
->isoRunLast
];
857 if(DIRPROP_FLAG(lastCcDirProp
)&MASK_ISO
) /* after an isolate */
859 if(NO_OVERRIDE(embeddingLevel
)>NO_OVERRIDE(contextLevel
)) /* not a PDF */
860 contextLevel
=embeddingLevel
;
861 pLastIsoRun
->limit
=pLastIsoRun
->start
;
862 pLastIsoRun
->level
=embeddingLevel
;
863 pLastIsoRun
->lastStrong
=pLastIsoRun
->lastBase
=contextLevel
&1;
864 pLastIsoRun
->contextDir
=(UBiDiDirection
)(contextLevel
&1);
865 pLastIsoRun
->contextPos
=(UBiDiDirection
)lastCcPos
;
870 bracketProcessLRI_RLI(BracketData
*bd
, UBiDiLevel level
) {
871 IsoRun
*pLastIsoRun
=&bd
->isoRuns
[bd
->isoRunLast
];
873 pLastIsoRun
->lastBase
=ON
;
874 lastLimit
=pLastIsoRun
->limit
;
877 pLastIsoRun
->start
=pLastIsoRun
->limit
=lastLimit
;
878 pLastIsoRun
->level
=level
;
879 pLastIsoRun
->lastStrong
=pLastIsoRun
->lastBase
=level
&1;
880 pLastIsoRun
->contextDir
=(UBiDiDirection
)(level
&1);
881 pLastIsoRun
->contextPos
=0;
886 bracketProcessPDI(BracketData
*bd
) {
889 pLastIsoRun
=&bd
->isoRuns
[bd
->isoRunLast
];
890 pLastIsoRun
->lastBase
=ON
;
893 /* newly found opening bracket: create an openings entry */
894 static UBool
/* return TRUE if success */
895 bracketAddOpening(BracketData
*bd
, UChar match
, int32_t position
) {
896 IsoRun
*pLastIsoRun
=&bd
->isoRuns
[bd
->isoRunLast
];
898 if(pLastIsoRun
->limit
>=bd
->openingsCount
) { /* no available new entry */
899 UBiDi
*pBiDi
=bd
->pBiDi
;
900 if(!getInitialOpeningsMemory(pBiDi
, pLastIsoRun
->limit
* 2))
902 if(bd
->openings
==bd
->simpleOpenings
)
903 uprv_memcpy(pBiDi
->openingsMemory
, bd
->simpleOpenings
,
904 SIMPLE_OPENINGS_COUNT
* sizeof(Opening
));
905 bd
->openings
=pBiDi
->openingsMemory
; /* may have changed */
906 bd
->openingsCount
=pBiDi
->openingsSize
/ sizeof(Opening
);
908 pOpening
=&bd
->openings
[pLastIsoRun
->limit
];
909 pOpening
->position
=position
;
910 pOpening
->match
=match
;
911 pOpening
->contextDir
=pLastIsoRun
->contextDir
;
912 pOpening
->contextPos
=pLastIsoRun
->contextPos
;
914 pLastIsoRun
->limit
++;
918 /* change N0c1 to N0c2 when a preceding bracket is assigned the embedding level */
920 fixN0c(BracketData
*bd
, int32_t openingIndex
, int32_t newPropPosition
, DirProp newProp
) {
921 /* This function calls itself recursively */
922 IsoRun
*pLastIsoRun
=&bd
->isoRuns
[bd
->isoRunLast
];
924 DirProp
*dirProps
=bd
->pBiDi
->dirProps
;
925 int32_t k
, openingPosition
, closingPosition
;
926 for(k
=openingIndex
+1, qOpening
=&bd
->openings
[k
]; k
<pLastIsoRun
->limit
; k
++, qOpening
++) {
927 if(qOpening
->match
>=0) /* not an N0c match */
929 if(newPropPosition
<qOpening
->contextPos
)
931 if(newPropPosition
>=qOpening
->position
)
933 if(newProp
==qOpening
->contextDir
)
935 openingPosition
=qOpening
->position
;
936 dirProps
[openingPosition
]=newProp
;
937 closingPosition
=-(qOpening
->match
);
938 dirProps
[closingPosition
]=newProp
;
939 qOpening
->match
=0; /* prevent further changes */
940 fixN0c(bd
, k
, openingPosition
, newProp
);
941 fixN0c(bd
, k
, closingPosition
, newProp
);
945 /* process closing bracket */
946 static DirProp
/* return L or R if N0b or N0c, ON if N0d */
947 bracketProcessClosing(BracketData
*bd
, int32_t openIdx
, int32_t position
) {
948 IsoRun
*pLastIsoRun
=&bd
->isoRuns
[bd
->isoRunLast
];
949 Opening
*pOpening
, *qOpening
;
950 UBiDiDirection direction
;
953 pOpening
=&bd
->openings
[openIdx
];
954 direction
=(UBiDiDirection
)(pLastIsoRun
->level
&1);
955 stable
=TRUE
; /* assume stable until proved otherwise */
957 /* The stable flag is set when brackets are paired and their
958 level is resolved and cannot be changed by what will be
959 found later in the source string.
960 An unstable match can occur only when applying N0c, where
961 the resolved level depends on the preceding context, and
962 this context may be affected by text occurring later.
963 Example: RTL paragraph containing: abc[(latin) HEBREW]
964 When the closing parenthesis is encountered, it appears
965 that N0c1 must be applied since 'abc' sets an opposite
966 direction context and both parentheses receive level 2.
967 However, when the closing square bracket is processed,
968 N0b applies because of 'HEBREW' being included within the
969 brackets, thus the square brackets are treated like R and
970 receive level 1. However, this changes the preceding
971 context of the opening parenthesis, and it now appears
972 that N0c2 must be applied to the parentheses rather than
975 if((direction
==0 && pOpening
->flags
&FOUND_L
) ||
976 (direction
==1 && pOpening
->flags
&FOUND_R
)) { /* N0b */
979 else if(pOpening
->flags
&(FOUND_L
|FOUND_R
)) { /* N0c */
980 /* it is stable if there is no containing pair or in
981 conditions too complicated and not worth checking */
982 stable
=(openIdx
==pLastIsoRun
->start
);
983 if(direction
!=pOpening
->contextDir
)
984 newProp
=pOpening
->contextDir
; /* N0c1 */
986 newProp
=direction
; /* N0c2 */
988 /* forget this and any brackets nested within this pair */
989 pLastIsoRun
->limit
=openIdx
;
992 bd
->pBiDi
->dirProps
[pOpening
->position
]=newProp
;
993 bd
->pBiDi
->dirProps
[position
]=newProp
;
994 /* Update nested N0c pairs that may be affected */
995 fixN0c(bd
, openIdx
, pOpening
->position
, newProp
);
997 pLastIsoRun
->limit
=openIdx
; /* forget any brackets nested within this pair */
998 /* remove lower located synonyms if any */
999 while(pLastIsoRun
->limit
>pLastIsoRun
->start
&&
1000 bd
->openings
[pLastIsoRun
->limit
-1].position
==pOpening
->position
)
1001 pLastIsoRun
->limit
--;
1004 pOpening
->match
=-position
;
1005 /* neutralize lower located synonyms if any */
1007 while(k
>=pLastIsoRun
->start
&&
1008 bd
->openings
[k
].position
==pOpening
->position
)
1009 bd
->openings
[k
--].match
=0;
1010 /* neutralize any unmatched opening between the current pair;
1011 this will also neutralize higher located synonyms if any */
1012 for(k
=openIdx
+1; k
<pLastIsoRun
->limit
; k
++) {
1013 qOpening
=&bd
->openings
[k
];
1014 if(qOpening
->position
>=position
)
1016 if(qOpening
->match
>0)
1023 /* handle strong characters, digits and candidates for closing brackets */
1024 static UBool
/* return TRUE if success */
1025 bracketProcessChar(BracketData
*bd
, int32_t position
) {
1026 IsoRun
*pLastIsoRun
=&bd
->isoRuns
[bd
->isoRunLast
];
1027 DirProp
*dirProps
, dirProp
, newProp
;
1029 dirProps
=bd
->pBiDi
->dirProps
;
1030 dirProp
=dirProps
[position
];
1034 /* First see if it is a matching closing bracket. Hopefully, this is
1035 more efficient than checking if it is a closing bracket at all */
1036 c
=bd
->pBiDi
->text
[position
];
1037 for(idx
=pLastIsoRun
->limit
-1; idx
>=pLastIsoRun
->start
; idx
--) {
1038 if(bd
->openings
[idx
].match
!=c
)
1040 /* We have a match */
1041 newProp
=bracketProcessClosing(bd
, idx
, position
);
1042 if(newProp
==ON
) { /* N0d */
1043 c
=0; /* prevent handling as an opening */
1046 pLastIsoRun
->lastBase
=ON
;
1047 pLastIsoRun
->contextDir
=(UBiDiDirection
)newProp
;
1048 pLastIsoRun
->contextPos
=position
;
1049 level
=bd
->pBiDi
->levels
[position
];
1050 if(level
&UBIDI_LEVEL_OVERRIDE
) { /* X4, X5 */
1054 pLastIsoRun
->lastStrong
=newProp
;
1055 flag
=DIRPROP_FLAG(newProp
);
1056 for(i
=pLastIsoRun
->start
; i
<idx
; i
++)
1057 bd
->openings
[i
].flags
|=flag
;
1058 /* matching brackets are not overridden by LRO/RLO */
1059 bd
->pBiDi
->levels
[position
]&=~UBIDI_LEVEL_OVERRIDE
;
1061 /* matching brackets are not overridden by LRO/RLO */
1062 bd
->pBiDi
->levels
[bd
->openings
[idx
].position
]&=~UBIDI_LEVEL_OVERRIDE
;
1065 /* We get here only if the ON character is not a matching closing
1066 bracket or it is a case of N0d */
1067 /* Now see if it is an opening bracket */
1069 match
=u_getBidiPairedBracket(c
); /* get the matching char */
1072 if(match
!=c
&& /* has a matching char */
1073 ubidi_getPairedBracketType(bd
->pBiDi
->bdp
, c
)==U_BPT_OPEN
) { /* opening bracket */
1074 /* special case: process synonyms
1075 create an opening entry for each synonym */
1076 if(match
==0x232A) { /* RIGHT-POINTING ANGLE BRACKET */
1077 if(!bracketAddOpening(bd
, 0x3009, position
))
1080 else if(match
==0x3009) { /* RIGHT ANGLE BRACKET */
1081 if(!bracketAddOpening(bd
, 0x232A, position
))
1084 if(!bracketAddOpening(bd
, match
, position
))
1088 level
=bd
->pBiDi
->levels
[position
];
1089 if(level
&UBIDI_LEVEL_OVERRIDE
) { /* X4, X5 */
1091 if(dirProp
!=S
&& dirProp
!=WS
&& dirProp
!=ON
)
1092 dirProps
[position
]=newProp
;
1093 pLastIsoRun
->lastBase
=newProp
;
1094 pLastIsoRun
->lastStrong
=newProp
;
1095 pLastIsoRun
->contextDir
=(UBiDiDirection
)newProp
;
1096 pLastIsoRun
->contextPos
=position
;
1098 else if(dirProp
<=R
|| dirProp
==AL
) {
1099 newProp
=DIR_FROM_STRONG(dirProp
);
1100 pLastIsoRun
->lastBase
=dirProp
;
1101 pLastIsoRun
->lastStrong
=dirProp
;
1102 pLastIsoRun
->contextDir
=(UBiDiDirection
)newProp
;
1103 pLastIsoRun
->contextPos
=position
;
1105 else if(dirProp
==EN
) {
1106 pLastIsoRun
->lastBase
=EN
;
1107 if(pLastIsoRun
->lastStrong
==L
) {
1109 if(!bd
->isNumbersSpecial
)
1110 dirProps
[position
]=ENL
;
1111 pLastIsoRun
->contextDir
=(UBiDiDirection
)L
;
1112 pLastIsoRun
->contextPos
=position
;
1116 if(pLastIsoRun
->lastStrong
==AL
)
1117 dirProps
[position
]=AN
; /* W2 */
1119 dirProps
[position
]=ENR
;
1120 pLastIsoRun
->contextDir
=(UBiDiDirection
)R
;
1121 pLastIsoRun
->contextPos
=position
;
1124 else if(dirProp
==AN
) {
1126 pLastIsoRun
->lastBase
=AN
;
1127 pLastIsoRun
->contextDir
=(UBiDiDirection
)R
;
1128 pLastIsoRun
->contextPos
=position
;
1130 else if(dirProp
==NSM
) {
1131 /* if the last real char was ON, change NSM to ON so that it
1132 will stay ON even if the last real char is a bracket which
1133 may be changed to L or R */
1134 newProp
=pLastIsoRun
->lastBase
;
1136 dirProps
[position
]=newProp
;
1140 pLastIsoRun
->lastBase
=dirProp
;
1142 if(newProp
<=R
|| newProp
==AL
) {
1144 uint16_t flag
=DIRPROP_FLAG(DIR_FROM_STRONG(newProp
));
1145 for(i
=pLastIsoRun
->start
; i
<pLastIsoRun
->limit
; i
++)
1146 if(position
>bd
->openings
[i
].position
)
1147 bd
->openings
[i
].flags
|=flag
;
1152 /* perform (X1)..(X9) ------------------------------------------------------- */
1154 /* determine if the text is mixed-directional or single-directional */
1155 static UBiDiDirection
1156 directionFromFlags(UBiDi
*pBiDi
) {
1157 Flags flags
=pBiDi
->flags
;
1158 /* if the text contains AN and neutrals, then some neutrals may become RTL */
1159 if(!(flags
&MASK_RTL
|| ((flags
&DIRPROP_FLAG(AN
)) && (flags
&MASK_POSSIBLE_N
)))) {
1161 } else if(!(flags
&MASK_LTR
)) {
1169 * Resolve the explicit levels as specified by explicit embedding codes.
1170 * Recalculate the flags to have them reflect the real properties
1171 * after taking the explicit embeddings into account.
1173 * The BiDi algorithm is designed to result in the same behavior whether embedding
1174 * levels are externally specified (from "styled text", supposedly the preferred
1175 * method) or set by explicit embedding codes (LRx, RLx, PDF, FSI, PDI) in the plain text.
1176 * That is why (X9) instructs to remove all not-isolate explicit codes (and BN).
1177 * However, in a real implementation, the removal of these codes and their index
1178 * positions in the plain text is undesirable since it would result in
1179 * reallocated, reindexed text.
1180 * Instead, this implementation leaves the codes in there and just ignores them
1181 * in the subsequent processing.
1182 * In order to get the same reordering behavior, positions with a BN or a not-isolate
1183 * explicit embedding code just get the same level assigned as the last "real"
1186 * Some implementations, not this one, then overwrite some of these
1187 * directionality properties at "real" same-level-run boundaries by
1188 * L or R codes so that the resolution of weak types can be performed on the
1189 * entire paragraph at once instead of having to parse it once more and
1190 * perform that resolution on same-level-runs.
1191 * This limits the scope of the implicit rules in effectively
1192 * the same way as the run limits.
1194 * Instead, this implementation does not modify these codes, except for
1195 * paired brackets whose properties (ON) may be replaced by L or R.
1196 * On one hand, the paragraph has to be scanned for same-level-runs, but
1197 * on the other hand, this saves another loop to reset these codes,
1198 * or saves making and modifying a copy of dirProps[].
1201 * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm.
1204 * Handling the stack of explicit levels (Xn):
1206 * With the BiDi stack of explicit levels, as pushed with each
1207 * LRE, RLE, LRO, RLO, LRI, RLI and FSI and popped with each PDF and PDI,
1208 * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL.
1210 * In order to have a correct push-pop semantics even in the case of overflows,
1211 * overflow counters and a valid isolate counter are used as described in UAX#9
1212 * section 3.3.2 "Explicit Levels and Directions".
1214 * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
1216 * Returns normally the direction; -1 if there was a memory shortage
1219 static UBiDiDirection
1220 resolveExplicitLevels(UBiDi
*pBiDi
, UErrorCode
*pErrorCode
) {
1221 DirProp
*dirProps
=pBiDi
->dirProps
;
1222 uint16_t *dirInsert
= pBiDi
->dirInsert
; /* may be NULL */
1223 UBiDiLevel
*levels
=pBiDi
->levels
;
1224 const UChar
*text
=pBiDi
->text
;
1226 int32_t i
=0, length
=pBiDi
->length
;
1227 Flags flags
=pBiDi
->flags
; /* collect all directionalities in the text */
1229 int32_t dirInsertValue
;
1230 int8_t dirInsertIndex
; /* position within dirInsertValue, if any */
1231 UBiDiLevel level
=GET_PARALEVEL(pBiDi
, 0);
1232 UBiDiDirection direction
;
1233 pBiDi
->isolateCount
=0;
1235 if(U_FAILURE(*pErrorCode
)) { return UBIDI_LTR
; }
1237 /* determine if the text is mixed-directional or single-directional */
1238 direction
=directionFromFlags(pBiDi
);
1240 /* we may not need to resolve any explicit levels */
1241 if((direction
!=UBIDI_MIXED
)) {
1242 /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
1245 if(pBiDi
->reorderingMode
> UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL
) {
1246 /* inverse BiDi: mixed, but all characters are at the same embedding level */
1247 /* set all levels to the paragraph level */
1248 int32_t paraIndex
, start
, limit
;
1249 for(paraIndex
=0; paraIndex
<pBiDi
->paraCount
; paraIndex
++) {
1253 start
=pBiDi
->paras
[paraIndex
-1].limit
;
1254 limit
=pBiDi
->paras
[paraIndex
].limit
;
1255 level
=pBiDi
->paras
[paraIndex
].level
;
1256 for(i
=start
; i
<limit
; i
++)
1259 return direction
; /* no bracket matching for inverse BiDi */
1261 if(!(flags
&(MASK_EXPLICIT
|MASK_ISO
))) {
1262 /* no embeddings, set all levels to the paragraph level */
1263 /* we still have to perform bracket matching */
1264 int32_t paraIndex
, start
, limit
;
1265 BracketData bracketData
;
1266 bracketInit(pBiDi
, &bracketData
);
1267 for(paraIndex
=0; paraIndex
<pBiDi
->paraCount
; paraIndex
++) {
1271 start
=pBiDi
->paras
[paraIndex
-1].limit
;
1272 limit
=pBiDi
->paras
[paraIndex
].limit
;
1273 level
=pBiDi
->paras
[paraIndex
].level
;
1274 for(i
=start
; i
<limit
; i
++) {
1276 dirProp
=dirProps
[i
];
1281 if(text
[i
]==CR
&& text
[i
+1]==LF
)
1282 continue; /* skip CR when followed by LF */
1283 bracketProcessB(&bracketData
, level
);
1287 if(!bracketProcessChar(&bracketData
, i
)) {
1288 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
1296 /* continue to perform (Xn) */
1298 /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
1299 /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */
1300 UBiDiLevel embeddingLevel
=level
, newLevel
;
1301 UBiDiLevel previousLevel
=level
; /* previous level for regular (not CC) characters */
1302 int32_t lastCcPos
=0; /* index of last effective LRx,RLx, PDx */
1303 DirProp lastCcDirProp
=0; /* dirProp of last effective LRx,RLx, PDx */
1305 /* The following stack remembers the embedding level and the ISOLATE flag of level runs.
1306 stackLast points to its current entry. */
1307 uint16_t stack
[UBIDI_MAX_EXPLICIT_LEVEL
+2]; /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL
1308 but we need one more entry as base */
1309 uint32_t stackLast
=0;
1310 int32_t overflowIsolateCount
=0;
1311 int32_t overflowEmbeddingCount
=0;
1312 int32_t validIsolateCount
=0;
1313 BracketData bracketData
;
1314 bracketInit(pBiDi
, &bracketData
);
1315 stack
[0]=level
; /* initialize base entry to para level, no override, no isolate */
1317 /* recalculate the flags */
1321 dirInsertIndex
= -1; /* indicate that we have not checked dirInsert yet */
1322 for(i
=0; i
<length
; ) { /* now conditionally increment at end */
1323 if (dirInsert
!= NULL
&& dirInsertIndex
< 0) {
1324 dirInsertValue
= dirInsert
[i
];
1326 if (dirInsertValue
> 0) {
1328 dirProp
= (DirProp
)stdDirFromInsertDir
[dirInsertValue
& 0x000F];
1329 dirInsertValue
>>= 4;
1331 dirInsertIndex
= -1;
1332 dirProp
=dirProps
[i
];
1339 /* (X2, X3, X4, X5) */
1340 flags
|=DIRPROP_FLAG(BN
);
1341 levels
[i
]=previousLevel
;
1342 if (dirProp
==LRE
|| dirProp
==LRO
)
1343 /* least greater even level */
1344 newLevel
=(UBiDiLevel
)((embeddingLevel
+2)&~(UBIDI_LEVEL_OVERRIDE
|1));
1346 /* least greater odd level */
1347 newLevel
=(UBiDiLevel
)((NO_OVERRIDE(embeddingLevel
)+1)|1);
1348 if(newLevel
<=UBIDI_MAX_EXPLICIT_LEVEL
&& overflowIsolateCount
==0 &&
1349 overflowEmbeddingCount
==0) {
1351 lastCcDirProp
= dirProp
;
1352 embeddingLevel
=newLevel
;
1353 if(dirProp
==LRO
|| dirProp
==RLO
)
1354 embeddingLevel
|=UBIDI_LEVEL_OVERRIDE
;
1356 stack
[stackLast
]=embeddingLevel
;
1357 /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE and RLE
1358 since this has already been done for newLevel which is
1359 the source for embeddingLevel.
1362 if(overflowIsolateCount
==0)
1363 overflowEmbeddingCount
++;
1368 flags
|=DIRPROP_FLAG(BN
);
1369 levels
[i
]=previousLevel
;
1370 /* handle all the overflow cases first */
1371 if(overflowIsolateCount
) {
1374 if(overflowEmbeddingCount
) {
1375 overflowEmbeddingCount
--;
1378 if(stackLast
>0 && stack
[stackLast
]<ISOLATE
) { /* not an isolate entry */
1380 lastCcDirProp
= dirProp
;
1382 embeddingLevel
=(UBiDiLevel
)stack
[stackLast
];
1387 flags
|=(DIRPROP_FLAG(ON
)|DIRPROP_FLAG_LR(embeddingLevel
));
1388 levels
[i
]=NO_OVERRIDE(embeddingLevel
);
1389 if(NO_OVERRIDE(embeddingLevel
)!=NO_OVERRIDE(previousLevel
)) {
1390 bracketProcessBoundary(&bracketData
, lastCcPos
, lastCcDirProp
,
1391 previousLevel
, embeddingLevel
);
1392 flags
|=DIRPROP_FLAG_MULTI_RUNS
;
1394 previousLevel
=embeddingLevel
;
1397 /* least greater even level */
1398 newLevel
=(UBiDiLevel
)((embeddingLevel
+2)&~(UBIDI_LEVEL_OVERRIDE
|1));
1400 /* least greater odd level */
1401 newLevel
=(UBiDiLevel
)((NO_OVERRIDE(embeddingLevel
)+1)|1);
1402 if(newLevel
<=UBIDI_MAX_EXPLICIT_LEVEL
&& overflowIsolateCount
==0 &&
1403 overflowEmbeddingCount
==0) {
1404 flags
|=DIRPROP_FLAG(dirProp
);
1406 lastCcDirProp
= dirProp
;
1407 validIsolateCount
++;
1408 if(validIsolateCount
>pBiDi
->isolateCount
)
1409 pBiDi
->isolateCount
=validIsolateCount
;
1410 embeddingLevel
=newLevel
;
1411 /* we can increment stackLast without checking because newLevel
1412 will exceed UBIDI_MAX_EXPLICIT_LEVEL before stackLast overflows */
1414 stack
[stackLast
]=embeddingLevel
+ISOLATE
;
1415 bracketProcessLRI_RLI(&bracketData
, embeddingLevel
);
1417 /* make it WS so that it is handled by adjustWSLevels() */
1418 if (dirInsertIndex
< 0) {
1421 dirInsert
[i
] &= ~(0x000F << (4*dirInsertIndex
));
1422 dirInsert
[i
] |= (Insert_WS
<< (4*dirInsertIndex
));
1424 overflowIsolateCount
++;
1428 if(NO_OVERRIDE(embeddingLevel
)!=NO_OVERRIDE(previousLevel
)) {
1429 bracketProcessBoundary(&bracketData
, lastCcPos
, lastCcDirProp
,
1430 previousLevel
, embeddingLevel
);
1431 flags
|=DIRPROP_FLAG_MULTI_RUNS
;
1434 if(overflowIsolateCount
) {
1435 overflowIsolateCount
--;
1436 /* make it WS so that it is handled by adjustWSLevels() */
1437 if (dirInsertIndex
< 0) {
1440 dirInsert
[i
] &= ~(0x000F << (4*dirInsertIndex
));
1441 dirInsert
[i
] |= (Insert_WS
<< (4*dirInsertIndex
));
1444 else if(validIsolateCount
) {
1445 flags
|=DIRPROP_FLAG(PDI
);
1447 lastCcDirProp
= dirProp
;
1448 overflowEmbeddingCount
=0;
1449 while(stack
[stackLast
]<ISOLATE
) /* pop embedding entries */
1450 stackLast
--; /* until the last isolate entry */
1451 stackLast
--; /* pop also the last isolate entry */
1452 validIsolateCount
--;
1453 bracketProcessPDI(&bracketData
);
1455 /* make it WS so that it is handled by adjustWSLevels() */
1456 if (dirInsertIndex
< 0) {
1459 dirInsert
[i
] &= ~(0x000F << (4*dirInsertIndex
));
1460 dirInsert
[i
] |= (Insert_WS
<< (4*dirInsertIndex
));
1462 embeddingLevel
=(UBiDiLevel
)stack
[stackLast
]&~ISOLATE
;
1463 flags
|=(DIRPROP_FLAG(ON
)|DIRPROP_FLAG_LR(embeddingLevel
));
1464 previousLevel
=embeddingLevel
;
1465 levels
[i
]=NO_OVERRIDE(embeddingLevel
);
1468 flags
|=DIRPROP_FLAG(B
);
1469 levels
[i
]=GET_PARALEVEL(pBiDi
, i
);
1471 if(text
[i
]==CR
&& text
[i
+1]==LF
)
1472 break; /* skip CR when followed by LF */
1473 overflowEmbeddingCount
=overflowIsolateCount
=0;
1474 validIsolateCount
=0;
1476 previousLevel
=embeddingLevel
=GET_PARALEVEL(pBiDi
, i
+1);
1477 stack
[0]=embeddingLevel
; /* initialize base entry to para level, no override, no isolate */
1478 bracketProcessB(&bracketData
, embeddingLevel
);
1482 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
1483 /* they will get their levels set correctly in adjustWSLevels() */
1484 levels
[i
]=previousLevel
;
1485 flags
|=DIRPROP_FLAG(BN
);
1488 /* all other types are normal characters and get the "real" level */
1489 if(NO_OVERRIDE(embeddingLevel
)!=NO_OVERRIDE(previousLevel
)) {
1490 bracketProcessBoundary(&bracketData
, lastCcPos
, lastCcDirProp
,
1491 previousLevel
, embeddingLevel
);
1492 flags
|=DIRPROP_FLAG_MULTI_RUNS
;
1493 if(embeddingLevel
&UBIDI_LEVEL_OVERRIDE
)
1494 flags
|=DIRPROP_FLAG_O(embeddingLevel
);
1496 flags
|=DIRPROP_FLAG_E(embeddingLevel
);
1498 previousLevel
=embeddingLevel
;
1499 levels
[i
]=embeddingLevel
;
1500 if(!bracketProcessChar(&bracketData
, i
))
1501 return (UBiDiDirection
)-1;
1502 /* the dirProp may have been changed in bracketProcessChar() */
1503 flags
|=DIRPROP_FLAG(dirProps
[i
]);
1506 if (dirInsertIndex
< 0) {
1510 if(flags
&MASK_EMBEDDING
)
1511 flags
|=DIRPROP_FLAG_LR(pBiDi
->paraLevel
);
1512 if(pBiDi
->orderParagraphsLTR
&& (flags
&DIRPROP_FLAG(B
)))
1513 flags
|=DIRPROP_FLAG(L
);
1514 /* again, determine if the text is mixed-directional or single-directional */
1516 direction
=directionFromFlags(pBiDi
);
1522 * Use a pre-specified embedding levels array:
1524 * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
1525 * ignore all explicit codes (X9),
1526 * and check all the preset levels.
1528 * Recalculate the flags to have them reflect the real properties
1529 * after taking the explicit embeddings into account.
1531 static UBiDiDirection
1532 checkExplicitLevels(UBiDi
*pBiDi
, UErrorCode
*pErrorCode
) {
1533 DirProp
*dirProps
=pBiDi
->dirProps
;
1534 UBiDiLevel
*levels
=pBiDi
->levels
;
1535 int32_t isolateCount
=0;
1537 int32_t length
=pBiDi
->length
;
1538 Flags flags
=0; /* collect all directionalities in the text */
1539 pBiDi
->isolateCount
=0;
1541 int32_t currentParaIndex
= 0;
1542 int32_t currentParaLimit
= pBiDi
->paras
[0].limit
;
1543 int32_t currentParaLevel
= pBiDi
->paraLevel
;
1545 for(int32_t i
=0; i
<length
; ++i
) {
1546 UBiDiLevel level
=levels
[i
];
1547 DirProp dirProp
=dirProps
[i
];
1548 if(dirProp
==LRI
|| dirProp
==RLI
) {
1550 if(isolateCount
>pBiDi
->isolateCount
)
1551 pBiDi
->isolateCount
=isolateCount
;
1553 else if(dirProp
==PDI
)
1558 // optimized version of int32_t currentParaLevel = GET_PARALEVEL(pBiDi, i);
1559 if (pBiDi
->defaultParaLevel
!= 0 &&
1560 i
== currentParaLimit
&& (currentParaIndex
+ 1) < pBiDi
->paraCount
) {
1561 currentParaLevel
= pBiDi
->paras
[++currentParaIndex
].level
;
1562 currentParaLimit
= pBiDi
->paras
[currentParaIndex
].limit
;
1565 UBiDiLevel overrideFlag
= level
& UBIDI_LEVEL_OVERRIDE
;
1566 level
&= ~UBIDI_LEVEL_OVERRIDE
;
1567 if (level
< currentParaLevel
|| UBIDI_MAX_EXPLICIT_LEVEL
< level
) {
1570 // Paragraph separators are ok with explicit level 0.
1571 // Prevents reordering of paragraphs.
1573 // Treat explicit level 0 as a wildcard for the paragraph level.
1574 // Avoid making the caller guess what the paragraph level would be.
1575 level
= (UBiDiLevel
)currentParaLevel
;
1576 levels
[i
] = level
| overrideFlag
;
1579 // 1 <= level < currentParaLevel or UBIDI_MAX_EXPLICIT_LEVEL < level
1580 /* level out of bounds */
1581 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
1585 if (overrideFlag
!= 0) {
1586 /* keep the override flag in levels[i] but adjust the flags */
1587 flags
|=DIRPROP_FLAG_O(level
);
1590 flags
|=DIRPROP_FLAG_E(level
)|DIRPROP_FLAG(dirProp
);
1593 if(flags
&MASK_EMBEDDING
)
1594 flags
|=DIRPROP_FLAG_LR(pBiDi
->paraLevel
);
1595 /* determine if the text is mixed-directional or single-directional */
1597 return directionFromFlags(pBiDi
);
1600 /******************************************************************
1601 The Properties state machine table
1602 *******************************************************************
1604 All table cells are 8 bits:
1605 bits 0..4: next state
1606 bits 5..7: action to perform (if > 0)
1608 Cells may be of format "n" where n represents the next state
1609 (except for the rightmost column).
1610 Cells may also be of format "s(x,y)" where x represents an action
1611 to perform and y represents the next state.
1613 *******************************************************************
1614 Definitions and type for properties state table
1615 *******************************************************************
/* number of columns in impTabProps[][]; the last one is the Res column */
#define IMPTABPROPS_COLUMNS 16
#define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1)
/* bits 0..4 of a cell: next state */
#define GET_STATEPROPS(cell) ((cell)&0x1f)
/* bits 5..7 of a cell: action to perform (if > 0) */
#define GET_ACTIONPROPS(cell) ((cell)>>5)
/* build a cell from an action and a next state; both parameters are
   parenthesized so that compound argument expressions keep their meaning */
#define s(action, newState) ((uint8_t)((newState)+((action)<<5)))
/* dirProp regrouped: one entry per dirProp value, in the order shown on
   the left; the comment on the right names the value after regrouping */
static const uint8_t groupProp[] =
{
    0,      /* L   -> L   */
    1,      /* R   -> R   */
    2,      /* EN  -> EN  */
    7,      /* ES  -> ES  */
    8,      /* ET  -> ET  */
    3,      /* AN  -> AN  */
    9,      /* CS  -> CS  */
    6,      /* B   -> B   */
    5,      /* S   -> S   */
    4,      /* WS  -> ON  */
    4,      /* ON  -> ON  */
    10,     /* LRE -> BN  */
    10,     /* LRO -> BN  */
    12,     /* AL  -> AL  */
    10,     /* RLE -> BN  */
    10,     /* RLO -> BN  */
    10,     /* PDF -> BN  */
    11,     /* NSM -> NSM */
    10,     /* BN  -> BN  */
    4,      /* FSI -> ON  */
    4,      /* LRI -> ON  */
    4,      /* RLI -> ON  */
    4,      /* PDI -> ON  */
    13,     /* ENL -> ENL */
    14      /* ENR -> ENR */
};
/* reduced dirProp */
enum {
    DirProp_L=0,
    DirProp_R=1,
    DirProp_EN=2,
    DirProp_AN=3,
    DirProp_ON=4,
    DirProp_S=5,
    DirProp_B=6
};
1630 /******************************************************************
1632 PROPERTIES STATE TABLE
1634 In table impTabProps,
1635 - the ON column regroups ON and WS, FSI, RLI, LRI and PDI
1636 - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF
1637 - the Res column is the reduced property assigned to a run
1639 Action 1: process current run1, init new run1
1641 3: process run1, process run2, init new run1
1642 4: process run1, set run1=run2, init new run2
1645 1) This table is used in resolveImplicitLevels().
1646 2) This table triggers actions when there is a change in the Bidi
1647 property of incoming characters (action 1).
        3) Most such property sequences are processed immediately (in
           fact, passed to processPropertySeq()).
1650 4) However, numbers are assembled as one sequence. This means
1651 that undefined situations (like CS following digits, until
1652 it is known if the next char will be a digit) are held until
1653 following chars define them.
1654 Example: digits followed by CS, then comes another CS or ON;
1655 the digits will be processed, then the CS assigned
1656 as the start of an ON sequence (action 3).
1657 5) There are cases where more than one sequence must be
1658 processed, for instance digits followed by CS followed by L:
1659 the digits must be processed as one sequence, and the CS
1660 must be processed as an ON sequence, all this before starting
1661 assembling chars for the opening L sequence.
1665 static const uint8_t impTabProps
[][IMPTABPROPS_COLUMNS
] =
1667 /* L , R , EN , AN , ON , S , B , ES , ET , CS , BN , NSM , AL , ENL , ENR , Res */
1668 /* 0 Init */ { 1 , 2 , 4 , 5 , 7 , 15 , 17 , 7 , 9 , 7 , 0 , 7 , 3 , 18 , 21 , DirProp_ON
},
1669 /* 1 L */ { 1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 1 , 1 , s(1,3),s(1,18),s(1,21), DirProp_L
},
1670 /* 2 R */ { s(1,1), 2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 2 , 2 , s(1,3),s(1,18),s(1,21), DirProp_R
},
1671 /* 3 AL */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8), 3 , 3 , 3 ,s(1,18),s(1,21), DirProp_R
},
1672 /* 4 EN */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10), 11 ,s(2,10), 4 , 4 , s(1,3), 18 , 21 , DirProp_EN
},
1673 /* 5 AN */ { s(1,1), s(1,2), s(1,4), 5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12), 5 , 5 , s(1,3),s(1,18),s(1,21), DirProp_AN
},
1674 /* 6 AL:EN/AN */ { s(1,1), s(1,2), 6 , 6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13), 6 , 6 , s(1,3), 18 , 21 , DirProp_AN
},
1675 /* 7 ON */ { s(1,1), s(1,2), s(1,4), s(1,5), 7 ,s(1,15),s(1,17), 7 ,s(2,14), 7 , 7 , 7 , s(1,3),s(1,18),s(1,21), DirProp_ON
},
1676 /* 8 AL:ON */ { s(1,1), s(1,2), s(1,6), s(1,6), 8 ,s(1,16),s(1,17), 8 , 8 , 8 , 8 , 8 , s(1,3),s(1,18),s(1,21), DirProp_ON
},
1677 /* 9 ET */ { s(1,1), s(1,2), 4 , s(1,5), 7 ,s(1,15),s(1,17), 7 , 9 , 7 , 9 , 9 , s(1,3), 18 , 21 , DirProp_ON
},
1678 /*10 EN+ES/CS */ { s(3,1), s(3,2), 4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 10 , s(4,7), s(3,3), 18 , 21 , DirProp_EN
},
1679 /*11 EN+ET */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 11 , s(1,7), 11 , 11 , s(1,3), 18 , 21 , DirProp_EN
},
1680 /*12 AN+CS */ { s(3,1), s(3,2), s(3,4), 5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 12 , s(4,7), s(3,3),s(3,18),s(3,21), DirProp_AN
},
1681 /*13 AL:EN/AN+CS */ { s(3,1), s(3,2), 6 , 6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8), 13 , s(4,8), s(3,3), 18 , 21 , DirProp_AN
},
1682 /*14 ON+ET */ { s(1,1), s(1,2), s(4,4), s(1,5), 7 ,s(1,15),s(1,17), 7 , 14 , 7 , 14 , 14 , s(1,3),s(4,18),s(4,21), DirProp_ON
},
1683 /*15 S */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7), 15 ,s(1,17), s(1,7), s(1,9), s(1,7), 15 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_S
},
1684 /*16 AL:S */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8), 16 ,s(1,17), s(1,8), s(1,8), s(1,8), 16 , s(1,8), s(1,3),s(1,18),s(1,21), DirProp_S
},
1685 /*17 B */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15), 17 , s(1,7), s(1,9), s(1,7), 17 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_B
},
1686 /*18 ENL */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,19), 20 ,s(2,19), 18 , 18 , s(1,3), 18 , 21 , DirProp_L
},
1687 /*19 ENL+ES/CS */ { s(3,1), s(3,2), 18 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 19 , s(4,7), s(3,3), 18 , 21 , DirProp_L
},
1688 /*20 ENL+ET */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 20 , s(1,7), 20 , 20 , s(1,3), 18 , 21 , DirProp_L
},
1689 /*21 ENR */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,22), 23 ,s(2,22), 21 , 21 , s(1,3), 18 , 21 , DirProp_AN
},
1690 /*22 ENR+ES/CS */ { s(3,1), s(3,2), 21 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 22 , s(4,7), s(3,3), 18 , 21 , DirProp_AN
},
1691 /*23 ENR+ET */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 23 , s(1,7), 23 , 23 , s(1,3), 18 , 21 , DirProp_AN
}
/* we must undef macro s because the levels tables have a different
 * structure (4 bits for action and 4 bits for next state).
 */
1699 /******************************************************************
1700 The levels state machine tables
1701 *******************************************************************
1703 All table cells are 8 bits:
1704 bits 0..3: next state
1705 bits 4..7: action to perform (if > 0)
1707 Cells may be of format "n" where n represents the next state
1708 (except for the rightmost column).
1709 Cells may also be of format "s(x,y)" where x represents an action
1710 to perform and y represents the next state.
1712 This format limits each table to 16 states each and to 15 actions.
1714 *******************************************************************
1715 Definitions and type for levels state tables
1716 *******************************************************************
/* number of columns in a levels table: one per reduced dirProp, plus Res */
#define IMPTABLEVELS_COLUMNS (DirProp_B + 2)
#define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1)
/* bits 0..3 of a cell: next state */
#define GET_STATE(cell) ((cell)&0x0f)
/* bits 4..7 of a cell: action to perform (if > 0) */
#define GET_ACTION(cell) ((cell)>>4)
/* build a cell from an action and a next state; both parameters are
   parenthesized so that compound argument expressions keep their meaning */
#define s(action, newState) ((uint8_t)((newState)+((action)<<4)))
1724 typedef uint8_t ImpTab
[][IMPTABLEVELS_COLUMNS
];
1725 typedef uint8_t ImpAct
[];
1727 /* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct,
1728 * instead of having a pair of ImpTab and a pair of ImpAct.
1730 typedef struct ImpTabPair
{
1731 const void * pImpTab
[2];
1732 const void * pImpAct
[2];
1735 /******************************************************************
1739 In all levels state tables,
1740 - state 0 is the initial state
1741 - the Res column is the increment to add to the text level
1742 for this property sequence.
1744 The impAct arrays for each table of a pair map the local action
1745 numbers of the table to the total list of actions. For instance,
1746 action 2 in a given table corresponds to the action number which
1747 appears in entry [2] of the impAct array for that table.
1748 The first entry of all impAct arrays must be 0.
1750 Action 1: init conditional sequence
1751 2: prepend conditional sequence to current sequence
1752 3: set ON sequence to new level - 1
1753 4: init EN/AN/ON sequence
1754 5: fix EN/AN/ON sequence followed by R
1755 6: set previous level sequence to level 2
1758 1) These tables are used in processPropertySeq(). The input
1759 is property sequences as determined by resolveImplicitLevels.
1760 2) Most such property sequences are processed immediately
1761 (levels are assigned).
1762 3) However, some sequences cannot be assigned a final level till
1763 one or more following sequences are received. For instance,
1764 ON following an R sequence within an even-level paragraph.
1765 If the following sequence is R, the ON sequence will be
1766 assigned basic run level+1, and so will the R sequence.
1767 4) S is generally handled like ON, since its level will be fixed
1768 to paragraph level in adjustWSLevels().
1772 static const ImpTab impTabL_DEFAULT
= /* Even paragraph level */
1773 /* In this table, conditional sequences receive the lower possible level
1774 until proven otherwise.
1777 /* L , R , EN , AN , ON , S , B , Res */
1778 /* 0 : init */ { 0 , 1 , 0 , 2 , 0 , 0 , 0 , 0 },
1779 /* 1 : R */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 1 },
1780 /* 2 : AN */ { 0 , 1 , 0 , 2 , s(1,5), s(1,5), 0 , 2 },
1781 /* 3 : R+EN/AN */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 2 },
1782 /* 4 : R+ON */ { 0 , s(2,1), s(3,3), s(3,3), 4 , 4 , 0 , 0 },
1783 /* 5 : AN+ON */ { 0 , s(2,1), 0 , s(3,2), 5 , 5 , 0 , 0 }
1785 static const ImpTab impTabR_DEFAULT
= /* Odd paragraph level */
1786 /* In this table, conditional sequences receive the lower possible level
1787 until proven otherwise.
1790 /* L , R , EN , AN , ON , S , B , Res */
1791 /* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 },
1792 /* 1 : L */ { 1 , 0 , 1 , 3 , s(1,4), s(1,4), 0 , 1 },
1793 /* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 },
1794 /* 3 : L+AN */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 1 },
1795 /* 4 : L+ON */ { s(2,1), 0 , s(2,1), 3 , 4 , 4 , 0 , 0 },
1796 /* 5 : L+AN+ON */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 0 }
1798 static const ImpAct impAct0
= {0,1,2,3,4};
1799 static const ImpTabPair impTab_DEFAULT
= {{&impTabL_DEFAULT
,
1801 {&impAct0
, &impAct0
}};
1803 static const ImpTab impTabL_NUMBERS_SPECIAL
= /* Even paragraph level */
1804 /* In this table, conditional sequences receive the lower possible level
1805 until proven otherwise.
1808 /* L , R , EN , AN , ON , S , B , Res */
1809 /* 0 : init */ { 0 , 2 , s(1,1), s(1,1), 0 , 0 , 0 , 0 },
1810 /* 1 : L+EN/AN */ { 0 , s(4,2), 1 , 1 , 0 , 0 , 0 , 0 },
1811 /* 2 : R */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 1 },
1812 /* 3 : R+ON */ { 0 , s(2,2), s(3,4), s(3,4), 3 , 3 , 0 , 0 },
1813 /* 4 : R+EN/AN */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 2 }
1815 static const ImpTabPair impTab_NUMBERS_SPECIAL
= {{&impTabL_NUMBERS_SPECIAL
,
1817 {&impAct0
, &impAct0
}};
// Level-resolution state table for the GROUP_NUMBERS_WITH_R mode, used as the
// first (L) entry of impTab_GROUP_NUMBERS_WITH_R. Rows are states; columns
// are L, R, EN, AN, ON, S, B and Res.
1819 static const ImpTab impTabL_GROUP_NUMBERS_WITH_R
=
1820 /* In this table, EN/AN+ON sequences receive levels as if associated with R
1821 until proven that there is L or sor/eor on both sides. AN is handled like EN.
1824 /* L , R , EN , AN , ON , S , B , Res */
1825 /* 0 init */ { 0 , 3 , s(1,1), s(1,1), 0 , 0 , 0 , 0 },
1826 /* 1 EN/AN */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 2 },
1827 /* 2 EN/AN+ON */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 1 },
1828 /* 3 R */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 1 },
1829 /* 4 R+ON */ { s(2,0), 3 , 5 , 5 , 4 , s(2,0), s(2,0), 1 },
1830 /* 5 R+EN/AN */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 2 }
// Level-resolution state table for the GROUP_NUMBERS_WITH_R mode, used as the
// second (R) entry of impTab_GROUP_NUMBERS_WITH_R. Rows are states; columns
// are L, R, EN, AN, ON, S, B and Res.
1832 static const ImpTab impTabR_GROUP_NUMBERS_WITH_R
=
1833 /* In this table, EN/AN+ON sequences receive levels as if associated with R
1834 until proven that there is L on both sides. AN is handled like EN.
1837 /* L , R , EN , AN , ON , S , B , Res */
1838 /* 0 init */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 0 },
1839 /* 1 EN/AN */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 1 },
1840 /* 2 L */ { 2 , 0 , s(1,4), s(1,4), s(1,3), 0 , 0 , 1 },
1841 /* 3 L+ON */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 0 },
1842 /* 4 L+EN/AN */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 1 }
// Table pair for the GROUP_NUMBERS_WITH_R mode: the L and R level tables
// followed by the action maps (impAct0 for both).
1844 static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R
= {
1845 {&impTabL_GROUP_NUMBERS_WITH_R
,
1846 &impTabR_GROUP_NUMBERS_WITH_R
},
1847 {&impAct0
, &impAct0
}};
// Level-resolution state table for the INVERSE_NUMBERS_AS_L mode, first (L)
// entry of impTab_INVERSE_NUMBERS_AS_L. Rows are states; columns are
// L, R, EN, AN, ON, S, B and Res.
// NOTE(review): the explanatory comment below is cut short in this copy.
1850 static const ImpTab impTabL_INVERSE_NUMBERS_AS_L
=
1851 /* This table is identical to the Default LTR table except that EN and AN are
1855 /* L , R , EN , AN , ON , S , B , Res */
1856 /* 0 : init */ { 0 , 1 , 0 , 0 , 0 , 0 , 0 , 0 },
1857 /* 1 : R */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 1 },
1858 /* 2 : AN */ { 0 , 1 , 0 , 0 , s(1,5), s(1,5), 0 , 2 },
1859 /* 3 : R+EN/AN */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 2 },
1860 /* 4 : R+ON */ { s(2,0), 1 , s(2,0), s(2,0), 4 , 4 , s(2,0), 1 },
1861 /* 5 : AN+ON */ { s(2,0), 1 , s(2,0), s(2,0), 5 , 5 , s(2,0), 1 }
// Level-resolution state table for the INVERSE_NUMBERS_AS_L mode, second (R)
// entry of impTab_INVERSE_NUMBERS_AS_L. Rows are states; columns are
// L, R, EN, AN, ON, S, B and Res.
// NOTE(review): the explanatory comment below is cut short in this copy.
1863 static const ImpTab impTabR_INVERSE_NUMBERS_AS_L
=
1864 /* This table is identical to the Default RTL table except that EN and AN are
1868 /* L , R , EN , AN , ON , S , B , Res */
1869 /* 0 : init */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 0 },
1870 /* 1 : L */ { 1 , 0 , 1 , 1 , s(1,4), s(1,4), 0 , 1 },
1871 /* 2 : EN/AN */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 1 },
1872 /* 3 : L+AN */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 1 },
1873 /* 4 : L+ON */ { s(2,1), 0 , s(2,1), s(2,1), 4 , 4 , 0 , 0 },
1874 /* 5 : L+AN+ON */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 0 }
// Table pair for the INVERSE_NUMBERS_AS_L mode: the L and R level tables
// followed by the action maps (impAct0 for both).
1876 static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L
= {
1877 {&impTabL_INVERSE_NUMBERS_AS_L
,
1878 &impTabR_INVERSE_NUMBERS_AS_L
},
1879 {&impAct0
, &impAct0
}};
// Level-resolution state table for odd-level runs in the INVERSE_LIKE_DIRECT
// mode; also reused as the R table of impTab_INVERSE_FOR_NUMBERS_SPECIAL.
// Rows are states; columns are L, R, EN, AN, ON, S, B and Res. It is paired
// with impAct1, so action indexes 2 and 3 select cases 13 and 14.
1881 static const ImpTab impTabR_INVERSE_LIKE_DIRECT
= /* Odd paragraph level */
1882 /* In this table, conditional sequences receive the lower possible level
1883 until proven otherwise.
1886 /* L , R , EN , AN , ON , S , B , Res */
1887 /* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 },
1888 /* 1 : L */ { 1 , 0 , 1 , 2 , s(1,3), s(1,3), 0 , 1 },
1889 /* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 },
1890 /* 3 : L+ON */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 0 },
1891 /* 4 : L+ON+AN */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 3 },
1892 /* 5 : L+AN+ON */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 2 },
1893 /* 6 : L+ON+EN */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 1 }
// Action map for impTabR_INVERSE_LIKE_DIRECT: action indexes 0..3 select
// cases 0, 1, 13 and 14 of the switch in processPropertySeq.
1895 static const ImpAct impAct1
= {0,1,13,14};
1896 /* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc"
// Table pair for the INVERSE_LIKE_DIRECT mode; the odd-level side uses
// impTabR_INVERSE_LIKE_DIRECT together with impAct1.
// NOTE(review): the embedded numbering skips a line here; the first level
// table entry is not present in this copy.
1898 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT
= {
1900 &impTabR_INVERSE_LIKE_DIRECT
},
1901 {&impAct0
, &impAct1
}};
// Level-resolution state table (L side) for INVERSE_LIKE_DIRECT with mark
// insertion. It is paired with impAct2, so action indexes 3..6 select cases
// 5..8 of processPropertySeq, which call addPoint to record LRM positions.
// Rows are states; columns are L, R, EN, AN, ON, S, B and Res.
1903 static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS
=
1904 /* The case handled in this table is (visually): R EN L
1907 /* L , R , EN , AN , ON , S , B , Res */
1908 /* 0 : init */ { 0 , s(6,3), 0 , 1 , 0 , 0 , 0 , 0 },
1909 /* 1 : L+AN */ { 0 , s(6,3), 0 , 1 , s(1,2), s(3,0), 0 , 4 },
1910 /* 2 : L+AN+ON */ { s(2,0), s(6,3), s(2,0), 1 , 2 , s(3,0), s(2,0), 3 },
1911 /* 3 : R */ { 0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0), 0 , 3 },
1912 /* 4 : R+ON */ { s(3,0), s(4,3), s(5,5), s(5,6), 4 , s(3,0), s(3,0), 3 },
1913 /* 5 : R+EN */ { s(3,0), s(4,3), 5 , s(5,6), s(1,4), s(3,0), s(3,0), 4 },
1914 /* 6 : R+AN */ { s(3,0), s(4,3), s(5,5), 6 , s(1,4), s(3,0), s(3,0), 4 }
// Level-resolution state table (R side) for the WITH_MARKS inverse modes.
// It is paired with impAct3, so action indexes 2..5 select cases 9..12 of
// processPropertySeq. Rows are states; columns are L, R, EN, AN, ON, S, B
// and Res.
1916 static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS
=
1917 /* The cases handled in this table are (visually): R EN L
1921 /* L , R , EN , AN , ON , S , B , Res */
1922 /* 0 : init */ { s(1,3), 0 , 1 , 1 , 0 , 0 , 0 , 0 },
1923 /* 1 : R+EN/AN */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 1 },
1924 /* 2 : R+EN/AN+ON */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 0 },
1925 /* 3 : L */ { 3 , 0 , 3 , s(3,6), s(1,4), s(4,0), 0 , 1 },
1926 /* 4 : L+ON */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 0 },
1927 /* 5 : L+ON+EN */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 1 },
1928 /* 6 : L+AN */ { s(5,3), s(4,0), 6 , 6 , 4 , s(4,0), s(4,0), 3 }
// Action map for the L-side WITH_MARKS tables: action indexes 0..6 select
// cases 0, 1, 2, 5, 6, 7 and 8 of the switch in processPropertySeq.
1930 static const ImpAct impAct2
= {0,1,2,5,6,7,8};
// Action map for the R-side WITH_MARKS table: action indexes 0..5 select
// cases 0, 1, 9, 10, 11 and 12 of the switch in processPropertySeq.
1931 static const ImpAct impAct3
= {0,1,9,10,11,12};
// Table pair for INVERSE_LIKE_DIRECT with mark insertion: the L and R level
// tables followed by their action maps (impAct2 for L, impAct3 for R).
1932 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS
= {
1933 {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS
,
1934 &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS
},
1935 {&impAct2
, &impAct3
}};
// Table pair for INVERSE_FOR_NUMBERS_SPECIAL: reuses the NUMBERS_SPECIAL
// L table and the INVERSE_LIKE_DIRECT R table, with impAct0 and impAct1.
1937 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL
= {
1938 {&impTabL_NUMBERS_SPECIAL
,
1939 &impTabR_INVERSE_LIKE_DIRECT
},
1940 {&impAct0
, &impAct1
}};
// Level-resolution state table (L side) for INVERSE_FOR_NUMBERS_SPECIAL with
// mark insertion; paired with impAct2. Rows are states; columns are
// L, R, EN, AN, ON, S, B and Res.
1942 static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS
=
1943 /* The case handled in this table is (visually): R EN L
1946 /* L , R , EN , AN , ON , S , B , Res */
1947 /* 0 : init */ { 0 , s(6,2), 1 , 1 , 0 , 0 , 0 , 0 },
1948 /* 1 : L+EN/AN */ { 0 , s(6,2), 1 , 1 , 0 , s(3,0), 0 , 4 },
1949 /* 2 : R */ { 0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0), 0 , 3 },
1950 /* 3 : R+ON */ { s(3,0), s(4,2), s(5,4), s(5,4), 3 , s(3,0), s(3,0), 3 },
1951 /* 4 : R+EN/AN */ { s(3,0), s(4,2), 4 , 4 , s(1,3), s(3,0), s(3,0), 4 }
// Table pair for INVERSE_FOR_NUMBERS_SPECIAL with mark insertion: its own
// L table, the shared WITH_MARKS R table, and action maps impAct2 / impAct3.
1953 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS
= {
1954 {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS
,
1955 &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS
},
1956 {&impAct2
, &impAct3
}};
// Fields of the level-resolution state that processPropertySeq receives as
// LevState *pLevState and that resolveImplicitLevels fills in per run.
// NOTE(review): the lines opening and closing the struct are not present in
// this copy (the embedded numbering skips around this span).
1961 const ImpTab
* pImpTab
; /* level table pointer */
1962 const ImpAct
* pImpAct
; /* action map array */
1963 int32_t startON
; /* start of ON sequence */
1964 int32_t startL2EN
; /* start of level 2 sequence */
1965 int32_t lastStrongRTL
; /* index of last found R or AL */
1966 int32_t state
; /* current state */
1967 int32_t runStart
; /* start position of the run */
1968 UBiDiLevel runLevel
; /* run level before implicit solving */
1971 /*------------------------------------------------------------------------*/
// Appends one insert point to pBiDi->insertPoints.
// The points array is allocated with FIRSTALLOC entries on first use and is
// doubled with uprv_realloc once size reaches capacity. On allocation failure
// U_MEMORY_ALLOCATION_ERROR is stored in insertPoints.errorCode; after a
// failed realloc the previous buffer pointer is put back first.
// NOTE(review): the embedded numbering skips lines here; the declaration and
// filling of the local point from pos and flag, and whatever follows each
// error assignment, are not present in this copy.
1974 addPoint(UBiDi
*pBiDi
, int32_t pos
, int32_t flag
)
1975 /* param pos: position where to insert
1976 param flag: one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER
1979 #define FIRSTALLOC 10
1981 InsertPoints
* pInsertPoints
=&(pBiDi
->insertPoints
);
// first use: nothing allocated yet
1983 if (pInsertPoints
->capacity
== 0)
1985 pInsertPoints
->points
=static_cast<Point
*>(uprv_malloc(sizeof(Point
)*FIRSTALLOC
));
1986 if (pInsertPoints
->points
== NULL
)
1988 pInsertPoints
->errorCode
=U_MEMORY_ALLOCATION_ERROR
;
1991 pInsertPoints
->capacity
=FIRSTALLOC
;
1993 if (pInsertPoints
->size
>= pInsertPoints
->capacity
) /* no room for new point */
// keep the old pointer so it can be restored if realloc fails
1995 Point
* savePoints
=pInsertPoints
->points
;
1996 pInsertPoints
->points
=static_cast<Point
*>(uprv_realloc(pInsertPoints
->points
,
1997 pInsertPoints
->capacity
*2*sizeof(Point
)));
1998 if (pInsertPoints
->points
== NULL
)
2000 pInsertPoints
->points
=savePoints
;
2001 pInsertPoints
->errorCode
=U_MEMORY_ALLOCATION_ERROR
;
2004 else pInsertPoints
->capacity
*=2;
// store the new point at the end and count it
2008 pInsertPoints
->points
[pInsertPoints
->size
]=point
;
2009 pInsertPoints
->size
++;
// Walks positions k in [start, limit) while keeping an isolate counter.
// For each position the directional property comes either from dirInsert[k]
// (when dirInsert is non-NULL), consumed 4 bits at a time and mapped through
// stdDirFromInsertDir, or from dirProps[k]. LRI and RLI are tested for.
// NOTE(review): the lines that update isolateCount and write levels[k] are
// not present in this copy; judging by the name, level is presumably applied
// only to positions outside isolate sequences - confirm against the full file.
2014 setLevelsOutsideIsolates(UBiDi
*pBiDi
, int32_t start
, int32_t limit
, UBiDiLevel level
)
2016 DirProp
*dirProps
=pBiDi
->dirProps
, dirProp
;
2017 uint16_t *dirInsert
= pBiDi
->dirInsert
; /* may be NULL */
2018 UBiDiLevel
*levels
=pBiDi
->levels
;
2019 int32_t dirInsertValue
;
2020 int8_t dirInsertIndex
; /* position within dirInsertValue, if any */
2021 int32_t isolateCount
=0, k
;
2023 dirInsertIndex
= -1; /* indicate that we have not checked dirInsert yet */
2024 for(k
=start
; k
<limit
; k
++) {
2025 if (dirInsert
!= NULL
&& dirInsertIndex
< 0) {
2026 dirInsertValue
= dirInsert
[k
];
2028 if (dirInsertValue
> 0) {
// low 4 bits select the inserted control; the rest is kept for later
2030 dirProp
= (DirProp
)stdDirFromInsertDir
[dirInsertValue
& 0x000F];
2031 dirInsertValue
>>= 4;
2033 dirInsertIndex
= -1;
2034 dirProp
=dirProps
[k
];
2040 if(dirProp
==LRI
|| dirProp
==RLI
)
2045 /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
2048 * This implementation of the (Wn) rules applies all rules in one pass.
2049 * In order to do so, it needs a look-ahead of typically 1 character
2050 * (except for W5: sequences of ET) and keeps track of changes
2051 * in a rule Wp that affect a later Wq (p<q).
2053 * The (Nn) and (In) rules are also performed in that same single loop,
2054 * but effectively one iteration behind for white space.
2056 * Since all implicit rules are performed in one step, it is not necessary
2057 * to actually store the intermediate directional properties in dirProps[].
// Feeds one sequence [start, limit) of resolved property _prop into the level
// state machine held in *pLevState.
// The cell (*pImpTab)[state][_prop] gives the next state (GET_STATE) and an
// action index (GET_ACTION) that is mapped through *pImpAct to the case
// number handled below; addLevel is the Res column of the new state.
// After the action, if addLevel is nonzero or start was moved back before
// start0, level = runLevel + addLevel is computed for [start, limit); it is
// handed to setLevelsOutsideIsolates (presumably when start precedes runStart).
// NOTE(review): the embedded numbering skips lines throughout this function;
// the switch header, break statements, closing braces and several loop bodies
// are not present in this copy.
2061 processPropertySeq(UBiDi
*pBiDi
, LevState
*pLevState
, uint8_t _prop
,
2062 int32_t start
, int32_t limit
) {
2063 uint8_t cell
, oldStateSeq
, actionSeq
;
2064 const ImpTab
* pImpTab
=pLevState
->pImpTab
;
2065 const ImpAct
* pImpAct
=pLevState
->pImpAct
;
2066 UBiDiLevel
* levels
=pBiDi
->levels
;
2067 UBiDiLevel level
, addLevel
;
2068 InsertPoints
* pInsertPoints
;
2071 start0
=start
; /* save original start position */
2072 oldStateSeq
=(uint8_t)pLevState
->state
;
2073 cell
=(*pImpTab
)[oldStateSeq
][_prop
];
2074 pLevState
->state
=GET_STATE(cell
); /* isolate the new state */
2075 actionSeq
=(*pImpAct
)[GET_ACTION(cell
)]; /* isolate the action */
2076 addLevel
=(*pImpTab
)[pLevState
->state
][IMPTABLEVELS_RES
];
2080 case 1: /* init ON seq */
2081 pLevState
->startON
=start0
;
2084 case 2: /* prepend ON seq to current seq */
2085 start
=pLevState
->startON
;
2088 case 3: /* EN/AN after R+ON */
2089 level
=pLevState
->runLevel
+1;
2090 setLevelsOutsideIsolates(pBiDi
, pLevState
->startON
, start0
, level
);
2093 case 4: /* EN/AN before R for NUMBERS_SPECIAL */
2094 level
=pLevState
->runLevel
+2;
2095 setLevelsOutsideIsolates(pBiDi
, pLevState
->startON
, start0
, level
);
2098 case 5: /* L or S after possible relevant EN/AN */
2099 /* check if we had EN after R/AL */
2100 if (pLevState
->startL2EN
>= 0) {
2101 addPoint(pBiDi
, pLevState
->startL2EN
, LRM_BEFORE
);
2103 pLevState
->startL2EN
=-1; /* not within previous if since could also be -2 */
2104 /* check if we had any relevant EN/AN after R/AL */
2105 pInsertPoints
=&(pBiDi
->insertPoints
);
2106 if ((pInsertPoints
->capacity
== 0) ||
2107 (pInsertPoints
->size
<= pInsertPoints
->confirmed
))
2109 /* nothing, just clean up */
2110 pLevState
->lastStrongRTL
=-1;
2111 /* check if we have a pending conditional segment */
2112 level
=(*pImpTab
)[oldStateSeq
][IMPTABLEVELS_RES
];
2113 if ((level
& 1) && (pLevState
->startON
> 0)) { /* after ON */
2114 start
=pLevState
->startON
; /* reset to basic run level */
2116 if (_prop
== DirProp_S
) /* add LRM before S */
2118 addPoint(pBiDi
, start0
, LRM_BEFORE
);
2119 pInsertPoints
->confirmed
=pInsertPoints
->size
;
2123 /* reset previous RTL cont to level for LTR text */
2124 for (k
=pLevState
->lastStrongRTL
+1; k
<start0
; k
++)
2126 /* reset odd level, leave runLevel+2 as is */
2127 levels
[k
]=(levels
[k
] - 2) & ~1;
2129 /* mark insert points as confirmed */
2130 pInsertPoints
->confirmed
=pInsertPoints
->size
;
2131 pLevState
->lastStrongRTL
=-1;
2132 if (_prop
== DirProp_S
) /* add LRM before S */
2134 addPoint(pBiDi
, start0
, LRM_BEFORE
);
2135 pInsertPoints
->confirmed
=pInsertPoints
->size
;
2139 case 6: /* R/AL after possible relevant EN/AN */
2141 pInsertPoints
=&(pBiDi
->insertPoints
);
2142 if (pInsertPoints
->capacity
> 0)
2143 /* remove all non confirmed insert points */
2144 pInsertPoints
->size
=pInsertPoints
->confirmed
;
2145 pLevState
->startON
=-1;
2146 pLevState
->startL2EN
=-1;
2147 pLevState
->lastStrongRTL
=limit
- 1;
2150 case 7: /* EN/AN after R/AL + possible cont */
2151 /* check for real AN */
2152 if ((_prop
== DirProp_AN
) && (pBiDi
->dirProps
[start0
] == AN
) &&
2153 (pBiDi
->reorderingMode
!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
))
2156 if (pLevState
->startL2EN
== -1) /* if no relevant EN already found */
2158 /* just note the rightmost digit as a strong RTL */
2159 pLevState
->lastStrongRTL
=limit
- 1;
2162 if (pLevState
->startL2EN
>= 0) /* after EN, no AN */
2164 addPoint(pBiDi
, pLevState
->startL2EN
, LRM_BEFORE
);
2165 pLevState
->startL2EN
=-2;
2168 addPoint(pBiDi
, start0
, LRM_BEFORE
);
2171 /* if first EN/AN after R/AL */
2172 if (pLevState
->startL2EN
== -1) {
2173 pLevState
->startL2EN
=start0
;
2177 case 8: /* note location of latest R/AL */
2178 pLevState
->lastStrongRTL
=limit
- 1;
2179 pLevState
->startON
=-1;
2182 case 9: /* L after R+ON/EN/AN */
2183 /* include possible adjacent number on the left */
// empty-bodied scan: k stops at the nearest odd-level position, or at -1
2184 for (k
=start0
-1; k
>=0 && !(levels
[k
]&1); k
--);
2186 addPoint(pBiDi
, k
, RLM_BEFORE
); /* add RLM before */
2187 pInsertPoints
=&(pBiDi
->insertPoints
);
2188 pInsertPoints
->confirmed
=pInsertPoints
->size
; /* confirm it */
2190 pLevState
->startON
=start0
;
2193 case 10: /* AN after L */
2194 /* AN numbers between L text on both sides may be trouble. */
2195 /* tentatively bracket with LRMs; will be confirmed if followed by L */
2196 addPoint(pBiDi
, start0
, LRM_BEFORE
); /* add LRM before */
2197 addPoint(pBiDi
, start0
, LRM_AFTER
); /* add LRM after */
2200 case 11: /* R after L+ON/EN/AN */
2201 /* false alert, infirm LRMs around previous AN */
2202 pInsertPoints
=&(pBiDi
->insertPoints
);
2203 pInsertPoints
->size
=pInsertPoints
->confirmed
;
2204 if (_prop
== DirProp_S
) /* add RLM before S */
2206 addPoint(pBiDi
, start0
, RLM_BEFORE
);
2207 pInsertPoints
->confirmed
=pInsertPoints
->size
;
2211 case 12: /* L after L+ON/AN */
2212 level
=pLevState
->runLevel
+ addLevel
;
2213 for(k
=pLevState
->startON
; k
<start0
; k
++) {
2214 if (levels
[k
]<level
)
2217 pInsertPoints
=&(pBiDi
->insertPoints
);
2218 pInsertPoints
->confirmed
=pInsertPoints
->size
; /* confirm inserts */
2219 pLevState
->startON
=start0
;
2222 case 13: /* L after L+ON+EN/AN/ON */
2223 level
=pLevState
->runLevel
;
2224 for(k
=start0
-1; k
>=pLevState
->startON
; k
--) {
2225 if(levels
[k
]==level
+3) {
2226 while(levels
[k
]==level
+3) {
2229 while(levels
[k
]==level
) {
2233 if(levels
[k
]==level
+2) {
2241 case 14: /* R after L+ON+EN/AN/ON */
2242 level
=pLevState
->runLevel
+1;
2243 for(k
=start0
-1; k
>=pLevState
->startON
; k
--) {
2244 if(levels
[k
]>level
) {
2250 default: /* we should never get here */
// final step: apply the level of the new state to the (possibly extended) sequence
2255 if((addLevel
) || (start
< start0
)) {
2256 level
=pLevState
->runLevel
+ addLevel
;
2257 if(start
>=pLevState
->runStart
) {
2258 for(k
=start
; k
<limit
; k
++) {
2262 setLevelsOutsideIsolates(pBiDi
, start
, limit
, level
);
2268 * Returns the directionality of the last strong character at the end of the prologue, if any.
2269 * Requires prologue!=null.
// Scans pBiDi->prologue backwards from proLength, one code point at a time
// (U16_PREV), classifying each with ubidi_getCustomizedClass and testing for
// R or AL. resolveImplicitLevels compares the result against DirProp_ON.
// NOTE(review): the local declarations, the other property tests and all
// return statements are not present in this copy.
2272 lastL_R_AL(UBiDi
*pBiDi
) {
2273 const UChar
*text
=pBiDi
->prologue
;
2274 int32_t length
=pBiDi
->proLength
;
2278 for(i
=length
; i
>0; ) {
2279 /* i is decremented by U16_PREV */
2280 U16_PREV(text
, 0, i
, uchar
);
2281 dirProp
=(DirProp
)ubidi_getCustomizedClass(pBiDi
, uchar
);
2285 if(dirProp
==R
|| dirProp
==AL
) {
2296 * Returns the directionality of the first strong character, or digit, in the epilogue, if any.
2297 * Requires epilogue!=null.
// Scans pBiDi->epilogue forwards from index 0 up to epiLength, one code point
// at a time (U16_NEXT), classifying each with ubidi_getCustomizedClass and
// testing for R or AL. resolveImplicitLevels compares the result against
// DirProp_ON.
// NOTE(review): the local declarations, the other property tests and all
// return statements are not present in this copy.
2300 firstL_R_AL_EN_AN(UBiDi
*pBiDi
) {
2301 const UChar
*text
=pBiDi
->epilogue
;
2302 int32_t length
=pBiDi
->epiLength
;
2306 for(i
=0; i
<length
; ) {
2307 /* i is incremented by U16_NEXT */
2308 U16_NEXT(text
, i
, length
, uchar
);
2309 dirProp
=(DirProp
)ubidi_getCustomizedClass(pBiDi
, uchar
);
2313 if(dirProp
==R
|| dirProp
==AL
) {
// Resolves implicit levels for the run [start, limit) whose boundary types
// are sor and eor.
// Visible steps:
//  - a LevState is initialised from levels[start]; the level table and action
//    map are picked from pBiDi->pImpTabPair by (runLevel & 1);
//  - when the run starts at text offset 0 and a prologue exists, its last
//    strong type is looked up with lastL_R_AL;
//  - when the run starts at a PDI (from dirProps or from a dirInsert entry)
//    and isolateCount is non-negative, the state saved in pBiDi->isolates[]
//    is restored and isolateCount is decremented; the startON=-1 assignment
//    and the processPropertySeq call with sor that follow are presumably the
//    alternative branch (its else line is not present in this copy);
//  - the main loop runs i from start to limit inclusive, steps impTabProps,
//    and calls processPropertySeq for each finished property sequence;
//  - at the end, if the last relevant character is LRI or RLI and limit is
//    before the end of the text, the state is pushed onto pBiDi->isolates[];
//    the final processPropertySeq call with eor is presumably the other
//    branch (its else line is not present in this copy).
// NOTE(review): the embedded numbering skips many lines in this function
// (declarations, loop bodies, braces); read it against the full file.
2327 resolveImplicitLevels(UBiDi
*pBiDi
,
2328 int32_t start
, int32_t limit
,
2329 DirProp sor
, DirProp eor
) {
2330 const DirProp
*dirProps
=pBiDi
->dirProps
;
2331 uint16_t *dirInsert
= pBiDi
->dirInsert
; /* may be NULL */
2333 int32_t dirInsertValue
;
2334 int8_t dirInsertIndex
; /* position within dirInsertValue, if any */
2336 int32_t i
, start1
, start2
;
2337 uint16_t oldStateImp
, stateImp
, actionImp
;
2338 uint8_t gprop
, resProp
, cell
;
2340 DirProp nextStrongProp
=R
;
2341 int32_t nextStrongPos
=-1;
2343 /* check for RTL inverse BiDi mode */
2344 /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to
2345 * loop on the text characters from end to start.
2346 * This would need a different properties state table (at least different
2347 * actions) and different levels state tables (maybe very similar to the
2348 * LTR corresponding ones.
2351 ((start
<pBiDi
->lastArabicPos
) && (GET_PARALEVEL(pBiDi
, start
) & 1) &&
2352 (pBiDi
->reorderingMode
==UBIDI_REORDER_INVERSE_LIKE_DIRECT
||
2353 pBiDi
->reorderingMode
==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
));
2355 /* initialize for property and levels state tables */
2356 levState
.startL2EN
=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
2357 levState
.lastStrongRTL
=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
2358 levState
.runStart
=start
;
2359 levState
.runLevel
=pBiDi
->levels
[start
];
2360 levState
.pImpTab
=(const ImpTab
*)((pBiDi
->pImpTabPair
)->pImpTab
)[levState
.runLevel
&1];
2361 levState
.pImpAct
=(const ImpAct
*)((pBiDi
->pImpTabPair
)->pImpAct
)[levState
.runLevel
&1];
2362 if(start
==0 && pBiDi
->proLength
>0) {
2363 DirProp lastStrong
=lastL_R_AL(pBiDi
);
2364 if(lastStrong
!=DirProp_ON
) {
2368 /* The isolates[] entries contain enough information to
2369 resume the bidi algorithm in the same state as it was
2370 when it was interrupted by an isolate sequence. */
2372 if (dirInsert
!= NULL
) {
2373 dirInsertValue
= dirInsert
[start
];
2374 while (dirInsertValue
> 0) {
2375 if ((dirInsertValue
& 0x000F) == Insert_PDI
) {
2378 dirInsertValue
>>= 4;
2381 if((dirProps
[start
]==PDI
|| dirInsertValue
>0) && pBiDi
->isolateCount
>= 0) {
2382 levState
.startON
=pBiDi
->isolates
[pBiDi
->isolateCount
].startON
;
2383 start1
=pBiDi
->isolates
[pBiDi
->isolateCount
].start1
;
2384 stateImp
=pBiDi
->isolates
[pBiDi
->isolateCount
].stateImp
;
2385 levState
.state
=pBiDi
->isolates
[pBiDi
->isolateCount
].state
;
2386 pBiDi
->isolateCount
--;
2388 levState
.startON
=-1;
2390 if(dirProps
[start
]==NSM
)
2395 processPropertySeq(pBiDi
, &levState
, sor
, start
, start
);
2397 start2
=start
; /* to make Java compiler happy */
// main loop; note the inclusive bound, so i==limit is visited once
2399 for(i
=start
; i
<=limit
; i
++) {
2403 for(k
=limit
-1; k
>start
&& dirInsertValue
<= 0; k
--) {
2404 dirProp
= dirProps
[k
];
2405 if ((DIRPROP_FLAG(dirProp
)&MASK_BN_EXPLICIT
) == 0) {
2409 if (dirInsert
!= NULL
) {
2410 dirInsertValue
= dirInsert
[k
];
2411 while (dirInsertValue
> 0) {
2412 dirProp
= (DirProp
)stdDirFromInsertDir
[dirInsertValue
& 0x000F];
2413 if ((DIRPROP_FLAG(dirProp
)&MASK_BN_EXPLICIT
) == 0) {
2416 dirInsertValue
>>= 4;
2421 dirProp
= dirProps
[k
];
2423 if(dirProp
==LRI
|| dirProp
==RLI
)
2424 break; /* no forced closing for sequence ending with LRI/RLI */
2427 DirProp prop
, prop1
;
2430 pBiDi
->isolateCount
=-1; /* current isolates stack entry == none */
2434 /* AL before EN does not make it AN */
2436 } else if(prop
==EN
) {
2437 if(nextStrongPos
<=i
) {
2438 /* look for next strong char (L/R/AL) */
2440 nextStrongProp
=R
; /* set default */
2441 nextStrongPos
=limit
;
2442 for(j
=i
+1; j
<limit
; j
++) {
2444 if(prop1
==L
|| prop1
==R
|| prop1
==AL
) {
2445 nextStrongProp
=prop1
;
2451 if(nextStrongProp
==AL
) {
2456 gprop
=groupProp
[prop
];
2458 oldStateImp
=stateImp
;
2459 cell
=impTabProps
[oldStateImp
][gprop
];
2460 stateImp
=GET_STATEPROPS(cell
); /* isolate the new state */
2461 actionImp
=GET_ACTIONPROPS(cell
); /* isolate the action */
2462 if((i
==limit
) && (actionImp
==0)) {
2463 /* there is an unprocessed sequence if its property == eor */
2464 actionImp
=1; /* process the last sequence */
2467 resProp
=impTabProps
[oldStateImp
][IMPTABPROPS_RES
];
2469 case 1: /* process current seq1, init new seq1 */
2470 processPropertySeq(pBiDi
, &levState
, resProp
, start1
, i
);
2473 case 2: /* init new seq2 */
2476 case 3: /* process seq1, process seq2, init new seq1 */
2477 processPropertySeq(pBiDi
, &levState
, resProp
, start1
, start2
);
2478 processPropertySeq(pBiDi
, &levState
, DirProp_ON
, start2
, i
);
2481 case 4: /* process seq1, set seq1=seq2, init new seq2 */
2482 processPropertySeq(pBiDi
, &levState
, resProp
, start1
, start2
);
2486 default: /* we should never get here */
2493 /* flush possible pending sequence, e.g. ON */
2494 if(limit
==pBiDi
->length
&& pBiDi
->epiLength
>0) {
2495 DirProp firstStrong
=firstL_R_AL_EN_AN(pBiDi
);
2496 if(firstStrong
!=DirProp_ON
) {
2501 /* look for the last char not a BN or LRE/RLE/LRO/RLO/PDF */
2503 for(i
=limit
-1; i
>start
&& dirInsertValue
<= 0; i
--) {
2504 dirProp
=dirProps
[i
];
2505 if ((DIRPROP_FLAG(dirProp
)&MASK_BN_EXPLICIT
) == 0) {
2509 if (dirInsert
!= NULL
) {
2510 dirInsertValue
= dirInsert
[i
];
2511 while (dirInsertValue
> 0) {
2512 dirProp
= (DirProp
)stdDirFromInsertDir
[dirInsertValue
& 0x000F];
2513 if ((DIRPROP_FLAG(dirProp
)&MASK_BN_EXPLICIT
) == 0) {
2516 dirInsertValue
>>= 4;
2521 dirProp
=dirProps
[i
];
// run ends inside an isolate opener: save the state for the matching PDI
2523 if((dirProp
==LRI
|| dirProp
==RLI
) && limit
<pBiDi
->length
) {
2524 pBiDi
->isolateCount
++;
2525 pBiDi
->isolates
[pBiDi
->isolateCount
].stateImp
=stateImp
;
2526 pBiDi
->isolates
[pBiDi
->isolateCount
].state
=levState
.state
;
2527 pBiDi
->isolates
[pBiDi
->isolateCount
].start1
=start1
;
2528 pBiDi
->isolates
[pBiDi
->isolateCount
].startON
=levState
.startON
;
2531 processPropertySeq(pBiDi
, &levState
, eor
, limit
, limit
);
2534 /* perform (L1) and (X9) ---------------------------------------------------- */
2537 * Reset the embedding levels for some non-graphic characters (L1).
2538 * This function also sets appropriate levels for BN, and
2539 * explicit embedding types that are supposed to have been removed
2540 * from the paragraph in (X9).
// Post-processes pBiDi->levels, working backwards from trailingWSStart, and
// only when pBiDi->flags has a MASK_WS bit set.
// Visible rules: positions whose property is in MASK_WS, and B/S positions,
// get GET_PARALEVEL(pBiDi, i); positions in MASK_BN_EXPLICIT copy the level
// of the following position (levels[i+1]). Paragraph separators (B) are
// handled separately when orderParagraphsLTR is set.
// NOTE(review): the loop headers, the statements inside the
// orderParagraphsLTR branches and the break statements are not present in
// this copy.
2543 adjustWSLevels(UBiDi
*pBiDi
) {
2544 const DirProp
*dirProps
=pBiDi
->dirProps
;
2545 UBiDiLevel
*levels
=pBiDi
->levels
;
2548 if(pBiDi
->flags
&MASK_WS
) {
2549 UBool orderParagraphsLTR
=pBiDi
->orderParagraphsLTR
;
2552 i
=pBiDi
->trailingWSStart
;
2554 /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
// i is pre-decremented inside the condition, so the flag is for position i
2555 while(i
>0 && (flag
=DIRPROP_FLAG(dirProps
[--i
]))&MASK_WS
) {
2556 if(orderParagraphsLTR
&&(flag
&DIRPROP_FLAG(B
))) {
2559 levels
[i
]=GET_PARALEVEL(pBiDi
, i
);
2563 /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
2564 /* here, i+1 is guaranteed to be <length */
2566 flag
=DIRPROP_FLAG(dirProps
[--i
]);
2567 if(flag
&MASK_BN_EXPLICIT
) {
2568 levels
[i
]=levels
[i
+1];
2569 } else if(orderParagraphsLTR
&&(flag
&DIRPROP_FLAG(B
))) {
2572 } else if(flag
&MASK_B_S
) {
2573 levels
[i
]=GET_PARALEVEL(pBiDi
, i
);
// Public API: records the context strings (prologue and epilogue) and their
// lengths on pBiDi.
// Sets *pErrorCode to U_ILLEGAL_ARGUMENT_ERROR when pBiDi is NULL, when a
// length is below -1, or when a NULL string comes with a nonzero length.
// Each length is stored either as given or as u_strlen of the string;
// presumably u_strlen is used when the length is -1 (the branch conditions
// are not present in this copy).
// The pointers are stored as given; no copy is visible here.
2581 U_CAPI
void U_EXPORT2
2582 ubidi_setContext(UBiDi
*pBiDi
,
2583 const UChar
*prologue
, int32_t proLength
,
2584 const UChar
*epilogue
, int32_t epiLength
,
2585 UErrorCode
*pErrorCode
) {
2586 /* check the argument values */
2587 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
);
2588 if(pBiDi
==NULL
|| proLength
<-1 || epiLength
<-1 ||
2589 (prologue
==NULL
&& proLength
!=0) || (epilogue
==NULL
&& epiLength
!=0)) {
2590 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
2595 pBiDi
->proLength
=u_strlen(prologue
);
2597 pBiDi
->proLength
=proLength
;
2600 pBiDi
->epiLength
=u_strlen(epilogue
);
2602 pBiDi
->epiLength
=epiLength
;
2604 pBiDi
->prologue
=prologue
;
2605 pBiDi
->epilogue
=epilogue
;
// Marks a setPara call as successfully finished: clears the stored context
// (proLength is reset to 0) and points pParaBiDi at the object itself.
// NOTE(review): the embedded numbering skips one line between the two
// statements; it is not present in this copy.
2609 setParaSuccess(UBiDi
*pBiDi
) {
2610 pBiDi
->proLength
=0; /* forget the last context */
2612 pBiDi
->pParaBiDi
=pBiDi
; /* mark successful setPara */
// Helper macros used by setParaRunsOnly. Both evaluate their arguments more
// than once, so arguments must be free of side effects.
2615 #define BIDI_MIN(x, y) ((x)<(y) ? (x) : (y))
2616 #define BIDI_ABS(x) ((x)>=0 ? (x) : (-(x)))
// Implements the RUNS_ONLY reordering mode with two passes of ubidi_setPara.
// Visible steps:
//  1. one scratch buffer is allocated and carved into visualMap (int32_t),
//     visualText (UChar) and saveLevels (UBiDiLevel), each length entries;
//  2. INSERT_MARKS is temporarily replaced by REMOVE_CONTROLS, the text is
//     analysed with paraLevel reduced to 0 or 1, and the resulting levels,
//     trailingWSStart, length and direction are saved;
//  3. the visual text and visual map are produced, then the visual text is
//     analysed again in INVERSE_LIKE_DIRECT mode with mayAllocateText forced
//     to FALSE;
//  4. runs whose characters are not consecutive in the source text, or whose
//     saved levels differ, are counted and split, and logicalStart values are
//     rewritten through visualMap;
//  5. the saved levels, length, direction and trailingWSStart are restored,
//     the scratch buffer is freed and reorderingMode is set back to
//     RUNS_ONLY.
// NOTE(review): the embedded numbering skips many lines here (labels, gotos,
// declarations, braces), so the error paths cannot be read from this copy.
2619 setParaRunsOnly(UBiDi
*pBiDi
, const UChar
*text
, int32_t length
,
2620 UBiDiLevel paraLevel
, UErrorCode
*pErrorCode
) {
2621 int32_t *runsOnlyMemory
= NULL
;
2624 int32_t saveLength
, saveTrailingWSStart
;
2625 const UBiDiLevel
*levels
;
2626 UBiDiLevel
*saveLevels
;
2627 UBiDiDirection saveDirection
;
2628 UBool saveMayAllocateText
;
2630 int32_t visualLength
, i
, j
, visualStart
, logicalStart
,
2631 runCount
, runLength
, addedRuns
, insertRemove
,
2632 start
, limit
, step
, indexOddBit
, logicalPos
,
2634 uint32_t saveOptions
;
2636 pBiDi
->reorderingMode
=UBIDI_REORDER_DEFAULT
;
2638 ubidi_setPara(pBiDi
, text
, length
, paraLevel
, NULL
, pErrorCode
);
2641 /* obtain memory for mapping table and visual text */
2642 runsOnlyMemory
=static_cast<int32_t *>(uprv_malloc(length
*(sizeof(int32_t)+sizeof(UChar
)+sizeof(UBiDiLevel
))));
2643 if(runsOnlyMemory
==NULL
) {
2644 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
// carve the single allocation into three consecutive arrays
2647 visualMap
=runsOnlyMemory
;
2648 visualText
=(UChar
*)&visualMap
[length
];
2649 saveLevels
=(UBiDiLevel
*)&visualText
[length
];
2650 saveOptions
=pBiDi
->reorderingOptions
;
2651 if(saveOptions
& UBIDI_OPTION_INSERT_MARKS
) {
2652 pBiDi
->reorderingOptions
&=~UBIDI_OPTION_INSERT_MARKS
;
2653 pBiDi
->reorderingOptions
|=UBIDI_OPTION_REMOVE_CONTROLS
;
2655 paraLevel
&=1; /* accept only 0 or 1 */
2656 ubidi_setPara(pBiDi
, text
, length
, paraLevel
, NULL
, pErrorCode
);
2657 if(U_FAILURE(*pErrorCode
)) {
2660 /* we cannot access directly pBiDi->levels since it is not yet set if
2661 * direction is not MIXED
2663 levels
=ubidi_getLevels(pBiDi
, pErrorCode
);
2664 uprv_memcpy(saveLevels
, levels
, (size_t)pBiDi
->length
*sizeof(UBiDiLevel
));
2665 saveTrailingWSStart
=pBiDi
->trailingWSStart
;
2666 saveLength
=pBiDi
->length
;
2667 saveDirection
=pBiDi
->direction
;
2669 /* FOOD FOR THOUGHT: instead of writing the visual text, we could use
2670 * the visual map and the dirProps array to drive the second call
2671 * to ubidi_setPara (but must make provision for possible removal of
2672 * BiDi controls. Alternatively, only use the dirProps array via
2673 * customized classifier callback.
2675 visualLength
=ubidi_writeReordered(pBiDi
, visualText
, length
,
2676 UBIDI_DO_MIRRORING
, pErrorCode
);
2677 ubidi_getVisualMap(pBiDi
, visualMap
, pErrorCode
);
2678 if(U_FAILURE(*pErrorCode
)) {
2681 pBiDi
->reorderingOptions
=saveOptions
;
2683 pBiDi
->reorderingMode
=UBIDI_REORDER_INVERSE_LIKE_DIRECT
;
2685 /* Because of what we did with reorderingOptions, visualText may be shorter
2686 * than the original text. But we don't want the levels memory to be
2687 * reallocated shorter than the original length, since we need to restore
2688 * the levels as after the first call to ubidi_setpara() before returning.
2689 * We will force mayAllocateText to FALSE before the second call to
2690 * ubidi_setpara(), and will restore it afterwards.
2692 saveMayAllocateText
=pBiDi
->mayAllocateText
;
2693 pBiDi
->mayAllocateText
=FALSE
;
2694 ubidi_setPara(pBiDi
, visualText
, visualLength
, paraLevel
, NULL
, pErrorCode
);
2695 pBiDi
->mayAllocateText
=saveMayAllocateText
;
2696 ubidi_getRuns(pBiDi
, pErrorCode
);
2697 if(U_FAILURE(*pErrorCode
)) {
2700 /* check if some runs must be split, count how many splits */
2702 runCount
=pBiDi
->runCount
;
2705 for(i
=0; i
<runCount
; i
++, visualStart
+=runLength
) {
2706 runLength
=runs
[i
].visualLimit
-visualStart
;
2710 logicalStart
=GET_INDEX(runs
[i
].logicalStart
);
2711 for(j
=logicalStart
+1; j
<logicalStart
+runLength
; j
++) {
2712 index0
=visualMap
[j
];
2713 index1
=visualMap
[j
-1];
// a split is needed where neighbours are not adjacent in the source text
// or where their saved levels differ
2714 if((BIDI_ABS(index0
-index1
)!=1) || (saveLevels
[index0
]!=saveLevels
[index1
])) {
2720 if(getRunsMemory(pBiDi
, runCount
+addedRuns
)) {
2722 /* because we switch from UBiDi.simpleRuns to UBiDi.runs */
2723 pBiDi
->runsMemory
[0]=runs
[0];
2725 runs
=pBiDi
->runs
=pBiDi
->runsMemory
;
2726 pBiDi
->runCount
+=addedRuns
;
2731 /* split runs which are not consecutive in source text */
2732 for(i
=runCount
-1; i
>=0; i
--) {
2733 runLength
= i
==0 ? runs
[0].visualLimit
:
2734 runs
[i
].visualLimit
-runs
[i
-1].visualLimit
;
2735 logicalStart
=runs
[i
].logicalStart
;
2736 indexOddBit
=GET_ODD_BIT(logicalStart
);
2737 logicalStart
=GET_INDEX(logicalStart
);
2740 runs
[i
+addedRuns
]=runs
[i
];
2742 logicalPos
=visualMap
[logicalStart
];
2743 runs
[i
+addedRuns
].logicalStart
=MAKE_INDEX_ODD_PAIR(logicalPos
,
2744 saveLevels
[logicalPos
]^indexOddBit
);
2749 limit
=logicalStart
+runLength
-1;
2752 start
=logicalStart
+runLength
-1;
2756 for(j
=start
; j
!=limit
; j
+=step
) {
2757 index0
=visualMap
[j
];
2758 index1
=visualMap
[j
+step
];
2759 if((BIDI_ABS(index0
-index1
)!=1) || (saveLevels
[index0
]!=saveLevels
[index1
])) {
2760 logicalPos
=BIDI_MIN(visualMap
[start
], index0
);
2761 runs
[i
+addedRuns
].logicalStart
=MAKE_INDEX_ODD_PAIR(logicalPos
,
2762 saveLevels
[logicalPos
]^indexOddBit
);
2763 runs
[i
+addedRuns
].visualLimit
=runs
[i
].visualLimit
;
2764 runs
[i
].visualLimit
-=BIDI_ABS(j
-start
)+1;
2765 insertRemove
=runs
[i
].insertRemove
&(LRM_AFTER
|RLM_AFTER
);
2766 runs
[i
+addedRuns
].insertRemove
=insertRemove
;
2767 runs
[i
].insertRemove
&=~insertRemove
;
2773 runs
[i
+addedRuns
]=runs
[i
];
2775 logicalPos
=BIDI_MIN(visualMap
[start
], visualMap
[limit
]);
2776 runs
[i
+addedRuns
].logicalStart
=MAKE_INDEX_ODD_PAIR(logicalPos
,
2777 saveLevels
[logicalPos
]^indexOddBit
);
2781 /* restore initial paraLevel */
2782 pBiDi
->paraLevel
^=1;
2784 /* restore real text */
2786 pBiDi
->length
=saveLength
;
2787 pBiDi
->originalLength
=length
;
2788 pBiDi
->direction
=saveDirection
;
2789 /* the saved levels should never exceed levelsSize, but we check anyway */
2790 if(saveLength
>pBiDi
->levelsSize
) {
2791 saveLength
=pBiDi
->levelsSize
;
2793 uprv_memcpy(pBiDi
->levels
, saveLevels
, (size_t)saveLength
*sizeof(UBiDiLevel
));
2794 pBiDi
->trailingWSStart
=saveTrailingWSStart
;
2795 if(pBiDi
->runCount
>1) {
2796 pBiDi
->direction
=UBIDI_MIXED
;
2799 /* free memory for mapping table and visual text */
2800 uprv_free(runsOnlyMemory
);
2802 pBiDi
->reorderingMode
=UBIDI_REORDER_RUNS_ONLY
;
2805 /* -------------------------------------------------------------------------- */
2806 /* internal prototype */
// Forward declaration of the shared worker called by ubidi_setPara and
// ubidi_setParaWithControls. Besides the usual setPara arguments it takes
// offsets/offsetCount, controlStringIndices and controlStrings;
// ubidi_setPara passes NULL, 0, NULL, NULL for those.
2809 ubidi_setParaInternal(UBiDi
*pBiDi
,
2810 const UChar
*text
, int32_t length
,
2811 UBiDiLevel paraLevel
,
2812 UBiDiLevel
*embeddingLevels
,
2813 const int32_t *offsets
, int32_t offsetCount
,
2814 const int32_t *controlStringIndices
,
2815 const UChar
* const * controlStrings
,
2816 UErrorCode
*pErrorCode
);
2818 /* ubidi_setPara ------------------------------------------------------------ */
// Public API: after the pErrorCode guard macro, forwards to
// ubidi_setParaInternal with no control-string data (NULL, 0, NULL, NULL).
// NOTE(review): the embedded numbering skips two argument lines of the call;
// they are not present in this copy.
2820 U_CAPI
void U_EXPORT2
2821 ubidi_setPara(UBiDi
*pBiDi
, const UChar
*text
, int32_t length
,
2822 UBiDiLevel paraLevel
, UBiDiLevel
*embeddingLevels
,
2823 UErrorCode
*pErrorCode
) {
2824 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
);
2825 ubidi_setParaInternal(pBiDi
, text
, length
, paraLevel
,
2827 NULL
, 0, NULL
, NULL
,
2831 /* ubidi_setParaWithControls ------------------------------------------------ */
// Public API: variant of ubidi_setPara that also passes offsets,
// offsetCount, controlStringIndices and controlStrings through to
// ubidi_setParaInternal.
// Sets *pErrorCode to U_ILLEGAL_ARGUMENT_ERROR when offsetCount is negative,
// or when offsetCount is positive and offsets or controlStrings is NULL.
// controlStringIndices is not checked here.
// NOTE(review): the embedded numbering skips lines after the error
// assignment and inside the call; they are not present in this copy.
2833 U_CAPI
void U_EXPORT2
2834 ubidi_setParaWithControls(UBiDi
*pBiDi
,
2835 const UChar
*text
, int32_t length
,
2836 UBiDiLevel paraLevel
,
2837 const int32_t *offsets
, int32_t offsetCount
,
2838 const int32_t *controlStringIndices
,
2839 const UChar
* const * controlStrings
,
2840 UErrorCode
*pErrorCode
) {
2841 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
);
2842 /* check the argument values that are not already checked in ubidi_setParaInternal */
2843 if ( offsetCount
< 0 || (offsetCount
> 0 && (offsets
== NULL
|| controlStrings
== NULL
)) ) {
2844 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
2847 ubidi_setParaInternal(pBiDi
, text
, length
, paraLevel
,
2849 offsets
, offsetCount
, controlStringIndices
, controlStrings
,
2853 /* ubidi_setParaInternal ---------------------------------------------------- */
/*
 * Shared worker behind ubidi_setPara() and ubidi_setParaWithControls().
 * Steps visible in this listing, in order:
 *  - reject a NULL pBiDi or text, a length below -1, and a paraLevel that lies
 *    strictly between UBIDI_MAX_EXPLICIT_LEVEL and UBIDI_DEFAULT_LTR
 *    (U_ILLEGAL_ARGUMENT_ERROR);
 *  - hand UBIDI_REORDER_RUNS_ONLY over to setParaRunsOnly();
 *  - reset the per-call fields of pBiDi (pParaBiDi, the three length fields,
 *    paraLevel, direction, dirInsert, dirProps, insertPoints,
 *    defaultParaLevel);
 *  - pick the paras storage, then fill dirInsert (only when offsetCount>0)
 *    and dirProps;
 *  - obtain levels from resolveExplicitLevels() when embeddingLevels is NULL,
 *    otherwise use the caller's array and validate it with
 *    checkExplicitLevels();
 *  - pick the isolates storage, choose the implicit state table for the
 *    reordering mode, and run resolveImplicitLevels() either once over the
 *    whole text or once per same-level run;
 *  - propagate insertPoints.errorCode, call adjustWSLevels(), optionally add
 *    RLM_BEFORE insert points, and adjust resultLength before
 *    setParaSuccess().
 * U_MEMORY_ALLOCATION_ERROR is set on the paths around getDirInsertMemory(),
 * getDirPropsMemory(), getDirProps(), getLevelsMemory() and
 * getInitialIsolatesMemory(), presumably when they fail.
 * NOTE(review): several lines (closing braces, else branches, returns, some
 * local declarations) are absent from this listing, so the exact branch
 * structure must be confirmed against the full source.
 */
2856 ubidi_setParaInternal(UBiDi
*pBiDi
,
2857 const UChar
*text
, int32_t length
,
2858 UBiDiLevel paraLevel
,
2859 UBiDiLevel
*embeddingLevels
,
2860 const int32_t *offsets
, int32_t offsetCount
,
2861 const int32_t *controlStringIndices
,
2862 const UChar
* const * controlStrings
,
2863 UErrorCode
*pErrorCode
) {
2864 UBiDiDirection direction
;
2867 /* check the argument values (pErrorCode status already checked before getting here) */
2868 if(pBiDi
==NULL
|| text
==NULL
|| length
<-1 ||
2869 (paraLevel
>UBIDI_MAX_EXPLICIT_LEVEL
&& paraLevel
<UBIDI_DEFAULT_LTR
)) {
2870 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
/* NOTE(review): the guard around this u_strlen() call is not visible in this
   listing; presumably it applies only when length is -1 -- confirm */
2875 length
=u_strlen(text
);
/* NOTE(review): the body of the following check (inserted controls combined
   with a reordering mode above GROUP_NUMBERS_WITH_R) is not visible here */
2877 if (offsetCount
> 0 && pBiDi
->reorderingMode
> UBIDI_REORDER_GROUP_NUMBERS_WITH_R
) {
2881 /* special treatment for RUNS_ONLY mode */
2882 if(pBiDi
->reorderingMode
==UBIDI_REORDER_RUNS_ONLY
) {
2883 setParaRunsOnly(pBiDi
, text
, length
, paraLevel
, pErrorCode
);
2887 /* initialize the UBiDi structure */
2888 pBiDi
->pParaBiDi
=NULL
; /* mark unfinished setPara */
2890 pBiDi
->length
=pBiDi
->originalLength
=pBiDi
->resultLength
=length
;
2891 pBiDi
->paraLevel
=paraLevel
;
2892 pBiDi
->direction
=(UBiDiDirection
)(paraLevel
&1);
2895 pBiDi
->dirInsert
=NULL
;
2896 pBiDi
->dirProps
=NULL
;
2899 pBiDi
->insertPoints
.size
=0; /* clean up from last call */
2900 pBiDi
->insertPoints
.confirmed
=0; /* clean up from last call */
2903 * Save the original paraLevel if contextual; otherwise, set to 0.
2905 pBiDi
->defaultParaLevel
=IS_DEFAULT_LEVEL(paraLevel
);
2909 * For an empty paragraph, create a UBiDi object with the paraLevel and
2910 * the flags and the direction set but without allocating zero-length arrays.
2911 * There is nothing more to do.
2913 if(IS_DEFAULT_LEVEL(paraLevel
)) {
2914 pBiDi
->paraLevel
&=1;
2915 pBiDi
->defaultParaLevel
=0;
2917 pBiDi
->flags
=DIRPROP_FLAG_LR(paraLevel
);
2920 setParaSuccess(pBiDi
); /* mark successful setPara */
2926 /* allocate paras memory */
2927 if(pBiDi
->parasMemory
)
2928 pBiDi
->paras
=pBiDi
->parasMemory
;
2930 pBiDi
->paras
=pBiDi
->simpleParas
;
2933 * Get the inserted directional properties
2936 if (offsetCount
> 0) {
2937 if(getDirInsertMemory(pBiDi
, length
)) {
2938 pBiDi
->dirInsert
=pBiDi
->dirInsertMemory
;
2939 if(!getDirInsert(pBiDi
, offsets
, offsetCount
, controlStringIndices
, controlStrings
)) {
2940 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
2944 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
2950 * Get the directional properties,
2951 * the flags bit-set, and
2952 * determine the paragraph level if necessary.
2954 if(getDirPropsMemory(pBiDi
, length
)) {
2955 pBiDi
->dirProps
=pBiDi
->dirPropsMemory
;
2956 if(!getDirProps(pBiDi
)) {
2957 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
2961 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
2964 dirProps
=pBiDi
->dirProps
;
2965 /* the processed length may have changed if UBIDI_OPTION_STREAMING */
2966 length
= pBiDi
->length
;
2967 pBiDi
->trailingWSStart
=length
; /* the levels[] will reflect the WS run */
2969 /* are explicit levels specified? */
2970 if(embeddingLevels
==NULL
) {
2971 /* no: determine explicit levels according to the (Xn) rules */\
2972 if(getLevelsMemory(pBiDi
, length
)) {
2973 pBiDi
->levels
=pBiDi
->levelsMemory
;
2974 direction
=resolveExplicitLevels(pBiDi
, pErrorCode
);
2975 if(U_FAILURE(*pErrorCode
)) {
2979 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
2983 /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */
2984 pBiDi
->levels
=embeddingLevels
;
2985 direction
=checkExplicitLevels(pBiDi
, pErrorCode
);
2986 if(U_FAILURE(*pErrorCode
)) {
2991 /* allocate isolate memory */
2992 if(pBiDi
->isolateCount
<=SIMPLE_ISOLATES_COUNT
)
2993 pBiDi
->isolates
=pBiDi
->simpleIsolates
;
2995 if((int32_t)(pBiDi
->isolateCount
*sizeof(Isolate
))<=pBiDi
->isolatesSize
)
2996 pBiDi
->isolates
=pBiDi
->isolatesMemory
;
2998 if(getInitialIsolatesMemory(pBiDi
, pBiDi
->isolateCount
)) {
2999 pBiDi
->isolates
=pBiDi
->isolatesMemory
;
3001 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
3005 pBiDi
->isolateCount
=-1; /* current isolates stack entry == none */
3008 * The steps after (X9) in the UBiDi algorithm are performed only if
3009 * the paragraph text has mixed directionality!
3011 pBiDi
->direction
=direction
;
3014 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
3015 pBiDi
->trailingWSStart
=0;
3018 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
3019 pBiDi
->trailingWSStart
=0;
3023 * Choose the right implicit state table
3025 switch(pBiDi
->reorderingMode
) {
3026 case UBIDI_REORDER_DEFAULT
:
3027 pBiDi
->pImpTabPair
=&impTab_DEFAULT
;
3029 case UBIDI_REORDER_NUMBERS_SPECIAL
:
3030 pBiDi
->pImpTabPair
=&impTab_NUMBERS_SPECIAL
;
3032 case UBIDI_REORDER_GROUP_NUMBERS_WITH_R
:
3033 pBiDi
->pImpTabPair
=&impTab_GROUP_NUMBERS_WITH_R
;
3035 case UBIDI_REORDER_INVERSE_NUMBERS_AS_L
:
3036 pBiDi
->pImpTabPair
=&impTab_INVERSE_NUMBERS_AS_L
;
3038 case UBIDI_REORDER_INVERSE_LIKE_DIRECT
:
3039 if (pBiDi
->reorderingOptions
& UBIDI_OPTION_INSERT_MARKS
) {
3040 pBiDi
->pImpTabPair
=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS
;
3042 pBiDi
->pImpTabPair
=&impTab_INVERSE_LIKE_DIRECT
;
3045 case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
:
3046 if (pBiDi
->reorderingOptions
& UBIDI_OPTION_INSERT_MARKS
) {
3047 pBiDi
->pImpTabPair
=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS
;
3049 pBiDi
->pImpTabPair
=&impTab_INVERSE_FOR_NUMBERS_SPECIAL
;
3053 /* we should never get here */
3058 * If there are no external levels specified and there
3059 * are no significant explicit level codes in the text,
3060 * then we can treat the entire paragraph as one run.
3061 * Otherwise, we need to perform the following rules on runs of
3062 * the text with the same embedding levels. (X10)
3063 * "Significant" explicit level codes are ones that actually
3064 * affect non-BN characters.
3065 * Examples for "insignificant" ones are empty embeddings
3066 * LRE-PDF, LRE-RLE-PDF-PDF, etc.
3068 if(embeddingLevels
==NULL
&& pBiDi
->paraCount
<=1 &&
3069 !(pBiDi
->flags
&DIRPROP_FLAG_MULTI_RUNS
)) {
3070 resolveImplicitLevels(pBiDi
, 0, length
,
3071 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi
, 0)),
3072 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi
, length
-1)));
3074 /* sor, eor: start and end types of same-level-run */
3075 UBiDiLevel
*levels
=pBiDi
->levels
;
3076 int32_t start
, limit
=0;
3077 UBiDiLevel level
, nextLevel
;
3080 /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
3081 level
=GET_PARALEVEL(pBiDi
, 0);
3082 nextLevel
=levels
[0];
3083 if(level
<nextLevel
) {
3084 eor
=GET_LR_FROM_LEVEL(nextLevel
);
3086 eor
=GET_LR_FROM_LEVEL(level
);
3090 /* determine start and limit of the run (end points just behind the run) */
3092 /* the values for this run's start are the same as for the previous run's end */
3095 if((start
>0) && (dirProps
[start
-1]==B
)) {
3096 /* except if this is a new paragraph, then set sor = para level */
3097 sor
=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi
, start
));
3102 /* search for the limit of this run */
3103 while((++limit
<length
) &&
3104 ((levels
[limit
]==level
) ||
3105 (DIRPROP_FLAG(dirProps
[limit
])&MASK_BN_EXPLICIT
))) {}
3107 /* get the correct level of the next run */
3109 nextLevel
=levels
[limit
];
3111 nextLevel
=GET_PARALEVEL(pBiDi
, length
-1);
3114 /* determine eor from max(level, nextLevel); sor is last run's eor */
3115 if(NO_OVERRIDE(level
)<NO_OVERRIDE(nextLevel
)) {
3116 eor
=GET_LR_FROM_LEVEL(nextLevel
);
3118 eor
=GET_LR_FROM_LEVEL(level
);
3121 /* if the run consists of overridden directional types, then there
3122 are no implicit types to be resolved */
3123 if(!(level
&UBIDI_LEVEL_OVERRIDE
)) {
3124 resolveImplicitLevels(pBiDi
, start
, limit
, sor
, eor
);
3126 /* remove the UBIDI_LEVEL_OVERRIDE flags */
3128 levels
[start
++]&=~UBIDI_LEVEL_OVERRIDE
;
3129 } while(start
<limit
);
3131 } while(limit
<length
);
3133 /* check if we got any memory shortage while adding insert points */
3134 if (U_FAILURE(pBiDi
->insertPoints
.errorCode
))
3136 *pErrorCode
=pBiDi
->insertPoints
.errorCode
;
3139 /* reset the embedding levels for some non-graphic characters (L1), (X9) */
3140 adjustWSLevels(pBiDi
);
3143 /* add RLM for inverse Bidi with contextual orientation resolving
3144 * to RTL which would not round-trip otherwise
3146 if((pBiDi
->defaultParaLevel
>0) &&
3147 (pBiDi
->reorderingOptions
& UBIDI_OPTION_INSERT_MARKS
) &&
3148 ((pBiDi
->reorderingMode
==UBIDI_REORDER_INVERSE_LIKE_DIRECT
) ||
3149 (pBiDi
->reorderingMode
==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
))) {
3150 int32_t i
, j
, start
, last
;
3153 for(i
=0; i
<pBiDi
->paraCount
; i
++) {
3154 last
=(pBiDi
->paras
[i
].limit
)-1;
3155 level
=pBiDi
->paras
[i
].level
;
3157 continue; /* LTR paragraph */
3158 start
= i
==0 ? 0 : pBiDi
->paras
[i
-1].limit
;
3159 for(j
=last
; j
>=start
; j
--) {
3160 dirProp
=dirProps
[j
];
3163 while(dirProps
[last
]==B
) {
3167 addPoint(pBiDi
, last
, RLM_BEFORE
);
3170 if(DIRPROP_FLAG(dirProp
) & MASK_R_AL
) {
3177 if(pBiDi
->reorderingOptions
& UBIDI_OPTION_REMOVE_CONTROLS
) {
3178 pBiDi
->resultLength
-= pBiDi
->controlCount
;
3180 pBiDi
->resultLength
+= pBiDi
->insertPoints
.size
;
3182 setParaSuccess(pBiDi
); /* mark successful setPara */
3185 /* -------------------------------------------------------------------------- */
/*
 * Setter: stores the orderParagraphsLTR argument in
 * pBiDi->orderParagraphsLTR.
 * NOTE(review): the line between the signature and the assignment is not
 * visible in this listing; confirm whether a NULL guard precedes the store.
 */
3187 U_CAPI
void U_EXPORT2
3188 ubidi_orderParagraphsLTR(UBiDi
*pBiDi
, UBool orderParagraphsLTR
) {
3190 pBiDi
->orderParagraphsLTR
=orderParagraphsLTR
;
/*
 * Getter: returns pBiDi->orderParagraphsLTR.
 * NOTE(review): the line between the signature and the return is not visible
 * in this listing; confirm whether a NULL guard (and its fallback value)
 * precedes it.
 */
3194 U_CAPI UBool U_EXPORT2
3195 ubidi_isOrderParagraphsLTR(UBiDi
*pBiDi
) {
3197 return pBiDi
->orderParagraphsLTR
;
/*
 * Returns pBiDi->direction when IS_VALID_PARA_OR_LINE(pBiDi) holds.
 * NOTE(review): the fallback return for an invalid object is not visible in
 * this listing.
 */
3203 U_CAPI UBiDiDirection U_EXPORT2
3204 ubidi_getDirection(const UBiDi
*pBiDi
) {
3205 if(IS_VALID_PARA_OR_LINE(pBiDi
)) {
3206 return pBiDi
->direction
;
/*
 * Accessor returning a const UChar pointer, guarded by
 * IS_VALID_PARA_OR_LINE(pBiDi).
 * NOTE(review): neither the value returned inside the guard nor the fallback
 * return is visible in this listing; presumably the former is the text stored
 * in pBiDi -- confirm against the full source.
 */
3212 U_CAPI
const UChar
* U_EXPORT2
3213 ubidi_getText(const UBiDi
*pBiDi
) {
3214 if(IS_VALID_PARA_OR_LINE(pBiDi
)) {
/*
 * Returns pBiDi->originalLength when IS_VALID_PARA_OR_LINE(pBiDi) holds.
 * This is the length stored before any shortening of the processed length
 * (compare ubidi_getProcessedLength(), which returns pBiDi->length).
 * NOTE(review): the fallback return for an invalid object is not visible in
 * this listing.
 */
3221 U_CAPI
int32_t U_EXPORT2
3222 ubidi_getLength(const UBiDi
*pBiDi
) {
3223 if(IS_VALID_PARA_OR_LINE(pBiDi
)) {
3224 return pBiDi
->originalLength
;
/*
 * Returns pBiDi->length when IS_VALID_PARA_OR_LINE(pBiDi) holds.
 * NOTE(review): the fallback return for an invalid object is not visible in
 * this listing.
 */
3230 U_CAPI
int32_t U_EXPORT2
3231 ubidi_getProcessedLength(const UBiDi
*pBiDi
) {
3232 if(IS_VALID_PARA_OR_LINE(pBiDi
)) {
3233 return pBiDi
->length
;
/*
 * Returns pBiDi->resultLength when IS_VALID_PARA_OR_LINE(pBiDi) holds.
 * NOTE(review): the fallback return for an invalid object is not visible in
 * this listing.
 */
3239 U_CAPI
int32_t U_EXPORT2
3240 ubidi_getResultLength(const UBiDi
*pBiDi
) {
3241 if(IS_VALID_PARA_OR_LINE(pBiDi
)) {
3242 return pBiDi
->resultLength
;
3248 /* paragraphs API functions ------------------------------------------------- */
/*
 * Returns pBiDi->paraLevel when IS_VALID_PARA_OR_LINE(pBiDi) holds.
 * NOTE(review): the fallback return for an invalid object is not visible in
 * this listing.
 */
3250 U_CAPI UBiDiLevel U_EXPORT2
3251 ubidi_getParaLevel(const UBiDi
*pBiDi
) {
3252 if(IS_VALID_PARA_OR_LINE(pBiDi
)) {
3253 return pBiDi
->paraLevel
;
/*
 * Returns pBiDi->paraCount for an object that passes
 * IS_VALID_PARA_OR_LINE(pBiDi). Note the inverted guard: the invalid case is
 * handled first.
 * NOTE(review): the value returned for an invalid object is not visible in
 * this listing.
 */
3259 U_CAPI
int32_t U_EXPORT2
3260 ubidi_countParagraphs(UBiDi
*pBiDi
) {
3261 if(!IS_VALID_PARA_OR_LINE(pBiDi
)) {
3264 return pBiDi
->paraCount
;
/*
 * Reports the boundaries and level of the paragraph with index paraIndex.
 * Validation: pErrorCode, then the object itself, then paraIndex against the
 * range given by 0 and pBiDi->paraCount (the RETURN_VOID_IF_* macros leave
 * the function on failure). After validation pBiDi is replaced by
 * pBiDi->pParaBiDi so that a line object is answered from its paragraph
 * object.
 * Each of pParaStart, pParaLimit and pParaLevel is optional and is written
 * only when non-NULL:
 *   *pParaStart = paraStart
 *   *pParaLimit = paras[paraIndex].limit
 *   *pParaLevel = GET_PARALEVEL(pBiDi, paraStart)
 * NOTE(review): the declaration of paraStart and the branch that selects
 * between paras[paraIndex-1].limit and a starting value for the first
 * paragraph are not visible in this listing; presumably the [paraIndex-1]
 * access is skipped when paraIndex is 0 -- confirm.
 */
3268 U_CAPI
void U_EXPORT2
3269 ubidi_getParagraphByIndex(const UBiDi
*pBiDi
, int32_t paraIndex
,
3270 int32_t *pParaStart
, int32_t *pParaLimit
,
3271 UBiDiLevel
*pParaLevel
, UErrorCode
*pErrorCode
) {
3274 /* check the argument values */
3275 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
);
3276 RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi
, *pErrorCode
);
3277 RETURN_VOID_IF_BAD_RANGE(paraIndex
, 0, pBiDi
->paraCount
, *pErrorCode
);
3279 pBiDi
=pBiDi
->pParaBiDi
; /* get Para object if Line object */
3281 paraStart
=pBiDi
->paras
[paraIndex
-1].limit
;
3285 if(pParaStart
!=NULL
) {
3286 *pParaStart
=paraStart
;
3288 if(pParaLimit
!=NULL
) {
3289 *pParaLimit
=pBiDi
->paras
[paraIndex
].limit
;
3291 if(pParaLevel
!=NULL
) {
3292 *pParaLevel
=GET_PARALEVEL(pBiDi
, paraStart
);
/*
 * Finds the paragraph that contains charIndex and reports its boundaries and
 * level through ubidi_getParagraphByIndex().
 * The validation macros are passed -1 as their last argument, presumably the
 * value returned on failure. charIndex is range-checked against 0 and the
 * length of the paragraph object (pBiDi is first replaced by
 * pBiDi->pParaBiDi).
 * The empty-bodied for loop advances paraIndex until
 * charIndex < paras[paraIndex].limit, i.e. it stops at the first paragraph
 * whose limit lies beyond charIndex.
 * NOTE(review): the declaration of paraIndex and the final return statement
 * are not visible in this listing; presumably paraIndex is returned --
 * confirm.
 */
3296 U_CAPI
int32_t U_EXPORT2
3297 ubidi_getParagraph(const UBiDi
*pBiDi
, int32_t charIndex
,
3298 int32_t *pParaStart
, int32_t *pParaLimit
,
3299 UBiDiLevel
*pParaLevel
, UErrorCode
*pErrorCode
) {
3302 /* check the argument values */
3303 /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */
3304 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
, -1);
3305 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi
, *pErrorCode
, -1);
3306 pBiDi
=pBiDi
->pParaBiDi
; /* get Para object if Line object */
3307 RETURN_IF_BAD_RANGE(charIndex
, 0, pBiDi
->length
, *pErrorCode
, -1);
3309 for(paraIndex
=0; charIndex
>=pBiDi
->paras
[paraIndex
].limit
; paraIndex
++);
3310 ubidi_getParagraphByIndex(pBiDi
, paraIndex
, pParaStart
, pParaLimit
, pParaLevel
, pErrorCode
);
/*
 * Installs a new class callback and context on pBiDi and hands the previous
 * ones back to the caller.
 * Order of operations visible here: the pErrorCode check, an argument check
 * that sets U_ILLEGAL_ARGUMENT_ERROR, then the old callback and context are
 * copied out through oldFn / oldContext before fnClassCallback and
 * coClassCallback are overwritten with newFn / newContext.
 * NOTE(review): the condition of the argument check and the guards around the
 * *oldFn / *oldContext stores are not visible in this listing; presumably the
 * check rejects a NULL pBiDi and the stores are skipped for NULL out-pointers
 * -- confirm.
 */
3314 U_CAPI
void U_EXPORT2
3315 ubidi_setClassCallback(UBiDi
*pBiDi
, UBiDiClassCallback
*newFn
,
3316 const void *newContext
, UBiDiClassCallback
**oldFn
,
3317 const void **oldContext
, UErrorCode
*pErrorCode
)
3319 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
);
3321 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
3326 *oldFn
= pBiDi
->fnClassCallback
;
3330 *oldContext
= pBiDi
->coClassCallback
;
3332 pBiDi
->fnClassCallback
= newFn
;
3333 pBiDi
->coClassCallback
= newContext
;
/*
 * Copies the currently installed class callback and its context out of pBiDi:
 *   *fn      = pBiDi->fnClassCallback
 *   *context = pBiDi->coClassCallback
 * NOTE(review): the lines between the signature and each store are not
 * visible in this listing; presumably they guard against NULL pBiDi, fn and
 * context -- confirm.
 */
3336 U_CAPI
void U_EXPORT2
3337 ubidi_getClassCallback(UBiDi
*pBiDi
, UBiDiClassCallback
**fn
, const void **context
)
3344 *fn
= pBiDi
->fnClassCallback
;
3348 *context
= pBiDi
->coClassCallback
;
3352 U_CAPI UCharDirection U_EXPORT2
3353 ubidi_getCustomizedClass(UBiDi
*pBiDi
, UChar32 c
)
3357 if( pBiDi
->fnClassCallback
== NULL
||
3358 (dir
= (*pBiDi
->fnClassCallback
)(pBiDi
->coClassCallback
, c
)) == U_BIDI_CLASS_DEFAULT
)
3360 dir
= ubidi_getClass(pBiDi
->bdp
, c
);
3362 if(dir
>= U_CHAR_DIRECTION_COUNT
) {
3363 dir
= (UCharDirection
)ON
;