1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 ******************************************************************************
6 * Copyright (C) 1999-2015, International Business Machines
7 * Corporation and others. All Rights Reserved.
9 ******************************************************************************
12 * tab size: 8 (not used)
15 * created on: 1999jul27
16 * created by: Markus W. Scherer, updated by Matitiahu Allouche
21 #include "unicode/utypes.h"
22 #include "unicode/ustring.h"
23 #include "unicode/uchar.h"
24 #include "unicode/ubidi.h"
25 #include "unicode/utf16.h"
26 #include "ubidi_props.h"
31 * General implementation notes:
33 * Throughout the implementation, there are comments like (W2) that refer to
34 * rules of the BiDi algorithm, in this example to the second rule of the
35 * resolution of weak types.
37 * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
38 * character according to UTF-16, the second UChar gets the directional property of
39 * the entire character assigned, while the first one gets a BN, a boundary
40 * neutral, type, which is ignored by most of the algorithm according to
41 * rule (X9) and the implementation suggestions of the BiDi algorithm.
43 * Later, adjustWSLevels() will set the level for each BN to that of the
44 * following character (UChar), which results in surrogate pairs getting the
45 * same level on each of their surrogates.
47 * In a UTF-8 implementation, the same thing could be done: the last byte of
48 * a multi-byte sequence would get the "real" property, while all previous
49 * bytes of that sequence would get BN.
51 * It is not possible to assign all those parts of a character the same real
52 * property because this would fail in the resolution of weak types with rules
53 * that look at immediately surrounding types.
55 * As a related topic, this implementation does not remove Boundary Neutral
56 * types from the input, but ignores them wherever this is relevant.
57 * For example, the loop for the resolution of the weak types reads
58 * types until it finds a non-BN.
59 * Also, explicit embedding codes are neither changed into BN nor removed.
60 * They are only treated the same way real BNs are.
61 * As stated before, adjustWSLevels() takes care of them at the end.
62 * For the purpose of conformance, the levels of all these codes do not matter.
65 * Note that this implementation modifies the dirProps
66 * after the initial setup, when applying X5c (replace FSI by LRI or RLI),
67 * X6, N0 (replace paired brackets by L or R).
69 * In this implementation, the resolution of weak types (W1 to W6),
70 * neutrals (N1 and N2), and the assignment of the resolved level (In)
71 * are all done in one single loop, in resolveImplicitLevels().
72 * Changes of dirProp values are done on the fly, without writing
73 * them back to the dirProps array.
76 * This implementation contains code that allows bypassing steps of the
77 * algorithm that are not needed on the specific paragraph
78 * in order to speed up the most common cases considerably,
79 * like text that is entirely LTR, or RTL text without numbers.
81 * Most of this is done by setting a bit for each directional property
82 * in a flags variable and later checking for whether there are
83 * any LTR characters or any RTL characters, or both, whether
84 * there are any explicit embedding codes, etc.
86 * If the (Xn) steps are performed, then the flags are re-evaluated,
87 * because they will then not contain the embedding codes any more
88 * and will be adjusted for override codes, so that subsequently
89 * more bypassing may be possible than what the initial flags suggested.
91 * If the text is not mixed-directional, then the
92 * algorithm steps for the weak type resolution are not performed,
93 * and all levels are set to the paragraph level.
95 * If there are no explicit embedding codes, then the (Xn) steps are not performed.
98 * If embedding levels are supplied as a parameter, then all
99 * explicit embedding codes are ignored, and the (Xn) steps are not performed.
102 * White Space types could get the level of the run they belong to,
103 * and are checked with a test of (flags&MASK_EMBEDDING) to
104 * consider if the paragraph direction should be considered in
105 * the flags variable.
107 * If there are no White Space types in the paragraph, then
108 * (L1) is not necessary in adjustWSLevels().
111 /* to avoid some conditional statements, use tiny constant arrays */
112 static const Flags flagLR
[2]={ DIRPROP_FLAG(L
), DIRPROP_FLAG(R
) };
113 static const Flags flagE
[2]={ DIRPROP_FLAG(LRE
), DIRPROP_FLAG(RLE
) };
114 static const Flags flagO
[2]={ DIRPROP_FLAG(LRO
), DIRPROP_FLAG(RLO
) };
116 #define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
117 #define DIRPROP_FLAG_E(level) flagE[(level)&1]
118 #define DIRPROP_FLAG_O(level) flagO[(level)&1]
120 #define DIR_FROM_STRONG(strong) ((strong)==L ? L : R)
122 #define NO_OVERRIDE(level) ((level)&~UBIDI_LEVEL_OVERRIDE)
124 /* UBiDi object management -------------------------------------------------- */
/*
 * Convenience constructor: forwards to ubidi_openSized() with both size
 * arguments set to 0 and a local UErrorCode initialized to U_ZERO_ERROR.
 * The local error code is not passed back; only the pointer is returned.
 * NOTE(review): the declarator line carrying the function name is missing
 * from this extract -- presumably ubidi_open(void); confirm against ubidi.h.
 */
126 U_CAPI UBiDi
* U_EXPORT2
129 UErrorCode errorCode
=U_ZERO_ERROR
;
130 return ubidi_openSized(0, 0, &errorCode
);
/*
 * Allocate and zero a UBiDi object, optionally pre-allocating its arrays.
 *
 * Visible behavior:
 *  - a NULL pErrorCode or an error code that already signals failure takes
 *    the first branch (its body is missing from this extract);
 *  - a negative maxLength or maxRunCount sets U_ILLEGAL_ARGUMENT_ERROR and
 *    returns NULL;
 *  - the object comes from uprv_malloc() and is cleared with uprv_memset();
 *  - failure of getInitialDirPropsMemory(), getInitialLevelsMemory() or
 *    getInitialRunsMemory() sets U_MEMORY_ALLOCATION_ERROR;
 *  - mayAllocateText / mayAllocateRuns are set to TRUE on paths that do not
 *    pre-allocate (the conditions selecting those paths are missing here).
 * NOTE(review): the declaration of pBiDi, the malloc NULL check, the
 * conditions on maxLength/maxRunCount and the final return/cleanup are not
 * visible in this extract.
 */
133 U_CAPI UBiDi
* U_EXPORT2
134 ubidi_openSized(int32_t maxLength
, int32_t maxRunCount
, UErrorCode
*pErrorCode
) {
137 /* check the argument values */
138 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
140 } else if(maxLength
<0 || maxRunCount
<0) {
141 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
142 return NULL
; /* invalid arguments */
145 /* allocate memory for the object */
146 pBiDi
=(UBiDi
*)uprv_malloc(sizeof(UBiDi
));
148 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
152 /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */
153 uprv_memset(pBiDi
, 0, sizeof(UBiDi
));
155 /* allocate memory for arrays as requested */
157 if( !getInitialDirPropsMemory(pBiDi
, maxLength
) ||
158 !getInitialLevelsMemory(pBiDi
, maxLength
)
160 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
163 pBiDi
->mayAllocateText
=TRUE
;
168 /* use simpleRuns[] */
169 pBiDi
->runsSize
=sizeof(Run
);
170 } else if(!getInitialRunsMemory(pBiDi
, maxRunCount
)) {
171 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
174 pBiDi
->mayAllocateRuns
=TRUE
;
177 if(U_SUCCESS(*pErrorCode
)) {
186 * We are allowed to allocate memory if memory==NULL or
187 * mayAllocate==TRUE for each array that we need.
188 * We also try to grow memory as needed if we allocate it.
191 * Assume sizeNeeded>0.
192 * If *pMemory!=NULL, then assume *pSize>0.
194 * ### this realloc() may unnecessarily copy the old data,
195 * which we know we don't need any more;
196 * is this the best way to do this??
/*
 * Make sure the buffer reachable through bidiMem can hold sizeNeeded bytes.
 *
 * bidiMem is reinterpreted as a void** so that one routine can serve every
 * internal array pointer.
 * Visible logic:
 *  - with no buffer yet, one is requested from uprv_malloc(), but only when
 *    mayAllocate is set;
 *  - an existing buffer is sufficient when sizeNeeded<=*pSize;
 *  - a buffer that is too small is a failure when mayAllocate is not set;
 *  - otherwise uprv_realloc() is called and its result is first stored in
 *    the local `memory`, not directly in *pMemory.
 * NOTE(review): the return type, every return statement and the updates of
 * *pSize / *pMemory are missing from this extract.
 */
199 ubidi_getMemory(BidiMemoryForAllocation
*bidiMem
, int32_t *pSize
, UBool mayAllocate
, int32_t sizeNeeded
) {
200 void **pMemory
= (void **)bidiMem
;
201 /* check for existing memory */
203 /* we need to allocate memory */
204 if(mayAllocate
&& (*pMemory
=uprv_malloc(sizeNeeded
))!=NULL
) {
211 if(sizeNeeded
<=*pSize
) {
212 /* there is already enough memory */
215 else if(!mayAllocate
) {
216 /* not enough memory, and we must not allocate */
221 /* in most cases, we do not need the copy-old-data part of
222 * realloc, but it is needed when adding runs using getRunsMemory()
223 * in setParaRunsOnly()
225 if((memory
=uprv_realloc(*pMemory
, sizeNeeded
))!=NULL
) {
230 /* we failed to grow */
/*
 * Release the resources owned by a UBiDi object.
 *
 * Clears pParaBiDi, then frees each internal buffer that is non-NULL:
 * dirInsertMemory, dirPropsMemory, levelsMemory, openingsMemory,
 * parasMemory, runsMemory, isolatesMemory and insertPoints.points.
 * NOTE(review): a guard on pBiDi itself and the release of the UBiDi
 * structure are not visible in this extract -- confirm both exist.
 */
237 U_CAPI
void U_EXPORT2
238 ubidi_close(UBiDi
*pBiDi
) {
240 pBiDi
->pParaBiDi
=NULL
; /* in case one tries to reuse this block */
241 if(pBiDi
->dirInsertMemory
!=NULL
) {
242 uprv_free(pBiDi
->dirInsertMemory
);
244 if(pBiDi
->dirPropsMemory
!=NULL
) {
245 uprv_free(pBiDi
->dirPropsMemory
);
247 if(pBiDi
->levelsMemory
!=NULL
) {
248 uprv_free(pBiDi
->levelsMemory
);
250 if(pBiDi
->openingsMemory
!=NULL
) {
251 uprv_free(pBiDi
->openingsMemory
);
253 if(pBiDi
->parasMemory
!=NULL
) {
254 uprv_free(pBiDi
->parasMemory
);
256 if(pBiDi
->runsMemory
!=NULL
) {
257 uprv_free(pBiDi
->runsMemory
);
259 if(pBiDi
->isolatesMemory
!=NULL
) {
260 uprv_free(pBiDi
->isolatesMemory
);
262 if(pBiDi
->insertPoints
.points
!=NULL
) {
263 uprv_free(pBiDi
->insertPoints
.points
);
270 /* set to approximate "inverse BiDi" ---------------------------------------- */
272 U_CAPI
void U_EXPORT2
273 ubidi_setInverse(UBiDi
*pBiDi
, UBool isInverse
) {
275 pBiDi
->isInverse
=isInverse
;
276 pBiDi
->reorderingMode
= isInverse
? UBIDI_REORDER_INVERSE_NUMBERS_AS_L
277 : UBIDI_REORDER_DEFAULT
;
281 U_CAPI UBool U_EXPORT2
282 ubidi_isInverse(UBiDi
*pBiDi
) {
284 return pBiDi
->isInverse
;
290 /* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
291 * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre
292 * concept of RUNS_ONLY which is a double operation.
293 * It could be advantageous to divide this into 3 concepts:
294 * a) Operation: direct / inverse / RUNS_ONLY
295 * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R
296 * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL
297 * This would allow combinations not possible today like RUNS_ONLY with NUMBERS_SPECIAL.
299 * It would also allow setting INSERT_MARKS for the direct step of RUNS_ONLY and
300 * REMOVE_CONTROLS for the inverse step.
301 * Not all combinations would be supported, and probably not all of them make sense.
302 * This would need to document which ones are supported and what are the
303 * fallbacks for unsupported combinations.
305 U_CAPI
void U_EXPORT2
306 ubidi_setReorderingMode(UBiDi
*pBiDi
, UBiDiReorderingMode reorderingMode
) {
307 if ((pBiDi
!=NULL
) && (reorderingMode
>= UBIDI_REORDER_DEFAULT
)
308 && (reorderingMode
< UBIDI_REORDER_COUNT
)) {
309 pBiDi
->reorderingMode
= reorderingMode
;
310 pBiDi
->isInverse
= (UBool
)(reorderingMode
== UBIDI_REORDER_INVERSE_NUMBERS_AS_L
);
314 U_CAPI UBiDiReorderingMode U_EXPORT2
315 ubidi_getReorderingMode(UBiDi
*pBiDi
) {
317 return pBiDi
->reorderingMode
;
319 return UBIDI_REORDER_DEFAULT
;
323 U_CAPI
void U_EXPORT2
324 ubidi_setReorderingOptions(UBiDi
*pBiDi
, uint32_t reorderingOptions
) {
325 if (reorderingOptions
& UBIDI_OPTION_REMOVE_CONTROLS
) {
326 reorderingOptions
&=~UBIDI_OPTION_INSERT_MARKS
;
329 pBiDi
->reorderingOptions
=reorderingOptions
;
333 U_CAPI
uint32_t U_EXPORT2
334 ubidi_getReorderingOptions(UBiDi
*pBiDi
) {
336 return pBiDi
->reorderingOptions
;
/*
 * Scan text, one code point at a time, looking at each character's
 * u_charDirection() value.
 *
 * Returns UBIDI_NEUTRAL when text is NULL, when length is below -1, or when
 * the loop finishes without returning.
 * The length is recomputed with u_strlen(); presumably this happens only when
 * -1 was passed (the condition line is missing from this extract).
 * NOTE(review): the second parameter, the local declarations and the
 * statements executed for U_LEFT_TO_RIGHT and for U_RIGHT_TO_LEFT /
 * U_RIGHT_TO_LEFT_ARABIC are not visible here.
 */
342 U_CAPI UBiDiDirection U_EXPORT2
343 ubidi_getBaseDirection(const UChar
*text
,
350 if( text
==NULL
|| length
<-1 ){
351 return UBIDI_NEUTRAL
;
355 length
=u_strlen(text
);
358 for( i
= 0 ; i
< length
; ) {
359 /* i is incremented by U16_NEXT */
360 U16_NEXT(text
, i
, length
, uchar
);
361 dir
= u_charDirection(uchar
);
362 if( dir
== U_LEFT_TO_RIGHT
)
364 if( dir
== U_RIGHT_TO_LEFT
|| dir
==U_RIGHT_TO_LEFT_ARABIC
)
367 return UBIDI_NEUTRAL
;
370 /* perform (P2)..(P3) ------------------------------------------------------- */
373 * Returns the directionality of the first strong character
374 * after the last B in prologue, if any.
375 * Requires prologue!=null.
/*
 * Walks the prologue (pBiDi->prologue, pBiDi->proLength code units) one code
 * point at a time and classifies each with ubidi_getCustomizedClass().
 * result starts as ON and the loop tests for L, R and AL.
 * NOTE(review): the statements that update result, the handling of B and the
 * return statement are missing from this extract.
 */
378 firstL_R_AL(UBiDi
*pBiDi
) {
379 const UChar
*text
=pBiDi
->prologue
;
380 int32_t length
=pBiDi
->proLength
;
383 DirProp dirProp
, result
=ON
;
384 for(i
=0; i
<length
; ) {
385 /* i is incremented by U16_NEXT */
386 U16_NEXT(text
, i
, length
, uchar
);
387 dirProp
=(DirProp
)ubidi_getCustomizedClass(pBiDi
, uchar
);
389 if(dirProp
==L
|| dirProp
==R
|| dirProp
==AL
) {
402 * Check that there are enough entries in the array pointed to by pBiDi->paras
/*
 * Grow the paras array so that it can hold pBiDi->paraCount entries.
 *
 * While paras still points at the built-in simpleParas array, a count within
 * SIMPLE_PARAS_COUNT needs no work (presumably an early success return);
 * otherwise memory for SIMPLE_PARAS_COUNT*2 entries is requested, paras is
 * switched to parasMemory and the SIMPLE_PARAS_COUNT existing entries are
 * copied over.
 * When paras is already separate from simpleParas, memory for count*2
 * entries is requested and paras is set to parasMemory again.
 * NOTE(review): the return type and all return statements are missing from
 * this extract; the caller in getDirProps() compares the result with FALSE.
 */
405 checkParaCount(UBiDi
*pBiDi
) {
406 int32_t count
=pBiDi
->paraCount
;
407 if(pBiDi
->paras
==pBiDi
->simpleParas
) {
408 if(count
<=SIMPLE_PARAS_COUNT
)
410 if(!getInitialParasMemory(pBiDi
, SIMPLE_PARAS_COUNT
* 2))
412 pBiDi
->paras
=pBiDi
->parasMemory
;
413 uprv_memcpy(pBiDi
->parasMemory
, pBiDi
->simpleParas
, SIMPLE_PARAS_COUNT
* sizeof(Para
));
416 if(!getInitialParasMemory(pBiDi
, count
* 2))
418 pBiDi
->paras
=pBiDi
->parasMemory
;
423 * Get the directional properties for the inserted bidi controls.
/*
 * Subset of bidi properties used for inserted controls; every value fits in
 * 4 bits so that several of them can be packed into one dirInsert entry.
 */
enum {                  /* correspondence to standard class */
    Insert_none = 0,    /*  0 all others */
    Insert_L,           /*  1 L   = U_LEFT_TO_RIGHT */
    Insert_R,           /*  2 R   = U_RIGHT_TO_LEFT */
    Insert_AL,          /*  3 AL  = U_RIGHT_TO_LEFT_ARABIC */
    Insert_LRE,         /*  4 LRE = U_LEFT_TO_RIGHT_EMBEDDING */
    Insert_LRO,         /*  5 LRO = U_LEFT_TO_RIGHT_OVERRIDE */
    Insert_RLE,         /*  6 RLE = U_RIGHT_TO_LEFT_EMBEDDING */
    Insert_RLO,         /*  7 RLO = U_RIGHT_TO_LEFT_OVERRIDE */
    Insert_PDF,         /*  8 PDF = U_POP_DIRECTIONAL_FORMAT */
    Insert_FSI,         /*  9 FSI = U_FIRST_STRONG_ISOLATE */
    Insert_LRI,         /* 10 LRI = U_LEFT_TO_RIGHT_ISOLATE */
    Insert_RLI,         /* 11 RLI = U_RIGHT_TO_LEFT_ISOLATE */
    Insert_PDI,         /* 12 PDI = U_POP_DIRECTIONAL_ISOLATE */
    Insert_B,           /* 13 B   = U_BLOCK_SEPARATOR */
    Insert_S,           /* 14 S   = U_SEGMENT_SEPARATOR */
    Insert_WS,          /* 15 WS  = U_WHITE_SPACE_NEUTRAL */
    Insert_count        /* 16 */
};
447 /* map standard dir class to special 4-bit insert value (Insert_none as default) */
448 static const uint16_t insertDirFromStdDir
[dirPropCount
] = {
449 Insert_none
, /* L= U_LEFT_TO_RIGHT */
450 Insert_none
, /* R= U_RIGHT_TO_LEFT, */
451 Insert_none
, /* EN= U_EUROPEAN_NUMBER */
452 Insert_none
, /* ES= U_EUROPEAN_NUMBER_SEPARATOR */
453 Insert_none
, /* ET= U_EUROPEAN_NUMBER_TERMINATOR */
454 Insert_none
, /* AN= U_ARABIC_NUMBER */
455 Insert_none
, /* CS= U_COMMON_NUMBER_SEPARATOR */
456 Insert_none
, /* B= U_BLOCK_SEPARATOR */
457 Insert_none
, /* S= U_SEGMENT_SEPARATOR */
458 Insert_none
, /* WS= U_WHITE_SPACE_NEUTRAL */
459 Insert_none
, /* ON= U_OTHER_NEUTRAL */
460 Insert_LRE
, /* LRE=U_LEFT_TO_RIGHT_EMBEDDING */
461 Insert_LRO
, /* LRO=U_LEFT_TO_RIGHT_OVERRIDE */
462 Insert_none
, /* AL= U_RIGHT_TO_LEFT_ARABIC */
463 Insert_RLE
, /* RLE=U_RIGHT_TO_LEFT_EMBEDDING */
464 Insert_RLO
, /* RLO=U_RIGHT_TO_LEFT_OVERRIDE */
465 Insert_PDF
, /* PDF=U_POP_DIRECTIONAL_FORMAT */
466 Insert_none
, /* NSM=U_DIR_NON_SPACING_MARK */
467 Insert_none
, /* BN= U_BOUNDARY_NEUTRAL */
468 Insert_FSI
, /* FSI=U_FIRST_STRONG_ISOLATE */
469 Insert_LRI
, /* LRI=U_LEFT_TO_RIGHT_ISOLATE */
470 Insert_RLI
, /* RLI=U_RIGHT_TO_LEFT_ISOLATE */
471 Insert_PDI
, /* PDI=U_POP_DIRECTIONAL_ISOLATE */
472 Insert_none
, /* ENL */
473 Insert_none
, /* ENR */
476 /* map special 4-bit insert direction class to standard dir class (ON as default) */
477 static const uint8_t stdDirFromInsertDir
[Insert_count
] = {
478 ON
, /* Insert_none > ON */
482 LRE
, /* Insert_LRE */
483 LRO
, /* Insert_LRO */
484 RLE
, /* Insert_RLE */
485 RLO
, /* Insert_RLO */
486 PDF
, /* Insert_PDF */
487 FSI
, /* Insert_FSI */
488 LRI
, /* Insert_LRI */
489 RLI
, /* Insert_RLI */
490 PDI
, /* Insert_PDI */
/*
 * Build pBiDi->dirInsert from caller-supplied control strings.
 *
 * Every dirInsert[] entry in [0, pBiDi->length) is first cleared. Then, for
 * each offsets[] element, the matching control string -- index offsetsIndex,
 * or controlStringIndices[offsetsIndex] when that array is not NULL -- is
 * encoded as a sequence of 4-bit Insert_* codes, first character in the
 * lowest nibble, and stored at dirInsert[offset].
 * kMaxControlStringLen (4) is the number of 4-bit codes that fit in one
 * uint16_t entry.
 *
 * Returns FALSE for an offset outside [0, length), a NULL control string, a
 * string with more than kMaxControlStringLen characters, or a character that
 * maps to Insert_none (surrogates always do).
 * NOTE(review): controlStringIndex is used unchecked to index controlStrings;
 * no array length is passed in, so validity depends on the caller.
 * NOTE(review): a repeated offset overwrites the value stored earlier.
 * NOTE(review): the return type, the declaration of uchar and the success
 * return are missing from this extract.
 */
496 enum { kMaxControlStringLen
= 4 };
499 getDirInsert(UBiDi
*pBiDi
,
500 const int32_t *offsets
, int32_t offsetCount
,
501 const int32_t *controlStringIndices
,
502 const UChar
* const * controlStrings
) {
503 int32_t offset
, offsetsIndex
;
504 uint16_t *dirInsert
= pBiDi
->dirInsert
;
505 /* initialize dirInsert */
506 for (offset
= 0; offset
< pBiDi
->length
; offset
++) {
507 dirInsert
[offset
] = 0;
509 for (offsetsIndex
= 0; offsetsIndex
< offsetCount
; offsetsIndex
++) {
510 const UChar
* controlString
;
512 int32_t controlStringIndex
, dirInsertIndex
= 0;
513 uint16_t dirInsertValue
= 0;
514 offset
= offsets
[offsetsIndex
];
515 if (offset
< 0 || offset
>= pBiDi
->length
) {
516 return FALSE
; /* param err in offsets array */
518 controlStringIndex
= (controlStringIndices
== NULL
)? offsetsIndex
: controlStringIndices
[offsetsIndex
];
519 controlString
= controlStrings
[controlStringIndex
];
520 if (controlString
== NULL
) {
521 return FALSE
; /* param err in controlStrings array */
/* encode the NUL-terminated control string, one 4-bit code per UChar */
523 while ((uchar
= *controlString
++) != 0) {
524 uint16_t insertValue
= (U16_IS_SURROGATE(uchar
))? Insert_none
:
525 insertDirFromStdDir
[(uint32_t)ubidi_getCustomizedClass(pBiDi
, uchar
)];
526 if (dirInsertIndex
>= kMaxControlStringLen
|| insertValue
== Insert_none
) {
527 return FALSE
; /* param err in controlStrings array */
529 dirInsertValue
|= (insertValue
<< (4 * dirInsertIndex
++));
531 dirInsert
[offset
] = dirInsertValue
;
537 * Get the directional properties for the text, calculate the flags bit-set, and
538 * determine the paragraph level if necessary (in pBiDi->paras[i].level).
539 * FSI initiators are also resolved and their dirProp replaced with LRI or RLI.
540 * When encountering an FSI, it is initially replaced with an LRI, which is the
541 * default. Only if a strong R or AL is found within its scope will the LRI be
542 * replaced by an RLI.
/*
 * Single pass over the text that fills dirProps, accumulates the flags
 * bit-set, records paragraph limits/levels in pBiDi->paras and resolves FSI
 * initiators to LRI or RLI. When pBiDi->dirInsert is not NULL, the 4-bit
 * codes stored for a position are consumed as additional directional
 * properties, one nibble at a time (lowest nibble first).
 * NOTE(review): many lines (declarations such as `state` and `uchar`, else
 * branches, closing braces, returns) are missing from this extract; the
 * comments added below describe only what is visible.
 */
545 getDirProps(UBiDi
*pBiDi
) {
546 const UChar
*text
=pBiDi
->text
;
547 DirProp
*dirProps
=pBiDi
->dirPropsMemory
; /* pBiDi->dirProps is const */
548 uint16_t *dirInsert
= pBiDi
->dirInsert
; /* may be NULL */
550 int32_t i
=0, originalLength
=pBiDi
->originalLength
;
551 Flags flags
=0; /* collect all directionalities in the text */
553 DirProp dirProp
=0, defaultParaLevel
=0; /* initialize to avoid compiler warnings */
554 int32_t dirInsertValue
;
555 int8_t dirInsertIndex
; /* position within dirInsertValue, if any */
556 UBool isDefaultLevel
=IS_DEFAULT_LEVEL(pBiDi
->paraLevel
);
557 /* for inverse BiDi, the default para level is set to RTL if there is a
558 strong R or AL character at either end of the text */
559 UBool isDefaultLevelInverse
=isDefaultLevel
&& (UBool
)
560 (pBiDi
->reorderingMode
==UBIDI_REORDER_INVERSE_LIKE_DIRECT
||
561 pBiDi
->reorderingMode
==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
);
562 int32_t lastArabicPos
=-1;
563 int32_t controlCount
=0;
564 UBool removeBiDiControls
= (UBool
)(pBiDi
->reorderingOptions
&
565 UBIDI_OPTION_REMOVE_CONTROLS
);
568 NOT_SEEKING_STRONG
, /* 0: not contextual paraLevel, not after FSI */
569 SEEKING_STRONG_FOR_PARA
, /* 1: looking for first strong char in para */
570 SEEKING_STRONG_FOR_FSI
, /* 2: looking for first strong after FSI */
571 LOOKING_FOR_PDI
/* 3: found strong after FSI, looking for PDI */
574 DirProp lastStrong
=ON
; /* for default level & inverse BiDi */
575 /* The following stacks are used to manage isolate sequences. Those
576 sequences may be nested, but obviously never more deeply than the
577 maximum explicit embedding level.
578 lastStack is the index of the last used entry in the stack. A value of -1
579 means that there is no open isolate sequence.
580 lastStack is reset to -1 on paragraph boundaries. */
581 /* The following stack contains the position of the initiator of
582 each open isolate sequence */
583 int32_t isolateStartStack
[UBIDI_MAX_EXPLICIT_LEVEL
+1];
/* parallel to isolateStartStack: the dirInsertIndex in effect when the
   initiator was pushed (negative when the initiator came from the text) */
584 int8_t isolateStartInsertIndex
[UBIDI_MAX_EXPLICIT_LEVEL
+1];
585 /* The following stack contains the last known state before
586 encountering the initiator of an isolate sequence */
587 State previousStateStack
[UBIDI_MAX_EXPLICIT_LEVEL
+1];
588 int32_t stackLast
=-1;
590 if(pBiDi
->reorderingOptions
& UBIDI_OPTION_STREAMING
)
592 defaultParaLevel
=pBiDi
->paraLevel
&1;
594 pBiDi
->paras
[0].level
=defaultParaLevel
;
595 lastStrong
=defaultParaLevel
;
596 if(pBiDi
->proLength
>0 && /* there is a prologue */
597 (dirProp
=firstL_R_AL(pBiDi
))!=ON
) { /* with a strong character */
599 pBiDi
->paras
[0].level
=0; /* set the default para level */
601 pBiDi
->paras
[0].level
=1; /* set the default para level */
602 state
=NOT_SEEKING_STRONG
;
604 state
=SEEKING_STRONG_FOR_PARA
;
607 pBiDi
->paras
[0].level
=pBiDi
->paraLevel
;
608 state
=NOT_SEEKING_STRONG
;
610 /* count paragraphs and determine the paragraph level (P2..P3) */
612 * see comment in ubidi.h:
613 * the UBIDI_DEFAULT_XXX values are designed so that
614 * their bit 0 alone yields the intended default
617 dirInsertIndex
= -1; /* indicate that we have not checked dirInsert yet */
618 for( /* i=0 above */ ; i
<originalLength
; ) {
619 if (dirInsert
!= NULL
&& dirInsertIndex
< 0) {
620 dirInsertValue
= dirInsert
[i
];
622 if (dirInsertValue
> 0) {
/* consume the next inserted control: lowest nibble first */
624 dirProp
= (DirProp
)stdDirFromInsertDir
[dirInsertValue
& 0x000F];
625 dirInsertValue
>>= 4;
626 flags
|=DIRPROP_FLAG(dirProp
);
630 /* i is incremented by U16_NEXT */
631 U16_NEXT(text
, i
, originalLength
, uchar
);
632 flags
|=DIRPROP_FLAG(dirProp
=(DirProp
)ubidi_getCustomizedClass(pBiDi
, uchar
));
633 dirProps
[i
-1]=dirProp
;
634 if(uchar
>0xffff) { /* set the lead surrogate's property to BN */
635 flags
|=DIRPROP_FLAG(BN
);
639 if(removeBiDiControls
&& IS_BIDI_CONTROL_CHAR(uchar
))
642 if(state
==SEEKING_STRONG_FOR_PARA
) {
643 pBiDi
->paras
[pBiDi
->paraCount
-1].level
=0;
644 state
=NOT_SEEKING_STRONG
;
646 else if(state
==SEEKING_STRONG_FOR_FSI
) {
647 if(stackLast
<=UBIDI_MAX_EXPLICIT_LEVEL
) {
648 /* no need for next statement, already set by default */
649 /* dirProps[isolateStartStack[stackLast]]=LRI; */
650 flags
|=DIRPROP_FLAG(LRI
);
652 state
=LOOKING_FOR_PDI
;
657 if(dirProp
==R
|| dirProp
==AL
) {
658 if(state
==SEEKING_STRONG_FOR_PARA
) {
659 pBiDi
->paras
[pBiDi
->paraCount
-1].level
=1;
660 state
=NOT_SEEKING_STRONG
;
662 else if(state
==SEEKING_STRONG_FOR_FSI
) {
663 if(stackLast
<=UBIDI_MAX_EXPLICIT_LEVEL
) {
664 if (isolateStartInsertIndex
[stackLast
] < 0) {
665 dirProps
[isolateStartStack
[stackLast
]]=RLI
;
/* NOTE(review): dirInsert is indexed with stackLast (the stack depth) in the
   next two statements, whereas the position of the inserted initiator is
   saved in isolateStartStack[stackLast] where FSI/LRI/RLI are pushed further
   down. This looks like it should be dirInsert[isolateStartStack[stackLast]];
   confirm before relying on FSI resolution for inserted controls. */
667 dirInsert
[stackLast
] &= ~(0x000F << (4*isolateStartInsertIndex
[stackLast
]));
668 dirInsert
[stackLast
] |= (Insert_RLI
<< (4*isolateStartInsertIndex
[stackLast
]));
670 flags
|=DIRPROP_FLAG(RLI
);
672 state
=LOOKING_FOR_PDI
;
679 if(dirProp
>=FSI
&& dirProp
<=RLI
) { /* FSI, LRI or RLI */
681 if(stackLast
<=UBIDI_MAX_EXPLICIT_LEVEL
) {
682 isolateStartStack
[stackLast
]= (dirInsertIndex
< 0)? i
-1: i
/* we have not incremented with U16_NEXT yet */;
683 isolateStartInsertIndex
[stackLast
] = dirInsertIndex
;
684 previousStateStack
[stackLast
]=state
;
687 if (dirInsertIndex
< 0) {
688 dirProps
[i
-1]=LRI
; /* default if no strong char */
/* inserted FSI: overwrite its nibble in dirInsert[i] with Insert_LRI */
690 dirInsert
[i
] &= ~(0x000F << (4*dirInsertIndex
));
691 dirInsert
[i
] |= (Insert_LRI
<< (4*dirInsertIndex
));
693 state
=SEEKING_STRONG_FOR_FSI
;
696 state
=LOOKING_FOR_PDI
;
700 if(state
==SEEKING_STRONG_FOR_FSI
) {
701 if(stackLast
<=UBIDI_MAX_EXPLICIT_LEVEL
) {
702 /* no need for next statement, already set by default */
703 /* dirProps[isolateStartStack[stackLast]]=LRI; */
704 flags
|=DIRPROP_FLAG(LRI
);
708 if(stackLast
<=UBIDI_MAX_EXPLICIT_LEVEL
)
709 state
=previousStateStack
[stackLast
];
715 if(i
<originalLength
&& uchar
==CR
&& text
[i
]==LF
) /* do nothing on the CR */
717 pBiDi
->paras
[pBiDi
->paraCount
-1].limit
=i
;
718 if(isDefaultLevelInverse
&& lastStrong
==R
)
719 pBiDi
->paras
[pBiDi
->paraCount
-1].level
=1;
720 if(pBiDi
->reorderingOptions
& UBIDI_OPTION_STREAMING
) {
721 /* When streaming, we only process whole paragraphs
722 thus some updates are only done on paragraph boundaries */
723 pBiDi
->length
=i
; /* i is index to next character */
724 pBiDi
->controlCount
=controlCount
;
726 if(i
<originalLength
) { /* B not last char in text */
728 if(checkParaCount(pBiDi
)==FALSE
) /* not enough memory for a new para entry */
731 pBiDi
->paras
[pBiDi
->paraCount
-1].level
=defaultParaLevel
;
732 state
=SEEKING_STRONG_FOR_PARA
;
733 lastStrong
=defaultParaLevel
;
735 pBiDi
->paras
[pBiDi
->paraCount
-1].level
=pBiDi
->paraLevel
;
736 state
=NOT_SEEKING_STRONG
;
743 /* Ignore still open isolate sequences with overflow */
744 if(stackLast
>UBIDI_MAX_EXPLICIT_LEVEL
) {
745 stackLast
=UBIDI_MAX_EXPLICIT_LEVEL
;
746 state
=SEEKING_STRONG_FOR_FSI
; /* to be on the safe side */
748 /* Resolve direction of still unresolved open FSI sequences */
749 while(stackLast
>=0) {
750 if(state
==SEEKING_STRONG_FOR_FSI
) {
751 /* no need for next statement, already set by default */
752 /* dirProps[isolateStartStack[stackLast]]=LRI; */
753 flags
|=DIRPROP_FLAG(LRI
);
756 state
=previousStateStack
[stackLast
];
759 /* When streaming, ignore text after the last paragraph separator */
760 if(pBiDi
->reorderingOptions
& UBIDI_OPTION_STREAMING
) {
761 if(pBiDi
->length
<originalLength
)
764 pBiDi
->paras
[pBiDi
->paraCount
-1].limit
=originalLength
;
765 pBiDi
->controlCount
=controlCount
;
767 /* For inverse bidi, default para direction is RTL if there is
768 a strong R or AL at either end of the paragraph */
769 if(isDefaultLevelInverse
&& lastStrong
==R
) {
770 pBiDi
->paras
[pBiDi
->paraCount
-1].level
=1;
773 pBiDi
->paraLevel
=static_cast<UBiDiLevel
>(pBiDi
->paras
[0].level
);
775 /* The following is needed to resolve the text direction for default level
776 paragraphs containing no strong character */
777 for(i
=0; i
<pBiDi
->paraCount
; i
++)
778 flags
|=DIRPROP_FLAG_LR(pBiDi
->paras
[i
].level
);
780 if(pBiDi
->orderParagraphsLTR
&& (flags
&DIRPROP_FLAG(B
))) {
781 flags
|=DIRPROP_FLAG(L
);
784 pBiDi
->lastArabicPos
=lastArabicPos
;
788 /* determine the paragraph level at position index */
/*
 * Searches pBiDi->paras for the first paragraph whose limit lies beyond
 * pindex and returns that paragraph's level cast to UBiDiLevel. If the loop
 * index reaches paraCount, the last paragraph (paraCount-1) is used instead.
 * NOTE(review): the return type, the declaration of i and the statement
 * under the `pindex<limit` test (presumably a break) are missing from this
 * extract.
 */
790 ubidi_getParaLevelAtIndex(const UBiDi
*pBiDi
, int32_t pindex
) {
792 for(i
=0; i
<pBiDi
->paraCount
; i
++)
793 if(pindex
<pBiDi
->paras
[i
].limit
)
795 if(i
>=pBiDi
->paraCount
)
796 i
=pBiDi
->paraCount
-1;
797 return (UBiDiLevel
)(pBiDi
->paras
[i
].level
);
800 /* Functions for handling paired brackets ----------------------------------- */
802 /* In the isoRuns array, the first entry is used for text outside of any
803 isolate sequence. Higher entries are used for each more deeply nested
804 isolate sequence. isoRunLast is the index of the last used entry. The
805 openings array is used to note the data of opening brackets not yet
806 matched by a closing bracket, or matched but still susceptible to change level.
808 Each isoRun entry contains the index of the first and
809 one-after-last openings entries for pending opening brackets it
810 contains. The next openings entry to use is the one-after-last of the
811 most deeply nested isoRun entry.
812 isoRun entries also contain their current embedding level and the last
813 encountered strong character, since these will be needed to resolve
814 the level of paired brackets. */
/*
 * Prepare a BracketData for a new run of bracket processing.
 *
 * isoRuns[0] is reset: start and limit 0, level = paragraph level at index 0,
 * lastStrong/lastBase/contextDir = the parity of that level, contextPos 0.
 * The openings array points at pBiDi->openingsMemory when that is non-NULL
 * (capacity openingsSize/sizeof(Opening)); the other visible assignment uses
 * the embedded simpleOpenings array with SIMPLE_OPENINGS_COUNT entries.
 * isNumbersSpecial is true for the two *_NUMBERS_SPECIAL reordering modes.
 * NOTE(review): bd->pBiDi is read at the end, so it must be assigned in one
 * of the lines missing from this extract; the return type and the else
 * keyword are also not visible.
 */
817 bracketInit(UBiDi
*pBiDi
, BracketData
*bd
) {
820 bd
->isoRuns
[0].start
=0;
821 bd
->isoRuns
[0].limit
=0;
822 bd
->isoRuns
[0].level
=GET_PARALEVEL(pBiDi
, 0);
823 UBiDiLevel t
= GET_PARALEVEL(pBiDi
, 0) & 1;
824 bd
->isoRuns
[0].lastStrong
= bd
->isoRuns
[0].lastBase
= t
;
825 bd
->isoRuns
[0].contextDir
= (UBiDiDirection
)t
;
826 bd
->isoRuns
[0].contextPos
=0;
827 if(pBiDi
->openingsMemory
) {
828 bd
->openings
=pBiDi
->openingsMemory
;
829 bd
->openingsCount
=pBiDi
->openingsSize
/ sizeof(Opening
);
831 bd
->openings
=bd
->simpleOpenings
;
832 bd
->openingsCount
=SIMPLE_OPENINGS_COUNT
;
834 bd
->isNumbersSpecial
=bd
->pBiDi
->reorderingMode
==UBIDI_REORDER_NUMBERS_SPECIAL
||
835 bd
->pBiDi
->reorderingMode
==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
;
838 /* paragraph boundary */
/*
 * Restart bracket tracking for the paragraph that begins at `level`:
 * isoRuns[0] gets limit 0, the new level, lastStrong/lastBase/contextDir
 * equal to the parity of level, and contextPos 0.
 * NOTE(review): the return type and one statement (original line 841) are
 * missing from this extract.
 */
840 bracketProcessB(BracketData
*bd
, UBiDiLevel level
) {
842 bd
->isoRuns
[0].limit
=0;
843 bd
->isoRuns
[0].level
=level
;
844 bd
->isoRuns
[0].lastStrong
=bd
->isoRuns
[0].lastBase
=level
&1;
845 bd
->isoRuns
[0].contextDir
=(UBiDiDirection
)(level
&1);
846 bd
->isoRuns
[0].contextPos
=0;
849 /* LRE, LRO, RLE, RLO, PDF */
/*
 * Reset the current isolate run at an explicit embedding boundary.
 *
 * When the boundary directly follows an isolate initiator (lastCcDirProp is
 * in MASK_ISO) the statement under that test applies (missing here,
 * presumably an early return). Otherwise pending openings are dropped
 * (limit=start), the run takes embeddingLevel, and the strong context is
 * derived from contextLevel, which is replaced by embeddingLevel when the
 * latter is higher once the override bit is ignored.
 * NOTE(review): contextPos receives lastCcPos through a (UBiDiDirection)
 * cast although it holds a position, not a direction -- the cast looks
 * unintended; confirm the field type before changing it.
 */
851 bracketProcessBoundary(BracketData
*bd
, int32_t lastCcPos
, DirProp lastCcDirProp
,
852 UBiDiLevel contextLevel
, UBiDiLevel embeddingLevel
) {
853 IsoRun
*pLastIsoRun
=&bd
->isoRuns
[bd
->isoRunLast
];
854 if(DIRPROP_FLAG(lastCcDirProp
)&MASK_ISO
) /* after an isolate */
856 if(NO_OVERRIDE(embeddingLevel
)>NO_OVERRIDE(contextLevel
)) /* not a PDF */
857 contextLevel
=embeddingLevel
;
858 pLastIsoRun
->limit
=pLastIsoRun
->start
;
859 pLastIsoRun
->level
=embeddingLevel
;
860 pLastIsoRun
->lastStrong
=pLastIsoRun
->lastBase
=contextLevel
&1;
861 pLastIsoRun
->contextDir
=(UBiDiDirection
)(contextLevel
&1);
862 pLastIsoRun
->contextPos
=(UBiDiDirection
)lastCcPos
;
/*
 * Handle an isolate initiator during bracket processing.
 *
 * The current run's lastBase is set to ON and its limit is remembered in
 * lastLimit. The run written afterwards starts with an empty openings range
 * at lastLimit (start=limit=lastLimit), takes `level`, and gets
 * lastStrong/lastBase/contextDir from the parity of level with contextPos 0.
 * NOTE(review): the lines between reading lastLimit and the second group of
 * assignments are missing from this extract -- presumably they advance
 * isoRunLast and re-point pLastIsoRun at the new entry; confirm.
 */
867 bracketProcessLRI_RLI(BracketData
*bd
, UBiDiLevel level
) {
868 IsoRun
*pLastIsoRun
=&bd
->isoRuns
[bd
->isoRunLast
];
870 pLastIsoRun
->lastBase
=ON
;
871 lastLimit
=pLastIsoRun
->limit
;
874 pLastIsoRun
->start
=pLastIsoRun
->limit
=lastLimit
;
875 pLastIsoRun
->level
=level
;
876 pLastIsoRun
->lastStrong
=pLastIsoRun
->lastBase
=level
&1;
877 pLastIsoRun
->contextDir
=(UBiDiDirection
)(level
&1);
878 pLastIsoRun
->contextPos
=0;
/*
 * Handle a PDI during bracket processing: the isolate run selected by
 * bd->isoRunLast gets lastBase=ON.
 * NOTE(review): the declaration of pLastIsoRun and the statement preceding
 * its assignment (presumably the decrement of isoRunLast) are missing from
 * this extract.
 */
883 bracketProcessPDI(BracketData
*bd
) {
886 pLastIsoRun
=&bd
->isoRuns
[bd
->isoRunLast
];
887 pLastIsoRun
->lastBase
=ON
;
890 /* newly found opening bracket: create an openings entry */
/*
 * Appends an Opening for the bracket at `position`, whose expected closing
 * character is `match`, to the current isolate run.
 * When the run's limit has reached openingsCount, getInitialOpeningsMemory()
 * is asked for limit*2 entries; if the data still lived in the embedded
 * simpleOpenings array it is copied into the heap buffer, and bd->openings /
 * bd->openingsCount are refreshed from pBiDi.
 * The new entry records position, match and the run's current contextDir and
 * contextPos; then the run's limit is advanced by one.
 * NOTE(review): the declaration of pOpening, the failure return after
 * getInitialOpeningsMemory(), one statement before limit++ (original line
 * 910) and the success return are missing from this extract.
 */
891 static UBool
/* return TRUE if success */
892 bracketAddOpening(BracketData
*bd
, UChar match
, int32_t position
) {
893 IsoRun
*pLastIsoRun
=&bd
->isoRuns
[bd
->isoRunLast
];
895 if(pLastIsoRun
->limit
>=bd
->openingsCount
) { /* no available new entry */
896 UBiDi
*pBiDi
=bd
->pBiDi
;
897 if(!getInitialOpeningsMemory(pBiDi
, pLastIsoRun
->limit
* 2))
899 if(bd
->openings
==bd
->simpleOpenings
)
900 uprv_memcpy(pBiDi
->openingsMemory
, bd
->simpleOpenings
,
901 SIMPLE_OPENINGS_COUNT
* sizeof(Opening
));
902 bd
->openings
=pBiDi
->openingsMemory
; /* may have changed */
903 bd
->openingsCount
=pBiDi
->openingsSize
/ sizeof(Opening
);
905 pOpening
=&bd
->openings
[pLastIsoRun
->limit
];
906 pOpening
->position
=position
;
907 pOpening
->match
=match
;
908 pOpening
->contextDir
=pLastIsoRun
->contextDir
;
909 pOpening
->contextPos
=pLastIsoRun
->contextPos
;
911 pLastIsoRun
->limit
++;
915 /* change N0c1 to N0c2 when a preceding bracket is assigned the embedding level */
/*
 * Scans the openings after openingIndex in the current isolate run.
 * An entry is examined with four tests: match>=0 (not an N0c match),
 * newPropPosition before the entry's contextPos, newPropPosition at or after
 * the entry's position, and newProp equal to the entry's contextDir; the
 * statements under these tests are missing from this extract (presumably
 * continue/break).
 * For an entry that passes, both bracket positions in dirProps receive
 * newProp -- the closing position is stored as the negated match value --
 * match is set to 0, and the function recurses for the opening and the
 * closing position so that dependent pairs are updated as well.
 */
917 fixN0c(BracketData
*bd
, int32_t openingIndex
, int32_t newPropPosition
, DirProp newProp
) {
918 /* This function calls itself recursively */
919 IsoRun
*pLastIsoRun
=&bd
->isoRuns
[bd
->isoRunLast
];
921 DirProp
*dirProps
=bd
->pBiDi
->dirProps
;
922 int32_t k
, openingPosition
, closingPosition
;
923 for(k
=openingIndex
+1, qOpening
=&bd
->openings
[k
]; k
<pLastIsoRun
->limit
; k
++, qOpening
++) {
924 if(qOpening
->match
>=0) /* not an N0c match */
926 if(newPropPosition
<qOpening
->contextPos
)
928 if(newPropPosition
>=qOpening
->position
)
930 if(newProp
==qOpening
->contextDir
)
932 openingPosition
=qOpening
->position
;
933 dirProps
[openingPosition
]=newProp
;
934 closingPosition
=-(qOpening
->match
);
935 dirProps
[closingPosition
]=newProp
;
936 qOpening
->match
=0; /* prevent further changes */
937 fixN0c(bd
, k
, openingPosition
, newProp
);
938 fixN0c(bd
, k
, closingPosition
, newProp
);
942 /* process closing bracket */
/*
 * Resolves the pair formed by the opening entry openIdx and the closing
 * bracket at `position`.
 * direction is the parity of the current isolate run's level.
 *  - N0b: the opening's flags contain a strong type matching direction, so
 *    newProp = direction.
 *  - N0c: the flags contain some strong type only of the other direction;
 *    newProp is the opening's contextDir when it differs from direction
 *    (N0c1), otherwise direction (N0c2). The pair counts as stable only when
 *    openIdx is the first opening of the run.
 *  - otherwise the run's limit is cut back to openIdx, forgetting this
 *    opening and anything nested in it.
 * Both bracket positions in dirProps receive newProp and fixN0c() updates
 * nested N0c pairs. The two groups of statements that follow handle the
 * stable case (entries are dropped) and the unstable case (the closing
 * position is stored negated in match and related entries are neutralized).
 * NOTE(review): the declarations of stable, newProp and k, the if/else lines
 * selecting stable vs. unstable handling, and the return statements are
 * missing from this extract.
 */
943 static DirProp
/* return L or R if N0b or N0c, ON if N0d */
944 bracketProcessClosing(BracketData
*bd
, int32_t openIdx
, int32_t position
) {
945 IsoRun
*pLastIsoRun
=&bd
->isoRuns
[bd
->isoRunLast
];
946 Opening
*pOpening
, *qOpening
;
947 UBiDiDirection direction
;
950 pOpening
=&bd
->openings
[openIdx
];
951 direction
=(UBiDiDirection
)(pLastIsoRun
->level
&1);
952 stable
=TRUE
; /* assume stable until proved otherwise */
954 /* The stable flag is set when brackets are paired and their
955 level is resolved and cannot be changed by what will be
956 found later in the source string.
957 An unstable match can occur only when applying N0c, where
958 the resolved level depends on the preceding context, and
959 this context may be affected by text occurring later.
960 Example: RTL paragraph containing: abc[(latin) HEBREW]
961 When the closing parenthesis is encountered, it appears
962 that N0c1 must be applied since 'abc' sets an opposite
963 direction context and both parentheses receive level 2.
964 However, when the closing square bracket is processed,
965 N0b applies because of 'HEBREW' being included within the
966 brackets, thus the square brackets are treated like R and
967 receive level 1. However, this changes the preceding
968 context of the opening parenthesis, and it now appears
969 that N0c2 must be applied to the parentheses rather than N0c1.
972 if((direction
==0 && pOpening
->flags
&FOUND_L
) ||
973 (direction
==1 && pOpening
->flags
&FOUND_R
)) { /* N0b */
974 newProp
=static_cast<DirProp
>(direction
);
976 else if(pOpening
->flags
&(FOUND_L
|FOUND_R
)) { /* N0c */
977 /* it is stable if there is no containing pair or in
978 conditions too complicated and not worth checking */
979 stable
=(openIdx
==pLastIsoRun
->start
);
980 if(direction
!=pOpening
->contextDir
)
981 newProp
= static_cast<DirProp
>(pOpening
->contextDir
); /* N0c1 */
983 newProp
= static_cast<DirProp
>(direction
); /* N0c2 */
985 /* forget this and any brackets nested within this pair */
986 pLastIsoRun
->limit
= static_cast<uint16_t>(openIdx
);
989 bd
->pBiDi
->dirProps
[pOpening
->position
]=newProp
;
990 bd
->pBiDi
->dirProps
[position
]=newProp
;
991 /* Update nested N0c pairs that may be affected */
992 fixN0c(bd
, openIdx
, pOpening
->position
, newProp
);
994 pLastIsoRun
->limit
= static_cast<uint16_t>(openIdx
); /* forget any brackets nested within this pair */
995 /* remove lower located synonyms if any */
996 while(pLastIsoRun
->limit
>pLastIsoRun
->start
&&
997 bd
->openings
[pLastIsoRun
->limit
-1].position
==pOpening
->position
)
998 pLastIsoRun
->limit
--;
/* unstable match: remember the closing position as a negative match value */
1001 pOpening
->match
=-position
;
1002 /* neutralize lower located synonyms if any */
1004 while(k
>=pLastIsoRun
->start
&&
1005 bd
->openings
[k
].position
==pOpening
->position
)
1006 bd
->openings
[k
--].match
=0;
1007 /* neutralize any unmatched opening between the current pair;
1008 this will also neutralize higher located synonyms if any */
1009 for(k
=openIdx
+1; k
<pLastIsoRun
->limit
; k
++) {
1010 qOpening
=&bd
->openings
[k
];
1011 if(qOpening
->position
>=position
)
1013 if(qOpening
->match
>0)
1020 /* handle strong characters, digits and candidates for closing brackets */
1021 static UBool
/* return TRUE if success */
1022 bracketProcessChar(BracketData
*bd
, int32_t position
) {
1023 IsoRun
*pLastIsoRun
=&bd
->isoRuns
[bd
->isoRunLast
];
1024 DirProp
*dirProps
, dirProp
, newProp
;
1026 dirProps
=bd
->pBiDi
->dirProps
;
1027 dirProp
=dirProps
[position
];
1031 /* First see if it is a matching closing bracket. Hopefully, this is
1032 more efficient than checking if it is a closing bracket at all */
1033 c
=bd
->pBiDi
->text
[position
];
1034 for(idx
=pLastIsoRun
->limit
-1; idx
>=pLastIsoRun
->start
; idx
--) {
1035 if(bd
->openings
[idx
].match
!=c
)
1037 /* We have a match */
1038 newProp
=bracketProcessClosing(bd
, idx
, position
);
1039 if(newProp
==ON
) { /* N0d */
1040 c
=0; /* prevent handling as an opening */
1043 pLastIsoRun
->lastBase
=ON
;
1044 pLastIsoRun
->contextDir
=(UBiDiDirection
)newProp
;
1045 pLastIsoRun
->contextPos
=position
;
1046 level
=bd
->pBiDi
->levels
[position
];
1047 if(level
&UBIDI_LEVEL_OVERRIDE
) { /* X4, X5 */
1051 pLastIsoRun
->lastStrong
=newProp
;
1052 flag
=DIRPROP_FLAG(newProp
);
1053 for(i
=pLastIsoRun
->start
; i
<idx
; i
++)
1054 bd
->openings
[i
].flags
|=flag
;
1055 /* matching brackets are not overridden by LRO/RLO */
1056 bd
->pBiDi
->levels
[position
]&=~UBIDI_LEVEL_OVERRIDE
;
1058 /* matching brackets are not overridden by LRO/RLO */
1059 bd
->pBiDi
->levels
[bd
->openings
[idx
].position
]&=~UBIDI_LEVEL_OVERRIDE
;
1062 /* We get here only if the ON character is not a matching closing
1063 bracket or it is a case of N0d */
1064 /* Now see if it is an opening bracket */
1066 match
= static_cast<UChar
>(u_getBidiPairedBracket(c
)); /* get the matching char */
1069 if(match
!=c
&& /* has a matching char */
1070 ubidi_getPairedBracketType(c
)==U_BPT_OPEN
) { /* opening bracket */
1071 /* special case: process synonyms
1072 create an opening entry for each synonym */
1073 if(match
==0x232A) { /* RIGHT-POINTING ANGLE BRACKET */
1074 if(!bracketAddOpening(bd
, 0x3009, position
))
1077 else if(match
==0x3009) { /* RIGHT ANGLE BRACKET */
1078 if(!bracketAddOpening(bd
, 0x232A, position
))
1081 if(!bracketAddOpening(bd
, match
, position
))
1085 level
=bd
->pBiDi
->levels
[position
];
1086 if(level
&UBIDI_LEVEL_OVERRIDE
) { /* X4, X5 */
1088 if(dirProp
!=S
&& dirProp
!=WS
&& dirProp
!=ON
)
1089 dirProps
[position
]=newProp
;
1090 pLastIsoRun
->lastBase
=newProp
;
1091 pLastIsoRun
->lastStrong
=newProp
;
1092 pLastIsoRun
->contextDir
=(UBiDiDirection
)newProp
;
1093 pLastIsoRun
->contextPos
=position
;
1095 else if(dirProp
<=R
|| dirProp
==AL
) {
1096 newProp
= static_cast<DirProp
>(DIR_FROM_STRONG(dirProp
));
1097 pLastIsoRun
->lastBase
=dirProp
;
1098 pLastIsoRun
->lastStrong
=dirProp
;
1099 pLastIsoRun
->contextDir
=(UBiDiDirection
)newProp
;
1100 pLastIsoRun
->contextPos
=position
;
1102 else if(dirProp
==EN
) {
1103 pLastIsoRun
->lastBase
=EN
;
1104 if(pLastIsoRun
->lastStrong
==L
) {
1106 if(!bd
->isNumbersSpecial
)
1107 dirProps
[position
]=ENL
;
1108 pLastIsoRun
->contextDir
=(UBiDiDirection
)L
;
1109 pLastIsoRun
->contextPos
=position
;
1113 if(pLastIsoRun
->lastStrong
==AL
)
1114 dirProps
[position
]=AN
; /* W2 */
1116 dirProps
[position
]=ENR
;
1117 pLastIsoRun
->contextDir
=(UBiDiDirection
)R
;
1118 pLastIsoRun
->contextPos
=position
;
1121 else if(dirProp
==AN
) {
1123 pLastIsoRun
->lastBase
=AN
;
1124 pLastIsoRun
->contextDir
=(UBiDiDirection
)R
;
1125 pLastIsoRun
->contextPos
=position
;
1127 else if(dirProp
==NSM
) {
1128 /* if the last real char was ON, change NSM to ON so that it
1129 will stay ON even if the last real char is a bracket which
1130 may be changed to L or R */
1131 newProp
=pLastIsoRun
->lastBase
;
1133 dirProps
[position
]=newProp
;
1137 pLastIsoRun
->lastBase
=dirProp
;
1139 if(newProp
<=R
|| newProp
==AL
) {
1141 uint16_t flag
=DIRPROP_FLAG(DIR_FROM_STRONG(newProp
));
1142 for(i
=pLastIsoRun
->start
; i
<pLastIsoRun
->limit
; i
++)
1143 if(position
>bd
->openings
[i
].position
)
1144 bd
->openings
[i
].flags
|=flag
;
1149 /* perform (X1)..(X9) ------------------------------------------------------- */
1151 /* determine if the text is mixed-directional or single-directional */
1152 static UBiDiDirection
1153 directionFromFlags(UBiDi
*pBiDi
) {
1154 Flags flags
=pBiDi
->flags
;
1155 /* if the text contains AN and neutrals, then some neutrals may become RTL */
1156 if(!(flags
&MASK_RTL
|| ((flags
&DIRPROP_FLAG(AN
)) && (flags
&MASK_POSSIBLE_N
)))) {
1158 } else if(!(flags
&MASK_LTR
)) {
1166 * Resolve the explicit levels as specified by explicit embedding codes.
1167 * Recalculate the flags to have them reflect the real properties
1168 * after taking the explicit embeddings into account.
1170 * The BiDi algorithm is designed to result in the same behavior whether embedding
1171 * levels are externally specified (from "styled text", supposedly the preferred
1172 * method) or set by explicit embedding codes (LRx, RLx, PDF, FSI, PDI) in the plain text.
1173 * That is why (X9) instructs to remove all not-isolate explicit codes (and BN).
1174 * However, in a real implementation, the removal of these codes and their index
1175 * positions in the plain text is undesirable since it would result in
1176 * reallocated, reindexed text.
1177 * Instead, this implementation leaves the codes in there and just ignores them
1178 * in the subsequent processing.
1179 * In order to get the same reordering behavior, positions with a BN or a not-isolate
1180 * explicit embedding code just get the same level assigned as the last "real"
1183 * Some implementations, not this one, then overwrite some of these
1184 * directionality properties at "real" same-level-run boundaries by
1185 * L or R codes so that the resolution of weak types can be performed on the
1186 * entire paragraph at once instead of having to parse it once more and
1187 * perform that resolution on same-level-runs.
1188 * This limits the scope of the implicit rules in effectively
1189 * the same way as the run limits.
1191 * Instead, this implementation does not modify these codes, except for
1192 * paired brackets whose properties (ON) may be replaced by L or R.
1193 * On one hand, the paragraph has to be scanned for same-level-runs, but
1194 * on the other hand, this saves another loop to reset these codes,
1195 * or saves making and modifying a copy of dirProps[].
1198 * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm.
1201 * Handling the stack of explicit levels (Xn):
1203 * With the BiDi stack of explicit levels, as pushed with each
1204 * LRE, RLE, LRO, RLO, LRI, RLI and FSI and popped with each PDF and PDI,
1205 * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL.
1207 * In order to have a correct push-pop semantics even in the case of overflows,
1208 * overflow counters and a valid isolate counter are used as described in UAX#9
1209 * section 3.3.2 "Explicit Levels and Directions".
1211 * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
1213 * Returns normally the direction; -1 if there was a memory shortage
1216 static UBiDiDirection
1217 resolveExplicitLevels(UBiDi
*pBiDi
, UErrorCode
*pErrorCode
) {
/* Local aliases of the UBiDi arrays that the loops below read (text, dirProps,
   dirInsert) and write (levels, dirProps, dirInsert). */
1218 DirProp
*dirProps
=pBiDi
->dirProps
;
1219 uint16_t *dirInsert
= pBiDi
->dirInsert
; /* may be NULL */
1220 UBiDiLevel
*levels
=pBiDi
->levels
;
1221 const UChar
*text
=pBiDi
->text
;
1223 int32_t i
=0, length
=pBiDi
->length
;
1224 Flags flags
=pBiDi
->flags
; /* collect all directionalities in the text */
1226 int32_t dirInsertValue
;
1227 int8_t dirInsertIndex
; /* position within dirInsertValue, if any */
1228 UBiDiLevel level
=GET_PARALEVEL(pBiDi
, 0);
1229 UBiDiDirection direction
;
1230 pBiDi
->isolateCount
=0;
/* an error that is already pending ends the function here with UBIDI_LTR */
1232 if(U_FAILURE(*pErrorCode
)) { return UBIDI_LTR
; }
1234 /* determine if the text is mixed-directional or single-directional */
1235 direction
=directionFromFlags(pBiDi
);
1237 /* we may not need to resolve any explicit levels */
1238 if((direction
!=UBIDI_MIXED
)) {
1239 /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
1242 if(pBiDi
->reorderingMode
> UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL
) {
1243 /* inverse BiDi: mixed, but all characters are at the same embedding level */
1244 /* set all levels to the paragraph level */
1245 int32_t paraIndex
, start
, limit
;
1246 for(paraIndex
=0; paraIndex
<pBiDi
->paraCount
; paraIndex
++) {
1250 start
=pBiDi
->paras
[paraIndex
-1].limit
;
1251 limit
=pBiDi
->paras
[paraIndex
].limit
;
1252 level
= static_cast<UBiDiLevel
>(pBiDi
->paras
[paraIndex
].level
);
1253 for(i
=start
; i
<limit
; i
++)
1256 return direction
; /* no bracket matching for inverse BiDi */
1258 if(!(flags
&(MASK_EXPLICIT
|MASK_ISO
))) {
1259 /* no embeddings, set all levels to the paragraph level */
1260 /* we still have to perform bracket matching */
1261 int32_t paraIndex
, start
, limit
;
1262 BracketData bracketData
;
1263 bracketInit(pBiDi
, &bracketData
);
1264 for(paraIndex
=0; paraIndex
<pBiDi
->paraCount
; paraIndex
++) {
1268 start
=pBiDi
->paras
[paraIndex
-1].limit
;
1269 limit
=pBiDi
->paras
[paraIndex
].limit
;
1270 level
= static_cast<UBiDiLevel
>(pBiDi
->paras
[paraIndex
].level
);
1271 for(i
=start
; i
<limit
; i
++) {
1273 dirProp
=dirProps
[i
];
/* NOTE(review): text[i+1] is read below; the check that i+1 is still inside
   the text is not visible in this excerpt - confirm it is present. */
1278 if(text
[i
]==CR
&& text
[i
+1]==LF
)
1279 continue; /* skip CR when followed by LF */
1280 bracketProcessB(&bracketData
, level
);
/* a FALSE result from bracketProcessChar() is reported as a memory allocation error */
1284 if(!bracketProcessChar(&bracketData
, i
)) {
1285 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
1293 /* continue to perform (Xn) */
1295 /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
1296 /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */
1297 UBiDiLevel embeddingLevel
=level
, newLevel
;
1298 UBiDiLevel previousLevel
=level
; /* previous level for regular (not CC) characters */
1299 int32_t lastCcPos
=0; /* index of last effective LRx,RLx, PDx */
1300 DirProp lastCcDirProp
=0; /* dirProp of last effective LRx,RLx, PDx */
1302 /* The following stack remembers the embedding level and the ISOLATE flag of level runs.
1303 stackLast points to its current entry. */
1304 uint16_t stack
[UBIDI_MAX_EXPLICIT_LEVEL
+2]; /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL
1305 but we need one more entry as base */
1306 uint32_t stackLast
=0;
1307 int32_t overflowIsolateCount
=0;
1308 int32_t overflowEmbeddingCount
=0;
1309 int32_t validIsolateCount
=0;
1310 BracketData bracketData
;
1311 bracketInit(pBiDi
, &bracketData
);
1312 stack
[0]=level
; /* initialize base entry to para level, no override, no isolate */
1314 /* recalculate the flags */
1318 dirInsertIndex
= -1; /* indicate that we have not checked dirInsert yet */
1319 for(i
=0; i
<length
; ) { /* now conditionally increment at end */
1320 if (dirInsert
!= NULL
&& dirInsertIndex
< 0) {
1321 dirInsertValue
= dirInsert
[i
];
/* A positive dirInsertValue is consumed 4 bits at a time: the low nibble
   indexes stdDirFromInsertDir to obtain the dirProp, then the value is
   shifted right by 4.
   NOTE(review): dirInsertValue is declared without an initializer and is
   assigned above only when dirInsert != NULL; whether it is set on the
   NULL path is not visible in this excerpt - confirm. */
1323 if (dirInsertValue
> 0) {
1325 dirProp
= (DirProp
)stdDirFromInsertDir
[dirInsertValue
& 0x000F];
1326 dirInsertValue
>>= 4;
1328 dirInsertIndex
= -1;
1329 dirProp
=dirProps
[i
];
1336 /* (X2, X3, X4, X5) */
1337 flags
|=DIRPROP_FLAG(BN
);
1338 levels
[i
]=previousLevel
;
1339 if (dirProp
==LRE
|| dirProp
==LRO
)
1340 /* least greater even level */
1341 newLevel
=(UBiDiLevel
)((embeddingLevel
+2)&~(UBIDI_LEVEL_OVERRIDE
|1));
1343 /* least greater odd level */
1344 newLevel
=(UBiDiLevel
)((NO_OVERRIDE(embeddingLevel
)+1)|1);
1345 if(newLevel
<=UBIDI_MAX_EXPLICIT_LEVEL
&& overflowIsolateCount
==0 &&
1346 overflowEmbeddingCount
==0) {
1348 lastCcDirProp
= dirProp
;
1349 embeddingLevel
=newLevel
;
1350 if(dirProp
==LRO
|| dirProp
==RLO
)
1351 embeddingLevel
|=UBIDI_LEVEL_OVERRIDE
;
1353 stack
[stackLast
]=embeddingLevel
;
1354 /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE and RLE
1355 since this has already been done for newLevel which is
1356 the source for embeddingLevel.
1359 if(overflowIsolateCount
==0)
1360 overflowEmbeddingCount
++;
1365 flags
|=DIRPROP_FLAG(BN
);
1366 levels
[i
]=previousLevel
;
1367 /* handle all the overflow cases first */
1368 if(overflowIsolateCount
) {
1371 if(overflowEmbeddingCount
) {
1372 overflowEmbeddingCount
--;
1375 if(stackLast
>0 && stack
[stackLast
]<ISOLATE
) { /* not an isolate entry */
1377 lastCcDirProp
= dirProp
;
1379 embeddingLevel
=(UBiDiLevel
)stack
[stackLast
];
1384 flags
|=(DIRPROP_FLAG(ON
)|DIRPROP_FLAG_LR(embeddingLevel
));
1385 levels
[i
]=NO_OVERRIDE(embeddingLevel
);
1386 if(NO_OVERRIDE(embeddingLevel
)!=NO_OVERRIDE(previousLevel
)) {
1387 bracketProcessBoundary(&bracketData
, lastCcPos
, lastCcDirProp
,
1388 previousLevel
, embeddingLevel
);
1389 flags
|=DIRPROP_FLAG_MULTI_RUNS
;
1391 previousLevel
=embeddingLevel
;
1394 /* least greater even level */
1395 newLevel
=(UBiDiLevel
)((embeddingLevel
+2)&~(UBIDI_LEVEL_OVERRIDE
|1));
1397 /* least greater odd level */
1398 newLevel
=(UBiDiLevel
)((NO_OVERRIDE(embeddingLevel
)+1)|1);
1399 if(newLevel
<=UBIDI_MAX_EXPLICIT_LEVEL
&& overflowIsolateCount
==0 &&
1400 overflowEmbeddingCount
==0) {
1401 flags
|=DIRPROP_FLAG(dirProp
);
1403 lastCcDirProp
= dirProp
;
1404 validIsolateCount
++;
1405 if(validIsolateCount
>pBiDi
->isolateCount
)
1406 pBiDi
->isolateCount
=validIsolateCount
;
1407 embeddingLevel
=newLevel
;
1408 /* we can increment stackLast without checking because newLevel
1409 will exceed UBIDI_MAX_EXPLICIT_LEVEL before stackLast overflows */
1411 stack
[stackLast
]=embeddingLevel
+ISOLATE
;
1412 bracketProcessLRI_RLI(&bracketData
, embeddingLevel
);
1414 /* make it WS so that it is handled by adjustWSLevels() */
1415 if (dirInsertIndex
< 0) {
1418 dirInsert
[i
] &= ~(0x000F << (4*dirInsertIndex
));
1419 dirInsert
[i
] |= (Insert_WS
<< (4*dirInsertIndex
));
1421 overflowIsolateCount
++;
1425 if(NO_OVERRIDE(embeddingLevel
)!=NO_OVERRIDE(previousLevel
)) {
1426 bracketProcessBoundary(&bracketData
, lastCcPos
, lastCcDirProp
,
1427 previousLevel
, embeddingLevel
);
1428 flags
|=DIRPROP_FLAG_MULTI_RUNS
;
1431 if(overflowIsolateCount
) {
1432 overflowIsolateCount
--;
1433 /* make it WS so that it is handled by adjustWSLevels() */
1434 if (dirInsertIndex
< 0) {
1437 dirInsert
[i
] &= ~(0x000F << (4*dirInsertIndex
));
1438 dirInsert
[i
] |= (Insert_WS
<< (4*dirInsertIndex
));
1441 else if(validIsolateCount
) {
1442 flags
|=DIRPROP_FLAG(PDI
);
1444 lastCcDirProp
= dirProp
;
1445 overflowEmbeddingCount
=0;
1446 while(stack
[stackLast
]<ISOLATE
) /* pop embedding entries */
1447 stackLast
--; /* until the last isolate entry */
1448 stackLast
--; /* pop also the last isolate entry */
1449 validIsolateCount
--;
1450 bracketProcessPDI(&bracketData
);
1452 /* make it WS so that it is handled by adjustWSLevels() */
1453 if (dirInsertIndex
< 0) {
1456 dirInsert
[i
] &= ~(0x000F << (4*dirInsertIndex
));
1457 dirInsert
[i
] |= (Insert_WS
<< (4*dirInsertIndex
));
1459 embeddingLevel
=(UBiDiLevel
)stack
[stackLast
]&~ISOLATE
;
1460 flags
|=(DIRPROP_FLAG(ON
)|DIRPROP_FLAG_LR(embeddingLevel
));
1461 previousLevel
=embeddingLevel
;
1462 levels
[i
]=NO_OVERRIDE(embeddingLevel
);
1465 flags
|=DIRPROP_FLAG(B
);
1466 levels
[i
]=GET_PARALEVEL(pBiDi
, i
);
/* NOTE(review): text[i+1] is read below; the check that i+1 is still inside
   the text is not visible in this excerpt - confirm it is present. */
1468 if(text
[i
]==CR
&& text
[i
+1]==LF
)
1469 break; /* skip CR when followed by LF */
1470 overflowEmbeddingCount
=overflowIsolateCount
=0;
1471 validIsolateCount
=0;
1473 previousLevel
=embeddingLevel
=GET_PARALEVEL(pBiDi
, i
+1);
1474 stack
[0]=embeddingLevel
; /* initialize base entry to para level, no override, no isolate */
1475 bracketProcessB(&bracketData
, embeddingLevel
);
1479 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
1480 /* they will get their levels set correctly in adjustWSLevels() */
1481 levels
[i
]=previousLevel
;
1482 flags
|=DIRPROP_FLAG(BN
);
1485 /* all other types are normal characters and get the "real" level */
1486 if(NO_OVERRIDE(embeddingLevel
)!=NO_OVERRIDE(previousLevel
)) {
1487 bracketProcessBoundary(&bracketData
, lastCcPos
, lastCcDirProp
,
1488 previousLevel
, embeddingLevel
);
1489 flags
|=DIRPROP_FLAG_MULTI_RUNS
;
1490 if(embeddingLevel
&UBIDI_LEVEL_OVERRIDE
)
1491 flags
|=DIRPROP_FLAG_O(embeddingLevel
);
1493 flags
|=DIRPROP_FLAG_E(embeddingLevel
);
1495 previousLevel
=embeddingLevel
;
1496 levels
[i
]=embeddingLevel
;
/* a FALSE result from bracketProcessChar() makes this function return -1 as the direction */
1497 if(!bracketProcessChar(&bracketData
, i
))
1498 return (UBiDiDirection
)-1;
1499 /* the dirProp may have been changed in bracketProcessChar() */
1500 flags
|=DIRPROP_FLAG(dirProps
[i
]);
1503 if (dirInsertIndex
< 0) {
1507 if(flags
&MASK_EMBEDDING
)
1508 flags
|=DIRPROP_FLAG_LR(pBiDi
->paraLevel
);
1509 if(pBiDi
->orderParagraphsLTR
&& (flags
&DIRPROP_FLAG(B
)))
1510 flags
|=DIRPROP_FLAG(L
);
1511 /* again, determine if the text is mixed-directional or single-directional */
1513 direction
=directionFromFlags(pBiDi
);
1519 * Use a pre-specified embedding levels array:
1521 * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
1522 * ignore all explicit codes (X9),
1523 * and check all the preset levels.
1525 * Recalculate the flags to have them reflect the real properties
1526 * after taking the explicit embeddings into account.
1528 static UBiDiDirection
1529 checkExplicitLevels(UBiDi
*pBiDi
, UErrorCode
*pErrorCode
) {
1530 DirProp
*dirProps
=pBiDi
->dirProps
;
1531 UBiDiLevel
*levels
=pBiDi
->levels
;
1532 int32_t isolateCount
=0;
1534 int32_t length
=pBiDi
->length
;
1535 Flags flags
=0; /* collect all directionalities in the text */
1536 pBiDi
->isolateCount
=0;
1538 int32_t currentParaIndex
= 0;
1539 int32_t currentParaLimit
= pBiDi
->paras
[0].limit
;
1540 int32_t currentParaLevel
= pBiDi
->paraLevel
;
1542 for(int32_t i
=0; i
<length
; ++i
) {
1543 UBiDiLevel level
=levels
[i
];
1544 DirProp dirProp
=dirProps
[i
];
1545 if(dirProp
==LRI
|| dirProp
==RLI
) {
1547 if(isolateCount
>pBiDi
->isolateCount
)
1548 pBiDi
->isolateCount
=isolateCount
;
1550 else if(dirProp
==PDI
)
1555 // optimized version of int32_t currentParaLevel = GET_PARALEVEL(pBiDi, i);
1556 if (pBiDi
->defaultParaLevel
!= 0 &&
1557 i
== currentParaLimit
&& (currentParaIndex
+ 1) < pBiDi
->paraCount
) {
1558 currentParaLevel
= pBiDi
->paras
[++currentParaIndex
].level
;
1559 currentParaLimit
= pBiDi
->paras
[currentParaIndex
].limit
;
1562 UBiDiLevel overrideFlag
= level
& UBIDI_LEVEL_OVERRIDE
;
1563 level
&= ~UBIDI_LEVEL_OVERRIDE
;
1564 if (level
< currentParaLevel
|| UBIDI_MAX_EXPLICIT_LEVEL
< level
) {
1567 // Paragraph separators are ok with explicit level 0.
1568 // Prevents reordering of paragraphs.
1570 // Treat explicit level 0 as a wildcard for the paragraph level.
1571 // Avoid making the caller guess what the paragraph level would be.
1572 level
= (UBiDiLevel
)currentParaLevel
;
1573 levels
[i
] = level
| overrideFlag
;
1576 // 1 <= level < currentParaLevel or UBIDI_MAX_EXPLICIT_LEVEL < level
1577 /* level out of bounds */
1578 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
1582 if (overrideFlag
!= 0) {
1583 /* keep the override flag in levels[i] but adjust the flags */
1584 flags
|=DIRPROP_FLAG_O(level
);
1587 flags
|=DIRPROP_FLAG_E(level
)|DIRPROP_FLAG(dirProp
);
1590 if(flags
&MASK_EMBEDDING
)
1591 flags
|=DIRPROP_FLAG_LR(pBiDi
->paraLevel
);
1592 /* determine if the text is mixed-directional or single-directional */
1594 return directionFromFlags(pBiDi
);
1597 /******************************************************************
1598 The Properties state machine table
1599 *******************************************************************
1601 All table cells are 8 bits:
1602 bits 0..4: next state
1603 bits 5..7: action to perform (if > 0)
1605 Cells may be of format "n" where n represents the next state
1606 (except for the rightmost column).
1607 Cells may also be of format "s(x,y)" where x represents an action
1608 to perform and y represents the next state.
1610 *******************************************************************
1611 Definitions and type for properties state table
1612 *******************************************************************
/* Number of columns in a row of the properties state table. */
#define IMPTABPROPS_COLUMNS 16
/* Index of the rightmost ("Res") column of the properties state table. */
#define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1)
/* Next state: bits 0..4 of a cell. */
#define GET_STATEPROPS(cell) ((cell)&0x1f)
/* Action: bits 5..7 of a cell. */
#define GET_ACTIONPROPS(cell) ((cell)>>5)
/* Build a cell from an action and a next state. Both arguments are
 * parenthesized so that expression arguments expand correctly. */
#define s(action, newState) ((uint8_t)((newState)+((action)<<5)))
/* Maps each dirProp value (array index) to its column group in impTabProps. */
static const uint8_t groupProp[] =
{
     0,     /* L   */
     1,     /* R   */
     2,     /* EN  */
     7,     /* ES  */
     8,     /* ET  */
     3,     /* AN  */
     9,     /* CS  */
     6,     /* B   */
     5,     /* S   */
     4,     /* WS  */
     4,     /* ON  */
    10,     /* LRE */
    10,     /* LRO */
    12,     /* AL  */
    10,     /* RLE */
    10,     /* RLO */
    10,     /* PDF */
    11,     /* NSM */
    10,     /* BN  */
     4,     /* FSI */
     4,     /* LRI */
     4,     /* RLI */
     4,     /* PDI */
    13,     /* ENL */
    14      /* ENR */
};
/* reduced dirProp: the values stored in the Res column of impTabProps */
enum {
    DirProp_L=0,
    DirProp_R=1,
    DirProp_EN=2,
    DirProp_AN=3,
    DirProp_ON=4,
    DirProp_S=5,
    DirProp_B=6
};
1627 /******************************************************************
1629 PROPERTIES STATE TABLE
1631 In table impTabProps,
1632 - the ON column regroups ON and WS, FSI, RLI, LRI and PDI
1633 - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF
1634 - the Res column is the reduced property assigned to a run
1636 Action 1: process current run1, init new run1
1638 3: process run1, process run2, init new run1
1639 4: process run1, set run1=run2, init new run2
1642 1) This table is used in resolveImplicitLevels().
1643 2) This table triggers actions when there is a change in the Bidi
1644 property of incoming characters (action 1).
1645 3) Most such property sequences are processed immediately (in
1646 fact, passed to processPropertySeq()).
1647 4) However, numbers are assembled as one sequence. This means
1648 that undefined situations (like CS following digits, until
1649 it is known if the next char will be a digit) are held until
1650 following chars define them.
1651 Example: digits followed by CS, then comes another CS or ON;
1652 the digits will be processed, then the CS assigned
1653 as the start of an ON sequence (action 3).
1654 5) There are cases where more than one sequence must be
1655 processed, for instance digits followed by CS followed by L:
1656 the digits must be processed as one sequence, and the CS
1657 must be processed as an ON sequence, all this before starting
1658 assembling chars for the opening L sequence.
1662 static const uint8_t impTabProps
[][IMPTABPROPS_COLUMNS
] =
1664 /* L , R , EN , AN , ON , S , B , ES , ET , CS , BN , NSM , AL , ENL , ENR , Res */
1665 /* 0 Init */ { 1 , 2 , 4 , 5 , 7 , 15 , 17 , 7 , 9 , 7 , 0 , 7 , 3 , 18 , 21 , DirProp_ON
},
1666 /* 1 L */ { 1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 1 , 1 , s(1,3),s(1,18),s(1,21), DirProp_L
},
1667 /* 2 R */ { s(1,1), 2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 2 , 2 , s(1,3),s(1,18),s(1,21), DirProp_R
},
1668 /* 3 AL */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8), 3 , 3 , 3 ,s(1,18),s(1,21), DirProp_R
},
1669 /* 4 EN */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10), 11 ,s(2,10), 4 , 4 , s(1,3), 18 , 21 , DirProp_EN
},
1670 /* 5 AN */ { s(1,1), s(1,2), s(1,4), 5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12), 5 , 5 , s(1,3),s(1,18),s(1,21), DirProp_AN
},
1671 /* 6 AL:EN/AN */ { s(1,1), s(1,2), 6 , 6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13), 6 , 6 , s(1,3), 18 , 21 , DirProp_AN
},
1672 /* 7 ON */ { s(1,1), s(1,2), s(1,4), s(1,5), 7 ,s(1,15),s(1,17), 7 ,s(2,14), 7 , 7 , 7 , s(1,3),s(1,18),s(1,21), DirProp_ON
},
1673 /* 8 AL:ON */ { s(1,1), s(1,2), s(1,6), s(1,6), 8 ,s(1,16),s(1,17), 8 , 8 , 8 , 8 , 8 , s(1,3),s(1,18),s(1,21), DirProp_ON
},
1674 /* 9 ET */ { s(1,1), s(1,2), 4 , s(1,5), 7 ,s(1,15),s(1,17), 7 , 9 , 7 , 9 , 9 , s(1,3), 18 , 21 , DirProp_ON
},
1675 /*10 EN+ES/CS */ { s(3,1), s(3,2), 4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 10 , s(4,7), s(3,3), 18 , 21 , DirProp_EN
},
1676 /*11 EN+ET */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 11 , s(1,7), 11 , 11 , s(1,3), 18 , 21 , DirProp_EN
},
1677 /*12 AN+CS */ { s(3,1), s(3,2), s(3,4), 5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 12 , s(4,7), s(3,3),s(3,18),s(3,21), DirProp_AN
},
1678 /*13 AL:EN/AN+CS */ { s(3,1), s(3,2), 6 , 6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8), 13 , s(4,8), s(3,3), 18 , 21 , DirProp_AN
},
1679 /*14 ON+ET */ { s(1,1), s(1,2), s(4,4), s(1,5), 7 ,s(1,15),s(1,17), 7 , 14 , 7 , 14 , 14 , s(1,3),s(4,18),s(4,21), DirProp_ON
},
1680 /*15 S */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7), 15 ,s(1,17), s(1,7), s(1,9), s(1,7), 15 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_S
},
1681 /*16 AL:S */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8), 16 ,s(1,17), s(1,8), s(1,8), s(1,8), 16 , s(1,8), s(1,3),s(1,18),s(1,21), DirProp_S
},
1682 /*17 B */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15), 17 , s(1,7), s(1,9), s(1,7), 17 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_B
},
1683 /*18 ENL */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,19), 20 ,s(2,19), 18 , 18 , s(1,3), 18 , 21 , DirProp_L
},
1684 /*19 ENL+ES/CS */ { s(3,1), s(3,2), 18 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 19 , s(4,7), s(3,3), 18 , 21 , DirProp_L
},
1685 /*20 ENL+ET */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 20 , s(1,7), 20 , 20 , s(1,3), 18 , 21 , DirProp_L
},
1686 /*21 ENR */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,22), 23 ,s(2,22), 21 , 21 , s(1,3), 18 , 21 , DirProp_AN
},
1687 /*22 ENR+ES/CS */ { s(3,1), s(3,2), 21 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 22 , s(4,7), s(3,3), 18 , 21 , DirProp_AN
},
1688 /*23 ENR+ET */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 23 , s(1,7), 23 , 23 , s(1,3), 18 , 21 , DirProp_AN
}
1691 /* we must undef macro s because the levels tables have a different
1692 * structure (4 bits for action and 4 bits for next state).
1696 /******************************************************************
1697 The levels state machine tables
1698 *******************************************************************
1700 All table cells are 8 bits:
1701 bits 0..3: next state
1702 bits 4..7: action to perform (if > 0)
1704 Cells may be of format "n" where n represents the next state
1705 (except for the rightmost column).
1706 Cells may also be of format "s(x,y)" where x represents an action
1707 to perform and y represents the next state.
1709 This format limits each table to 16 states each and to 15 actions.
1711 *******************************************************************
1712 Definitions and type for levels state tables
1713 *******************************************************************
/* Number of columns in a row of a levels state table: one per reduced
 * dirProp (DirProp_L .. DirProp_B) plus the Res column. */
#define IMPTABLEVELS_COLUMNS (DirProp_B + 2)
/* Index of the rightmost ("Res") column of a levels state table. */
#define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1)
/* Next state: bits 0..3 of a cell. */
#define GET_STATE(cell) ((cell)&0x0f)
/* Action: bits 4..7 of a cell. */
#define GET_ACTION(cell) ((cell)>>4)
/* Build a cell from an action and a next state. Both arguments are
 * parenthesized so that expression arguments expand correctly. */
#define s(action, newState) ((uint8_t)((newState)+((action)<<4)))
1721 typedef uint8_t ImpTab
[][IMPTABLEVELS_COLUMNS
];
1722 typedef uint8_t ImpAct
[];
1724 /* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct,
1725 * instead of having a pair of ImpTab and a pair of ImpAct.
/* Two levels state tables together with their two action maps. */
typedef struct ImpTabPair {
    const void *pImpTab[2];     /* the pair of state tables */
    const void *pImpAct[2];     /* the pair of action maps */
} ImpTabPair;
1732 /******************************************************************
1736 In all levels state tables,
1737 - state 0 is the initial state
1738 - the Res column is the increment to add to the text level
1739 for this property sequence.
1741 The impAct arrays for each table of a pair map the local action
1742 numbers of the table to the total list of actions. For instance,
1743 action 2 in a given table corresponds to the action number which
1744 appears in entry [2] of the impAct array for that table.
1745 The first entry of all impAct arrays must be 0.
1747 Action 1: init conditional sequence
1748 2: prepend conditional sequence to current sequence
1749 3: set ON sequence to new level - 1
1750 4: init EN/AN/ON sequence
1751 5: fix EN/AN/ON sequence followed by R
1752 6: set previous level sequence to level 2
1755 1) These tables are used in processPropertySeq(). The input
1756 is property sequences as determined by resolveImplicitLevels.
1757 2) Most such property sequences are processed immediately
1758 (levels are assigned).
1759 3) However, some sequences cannot be assigned a final level till
1760 one or more following sequences are received. For instance,
1761 ON following an R sequence within an even-level paragraph.
1762 If the following sequence is R, the ON sequence will be
1763 assigned basic run level+1, and so will the R sequence.
1764 4) S is generally handled like ON, since its level will be fixed
1765 to paragraph level in adjustWSLevels().
1769 static const ImpTab impTabL_DEFAULT
= /* Even paragraph level */
1770 /* In this table, conditional sequences receive the lower possible level
1771 until proven otherwise.
1774 /* L , R , EN , AN , ON , S , B , Res */
1775 /* 0 : init */ { 0 , 1 , 0 , 2 , 0 , 0 , 0 , 0 },
1776 /* 1 : R */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 1 },
1777 /* 2 : AN */ { 0 , 1 , 0 , 2 , s(1,5), s(1,5), 0 , 2 },
1778 /* 3 : R+EN/AN */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 2 },
1779 /* 4 : R+ON */ { 0 , s(2,1), s(3,3), s(3,3), 4 , 4 , 0 , 0 },
1780 /* 5 : AN+ON */ { 0 , s(2,1), 0 , s(3,2), 5 , 5 , 0 , 0 }
1782 static const ImpTab impTabR_DEFAULT
= /* Odd paragraph level */
1783 /* In this table, conditional sequences receive the lower possible level
1784 until proven otherwise.
1787 /* L , R , EN , AN , ON , S , B , Res */
1788 /* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 },
1789 /* 1 : L */ { 1 , 0 , 1 , 3 , s(1,4), s(1,4), 0 , 1 },
1790 /* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 },
1791 /* 3 : L+AN */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 1 },
1792 /* 4 : L+ON */ { s(2,1), 0 , s(2,1), 3 , 4 , 4 , 0 , 0 },
1793 /* 5 : L+AN+ON */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 0 }
1795 static const ImpAct impAct0
= {0,1,2,3,4};
1796 static const ImpTabPair impTab_DEFAULT
= {{&impTabL_DEFAULT
,
1798 {&impAct0
, &impAct0
}};
1800 static const ImpTab impTabL_NUMBERS_SPECIAL
= /* Even paragraph level */
1801 /* In this table, conditional sequences receive the lower possible level
1802 until proven otherwise.
1805 /* L , R , EN , AN , ON , S , B , Res */
1806 /* 0 : init */ { 0 , 2 , s(1,1), s(1,1), 0 , 0 , 0 , 0 },
1807 /* 1 : L+EN/AN */ { 0 , s(4,2), 1 , 1 , 0 , 0 , 0 , 0 },
1808 /* 2 : R */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 1 },
1809 /* 3 : R+ON */ { 0 , s(2,2), s(3,4), s(3,4), 3 , 3 , 0 , 0 },
1810 /* 4 : R+EN/AN */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 2 }
1812 static const ImpTabPair impTab_NUMBERS_SPECIAL
= {{&impTabL_NUMBERS_SPECIAL
,
1814 {&impAct0
, &impAct0
}};
1816 static const ImpTab impTabL_GROUP_NUMBERS_WITH_R
=
1817 /* In this table, EN/AN+ON sequences receive levels as if associated with R
1818 until proven that there is L or sor/eor on both sides. AN is handled like EN.
1821 /* L , R , EN , AN , ON , S , B , Res */
1822 /* 0 init */ { 0 , 3 , s(1,1), s(1,1), 0 , 0 , 0 , 0 },
1823 /* 1 EN/AN */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 2 },
1824 /* 2 EN/AN+ON */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 1 },
1825 /* 3 R */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 1 },
1826 /* 4 R+ON */ { s(2,0), 3 , 5 , 5 , 4 , s(2,0), s(2,0), 1 },
1827 /* 5 R+EN/AN */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 2 }
1829 static const ImpTab impTabR_GROUP_NUMBERS_WITH_R
=
1830 /* In this table, EN/AN+ON sequences receive levels as if associated with R
1831 until proven that there is L on both sides. AN is handled like EN.
1834 /* L , R , EN , AN , ON , S , B , Res */
1835 /* 0 init */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 0 },
1836 /* 1 EN/AN */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 1 },
1837 /* 2 L */ { 2 , 0 , s(1,4), s(1,4), s(1,3), 0 , 0 , 1 },
1838 /* 3 L+ON */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 0 },
1839 /* 4 L+EN/AN */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 1 }
1841 static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R
= {
1842 {&impTabL_GROUP_NUMBERS_WITH_R
,
1843 &impTabR_GROUP_NUMBERS_WITH_R
},
1844 {&impAct0
, &impAct0
}};
1847 static const ImpTab impTabL_INVERSE_NUMBERS_AS_L
=
1848 /* This table is identical to the Default LTR table except that EN and AN are
1852 /* L , R , EN , AN , ON , S , B , Res */
1853 /* 0 : init */ { 0 , 1 , 0 , 0 , 0 , 0 , 0 , 0 },
1854 /* 1 : R */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 1 },
1855 /* 2 : AN */ { 0 , 1 , 0 , 0 , s(1,5), s(1,5), 0 , 2 },
1856 /* 3 : R+EN/AN */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 2 },
1857 /* 4 : R+ON */ { s(2,0), 1 , s(2,0), s(2,0), 4 , 4 , s(2,0), 1 },
1858 /* 5 : AN+ON */ { s(2,0), 1 , s(2,0), s(2,0), 5 , 5 , s(2,0), 1 }
1860 static const ImpTab impTabR_INVERSE_NUMBERS_AS_L
=
1861 /* This table is identical to the Default RTL table except that EN and AN are
1865 /* L , R , EN , AN , ON , S , B , Res */
1866 /* 0 : init */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 0 },
1867 /* 1 : L */ { 1 , 0 , 1 , 1 , s(1,4), s(1,4), 0 , 1 },
1868 /* 2 : EN/AN */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 1 },
1869 /* 3 : L+AN */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 1 },
1870 /* 4 : L+ON */ { s(2,1), 0 , s(2,1), s(2,1), 4 , 4 , 0 , 0 },
1871 /* 5 : L+AN+ON */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 0 }
1873 static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L
= {
1874 {&impTabL_INVERSE_NUMBERS_AS_L
,
1875 &impTabR_INVERSE_NUMBERS_AS_L
},
1876 {&impAct0
, &impAct0
}};
1878 static const ImpTab impTabR_INVERSE_LIKE_DIRECT
= /* Odd paragraph level */
1879 /* In this table, conditional sequences receive the lower possible level
1880 until proven otherwise.
1883 /* L , R , EN , AN , ON , S , B , Res */
1884 /* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 },
1885 /* 1 : L */ { 1 , 0 , 1 , 2 , s(1,3), s(1,3), 0 , 1 },
1886 /* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 },
1887 /* 3 : L+ON */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 0 },
1888 /* 4 : L+ON+AN */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 3 },
1889 /* 5 : L+AN+ON */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 2 },
1890 /* 6 : L+ON+EN */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 1 }
1892 static const ImpAct impAct1
= {0,1,13,14};
1893 /* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc"
1895 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT
= {
1897 &impTabR_INVERSE_LIKE_DIRECT
},
1898 {&impAct0
, &impAct1
}};
1900 static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS
=
1901 /* The case handled in this table is (visually): R EN L
1904 /* L , R , EN , AN , ON , S , B , Res */
1905 /* 0 : init */ { 0 , s(6,3), 0 , 1 , 0 , 0 , 0 , 0 },
1906 /* 1 : L+AN */ { 0 , s(6,3), 0 , 1 , s(1,2), s(3,0), 0 , 4 },
1907 /* 2 : L+AN+ON */ { s(2,0), s(6,3), s(2,0), 1 , 2 , s(3,0), s(2,0), 3 },
1908 /* 3 : R */ { 0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0), 0 , 3 },
1909 /* 4 : R+ON */ { s(3,0), s(4,3), s(5,5), s(5,6), 4 , s(3,0), s(3,0), 3 },
1910 /* 5 : R+EN */ { s(3,0), s(4,3), 5 , s(5,6), s(1,4), s(3,0), s(3,0), 4 },
1911 /* 6 : R+AN */ { s(3,0), s(4,3), s(5,5), 6 , s(1,4), s(3,0), s(3,0), 4 }
1913 static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS
=
1914 /* The cases handled in this table are (visually): R EN L
1918 /* L , R , EN , AN , ON , S , B , Res */
1919 /* 0 : init */ { s(1,3), 0 , 1 , 1 , 0 , 0 , 0 , 0 },
1920 /* 1 : R+EN/AN */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 1 },
1921 /* 2 : R+EN/AN+ON */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 0 },
1922 /* 3 : L */ { 3 , 0 , 3 , s(3,6), s(1,4), s(4,0), 0 , 1 },
1923 /* 4 : L+ON */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 0 },
1924 /* 5 : L+ON+EN */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 1 },
1925 /* 6 : L+AN */ { s(5,3), s(4,0), 6 , 6 , 4 , s(4,0), s(4,0), 3 }
1927 static const ImpAct impAct2
= {0,1,2,5,6,7,8};
1928 static const ImpAct impAct3
= {0,1,9,10,11,12};
1929 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS
= {
1930 {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS
,
1931 &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS
},
1932 {&impAct2
, &impAct3
}};
1934 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL
= {
1935 {&impTabL_NUMBERS_SPECIAL
,
1936 &impTabR_INVERSE_LIKE_DIRECT
},
1937 {&impAct0
, &impAct1
}};
1939 static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS
=
1940 /* The case handled in this table is (visually): R EN L
1943 /* L , R , EN , AN , ON , S , B , Res */
1944 /* 0 : init */ { 0 , s(6,2), 1 , 1 , 0 , 0 , 0 , 0 },
1945 /* 1 : L+EN/AN */ { 0 , s(6,2), 1 , 1 , 0 , s(3,0), 0 , 4 },
1946 /* 2 : R */ { 0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0), 0 , 3 },
1947 /* 3 : R+ON */ { s(3,0), s(4,2), s(5,4), s(5,4), 3 , s(3,0), s(3,0), 3 },
1948 /* 4 : R+EN/AN */ { s(3,0), s(4,2), 4 , 4 , s(1,3), s(3,0), s(3,0), 4 }
1950 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS
= {
1951 {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS
,
1952 &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS
},
1953 {&impAct2
, &impAct3
}};
1958 const ImpTab
* pImpTab
; /* level table pointer */
1959 const ImpAct
* pImpAct
; /* action map array */
1960 int32_t startON
; /* start of ON sequence */
1961 int32_t startL2EN
; /* start of level 2 sequence */
1962 int32_t lastStrongRTL
; /* index of last found R or AL */
1963 int32_t state
; /* current state */
1964 int32_t runStart
; /* start position of the run */
1965 UBiDiLevel runLevel
; /* run level before implicit solving */
1968 /*------------------------------------------------------------------------*/
/*
 * Appends one entry to pBiDi->insertPoints.
 *  - If capacity is 0, a first buffer of FIRSTALLOC Points is obtained with
 *    uprv_malloc(); if that fails, U_MEMORY_ALLOCATION_ERROR is stored in
 *    insertPoints.errorCode.
 *  - If size has reached capacity, the buffer is grown to twice its capacity
 *    with uprv_realloc(); on failure the previous buffer pointer is put back
 *    and the same error code is stored, otherwise capacity is doubled.
 *  - The new point is written at points[size] and size is incremented.
 * NOTE(review): the statements that fill `point` from pos/flag and the early
 * exits after an allocation failure are not visible in this excerpt - confirm
 * against the full file.
 */
1971 addPoint(UBiDi
*pBiDi
, int32_t pos
, int32_t flag
)
1972 /* param pos: position where to insert
1973 param flag: one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER
1976 #define FIRSTALLOC 10
1978 InsertPoints
* pInsertPoints
=&(pBiDi
->insertPoints
);
1980 if (pInsertPoints
->capacity
== 0)
1982 pInsertPoints
->points
=static_cast<Point
*>(uprv_malloc(sizeof(Point
)*FIRSTALLOC
));
1983 if (pInsertPoints
->points
== NULL
)
1985 pInsertPoints
->errorCode
=U_MEMORY_ALLOCATION_ERROR
;
1988 pInsertPoints
->capacity
=FIRSTALLOC
;
1990 if (pInsertPoints
->size
>= pInsertPoints
->capacity
) /* no room for new point */
1992 Point
* savePoints
=pInsertPoints
->points
;
1993 pInsertPoints
->points
=static_cast<Point
*>(uprv_realloc(pInsertPoints
->points
,
1994 pInsertPoints
->capacity
*2*sizeof(Point
)));
1995 if (pInsertPoints
->points
== NULL
)
1997 pInsertPoints
->points
=savePoints
;
1998 pInsertPoints
->errorCode
=U_MEMORY_ALLOCATION_ERROR
;
2001 else pInsertPoints
->capacity
*=2;
2005 pInsertPoints
->points
[pInsertPoints
->size
]=point
;
2006 pInsertPoints
->size
++;
/*
 * Walks the positions k in [start, limit). For each step the directional
 * class is taken either from the pending inserted-control value for that
 * position (low 4 bits of dirInsert[k], mapped through stdDirFromInsertDir,
 * after which the value is shifted right by 4) or, when no inserted value is
 * pending, from dirProps[k]. LRI and RLI are tested for explicitly.
 * dirInsert may be NULL, in which case only dirProps[] is consulted.
 * NOTE(review): the lines that update isolateCount and that store `level`
 * into levels[] are not visible in this excerpt; presumably `level` is only
 * written for positions that are not inside an isolate - confirm.
 */
2011 setLevelsOutsideIsolates(UBiDi
*pBiDi
, int32_t start
, int32_t limit
, UBiDiLevel level
)
2013 DirProp
*dirProps
=pBiDi
->dirProps
, dirProp
;
2014 uint16_t *dirInsert
= pBiDi
->dirInsert
; /* may be NULL */
2015 UBiDiLevel
*levels
=pBiDi
->levels
;
2016 int32_t dirInsertValue
;
2017 int8_t dirInsertIndex
; /* position within dirInsertValue, if any */
2018 int32_t isolateCount
=0, k
;
2020 dirInsertIndex
= -1; /* indicate that we have not checked dirInsert yet */
2021 for(k
=start
; k
<limit
; k
++) {
2022 if (dirInsert
!= NULL
&& dirInsertIndex
< 0) {
2023 dirInsertValue
= dirInsert
[k
];
2025 if (dirInsertValue
> 0) {
2027 dirProp
= (DirProp
)stdDirFromInsertDir
[dirInsertValue
& 0x000F];
2028 dirInsertValue
>>= 4;
2030 dirInsertIndex
= -1;
2031 dirProp
=dirProps
[k
];
2037 if(dirProp
==LRI
|| dirProp
==RLI
)
2042 /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
2045 * This implementation of the (Wn) rules applies all rules in one pass.
2046 * In order to do so, it needs a look-ahead of typically 1 character
2047 * (except for W5: sequences of ET) and keeps track of changes
2048 * in a rule Wp that affect a later Wq (p<q).
2050 * The (Nn) and (In) rules are also performed in that same single loop,
2051 * but effectively one iteration behind for white space.
2053 * Since all implicit rules are performed in one step, it is not necessary
2054 * to actually store the intermediate directional properties in dirProps[].
/*
 * Feeds one resolved property (_prop) covering [start, limit) into the levels
 * state machine held in *pLevState.
 *  - The table cell is looked up in *pImpTab by the previous state and _prop;
 *    GET_STATE(cell) becomes the new state and GET_ACTION(cell) is mapped
 *    through *pImpAct to the action number handled by the cases below.
 *  - addLevel is read from the IMPTABLEVELS_RES column of the new state's row.
 *  - After the action, if addLevel is non-zero or the action moved `start`
 *    back before the original start (start0), the level runLevel+addLevel is
 *    applied to [start, limit): directly when start>=runStart, otherwise via
 *    setLevelsOutsideIsolates().
 * Actions 5-11 additionally record or confirm LRM/RLM insert points through
 * addPoint() and pBiDi->insertPoints.
 * NOTE(review): the switch statement itself, the breaks and several loop
 * bodies are not visible in this excerpt.
 */
2058 processPropertySeq(UBiDi
*pBiDi
, LevState
*pLevState
, uint8_t _prop
,
2059 int32_t start
, int32_t limit
) {
2060 uint8_t cell
, oldStateSeq
, actionSeq
;
2061 const ImpTab
* pImpTab
=pLevState
->pImpTab
;
2062 const ImpAct
* pImpAct
=pLevState
->pImpAct
;
2063 UBiDiLevel
* levels
=pBiDi
->levels
;
2064 UBiDiLevel level
, addLevel
;
2065 InsertPoints
* pInsertPoints
;
2068 start0
=start
; /* save original start position */
2069 oldStateSeq
=(uint8_t)pLevState
->state
;
2070 cell
=(*pImpTab
)[oldStateSeq
][_prop
];
2071 pLevState
->state
=GET_STATE(cell
); /* isolate the new state */
2072 actionSeq
=(*pImpAct
)[GET_ACTION(cell
)]; /* isolate the action */
2073 addLevel
=(*pImpTab
)[pLevState
->state
][IMPTABLEVELS_RES
];
2077 case 1: /* init ON seq */
2078 pLevState
->startON
=start0
;
2081 case 2: /* prepend ON seq to current seq */
2082 start
=pLevState
->startON
;
2085 case 3: /* EN/AN after R+ON */
2086 level
=pLevState
->runLevel
+1;
2087 setLevelsOutsideIsolates(pBiDi
, pLevState
->startON
, start0
, level
);
2090 case 4: /* EN/AN before R for NUMBERS_SPECIAL */
2091 level
=pLevState
->runLevel
+2;
2092 setLevelsOutsideIsolates(pBiDi
, pLevState
->startON
, start0
, level
);
2095 case 5: /* L or S after possible relevant EN/AN */
2096 /* check if we had EN after R/AL */
2097 if (pLevState
->startL2EN
>= 0) {
2098 addPoint(pBiDi
, pLevState
->startL2EN
, LRM_BEFORE
);
2100 pLevState
->startL2EN
=-1; /* not within previous if since could also be -2 */
2101 /* check if we had any relevant EN/AN after R/AL */
2102 pInsertPoints
=&(pBiDi
->insertPoints
);
2103 if ((pInsertPoints
->capacity
== 0) ||
2104 (pInsertPoints
->size
<= pInsertPoints
->confirmed
))
2106 /* nothing, just clean up */
2107 pLevState
->lastStrongRTL
=-1;
2108 /* check if we have a pending conditional segment */
2109 level
=(*pImpTab
)[oldStateSeq
][IMPTABLEVELS_RES
];
2110 if ((level
& 1) && (pLevState
->startON
> 0)) { /* after ON */
2111 start
=pLevState
->startON
; /* reset to basic run level */
2113 if (_prop
== DirProp_S
) /* add LRM before S */
2115 addPoint(pBiDi
, start0
, LRM_BEFORE
);
2116 pInsertPoints
->confirmed
=pInsertPoints
->size
;
2120 /* reset previous RTL cont to level for LTR text */
2121 for (k
=pLevState
->lastStrongRTL
+1; k
<start0
; k
++)
2123 /* reset odd level, leave runLevel+2 as is */
2124 levels
[k
]=(levels
[k
] - 2) & ~1;
2126 /* mark insert points as confirmed */
2127 pInsertPoints
->confirmed
=pInsertPoints
->size
;
2128 pLevState
->lastStrongRTL
=-1;
2129 if (_prop
== DirProp_S
) /* add LRM before S */
2131 addPoint(pBiDi
, start0
, LRM_BEFORE
);
2132 pInsertPoints
->confirmed
=pInsertPoints
->size
;
2136 case 6: /* R/AL after possible relevant EN/AN */
2138 pInsertPoints
=&(pBiDi
->insertPoints
);
2139 if (pInsertPoints
->capacity
> 0)
2140 /* remove all non confirmed insert points */
2141 pInsertPoints
->size
=pInsertPoints
->confirmed
;
2142 pLevState
->startON
=-1;
2143 pLevState
->startL2EN
=-1;
2144 pLevState
->lastStrongRTL
=limit
- 1;
2147 case 7: /* EN/AN after R/AL + possible cont */
2148 /* check for real AN */
2149 if ((_prop
== DirProp_AN
) && (pBiDi
->dirProps
[start0
] == AN
) &&
2150 (pBiDi
->reorderingMode
!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
))
2153 if (pLevState
->startL2EN
== -1) /* if no relevant EN already found */
2155 /* just note the rightmost digit as a strong RTL */
2156 pLevState
->lastStrongRTL
=limit
- 1;
2159 if (pLevState
->startL2EN
>= 0) /* after EN, no AN */
2161 addPoint(pBiDi
, pLevState
->startL2EN
, LRM_BEFORE
);
2162 pLevState
->startL2EN
=-2;
2165 addPoint(pBiDi
, start0
, LRM_BEFORE
);
2168 /* if first EN/AN after R/AL */
2169 if (pLevState
->startL2EN
== -1) {
2170 pLevState
->startL2EN
=start0
;
2174 case 8: /* note location of latest R/AL */
2175 pLevState
->lastStrongRTL
=limit
- 1;
2176 pLevState
->startON
=-1;
2179 case 9: /* L after R+ON/EN/AN */
2180 /* include possible adjacent number on the left */
2181 for (k
=start0
-1; k
>=0 && !(levels
[k
]&1); k
--);
2183 addPoint(pBiDi
, k
, RLM_BEFORE
); /* add RLM before */
2184 pInsertPoints
=&(pBiDi
->insertPoints
);
2185 pInsertPoints
->confirmed
=pInsertPoints
->size
; /* confirm it */
2187 pLevState
->startON
=start0
;
2190 case 10: /* AN after L */
2191 /* AN numbers between L text on both sides may be trouble. */
2192 /* tentatively bracket with LRMs; will be confirmed if followed by L */
2193 addPoint(pBiDi
, start0
, LRM_BEFORE
); /* add LRM before */
2194 addPoint(pBiDi
, start0
, LRM_AFTER
); /* add LRM after */
2197 case 11: /* R after L+ON/EN/AN */
2198 /* false alert, infirm LRMs around previous AN */
2199 pInsertPoints
=&(pBiDi
->insertPoints
);
2200 pInsertPoints
->size
=pInsertPoints
->confirmed
;
2201 if (_prop
== DirProp_S
) /* add RLM before S */
2203 addPoint(pBiDi
, start0
, RLM_BEFORE
);
2204 pInsertPoints
->confirmed
=pInsertPoints
->size
;
2208 case 12: /* L after L+ON/AN */
2209 level
=pLevState
->runLevel
+ addLevel
;
2210 for(k
=pLevState
->startON
; k
<start0
; k
++) {
2211 if (levels
[k
]<level
)
2214 pInsertPoints
=&(pBiDi
->insertPoints
);
2215 pInsertPoints
->confirmed
=pInsertPoints
->size
; /* confirm inserts */
2216 pLevState
->startON
=start0
;
2219 case 13: /* L after L+ON+EN/AN/ON */
2220 level
=pLevState
->runLevel
;
2221 for(k
=start0
-1; k
>=pLevState
->startON
; k
--) {
2222 if(levels
[k
]==level
+3) {
2223 while(levels
[k
]==level
+3) {
2226 while(levels
[k
]==level
) {
2230 if(levels
[k
]==level
+2) {
2238 case 14: /* R after L+ON+EN/AN/ON */
2239 level
=pLevState
->runLevel
+1;
2240 for(k
=start0
-1; k
>=pLevState
->startON
; k
--) {
2241 if(levels
[k
]>level
) {
2247 default: /* we should never get here */
2251 if((addLevel
) || (start
< start0
)) {
2252 level
=pLevState
->runLevel
+ addLevel
;
2253 if(start
>=pLevState
->runStart
) {
2254 for(k
=start
; k
<limit
; k
++) {
2258 setLevelsOutsideIsolates(pBiDi
, start
, limit
, level
);
2264 * Returns the directionality of the last strong character at the end of the prologue, if any.
2265 * Requires prologue!=null.
/*
 * Scans pBiDi->prologue backwards from proLength, one code point at a time
 * (U16_PREV steps i back over a surrogate pair as a unit), and classifies
 * each code point with ubidi_getCustomizedClass(). R and AL are tested
 * together in one branch.
 * NOTE(review): the return statements and the other class tests are not
 * visible in this excerpt; the caller compares the result with DirProp_ON.
 */
2268 lastL_R_AL(UBiDi
*pBiDi
) {
2269 const UChar
*text
=pBiDi
->prologue
;
2270 int32_t length
=pBiDi
->proLength
;
2274 for(i
=length
; i
>0; ) {
2275 /* i is decremented by U16_PREV */
2276 U16_PREV(text
, 0, i
, uchar
);
2277 dirProp
=(DirProp
)ubidi_getCustomizedClass(pBiDi
, uchar
);
2281 if(dirProp
==R
|| dirProp
==AL
) {
2292 * Returns the directionality of the first strong character, or digit, in the epilogue, if any.
2293 * Requires epilogue!=null.
/*
 * Scans pBiDi->epilogue forwards from index 0 up to epiLength, one code point
 * at a time (U16_NEXT advances i past a surrogate pair as a unit), and
 * classifies each code point with ubidi_getCustomizedClass(). R and AL are
 * tested together in one branch.
 * NOTE(review): the return statements and the other class tests are not
 * visible in this excerpt; the caller compares the result with DirProp_ON.
 */
2296 firstL_R_AL_EN_AN(UBiDi
*pBiDi
) {
2297 const UChar
*text
=pBiDi
->epilogue
;
2298 int32_t length
=pBiDi
->epiLength
;
2302 for(i
=0; i
<length
; ) {
2303 /* i is incremented by U16_NEXT */
2304 U16_NEXT(text
, i
, length
, uchar
);
2305 dirProp
=(DirProp
)ubidi_getCustomizedClass(pBiDi
, uchar
);
2309 if(dirProp
==R
|| dirProp
==AL
) {
/*
 * Resolves the implicit levels for the run [start, limit) whose boundary
 * types are sor and eor.
 *  - levState is initialised from the run: runStart=start,
 *    runLevel=levels[start], and the levels table / action map are picked
 *    from pBiDi->pImpTabPair by the parity of runLevel.
 *  - When start==0 and a prologue is set, lastL_R_AL() is consulted.
 *  - If the run begins with a PDI (in dirProps[] or as an inserted control in
 *    dirInsert[]) and an isolates[] entry is pending, the saved startON,
 *    start1, stateImp and state are restored from it and isolateCount is
 *    decremented; otherwise startON is set to -1 and sor is fed to
 *    processPropertySeq().
 *  - The main loop runs i from start to limit inclusive, steps the
 *    properties state machine (impTabProps) and, depending on actionImp,
 *    hands completed sequences to processPropertySeq().
 *  - When limit==pBiDi->length and an epilogue is set,
 *    firstL_R_AL_EN_AN() is consulted.
 *  - Finally the last character that is not BN/explicit is located; if it is
 *    LRI/RLI and limit<pBiDi->length the current state is saved in a new
 *    isolates[] entry; eor is fed to processPropertySeq() (presumably only in
 *    the other case - the else is not visible here).
 * NOTE(review): many interior lines (declarations, switch, breaks, else
 * branches) are missing from this excerpt.
 */
2323 resolveImplicitLevels(UBiDi
*pBiDi
,
2324 int32_t start
, int32_t limit
,
2325 DirProp sor
, DirProp eor
) {
2326 const DirProp
*dirProps
=pBiDi
->dirProps
;
2327 uint16_t *dirInsert
= pBiDi
->dirInsert
; /* may be NULL */
2329 int32_t dirInsertValue
;
2331 int32_t i
, start1
, start2
;
2332 uint16_t oldStateImp
, stateImp
, actionImp
;
2333 uint8_t gprop
, resProp
, cell
;
2335 DirProp nextStrongProp
=R
;
2336 int32_t nextStrongPos
=-1;
2338 /* check for RTL inverse BiDi mode */
2339 /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to
2340 * loop on the text characters from end to start.
2341 * This would need a different properties state table (at least different
2342 * actions) and different levels state tables (maybe very similar to the
2343 * LTR corresponding ones).
2346 ((start
<pBiDi
->lastArabicPos
) && (GET_PARALEVEL(pBiDi
, start
) & 1) &&
2347 (pBiDi
->reorderingMode
==UBIDI_REORDER_INVERSE_LIKE_DIRECT
||
2348 pBiDi
->reorderingMode
==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
));
2350 /* initialize for property and levels state tables */
2351 levState
.startL2EN
=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
2352 levState
.lastStrongRTL
=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
2353 levState
.runStart
=start
;
2354 levState
.runLevel
=pBiDi
->levels
[start
];
2355 levState
.pImpTab
=(const ImpTab
*)((pBiDi
->pImpTabPair
)->pImpTab
)[levState
.runLevel
&1];
2356 levState
.pImpAct
=(const ImpAct
*)((pBiDi
->pImpTabPair
)->pImpAct
)[levState
.runLevel
&1];
2357 if(start
==0 && pBiDi
->proLength
>0) {
2358 DirProp lastStrong
=lastL_R_AL(pBiDi
);
2359 if(lastStrong
!=DirProp_ON
) {
2363 /* The isolates[] entries contain enough information to
2364 resume the bidi algorithm in the same state as it was
2365 when it was interrupted by an isolate sequence. */
2367 if (dirInsert
!= NULL
) {
2368 dirInsertValue
= dirInsert
[start
];
2369 while (dirInsertValue
> 0) {
2370 if ((dirInsertValue
& 0x000F) == Insert_PDI
) {
2373 dirInsertValue
>>= 4;
2376 if((dirProps
[start
]==PDI
|| dirInsertValue
>0) && pBiDi
->isolateCount
>= 0) {
2377 levState
.startON
=pBiDi
->isolates
[pBiDi
->isolateCount
].startON
;
2378 start1
=pBiDi
->isolates
[pBiDi
->isolateCount
].start1
;
2379 stateImp
=pBiDi
->isolates
[pBiDi
->isolateCount
].stateImp
;
2380 levState
.state
=pBiDi
->isolates
[pBiDi
->isolateCount
].state
;
2381 pBiDi
->isolateCount
--;
2383 levState
.startON
=-1;
2385 if(dirProps
[start
]==NSM
)
2390 processPropertySeq(pBiDi
, &levState
, sor
, start
, start
);
2392 start2
=start
; /* to make Java compiler happy */
2394 for(i
=start
; i
<=limit
; i
++) {
2398 for(k
=limit
-1; k
>start
&& dirInsertValue
<= 0; k
--) {
2399 dirProp
= dirProps
[k
];
2400 if ((DIRPROP_FLAG(dirProp
)&MASK_BN_EXPLICIT
) == 0) {
2404 if (dirInsert
!= NULL
) {
2405 dirInsertValue
= dirInsert
[k
];
2406 while (dirInsertValue
> 0) {
2407 dirProp
= (DirProp
)stdDirFromInsertDir
[dirInsertValue
& 0x000F];
2408 if ((DIRPROP_FLAG(dirProp
)&MASK_BN_EXPLICIT
) == 0) {
2411 dirInsertValue
>>= 4;
2416 dirProp
= dirProps
[k
];
2418 if(dirProp
==LRI
|| dirProp
==RLI
)
2419 break; /* no forced closing for sequence ending with LRI/RLI */
2422 DirProp prop
, prop1
;
2425 pBiDi
->isolateCount
=-1; /* current isolates stack entry == none */
2429 /* AL before EN does not make it AN */
2431 } else if(prop
==EN
) {
2432 if(nextStrongPos
<=i
) {
2433 /* look for next strong char (L/R/AL) */
2435 nextStrongProp
=R
; /* set default */
2436 nextStrongPos
=limit
;
2437 for(j
=i
+1; j
<limit
; j
++) {
2439 if(prop1
==L
|| prop1
==R
|| prop1
==AL
) {
2440 nextStrongProp
=prop1
;
2446 if(nextStrongProp
==AL
) {
2451 gprop
=groupProp
[prop
];
2453 oldStateImp
=stateImp
;
2454 cell
=impTabProps
[oldStateImp
][gprop
];
2455 stateImp
=GET_STATEPROPS(cell
); /* isolate the new state */
2456 actionImp
=GET_ACTIONPROPS(cell
); /* isolate the action */
2457 if((i
==limit
) && (actionImp
==0)) {
2458 /* there is an unprocessed sequence if its property == eor */
2459 actionImp
=1; /* process the last sequence */
2462 resProp
=impTabProps
[oldStateImp
][IMPTABPROPS_RES
];
2464 case 1: /* process current seq1, init new seq1 */
2465 processPropertySeq(pBiDi
, &levState
, resProp
, start1
, i
);
2468 case 2: /* init new seq2 */
2471 case 3: /* process seq1, process seq2, init new seq1 */
2472 processPropertySeq(pBiDi
, &levState
, resProp
, start1
, start2
);
2473 processPropertySeq(pBiDi
, &levState
, DirProp_ON
, start2
, i
);
2476 case 4: /* process seq1, set seq1=seq2, init new seq2 */
2477 processPropertySeq(pBiDi
, &levState
, resProp
, start1
, start2
);
2481 default: /* we should never get here */
2487 /* flush possible pending sequence, e.g. ON */
2488 if(limit
==pBiDi
->length
&& pBiDi
->epiLength
>0) {
2489 DirProp firstStrong
=firstL_R_AL_EN_AN(pBiDi
);
2490 if(firstStrong
!=DirProp_ON
) {
2495 /* look for the last char not a BN or LRE/RLE/LRO/RLO/PDF */
2497 for(i
=limit
-1; i
>start
&& dirInsertValue
<= 0; i
--) {
2498 dirProp
=dirProps
[i
];
2499 if ((DIRPROP_FLAG(dirProp
)&MASK_BN_EXPLICIT
) == 0) {
2503 if (dirInsert
!= NULL
) {
2504 dirInsertValue
= dirInsert
[i
];
2505 while (dirInsertValue
> 0) {
2506 dirProp
= (DirProp
)stdDirFromInsertDir
[dirInsertValue
& 0x000F];
2507 if ((DIRPROP_FLAG(dirProp
)&MASK_BN_EXPLICIT
) == 0) {
2510 dirInsertValue
>>= 4;
2515 dirProp
=dirProps
[i
];
2517 if((dirProp
==LRI
|| dirProp
==RLI
) && limit
<pBiDi
->length
) {
2518 pBiDi
->isolateCount
++;
2519 pBiDi
->isolates
[pBiDi
->isolateCount
].stateImp
=stateImp
;
2520 pBiDi
->isolates
[pBiDi
->isolateCount
].state
=levState
.state
;
2521 pBiDi
->isolates
[pBiDi
->isolateCount
].start1
=start1
;
2522 pBiDi
->isolates
[pBiDi
->isolateCount
].startON
=levState
.startON
;
2525 processPropertySeq(pBiDi
, &levState
, eor
, limit
, limit
);
2528 /* perform (L1) and (X9) ---------------------------------------------------- */
2531 * Reset the embedding levels for some non-graphic characters (L1).
2532 * This function also sets appropriate levels for BN, and
2533 * explicit embedding types that are supposed to have been removed
2534 * from the paragraph in (X9).
/*
 * Post-processes pBiDi->levels for white space and ignorable characters.
 * Nothing is done unless pBiDi->flags has a MASK_WS bit set. Scanning goes
 * backwards from pBiDi->trailingWSStart:
 *  - a run of characters whose class is in MASK_WS gets the paragraph level
 *    of its position (GET_PARALEVEL);
 *  - below that, a character in MASK_BN_EXPLICIT copies the level of the
 *    following position (levels[i+1]), and a character in MASK_B_S gets the
 *    paragraph level of its position.
 * When orderParagraphsLTR is set, B characters are handled by a separate
 * branch in both loops.
 * NOTE(review): the body of the orderParagraphsLTR/B branches and the outer
 * loop are not visible in this excerpt.
 */
2537 adjustWSLevels(UBiDi
*pBiDi
) {
2538 const DirProp
*dirProps
=pBiDi
->dirProps
;
2539 UBiDiLevel
*levels
=pBiDi
->levels
;
2542 if(pBiDi
->flags
&MASK_WS
) {
2543 UBool orderParagraphsLTR
=pBiDi
->orderParagraphsLTR
;
2546 i
=pBiDi
->trailingWSStart
;
2548 /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
2549 while(i
>0 && (flag
=DIRPROP_FLAG(dirProps
[--i
]))&MASK_WS
) {
2550 if(orderParagraphsLTR
&&(flag
&DIRPROP_FLAG(B
))) {
2553 levels
[i
]=GET_PARALEVEL(pBiDi
, i
);
2557 /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
2558 /* here, i+1 is guaranteed to be <length */
2560 flag
=DIRPROP_FLAG(dirProps
[--i
]);
2561 if(flag
&MASK_BN_EXPLICIT
) {
2562 levels
[i
]=levels
[i
+1];
2563 } else if(orderParagraphsLTR
&&(flag
&DIRPROP_FLAG(B
))) {
2566 } else if(flag
&MASK_B_S
) {
2567 levels
[i
]=GET_PARALEVEL(pBiDi
, i
);
/*
 * Records the context strings (prologue / epilogue) on pBiDi.
 * Sets *pErrorCode to U_ILLEGAL_ARGUMENT_ERROR when pBiDi is NULL, when a
 * length is below -1, or when a NULL string is paired with a non-zero length.
 * Each stored length is either the value passed in or u_strlen() of the
 * string (presumably when the length argument is -1 - the selecting test is
 * not visible in this excerpt). Only the pointers are stored; the text is
 * not copied here.
 */
2575 U_CAPI
void U_EXPORT2
2576 ubidi_setContext(UBiDi
*pBiDi
,
2577 const UChar
*prologue
, int32_t proLength
,
2578 const UChar
*epilogue
, int32_t epiLength
,
2579 UErrorCode
*pErrorCode
) {
2580 /* check the argument values */
2581 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
);
2582 if(pBiDi
==NULL
|| proLength
<-1 || epiLength
<-1 ||
2583 (prologue
==NULL
&& proLength
!=0) || (epilogue
==NULL
&& epiLength
!=0)) {
2584 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
2589 pBiDi
->proLength
=u_strlen(prologue
);
2591 pBiDi
->proLength
=proLength
;
2594 pBiDi
->epiLength
=u_strlen(epilogue
);
2596 pBiDi
->epiLength
=epiLength
;
2598 pBiDi
->prologue
=prologue
;
2599 pBiDi
->epilogue
=epilogue
;
/*
 * Marks a setPara operation as completed: the stored prologue length is
 * cleared so the context is not reused, and pParaBiDi is pointed at pBiDi
 * itself.
 */
2603 setParaSuccess(UBiDi
*pBiDi
) {
2604 pBiDi
->proLength
=0; /* forget the last context */
2606 pBiDi
->pParaBiDi
=pBiDi
; /* mark successful setPara */
/* Smaller of x and y / absolute value of x. Both are plain macros: an
 * argument can be evaluated more than once, so pass side-effect-free
 * expressions only. */
2609 #define BIDI_MIN(x, y) ((x)<(y) ? (x) : (y))
2610 #define BIDI_ABS(x) ((x)>=0 ? (x) : (-(x)))
/*
 * setPara implementation for UBIDI_REORDER_RUNS_ONLY.
 * Visible steps:
 *  1. Switch reorderingMode to UBIDI_REORDER_DEFAULT.
 *  2. Allocate one buffer holding, per input unit, an int32_t map entry, a
 *     UChar of visual text and a saved UBiDiLevel (visualMap, visualText,
 *     saveLevels are carved out of it in that order).
 *  3. If UBIDI_OPTION_INSERT_MARKS is set, temporarily replace it with
 *     UBIDI_OPTION_REMOVE_CONTROLS; reduce paraLevel to 0 or 1.
 *  4. Run ubidi_setPara() on the text; save levels, trailingWSStart, length
 *     and direction; produce the visual text (with mirroring) and the visual
 *     map; restore the options.
 *  5. Switch to UBIDI_REORDER_INVERSE_LIKE_DIRECT and run ubidi_setPara() on
 *     the visual text with mayAllocateText forced to FALSE, then get the runs.
 *  6. Count, then perform, the splits of runs whose elements are not
 *     consecutive in the source text or differ in saved level.
 *  7. Restore paraLevel, length, originalLength, direction, levels and
 *     trailingWSStart; set direction to UBIDI_MIXED when more than one run
 *     results; free the buffer; set reorderingMode back to
 *     UBIDI_REORDER_RUNS_ONLY.
 * NOTE(review): the error-exit paths (labels/gotos) and several declarations
 * are not visible in this excerpt.
 */
2613 setParaRunsOnly(UBiDi
*pBiDi
, const UChar
*text
, int32_t length
,
2614 UBiDiLevel paraLevel
, UErrorCode
*pErrorCode
) {
2615 int32_t *runsOnlyMemory
= NULL
;
2618 int32_t saveLength
, saveTrailingWSStart
;
2619 const UBiDiLevel
*levels
;
2620 UBiDiLevel
*saveLevels
;
2621 UBiDiDirection saveDirection
;
2622 UBool saveMayAllocateText
;
2624 int32_t visualLength
, i
, j
, visualStart
, logicalStart
,
2625 runCount
, runLength
, addedRuns
, insertRemove
,
2626 start
, limit
, step
, indexOddBit
, logicalPos
,
2628 uint32_t saveOptions
;
2630 pBiDi
->reorderingMode
=UBIDI_REORDER_DEFAULT
;
2632 ubidi_setPara(pBiDi
, text
, length
, paraLevel
, NULL
, pErrorCode
);
2635 /* obtain memory for mapping table and visual text */
2636 runsOnlyMemory
=static_cast<int32_t *>(uprv_malloc(length
*(sizeof(int32_t)+sizeof(UChar
)+sizeof(UBiDiLevel
))));
2637 if(runsOnlyMemory
==NULL
) {
2638 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
2641 visualMap
=runsOnlyMemory
;
2642 visualText
=(UChar
*)&visualMap
[length
];
2643 saveLevels
=(UBiDiLevel
*)&visualText
[length
];
2644 saveOptions
=pBiDi
->reorderingOptions
;
2645 if(saveOptions
& UBIDI_OPTION_INSERT_MARKS
) {
2646 pBiDi
->reorderingOptions
&=~UBIDI_OPTION_INSERT_MARKS
;
2647 pBiDi
->reorderingOptions
|=UBIDI_OPTION_REMOVE_CONTROLS
;
2649 paraLevel
&=1; /* accept only 0 or 1 */
2650 ubidi_setPara(pBiDi
, text
, length
, paraLevel
, NULL
, pErrorCode
);
2651 if(U_FAILURE(*pErrorCode
)) {
2654 /* we cannot access directly pBiDi->levels since it is not yet set if
2655 * direction is not MIXED
2657 levels
=ubidi_getLevels(pBiDi
, pErrorCode
);
2658 uprv_memcpy(saveLevels
, levels
, (size_t)pBiDi
->length
*sizeof(UBiDiLevel
));
2659 saveTrailingWSStart
=pBiDi
->trailingWSStart
;
2660 saveLength
=pBiDi
->length
;
2661 saveDirection
=pBiDi
->direction
;
2663 /* FOOD FOR THOUGHT: instead of writing the visual text, we could use
2664 * the visual map and the dirProps array to drive the second call
2665 * to ubidi_setPara (but must make provision for possible removal of
2666 * BiDi controls. Alternatively, only use the dirProps array via
2667 * customized classifier callback.
2669 visualLength
=ubidi_writeReordered(pBiDi
, visualText
, length
,
2670 UBIDI_DO_MIRRORING
, pErrorCode
);
2671 ubidi_getVisualMap(pBiDi
, visualMap
, pErrorCode
);
2672 if(U_FAILURE(*pErrorCode
)) {
2675 pBiDi
->reorderingOptions
=saveOptions
;
2677 pBiDi
->reorderingMode
=UBIDI_REORDER_INVERSE_LIKE_DIRECT
;
2679 /* Because what we did with reorderingOptions, visualText may be shorter
2680 * than the original text. But we don't want the levels memory to be
2681 * reallocated shorter than the original length, since we need to restore
2682 * the levels as after the first call to ubidi_setPara() before returning.
2683 * We will force mayAllocateText to FALSE before the second call to
2684 * ubidi_setPara(), and will restore it afterwards.
2686 saveMayAllocateText
=pBiDi
->mayAllocateText
;
2687 pBiDi
->mayAllocateText
=FALSE
;
2688 ubidi_setPara(pBiDi
, visualText
, visualLength
, paraLevel
, NULL
, pErrorCode
);
2689 pBiDi
->mayAllocateText
=saveMayAllocateText
;
2690 ubidi_getRuns(pBiDi
, pErrorCode
);
2691 if(U_FAILURE(*pErrorCode
)) {
2694 /* check if some runs must be split, count how many splits */
2696 runCount
=pBiDi
->runCount
;
2699 for(i
=0; i
<runCount
; i
++, visualStart
+=runLength
) {
2700 runLength
=runs
[i
].visualLimit
-visualStart
;
2704 logicalStart
=GET_INDEX(runs
[i
].logicalStart
);
2705 for(j
=logicalStart
+1; j
<logicalStart
+runLength
; j
++) {
2706 index0
=visualMap
[j
];
2707 index1
=visualMap
[j
-1];
2708 if((BIDI_ABS(index0
-index1
)!=1) || (saveLevels
[index0
]!=saveLevels
[index1
])) {
2714 if(getRunsMemory(pBiDi
, runCount
+addedRuns
)) {
2716 /* because we switch from UBiDi.simpleRuns to UBiDi.runs */
2717 pBiDi
->runsMemory
[0]=runs
[0];
2719 runs
=pBiDi
->runs
=pBiDi
->runsMemory
;
2720 pBiDi
->runCount
+=addedRuns
;
2725 /* split runs which are not consecutive in source text */
2726 for(i
=runCount
-1; i
>=0; i
--) {
2727 runLength
= i
==0 ? runs
[0].visualLimit
:
2728 runs
[i
].visualLimit
-runs
[i
-1].visualLimit
;
2729 logicalStart
=runs
[i
].logicalStart
;
2730 indexOddBit
=GET_ODD_BIT(logicalStart
);
2731 logicalStart
=GET_INDEX(logicalStart
);
2734 runs
[i
+addedRuns
]=runs
[i
];
2736 logicalPos
=visualMap
[logicalStart
];
2737 runs
[i
+addedRuns
].logicalStart
=MAKE_INDEX_ODD_PAIR(logicalPos
,
2738 saveLevels
[logicalPos
]^indexOddBit
);
2743 limit
=logicalStart
+runLength
-1;
2746 start
=logicalStart
+runLength
-1;
2750 for(j
=start
; j
!=limit
; j
+=step
) {
2751 index0
=visualMap
[j
];
2752 index1
=visualMap
[j
+step
];
2753 if((BIDI_ABS(index0
-index1
)!=1) || (saveLevels
[index0
]!=saveLevels
[index1
])) {
2754 logicalPos
=BIDI_MIN(visualMap
[start
], index0
);
2755 runs
[i
+addedRuns
].logicalStart
=MAKE_INDEX_ODD_PAIR(logicalPos
,
2756 saveLevels
[logicalPos
]^indexOddBit
);
2757 runs
[i
+addedRuns
].visualLimit
=runs
[i
].visualLimit
;
2758 runs
[i
].visualLimit
-=BIDI_ABS(j
-start
)+1;
2759 insertRemove
=runs
[i
].insertRemove
&(LRM_AFTER
|RLM_AFTER
);
2760 runs
[i
+addedRuns
].insertRemove
=insertRemove
;
2761 runs
[i
].insertRemove
&=~insertRemove
;
2767 runs
[i
+addedRuns
]=runs
[i
];
2769 logicalPos
=BIDI_MIN(visualMap
[start
], visualMap
[limit
]);
2770 runs
[i
+addedRuns
].logicalStart
=MAKE_INDEX_ODD_PAIR(logicalPos
,
2771 saveLevels
[logicalPos
]^indexOddBit
);
2775 /* restore initial paraLevel */
2776 pBiDi
->paraLevel
^=1;
2778 /* restore real text */
2780 pBiDi
->length
=saveLength
;
2781 pBiDi
->originalLength
=length
;
2782 pBiDi
->direction
=saveDirection
;
2783 /* the saved levels should never exceed levelsSize, but we check anyway */
2784 if(saveLength
>pBiDi
->levelsSize
) {
2785 saveLength
=pBiDi
->levelsSize
;
2787 uprv_memcpy(pBiDi
->levels
, saveLevels
, (size_t)saveLength
*sizeof(UBiDiLevel
));
2788 pBiDi
->trailingWSStart
=saveTrailingWSStart
;
2789 if(pBiDi
->runCount
>1) {
2790 pBiDi
->direction
=UBIDI_MIXED
;
2793 /* free memory for mapping table and visual text */
2794 uprv_free(runsOnlyMemory
);
2796 pBiDi
->reorderingMode
=UBIDI_REORDER_RUNS_ONLY
;
2799 /* -------------------------------------------------------------------------- */
2800 /* internal prototype */
2803 ubidi_setParaInternal(UBiDi
*pBiDi
,
2804 const UChar
*text
, int32_t length
,
2805 UBiDiLevel paraLevel
,
2806 UBiDiLevel
*embeddingLevels
,
2807 const int32_t *offsets
, int32_t offsetCount
,
2808 const int32_t *controlStringIndices
,
2809 const UChar
* const * controlStrings
,
2810 UErrorCode
*pErrorCode
);
2812 /* ubidi_setPara ------------------------------------------------------------ */
/*
 * Public entry point without inserted controls: after the
 * RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE check on pErrorCode it forwards to
 * ubidi_setParaInternal(), passing NULL / 0 / NULL / NULL for the offsets,
 * offsetCount, controlStringIndices and controlStrings arguments.
 */
2814 U_CAPI
void U_EXPORT2
2815 ubidi_setPara(UBiDi
*pBiDi
, const UChar
*text
, int32_t length
,
2816 UBiDiLevel paraLevel
, UBiDiLevel
*embeddingLevels
,
2817 UErrorCode
*pErrorCode
) {
2818 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
);
2819 ubidi_setParaInternal(pBiDi
, text
, length
, paraLevel
,
2821 NULL
, 0, NULL
, NULL
,
2825 /* ubidi_setParaWithControls ------------------------------------------------ */
/*
 * Public entry point that also takes inserted-control data (offsets,
 * offsetCount, controlStringIndices, controlStrings).
 * Sets *pErrorCode to U_ILLEGAL_ARGUMENT_ERROR when offsetCount is negative,
 * or when offsetCount is positive while offsets or controlStrings is NULL;
 * controlStringIndices is not checked here. Otherwise the arguments are
 * forwarded to ubidi_setParaInternal(), which validates the remaining ones.
 */
2827 U_CAPI
void U_EXPORT2
2828 ubidi_setParaWithControls(UBiDi
*pBiDi
,
2829 const UChar
*text
, int32_t length
,
2830 UBiDiLevel paraLevel
,
2831 const int32_t *offsets
, int32_t offsetCount
,
2832 const int32_t *controlStringIndices
,
2833 const UChar
* const * controlStrings
,
2834 UErrorCode
*pErrorCode
) {
2835 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
);
2836 /* check the argument values that are not already checked in ubidi_setParaInternal */
2837 if ( offsetCount
< 0 || (offsetCount
> 0 && (offsets
== NULL
|| controlStrings
== NULL
)) ) {
2838 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
2841 ubidi_setParaInternal(pBiDi
, text
, length
, paraLevel
,
2843 offsets
, offsetCount
, controlStringIndices
, controlStrings
,
2847 /* ubidi_setParaInternal ---------------------------------------------------- */
/*
 * Shared implementation behind ubidi_setPara() and ubidi_setParaWithControls().
 *
 * Steps visible in this excerpt, in order:
 *  - argument validation; failure sets U_ILLEGAL_ARGUMENT_ERROR;
 *  - length taken from u_strlen(text) (the guarding condition is not shown);
 *  - a check combining offsetCount > 0 with the reordering mode;
 *  - hand-off to setParaRunsOnly() for UBIDI_REORDER_RUNS_ONLY;
 *  - reset of the per-call fields of *pBiDi;
 *  - the branch documented as the empty-paragraph case, ending in
 *    setParaSuccess();
 *  - selection of the paras array (parasMemory or simpleParas);
 *  - when offsetCount > 0: getDirInsertMemory() and getDirInsert();
 *  - getDirPropsMemory() and getDirProps();
 *  - explicit levels: resolveExplicitLevels() when embeddingLevels is NULL,
 *    otherwise checkExplicitLevels() on the caller-supplied array;
 *  - selection of the isolates array;
 *  - selection of the implicit state table from reorderingMode;
 *  - resolveImplicitLevels() over the whole text or per same-level run;
 *  - propagation of insertPoints.errorCode, then adjustWSLevels();
 *  - addPoint(..., RLM_BEFORE) for the inverse modes with
 *    UBIDI_OPTION_INSERT_MARKS and a positive defaultParaLevel;
 *  - resultLength adjustment and setParaSuccess().
 *
 * NOTE(review): this excerpt has lost a number of source lines (closing
 * braces, else branches, return/break statements, some local declarations
 * and comment delimiters), so the exact control flow between the steps above
 * cannot be read from here.
 */
2850 ubidi_setParaInternal(UBiDi
*pBiDi
,
2851 const UChar
*text
, int32_t length
,
2852 UBiDiLevel paraLevel
,
2853 UBiDiLevel
*embeddingLevels
,
2854 const int32_t *offsets
, int32_t offsetCount
,
2855 const int32_t *controlStringIndices
,
2856 const UChar
* const * controlStrings
,
2857 UErrorCode
*pErrorCode
) {
2858 UBiDiDirection direction
;
2861 /* check the argument values (pErrorCode status already checked before getting here) */
2862 if(pBiDi
==NULL
|| text
==NULL
|| length
<-1 ||
2863 (paraLevel
>UBIDI_MAX_EXPLICIT_LEVEL
&& paraLevel
<UBIDI_DEFAULT_LTR
)) {
2864 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
2869 length
=u_strlen(text
);
/* inserted controls (offsetCount > 0) are tested against reordering modes
 * that compare greater than UBIDI_REORDER_GROUP_NUMBERS_WITH_R; the body of
 * this check is not in this excerpt -- presumably an error return, TODO confirm */
2871 if (offsetCount
> 0 && pBiDi
->reorderingMode
> UBIDI_REORDER_GROUP_NUMBERS_WITH_R
) {
2875 /* special treatment for RUNS_ONLY mode */
2876 if(pBiDi
->reorderingMode
==UBIDI_REORDER_RUNS_ONLY
) {
2877 setParaRunsOnly(pBiDi
, text
, length
, paraLevel
, pErrorCode
);
2881 /* initialize the UBiDi structure */
2882 pBiDi
->pParaBiDi
=NULL
; /* mark unfinished setPara */
2884 pBiDi
->length
=pBiDi
->originalLength
=pBiDi
->resultLength
=length
;
2885 pBiDi
->paraLevel
=paraLevel
;
2886 pBiDi
->direction
=(UBiDiDirection
)(paraLevel
&1);
2889 pBiDi
->dirInsert
=NULL
;
2890 pBiDi
->dirProps
=NULL
;
2893 pBiDi
->insertPoints
.size
=0; /* clean up from last call */
2894 pBiDi
->insertPoints
.confirmed
=0; /* clean up from last call */
2897 * Save the original paraLevel if contextual; otherwise, set to 0.
2899 pBiDi
->defaultParaLevel
=IS_DEFAULT_LEVEL(paraLevel
);
2903 * For an empty paragraph, create a UBiDi object with the paraLevel and
2904 * the flags and the direction set but without allocating zero-length arrays.
2905 * There is nothing more to do.
2907 if(IS_DEFAULT_LEVEL(paraLevel
)) {
2908 pBiDi
->paraLevel
&=1;
2909 pBiDi
->defaultParaLevel
=0;
2911 pBiDi
->flags
=DIRPROP_FLAG_LR(paraLevel
);
2914 setParaSuccess(pBiDi
); /* mark successful setPara */
2920 /* allocate paras memory */
2921 if(pBiDi
->parasMemory
)
2922 pBiDi
->paras
=pBiDi
->parasMemory
;
2924 pBiDi
->paras
=pBiDi
->simpleParas
;
2927 * Get the inserted directional properties
2930 if (offsetCount
> 0) {
2931 if(getDirInsertMemory(pBiDi
, length
)) {
2932 pBiDi
->dirInsert
=pBiDi
->dirInsertMemory
;
2933 if(!getDirInsert(pBiDi
, offsets
, offsetCount
, controlStringIndices
, controlStrings
)) {
2934 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
2938 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
2944 * Get the directional properties,
2945 * the flags bit-set, and
2946 * determine the paragraph level if necessary.
2948 if(getDirPropsMemory(pBiDi
, length
)) {
2949 pBiDi
->dirProps
=pBiDi
->dirPropsMemory
;
2950 if(!getDirProps(pBiDi
)) {
2951 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
2955 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
2958 dirProps
=pBiDi
->dirProps
;
2959 /* the processed length may have changed if UBIDI_OPTION_STREAMING */
2960 length
= pBiDi
->length
;
2961 pBiDi
->trailingWSStart
=length
; /* the levels[] will reflect the WS run */
2963 /* are explicit levels specified? */
2964 if(embeddingLevels
==NULL
) {
/* NOTE(review): the comment on the next line is followed by a stray
 * backslash, which splices that line with the one after it; it looks
 * unintended but is left untouched here */
2965 /* no: determine explicit levels according to the (Xn) rules */\
2966 if(getLevelsMemory(pBiDi
, length
)) {
2967 pBiDi
->levels
=pBiDi
->levelsMemory
;
2968 direction
=resolveExplicitLevels(pBiDi
, pErrorCode
);
2969 if(U_FAILURE(*pErrorCode
)) {
2973 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
2977 /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */
2978 pBiDi
->levels
=embeddingLevels
;
2979 direction
=checkExplicitLevels(pBiDi
, pErrorCode
);
2980 if(U_FAILURE(*pErrorCode
)) {
2985 /* allocate isolate memory */
2986 if(pBiDi
->isolateCount
<=SIMPLE_ISOLATES_COUNT
)
2987 pBiDi
->isolates
=pBiDi
->simpleIsolates
;
/* isolatesSize is compared against a byte count (isolateCount*sizeof(Isolate)),
 * not against an element count */
2989 if((int32_t)(pBiDi
->isolateCount
*sizeof(Isolate
))<=pBiDi
->isolatesSize
)
2990 pBiDi
->isolates
=pBiDi
->isolatesMemory
;
2992 if(getInitialIsolatesMemory(pBiDi
, pBiDi
->isolateCount
)) {
2993 pBiDi
->isolates
=pBiDi
->isolatesMemory
;
2995 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
2999 pBiDi
->isolateCount
=-1; /* current isolates stack entry == none */
3002 * The steps after (X9) in the UBiDi algorithm are performed only if
3003 * the paragraph text has mixed directionality!
3005 pBiDi
->direction
=direction
;
3008 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
3009 pBiDi
->trailingWSStart
=0;
3012 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
3013 pBiDi
->trailingWSStart
=0;
3017 * Choose the right implicit state table
3019 switch(pBiDi
->reorderingMode
) {
3020 case UBIDI_REORDER_DEFAULT
:
3021 pBiDi
->pImpTabPair
=&impTab_DEFAULT
;
3023 case UBIDI_REORDER_NUMBERS_SPECIAL
:
3024 pBiDi
->pImpTabPair
=&impTab_NUMBERS_SPECIAL
;
3026 case UBIDI_REORDER_GROUP_NUMBERS_WITH_R
:
3027 pBiDi
->pImpTabPair
=&impTab_GROUP_NUMBERS_WITH_R
;
3029 case UBIDI_REORDER_INVERSE_NUMBERS_AS_L
:
3030 pBiDi
->pImpTabPair
=&impTab_INVERSE_NUMBERS_AS_L
;
3032 case UBIDI_REORDER_INVERSE_LIKE_DIRECT
:
3033 if (pBiDi
->reorderingOptions
& UBIDI_OPTION_INSERT_MARKS
) {
3034 pBiDi
->pImpTabPair
=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS
;
3036 pBiDi
->pImpTabPair
=&impTab_INVERSE_LIKE_DIRECT
;
3039 case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
:
3040 if (pBiDi
->reorderingOptions
& UBIDI_OPTION_INSERT_MARKS
) {
3041 pBiDi
->pImpTabPair
=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS
;
3043 pBiDi
->pImpTabPair
=&impTab_INVERSE_FOR_NUMBERS_SPECIAL
;
3047 /* we should never get here */
3051 * If there are no external levels specified and there
3052 * are no significant explicit level codes in the text,
3053 * then we can treat the entire paragraph as one run.
3054 * Otherwise, we need to perform the following rules on runs of
3055 * the text with the same embedding levels. (X10)
3056 * "Significant" explicit level codes are ones that actually
3057 * affect non-BN characters.
3058 * Examples for "insignificant" ones are empty embeddings
3059 * LRE-PDF, LRE-RLE-PDF-PDF, etc.
3061 if(embeddingLevels
==NULL
&& pBiDi
->paraCount
<=1 &&
3062 !(pBiDi
->flags
&DIRPROP_FLAG_MULTI_RUNS
)) {
3063 resolveImplicitLevels(pBiDi
, 0, length
,
3064 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi
, 0)),
3065 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi
, length
-1)));
3067 /* sor, eor: start and end types of same-level-run */
3068 UBiDiLevel
*levels
=pBiDi
->levels
;
3069 int32_t start
, limit
=0;
3070 UBiDiLevel level
, nextLevel
;
3073 /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
3074 level
=GET_PARALEVEL(pBiDi
, 0);
3075 nextLevel
=levels
[0];
3076 if(level
<nextLevel
) {
3077 eor
=GET_LR_FROM_LEVEL(nextLevel
);
3079 eor
=GET_LR_FROM_LEVEL(level
);
3083 /* determine start and limit of the run (end points just behind the run) */
3085 /* the values for this run's start are the same as for the previous run's end */
3088 if((start
>0) && (dirProps
[start
-1]==B
)) {
3089 /* except if this is a new paragraph, then set sor = para level */
3090 sor
=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi
, start
));
3095 /* search for the limit of this run */
3096 while((++limit
<length
) &&
3097 ((levels
[limit
]==level
) ||
3098 (DIRPROP_FLAG(dirProps
[limit
])&MASK_BN_EXPLICIT
))) {}
3100 /* get the correct level of the next run */
3102 nextLevel
=levels
[limit
];
3104 nextLevel
=GET_PARALEVEL(pBiDi
, length
-1);
3107 /* determine eor from max(level, nextLevel); sor is last run's eor */
3108 if(NO_OVERRIDE(level
)<NO_OVERRIDE(nextLevel
)) {
3109 eor
=GET_LR_FROM_LEVEL(nextLevel
);
3111 eor
=GET_LR_FROM_LEVEL(level
);
3114 /* if the run consists of overridden directional types, then there
3115 are no implicit types to be resolved */
3116 if(!(level
&UBIDI_LEVEL_OVERRIDE
)) {
3117 resolveImplicitLevels(pBiDi
, start
, limit
, sor
, eor
);
3119 /* remove the UBIDI_LEVEL_OVERRIDE flags */
3121 levels
[start
++]&=~UBIDI_LEVEL_OVERRIDE
;
3122 } while(start
<limit
);
3124 } while(limit
<length
);
3126 /* check if we got any memory shortage while adding insert points */
3127 if (U_FAILURE(pBiDi
->insertPoints
.errorCode
))
3129 *pErrorCode
=pBiDi
->insertPoints
.errorCode
;
3132 /* reset the embedding levels for some non-graphic characters (L1), (X9) */
3133 adjustWSLevels(pBiDi
);
3136 /* add RLM for inverse Bidi with contextual orientation resolving
3137 * to RTL which would not round-trip otherwise
3139 if((pBiDi
->defaultParaLevel
>0) &&
3140 (pBiDi
->reorderingOptions
& UBIDI_OPTION_INSERT_MARKS
) &&
3141 ((pBiDi
->reorderingMode
==UBIDI_REORDER_INVERSE_LIKE_DIRECT
) ||
3142 (pBiDi
->reorderingMode
==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
))) {
3143 int32_t i
, j
, start
, last
;
3146 for(i
=0; i
<pBiDi
->paraCount
; i
++) {
3147 last
=(pBiDi
->paras
[i
].limit
)-1;
3148 level
= static_cast<UBiDiLevel
>(pBiDi
->paras
[i
].level
);
3150 continue; /* LTR paragraph */
3151 start
= i
==0 ? 0 : pBiDi
->paras
[i
-1].limit
;
3152 for(j
=last
; j
>=start
; j
--) {
3153 dirProp
=dirProps
[j
];
3156 while(dirProps
[last
]==B
) {
3160 addPoint(pBiDi
, last
, RLM_BEFORE
);
3163 if(DIRPROP_FLAG(dirProp
) & MASK_R_AL
) {
3170 if(pBiDi
->reorderingOptions
& UBIDI_OPTION_REMOVE_CONTROLS
) {
3171 pBiDi
->resultLength
-= pBiDi
->controlCount
;
3173 pBiDi
->resultLength
+= pBiDi
->insertPoints
.size
;
3175 setParaSuccess(pBiDi
); /* mark successful setPara */
3178 /* -------------------------------------------------------------------------- */
/*
 * Stores the caller's orderParagraphsLTR flag in pBiDi->orderParagraphsLTR.
 * NOTE(review): the lines around the assignment are not present in this
 * excerpt (possibly a guard on pBiDi -- TODO confirm).
 */
3180 U_CAPI
void U_EXPORT2
3181 ubidi_orderParagraphsLTR(UBiDi
*pBiDi
, UBool orderParagraphsLTR
) {
3183 pBiDi
->orderParagraphsLTR
=orderParagraphsLTR
;
/*
 * Returns the orderParagraphsLTR flag stored in *pBiDi.
 * NOTE(review): the lines around the return are not present in this excerpt,
 * so any guard on pBiDi and any fallback value cannot be seen here.
 */
3187 U_CAPI UBool U_EXPORT2
3188 ubidi_isOrderParagraphsLTR(UBiDi
*pBiDi
) {
3190 return pBiDi
->orderParagraphsLTR
;
/*
 * Returns pBiDi->direction when IS_VALID_PARA_OR_LINE(pBiDi) holds.
 * NOTE(review): the value returned in the other case is not present in this
 * excerpt.
 */
3196 U_CAPI UBiDiDirection U_EXPORT2
3197 ubidi_getDirection(const UBiDi
*pBiDi
) {
3198 if(IS_VALID_PARA_OR_LINE(pBiDi
)) {
3199 return pBiDi
->direction
;
/*
 * Returns a const UChar pointer for the object, gated on
 * IS_VALID_PARA_OR_LINE(pBiDi).
 * NOTE(review): both return statements are missing from this excerpt, so
 * the returned expressions cannot be confirmed from here.
 */
3205 U_CAPI
const UChar
* U_EXPORT2
3206 ubidi_getText(const UBiDi
*pBiDi
) {
3207 if(IS_VALID_PARA_OR_LINE(pBiDi
)) {
/*
 * Returns pBiDi->originalLength when IS_VALID_PARA_OR_LINE(pBiDi) holds.
 * NOTE(review): the value returned in the other case is not present in this
 * excerpt.
 */
3214 U_CAPI
int32_t U_EXPORT2
3215 ubidi_getLength(const UBiDi
*pBiDi
) {
3216 if(IS_VALID_PARA_OR_LINE(pBiDi
)) {
3217 return pBiDi
->originalLength
;
/*
 * Returns pBiDi->length (as opposed to originalLength, which
 * ubidi_getLength() returns) when IS_VALID_PARA_OR_LINE(pBiDi) holds.
 * NOTE(review): the value returned in the other case is not present in this
 * excerpt.
 */
3223 U_CAPI
int32_t U_EXPORT2
3224 ubidi_getProcessedLength(const UBiDi
*pBiDi
) {
3225 if(IS_VALID_PARA_OR_LINE(pBiDi
)) {
3226 return pBiDi
->length
;
/*
 * Returns pBiDi->resultLength when IS_VALID_PARA_OR_LINE(pBiDi) holds.
 * NOTE(review): the value returned in the other case is not present in this
 * excerpt.
 */
3232 U_CAPI
int32_t U_EXPORT2
3233 ubidi_getResultLength(const UBiDi
*pBiDi
) {
3234 if(IS_VALID_PARA_OR_LINE(pBiDi
)) {
3235 return pBiDi
->resultLength
;
3241 /* paragraphs API functions ------------------------------------------------- */
/*
 * Returns pBiDi->paraLevel when IS_VALID_PARA_OR_LINE(pBiDi) holds.
 * NOTE(review): the value returned in the other case is not present in this
 * excerpt.
 */
3243 U_CAPI UBiDiLevel U_EXPORT2
3244 ubidi_getParaLevel(const UBiDi
*pBiDi
) {
3245 if(IS_VALID_PARA_OR_LINE(pBiDi
)) {
3246 return pBiDi
->paraLevel
;
/*
 * Returns pBiDi->paraCount. The validity test is inverted compared with the
 * neighbouring getters: the branch taken when IS_VALID_PARA_OR_LINE(pBiDi)
 * fails comes first.
 * NOTE(review): the body of that first branch is not present in this
 * excerpt.
 */
3252 U_CAPI
int32_t U_EXPORT2
3253 ubidi_countParagraphs(UBiDi
*pBiDi
) {
3254 if(!IS_VALID_PARA_OR_LINE(pBiDi
)) {
3257 return pBiDi
->paraCount
;
/*
 * Reports the boundaries and level of the paragraph with index paraIndex.
 *
 * pErrorCode, pBiDi and paraIndex (against 0 and pBiDi->paraCount) are
 * checked through the RETURN_VOID_IF_* macros. The lookup then switches to
 * pBiDi->pParaBiDi. Each of pParaStart, pParaLimit and pParaLevel is
 * optional and is written only when non-NULL:
 *   *pParaStart = paraStart
 *   *pParaLimit = paras[paraIndex].limit
 *   *pParaLevel = GET_PARALEVEL(pBiDi, paraStart)
 * NOTE(review): the declaration of paraStart and the lines around its
 * assignment from paras[paraIndex-1].limit are not present in this excerpt;
 * presumably paraIndex==0 is handled separately -- TODO confirm.
 */
3261 U_CAPI
void U_EXPORT2
3262 ubidi_getParagraphByIndex(const UBiDi
*pBiDi
, int32_t paraIndex
,
3263 int32_t *pParaStart
, int32_t *pParaLimit
,
3264 UBiDiLevel
*pParaLevel
, UErrorCode
*pErrorCode
) {
3267 /* check the argument values */
3268 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
);
3269 RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi
, *pErrorCode
);
3270 RETURN_VOID_IF_BAD_RANGE(paraIndex
, 0, pBiDi
->paraCount
, *pErrorCode
);
3272 pBiDi
=pBiDi
->pParaBiDi
; /* get Para object if Line object */
3274 paraStart
=pBiDi
->paras
[paraIndex
-1].limit
;
3278 if(pParaStart
!=NULL
) {
3279 *pParaStart
=paraStart
;
3281 if(pParaLimit
!=NULL
) {
3282 *pParaLimit
=pBiDi
->paras
[paraIndex
].limit
;
3284 if(pParaLevel
!=NULL
) {
3285 *pParaLevel
=GET_PARALEVEL(pBiDi
, paraStart
);
/*
 * Finds the paragraph that contains charIndex and reports it through
 * ubidi_getParagraphByIndex().
 *
 * The RETURN_IF_* macros are given -1 as their last argument (presumably the
 * value returned on failure -- TODO confirm). charIndex is range-checked
 * against 0 and pBiDi->length after switching to pBiDi->pParaBiDi.
 * NOTE(review): the declaration of paraIndex and the final return statement
 * are not present in this excerpt.
 */
3289 U_CAPI
int32_t U_EXPORT2
3290 ubidi_getParagraph(const UBiDi
*pBiDi
, int32_t charIndex
,
3291 int32_t *pParaStart
, int32_t *pParaLimit
,
3292 UBiDiLevel
*pParaLevel
, UErrorCode
*pErrorCode
) {
3295 /* check the argument values */
3296 /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */
3297 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
, -1);
3298 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi
, *pErrorCode
, -1);
3299 pBiDi
=pBiDi
->pParaBiDi
; /* get Para object if Line object */
3300 RETURN_IF_BAD_RANGE(charIndex
, 0, pBiDi
->length
, *pErrorCode
, -1);
/* empty-bodied loop: advance to the first paragraph whose limit lies beyond charIndex */
3302 for(paraIndex
=0; charIndex
>=pBiDi
->paras
[paraIndex
].limit
; paraIndex
++);
3303 ubidi_getParagraphByIndex(pBiDi
, paraIndex
, pParaStart
, pParaLimit
, pParaLevel
, pErrorCode
);
/*
 * Installs newFn / newContext as the class callback and its context in
 * *pBiDi (fnClassCallback / coClassCallback), and hands the previous values
 * back through oldFn / oldContext.
 *
 * U_ILLEGAL_ARGUMENT_ERROR is set on an argument check whose condition is
 * not present in this excerpt.
 * NOTE(review): the conditions guarding the writes to *oldFn and *oldContext
 * are also missing here (presumably NULL checks on the out-parameters --
 * TODO confirm).
 */
3307 U_CAPI
void U_EXPORT2
3308 ubidi_setClassCallback(UBiDi
*pBiDi
, UBiDiClassCallback
*newFn
,
3309 const void *newContext
, UBiDiClassCallback
**oldFn
,
3310 const void **oldContext
, UErrorCode
*pErrorCode
)
3312 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
);
3314 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
3319 *oldFn
= pBiDi
->fnClassCallback
;
3323 *oldContext
= pBiDi
->coClassCallback
;
3325 pBiDi
->fnClassCallback
= newFn
;
3326 pBiDi
->coClassCallback
= newContext
;
/*
 * Copies the currently installed class callback and its context out of
 * *pBiDi into *fn and *context.
 * NOTE(review): the conditions guarding these writes are not present in this
 * excerpt (presumably NULL checks on pBiDi, fn and context -- TODO confirm).
 */
3329 U_CAPI
void U_EXPORT2
3330 ubidi_getClassCallback(UBiDi
*pBiDi
, UBiDiClassCallback
**fn
, const void **context
)
3337 *fn
= pBiDi
->fnClassCallback
;
3341 *context
= pBiDi
->coClassCallback
;
3345 U_CAPI UCharDirection U_EXPORT2
3346 ubidi_getCustomizedClass(UBiDi
*pBiDi
, UChar32 c
)
3350 if( pBiDi
->fnClassCallback
== NULL
||
3351 (dir
= (*pBiDi
->fnClassCallback
)(pBiDi
->coClassCallback
, c
)) == U_BIDI_CLASS_DEFAULT
)
3353 dir
= ubidi_getClass(c
);
3355 if(dir
>= U_CHAR_DIRECTION_COUNT
) {
3356 dir
= (UCharDirection
)ON
;