2 ******************************************************************************
4 * Copyright (C) 1999-2010, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 ******************************************************************************
10 * tab size: 8 (not used)
13 * created on: 1999jul27
14 * created by: Markus W. Scherer, updated by Matitiahu Allouche
18 #include "unicode/utypes.h"
19 #include "unicode/ustring.h"
20 #include "unicode/uchar.h"
21 #include "unicode/ubidi.h"
22 #include "ubidi_props.h"
27 * General implementation notes:
29 * Throughout the implementation, there are comments like (W2) that refer to
30 * rules of the BiDi algorithm in its version 5, in this example to the second
31 * rule of the resolution of weak types.
33 * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
34 * character according to UTF-16, the second UChar gets the directional property of
35 * the entire character assigned, while the first one gets a BN, a boundary
36 * neutral, type, which is ignored by most of the algorithm according to
37 * rule (X9) and the implementation suggestions of the BiDi algorithm.
39 * Later, adjustWSLevels() will set the level for each BN to that of the
40 * following character (UChar), which results in surrogate pairs getting the
41 * same level on each of their surrogates.
43 * In a UTF-8 implementation, the same thing could be done: the last byte of
44 * a multi-byte sequence would get the "real" property, while all previous
45 * bytes of that sequence would get BN.
47 * It is not possible to assign all those parts of a character the same real
48 * property because this would fail in the resolution of weak types with rules
49 * that look at immediately surrounding types.
51 * As a related topic, this implementation does not remove Boundary Neutral
52 * types from the input, but ignores them wherever this is relevant.
53 * For example, the loop for the resolution of the weak types reads
54 * types until it finds a non-BN.
55 * Also, explicit embedding codes are neither changed into BN nor removed.
56 * They are only treated the same way real BNs are.
57 * As stated before, adjustWSLevels() takes care of them at the end.
58 * For the purpose of conformance, the levels of all these codes
61 * Note that this implementation never modifies the dirProps
62 * after the initial setup.
65 * In this implementation, the resolution of weak types (Wn),
66 * neutrals (Nn), and the assignment of the resolved level (In)
67 * are all done in one single loop, in resolveImplicitLevels().
68 * Changes of dirProp values are done on the fly, without writing
69 * them back to the dirProps array.
72 * This implementation contains code that allows bypassing steps of the
73 * algorithm that are not needed on the specific paragraph
74 * in order to speed up the most common cases considerably,
75 * like text that is entirely LTR, or RTL text without numbers.
77 * Most of this is done by setting a bit for each directional property
78 * in a flags variable and later checking for whether there are
79 * any LTR characters or any RTL characters, or both, whether
80 * there are any explicit embedding codes, etc.
82 * If the (Xn) steps are performed, then the flags are re-evaluated,
83 * because they will then not contain the embedding codes any more
84 * and will be adjusted for override codes, so that subsequently
85 * more bypassing may be possible than what the initial flags suggested.
87 * If the text is not mixed-directional, then the
88 * algorithm steps for the weak type resolution are not performed,
89 * and all levels are set to the paragraph level.
91 * If there are no explicit embedding codes, then the (Xn) steps
94 * If embedding levels are supplied as a parameter, then all
95 * explicit embedding codes are ignored, and the (Xn) steps
98 * White Space types could get the level of the run they belong to,
99 * and are checked with a test of (flags&MASK_EMBEDDING) to
100 * decide whether the paragraph direction should be included in
101 * the flags variable.
103 * If there are no White Space types in the paragraph, then
104 * (L1) is not necessary in adjustWSLevels().
107 /* to avoid some conditional statements, use tiny constant arrays */
108 static const Flags flagLR
[2]={ DIRPROP_FLAG(L
), DIRPROP_FLAG(R
) };
109 static const Flags flagE
[2]={ DIRPROP_FLAG(LRE
), DIRPROP_FLAG(RLE
) };
110 static const Flags flagO
[2]={ DIRPROP_FLAG(LRO
), DIRPROP_FLAG(RLO
) };
112 #define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
113 #define DIRPROP_FLAG_E(level) flagE[(level)&1]
114 #define DIRPROP_FLAG_O(level) flagO[(level)&1]
116 /* UBiDi object management -------------------------------------------------- */
118 U_CAPI UBiDi
* U_EXPORT2
121 UErrorCode errorCode
=U_ZERO_ERROR
;
122 return ubidi_openSized(0, 0, &errorCode
);
125 U_CAPI UBiDi
* U_EXPORT2
126 ubidi_openSized(int32_t maxLength
, int32_t maxRunCount
, UErrorCode
*pErrorCode
) {
129 /* check the argument values */
130 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
132 } else if(maxLength
<0 || maxRunCount
<0) {
133 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
134 return NULL
; /* invalid arguments */
137 /* allocate memory for the object */
138 pBiDi
=(UBiDi
*)uprv_malloc(sizeof(UBiDi
));
140 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
144 /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */
145 uprv_memset(pBiDi
, 0, sizeof(UBiDi
));
147 /* get BiDi properties */
148 pBiDi
->bdp
=ubidi_getSingleton();
150 /* allocate memory for arrays as requested */
152 if( !getInitialDirPropsMemory(pBiDi
, maxLength
) ||
153 !getInitialLevelsMemory(pBiDi
, maxLength
)
155 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
158 pBiDi
->mayAllocateText
=TRUE
;
163 /* use simpleRuns[] */
164 pBiDi
->runsSize
=sizeof(Run
);
165 } else if(!getInitialRunsMemory(pBiDi
, maxRunCount
)) {
166 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
169 pBiDi
->mayAllocateRuns
=TRUE
;
172 if(U_SUCCESS(*pErrorCode
)) {
181 * We are allowed to allocate memory if memory==NULL or
182 * mayAllocate==TRUE for each array that we need.
183 * We also try to grow memory as needed if we
186 * Assume sizeNeeded>0.
187 * If *pMemory!=NULL, then assume *pSize>0.
189 * ### this realloc() may unnecessarily copy the old data,
190 * which we know we don't need any more;
191 * is this the best way to do this??
194 ubidi_getMemory(BidiMemoryForAllocation
*bidiMem
, int32_t *pSize
, UBool mayAllocate
, int32_t sizeNeeded
) {
195 void **pMemory
= (void **)bidiMem
;
196 /* check for existing memory */
198 /* we need to allocate memory */
199 if(mayAllocate
&& (*pMemory
=uprv_malloc(sizeNeeded
))!=NULL
) {
206 if(sizeNeeded
<=*pSize
) {
207 /* there is already enough memory */
210 else if(!mayAllocate
) {
211 /* not enough memory, and we must not allocate */
216 /* in most cases, we do not need the copy-old-data part of
217 * realloc, but it is needed when adding runs using getRunsMemory()
218 * in setParaRunsOnly()
220 if((memory
=uprv_realloc(*pMemory
, sizeNeeded
))!=NULL
) {
225 /* we failed to grow */
232 U_CAPI
void U_EXPORT2
233 ubidi_close(UBiDi
*pBiDi
) {
235 pBiDi
->pParaBiDi
=NULL
; /* in case one tries to reuse this block */
236 if(pBiDi
->dirPropsMemory
!=NULL
) {
237 uprv_free(pBiDi
->dirPropsMemory
);
239 if(pBiDi
->levelsMemory
!=NULL
) {
240 uprv_free(pBiDi
->levelsMemory
);
242 if(pBiDi
->runsMemory
!=NULL
) {
243 uprv_free(pBiDi
->runsMemory
);
245 if(pBiDi
->parasMemory
!=NULL
) {
246 uprv_free(pBiDi
->parasMemory
);
248 if(pBiDi
->insertPoints
.points
!=NULL
) {
249 uprv_free(pBiDi
->insertPoints
.points
);
256 /* set to approximate "inverse BiDi" ---------------------------------------- */
258 U_CAPI
void U_EXPORT2
259 ubidi_setInverse(UBiDi
*pBiDi
, UBool isInverse
) {
261 pBiDi
->isInverse
=isInverse
;
262 pBiDi
->reorderingMode
= isInverse
? UBIDI_REORDER_INVERSE_NUMBERS_AS_L
263 : UBIDI_REORDER_DEFAULT
;
267 U_CAPI UBool U_EXPORT2
268 ubidi_isInverse(UBiDi
*pBiDi
) {
270 return pBiDi
->isInverse
;
276 /* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
277 * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre
278 * concept of RUNS_ONLY which is a double operation.
279 * It could be advantageous to divide this into 3 concepts:
280 * a) Operation: direct / inverse / RUNS_ONLY
281 * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R
282 * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL
283 * This would allow combinations not possible today like RUNS_ONLY with
285 * Also allow setting INSERT_MARKS for the direct step of RUNS_ONLY and
286 * REMOVE_CONTROLS for the inverse step.
287 * Not all combinations would be supported, and probably not all of them make sense.
288 * The documentation would need to state which ones are supported and what the
289 * fallbacks are for unsupported combinations.
291 U_CAPI
void U_EXPORT2
292 ubidi_setReorderingMode(UBiDi
*pBiDi
, UBiDiReorderingMode reorderingMode
) {
293 if ((pBiDi
!=NULL
) && (reorderingMode
>= UBIDI_REORDER_DEFAULT
)
294 && (reorderingMode
< UBIDI_REORDER_COUNT
)) {
295 pBiDi
->reorderingMode
= reorderingMode
;
296 pBiDi
->isInverse
= (UBool
)(reorderingMode
== UBIDI_REORDER_INVERSE_NUMBERS_AS_L
);
300 U_CAPI UBiDiReorderingMode U_EXPORT2
301 ubidi_getReorderingMode(UBiDi
*pBiDi
) {
303 return pBiDi
->reorderingMode
;
305 return UBIDI_REORDER_DEFAULT
;
309 U_CAPI
void U_EXPORT2
310 ubidi_setReorderingOptions(UBiDi
*pBiDi
, uint32_t reorderingOptions
) {
311 if (reorderingOptions
& UBIDI_OPTION_REMOVE_CONTROLS
) {
312 reorderingOptions
&=~UBIDI_OPTION_INSERT_MARKS
;
315 pBiDi
->reorderingOptions
=reorderingOptions
;
319 U_CAPI
uint32_t U_EXPORT2
320 ubidi_getReorderingOptions(UBiDi
*pBiDi
) {
322 return pBiDi
->reorderingOptions
;
328 U_CAPI UBiDiDirection U_EXPORT2
329 ubidi_getBaseDirection(const UChar
*text
,
336 if( text
==NULL
|| length
<-1 ){
337 return UBIDI_NEUTRAL
;
341 length
=u_strlen(text
);
344 for( i
= 0 ; i
< length
; ) {
345 /* i is incremented by U16_NEXT */
346 U16_NEXT(text
, i
, length
, uchar
);
347 dir
= u_charDirection(uchar
);
348 if( dir
== U_LEFT_TO_RIGHT
)
350 if( dir
== U_RIGHT_TO_LEFT
|| dir
==U_RIGHT_TO_LEFT_ARABIC
)
353 return UBIDI_NEUTRAL
;
356 /* perform (P2)..(P3) ------------------------------------------------------- */
359 * Get the directional properties for the text,
360 * calculate the flags bit-set, and
361 * determine the paragraph level if necessary.
364 getDirProps(UBiDi
*pBiDi
) {
365 const UChar
*text
=pBiDi
->text
;
366 DirProp
*dirProps
=pBiDi
->dirPropsMemory
; /* pBiDi->dirProps is const */
368 int32_t i
=0, i1
, length
=pBiDi
->originalLength
;
369 Flags flags
=0; /* collect all directionalities in the text */
371 DirProp dirProp
=0, paraDirDefault
=0;/* initialize to avoid compiler warnings */
372 UBool isDefaultLevel
=IS_DEFAULT_LEVEL(pBiDi
->paraLevel
);
373 /* for inverse BiDi, the default para level is set to RTL if there is a
374 strong R or AL character at either end of the text */
375 UBool isDefaultLevelInverse
=isDefaultLevel
&& (UBool
)
376 (pBiDi
->reorderingMode
==UBIDI_REORDER_INVERSE_LIKE_DIRECT
||
377 pBiDi
->reorderingMode
==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
);
378 int32_t lastArabicPos
=-1;
379 int32_t controlCount
=0;
380 UBool removeBiDiControls
= (UBool
)(pBiDi
->reorderingOptions
&
381 UBIDI_OPTION_REMOVE_CONTROLS
);
384 NOT_CONTEXTUAL
, /* 0: not contextual paraLevel */
385 LOOKING_FOR_STRONG
, /* 1: looking for first strong char */
386 FOUND_STRONG_CHAR
/* 2: found first strong char */
389 int32_t paraStart
=0; /* index of first char in paragraph */
390 DirProp paraDir
; /* == CONTEXT_RTL within paragraphs
391 starting with strong R char */
392 DirProp lastStrongDir
=0; /* for default level & inverse BiDi */
393 int32_t lastStrongLTR
=0; /* for STREAMING option */
395 if(pBiDi
->reorderingOptions
& UBIDI_OPTION_STREAMING
) {
400 paraDirDefault
=pBiDi
->paraLevel
&1 ? CONTEXT_RTL
: 0;
401 paraDir
=paraDirDefault
;
402 lastStrongDir
=paraDirDefault
;
403 state
=LOOKING_FOR_STRONG
;
405 state
=NOT_CONTEXTUAL
;
408 /* count paragraphs and determine the paragraph level (P2..P3) */
410 * see comment in ubidi.h:
411 * the DEFAULT_XXX values are designed so that
412 * their bit 0 alone yields the intended default
414 for( /* i=0 above */ ; i
<length
; ) {
415 /* i is incremented by U16_NEXT */
416 U16_NEXT(text
, i
, length
, uchar
);
417 flags
|=DIRPROP_FLAG(dirProp
=(DirProp
)ubidi_getCustomizedClass(pBiDi
, uchar
));
418 dirProps
[i
-1]=dirProp
|paraDir
;
419 if(uchar
>0xffff) { /* set the lead surrogate's property to BN */
420 flags
|=DIRPROP_FLAG(BN
);
421 dirProps
[i
-2]=(DirProp
)(BN
|paraDir
);
423 if(state
==LOOKING_FOR_STRONG
) {
425 state
=FOUND_STRONG_CHAR
;
428 for(i1
=paraStart
; i1
<i
; i1
++) {
429 dirProps
[i1
]&=~CONTEXT_RTL
;
434 if(dirProp
==R
|| dirProp
==AL
) {
435 state
=FOUND_STRONG_CHAR
;
438 for(i1
=paraStart
; i1
<i
; i1
++) {
439 dirProps
[i1
]|=CONTEXT_RTL
;
447 lastStrongLTR
=i
; /* i is index to next character */
449 else if(dirProp
==R
) {
450 lastStrongDir
=CONTEXT_RTL
;
452 else if(dirProp
==AL
) {
453 lastStrongDir
=CONTEXT_RTL
;
456 else if(dirProp
==B
) {
457 if(pBiDi
->reorderingOptions
& UBIDI_OPTION_STREAMING
) {
458 pBiDi
->length
=i
; /* i is index to next character */
460 if(isDefaultLevelInverse
&& (lastStrongDir
==CONTEXT_RTL
) &&(paraDir
!=lastStrongDir
)) {
461 for( ; paraStart
<i
; paraStart
++) {
462 dirProps
[paraStart
]|=CONTEXT_RTL
;
465 if(i
<length
) { /* B not last char in text */
466 if(!((uchar
==CR
) && (text
[i
]==LF
))) {
470 state
=LOOKING_FOR_STRONG
;
471 paraStart
=i
; /* i is index to next character */
472 paraDir
=paraDirDefault
;
473 lastStrongDir
=paraDirDefault
;
477 if(removeBiDiControls
&& IS_BIDI_CONTROL_CHAR(uchar
)) {
481 if(isDefaultLevelInverse
&& (lastStrongDir
==CONTEXT_RTL
) &&(paraDir
!=lastStrongDir
)) {
482 for(i1
=paraStart
; i1
<length
; i1
++) {
483 dirProps
[i1
]|=CONTEXT_RTL
;
487 pBiDi
->paraLevel
=GET_PARALEVEL(pBiDi
, 0);
489 if(pBiDi
->reorderingOptions
& UBIDI_OPTION_STREAMING
) {
490 if((lastStrongLTR
>pBiDi
->length
) &&
491 (GET_PARALEVEL(pBiDi
, lastStrongLTR
)==0)) {
492 pBiDi
->length
= lastStrongLTR
;
494 if(pBiDi
->length
<pBiDi
->originalLength
) {
498 /* The following line does nothing new for contextual paraLevel, but is
499 needed for absolute paraLevel. */
500 flags
|=DIRPROP_FLAG_LR(pBiDi
->paraLevel
);
502 if(pBiDi
->orderParagraphsLTR
&& (flags
&DIRPROP_FLAG(B
))) {
503 flags
|=DIRPROP_FLAG(L
);
506 pBiDi
->controlCount
= controlCount
;
508 pBiDi
->lastArabicPos
=lastArabicPos
;
511 /* perform (X1)..(X9) ------------------------------------------------------- */
513 /* determine if the text is mixed-directional or single-directional */
514 static UBiDiDirection
515 directionFromFlags(UBiDi
*pBiDi
) {
516 Flags flags
=pBiDi
->flags
;
517 /* if the text contains AN and neutrals, then some neutrals may become RTL */
518 if(!(flags
&MASK_RTL
|| ((flags
&DIRPROP_FLAG(AN
)) && (flags
&MASK_POSSIBLE_N
)))) {
520 } else if(!(flags
&MASK_LTR
)) {
528 * Resolve the explicit levels as specified by explicit embedding codes.
529 * Recalculate the flags to have them reflect the real properties
530 * after taking the explicit embeddings into account.
532 * The BiDi algorithm is designed to result in the same behavior whether embedding
533 * levels are externally specified (from "styled text", supposedly the preferred
534 * method) or set by explicit embedding codes (LRx, RLx, PDF) in the plain text.
535 * That is why (X9) instructs to remove all explicit codes (and BN).
536 * However, in a real implementation, this removal of these codes and their index
537 * positions in the plain text is undesirable since it would result in
538 * reallocated, reindexed text.
539 * Instead, this implementation leaves the codes in there and just ignores them
540 * in the subsequent processing.
541 * In order to get the same reordering behavior, positions with a BN or an
542 * explicit embedding code just get the same level assigned as the last "real"
545 * Some implementations, not this one, then overwrite some of these
546 * directionality properties at "real" same-level-run boundaries by
547 * L or R codes so that the resolution of weak types can be performed on the
548 * entire paragraph at once instead of having to parse it once more and
549 * perform that resolution on same-level-runs.
550 * This limits the scope of the implicit rules in effectively
551 * the same way as the run limits.
553 * Instead, this implementation does not modify these codes.
554 * On one hand, the paragraph has to be scanned for same-level-runs, but
555 * on the other hand, this saves another loop to reset these codes,
556 * or saves making and modifying a copy of dirProps[].
559 * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm.
562 * Handling the stack of explicit levels (Xn):
564 * With the BiDi stack of explicit levels,
565 * as pushed with each LRE, RLE, LRO, and RLO and popped with each PDF,
566 * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL==61.
568 * In order to have a correct push-pop semantics even in the case of overflows,
569 * there are two overflow counters:
570 * - countOver60 is incremented with each LRx at level 60
571 * - from level 60, one RLx increases the level to 61
572 * - countOver61 is incremented with each LRx and RLx at level 61
574 * Popping levels with PDF must work in the opposite order so that level 61
575 * is correct at the correct point. Underflows (too many PDFs) must be checked.
577 * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
579 static UBiDiDirection
580 resolveExplicitLevels(UBiDi
*pBiDi
) {
581 const DirProp
*dirProps
=pBiDi
->dirProps
;
582 UBiDiLevel
*levels
=pBiDi
->levels
;
583 const UChar
*text
=pBiDi
->text
;
585 int32_t i
=0, length
=pBiDi
->length
;
586 Flags flags
=pBiDi
->flags
; /* collect all directionalities in the text */
588 UBiDiLevel level
=GET_PARALEVEL(pBiDi
, 0);
590 UBiDiDirection direction
;
593 /* determine if the text is mixed-directional or single-directional */
594 direction
=directionFromFlags(pBiDi
);
596 /* we may not need to resolve any explicit levels, but for multiple
597 paragraphs we want to loop on all chars to set the para boundaries */
598 if((direction
!=UBIDI_MIXED
) && (pBiDi
->paraCount
==1)) {
599 /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
600 } else if((pBiDi
->paraCount
==1) &&
601 (!(flags
&MASK_EXPLICIT
) ||
602 (pBiDi
->reorderingMode
> UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL
))) {
603 /* mixed, but all characters are at the same embedding level */
604 /* or we are in "inverse BiDi" */
605 /* and we don't have contextual multiple paragraphs with some B char */
606 /* set all levels to the paragraph level */
607 for(i
=0; i
<length
; ++i
) {
611 /* continue to perform (Xn) */
613 /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
614 /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */
615 UBiDiLevel embeddingLevel
=level
, newLevel
, stackTop
=0;
617 UBiDiLevel stack
[UBIDI_MAX_EXPLICIT_LEVEL
]; /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL */
618 uint32_t countOver60
=0, countOver61
=0; /* count overflows of explicit levels */
620 /* recalculate the flags */
623 for(i
=0; i
<length
; ++i
) {
624 dirProp
=NO_CONTEXT_RTL(dirProps
[i
]);
629 newLevel
=(UBiDiLevel
)((embeddingLevel
+2)&~(UBIDI_LEVEL_OVERRIDE
|1)); /* least greater even level */
630 if(newLevel
<=UBIDI_MAX_EXPLICIT_LEVEL
) {
631 stack
[stackTop
]=embeddingLevel
;
633 embeddingLevel
=newLevel
;
635 embeddingLevel
|=UBIDI_LEVEL_OVERRIDE
;
637 /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE
638 since this has already been done for newLevel which is
639 the source for embeddingLevel.
641 } else if((embeddingLevel
&~UBIDI_LEVEL_OVERRIDE
)==UBIDI_MAX_EXPLICIT_LEVEL
) {
643 } else /* (embeddingLevel&~UBIDI_LEVEL_OVERRIDE)==UBIDI_MAX_EXPLICIT_LEVEL-1 */ {
646 flags
|=DIRPROP_FLAG(BN
);
651 newLevel
=(UBiDiLevel
)(((embeddingLevel
&~UBIDI_LEVEL_OVERRIDE
)+1)|1); /* least greater odd level */
652 if(newLevel
<=UBIDI_MAX_EXPLICIT_LEVEL
) {
653 stack
[stackTop
]=embeddingLevel
;
655 embeddingLevel
=newLevel
;
657 embeddingLevel
|=UBIDI_LEVEL_OVERRIDE
;
659 /* we don't need to set UBIDI_LEVEL_OVERRIDE off for RLE
660 since this has already been done for newLevel which is
661 the source for embeddingLevel.
666 flags
|=DIRPROP_FLAG(BN
);
670 /* handle all the overflow cases first */
673 } else if(countOver60
>0 && (embeddingLevel
&~UBIDI_LEVEL_OVERRIDE
)!=UBIDI_MAX_EXPLICIT_LEVEL
) {
674 /* handle LRx overflows from level 60 */
676 } else if(stackTop
>0) {
677 /* this is the pop operation; it also pops level 61 while countOver60>0 */
679 embeddingLevel
=stack
[stackTop
];
680 /* } else { (underflow) */
682 flags
|=DIRPROP_FLAG(BN
);
686 countOver60
=countOver61
=0;
687 level
=GET_PARALEVEL(pBiDi
, i
);
689 embeddingLevel
=GET_PARALEVEL(pBiDi
, i
+1);
690 if(!((text
[i
]==CR
) && (text
[i
+1]==LF
))) {
691 pBiDi
->paras
[paraIndex
++]=i
+1;
694 flags
|=DIRPROP_FLAG(B
);
697 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
698 /* they will get their levels set correctly in adjustWSLevels() */
699 flags
|=DIRPROP_FLAG(BN
);
702 /* all other types get the "real" level */
703 if(level
!=embeddingLevel
) {
704 level
=embeddingLevel
;
705 if(level
&UBIDI_LEVEL_OVERRIDE
) {
706 flags
|=DIRPROP_FLAG_O(level
)|DIRPROP_FLAG_MULTI_RUNS
;
708 flags
|=DIRPROP_FLAG_E(level
)|DIRPROP_FLAG_MULTI_RUNS
;
711 if(!(level
&UBIDI_LEVEL_OVERRIDE
)) {
712 flags
|=DIRPROP_FLAG(dirProp
);
718 * We need to set reasonable levels even on BN codes and
719 * explicit codes because we will later look at same-level runs (X10).
723 if(flags
&MASK_EMBEDDING
) {
724 flags
|=DIRPROP_FLAG_LR(pBiDi
->paraLevel
);
726 if(pBiDi
->orderParagraphsLTR
&& (flags
&DIRPROP_FLAG(B
))) {
727 flags
|=DIRPROP_FLAG(L
);
730 /* subsequently, ignore the explicit codes and BN (X9) */
732 /* again, determine if the text is mixed-directional or single-directional */
734 direction
=directionFromFlags(pBiDi
);
741 * Use a pre-specified embedding levels array:
743 * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
744 * ignore all explicit codes (X9),
745 * and check all the preset levels.
747 * Recalculate the flags to have them reflect the real properties
748 * after taking the explicit embeddings into account.
750 static UBiDiDirection
751 checkExplicitLevels(UBiDi
*pBiDi
, UErrorCode
*pErrorCode
) {
752 const DirProp
*dirProps
=pBiDi
->dirProps
;
754 UBiDiLevel
*levels
=pBiDi
->levels
;
755 const UChar
*text
=pBiDi
->text
;
757 int32_t i
, length
=pBiDi
->length
;
758 Flags flags
=0; /* collect all directionalities in the text */
760 uint32_t paraIndex
=0;
762 for(i
=0; i
<length
; ++i
) {
764 dirProp
=NO_CONTEXT_RTL(dirProps
[i
]);
765 if(level
&UBIDI_LEVEL_OVERRIDE
) {
766 /* keep the override flag in levels[i] but adjust the flags */
767 level
&=~UBIDI_LEVEL_OVERRIDE
; /* make the range check below simpler */
768 flags
|=DIRPROP_FLAG_O(level
);
771 flags
|=DIRPROP_FLAG_E(level
)|DIRPROP_FLAG(dirProp
);
773 if((level
<GET_PARALEVEL(pBiDi
, i
) &&
774 !((0==level
)&&(dirProp
==B
))) ||
775 (UBIDI_MAX_EXPLICIT_LEVEL
<level
)) {
776 /* level out of bounds */
777 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
780 if((dirProp
==B
) && ((i
+1)<length
)) {
781 if(!((text
[i
]==CR
) && (text
[i
+1]==LF
))) {
782 pBiDi
->paras
[paraIndex
++]=i
+1;
786 if(flags
&MASK_EMBEDDING
) {
787 flags
|=DIRPROP_FLAG_LR(pBiDi
->paraLevel
);
790 /* determine if the text is mixed-directional or single-directional */
792 return directionFromFlags(pBiDi
);
795 /******************************************************************
796 The Properties state machine table
797 *******************************************************************
799 All table cells are 8 bits:
800 bits 0..4: next state
801 bits 5..7: action to perform (if > 0)
803 Cells may be of format "n" where n represents the next state
804 (except for the rightmost column).
805 Cells may also be of format "s(x,y)" where x represents an action
806 to perform and y represents the next state.
808 *******************************************************************
809 Definitions and type for properties state table
810 *******************************************************************
/*
 * Layout of the properties state table.
 * Each cell is 8 bits: bits 0..4 hold the next state, bits 5..7 hold the
 * action to perform (0 = none). The last column of a row holds the reduced
 * property assigned to a run instead of a state/action cell.
 */
#define IMPTABPROPS_COLUMNS 14
/* index of the rightmost ("Res") column */
#define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1)
/* extract the next state (low 5 bits) from a cell */
#define GET_STATEPROPS(cell) ((cell)&0x1f)
/* extract the action (high 3 bits) from a cell */
#define GET_ACTIONPROPS(cell) ((cell)>>5)
/*
 * Build a cell from an action and a next state.
 * Both arguments are parenthesized so that expression arguments
 * (e.g. a|b) are combined as a unit before shifting/adding.
 */
#define s(action, newState) ((uint8_t)((newState)+((action)<<5)))
818 static const uint8_t groupProp
[] = /* dirProp regrouped */
820 /* L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN */
821 0, 1, 2, 7, 8, 3, 9, 6, 5, 4, 4, 10, 10, 12, 10, 10, 10, 11, 10
/*
 * Reduced dirProp values.
 * These appear in the "Res" column of the properties state table, and
 * DirProp_B determines the column count of the levels state tables.
 */
enum {
    DirProp_L=0,
    DirProp_R=1,
    DirProp_EN=2,
    DirProp_AN=3,
    DirProp_ON=4,
    DirProp_S=5,
    DirProp_B=6
};
825 /******************************************************************
827 PROPERTIES STATE TABLE
829 In table impTabProps,
830 - the ON column regroups ON and WS
831 - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF
832 - the Res column is the reduced property assigned to a run
834 Action 1: process current run1, init new run1
836 3: process run1, process run2, init new run1
837 4: process run1, set run1=run2, init new run2
840 1) This table is used in resolveImplicitLevels().
841 2) This table triggers actions when there is a change in the Bidi
842 property of incoming characters (action 1).
843 3) Most such property sequences are processed immediately (in
844 fact, passed to processPropertySeq()).
845 4) However, numbers are assembled as one sequence. This means
846 that undefined situations (like CS following digits, until
847 it is known if the next char will be a digit) are held until
848 following chars define them.
849 Example: digits followed by CS, then comes another CS or ON;
850 the digits will be processed, then the CS assigned
851 as the start of an ON sequence (action 3).
852 5) There are cases where more than one sequence must be
853 processed, for instance digits followed by CS followed by L:
854 the digits must be processed as one sequence, and the CS
855 must be processed as an ON sequence, all this before starting
856 assembling chars for the opening L sequence.
860 static const uint8_t impTabProps
[][IMPTABPROPS_COLUMNS
] =
862 /* L , R , EN , AN , ON , S , B , ES , ET , CS , BN , NSM , AL , Res */
863 /* 0 Init */ { 1 , 2 , 4 , 5 , 7 , 15 , 17 , 7 , 9 , 7 , 0 , 7 , 3 , DirProp_ON
},
864 /* 1 L */ { 1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 1 , 1 , s(1,3), DirProp_L
},
865 /* 2 R */ { s(1,1), 2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 2 , 2 , s(1,3), DirProp_R
},
866 /* 3 AL */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8), 3 , 3 , 3 , DirProp_R
},
867 /* 4 EN */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10), 11 ,s(2,10), 4 , 4 , s(1,3), DirProp_EN
},
868 /* 5 AN */ { s(1,1), s(1,2), s(1,4), 5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12), 5 , 5 , s(1,3), DirProp_AN
},
869 /* 6 AL:EN/AN */ { s(1,1), s(1,2), 6 , 6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13), 6 , 6 , s(1,3), DirProp_AN
},
870 /* 7 ON */ { s(1,1), s(1,2), s(1,4), s(1,5), 7 ,s(1,15),s(1,17), 7 ,s(2,14), 7 , 7 , 7 , s(1,3), DirProp_ON
},
871 /* 8 AL:ON */ { s(1,1), s(1,2), s(1,6), s(1,6), 8 ,s(1,16),s(1,17), 8 , 8 , 8 , 8 , 8 , s(1,3), DirProp_ON
},
872 /* 9 ET */ { s(1,1), s(1,2), 4 , s(1,5), 7 ,s(1,15),s(1,17), 7 , 9 , 7 , 9 , 9 , s(1,3), DirProp_ON
},
873 /*10 EN+ES/CS */ { s(3,1), s(3,2), 4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 10 , s(4,7), s(3,3), DirProp_EN
},
874 /*11 EN+ET */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 11 , s(1,7), 11 , 11 , s(1,3), DirProp_EN
},
875 /*12 AN+CS */ { s(3,1), s(3,2), s(3,4), 5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 12 , s(4,7), s(3,3), DirProp_AN
},
876 /*13 AL:EN/AN+CS */ { s(3,1), s(3,2), 6 , 6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8), 13 , s(4,8), s(3,3), DirProp_AN
},
877 /*14 ON+ET */ { s(1,1), s(1,2), s(4,4), s(1,5), 7 ,s(1,15),s(1,17), 7 , 14 , 7 , 14 , 14 , s(1,3), DirProp_ON
},
878 /*15 S */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7), 15 ,s(1,17), s(1,7), s(1,9), s(1,7), 15 , s(1,7), s(1,3), DirProp_S
},
879 /*16 AL:S */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8), 16 ,s(1,17), s(1,8), s(1,8), s(1,8), 16 , s(1,8), s(1,3), DirProp_S
},
880 /*17 B */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15), 17 , s(1,7), s(1,9), s(1,7), 17 , s(1,7), s(1,3), DirProp_B
}
883 /* we must undef macro s because the levels tables have a different
884 * structure (4 bits for action and 4 bits for next state).
888 /******************************************************************
889 The levels state machine tables
890 *******************************************************************
892 All table cells are 8 bits:
893 bits 0..3: next state
894 bits 4..7: action to perform (if > 0)
896 Cells may be of format "n" where n represents the next state
897 (except for the rightmost column).
898 Cells may also be of format "s(x,y)" where x represents an action
899 to perform and y represents the next state.
901 This format limits each table to 16 states and to 15 actions.
903 *******************************************************************
904 Definitions and type for levels state tables
905 *******************************************************************
/*
 * Layout of the levels state tables.
 * Each cell is 8 bits: bits 0..3 hold the next state, bits 4..7 hold the
 * action to perform (0 = none). One column per reduced dirProp
 * (DirProp_L..DirProp_B) plus a final "Res" column.
 */
#define IMPTABLEVELS_COLUMNS (DirProp_B + 2)
/* index of the rightmost ("Res") column */
#define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1)
/* extract the next state (low 4 bits) from a cell */
#define GET_STATE(cell) ((cell)&0x0f)
/* extract the action (high 4 bits) from a cell */
#define GET_ACTION(cell) ((cell)>>4)
/*
 * Build a cell from an action and a next state.
 * Both arguments are parenthesized so that expression arguments
 * (e.g. a|b) are combined as a unit before shifting/adding.
 */
#define s(action, newState) ((uint8_t)((newState)+((action)<<4)))
913 typedef uint8_t ImpTab
[][IMPTABLEVELS_COLUMNS
];
914 typedef uint8_t ImpAct
[];
916 /* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct,
917 * instead of having a pair of ImpTab and a pair of ImpAct.
919 typedef struct ImpTabPair
{
920 const void * pImpTab
[2];
921 const void * pImpAct
[2];
924 /******************************************************************
928 In all levels state tables,
929 - state 0 is the initial state
930 - the Res column is the increment to add to the text level
931 for this property sequence.
933 The impAct arrays for each table of a pair map the local action
934 numbers of the table to the total list of actions. For instance,
935 action 2 in a given table corresponds to the action number which
936 appears in entry [2] of the impAct array for that table.
937 The first entry of all impAct arrays must be 0.
939 Action 1: init conditional sequence
940 2: prepend conditional sequence to current sequence
941 3: set ON sequence to new level - 1
942 4: init EN/AN/ON sequence
943 5: fix EN/AN/ON sequence followed by R
944 6: set previous level sequence to level 2
947 1) These tables are used in processPropertySeq(). The input
948 is property sequences as determined by resolveImplicitLevels.
949 2) Most such property sequences are processed immediately
950 (levels are assigned).
951 3) However, some sequences cannot be assigned a final level till
952 one or more following sequences are received. For instance,
953 ON following an R sequence within an even-level paragraph.
954 If the following sequence is R, the ON sequence will be
955 assigned basic run level+1, and so will the R sequence.
956 4) S is generally handled like ON, since its level will be fixed
957 to paragraph level in adjustWSLevels().
961 static const ImpTab impTabL_DEFAULT
= /* Even paragraph level */
962 /* In this table, conditional sequences receive the higher possible level
963 until proven otherwise.
966 /* L , R , EN , AN , ON , S , B , Res */
967 /* 0 : init */ { 0 , 1 , 0 , 2 , 0 , 0 , 0 , 0 },
968 /* 1 : R */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 1 },
969 /* 2 : AN */ { 0 , 1 , 0 , 2 , s(1,5), s(1,5), 0 , 2 },
970 /* 3 : R+EN/AN */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 2 },
971 /* 4 : R+ON */ { s(2,0), 1 , 3 , 3 , 4 , 4 , s(2,0), 1 },
972 /* 5 : AN+ON */ { s(2,0), 1 , s(2,0), 2 , 5 , 5 , s(2,0), 1 }
974 static const ImpTab impTabR_DEFAULT
= /* Odd paragraph level */
975 /* In this table, conditional sequences receive the lower possible level
976 until proven otherwise.
979 /* L , R , EN , AN , ON , S , B , Res */
980 /* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 },
981 /* 1 : L */ { 1 , 0 , 1 , 3 , s(1,4), s(1,4), 0 , 1 },
982 /* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 },
983 /* 3 : L+AN */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 1 },
984 /* 4 : L+ON */ { s(2,1), 0 , s(2,1), 3 , 4 , 4 , 0 , 0 },
985 /* 5 : L+AN+ON */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 0 }
987 static const ImpAct impAct0
= {0,1,2,3,4,5,6};
988 static const ImpTabPair impTab_DEFAULT
= {{&impTabL_DEFAULT
,
990 {&impAct0
, &impAct0
}};
992 static const ImpTab impTabL_NUMBERS_SPECIAL
= /* Even paragraph level */
993 /* In this table, conditional sequences receive the higher possible level
994 until proven otherwise.
997 /* L , R , EN , AN , ON , S , B , Res */
998 /* 0 : init */ { 0 , 2 , 1 , 1 , 0 , 0 , 0 , 0 },
999 /* 1 : L+EN/AN */ { 0 , 2 , 1 , 1 , 0 , 0 , 0 , 2 },
1000 /* 2 : R */ { 0 , 2 , 4 , 4 , s(1,3), 0 , 0 , 1 },
1001 /* 3 : R+ON */ { s(2,0), 2 , 4 , 4 , 3 , 3 , s(2,0), 1 },
1002 /* 4 : R+EN/AN */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 2 }
1004 static const ImpTabPair impTab_NUMBERS_SPECIAL
= {{&impTabL_NUMBERS_SPECIAL
,
1006 {&impAct0
, &impAct0
}};
1008 static const ImpTab impTabL_GROUP_NUMBERS_WITH_R
=
1009 /* In this table, EN/AN+ON sequences receive levels as if associated with R
1010 until proven that there is L or sor/eor on both sides. AN is handled like EN.
1013 /* L , R , EN , AN , ON , S , B , Res */
1014 /* 0 init */ { 0 , 3 , s(1,1), s(1,1), 0 , 0 , 0 , 0 },
1015 /* 1 EN/AN */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 2 },
1016 /* 2 EN/AN+ON */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 1 },
1017 /* 3 R */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 1 },
1018 /* 4 R+ON */ { s(2,0), 3 , 5 , 5 , 4 , s(2,0), s(2,0), 1 },
1019 /* 5 R+EN/AN */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 2 }
1021 static const ImpTab impTabR_GROUP_NUMBERS_WITH_R
=
1022 /* In this table, EN/AN+ON sequences receive levels as if associated with R
1023 until proven that there is L on both sides. AN is handled like EN.
1026 /* L , R , EN , AN , ON , S , B , Res */
1027 /* 0 init */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 0 },
1028 /* 1 EN/AN */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 1 },
1029 /* 2 L */ { 2 , 0 , s(1,4), s(1,4), s(1,3), 0 , 0 , 1 },
1030 /* 3 L+ON */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 0 },
1031 /* 4 L+EN/AN */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 1 }
1033 static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R
= {
1034 {&impTabL_GROUP_NUMBERS_WITH_R
,
1035 &impTabR_GROUP_NUMBERS_WITH_R
},
1036 {&impAct0
, &impAct0
}};
1039 static const ImpTab impTabL_INVERSE_NUMBERS_AS_L
=
1040 /* This table is identical to the Default LTR table except that EN and AN are
1044 /* L , R , EN , AN , ON , S , B , Res */
1045 /* 0 : init */ { 0 , 1 , 0 , 0 , 0 , 0 , 0 , 0 },
1046 /* 1 : R */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 1 },
1047 /* 2 : AN */ { 0 , 1 , 0 , 0 , s(1,5), s(1,5), 0 , 2 },
1048 /* 3 : R+EN/AN */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 2 },
1049 /* 4 : R+ON */ { s(2,0), 1 , s(2,0), s(2,0), 4 , 4 , s(2,0), 1 },
1050 /* 5 : AN+ON */ { s(2,0), 1 , s(2,0), s(2,0), 5 , 5 , s(2,0), 1 }
1052 static const ImpTab impTabR_INVERSE_NUMBERS_AS_L
=
1053 /* This table is identical to the Default RTL table except that EN and AN are
1057 /* L , R , EN , AN , ON , S , B , Res */
1058 /* 0 : init */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 0 },
1059 /* 1 : L */ { 1 , 0 , 1 , 1 , s(1,4), s(1,4), 0 , 1 },
1060 /* 2 : EN/AN */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 1 },
1061 /* 3 : L+AN */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 1 },
1062 /* 4 : L+ON */ { s(2,1), 0 , s(2,1), s(2,1), 4 , 4 , 0 , 0 },
1063 /* 5 : L+AN+ON */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 0 }
1065 static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L
= {
1066 {&impTabL_INVERSE_NUMBERS_AS_L
,
1067 &impTabR_INVERSE_NUMBERS_AS_L
},
1068 {&impAct0
, &impAct0
}};
1070 static const ImpTab impTabR_INVERSE_LIKE_DIRECT
= /* Odd paragraph level */
1071 /* In this table, conditional sequences receive the lower possible level
1072 until proven otherwise.
1075 /* L , R , EN , AN , ON , S , B , Res */
1076 /* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 },
1077 /* 1 : L */ { 1 , 0 , 1 , 2 , s(1,3), s(1,3), 0 , 1 },
1078 /* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 },
1079 /* 3 : L+ON */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 0 },
1080 /* 4 : L+ON+AN */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 3 },
1081 /* 5 : L+AN+ON */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 2 },
1082 /* 6 : L+ON+EN */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 1 }
1084 static const ImpAct impAct1
= {0,1,11,12};
1085 /* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc"
1087 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT
= {
1089 &impTabR_INVERSE_LIKE_DIRECT
},
1090 {&impAct0
, &impAct1
}};
1092 static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS
=
1093 /* The case handled in this table is (visually): R EN L
1096 /* L , R , EN , AN , ON , S , B , Res */
1097 /* 0 : init */ { 0 , s(6,3), 0 , 1 , 0 , 0 , 0 , 0 },
1098 /* 1 : L+AN */ { 0 , s(6,3), 0 , 1 , s(1,2), s(3,0), 0 , 4 },
1099 /* 2 : L+AN+ON */ { s(2,0), s(6,3), s(2,0), 1 , 2 , s(3,0), s(2,0), 3 },
1100 /* 3 : R */ { 0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0), 0 , 3 },
1101 /* 4 : R+ON */ { s(3,0), s(4,3), s(5,5), s(5,6), 4 , s(3,0), s(3,0), 3 },
1102 /* 5 : R+EN */ { s(3,0), s(4,3), 5 , s(5,6), s(1,4), s(3,0), s(3,0), 4 },
1103 /* 6 : R+AN */ { s(3,0), s(4,3), s(5,5), 6 , s(1,4), s(3,0), s(3,0), 4 }
1105 static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS
=
1106 /* The cases handled in this table are (visually): R EN L
1110 /* L , R , EN , AN , ON , S , B , Res */
1111 /* 0 : init */ { s(1,3), 0 , 1 , 1 , 0 , 0 , 0 , 0 },
1112 /* 1 : R+EN/AN */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 1 },
1113 /* 2 : R+EN/AN+ON */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 0 },
1114 /* 3 : L */ { 3 , 0 , 3 , s(3,6), s(1,4), s(4,0), 0 , 1 },
1115 /* 4 : L+ON */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 0 },
1116 /* 5 : L+ON+EN */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 1 },
1117 /* 6 : L+AN */ { s(5,3), s(4,0), 6 , 6 , 4 , s(4,0), s(4,0), 3 }
1119 static const ImpAct impAct2
= {0,1,7,8,9,10};
1120 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS
= {
1121 {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS
,
1122 &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS
},
1123 {&impAct0
, &impAct2
}};
1125 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL
= {
1126 {&impTabL_NUMBERS_SPECIAL
,
1127 &impTabR_INVERSE_LIKE_DIRECT
},
1128 {&impAct0
, &impAct1
}};
1130 static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS
=
1131 /* The case handled in this table is (visually): R EN L
1134 /* L , R , EN , AN , ON , S , B , Res */
1135 /* 0 : init */ { 0 , s(6,2), 1 , 1 , 0 , 0 , 0 , 0 },
1136 /* 1 : L+EN/AN */ { 0 , s(6,2), 1 , 1 , 0 , s(3,0), 0 , 4 },
1137 /* 2 : R */ { 0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0), 0 , 3 },
1138 /* 3 : R+ON */ { s(3,0), s(4,2), s(5,4), s(5,4), 3 , s(3,0), s(3,0), 3 },
1139 /* 4 : R+EN/AN */ { s(3,0), s(4,2), 4 , 4 , s(1,3), s(3,0), s(3,0), 4 }
1141 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS
= {
1142 {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS
,
1143 &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS
},
1144 {&impAct0
, &impAct2
}};
1149 const ImpTab
* pImpTab
; /* level table pointer */
1150 const ImpAct
* pImpAct
; /* action map array */
1151 int32_t startON
; /* start of ON sequence */
1152 int32_t startL2EN
; /* start of level 2 sequence */
1153 int32_t lastStrongRTL
; /* index of last found R or AL */
1154 int32_t state
; /* current state */
1155 UBiDiLevel runLevel
; /* run level before implicit solving */
1158 /*------------------------------------------------------------------------*/
1161 addPoint(UBiDi
*pBiDi
, int32_t pos
, int32_t flag
)
1162 /* param pos: position where to insert
1163 param flag: one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER
1166 #define FIRSTALLOC 10
1168 InsertPoints
* pInsertPoints
=&(pBiDi
->insertPoints
);
1170 if (pInsertPoints
->capacity
== 0)
1172 pInsertPoints
->points
=uprv_malloc(sizeof(Point
)*FIRSTALLOC
);
1173 if (pInsertPoints
->points
== NULL
)
1175 pInsertPoints
->errorCode
=U_MEMORY_ALLOCATION_ERROR
;
1178 pInsertPoints
->capacity
=FIRSTALLOC
;
1180 if (pInsertPoints
->size
>= pInsertPoints
->capacity
) /* no room for new point */
1182 void * savePoints
=pInsertPoints
->points
;
1183 pInsertPoints
->points
=uprv_realloc(pInsertPoints
->points
,
1184 pInsertPoints
->capacity
*2*sizeof(Point
));
1185 if (pInsertPoints
->points
== NULL
)
1187 pInsertPoints
->points
=savePoints
;
1188 pInsertPoints
->errorCode
=U_MEMORY_ALLOCATION_ERROR
;
1191 else pInsertPoints
->capacity
*=2;
1195 pInsertPoints
->points
[pInsertPoints
->size
]=point
;
1196 pInsertPoints
->size
++;
1200 /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
1203 * This implementation of the (Wn) rules applies all rules in one pass.
1204 * In order to do so, it needs a look-ahead of typically 1 character
1205 * (except for W5: sequences of ET) and keeps track of changes
1206 * in a rule Wp that affect a later Wq (p<q).
1208 * The (Nn) and (In) rules are also performed in that same single loop,
1209 * but effectively one iteration behind for white space.
1211 * Since all implicit rules are performed in one step, it is not necessary
1212 * to actually store the intermediate directional properties in dirProps[].
1216 processPropertySeq(UBiDi
*pBiDi
, LevState
*pLevState
, uint8_t _prop
,
1217 int32_t start
, int32_t limit
) {
1218 uint8_t cell
, oldStateSeq
, actionSeq
;
1219 const ImpTab
* pImpTab
=pLevState
->pImpTab
;
1220 const ImpAct
* pImpAct
=pLevState
->pImpAct
;
1221 UBiDiLevel
* levels
=pBiDi
->levels
;
1222 UBiDiLevel level
, addLevel
;
1223 InsertPoints
* pInsertPoints
;
1226 start0
=start
; /* save original start position */
1227 oldStateSeq
=(uint8_t)pLevState
->state
;
1228 cell
=(*pImpTab
)[oldStateSeq
][_prop
];
1229 pLevState
->state
=GET_STATE(cell
); /* isolate the new state */
1230 actionSeq
=(*pImpAct
)[GET_ACTION(cell
)]; /* isolate the action */
1231 addLevel
=(*pImpTab
)[pLevState
->state
][IMPTABLEVELS_RES
];
1235 case 1: /* init ON seq */
1236 pLevState
->startON
=start0
;
1239 case 2: /* prepend ON seq to current seq */
1240 start
=pLevState
->startON
;
1243 case 3: /* L or S after possible relevant EN/AN */
1244 /* check if we had EN after R/AL */
1245 if (pLevState
->startL2EN
>= 0) {
1246 addPoint(pBiDi
, pLevState
->startL2EN
, LRM_BEFORE
);
1248 pLevState
->startL2EN
=-1; /* not within previous if since could also be -2 */
1249 /* check if we had any relevant EN/AN after R/AL */
1250 pInsertPoints
=&(pBiDi
->insertPoints
);
1251 if ((pInsertPoints
->capacity
== 0) ||
1252 (pInsertPoints
->size
<= pInsertPoints
->confirmed
))
1254 /* nothing, just clean up */
1255 pLevState
->lastStrongRTL
=-1;
1256 /* check if we have a pending conditional segment */
1257 level
=(*pImpTab
)[oldStateSeq
][IMPTABLEVELS_RES
];
1258 if ((level
& 1) && (pLevState
->startON
> 0)) { /* after ON */
1259 start
=pLevState
->startON
; /* reset to basic run level */
1261 if (_prop
== DirProp_S
) /* add LRM before S */
1263 addPoint(pBiDi
, start0
, LRM_BEFORE
);
1264 pInsertPoints
->confirmed
=pInsertPoints
->size
;
1268 /* reset previous RTL cont to level for LTR text */
1269 for (k
=pLevState
->lastStrongRTL
+1; k
<start0
; k
++)
1271 /* reset odd level, leave runLevel+2 as is */
1272 levels
[k
]=(levels
[k
] - 2) & ~1;
1274 /* mark insert points as confirmed */
1275 pInsertPoints
->confirmed
=pInsertPoints
->size
;
1276 pLevState
->lastStrongRTL
=-1;
1277 if (_prop
== DirProp_S
) /* add LRM before S */
1279 addPoint(pBiDi
, start0
, LRM_BEFORE
);
1280 pInsertPoints
->confirmed
=pInsertPoints
->size
;
1284 case 4: /* R/AL after possible relevant EN/AN */
1286 pInsertPoints
=&(pBiDi
->insertPoints
);
1287 if (pInsertPoints
->capacity
> 0)
1288 /* remove all non confirmed insert points */
1289 pInsertPoints
->size
=pInsertPoints
->confirmed
;
1290 pLevState
->startON
=-1;
1291 pLevState
->startL2EN
=-1;
1292 pLevState
->lastStrongRTL
=limit
- 1;
1295 case 5: /* EN/AN after R/AL + possible cont */
1296 /* check for real AN */
1297 if ((_prop
== DirProp_AN
) && (NO_CONTEXT_RTL(pBiDi
->dirProps
[start0
]) == AN
) &&
1298 (pBiDi
->reorderingMode
!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
))
1301 if (pLevState
->startL2EN
== -1) /* if no relevant EN already found */
1303 /* just note the righmost digit as a strong RTL */
1304 pLevState
->lastStrongRTL
=limit
- 1;
1307 if (pLevState
->startL2EN
>= 0) /* after EN, no AN */
1309 addPoint(pBiDi
, pLevState
->startL2EN
, LRM_BEFORE
);
1310 pLevState
->startL2EN
=-2;
1313 addPoint(pBiDi
, start0
, LRM_BEFORE
);
1316 /* if first EN/AN after R/AL */
1317 if (pLevState
->startL2EN
== -1) {
1318 pLevState
->startL2EN
=start0
;
1322 case 6: /* note location of latest R/AL */
1323 pLevState
->lastStrongRTL
=limit
- 1;
1324 pLevState
->startON
=-1;
1327 case 7: /* L after R+ON/EN/AN */
1328 /* include possible adjacent number on the left */
1329 for (k
=start0
-1; k
>=0 && !(levels
[k
]&1); k
--);
1331 addPoint(pBiDi
, k
, RLM_BEFORE
); /* add RLM before */
1332 pInsertPoints
=&(pBiDi
->insertPoints
);
1333 pInsertPoints
->confirmed
=pInsertPoints
->size
; /* confirm it */
1335 pLevState
->startON
=start0
;
1338 case 8: /* AN after L */
1339 /* AN numbers between L text on both sides may be trouble. */
1340 /* tentatively bracket with LRMs; will be confirmed if followed by L */
1341 addPoint(pBiDi
, start0
, LRM_BEFORE
); /* add LRM before */
1342 addPoint(pBiDi
, start0
, LRM_AFTER
); /* add LRM after */
1345 case 9: /* R after L+ON/EN/AN */
1346 /* false alert, infirm LRMs around previous AN */
1347 pInsertPoints
=&(pBiDi
->insertPoints
);
1348 pInsertPoints
->size
=pInsertPoints
->confirmed
;
1349 if (_prop
== DirProp_S
) /* add RLM before S */
1351 addPoint(pBiDi
, start0
, RLM_BEFORE
);
1352 pInsertPoints
->confirmed
=pInsertPoints
->size
;
1356 case 10: /* L after L+ON/AN */
1357 level
=pLevState
->runLevel
+ addLevel
;
1358 for(k
=pLevState
->startON
; k
<start0
; k
++) {
1359 if (levels
[k
]<level
)
1362 pInsertPoints
=&(pBiDi
->insertPoints
);
1363 pInsertPoints
->confirmed
=pInsertPoints
->size
; /* confirm inserts */
1364 pLevState
->startON
=start0
;
1367 case 11: /* L after L+ON+EN/AN/ON */
1368 level
=pLevState
->runLevel
;
1369 for(k
=start0
-1; k
>=pLevState
->startON
; k
--) {
1370 if(levels
[k
]==level
+3) {
1371 while(levels
[k
]==level
+3) {
1374 while(levels
[k
]==level
) {
1378 if(levels
[k
]==level
+2) {
1386 case 12: /* R after L+ON+EN/AN/ON */
1387 level
=pLevState
->runLevel
+1;
1388 for(k
=start0
-1; k
>=pLevState
->startON
; k
--) {
1389 if(levels
[k
]>level
) {
1395 default: /* we should never get here */
1400 if((addLevel
) || (start
< start0
)) {
1401 level
=pLevState
->runLevel
+ addLevel
;
1402 for(k
=start
; k
<limit
; k
++) {
1409 resolveImplicitLevels(UBiDi
*pBiDi
,
1410 int32_t start
, int32_t limit
,
1411 DirProp sor
, DirProp eor
) {
1412 const DirProp
*dirProps
=pBiDi
->dirProps
;
1415 int32_t i
, start1
, start2
;
1416 uint8_t oldStateImp
, stateImp
, actionImp
;
1417 uint8_t gprop
, resProp
, cell
;
1419 DirProp nextStrongProp
=R
;
1420 int32_t nextStrongPos
=-1;
1422 levState
.startON
= -1; /* silence gcc flow analysis */
1424 /* check for RTL inverse BiDi mode */
1425 /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to
1426 * loop on the text characters from end to start.
1427 * This would need a different properties state table (at least different
1428 * actions) and different levels state tables (maybe very similar to the
1429 * LTR corresponding ones.
1432 ((start
<pBiDi
->lastArabicPos
) && (GET_PARALEVEL(pBiDi
, start
) & 1) &&
1433 (pBiDi
->reorderingMode
==UBIDI_REORDER_INVERSE_LIKE_DIRECT
||
1434 pBiDi
->reorderingMode
==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
));
1435 /* initialize for levels state table */
1436 levState
.startL2EN
=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
1437 levState
.lastStrongRTL
=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
1439 levState
.runLevel
=pBiDi
->levels
[start
];
1440 levState
.pImpTab
=(const ImpTab
*)((pBiDi
->pImpTabPair
)->pImpTab
)[levState
.runLevel
&1];
1441 levState
.pImpAct
=(const ImpAct
*)((pBiDi
->pImpTabPair
)->pImpAct
)[levState
.runLevel
&1];
1442 processPropertySeq(pBiDi
, &levState
, sor
, start
, start
);
1443 /* initialize for property state table */
1444 if(NO_CONTEXT_RTL(dirProps
[start
])==NSM
) {
1452 for(i
=start
; i
<=limit
; i
++) {
1456 DirProp prop
, prop1
;
1457 prop
=NO_CONTEXT_RTL(dirProps
[i
]);
1460 /* AL before EN does not make it AN */
1462 } else if(prop
==EN
) {
1463 if(nextStrongPos
<=i
) {
1464 /* look for next strong char (L/R/AL) */
1466 nextStrongProp
=R
; /* set default */
1467 nextStrongPos
=limit
;
1468 for(j
=i
+1; j
<limit
; j
++) {
1469 prop1
=NO_CONTEXT_RTL(dirProps
[j
]);
1470 if(prop1
==L
|| prop1
==R
|| prop1
==AL
) {
1471 nextStrongProp
=prop1
;
1477 if(nextStrongProp
==AL
) {
1482 gprop
=groupProp
[prop
];
1484 oldStateImp
=stateImp
;
1485 cell
=impTabProps
[oldStateImp
][gprop
];
1486 stateImp
=GET_STATEPROPS(cell
); /* isolate the new state */
1487 actionImp
=GET_ACTIONPROPS(cell
); /* isolate the action */
1488 if((i
==limit
) && (actionImp
==0)) {
1489 /* there is an unprocessed sequence if its property == eor */
1490 actionImp
=1; /* process the last sequence */
1493 resProp
=impTabProps
[oldStateImp
][IMPTABPROPS_RES
];
1495 case 1: /* process current seq1, init new seq1 */
1496 processPropertySeq(pBiDi
, &levState
, resProp
, start1
, i
);
1499 case 2: /* init new seq2 */
1502 case 3: /* process seq1, process seq2, init new seq1 */
1503 processPropertySeq(pBiDi
, &levState
, resProp
, start1
, start2
);
1504 processPropertySeq(pBiDi
, &levState
, DirProp_ON
, start2
, i
);
1507 case 4: /* process seq1, set seq1=seq2, init new seq2 */
1508 processPropertySeq(pBiDi
, &levState
, resProp
, start1
, start2
);
1512 default: /* we should never get here */
1518 /* flush possible pending sequence, e.g. ON */
1519 processPropertySeq(pBiDi
, &levState
, eor
, limit
, limit
);
1522 /* perform (L1) and (X9) ---------------------------------------------------- */
1525 * Reset the embedding levels for some non-graphic characters (L1).
1526 * This function also sets appropriate levels for BN, and
1527 * explicit embedding types that are supposed to have been removed
1528 * from the paragraph in (X9).
1531 adjustWSLevels(UBiDi
*pBiDi
) {
1532 const DirProp
*dirProps
=pBiDi
->dirProps
;
1533 UBiDiLevel
*levels
=pBiDi
->levels
;
1536 if(pBiDi
->flags
&MASK_WS
) {
1537 UBool orderParagraphsLTR
=pBiDi
->orderParagraphsLTR
;
1540 i
=pBiDi
->trailingWSStart
;
1542 /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
1543 while(i
>0 && (flag
=DIRPROP_FLAG_NC(dirProps
[--i
]))&MASK_WS
) {
1544 if(orderParagraphsLTR
&&(flag
&DIRPROP_FLAG(B
))) {
1547 levels
[i
]=GET_PARALEVEL(pBiDi
, i
);
1551 /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
1552 /* here, i+1 is guaranteed to be <length */
1554 flag
=DIRPROP_FLAG_NC(dirProps
[--i
]);
1555 if(flag
&MASK_BN_EXPLICIT
) {
1556 levels
[i
]=levels
[i
+1];
1557 } else if(orderParagraphsLTR
&&(flag
&DIRPROP_FLAG(B
))) {
1560 } else if(flag
&MASK_B_S
) {
1561 levels
[i
]=GET_PARALEVEL(pBiDi
, i
);
1569 #define BIDI_MIN(x, y) ((x)<(y) ? (x) : (y))
1570 #define BIDI_ABS(x) ((x)>=0 ? (x) : (-(x)))
1572 setParaRunsOnly(UBiDi
*pBiDi
, const UChar
*text
, int32_t length
,
1573 UBiDiLevel paraLevel
, UErrorCode
*pErrorCode
) {
1574 void *runsOnlyMemory
;
1577 int32_t saveLength
, saveTrailingWSStart
;
1578 const UBiDiLevel
*levels
;
1579 UBiDiLevel
*saveLevels
;
1580 UBiDiDirection saveDirection
;
1581 UBool saveMayAllocateText
;
1583 int32_t visualLength
, i
, j
, visualStart
, logicalStart
,
1584 runCount
, runLength
, addedRuns
, insertRemove
,
1585 start
, limit
, step
, indexOddBit
, logicalPos
,
1587 uint32_t saveOptions
;
1589 pBiDi
->reorderingMode
=UBIDI_REORDER_DEFAULT
;
1591 ubidi_setPara(pBiDi
, text
, length
, paraLevel
, NULL
, pErrorCode
);
1594 /* obtain memory for mapping table and visual text */
1595 runsOnlyMemory
=uprv_malloc(length
*(sizeof(int32_t)+sizeof(UChar
)+sizeof(UBiDiLevel
)));
1596 if(runsOnlyMemory
==NULL
) {
1597 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
1600 visualMap
=runsOnlyMemory
;
1601 visualText
=(UChar
*)&visualMap
[length
];
1602 saveLevels
=(UBiDiLevel
*)&visualText
[length
];
1603 saveOptions
=pBiDi
->reorderingOptions
;
1604 if(saveOptions
& UBIDI_OPTION_INSERT_MARKS
) {
1605 pBiDi
->reorderingOptions
&=~UBIDI_OPTION_INSERT_MARKS
;
1606 pBiDi
->reorderingOptions
|=UBIDI_OPTION_REMOVE_CONTROLS
;
1608 paraLevel
&=1; /* accept only 0 or 1 */
1609 ubidi_setPara(pBiDi
, text
, length
, paraLevel
, NULL
, pErrorCode
);
1610 if(U_FAILURE(*pErrorCode
)) {
1613 /* we cannot access directly pBiDi->levels since it is not yet set if
1614 * direction is not MIXED
1616 levels
=ubidi_getLevels(pBiDi
, pErrorCode
);
1617 uprv_memcpy(saveLevels
, levels
, pBiDi
->length
*sizeof(UBiDiLevel
));
1618 saveTrailingWSStart
=pBiDi
->trailingWSStart
;
1619 saveLength
=pBiDi
->length
;
1620 saveDirection
=pBiDi
->direction
;
1622 /* FOOD FOR THOUGHT: instead of writing the visual text, we could use
1623 * the visual map and the dirProps array to drive the second call
1624 * to ubidi_setPara (but must make provision for possible removal of
1625 * BiDi controls. Alternatively, only use the dirProps array via
1626 * customized classifier callback.
1628 visualLength
=ubidi_writeReordered(pBiDi
, visualText
, length
,
1629 UBIDI_DO_MIRRORING
, pErrorCode
);
1630 ubidi_getVisualMap(pBiDi
, visualMap
, pErrorCode
);
1631 if(U_FAILURE(*pErrorCode
)) {
1634 pBiDi
->reorderingOptions
=saveOptions
;
1636 pBiDi
->reorderingMode
=UBIDI_REORDER_INVERSE_LIKE_DIRECT
;
1638 /* Because what we did with reorderingOptions, visualText may be shorter
1639 * than the original text. But we don't want the levels memory to be
1640 * reallocated shorter than the original length, since we need to restore
1641 * the levels as after the first call to ubidi_setpara() before returning.
1642 * We will force mayAllocateText to FALSE before the second call to
1643 * ubidi_setpara(), and will restore it afterwards.
1645 saveMayAllocateText
=pBiDi
->mayAllocateText
;
1646 pBiDi
->mayAllocateText
=FALSE
;
1647 ubidi_setPara(pBiDi
, visualText
, visualLength
, paraLevel
, NULL
, pErrorCode
);
1648 pBiDi
->mayAllocateText
=saveMayAllocateText
;
1649 ubidi_getRuns(pBiDi
, pErrorCode
);
1650 if(U_FAILURE(*pErrorCode
)) {
1653 /* check if some runs must be split, count how many splits */
1655 runCount
=pBiDi
->runCount
;
1658 for(i
=0; i
<runCount
; i
++, visualStart
+=runLength
) {
1659 runLength
=runs
[i
].visualLimit
-visualStart
;
1663 logicalStart
=GET_INDEX(runs
[i
].logicalStart
);
1664 for(j
=logicalStart
+1; j
<logicalStart
+runLength
; j
++) {
1665 index0
=visualMap
[j
];
1666 index1
=visualMap
[j
-1];
1667 if((BIDI_ABS(index0
-index1
)!=1) || (saveLevels
[index0
]!=saveLevels
[index1
])) {
1673 if(getRunsMemory(pBiDi
, runCount
+addedRuns
)) {
1675 /* because we switch from UBiDi.simpleRuns to UBiDi.runs */
1676 pBiDi
->runsMemory
[0]=runs
[0];
1678 runs
=pBiDi
->runs
=pBiDi
->runsMemory
;
1679 pBiDi
->runCount
+=addedRuns
;
1684 /* split runs which are not consecutive in source text */
1685 for(i
=runCount
-1; i
>=0; i
--) {
1686 runLength
= i
==0 ? runs
[0].visualLimit
:
1687 runs
[i
].visualLimit
-runs
[i
-1].visualLimit
;
1688 logicalStart
=runs
[i
].logicalStart
;
1689 indexOddBit
=GET_ODD_BIT(logicalStart
);
1690 logicalStart
=GET_INDEX(logicalStart
);
1693 runs
[i
+addedRuns
]=runs
[i
];
1695 logicalPos
=visualMap
[logicalStart
];
1696 runs
[i
+addedRuns
].logicalStart
=MAKE_INDEX_ODD_PAIR(logicalPos
,
1697 saveLevels
[logicalPos
]^indexOddBit
);
1702 limit
=logicalStart
+runLength
-1;
1705 start
=logicalStart
+runLength
-1;
1709 for(j
=start
; j
!=limit
; j
+=step
) {
1710 index0
=visualMap
[j
];
1711 index1
=visualMap
[j
+step
];
1712 if((BIDI_ABS(index0
-index1
)!=1) || (saveLevels
[index0
]!=saveLevels
[index1
])) {
1713 logicalPos
=BIDI_MIN(visualMap
[start
], index0
);
1714 runs
[i
+addedRuns
].logicalStart
=MAKE_INDEX_ODD_PAIR(logicalPos
,
1715 saveLevels
[logicalPos
]^indexOddBit
);
1716 runs
[i
+addedRuns
].visualLimit
=runs
[i
].visualLimit
;
1717 runs
[i
].visualLimit
-=BIDI_ABS(j
-start
)+1;
1718 insertRemove
=runs
[i
].insertRemove
&(LRM_AFTER
|RLM_AFTER
);
1719 runs
[i
+addedRuns
].insertRemove
=insertRemove
;
1720 runs
[i
].insertRemove
&=~insertRemove
;
1726 runs
[i
+addedRuns
]=runs
[i
];
1728 logicalPos
=BIDI_MIN(visualMap
[start
], visualMap
[limit
]);
1729 runs
[i
+addedRuns
].logicalStart
=MAKE_INDEX_ODD_PAIR(logicalPos
,
1730 saveLevels
[logicalPos
]^indexOddBit
);
1734 /* restore initial paraLevel */
1735 pBiDi
->paraLevel
^=1;
1737 /* restore real text */
1739 pBiDi
->length
=saveLength
;
1740 pBiDi
->originalLength
=length
;
1741 pBiDi
->direction
=saveDirection
;
1742 /* the saved levels should never excess levelsSize, but we check anyway */
1743 if(saveLength
>pBiDi
->levelsSize
) {
1744 saveLength
=pBiDi
->levelsSize
;
1746 uprv_memcpy(pBiDi
->levels
, saveLevels
, saveLength
*sizeof(UBiDiLevel
));
1747 pBiDi
->trailingWSStart
=saveTrailingWSStart
;
1748 /* free memory for mapping table and visual text */
1749 uprv_free(runsOnlyMemory
);
1750 if(pBiDi
->runCount
>1) {
1751 pBiDi
->direction
=UBIDI_MIXED
;
1754 pBiDi
->reorderingMode
=UBIDI_REORDER_RUNS_ONLY
;
1757 /* ubidi_setPara ------------------------------------------------------------ */
1759 U_CAPI
void U_EXPORT2
1760 ubidi_setPara(UBiDi
*pBiDi
, const UChar
*text
, int32_t length
,
1761 UBiDiLevel paraLevel
, UBiDiLevel
*embeddingLevels
,
1762 UErrorCode
*pErrorCode
) {
1763 UBiDiDirection direction
;
1765 /* check the argument values */
1766 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
);
1767 if(pBiDi
==NULL
|| text
==NULL
|| length
<-1 ||
1768 (paraLevel
>UBIDI_MAX_EXPLICIT_LEVEL
&& paraLevel
<UBIDI_DEFAULT_LTR
)) {
1769 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
1774 length
=u_strlen(text
);
1777 /* special treatment for RUNS_ONLY mode */
1778 if(pBiDi
->reorderingMode
==UBIDI_REORDER_RUNS_ONLY
) {
1779 setParaRunsOnly(pBiDi
, text
, length
, paraLevel
, pErrorCode
);
1783 /* initialize the UBiDi structure */
1784 pBiDi
->pParaBiDi
=NULL
; /* mark unfinished setPara */
1786 pBiDi
->length
=pBiDi
->originalLength
=pBiDi
->resultLength
=length
;
1787 pBiDi
->paraLevel
=paraLevel
;
1788 pBiDi
->direction
=UBIDI_LTR
;
1791 pBiDi
->dirProps
=NULL
;
1794 pBiDi
->insertPoints
.size
=0; /* clean up from last call */
1795 pBiDi
->insertPoints
.confirmed
=0; /* clean up from last call */
1798 * Save the original paraLevel if contextual; otherwise, set to 0.
1800 if(IS_DEFAULT_LEVEL(paraLevel
)) {
1801 pBiDi
->defaultParaLevel
=paraLevel
;
1803 pBiDi
->defaultParaLevel
=0;
1808 * For an empty paragraph, create a UBiDi object with the paraLevel and
1809 * the flags and the direction set but without allocating zero-length arrays.
1810 * There is nothing more to do.
1812 if(IS_DEFAULT_LEVEL(paraLevel
)) {
1813 pBiDi
->paraLevel
&=1;
1814 pBiDi
->defaultParaLevel
=0;
1817 pBiDi
->flags
=DIRPROP_FLAG(R
);
1818 pBiDi
->direction
=UBIDI_RTL
;
1820 pBiDi
->flags
=DIRPROP_FLAG(L
);
1821 pBiDi
->direction
=UBIDI_LTR
;
1826 pBiDi
->pParaBiDi
=pBiDi
; /* mark successful setPara */
1833 * Get the directional properties,
1834 * the flags bit-set, and
1835 * determine the paragraph level if necessary.
1837 if(getDirPropsMemory(pBiDi
, length
)) {
1838 pBiDi
->dirProps
=pBiDi
->dirPropsMemory
;
1841 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
1844 /* the processed length may have changed if UBIDI_OPTION_STREAMING */
1845 length
= pBiDi
->length
;
1846 pBiDi
->trailingWSStart
=length
; /* the levels[] will reflect the WS run */
1847 /* allocate paras memory */
1848 if(pBiDi
->paraCount
>1) {
1849 if(getInitialParasMemory(pBiDi
, pBiDi
->paraCount
)) {
1850 pBiDi
->paras
=pBiDi
->parasMemory
;
1851 pBiDi
->paras
[pBiDi
->paraCount
-1]=length
;
1853 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
1857 /* initialize paras for single paragraph */
1858 pBiDi
->paras
=pBiDi
->simpleParas
;
1859 pBiDi
->simpleParas
[0]=length
;
1862 /* are explicit levels specified? */
1863 if(embeddingLevels
==NULL
) {
1864 /* no: determine explicit levels according to the (Xn) rules */\
1865 if(getLevelsMemory(pBiDi
, length
)) {
1866 pBiDi
->levels
=pBiDi
->levelsMemory
;
1867 direction
=resolveExplicitLevels(pBiDi
);
1869 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
1873 /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */
1874 pBiDi
->levels
=embeddingLevels
;
1875 direction
=checkExplicitLevels(pBiDi
, pErrorCode
);
1876 if(U_FAILURE(*pErrorCode
)) {
1882 * The steps after (X9) in the UBiDi algorithm are performed only if
1883 * the paragraph text has mixed directionality!
1885 pBiDi
->direction
=direction
;
1888 /* make sure paraLevel is even */
1889 pBiDi
->paraLevel
=(UBiDiLevel
)((pBiDi
->paraLevel
+1)&~1);
1891 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
1892 pBiDi
->trailingWSStart
=0;
1895 /* make sure paraLevel is odd */
1896 pBiDi
->paraLevel
|=1;
1898 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
1899 pBiDi
->trailingWSStart
=0;
1903 * Choose the right implicit state table
1905 switch(pBiDi
->reorderingMode
) {
1906 case UBIDI_REORDER_DEFAULT
:
1907 pBiDi
->pImpTabPair
=&impTab_DEFAULT
;
1909 case UBIDI_REORDER_NUMBERS_SPECIAL
:
1910 pBiDi
->pImpTabPair
=&impTab_NUMBERS_SPECIAL
;
1912 case UBIDI_REORDER_GROUP_NUMBERS_WITH_R
:
1913 pBiDi
->pImpTabPair
=&impTab_GROUP_NUMBERS_WITH_R
;
1915 case UBIDI_REORDER_INVERSE_NUMBERS_AS_L
:
1916 pBiDi
->pImpTabPair
=&impTab_INVERSE_NUMBERS_AS_L
;
1918 case UBIDI_REORDER_INVERSE_LIKE_DIRECT
:
1919 if (pBiDi
->reorderingOptions
& UBIDI_OPTION_INSERT_MARKS
) {
1920 pBiDi
->pImpTabPair
=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS
;
1922 pBiDi
->pImpTabPair
=&impTab_INVERSE_LIKE_DIRECT
;
1925 case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
:
1926 if (pBiDi
->reorderingOptions
& UBIDI_OPTION_INSERT_MARKS
) {
1927 pBiDi
->pImpTabPair
=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS
;
1929 pBiDi
->pImpTabPair
=&impTab_INVERSE_FOR_NUMBERS_SPECIAL
;
1933 /* we should never get here */
1938 * If there are no external levels specified and there
1939 * are no significant explicit level codes in the text,
1940 * then we can treat the entire paragraph as one run.
1941 * Otherwise, we need to perform the following rules on runs of
1942 * the text with the same embedding levels. (X10)
1943 * "Significant" explicit level codes are ones that actually
1944 * affect non-BN characters.
1945 * Examples for "insignificant" ones are empty embeddings
1946 * LRE-PDF, LRE-RLE-PDF-PDF, etc.
1948 if(embeddingLevels
==NULL
&& pBiDi
->paraCount
<=1 &&
1949 !(pBiDi
->flags
&DIRPROP_FLAG_MULTI_RUNS
)) {
1950 resolveImplicitLevels(pBiDi
, 0, length
,
1951 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi
, 0)),
1952 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi
, length
-1)));
1954 /* sor, eor: start and end types of same-level-run */
1955 UBiDiLevel
*levels
=pBiDi
->levels
;
1956 int32_t start
, limit
=0;
1957 UBiDiLevel level
, nextLevel
;
1960 /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
1961 level
=GET_PARALEVEL(pBiDi
, 0);
1962 nextLevel
=levels
[0];
1963 if(level
<nextLevel
) {
1964 eor
=GET_LR_FROM_LEVEL(nextLevel
);
1966 eor
=GET_LR_FROM_LEVEL(level
);
1970 /* determine start and limit of the run (end points just behind the run) */
1972 /* the values for this run's start are the same as for the previous run's end */
1975 if((start
>0) && (NO_CONTEXT_RTL(pBiDi
->dirProps
[start
-1])==B
)) {
1976 /* except if this is a new paragraph, then set sor = para level */
1977 sor
=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi
, start
));
1982 /* search for the limit of this run */
1983 while(++limit
<length
&& levels
[limit
]==level
) {}
1985 /* get the correct level of the next run */
1987 nextLevel
=levels
[limit
];
1989 nextLevel
=GET_PARALEVEL(pBiDi
, length
-1);
1992 /* determine eor from max(level, nextLevel); sor is last run's eor */
1993 if((level
&~UBIDI_LEVEL_OVERRIDE
)<(nextLevel
&~UBIDI_LEVEL_OVERRIDE
)) {
1994 eor
=GET_LR_FROM_LEVEL(nextLevel
);
1996 eor
=GET_LR_FROM_LEVEL(level
);
1999 /* if the run consists of overridden directional types, then there
2000 are no implicit types to be resolved */
2001 if(!(level
&UBIDI_LEVEL_OVERRIDE
)) {
2002 resolveImplicitLevels(pBiDi
, start
, limit
, sor
, eor
);
2004 /* remove the UBIDI_LEVEL_OVERRIDE flags */
2006 levels
[start
++]&=~UBIDI_LEVEL_OVERRIDE
;
2007 } while(start
<limit
);
2009 } while(limit
<length
);
2011 /* check if we got any memory shortage while adding insert points */
2012 if (U_FAILURE(pBiDi
->insertPoints
.errorCode
))
2014 *pErrorCode
=pBiDi
->insertPoints
.errorCode
;
2017 /* reset the embedding levels for some non-graphic characters (L1), (X9) */
2018 adjustWSLevels(pBiDi
);
2021 /* add RLM for inverse Bidi with contextual orientation resolving
2022 * to RTL which would not round-trip otherwise
2024 if((pBiDi
->defaultParaLevel
>0) &&
2025 (pBiDi
->reorderingOptions
& UBIDI_OPTION_INSERT_MARKS
) &&
2026 ((pBiDi
->reorderingMode
==UBIDI_REORDER_INVERSE_LIKE_DIRECT
) ||
2027 (pBiDi
->reorderingMode
==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
))) {
2028 int32_t i
, j
, start
, last
;
2030 for(i
=0; i
<pBiDi
->paraCount
; i
++) {
2031 last
=pBiDi
->paras
[i
]-1;
2032 if((pBiDi
->dirProps
[last
] & CONTEXT_RTL
)==0) {
2033 continue; /* LTR paragraph */
2035 start
= i
==0 ? 0 : pBiDi
->paras
[i
- 1];
2036 for(j
=last
; j
>=start
; j
--) {
2037 dirProp
=NO_CONTEXT_RTL(pBiDi
->dirProps
[j
]);
2040 while(NO_CONTEXT_RTL(pBiDi
->dirProps
[last
])==B
) {
2044 addPoint(pBiDi
, last
, RLM_BEFORE
);
2047 if(DIRPROP_FLAG(dirProp
) & MASK_R_AL
) {
2054 if(pBiDi
->reorderingOptions
& UBIDI_OPTION_REMOVE_CONTROLS
) {
2055 pBiDi
->resultLength
-= pBiDi
->controlCount
;
2057 pBiDi
->resultLength
+= pBiDi
->insertPoints
.size
;
2059 pBiDi
->pParaBiDi
=pBiDi
; /* mark successful setPara */
2062 U_CAPI
void U_EXPORT2
2063 ubidi_orderParagraphsLTR(UBiDi
*pBiDi
, UBool orderParagraphsLTR
) {
2065 pBiDi
->orderParagraphsLTR
=orderParagraphsLTR
;
2069 U_CAPI UBool U_EXPORT2
2070 ubidi_isOrderParagraphsLTR(UBiDi
*pBiDi
) {
2072 return pBiDi
->orderParagraphsLTR
;
2078 U_CAPI UBiDiDirection U_EXPORT2
2079 ubidi_getDirection(const UBiDi
*pBiDi
) {
2080 if(IS_VALID_PARA_OR_LINE(pBiDi
)) {
2081 return pBiDi
->direction
;
2087 U_CAPI
const UChar
* U_EXPORT2
2088 ubidi_getText(const UBiDi
*pBiDi
) {
2089 if(IS_VALID_PARA_OR_LINE(pBiDi
)) {
2096 U_CAPI
int32_t U_EXPORT2
2097 ubidi_getLength(const UBiDi
*pBiDi
) {
2098 if(IS_VALID_PARA_OR_LINE(pBiDi
)) {
2099 return pBiDi
->originalLength
;
2105 U_CAPI
int32_t U_EXPORT2
2106 ubidi_getProcessedLength(const UBiDi
*pBiDi
) {
2107 if(IS_VALID_PARA_OR_LINE(pBiDi
)) {
2108 return pBiDi
->length
;
2114 U_CAPI
int32_t U_EXPORT2
2115 ubidi_getResultLength(const UBiDi
*pBiDi
) {
2116 if(IS_VALID_PARA_OR_LINE(pBiDi
)) {
2117 return pBiDi
->resultLength
;
2123 /* paragraphs API functions ------------------------------------------------- */
2125 U_CAPI UBiDiLevel U_EXPORT2
2126 ubidi_getParaLevel(const UBiDi
*pBiDi
) {
2127 if(IS_VALID_PARA_OR_LINE(pBiDi
)) {
2128 return pBiDi
->paraLevel
;
2134 U_CAPI
int32_t U_EXPORT2
2135 ubidi_countParagraphs(UBiDi
*pBiDi
) {
2136 if(!IS_VALID_PARA_OR_LINE(pBiDi
)) {
2139 return pBiDi
->paraCount
;
2143 U_CAPI
void U_EXPORT2
2144 ubidi_getParagraphByIndex(const UBiDi
*pBiDi
, int32_t paraIndex
,
2145 int32_t *pParaStart
, int32_t *pParaLimit
,
2146 UBiDiLevel
*pParaLevel
, UErrorCode
*pErrorCode
) {
2149 /* check the argument values */
2150 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
);
2151 RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi
, *pErrorCode
);
2152 RETURN_VOID_IF_BAD_RANGE(paraIndex
, 0, pBiDi
->paraCount
, *pErrorCode
);
2154 pBiDi
=pBiDi
->pParaBiDi
; /* get Para object if Line object */
2156 paraStart
=pBiDi
->paras
[paraIndex
-1];
2160 if(pParaStart
!=NULL
) {
2161 *pParaStart
=paraStart
;
2163 if(pParaLimit
!=NULL
) {
2164 *pParaLimit
=pBiDi
->paras
[paraIndex
];
2166 if(pParaLevel
!=NULL
) {
2167 *pParaLevel
=GET_PARALEVEL(pBiDi
, paraStart
);
2171 U_CAPI
int32_t U_EXPORT2
2172 ubidi_getParagraph(const UBiDi
*pBiDi
, int32_t charIndex
,
2173 int32_t *pParaStart
, int32_t *pParaLimit
,
2174 UBiDiLevel
*pParaLevel
, UErrorCode
*pErrorCode
) {
2177 /* check the argument values */
2178 /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */
2179 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
, -1);
2180 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi
, *pErrorCode
, -1);
2181 pBiDi
=pBiDi
->pParaBiDi
; /* get Para object if Line object */
2182 RETURN_IF_BAD_RANGE(charIndex
, 0, pBiDi
->length
, *pErrorCode
, -1);
2184 for(paraIndex
=0; charIndex
>=pBiDi
->paras
[paraIndex
]; paraIndex
++);
2185 ubidi_getParagraphByIndex(pBiDi
, paraIndex
, pParaStart
, pParaLimit
, pParaLevel
, pErrorCode
);
2189 U_CAPI
void U_EXPORT2
2190 ubidi_setClassCallback(UBiDi
*pBiDi
, UBiDiClassCallback
*newFn
,
2191 const void *newContext
, UBiDiClassCallback
**oldFn
,
2192 const void **oldContext
, UErrorCode
*pErrorCode
)
2194 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
);
2196 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
2201 *oldFn
= pBiDi
->fnClassCallback
;
2205 *oldContext
= pBiDi
->coClassCallback
;
2207 pBiDi
->fnClassCallback
= newFn
;
2208 pBiDi
->coClassCallback
= newContext
;
2211 U_CAPI
void U_EXPORT2
2212 ubidi_getClassCallback(UBiDi
*pBiDi
, UBiDiClassCallback
**fn
, const void **context
)
2219 *fn
= pBiDi
->fnClassCallback
;
2223 *context
= pBiDi
->coClassCallback
;
2227 U_CAPI UCharDirection U_EXPORT2
2228 ubidi_getCustomizedClass(UBiDi
*pBiDi
, UChar32 c
)
2232 if( pBiDi
->fnClassCallback
== NULL
||
2233 (dir
= (*pBiDi
->fnClassCallback
)(pBiDi
->coClassCallback
, c
)) == U_BIDI_CLASS_DEFAULT
)
2235 return ubidi_getClass(pBiDi
->bdp
, c
);