2 ******************************************************************************
4 * Copyright (C) 1999-2006, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 ******************************************************************************
10 * tab size: 8 (not used)
13 * created on: 1999jul27
14 * created by: Markus W. Scherer
17 /* set import/export definitions */
18 #ifndef U_COMMON_IMPLEMENTATION
19 # define U_COMMON_IMPLEMENTATION
23 #include "unicode/utypes.h"
24 #include "unicode/ustring.h"
25 #include "unicode/uchar.h"
26 #include "unicode/ubidi.h"
27 #include "ubidi_props.h"
31 * General implementation notes:
33 * Throughout the implementation, there are comments like (W2) that refer to
34 * rules of the BiDi algorithm in its version 5, in this example to the second
35 * rule of the resolution of weak types.
37 * For handling surrogate pairs, where two UChars form one "abstract" (or UTF-32)
38 * character according to UTF-16, the second UChar gets the directional property of
39 * the entire character assigned, while the first one gets a BN, a boundary
40 * neutral, type, which is ignored by most of the algorithm according to
41 * rule (X9) and the implementation suggestions of the BiDi algorithm.
43 * Later, adjustWSLevels() will set the level for each BN to that of the
44 * following character (UChar), which results in surrogate pairs getting the
45 * same level on each of their surrogates.
47 * In a UTF-8 implementation, the same thing could be done: the last byte of
48 * a multi-byte sequence would get the "real" property, while all previous
49 * bytes of that sequence would get BN.
51 * It is not possible to assign all those parts of a character the same real
52 * property because this would fail in the resolution of weak types with rules
53 * that look at immediately surrounding types.
55 * As a related topic, this implementation does not remove Boundary Neutral
56 * types from the input, but ignores them wherever this is relevant.
57 * For example, the loop for the resolution of the weak types reads
58 * types until it finds a non-BN.
59 * Also, explicit embedding codes are neither changed into BN nor removed.
60 * They are only treated the same way real BNs are.
61 * As stated before, adjustWSLevels() takes care of them at the end.
62 * For the purpose of conformance, the levels of all these codes do not matter.
65 * Note that this implementation never modifies the dirProps
66 * after the initial setup.
69 * In this implementation, the resolution of weak types (Wn),
70 * neutrals (Nn), and the assignment of the resolved level (In)
71 * are all done in one single loop, in resolveImplicitLevels().
72 * Changes of dirProp values are done on the fly, without writing
73 * them back to the dirProps array.
76 * This implementation contains code that makes it possible to bypass steps of the
77 * algorithm that are not needed on the specific paragraph
78 * in order to speed up the most common cases considerably,
79 * like text that is entirely LTR, or RTL text without numbers.
81 * Most of this is done by setting a bit for each directional property
82 * in a flags variable and later checking for whether there are
83 * any LTR characters or any RTL characters, or both, whether
84 * there are any explicit embedding codes, etc.
86 * If the (Xn) steps are performed, then the flags are re-evaluated,
87 * because they will then not contain the embedding codes any more
88 * and will be adjusted for override codes, so that subsequently
89 * more bypassing may be possible than what the initial flags suggested.
91 * If the text is not mixed-directional, then the
92 * algorithm steps for the weak type resolution are not performed,
93 * and all levels are set to the paragraph level.
95 * If there are no explicit embedding codes, then the (Xn) steps are not performed.
98 * If embedding levels are supplied as a parameter, then all
99 * explicit embedding codes are ignored, and the (Xn) steps are not performed.
102 * White Space types could get the level of the run they belong to;
103 * a test of (flags&MASK_EMBEDDING) is used to
104 * decide whether the paragraph direction should be included in
105 * the flags variable.
107 * If there are no White Space types in the paragraph, then
108 * (L1) is not necessary in adjustWSLevels().
111 /* to avoid some conditional statements, use tiny constant arrays */
/*
 * Each table has exactly two entries:
 * entry 0 holds the flag for L (respectively LRE, LRO) and
 * entry 1 holds the flag for R (respectively RLE, RLO).
 */
112 static const Flags flagLR
[2]={ DIRPROP_FLAG(L
), DIRPROP_FLAG(R
) };
113 static const Flags flagE
[2]={ DIRPROP_FLAG(LRE
), DIRPROP_FLAG(RLE
) };
114 static const Flags flagO
[2]={ DIRPROP_FLAG(LRO
), DIRPROP_FLAG(RLO
) };
/* Pick the table entry from bit 0 of level (even level -> entry 0, odd level -> entry 1). */
116 #define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
117 #define DIRPROP_FLAG_E(level) flagE[(level)&1]
118 #define DIRPROP_FLAG_O(level) flagO[(level)&1]
120 /* UBiDi object management -------------------------------------------------- */
/*
 * Opens a UBiDi object by delegating to ubidi_openSized() with 0 for both
 * size arguments. The UErrorCode handed to that call is a local variable
 * initialized to U_ZERO_ERROR, so it is not visible to the caller.
 */
122 U_CAPI UBiDi
* U_EXPORT2
125 UErrorCode errorCode
=U_ZERO_ERROR
;
126 return ubidi_openSized(0, 0, &errorCode
);
/*
 * Creates a UBiDi object, optionally with working arrays set up in advance.
 *
 * maxLength    value passed to getInitialDirPropsMemory() and
 *              getInitialLevelsMemory(); a negative value is rejected
 * maxRunCount  value passed to getInitialRunsMemory(); a negative value
 *              is rejected
 * pErrorCode   in/out error code. A NULL pointer or an incoming failure is
 *              tested for before anything else. It is set to
 *              U_ILLEGAL_ARGUMENT_ERROR for negative sizes and to
 *              U_MEMORY_ALLOCATION_ERROR when an allocation fails.
 *
 * The object itself is obtained from uprv_malloc() and zero-filled with
 * uprv_memset() before the BiDi properties singleton is fetched.
 */
129 U_CAPI UBiDi
* U_EXPORT2
130 ubidi_openSized(int32_t maxLength
, int32_t maxRunCount
, UErrorCode
*pErrorCode
) {
133 /* check the argument values */
134 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
136 } else if(maxLength
<0 || maxRunCount
<0) {
137 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
138 return NULL
; /* invalid arguments */
141 /* allocate memory for the object */
142 pBiDi
=(UBiDi
*)uprv_malloc(sizeof(UBiDi
));
144 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
148 /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */
149 uprv_memset(pBiDi
, 0, sizeof(UBiDi
));
151 /* get BiDi properties */
152 pBiDi
->bdp
=ubidi_getSingleton(pErrorCode
);
153 if(U_FAILURE(*pErrorCode
)) {
158 /* allocate memory for arrays as requested */
160 if( !getInitialDirPropsMemory(pBiDi
, maxLength
) ||
161 !getInitialLevelsMemory(pBiDi
, maxLength
)
163 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
166 pBiDi
->mayAllocateText
=TRUE
;
171 /* use simpleRuns[] */
172 pBiDi
->runsSize
=sizeof(Run
);
173 } else if(!getInitialRunsMemory(pBiDi
, maxRunCount
)) {
174 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
177 pBiDi
->mayAllocateRuns
=TRUE
;
180 if(U_SUCCESS(*pErrorCode
)) {
189 * We are allowed to allocate memory if memory==NULL or
190 * mayAllocate==TRUE for each array that we need.
191 * We also try to grow and shrink memory as needed if we are allowed to allocate it.
194 * Assume sizeNeeded>0.
195 * If *pMemory!=NULL, then assume *pSize>0.
197 * ### this realloc() may unnecessarily copy the old data,
198 * which we know we don't need any more;
199 * is this the best way to do this??
/*
 * Makes sure that *pMemory refers to a buffer that can hold sizeNeeded bytes.
 *
 * pMemory      address of the buffer pointer; receives the result of
 *              uprv_malloc() when a new buffer is allocated
 * pSize        address of the current buffer size, compared with sizeNeeded
 * mayAllocate  whether this call is permitted to allocate or resize
 * sizeNeeded   number of bytes required
 *
 * A missing buffer is allocated, and an existing buffer whose size differs
 * from sizeNeeded is handed to uprv_realloc(), in both cases only when
 * mayAllocate is set. An existing buffer that is too small cannot be used
 * when mayAllocate is not set.
 */
202 ubidi_getMemory(void **pMemory
, int32_t *pSize
, UBool mayAllocate
, int32_t sizeNeeded
) {
203 /* check for existing memory */
205 /* we need to allocate memory */
206 if(mayAllocate
&& (*pMemory
=uprv_malloc(sizeNeeded
))!=NULL
) {
213 /* there is some memory, is it enough or too much? */
214 if(sizeNeeded
>*pSize
&& !mayAllocate
) {
215 /* not enough memory, and we must not allocate */
217 } else if(sizeNeeded
!=*pSize
&& mayAllocate
) {
218 /* FOOD FOR THOUGHT: in hope to improve performance, we should
219 * try never shrinking memory, only growing it when required.
221 /* we may try to grow or shrink */
224 if((memory
=uprv_realloc(*pMemory
, sizeNeeded
))!=NULL
) {
229 /* we failed to grow */
233 /* we have at least enough memory and must not allocate */
/*
 * Releases the arrays owned by pBiDi.
 *
 * pParaBiDi is cleared first. Then dirPropsMemory, levelsMemory,
 * runsMemory, parasMemory and insertPoints.points are each passed to
 * uprv_free() if they are not NULL.
 */
239 U_CAPI
void U_EXPORT2
240 ubidi_close(UBiDi
*pBiDi
) {
242 pBiDi
->pParaBiDi
=NULL
; /* in case one tries to reuse this block */
243 if(pBiDi
->dirPropsMemory
!=NULL
) {
244 uprv_free(pBiDi
->dirPropsMemory
);
246 if(pBiDi
->levelsMemory
!=NULL
) {
247 uprv_free(pBiDi
->levelsMemory
);
249 if(pBiDi
->runsMemory
!=NULL
) {
250 uprv_free(pBiDi
->runsMemory
);
252 if(pBiDi
->parasMemory
!=NULL
) {
253 uprv_free(pBiDi
->parasMemory
);
255 if(pBiDi
->insertPoints
.points
!=NULL
) {
256 uprv_free(pBiDi
->insertPoints
.points
);
263 /* set to approximate "inverse BiDi" ---------------------------------------- */
/*
 * Stores isInverse on pBiDi and keeps reorderingMode in step with it:
 * UBIDI_REORDER_INVERSE_NUMBERS_AS_L when isInverse is true,
 * UBIDI_REORDER_DEFAULT otherwise.
 */
265 U_CAPI
void U_EXPORT2
266 ubidi_setInverse(UBiDi
*pBiDi
, UBool isInverse
) {
268 pBiDi
->isInverse
=isInverse
;
269 pBiDi
->reorderingMode
= isInverse
? UBIDI_REORDER_INVERSE_NUMBERS_AS_L
270 : UBIDI_REORDER_DEFAULT
;
/* Returns the isInverse flag stored on pBiDi. */
274 U_CAPI UBool U_EXPORT2
275 ubidi_isInverse(UBiDi
*pBiDi
) {
277 return pBiDi
->isInverse
;
283 /* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
284 * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre
285 * concept of RUNS_ONLY which is a double operation.
286 * It could be advantageous to divide this into 3 concepts:
287 * a) Operation: direct / inverse / RUNS_ONLY
288 * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_L
289 * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL
290 * This would allow combinations not possible today like RUNS_ONLY with NUMBERS_SPECIAL.
292 * It would also allow setting INSERT_MARKS for the direct step of RUNS_ONLY and
293 * REMOVE_CONTROLS for the inverse step.
294 * Not all combinations would be supported, and probably not all of them make sense.
295 * It would be necessary to document which ones are supported and what the
296 * fallbacks are for unsupported combinations.
/*
 * Sets the reordering mode of pBiDi.
 *
 * The assignment happens only when pBiDi is not NULL and reorderingMode
 * lies in the range [UBIDI_REORDER_DEFAULT, UBIDI_REORDER_COUNT).
 * isInverse is updated at the same time and is true only for
 * UBIDI_REORDER_INVERSE_NUMBERS_AS_L.
 */
298 U_CAPI
void U_EXPORT2
299 ubidi_setReorderingMode(UBiDi
*pBiDi
, UBiDiReorderingMode reorderingMode
) {
300 if ((pBiDi
!= NULL
) && (reorderingMode
>= UBIDI_REORDER_DEFAULT
)
301 && (reorderingMode
< UBIDI_REORDER_COUNT
)) {
302 pBiDi
->reorderingMode
= reorderingMode
;
303 pBiDi
->isInverse
= reorderingMode
== UBIDI_REORDER_INVERSE_NUMBERS_AS_L
;
/*
 * Returns the reorderingMode stored on pBiDi.
 * UBIDI_REORDER_DEFAULT is the fallback return value.
 * NOTE(review): the condition selecting between the two returns is not
 * shown here; presumably it is a NULL check on pBiDi - confirm.
 */
307 U_CAPI UBiDiReorderingMode U_EXPORT2
308 ubidi_getReorderingMode(UBiDi
*pBiDi
) {
310 return pBiDi
->reorderingMode
;
312 return UBIDI_REORDER_DEFAULT
;
/*
 * Stores reorderingOptions on pBiDi.
 *
 * UBIDI_OPTION_REMOVE_CONTROLS takes precedence over
 * UBIDI_OPTION_INSERT_MARKS: when the former is set, the latter is
 * cleared from the value before it is stored.
 */
316 U_CAPI
void U_EXPORT2
317 ubidi_setReorderingOptions(UBiDi
*pBiDi
, uint32_t reorderingOptions
) {
318 if (reorderingOptions
& UBIDI_OPTION_REMOVE_CONTROLS
) {
319 reorderingOptions
&=~UBIDI_OPTION_INSERT_MARKS
;
322 pBiDi
->reorderingOptions
= reorderingOptions
;
/* Returns the reorderingOptions bit set stored on pBiDi. */
326 U_CAPI
uint32_t U_EXPORT2
327 ubidi_getReorderingOptions(UBiDi
*pBiDi
) {
329 return pBiDi
->reorderingOptions
;
335 /* perform (P2)..(P3) ------------------------------------------------------- */
338 * Get the directional properties for the text,
339 * calculate the flags bit-set, and
340 * determine the paragraph level if necessary.
/*
 * Scans pBiDi->text up to originalLength, one code point at a time
 * (UTF_NEXT_CHAR), and fills pBiDi->dirPropsMemory.
 *
 * For each code point, the last code unit receives the class returned by
 * ubidi_getCustomizedClass() and any preceding code units receive BN;
 * both are combined with the current paraDir bits. A bit for every class
 * seen is accumulated in flags.
 *
 * While state is LOOKING_FOR_STRONG, CONTEXT_RTL is cleared or set on the
 * entries of the current paragraph (from paraStart on) and state moves to
 * FOUND_STRONG_CHAR. After a B that is not the last character and is not
 * a CR directly followed by LF, paraStart, paraDir, lastStrongDir and
 * state are reset for the next paragraph.
 *
 * With UBIDI_OPTION_STREAMING, pBiDi->length may be set to an index
 * smaller than originalLength. controlCount and lastArabicPos are stored
 * on pBiDi at the end.
 */
343 getDirProps(UBiDi
*pBiDi
) {
344 const UChar
*text
=pBiDi
->text
;
345 DirProp
*dirProps
=pBiDi
->dirPropsMemory
; /* pBiDi->dirProps is const */
347 int32_t i
=0, i0
, i1
, length
=pBiDi
->originalLength
;
348 Flags flags
=0; /* collect all directionalities in the text */
350 DirProp dirProp
=0, paraDirDefault
=0;/* initialize to avoid compiler warnings */
351 UBool isDefaultLevel
=IS_DEFAULT_LEVEL(pBiDi
->paraLevel
);
352 /* for inverse BiDi, the default para level is set to RTL if there is a
353 strong character at either end of the text */
354 UBool isDefaultLevelInverse
=isDefaultLevel
&&
355 (pBiDi
->reorderingMode
==UBIDI_REORDER_INVERSE_LIKE_DIRECT
||
356 pBiDi
->reorderingMode
==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
);
357 int32_t lastArabicPos
=-1;
358 int32_t controlCount
=0;
359 UBool removeBiDiControls
= pBiDi
->reorderingOptions
&
360 UBIDI_OPTION_REMOVE_CONTROLS
;
363 NOT_CONTEXTUAL
, /* 0: not contextual paraLevel */
364 LOOKING_FOR_STRONG
, /* 1: looking for first strong char */
365 FOUND_STRONG_CHAR
/* 2: found first strong char */
368 int32_t paraStart
=0; /* index of first char in paragraph */
369 DirProp paraDir
; /* == CONTEXT_RTL within paragraphs
370 starting with strong R char */
371 DirProp lastStrongDir
=0; /* for default level & inverse BiDi */
372 int32_t lastStrongLTR
=0; /* for STREAMING option */
374 if(pBiDi
->reorderingOptions
& UBIDI_OPTION_STREAMING
) {
379 paraDirDefault
=pBiDi
->paraLevel
&1 ? CONTEXT_RTL
: 0;
380 paraDir
=paraDirDefault
;
381 lastStrongDir
=paraDirDefault
;
382 state
=LOOKING_FOR_STRONG
;
384 state
=NOT_CONTEXTUAL
;
387 /* count paragraphs and determine the paragraph level (P2..P3) */
389 * see comment in ubidi.h:
390 * the DEFAULT_XXX values are designed so that
391 * their bit 0 alone yields the intended default
393 for( /* i=0 above */ ; i
<length
; ) {
394 /* i is incremented by UTF_NEXT_CHAR */
395 i0
=i
; /* index of first code unit */
396 UTF_NEXT_CHAR(text
, i
, length
, uchar
);
397 i1
=i
-1; /* index of last code unit, gets the directional property */
398 flags
|=DIRPROP_FLAG(dirProp
=ubidi_getCustomizedClass(pBiDi
, uchar
));
399 dirProps
[i1
]=dirProp
|paraDir
;
400 if(i1
>i0
) { /* set previous code units' properties to BN */
401 flags
|=DIRPROP_FLAG(BN
);
403 dirProps
[--i1
]=BN
|paraDir
;
406 if(state
==LOOKING_FOR_STRONG
) {
408 state
=FOUND_STRONG_CHAR
;
411 for(i1
=paraStart
; i1
<i
; i1
++) {
412 dirProps
[i1
]&=~CONTEXT_RTL
;
417 if(dirProp
==R
|| dirProp
==AL
) {
418 state
=FOUND_STRONG_CHAR
;
421 for(i1
=paraStart
; i1
<i
; i1
++) {
422 dirProps
[i1
]|=CONTEXT_RTL
;
430 lastStrongLTR
=i
; /* i is index to next character */
432 else if(dirProp
==R
) {
433 lastStrongDir
=CONTEXT_RTL
;
435 else if(dirProp
==AL
) {
436 lastStrongDir
=CONTEXT_RTL
;
439 else if(dirProp
==B
) {
440 if(pBiDi
->reorderingOptions
& UBIDI_OPTION_STREAMING
) {
441 pBiDi
->length
=i
; /* i is index to next character */
443 if(isDefaultLevelInverse
&& (lastStrongDir
==CONTEXT_RTL
) &&(paraDir
!=lastStrongDir
)) {
444 for( ; paraStart
<i
; paraStart
++) {
445 dirProps
[paraStart
]|=CONTEXT_RTL
;
448 if(i
<length
) { /* B not last char in text */
449 if(!((uchar
==CR
) && (text
[i
]==LF
))) {
453 state
=LOOKING_FOR_STRONG
;
454 paraStart
=i
; /* i is index to next character */
455 paraDir
=paraDirDefault
;
456 lastStrongDir
=paraDirDefault
;
460 if(removeBiDiControls
&& IS_BIDI_CONTROL_CHAR(uchar
)) {
464 if(isDefaultLevelInverse
&& (lastStrongDir
==CONTEXT_RTL
) &&(paraDir
!=lastStrongDir
)) {
465 for(i1
=paraStart
; i1
<length
; i1
++) {
466 dirProps
[i1
]|=CONTEXT_RTL
;
470 pBiDi
->paraLevel
=GET_PARALEVEL(pBiDi
, 0);
472 if(pBiDi
->reorderingOptions
& UBIDI_OPTION_STREAMING
) {
473 if((lastStrongLTR
>pBiDi
->length
) &&
474 (GET_PARALEVEL(pBiDi
, lastStrongLTR
)==0)) {
475 pBiDi
->length
= lastStrongLTR
;
477 if(pBiDi
->length
<pBiDi
->originalLength
) {
481 /* The following line does nothing new for contextual paraLevel, but is
482 needed for absolute paraLevel. */
483 flags
|=DIRPROP_FLAG_LR(pBiDi
->paraLevel
);
485 if(pBiDi
->orderParagraphsLTR
&& (flags
&DIRPROP_FLAG(B
))) {
486 flags
|=DIRPROP_FLAG(L
);
489 pBiDi
->controlCount
= controlCount
;
491 pBiDi
->lastArabicPos
=lastArabicPos
;
494 /* perform (X1)..(X9) ------------------------------------------------------- */
496 /* determine if the text is mixed-directional or single-directional */
/*
 * The decision is based only on pBiDi->flags:
 *  - first test: no MASK_RTL bit is set, and AN does not occur together
 *    with any MASK_POSSIBLE_N bit;
 *  - second test: no MASK_LTR bit is set.
 * NOTE(review): the return statements are not shown here; presumably the
 * two tests select the two single-direction results and everything else
 * is UBIDI_MIXED - confirm.
 */
497 static UBiDiDirection
498 directionFromFlags(UBiDi
*pBiDi
) {
499 Flags flags
=pBiDi
->flags
;
500 /* if the text contains AN and neutrals, then some neutrals may become RTL */
501 if(!(flags
&MASK_RTL
|| ((flags
&DIRPROP_FLAG(AN
)) && (flags
&MASK_POSSIBLE_N
)))) {
503 } else if(!(flags
&MASK_LTR
)) {
511 * Resolve the explicit levels as specified by explicit embedding codes.
512 * Recalculate the flags to have them reflect the real properties
513 * after taking the explicit embeddings into account.
515 * The BiDi algorithm is designed to result in the same behavior whether embedding
516 * levels are externally specified (from "styled text", supposedly the preferred
517 * method) or set by explicit embedding codes (LRx, RLx, PDF) in the plain text.
518 * That is why (X9) instructs to remove all explicit codes (and BN).
519 * However, in a real implementation, this removal of these codes and their index
520 * positions in the plain text is undesirable since it would result in
521 * reallocated, reindexed text.
522 * Instead, this implementation leaves the codes in there and just ignores them
523 * in the subsequent processing.
524 * In order to get the same reordering behavior, positions with a BN or an
525 * explicit embedding code just get the same level assigned as the last "real" character.
528 * Some implementations, not this one, then overwrite some of these
529 * directionality properties at "real" same-level-run boundaries by
530 * L or R codes so that the resolution of weak types can be performed on the
531 * entire paragraph at once instead of having to parse it once more and
532 * perform that resolution on same-level-runs.
533 * This limits the scope of the implicit rules in effectively
534 * the same way as the run limits.
536 * Instead, this implementation does not modify these codes.
537 * On one hand, the paragraph has to be scanned for same-level-runs, but
538 * on the other hand, this saves another loop to reset these codes,
539 * or saves making and modifying a copy of dirProps[].
542 * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm.
545 * Handling the stack of explicit levels (Xn):
547 * With the BiDi stack of explicit levels,
548 * as pushed with each LRE, RLE, LRO, and RLO and popped with each PDF,
549 * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL==61.
551 * In order to have correct push-pop semantics even in the case of overflows,
552 * there are two overflow counters:
553 * - countOver60 is incremented with each LRx at level 60
554 * - from level 60, one RLx increases the level to 61
555 * - countOver61 is incremented with each LRx and RLx at level 61
557 * Popping levels with PDF must work in the opposite order so that level 61
558 * is correct at the correct point. Underflows (too many PDFs) must be checked.
560 * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
/*
 * Assigns levels according to the explicit embedding codes in the text
 * and recalculates the flags.
 *
 * After calling directionFromFlags(), three cases are distinguished:
 *  - a single paragraph that is not UBIDI_MIXED: no levels need resolving;
 *  - a single paragraph with no MASK_EXPLICIT flag, or with a reordering
 *    mode above UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL: every position gets
 *    the paragraph level;
 *  - otherwise the (Xn) rules are carried out with a stack of embedding
 *    levels plus the overflow counters countOver60 and countOver61.
 *
 * In the last case paragraph boundaries after B are recorded in
 * pBiDi->paras (not between CR and LF), and directionFromFlags() is
 * consulted again once the flags have been recalculated.
 */
562 static UBiDiDirection
563 resolveExplicitLevels(UBiDi
*pBiDi
) {
564 const DirProp
*dirProps
=pBiDi
->dirProps
;
565 UBiDiLevel
*levels
=pBiDi
->levels
;
566 const UChar
*text
=pBiDi
->text
;
568 int32_t i
=0, length
=pBiDi
->length
;
569 Flags flags
=pBiDi
->flags
; /* collect all directionalities in the text */
571 UBiDiLevel level
=GET_PARALEVEL(pBiDi
, 0);
573 UBiDiDirection direction
;
576 /* determine if the text is mixed-directional or single-directional */
577 direction
=directionFromFlags(pBiDi
);
579 /* we may not need to resolve any explicit levels, but for multiple
580 paragraphs we want to loop on all chars to set the para boundaries */
581 if((direction
!=UBIDI_MIXED
) && (pBiDi
->paraCount
==1)) {
582 /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
583 } else if((pBiDi
->paraCount
==1) &&
584 (!(flags
&MASK_EXPLICIT
) ||
585 (pBiDi
->reorderingMode
> UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL
))) {
586 /* mixed, but all characters are at the same embedding level */
587 /* or we are in "inverse BiDi" */
588 /* and we don't have contextual multiple paragraphs with some B char */
589 /* set all levels to the paragraph level */
590 for(i
=0; i
<length
; ++i
) {
594 /* continue to perform (Xn) */
596 /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
597 /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */
598 UBiDiLevel embeddingLevel
=level
, newLevel
, stackTop
=0;
600 UBiDiLevel stack
[UBIDI_MAX_EXPLICIT_LEVEL
]; /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL */
601 uint32_t countOver60
=0, countOver61
=0; /* count overflows of explicit levels */
603 /* recalculate the flags */
606 for(i
=0; i
<length
; ++i
) {
607 dirProp
=NO_CONTEXT_RTL(dirProps
[i
]);
612 newLevel
=(UBiDiLevel
)((embeddingLevel
+2)&~(UBIDI_LEVEL_OVERRIDE
|1)); /* least greater even level */
613 if(newLevel
<=UBIDI_MAX_EXPLICIT_LEVEL
) {
614 stack
[stackTop
]=embeddingLevel
;
616 embeddingLevel
=newLevel
;
618 embeddingLevel
|=UBIDI_LEVEL_OVERRIDE
;
620 /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE
621 since this has already been done for newLevel which is
622 the source for embeddingLevel.
624 } else if((embeddingLevel
&~UBIDI_LEVEL_OVERRIDE
)==UBIDI_MAX_EXPLICIT_LEVEL
) {
626 } else /* (embeddingLevel&~UBIDI_LEVEL_OVERRIDE)==UBIDI_MAX_EXPLICIT_LEVEL-1 */ {
629 flags
|=DIRPROP_FLAG(BN
);
634 newLevel
=(UBiDiLevel
)(((embeddingLevel
&~UBIDI_LEVEL_OVERRIDE
)+1)|1); /* least greater odd level */
635 if(newLevel
<=UBIDI_MAX_EXPLICIT_LEVEL
) {
636 stack
[stackTop
]=embeddingLevel
;
638 embeddingLevel
=newLevel
;
640 embeddingLevel
|=UBIDI_LEVEL_OVERRIDE
;
642 /* we don't need to set UBIDI_LEVEL_OVERRIDE off for RLE
643 since this has already been done for newLevel which is
644 the source for embeddingLevel.
649 flags
|=DIRPROP_FLAG(BN
);
653 /* handle all the overflow cases first */
656 } else if(countOver60
>0 && (embeddingLevel
&~UBIDI_LEVEL_OVERRIDE
)!=UBIDI_MAX_EXPLICIT_LEVEL
) {
657 /* handle LRx overflows from level 60 */
659 } else if(stackTop
>0) {
660 /* this is the pop operation; it also pops level 61 while countOver60>0 */
662 embeddingLevel
=stack
[stackTop
];
663 /* } else { (underflow) */
665 flags
|=DIRPROP_FLAG(BN
);
669 countOver60
=countOver61
=0;
670 level
=GET_PARALEVEL(pBiDi
, i
);
672 embeddingLevel
=GET_PARALEVEL(pBiDi
, i
+1);
673 if(!((text
[i
]==CR
) && (text
[i
+1]==LF
))) {
674 pBiDi
->paras
[paraIndex
++]=i
+1;
677 flags
|=DIRPROP_FLAG(B
);
680 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
681 /* they will get their levels set correctly in adjustWSLevels() */
682 flags
|=DIRPROP_FLAG(BN
);
685 /* all other types get the "real" level */
686 if(level
!=embeddingLevel
) {
687 level
=embeddingLevel
;
688 if(level
&UBIDI_LEVEL_OVERRIDE
) {
689 flags
|=DIRPROP_FLAG_O(level
)|DIRPROP_FLAG_MULTI_RUNS
;
691 flags
|=DIRPROP_FLAG_E(level
)|DIRPROP_FLAG_MULTI_RUNS
;
694 if(!(level
&UBIDI_LEVEL_OVERRIDE
)) {
695 flags
|=DIRPROP_FLAG(dirProp
);
701 * We need to set reasonable levels even on BN codes and
702 * explicit codes because we will later look at same-level runs (X10).
706 if(flags
&MASK_EMBEDDING
) {
707 flags
|=DIRPROP_FLAG_LR(pBiDi
->paraLevel
);
709 if(pBiDi
->orderParagraphsLTR
&& (flags
&DIRPROP_FLAG(B
))) {
710 flags
|=DIRPROP_FLAG(L
);
713 /* subsequently, ignore the explicit codes and BN (X9) */
715 /* again, determine if the text is mixed-directional or single-directional */
717 direction
=directionFromFlags(pBiDi
);
724 * Use a pre-specified embedding levels array:
726 * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
727 * ignore all explicit codes (X9),
728 * and check all the preset levels.
730 * Recalculate the flags to have them reflect the real properties
731 * after taking the explicit embeddings into account.
/*
 * Validates the preset levels in pBiDi->levels and rebuilds the flags
 * from them, then returns the result of directionFromFlags().
 *
 * *pErrorCode is set to U_ILLEGAL_ARGUMENT_ERROR when a level, with the
 * UBIDI_LEVEL_OVERRIDE bit removed, is greater than
 * UBIDI_MAX_EXPLICIT_LEVEL, or is below the paragraph level at that index
 * (level 0 on a B is exempt from the lower bound).
 *
 * For a B that is not the last character, the index of the following
 * character is recorded in pBiDi->paras, except for a CR directly
 * followed by LF.
 */
733 static UBiDiDirection
734 checkExplicitLevels(UBiDi
*pBiDi
, UErrorCode
*pErrorCode
) {
735 const DirProp
*dirProps
=pBiDi
->dirProps
;
737 UBiDiLevel
*levels
=pBiDi
->levels
;
738 const UChar
*text
=pBiDi
->text
;
740 int32_t i
, length
=pBiDi
->length
;
741 Flags flags
=0; /* collect all directionalities in the text */
743 uint32_t paraIndex
=0;
745 for(i
=0; i
<length
; ++i
) {
747 dirProp
=NO_CONTEXT_RTL(dirProps
[i
]);
748 if(level
&UBIDI_LEVEL_OVERRIDE
) {
749 /* keep the override flag in levels[i] but adjust the flags */
750 level
&=~UBIDI_LEVEL_OVERRIDE
; /* make the range check below simpler */
751 flags
|=DIRPROP_FLAG_O(level
);
754 flags
|=DIRPROP_FLAG_E(level
)|DIRPROP_FLAG(dirProp
);
756 if((level
<GET_PARALEVEL(pBiDi
, i
) &&
757 !((0==level
)&&(dirProp
==B
))) ||
758 (UBIDI_MAX_EXPLICIT_LEVEL
<level
)) {
759 /* level out of bounds */
760 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
763 if((dirProp
==B
) && ((i
+1)<length
)) {
764 if(!((text
[i
]==CR
) && (text
[i
+1]==LF
))) {
765 pBiDi
->paras
[paraIndex
++]=i
+1;
769 if(flags
&MASK_EMBEDDING
) {
770 flags
|=DIRPROP_FLAG_LR(pBiDi
->paraLevel
);
773 /* determine if the text is mixed-directional or single-directional */
775 return directionFromFlags(pBiDi
);
778 /*********************************************************************/
779 /* The Properties state machine table */
780 /*********************************************************************/
782 /* All table cells are 8 bits: */
783 /* bits 0..4: next state */
784 /* bits 5..7: action to perform (if > 0) */
786 /* Cells may be of format "n" where n represents the next state */
787 /* (except for the rightmost column). */
788 /* Cells may also be of format "_(x,y)" where x represents an action */
789 /* to perform and y represents the next state. */
791 /*********************************************************************/
792 /* Definitions and type for properties state table */
793 /*********************************************************************/
/* Number of columns in a row of impTabProps; the last one (IMPTABPROPS_RES) is the Res column. */
794 #define IMPTABPROPS_COLUMNS 14
795 #define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1)
/* A cell keeps the next state in bits 0..4 and the action in bits 5..7. */
796 #define GET_STATEPROPS(cell) ((cell)&0x1f)
797 #define GET_ACTIONPROPS(cell) ((cell)>>5)
/* Builds a cell from an action number and a next state. */
798 #define _(action, newState) ((uint8_t)(newState+(action<<5)))
/*
 * Maps each dirProp, in the order listed in the comment line below, to
 * the index of its column in impTabProps. Several dirProps share a column,
 * for example WS and ON both map to 4.
 */
800 static const uint8_t groupProp
[] = /* dirProp regrouped */
802 /* L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN */
803 0, 1, 2, 7, 8, 3, 9, 6, 5, 4, 4, 10, 10, 12, 10, 10, 10, 11, 10
/* Values used in the Res column of impTabProps. */
805 enum { _L
=0, _R
=1, _EN
=2, _AN
=3, _ON
=4, _S
=5, _B
=6 }; /* reduced dirProp */
807 /*********************************************************************/
809 /* PROPERTIES STATE TABLE */
811 /* In table impTabProps, */
812 /* - the ON column regroups ON and WS */
813 /* - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF */
814 /* - the Res column is the reduced property assigned to a run */
816 /* Action 1: process current run1, init new run1 */
817 /* 2: init new run2 */
818 /* 3: process run1, process run2, init new run1 */
819 /* 4: process run1, set run1=run2, init new run2 */
822 /* 1) This table is used in resolveImplicitLevels(). */
823 /* 2) This table triggers actions when there is a change in the Bidi*/
824 /* property of incoming characters (action 1). */
825 /* 3) Most such property sequences are processed immediately (in */
826 /* fact, passed to processPropertySeq()). */
827 /* 4) However, numbers are assembled as one sequence. This means */
828 /* that undefined situations (like CS following digits, until */
829 /* it is known if the next char will be a digit) are held until */
830 /* following chars define them. */
831 /* Example: digits followed by CS, then comes another CS or ON; */
832 /* the digits will be processed, then the CS assigned */
833 /* as the start of an ON sequence (action 3). */
834 /* 5) There are cases where more than one sequence must be */
835 /* processed, for instance digits followed by CS followed by L: */
836 /* the digits must be processed as one sequence, and the CS */
837 /* must be processed as an ON sequence, all this before starting */
838 /* assembling chars for the opening L sequence. */
/*
 * Properties state table: one row per state (named in the leading comment
 * of each row), one column per regrouped property as produced by
 * groupProp[], plus the final Res column holding the reduced dirProp of
 * the run. A plain number is the next state; _(a,s) combines action a
 * with next state s.
 */
841 static const uint8_t impTabProps
[][IMPTABPROPS_COLUMNS
] =
843 /* L , R , EN , AN , ON , S , B , ES , ET , CS , BN , NSM , AL , Res */
844 /* 0 Init */ { 1 , 2 , 4 , 5 , 7 , 15 , 17 , 7 , 9 , 7 , 0 , 7 , 3 , _ON
},
845 /* 1 L */ { 1 , _(1,2), _(1,4), _(1,5), _(1,7),_(1,15),_(1,17), _(1,7), _(1,9), _(1,7), 1 , 1 , _(1,3), _L
},
846 /* 2 R */ { _(1,1), 2 , _(1,4), _(1,5), _(1,7),_(1,15),_(1,17), _(1,7), _(1,9), _(1,7), 2 , 2 , _(1,3), _R
},
847 /* 3 AL */ { _(1,1), _(1,2), _(1,6), _(1,6), _(1,8),_(1,16),_(1,17), _(1,8), _(1,8), _(1,8), 3 , 3 , 3 , _R
},
848 /* 4 EN */ { _(1,1), _(1,2), 4 , _(1,5), _(1,7),_(1,15),_(1,17),_(2,10), 11 ,_(2,10), 4 , 4 , _(1,3), _EN
},
849 /* 5 AN */ { _(1,1), _(1,2), _(1,4), 5 , _(1,7),_(1,15),_(1,17), _(1,7), _(1,9),_(2,12), 5 , 5 , _(1,3), _AN
},
850 /* 6 AL:EN/AN */ { _(1,1), _(1,2), 6 , 6 , _(1,8),_(1,16),_(1,17), _(1,8), _(1,8),_(2,13), 6 , 6 , _(1,3), _AN
},
851 /* 7 ON */ { _(1,1), _(1,2), _(1,4), _(1,5), 7 ,_(1,15),_(1,17), 7 ,_(2,14), 7 , 7 , 7 , _(1,3), _ON
},
852 /* 8 AL:ON */ { _(1,1), _(1,2), _(1,6), _(1,6), 8 ,_(1,16),_(1,17), 8 , 8 , 8 , 8 , 8 , _(1,3), _ON
},
853 /* 9 ET */ { _(1,1), _(1,2), 4 , _(1,5), 7 ,_(1,15),_(1,17), 7 , 9 , 7 , 9 , 9 , _(1,3), _ON
},
854 /*10 EN+ES/CS */ { _(3,1), _(3,2), 4 , _(3,5), _(4,7),_(3,15),_(3,17), _(4,7),_(4,14), _(4,7), 10 , _(4,7), _(3,3), _EN
},
855 /*11 EN+ET */ { _(1,1), _(1,2), 4 , _(1,5), _(1,7),_(1,15),_(1,17), _(1,7), 11 , _(1,7), 11 , 11 , _(1,3), _EN
},
856 /*12 AN+CS */ { _(3,1), _(3,2), _(3,4), 5 , _(4,7),_(3,15),_(3,17), _(4,7),_(4,14), _(4,7), 12 , _(4,7), _(3,3), _AN
},
857 /*13 AL:EN/AN+CS */ { _(3,1), _(3,2), 6 , 6 , _(4,8),_(3,16),_(3,17), _(4,8), _(4,8), _(4,8), 13 , _(4,8), _(3,3), _AN
},
858 /*14 ON+ET */ { _(1,1), _(1,2), _(4,4), _(1,5), 7 ,_(1,15),_(1,17), 7 , 14 , 7 , 14 , 14 , _(1,3), _ON
},
859 /*15 S */ { _(1,1), _(1,2), _(1,4), _(1,5), _(1,7), 15 ,_(1,17), _(1,7), _(1,9), _(1,7), 15 , _(1,7), _(1,3), _S
},
860 /*16 AL:S */ { _(1,1), _(1,2), _(1,6), _(1,6), _(1,8), 16 ,_(1,17), _(1,8), _(1,8), _(1,8), 16 , _(1,8), _(1,3), _S
},
861 /*17 B */ { _(1,1), _(1,2), _(1,4), _(1,5), _(1,7),_(1,15), 17 , _(1,7), _(1,9), _(1,7), 17 , _(1,7), _(1,3), _B
}
864 /* we must undef macro _ because the levels tables have a different
865 * structure (4 bits for action and 4 bits for next state).
869 /*********************************************************************/
870 /* The levels state machine tables */
871 /*********************************************************************/
873 /* All table cells are 8 bits: */
874 /* bits 0..3: next state */
875 /* bits 4..7: action to perform (if > 0) */
877 /* Cells may be of format "n" where n represents the next state */
878 /* (except for the rightmost column). */
879 /* Cells may also be of format "_(x,y)" where x represents an action */
880 /* to perform and y represents the next state. */
882 /* This format limits each table to 16 states and 15 actions. */
884 /*********************************************************************/
885 /* Definitions and type for levels state tables */
886 /*********************************************************************/
/*
 * A levels table row has one column for each reduced dirProp (_L up to _B)
 * followed by the Res column, whose index is IMPTABLEVELS_RES.
 */
887 #define IMPTABLEVELS_COLUMNS (_B + 2)
888 #define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1)
/* A cell keeps the next state in bits 0..3 and the action in bits 4..7. */
889 #define GET_STATE(cell) ((cell)&0x0f)
890 #define GET_ACTION(cell) ((cell)>>4)
/* Builds a levels-table cell from an action number and a next state. */
891 #define _(action, newState) ((uint8_t)(newState+(action<<4)))
/* ImpTab: array of table rows; ImpAct: array of action numbers. */
893 typedef uint8_t ImpTab
[][IMPTABLEVELS_COLUMNS
];
894 typedef uint8_t ImpAct
[];
896 /* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct,
897 * instead of having a pair of ImpTab and a pair of ImpAct.
899 typedef struct ImpTabPair
{
904 /*********************************************************************/
906 /* LEVELS STATE TABLES */
908 /* In all levels state tables, */
909 /* - state 0 is the initial state */
910 /* - the Res column is the increment to add to the text level */
911 /* for this property sequence. */
913 /* The impAct arrays for each table of a pair map the local action */
914 /* numbers of the table to the total list of actions. For instance, */
915 /* action 2 in a given table corresponds to the action number which */
916 /* appears in entry [2] of the impAct array for that table. */
917 /* The first entry of all impAct arrays must be 0. */
919 /* Action 1: init conditional sequence */
920 /* 2: prepend conditional sequence to current sequence */
921 /* 3: set ON sequence to new level - 1 */
922 /* 4: init EN/AN/ON sequence */
923 /* 5: fix EN/AN/ON sequence followed by R */
924 /* 6: set previous level sequence to level 2 */
927 /* 1) These tables are used in processPropertySeq(). The input */
928 /* is property sequences as determined by resolveImplicitLevels. */
929 /* 2) Most such property sequences are processed immediately */
930 /* (levels are assigned). */
931 /* 3) However, some sequences cannot be assigned a final level till */
932 /* one or more following sequences are received. For instance, */
933 /* ON following an R sequence within an even-level paragraph. */
934 /* If the following sequence is R, the ON sequence will be */
935 /* assigned basic run level+1, and so will the R sequence. */
936 /* 4) S is generally handled like ON, since its level will be fixed */
937 /* to paragraph level in adjustWSLevels(). */
/* Levels state table used as the first (index 0) table of impTab_DEFAULT and
 * of impTab_INVERSE_LIKE_DIRECT. Each row is a state, each of the first seven
 * columns an input property, and the Res column is the level increment.
 * Cells written _(a,s) are decoded with GET_ACTION/GET_STATE in
 * processPropertySeq(); the _ macro itself is defined outside this excerpt,
 * presumably as (action, next state). */
940 static const ImpTab impTabL_DEFAULT
= /* Even paragraph level */
941 /* In this table, conditional sequences receive the higher possible level
942 until proven otherwise.
945 /* L , R , EN , AN , ON , S , B , Res */
946 /* 0 : init */ { 0 , 1 , 0 , 2 , 0 , 0 , 0 , 0 },
947 /* 1 : R */ { 0 , 1 , 3 , 3 , _(1,4), _(1,4), 0 , 1 },
948 /* 2 : AN */ { 0 , 1 , 0 , 2 , _(1,5), _(1,5), 0 , 2 },
949 /* 3 : R+EN/AN */ { 0 , 1 , 3 , 3 , _(1,4), _(1,4), 0 , 2 },
950 /* 4 : R+ON */ { _(2,0), 1 , 3 , 3 , 4 , 4 , _(2,0), 1 },
951 /* 5 : AN+ON */ { _(2,0), 1 , _(2,0), 2 , 5 , 5 , _(2,0), 1 }
/* Levels state table used as the second (index 1) table of impTab_DEFAULT and
 * of impTab_NUMBERS_SPECIAL. Rows are states, the first seven columns are
 * input properties, and Res is the level increment for the state. */
953 static const ImpTab impTabR_DEFAULT
= /* Odd paragraph level */
954 /* In this table, conditional sequences receive the lower possible level
955 until proven otherwise.
958 /* L , R , EN , AN , ON , S , B , Res */
959 /* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 },
960 /* 1 : L */ { 1 , 0 , 1 , 3 , _(1,4), _(1,4), 0 , 1 },
961 /* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 },
962 /* 3 : L+AN */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 1 },
963 /* 4 : L+ON */ { _(2,1), 0 , _(2,1), 3 , 4 , 4 , 0 , 0 },
964 /* 5 : L+AN+ON */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 0 }
/* Identity action map: table-local action n maps to global action n (0..6). */
966 static const ImpAct impAct0
= {0,1,2,3,4,5,6};
/* Table pair selected by ubidi_setPara() for UBIDI_REORDER_DEFAULT (and as its
 * fallback): impTabL_DEFAULT / impTabR_DEFAULT, both with the identity action
 * map impAct0. */
967 static const ImpTabPair impTab_DEFAULT
= {{(ImpTab
*)&impTabL_DEFAULT
,
968 (ImpTab
*)&impTabR_DEFAULT
},
969 {(ImpAct
*)&impAct0
, (ImpAct
*)&impAct0
}};
/* Levels state table used as the first (index 0) table of
 * impTab_NUMBERS_SPECIAL and impTab_INVERSE_FOR_NUMBERS_SPECIAL.
 * Five states; Res is the level increment for the state. */
971 static const ImpTab impTabL_NUMBERS_SPECIAL
= /* Even paragraph level */
972 /* In this table, conditional sequences receive the higher possible level
973 until proven otherwise.
976 /* L , R , EN , AN , ON , S , B , Res */
977 /* 0 : init */ { 0 , 2 , 1 , 1 , 0 , 0 , 0 , 0 },
978 /* 1 : L+EN/AN */ { 0 , 2 , 1 , 1 , 0 , 0 , 0 , 2 },
979 /* 2 : R */ { 0 , 2 , 4 , 4 , _(1,3), 0 , 0 , 1 },
980 /* 3 : R+ON */ { _(2,0), 2 , 4 , 4 , 3 , 3 , _(2,0), 1 },
981 /* 4 : R+EN/AN */ { 0 , 2 , 4 , 4 , _(1,3), _(1,3), 0 , 2 }
/* Table pair selected for UBIDI_REORDER_NUMBERS_SPECIAL: a dedicated first
 * table, the default second table (impTabR_DEFAULT), identity action maps. */
983 static const ImpTabPair impTab_NUMBERS_SPECIAL
= {{(ImpTab
*)&impTabL_NUMBERS_SPECIAL
,
984 (ImpTab
*)&impTabR_DEFAULT
},
985 {(ImpAct
*)&impAct0
, (ImpAct
*)&impAct0
}};
/* First (index 0) levels state table of impTab_GROUP_NUMBERS_WITH_R.
 * Six states; EN and AN columns are identical in every row. */
987 static const ImpTab impTabL_GROUP_NUMBERS_WITH_R
=
988 /* In this table, EN/AN+ON sequences receive levels as if associated with R
989 until proven that there is L or sor/eor on both sides. AN is handled like EN.
992 /* L , R , EN , AN , ON , S , B , Res */
993 /* 0 init */ { 0 , 3 , _(1,1), _(1,1), 0 , 0 , 0 , 0 },
994 /* 1 EN/AN */ { _(2,0), 3 , 1 , 1 , 2 , _(2,0), _(2,0), 2 },
995 /* 2 EN/AN+ON */ { _(2,0), 3 , 1 , 1 , 2 , _(2,0), _(2,0), 1 },
996 /* 3 R */ { 0 , 3 , 5 , 5 , _(1,4), 0 , 0 , 1 },
997 /* 4 R+ON */ { _(2,0), 3 , 5 , 5 , 4 , _(2,0), _(2,0), 1 },
998 /* 5 R+EN/AN */ { 0 , 3 , 5 , 5 , _(1,4), 0 , 0 , 2 }
/* Second (index 1) levels state table of impTab_GROUP_NUMBERS_WITH_R.
 * Five states; EN and AN columns are identical in every row. */
1000 static const ImpTab impTabR_GROUP_NUMBERS_WITH_R
=
1001 /* In this table, EN/AN+ON sequences receive levels as if associated with R
1002 until proven that there is L on both sides. AN is handled like EN.
1005 /* L , R , EN , AN , ON , S , B , Res */
1006 /* 0 init */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 0 },
1007 /* 1 EN/AN */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 1 },
1008 /* 2 L */ { 2 , 0 , _(1,4), _(1,4), _(1,3), 0 , 0 , 1 },
1009 /* 3 L+ON */ { _(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 0 },
1010 /* 4 L+EN/AN */ { _(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 1 }
/* Table pair selected for UBIDI_REORDER_GROUP_NUMBERS_WITH_R; both tables use
 * the identity action map impAct0. */
1012 static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R
= {
1013 {(ImpTab
*)&impTabL_GROUP_NUMBERS_WITH_R
,
1014 (ImpTab
*)&impTabR_GROUP_NUMBERS_WITH_R
},
1015 {(ImpAct
*)&impAct0
, (ImpAct
*)&impAct0
}};
/* First (index 0) levels state table of impTab_INVERSE_NUMBERS_AS_L.
 * In every row the EN and AN cells equal the L cell of that row. */
1018 static const ImpTab impTabL_INVERSE_NUMBERS_AS_L
=
1019 /* This table is identical to the Default LTR table except that EN and AN are
1023 /* L , R , EN , AN , ON , S , B , Res */
1024 /* 0 : init */ { 0 , 1 , 0 , 0 , 0 , 0 , 0 , 0 },
1025 /* 1 : R */ { 0 , 1 , 0 , 0 , _(1,4), _(1,4), 0 , 1 },
1026 /* 2 : AN */ { 0 , 1 , 0 , 0 , _(1,5), _(1,5), 0 , 2 },
1027 /* 3 : R+EN/AN */ { 0 , 1 , 0 , 0 , _(1,4), _(1,4), 0 , 2 },
1028 /* 4 : R+ON */ { _(2,0), 1 , _(2,0), _(2,0), 4 , 4 , _(2,0), 1 },
1029 /* 5 : AN+ON */ { _(2,0), 1 , _(2,0), _(2,0), 5 , 5 , _(2,0), 1 }
/* Second (index 1) levels state table of impTab_INVERSE_NUMBERS_AS_L.
 * In every row the EN and AN cells equal the L cell of that row. */
1031 static const ImpTab impTabR_INVERSE_NUMBERS_AS_L
=
1032 /* This table is identical to the Default RTL table except that EN and AN are
1036 /* L , R , EN , AN , ON , S , B , Res */
1037 /* 0 : init */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 0 },
1038 /* 1 : L */ { 1 , 0 , 1 , 1 , _(1,4), _(1,4), 0 , 1 },
1039 /* 2 : EN/AN */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 1 },
1040 /* 3 : L+AN */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 1 },
1041 /* 4 : L+ON */ { _(2,1), 0 , _(2,1), _(2,1), 4 , 4 , 0 , 0 },
1042 /* 5 : L+AN+ON */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 0 }
/* Table pair selected for UBIDI_REORDER_INVERSE_NUMBERS_AS_L; both tables use
 * the identity action map impAct0. */
1044 static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L
= {
1045 {(ImpTab
*)&impTabL_INVERSE_NUMBERS_AS_L
,
1046 (ImpTab
*)&impTabR_INVERSE_NUMBERS_AS_L
},
1047 {(ImpAct
*)&impAct0
, (ImpAct
*)&impAct0
}};
/* Second (index 1) levels state table of impTab_INVERSE_LIKE_DIRECT and
 * impTab_INVERSE_FOR_NUMBERS_SPECIAL. It is paired with the action map
 * impAct1, so its local actions 2 and 3 run global actions 11 and 12. */
1049 static const ImpTab impTabR_INVERSE_LIKE_DIRECT
= /* Odd paragraph level */
1050 /* In this table, conditional sequences receive the lower possible level
1051 until proven otherwise.
1054 /* L , R , EN , AN , ON , S , B , Res */
1055 /* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 },
1056 /* 1 : L */ { 1 , 0 , 1 , 2 , _(1,3), _(1,3), 0 , 1 },
1057 /* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 },
1058 /* 3 : L+ON */ { _(2,1), _(3,0), 6 , 4 , 3 , 3 , _(3,0), 0 },
1059 /* 4 : L+ON+AN */ { _(2,1), _(3,0), 6 , 4 , 5 , 5 , _(3,0), 3 },
1060 /* 5 : L+AN+ON */ { _(2,1), _(3,0), 6 , 4 , 5 , 5 , _(3,0), 2 },
1061 /* 6 : L+ON+EN */ { _(2,1), _(3,0), 6 , 4 , 3 , 3 , _(3,0), 1 }
/* Action map for impTabR_INVERSE_LIKE_DIRECT: local actions 0,1,2,3 map to
 * global actions 0,1,11,12. */
1063 static const ImpAct impAct1
= {0,1,11,12};
/* Table pair selected for UBIDI_REORDER_INVERSE_LIKE_DIRECT when
 * UBIDI_OPTION_INSERT_MARKS is not set: the default first table with impAct0,
 * and impTabR_INVERSE_LIKE_DIRECT with impAct1. */
1064 /* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc"
1066 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT
= {
1067 {(ImpTab
*)&impTabL_DEFAULT
,
1068 (ImpTab
*)&impTabR_INVERSE_LIKE_DIRECT
},
1069 {(ImpAct
*)&impAct0
, (ImpAct
*)&impAct1
}};
/* First (index 0) levels state table of impTab_INVERSE_LIKE_DIRECT_WITH_MARKS.
 * It is paired with impAct0, so actions 3..6 in its cells are the global
 * actions 3..6 handled in processPropertySeq(). */
1071 static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS
=
1072 /* The case handled in this table is (visually): R EN L
1075 /* L , R , EN , AN , ON , S , B , Res */
1076 /* 0 : init */ { 0 , _(6,3), 0 , 1 , 0 , 0 , 0 , 0 },
1077 /* 1 : L+AN */ { 0 , _(6,3), 0 , 1 , _(1,2), _(3,0), 0 , 4 },
1078 /* 2 : L+AN+ON */ { _(2,0), _(6,3), _(2,0), 1 , 2 , _(3,0), _(2,0), 3 },
1079 /* 3 : R */ { 0 , _(6,3), _(5,5), _(5,6), _(1,4), _(3,0), 0 , 3 },
1080 /* 4 : R+ON */ { _(3,0), _(4,3), _(5,5), _(5,6), 4 , _(3,0), _(3,0), 3 },
1081 /* 5 : R+EN */ { _(3,0), _(4,3), 5 , _(5,6), _(1,4), _(3,0), _(3,0), 4 },
1082 /* 6 : R+AN */ { _(3,0), _(4,3), _(5,5), 6 , _(1,4), _(3,0), _(3,0), 4 }
/* Second (index 1) levels state table of both *_WITH_MARKS table pairs.
 * It is paired with impAct2, so its local actions 2,3,4,5 run global
 * actions 7,8,9,10. */
1084 static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS
=
1085 /* The cases handled in this table are (visually): R EN L
1089 /* L , R , EN , AN , ON , S , B , Res */
1090 /* 0 : init */ { _(1,3), 0 , 1 , 1 , 0 , 0 , 0 , 0 },
1091 /* 1 : R+EN/AN */ { _(2,3), 0 , 1 , 1 , 2 , _(4,0), 0 , 1 },
1092 /* 2 : R+EN/AN+ON */ { _(2,3), 0 , 1 , 1 , 2 , _(4,0), 0 , 0 },
1093 /* 3 : L */ { 3 , 0 , 3 , _(3,6), _(1,4), _(4,0), 0 , 1 },
1094 /* 4 : L+ON */ { _(5,3), _(4,0), 5 , _(3,6), 4 , _(4,0), _(4,0), 0 },
1095 /* 5 : L+ON+EN */ { _(5,3), _(4,0), 5 , _(3,6), 4 , _(4,0), _(4,0), 1 },
1096 /* 6 : L+AN */ { _(5,3), _(4,0), 6 , 6 , 4 , _(4,0), _(4,0), 3 }
/* Action map for impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS: local actions
 * 0,1,2,3,4,5 map to global actions 0,1,7,8,9,10. */
1098 static const ImpAct impAct2
= {0,1,7,8,9,10};
/* Table pair selected for UBIDI_REORDER_INVERSE_LIKE_DIRECT when
 * UBIDI_OPTION_INSERT_MARKS is set; first table uses impAct0, second impAct2. */
1099 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS
= {
1100 {(ImpTab
*)&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS
,
1101 (ImpTab
*)&impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS
},
1102 {(ImpAct
*)&impAct0
, (ImpAct
*)&impAct2
}};
/* Table pair selected for UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL when
 * UBIDI_OPTION_INSERT_MARKS is not set: impTabL_NUMBERS_SPECIAL with impAct0,
 * and impTabR_INVERSE_LIKE_DIRECT with impAct1. */
1104 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL
= {
1105 {(ImpTab
*)&impTabL_NUMBERS_SPECIAL
,
1106 (ImpTab
*)&impTabR_INVERSE_LIKE_DIRECT
},
1107 {(ImpAct
*)&impAct0
, (ImpAct
*)&impAct1
}};
/* First (index 0) levels state table of
 * impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS; paired with impAct0.
 * Five states; EN and AN columns are identical in every row. */
1109 static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS
=
1110 /* The case handled in this table is (visually): R EN L
1113 /* L , R , EN , AN , ON , S , B , Res */
1114 /* 0 : init */ { 0 , _(6,2), 1 , 1 , 0 , 0 , 0 , 0 },
1115 /* 1 : L+EN/AN */ { 0 , _(6,2), 1 , 1 , 0 , _(3,0), 0 , 4 },
1116 /* 2 : R */ { 0 , _(6,2), _(5,4), _(5,4), _(1,3), _(3,0), 0 , 3 },
1117 /* 3 : R+ON */ { _(3,0), _(4,2), _(5,4), _(5,4), 3 , _(3,0), _(3,0), 3 },
1118 /* 4 : R+EN/AN */ { _(3,0), _(4,2), 4 , 4 , _(1,3), _(3,0), _(3,0), 4 }
/* Table pair selected for UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL when
 * UBIDI_OPTION_INSERT_MARKS is set; first table uses impAct0, second impAct2. */
1120 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS
= {
1121 {(ImpTab
*)&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS
,
1122 (ImpTab
*)&impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS
},
1123 {(ImpAct
*)&impAct0
, (ImpAct
*)&impAct2
}};
/* Fields of the state record that resolveImplicitLevels() initializes and
 * passes to processPropertySeq() as LevState. The enclosing struct header is
 * not visible in this excerpt. startON, startL2EN and lastStrongRTL use
 * negative values as "not set" markers (see the -1 / -2 assignments in
 * processPropertySeq()). */
1128 ImpTab
* pImpTab
; /* level table pointer */
1129 ImpAct
* pImpAct
; /* action map array */
1130 int32_t startON
; /* start of ON sequence */
1131 int32_t startL2EN
; /* start of level 2 sequence */
1132 int32_t lastStrongRTL
; /* index of last found R or AL */
1133 int32_t state
; /* current state */
1134 UBiDiLevel runLevel
; /* run level before implicit solving */
1137 /*------------------------------------------------------------------------*/
/* Append one insert point to pBiDi->insertPoints.
 * The points array is allocated with FIRSTALLOC entries while capacity is 0
 * and is doubled with uprv_realloc() once size reaches capacity. If either
 * allocation fails, U_MEMORY_ALLOCATION_ERROR is stored in
 * insertPoints.errorCode; on realloc failure the previous array pointer is
 * restored first so the existing points are not lost. */
1140 addPoint(UBiDi
*pBiDi
, int32_t pos
, int32_t flag
)
1141 /* param pos: position where to insert
1142 param flag: one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER
1145 #define FIRSTALLOC 10
1147 InsertPoints
* pInsertPoints
=&(pBiDi
->insertPoints
);
1149 if (pInsertPoints
->capacity
== 0)
1151 pInsertPoints
->points
=uprv_malloc(sizeof(Point
)*FIRSTALLOC
);
1152 if (pInsertPoints
->points
== NULL
)
1154 pInsertPoints
->errorCode
=U_MEMORY_ALLOCATION_ERROR
;
1157 pInsertPoints
->capacity
=FIRSTALLOC
;
1159 if (pInsertPoints
->size
>= pInsertPoints
->capacity
) /* no room for new point */
1161 void * savePoints
=pInsertPoints
->points
;
1162 pInsertPoints
->points
=uprv_realloc(pInsertPoints
->points
,
1163 pInsertPoints
->capacity
*2*sizeof(Point
));
1164 if (pInsertPoints
->points
== NULL
)
1166 pInsertPoints
->points
=savePoints
;
1167 pInsertPoints
->errorCode
=U_MEMORY_ALLOCATION_ERROR
;
1170 else pInsertPoints
->capacity
*=2;
1174 pInsertPoints
->points
[pInsertPoints
->size
]=point
;
1175 pInsertPoints
->size
++;
1179 /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
1182 * This implementation of the (Wn) rules applies all rules in one pass.
1183 * In order to do so, it needs a look-ahead of typically 1 character
1184 * (except for W5: sequences of ET) and keeps track of changes
1185 * in a rule Wp that affect a later Wq (p<q).
1187 * The (Nn) and (In) rules are also performed in that same single loop,
1188 * but effectively one iteration behind for white space.
1190 * Since all implicit rules are performed in one step, it is not necessary
1191 * to actually store the intermediate directional properties in dirProps[].
/* Feed one property sequence [start, limit) into the levels state machine.
 *
 * pBiDi      object whose levels[] and insertPoints are updated
 * pLevState  tables, current state and bookkeeping positions for this run
 * _prop      column index of the sequence's property in the levels table
 * start      first index of the sequence (saved as start0)
 * limit      index just behind the sequence
 *
 * The cell for (current state, _prop) yields the new state and a table-local
 * action; the action is mapped through *pImpAct and dispatched on below
 * (cases 1..12). addLevel is the Res column of the new state. After the
 * action, when addLevel is non-zero or an action moved start back before
 * start0, the loop at the end runs over [start, limit) with
 * level = runLevel + addLevel (the loop body is not visible in this excerpt;
 * presumably it stores level into levels[]). */
1195 processPropertySeq(UBiDi
*pBiDi
, LevState
*pLevState
, uint8_t _prop
,
1196 int32_t start
, int32_t limit
) {
1197 uint8_t cell
, oldStateSeq
, actionSeq
;
1198 ImpTab
* pImpTab
=pLevState
->pImpTab
;
1199 ImpAct
* pImpAct
=pLevState
->pImpAct
;
1200 UBiDiLevel
* levels
=pBiDi
->levels
;
1201 UBiDiLevel level
, addLevel
;
1202 InsertPoints
* pInsertPoints
;
1205 start0
=start
; /* save original start position */
1206 oldStateSeq
=pLevState
->state
;
1207 cell
=(*pImpTab
)[oldStateSeq
][_prop
];
1208 pLevState
->state
=GET_STATE(cell
); /* isolate the new state */
1209 actionSeq
=(*pImpAct
)[GET_ACTION(cell
)]; /* isolate the action */
1210 addLevel
=(*pImpTab
)[pLevState
->state
][IMPTABLEVELS_RES
];
1214 case 1: /* init ON seq */
1215 pLevState
->startON
=start0
;
1218 case 2: /* prepend ON seq to current seq */
1219 start
=pLevState
->startON
;
1222 case 3: /* L or S after possible relevant EN/AN */
1223 /* check if we had EN after R/AL */
1224 if (pLevState
->startL2EN
>= 0) {
1225 addPoint(pBiDi
, pLevState
->startL2EN
, LRM_BEFORE
);
1227 pLevState
->startL2EN
=-1; /* not within previous if since could also be -2 */
1228 /* check if we had any relevant EN/AN after R/AL */
1229 pInsertPoints
=&(pBiDi
->insertPoints
);
1230 if ((pInsertPoints
->capacity
== 0) ||
1231 (pInsertPoints
->size
<= pInsertPoints
->confirmed
))
1233 /* nothing, just clean up */
1234 pLevState
->lastStrongRTL
=-1;
1235 /* check if we have a pending conditional segment */
1236 level
=(*pImpTab
)[oldStateSeq
][IMPTABLEVELS_RES
];
1237 if ((level
& 1) && (pLevState
->startON
> 0)) { /* after ON */
1238 start
=pLevState
->startON
; /* reset to basic run level */
1240 if (_prop
== _S
) /* add LRM before S */
1242 addPoint(pBiDi
, start0
, LRM_BEFORE
);
1243 pInsertPoints
->confirmed
=pInsertPoints
->size
;
1247 /* reset previous RTL cont to level for LTR text */
1248 for (k
=pLevState
->lastStrongRTL
+1; k
<start0
; k
++)
1250 /* reset odd level, leave runLevel+2 as is */
1251 levels
[k
]=(levels
[k
] - 2) & ~1;
1253 /* mark insert points as confirmed */
1254 pInsertPoints
->confirmed
=pInsertPoints
->size
;
1255 pLevState
->lastStrongRTL
=-1;
1256 if (_prop
== _S
) /* add LRM before S */
1258 addPoint(pBiDi
, start0
, LRM_BEFORE
);
1259 pInsertPoints
->confirmed
=pInsertPoints
->size
;
1263 case 4: /* R/AL after possible relevant EN/AN */
1265 pInsertPoints
=&(pBiDi
->insertPoints
);
1266 if (pInsertPoints
->capacity
> 0)
1267 /* remove all non confirmed insert points */
1268 pInsertPoints
->size
=pInsertPoints
->confirmed
;
1269 pLevState
->startON
=-1;
1270 pLevState
->startL2EN
=-1;
1271 pLevState
->lastStrongRTL
=limit
- 1;
1274 case 5: /* EN/AN after R/AL + possible cont */
1275 /* check for real AN */
1276 if ((_prop
== _AN
) && (NO_CONTEXT_RTL(pBiDi
->dirProps
[start0
]) == AN
) &&
1277 (pBiDi
->reorderingMode
!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
))
1280 if (pLevState
->startL2EN
== -1) /* if no relevant EN already found */
1282 /* just note the rightmost digit as a strong RTL */
1283 pLevState
->lastStrongRTL
=limit
- 1;
1286 if (pLevState
->startL2EN
>= 0) /* after EN, no AN */
1288 addPoint(pBiDi
, pLevState
->startL2EN
, LRM_BEFORE
);
1289 pLevState
->startL2EN
=-2;
1292 addPoint(pBiDi
, start0
, LRM_BEFORE
);
1295 /* if first EN/AN after R/AL */
1296 if (pLevState
->startL2EN
== -1) {
1297 pLevState
->startL2EN
=start0
;
1301 case 6: /* note location of latest R/AL */
1302 pLevState
->lastStrongRTL
=limit
- 1;
1303 pLevState
->startON
=-1;
1306 case 7: /* L after R+ON/EN/AN */
1307 /* include possible adjacent number on the left */
1308 for (k
=start0
-1; k
>=0 && !(levels
[k
]&1); k
--);
1310 addPoint(pBiDi
, k
, RLM_BEFORE
); /* add RLM before */
1311 pInsertPoints
=&(pBiDi
->insertPoints
);
1312 pInsertPoints
->confirmed
=pInsertPoints
->size
; /* confirm it */
1314 pLevState
->startON
=start0
;
1317 case 8: /* AN after L */
1318 /* AN numbers between L text on both sides may be trouble. */
1319 /* tentatively bracket with LRMs; will be confirmed if followed by L */
1320 addPoint(pBiDi
, start0
, LRM_BEFORE
); /* add LRM before */
1321 addPoint(pBiDi
, start0
, LRM_AFTER
); /* add LRM after */
1324 case 9: /* R after L+ON/EN/AN */
1325 /* false alert, infirm LRMs around previous AN */
1326 pInsertPoints
=&(pBiDi
->insertPoints
);
1327 pInsertPoints
->size
=pInsertPoints
->confirmed
;
1328 if (_prop
== _S
) /* add RLM before S */
1330 addPoint(pBiDi
, start0
, RLM_BEFORE
);
1331 pInsertPoints
->confirmed
=pInsertPoints
->size
;
1335 case 10: /* L after L+ON/AN */
1336 level
=pLevState
->runLevel
+ addLevel
;
1337 for(k
=pLevState
->startON
; k
<start0
; k
++) {
1338 if (levels
[k
]<level
)
1341 pInsertPoints
=&(pBiDi
->insertPoints
);
1342 pInsertPoints
->confirmed
=pInsertPoints
->size
; /* confirm inserts */
1343 pLevState
->startON
=start0
;
1346 case 11: /* L after L+ON+EN/AN/ON */
1347 level
=pLevState
->runLevel
;
1348 for(k
=start0
-1; k
>=pLevState
->startON
; k
--) {
1349 if(levels
[k
]==level
+3) {
1350 while(levels
[k
]==level
+3) {
1353 while(levels
[k
]==level
) {
1357 if(levels
[k
]==level
+2) {
1365 case 12: /* R after L+ON+EN/AN/ON */
1366 level
=pLevState
->runLevel
+1;
1367 for(k
=start0
-1; k
>=pLevState
->startON
; k
--) {
1368 if(levels
[k
]>level
) {
1374 default: /* we should never get here */
1376 start
/=(start
-start0
-25); /* force program crash */
1380 if((addLevel
) || (start
< start0
)) {
1381 level
=pLevState
->runLevel
+ addLevel
;
1382 for(k
=start
; k
<limit
; k
++) {
/* Resolve the implicit levels of one run [start, limit).
 *
 * pBiDi  object holding dirProps[], levels[] and the selected table pair
 * start  first index of the run
 * limit  index just behind the run
 * sor    property fed to the levels state machine before the run
 * eor    property fed to the levels state machine after the run
 *
 * The run level is read from levels[start] and its low bit selects the
 * ImpTab/ImpAct of pBiDi->pImpTabPair. The main loop walks i from start to
 * limit inclusive, maps each property through groupProp[] and impTabProps[],
 * and calls processPropertySeq() for completed sequences (actions 1, 3, 4).
 * For an EN it looks ahead for the next strong property (L/R/AL), caching
 * the position in nextStrongPos. A final processPropertySeq() call with eor
 * flushes any pending sequence. */
1389 resolveImplicitLevels(UBiDi
*pBiDi
,
1390 int32_t start
, int32_t limit
,
1391 DirProp sor
, DirProp eor
) {
1392 const DirProp
*dirProps
=pBiDi
->dirProps
;
1395 int32_t i
, start1
, start2
;
1396 uint8_t oldStateImp
, stateImp
, actionImp
;
1397 uint8_t gprop
, resProp
, cell
;
1399 DirProp nextStrongProp
=R
;
1400 int32_t nextStrongPos
=-1;
1402 /* check for RTL inverse BiDi mode */
1403 /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to
1404 * loop on the text characters from end to start.
1405 * This would need a different properties state table (at least different
1406 * actions) and different levels state tables (maybe very similar to the
1407 * LTR corresponding ones.
1409 inverseRTL
=((start
<pBiDi
->lastArabicPos
) && (GET_PARALEVEL(pBiDi
, start
) & 1) &&
1410 (pBiDi
->reorderingMode
==UBIDI_REORDER_INVERSE_LIKE_DIRECT
||
1411 pBiDi
->reorderingMode
==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
));
1412 /* initialize for levels state table */
1413 levState
.startL2EN
=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
1414 levState
.lastStrongRTL
=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
1416 levState
.runLevel
=pBiDi
->levels
[start
];
1417 levState
.pImpTab
=((pBiDi
->pImpTabPair
)->pImpTab
)[levState
.runLevel
&1];
1418 levState
.pImpAct
=((pBiDi
->pImpTabPair
)->pImpAct
)[levState
.runLevel
&1];
1419 processPropertySeq(pBiDi
, &levState
, sor
, start
, start
);
1420 /* initialize for property state table */
1421 if(dirProps
[start
]==NSM
) {
1429 for(i
=start
; i
<=limit
; i
++) {
1433 DirProp prop
, prop1
;
1434 prop
=NO_CONTEXT_RTL(dirProps
[i
]);
1437 /* AL before EN does not make it AN */
1439 } else if(prop
==EN
) {
1440 if(nextStrongPos
<=i
) {
1441 /* look for next strong char (L/R/AL) */
1443 nextStrongProp
=R
; /* set default */
1444 nextStrongPos
=limit
;
1445 for(j
=i
+1; j
<limit
; j
++) {
1446 prop1
=NO_CONTEXT_RTL(dirProps
[j
]);
1447 if(prop1
==L
|| prop1
==R
|| prop1
==AL
) {
1448 nextStrongProp
=prop1
;
1454 if(nextStrongProp
==AL
) {
1459 gprop
=groupProp
[prop
];
1461 oldStateImp
=stateImp
;
1462 cell
=impTabProps
[oldStateImp
][gprop
];
1463 stateImp
=GET_STATEPROPS(cell
); /* isolate the new state */
1464 actionImp
=GET_ACTIONPROPS(cell
); /* isolate the action */
1465 if((i
==limit
) && (actionImp
==0)) {
1466 /* there is an unprocessed sequence if its property == eor */
1467 actionImp
=1; /* process the last sequence */
1470 resProp
=impTabProps
[oldStateImp
][IMPTABPROPS_RES
];
1472 case 1: /* process current seq1, init new seq1 */
1473 processPropertySeq(pBiDi
, &levState
, resProp
, start1
, i
);
1476 case 2: /* init new seq2 */
1479 case 3: /* process seq1, process seq2, init new seq1 */
1480 processPropertySeq(pBiDi
, &levState
, resProp
, start1
, start2
);
1481 processPropertySeq(pBiDi
, &levState
, _ON
, start2
, i
);
1484 case 4: /* process seq1, set seq1=seq2, init new seq2 */
1485 processPropertySeq(pBiDi
, &levState
, resProp
, start1
, start2
);
1489 default: /* we should never get here */
1491 start
/=(start
-start1
-25); /* force program crash */
1496 /* flush possible pending sequence, e.g. ON */
1497 processPropertySeq(pBiDi
, &levState
, eor
, limit
, limit
);
1500 /* perform (L1) and (X9) ---------------------------------------------------- */
1503 * Reset the embedding levels for some non-graphic characters (L1).
1504 * This function also sets appropriate levels for BN, and
1505 * explicit embedding types that are supposed to have been removed
1506 * from the paragraph in (X9).
/* Post-process levels[] for white space, separators and BN/explicit codes.
 * Nothing is done unless pBiDi->flags contains a MASK_WS bit. Working
 * backwards from trailingWSStart, a run of MASK_WS characters gets
 * GET_PARALEVEL(pBiDi, i); then, further back, MASK_BN_EXPLICIT characters
 * copy the level of the following character (levels[i+1]) and MASK_B_S
 * characters get the paragraph level, which restarts the first loop.
 * When orderParagraphsLTR is set, B characters take a separate branch whose
 * body is not visible in this excerpt. */
1509 adjustWSLevels(UBiDi
*pBiDi
) {
1510 const DirProp
*dirProps
=pBiDi
->dirProps
;
1511 UBiDiLevel
*levels
=pBiDi
->levels
;
1514 if(pBiDi
->flags
&MASK_WS
) {
1515 UBool orderParagraphsLTR
=pBiDi
->orderParagraphsLTR
;
1518 i
=pBiDi
->trailingWSStart
;
1520 /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
1521 while(i
>0 && (flag
=DIRPROP_FLAG_NC(dirProps
[--i
]))&MASK_WS
) {
1522 if(orderParagraphsLTR
&&(flag
&DIRPROP_FLAG(B
))) {
1525 levels
[i
]=GET_PARALEVEL(pBiDi
, i
);
1529 /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
1530 /* here, i+1 is guaranteed to be <length */
1532 flag
=DIRPROP_FLAG_NC(dirProps
[--i
]);
1533 if(flag
&MASK_BN_EXPLICIT
) {
1534 levels
[i
]=levels
[i
+1];
1535 } else if(orderParagraphsLTR
&&(flag
&DIRPROP_FLAG(B
))) {
1538 } else if(flag
&MASK_B_S
) {
1539 levels
[i
]=GET_PARALEVEL(pBiDi
, i
);
/* Minimum and absolute-value helpers used by setParaRunsOnly().
 * Both are plain macros and evaluate their arguments more than once, so they
 * must only be given side-effect-free expressions. */
1547 #define BIDI_MIN(x, y) ((x)<(y) ? (x) : (y))
1548 #define BIDI_ABS(x) ((x)>=0 ? (x) : (-(x)))
/* Implementation of ubidi_setPara() for UBIDI_REORDER_RUNS_ONLY mode.
 *
 * Outline as visible here:
 *  1. Switch to UBIDI_REORDER_DEFAULT and allocate one scratch buffer holding
 *     a visual map (int32_t), the visual text (UChar) and a copy of the
 *     levels (UBiDiLevel), each of `length` entries.
 *  2. Run ubidi_setPara() on the logical text (with INSERT_MARKS temporarily
 *     replaced by REMOVE_CONTROLS), write the reordered text, fetch the
 *     visual map, and save the levels.
 *  3. Run ubidi_setPara() again on the visual text in
 *     UBIDI_REORDER_INVERSE_LIKE_DIRECT mode with the paragraph level's low
 *     bit flipped, then get the runs.
 *  4. Count and perform splits of runs whose characters are not adjacent in
 *     the source text or differ in saved level, remapping logicalStart
 *     through the visual map.
 *  5. Flip paraLevel back, free the scratch buffer and restore
 *     UBIDI_REORDER_RUNS_ONLY.
 * Errors are reported through *pErrorCode (U_MEMORY_ALLOCATION_ERROR when the
 * scratch buffer cannot be allocated). */
1550 setParaRunsOnly(UBiDi
*pBiDi
, const UChar
*text
, int32_t length
,
1551 UBiDiLevel paraLevel
, UErrorCode
*pErrorCode
) {
1552 void *runsOnlyMemory
;
1555 const UBiDiLevel
*levels
;
1556 UBiDiLevel
*saveLevels
;
1558 int32_t visualLength
, i
, j
, visualStart
, logicalStart
,
1559 runCount
, runLength
, addedRuns
, insertRemove
,
1560 start
, limit
, step
, indexOddBit
, logicalPos
,
1562 uint32_t saveOptions
;
1564 pBiDi
->reorderingMode
=UBIDI_REORDER_DEFAULT
;
1566 ubidi_setPara(pBiDi
, text
, length
, paraLevel
, NULL
, pErrorCode
);
1569 /* obtain memory for mapping table and visual text */
1570 runsOnlyMemory
=uprv_malloc(length
*(sizeof(int32_t)+sizeof(UChar
)+sizeof(UBiDiLevel
)));
1571 if(runsOnlyMemory
==NULL
) {
1572 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
1575 visualMap
=runsOnlyMemory
;
1576 visualText
=(UChar
*)&visualMap
[length
];
1577 saveLevels
=(UBiDiLevel
*)&visualText
[length
];
1578 saveOptions
=pBiDi
->reorderingOptions
;
1579 if(saveOptions
& UBIDI_OPTION_INSERT_MARKS
) {
1580 pBiDi
->reorderingOptions
&=~UBIDI_OPTION_INSERT_MARKS
;
1581 pBiDi
->reorderingOptions
|=UBIDI_OPTION_REMOVE_CONTROLS
;
1583 ubidi_setPara(pBiDi
, text
, length
, paraLevel
, NULL
, pErrorCode
);
1584 levels
=ubidi_getLevels(pBiDi
, pErrorCode
);
1586 /* FOOD FOR THOUGHT: instead of writing the visual text, we could use
1587 * the visual map and the dirProps array to drive the second call
1588 * to ubidi_setPara (but must make provision for possible removal of
1589 * BiDi controls. Alternatively, only use the dirProps array via
1590 * customized classifier callback.
1592 visualLength
=ubidi_writeReordered(pBiDi
, visualText
, length
,
1593 UBIDI_DO_MIRRORING
, pErrorCode
);
1594 pBiDi
->reorderingOptions
=saveOptions
;
1595 ubidi_getVisualMap(pBiDi
, visualMap
, pErrorCode
);
1596 if(U_FAILURE(*pErrorCode
)) {
1599 uprv_memcpy(saveLevels
, levels
, length
*sizeof(UBiDiLevel
));
1601 pBiDi
->reorderingMode
=UBIDI_REORDER_INVERSE_LIKE_DIRECT
;
1602 paraLevel
=pBiDi
->paraLevel
^1;
1603 ubidi_setPara(pBiDi
, visualText
, visualLength
, paraLevel
, NULL
, pErrorCode
);
1604 if(U_FAILURE(*pErrorCode
)) {
1607 ubidi_getRuns(pBiDi
);
1608 /* check if some runs must be split, count how many splits */
1610 runCount
=pBiDi
->runCount
;
1613 for(i
=0; i
<runCount
; i
++, visualStart
+=runLength
) {
1614 runLength
=runs
[i
].visualLimit
-visualStart
;
1618 logicalStart
=GET_INDEX(runs
[i
].logicalStart
);
1619 for(j
=logicalStart
+1; j
<logicalStart
+runLength
; j
++) {
1621 index1
=visualMap
[j
-1];
1622 if((BIDI_ABS(index
-index1
)!=1) || (saveLevels
[index
]!=saveLevels
[index1
])) {
1628 if(getRunsMemory(pBiDi
, runCount
+addedRuns
)) {
1630 /* because we switch from UBiDi.simpleRuns to UBiDi.runs */
1631 pBiDi
->runsMemory
[0]=runs
[0];
1633 runs
=pBiDi
->runs
=pBiDi
->runsMemory
;
1634 pBiDi
->runCount
+=addedRuns
;
1639 /* split runs which are not consecutive in source text */
1640 for(i
=runCount
-1; i
>=0; i
--) {
1641 runLength
= i
==0 ? runs
[0].visualLimit
:
1642 runs
[i
].visualLimit
-runs
[i
-1].visualLimit
;
1643 logicalStart
=runs
[i
].logicalStart
;
1644 indexOddBit
=GET_ODD_BIT(logicalStart
);
1645 logicalStart
=GET_INDEX(logicalStart
);
1648 runs
[i
+addedRuns
]=runs
[i
];
1650 logicalPos
=visualMap
[logicalStart
];
1651 runs
[i
+addedRuns
].logicalStart
=MAKE_INDEX_ODD_PAIR(logicalPos
,
1652 saveLevels
[logicalPos
]^indexOddBit
);
1657 limit
=logicalStart
+runLength
-1;
1660 start
=logicalStart
+runLength
-1;
1664 for(j
=start
; j
!=limit
; j
+=step
) {
1666 index1
=visualMap
[j
+step
];
1667 if((BIDI_ABS(index
-index1
)!=1) || (saveLevels
[index
]!=saveLevels
[index1
])) {
1668 logicalPos
=BIDI_MIN(visualMap
[start
], index
);
1669 runs
[i
+addedRuns
].logicalStart
=MAKE_INDEX_ODD_PAIR(logicalPos
,
1670 saveLevels
[logicalPos
]^indexOddBit
);
1671 runs
[i
+addedRuns
].visualLimit
=runs
[i
].visualLimit
;
1672 runs
[i
].visualLimit
-=BIDI_ABS(j
-start
)+1;
1673 insertRemove
=runs
[i
].insertRemove
&(LRM_AFTER
|RLM_AFTER
);
1674 runs
[i
+addedRuns
].insertRemove
=insertRemove
;
1675 runs
[i
].insertRemove
&=~insertRemove
;
1681 runs
[i
+addedRuns
]=runs
[i
];
1683 logicalPos
=BIDI_MIN(visualMap
[start
], visualMap
[limit
]);
1684 runs
[i
+addedRuns
].logicalStart
=MAKE_INDEX_ODD_PAIR(logicalPos
,
1685 saveLevels
[logicalPos
]^indexOddBit
);
1689 /* restore initial paraLevel */
1690 pBiDi
->paraLevel
^=1;
1692 /* restore real text */
1694 /* free memory for mapping table and visual text */
1695 uprv_free(runsOnlyMemory
);
1697 pBiDi
->reorderingMode
=UBIDI_REORDER_RUNS_ONLY
;
1700 /* ubidi_setPara ------------------------------------------------------------ */
/* Analyze one piece of text and compute its embedding levels.
 *
 * pBiDi            object to fill; NULL is rejected
 * text             UTF-16 text; NULL is rejected
 * length           text length; u_strlen(text) is used on one path (the
 *                  guarding condition is not visible in this excerpt)
 * paraLevel        paragraph level, or a default-level constant
 *                  (see IS_DEFAULT_LEVEL)
 * embeddingLevels  caller-supplied levels, or NULL to derive them from the
 *                  explicit codes via resolveExplicitLevels()
 * pErrorCode       in/out error code; nothing is done if NULL or already a
 *                  failure. U_ILLEGAL_ARGUMENT_ERROR and
 *                  U_MEMORY_ALLOCATION_ERROR are set here.
 *
 * Visible flow: validate arguments; hand off to setParaRunsOnly() in
 * UBIDI_REORDER_RUNS_ONLY mode; reset the object (pParaBiDi=NULL marks an
 * unfinished call); obtain dirProps, paras and levels memory; determine the
 * overall direction; pick the ImpTabPair for the reordering mode; resolve
 * implicit levels either for the whole text or per same-level run with
 * sor/eor computed from neighbouring levels; propagate any insert-point
 * allocation error; call adjustWSLevels(); adjust resultLength for removed
 * controls and inserted marks; finally set pParaBiDi=pBiDi to mark success. */
1702 U_CAPI
void U_EXPORT2
1703 ubidi_setPara(UBiDi
*pBiDi
, const UChar
*text
, int32_t length
,
1704 UBiDiLevel paraLevel
, UBiDiLevel
*embeddingLevels
,
1705 UErrorCode
*pErrorCode
) {
1706 UBiDiDirection direction
;
1708 /* check the argument values */
1709 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
1711 } else if(pBiDi
==NULL
|| text
==NULL
||
1712 ((UBIDI_MAX_EXPLICIT_LEVEL
<paraLevel
) && !IS_DEFAULT_LEVEL(paraLevel
)) ||
1715 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
1720 length
=u_strlen(text
);
1723 /* special treatment for RUNS_ONLY mode */
1724 if(pBiDi
->reorderingMode
==UBIDI_REORDER_RUNS_ONLY
) {
1725 setParaRunsOnly(pBiDi
, text
, length
, paraLevel
, pErrorCode
);
1729 /* initialize the UBiDi structure */
1730 pBiDi
->pParaBiDi
=NULL
; /* mark unfinished setPara */
1732 pBiDi
->length
=pBiDi
->originalLength
=pBiDi
->resultLength
=length
;
1733 pBiDi
->paraLevel
=paraLevel
;
1734 pBiDi
->direction
=UBIDI_LTR
;
1737 pBiDi
->dirProps
=NULL
;
1740 pBiDi
->insertPoints
.size
=0; /* clean up from last call */
1741 pBiDi
->insertPoints
.confirmed
=0; /* clean up from last call */
1744 * Save the original paraLevel if contextual; otherwise, set to 0.
1746 if(IS_DEFAULT_LEVEL(paraLevel
)) {
1747 pBiDi
->defaultParaLevel
=paraLevel
;
1749 pBiDi
->defaultParaLevel
=0;
1754 * For an empty paragraph, create a UBiDi object with the paraLevel and
1755 * the flags and the direction set but without allocating zero-length arrays.
1756 * There is nothing more to do.
1758 if(IS_DEFAULT_LEVEL(paraLevel
)) {
1759 pBiDi
->paraLevel
&=1;
1760 pBiDi
->defaultParaLevel
=0;
1763 pBiDi
->flags
=DIRPROP_FLAG(R
);
1764 pBiDi
->direction
=UBIDI_RTL
;
1766 pBiDi
->flags
=DIRPROP_FLAG(L
);
1767 pBiDi
->direction
=UBIDI_LTR
;
1771 pBiDi
->pParaBiDi
=pBiDi
; /* mark successful setPara */
1778 * Get the directional properties,
1779 * the flags bit-set, and
1780 * determine the paragraph level if necessary.
1782 if(getDirPropsMemory(pBiDi
, length
)) {
1783 pBiDi
->dirProps
=pBiDi
->dirPropsMemory
;
1786 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
1789 /* the processed length may have changed if UBIDI_OPTION_STREAMING */
1790 length
= pBiDi
->length
;
1791 pBiDi
->trailingWSStart
=length
; /* the levels[] will reflect the WS run */
1792 /* allocate paras memory */
1793 if(pBiDi
->paraCount
>1) {
1794 if(getInitialParasMemory(pBiDi
, pBiDi
->paraCount
)) {
1795 pBiDi
->paras
=pBiDi
->parasMemory
;
1796 pBiDi
->paras
[pBiDi
->paraCount
-1]=length
;
1798 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
1802 /* initialize paras for single paragraph */
1803 pBiDi
->paras
=pBiDi
->simpleParas
;
1804 pBiDi
->simpleParas
[0]=length
;
1807 /* are explicit levels specified? */
1808 if(embeddingLevels
==NULL
) {
1809 /* no: determine explicit levels according to the (Xn) rules */\
1810 if(getLevelsMemory(pBiDi
, length
)) {
1811 pBiDi
->levels
=pBiDi
->levelsMemory
;
1812 direction
=resolveExplicitLevels(pBiDi
);
1814 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
1818 /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */
1819 pBiDi
->levels
=embeddingLevels
;
1820 direction
=checkExplicitLevels(pBiDi
, pErrorCode
);
1821 if(U_FAILURE(*pErrorCode
)) {
1827 * The steps after (X9) in the UBiDi algorithm are performed only if
1828 * the paragraph text has mixed directionality!
1830 pBiDi
->direction
=direction
;
1833 /* make sure paraLevel is even */
1834 pBiDi
->paraLevel
=(UBiDiLevel
)((pBiDi
->paraLevel
+1)&~1);
1836 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
1837 pBiDi
->trailingWSStart
=0;
1840 /* make sure paraLevel is odd */
1841 pBiDi
->paraLevel
|=1;
1843 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
1844 pBiDi
->trailingWSStart
=0;
1848 * Choose the right implicit state table
1850 switch(pBiDi
->reorderingMode
) {
1851 case UBIDI_REORDER_DEFAULT
:
1852 pBiDi
->pImpTabPair
=&impTab_DEFAULT
;
1854 case UBIDI_REORDER_NUMBERS_SPECIAL
:
1855 pBiDi
->pImpTabPair
=&impTab_NUMBERS_SPECIAL
;
1857 case UBIDI_REORDER_GROUP_NUMBERS_WITH_R
:
1858 pBiDi
->pImpTabPair
=&impTab_GROUP_NUMBERS_WITH_R
;
1860 case UBIDI_REORDER_RUNS_ONLY
:
1861 /* we should never get here */
1863 pBiDi
->text
=NULL
; /* make the program crash! */
1865 case UBIDI_REORDER_INVERSE_NUMBERS_AS_L
:
1866 pBiDi
->pImpTabPair
=&impTab_INVERSE_NUMBERS_AS_L
;
1868 case UBIDI_REORDER_INVERSE_LIKE_DIRECT
:
1869 if (pBiDi
->reorderingOptions
& UBIDI_OPTION_INSERT_MARKS
) {
1870 pBiDi
->pImpTabPair
=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS
;
1872 pBiDi
->pImpTabPair
=&impTab_INVERSE_LIKE_DIRECT
;
1875 case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
:
1876 if (pBiDi
->reorderingOptions
& UBIDI_OPTION_INSERT_MARKS
) {
1877 pBiDi
->pImpTabPair
=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS
;
1879 pBiDi
->pImpTabPair
=&impTab_INVERSE_FOR_NUMBERS_SPECIAL
;
1883 pBiDi
->pImpTabPair
=&impTab_DEFAULT
;
1887 * If there are no external levels specified and there
1888 * are no significant explicit level codes in the text,
1889 * then we can treat the entire paragraph as one run.
1890 * Otherwise, we need to perform the following rules on runs of
1891 * the text with the same embedding levels. (X10)
1892 * "Significant" explicit level codes are ones that actually
1893 * affect non-BN characters.
1894 * Examples for "insignificant" ones are empty embeddings
1895 * LRE-PDF, LRE-RLE-PDF-PDF, etc.
1897 if(embeddingLevels
==NULL
&& !(pBiDi
->flags
&DIRPROP_FLAG_MULTI_RUNS
)) {
1898 resolveImplicitLevels(pBiDi
, 0, length
,
1899 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi
, 0)),
1900 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi
, length
-1)));
1902 /* sor, eor: start and end types of same-level-run */
1903 UBiDiLevel
*levels
=pBiDi
->levels
;
1904 int32_t start
, limit
=0;
1905 UBiDiLevel level
, nextLevel
;
1908 /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
1909 level
=GET_PARALEVEL(pBiDi
, 0);
1910 nextLevel
=levels
[0];
1911 if(level
<nextLevel
) {
1912 eor
=GET_LR_FROM_LEVEL(nextLevel
);
1914 eor
=GET_LR_FROM_LEVEL(level
);
1918 /* determine start and limit of the run (end points just behind the run) */
1920 /* the values for this run's start are the same as for the previous run's end */
1923 if((start
>0) && (NO_CONTEXT_RTL(pBiDi
->dirProps
[start
-1])==B
)) {
1924 /* except if this is a new paragraph, then set sor = para level */
1925 sor
=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi
, start
));
1930 /* search for the limit of this run */
1931 while(++limit
<length
&& levels
[limit
]==level
) {}
1933 /* get the correct level of the next run */
1935 nextLevel
=levels
[limit
];
1937 nextLevel
=GET_PARALEVEL(pBiDi
, length
-1);
1940 /* determine eor from max(level, nextLevel); sor is last run's eor */
1941 if((level
&~UBIDI_LEVEL_OVERRIDE
)<(nextLevel
&~UBIDI_LEVEL_OVERRIDE
)) {
1942 eor
=GET_LR_FROM_LEVEL(nextLevel
);
1944 eor
=GET_LR_FROM_LEVEL(level
);
1947 /* if the run consists of overridden directional types, then there
1948 are no implicit types to be resolved */
1949 if(!(level
&UBIDI_LEVEL_OVERRIDE
)) {
1950 resolveImplicitLevels(pBiDi
, start
, limit
, sor
, eor
);
1952 /* remove the UBIDI_LEVEL_OVERRIDE flags */
1954 levels
[start
++]&=~UBIDI_LEVEL_OVERRIDE
;
1955 } while(start
<limit
);
1957 } while(limit
<length
);
1959 /* check if we got any memory shortage while adding insert points */
1960 if (U_FAILURE(pBiDi
->insertPoints
.errorCode
))
1962 *pErrorCode
=pBiDi
->insertPoints
.errorCode
;
1965 /* reset the embedding levels for some non-graphic characters (L1), (X9) */
1966 adjustWSLevels(pBiDi
);
1969 if(pBiDi
->reorderingOptions
& UBIDI_OPTION_REMOVE_CONTROLS
) {
1970 pBiDi
->resultLength
-= pBiDi
->controlCount
;
1972 pBiDi
->resultLength
+= pBiDi
->insertPoints
.size
;
1974 pBiDi
->pParaBiDi
=pBiDi
; /* mark successful setPara */
/* Store the orderParagraphsLTR flag in the UBiDi object. The flag is read by
 * adjustWSLevels() when handling B (paragraph separator) characters.
 * NOTE(review): original line 1979 is missing from this excerpt; it may hold
 * a guard on pBiDi. */
1977 U_CAPI
void U_EXPORT2
1978 ubidi_orderParagraphsLTR(UBiDi
*pBiDi
, UBool orderParagraphsLTR
) {
1980 pBiDi
->orderParagraphsLTR
=orderParagraphsLTR
;
/*
 * Getter: returns the orderParagraphsLTR flag stored in the UBiDi object.
 *
 * NOTE(review): whether pBiDi is NULL-checked before the read, and what is
 * returned in that case, is not visible here -- confirm.
 */
1984 U_CAPI UBool U_EXPORT2
1985 ubidi_isOrderParagraphsLTR(UBiDi
*pBiDi
) {
1987 return pBiDi
->orderParagraphsLTR
;
/*
 * Returns pBiDi->direction when IS_VALID_PARA_OR_LINE(pBiDi) holds.
 *
 * NOTE(review): the value returned for an object that fails the validity
 * test is not visible here -- confirm.
 */
1993 U_CAPI UBiDiDirection U_EXPORT2
1994 ubidi_getDirection(const UBiDi
*pBiDi
) {
1995 if(IS_VALID_PARA_OR_LINE(pBiDi
)) {
1996 return pBiDi
->direction
;
/*
 * Accessor returning a const UChar pointer for the UBiDi object.
 * The result depends on the IS_VALID_PARA_OR_LINE(pBiDi) test below.
 *
 * NOTE(review): the pointers returned on the valid and invalid paths are
 * not visible here -- confirm.
 */
2002 U_CAPI
const UChar
* U_EXPORT2
2003 ubidi_getText(const UBiDi
*pBiDi
) {
2004 if(IS_VALID_PARA_OR_LINE(pBiDi
)) {
/*
 * Returns pBiDi->originalLength when IS_VALID_PARA_OR_LINE(pBiDi) holds.
 * Note that this is a different field from the one read by
 * ubidi_getProcessedLength() (pBiDi->length).
 *
 * NOTE(review): the value returned for an object that fails the validity
 * test is not visible here -- confirm.
 */
2011 U_CAPI
int32_t U_EXPORT2
2012 ubidi_getLength(const UBiDi
*pBiDi
) {
2013 if(IS_VALID_PARA_OR_LINE(pBiDi
)) {
2014 return pBiDi
->originalLength
;
/*
 * Returns pBiDi->length when IS_VALID_PARA_OR_LINE(pBiDi) holds.
 * Note that this is a different field from the one read by
 * ubidi_getLength() (pBiDi->originalLength).
 *
 * NOTE(review): the value returned for an object that fails the validity
 * test is not visible here -- confirm.
 */
2020 U_CAPI
int32_t U_EXPORT2
2021 ubidi_getProcessedLength(const UBiDi
*pBiDi
) {
2022 if(IS_VALID_PARA_OR_LINE(pBiDi
)) {
2023 return pBiDi
->length
;
/*
 * Returns pBiDi->resultLength when IS_VALID_PARA_OR_LINE(pBiDi) holds.
 *
 * NOTE(review): the value returned for an object that fails the validity
 * test is not visible here -- confirm.
 */
2029 U_CAPI
int32_t U_EXPORT2
2030 ubidi_getResultLength(const UBiDi
*pBiDi
) {
2031 if(IS_VALID_PARA_OR_LINE(pBiDi
)) {
2032 return pBiDi
->resultLength
;
2038 /* paragraphs API functions ------------------------------------------------- */
/*
 * Returns pBiDi->paraLevel when IS_VALID_PARA_OR_LINE(pBiDi) holds.
 * This reads the single paraLevel field of the object; per-paragraph levels
 * are obtained elsewhere in this file through GET_PARALEVEL().
 *
 * NOTE(review): the value returned for an object that fails the validity
 * test is not visible here -- confirm.
 */
2040 U_CAPI UBiDiLevel U_EXPORT2
2041 ubidi_getParaLevel(const UBiDi
*pBiDi
) {
2042 if(IS_VALID_PARA_OR_LINE(pBiDi
)) {
2043 return pBiDi
->paraLevel
;
/*
 * Returns pBiDi->paraCount for an object that passes
 * IS_VALID_PARA_OR_LINE(pBiDi).
 *
 * Unlike the neighbouring getters, the validity test is negated here, so
 * the invalid-object path is handled first.
 *
 * NOTE(review): the value returned on the invalid-object path is not
 * visible here -- confirm.
 */
2049 U_CAPI
int32_t U_EXPORT2
2050 ubidi_countParagraphs(UBiDi
*pBiDi
) {
2051 if(!IS_VALID_PARA_OR_LINE(pBiDi
)) {
2054 return pBiDi
->paraCount
;
/*
 * Reports the start, limit and level of the paragraph with index paraIndex.
 *
 * pBiDi       paragraph or line object; must pass IS_VALID_PARA_OR_LINE()
 * paraIndex   paragraph index; must satisfy 0 <= paraIndex < pBiDi->paraCount
 * pParaStart  optional output (may be NULL): start of the paragraph
 * pParaLimit  optional output (may be NULL): pBiDi->paras[paraIndex]
 * pParaLevel  optional output (may be NULL): GET_PARALEVEL() at the start
 * pErrorCode  in/out error code; set to U_ILLEGAL_ARGUMENT_ERROR when the
 *             object is not valid or paraIndex is out of range
 */
2058 U_CAPI
void U_EXPORT2
2059 ubidi_getParagraphByIndex(const UBiDi
*pBiDi
, int32_t paraIndex
,
2060 int32_t *pParaStart
, int32_t *pParaLimit
,
2061 UBiDiLevel
*pParaLevel
, UErrorCode
*pErrorCode
) {
2064 /* check the argument values */
/* pErrorCode is tested for NULL and for a pre-existing failure before it is ever written */
2065 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
2067 } else if( !IS_VALID_PARA_OR_LINE(pBiDi
) || /* no valid setPara/setLine */
2068 paraIndex
<0 || paraIndex
>=pBiDi
->paraCount
) {
2069 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
/* from here on the paragraph data is read through pBiDi->pParaBiDi */
2072 pBiDi
=pBiDi
->pParaBiDi
; /* get Para object if Line object */
/*
 * The start of paragraph paraIndex is taken from the previous entry of paras[].
 * NOTE(review): for paraIndex==0 this index would be -1; a special case for
 * the first paragraph is expected around this statement but is not visible
 * here -- confirm.
 */
2074 paraStart
=pBiDi
->paras
[paraIndex
-1];
/* each output pointer is optional: it is written only when non-NULL */
2078 if(pParaStart
!=NULL
) {
2079 *pParaStart
=paraStart
;
2081 if(pParaLimit
!=NULL
) {
2082 *pParaLimit
=pBiDi
->paras
[paraIndex
];
2084 if(pParaLevel
!=NULL
) {
2085 *pParaLevel
=GET_PARALEVEL(pBiDi
, paraStart
);
2090 U_CAPI
int32_t U_EXPORT2
2091 ubidi_getParagraph(const UBiDi
*pBiDi
, int32_t charIndex
,
2092 int32_t *pParaStart
, int32_t *pParaLimit
,
2093 UBiDiLevel
*pParaLevel
, UErrorCode
*pErrorCode
) {
2096 /* check the argument values */
2097 /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */
2098 if( !IS_VALID_PARA_OR_LINE(pBiDi
)) {/* no valid setPara/setLine */
2099 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
2102 pBiDi
=pBiDi
->pParaBiDi
; /* get Para object if Line object */
2103 if( charIndex
<0 || charIndex
>=pBiDi
->length
) {
2104 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
2107 for(paraIndex
=0; charIndex
>=pBiDi
->paras
[paraIndex
]; paraIndex
++);
2108 ubidi_getParagraphByIndex(pBiDi
, paraIndex
, pParaStart
, pParaLimit
, pParaLevel
, pErrorCode
);
/*
 * Installs a new class callback and its context on the UBiDi object and
 * hands the previous pair back to the caller.
 *
 * pBiDi       object to modify; NULL is reported as U_ILLEGAL_ARGUMENT_ERROR
 * newFn       stored into pBiDi->fnClassCallback
 * newContext  stored into pBiDi->coClassCallback
 * oldFn       receives the previous pBiDi->fnClassCallback
 * oldContext  receives the previous pBiDi->coClassCallback
 * pErrorCode  in/out error code; tested for NULL and for a pre-existing
 *             failure before anything else
 *
 * NOTE(review): whether oldFn and oldContext may be NULL (i.e. whether the
 * two stores below are guarded) is not visible here -- confirm.
 */
2112 U_CAPI
void U_EXPORT2
2113 ubidi_setClassCallback(UBiDi
*pBiDi
, UBiDiClassCallback
*newFn
,
2114 const void *newContext
, UBiDiClassCallback
**oldFn
,
2115 const void **oldContext
, UErrorCode
*pErrorCode
)
2117 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
2119 } else if(pBiDi
==NULL
) {
2120 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
/* save the previous callback and context before they are overwritten below */
2125 *oldFn
= pBiDi
->fnClassCallback
;
2129 *oldContext
= pBiDi
->coClassCallback
;
/* install the new callback and context */
2131 pBiDi
->fnClassCallback
= newFn
;
2132 pBiDi
->coClassCallback
= newContext
;
/*
 * Reads back the class callback and context currently stored on the UBiDi
 * object.
 *
 * pBiDi    object to query
 * fn       receives pBiDi->fnClassCallback
 * context  receives pBiDi->coClassCallback
 *
 * NOTE(review): no error code is taken by this function; whether pBiDi, fn
 * and context are NULL-checked before use is not visible here -- confirm.
 */
2135 U_CAPI
void U_EXPORT2
2136 ubidi_getClassCallback(UBiDi
*pBiDi
, UBiDiClassCallback
**fn
, const void **context
)
2140 *fn
= pBiDi
->fnClassCallback
;
2144 *context
= pBiDi
->coClassCallback
;
2148 U_CAPI UCharDirection U_EXPORT2
2149 ubidi_getCustomizedClass(UBiDi
*pBiDi
, UChar32 c
)
2153 if( pBiDi
->fnClassCallback
== NULL
||
2154 (dir
= (*pBiDi
->fnClassCallback
)(pBiDi
->coClassCallback
, c
)) == U_BIDI_CLASS_DEFAULT
)
2156 return ubidi_getClass(pBiDi
->bdp
, c
);