2 ******************************************************************************
4 * Copyright (C) 1999-2011, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 ******************************************************************************
10 * tab size: 8 (not used)
13 * created on: 1999aug06
14 * created by: Markus W. Scherer, updated by Matitiahu Allouche
18 #include "unicode/utypes.h"
19 #include "unicode/ustring.h"
20 #include "unicode/uchar.h"
21 #include "unicode/ubidi.h"
25 #ifndef U_COMMON_IMPLEMENTATION
26 #error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see http://userguide.icu-project.org/howtouseicu
30 * General remarks about the functions in this file:
32 * These functions deal with the aspects of potentially mixed-directional
33 * text in a single paragraph or in a line of a single paragraph
34 * which has already been processed according to
35 * the Unicode 3.0 BiDi algorithm as defined in
36 * http://www.unicode.org/unicode/reports/tr9/ , version 13,
37 * also described in The Unicode Standard, Version 4.0.1 .
39 * This means that there is a UBiDi object with a levels
40 * and a dirProps array.
41 * paraLevel and direction are also set.
42 * Only if the length of the text is zero, then levels==dirProps==NULL.
44 * The overall directionality of the paragraph
45 * or line is used to bypass the reordering steps if possible.
46 * Even purely RTL text does not need reordering there because
47 * the ubidi_getLogical/VisualIndex() functions can compute the
48 * index on the fly in such a case.
50 * The implementation of the access to same-level-runs and of the reordering
51 * do attempt to provide better performance and less memory usage compared to
52 * a direct implementation of especially rule (L2) with an array of
53 * one (32-bit) integer per text character.
55 * Here, the levels array is scanned as soon as necessary, and a vector of
56 * same-level-runs is created. Reordering then is done on this vector.
57 * For each run of text positions that were resolved to the same level,
58 * only 8 bytes are stored: the first text position of the run and the visual
59 * position behind the run after reordering.
60 * One sign bit is used to hold the directionality of the run.
61 * This is inefficient if there are many very short runs. If the average run
62 * length is <2, then this uses more memory.
64 * In a further attempt to save memory, the levels array is never changed
65 * after all the resolution rules (Xn, Wn, Nn, In).
66 * Many functions have to consider the field trailingWSStart:
67 * if it is less than length, then there is an implicit trailing run
69 * which is not reflected in the levels array.
70 * This allows a line UBiDi object to use the same levels array as
71 * its paragraph parent object.
73 * When a UBiDi object is created for a line of a paragraph, then the
74 * paragraph's levels and dirProps arrays are reused by way of setting
75 * a pointer into them, not by copying. This again saves memory and forbids to
76 * change the now shared levels for (L1).
79 /* handle trailing WS (L1) -------------------------------------------------- */
82 * setTrailingWSStart() sets the start index for a trailing
83 * run of WS in the line. This is necessary because we do not modify
84 * the paragraph's levels array that we just point into.
85 * Using trailingWSStart is another form of performing (L1).
87 * To make subsequent operations easier, we also include the run
88 * before the WS if it is at the paraLevel - we merge the two here.
90 * This function is called only from ubidi_setLine(), so pBiDi->paraLevel is
91 * set correctly for the line even when contextual multiple paragraphs.
94 setTrailingWSStart(UBiDi
*pBiDi
) {
95 /* pBiDi->direction!=UBIDI_MIXED */
97 const DirProp
*dirProps
=pBiDi
->dirProps
;
98 UBiDiLevel
*levels
=pBiDi
->levels
;
99 int32_t start
=pBiDi
->length
;
100 UBiDiLevel paraLevel
=pBiDi
->paraLevel
;
102 /* If the line is terminated by a block separator, all preceding WS etc...
103 are already set to paragraph level.
104 Setting trailingWSStart to pBidi->length will avoid changing the
105 level of B chars from 0 to paraLevel in ubidi_getLevels when
106 orderParagraphsLTR==TRUE.
108 if(NO_CONTEXT_RTL(dirProps
[start
-1])==B
) {
109 pBiDi
->trailingWSStart
=start
; /* currently == pBiDi->length */
112 /* go backwards across all WS, BN, explicit codes */
113 while(start
>0 && DIRPROP_FLAG_NC(dirProps
[start
-1])&MASK_WS
) {
117 /* if the WS run can be merged with the previous run then do so here */
118 while(start
>0 && levels
[start
-1]==paraLevel
) {
122 pBiDi
->trailingWSStart
=start
;
125 /* ubidi_setLine ------------------------------------------------------------ */
127 U_CAPI
void U_EXPORT2
128 ubidi_setLine(const UBiDi
*pParaBiDi
,
129 int32_t start
, int32_t limit
,
131 UErrorCode
*pErrorCode
) {
134 /* check the argument values */
135 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
);
136 RETURN_VOID_IF_NOT_VALID_PARA(pParaBiDi
, *pErrorCode
);
137 RETURN_VOID_IF_BAD_RANGE(start
, 0, limit
, *pErrorCode
);
138 RETURN_VOID_IF_BAD_RANGE(limit
, 0, pParaBiDi
->length
+1, *pErrorCode
);
139 if(pLineBiDi
==NULL
) {
140 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
143 if(ubidi_getParagraph(pParaBiDi
, start
, NULL
, NULL
, NULL
, pErrorCode
) !=
144 ubidi_getParagraph(pParaBiDi
, limit
-1, NULL
, NULL
, NULL
, pErrorCode
)) {
145 /* the line crosses a paragraph boundary */
146 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
150 /* set the values in pLineBiDi from its pParaBiDi parent */
151 pLineBiDi
->pParaBiDi
=NULL
; /* mark unfinished setLine */
152 pLineBiDi
->text
=pParaBiDi
->text
+start
;
153 length
=pLineBiDi
->length
=limit
-start
;
154 pLineBiDi
->resultLength
=pLineBiDi
->originalLength
=length
;
155 pLineBiDi
->paraLevel
=GET_PARALEVEL(pParaBiDi
, start
);
156 pLineBiDi
->paraCount
=pParaBiDi
->paraCount
;
157 pLineBiDi
->runs
=NULL
;
159 pLineBiDi
->reorderingMode
=pParaBiDi
->reorderingMode
;
160 pLineBiDi
->reorderingOptions
=pParaBiDi
->reorderingOptions
;
161 pLineBiDi
->controlCount
=0;
162 if(pParaBiDi
->controlCount
>0) {
164 for(j
=start
; j
<limit
; j
++) {
165 if(IS_BIDI_CONTROL_CHAR(pParaBiDi
->text
[j
])) {
166 pLineBiDi
->controlCount
++;
169 pLineBiDi
->resultLength
-=pLineBiDi
->controlCount
;
172 pLineBiDi
->dirProps
=pParaBiDi
->dirProps
+start
;
173 pLineBiDi
->levels
=pParaBiDi
->levels
+start
;
174 pLineBiDi
->runCount
=-1;
176 if(pParaBiDi
->direction
!=UBIDI_MIXED
) {
177 /* the parent is already trivial */
178 pLineBiDi
->direction
=pParaBiDi
->direction
;
181 * The parent's levels are all either
182 * implicitly or explicitly ==paraLevel;
185 if(pParaBiDi
->trailingWSStart
<=start
) {
186 pLineBiDi
->trailingWSStart
=0;
187 } else if(pParaBiDi
->trailingWSStart
<limit
) {
188 pLineBiDi
->trailingWSStart
=pParaBiDi
->trailingWSStart
-start
;
190 pLineBiDi
->trailingWSStart
=length
;
193 const UBiDiLevel
*levels
=pLineBiDi
->levels
;
194 int32_t i
, trailingWSStart
;
197 setTrailingWSStart(pLineBiDi
);
198 trailingWSStart
=pLineBiDi
->trailingWSStart
;
200 /* recalculate pLineBiDi->direction */
201 if(trailingWSStart
==0) {
202 /* all levels are at paraLevel */
203 pLineBiDi
->direction
=(UBiDiDirection
)(pLineBiDi
->paraLevel
&1);
205 /* get the level of the first character */
206 level
=(UBiDiLevel
)(levels
[0]&1);
208 /* if there is anything of a different level, then the line is mixed */
209 if(trailingWSStart
<length
&& (pLineBiDi
->paraLevel
&1)!=level
) {
210 /* the trailing WS is at paraLevel, which differs from levels[0] */
211 pLineBiDi
->direction
=UBIDI_MIXED
;
213 /* see if levels[1..trailingWSStart-1] have the same direction as levels[0] and paraLevel */
216 if(i
==trailingWSStart
) {
217 /* the direction values match those in level */
218 pLineBiDi
->direction
=(UBiDiDirection
)level
;
220 } else if((levels
[i
]&1)!=level
) {
221 pLineBiDi
->direction
=UBIDI_MIXED
;
229 switch(pLineBiDi
->direction
) {
231 /* make sure paraLevel is even */
232 pLineBiDi
->paraLevel
=(UBiDiLevel
)((pLineBiDi
->paraLevel
+1)&~1);
234 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
235 pLineBiDi
->trailingWSStart
=0;
238 /* make sure paraLevel is odd */
239 pLineBiDi
->paraLevel
|=1;
241 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
242 pLineBiDi
->trailingWSStart
=0;
248 pLineBiDi
->pParaBiDi
=pParaBiDi
; /* mark successful setLine */
252 U_CAPI UBiDiLevel U_EXPORT2
253 ubidi_getLevelAt(const UBiDi
*pBiDi
, int32_t charIndex
) {
254 /* return paraLevel if in the trailing WS run, otherwise the real level */
255 if(!IS_VALID_PARA_OR_LINE(pBiDi
) || charIndex
<0 || pBiDi
->length
<=charIndex
) {
257 } else if(pBiDi
->direction
!=UBIDI_MIXED
|| charIndex
>=pBiDi
->trailingWSStart
) {
258 return GET_PARALEVEL(pBiDi
, charIndex
);
260 return pBiDi
->levels
[charIndex
];
264 U_CAPI
const UBiDiLevel
* U_EXPORT2
265 ubidi_getLevels(UBiDi
*pBiDi
, UErrorCode
*pErrorCode
) {
266 int32_t start
, length
;
268 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
, NULL
);
269 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi
, *pErrorCode
, NULL
);
270 if((length
=pBiDi
->length
)<=0) {
271 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
274 if((start
=pBiDi
->trailingWSStart
)==length
) {
275 /* the current levels array reflects the WS run */
276 return pBiDi
->levels
;
280 * After the previous if(), we know that the levels array
281 * has an implicit trailing WS run and therefore does not fully
282 * reflect itself all the levels.
283 * This must be a UBiDi object for a line, and
284 * we need to create a new levels array.
286 if(getLevelsMemory(pBiDi
, length
)) {
287 UBiDiLevel
*levels
=pBiDi
->levelsMemory
;
289 if(start
>0 && levels
!=pBiDi
->levels
) {
290 uprv_memcpy(levels
, pBiDi
->levels
, start
);
292 /* pBiDi->paraLevel is ok even if contextual multiple paragraphs,
293 since pBidi is a line object */
294 uprv_memset(levels
+start
, pBiDi
->paraLevel
, length
-start
);
296 /* this new levels array is set for the line and reflects the WS run */
297 pBiDi
->trailingWSStart
=length
;
298 return pBiDi
->levels
=levels
;
301 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
306 U_CAPI
void U_EXPORT2
307 ubidi_getLogicalRun(const UBiDi
*pBiDi
, int32_t logicalPosition
,
308 int32_t *pLogicalLimit
, UBiDiLevel
*pLevel
) {
309 UErrorCode errorCode
;
310 int32_t runCount
, visualStart
, logicalLimit
, logicalFirst
, i
;
313 errorCode
=U_ZERO_ERROR
;
314 RETURN_VOID_IF_BAD_RANGE(logicalPosition
, 0, pBiDi
->length
, errorCode
);
315 /* ubidi_countRuns will check VALID_PARA_OR_LINE */
316 runCount
=ubidi_countRuns((UBiDi
*)pBiDi
, &errorCode
);
317 if(U_FAILURE(errorCode
)) {
320 /* this is done based on runs rather than on levels since levels have
321 a special interpretation when UBIDI_REORDER_RUNS_ONLY
323 visualStart
=logicalLimit
=0;
326 for(i
=0; i
<runCount
; i
++) {
327 iRun
= pBiDi
->runs
[i
];
328 logicalFirst
=GET_INDEX(iRun
.logicalStart
);
329 logicalLimit
=logicalFirst
+iRun
.visualLimit
-visualStart
;
330 if((logicalPosition
>=logicalFirst
) &&
331 (logicalPosition
<logicalLimit
)) {
334 visualStart
= iRun
.visualLimit
;
337 *pLogicalLimit
=logicalLimit
;
340 if(pBiDi
->reorderingMode
==UBIDI_REORDER_RUNS_ONLY
) {
341 *pLevel
=(UBiDiLevel
)GET_ODD_BIT(iRun
.logicalStart
);
343 else if(pBiDi
->direction
!=UBIDI_MIXED
|| logicalPosition
>=pBiDi
->trailingWSStart
) {
344 *pLevel
=GET_PARALEVEL(pBiDi
, logicalPosition
);
346 *pLevel
=pBiDi
->levels
[logicalPosition
];
351 /* runs API functions ------------------------------------------------------- */
353 U_CAPI
int32_t U_EXPORT2
354 ubidi_countRuns(UBiDi
*pBiDi
, UErrorCode
*pErrorCode
) {
355 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
, -1);
356 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi
, *pErrorCode
, -1);
357 ubidi_getRuns(pBiDi
, pErrorCode
);
358 if(U_FAILURE(*pErrorCode
)) {
361 return pBiDi
->runCount
;
364 U_CAPI UBiDiDirection U_EXPORT2
365 ubidi_getVisualRun(UBiDi
*pBiDi
, int32_t runIndex
,
366 int32_t *pLogicalStart
, int32_t *pLength
)
369 UErrorCode errorCode
= U_ZERO_ERROR
;
370 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi
, errorCode
, UBIDI_LTR
);
371 ubidi_getRuns(pBiDi
, &errorCode
);
372 if(U_FAILURE(errorCode
)) {
375 RETURN_IF_BAD_RANGE(runIndex
, 0, pBiDi
->runCount
, errorCode
, UBIDI_LTR
);
377 start
=pBiDi
->runs
[runIndex
].logicalStart
;
378 if(pLogicalStart
!=NULL
) {
379 *pLogicalStart
=GET_INDEX(start
);
383 *pLength
=pBiDi
->runs
[runIndex
].visualLimit
-
384 pBiDi
->runs
[runIndex
-1].visualLimit
;
386 *pLength
=pBiDi
->runs
[0].visualLimit
;
389 return (UBiDiDirection
)GET_ODD_BIT(start
);
392 /* in trivial cases there is only one trivial run; called by ubidi_getRuns() */
394 getSingleRun(UBiDi
*pBiDi
, UBiDiLevel level
) {
395 /* simple, single-run case */
396 pBiDi
->runs
=pBiDi
->simpleRuns
;
399 /* fill and reorder the single run */
400 pBiDi
->runs
[0].logicalStart
=MAKE_INDEX_ODD_PAIR(0, level
);
401 pBiDi
->runs
[0].visualLimit
=pBiDi
->length
;
402 pBiDi
->runs
[0].insertRemove
=0;
405 /* reorder the runs array (L2) ---------------------------------------------- */
408 * Reorder the same-level runs in the runs array.
409 * Here, runCount>1 and maxLevel>=minLevel>=paraLevel.
410 * All the visualStart fields=logical start before reordering.
411 * The "odd" bits are not set yet.
413 * Reordering with this data structure lends itself to some handy shortcuts:
415 * Since each run is moved but not modified, and since at the initial maxLevel
416 * each sequence of same-level runs consists of only one run each, we
417 * don't need to do anything there and can predecrement maxLevel.
418 * In many simple cases, the reordering is thus done entirely in the
420 * Also, reordering occurs only down to the lowest odd level that occurs,
421 * which is minLevel|1. However, if the lowest level itself is odd, then
422 * in the last reordering the sequence of the runs at this level or higher
423 * will be all runs, and we don't need the elaborate loop to search for them.
424 * This is covered by ++minLevel instead of minLevel|=1 followed
425 * by an extra reorder-all after the reorder-some loop.
426 * About a trailing WS run:
427 * Such a run would need special treatment because its level is not
428 * reflected in levels[] if this is not a paragraph object.
429 * Instead, all characters from trailingWSStart on are implicitly at
431 * However, for all maxLevel>paraLevel, this run will never be reordered
432 * and does not need to be taken into account. maxLevel==paraLevel is only reordered
433 * if minLevel==paraLevel is odd, which is done in the extra segment.
434 * This means that for the main reordering loop we don't need to consider
435 * this run and can --runCount. If it is later part of the all-runs
436 * reordering, then runCount is adjusted accordingly.
439 reorderLine(UBiDi
*pBiDi
, UBiDiLevel minLevel
, UBiDiLevel maxLevel
) {
442 int32_t firstRun
, endRun
, limitRun
, runCount
;
445 if(maxLevel
<=(minLevel
|1)) {
450 * Reorder only down to the lowest odd level
451 * and reorder at an odd minLevel in a separate, simpler loop.
452 * See comments above for why minLevel is always incremented.
457 levels
=pBiDi
->levels
;
458 runCount
=pBiDi
->runCount
;
460 /* do not include the WS run at paraLevel<=old minLevel except in the simple loop */
461 if(pBiDi
->trailingWSStart
<pBiDi
->length
) {
465 while(--maxLevel
>=minLevel
) {
468 /* loop for all sequences of runs */
470 /* look for a sequence of runs that are all at >=maxLevel */
471 /* look for the first run of such a sequence */
472 while(firstRun
<runCount
&& levels
[runs
[firstRun
].logicalStart
]<maxLevel
) {
475 if(firstRun
>=runCount
) {
476 break; /* no more such runs */
479 /* look for the limit run of such a sequence (the run behind it) */
480 for(limitRun
=firstRun
; ++limitRun
<runCount
&& levels
[runs
[limitRun
].logicalStart
]>=maxLevel
;) {}
482 /* Swap the entire sequence of runs from firstRun to limitRun-1. */
484 while(firstRun
<endRun
) {
485 tempRun
= runs
[firstRun
];
486 runs
[firstRun
]=runs
[endRun
];
487 runs
[endRun
]=tempRun
;
492 if(limitRun
==runCount
) {
493 break; /* no more such runs */
500 /* now do maxLevel==old minLevel (==odd!), see above */
504 /* include the trailing WS run in this complete reordering */
505 if(pBiDi
->trailingWSStart
==pBiDi
->length
) {
509 /* Swap the entire sequence of all runs. (endRun==runCount) */
510 while(firstRun
<runCount
) {
511 tempRun
=runs
[firstRun
];
512 runs
[firstRun
]=runs
[runCount
];
513 runs
[runCount
]=tempRun
;
520 /* compute the runs array --------------------------------------------------- */
522 static int32_t getRunFromLogicalIndex(UBiDi
*pBiDi
, int32_t logicalIndex
, UErrorCode
*pErrorCode
) {
523 Run
*runs
=pBiDi
->runs
;
524 int32_t runCount
=pBiDi
->runCount
, visualStart
=0, i
, length
, logicalStart
;
526 for(i
=0; i
<runCount
; i
++) {
527 length
=runs
[i
].visualLimit
-visualStart
;
528 logicalStart
=GET_INDEX(runs
[i
].logicalStart
);
529 if((logicalIndex
>=logicalStart
) && (logicalIndex
<(logicalStart
+length
))) {
534 /* we should never get here */
536 *pErrorCode
= U_INVALID_STATE_ERROR
;
541 * Compute the runs array from the levels array.
542 * After ubidi_getRuns() returns TRUE, runCount is guaranteed to be >0
543 * and the runs are reordered.
544 * Odd-level runs have visualStart on their visual right edge and
545 * they progress visually to the left.
546 * If option UBIDI_OPTION_INSERT_MARKS is set, insertRemove will contain the
547 * sum of appropriate LRM/RLM_BEFORE/AFTER flags.
548 * If option UBIDI_OPTION_REMOVE_CONTROLS is set, insertRemove will contain the
549 * negative number of BiDi control characters within this run.
552 ubidi_getRuns(UBiDi
*pBiDi
, UErrorCode
*pErrorCode
) {
554 * This method returns immediately if the runs are already set. This
555 * includes the case of length==0 (handled in setPara)..
557 if (pBiDi
->runCount
>=0) {
561 if(pBiDi
->direction
!=UBIDI_MIXED
) {
562 /* simple, single-run case - this covers length==0 */
563 /* pBiDi->paraLevel is ok even for contextual multiple paragraphs */
564 getSingleRun(pBiDi
, pBiDi
->paraLevel
);
565 } else /* UBIDI_MIXED, length>0 */ {
566 /* mixed directionality */
567 int32_t length
=pBiDi
->length
, limit
;
568 UBiDiLevel
*levels
=pBiDi
->levels
;
570 UBiDiLevel level
=UBIDI_DEFAULT_LTR
; /* initialize with no valid level */
572 * If there are WS characters at the end of the line
573 * and the run preceding them has a level different from
574 * paraLevel, then they will form their own run at paraLevel (L1).
575 * Count them separately.
576 * We need some special treatment for this in order to not
577 * modify the levels array which a line UBiDi object shares
578 * with its paragraph parent and its other line siblings.
579 * In other words, for the trailing WS, it may be
580 * levels[]!=paraLevel but we have to treat it like it were so.
582 limit
=pBiDi
->trailingWSStart
;
583 /* count the runs, there is at least one non-WS run, and limit>0 */
585 for(i
=0; i
<limit
; ++i
) {
586 /* increment runCount at the start of each run */
587 if(levels
[i
]!=level
) {
594 * We don't need to see if the last run can be merged with a trailing
595 * WS run because setTrailingWSStart() would have done that.
597 if(runCount
==1 && limit
==length
) {
598 /* There is only one non-WS run and no trailing WS-run. */
599 getSingleRun(pBiDi
, levels
[0]);
600 } else /* runCount>1 || limit<length */ {
601 /* allocate and set the runs */
603 int32_t runIndex
, start
;
604 UBiDiLevel minLevel
=UBIDI_MAX_EXPLICIT_LEVEL
+1, maxLevel
=0;
606 /* now, count a (non-mergeable) WS run */
612 if(getRunsMemory(pBiDi
, runCount
)) {
613 runs
=pBiDi
->runsMemory
;
619 /* FOOD FOR THOUGHT: this could be optimized, e.g.:
620 * 464->444, 484->444, 575->555, 595->555
621 * However, that would take longer. Check also how it would
622 * interact with BiDi control removal and inserting Marks.
626 /* search for the run limits and initialize visualLimit values with the run lengths */
629 /* prepare this run */
639 /* look for the run limit */
640 while(++i
<limit
&& levels
[i
]==level
) {}
642 /* i is another run limit */
643 runs
[runIndex
].logicalStart
=start
;
644 runs
[runIndex
].visualLimit
=i
-start
;
645 runs
[runIndex
].insertRemove
=0;
650 /* there is a separate WS run */
651 runs
[runIndex
].logicalStart
=limit
;
652 runs
[runIndex
].visualLimit
=length
-limit
;
653 /* For the trailing WS run, pBiDi->paraLevel is ok even
654 if contextual multiple paragraphs. */
655 if(pBiDi
->paraLevel
<minLevel
) {
656 minLevel
=pBiDi
->paraLevel
;
660 /* set the object fields */
662 pBiDi
->runCount
=runCount
;
664 reorderLine(pBiDi
, minLevel
, maxLevel
);
666 /* now add the direction flags and adjust the visualLimit's to be just that */
667 /* this loop will also handle the trailing WS run */
669 for(i
=0; i
<runCount
; ++i
) {
670 ADD_ODD_BIT_FROM_LEVEL(runs
[i
].logicalStart
, levels
[runs
[i
].logicalStart
]);
671 limit
+=runs
[i
].visualLimit
;
672 runs
[i
].visualLimit
=limit
;
675 /* Set the "odd" bit for the trailing WS run. */
676 /* For a RTL paragraph, it will be the *first* run in visual order. */
677 /* For the trailing WS run, pBiDi->paraLevel is ok even if
678 contextual multiple paragraphs. */
679 if(runIndex
<runCount
) {
680 int32_t trailingRun
= ((pBiDi
->paraLevel
& 1) != 0)? 0 : runIndex
;
682 ADD_ODD_BIT_FROM_LEVEL(runs
[trailingRun
].logicalStart
, pBiDi
->paraLevel
);
687 /* handle insert LRM/RLM BEFORE/AFTER run */
688 if(pBiDi
->insertPoints
.size
>0) {
689 Point
*point
, *start
=pBiDi
->insertPoints
.points
,
690 *limit
=start
+pBiDi
->insertPoints
.size
;
692 for(point
=start
; point
<limit
; point
++) {
693 runIndex
=getRunFromLogicalIndex(pBiDi
, point
->pos
, pErrorCode
);
694 pBiDi
->runs
[runIndex
].insertRemove
|=point
->flag
;
698 /* handle remove BiDi control characters */
699 if(pBiDi
->controlCount
>0) {
701 const UChar
*start
=pBiDi
->text
, *limit
=start
+pBiDi
->length
, *pu
;
702 for(pu
=start
; pu
<limit
; pu
++) {
703 if(IS_BIDI_CONTROL_CHAR(*pu
)) {
704 runIndex
=getRunFromLogicalIndex(pBiDi
, (int32_t)(pu
-start
), pErrorCode
);
705 pBiDi
->runs
[runIndex
].insertRemove
--;
714 prepareReorder(const UBiDiLevel
*levels
, int32_t length
,
716 UBiDiLevel
*pMinLevel
, UBiDiLevel
*pMaxLevel
) {
718 UBiDiLevel level
, minLevel
, maxLevel
;
720 if(levels
==NULL
|| length
<=0) {
724 /* determine minLevel and maxLevel */
725 minLevel
=UBIDI_MAX_EXPLICIT_LEVEL
+1;
727 for(start
=length
; start
>0;) {
728 level
=levels
[--start
];
729 if(level
>UBIDI_MAX_EXPLICIT_LEVEL
+1) {
742 /* initialize the index map */
743 for(start
=length
; start
>0;) {
745 indexMap
[start
]=start
;
751 /* reorder a line based on a levels array (L2) ------------------------------ */
753 U_CAPI
void U_EXPORT2
754 ubidi_reorderLogical(const UBiDiLevel
*levels
, int32_t length
, int32_t *indexMap
) {
755 int32_t start
, limit
, sumOfSosEos
;
756 UBiDiLevel minLevel
= 0, maxLevel
= 0;
758 if(indexMap
==NULL
|| !prepareReorder(levels
, length
, indexMap
, &minLevel
, &maxLevel
)) {
763 if(minLevel
==maxLevel
&& (minLevel
&1)==0) {
767 /* reorder only down to the lowest odd level */
770 /* loop maxLevel..minLevel */
774 /* loop for all sequences of levels to reorder at the current maxLevel */
776 /* look for a sequence of levels that are all at >=maxLevel */
777 /* look for the first index of such a sequence */
778 while(start
<length
&& levels
[start
]<maxLevel
) {
782 break; /* no more such sequences */
785 /* look for the limit of such a sequence (the index behind it) */
786 for(limit
=start
; ++limit
<length
&& levels
[limit
]>=maxLevel
;) {}
789 * sos=start of sequence, eos=end of sequence
791 * The closed (inclusive) interval from sos to eos includes all the logical
792 * and visual indexes within this sequence. They are logically and
793 * visually contiguous and in the same range.
795 * For each run, the new visual index=sos+eos-old visual index;
796 * we pre-add sos+eos into sumOfSosEos ->
797 * new visual index=sumOfSosEos-old visual index;
799 sumOfSosEos
=start
+limit
-1;
801 /* reorder each index in the sequence */
803 indexMap
[start
]=sumOfSosEos
-indexMap
[start
];
804 } while(++start
<limit
);
808 break; /* no more such sequences */
813 } while(--maxLevel
>=minLevel
);
816 U_CAPI
void U_EXPORT2
817 ubidi_reorderVisual(const UBiDiLevel
*levels
, int32_t length
, int32_t *indexMap
) {
818 int32_t start
, end
, limit
, temp
;
819 UBiDiLevel minLevel
= 0, maxLevel
= 0;
821 if(indexMap
==NULL
|| !prepareReorder(levels
, length
, indexMap
, &minLevel
, &maxLevel
)) {
826 if(minLevel
==maxLevel
&& (minLevel
&1)==0) {
830 /* reorder only down to the lowest odd level */
833 /* loop maxLevel..minLevel */
837 /* loop for all sequences of levels to reorder at the current maxLevel */
839 /* look for a sequence of levels that are all at >=maxLevel */
840 /* look for the first index of such a sequence */
841 while(start
<length
&& levels
[start
]<maxLevel
) {
845 break; /* no more such runs */
848 /* look for the limit of such a sequence (the index behind it) */
849 for(limit
=start
; ++limit
<length
&& levels
[limit
]>=maxLevel
;) {}
852 * Swap the entire interval of indexes from start to limit-1.
853 * We don't need to swap the levels for the purpose of this
854 * algorithm: the sequence of levels that we look at does not
859 temp
=indexMap
[start
];
860 indexMap
[start
]=indexMap
[end
];
868 break; /* no more such sequences */
873 } while(--maxLevel
>=minLevel
);
876 /* API functions for logical<->visual mapping ------------------------------- */
878 U_CAPI
int32_t U_EXPORT2
879 ubidi_getVisualIndex(UBiDi
*pBiDi
, int32_t logicalIndex
, UErrorCode
*pErrorCode
) {
880 int32_t visualIndex
=UBIDI_MAP_NOWHERE
;
881 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
, -1);
882 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi
, *pErrorCode
, -1);
883 RETURN_IF_BAD_RANGE(logicalIndex
, 0, pBiDi
->length
, *pErrorCode
, -1);
885 /* we can do the trivial cases without the runs array */
886 switch(pBiDi
->direction
) {
888 visualIndex
=logicalIndex
;
891 visualIndex
=pBiDi
->length
-logicalIndex
-1;
894 if(!ubidi_getRuns(pBiDi
, pErrorCode
)) {
895 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
898 Run
*runs
=pBiDi
->runs
;
899 int32_t i
, visualStart
=0, offset
, length
;
901 /* linear search for the run, search on the visual runs */
902 for(i
=0; i
<pBiDi
->runCount
; ++i
) {
903 length
=runs
[i
].visualLimit
-visualStart
;
904 offset
=logicalIndex
-GET_INDEX(runs
[i
].logicalStart
);
905 if(offset
>=0 && offset
<length
) {
906 if(IS_EVEN_RUN(runs
[i
].logicalStart
)) {
908 visualIndex
=visualStart
+offset
;
911 visualIndex
=visualStart
+length
-offset
-1;
913 break; /* exit for loop */
917 if(i
>=pBiDi
->runCount
) {
918 return UBIDI_MAP_NOWHERE
;
923 if(pBiDi
->insertPoints
.size
>0) {
924 /* add the number of added marks until the calculated visual index */
925 Run
*runs
=pBiDi
->runs
;
926 int32_t i
, length
, insertRemove
;
927 int32_t visualStart
=0, markFound
=0;
928 for(i
=0; ; i
++, visualStart
+=length
) {
929 length
=runs
[i
].visualLimit
-visualStart
;
930 insertRemove
=runs
[i
].insertRemove
;
931 if(insertRemove
& (LRM_BEFORE
|RLM_BEFORE
)) {
934 /* is it the run containing the visual index? */
935 if(visualIndex
<runs
[i
].visualLimit
) {
936 return visualIndex
+markFound
;
938 if(insertRemove
& (LRM_AFTER
|RLM_AFTER
)) {
943 else if(pBiDi
->controlCount
>0) {
944 /* subtract the number of controls until the calculated visual index */
945 Run
*runs
=pBiDi
->runs
;
946 int32_t i
, j
, start
, limit
, length
, insertRemove
;
947 int32_t visualStart
=0, controlFound
=0;
948 UChar uchar
=pBiDi
->text
[logicalIndex
];
949 /* is the logical index pointing to a control ? */
950 if(IS_BIDI_CONTROL_CHAR(uchar
)) {
951 return UBIDI_MAP_NOWHERE
;
954 for(i
=0; ; i
++, visualStart
+=length
) {
955 length
=runs
[i
].visualLimit
-visualStart
;
956 insertRemove
=runs
[i
].insertRemove
;
957 /* calculated visual index is beyond this run? */
958 if(visualIndex
>=runs
[i
].visualLimit
) {
959 controlFound
-=insertRemove
;
962 /* calculated visual index must be within current run */
963 if(insertRemove
==0) {
964 return visualIndex
-controlFound
;
966 if(IS_EVEN_RUN(runs
[i
].logicalStart
)) {
967 /* LTR: check from run start to logical index */
968 start
=runs
[i
].logicalStart
;
971 /* RTL: check from logical index to run end */
972 start
=logicalIndex
+1;
973 limit
=GET_INDEX(runs
[i
].logicalStart
)+length
;
975 for(j
=start
; j
<limit
; j
++) {
976 uchar
=pBiDi
->text
[j
];
977 if(IS_BIDI_CONTROL_CHAR(uchar
)) {
981 return visualIndex
-controlFound
;
988 U_CAPI
int32_t U_EXPORT2
989 ubidi_getLogicalIndex(UBiDi
*pBiDi
, int32_t visualIndex
, UErrorCode
*pErrorCode
) {
991 int32_t i
, runCount
, start
;
992 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
, -1);
993 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi
, *pErrorCode
, -1);
994 RETURN_IF_BAD_RANGE(visualIndex
, 0, pBiDi
->resultLength
, *pErrorCode
, -1);
995 /* we can do the trivial cases without the runs array */
996 if(pBiDi
->insertPoints
.size
==0 && pBiDi
->controlCount
==0) {
997 if(pBiDi
->direction
==UBIDI_LTR
) {
1000 else if(pBiDi
->direction
==UBIDI_RTL
) {
1001 return pBiDi
->length
-visualIndex
-1;
1004 if(!ubidi_getRuns(pBiDi
, pErrorCode
)) {
1005 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
1010 runCount
=pBiDi
->runCount
;
1011 if(pBiDi
->insertPoints
.size
>0) {
1012 /* handle inserted LRM/RLM */
1013 int32_t markFound
=0, insertRemove
;
1014 int32_t visualStart
=0, length
;
1016 /* subtract number of marks until visual index */
1017 for(i
=0; ; i
++, visualStart
+=length
) {
1018 length
=runs
[i
].visualLimit
-visualStart
;
1019 insertRemove
=runs
[i
].insertRemove
;
1020 if(insertRemove
&(LRM_BEFORE
|RLM_BEFORE
)) {
1021 if(visualIndex
<=(visualStart
+markFound
)) {
1022 return UBIDI_MAP_NOWHERE
;
1026 /* is adjusted visual index within this run? */
1027 if(visualIndex
<(runs
[i
].visualLimit
+markFound
)) {
1028 visualIndex
-=markFound
;
1031 if(insertRemove
&(LRM_AFTER
|RLM_AFTER
)) {
1032 if(visualIndex
==(visualStart
+length
+markFound
)) {
1033 return UBIDI_MAP_NOWHERE
;
1039 else if(pBiDi
->controlCount
>0) {
1040 /* handle removed BiDi control characters */
1041 int32_t controlFound
=0, insertRemove
, length
;
1042 int32_t logicalStart
, logicalEnd
, visualStart
=0, j
, k
;
1045 /* add number of controls until visual index */
1046 for(i
=0; ; i
++, visualStart
+=length
) {
1047 length
=runs
[i
].visualLimit
-visualStart
;
1048 insertRemove
=runs
[i
].insertRemove
;
1049 /* is adjusted visual index beyond current run? */
1050 if(visualIndex
>=(runs
[i
].visualLimit
-controlFound
+insertRemove
)) {
1051 controlFound
-=insertRemove
;
1054 /* adjusted visual index is within current run */
1055 if(insertRemove
==0) {
1056 visualIndex
+=controlFound
;
1059 /* count non-control chars until visualIndex */
1060 logicalStart
=runs
[i
].logicalStart
;
1061 evenRun
=IS_EVEN_RUN(logicalStart
);
1062 REMOVE_ODD_BIT(logicalStart
);
1063 logicalEnd
=logicalStart
+length
-1;
1064 for(j
=0; j
<length
; j
++) {
1065 k
= evenRun
? logicalStart
+j
: logicalEnd
-j
;
1066 uchar
=pBiDi
->text
[k
];
1067 if(IS_BIDI_CONTROL_CHAR(uchar
)) {
1070 if((visualIndex
+controlFound
)==(visualStart
+j
)) {
1074 visualIndex
+=controlFound
;
1078 /* handle all cases */
1080 /* linear search for the run */
1081 for(i
=0; visualIndex
>=runs
[i
].visualLimit
; ++i
) {}
1083 /* binary search for the run */
1084 int32_t begin
=0, limit
=runCount
;
1086 /* the middle if() is guaranteed to find the run, we don't need a loop limit */
1089 if(visualIndex
>=runs
[i
].visualLimit
) {
1091 } else if(i
==0 || visualIndex
>=runs
[i
-1].visualLimit
) {
1099 start
=runs
[i
].logicalStart
;
1100 if(IS_EVEN_RUN(start
)) {
1102 /* the offset in runs[i] is visualIndex-runs[i-1].visualLimit */
1104 visualIndex
-=runs
[i
-1].visualLimit
;
1106 return start
+visualIndex
;
1109 return GET_INDEX(start
)+runs
[i
].visualLimit
-visualIndex
-1;
1113 U_CAPI
void U_EXPORT2
1114 ubidi_getLogicalMap(UBiDi
*pBiDi
, int32_t *indexMap
, UErrorCode
*pErrorCode
) {
1115 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
);
1116 /* ubidi_countRuns() checks for VALID_PARA_OR_LINE */
1117 ubidi_countRuns(pBiDi
, pErrorCode
);
1118 if(U_FAILURE(*pErrorCode
)) {
1120 } else if(indexMap
==NULL
) {
1121 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
1123 /* fill a logical-to-visual index map using the runs[] */
1124 int32_t visualStart
, visualLimit
, i
, j
, k
;
1125 int32_t logicalStart
, logicalLimit
;
1126 Run
*runs
=pBiDi
->runs
;
1127 if (pBiDi
->length
<=0) {
1130 if (pBiDi
->length
>pBiDi
->resultLength
) {
1131 uprv_memset(indexMap
, 0xFF, pBiDi
->length
*sizeof(int32_t));
1135 for(j
=0; j
<pBiDi
->runCount
; ++j
) {
1136 logicalStart
=GET_INDEX(runs
[j
].logicalStart
);
1137 visualLimit
=runs
[j
].visualLimit
;
1138 if(IS_EVEN_RUN(runs
[j
].logicalStart
)) {
1140 indexMap
[logicalStart
++]=visualStart
++;
1141 } while(visualStart
<visualLimit
);
1143 logicalStart
+=visualLimit
-visualStart
; /* logicalLimit */
1145 indexMap
[--logicalStart
]=visualStart
++;
1146 } while(visualStart
<visualLimit
);
1148 /* visualStart==visualLimit; */
1151 if(pBiDi
->insertPoints
.size
>0) {
1152 int32_t markFound
=0, runCount
=pBiDi
->runCount
;
1153 int32_t length
, insertRemove
;
1155 /* add number of marks found until each index */
1156 for(i
=0; i
<runCount
; i
++, visualStart
+=length
) {
1157 length
=runs
[i
].visualLimit
-visualStart
;
1158 insertRemove
=runs
[i
].insertRemove
;
1159 if(insertRemove
&(LRM_BEFORE
|RLM_BEFORE
)) {
1163 logicalStart
=GET_INDEX(runs
[i
].logicalStart
);
1164 logicalLimit
=logicalStart
+length
;
1165 for(j
=logicalStart
; j
<logicalLimit
; j
++) {
1166 indexMap
[j
]+=markFound
;
1169 if(insertRemove
&(LRM_AFTER
|RLM_AFTER
)) {
1174 else if(pBiDi
->controlCount
>0) {
1175 int32_t controlFound
=0, runCount
=pBiDi
->runCount
;
1176 int32_t length
, insertRemove
;
1180 /* subtract number of controls found until each index */
1181 for(i
=0; i
<runCount
; i
++, visualStart
+=length
) {
1182 length
=runs
[i
].visualLimit
-visualStart
;
1183 insertRemove
=runs
[i
].insertRemove
;
1184 /* no control found within previous runs nor within this run */
1185 if((controlFound
-insertRemove
)==0) {
1188 logicalStart
=runs
[i
].logicalStart
;
1189 evenRun
=IS_EVEN_RUN(logicalStart
);
1190 REMOVE_ODD_BIT(logicalStart
);
1191 logicalLimit
=logicalStart
+length
;
1192 /* if no control within this run */
1193 if(insertRemove
==0) {
1194 for(j
=logicalStart
; j
<logicalLimit
; j
++) {
1195 indexMap
[j
]-=controlFound
;
1199 for(j
=0; j
<length
; j
++) {
1200 k
= evenRun
? logicalStart
+j
: logicalLimit
-j
-1;
1201 uchar
=pBiDi
->text
[k
];
1202 if(IS_BIDI_CONTROL_CHAR(uchar
)) {
1204 indexMap
[k
]=UBIDI_MAP_NOWHERE
;
1207 indexMap
[k
]-=controlFound
;
1214 U_CAPI
void U_EXPORT2
1215 ubidi_getVisualMap(UBiDi
*pBiDi
, int32_t *indexMap
, UErrorCode
*pErrorCode
) {
1216 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
);
1217 if(indexMap
==NULL
) {
1218 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
1221 /* ubidi_countRuns() checks for VALID_PARA_OR_LINE */
1222 ubidi_countRuns(pBiDi
, pErrorCode
);
1223 if(U_SUCCESS(*pErrorCode
)) {
1224 /* fill a visual-to-logical index map using the runs[] */
1225 Run
*runs
=pBiDi
->runs
, *runsLimit
=runs
+pBiDi
->runCount
;
1226 int32_t logicalStart
, visualStart
, visualLimit
, *pi
=indexMap
;
1228 if (pBiDi
->resultLength
<=0) {
1232 for(; runs
<runsLimit
; ++runs
) {
1233 logicalStart
=runs
->logicalStart
;
1234 visualLimit
=runs
->visualLimit
;
1235 if(IS_EVEN_RUN(logicalStart
)) {
1237 *pi
++ = logicalStart
++;
1238 } while(++visualStart
<visualLimit
);
1240 REMOVE_ODD_BIT(logicalStart
);
1241 logicalStart
+=visualLimit
-visualStart
; /* logicalLimit */
1243 *pi
++ = --logicalStart
;
1244 } while(++visualStart
<visualLimit
);
1246 /* visualStart==visualLimit; */
1249 if(pBiDi
->insertPoints
.size
>0) {
1250 int32_t markFound
=0, runCount
=pBiDi
->runCount
;
1251 int32_t insertRemove
, i
, j
, k
;
1253 /* count all inserted marks */
1254 for(i
=0; i
<runCount
; i
++) {
1255 insertRemove
=runs
[i
].insertRemove
;
1256 if(insertRemove
&(LRM_BEFORE
|RLM_BEFORE
)) {
1259 if(insertRemove
&(LRM_AFTER
|RLM_AFTER
)) {
1263 /* move back indexes by number of preceding marks */
1264 k
=pBiDi
->resultLength
;
1265 for(i
=runCount
-1; i
>=0 && markFound
>0; i
--) {
1266 insertRemove
=runs
[i
].insertRemove
;
1267 if(insertRemove
&(LRM_AFTER
|RLM_AFTER
)) {
1268 indexMap
[--k
]= UBIDI_MAP_NOWHERE
;
1271 visualStart
= i
>0 ? runs
[i
-1].visualLimit
: 0;
1272 for(j
=runs
[i
].visualLimit
-1; j
>=visualStart
&& markFound
>0; j
--) {
1273 indexMap
[--k
]=indexMap
[j
];
1275 if(insertRemove
&(LRM_BEFORE
|RLM_BEFORE
)) {
1276 indexMap
[--k
]= UBIDI_MAP_NOWHERE
;
1281 else if(pBiDi
->controlCount
>0) {
1282 int32_t runCount
=pBiDi
->runCount
, logicalEnd
;
1283 int32_t insertRemove
, length
, i
, j
, k
, m
;
1288 /* move forward indexes by number of preceding controls */
1290 for(i
=0; i
<runCount
; i
++, visualStart
+=length
) {
1291 length
=runs
[i
].visualLimit
-visualStart
;
1292 insertRemove
=runs
[i
].insertRemove
;
1293 /* if no control found yet, nothing to do in this run */
1294 if((insertRemove
==0)&&(k
==visualStart
)) {
1298 /* if no control in this run */
1299 if(insertRemove
==0) {
1300 visualLimit
=runs
[i
].visualLimit
;
1301 for(j
=visualStart
; j
<visualLimit
; j
++) {
1302 indexMap
[k
++]=indexMap
[j
];
1306 logicalStart
=runs
[i
].logicalStart
;
1307 evenRun
=IS_EVEN_RUN(logicalStart
);
1308 REMOVE_ODD_BIT(logicalStart
);
1309 logicalEnd
=logicalStart
+length
-1;
1310 for(j
=0; j
<length
; j
++) {
1311 m
= evenRun
? logicalStart
+j
: logicalEnd
-j
;
1312 uchar
=pBiDi
->text
[m
];
1313 if(!IS_BIDI_CONTROL_CHAR(uchar
)) {
1322 U_CAPI
void U_EXPORT2
1323 ubidi_invertMap(const int32_t *srcMap
, int32_t *destMap
, int32_t length
) {
1324 if(srcMap
!=NULL
&& destMap
!=NULL
&& length
>0) {
1326 int32_t destLength
=-1, count
=0;
1327 /* find highest value and count positive indexes in srcMap */
1330 if(*--pi
>destLength
) {
1337 destLength
++; /* add 1 for origin 0 */
1338 if(count
<destLength
) {
1339 /* we must fill unmatched destMap entries with -1 */
1340 uprv_memset(destMap
, 0xFF, destLength
*sizeof(int32_t));
1345 destMap
[*pi
]=--length
;