2 ******************************************************************************
4 * Copyright (C) 1999-2007, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 ******************************************************************************
10 * tab size: 8 (not used)
13 * created on: 1999aug06
14 * created by: Markus W. Scherer, updated by Matitiahu Allouche
18 #include "unicode/utypes.h"
19 #include "unicode/ustring.h"
20 #include "unicode/uchar.h"
21 #include "unicode/ubidi.h"
26 * General remarks about the functions in this file:
28 * These functions deal with the aspects of potentially mixed-directional
29 * text in a single paragraph or in a line of a single paragraph
30 * which has already been processed according to
31 * the Unicode 3.0 BiDi algorithm as defined in
32 * http://www.unicode.org/unicode/reports/tr9/ , version 13,
33 * also described in The Unicode Standard, Version 4.0.1 .
35 * This means that there is a UBiDi object with a levels
36 * and a dirProps array.
37 * paraLevel and direction are also set.
38 * Only if the length of the text is zero, then levels==dirProps==NULL.
40 * The overall directionality of the paragraph
41 * or line is used to bypass the reordering steps if possible.
42 * Even purely RTL text does not need reordering there because
43 * the ubidi_getLogical/VisualIndex() functions can compute the
44 * index on the fly in such a case.
46 * The implementation of the access to same-level-runs and of the reordering
47 * do attempt to provide better performance and less memory usage compared to
48 * a direct implementation of especially rule (L2) with an array of
49 * one (32-bit) integer per text character.
51 * Here, the levels array is scanned as soon as necessary, and a vector of
52 * same-level-runs is created. Reordering then is done on this vector.
53 * For each run of text positions that were resolved to the same level,
54 * only 8 bytes are stored: the first text position of the run and the visual
55 * position behind the run after reordering.
56 * One sign bit is used to hold the directionality of the run.
57 * This is inefficient if there are many very short runs. If the average run
58 * length is <2, then this uses more memory.
60 * In a further attempt to save memory, the levels array is never changed
61 * after all the resolution rules (Xn, Wn, Nn, In).
62 * Many functions have to consider the field trailingWSStart:
63 * if it is less than length, then there is an implicit trailing run
65 * which is not reflected in the levels array.
66 * This allows a line UBiDi object to use the same levels array as
67 * its paragraph parent object.
69 * When a UBiDi object is created for a line of a paragraph, then the
70 * paragraph's levels and dirProps arrays are reused by way of setting
71 * a pointer into them, not by copying. This again saves memory and forbids to
72 * change the now shared levels for (L1).
75 /* handle trailing WS (L1) -------------------------------------------------- */
78 * setTrailingWSStart() sets the start index for a trailing
79 * run of WS in the line. This is necessary because we do not modify
80 * the paragraph's levels array that we just point into.
81 * Using trailingWSStart is another form of performing (L1).
83 * To make subsequent operations easier, we also include the run
84 * before the WS if it is at the paraLevel - we merge the two here.
86 * This function is called only from ubidi_setLine(), so pBiDi->paraLevel is
87 * set correctly for the line even when contextual multiple paragraphs.
90 setTrailingWSStart(UBiDi
*pBiDi
) {
91 /* pBiDi->direction!=UBIDI_MIXED */
93 const DirProp
*dirProps
=pBiDi
->dirProps
;
94 UBiDiLevel
*levels
=pBiDi
->levels
;
95 int32_t start
=pBiDi
->length
;
96 UBiDiLevel paraLevel
=pBiDi
->paraLevel
;
98 /* If the line is terminated by a block separator, all preceding WS etc...
99 are already set to paragraph level.
100 Setting trailingWSStart to pBidi->length will avoid changing the
101 level of B chars from 0 to paraLevel in ubidi_getLevels when
102 orderParagraphsLTR==TRUE.
104 if(NO_CONTEXT_RTL(dirProps
[start
-1])==B
) {
105 pBiDi
->trailingWSStart
=start
; /* currently == pBiDi->length */
108 /* go backwards across all WS, BN, explicit codes */
109 while(start
>0 && DIRPROP_FLAG_NC(dirProps
[start
-1])&MASK_WS
) {
113 /* if the WS run can be merged with the previous run then do so here */
114 while(start
>0 && levels
[start
-1]==paraLevel
) {
118 pBiDi
->trailingWSStart
=start
;
121 /* ubidi_setLine ------------------------------------------------------------ */
123 U_CAPI
void U_EXPORT2
124 ubidi_setLine(const UBiDi
*pParaBiDi
,
125 int32_t start
, int32_t limit
,
127 UErrorCode
*pErrorCode
) {
130 /* check the argument values */
131 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
);
132 RETURN_VOID_IF_NOT_VALID_PARA(pParaBiDi
, *pErrorCode
);
133 RETURN_VOID_IF_BAD_RANGE(start
, 0, limit
, *pErrorCode
);
134 RETURN_VOID_IF_BAD_RANGE(limit
, 0, pParaBiDi
->length
+1, *pErrorCode
);
135 if(pLineBiDi
==NULL
) {
136 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
139 if(ubidi_getParagraph(pParaBiDi
, start
, NULL
, NULL
, NULL
, pErrorCode
) !=
140 ubidi_getParagraph(pParaBiDi
, limit
-1, NULL
, NULL
, NULL
, pErrorCode
)) {
141 /* the line crosses a paragraph boundary */
142 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
146 /* set the values in pLineBiDi from its pParaBiDi parent */
147 pLineBiDi
->pParaBiDi
=NULL
; /* mark unfinished setLine */
148 pLineBiDi
->text
=pParaBiDi
->text
+start
;
149 length
=pLineBiDi
->length
=limit
-start
;
150 pLineBiDi
->resultLength
=pLineBiDi
->originalLength
=length
;
151 pLineBiDi
->paraLevel
=GET_PARALEVEL(pParaBiDi
, start
);
152 pLineBiDi
->paraCount
=pParaBiDi
->paraCount
;
153 pLineBiDi
->runs
=NULL
;
155 pLineBiDi
->reorderingMode
=pParaBiDi
->reorderingMode
;
156 pLineBiDi
->reorderingOptions
=pParaBiDi
->reorderingOptions
;
157 pLineBiDi
->controlCount
=0;
158 if(pParaBiDi
->controlCount
>0) {
160 for(j
=start
; j
<limit
; j
++) {
161 if(IS_BIDI_CONTROL_CHAR(pParaBiDi
->text
[j
])) {
162 pLineBiDi
->controlCount
++;
165 pLineBiDi
->resultLength
-=pLineBiDi
->controlCount
;
168 pLineBiDi
->dirProps
=pParaBiDi
->dirProps
+start
;
169 pLineBiDi
->levels
=pParaBiDi
->levels
+start
;
170 pLineBiDi
->runCount
=-1;
172 if(pParaBiDi
->direction
!=UBIDI_MIXED
) {
173 /* the parent is already trivial */
174 pLineBiDi
->direction
=pParaBiDi
->direction
;
177 * The parent's levels are all either
178 * implicitly or explicitly ==paraLevel;
181 if(pParaBiDi
->trailingWSStart
<=start
) {
182 pLineBiDi
->trailingWSStart
=0;
183 } else if(pParaBiDi
->trailingWSStart
<limit
) {
184 pLineBiDi
->trailingWSStart
=pParaBiDi
->trailingWSStart
-start
;
186 pLineBiDi
->trailingWSStart
=length
;
189 const UBiDiLevel
*levels
=pLineBiDi
->levels
;
190 int32_t i
, trailingWSStart
;
193 setTrailingWSStart(pLineBiDi
);
194 trailingWSStart
=pLineBiDi
->trailingWSStart
;
196 /* recalculate pLineBiDi->direction */
197 if(trailingWSStart
==0) {
198 /* all levels are at paraLevel */
199 pLineBiDi
->direction
=(UBiDiDirection
)(pLineBiDi
->paraLevel
&1);
201 /* get the level of the first character */
202 level
=(UBiDiLevel
)(levels
[0]&1);
204 /* if there is anything of a different level, then the line is mixed */
205 if(trailingWSStart
<length
&& (pLineBiDi
->paraLevel
&1)!=level
) {
206 /* the trailing WS is at paraLevel, which differs from levels[0] */
207 pLineBiDi
->direction
=UBIDI_MIXED
;
209 /* see if levels[1..trailingWSStart-1] have the same direction as levels[0] and paraLevel */
212 if(i
==trailingWSStart
) {
213 /* the direction values match those in level */
214 pLineBiDi
->direction
=(UBiDiDirection
)level
;
216 } else if((levels
[i
]&1)!=level
) {
217 pLineBiDi
->direction
=UBIDI_MIXED
;
225 switch(pLineBiDi
->direction
) {
227 /* make sure paraLevel is even */
228 pLineBiDi
->paraLevel
=(UBiDiLevel
)((pLineBiDi
->paraLevel
+1)&~1);
230 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
231 pLineBiDi
->trailingWSStart
=0;
234 /* make sure paraLevel is odd */
235 pLineBiDi
->paraLevel
|=1;
237 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
238 pLineBiDi
->trailingWSStart
=0;
244 pLineBiDi
->pParaBiDi
=pParaBiDi
; /* mark successful setLine */
248 U_CAPI UBiDiLevel U_EXPORT2
249 ubidi_getLevelAt(const UBiDi
*pBiDi
, int32_t charIndex
) {
250 /* return paraLevel if in the trailing WS run, otherwise the real level */
251 if(!IS_VALID_PARA_OR_LINE(pBiDi
) || charIndex
<0 || pBiDi
->length
<=charIndex
) {
253 } else if(pBiDi
->direction
!=UBIDI_MIXED
|| charIndex
>=pBiDi
->trailingWSStart
) {
254 return GET_PARALEVEL(pBiDi
, charIndex
);
256 return pBiDi
->levels
[charIndex
];
260 U_CAPI
const UBiDiLevel
* U_EXPORT2
261 ubidi_getLevels(UBiDi
*pBiDi
, UErrorCode
*pErrorCode
) {
262 int32_t start
, length
;
264 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
, NULL
);
265 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi
, *pErrorCode
, NULL
);
266 if((length
=pBiDi
->length
)<=0) {
267 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
270 if((start
=pBiDi
->trailingWSStart
)==length
) {
271 /* the current levels array reflects the WS run */
272 return pBiDi
->levels
;
276 * After the previous if(), we know that the levels array
277 * has an implicit trailing WS run and therefore does not fully
278 * reflect itself all the levels.
279 * This must be a UBiDi object for a line, and
280 * we need to create a new levels array.
282 if(getLevelsMemory(pBiDi
, length
)) {
283 UBiDiLevel
*levels
=pBiDi
->levelsMemory
;
285 if(start
>0 && levels
!=pBiDi
->levels
) {
286 uprv_memcpy(levels
, pBiDi
->levels
, start
);
288 /* pBiDi->paraLevel is ok even if contextual multiple paragraphs,
289 since pBidi is a line object */
290 uprv_memset(levels
+start
, pBiDi
->paraLevel
, length
-start
);
292 /* this new levels array is set for the line and reflects the WS run */
293 pBiDi
->trailingWSStart
=length
;
294 return pBiDi
->levels
=levels
;
297 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
302 U_CAPI
void U_EXPORT2
303 ubidi_getLogicalRun(const UBiDi
*pBiDi
, int32_t logicalPosition
,
304 int32_t *pLogicalLimit
, UBiDiLevel
*pLevel
) {
305 UErrorCode errorCode
;
306 int32_t runCount
, visualStart
, logicalLimit
, logicalFirst
, i
;
309 errorCode
=U_ZERO_ERROR
;
310 RETURN_VOID_IF_BAD_RANGE(logicalPosition
, 0, pBiDi
->length
, errorCode
);
311 /* ubidi_countRuns will check VALID_PARA_OR_LINE */
312 runCount
=ubidi_countRuns((UBiDi
*)pBiDi
, &errorCode
);
313 if(U_FAILURE(errorCode
)) {
316 /* this is done based on runs rather than on levels since levels have
317 a special interpretation when UBIDI_REORDER_RUNS_ONLY
319 visualStart
=logicalLimit
=0;
322 for(i
=0; i
<runCount
; i
++) {
323 iRun
= pBiDi
->runs
[i
];
324 logicalFirst
=GET_INDEX(iRun
.logicalStart
);
325 logicalLimit
=logicalFirst
+iRun
.visualLimit
-visualStart
;
326 if((logicalPosition
>=logicalFirst
) &&
327 (logicalPosition
<logicalLimit
)) {
330 visualStart
= iRun
.visualLimit
;
333 *pLogicalLimit
=logicalLimit
;
336 if(pBiDi
->reorderingMode
==UBIDI_REORDER_RUNS_ONLY
) {
337 *pLevel
=(UBiDiLevel
)GET_ODD_BIT(iRun
.logicalStart
);
339 else if(pBiDi
->direction
!=UBIDI_MIXED
|| logicalPosition
>=pBiDi
->trailingWSStart
) {
340 *pLevel
=GET_PARALEVEL(pBiDi
, logicalPosition
);
342 *pLevel
=pBiDi
->levels
[logicalPosition
];
347 /* runs API functions ------------------------------------------------------- */
349 U_CAPI
int32_t U_EXPORT2
350 ubidi_countRuns(UBiDi
*pBiDi
, UErrorCode
*pErrorCode
) {
351 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
, -1);
352 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi
, *pErrorCode
, -1);
353 ubidi_getRuns(pBiDi
, pErrorCode
);
354 if(U_FAILURE(*pErrorCode
)) {
357 return pBiDi
->runCount
;
360 U_CAPI UBiDiDirection U_EXPORT2
361 ubidi_getVisualRun(UBiDi
*pBiDi
, int32_t runIndex
,
362 int32_t *pLogicalStart
, int32_t *pLength
)
365 UErrorCode errorCode
= U_ZERO_ERROR
;
366 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi
, errorCode
, UBIDI_LTR
);
367 ubidi_getRuns(pBiDi
, &errorCode
);
368 if(U_FAILURE(errorCode
)) {
371 RETURN_IF_BAD_RANGE(runIndex
, 0, pBiDi
->runCount
, errorCode
, UBIDI_LTR
);
373 start
=pBiDi
->runs
[runIndex
].logicalStart
;
374 if(pLogicalStart
!=NULL
) {
375 *pLogicalStart
=GET_INDEX(start
);
379 *pLength
=pBiDi
->runs
[runIndex
].visualLimit
-
380 pBiDi
->runs
[runIndex
-1].visualLimit
;
382 *pLength
=pBiDi
->runs
[0].visualLimit
;
385 return (UBiDiDirection
)GET_ODD_BIT(start
);
388 /* in trivial cases there is only one trivial run; called by ubidi_getRuns() */
390 getSingleRun(UBiDi
*pBiDi
, UBiDiLevel level
) {
391 /* simple, single-run case */
392 pBiDi
->runs
=pBiDi
->simpleRuns
;
395 /* fill and reorder the single run */
396 pBiDi
->runs
[0].logicalStart
=MAKE_INDEX_ODD_PAIR(0, level
);
397 pBiDi
->runs
[0].visualLimit
=pBiDi
->length
;
398 pBiDi
->runs
[0].insertRemove
=0;
401 /* reorder the runs array (L2) ---------------------------------------------- */
404 * Reorder the same-level runs in the runs array.
405 * Here, runCount>1 and maxLevel>=minLevel>=paraLevel.
406 * All the visualStart fields=logical start before reordering.
407 * The "odd" bits are not set yet.
409 * Reordering with this data structure lends itself to some handy shortcuts:
411 * Since each run is moved but not modified, and since at the initial maxLevel
412 * each sequence of same-level runs consists of only one run each, we
413 * don't need to do anything there and can predecrement maxLevel.
414 * In many simple cases, the reordering is thus done entirely in the
416 * Also, reordering occurs only down to the lowest odd level that occurs,
417 * which is minLevel|1. However, if the lowest level itself is odd, then
418 * in the last reordering the sequence of the runs at this level or higher
419 * will be all runs, and we don't need the elaborate loop to search for them.
420 * This is covered by ++minLevel instead of minLevel|=1 followed
421 * by an extra reorder-all after the reorder-some loop.
422 * About a trailing WS run:
423 * Such a run would need special treatment because its level is not
424 * reflected in levels[] if this is not a paragraph object.
425 * Instead, all characters from trailingWSStart on are implicitly at
427 * However, for all maxLevel>paraLevel, this run will never be reordered
428 * and does not need to be taken into account. maxLevel==paraLevel is only reordered
429 * if minLevel==paraLevel is odd, which is done in the extra segment.
430 * This means that for the main reordering loop we don't need to consider
431 * this run and can --runCount. If it is later part of the all-runs
432 * reordering, then runCount is adjusted accordingly.
435 reorderLine(UBiDi
*pBiDi
, UBiDiLevel minLevel
, UBiDiLevel maxLevel
) {
438 int32_t firstRun
, endRun
, limitRun
, runCount
;
441 if(maxLevel
<=(minLevel
|1)) {
446 * Reorder only down to the lowest odd level
447 * and reorder at an odd minLevel in a separate, simpler loop.
448 * See comments above for why minLevel is always incremented.
453 levels
=pBiDi
->levels
;
454 runCount
=pBiDi
->runCount
;
456 /* do not include the WS run at paraLevel<=old minLevel except in the simple loop */
457 if(pBiDi
->trailingWSStart
<pBiDi
->length
) {
461 while(--maxLevel
>=minLevel
) {
464 /* loop for all sequences of runs */
466 /* look for a sequence of runs that are all at >=maxLevel */
467 /* look for the first run of such a sequence */
468 while(firstRun
<runCount
&& levels
[runs
[firstRun
].logicalStart
]<maxLevel
) {
471 if(firstRun
>=runCount
) {
472 break; /* no more such runs */
475 /* look for the limit run of such a sequence (the run behind it) */
476 for(limitRun
=firstRun
; ++limitRun
<runCount
&& levels
[runs
[limitRun
].logicalStart
]>=maxLevel
;) {}
478 /* Swap the entire sequence of runs from firstRun to limitRun-1. */
480 while(firstRun
<endRun
) {
481 tempRun
= runs
[firstRun
];
482 runs
[firstRun
]=runs
[endRun
];
483 runs
[endRun
]=tempRun
;
488 if(limitRun
==runCount
) {
489 break; /* no more such runs */
496 /* now do maxLevel==old minLevel (==odd!), see above */
500 /* include the trailing WS run in this complete reordering */
501 if(pBiDi
->trailingWSStart
==pBiDi
->length
) {
505 /* Swap the entire sequence of all runs. (endRun==runCount) */
506 while(firstRun
<runCount
) {
507 tempRun
=runs
[firstRun
];
508 runs
[firstRun
]=runs
[runCount
];
509 runs
[runCount
]=tempRun
;
516 /* compute the runs array --------------------------------------------------- */
518 static int32_t getRunFromLogicalIndex(UBiDi
*pBiDi
, int32_t logicalIndex
, UErrorCode
*pErrorCode
) {
519 Run
*runs
=pBiDi
->runs
;
520 int32_t runCount
=pBiDi
->runCount
, visualStart
=0, i
, length
, logicalStart
;
522 for(i
=0; i
<runCount
; i
++) {
523 length
=runs
[i
].visualLimit
-visualStart
;
524 logicalStart
=GET_INDEX(runs
[i
].logicalStart
);
525 if((logicalIndex
>=logicalStart
) && (logicalIndex
<(logicalStart
+length
))) {
530 /* we should never get here */
532 *pErrorCode
= U_INVALID_STATE_ERROR
;
537 * Compute the runs array from the levels array.
538 * After ubidi_getRuns() returns TRUE, runCount is guaranteed to be >0
539 * and the runs are reordered.
540 * Odd-level runs have visualStart on their visual right edge and
541 * they progress visually to the left.
542 * If option UBIDI_OPTION_INSERT_MARKS is set, insertRemove will contain the
543 * sum of appropriate LRM/RLM_BEFORE/AFTER flags.
544 * If option UBIDI_OPTION_REMOVE_CONTROLS is set, insertRemove will contain the
545 * negative number of BiDi control characters within this run.
548 ubidi_getRuns(UBiDi
*pBiDi
, UErrorCode
*pErrorCode
) {
550 * This method returns immediately if the runs are already set. This
551 * includes the case of length==0 (handled in setPara)..
553 if (pBiDi
->runCount
>=0) {
557 if(pBiDi
->direction
!=UBIDI_MIXED
) {
558 /* simple, single-run case - this covers length==0 */
559 /* pBiDi->paraLevel is ok even for contextual multiple paragraphs */
560 getSingleRun(pBiDi
, pBiDi
->paraLevel
);
561 } else /* UBIDI_MIXED, length>0 */ {
562 /* mixed directionality */
563 int32_t length
=pBiDi
->length
, limit
;
564 UBiDiLevel
*levels
=pBiDi
->levels
;
566 UBiDiLevel level
=UBIDI_DEFAULT_LTR
; /* initialize with no valid level */
568 * If there are WS characters at the end of the line
569 * and the run preceding them has a level different from
570 * paraLevel, then they will form their own run at paraLevel (L1).
571 * Count them separately.
572 * We need some special treatment for this in order to not
573 * modify the levels array which a line UBiDi object shares
574 * with its paragraph parent and its other line siblings.
575 * In other words, for the trailing WS, it may be
576 * levels[]!=paraLevel but we have to treat it like it were so.
578 limit
=pBiDi
->trailingWSStart
;
579 /* count the runs, there is at least one non-WS run, and limit>0 */
581 for(i
=0; i
<limit
; ++i
) {
582 /* increment runCount at the start of each run */
583 if(levels
[i
]!=level
) {
590 * We don't need to see if the last run can be merged with a trailing
591 * WS run because setTrailingWSStart() would have done that.
593 if(runCount
==1 && limit
==length
) {
594 /* There is only one non-WS run and no trailing WS-run. */
595 getSingleRun(pBiDi
, levels
[0]);
596 } else /* runCount>1 || limit<length */ {
597 /* allocate and set the runs */
599 int32_t runIndex
, start
;
600 UBiDiLevel minLevel
=UBIDI_MAX_EXPLICIT_LEVEL
+1, maxLevel
=0;
602 /* now, count a (non-mergeable) WS run */
608 if(getRunsMemory(pBiDi
, runCount
)) {
609 runs
=pBiDi
->runsMemory
;
615 /* FOOD FOR THOUGHT: this could be optimized, e.g.:
616 * 464->444, 484->444, 575->555, 595->555
617 * However, that would take longer. Check also how it would
618 * interact with BiDi control removal and inserting Marks.
622 /* search for the run limits and initialize visualLimit values with the run lengths */
625 /* prepare this run */
635 /* look for the run limit */
636 while(++i
<limit
&& levels
[i
]==level
) {}
638 /* i is another run limit */
639 runs
[runIndex
].logicalStart
=start
;
640 runs
[runIndex
].visualLimit
=i
-start
;
641 runs
[runIndex
].insertRemove
=0;
646 /* there is a separate WS run */
647 runs
[runIndex
].logicalStart
=limit
;
648 runs
[runIndex
].visualLimit
=length
-limit
;
649 /* For the trailing WS run, pBiDi->paraLevel is ok even
650 if contextual multiple paragraphs. */
651 if(pBiDi
->paraLevel
<minLevel
) {
652 minLevel
=pBiDi
->paraLevel
;
656 /* set the object fields */
658 pBiDi
->runCount
=runCount
;
660 reorderLine(pBiDi
, minLevel
, maxLevel
);
662 /* now add the direction flags and adjust the visualLimit's to be just that */
663 /* this loop will also handle the trailing WS run */
665 for(i
=0; i
<runCount
; ++i
) {
666 ADD_ODD_BIT_FROM_LEVEL(runs
[i
].logicalStart
, levels
[runs
[i
].logicalStart
]);
667 limit
=runs
[i
].visualLimit
+=limit
;
670 /* Set the "odd" bit for the trailing WS run. */
671 /* For a RTL paragraph, it will be the *first* run in visual order. */
672 /* For the trailing WS run, pBiDi->paraLevel is ok even if
673 contextual multiple paragraphs. */
674 if(runIndex
<runCount
) {
675 int32_t trailingRun
= ((pBiDi
->paraLevel
& 1) != 0)? 0 : runIndex
;
677 ADD_ODD_BIT_FROM_LEVEL(runs
[trailingRun
].logicalStart
, pBiDi
->paraLevel
);
682 /* handle insert LRM/RLM BEFORE/AFTER run */
683 if(pBiDi
->insertPoints
.size
>0) {
684 Point
*point
, *start
=pBiDi
->insertPoints
.points
,
685 *limit
=start
+pBiDi
->insertPoints
.size
;
687 for(point
=start
; point
<limit
; point
++) {
688 runIndex
=getRunFromLogicalIndex(pBiDi
, point
->pos
, pErrorCode
);
689 pBiDi
->runs
[runIndex
].insertRemove
|=point
->flag
;
693 /* handle remove BiDi control characters */
694 if(pBiDi
->controlCount
>0) {
696 const UChar
*start
=pBiDi
->text
, *limit
=start
+pBiDi
->length
, *pu
;
697 for(pu
=start
; pu
<limit
; pu
++) {
698 if(IS_BIDI_CONTROL_CHAR(*pu
)) {
699 runIndex
=getRunFromLogicalIndex(pBiDi
, pu
-start
, pErrorCode
);
700 pBiDi
->runs
[runIndex
].insertRemove
--;
709 prepareReorder(const UBiDiLevel
*levels
, int32_t length
,
711 UBiDiLevel
*pMinLevel
, UBiDiLevel
*pMaxLevel
) {
713 UBiDiLevel level
, minLevel
, maxLevel
;
715 if(levels
==NULL
|| length
<=0) {
719 /* determine minLevel and maxLevel */
720 minLevel
=UBIDI_MAX_EXPLICIT_LEVEL
+1;
722 for(start
=length
; start
>0;) {
723 level
=levels
[--start
];
724 if(level
>UBIDI_MAX_EXPLICIT_LEVEL
+1) {
737 /* initialize the index map */
738 for(start
=length
; start
>0;) {
740 indexMap
[start
]=start
;
746 /* reorder a line based on a levels array (L2) ------------------------------ */
748 U_CAPI
void U_EXPORT2
749 ubidi_reorderLogical(const UBiDiLevel
*levels
, int32_t length
, int32_t *indexMap
) {
750 int32_t start
, limit
, sumOfSosEos
;
751 UBiDiLevel minLevel
, maxLevel
;
753 if(indexMap
==NULL
|| !prepareReorder(levels
, length
, indexMap
, &minLevel
, &maxLevel
)) {
758 if(minLevel
==maxLevel
&& (minLevel
&1)==0) {
762 /* reorder only down to the lowest odd level */
765 /* loop maxLevel..minLevel */
769 /* loop for all sequences of levels to reorder at the current maxLevel */
771 /* look for a sequence of levels that are all at >=maxLevel */
772 /* look for the first index of such a sequence */
773 while(start
<length
&& levels
[start
]<maxLevel
) {
777 break; /* no more such sequences */
780 /* look for the limit of such a sequence (the index behind it) */
781 for(limit
=start
; ++limit
<length
&& levels
[limit
]>=maxLevel
;) {}
784 * sos=start of sequence, eos=end of sequence
786 * The closed (inclusive) interval from sos to eos includes all the logical
787 * and visual indexes within this sequence. They are logically and
788 * visually contiguous and in the same range.
790 * For each run, the new visual index=sos+eos-old visual index;
791 * we pre-add sos+eos into sumOfSosEos ->
792 * new visual index=sumOfSosEos-old visual index;
794 sumOfSosEos
=start
+limit
-1;
796 /* reorder each index in the sequence */
798 indexMap
[start
]=sumOfSosEos
-indexMap
[start
];
799 } while(++start
<limit
);
803 break; /* no more such sequences */
808 } while(--maxLevel
>=minLevel
);
811 U_CAPI
void U_EXPORT2
812 ubidi_reorderVisual(const UBiDiLevel
*levels
, int32_t length
, int32_t *indexMap
) {
813 int32_t start
, end
, limit
, temp
;
814 UBiDiLevel minLevel
, maxLevel
;
816 if(indexMap
==NULL
|| !prepareReorder(levels
, length
, indexMap
, &minLevel
, &maxLevel
)) {
821 if(minLevel
==maxLevel
&& (minLevel
&1)==0) {
825 /* reorder only down to the lowest odd level */
828 /* loop maxLevel..minLevel */
832 /* loop for all sequences of levels to reorder at the current maxLevel */
834 /* look for a sequence of levels that are all at >=maxLevel */
835 /* look for the first index of such a sequence */
836 while(start
<length
&& levels
[start
]<maxLevel
) {
840 break; /* no more such runs */
843 /* look for the limit of such a sequence (the index behind it) */
844 for(limit
=start
; ++limit
<length
&& levels
[limit
]>=maxLevel
;) {}
847 * Swap the entire interval of indexes from start to limit-1.
848 * We don't need to swap the levels for the purpose of this
849 * algorithm: the sequence of levels that we look at does not
854 temp
=indexMap
[start
];
855 indexMap
[start
]=indexMap
[end
];
863 break; /* no more such sequences */
868 } while(--maxLevel
>=minLevel
);
871 /* API functions for logical<->visual mapping ------------------------------- */
873 U_CAPI
int32_t U_EXPORT2
874 ubidi_getVisualIndex(UBiDi
*pBiDi
, int32_t logicalIndex
, UErrorCode
*pErrorCode
) {
875 int32_t visualIndex
=UBIDI_MAP_NOWHERE
;
876 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
, -1);
877 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi
, *pErrorCode
, -1);
878 RETURN_IF_BAD_RANGE(logicalIndex
, 0, pBiDi
->length
, *pErrorCode
, -1);
880 /* we can do the trivial cases without the runs array */
881 switch(pBiDi
->direction
) {
883 visualIndex
=logicalIndex
;
886 visualIndex
=pBiDi
->length
-logicalIndex
-1;
889 if(!ubidi_getRuns(pBiDi
, pErrorCode
)) {
890 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
893 Run
*runs
=pBiDi
->runs
;
894 int32_t i
, visualStart
=0, offset
, length
;
896 /* linear search for the run, search on the visual runs */
897 for(i
=0; i
<pBiDi
->runCount
; ++i
) {
898 length
=runs
[i
].visualLimit
-visualStart
;
899 offset
=logicalIndex
-GET_INDEX(runs
[i
].logicalStart
);
900 if(offset
>=0 && offset
<length
) {
901 if(IS_EVEN_RUN(runs
[i
].logicalStart
)) {
903 visualIndex
=visualStart
+offset
;
906 visualIndex
=visualStart
+length
-offset
-1;
908 break; /* exit for loop */
912 if(i
>=pBiDi
->runCount
) {
913 return UBIDI_MAP_NOWHERE
;
918 if(pBiDi
->insertPoints
.size
>0) {
919 /* add the number of added marks until the calculated visual index */
920 Run
*runs
=pBiDi
->runs
;
921 int32_t i
, length
, insertRemove
;
922 int32_t visualStart
=0, markFound
=0;
923 for(i
=0; ; i
++, visualStart
+=length
) {
924 length
=runs
[i
].visualLimit
-visualStart
;
925 insertRemove
=runs
[i
].insertRemove
;
926 if(insertRemove
& (LRM_BEFORE
|RLM_BEFORE
)) {
929 /* is it the run containing the visual index? */
930 if(visualIndex
<runs
[i
].visualLimit
) {
931 return visualIndex
+markFound
;
933 if(insertRemove
& (LRM_AFTER
|RLM_AFTER
)) {
938 else if(pBiDi
->controlCount
>0) {
939 /* subtract the number of controls until the calculated visual index */
940 Run
*runs
=pBiDi
->runs
;
941 int32_t i
, j
, start
, limit
, length
, insertRemove
;
942 int32_t visualStart
=0, controlFound
=0;
943 UChar uchar
=pBiDi
->text
[logicalIndex
];
944 /* is the logical index pointing to a control ? */
945 if(IS_BIDI_CONTROL_CHAR(uchar
)) {
946 return UBIDI_MAP_NOWHERE
;
949 for(i
=0; ; i
++, visualStart
+=length
) {
950 length
=runs
[i
].visualLimit
-visualStart
;
951 insertRemove
=runs
[i
].insertRemove
;
952 /* calculated visual index is beyond this run? */
953 if(visualIndex
>=runs
[i
].visualLimit
) {
954 controlFound
-=insertRemove
;
957 /* calculated visual index must be within current run */
958 if(insertRemove
==0) {
959 return visualIndex
-controlFound
;
961 if(IS_EVEN_RUN(runs
[i
].logicalStart
)) {
962 /* LTR: check from run start to logical index */
963 start
=runs
[i
].logicalStart
;
966 /* RTL: check from logical index to run end */
967 start
=logicalIndex
+1;
968 limit
=GET_INDEX(runs
[i
].logicalStart
)+length
;
970 for(j
=start
; j
<limit
; j
++) {
971 uchar
=pBiDi
->text
[j
];
972 if(IS_BIDI_CONTROL_CHAR(uchar
)) {
976 return visualIndex
-controlFound
;
983 U_CAPI
int32_t U_EXPORT2
984 ubidi_getLogicalIndex(UBiDi
*pBiDi
, int32_t visualIndex
, UErrorCode
*pErrorCode
) {
986 int32_t i
, runCount
, start
;
987 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
, -1);
988 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi
, *pErrorCode
, -1);
989 RETURN_IF_BAD_RANGE(visualIndex
, 0, pBiDi
->resultLength
, *pErrorCode
, -1);
990 /* we can do the trivial cases without the runs array */
991 if(pBiDi
->insertPoints
.size
==0 && pBiDi
->controlCount
==0) {
992 if(pBiDi
->direction
==UBIDI_LTR
) {
995 else if(pBiDi
->direction
==UBIDI_RTL
) {
996 return pBiDi
->length
-visualIndex
-1;
999 if(!ubidi_getRuns(pBiDi
, pErrorCode
)) {
1000 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
1005 runCount
=pBiDi
->runCount
;
1006 if(pBiDi
->insertPoints
.size
>0) {
1007 /* handle inserted LRM/RLM */
1008 int32_t markFound
=0, insertRemove
;
1009 int32_t visualStart
=0, length
;
1011 /* subtract number of marks until visual index */
1012 for(i
=0; ; i
++, visualStart
+=length
) {
1013 length
=runs
[i
].visualLimit
-visualStart
;
1014 insertRemove
=runs
[i
].insertRemove
;
1015 if(insertRemove
&(LRM_BEFORE
|RLM_BEFORE
)) {
1016 if(visualIndex
<=(visualStart
+markFound
)) {
1017 return UBIDI_MAP_NOWHERE
;
1021 /* is adjusted visual index within this run? */
1022 if(visualIndex
<(runs
[i
].visualLimit
+markFound
)) {
1023 visualIndex
-=markFound
;
1026 if(insertRemove
&(LRM_AFTER
|RLM_AFTER
)) {
1027 if(visualIndex
==(visualStart
+length
+markFound
)) {
1028 return UBIDI_MAP_NOWHERE
;
1034 else if(pBiDi
->controlCount
>0) {
1035 /* handle removed BiDi control characters */
1036 int32_t controlFound
=0, insertRemove
, length
;
1037 int32_t logicalStart
, logicalEnd
, visualStart
=0, j
, k
;
1040 /* add number of controls until visual index */
1041 for(i
=0; ; i
++, visualStart
+=length
) {
1042 length
=runs
[i
].visualLimit
-visualStart
;
1043 insertRemove
=runs
[i
].insertRemove
;
1044 /* is adjusted visual index beyond current run? */
1045 if(visualIndex
>=(runs
[i
].visualLimit
-controlFound
+insertRemove
)) {
1046 controlFound
-=insertRemove
;
1049 /* adjusted visual index is within current run */
1050 if(insertRemove
==0) {
1051 visualIndex
+=controlFound
;
1054 /* count non-control chars until visualIndex */
1055 logicalStart
=runs
[i
].logicalStart
;
1056 evenRun
=IS_EVEN_RUN(logicalStart
);
1057 REMOVE_ODD_BIT(logicalStart
);
1058 logicalEnd
=logicalStart
+length
-1;
1059 for(j
=0; j
<length
; j
++) {
1060 k
= evenRun
? logicalStart
+j
: logicalEnd
-j
;
1061 uchar
=pBiDi
->text
[k
];
1062 if(IS_BIDI_CONTROL_CHAR(uchar
)) {
1065 if((visualIndex
+controlFound
)==(visualStart
+j
)) {
1069 visualIndex
+=controlFound
;
1073 /* handle all cases */
1075 /* linear search for the run */
1076 for(i
=0; visualIndex
>=runs
[i
].visualLimit
; ++i
) {}
1078 /* binary search for the run */
1079 int32_t begin
=0, limit
=runCount
;
1081 /* the middle if() is guaranteed to find the run, we don't need a loop limit */
1084 if(visualIndex
>=runs
[i
].visualLimit
) {
1086 } else if(i
==0 || visualIndex
>=runs
[i
-1].visualLimit
) {
1094 start
=runs
[i
].logicalStart
;
1095 if(IS_EVEN_RUN(start
)) {
1097 /* the offset in runs[i] is visualIndex-runs[i-1].visualLimit */
1099 visualIndex
-=runs
[i
-1].visualLimit
;
1101 return start
+visualIndex
;
1104 return GET_INDEX(start
)+runs
[i
].visualLimit
-visualIndex
-1;
1108 U_CAPI
void U_EXPORT2
1109 ubidi_getLogicalMap(UBiDi
*pBiDi
, int32_t *indexMap
, UErrorCode
*pErrorCode
) {
1110 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
);
1111 /* ubidi_countRuns() checks for VALID_PARA_OR_LINE */
1112 ubidi_countRuns(pBiDi
, pErrorCode
);
1113 if(U_FAILURE(*pErrorCode
)) {
1115 } else if(indexMap
==NULL
) {
1116 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
1118 /* fill a logical-to-visual index map using the runs[] */
1119 int32_t visualStart
, visualLimit
, i
, j
, k
;
1120 int32_t logicalStart
, logicalLimit
;
1121 Run
*runs
=pBiDi
->runs
;
1122 if (pBiDi
->length
<=0) {
1125 if (pBiDi
->length
>pBiDi
->resultLength
) {
1126 uprv_memset(indexMap
, 0xFF, pBiDi
->length
*sizeof(int32_t));
1130 for(j
=0; j
<pBiDi
->runCount
; ++j
) {
1131 logicalStart
=GET_INDEX(runs
[j
].logicalStart
);
1132 visualLimit
=runs
[j
].visualLimit
;
1133 if(IS_EVEN_RUN(runs
[j
].logicalStart
)) {
1135 indexMap
[logicalStart
++]=visualStart
++;
1136 } while(visualStart
<visualLimit
);
1138 logicalStart
+=visualLimit
-visualStart
; /* logicalLimit */
1140 indexMap
[--logicalStart
]=visualStart
++;
1141 } while(visualStart
<visualLimit
);
1143 /* visualStart==visualLimit; */
1146 if(pBiDi
->insertPoints
.size
>0) {
1147 int32_t markFound
=0, runCount
=pBiDi
->runCount
;
1148 int32_t length
, insertRemove
;
1150 /* add number of marks found until each index */
1151 for(i
=0; i
<runCount
; i
++, visualStart
+=length
) {
1152 length
=runs
[i
].visualLimit
-visualStart
;
1153 insertRemove
=runs
[i
].insertRemove
;
1154 if(insertRemove
&(LRM_BEFORE
|RLM_BEFORE
)) {
1158 logicalStart
=GET_INDEX(runs
[i
].logicalStart
);
1159 logicalLimit
=logicalStart
+length
;
1160 for(j
=logicalStart
; j
<logicalLimit
; j
++) {
1161 indexMap
[j
]+=markFound
;
1164 if(insertRemove
&(LRM_AFTER
|RLM_AFTER
)) {
1169 else if(pBiDi
->controlCount
>0) {
1170 int32_t controlFound
=0, runCount
=pBiDi
->runCount
;
1171 int32_t length
, insertRemove
;
1175 /* subtract number of controls found until each index */
1176 for(i
=0; i
<runCount
; i
++, visualStart
+=length
) {
1177 length
=runs
[i
].visualLimit
-visualStart
;
1178 insertRemove
=runs
[i
].insertRemove
;
1179 /* no control found within previous runs nor within this run */
1180 if((controlFound
-insertRemove
)==0) {
1183 logicalStart
=runs
[i
].logicalStart
;
1184 evenRun
=IS_EVEN_RUN(logicalStart
);
1185 REMOVE_ODD_BIT(logicalStart
);
1186 logicalLimit
=logicalStart
+length
;
1187 /* if no control within this run */
1188 if(insertRemove
==0) {
1189 for(j
=logicalStart
; j
<logicalLimit
; j
++) {
1190 indexMap
[j
]-=controlFound
;
1194 for(j
=0; j
<length
; j
++) {
1195 k
= evenRun
? logicalStart
+j
: logicalLimit
-j
-1;
1196 uchar
=pBiDi
->text
[k
];
1197 if(IS_BIDI_CONTROL_CHAR(uchar
)) {
1199 indexMap
[k
]=UBIDI_MAP_NOWHERE
;
1202 indexMap
[k
]-=controlFound
;
1209 U_CAPI
void U_EXPORT2
1210 ubidi_getVisualMap(UBiDi
*pBiDi
, int32_t *indexMap
, UErrorCode
*pErrorCode
) {
1211 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
);
1212 if(indexMap
==NULL
) {
1213 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
1216 /* ubidi_countRuns() checks for VALID_PARA_OR_LINE */
1217 ubidi_countRuns(pBiDi
, pErrorCode
);
1218 if(U_SUCCESS(*pErrorCode
)) {
1219 /* fill a visual-to-logical index map using the runs[] */
1220 Run
*runs
=pBiDi
->runs
, *runsLimit
=runs
+pBiDi
->runCount
;
1221 int32_t logicalStart
, visualStart
, visualLimit
, *pi
=indexMap
;
1223 if (pBiDi
->resultLength
<=0) {
1227 for(; runs
<runsLimit
; ++runs
) {
1228 logicalStart
=runs
->logicalStart
;
1229 visualLimit
=runs
->visualLimit
;
1230 if(IS_EVEN_RUN(logicalStart
)) {
1232 *pi
++ = logicalStart
++;
1233 } while(++visualStart
<visualLimit
);
1235 REMOVE_ODD_BIT(logicalStart
);
1236 logicalStart
+=visualLimit
-visualStart
; /* logicalLimit */
1238 *pi
++ = --logicalStart
;
1239 } while(++visualStart
<visualLimit
);
1241 /* visualStart==visualLimit; */
1244 if(pBiDi
->insertPoints
.size
>0) {
1245 int32_t markFound
=0, runCount
=pBiDi
->runCount
;
1246 int32_t insertRemove
, i
, j
, k
;
1248 /* count all inserted marks */
1249 for(i
=0; i
<runCount
; i
++) {
1250 insertRemove
=runs
[i
].insertRemove
;
1251 if(insertRemove
&(LRM_BEFORE
|RLM_BEFORE
)) {
1254 if(insertRemove
&(LRM_AFTER
|RLM_AFTER
)) {
1258 /* move back indexes by number of preceding marks */
1259 k
=pBiDi
->resultLength
;
1260 for(i
=runCount
-1; i
>=0 && markFound
>0; i
--) {
1261 insertRemove
=runs
[i
].insertRemove
;
1262 if(insertRemove
&(LRM_AFTER
|RLM_AFTER
)) {
1263 indexMap
[--k
]= UBIDI_MAP_NOWHERE
;
1266 visualStart
= i
>0 ? runs
[i
-1].visualLimit
: 0;
1267 for(j
=runs
[i
].visualLimit
-1; j
>=visualStart
&& markFound
>0; j
--) {
1268 indexMap
[--k
]=indexMap
[j
];
1270 if(insertRemove
&(LRM_BEFORE
|RLM_BEFORE
)) {
1271 indexMap
[--k
]= UBIDI_MAP_NOWHERE
;
1276 else if(pBiDi
->controlCount
>0) {
1277 int32_t runCount
=pBiDi
->runCount
, logicalEnd
;
1278 int32_t insertRemove
, length
, i
, j
, k
, m
;
1283 /* move forward indexes by number of preceding controls */
1285 for(i
=0; i
<runCount
; i
++, visualStart
+=length
) {
1286 length
=runs
[i
].visualLimit
-visualStart
;
1287 insertRemove
=runs
[i
].insertRemove
;
1288 /* if no control found yet, nothing to do in this run */
1289 if((insertRemove
==0)&&(k
==visualStart
)) {
1293 /* if no control in this run */
1294 if(insertRemove
==0) {
1295 visualLimit
=runs
[i
].visualLimit
;
1296 for(j
=visualStart
; j
<visualLimit
; j
++) {
1297 indexMap
[k
++]=indexMap
[j
];
1301 logicalStart
=runs
[i
].logicalStart
;
1302 evenRun
=IS_EVEN_RUN(logicalStart
);
1303 REMOVE_ODD_BIT(logicalStart
);
1304 logicalEnd
=logicalStart
+length
-1;
1305 for(j
=0; j
<length
; j
++) {
1306 m
= evenRun
? logicalStart
+j
: logicalEnd
-j
;
1307 uchar
=pBiDi
->text
[m
];
1308 if(!IS_BIDI_CONTROL_CHAR(uchar
)) {
1317 U_CAPI
void U_EXPORT2
1318 ubidi_invertMap(const int32_t *srcMap
, int32_t *destMap
, int32_t length
) {
1319 if(srcMap
!=NULL
&& destMap
!=NULL
&& length
>0) {
1321 int32_t destLength
=-1, count
=0;
1322 /* find highest value and count positive indexes in srcMap */
1325 if(*--pi
>destLength
) {
1332 destLength
++; /* add 1 for origin 0 */
1333 if(count
<destLength
) {
1334 /* we must fill unmatched destMap entries with -1 */
1335 uprv_memset(destMap
, 0xFF, destLength
*sizeof(int32_t));
1340 destMap
[*pi
]=--length
;