2 ******************************************************************************
4 * Copyright (C) 1999-2010, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 ******************************************************************************
10 * tab size: 8 (not used)
13 * created on: 1999aug06
14 * created by: Markus W. Scherer, updated by Matitiahu Allouche
18 #include "unicode/utypes.h"
19 #include "unicode/ustring.h"
20 #include "unicode/uchar.h"
21 #include "unicode/ubidi.h"
26 * General remarks about the functions in this file:
28 * These functions deal with the aspects of potentially mixed-directional
29 * text in a single paragraph or in a line of a single paragraph
30 * which has already been processed according to
31 * the Unicode 3.0 BiDi algorithm as defined in
32 * http://www.unicode.org/unicode/reports/tr9/ , version 13,
33 * also described in The Unicode Standard, Version 4.0.1 .
35 * This means that there is a UBiDi object with a levels
36 * and a dirProps array.
37 * paraLevel and direction are also set.
38 * Only if the length of the text is zero, then levels==dirProps==NULL.
40 * The overall directionality of the paragraph
41 * or line is used to bypass the reordering steps if possible.
42 * Even purely RTL text does not need reordering there because
43 * the ubidi_getLogical/VisualIndex() functions can compute the
44 * index on the fly in such a case.
46 * The implementation of the access to same-level-runs and of the reordering
47 * do attempt to provide better performance and less memory usage compared to
48 * a direct implementation of especially rule (L2) with an array of
49 * one (32-bit) integer per text character.
51 * Here, the levels array is scanned as soon as necessary, and a vector of
52 * same-level-runs is created. Reordering then is done on this vector.
53 * For each run of text positions that were resolved to the same level,
54 * only 8 bytes are stored: the first text position of the run and the visual
55 * position behind the run after reordering.
56 * One sign bit is used to hold the directionality of the run.
57 * This is inefficient if there are many very short runs. If the average run
58 * length is <2, then this uses more memory.
60 * In a further attempt to save memory, the levels array is never changed
61 * after all the resolution rules (Xn, Wn, Nn, In).
62 * Many functions have to consider the field trailingWSStart:
63 * if it is less than length, then there is an implicit trailing run
65 * which is not reflected in the levels array.
66 * This allows a line UBiDi object to use the same levels array as
67 * its paragraph parent object.
69 * When a UBiDi object is created for a line of a paragraph, then the
70 * paragraph's levels and dirProps arrays are reused by way of setting
71 * a pointer into them, not by copying. This again saves memory and forbids to
72 * change the now shared levels for (L1).
75 /* handle trailing WS (L1) -------------------------------------------------- */
78 * setTrailingWSStart() sets the start index for a trailing
79 * run of WS in the line. This is necessary because we do not modify
80 * the paragraph's levels array that we just point into.
81 * Using trailingWSStart is another form of performing (L1).
83 * To make subsequent operations easier, we also include the run
84 * before the WS if it is at the paraLevel - we merge the two here.
86 * This function is called only from ubidi_setLine(), so pBiDi->paraLevel is
87 * set correctly for the line even when contextual multiple paragraphs.
90 setTrailingWSStart(UBiDi
*pBiDi
) {
91 /* pBiDi->direction!=UBIDI_MIXED */
93 const DirProp
*dirProps
=pBiDi
->dirProps
;
94 UBiDiLevel
*levels
=pBiDi
->levels
;
95 int32_t start
=pBiDi
->length
;
96 UBiDiLevel paraLevel
=pBiDi
->paraLevel
;
98 /* If the line is terminated by a block separator, all preceding WS etc...
99 are already set to paragraph level.
100 Setting trailingWSStart to pBidi->length will avoid changing the
101 level of B chars from 0 to paraLevel in ubidi_getLevels when
102 orderParagraphsLTR==TRUE.
104 if(NO_CONTEXT_RTL(dirProps
[start
-1])==B
) {
105 pBiDi
->trailingWSStart
=start
; /* currently == pBiDi->length */
108 /* go backwards across all WS, BN, explicit codes */
109 while(start
>0 && DIRPROP_FLAG_NC(dirProps
[start
-1])&MASK_WS
) {
113 /* if the WS run can be merged with the previous run then do so here */
114 while(start
>0 && levels
[start
-1]==paraLevel
) {
118 pBiDi
->trailingWSStart
=start
;
121 /* ubidi_setLine ------------------------------------------------------------ */
123 U_CAPI
void U_EXPORT2
124 ubidi_setLine(const UBiDi
*pParaBiDi
,
125 int32_t start
, int32_t limit
,
127 UErrorCode
*pErrorCode
) {
130 /* check the argument values */
131 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
);
132 RETURN_VOID_IF_NOT_VALID_PARA(pParaBiDi
, *pErrorCode
);
133 RETURN_VOID_IF_BAD_RANGE(start
, 0, limit
, *pErrorCode
);
134 RETURN_VOID_IF_BAD_RANGE(limit
, 0, pParaBiDi
->length
+1, *pErrorCode
);
135 if(pLineBiDi
==NULL
) {
136 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
139 if(ubidi_getParagraph(pParaBiDi
, start
, NULL
, NULL
, NULL
, pErrorCode
) !=
140 ubidi_getParagraph(pParaBiDi
, limit
-1, NULL
, NULL
, NULL
, pErrorCode
)) {
141 /* the line crosses a paragraph boundary */
142 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
146 /* set the values in pLineBiDi from its pParaBiDi parent */
147 pLineBiDi
->pParaBiDi
=NULL
; /* mark unfinished setLine */
148 pLineBiDi
->text
=pParaBiDi
->text
+start
;
149 length
=pLineBiDi
->length
=limit
-start
;
150 pLineBiDi
->resultLength
=pLineBiDi
->originalLength
=length
;
151 pLineBiDi
->paraLevel
=GET_PARALEVEL(pParaBiDi
, start
);
152 pLineBiDi
->paraCount
=pParaBiDi
->paraCount
;
153 pLineBiDi
->runs
=NULL
;
155 pLineBiDi
->reorderingMode
=pParaBiDi
->reorderingMode
;
156 pLineBiDi
->reorderingOptions
=pParaBiDi
->reorderingOptions
;
157 pLineBiDi
->controlCount
=0;
158 if(pParaBiDi
->controlCount
>0) {
160 for(j
=start
; j
<limit
; j
++) {
161 if(IS_BIDI_CONTROL_CHAR(pParaBiDi
->text
[j
])) {
162 pLineBiDi
->controlCount
++;
165 pLineBiDi
->resultLength
-=pLineBiDi
->controlCount
;
168 pLineBiDi
->dirProps
=pParaBiDi
->dirProps
+start
;
169 pLineBiDi
->levels
=pParaBiDi
->levels
+start
;
170 pLineBiDi
->runCount
=-1;
172 if(pParaBiDi
->direction
!=UBIDI_MIXED
) {
173 /* the parent is already trivial */
174 pLineBiDi
->direction
=pParaBiDi
->direction
;
177 * The parent's levels are all either
178 * implicitly or explicitly ==paraLevel;
181 if(pParaBiDi
->trailingWSStart
<=start
) {
182 pLineBiDi
->trailingWSStart
=0;
183 } else if(pParaBiDi
->trailingWSStart
<limit
) {
184 pLineBiDi
->trailingWSStart
=pParaBiDi
->trailingWSStart
-start
;
186 pLineBiDi
->trailingWSStart
=length
;
189 const UBiDiLevel
*levels
=pLineBiDi
->levels
;
190 int32_t i
, trailingWSStart
;
193 setTrailingWSStart(pLineBiDi
);
194 trailingWSStart
=pLineBiDi
->trailingWSStart
;
196 /* recalculate pLineBiDi->direction */
197 if(trailingWSStart
==0) {
198 /* all levels are at paraLevel */
199 pLineBiDi
->direction
=(UBiDiDirection
)(pLineBiDi
->paraLevel
&1);
201 /* get the level of the first character */
202 level
=(UBiDiLevel
)(levels
[0]&1);
204 /* if there is anything of a different level, then the line is mixed */
205 if(trailingWSStart
<length
&& (pLineBiDi
->paraLevel
&1)!=level
) {
206 /* the trailing WS is at paraLevel, which differs from levels[0] */
207 pLineBiDi
->direction
=UBIDI_MIXED
;
209 /* see if levels[1..trailingWSStart-1] have the same direction as levels[0] and paraLevel */
212 if(i
==trailingWSStart
) {
213 /* the direction values match those in level */
214 pLineBiDi
->direction
=(UBiDiDirection
)level
;
216 } else if((levels
[i
]&1)!=level
) {
217 pLineBiDi
->direction
=UBIDI_MIXED
;
225 switch(pLineBiDi
->direction
) {
227 /* make sure paraLevel is even */
228 pLineBiDi
->paraLevel
=(UBiDiLevel
)((pLineBiDi
->paraLevel
+1)&~1);
230 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
231 pLineBiDi
->trailingWSStart
=0;
234 /* make sure paraLevel is odd */
235 pLineBiDi
->paraLevel
|=1;
237 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
238 pLineBiDi
->trailingWSStart
=0;
244 pLineBiDi
->pParaBiDi
=pParaBiDi
; /* mark successful setLine */
248 U_CAPI UBiDiLevel U_EXPORT2
249 ubidi_getLevelAt(const UBiDi
*pBiDi
, int32_t charIndex
) {
250 /* return paraLevel if in the trailing WS run, otherwise the real level */
251 if(!IS_VALID_PARA_OR_LINE(pBiDi
) || charIndex
<0 || pBiDi
->length
<=charIndex
) {
253 } else if(pBiDi
->direction
!=UBIDI_MIXED
|| charIndex
>=pBiDi
->trailingWSStart
) {
254 return GET_PARALEVEL(pBiDi
, charIndex
);
256 return pBiDi
->levels
[charIndex
];
260 U_CAPI
const UBiDiLevel
* U_EXPORT2
261 ubidi_getLevels(UBiDi
*pBiDi
, UErrorCode
*pErrorCode
) {
262 int32_t start
, length
;
264 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
, NULL
);
265 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi
, *pErrorCode
, NULL
);
266 if((length
=pBiDi
->length
)<=0) {
267 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
270 if((start
=pBiDi
->trailingWSStart
)==length
) {
271 /* the current levels array reflects the WS run */
272 return pBiDi
->levels
;
276 * After the previous if(), we know that the levels array
277 * has an implicit trailing WS run and therefore does not fully
278 * reflect itself all the levels.
279 * This must be a UBiDi object for a line, and
280 * we need to create a new levels array.
282 if(getLevelsMemory(pBiDi
, length
)) {
283 UBiDiLevel
*levels
=pBiDi
->levelsMemory
;
285 if(start
>0 && levels
!=pBiDi
->levels
) {
286 uprv_memcpy(levels
, pBiDi
->levels
, start
);
288 /* pBiDi->paraLevel is ok even if contextual multiple paragraphs,
289 since pBidi is a line object */
290 uprv_memset(levels
+start
, pBiDi
->paraLevel
, length
-start
);
292 /* this new levels array is set for the line and reflects the WS run */
293 pBiDi
->trailingWSStart
=length
;
294 return pBiDi
->levels
=levels
;
297 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
302 U_CAPI
void U_EXPORT2
303 ubidi_getLogicalRun(const UBiDi
*pBiDi
, int32_t logicalPosition
,
304 int32_t *pLogicalLimit
, UBiDiLevel
*pLevel
) {
305 UErrorCode errorCode
;
306 int32_t runCount
, visualStart
, logicalLimit
, logicalFirst
, i
;
309 errorCode
=U_ZERO_ERROR
;
310 RETURN_VOID_IF_BAD_RANGE(logicalPosition
, 0, pBiDi
->length
, errorCode
);
311 /* ubidi_countRuns will check VALID_PARA_OR_LINE */
312 runCount
=ubidi_countRuns((UBiDi
*)pBiDi
, &errorCode
);
313 if(U_FAILURE(errorCode
)) {
316 /* this is done based on runs rather than on levels since levels have
317 a special interpretation when UBIDI_REORDER_RUNS_ONLY
319 visualStart
=logicalLimit
=0;
322 for(i
=0; i
<runCount
; i
++) {
323 iRun
= pBiDi
->runs
[i
];
324 logicalFirst
=GET_INDEX(iRun
.logicalStart
);
325 logicalLimit
=logicalFirst
+iRun
.visualLimit
-visualStart
;
326 if((logicalPosition
>=logicalFirst
) &&
327 (logicalPosition
<logicalLimit
)) {
330 visualStart
= iRun
.visualLimit
;
333 *pLogicalLimit
=logicalLimit
;
336 if(pBiDi
->reorderingMode
==UBIDI_REORDER_RUNS_ONLY
) {
337 *pLevel
=(UBiDiLevel
)GET_ODD_BIT(iRun
.logicalStart
);
339 else if(pBiDi
->direction
!=UBIDI_MIXED
|| logicalPosition
>=pBiDi
->trailingWSStart
) {
340 *pLevel
=GET_PARALEVEL(pBiDi
, logicalPosition
);
342 *pLevel
=pBiDi
->levels
[logicalPosition
];
347 /* runs API functions ------------------------------------------------------- */
349 U_CAPI
int32_t U_EXPORT2
350 ubidi_countRuns(UBiDi
*pBiDi
, UErrorCode
*pErrorCode
) {
351 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
, -1);
352 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi
, *pErrorCode
, -1);
353 ubidi_getRuns(pBiDi
, pErrorCode
);
354 if(U_FAILURE(*pErrorCode
)) {
357 return pBiDi
->runCount
;
360 U_CAPI UBiDiDirection U_EXPORT2
361 ubidi_getVisualRun(UBiDi
*pBiDi
, int32_t runIndex
,
362 int32_t *pLogicalStart
, int32_t *pLength
)
365 UErrorCode errorCode
= U_ZERO_ERROR
;
366 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi
, errorCode
, UBIDI_LTR
);
367 ubidi_getRuns(pBiDi
, &errorCode
);
368 if(U_FAILURE(errorCode
)) {
371 RETURN_IF_BAD_RANGE(runIndex
, 0, pBiDi
->runCount
, errorCode
, UBIDI_LTR
);
373 start
=pBiDi
->runs
[runIndex
].logicalStart
;
374 if(pLogicalStart
!=NULL
) {
375 *pLogicalStart
=GET_INDEX(start
);
379 *pLength
=pBiDi
->runs
[runIndex
].visualLimit
-
380 pBiDi
->runs
[runIndex
-1].visualLimit
;
382 *pLength
=pBiDi
->runs
[0].visualLimit
;
385 return (UBiDiDirection
)GET_ODD_BIT(start
);
388 /* in trivial cases there is only one trivial run; called by ubidi_getRuns() */
390 getSingleRun(UBiDi
*pBiDi
, UBiDiLevel level
) {
391 /* simple, single-run case */
392 pBiDi
->runs
=pBiDi
->simpleRuns
;
395 /* fill and reorder the single run */
396 pBiDi
->runs
[0].logicalStart
=MAKE_INDEX_ODD_PAIR(0, level
);
397 pBiDi
->runs
[0].visualLimit
=pBiDi
->length
;
398 pBiDi
->runs
[0].insertRemove
=0;
401 /* reorder the runs array (L2) ---------------------------------------------- */
404 * Reorder the same-level runs in the runs array.
405 * Here, runCount>1 and maxLevel>=minLevel>=paraLevel.
406 * All the visualStart fields=logical start before reordering.
407 * The "odd" bits are not set yet.
409 * Reordering with this data structure lends itself to some handy shortcuts:
411 * Since each run is moved but not modified, and since at the initial maxLevel
412 * each sequence of same-level runs consists of only one run each, we
413 * don't need to do anything there and can predecrement maxLevel.
414 * In many simple cases, the reordering is thus done entirely in the
416 * Also, reordering occurs only down to the lowest odd level that occurs,
417 * which is minLevel|1. However, if the lowest level itself is odd, then
418 * in the last reordering the sequence of the runs at this level or higher
419 * will be all runs, and we don't need the elaborate loop to search for them.
420 * This is covered by ++minLevel instead of minLevel|=1 followed
421 * by an extra reorder-all after the reorder-some loop.
422 * About a trailing WS run:
423 * Such a run would need special treatment because its level is not
424 * reflected in levels[] if this is not a paragraph object.
425 * Instead, all characters from trailingWSStart on are implicitly at
427 * However, for all maxLevel>paraLevel, this run will never be reordered
428 * and does not need to be taken into account. maxLevel==paraLevel is only reordered
429 * if minLevel==paraLevel is odd, which is done in the extra segment.
430 * This means that for the main reordering loop we don't need to consider
431 * this run and can --runCount. If it is later part of the all-runs
432 * reordering, then runCount is adjusted accordingly.
435 reorderLine(UBiDi
*pBiDi
, UBiDiLevel minLevel
, UBiDiLevel maxLevel
) {
438 int32_t firstRun
, endRun
, limitRun
, runCount
;
441 if(maxLevel
<=(minLevel
|1)) {
446 * Reorder only down to the lowest odd level
447 * and reorder at an odd minLevel in a separate, simpler loop.
448 * See comments above for why minLevel is always incremented.
453 levels
=pBiDi
->levels
;
454 runCount
=pBiDi
->runCount
;
456 /* do not include the WS run at paraLevel<=old minLevel except in the simple loop */
457 if(pBiDi
->trailingWSStart
<pBiDi
->length
) {
461 while(--maxLevel
>=minLevel
) {
464 /* loop for all sequences of runs */
466 /* look for a sequence of runs that are all at >=maxLevel */
467 /* look for the first run of such a sequence */
468 while(firstRun
<runCount
&& levels
[runs
[firstRun
].logicalStart
]<maxLevel
) {
471 if(firstRun
>=runCount
) {
472 break; /* no more such runs */
475 /* look for the limit run of such a sequence (the run behind it) */
476 for(limitRun
=firstRun
; ++limitRun
<runCount
&& levels
[runs
[limitRun
].logicalStart
]>=maxLevel
;) {}
478 /* Swap the entire sequence of runs from firstRun to limitRun-1. */
480 while(firstRun
<endRun
) {
481 tempRun
= runs
[firstRun
];
482 runs
[firstRun
]=runs
[endRun
];
483 runs
[endRun
]=tempRun
;
488 if(limitRun
==runCount
) {
489 break; /* no more such runs */
496 /* now do maxLevel==old minLevel (==odd!), see above */
500 /* include the trailing WS run in this complete reordering */
501 if(pBiDi
->trailingWSStart
==pBiDi
->length
) {
505 /* Swap the entire sequence of all runs. (endRun==runCount) */
506 while(firstRun
<runCount
) {
507 tempRun
=runs
[firstRun
];
508 runs
[firstRun
]=runs
[runCount
];
509 runs
[runCount
]=tempRun
;
516 /* compute the runs array --------------------------------------------------- */
518 static int32_t getRunFromLogicalIndex(UBiDi
*pBiDi
, int32_t logicalIndex
, UErrorCode
*pErrorCode
) {
519 Run
*runs
=pBiDi
->runs
;
520 int32_t runCount
=pBiDi
->runCount
, visualStart
=0, i
, length
, logicalStart
;
522 for(i
=0; i
<runCount
; i
++) {
523 length
=runs
[i
].visualLimit
-visualStart
;
524 logicalStart
=GET_INDEX(runs
[i
].logicalStart
);
525 if((logicalIndex
>=logicalStart
) && (logicalIndex
<(logicalStart
+length
))) {
530 /* we should never get here */
532 *pErrorCode
= U_INVALID_STATE_ERROR
;
537 * Compute the runs array from the levels array.
538 * After ubidi_getRuns() returns TRUE, runCount is guaranteed to be >0
539 * and the runs are reordered.
540 * Odd-level runs have visualStart on their visual right edge and
541 * they progress visually to the left.
542 * If option UBIDI_OPTION_INSERT_MARKS is set, insertRemove will contain the
543 * sum of appropriate LRM/RLM_BEFORE/AFTER flags.
544 * If option UBIDI_OPTION_REMOVE_CONTROLS is set, insertRemove will contain the
545 * negative number of BiDi control characters within this run.
548 ubidi_getRuns(UBiDi
*pBiDi
, UErrorCode
*pErrorCode
) {
550 * This method returns immediately if the runs are already set. This
551 * includes the case of length==0 (handled in setPara)..
553 if (pBiDi
->runCount
>=0) {
557 if(pBiDi
->direction
!=UBIDI_MIXED
) {
558 /* simple, single-run case - this covers length==0 */
559 /* pBiDi->paraLevel is ok even for contextual multiple paragraphs */
560 getSingleRun(pBiDi
, pBiDi
->paraLevel
);
561 } else /* UBIDI_MIXED, length>0 */ {
562 /* mixed directionality */
563 int32_t length
=pBiDi
->length
, limit
;
564 UBiDiLevel
*levels
=pBiDi
->levels
;
566 UBiDiLevel level
=UBIDI_DEFAULT_LTR
; /* initialize with no valid level */
568 * If there are WS characters at the end of the line
569 * and the run preceding them has a level different from
570 * paraLevel, then they will form their own run at paraLevel (L1).
571 * Count them separately.
572 * We need some special treatment for this in order to not
573 * modify the levels array which a line UBiDi object shares
574 * with its paragraph parent and its other line siblings.
575 * In other words, for the trailing WS, it may be
576 * levels[]!=paraLevel but we have to treat it like it were so.
578 limit
=pBiDi
->trailingWSStart
;
579 /* count the runs, there is at least one non-WS run, and limit>0 */
581 for(i
=0; i
<limit
; ++i
) {
582 /* increment runCount at the start of each run */
583 if(levels
[i
]!=level
) {
590 * We don't need to see if the last run can be merged with a trailing
591 * WS run because setTrailingWSStart() would have done that.
593 if(runCount
==1 && limit
==length
) {
594 /* There is only one non-WS run and no trailing WS-run. */
595 getSingleRun(pBiDi
, levels
[0]);
596 } else /* runCount>1 || limit<length */ {
597 /* allocate and set the runs */
599 int32_t runIndex
, start
;
600 UBiDiLevel minLevel
=UBIDI_MAX_EXPLICIT_LEVEL
+1, maxLevel
=0;
602 /* now, count a (non-mergeable) WS run */
608 if(getRunsMemory(pBiDi
, runCount
)) {
609 runs
=pBiDi
->runsMemory
;
615 /* FOOD FOR THOUGHT: this could be optimized, e.g.:
616 * 464->444, 484->444, 575->555, 595->555
617 * However, that would take longer. Check also how it would
618 * interact with BiDi control removal and inserting Marks.
622 /* search for the run limits and initialize visualLimit values with the run lengths */
625 /* prepare this run */
635 /* look for the run limit */
636 while(++i
<limit
&& levels
[i
]==level
) {}
638 /* i is another run limit */
639 runs
[runIndex
].logicalStart
=start
;
640 runs
[runIndex
].visualLimit
=i
-start
;
641 runs
[runIndex
].insertRemove
=0;
646 /* there is a separate WS run */
647 runs
[runIndex
].logicalStart
=limit
;
648 runs
[runIndex
].visualLimit
=length
-limit
;
649 /* For the trailing WS run, pBiDi->paraLevel is ok even
650 if contextual multiple paragraphs. */
651 if(pBiDi
->paraLevel
<minLevel
) {
652 minLevel
=pBiDi
->paraLevel
;
656 /* set the object fields */
658 pBiDi
->runCount
=runCount
;
660 reorderLine(pBiDi
, minLevel
, maxLevel
);
662 /* now add the direction flags and adjust the visualLimit's to be just that */
663 /* this loop will also handle the trailing WS run */
665 for(i
=0; i
<runCount
; ++i
) {
666 ADD_ODD_BIT_FROM_LEVEL(runs
[i
].logicalStart
, levels
[runs
[i
].logicalStart
]);
667 limit
+=runs
[i
].visualLimit
;
668 runs
[i
].visualLimit
=limit
;
671 /* Set the "odd" bit for the trailing WS run. */
672 /* For a RTL paragraph, it will be the *first* run in visual order. */
673 /* For the trailing WS run, pBiDi->paraLevel is ok even if
674 contextual multiple paragraphs. */
675 if(runIndex
<runCount
) {
676 int32_t trailingRun
= ((pBiDi
->paraLevel
& 1) != 0)? 0 : runIndex
;
678 ADD_ODD_BIT_FROM_LEVEL(runs
[trailingRun
].logicalStart
, pBiDi
->paraLevel
);
683 /* handle insert LRM/RLM BEFORE/AFTER run */
684 if(pBiDi
->insertPoints
.size
>0) {
685 Point
*point
, *start
=pBiDi
->insertPoints
.points
,
686 *limit
=start
+pBiDi
->insertPoints
.size
;
688 for(point
=start
; point
<limit
; point
++) {
689 runIndex
=getRunFromLogicalIndex(pBiDi
, point
->pos
, pErrorCode
);
690 pBiDi
->runs
[runIndex
].insertRemove
|=point
->flag
;
694 /* handle remove BiDi control characters */
695 if(pBiDi
->controlCount
>0) {
697 const UChar
*start
=pBiDi
->text
, *limit
=start
+pBiDi
->length
, *pu
;
698 for(pu
=start
; pu
<limit
; pu
++) {
699 if(IS_BIDI_CONTROL_CHAR(*pu
)) {
700 runIndex
=getRunFromLogicalIndex(pBiDi
, (int32_t)(pu
-start
), pErrorCode
);
701 pBiDi
->runs
[runIndex
].insertRemove
--;
710 prepareReorder(const UBiDiLevel
*levels
, int32_t length
,
712 UBiDiLevel
*pMinLevel
, UBiDiLevel
*pMaxLevel
) {
714 UBiDiLevel level
, minLevel
, maxLevel
;
716 if(levels
==NULL
|| length
<=0) {
720 /* determine minLevel and maxLevel */
721 minLevel
=UBIDI_MAX_EXPLICIT_LEVEL
+1;
723 for(start
=length
; start
>0;) {
724 level
=levels
[--start
];
725 if(level
>UBIDI_MAX_EXPLICIT_LEVEL
+1) {
738 /* initialize the index map */
739 for(start
=length
; start
>0;) {
741 indexMap
[start
]=start
;
747 /* reorder a line based on a levels array (L2) ------------------------------ */
749 U_CAPI
void U_EXPORT2
750 ubidi_reorderLogical(const UBiDiLevel
*levels
, int32_t length
, int32_t *indexMap
) {
751 int32_t start
, limit
, sumOfSosEos
;
752 UBiDiLevel minLevel
= 0, maxLevel
= 0;
754 if(indexMap
==NULL
|| !prepareReorder(levels
, length
, indexMap
, &minLevel
, &maxLevel
)) {
759 if(minLevel
==maxLevel
&& (minLevel
&1)==0) {
763 /* reorder only down to the lowest odd level */
766 /* loop maxLevel..minLevel */
770 /* loop for all sequences of levels to reorder at the current maxLevel */
772 /* look for a sequence of levels that are all at >=maxLevel */
773 /* look for the first index of such a sequence */
774 while(start
<length
&& levels
[start
]<maxLevel
) {
778 break; /* no more such sequences */
781 /* look for the limit of such a sequence (the index behind it) */
782 for(limit
=start
; ++limit
<length
&& levels
[limit
]>=maxLevel
;) {}
785 * sos=start of sequence, eos=end of sequence
787 * The closed (inclusive) interval from sos to eos includes all the logical
788 * and visual indexes within this sequence. They are logically and
789 * visually contiguous and in the same range.
791 * For each run, the new visual index=sos+eos-old visual index;
792 * we pre-add sos+eos into sumOfSosEos ->
793 * new visual index=sumOfSosEos-old visual index;
795 sumOfSosEos
=start
+limit
-1;
797 /* reorder each index in the sequence */
799 indexMap
[start
]=sumOfSosEos
-indexMap
[start
];
800 } while(++start
<limit
);
804 break; /* no more such sequences */
809 } while(--maxLevel
>=minLevel
);
812 U_CAPI
void U_EXPORT2
813 ubidi_reorderVisual(const UBiDiLevel
*levels
, int32_t length
, int32_t *indexMap
) {
814 int32_t start
, end
, limit
, temp
;
815 UBiDiLevel minLevel
= 0, maxLevel
= 0;
817 if(indexMap
==NULL
|| !prepareReorder(levels
, length
, indexMap
, &minLevel
, &maxLevel
)) {
822 if(minLevel
==maxLevel
&& (minLevel
&1)==0) {
826 /* reorder only down to the lowest odd level */
829 /* loop maxLevel..minLevel */
833 /* loop for all sequences of levels to reorder at the current maxLevel */
835 /* look for a sequence of levels that are all at >=maxLevel */
836 /* look for the first index of such a sequence */
837 while(start
<length
&& levels
[start
]<maxLevel
) {
841 break; /* no more such runs */
844 /* look for the limit of such a sequence (the index behind it) */
845 for(limit
=start
; ++limit
<length
&& levels
[limit
]>=maxLevel
;) {}
848 * Swap the entire interval of indexes from start to limit-1.
849 * We don't need to swap the levels for the purpose of this
850 * algorithm: the sequence of levels that we look at does not
855 temp
=indexMap
[start
];
856 indexMap
[start
]=indexMap
[end
];
864 break; /* no more such sequences */
869 } while(--maxLevel
>=minLevel
);
872 /* API functions for logical<->visual mapping ------------------------------- */
874 U_CAPI
int32_t U_EXPORT2
875 ubidi_getVisualIndex(UBiDi
*pBiDi
, int32_t logicalIndex
, UErrorCode
*pErrorCode
) {
876 int32_t visualIndex
=UBIDI_MAP_NOWHERE
;
877 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
, -1);
878 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi
, *pErrorCode
, -1);
879 RETURN_IF_BAD_RANGE(logicalIndex
, 0, pBiDi
->length
, *pErrorCode
, -1);
881 /* we can do the trivial cases without the runs array */
882 switch(pBiDi
->direction
) {
884 visualIndex
=logicalIndex
;
887 visualIndex
=pBiDi
->length
-logicalIndex
-1;
890 if(!ubidi_getRuns(pBiDi
, pErrorCode
)) {
891 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
894 Run
*runs
=pBiDi
->runs
;
895 int32_t i
, visualStart
=0, offset
, length
;
897 /* linear search for the run, search on the visual runs */
898 for(i
=0; i
<pBiDi
->runCount
; ++i
) {
899 length
=runs
[i
].visualLimit
-visualStart
;
900 offset
=logicalIndex
-GET_INDEX(runs
[i
].logicalStart
);
901 if(offset
>=0 && offset
<length
) {
902 if(IS_EVEN_RUN(runs
[i
].logicalStart
)) {
904 visualIndex
=visualStart
+offset
;
907 visualIndex
=visualStart
+length
-offset
-1;
909 break; /* exit for loop */
913 if(i
>=pBiDi
->runCount
) {
914 return UBIDI_MAP_NOWHERE
;
919 if(pBiDi
->insertPoints
.size
>0) {
920 /* add the number of added marks until the calculated visual index */
921 Run
*runs
=pBiDi
->runs
;
922 int32_t i
, length
, insertRemove
;
923 int32_t visualStart
=0, markFound
=0;
924 for(i
=0; ; i
++, visualStart
+=length
) {
925 length
=runs
[i
].visualLimit
-visualStart
;
926 insertRemove
=runs
[i
].insertRemove
;
927 if(insertRemove
& (LRM_BEFORE
|RLM_BEFORE
)) {
930 /* is it the run containing the visual index? */
931 if(visualIndex
<runs
[i
].visualLimit
) {
932 return visualIndex
+markFound
;
934 if(insertRemove
& (LRM_AFTER
|RLM_AFTER
)) {
939 else if(pBiDi
->controlCount
>0) {
940 /* subtract the number of controls until the calculated visual index */
941 Run
*runs
=pBiDi
->runs
;
942 int32_t i
, j
, start
, limit
, length
, insertRemove
;
943 int32_t visualStart
=0, controlFound
=0;
944 UChar uchar
=pBiDi
->text
[logicalIndex
];
945 /* is the logical index pointing to a control ? */
946 if(IS_BIDI_CONTROL_CHAR(uchar
)) {
947 return UBIDI_MAP_NOWHERE
;
950 for(i
=0; ; i
++, visualStart
+=length
) {
951 length
=runs
[i
].visualLimit
-visualStart
;
952 insertRemove
=runs
[i
].insertRemove
;
953 /* calculated visual index is beyond this run? */
954 if(visualIndex
>=runs
[i
].visualLimit
) {
955 controlFound
-=insertRemove
;
958 /* calculated visual index must be within current run */
959 if(insertRemove
==0) {
960 return visualIndex
-controlFound
;
962 if(IS_EVEN_RUN(runs
[i
].logicalStart
)) {
963 /* LTR: check from run start to logical index */
964 start
=runs
[i
].logicalStart
;
967 /* RTL: check from logical index to run end */
968 start
=logicalIndex
+1;
969 limit
=GET_INDEX(runs
[i
].logicalStart
)+length
;
971 for(j
=start
; j
<limit
; j
++) {
972 uchar
=pBiDi
->text
[j
];
973 if(IS_BIDI_CONTROL_CHAR(uchar
)) {
977 return visualIndex
-controlFound
;
984 U_CAPI
int32_t U_EXPORT2
985 ubidi_getLogicalIndex(UBiDi
*pBiDi
, int32_t visualIndex
, UErrorCode
*pErrorCode
) {
987 int32_t i
, runCount
, start
;
988 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
, -1);
989 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi
, *pErrorCode
, -1);
990 RETURN_IF_BAD_RANGE(visualIndex
, 0, pBiDi
->resultLength
, *pErrorCode
, -1);
991 /* we can do the trivial cases without the runs array */
992 if(pBiDi
->insertPoints
.size
==0 && pBiDi
->controlCount
==0) {
993 if(pBiDi
->direction
==UBIDI_LTR
) {
996 else if(pBiDi
->direction
==UBIDI_RTL
) {
997 return pBiDi
->length
-visualIndex
-1;
1000 if(!ubidi_getRuns(pBiDi
, pErrorCode
)) {
1001 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
1006 runCount
=pBiDi
->runCount
;
1007 if(pBiDi
->insertPoints
.size
>0) {
1008 /* handle inserted LRM/RLM */
1009 int32_t markFound
=0, insertRemove
;
1010 int32_t visualStart
=0, length
;
1012 /* subtract number of marks until visual index */
1013 for(i
=0; ; i
++, visualStart
+=length
) {
1014 length
=runs
[i
].visualLimit
-visualStart
;
1015 insertRemove
=runs
[i
].insertRemove
;
1016 if(insertRemove
&(LRM_BEFORE
|RLM_BEFORE
)) {
1017 if(visualIndex
<=(visualStart
+markFound
)) {
1018 return UBIDI_MAP_NOWHERE
;
1022 /* is adjusted visual index within this run? */
1023 if(visualIndex
<(runs
[i
].visualLimit
+markFound
)) {
1024 visualIndex
-=markFound
;
1027 if(insertRemove
&(LRM_AFTER
|RLM_AFTER
)) {
1028 if(visualIndex
==(visualStart
+length
+markFound
)) {
1029 return UBIDI_MAP_NOWHERE
;
1035 else if(pBiDi
->controlCount
>0) {
1036 /* handle removed BiDi control characters */
1037 int32_t controlFound
=0, insertRemove
, length
;
1038 int32_t logicalStart
, logicalEnd
, visualStart
=0, j
, k
;
1041 /* add number of controls until visual index */
1042 for(i
=0; ; i
++, visualStart
+=length
) {
1043 length
=runs
[i
].visualLimit
-visualStart
;
1044 insertRemove
=runs
[i
].insertRemove
;
1045 /* is adjusted visual index beyond current run? */
1046 if(visualIndex
>=(runs
[i
].visualLimit
-controlFound
+insertRemove
)) {
1047 controlFound
-=insertRemove
;
1050 /* adjusted visual index is within current run */
1051 if(insertRemove
==0) {
1052 visualIndex
+=controlFound
;
1055 /* count non-control chars until visualIndex */
1056 logicalStart
=runs
[i
].logicalStart
;
1057 evenRun
=IS_EVEN_RUN(logicalStart
);
1058 REMOVE_ODD_BIT(logicalStart
);
1059 logicalEnd
=logicalStart
+length
-1;
1060 for(j
=0; j
<length
; j
++) {
1061 k
= evenRun
? logicalStart
+j
: logicalEnd
-j
;
1062 uchar
=pBiDi
->text
[k
];
1063 if(IS_BIDI_CONTROL_CHAR(uchar
)) {
1066 if((visualIndex
+controlFound
)==(visualStart
+j
)) {
1070 visualIndex
+=controlFound
;
1074 /* handle all cases */
1076 /* linear search for the run */
1077 for(i
=0; visualIndex
>=runs
[i
].visualLimit
; ++i
) {}
1079 /* binary search for the run */
1080 int32_t begin
=0, limit
=runCount
;
1082 /* the middle if() is guaranteed to find the run, we don't need a loop limit */
1085 if(visualIndex
>=runs
[i
].visualLimit
) {
1087 } else if(i
==0 || visualIndex
>=runs
[i
-1].visualLimit
) {
1095 start
=runs
[i
].logicalStart
;
1096 if(IS_EVEN_RUN(start
)) {
1098 /* the offset in runs[i] is visualIndex-runs[i-1].visualLimit */
1100 visualIndex
-=runs
[i
-1].visualLimit
;
1102 return start
+visualIndex
;
1105 return GET_INDEX(start
)+runs
[i
].visualLimit
-visualIndex
-1;
1109 U_CAPI
void U_EXPORT2
1110 ubidi_getLogicalMap(UBiDi
*pBiDi
, int32_t *indexMap
, UErrorCode
*pErrorCode
) {
1111 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
);
1112 /* ubidi_countRuns() checks for VALID_PARA_OR_LINE */
1113 ubidi_countRuns(pBiDi
, pErrorCode
);
1114 if(U_FAILURE(*pErrorCode
)) {
1116 } else if(indexMap
==NULL
) {
1117 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
1119 /* fill a logical-to-visual index map using the runs[] */
1120 int32_t visualStart
, visualLimit
, i
, j
, k
;
1121 int32_t logicalStart
, logicalLimit
;
1122 Run
*runs
=pBiDi
->runs
;
1123 if (pBiDi
->length
<=0) {
1126 if (pBiDi
->length
>pBiDi
->resultLength
) {
1127 uprv_memset(indexMap
, 0xFF, pBiDi
->length
*sizeof(int32_t));
1131 for(j
=0; j
<pBiDi
->runCount
; ++j
) {
1132 logicalStart
=GET_INDEX(runs
[j
].logicalStart
);
1133 visualLimit
=runs
[j
].visualLimit
;
1134 if(IS_EVEN_RUN(runs
[j
].logicalStart
)) {
1136 indexMap
[logicalStart
++]=visualStart
++;
1137 } while(visualStart
<visualLimit
);
1139 logicalStart
+=visualLimit
-visualStart
; /* logicalLimit */
1141 indexMap
[--logicalStart
]=visualStart
++;
1142 } while(visualStart
<visualLimit
);
1144 /* visualStart==visualLimit; */
1147 if(pBiDi
->insertPoints
.size
>0) {
1148 int32_t markFound
=0, runCount
=pBiDi
->runCount
;
1149 int32_t length
, insertRemove
;
1151 /* add number of marks found until each index */
1152 for(i
=0; i
<runCount
; i
++, visualStart
+=length
) {
1153 length
=runs
[i
].visualLimit
-visualStart
;
1154 insertRemove
=runs
[i
].insertRemove
;
1155 if(insertRemove
&(LRM_BEFORE
|RLM_BEFORE
)) {
1159 logicalStart
=GET_INDEX(runs
[i
].logicalStart
);
1160 logicalLimit
=logicalStart
+length
;
1161 for(j
=logicalStart
; j
<logicalLimit
; j
++) {
1162 indexMap
[j
]+=markFound
;
1165 if(insertRemove
&(LRM_AFTER
|RLM_AFTER
)) {
1170 else if(pBiDi
->controlCount
>0) {
1171 int32_t controlFound
=0, runCount
=pBiDi
->runCount
;
1172 int32_t length
, insertRemove
;
1176 /* subtract number of controls found until each index */
1177 for(i
=0; i
<runCount
; i
++, visualStart
+=length
) {
1178 length
=runs
[i
].visualLimit
-visualStart
;
1179 insertRemove
=runs
[i
].insertRemove
;
1180 /* no control found within previous runs nor within this run */
1181 if((controlFound
-insertRemove
)==0) {
1184 logicalStart
=runs
[i
].logicalStart
;
1185 evenRun
=IS_EVEN_RUN(logicalStart
);
1186 REMOVE_ODD_BIT(logicalStart
);
1187 logicalLimit
=logicalStart
+length
;
1188 /* if no control within this run */
1189 if(insertRemove
==0) {
1190 for(j
=logicalStart
; j
<logicalLimit
; j
++) {
1191 indexMap
[j
]-=controlFound
;
1195 for(j
=0; j
<length
; j
++) {
1196 k
= evenRun
? logicalStart
+j
: logicalLimit
-j
-1;
1197 uchar
=pBiDi
->text
[k
];
1198 if(IS_BIDI_CONTROL_CHAR(uchar
)) {
1200 indexMap
[k
]=UBIDI_MAP_NOWHERE
;
1203 indexMap
[k
]-=controlFound
;
1210 U_CAPI
void U_EXPORT2
1211 ubidi_getVisualMap(UBiDi
*pBiDi
, int32_t *indexMap
, UErrorCode
*pErrorCode
) {
1212 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
);
1213 if(indexMap
==NULL
) {
1214 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
1217 /* ubidi_countRuns() checks for VALID_PARA_OR_LINE */
1218 ubidi_countRuns(pBiDi
, pErrorCode
);
1219 if(U_SUCCESS(*pErrorCode
)) {
1220 /* fill a visual-to-logical index map using the runs[] */
1221 Run
*runs
=pBiDi
->runs
, *runsLimit
=runs
+pBiDi
->runCount
;
1222 int32_t logicalStart
, visualStart
, visualLimit
, *pi
=indexMap
;
1224 if (pBiDi
->resultLength
<=0) {
1228 for(; runs
<runsLimit
; ++runs
) {
1229 logicalStart
=runs
->logicalStart
;
1230 visualLimit
=runs
->visualLimit
;
1231 if(IS_EVEN_RUN(logicalStart
)) {
1233 *pi
++ = logicalStart
++;
1234 } while(++visualStart
<visualLimit
);
1236 REMOVE_ODD_BIT(logicalStart
);
1237 logicalStart
+=visualLimit
-visualStart
; /* logicalLimit */
1239 *pi
++ = --logicalStart
;
1240 } while(++visualStart
<visualLimit
);
1242 /* visualStart==visualLimit; */
1245 if(pBiDi
->insertPoints
.size
>0) {
1246 int32_t markFound
=0, runCount
=pBiDi
->runCount
;
1247 int32_t insertRemove
, i
, j
, k
;
1249 /* count all inserted marks */
1250 for(i
=0; i
<runCount
; i
++) {
1251 insertRemove
=runs
[i
].insertRemove
;
1252 if(insertRemove
&(LRM_BEFORE
|RLM_BEFORE
)) {
1255 if(insertRemove
&(LRM_AFTER
|RLM_AFTER
)) {
1259 /* move back indexes by number of preceding marks */
1260 k
=pBiDi
->resultLength
;
1261 for(i
=runCount
-1; i
>=0 && markFound
>0; i
--) {
1262 insertRemove
=runs
[i
].insertRemove
;
1263 if(insertRemove
&(LRM_AFTER
|RLM_AFTER
)) {
1264 indexMap
[--k
]= UBIDI_MAP_NOWHERE
;
1267 visualStart
= i
>0 ? runs
[i
-1].visualLimit
: 0;
1268 for(j
=runs
[i
].visualLimit
-1; j
>=visualStart
&& markFound
>0; j
--) {
1269 indexMap
[--k
]=indexMap
[j
];
1271 if(insertRemove
&(LRM_BEFORE
|RLM_BEFORE
)) {
1272 indexMap
[--k
]= UBIDI_MAP_NOWHERE
;
1277 else if(pBiDi
->controlCount
>0) {
1278 int32_t runCount
=pBiDi
->runCount
, logicalEnd
;
1279 int32_t insertRemove
, length
, i
, j
, k
, m
;
1284 /* move forward indexes by number of preceding controls */
1286 for(i
=0; i
<runCount
; i
++, visualStart
+=length
) {
1287 length
=runs
[i
].visualLimit
-visualStart
;
1288 insertRemove
=runs
[i
].insertRemove
;
1289 /* if no control found yet, nothing to do in this run */
1290 if((insertRemove
==0)&&(k
==visualStart
)) {
1294 /* if no control in this run */
1295 if(insertRemove
==0) {
1296 visualLimit
=runs
[i
].visualLimit
;
1297 for(j
=visualStart
; j
<visualLimit
; j
++) {
1298 indexMap
[k
++]=indexMap
[j
];
1302 logicalStart
=runs
[i
].logicalStart
;
1303 evenRun
=IS_EVEN_RUN(logicalStart
);
1304 REMOVE_ODD_BIT(logicalStart
);
1305 logicalEnd
=logicalStart
+length
-1;
1306 for(j
=0; j
<length
; j
++) {
1307 m
= evenRun
? logicalStart
+j
: logicalEnd
-j
;
1308 uchar
=pBiDi
->text
[m
];
1309 if(!IS_BIDI_CONTROL_CHAR(uchar
)) {
1318 U_CAPI
void U_EXPORT2
1319 ubidi_invertMap(const int32_t *srcMap
, int32_t *destMap
, int32_t length
) {
1320 if(srcMap
!=NULL
&& destMap
!=NULL
&& length
>0) {
1322 int32_t destLength
=-1, count
=0;
1323 /* find highest value and count positive indexes in srcMap */
1326 if(*--pi
>destLength
) {
1333 destLength
++; /* add 1 for origin 0 */
1334 if(count
<destLength
) {
1335 /* we must fill unmatched destMap entries with -1 */
1336 uprv_memset(destMap
, 0xFF, destLength
*sizeof(int32_t));
1341 destMap
[*pi
]=--length
;