1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 ******************************************************************************
6 * Copyright (C) 1999-2015, International Business Machines
7 * Corporation and others. All Rights Reserved.
9 ******************************************************************************
10 * file name: ubidiln.c
12 * tab size: 8 (not used)
15 * created on: 1999aug06
16 * created by: Markus W. Scherer, updated by Matitiahu Allouche
20 #include "unicode/utypes.h"
21 #include "unicode/ustring.h"
22 #include "unicode/uchar.h"
23 #include "unicode/ubidi.h"
28 * General remarks about the functions in this file:
30 * These functions deal with the aspects of potentially mixed-directional
31 * text in a single paragraph or in a line of a single paragraph
32 * which has already been processed according to
33 * the Unicode 6.3 BiDi algorithm as defined in
34 * http://www.unicode.org/unicode/reports/tr9/ , version 28,
35 * also described in The Unicode Standard, Version 6.3.0 .
37 * This means that there is a UBiDi object with a levels
38 * and a dirProps array.
39 * paraLevel and direction are also set.
40 * Only if the length of the text is zero, then levels==dirProps==NULL.
42 * The overall directionality of the paragraph
43 * or line is used to bypass the reordering steps if possible.
44 * Even purely RTL text does not need reordering there because
45 * the ubidi_getLogical/VisualIndex() functions can compute the
46 * index on the fly in such a case.
48 * The implementation of the access to same-level-runs and of the reordering
49 * do attempt to provide better performance and less memory usage compared to
50 * a direct implementation of especially rule (L2) with an array of
51 * one (32-bit) integer per text character.
53 * Here, the levels array is scanned as soon as necessary, and a vector of
54 * same-level-runs is created. Reordering then is done on this vector.
55 * For each run of text positions that were resolved to the same level,
56 * only 8 bytes are stored: the first text position of the run and the visual
57 * position behind the run after reordering.
58 * One sign bit is used to hold the directionality of the run.
59 * This is inefficient if there are many very short runs. If the average run
60 * length is <2, then this uses more memory.
62 * In a further attempt to save memory, the levels array is never changed
63 * after all the resolution rules (Xn, Wn, Nn, In).
64 * Many functions have to consider the field trailingWSStart:
65 * if it is less than length, then there is an implicit trailing run
67 * which is not reflected in the levels array.
68 * This allows a line UBiDi object to use the same levels array as
69 * its paragraph parent object.
71 * When a UBiDi object is created for a line of a paragraph, then the
72 * paragraph's levels and dirProps arrays are reused by way of setting
73 * a pointer into them, not by copying. This again saves memory and forbids to
74 * change the now shared levels for (L1).
77 /* handle trailing WS (L1) -------------------------------------------------- */
80 * setTrailingWSStart() sets the start index for a trailing
81 * run of WS in the line. This is necessary because we do not modify
82 * the paragraph's levels array that we just point into.
83 * Using trailingWSStart is another form of performing (L1).
85 * To make subsequent operations easier, we also include the run
86 * before the WS if it is at the paraLevel - we merge the two here.
88 * This function is called only from ubidi_setLine(), so pBiDi->paraLevel is
89 * set correctly for the line even when contextual multiple paragraphs.
92 setTrailingWSStart(UBiDi
*pBiDi
) {
93 /* pBiDi->direction!=UBIDI_MIXED */
95 const DirProp
*dirProps
=pBiDi
->dirProps
;
96 UBiDiLevel
*levels
=pBiDi
->levels
;
97 int32_t start
=pBiDi
->length
;
98 UBiDiLevel paraLevel
=pBiDi
->paraLevel
;
100 /* If the line is terminated by a block separator, all preceding WS etc...
101 are already set to paragraph level.
102 Setting trailingWSStart to pBidi->length will avoid changing the
103 level of B chars from 0 to paraLevel in ubidi_getLevels when
104 orderParagraphsLTR==TRUE.
106 if(dirProps
[start
-1]==B
) {
107 pBiDi
->trailingWSStart
=start
; /* currently == pBiDi->length */
110 /* go backwards across all WS, BN, explicit codes */
111 while(start
>0 && DIRPROP_FLAG(dirProps
[start
-1])&MASK_WS
) {
115 /* if the WS run can be merged with the previous run then do so here */
116 while(start
>0 && levels
[start
-1]==paraLevel
) {
120 pBiDi
->trailingWSStart
=start
;
123 /* ubidi_setLine ------------------------------------------------------------ */
125 U_CAPI
void U_EXPORT2
126 ubidi_setLine(const UBiDi
*pParaBiDi
,
127 int32_t start
, int32_t limit
,
129 UErrorCode
*pErrorCode
) {
132 /* check the argument values */
133 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
);
134 RETURN_VOID_IF_NOT_VALID_PARA(pParaBiDi
, *pErrorCode
);
135 RETURN_VOID_IF_BAD_RANGE(start
, 0, limit
, *pErrorCode
);
136 RETURN_VOID_IF_BAD_RANGE(limit
, 0, pParaBiDi
->length
+1, *pErrorCode
);
137 if(pLineBiDi
==NULL
) {
138 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
141 if(ubidi_getParagraph(pParaBiDi
, start
, NULL
, NULL
, NULL
, pErrorCode
) !=
142 ubidi_getParagraph(pParaBiDi
, limit
-1, NULL
, NULL
, NULL
, pErrorCode
)) {
143 /* the line crosses a paragraph boundary */
144 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
148 /* set the values in pLineBiDi from its pParaBiDi parent */
149 pLineBiDi
->pParaBiDi
=NULL
; /* mark unfinished setLine */
150 pLineBiDi
->text
=pParaBiDi
->text
+start
;
151 length
=pLineBiDi
->length
=limit
-start
;
152 pLineBiDi
->resultLength
=pLineBiDi
->originalLength
=length
;
153 pLineBiDi
->paraLevel
=GET_PARALEVEL(pParaBiDi
, start
);
154 pLineBiDi
->paraCount
=pParaBiDi
->paraCount
;
155 pLineBiDi
->runs
=NULL
;
157 pLineBiDi
->reorderingMode
=pParaBiDi
->reorderingMode
;
158 pLineBiDi
->reorderingOptions
=pParaBiDi
->reorderingOptions
;
159 pLineBiDi
->controlCount
=0;
160 if(pParaBiDi
->controlCount
>0) {
162 for(j
=start
; j
<limit
; j
++) {
163 if(IS_BIDI_CONTROL_CHAR(pParaBiDi
->text
[j
])) {
164 pLineBiDi
->controlCount
++;
167 pLineBiDi
->resultLength
-=pLineBiDi
->controlCount
;
170 pLineBiDi
->dirProps
=pParaBiDi
->dirProps
+start
;
171 pLineBiDi
->levels
=pParaBiDi
->levels
+start
;
172 pLineBiDi
->runCount
=-1;
174 if(pParaBiDi
->direction
!=UBIDI_MIXED
) {
175 /* the parent is already trivial */
176 pLineBiDi
->direction
=pParaBiDi
->direction
;
179 * The parent's levels are all either
180 * implicitly or explicitly ==paraLevel;
183 if(pParaBiDi
->trailingWSStart
<=start
) {
184 pLineBiDi
->trailingWSStart
=0;
185 } else if(pParaBiDi
->trailingWSStart
<limit
) {
186 pLineBiDi
->trailingWSStart
=pParaBiDi
->trailingWSStart
-start
;
188 pLineBiDi
->trailingWSStart
=length
;
191 const UBiDiLevel
*levels
=pLineBiDi
->levels
;
192 int32_t i
, trailingWSStart
;
195 setTrailingWSStart(pLineBiDi
);
196 trailingWSStart
=pLineBiDi
->trailingWSStart
;
198 /* recalculate pLineBiDi->direction */
199 if(trailingWSStart
==0) {
200 /* all levels are at paraLevel */
201 pLineBiDi
->direction
=(UBiDiDirection
)(pLineBiDi
->paraLevel
&1);
203 /* get the level of the first character */
204 level
=(UBiDiLevel
)(levels
[0]&1);
206 /* if there is anything of a different level, then the line is mixed */
207 if(trailingWSStart
<length
&& (pLineBiDi
->paraLevel
&1)!=level
) {
208 /* the trailing WS is at paraLevel, which differs from levels[0] */
209 pLineBiDi
->direction
=UBIDI_MIXED
;
211 /* see if levels[1..trailingWSStart-1] have the same direction as levels[0] and paraLevel */
214 if(i
==trailingWSStart
) {
215 /* the direction values match those in level */
216 pLineBiDi
->direction
=(UBiDiDirection
)level
;
218 } else if((levels
[i
]&1)!=level
) {
219 pLineBiDi
->direction
=UBIDI_MIXED
;
227 switch(pLineBiDi
->direction
) {
229 /* make sure paraLevel is even */
230 pLineBiDi
->paraLevel
=(UBiDiLevel
)((pLineBiDi
->paraLevel
+1)&~1);
232 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
233 pLineBiDi
->trailingWSStart
=0;
236 /* make sure paraLevel is odd */
237 pLineBiDi
->paraLevel
|=1;
239 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
240 pLineBiDi
->trailingWSStart
=0;
246 pLineBiDi
->pParaBiDi
=pParaBiDi
; /* mark successful setLine */
250 U_CAPI UBiDiLevel U_EXPORT2
251 ubidi_getLevelAt(const UBiDi
*pBiDi
, int32_t charIndex
) {
252 /* return paraLevel if in the trailing WS run, otherwise the real level */
253 if(!IS_VALID_PARA_OR_LINE(pBiDi
) || charIndex
<0 || pBiDi
->length
<=charIndex
) {
255 } else if(pBiDi
->direction
!=UBIDI_MIXED
|| charIndex
>=pBiDi
->trailingWSStart
) {
256 return GET_PARALEVEL(pBiDi
, charIndex
);
258 return pBiDi
->levels
[charIndex
];
262 U_CAPI
const UBiDiLevel
* U_EXPORT2
263 ubidi_getLevels(UBiDi
*pBiDi
, UErrorCode
*pErrorCode
) {
264 int32_t start
, length
;
266 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
, NULL
);
267 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi
, *pErrorCode
, NULL
);
268 if((length
=pBiDi
->length
)<=0) {
269 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
272 if((start
=pBiDi
->trailingWSStart
)==length
) {
273 /* the current levels array reflects the WS run */
274 return pBiDi
->levels
;
278 * After the previous if(), we know that the levels array
279 * has an implicit trailing WS run and therefore does not fully
280 * reflect itself all the levels.
281 * This must be a UBiDi object for a line, and
282 * we need to create a new levels array.
284 if(getLevelsMemory(pBiDi
, length
)) {
285 UBiDiLevel
*levels
=pBiDi
->levelsMemory
;
287 if(start
>0 && levels
!=pBiDi
->levels
) {
288 uprv_memcpy(levels
, pBiDi
->levels
, start
);
290 /* pBiDi->paraLevel is ok even if contextual multiple paragraphs,
291 since pBidi is a line object */
292 uprv_memset(levels
+start
, pBiDi
->paraLevel
, length
-start
);
294 /* this new levels array is set for the line and reflects the WS run */
295 pBiDi
->trailingWSStart
=length
;
296 return pBiDi
->levels
=levels
;
299 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
304 U_CAPI
void U_EXPORT2
305 ubidi_getLogicalRun(const UBiDi
*pBiDi
, int32_t logicalPosition
,
306 int32_t *pLogicalLimit
, UBiDiLevel
*pLevel
) {
307 UErrorCode errorCode
;
308 int32_t runCount
, visualStart
, logicalLimit
, logicalFirst
, i
;
311 errorCode
=U_ZERO_ERROR
;
312 RETURN_VOID_IF_BAD_RANGE(logicalPosition
, 0, pBiDi
->length
, errorCode
);
313 /* ubidi_countRuns will check VALID_PARA_OR_LINE */
314 runCount
=ubidi_countRuns((UBiDi
*)pBiDi
, &errorCode
);
315 if(U_FAILURE(errorCode
)) {
318 /* this is done based on runs rather than on levels since levels have
319 a special interpretation when UBIDI_REORDER_RUNS_ONLY
321 visualStart
=logicalLimit
=0;
324 for(i
=0; i
<runCount
; i
++) {
325 iRun
= pBiDi
->runs
[i
];
326 logicalFirst
=GET_INDEX(iRun
.logicalStart
);
327 logicalLimit
=logicalFirst
+iRun
.visualLimit
-visualStart
;
328 if((logicalPosition
>=logicalFirst
) &&
329 (logicalPosition
<logicalLimit
)) {
332 visualStart
= iRun
.visualLimit
;
335 *pLogicalLimit
=logicalLimit
;
338 if(pBiDi
->reorderingMode
==UBIDI_REORDER_RUNS_ONLY
) {
339 *pLevel
=(UBiDiLevel
)GET_ODD_BIT(iRun
.logicalStart
);
341 else if(pBiDi
->direction
!=UBIDI_MIXED
|| logicalPosition
>=pBiDi
->trailingWSStart
) {
342 *pLevel
=GET_PARALEVEL(pBiDi
, logicalPosition
);
344 *pLevel
=pBiDi
->levels
[logicalPosition
];
349 /* runs API functions ------------------------------------------------------- */
351 U_CAPI
int32_t U_EXPORT2
352 ubidi_countRuns(UBiDi
*pBiDi
, UErrorCode
*pErrorCode
) {
353 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
, -1);
354 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi
, *pErrorCode
, -1);
355 ubidi_getRuns(pBiDi
, pErrorCode
);
356 if(U_FAILURE(*pErrorCode
)) {
359 return pBiDi
->runCount
;
362 U_CAPI UBiDiDirection U_EXPORT2
363 ubidi_getVisualRun(UBiDi
*pBiDi
, int32_t runIndex
,
364 int32_t *pLogicalStart
, int32_t *pLength
)
367 UErrorCode errorCode
= U_ZERO_ERROR
;
368 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi
, errorCode
, UBIDI_LTR
);
369 ubidi_getRuns(pBiDi
, &errorCode
);
370 if(U_FAILURE(errorCode
)) {
373 RETURN_IF_BAD_RANGE(runIndex
, 0, pBiDi
->runCount
, errorCode
, UBIDI_LTR
);
375 start
=pBiDi
->runs
[runIndex
].logicalStart
;
376 if(pLogicalStart
!=NULL
) {
377 *pLogicalStart
=GET_INDEX(start
);
381 *pLength
=pBiDi
->runs
[runIndex
].visualLimit
-
382 pBiDi
->runs
[runIndex
-1].visualLimit
;
384 *pLength
=pBiDi
->runs
[0].visualLimit
;
387 return (UBiDiDirection
)GET_ODD_BIT(start
);
390 /* in trivial cases there is only one trivial run; called by ubidi_getRuns() */
392 getSingleRun(UBiDi
*pBiDi
, UBiDiLevel level
) {
393 /* simple, single-run case */
394 pBiDi
->runs
=pBiDi
->simpleRuns
;
397 /* fill and reorder the single run */
398 pBiDi
->runs
[0].logicalStart
=MAKE_INDEX_ODD_PAIR(0, level
);
399 pBiDi
->runs
[0].visualLimit
=pBiDi
->length
;
400 pBiDi
->runs
[0].insertRemove
=0;
403 /* reorder the runs array (L2) ---------------------------------------------- */
406 * Reorder the same-level runs in the runs array.
407 * Here, runCount>1 and maxLevel>=minLevel>=paraLevel.
408 * All the visualStart fields=logical start before reordering.
409 * The "odd" bits are not set yet.
411 * Reordering with this data structure lends itself to some handy shortcuts:
413 * Since each run is moved but not modified, and since at the initial maxLevel
414 * each sequence of same-level runs consists of only one run each, we
415 * don't need to do anything there and can predecrement maxLevel.
416 * In many simple cases, the reordering is thus done entirely in the
418 * Also, reordering occurs only down to the lowest odd level that occurs,
419 * which is minLevel|1. However, if the lowest level itself is odd, then
420 * in the last reordering the sequence of the runs at this level or higher
421 * will be all runs, and we don't need the elaborate loop to search for them.
422 * This is covered by ++minLevel instead of minLevel|=1 followed
423 * by an extra reorder-all after the reorder-some loop.
424 * About a trailing WS run:
425 * Such a run would need special treatment because its level is not
426 * reflected in levels[] if this is not a paragraph object.
427 * Instead, all characters from trailingWSStart on are implicitly at
429 * However, for all maxLevel>paraLevel, this run will never be reordered
430 * and does not need to be taken into account. maxLevel==paraLevel is only reordered
431 * if minLevel==paraLevel is odd, which is done in the extra segment.
432 * This means that for the main reordering loop we don't need to consider
433 * this run and can --runCount. If it is later part of the all-runs
434 * reordering, then runCount is adjusted accordingly.
437 reorderLine(UBiDi
*pBiDi
, UBiDiLevel minLevel
, UBiDiLevel maxLevel
) {
440 int32_t firstRun
, endRun
, limitRun
, runCount
;
443 if(maxLevel
<=(minLevel
|1)) {
448 * Reorder only down to the lowest odd level
449 * and reorder at an odd minLevel in a separate, simpler loop.
450 * See comments above for why minLevel is always incremented.
455 levels
=pBiDi
->levels
;
456 runCount
=pBiDi
->runCount
;
458 /* do not include the WS run at paraLevel<=old minLevel except in the simple loop */
459 if(pBiDi
->trailingWSStart
<pBiDi
->length
) {
463 while(--maxLevel
>=minLevel
) {
466 /* loop for all sequences of runs */
468 /* look for a sequence of runs that are all at >=maxLevel */
469 /* look for the first run of such a sequence */
470 while(firstRun
<runCount
&& levels
[runs
[firstRun
].logicalStart
]<maxLevel
) {
473 if(firstRun
>=runCount
) {
474 break; /* no more such runs */
477 /* look for the limit run of such a sequence (the run behind it) */
478 for(limitRun
=firstRun
; ++limitRun
<runCount
&& levels
[runs
[limitRun
].logicalStart
]>=maxLevel
;) {}
480 /* Swap the entire sequence of runs from firstRun to limitRun-1. */
482 while(firstRun
<endRun
) {
483 tempRun
= runs
[firstRun
];
484 runs
[firstRun
]=runs
[endRun
];
485 runs
[endRun
]=tempRun
;
490 if(limitRun
==runCount
) {
491 break; /* no more such runs */
498 /* now do maxLevel==old minLevel (==odd!), see above */
502 /* include the trailing WS run in this complete reordering */
503 if(pBiDi
->trailingWSStart
==pBiDi
->length
) {
507 /* Swap the entire sequence of all runs. (endRun==runCount) */
508 while(firstRun
<runCount
) {
509 tempRun
=runs
[firstRun
];
510 runs
[firstRun
]=runs
[runCount
];
511 runs
[runCount
]=tempRun
;
518 /* compute the runs array --------------------------------------------------- */
520 static int32_t getRunFromLogicalIndex(UBiDi
*pBiDi
, int32_t logicalIndex
, UErrorCode
*pErrorCode
) {
521 Run
*runs
=pBiDi
->runs
;
522 int32_t runCount
=pBiDi
->runCount
, visualStart
=0, i
, length
, logicalStart
;
524 for(i
=0; i
<runCount
; i
++) {
525 length
=runs
[i
].visualLimit
-visualStart
;
526 logicalStart
=GET_INDEX(runs
[i
].logicalStart
);
527 if((logicalIndex
>=logicalStart
) && (logicalIndex
<(logicalStart
+length
))) {
532 /* we should never get here */
534 *pErrorCode
= U_INVALID_STATE_ERROR
;
539 * Compute the runs array from the levels array.
540 * After ubidi_getRuns() returns TRUE, runCount is guaranteed to be >0
541 * and the runs are reordered.
542 * Odd-level runs have visualStart on their visual right edge and
543 * they progress visually to the left.
544 * If option UBIDI_OPTION_INSERT_MARKS is set, insertRemove will contain the
545 * sum of appropriate LRM/RLM_BEFORE/AFTER flags.
546 * If option UBIDI_OPTION_REMOVE_CONTROLS is set, insertRemove will contain the
547 * negative number of BiDi control characters within this run.
550 ubidi_getRuns(UBiDi
*pBiDi
, UErrorCode
*pErrorCode
) {
552 * This method returns immediately if the runs are already set. This
553 * includes the case of length==0 (handled in setPara)..
555 if (pBiDi
->runCount
>=0) {
559 if(pBiDi
->direction
!=UBIDI_MIXED
) {
560 /* simple, single-run case - this covers length==0 */
561 /* pBiDi->paraLevel is ok even for contextual multiple paragraphs */
562 getSingleRun(pBiDi
, pBiDi
->paraLevel
);
563 } else /* UBIDI_MIXED, length>0 */ {
564 /* mixed directionality */
565 int32_t length
=pBiDi
->length
, limit
;
566 UBiDiLevel
*levels
=pBiDi
->levels
;
568 UBiDiLevel level
=UBIDI_DEFAULT_LTR
; /* initialize with no valid level */
570 * If there are WS characters at the end of the line
571 * and the run preceding them has a level different from
572 * paraLevel, then they will form their own run at paraLevel (L1).
573 * Count them separately.
574 * We need some special treatment for this in order to not
575 * modify the levels array which a line UBiDi object shares
576 * with its paragraph parent and its other line siblings.
577 * In other words, for the trailing WS, it may be
578 * levels[]!=paraLevel but we have to treat it like it were so.
580 limit
=pBiDi
->trailingWSStart
;
581 /* count the runs, there is at least one non-WS run, and limit>0 */
583 for(i
=0; i
<limit
; ++i
) {
584 /* increment runCount at the start of each run */
585 if(levels
[i
]!=level
) {
592 * We don't need to see if the last run can be merged with a trailing
593 * WS run because setTrailingWSStart() would have done that.
595 if(runCount
==1 && limit
==length
) {
596 /* There is only one non-WS run and no trailing WS-run. */
597 getSingleRun(pBiDi
, levels
[0]);
598 } else /* runCount>1 || limit<length */ {
599 /* allocate and set the runs */
601 int32_t runIndex
, start
;
602 UBiDiLevel minLevel
=UBIDI_MAX_EXPLICIT_LEVEL
+1, maxLevel
=0;
604 /* now, count a (non-mergeable) WS run */
610 if(getRunsMemory(pBiDi
, runCount
)) {
611 runs
=pBiDi
->runsMemory
;
617 /* FOOD FOR THOUGHT: this could be optimized, e.g.:
618 * 464->444, 484->444, 575->555, 595->555
619 * However, that would take longer. Check also how it would
620 * interact with BiDi control removal and inserting Marks.
624 /* search for the run limits and initialize visualLimit values with the run lengths */
627 /* prepare this run */
637 /* look for the run limit */
638 while(++i
<limit
&& levels
[i
]==level
) {}
640 /* i is another run limit */
641 runs
[runIndex
].logicalStart
=start
;
642 runs
[runIndex
].visualLimit
=i
-start
;
643 runs
[runIndex
].insertRemove
=0;
648 /* there is a separate WS run */
649 runs
[runIndex
].logicalStart
=limit
;
650 runs
[runIndex
].visualLimit
=length
-limit
;
651 /* For the trailing WS run, pBiDi->paraLevel is ok even
652 if contextual multiple paragraphs. */
653 if(pBiDi
->paraLevel
<minLevel
) {
654 minLevel
=pBiDi
->paraLevel
;
658 /* set the object fields */
660 pBiDi
->runCount
=runCount
;
662 reorderLine(pBiDi
, minLevel
, maxLevel
);
664 /* now add the direction flags and adjust the visualLimit's to be just that */
665 /* this loop will also handle the trailing WS run */
667 for(i
=0; i
<runCount
; ++i
) {
668 ADD_ODD_BIT_FROM_LEVEL(runs
[i
].logicalStart
, levels
[runs
[i
].logicalStart
]);
669 limit
+=runs
[i
].visualLimit
;
670 runs
[i
].visualLimit
=limit
;
673 /* Set the "odd" bit for the trailing WS run. */
674 /* For a RTL paragraph, it will be the *first* run in visual order. */
675 /* For the trailing WS run, pBiDi->paraLevel is ok even if
676 contextual multiple paragraphs. */
677 if(runIndex
<runCount
) {
678 int32_t trailingRun
= ((pBiDi
->paraLevel
& 1) != 0)? 0 : runIndex
;
680 ADD_ODD_BIT_FROM_LEVEL(runs
[trailingRun
].logicalStart
, pBiDi
->paraLevel
);
685 /* handle insert LRM/RLM BEFORE/AFTER run */
686 if(pBiDi
->insertPoints
.size
>0) {
687 Point
*point
, *start
=pBiDi
->insertPoints
.points
,
688 *limit
=start
+pBiDi
->insertPoints
.size
;
690 for(point
=start
; point
<limit
; point
++) {
691 runIndex
=getRunFromLogicalIndex(pBiDi
, point
->pos
, pErrorCode
);
692 pBiDi
->runs
[runIndex
].insertRemove
|=point
->flag
;
696 /* handle remove BiDi control characters */
697 if(pBiDi
->controlCount
>0) {
699 const UChar
*start
=pBiDi
->text
, *limit
=start
+pBiDi
->length
, *pu
;
700 for(pu
=start
; pu
<limit
; pu
++) {
701 if(IS_BIDI_CONTROL_CHAR(*pu
)) {
702 runIndex
=getRunFromLogicalIndex(pBiDi
, (int32_t)(pu
-start
), pErrorCode
);
703 pBiDi
->runs
[runIndex
].insertRemove
--;
712 prepareReorder(const UBiDiLevel
*levels
, int32_t length
,
714 UBiDiLevel
*pMinLevel
, UBiDiLevel
*pMaxLevel
) {
716 UBiDiLevel level
, minLevel
, maxLevel
;
718 if(levels
==NULL
|| length
<=0) {
722 /* determine minLevel and maxLevel */
723 minLevel
=UBIDI_MAX_EXPLICIT_LEVEL
+1;
725 for(start
=length
; start
>0;) {
726 level
=levels
[--start
];
727 if(level
>UBIDI_MAX_EXPLICIT_LEVEL
+1) {
740 /* initialize the index map */
741 for(start
=length
; start
>0;) {
743 indexMap
[start
]=start
;
749 /* reorder a line based on a levels array (L2) ------------------------------ */
751 U_CAPI
void U_EXPORT2
752 ubidi_reorderLogical(const UBiDiLevel
*levels
, int32_t length
, int32_t *indexMap
) {
753 int32_t start
, limit
, sumOfSosEos
;
754 UBiDiLevel minLevel
= 0, maxLevel
= 0;
756 if(indexMap
==NULL
|| !prepareReorder(levels
, length
, indexMap
, &minLevel
, &maxLevel
)) {
761 if(minLevel
==maxLevel
&& (minLevel
&1)==0) {
765 /* reorder only down to the lowest odd level */
768 /* loop maxLevel..minLevel */
772 /* loop for all sequences of levels to reorder at the current maxLevel */
774 /* look for a sequence of levels that are all at >=maxLevel */
775 /* look for the first index of such a sequence */
776 while(start
<length
&& levels
[start
]<maxLevel
) {
780 break; /* no more such sequences */
783 /* look for the limit of such a sequence (the index behind it) */
784 for(limit
=start
; ++limit
<length
&& levels
[limit
]>=maxLevel
;) {}
787 * sos=start of sequence, eos=end of sequence
789 * The closed (inclusive) interval from sos to eos includes all the logical
790 * and visual indexes within this sequence. They are logically and
791 * visually contiguous and in the same range.
793 * For each run, the new visual index=sos+eos-old visual index;
794 * we pre-add sos+eos into sumOfSosEos ->
795 * new visual index=sumOfSosEos-old visual index;
797 sumOfSosEos
=start
+limit
-1;
799 /* reorder each index in the sequence */
801 indexMap
[start
]=sumOfSosEos
-indexMap
[start
];
802 } while(++start
<limit
);
806 break; /* no more such sequences */
811 } while(--maxLevel
>=minLevel
);
814 U_CAPI
void U_EXPORT2
815 ubidi_reorderVisual(const UBiDiLevel
*levels
, int32_t length
, int32_t *indexMap
) {
816 int32_t start
, end
, limit
, temp
;
817 UBiDiLevel minLevel
= 0, maxLevel
= 0;
819 if(indexMap
==NULL
|| !prepareReorder(levels
, length
, indexMap
, &minLevel
, &maxLevel
)) {
824 if(minLevel
==maxLevel
&& (minLevel
&1)==0) {
828 /* reorder only down to the lowest odd level */
831 /* loop maxLevel..minLevel */
835 /* loop for all sequences of levels to reorder at the current maxLevel */
837 /* look for a sequence of levels that are all at >=maxLevel */
838 /* look for the first index of such a sequence */
839 while(start
<length
&& levels
[start
]<maxLevel
) {
843 break; /* no more such runs */
846 /* look for the limit of such a sequence (the index behind it) */
847 for(limit
=start
; ++limit
<length
&& levels
[limit
]>=maxLevel
;) {}
850 * Swap the entire interval of indexes from start to limit-1.
851 * We don't need to swap the levels for the purpose of this
852 * algorithm: the sequence of levels that we look at does not
857 temp
=indexMap
[start
];
858 indexMap
[start
]=indexMap
[end
];
866 break; /* no more such sequences */
871 } while(--maxLevel
>=minLevel
);
874 /* API functions for logical<->visual mapping ------------------------------- */
876 U_CAPI
int32_t U_EXPORT2
877 ubidi_getVisualIndex(UBiDi
*pBiDi
, int32_t logicalIndex
, UErrorCode
*pErrorCode
) {
878 int32_t visualIndex
=UBIDI_MAP_NOWHERE
;
879 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
, -1);
880 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi
, *pErrorCode
, -1);
881 RETURN_IF_BAD_RANGE(logicalIndex
, 0, pBiDi
->length
, *pErrorCode
, -1);
883 /* we can do the trivial cases without the runs array */
884 switch(pBiDi
->direction
) {
886 visualIndex
=logicalIndex
;
889 visualIndex
=pBiDi
->length
-logicalIndex
-1;
892 if(!ubidi_getRuns(pBiDi
, pErrorCode
)) {
893 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
896 Run
*runs
=pBiDi
->runs
;
897 int32_t i
, visualStart
=0, offset
, length
;
899 /* linear search for the run, search on the visual runs */
900 for(i
=0; i
<pBiDi
->runCount
; ++i
) {
901 length
=runs
[i
].visualLimit
-visualStart
;
902 offset
=logicalIndex
-GET_INDEX(runs
[i
].logicalStart
);
903 if(offset
>=0 && offset
<length
) {
904 if(IS_EVEN_RUN(runs
[i
].logicalStart
)) {
906 visualIndex
=visualStart
+offset
;
909 visualIndex
=visualStart
+length
-offset
-1;
911 break; /* exit for loop */
915 if(i
>=pBiDi
->runCount
) {
916 return UBIDI_MAP_NOWHERE
;
921 if(pBiDi
->insertPoints
.size
>0) {
922 /* add the number of added marks until the calculated visual index */
923 Run
*runs
=pBiDi
->runs
;
924 int32_t i
, length
, insertRemove
;
925 int32_t visualStart
=0, markFound
=0;
926 for(i
=0; ; i
++, visualStart
+=length
) {
927 length
=runs
[i
].visualLimit
-visualStart
;
928 insertRemove
=runs
[i
].insertRemove
;
929 if(insertRemove
& (LRM_BEFORE
|RLM_BEFORE
)) {
932 /* is it the run containing the visual index? */
933 if(visualIndex
<runs
[i
].visualLimit
) {
934 return visualIndex
+markFound
;
936 if(insertRemove
& (LRM_AFTER
|RLM_AFTER
)) {
941 else if(pBiDi
->controlCount
>0) {
942 /* subtract the number of controls until the calculated visual index */
943 Run
*runs
=pBiDi
->runs
;
944 int32_t i
, j
, start
, limit
, length
, insertRemove
;
945 int32_t visualStart
=0, controlFound
=0;
946 UChar uchar
=pBiDi
->text
[logicalIndex
];
947 /* is the logical index pointing to a control ? */
948 if(IS_BIDI_CONTROL_CHAR(uchar
)) {
949 return UBIDI_MAP_NOWHERE
;
952 for(i
=0; ; i
++, visualStart
+=length
) {
953 length
=runs
[i
].visualLimit
-visualStart
;
954 insertRemove
=runs
[i
].insertRemove
;
955 /* calculated visual index is beyond this run? */
956 if(visualIndex
>=runs
[i
].visualLimit
) {
957 controlFound
-=insertRemove
;
960 /* calculated visual index must be within current run */
961 if(insertRemove
==0) {
962 return visualIndex
-controlFound
;
964 if(IS_EVEN_RUN(runs
[i
].logicalStart
)) {
965 /* LTR: check from run start to logical index */
966 start
=runs
[i
].logicalStart
;
969 /* RTL: check from logical index to run end */
970 start
=logicalIndex
+1;
971 limit
=GET_INDEX(runs
[i
].logicalStart
)+length
;
973 for(j
=start
; j
<limit
; j
++) {
974 uchar
=pBiDi
->text
[j
];
975 if(IS_BIDI_CONTROL_CHAR(uchar
)) {
979 return visualIndex
-controlFound
;
986 U_CAPI
int32_t U_EXPORT2
987 ubidi_getLogicalIndex(UBiDi
*pBiDi
, int32_t visualIndex
, UErrorCode
*pErrorCode
) {
989 int32_t i
, runCount
, start
;
990 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
, -1);
991 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi
, *pErrorCode
, -1);
992 RETURN_IF_BAD_RANGE(visualIndex
, 0, pBiDi
->resultLength
, *pErrorCode
, -1);
993 /* we can do the trivial cases without the runs array */
994 if(pBiDi
->insertPoints
.size
==0 && pBiDi
->controlCount
==0) {
995 if(pBiDi
->direction
==UBIDI_LTR
) {
998 else if(pBiDi
->direction
==UBIDI_RTL
) {
999 return pBiDi
->length
-visualIndex
-1;
1002 if(!ubidi_getRuns(pBiDi
, pErrorCode
)) {
1003 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
1008 runCount
=pBiDi
->runCount
;
1009 if(pBiDi
->insertPoints
.size
>0) {
1010 /* handle inserted LRM/RLM */
1011 int32_t markFound
=0, insertRemove
;
1012 int32_t visualStart
=0, length
;
1014 /* subtract number of marks until visual index */
1015 for(i
=0; ; i
++, visualStart
+=length
) {
1016 length
=runs
[i
].visualLimit
-visualStart
;
1017 insertRemove
=runs
[i
].insertRemove
;
1018 if(insertRemove
&(LRM_BEFORE
|RLM_BEFORE
)) {
1019 if(visualIndex
<=(visualStart
+markFound
)) {
1020 return UBIDI_MAP_NOWHERE
;
1024 /* is adjusted visual index within this run? */
1025 if(visualIndex
<(runs
[i
].visualLimit
+markFound
)) {
1026 visualIndex
-=markFound
;
1029 if(insertRemove
&(LRM_AFTER
|RLM_AFTER
)) {
1030 if(visualIndex
==(visualStart
+length
+markFound
)) {
1031 return UBIDI_MAP_NOWHERE
;
1037 else if(pBiDi
->controlCount
>0) {
1038 /* handle removed BiDi control characters */
1039 int32_t controlFound
=0, insertRemove
, length
;
1040 int32_t logicalStart
, logicalEnd
, visualStart
=0, j
, k
;
1043 /* add number of controls until visual index */
1044 for(i
=0; ; i
++, visualStart
+=length
) {
1045 length
=runs
[i
].visualLimit
-visualStart
;
1046 insertRemove
=runs
[i
].insertRemove
;
1047 /* is adjusted visual index beyond current run? */
1048 if(visualIndex
>=(runs
[i
].visualLimit
-controlFound
+insertRemove
)) {
1049 controlFound
-=insertRemove
;
1052 /* adjusted visual index is within current run */
1053 if(insertRemove
==0) {
1054 visualIndex
+=controlFound
;
1057 /* count non-control chars until visualIndex */
1058 logicalStart
=runs
[i
].logicalStart
;
1059 evenRun
=IS_EVEN_RUN(logicalStart
);
1060 REMOVE_ODD_BIT(logicalStart
);
1061 logicalEnd
=logicalStart
+length
-1;
1062 for(j
=0; j
<length
; j
++) {
1063 k
= evenRun
? logicalStart
+j
: logicalEnd
-j
;
1064 uchar
=pBiDi
->text
[k
];
1065 if(IS_BIDI_CONTROL_CHAR(uchar
)) {
1068 if((visualIndex
+controlFound
)==(visualStart
+j
)) {
1072 visualIndex
+=controlFound
;
1076 /* handle all cases */
1078 /* linear search for the run */
1079 for(i
=0; visualIndex
>=runs
[i
].visualLimit
; ++i
) {}
1081 /* binary search for the run */
1082 int32_t begin
=0, limit
=runCount
;
1084 /* the middle if() is guaranteed to find the run, we don't need a loop limit */
1087 if(visualIndex
>=runs
[i
].visualLimit
) {
1089 } else if(i
==0 || visualIndex
>=runs
[i
-1].visualLimit
) {
1097 start
=runs
[i
].logicalStart
;
1098 if(IS_EVEN_RUN(start
)) {
1100 /* the offset in runs[i] is visualIndex-runs[i-1].visualLimit */
1102 visualIndex
-=runs
[i
-1].visualLimit
;
1104 return start
+visualIndex
;
1107 return GET_INDEX(start
)+runs
[i
].visualLimit
-visualIndex
-1;
1111 U_CAPI
void U_EXPORT2
1112 ubidi_getLogicalMap(UBiDi
*pBiDi
, int32_t *indexMap
, UErrorCode
*pErrorCode
) {
1113 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
);
1114 /* ubidi_countRuns() checks for VALID_PARA_OR_LINE */
1115 ubidi_countRuns(pBiDi
, pErrorCode
);
1116 if(U_FAILURE(*pErrorCode
)) {
1118 } else if(indexMap
==NULL
) {
1119 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
1121 /* fill a logical-to-visual index map using the runs[] */
1122 int32_t visualStart
, visualLimit
, i
, j
, k
;
1123 int32_t logicalStart
, logicalLimit
;
1124 Run
*runs
=pBiDi
->runs
;
1125 if (pBiDi
->length
<=0) {
1128 if (pBiDi
->length
>pBiDi
->resultLength
) {
1129 uprv_memset(indexMap
, 0xFF, pBiDi
->length
*sizeof(int32_t));
1133 for(j
=0; j
<pBiDi
->runCount
; ++j
) {
1134 logicalStart
=GET_INDEX(runs
[j
].logicalStart
);
1135 visualLimit
=runs
[j
].visualLimit
;
1136 if(IS_EVEN_RUN(runs
[j
].logicalStart
)) {
1138 indexMap
[logicalStart
++]=visualStart
++;
1139 } while(visualStart
<visualLimit
);
1141 logicalStart
+=visualLimit
-visualStart
; /* logicalLimit */
1143 indexMap
[--logicalStart
]=visualStart
++;
1144 } while(visualStart
<visualLimit
);
1146 /* visualStart==visualLimit; */
1149 if(pBiDi
->insertPoints
.size
>0) {
1150 int32_t markFound
=0, runCount
=pBiDi
->runCount
;
1151 int32_t length
, insertRemove
;
1153 /* add number of marks found until each index */
1154 for(i
=0; i
<runCount
; i
++, visualStart
+=length
) {
1155 length
=runs
[i
].visualLimit
-visualStart
;
1156 insertRemove
=runs
[i
].insertRemove
;
1157 if(insertRemove
&(LRM_BEFORE
|RLM_BEFORE
)) {
1161 logicalStart
=GET_INDEX(runs
[i
].logicalStart
);
1162 logicalLimit
=logicalStart
+length
;
1163 for(j
=logicalStart
; j
<logicalLimit
; j
++) {
1164 indexMap
[j
]+=markFound
;
1167 if(insertRemove
&(LRM_AFTER
|RLM_AFTER
)) {
1172 else if(pBiDi
->controlCount
>0) {
1173 int32_t controlFound
=0, runCount
=pBiDi
->runCount
;
1174 int32_t length
, insertRemove
;
1178 /* subtract number of controls found until each index */
1179 for(i
=0; i
<runCount
; i
++, visualStart
+=length
) {
1180 length
=runs
[i
].visualLimit
-visualStart
;
1181 insertRemove
=runs
[i
].insertRemove
;
1182 /* no control found within previous runs nor within this run */
1183 if((controlFound
-insertRemove
)==0) {
1186 logicalStart
=runs
[i
].logicalStart
;
1187 evenRun
=IS_EVEN_RUN(logicalStart
);
1188 REMOVE_ODD_BIT(logicalStart
);
1189 logicalLimit
=logicalStart
+length
;
1190 /* if no control within this run */
1191 if(insertRemove
==0) {
1192 for(j
=logicalStart
; j
<logicalLimit
; j
++) {
1193 indexMap
[j
]-=controlFound
;
1197 for(j
=0; j
<length
; j
++) {
1198 k
= evenRun
? logicalStart
+j
: logicalLimit
-j
-1;
1199 uchar
=pBiDi
->text
[k
];
1200 if(IS_BIDI_CONTROL_CHAR(uchar
)) {
1202 indexMap
[k
]=UBIDI_MAP_NOWHERE
;
1205 indexMap
[k
]-=controlFound
;
1212 U_CAPI
void U_EXPORT2
1213 ubidi_getVisualMap(UBiDi
*pBiDi
, int32_t *indexMap
, UErrorCode
*pErrorCode
) {
1214 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode
);
1215 if(indexMap
==NULL
) {
1216 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
1219 /* ubidi_countRuns() checks for VALID_PARA_OR_LINE */
1220 ubidi_countRuns(pBiDi
, pErrorCode
);
1221 if(U_SUCCESS(*pErrorCode
)) {
1222 /* fill a visual-to-logical index map using the runs[] */
1223 Run
*runs
=pBiDi
->runs
, *runsLimit
=runs
+pBiDi
->runCount
;
1224 int32_t logicalStart
, visualStart
, visualLimit
, *pi
=indexMap
;
1226 if (pBiDi
->resultLength
<=0) {
1230 for(; runs
<runsLimit
; ++runs
) {
1231 logicalStart
=runs
->logicalStart
;
1232 visualLimit
=runs
->visualLimit
;
1233 if(IS_EVEN_RUN(logicalStart
)) {
1235 *pi
++ = logicalStart
++;
1236 } while(++visualStart
<visualLimit
);
1238 REMOVE_ODD_BIT(logicalStart
);
1239 logicalStart
+=visualLimit
-visualStart
; /* logicalLimit */
1241 *pi
++ = --logicalStart
;
1242 } while(++visualStart
<visualLimit
);
1244 /* visualStart==visualLimit; */
1247 if(pBiDi
->insertPoints
.size
>0) {
1248 int32_t markFound
=0, runCount
=pBiDi
->runCount
;
1249 int32_t insertRemove
, i
, j
, k
;
1251 /* count all inserted marks */
1252 for(i
=0; i
<runCount
; i
++) {
1253 insertRemove
=runs
[i
].insertRemove
;
1254 if(insertRemove
&(LRM_BEFORE
|RLM_BEFORE
)) {
1257 if(insertRemove
&(LRM_AFTER
|RLM_AFTER
)) {
1261 /* move back indexes by number of preceding marks */
1262 k
=pBiDi
->resultLength
;
1263 for(i
=runCount
-1; i
>=0 && markFound
>0; i
--) {
1264 insertRemove
=runs
[i
].insertRemove
;
1265 if(insertRemove
&(LRM_AFTER
|RLM_AFTER
)) {
1266 indexMap
[--k
]= UBIDI_MAP_NOWHERE
;
1269 visualStart
= i
>0 ? runs
[i
-1].visualLimit
: 0;
1270 for(j
=runs
[i
].visualLimit
-1; j
>=visualStart
&& markFound
>0; j
--) {
1271 indexMap
[--k
]=indexMap
[j
];
1273 if(insertRemove
&(LRM_BEFORE
|RLM_BEFORE
)) {
1274 indexMap
[--k
]= UBIDI_MAP_NOWHERE
;
1279 else if(pBiDi
->controlCount
>0) {
1280 int32_t runCount
=pBiDi
->runCount
, logicalEnd
;
1281 int32_t insertRemove
, length
, i
, j
, k
, m
;
1286 /* move forward indexes by number of preceding controls */
1288 for(i
=0; i
<runCount
; i
++, visualStart
+=length
) {
1289 length
=runs
[i
].visualLimit
-visualStart
;
1290 insertRemove
=runs
[i
].insertRemove
;
1291 /* if no control found yet, nothing to do in this run */
1292 if((insertRemove
==0)&&(k
==visualStart
)) {
1296 /* if no control in this run */
1297 if(insertRemove
==0) {
1298 visualLimit
=runs
[i
].visualLimit
;
1299 for(j
=visualStart
; j
<visualLimit
; j
++) {
1300 indexMap
[k
++]=indexMap
[j
];
1304 logicalStart
=runs
[i
].logicalStart
;
1305 evenRun
=IS_EVEN_RUN(logicalStart
);
1306 REMOVE_ODD_BIT(logicalStart
);
1307 logicalEnd
=logicalStart
+length
-1;
1308 for(j
=0; j
<length
; j
++) {
1309 m
= evenRun
? logicalStart
+j
: logicalEnd
-j
;
1310 uchar
=pBiDi
->text
[m
];
1311 if(!IS_BIDI_CONTROL_CHAR(uchar
)) {
1320 U_CAPI
void U_EXPORT2
1321 ubidi_invertMap(const int32_t *srcMap
, int32_t *destMap
, int32_t length
) {
1322 if(srcMap
!=NULL
&& destMap
!=NULL
&& length
>0) {
1324 int32_t destLength
=-1, count
=0;
1325 /* find highest value and count positive indexes in srcMap */
1328 if(*--pi
>destLength
) {
1335 destLength
++; /* add 1 for origin 0 */
1336 if(count
<destLength
) {
1337 /* we must fill unmatched destMap entries with -1 */
1338 uprv_memset(destMap
, 0xFF, destLength
*sizeof(int32_t));
1343 destMap
[*pi
]=--length
;