]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/ubidiln.c
ICU-461.17.tar.gz
[apple/icu.git] / icuSources / common / ubidiln.c
CommitLineData
73c04bcf 1/*
b75a7d8f
A
2******************************************************************************
3*
729e4ab9 4* Copyright (C) 1999-2010, International Business Machines
b75a7d8f
A
5* Corporation and others. All Rights Reserved.
6*
7******************************************************************************
8* file name: ubidiln.c
9* encoding: US-ASCII
10* tab size: 8 (not used)
11* indentation:4
12*
13* created on: 1999aug06
46f4442e 14* created by: Markus W. Scherer, updated by Matitiahu Allouche
b75a7d8f
A
15*/
16
b75a7d8f
A
17#include "cmemory.h"
18#include "unicode/utypes.h"
19#include "unicode/ustring.h"
20#include "unicode/uchar.h"
21#include "unicode/ubidi.h"
22#include "ubidiimp.h"
46f4442e 23#include "uassert.h"
b75a7d8f
A
24
25/*
26 * General remarks about the functions in this file:
27 *
28 * These functions deal with the aspects of potentially mixed-directional
29 * text in a single paragraph or in a line of a single paragraph
30 * which has already been processed according to
31 * the Unicode 3.0 BiDi algorithm as defined in
73c04bcf
A
32 * http://www.unicode.org/unicode/reports/tr9/ , version 13,
33 * also described in The Unicode Standard, Version 4.0.1 .
b75a7d8f
A
34 *
35 * This means that there is a UBiDi object with a levels
36 * and a dirProps array.
37 * paraLevel and direction are also set.
38 * Only if the length of the text is zero, then levels==dirProps==NULL.
39 *
40 * The overall directionality of the paragraph
41 * or line is used to bypass the reordering steps if possible.
42 * Even purely RTL text does not need reordering there because
43 * the ubidi_getLogical/VisualIndex() functions can compute the
44 * index on the fly in such a case.
45 *
46 * The implementation of the access to same-level-runs and of the reordering
47 * do attempt to provide better performance and less memory usage compared to
48 * a direct implementation of especially rule (L2) with an array of
49 * one (32-bit) integer per text character.
50 *
51 * Here, the levels array is scanned as soon as necessary, and a vector of
52 * same-level-runs is created. Reordering then is done on this vector.
53 * For each run of text positions that were resolved to the same level,
54 * only 8 bytes are stored: the first text position of the run and the visual
55 * position behind the run after reordering.
56 * One sign bit is used to hold the directionality of the run.
57 * This is inefficient if there are many very short runs. If the average run
58 * length is <2, then this uses more memory.
59 *
60 * In a further attempt to save memory, the levels array is never changed
61 * after all the resolution rules (Xn, Wn, Nn, In).
62 * Many functions have to consider the field trailingWSStart:
63 * if it is less than length, then there is an implicit trailing run
64 * at the paraLevel,
65 * which is not reflected in the levels array.
66 * This allows a line UBiDi object to use the same levels array as
67 * its paragraph parent object.
68 *
69 * When a UBiDi object is created for a line of a paragraph, then the
70 * paragraph's levels and dirProps arrays are reused by way of setting
71 * a pointer into them, not by copying. This again saves memory and forbids to
72 * change the now shared levels for (L1).
73 */
74
374ca955 75/* handle trailing WS (L1) -------------------------------------------------- */
b75a7d8f 76
374ca955
A
77/*
78 * setTrailingWSStart() sets the start index for a trailing
79 * run of WS in the line. This is necessary because we do not modify
80 * the paragraph's levels array that we just point into.
81 * Using trailingWSStart is another form of performing (L1).
82 *
83 * To make subsequent operations easier, we also include the run
84 * before the WS if it is at the paraLevel - we merge the two here.
73c04bcf
A
85 *
86 * This function is called only from ubidi_setLine(), so pBiDi->paraLevel is
87 * set correctly for the line even when contextual multiple paragraphs.
374ca955 88 */
b75a7d8f 89static void
374ca955
A
90setTrailingWSStart(UBiDi *pBiDi) {
91 /* pBiDi->direction!=UBIDI_MIXED */
b75a7d8f 92
374ca955
A
93 const DirProp *dirProps=pBiDi->dirProps;
94 UBiDiLevel *levels=pBiDi->levels;
95 int32_t start=pBiDi->length;
96 UBiDiLevel paraLevel=pBiDi->paraLevel;
b75a7d8f 97
73c04bcf
A
98 /* If the line is terminated by a block separator, all preceding WS etc...
99 are already set to paragraph level.
100 Setting trailingWSStart to pBidi->length will avoid changing the
101 level of B chars from 0 to paraLevel in ubidi_getLevels when
102 orderParagraphsLTR==TRUE.
103 */
104 if(NO_CONTEXT_RTL(dirProps[start-1])==B) {
105 pBiDi->trailingWSStart=start; /* currently == pBiDi->length */
106 return;
107 }
374ca955 108 /* go backwards across all WS, BN, explicit codes */
73c04bcf 109 while(start>0 && DIRPROP_FLAG_NC(dirProps[start-1])&MASK_WS) {
374ca955
A
110 --start;
111 }
b75a7d8f 112
374ca955
A
113 /* if the WS run can be merged with the previous run then do so here */
114 while(start>0 && levels[start-1]==paraLevel) {
115 --start;
116 }
117
118 pBiDi->trailingWSStart=start;
119}
b75a7d8f
A
120
121/* ubidi_setLine ------------------------------------------------------------ */
122
123U_CAPI void U_EXPORT2
124ubidi_setLine(const UBiDi *pParaBiDi,
125 int32_t start, int32_t limit,
126 UBiDi *pLineBiDi,
127 UErrorCode *pErrorCode) {
128 int32_t length;
129
130 /* check the argument values */
46f4442e
A
131 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
132 RETURN_VOID_IF_NOT_VALID_PARA(pParaBiDi, *pErrorCode);
133 RETURN_VOID_IF_BAD_RANGE(start, 0, limit, *pErrorCode);
134 RETURN_VOID_IF_BAD_RANGE(limit, 0, pParaBiDi->length+1, *pErrorCode);
135 if(pLineBiDi==NULL) {
b75a7d8f
A
136 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
137 return;
46f4442e
A
138 }
139 if(ubidi_getParagraph(pParaBiDi, start, NULL, NULL, NULL, pErrorCode) !=
140 ubidi_getParagraph(pParaBiDi, limit-1, NULL, NULL, NULL, pErrorCode)) {
73c04bcf
A
141 /* the line crosses a paragraph boundary */
142 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
143 return;
b75a7d8f
A
144 }
145
146 /* set the values in pLineBiDi from its pParaBiDi parent */
73c04bcf 147 pLineBiDi->pParaBiDi=NULL; /* mark unfinished setLine */
b75a7d8f
A
148 pLineBiDi->text=pParaBiDi->text+start;
149 length=pLineBiDi->length=limit-start;
73c04bcf
A
150 pLineBiDi->resultLength=pLineBiDi->originalLength=length;
151 pLineBiDi->paraLevel=GET_PARALEVEL(pParaBiDi, start);
152 pLineBiDi->paraCount=pParaBiDi->paraCount;
b75a7d8f
A
153 pLineBiDi->runs=NULL;
154 pLineBiDi->flags=0;
73c04bcf
A
155 pLineBiDi->reorderingMode=pParaBiDi->reorderingMode;
156 pLineBiDi->reorderingOptions=pParaBiDi->reorderingOptions;
157 pLineBiDi->controlCount=0;
158 if(pParaBiDi->controlCount>0) {
159 int32_t j;
160 for(j=start; j<limit; j++) {
161 if(IS_BIDI_CONTROL_CHAR(pParaBiDi->text[j])) {
162 pLineBiDi->controlCount++;
163 }
164 }
46f4442e 165 pLineBiDi->resultLength-=pLineBiDi->controlCount;
73c04bcf 166 }
b75a7d8f 167
46f4442e
A
168 pLineBiDi->dirProps=pParaBiDi->dirProps+start;
169 pLineBiDi->levels=pParaBiDi->levels+start;
170 pLineBiDi->runCount=-1;
b75a7d8f 171
46f4442e
A
172 if(pParaBiDi->direction!=UBIDI_MIXED) {
173 /* the parent is already trivial */
174 pLineBiDi->direction=pParaBiDi->direction;
b75a7d8f 175
46f4442e
A
176 /*
177 * The parent's levels are all either
178 * implicitly or explicitly ==paraLevel;
179 * do the same here.
180 */
181 if(pParaBiDi->trailingWSStart<=start) {
182 pLineBiDi->trailingWSStart=0;
183 } else if(pParaBiDi->trailingWSStart<limit) {
184 pLineBiDi->trailingWSStart=pParaBiDi->trailingWSStart-start;
b75a7d8f 185 } else {
46f4442e
A
186 pLineBiDi->trailingWSStart=length;
187 }
188 } else {
189 const UBiDiLevel *levels=pLineBiDi->levels;
190 int32_t i, trailingWSStart;
191 UBiDiLevel level;
192
193 setTrailingWSStart(pLineBiDi);
194 trailingWSStart=pLineBiDi->trailingWSStart;
b75a7d8f 195
46f4442e
A
196 /* recalculate pLineBiDi->direction */
197 if(trailingWSStart==0) {
198 /* all levels are at paraLevel */
199 pLineBiDi->direction=(UBiDiDirection)(pLineBiDi->paraLevel&1);
200 } else {
201 /* get the level of the first character */
202 level=(UBiDiLevel)(levels[0]&1);
b75a7d8f 203
46f4442e
A
204 /* if there is anything of a different level, then the line is mixed */
205 if(trailingWSStart<length && (pLineBiDi->paraLevel&1)!=level) {
206 /* the trailing WS is at paraLevel, which differs from levels[0] */
207 pLineBiDi->direction=UBIDI_MIXED;
b75a7d8f 208 } else {
46f4442e
A
209 /* see if levels[1..trailingWSStart-1] have the same direction as levels[0] and paraLevel */
210 i=1;
211 for(;;) {
212 if(i==trailingWSStart) {
213 /* the direction values match those in level */
214 pLineBiDi->direction=(UBiDiDirection)level;
215 break;
216 } else if((levels[i]&1)!=level) {
217 pLineBiDi->direction=UBIDI_MIXED;
218 break;
b75a7d8f 219 }
46f4442e 220 ++i;
b75a7d8f
A
221 }
222 }
46f4442e 223 }
b75a7d8f 224
46f4442e
A
225 switch(pLineBiDi->direction) {
226 case UBIDI_LTR:
227 /* make sure paraLevel is even */
228 pLineBiDi->paraLevel=(UBiDiLevel)((pLineBiDi->paraLevel+1)&~1);
b75a7d8f 229
46f4442e
A
230 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
231 pLineBiDi->trailingWSStart=0;
232 break;
233 case UBIDI_RTL:
234 /* make sure paraLevel is odd */
235 pLineBiDi->paraLevel|=1;
b75a7d8f 236
46f4442e
A
237 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
238 pLineBiDi->trailingWSStart=0;
239 break;
240 default:
241 break;
b75a7d8f 242 }
b75a7d8f 243 }
73c04bcf 244 pLineBiDi->pParaBiDi=pParaBiDi; /* mark successful setLine */
b75a7d8f
A
245 return;
246}
247
248U_CAPI UBiDiLevel U_EXPORT2
249ubidi_getLevelAt(const UBiDi *pBiDi, int32_t charIndex) {
250 /* return paraLevel if in the trailing WS run, otherwise the real level */
73c04bcf 251 if(!IS_VALID_PARA_OR_LINE(pBiDi) || charIndex<0 || pBiDi->length<=charIndex) {
b75a7d8f
A
252 return 0;
253 } else if(pBiDi->direction!=UBIDI_MIXED || charIndex>=pBiDi->trailingWSStart) {
73c04bcf 254 return GET_PARALEVEL(pBiDi, charIndex);
b75a7d8f
A
255 } else {
256 return pBiDi->levels[charIndex];
257 }
258}
259
260U_CAPI const UBiDiLevel * U_EXPORT2
261ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
262 int32_t start, length;
263
46f4442e
A
264 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, NULL);
265 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, NULL);
266 if((length=pBiDi->length)<=0) {
b75a7d8f
A
267 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
268 return NULL;
269 }
b75a7d8f
A
270 if((start=pBiDi->trailingWSStart)==length) {
271 /* the current levels array reflects the WS run */
272 return pBiDi->levels;
273 }
274
275 /*
276 * After the previous if(), we know that the levels array
277 * has an implicit trailing WS run and therefore does not fully
278 * reflect itself all the levels.
279 * This must be a UBiDi object for a line, and
280 * we need to create a new levels array.
281 */
b75a7d8f
A
282 if(getLevelsMemory(pBiDi, length)) {
283 UBiDiLevel *levels=pBiDi->levelsMemory;
284
285 if(start>0 && levels!=pBiDi->levels) {
286 uprv_memcpy(levels, pBiDi->levels, start);
287 }
73c04bcf
A
288 /* pBiDi->paraLevel is ok even if contextual multiple paragraphs,
289 since pBidi is a line object */
b75a7d8f
A
290 uprv_memset(levels+start, pBiDi->paraLevel, length-start);
291
292 /* this new levels array is set for the line and reflects the WS run */
293 pBiDi->trailingWSStart=length;
294 return pBiDi->levels=levels;
295 } else {
296 /* out of memory */
297 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
298 return NULL;
299 }
300}
301
302U_CAPI void U_EXPORT2
46f4442e 303ubidi_getLogicalRun(const UBiDi *pBiDi, int32_t logicalPosition,
b75a7d8f 304 int32_t *pLogicalLimit, UBiDiLevel *pLevel) {
46f4442e
A
305 UErrorCode errorCode;
306 int32_t runCount, visualStart, logicalLimit, logicalFirst, i;
307 Run iRun;
308
309 errorCode=U_ZERO_ERROR;
310 RETURN_VOID_IF_BAD_RANGE(logicalPosition, 0, pBiDi->length, errorCode);
311 /* ubidi_countRuns will check VALID_PARA_OR_LINE */
312 runCount=ubidi_countRuns((UBiDi *)pBiDi, &errorCode);
313 if(U_FAILURE(errorCode)) {
b75a7d8f
A
314 return;
315 }
46f4442e
A
316 /* this is done based on runs rather than on levels since levels have
317 a special interpretation when UBIDI_REORDER_RUNS_ONLY
318 */
319 visualStart=logicalLimit=0;
320 iRun=pBiDi->runs[0];
b75a7d8f 321
46f4442e
A
322 for(i=0; i<runCount; i++) {
323 iRun = pBiDi->runs[i];
324 logicalFirst=GET_INDEX(iRun.logicalStart);
325 logicalLimit=logicalFirst+iRun.visualLimit-visualStart;
326 if((logicalPosition>=logicalFirst) &&
327 (logicalPosition<logicalLimit)) {
328 break;
b75a7d8f 329 }
46f4442e
A
330 visualStart = iRun.visualLimit;
331 }
332 if(pLogicalLimit) {
333 *pLogicalLimit=logicalLimit;
334 }
335 if(pLevel) {
336 if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) {
337 *pLevel=(UBiDiLevel)GET_ODD_BIT(iRun.logicalStart);
b75a7d8f 338 }
46f4442e
A
339 else if(pBiDi->direction!=UBIDI_MIXED || logicalPosition>=pBiDi->trailingWSStart) {
340 *pLevel=GET_PARALEVEL(pBiDi, logicalPosition);
341 } else {
342 *pLevel=pBiDi->levels[logicalPosition];
b75a7d8f
A
343 }
344 }
345}
346
b75a7d8f
A
347/* runs API functions ------------------------------------------------------- */
348
349U_CAPI int32_t U_EXPORT2
350ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode) {
46f4442e
A
351 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
352 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
353 ubidi_getRuns(pBiDi, pErrorCode);
354 if(U_FAILURE(*pErrorCode)) {
b75a7d8f 355 return -1;
b75a7d8f 356 }
46f4442e 357 return pBiDi->runCount;
b75a7d8f
A
358}
359
360U_CAPI UBiDiDirection U_EXPORT2
361ubidi_getVisualRun(UBiDi *pBiDi, int32_t runIndex,
46f4442e
A
362 int32_t *pLogicalStart, int32_t *pLength)
363{
364 int32_t start;
365 UErrorCode errorCode = U_ZERO_ERROR;
366 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, errorCode, UBIDI_LTR);
367 ubidi_getRuns(pBiDi, &errorCode);
368 if(U_FAILURE(errorCode)) {
b75a7d8f 369 return UBIDI_LTR;
46f4442e
A
370 }
371 RETURN_IF_BAD_RANGE(runIndex, 0, pBiDi->runCount, errorCode, UBIDI_LTR);
372
373 start=pBiDi->runs[runIndex].logicalStart;
374 if(pLogicalStart!=NULL) {
375 *pLogicalStart=GET_INDEX(start);
376 }
377 if(pLength!=NULL) {
378 if(runIndex>0) {
379 *pLength=pBiDi->runs[runIndex].visualLimit-
380 pBiDi->runs[runIndex-1].visualLimit;
381 } else {
382 *pLength=pBiDi->runs[0].visualLimit;
b75a7d8f 383 }
b75a7d8f 384 }
46f4442e 385 return (UBiDiDirection)GET_ODD_BIT(start);
b75a7d8f
A
386}
387
374ca955
A
388/* in trivial cases there is only one trivial run; called by ubidi_getRuns() */
389static void
390getSingleRun(UBiDi *pBiDi, UBiDiLevel level) {
391 /* simple, single-run case */
392 pBiDi->runs=pBiDi->simpleRuns;
393 pBiDi->runCount=1;
394
395 /* fill and reorder the single run */
396 pBiDi->runs[0].logicalStart=MAKE_INDEX_ODD_PAIR(0, level);
397 pBiDi->runs[0].visualLimit=pBiDi->length;
73c04bcf 398 pBiDi->runs[0].insertRemove=0;
374ca955
A
399}
400
401/* reorder the runs array (L2) ---------------------------------------------- */
402
403/*
404 * Reorder the same-level runs in the runs array.
405 * Here, runCount>1 and maxLevel>=minLevel>=paraLevel.
406 * All the visualStart fields=logical start before reordering.
407 * The "odd" bits are not set yet.
408 *
409 * Reordering with this data structure lends itself to some handy shortcuts:
410 *
411 * Since each run is moved but not modified, and since at the initial maxLevel
412 * each sequence of same-level runs consists of only one run each, we
413 * don't need to do anything there and can predecrement maxLevel.
414 * In many simple cases, the reordering is thus done entirely in the
415 * index mapping.
416 * Also, reordering occurs only down to the lowest odd level that occurs,
417 * which is minLevel|1. However, if the lowest level itself is odd, then
418 * in the last reordering the sequence of the runs at this level or higher
419 * will be all runs, and we don't need the elaborate loop to search for them.
420 * This is covered by ++minLevel instead of minLevel|=1 followed
421 * by an extra reorder-all after the reorder-some loop.
422 * About a trailing WS run:
423 * Such a run would need special treatment because its level is not
424 * reflected in levels[] if this is not a paragraph object.
425 * Instead, all characters from trailingWSStart on are implicitly at
426 * paraLevel.
427 * However, for all maxLevel>paraLevel, this run will never be reordered
428 * and does not need to be taken into account. maxLevel==paraLevel is only reordered
429 * if minLevel==paraLevel is odd, which is done in the extra segment.
430 * This means that for the main reordering loop we don't need to consider
431 * this run and can --runCount. If it is later part of the all-runs
432 * reordering, then runCount is adjusted accordingly.
433 */
434static void
435reorderLine(UBiDi *pBiDi, UBiDiLevel minLevel, UBiDiLevel maxLevel) {
73c04bcf 436 Run *runs, tempRun;
374ca955 437 UBiDiLevel *levels;
73c04bcf 438 int32_t firstRun, endRun, limitRun, runCount;
374ca955
A
439
440 /* nothing to do? */
441 if(maxLevel<=(minLevel|1)) {
442 return;
443 }
444
445 /*
446 * Reorder only down to the lowest odd level
447 * and reorder at an odd minLevel in a separate, simpler loop.
448 * See comments above for why minLevel is always incremented.
449 */
450 ++minLevel;
451
452 runs=pBiDi->runs;
453 levels=pBiDi->levels;
454 runCount=pBiDi->runCount;
455
456 /* do not include the WS run at paraLevel<=old minLevel except in the simple loop */
457 if(pBiDi->trailingWSStart<pBiDi->length) {
458 --runCount;
459 }
460
461 while(--maxLevel>=minLevel) {
462 firstRun=0;
463
464 /* loop for all sequences of runs */
465 for(;;) {
466 /* look for a sequence of runs that are all at >=maxLevel */
467 /* look for the first run of such a sequence */
468 while(firstRun<runCount && levels[runs[firstRun].logicalStart]<maxLevel) {
469 ++firstRun;
470 }
471 if(firstRun>=runCount) {
472 break; /* no more such runs */
473 }
474
475 /* look for the limit run of such a sequence (the run behind it) */
476 for(limitRun=firstRun; ++limitRun<runCount && levels[runs[limitRun].logicalStart]>=maxLevel;) {}
477
478 /* Swap the entire sequence of runs from firstRun to limitRun-1. */
479 endRun=limitRun-1;
480 while(firstRun<endRun) {
73c04bcf
A
481 tempRun = runs[firstRun];
482 runs[firstRun]=runs[endRun];
483 runs[endRun]=tempRun;
374ca955
A
484 ++firstRun;
485 --endRun;
486 }
487
488 if(limitRun==runCount) {
489 break; /* no more such runs */
490 } else {
491 firstRun=limitRun+1;
492 }
493 }
494 }
495
496 /* now do maxLevel==old minLevel (==odd!), see above */
497 if(!(minLevel&1)) {
498 firstRun=0;
499
500 /* include the trailing WS run in this complete reordering */
501 if(pBiDi->trailingWSStart==pBiDi->length) {
502 --runCount;
503 }
504
505 /* Swap the entire sequence of all runs. (endRun==runCount) */
506 while(firstRun<runCount) {
73c04bcf
A
507 tempRun=runs[firstRun];
508 runs[firstRun]=runs[runCount];
509 runs[runCount]=tempRun;
374ca955
A
510 ++firstRun;
511 --runCount;
512 }
513 }
514}
515
b75a7d8f
A
516/* compute the runs array --------------------------------------------------- */
517
46f4442e 518static int32_t getRunFromLogicalIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode) {
73c04bcf
A
519 Run *runs=pBiDi->runs;
520 int32_t runCount=pBiDi->runCount, visualStart=0, i, length, logicalStart;
521
522 for(i=0; i<runCount; i++) {
523 length=runs[i].visualLimit-visualStart;
524 logicalStart=GET_INDEX(runs[i].logicalStart);
525 if((logicalIndex>=logicalStart) && (logicalIndex<(logicalStart+length))) {
526 return i;
527 }
528 visualStart+=length;
529 }
530 /* we should never get here */
46f4442e
A
531 U_ASSERT(FALSE);
532 *pErrorCode = U_INVALID_STATE_ERROR;
73c04bcf
A
533 return 0;
534}
535
b75a7d8f
A
536/*
537 * Compute the runs array from the levels array.
538 * After ubidi_getRuns() returns TRUE, runCount is guaranteed to be >0
539 * and the runs are reordered.
540 * Odd-level runs have visualStart on their visual right edge and
541 * they progress visually to the left.
73c04bcf
A
542 * If option UBIDI_OPTION_INSERT_MARKS is set, insertRemove will contain the
543 * sum of appropriate LRM/RLM_BEFORE/AFTER flags.
544 * If option UBIDI_OPTION_REMOVE_CONTROLS is set, insertRemove will contain the
545 * negative number of BiDi control characters within this run.
b75a7d8f
A
546 */
547U_CFUNC UBool
46f4442e
A
548ubidi_getRuns(UBiDi *pBiDi, UErrorCode *pErrorCode) {
549 /*
550 * This method returns immediately if the runs are already set. This
551 * includes the case of length==0 (handled in setPara)..
552 */
553 if (pBiDi->runCount>=0) {
554 return TRUE;
555 }
556
b75a7d8f
A
557 if(pBiDi->direction!=UBIDI_MIXED) {
558 /* simple, single-run case - this covers length==0 */
73c04bcf 559 /* pBiDi->paraLevel is ok even for contextual multiple paragraphs */
b75a7d8f
A
560 getSingleRun(pBiDi, pBiDi->paraLevel);
561 } else /* UBIDI_MIXED, length>0 */ {
562 /* mixed directionality */
563 int32_t length=pBiDi->length, limit;
46f4442e
A
564 UBiDiLevel *levels=pBiDi->levels;
565 int32_t i, runCount;
566 UBiDiLevel level=UBIDI_DEFAULT_LTR; /* initialize with no valid level */
b75a7d8f
A
567 /*
568 * If there are WS characters at the end of the line
569 * and the run preceding them has a level different from
570 * paraLevel, then they will form their own run at paraLevel (L1).
571 * Count them separately.
572 * We need some special treatment for this in order to not
573 * modify the levels array which a line UBiDi object shares
574 * with its paragraph parent and its other line siblings.
575 * In other words, for the trailing WS, it may be
576 * levels[]!=paraLevel but we have to treat it like it were so.
577 */
578 limit=pBiDi->trailingWSStart;
46f4442e
A
579 /* count the runs, there is at least one non-WS run, and limit>0 */
580 runCount=0;
581 for(i=0; i<limit; ++i) {
582 /* increment runCount at the start of each run */
583 if(levels[i]!=level) {
584 ++runCount;
585 level=levels[i];
b75a7d8f 586 }
46f4442e 587 }
b75a7d8f 588
46f4442e
A
589 /*
590 * We don't need to see if the last run can be merged with a trailing
591 * WS run because setTrailingWSStart() would have done that.
592 */
593 if(runCount==1 && limit==length) {
594 /* There is only one non-WS run and no trailing WS-run. */
595 getSingleRun(pBiDi, levels[0]);
596 } else /* runCount>1 || limit<length */ {
597 /* allocate and set the runs */
598 Run *runs;
599 int32_t runIndex, start;
600 UBiDiLevel minLevel=UBIDI_MAX_EXPLICIT_LEVEL+1, maxLevel=0;
601
602 /* now, count a (non-mergeable) WS run */
603 if(limit<length) {
604 ++runCount;
605 }
606
607 /* runCount>1 */
608 if(getRunsMemory(pBiDi, runCount)) {
609 runs=pBiDi->runsMemory;
610 } else {
611 return FALSE;
612 }
613
614 /* set the runs */
615 /* FOOD FOR THOUGHT: this could be optimized, e.g.:
616 * 464->444, 484->444, 575->555, 595->555
617 * However, that would take longer. Check also how it would
618 * interact with BiDi control removal and inserting Marks.
b75a7d8f 619 */
46f4442e 620 runIndex=0;
b75a7d8f 621
46f4442e
A
622 /* search for the run limits and initialize visualLimit values with the run lengths */
623 i=0;
624 do {
625 /* prepare this run */
626 start=i;
627 level=levels[i];
628 if(level<minLevel) {
629 minLevel=level;
630 }
631 if(level>maxLevel) {
632 maxLevel=level;
b75a7d8f
A
633 }
634
46f4442e
A
635 /* look for the run limit */
636 while(++i<limit && levels[i]==level) {}
637
638 /* i is another run limit */
639 runs[runIndex].logicalStart=start;
640 runs[runIndex].visualLimit=i-start;
641 runs[runIndex].insertRemove=0;
642 ++runIndex;
643 } while(i<limit);
644
645 if(limit<length) {
646 /* there is a separate WS run */
647 runs[runIndex].logicalStart=limit;
648 runs[runIndex].visualLimit=length-limit;
649 /* For the trailing WS run, pBiDi->paraLevel is ok even
650 if contextual multiple paragraphs. */
651 if(pBiDi->paraLevel<minLevel) {
652 minLevel=pBiDi->paraLevel;
b75a7d8f 653 }
46f4442e 654 }
b75a7d8f 655
46f4442e
A
656 /* set the object fields */
657 pBiDi->runs=runs;
658 pBiDi->runCount=runCount;
b75a7d8f 659
46f4442e 660 reorderLine(pBiDi, minLevel, maxLevel);
b75a7d8f 661
46f4442e
A
662 /* now add the direction flags and adjust the visualLimit's to be just that */
663 /* this loop will also handle the trailing WS run */
664 limit=0;
665 for(i=0; i<runCount; ++i) {
666 ADD_ODD_BIT_FROM_LEVEL(runs[i].logicalStart, levels[runs[i].logicalStart]);
729e4ab9
A
667 limit+=runs[i].visualLimit;
668 runs[i].visualLimit=limit;
46f4442e 669 }
b75a7d8f 670
46f4442e
A
671 /* Set the "odd" bit for the trailing WS run. */
672 /* For a RTL paragraph, it will be the *first* run in visual order. */
673 /* For the trailing WS run, pBiDi->paraLevel is ok even if
674 contextual multiple paragraphs. */
675 if(runIndex<runCount) {
676 int32_t trailingRun = ((pBiDi->paraLevel & 1) != 0)? 0 : runIndex;
374ca955 677
46f4442e 678 ADD_ODD_BIT_FROM_LEVEL(runs[trailingRun].logicalStart, pBiDi->paraLevel);
b75a7d8f
A
679 }
680 }
681 }
73c04bcf
A
682
683 /* handle insert LRM/RLM BEFORE/AFTER run */
684 if(pBiDi->insertPoints.size>0) {
685 Point *point, *start=pBiDi->insertPoints.points,
686 *limit=start+pBiDi->insertPoints.size;
687 int32_t runIndex;
688 for(point=start; point<limit; point++) {
46f4442e 689 runIndex=getRunFromLogicalIndex(pBiDi, point->pos, pErrorCode);
73c04bcf
A
690 pBiDi->runs[runIndex].insertRemove|=point->flag;
691 }
692 }
693
694 /* handle remove BiDi control characters */
695 if(pBiDi->controlCount>0) {
696 int32_t runIndex;
697 const UChar *start=pBiDi->text, *limit=start+pBiDi->length, *pu;
698 for(pu=start; pu<limit; pu++) {
699 if(IS_BIDI_CONTROL_CHAR(*pu)) {
729e4ab9 700 runIndex=getRunFromLogicalIndex(pBiDi, (int32_t)(pu-start), pErrorCode);
73c04bcf
A
701 pBiDi->runs[runIndex].insertRemove--;
702 }
703 }
704 }
705
b75a7d8f
A
706 return TRUE;
707}
708
374ca955
A
709static UBool
710prepareReorder(const UBiDiLevel *levels, int32_t length,
711 int32_t *indexMap,
712 UBiDiLevel *pMinLevel, UBiDiLevel *pMaxLevel) {
713 int32_t start;
714 UBiDiLevel level, minLevel, maxLevel;
b75a7d8f 715
374ca955
A
716 if(levels==NULL || length<=0) {
717 return FALSE;
b75a7d8f
A
718 }
719
374ca955
A
720 /* determine minLevel and maxLevel */
721 minLevel=UBIDI_MAX_EXPLICIT_LEVEL+1;
722 maxLevel=0;
723 for(start=length; start>0;) {
724 level=levels[--start];
725 if(level>UBIDI_MAX_EXPLICIT_LEVEL+1) {
726 return FALSE;
b75a7d8f 727 }
374ca955
A
728 if(level<minLevel) {
729 minLevel=level;
b75a7d8f 730 }
374ca955
A
731 if(level>maxLevel) {
732 maxLevel=level;
b75a7d8f
A
733 }
734 }
374ca955
A
735 *pMinLevel=minLevel;
736 *pMaxLevel=maxLevel;
737
738 /* initialize the index map */
739 for(start=length; start>0;) {
740 --start;
741 indexMap[start]=start;
742 }
743
744 return TRUE;
b75a7d8f
A
745}
746
747/* reorder a line based on a levels array (L2) ------------------------------ */
748
749U_CAPI void U_EXPORT2
750ubidi_reorderLogical(const UBiDiLevel *levels, int32_t length, int32_t *indexMap) {
751 int32_t start, limit, sumOfSosEos;
729e4ab9 752 UBiDiLevel minLevel = 0, maxLevel = 0;
b75a7d8f
A
753
754 if(indexMap==NULL || !prepareReorder(levels, length, indexMap, &minLevel, &maxLevel)) {
755 return;
756 }
757
758 /* nothing to do? */
759 if(minLevel==maxLevel && (minLevel&1)==0) {
760 return;
761 }
762
763 /* reorder only down to the lowest odd level */
764 minLevel|=1;
765
766 /* loop maxLevel..minLevel */
767 do {
768 start=0;
769
770 /* loop for all sequences of levels to reorder at the current maxLevel */
771 for(;;) {
772 /* look for a sequence of levels that are all at >=maxLevel */
773 /* look for the first index of such a sequence */
774 while(start<length && levels[start]<maxLevel) {
775 ++start;
776 }
777 if(start>=length) {
778 break; /* no more such sequences */
779 }
780
781 /* look for the limit of such a sequence (the index behind it) */
782 for(limit=start; ++limit<length && levels[limit]>=maxLevel;) {}
783
784 /*
785 * sos=start of sequence, eos=end of sequence
786 *
787 * The closed (inclusive) interval from sos to eos includes all the logical
788 * and visual indexes within this sequence. They are logically and
789 * visually contiguous and in the same range.
790 *
791 * For each run, the new visual index=sos+eos-old visual index;
792 * we pre-add sos+eos into sumOfSosEos ->
793 * new visual index=sumOfSosEos-old visual index;
794 */
795 sumOfSosEos=start+limit-1;
796
797 /* reorder each index in the sequence */
798 do {
799 indexMap[start]=sumOfSosEos-indexMap[start];
800 } while(++start<limit);
801
802 /* start==limit */
803 if(limit==length) {
804 break; /* no more such sequences */
805 } else {
806 start=limit+1;
807 }
808 }
809 } while(--maxLevel>=minLevel);
810}
811
812U_CAPI void U_EXPORT2
813ubidi_reorderVisual(const UBiDiLevel *levels, int32_t length, int32_t *indexMap) {
814 int32_t start, end, limit, temp;
729e4ab9 815 UBiDiLevel minLevel = 0, maxLevel = 0;
b75a7d8f
A
816
817 if(indexMap==NULL || !prepareReorder(levels, length, indexMap, &minLevel, &maxLevel)) {
818 return;
819 }
820
821 /* nothing to do? */
822 if(minLevel==maxLevel && (minLevel&1)==0) {
823 return;
824 }
825
826 /* reorder only down to the lowest odd level */
827 minLevel|=1;
828
829 /* loop maxLevel..minLevel */
830 do {
831 start=0;
832
833 /* loop for all sequences of levels to reorder at the current maxLevel */
834 for(;;) {
835 /* look for a sequence of levels that are all at >=maxLevel */
836 /* look for the first index of such a sequence */
837 while(start<length && levels[start]<maxLevel) {
838 ++start;
839 }
840 if(start>=length) {
841 break; /* no more such runs */
842 }
843
844 /* look for the limit of such a sequence (the index behind it) */
845 for(limit=start; ++limit<length && levels[limit]>=maxLevel;) {}
846
847 /*
848 * Swap the entire interval of indexes from start to limit-1.
849 * We don't need to swap the levels for the purpose of this
850 * algorithm: the sequence of levels that we look at does not
851 * move anyway.
852 */
853 end=limit-1;
854 while(start<end) {
855 temp=indexMap[start];
856 indexMap[start]=indexMap[end];
857 indexMap[end]=temp;
858
859 ++start;
860 --end;
861 }
862
863 if(limit==length) {
864 break; /* no more such sequences */
865 } else {
866 start=limit+1;
867 }
868 }
869 } while(--maxLevel>=minLevel);
870}
871
b75a7d8f
A
872/* API functions for logical<->visual mapping ------------------------------- */
873
874U_CAPI int32_t U_EXPORT2
875ubidi_getVisualIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode) {
46f4442e
A
876 int32_t visualIndex=UBIDI_MAP_NOWHERE;
877 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
878 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
879 RETURN_IF_BAD_RANGE(logicalIndex, 0, pBiDi->length, *pErrorCode, -1);
880
881 /* we can do the trivial cases without the runs array */
882 switch(pBiDi->direction) {
883 case UBIDI_LTR:
884 visualIndex=logicalIndex;
885 break;
886 case UBIDI_RTL:
887 visualIndex=pBiDi->length-logicalIndex-1;
888 break;
889 default:
890 if(!ubidi_getRuns(pBiDi, pErrorCode)) {
891 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
892 return -1;
893 } else {
894 Run *runs=pBiDi->runs;
895 int32_t i, visualStart=0, offset, length;
896
897 /* linear search for the run, search on the visual runs */
898 for(i=0; i<pBiDi->runCount; ++i) {
899 length=runs[i].visualLimit-visualStart;
900 offset=logicalIndex-GET_INDEX(runs[i].logicalStart);
901 if(offset>=0 && offset<length) {
902 if(IS_EVEN_RUN(runs[i].logicalStart)) {
903 /* LTR */
904 visualIndex=visualStart+offset;
905 } else {
906 /* RTL */
907 visualIndex=visualStart+length-offset-1;
b75a7d8f 908 }
46f4442e 909 break; /* exit for loop */
b75a7d8f 910 }
46f4442e
A
911 visualStart+=length;
912 }
913 if(i>=pBiDi->runCount) {
914 return UBIDI_MAP_NOWHERE;
b75a7d8f
A
915 }
916 }
917 }
73c04bcf
A
918
919 if(pBiDi->insertPoints.size>0) {
920 /* add the number of added marks until the calculated visual index */
921 Run *runs=pBiDi->runs;
922 int32_t i, length, insertRemove;
923 int32_t visualStart=0, markFound=0;
924 for(i=0; ; i++, visualStart+=length) {
925 length=runs[i].visualLimit-visualStart;
926 insertRemove=runs[i].insertRemove;
927 if(insertRemove & (LRM_BEFORE|RLM_BEFORE)) {
928 markFound++;
929 }
930 /* is it the run containing the visual index? */
931 if(visualIndex<runs[i].visualLimit) {
932 return visualIndex+markFound;
933 }
934 if(insertRemove & (LRM_AFTER|RLM_AFTER)) {
935 markFound++;
936 }
937 }
938 }
939 else if(pBiDi->controlCount>0) {
940 /* subtract the number of controls until the calculated visual index */
941 Run *runs=pBiDi->runs;
942 int32_t i, j, start, limit, length, insertRemove;
943 int32_t visualStart=0, controlFound=0;
944 UChar uchar=pBiDi->text[logicalIndex];
945 /* is the logical index pointing to a control ? */
946 if(IS_BIDI_CONTROL_CHAR(uchar)) {
947 return UBIDI_MAP_NOWHERE;
948 }
949 /* loop on runs */
950 for(i=0; ; i++, visualStart+=length) {
951 length=runs[i].visualLimit-visualStart;
952 insertRemove=runs[i].insertRemove;
953 /* calculated visual index is beyond this run? */
954 if(visualIndex>=runs[i].visualLimit) {
955 controlFound-=insertRemove;
956 continue;
957 }
958 /* calculated visual index must be within current run */
959 if(insertRemove==0) {
960 return visualIndex-controlFound;
961 }
962 if(IS_EVEN_RUN(runs[i].logicalStart)) {
963 /* LTR: check from run start to logical index */
964 start=runs[i].logicalStart;
965 limit=logicalIndex;
966 } else {
967 /* RTL: check from logical index to run end */
968 start=logicalIndex+1;
46f4442e 969 limit=GET_INDEX(runs[i].logicalStart)+length;
73c04bcf
A
970 }
971 for(j=start; j<limit; j++) {
972 uchar=pBiDi->text[j];
973 if(IS_BIDI_CONTROL_CHAR(uchar)) {
974 controlFound++;
975 }
976 }
977 return visualIndex-controlFound;
978 }
979 }
980
981 return visualIndex;
b75a7d8f
A
982}
983
984U_CAPI int32_t U_EXPORT2
985ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode) {
73c04bcf
A
986 Run *runs;
987 int32_t i, runCount, start;
46f4442e
A
988 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
989 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
990 RETURN_IF_BAD_RANGE(visualIndex, 0, pBiDi->resultLength, *pErrorCode, -1);
73c04bcf
A
991 /* we can do the trivial cases without the runs array */
992 if(pBiDi->insertPoints.size==0 && pBiDi->controlCount==0) {
993 if(pBiDi->direction==UBIDI_LTR) {
b75a7d8f 994 return visualIndex;
73c04bcf
A
995 }
996 else if(pBiDi->direction==UBIDI_RTL) {
b75a7d8f 997 return pBiDi->length-visualIndex-1;
73c04bcf 998 }
46f4442e
A
999 }
1000 if(!ubidi_getRuns(pBiDi, pErrorCode)) {
1001 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1002 return -1;
73c04bcf 1003 }
b75a7d8f 1004
73c04bcf
A
1005 runs=pBiDi->runs;
1006 runCount=pBiDi->runCount;
1007 if(pBiDi->insertPoints.size>0) {
1008 /* handle inserted LRM/RLM */
1009 int32_t markFound=0, insertRemove;
1010 int32_t visualStart=0, length;
1011 runs=pBiDi->runs;
1012 /* subtract number of marks until visual index */
1013 for(i=0; ; i++, visualStart+=length) {
1014 length=runs[i].visualLimit-visualStart;
1015 insertRemove=runs[i].insertRemove;
1016 if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) {
1017 if(visualIndex<=(visualStart+markFound)) {
1018 return UBIDI_MAP_NOWHERE;
b75a7d8f 1019 }
73c04bcf
A
1020 markFound++;
1021 }
1022 /* is adjusted visual index within this run? */
1023 if(visualIndex<(runs[i].visualLimit+markFound)) {
1024 visualIndex-=markFound;
1025 break;
1026 }
1027 if(insertRemove&(LRM_AFTER|RLM_AFTER)) {
1028 if(visualIndex==(visualStart+length+markFound)) {
1029 return UBIDI_MAP_NOWHERE;
1030 }
1031 markFound++;
1032 }
1033 }
1034 }
1035 else if(pBiDi->controlCount>0) {
1036 /* handle removed BiDi control characters */
1037 int32_t controlFound=0, insertRemove, length;
1038 int32_t logicalStart, logicalEnd, visualStart=0, j, k;
1039 UChar uchar;
1040 UBool evenRun;
1041 /* add number of controls until visual index */
1042 for(i=0; ; i++, visualStart+=length) {
1043 length=runs[i].visualLimit-visualStart;
1044 insertRemove=runs[i].insertRemove;
1045 /* is adjusted visual index beyond current run? */
1046 if(visualIndex>=(runs[i].visualLimit-controlFound+insertRemove)) {
1047 controlFound-=insertRemove;
1048 continue;
1049 }
1050 /* adjusted visual index is within current run */
1051 if(insertRemove==0) {
1052 visualIndex+=controlFound;
1053 break;
1054 }
1055 /* count non-control chars until visualIndex */
1056 logicalStart=runs[i].logicalStart;
1057 evenRun=IS_EVEN_RUN(logicalStart);
1058 REMOVE_ODD_BIT(logicalStart);
1059 logicalEnd=logicalStart+length-1;
1060 for(j=0; j<length; j++) {
1061 k= evenRun ? logicalStart+j : logicalEnd-j;
1062 uchar=pBiDi->text[k];
1063 if(IS_BIDI_CONTROL_CHAR(uchar)) {
1064 controlFound++;
1065 }
1066 if((visualIndex+controlFound)==(visualStart+j)) {
1067 break;
b75a7d8f
A
1068 }
1069 }
73c04bcf
A
1070 visualIndex+=controlFound;
1071 break;
b75a7d8f
A
1072 }
1073 }
73c04bcf
A
1074 /* handle all cases */
1075 if(runCount<=10) {
1076 /* linear search for the run */
1077 for(i=0; visualIndex>=runs[i].visualLimit; ++i) {}
1078 } else {
1079 /* binary search for the run */
1080 int32_t begin=0, limit=runCount;
1081
1082 /* the middle if() is guaranteed to find the run, we don't need a loop limit */
1083 for(;;) {
1084 i=(begin+limit)/2;
1085 if(visualIndex>=runs[i].visualLimit) {
1086 begin=i+1;
1087 } else if(i==0 || visualIndex>=runs[i-1].visualLimit) {
1088 break;
1089 } else {
1090 limit=i;
1091 }
1092 }
1093 }
1094
1095 start=runs[i].logicalStart;
1096 if(IS_EVEN_RUN(start)) {
1097 /* LTR */
1098 /* the offset in runs[i] is visualIndex-runs[i-1].visualLimit */
1099 if(i>0) {
1100 visualIndex-=runs[i-1].visualLimit;
1101 }
1102 return start+visualIndex;
1103 } else {
1104 /* RTL */
1105 return GET_INDEX(start)+runs[i].visualLimit-visualIndex-1;
1106 }
b75a7d8f
A
1107}
1108
1109U_CAPI void U_EXPORT2
1110ubidi_getLogicalMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) {
46f4442e
A
1111 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
1112 /* ubidi_countRuns() checks for VALID_PARA_OR_LINE */
1113 ubidi_countRuns(pBiDi, pErrorCode);
1114 if(U_FAILURE(*pErrorCode)) {
b75a7d8f
A
1115 /* no op */
1116 } else if(indexMap==NULL) {
1117 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1118 } else {
46f4442e
A
1119 /* fill a logical-to-visual index map using the runs[] */
1120 int32_t visualStart, visualLimit, i, j, k;
1121 int32_t logicalStart, logicalLimit;
1122 Run *runs=pBiDi->runs;
1123 if (pBiDi->length<=0) {
1124 return;
1125 }
1126 if (pBiDi->length>pBiDi->resultLength) {
1127 uprv_memset(indexMap, 0xFF, pBiDi->length*sizeof(int32_t));
1128 }
1129
1130 visualStart=0;
1131 for(j=0; j<pBiDi->runCount; ++j) {
1132 logicalStart=GET_INDEX(runs[j].logicalStart);
1133 visualLimit=runs[j].visualLimit;
1134 if(IS_EVEN_RUN(runs[j].logicalStart)) {
1135 do { /* LTR */
1136 indexMap[logicalStart++]=visualStart++;
1137 } while(visualStart<visualLimit);
1138 } else {
1139 logicalStart+=visualLimit-visualStart; /* logicalLimit */
1140 do { /* RTL */
1141 indexMap[--logicalStart]=visualStart++;
1142 } while(visualStart<visualLimit);
1143 }
1144 /* visualStart==visualLimit; */
1145 }
73c04bcf
A
1146
1147 if(pBiDi->insertPoints.size>0) {
1148 int32_t markFound=0, runCount=pBiDi->runCount;
46f4442e
A
1149 int32_t length, insertRemove;
1150 visualStart=0;
73c04bcf
A
1151 /* add number of marks found until each index */
1152 for(i=0; i<runCount; i++, visualStart+=length) {
1153 length=runs[i].visualLimit-visualStart;
1154 insertRemove=runs[i].insertRemove;
1155 if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) {
1156 markFound++;
1157 }
1158 if(markFound>0) {
46f4442e
A
1159 logicalStart=GET_INDEX(runs[i].logicalStart);
1160 logicalLimit=logicalStart+length;
1161 for(j=logicalStart; j<logicalLimit; j++) {
73c04bcf
A
1162 indexMap[j]+=markFound;
1163 }
1164 }
1165 if(insertRemove&(LRM_AFTER|RLM_AFTER)) {
1166 markFound++;
1167 }
1168 }
1169 }
1170 else if(pBiDi->controlCount>0) {
1171 int32_t controlFound=0, runCount=pBiDi->runCount;
46f4442e 1172 int32_t length, insertRemove;
73c04bcf
A
1173 UBool evenRun;
1174 UChar uchar;
46f4442e 1175 visualStart=0;
73c04bcf
A
1176 /* subtract number of controls found until each index */
1177 for(i=0; i<runCount; i++, visualStart+=length) {
1178 length=runs[i].visualLimit-visualStart;
1179 insertRemove=runs[i].insertRemove;
1180 /* no control found within previous runs nor within this run */
1181 if((controlFound-insertRemove)==0) {
1182 continue;
1183 }
1184 logicalStart=runs[i].logicalStart;
1185 evenRun=IS_EVEN_RUN(logicalStart);
1186 REMOVE_ODD_BIT(logicalStart);
46f4442e 1187 logicalLimit=logicalStart+length;
73c04bcf
A
1188 /* if no control within this run */
1189 if(insertRemove==0) {
46f4442e 1190 for(j=logicalStart; j<logicalLimit; j++) {
73c04bcf
A
1191 indexMap[j]-=controlFound;
1192 }
1193 continue;
1194 }
1195 for(j=0; j<length; j++) {
46f4442e 1196 k= evenRun ? logicalStart+j : logicalLimit-j-1;
73c04bcf
A
1197 uchar=pBiDi->text[k];
1198 if(IS_BIDI_CONTROL_CHAR(uchar)) {
1199 controlFound++;
1200 indexMap[k]=UBIDI_MAP_NOWHERE;
1201 continue;
1202 }
1203 indexMap[k]-=controlFound;
1204 }
1205 }
1206 }
b75a7d8f
A
1207 }
1208}
1209
1210U_CAPI void U_EXPORT2
1211ubidi_getVisualMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) {
46f4442e
A
1212 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
1213 if(indexMap==NULL) {
b75a7d8f 1214 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
46f4442e
A
1215 return;
1216 }
1217 /* ubidi_countRuns() checks for VALID_PARA_OR_LINE */
1218 ubidi_countRuns(pBiDi, pErrorCode);
1219 if(U_SUCCESS(*pErrorCode)) {
b75a7d8f
A
1220 /* fill a visual-to-logical index map using the runs[] */
1221 Run *runs=pBiDi->runs, *runsLimit=runs+pBiDi->runCount;
73c04bcf 1222 int32_t logicalStart, visualStart, visualLimit, *pi=indexMap;
b75a7d8f 1223
46f4442e
A
1224 if (pBiDi->resultLength<=0) {
1225 return;
1226 }
b75a7d8f
A
1227 visualStart=0;
1228 for(; runs<runsLimit; ++runs) {
1229 logicalStart=runs->logicalStart;
1230 visualLimit=runs->visualLimit;
1231 if(IS_EVEN_RUN(logicalStart)) {
1232 do { /* LTR */
73c04bcf 1233 *pi++ = logicalStart++;
b75a7d8f
A
1234 } while(++visualStart<visualLimit);
1235 } else {
1236 REMOVE_ODD_BIT(logicalStart);
1237 logicalStart+=visualLimit-visualStart; /* logicalLimit */
1238 do { /* RTL */
73c04bcf 1239 *pi++ = --logicalStart;
b75a7d8f
A
1240 } while(++visualStart<visualLimit);
1241 }
1242 /* visualStart==visualLimit; */
1243 }
73c04bcf
A
1244
1245 if(pBiDi->insertPoints.size>0) {
1246 int32_t markFound=0, runCount=pBiDi->runCount;
1247 int32_t insertRemove, i, j, k;
1248 runs=pBiDi->runs;
1249 /* count all inserted marks */
1250 for(i=0; i<runCount; i++) {
1251 insertRemove=runs[i].insertRemove;
1252 if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) {
1253 markFound++;
1254 }
1255 if(insertRemove&(LRM_AFTER|RLM_AFTER)) {
1256 markFound++;
1257 }
1258 }
1259 /* move back indexes by number of preceding marks */
1260 k=pBiDi->resultLength;
1261 for(i=runCount-1; i>=0 && markFound>0; i--) {
1262 insertRemove=runs[i].insertRemove;
1263 if(insertRemove&(LRM_AFTER|RLM_AFTER)) {
1264 indexMap[--k]= UBIDI_MAP_NOWHERE;
1265 markFound--;
1266 }
1267 visualStart= i>0 ? runs[i-1].visualLimit : 0;
1268 for(j=runs[i].visualLimit-1; j>=visualStart && markFound>0; j--) {
1269 indexMap[--k]=indexMap[j];
1270 }
1271 if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) {
1272 indexMap[--k]= UBIDI_MAP_NOWHERE;
1273 markFound--;
1274 }
1275 }
1276 }
1277 else if(pBiDi->controlCount>0) {
1278 int32_t runCount=pBiDi->runCount, logicalEnd;
1279 int32_t insertRemove, length, i, j, k, m;
1280 UChar uchar;
1281 UBool evenRun;
1282 runs=pBiDi->runs;
1283 visualStart=0;
1284 /* move forward indexes by number of preceding controls */
1285 k=0;
1286 for(i=0; i<runCount; i++, visualStart+=length) {
1287 length=runs[i].visualLimit-visualStart;
1288 insertRemove=runs[i].insertRemove;
1289 /* if no control found yet, nothing to do in this run */
1290 if((insertRemove==0)&&(k==visualStart)) {
1291 k+=length;
1292 continue;
1293 }
1294 /* if no control in this run */
1295 if(insertRemove==0) {
1296 visualLimit=runs[i].visualLimit;
1297 for(j=visualStart; j<visualLimit; j++) {
1298 indexMap[k++]=indexMap[j];
1299 }
1300 continue;
1301 }
1302 logicalStart=runs[i].logicalStart;
1303 evenRun=IS_EVEN_RUN(logicalStart);
1304 REMOVE_ODD_BIT(logicalStart);
1305 logicalEnd=logicalStart+length-1;
1306 for(j=0; j<length; j++) {
1307 m= evenRun ? logicalStart+j : logicalEnd-j;
1308 uchar=pBiDi->text[m];
1309 if(!IS_BIDI_CONTROL_CHAR(uchar)) {
1310 indexMap[k++]=m;
1311 }
1312 }
1313 }
1314 }
b75a7d8f
A
1315 }
1316}
1317
1318U_CAPI void U_EXPORT2
1319ubidi_invertMap(const int32_t *srcMap, int32_t *destMap, int32_t length) {
73c04bcf
A
1320 if(srcMap!=NULL && destMap!=NULL && length>0) {
1321 const int32_t *pi;
1322 int32_t destLength=-1, count=0;
1323 /* find highest value and count positive indexes in srcMap */
1324 pi=srcMap+length;
1325 while(pi>srcMap) {
1326 if(*--pi>destLength) {
1327 destLength=*pi;
1328 }
1329 if(*pi>=0) {
1330 count++;
1331 }
1332 }
1333 destLength++; /* add 1 for origin 0 */
1334 if(count<destLength) {
1335 /* we must fill unmatched destMap entries with -1 */
1336 uprv_memset(destMap, 0xFF, destLength*sizeof(int32_t));
1337 }
1338 pi=srcMap+length;
b75a7d8f 1339 while(length>0) {
73c04bcf
A
1340 if(*--pi>=0) {
1341 destMap[*pi]=--length;
1342 } else {
1343 --length;
1344 }
b75a7d8f
A
1345 }
1346 }
1347}