]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | ****************************************************************************** | |
3 | * | |
4 | * Copyright (C) 1999-2001, International Business Machines | |
5 | * Corporation and others. All Rights Reserved. | |
6 | * | |
7 | ****************************************************************************** | |
8 | * file name: ubidiln.c | |
9 | * encoding: US-ASCII | |
10 | * tab size: 8 (not used) | |
11 | * indentation:4 | |
12 | * | |
13 | * created on: 1999aug06 | |
14 | * created by: Markus W. Scherer | |
15 | */ | |
16 | ||
17 | /* set import/export definitions */ | |
18 | #ifndef U_COMMON_IMPLEMENTATION | |
19 | # define U_COMMON_IMPLEMENTATION | |
20 | #endif | |
21 | ||
22 | #include "cmemory.h" | |
23 | #include "unicode/utypes.h" | |
24 | #include "unicode/ustring.h" | |
25 | #include "unicode/uchar.h" | |
26 | #include "unicode/ubidi.h" | |
27 | #include "ubidiimp.h" | |
28 | ||
29 | /* | |
30 | * General remarks about the functions in this file: | |
31 | * | |
32 | * These functions deal with the aspects of potentially mixed-directional | |
33 | * text in a single paragraph or in a line of a single paragraph | |
34 | * which has already been processed according to | |
35 | * the Unicode 3.0 BiDi algorithm as defined in | |
36 | * http://www.unicode.org/unicode/reports/tr9/ , version 5, | |
37 | * also described in The Unicode Standard, Version 3.0 . | |
38 | * | |
39 | * This means that there is a UBiDi object with a levels | |
40 | * and a dirProps array. | |
41 | * paraLevel and direction are also set. | |
42 | * Only if the length of the text is zero, then levels==dirProps==NULL. | |
43 | * | |
44 | * The overall directionality of the paragraph | |
45 | * or line is used to bypass the reordering steps if possible. | |
46 | * Even purely RTL text does not need reordering there because | |
47 | * the ubidi_getLogical/VisualIndex() functions can compute the | |
48 | * index on the fly in such a case. | |
49 | * | |
50 | * The implementation of the access to same-level-runs and of the reordering | |
51 | * do attempt to provide better performance and less memory usage compared to | |
52 | * a direct implementation of especially rule (L2) with an array of | |
53 | * one (32-bit) integer per text character. | |
54 | * | |
55 | * Here, the levels array is scanned as soon as necessary, and a vector of | |
56 | * same-level-runs is created. Reordering then is done on this vector. | |
57 | * For each run of text positions that were resolved to the same level, | |
58 | * only 8 bytes are stored: the first text position of the run and the visual | |
59 | * position behind the run after reordering. | |
60 | * One sign bit is used to hold the directionality of the run. | |
61 | * This is inefficient if there are many very short runs. If the average run | |
62 | * length is <2, then this uses more memory. | |
63 | * | |
64 | * In a further attempt to save memory, the levels array is never changed | |
65 | * after all the resolution rules (Xn, Wn, Nn, In). | |
66 | * Many functions have to consider the field trailingWSStart: | |
67 | * if it is less than length, then there is an implicit trailing run | |
68 | * at the paraLevel, | |
69 | * which is not reflected in the levels array. | |
70 | * This allows a line UBiDi object to use the same levels array as | |
71 | * its paragraph parent object. | |
72 | * | |
73 | * When a UBiDi object is created for a line of a paragraph, then the | |
74 | * paragraph's levels and dirProps arrays are reused by way of setting | |
75 | * a pointer into them, not by copying. This again saves memory and forbids to | |
76 | * change the now shared levels for (L1). | |
77 | */ | |
78 | ||
79 | /* prototypes --------------------------------------------------------------- */ | |
80 | ||
81 | static void | |
82 | setTrailingWSStart(UBiDi *pBiDi); | |
83 | ||
84 | static void | |
85 | getSingleRun(UBiDi *pBiDi, UBiDiLevel level); | |
86 | ||
87 | static void | |
88 | reorderLine(UBiDi *pBiDi, UBiDiLevel minLevel, UBiDiLevel maxLevel); | |
89 | ||
90 | static UBool | |
91 | prepareReorder(const UBiDiLevel *levels, int32_t length, | |
92 | int32_t *indexMap, | |
93 | UBiDiLevel *pMinLevel, UBiDiLevel *pMaxLevel); | |
94 | ||
95 | /* ubidi_setLine ------------------------------------------------------------ */ | |
96 | ||
97 | U_CAPI void U_EXPORT2 | |
98 | ubidi_setLine(const UBiDi *pParaBiDi, | |
99 | int32_t start, int32_t limit, | |
100 | UBiDi *pLineBiDi, | |
101 | UErrorCode *pErrorCode) { | |
102 | int32_t length; | |
103 | ||
104 | /* check the argument values */ | |
105 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | |
106 | return; | |
107 | } else if(pParaBiDi==NULL || pLineBiDi==NULL) { | |
108 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
109 | return; | |
110 | } else if(start<0 || start>limit || limit>pParaBiDi->length) { | |
111 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; | |
112 | return; | |
113 | } | |
114 | ||
115 | /* set the values in pLineBiDi from its pParaBiDi parent */ | |
116 | pLineBiDi->text=pParaBiDi->text+start; | |
117 | length=pLineBiDi->length=limit-start; | |
118 | pLineBiDi->paraLevel=pParaBiDi->paraLevel; | |
119 | ||
120 | pLineBiDi->runs=NULL; | |
121 | pLineBiDi->flags=0; | |
122 | ||
123 | if(length>0) { | |
124 | pLineBiDi->dirProps=pParaBiDi->dirProps+start; | |
125 | pLineBiDi->levels=pParaBiDi->levels+start; | |
126 | pLineBiDi->runCount=-1; | |
127 | ||
128 | if(pParaBiDi->direction!=UBIDI_MIXED) { | |
129 | /* the parent is already trivial */ | |
130 | pLineBiDi->direction=pParaBiDi->direction; | |
131 | ||
132 | /* | |
133 | * The parent's levels are all either | |
134 | * implicitly or explicitly ==paraLevel; | |
135 | * do the same here. | |
136 | */ | |
137 | if(pParaBiDi->trailingWSStart<=start) { | |
138 | pLineBiDi->trailingWSStart=0; | |
139 | } else if(pParaBiDi->trailingWSStart<limit) { | |
140 | pLineBiDi->trailingWSStart=pParaBiDi->trailingWSStart-start; | |
141 | } else { | |
142 | pLineBiDi->trailingWSStart=length; | |
143 | } | |
144 | } else { | |
145 | const UBiDiLevel *levels=pLineBiDi->levels; | |
146 | int32_t i, trailingWSStart; | |
147 | UBiDiLevel level; | |
148 | ||
149 | setTrailingWSStart(pLineBiDi); | |
150 | trailingWSStart=pLineBiDi->trailingWSStart; | |
151 | ||
152 | /* recalculate pLineBiDi->direction */ | |
153 | if(trailingWSStart==0) { | |
154 | /* all levels are at paraLevel */ | |
155 | pLineBiDi->direction=(UBiDiDirection)(pLineBiDi->paraLevel&1); | |
156 | } else { | |
157 | /* get the level of the first character */ | |
158 | level=(UBiDiLevel)(levels[0]&1); | |
159 | ||
160 | /* if there is anything of a different level, then the line is mixed */ | |
161 | if(trailingWSStart<length && (pLineBiDi->paraLevel&1)!=level) { | |
162 | /* the trailing WS is at paraLevel, which differs from levels[0] */ | |
163 | pLineBiDi->direction=UBIDI_MIXED; | |
164 | } else { | |
165 | /* see if levels[1..trailingWSStart-1] have the same direction as levels[0] and paraLevel */ | |
166 | i=1; | |
167 | for(;;) { | |
168 | if(i==trailingWSStart) { | |
169 | /* the direction values match those in level */ | |
170 | pLineBiDi->direction=(UBiDiDirection)level; | |
171 | break; | |
172 | } else if((levels[i]&1)!=level) { | |
173 | pLineBiDi->direction=UBIDI_MIXED; | |
174 | break; | |
175 | } | |
176 | ++i; | |
177 | } | |
178 | } | |
179 | } | |
180 | ||
181 | switch(pLineBiDi->direction) { | |
182 | case UBIDI_LTR: | |
183 | /* make sure paraLevel is even */ | |
184 | pLineBiDi->paraLevel=(UBiDiLevel)((pLineBiDi->paraLevel+1)&~1); | |
185 | ||
186 | /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ | |
187 | pLineBiDi->trailingWSStart=0; | |
188 | break; | |
189 | case UBIDI_RTL: | |
190 | /* make sure paraLevel is odd */ | |
191 | pLineBiDi->paraLevel|=1; | |
192 | ||
193 | /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ | |
194 | pLineBiDi->trailingWSStart=0; | |
195 | break; | |
196 | default: | |
197 | break; | |
198 | } | |
199 | } | |
200 | } else { | |
201 | /* create an object for a zero-length line */ | |
202 | pLineBiDi->direction=pLineBiDi->paraLevel&1 ? UBIDI_RTL : UBIDI_LTR; | |
203 | pLineBiDi->trailingWSStart=pLineBiDi->runCount=0; | |
204 | ||
205 | pLineBiDi->dirProps=NULL; | |
206 | pLineBiDi->levels=NULL; | |
207 | } | |
208 | return; | |
209 | } | |
210 | ||
211 | U_CAPI UBiDiLevel U_EXPORT2 | |
212 | ubidi_getLevelAt(const UBiDi *pBiDi, int32_t charIndex) { | |
213 | /* return paraLevel if in the trailing WS run, otherwise the real level */ | |
214 | if(pBiDi==NULL || charIndex<0 || pBiDi->length<=charIndex) { | |
215 | return 0; | |
216 | } else if(pBiDi->direction!=UBIDI_MIXED || charIndex>=pBiDi->trailingWSStart) { | |
217 | return pBiDi->paraLevel; | |
218 | } else { | |
219 | return pBiDi->levels[charIndex]; | |
220 | } | |
221 | } | |
222 | ||
223 | U_CAPI const UBiDiLevel * U_EXPORT2 | |
224 | ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { | |
225 | int32_t start, length; | |
226 | ||
227 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | |
228 | return NULL; | |
229 | } else if(pBiDi==NULL || (length=pBiDi->length)<=0) { | |
230 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
231 | return NULL; | |
232 | } | |
233 | ||
234 | if((start=pBiDi->trailingWSStart)==length) { | |
235 | /* the current levels array reflects the WS run */ | |
236 | return pBiDi->levels; | |
237 | } | |
238 | ||
239 | /* | |
240 | * After the previous if(), we know that the levels array | |
241 | * has an implicit trailing WS run and therefore does not fully | |
242 | * reflect itself all the levels. | |
243 | * This must be a UBiDi object for a line, and | |
244 | * we need to create a new levels array. | |
245 | */ | |
246 | ||
247 | if(getLevelsMemory(pBiDi, length)) { | |
248 | UBiDiLevel *levels=pBiDi->levelsMemory; | |
249 | ||
250 | if(start>0 && levels!=pBiDi->levels) { | |
251 | uprv_memcpy(levels, pBiDi->levels, start); | |
252 | } | |
253 | uprv_memset(levels+start, pBiDi->paraLevel, length-start); | |
254 | ||
255 | /* this new levels array is set for the line and reflects the WS run */ | |
256 | pBiDi->trailingWSStart=length; | |
257 | return pBiDi->levels=levels; | |
258 | } else { | |
259 | /* out of memory */ | |
260 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; | |
261 | return NULL; | |
262 | } | |
263 | } | |
264 | ||
265 | U_CAPI void U_EXPORT2 | |
266 | ubidi_getLogicalRun(const UBiDi *pBiDi, int32_t logicalStart, | |
267 | int32_t *pLogicalLimit, UBiDiLevel *pLevel) { | |
268 | int32_t length; | |
269 | ||
270 | if(pBiDi==NULL || logicalStart<0 || (length=pBiDi->length)<=logicalStart) { | |
271 | return; | |
272 | } | |
273 | ||
274 | if(pBiDi->direction!=UBIDI_MIXED || logicalStart>=pBiDi->trailingWSStart) { | |
275 | if(pLogicalLimit!=NULL) { | |
276 | *pLogicalLimit=length; | |
277 | } | |
278 | if(pLevel!=NULL) { | |
279 | *pLevel=pBiDi->paraLevel; | |
280 | } | |
281 | } else { | |
282 | UBiDiLevel *levels=pBiDi->levels; | |
283 | UBiDiLevel level=levels[logicalStart]; | |
284 | ||
285 | /* search for the end of the run */ | |
286 | length=pBiDi->trailingWSStart; | |
287 | while(++logicalStart<length && level==levels[logicalStart]) {} | |
288 | ||
289 | if(pLogicalLimit!=NULL) { | |
290 | *pLogicalLimit=logicalStart; | |
291 | } | |
292 | if(pLevel!=NULL) { | |
293 | *pLevel=level; | |
294 | } | |
295 | } | |
296 | } | |
297 | ||
298 | /* handle trailing WS (L1) -------------------------------------------------- */ | |
299 | ||
300 | /* | |
301 | * setTrailingWSStart() sets the start index for a trailing | |
302 | * run of WS in the line. This is necessary because we do not modify | |
303 | * the paragraph's levels array that we just point into. | |
304 | * Using trailingWSStart is another form of performing (L1). | |
305 | * | |
306 | * To make subsequent operations easier, we also include the run | |
307 | * before the WS if it is at the paraLevel - we merge the two here. | |
308 | */ | |
309 | static void | |
310 | setTrailingWSStart(UBiDi *pBiDi) { | |
311 | /* pBiDi->direction!=UBIDI_MIXED */ | |
312 | ||
313 | const DirProp *dirProps=pBiDi->dirProps; | |
314 | UBiDiLevel *levels=pBiDi->levels; | |
315 | int32_t start=pBiDi->length; | |
316 | UBiDiLevel paraLevel=pBiDi->paraLevel; | |
317 | ||
318 | /* go backwards across all WS, BN, explicit codes */ | |
319 | while(start>0 && DIRPROP_FLAG(dirProps[start-1])&MASK_WS) { | |
320 | --start; | |
321 | } | |
322 | ||
323 | /* if the WS run can be merged with the previous run then do so here */ | |
324 | while(start>0 && levels[start-1]==paraLevel) { | |
325 | --start; | |
326 | } | |
327 | ||
328 | pBiDi->trailingWSStart=start; | |
329 | } | |
330 | ||
331 | /* runs API functions ------------------------------------------------------- */ | |
332 | ||
333 | U_CAPI int32_t U_EXPORT2 | |
334 | ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode) { | |
335 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | |
336 | return -1; | |
337 | } else if(pBiDi==NULL || (pBiDi->runCount<0 && !ubidi_getRuns(pBiDi))) { | |
338 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; | |
339 | return -1; | |
340 | } else { | |
341 | return pBiDi->runCount; | |
342 | } | |
343 | } | |
344 | ||
345 | U_CAPI UBiDiDirection U_EXPORT2 | |
346 | ubidi_getVisualRun(UBiDi *pBiDi, int32_t runIndex, | |
347 | int32_t *pLogicalStart, int32_t *pLength) { | |
348 | if( pBiDi==NULL || runIndex<0 || | |
349 | (pBiDi->runCount==-1 && !ubidi_getRuns(pBiDi)) || | |
350 | runIndex>=pBiDi->runCount | |
351 | ) { | |
352 | return UBIDI_LTR; | |
353 | } else { | |
354 | int32_t start=pBiDi->runs[runIndex].logicalStart; | |
355 | if(pLogicalStart!=NULL) { | |
356 | *pLogicalStart=GET_INDEX(start); | |
357 | } | |
358 | if(pLength!=NULL) { | |
359 | if(runIndex>0) { | |
360 | *pLength=pBiDi->runs[runIndex].visualLimit- | |
361 | pBiDi->runs[runIndex-1].visualLimit; | |
362 | } else { | |
363 | *pLength=pBiDi->runs[0].visualLimit; | |
364 | } | |
365 | } | |
366 | return (UBiDiDirection)GET_ODD_BIT(start); | |
367 | } | |
368 | } | |
369 | ||
370 | /* compute the runs array --------------------------------------------------- */ | |
371 | ||
372 | /* | |
373 | * Compute the runs array from the levels array. | |
374 | * After ubidi_getRuns() returns TRUE, runCount is guaranteed to be >0 | |
375 | * and the runs are reordered. | |
376 | * Odd-level runs have visualStart on their visual right edge and | |
377 | * they progress visually to the left. | |
378 | */ | |
379 | U_CFUNC UBool | |
380 | ubidi_getRuns(UBiDi *pBiDi) { | |
381 | if(pBiDi->direction!=UBIDI_MIXED) { | |
382 | /* simple, single-run case - this covers length==0 */ | |
383 | getSingleRun(pBiDi, pBiDi->paraLevel); | |
384 | } else /* UBIDI_MIXED, length>0 */ { | |
385 | /* mixed directionality */ | |
386 | int32_t length=pBiDi->length, limit; | |
387 | ||
388 | /* | |
389 | * If there are WS characters at the end of the line | |
390 | * and the run preceding them has a level different from | |
391 | * paraLevel, then they will form their own run at paraLevel (L1). | |
392 | * Count them separately. | |
393 | * We need some special treatment for this in order to not | |
394 | * modify the levels array which a line UBiDi object shares | |
395 | * with its paragraph parent and its other line siblings. | |
396 | * In other words, for the trailing WS, it may be | |
397 | * levels[]!=paraLevel but we have to treat it like it were so. | |
398 | */ | |
399 | limit=pBiDi->trailingWSStart; | |
400 | if(limit==0) { | |
401 | /* there is only WS on this line */ | |
402 | getSingleRun(pBiDi, pBiDi->paraLevel); | |
403 | } else { | |
404 | UBiDiLevel *levels=pBiDi->levels; | |
405 | int32_t i, runCount; | |
406 | UBiDiLevel level=UBIDI_DEFAULT_LTR; /* initialize with no valid level */ | |
407 | ||
408 | /* count the runs, there is at least one non-WS run, and limit>0 */ | |
409 | runCount=0; | |
410 | for(i=0; i<limit; ++i) { | |
411 | /* increment runCount at the start of each run */ | |
412 | if(levels[i]!=level) { | |
413 | ++runCount; | |
414 | level=levels[i]; | |
415 | } | |
416 | } | |
417 | ||
418 | /* | |
419 | * We don't need to see if the last run can be merged with a trailing | |
420 | * WS run because setTrailingWSStart() would have done that. | |
421 | */ | |
422 | if(runCount==1 && limit==length) { | |
423 | /* There is only one non-WS run and no trailing WS-run. */ | |
424 | getSingleRun(pBiDi, levels[0]); | |
425 | } else /* runCount>1 || limit<length */ { | |
426 | /* allocate and set the runs */ | |
427 | Run *runs; | |
428 | int32_t runIndex, start; | |
429 | UBiDiLevel minLevel=UBIDI_MAX_EXPLICIT_LEVEL+1, maxLevel=0; | |
430 | ||
431 | /* now, count a (non-mergable) WS run */ | |
432 | if(limit<length) { | |
433 | ++runCount; | |
434 | } | |
435 | ||
436 | /* runCount>1 */ | |
437 | if(getRunsMemory(pBiDi, runCount)) { | |
438 | runs=pBiDi->runsMemory; | |
439 | } else { | |
440 | return FALSE; | |
441 | } | |
442 | ||
443 | /* set the runs */ | |
444 | /* this could be optimized, e.g.: 464->444, 484->444, 575->555, 595->555 */ | |
445 | /* however, that would take longer and make other functions more complicated */ | |
446 | runIndex=0; | |
447 | ||
448 | /* search for the run limits and initialize visualLimit values with the run lengths */ | |
449 | i=0; | |
450 | do { | |
451 | /* prepare this run */ | |
452 | start=i; | |
453 | level=levels[i]; | |
454 | if(level<minLevel) { | |
455 | minLevel=level; | |
456 | } | |
457 | if(level>maxLevel) { | |
458 | maxLevel=level; | |
459 | } | |
460 | ||
461 | /* look for the run limit */ | |
462 | while(++i<limit && levels[i]==level) {} | |
463 | ||
464 | /* i is another run limit */ | |
465 | runs[runIndex].logicalStart=start; | |
466 | runs[runIndex].visualLimit=i-start; | |
467 | ++runIndex; | |
468 | } while(i<limit); | |
469 | ||
470 | if(limit<length) { | |
471 | /* there is a separate WS run */ | |
472 | runs[runIndex].logicalStart=limit; | |
473 | runs[runIndex].visualLimit=length-limit; | |
474 | if(pBiDi->paraLevel<minLevel) { | |
475 | minLevel=pBiDi->paraLevel; | |
476 | } | |
477 | } | |
478 | ||
479 | /* set the object fields */ | |
480 | pBiDi->runs=runs; | |
481 | pBiDi->runCount=runCount; | |
482 | ||
483 | reorderLine(pBiDi, minLevel, maxLevel); | |
484 | ||
485 | /* now add the direction flags and adjust the visualLimit's to be just that */ | |
486 | ADD_ODD_BIT_FROM_LEVEL(runs[0].logicalStart, levels[runs[0].logicalStart]); | |
487 | limit=runs[0].visualLimit; | |
488 | for(i=1; i<runIndex; ++i) { | |
489 | ADD_ODD_BIT_FROM_LEVEL(runs[i].logicalStart, levels[runs[i].logicalStart]); | |
490 | limit=runs[i].visualLimit+=limit; | |
491 | } | |
492 | ||
493 | /* same for the trailing WS run */ | |
494 | if(runIndex<runCount) { | |
495 | ADD_ODD_BIT_FROM_LEVEL(runs[i].logicalStart, pBiDi->paraLevel); | |
496 | runs[runIndex].visualLimit+=limit; | |
497 | } | |
498 | } | |
499 | } | |
500 | } | |
501 | return TRUE; | |
502 | } | |
503 | ||
504 | /* in trivial cases there is only one trivial run; called by ubidi_getRuns() */ | |
505 | static void | |
506 | getSingleRun(UBiDi *pBiDi, UBiDiLevel level) { | |
507 | /* simple, single-run case */ | |
508 | pBiDi->runs=pBiDi->simpleRuns; | |
509 | pBiDi->runCount=1; | |
510 | ||
511 | /* fill and reorder the single run */ | |
512 | pBiDi->runs[0].logicalStart=MAKE_INDEX_ODD_PAIR(0, level); | |
513 | pBiDi->runs[0].visualLimit=pBiDi->length; | |
514 | } | |
515 | ||
516 | /* reorder the runs array (L2) ---------------------------------------------- */ | |
517 | ||
518 | /* | |
519 | * Reorder the same-level runs in the runs array. | |
520 | * Here, runCount>1 and maxLevel>=minLevel>=paraLevel. | |
521 | * All the visualStart fields=logical start before reordering. | |
522 | * The "odd" bits are not set yet. | |
523 | * | |
524 | * Reordering with this data structure lends itself to some handy shortcuts: | |
525 | * | |
526 | * Since each run is moved but not modified, and since at the initial maxLevel | |
527 | * each sequence of same-level runs consists of only one run each, we | |
528 | * don't need to do anything there and can predecrement maxLevel. | |
529 | * In many simple cases, the reordering is thus done entirely in the | |
530 | * index mapping. | |
531 | * Also, reordering occurs only down to the lowest odd level that occurs, | |
532 | * which is minLevel|1. However, if the lowest level itself is odd, then | |
533 | * in the last reordering the sequence of the runs at this level or higher | |
534 | * will be all runs, and we don't need the elaborate loop to search for them. | |
535 | * This is covered by ++minLevel instead of minLevel|=1 followed | |
536 | * by an extra reorder-all after the reorder-some loop. | |
537 | * About a trailing WS run: | |
538 | * Such a run would need special treatment because its level is not | |
539 | * reflected in levels[] if this is not a paragraph object. | |
540 | * Instead, all characters from trailingWSStart on are implicitly at | |
541 | * paraLevel. | |
542 | * However, for all maxLevel>paraLevel, this run will never be reordered | |
543 | * and does not need to be taken into account. maxLevel==paraLevel is only reordered | |
544 | * if minLevel==paraLevel is odd, which is done in the extra segment. | |
545 | * This means that for the main reordering loop we don't need to consider | |
546 | * this run and can --runCount. If it is later part of the all-runs | |
547 | * reordering, then runCount is adjusted accordingly. | |
548 | */ | |
549 | static void | |
550 | reorderLine(UBiDi *pBiDi, UBiDiLevel minLevel, UBiDiLevel maxLevel) { | |
551 | Run *runs; | |
552 | UBiDiLevel *levels; | |
553 | int32_t firstRun, endRun, limitRun, runCount, | |
554 | temp; | |
555 | ||
556 | /* nothing to do? */ | |
557 | if(maxLevel<=(minLevel|1)) { | |
558 | return; | |
559 | } | |
560 | ||
561 | /* | |
562 | * Reorder only down to the lowest odd level | |
563 | * and reorder at an odd minLevel in a separate, simpler loop. | |
564 | * See comments above for why minLevel is always incremented. | |
565 | */ | |
566 | ++minLevel; | |
567 | ||
568 | runs=pBiDi->runs; | |
569 | levels=pBiDi->levels; | |
570 | runCount=pBiDi->runCount; | |
571 | ||
572 | /* do not include the WS run at paraLevel<=old minLevel except in the simple loop */ | |
573 | if(pBiDi->trailingWSStart<pBiDi->length) { | |
574 | --runCount; | |
575 | } | |
576 | ||
577 | while(--maxLevel>=minLevel) { | |
578 | firstRun=0; | |
579 | ||
580 | /* loop for all sequences of runs */ | |
581 | for(;;) { | |
582 | /* look for a sequence of runs that are all at >=maxLevel */ | |
583 | /* look for the first run of such a sequence */ | |
584 | while(firstRun<runCount && levels[runs[firstRun].logicalStart]<maxLevel) { | |
585 | ++firstRun; | |
586 | } | |
587 | if(firstRun>=runCount) { | |
588 | break; /* no more such runs */ | |
589 | } | |
590 | ||
591 | /* look for the limit run of such a sequence (the run behind it) */ | |
592 | for(limitRun=firstRun; ++limitRun<runCount && levels[runs[limitRun].logicalStart]>=maxLevel;) {} | |
593 | ||
594 | /* Swap the entire sequence of runs from firstRun to limitRun-1. */ | |
595 | endRun=limitRun-1; | |
596 | while(firstRun<endRun) { | |
597 | temp=runs[firstRun].logicalStart; | |
598 | runs[firstRun].logicalStart=runs[endRun].logicalStart; | |
599 | runs[endRun].logicalStart=temp; | |
600 | ||
601 | temp=runs[firstRun].visualLimit; | |
602 | runs[firstRun].visualLimit=runs[endRun].visualLimit; | |
603 | runs[endRun].visualLimit=temp; | |
604 | ||
605 | ++firstRun; | |
606 | --endRun; | |
607 | } | |
608 | ||
609 | if(limitRun==runCount) { | |
610 | break; /* no more such runs */ | |
611 | } else { | |
612 | firstRun=limitRun+1; | |
613 | } | |
614 | } | |
615 | } | |
616 | ||
617 | /* now do maxLevel==old minLevel (==odd!), see above */ | |
618 | if(!(minLevel&1)) { | |
619 | firstRun=0; | |
620 | ||
621 | /* include the trailing WS run in this complete reordering */ | |
622 | if(pBiDi->trailingWSStart==pBiDi->length) { | |
623 | --runCount; | |
624 | } | |
625 | ||
626 | /* Swap the entire sequence of all runs. (endRun==runCount) */ | |
627 | while(firstRun<runCount) { | |
628 | temp=runs[firstRun].logicalStart; | |
629 | runs[firstRun].logicalStart=runs[runCount].logicalStart; | |
630 | runs[runCount].logicalStart=temp; | |
631 | ||
632 | temp=runs[firstRun].visualLimit; | |
633 | runs[firstRun].visualLimit=runs[runCount].visualLimit; | |
634 | runs[runCount].visualLimit=temp; | |
635 | ||
636 | ++firstRun; | |
637 | --runCount; | |
638 | } | |
639 | } | |
640 | } | |
641 | ||
642 | /* reorder a line based on a levels array (L2) ------------------------------ */ | |
643 | ||
644 | U_CAPI void U_EXPORT2 | |
645 | ubidi_reorderLogical(const UBiDiLevel *levels, int32_t length, int32_t *indexMap) { | |
646 | int32_t start, limit, sumOfSosEos; | |
647 | UBiDiLevel minLevel, maxLevel; | |
648 | ||
649 | if(indexMap==NULL || !prepareReorder(levels, length, indexMap, &minLevel, &maxLevel)) { | |
650 | return; | |
651 | } | |
652 | ||
653 | /* nothing to do? */ | |
654 | if(minLevel==maxLevel && (minLevel&1)==0) { | |
655 | return; | |
656 | } | |
657 | ||
658 | /* reorder only down to the lowest odd level */ | |
659 | minLevel|=1; | |
660 | ||
661 | /* loop maxLevel..minLevel */ | |
662 | do { | |
663 | start=0; | |
664 | ||
665 | /* loop for all sequences of levels to reorder at the current maxLevel */ | |
666 | for(;;) { | |
667 | /* look for a sequence of levels that are all at >=maxLevel */ | |
668 | /* look for the first index of such a sequence */ | |
669 | while(start<length && levels[start]<maxLevel) { | |
670 | ++start; | |
671 | } | |
672 | if(start>=length) { | |
673 | break; /* no more such sequences */ | |
674 | } | |
675 | ||
676 | /* look for the limit of such a sequence (the index behind it) */ | |
677 | for(limit=start; ++limit<length && levels[limit]>=maxLevel;) {} | |
678 | ||
679 | /* | |
680 | * sos=start of sequence, eos=end of sequence | |
681 | * | |
682 | * The closed (inclusive) interval from sos to eos includes all the logical | |
683 | * and visual indexes within this sequence. They are logically and | |
684 | * visually contiguous and in the same range. | |
685 | * | |
686 | * For each run, the new visual index=sos+eos-old visual index; | |
687 | * we pre-add sos+eos into sumOfSosEos -> | |
688 | * new visual index=sumOfSosEos-old visual index; | |
689 | */ | |
690 | sumOfSosEos=start+limit-1; | |
691 | ||
692 | /* reorder each index in the sequence */ | |
693 | do { | |
694 | indexMap[start]=sumOfSosEos-indexMap[start]; | |
695 | } while(++start<limit); | |
696 | ||
697 | /* start==limit */ | |
698 | if(limit==length) { | |
699 | break; /* no more such sequences */ | |
700 | } else { | |
701 | start=limit+1; | |
702 | } | |
703 | } | |
704 | } while(--maxLevel>=minLevel); | |
705 | } | |
706 | ||
707 | U_CAPI void U_EXPORT2 | |
708 | ubidi_reorderVisual(const UBiDiLevel *levels, int32_t length, int32_t *indexMap) { | |
709 | int32_t start, end, limit, temp; | |
710 | UBiDiLevel minLevel, maxLevel; | |
711 | ||
712 | if(indexMap==NULL || !prepareReorder(levels, length, indexMap, &minLevel, &maxLevel)) { | |
713 | return; | |
714 | } | |
715 | ||
716 | /* nothing to do? */ | |
717 | if(minLevel==maxLevel && (minLevel&1)==0) { | |
718 | return; | |
719 | } | |
720 | ||
721 | /* reorder only down to the lowest odd level */ | |
722 | minLevel|=1; | |
723 | ||
724 | /* loop maxLevel..minLevel */ | |
725 | do { | |
726 | start=0; | |
727 | ||
728 | /* loop for all sequences of levels to reorder at the current maxLevel */ | |
729 | for(;;) { | |
730 | /* look for a sequence of levels that are all at >=maxLevel */ | |
731 | /* look for the first index of such a sequence */ | |
732 | while(start<length && levels[start]<maxLevel) { | |
733 | ++start; | |
734 | } | |
735 | if(start>=length) { | |
736 | break; /* no more such runs */ | |
737 | } | |
738 | ||
739 | /* look for the limit of such a sequence (the index behind it) */ | |
740 | for(limit=start; ++limit<length && levels[limit]>=maxLevel;) {} | |
741 | ||
742 | /* | |
743 | * Swap the entire interval of indexes from start to limit-1. | |
744 | * We don't need to swap the levels for the purpose of this | |
745 | * algorithm: the sequence of levels that we look at does not | |
746 | * move anyway. | |
747 | */ | |
748 | end=limit-1; | |
749 | while(start<end) { | |
750 | temp=indexMap[start]; | |
751 | indexMap[start]=indexMap[end]; | |
752 | indexMap[end]=temp; | |
753 | ||
754 | ++start; | |
755 | --end; | |
756 | } | |
757 | ||
758 | if(limit==length) { | |
759 | break; /* no more such sequences */ | |
760 | } else { | |
761 | start=limit+1; | |
762 | } | |
763 | } | |
764 | } while(--maxLevel>=minLevel); | |
765 | } | |
766 | ||
767 | static UBool | |
768 | prepareReorder(const UBiDiLevel *levels, int32_t length, | |
769 | int32_t *indexMap, | |
770 | UBiDiLevel *pMinLevel, UBiDiLevel *pMaxLevel) { | |
771 | int32_t start; | |
772 | UBiDiLevel level, minLevel, maxLevel; | |
773 | ||
774 | if(levels==NULL || length<=0) { | |
775 | return FALSE; | |
776 | } | |
777 | ||
778 | /* determine minLevel and maxLevel */ | |
779 | minLevel=UBIDI_MAX_EXPLICIT_LEVEL+1; | |
780 | maxLevel=0; | |
781 | for(start=length; start>0;) { | |
782 | level=levels[--start]; | |
783 | if(level>UBIDI_MAX_EXPLICIT_LEVEL+1) { | |
784 | return FALSE; | |
785 | } | |
786 | if(level<minLevel) { | |
787 | minLevel=level; | |
788 | } | |
789 | if(level>maxLevel) { | |
790 | maxLevel=level; | |
791 | } | |
792 | } | |
793 | *pMinLevel=minLevel; | |
794 | *pMaxLevel=maxLevel; | |
795 | ||
796 | /* initialize the index map */ | |
797 | for(start=length; start>0;) { | |
798 | --start; | |
799 | indexMap[start]=start; | |
800 | } | |
801 | ||
802 | return TRUE; | |
803 | } | |
804 | ||
805 | /* API functions for logical<->visual mapping ------------------------------- */ | |
806 | ||
807 | U_CAPI int32_t U_EXPORT2 | |
808 | ubidi_getVisualIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode) { | |
809 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | |
810 | return 0; | |
811 | } else if(pBiDi==NULL) { | |
812 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
813 | return 0; | |
814 | } else if(logicalIndex<0 || pBiDi->length<=logicalIndex) { | |
815 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; | |
816 | return 0; | |
817 | } else { | |
818 | /* we can do the trivial cases without the runs array */ | |
819 | switch(pBiDi->direction) { | |
820 | case UBIDI_LTR: | |
821 | return logicalIndex; | |
822 | case UBIDI_RTL: | |
823 | return pBiDi->length-logicalIndex-1; | |
824 | default: | |
825 | if(pBiDi->runCount<0 && !ubidi_getRuns(pBiDi)) { | |
826 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; | |
827 | return 0; | |
828 | } else { | |
829 | Run *runs=pBiDi->runs; | |
830 | int32_t i, visualStart=0, offset, length; | |
831 | ||
832 | /* linear search for the run, search on the visual runs */ | |
833 | for(i=0;; ++i) { | |
834 | length=runs[i].visualLimit-visualStart; | |
835 | offset=logicalIndex-GET_INDEX(runs[i].logicalStart); | |
836 | if(offset>=0 && offset<length) { | |
837 | if(IS_EVEN_RUN(runs[i].logicalStart)) { | |
838 | /* LTR */ | |
839 | return visualStart+offset; | |
840 | } else { | |
841 | /* RTL */ | |
842 | return visualStart+length-offset-1; | |
843 | } | |
844 | } | |
845 | visualStart+=length; | |
846 | } | |
847 | } | |
848 | } | |
849 | } | |
850 | } | |
851 | ||
852 | U_CAPI int32_t U_EXPORT2 | |
853 | ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode) { | |
854 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | |
855 | return 0; | |
856 | } else if(pBiDi==NULL) { | |
857 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
858 | return 0; | |
859 | } else if(visualIndex<0 || pBiDi->length<=visualIndex) { | |
860 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; | |
861 | return 0; | |
862 | } else { | |
863 | /* we can do the trivial cases without the runs array */ | |
864 | switch(pBiDi->direction) { | |
865 | case UBIDI_LTR: | |
866 | return visualIndex; | |
867 | case UBIDI_RTL: | |
868 | return pBiDi->length-visualIndex-1; | |
869 | default: | |
870 | if(pBiDi->runCount<0 && !ubidi_getRuns(pBiDi)) { | |
871 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; | |
872 | return 0; | |
873 | } else { | |
874 | Run *runs=pBiDi->runs; | |
875 | int32_t i, runCount=pBiDi->runCount, start; | |
876 | ||
877 | if(runCount<=10) { | |
878 | /* linear search for the run */ | |
879 | for(i=0; visualIndex>=runs[i].visualLimit; ++i) {} | |
880 | } else { | |
881 | /* binary search for the run */ | |
882 | int32_t begin=0, limit=runCount; | |
883 | ||
884 | /* the middle if() will guaranteed find the run, we don't need a loop limit */ | |
885 | for(;;) { | |
886 | i=(begin+limit)/2; | |
887 | if(visualIndex>=runs[i].visualLimit) { | |
888 | begin=i+1; | |
889 | } else if(i==0 || visualIndex>=runs[i-1].visualLimit) { | |
890 | break; | |
891 | } else { | |
892 | limit=i; | |
893 | } | |
894 | } | |
895 | } | |
896 | ||
897 | start=runs[i].logicalStart; | |
898 | if(IS_EVEN_RUN(start)) { | |
899 | /* LTR */ | |
900 | /* the offset in runs[i] is visualIndex-runs[i-1].visualLimit */ | |
901 | if(i>0) { | |
902 | visualIndex-=runs[i-1].visualLimit; | |
903 | } | |
904 | return GET_INDEX(start)+visualIndex; | |
905 | } else { | |
906 | /* RTL */ | |
907 | return GET_INDEX(start)+runs[i].visualLimit-visualIndex-1; | |
908 | } | |
909 | } | |
910 | } | |
911 | } | |
912 | } | |
913 | ||
914 | U_CAPI void U_EXPORT2 | |
915 | ubidi_getLogicalMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) { | |
916 | UBiDiLevel *levels; | |
917 | ||
918 | /* ubidi_getLevels() checks all of its and our arguments */ | |
919 | if((levels=(UBiDiLevel *)ubidi_getLevels(pBiDi, pErrorCode))==NULL) { | |
920 | /* no op */ | |
921 | } else if(indexMap==NULL) { | |
922 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
923 | } else { | |
924 | ubidi_reorderLogical(levels, pBiDi->length, indexMap); | |
925 | } | |
926 | } | |
927 | ||
928 | U_CAPI void U_EXPORT2 | |
929 | ubidi_getVisualMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) { | |
930 | /* ubidi_countRuns() checks all of its and our arguments */ | |
931 | if(ubidi_countRuns(pBiDi, pErrorCode)<=0) { | |
932 | /* no op */ | |
933 | } else if(indexMap==NULL) { | |
934 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
935 | } else { | |
936 | /* fill a visual-to-logical index map using the runs[] */ | |
937 | Run *runs=pBiDi->runs, *runsLimit=runs+pBiDi->runCount; | |
938 | int32_t logicalStart, visualStart, visualLimit; | |
939 | ||
940 | visualStart=0; | |
941 | for(; runs<runsLimit; ++runs) { | |
942 | logicalStart=runs->logicalStart; | |
943 | visualLimit=runs->visualLimit; | |
944 | if(IS_EVEN_RUN(logicalStart)) { | |
945 | do { /* LTR */ | |
946 | *indexMap++ = logicalStart++; | |
947 | } while(++visualStart<visualLimit); | |
948 | } else { | |
949 | REMOVE_ODD_BIT(logicalStart); | |
950 | logicalStart+=visualLimit-visualStart; /* logicalLimit */ | |
951 | do { /* RTL */ | |
952 | *indexMap++ = --logicalStart; | |
953 | } while(++visualStart<visualLimit); | |
954 | } | |
955 | /* visualStart==visualLimit; */ | |
956 | } | |
957 | } | |
958 | } | |
959 | ||
960 | U_CAPI void U_EXPORT2 | |
961 | ubidi_invertMap(const int32_t *srcMap, int32_t *destMap, int32_t length) { | |
962 | if(srcMap!=NULL && destMap!=NULL) { | |
963 | srcMap+=length; | |
964 | while(length>0) { | |
965 | destMap[*--srcMap]=--length; | |
966 | } | |
967 | } | |
968 | } |