]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | ****************************************************************************** | |
3 | * | |
374ca955 | 4 | * Copyright (C) 1999-2003, International Business Machines |
b75a7d8f A |
5 | * Corporation and others. All Rights Reserved. |
6 | * | |
7 | ****************************************************************************** | |
8 | * file name: ubidiln.c | |
9 | * encoding: US-ASCII | |
10 | * tab size: 8 (not used) | |
11 | * indentation:4 | |
12 | * | |
13 | * created on: 1999aug06 | |
14 | * created by: Markus W. Scherer | |
15 | */ | |
16 | ||
17 | /* set import/export definitions */ | |
18 | #ifndef U_COMMON_IMPLEMENTATION | |
19 | # define U_COMMON_IMPLEMENTATION | |
20 | #endif | |
21 | ||
22 | #include "cmemory.h" | |
23 | #include "unicode/utypes.h" | |
24 | #include "unicode/ustring.h" | |
25 | #include "unicode/uchar.h" | |
26 | #include "unicode/ubidi.h" | |
27 | #include "ubidiimp.h" | |
28 | ||
29 | /* | |
30 | * General remarks about the functions in this file: | |
31 | * | |
32 | * These functions deal with the aspects of potentially mixed-directional | |
33 | * text in a single paragraph or in a line of a single paragraph | |
34 | * which has already been processed according to | |
35 | * the Unicode 3.0 BiDi algorithm as defined in | |
36 | * http://www.unicode.org/unicode/reports/tr9/ , version 5, | |
37 | * also described in The Unicode Standard, Version 3.0 . | |
38 | * | |
39 | * This means that there is a UBiDi object with a levels | |
40 | * and a dirProps array. | |
41 | * paraLevel and direction are also set. | |
42 | * Only if the length of the text is zero, then levels==dirProps==NULL. | |
43 | * | |
44 | * The overall directionality of the paragraph | |
45 | * or line is used to bypass the reordering steps if possible. | |
46 | * Even purely RTL text does not need reordering there because | |
47 | * the ubidi_getLogical/VisualIndex() functions can compute the | |
48 | * index on the fly in such a case. | |
49 | * | |
50 | * The implementation of the access to same-level-runs and of the reordering | |
51 | * do attempt to provide better performance and less memory usage compared to | |
52 | * a direct implementation of especially rule (L2) with an array of | |
53 | * one (32-bit) integer per text character. | |
54 | * | |
55 | * Here, the levels array is scanned as soon as necessary, and a vector of | |
56 | * same-level-runs is created. Reordering then is done on this vector. | |
57 | * For each run of text positions that were resolved to the same level, | |
58 | * only 8 bytes are stored: the first text position of the run and the visual | |
59 | * position behind the run after reordering. | |
60 | * One sign bit is used to hold the directionality of the run. | |
61 | * This is inefficient if there are many very short runs. If the average run | |
62 | * length is <2, then this uses more memory. | |
63 | * | |
64 | * In a further attempt to save memory, the levels array is never changed | |
65 | * after all the resolution rules (Xn, Wn, Nn, In). | |
66 | * Many functions have to consider the field trailingWSStart: | |
67 | * if it is less than length, then there is an implicit trailing run | |
68 | * at the paraLevel, | |
69 | * which is not reflected in the levels array. | |
70 | * This allows a line UBiDi object to use the same levels array as | |
71 | * its paragraph parent object. | |
72 | * | |
73 | * When a UBiDi object is created for a line of a paragraph, then the | |
74 | * paragraph's levels and dirProps arrays are reused by way of setting | |
75 | * a pointer into them, not by copying. This again saves memory and forbids to | |
76 | * change the now shared levels for (L1). | |
77 | */ | |
78 | ||
374ca955 | 79 | /* handle trailing WS (L1) -------------------------------------------------- */ |
b75a7d8f | 80 | |
374ca955 A |
81 | /* |
82 | * setTrailingWSStart() sets the start index for a trailing | |
83 | * run of WS in the line. This is necessary because we do not modify | |
84 | * the paragraph's levels array that we just point into. | |
85 | * Using trailingWSStart is another form of performing (L1). | |
86 | * | |
87 | * To make subsequent operations easier, we also include the run | |
88 | * before the WS if it is at the paraLevel - we merge the two here. | |
89 | */ | |
b75a7d8f | 90 | static void |
374ca955 A |
91 | setTrailingWSStart(UBiDi *pBiDi) { |
92 | /* pBiDi->direction!=UBIDI_MIXED */ | |
b75a7d8f | 93 | |
374ca955 A |
94 | const DirProp *dirProps=pBiDi->dirProps; |
95 | UBiDiLevel *levels=pBiDi->levels; | |
96 | int32_t start=pBiDi->length; | |
97 | UBiDiLevel paraLevel=pBiDi->paraLevel; | |
b75a7d8f | 98 | |
374ca955 A |
99 | /* go backwards across all WS, BN, explicit codes */ |
100 | while(start>0 && DIRPROP_FLAG(dirProps[start-1])&MASK_WS) { | |
101 | --start; | |
102 | } | |
b75a7d8f | 103 | |
374ca955 A |
104 | /* if the WS run can be merged with the previous run then do so here */ |
105 | while(start>0 && levels[start-1]==paraLevel) { | |
106 | --start; | |
107 | } | |
108 | ||
109 | pBiDi->trailingWSStart=start; | |
110 | } | |
b75a7d8f A |
111 | |
112 | /* ubidi_setLine ------------------------------------------------------------ */ | |
113 | ||
114 | U_CAPI void U_EXPORT2 | |
115 | ubidi_setLine(const UBiDi *pParaBiDi, | |
116 | int32_t start, int32_t limit, | |
117 | UBiDi *pLineBiDi, | |
118 | UErrorCode *pErrorCode) { | |
119 | int32_t length; | |
120 | ||
121 | /* check the argument values */ | |
122 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | |
123 | return; | |
124 | } else if(pParaBiDi==NULL || pLineBiDi==NULL) { | |
125 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
126 | return; | |
127 | } else if(start<0 || start>limit || limit>pParaBiDi->length) { | |
128 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; | |
129 | return; | |
130 | } | |
131 | ||
132 | /* set the values in pLineBiDi from its pParaBiDi parent */ | |
133 | pLineBiDi->text=pParaBiDi->text+start; | |
134 | length=pLineBiDi->length=limit-start; | |
135 | pLineBiDi->paraLevel=pParaBiDi->paraLevel; | |
136 | ||
137 | pLineBiDi->runs=NULL; | |
138 | pLineBiDi->flags=0; | |
139 | ||
140 | if(length>0) { | |
141 | pLineBiDi->dirProps=pParaBiDi->dirProps+start; | |
142 | pLineBiDi->levels=pParaBiDi->levels+start; | |
143 | pLineBiDi->runCount=-1; | |
144 | ||
145 | if(pParaBiDi->direction!=UBIDI_MIXED) { | |
146 | /* the parent is already trivial */ | |
147 | pLineBiDi->direction=pParaBiDi->direction; | |
148 | ||
149 | /* | |
150 | * The parent's levels are all either | |
151 | * implicitly or explicitly ==paraLevel; | |
152 | * do the same here. | |
153 | */ | |
154 | if(pParaBiDi->trailingWSStart<=start) { | |
155 | pLineBiDi->trailingWSStart=0; | |
156 | } else if(pParaBiDi->trailingWSStart<limit) { | |
157 | pLineBiDi->trailingWSStart=pParaBiDi->trailingWSStart-start; | |
158 | } else { | |
159 | pLineBiDi->trailingWSStart=length; | |
160 | } | |
161 | } else { | |
162 | const UBiDiLevel *levels=pLineBiDi->levels; | |
163 | int32_t i, trailingWSStart; | |
164 | UBiDiLevel level; | |
165 | ||
166 | setTrailingWSStart(pLineBiDi); | |
167 | trailingWSStart=pLineBiDi->trailingWSStart; | |
168 | ||
169 | /* recalculate pLineBiDi->direction */ | |
170 | if(trailingWSStart==0) { | |
171 | /* all levels are at paraLevel */ | |
172 | pLineBiDi->direction=(UBiDiDirection)(pLineBiDi->paraLevel&1); | |
173 | } else { | |
174 | /* get the level of the first character */ | |
175 | level=(UBiDiLevel)(levels[0]&1); | |
176 | ||
177 | /* if there is anything of a different level, then the line is mixed */ | |
178 | if(trailingWSStart<length && (pLineBiDi->paraLevel&1)!=level) { | |
179 | /* the trailing WS is at paraLevel, which differs from levels[0] */ | |
180 | pLineBiDi->direction=UBIDI_MIXED; | |
181 | } else { | |
182 | /* see if levels[1..trailingWSStart-1] have the same direction as levels[0] and paraLevel */ | |
183 | i=1; | |
184 | for(;;) { | |
185 | if(i==trailingWSStart) { | |
186 | /* the direction values match those in level */ | |
187 | pLineBiDi->direction=(UBiDiDirection)level; | |
188 | break; | |
189 | } else if((levels[i]&1)!=level) { | |
190 | pLineBiDi->direction=UBIDI_MIXED; | |
191 | break; | |
192 | } | |
193 | ++i; | |
194 | } | |
195 | } | |
196 | } | |
197 | ||
198 | switch(pLineBiDi->direction) { | |
199 | case UBIDI_LTR: | |
200 | /* make sure paraLevel is even */ | |
201 | pLineBiDi->paraLevel=(UBiDiLevel)((pLineBiDi->paraLevel+1)&~1); | |
202 | ||
203 | /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ | |
204 | pLineBiDi->trailingWSStart=0; | |
205 | break; | |
206 | case UBIDI_RTL: | |
207 | /* make sure paraLevel is odd */ | |
208 | pLineBiDi->paraLevel|=1; | |
209 | ||
210 | /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ | |
211 | pLineBiDi->trailingWSStart=0; | |
212 | break; | |
213 | default: | |
214 | break; | |
215 | } | |
216 | } | |
217 | } else { | |
218 | /* create an object for a zero-length line */ | |
219 | pLineBiDi->direction=pLineBiDi->paraLevel&1 ? UBIDI_RTL : UBIDI_LTR; | |
220 | pLineBiDi->trailingWSStart=pLineBiDi->runCount=0; | |
221 | ||
222 | pLineBiDi->dirProps=NULL; | |
223 | pLineBiDi->levels=NULL; | |
224 | } | |
225 | return; | |
226 | } | |
227 | ||
228 | U_CAPI UBiDiLevel U_EXPORT2 | |
229 | ubidi_getLevelAt(const UBiDi *pBiDi, int32_t charIndex) { | |
230 | /* return paraLevel if in the trailing WS run, otherwise the real level */ | |
231 | if(pBiDi==NULL || charIndex<0 || pBiDi->length<=charIndex) { | |
232 | return 0; | |
233 | } else if(pBiDi->direction!=UBIDI_MIXED || charIndex>=pBiDi->trailingWSStart) { | |
234 | return pBiDi->paraLevel; | |
235 | } else { | |
236 | return pBiDi->levels[charIndex]; | |
237 | } | |
238 | } | |
239 | ||
240 | U_CAPI const UBiDiLevel * U_EXPORT2 | |
241 | ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { | |
242 | int32_t start, length; | |
243 | ||
244 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | |
245 | return NULL; | |
246 | } else if(pBiDi==NULL || (length=pBiDi->length)<=0) { | |
247 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
248 | return NULL; | |
249 | } | |
250 | ||
251 | if((start=pBiDi->trailingWSStart)==length) { | |
252 | /* the current levels array reflects the WS run */ | |
253 | return pBiDi->levels; | |
254 | } | |
255 | ||
256 | /* | |
257 | * After the previous if(), we know that the levels array | |
258 | * has an implicit trailing WS run and therefore does not fully | |
259 | * reflect itself all the levels. | |
260 | * This must be a UBiDi object for a line, and | |
261 | * we need to create a new levels array. | |
262 | */ | |
263 | ||
264 | if(getLevelsMemory(pBiDi, length)) { | |
265 | UBiDiLevel *levels=pBiDi->levelsMemory; | |
266 | ||
267 | if(start>0 && levels!=pBiDi->levels) { | |
268 | uprv_memcpy(levels, pBiDi->levels, start); | |
269 | } | |
270 | uprv_memset(levels+start, pBiDi->paraLevel, length-start); | |
271 | ||
272 | /* this new levels array is set for the line and reflects the WS run */ | |
273 | pBiDi->trailingWSStart=length; | |
274 | return pBiDi->levels=levels; | |
275 | } else { | |
276 | /* out of memory */ | |
277 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; | |
278 | return NULL; | |
279 | } | |
280 | } | |
281 | ||
282 | U_CAPI void U_EXPORT2 | |
283 | ubidi_getLogicalRun(const UBiDi *pBiDi, int32_t logicalStart, | |
284 | int32_t *pLogicalLimit, UBiDiLevel *pLevel) { | |
285 | int32_t length; | |
286 | ||
287 | if(pBiDi==NULL || logicalStart<0 || (length=pBiDi->length)<=logicalStart) { | |
288 | return; | |
289 | } | |
290 | ||
291 | if(pBiDi->direction!=UBIDI_MIXED || logicalStart>=pBiDi->trailingWSStart) { | |
292 | if(pLogicalLimit!=NULL) { | |
293 | *pLogicalLimit=length; | |
294 | } | |
295 | if(pLevel!=NULL) { | |
296 | *pLevel=pBiDi->paraLevel; | |
297 | } | |
298 | } else { | |
299 | UBiDiLevel *levels=pBiDi->levels; | |
300 | UBiDiLevel level=levels[logicalStart]; | |
301 | ||
302 | /* search for the end of the run */ | |
303 | length=pBiDi->trailingWSStart; | |
304 | while(++logicalStart<length && level==levels[logicalStart]) {} | |
305 | ||
306 | if(pLogicalLimit!=NULL) { | |
307 | *pLogicalLimit=logicalStart; | |
308 | } | |
309 | if(pLevel!=NULL) { | |
310 | *pLevel=level; | |
311 | } | |
312 | } | |
313 | } | |
314 | ||
b75a7d8f A |
315 | /* runs API functions ------------------------------------------------------- */ |
316 | ||
317 | U_CAPI int32_t U_EXPORT2 | |
318 | ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode) { | |
319 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | |
320 | return -1; | |
321 | } else if(pBiDi==NULL || (pBiDi->runCount<0 && !ubidi_getRuns(pBiDi))) { | |
322 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; | |
323 | return -1; | |
324 | } else { | |
325 | return pBiDi->runCount; | |
326 | } | |
327 | } | |
328 | ||
329 | U_CAPI UBiDiDirection U_EXPORT2 | |
330 | ubidi_getVisualRun(UBiDi *pBiDi, int32_t runIndex, | |
331 | int32_t *pLogicalStart, int32_t *pLength) { | |
332 | if( pBiDi==NULL || runIndex<0 || | |
333 | (pBiDi->runCount==-1 && !ubidi_getRuns(pBiDi)) || | |
334 | runIndex>=pBiDi->runCount | |
335 | ) { | |
336 | return UBIDI_LTR; | |
337 | } else { | |
338 | int32_t start=pBiDi->runs[runIndex].logicalStart; | |
339 | if(pLogicalStart!=NULL) { | |
340 | *pLogicalStart=GET_INDEX(start); | |
341 | } | |
342 | if(pLength!=NULL) { | |
343 | if(runIndex>0) { | |
344 | *pLength=pBiDi->runs[runIndex].visualLimit- | |
345 | pBiDi->runs[runIndex-1].visualLimit; | |
346 | } else { | |
347 | *pLength=pBiDi->runs[0].visualLimit; | |
348 | } | |
349 | } | |
350 | return (UBiDiDirection)GET_ODD_BIT(start); | |
351 | } | |
352 | } | |
353 | ||
374ca955 A |
354 | /* in trivial cases there is only one trivial run; called by ubidi_getRuns() */ |
355 | static void | |
356 | getSingleRun(UBiDi *pBiDi, UBiDiLevel level) { | |
357 | /* simple, single-run case */ | |
358 | pBiDi->runs=pBiDi->simpleRuns; | |
359 | pBiDi->runCount=1; | |
360 | ||
361 | /* fill and reorder the single run */ | |
362 | pBiDi->runs[0].logicalStart=MAKE_INDEX_ODD_PAIR(0, level); | |
363 | pBiDi->runs[0].visualLimit=pBiDi->length; | |
364 | } | |
365 | ||
366 | /* reorder the runs array (L2) ---------------------------------------------- */ | |
367 | ||
368 | /* | |
369 | * Reorder the same-level runs in the runs array. | |
370 | * Here, runCount>1 and maxLevel>=minLevel>=paraLevel. | |
371 | * All the visualStart fields=logical start before reordering. | |
372 | * The "odd" bits are not set yet. | |
373 | * | |
374 | * Reordering with this data structure lends itself to some handy shortcuts: | |
375 | * | |
376 | * Since each run is moved but not modified, and since at the initial maxLevel | |
377 | * each sequence of same-level runs consists of only one run each, we | |
378 | * don't need to do anything there and can predecrement maxLevel. | |
379 | * In many simple cases, the reordering is thus done entirely in the | |
380 | * index mapping. | |
381 | * Also, reordering occurs only down to the lowest odd level that occurs, | |
382 | * which is minLevel|1. However, if the lowest level itself is odd, then | |
383 | * in the last reordering the sequence of the runs at this level or higher | |
384 | * will be all runs, and we don't need the elaborate loop to search for them. | |
385 | * This is covered by ++minLevel instead of minLevel|=1 followed | |
386 | * by an extra reorder-all after the reorder-some loop. | |
387 | * About a trailing WS run: | |
388 | * Such a run would need special treatment because its level is not | |
389 | * reflected in levels[] if this is not a paragraph object. | |
390 | * Instead, all characters from trailingWSStart on are implicitly at | |
391 | * paraLevel. | |
392 | * However, for all maxLevel>paraLevel, this run will never be reordered | |
393 | * and does not need to be taken into account. maxLevel==paraLevel is only reordered | |
394 | * if minLevel==paraLevel is odd, which is done in the extra segment. | |
395 | * This means that for the main reordering loop we don't need to consider | |
396 | * this run and can --runCount. If it is later part of the all-runs | |
397 | * reordering, then runCount is adjusted accordingly. | |
398 | */ | |
399 | static void | |
400 | reorderLine(UBiDi *pBiDi, UBiDiLevel minLevel, UBiDiLevel maxLevel) { | |
401 | Run *runs; | |
402 | UBiDiLevel *levels; | |
403 | int32_t firstRun, endRun, limitRun, runCount, | |
404 | temp; | |
405 | ||
406 | /* nothing to do? */ | |
407 | if(maxLevel<=(minLevel|1)) { | |
408 | return; | |
409 | } | |
410 | ||
411 | /* | |
412 | * Reorder only down to the lowest odd level | |
413 | * and reorder at an odd minLevel in a separate, simpler loop. | |
414 | * See comments above for why minLevel is always incremented. | |
415 | */ | |
416 | ++minLevel; | |
417 | ||
418 | runs=pBiDi->runs; | |
419 | levels=pBiDi->levels; | |
420 | runCount=pBiDi->runCount; | |
421 | ||
422 | /* do not include the WS run at paraLevel<=old minLevel except in the simple loop */ | |
423 | if(pBiDi->trailingWSStart<pBiDi->length) { | |
424 | --runCount; | |
425 | } | |
426 | ||
427 | while(--maxLevel>=minLevel) { | |
428 | firstRun=0; | |
429 | ||
430 | /* loop for all sequences of runs */ | |
431 | for(;;) { | |
432 | /* look for a sequence of runs that are all at >=maxLevel */ | |
433 | /* look for the first run of such a sequence */ | |
434 | while(firstRun<runCount && levels[runs[firstRun].logicalStart]<maxLevel) { | |
435 | ++firstRun; | |
436 | } | |
437 | if(firstRun>=runCount) { | |
438 | break; /* no more such runs */ | |
439 | } | |
440 | ||
441 | /* look for the limit run of such a sequence (the run behind it) */ | |
442 | for(limitRun=firstRun; ++limitRun<runCount && levels[runs[limitRun].logicalStart]>=maxLevel;) {} | |
443 | ||
444 | /* Swap the entire sequence of runs from firstRun to limitRun-1. */ | |
445 | endRun=limitRun-1; | |
446 | while(firstRun<endRun) { | |
447 | temp=runs[firstRun].logicalStart; | |
448 | runs[firstRun].logicalStart=runs[endRun].logicalStart; | |
449 | runs[endRun].logicalStart=temp; | |
450 | ||
451 | temp=runs[firstRun].visualLimit; | |
452 | runs[firstRun].visualLimit=runs[endRun].visualLimit; | |
453 | runs[endRun].visualLimit=temp; | |
454 | ||
455 | ++firstRun; | |
456 | --endRun; | |
457 | } | |
458 | ||
459 | if(limitRun==runCount) { | |
460 | break; /* no more such runs */ | |
461 | } else { | |
462 | firstRun=limitRun+1; | |
463 | } | |
464 | } | |
465 | } | |
466 | ||
467 | /* now do maxLevel==old minLevel (==odd!), see above */ | |
468 | if(!(minLevel&1)) { | |
469 | firstRun=0; | |
470 | ||
471 | /* include the trailing WS run in this complete reordering */ | |
472 | if(pBiDi->trailingWSStart==pBiDi->length) { | |
473 | --runCount; | |
474 | } | |
475 | ||
476 | /* Swap the entire sequence of all runs. (endRun==runCount) */ | |
477 | while(firstRun<runCount) { | |
478 | temp=runs[firstRun].logicalStart; | |
479 | runs[firstRun].logicalStart=runs[runCount].logicalStart; | |
480 | runs[runCount].logicalStart=temp; | |
481 | ||
482 | temp=runs[firstRun].visualLimit; | |
483 | runs[firstRun].visualLimit=runs[runCount].visualLimit; | |
484 | runs[runCount].visualLimit=temp; | |
485 | ||
486 | ++firstRun; | |
487 | --runCount; | |
488 | } | |
489 | } | |
490 | } | |
491 | ||
b75a7d8f A |
492 | /* compute the runs array --------------------------------------------------- */ |
493 | ||
494 | /* | |
495 | * Compute the runs array from the levels array. | |
496 | * After ubidi_getRuns() returns TRUE, runCount is guaranteed to be >0 | |
497 | * and the runs are reordered. | |
498 | * Odd-level runs have visualStart on their visual right edge and | |
499 | * they progress visually to the left. | |
500 | */ | |
501 | U_CFUNC UBool | |
502 | ubidi_getRuns(UBiDi *pBiDi) { | |
503 | if(pBiDi->direction!=UBIDI_MIXED) { | |
504 | /* simple, single-run case - this covers length==0 */ | |
505 | getSingleRun(pBiDi, pBiDi->paraLevel); | |
506 | } else /* UBIDI_MIXED, length>0 */ { | |
507 | /* mixed directionality */ | |
508 | int32_t length=pBiDi->length, limit; | |
509 | ||
510 | /* | |
511 | * If there are WS characters at the end of the line | |
512 | * and the run preceding them has a level different from | |
513 | * paraLevel, then they will form their own run at paraLevel (L1). | |
514 | * Count them separately. | |
515 | * We need some special treatment for this in order to not | |
516 | * modify the levels array which a line UBiDi object shares | |
517 | * with its paragraph parent and its other line siblings. | |
518 | * In other words, for the trailing WS, it may be | |
519 | * levels[]!=paraLevel but we have to treat it like it were so. | |
520 | */ | |
521 | limit=pBiDi->trailingWSStart; | |
522 | if(limit==0) { | |
523 | /* there is only WS on this line */ | |
524 | getSingleRun(pBiDi, pBiDi->paraLevel); | |
525 | } else { | |
526 | UBiDiLevel *levels=pBiDi->levels; | |
527 | int32_t i, runCount; | |
528 | UBiDiLevel level=UBIDI_DEFAULT_LTR; /* initialize with no valid level */ | |
529 | ||
530 | /* count the runs, there is at least one non-WS run, and limit>0 */ | |
531 | runCount=0; | |
532 | for(i=0; i<limit; ++i) { | |
533 | /* increment runCount at the start of each run */ | |
534 | if(levels[i]!=level) { | |
535 | ++runCount; | |
536 | level=levels[i]; | |
537 | } | |
538 | } | |
539 | ||
540 | /* | |
541 | * We don't need to see if the last run can be merged with a trailing | |
542 | * WS run because setTrailingWSStart() would have done that. | |
543 | */ | |
544 | if(runCount==1 && limit==length) { | |
545 | /* There is only one non-WS run and no trailing WS-run. */ | |
546 | getSingleRun(pBiDi, levels[0]); | |
547 | } else /* runCount>1 || limit<length */ { | |
548 | /* allocate and set the runs */ | |
549 | Run *runs; | |
550 | int32_t runIndex, start; | |
551 | UBiDiLevel minLevel=UBIDI_MAX_EXPLICIT_LEVEL+1, maxLevel=0; | |
552 | ||
553 | /* now, count a (non-mergable) WS run */ | |
554 | if(limit<length) { | |
555 | ++runCount; | |
556 | } | |
557 | ||
558 | /* runCount>1 */ | |
559 | if(getRunsMemory(pBiDi, runCount)) { | |
560 | runs=pBiDi->runsMemory; | |
561 | } else { | |
562 | return FALSE; | |
563 | } | |
564 | ||
565 | /* set the runs */ | |
566 | /* this could be optimized, e.g.: 464->444, 484->444, 575->555, 595->555 */ | |
567 | /* however, that would take longer and make other functions more complicated */ | |
568 | runIndex=0; | |
569 | ||
570 | /* search for the run limits and initialize visualLimit values with the run lengths */ | |
571 | i=0; | |
572 | do { | |
573 | /* prepare this run */ | |
574 | start=i; | |
575 | level=levels[i]; | |
576 | if(level<minLevel) { | |
577 | minLevel=level; | |
578 | } | |
579 | if(level>maxLevel) { | |
580 | maxLevel=level; | |
581 | } | |
582 | ||
583 | /* look for the run limit */ | |
584 | while(++i<limit && levels[i]==level) {} | |
585 | ||
586 | /* i is another run limit */ | |
587 | runs[runIndex].logicalStart=start; | |
588 | runs[runIndex].visualLimit=i-start; | |
589 | ++runIndex; | |
590 | } while(i<limit); | |
591 | ||
592 | if(limit<length) { | |
593 | /* there is a separate WS run */ | |
594 | runs[runIndex].logicalStart=limit; | |
595 | runs[runIndex].visualLimit=length-limit; | |
596 | if(pBiDi->paraLevel<minLevel) { | |
597 | minLevel=pBiDi->paraLevel; | |
598 | } | |
599 | } | |
600 | ||
601 | /* set the object fields */ | |
602 | pBiDi->runs=runs; | |
603 | pBiDi->runCount=runCount; | |
604 | ||
605 | reorderLine(pBiDi, minLevel, maxLevel); | |
606 | ||
607 | /* now add the direction flags and adjust the visualLimit's to be just that */ | |
608 | ADD_ODD_BIT_FROM_LEVEL(runs[0].logicalStart, levels[runs[0].logicalStart]); | |
609 | limit=runs[0].visualLimit; | |
374ca955 A |
610 | |
611 | /* this loop will also handle the trailing WS run */ | |
612 | for(i=1; i<runCount; ++i) { | |
b75a7d8f A |
613 | ADD_ODD_BIT_FROM_LEVEL(runs[i].logicalStart, levels[runs[i].logicalStart]); |
614 | limit=runs[i].visualLimit+=limit; | |
615 | } | |
616 | ||
374ca955 A |
617 | /* Set the "odd" bit for the trailing WS run. */ |
618 | /* For a RTL paragraph, it will be the *first* run in visual order. */ | |
b75a7d8f | 619 | if(runIndex<runCount) { |
374ca955 A |
620 | int32_t trailingRun = ((pBiDi->paraLevel & 1) != 0)? 0 : runIndex; |
621 | ||
622 | ADD_ODD_BIT_FROM_LEVEL(runs[trailingRun].logicalStart, pBiDi->paraLevel); | |
b75a7d8f A |
623 | } |
624 | } | |
625 | } | |
626 | } | |
627 | return TRUE; | |
628 | } | |
629 | ||
374ca955 A |
630 | static UBool |
631 | prepareReorder(const UBiDiLevel *levels, int32_t length, | |
632 | int32_t *indexMap, | |
633 | UBiDiLevel *pMinLevel, UBiDiLevel *pMaxLevel) { | |
634 | int32_t start; | |
635 | UBiDiLevel level, minLevel, maxLevel; | |
b75a7d8f | 636 | |
374ca955 A |
637 | if(levels==NULL || length<=0) { |
638 | return FALSE; | |
b75a7d8f A |
639 | } |
640 | ||
374ca955 A |
641 | /* determine minLevel and maxLevel */ |
642 | minLevel=UBIDI_MAX_EXPLICIT_LEVEL+1; | |
643 | maxLevel=0; | |
644 | for(start=length; start>0;) { | |
645 | level=levels[--start]; | |
646 | if(level>UBIDI_MAX_EXPLICIT_LEVEL+1) { | |
647 | return FALSE; | |
b75a7d8f | 648 | } |
374ca955 A |
649 | if(level<minLevel) { |
650 | minLevel=level; | |
b75a7d8f | 651 | } |
374ca955 A |
652 | if(level>maxLevel) { |
653 | maxLevel=level; | |
b75a7d8f A |
654 | } |
655 | } | |
374ca955 A |
656 | *pMinLevel=minLevel; |
657 | *pMaxLevel=maxLevel; | |
658 | ||
659 | /* initialize the index map */ | |
660 | for(start=length; start>0;) { | |
661 | --start; | |
662 | indexMap[start]=start; | |
663 | } | |
664 | ||
665 | return TRUE; | |
b75a7d8f A |
666 | } |
667 | ||
668 | /* reorder a line based on a levels array (L2) ------------------------------ */ | |
669 | ||
670 | U_CAPI void U_EXPORT2 | |
671 | ubidi_reorderLogical(const UBiDiLevel *levels, int32_t length, int32_t *indexMap) { | |
672 | int32_t start, limit, sumOfSosEos; | |
673 | UBiDiLevel minLevel, maxLevel; | |
674 | ||
675 | if(indexMap==NULL || !prepareReorder(levels, length, indexMap, &minLevel, &maxLevel)) { | |
676 | return; | |
677 | } | |
678 | ||
679 | /* nothing to do? */ | |
680 | if(minLevel==maxLevel && (minLevel&1)==0) { | |
681 | return; | |
682 | } | |
683 | ||
684 | /* reorder only down to the lowest odd level */ | |
685 | minLevel|=1; | |
686 | ||
687 | /* loop maxLevel..minLevel */ | |
688 | do { | |
689 | start=0; | |
690 | ||
691 | /* loop for all sequences of levels to reorder at the current maxLevel */ | |
692 | for(;;) { | |
693 | /* look for a sequence of levels that are all at >=maxLevel */ | |
694 | /* look for the first index of such a sequence */ | |
695 | while(start<length && levels[start]<maxLevel) { | |
696 | ++start; | |
697 | } | |
698 | if(start>=length) { | |
699 | break; /* no more such sequences */ | |
700 | } | |
701 | ||
702 | /* look for the limit of such a sequence (the index behind it) */ | |
703 | for(limit=start; ++limit<length && levels[limit]>=maxLevel;) {} | |
704 | ||
705 | /* | |
706 | * sos=start of sequence, eos=end of sequence | |
707 | * | |
708 | * The closed (inclusive) interval from sos to eos includes all the logical | |
709 | * and visual indexes within this sequence. They are logically and | |
710 | * visually contiguous and in the same range. | |
711 | * | |
712 | * For each run, the new visual index=sos+eos-old visual index; | |
713 | * we pre-add sos+eos into sumOfSosEos -> | |
714 | * new visual index=sumOfSosEos-old visual index; | |
715 | */ | |
716 | sumOfSosEos=start+limit-1; | |
717 | ||
718 | /* reorder each index in the sequence */ | |
719 | do { | |
720 | indexMap[start]=sumOfSosEos-indexMap[start]; | |
721 | } while(++start<limit); | |
722 | ||
723 | /* start==limit */ | |
724 | if(limit==length) { | |
725 | break; /* no more such sequences */ | |
726 | } else { | |
727 | start=limit+1; | |
728 | } | |
729 | } | |
730 | } while(--maxLevel>=minLevel); | |
731 | } | |
732 | ||
733 | U_CAPI void U_EXPORT2 | |
734 | ubidi_reorderVisual(const UBiDiLevel *levels, int32_t length, int32_t *indexMap) { | |
735 | int32_t start, end, limit, temp; | |
736 | UBiDiLevel minLevel, maxLevel; | |
737 | ||
738 | if(indexMap==NULL || !prepareReorder(levels, length, indexMap, &minLevel, &maxLevel)) { | |
739 | return; | |
740 | } | |
741 | ||
742 | /* nothing to do? */ | |
743 | if(minLevel==maxLevel && (minLevel&1)==0) { | |
744 | return; | |
745 | } | |
746 | ||
747 | /* reorder only down to the lowest odd level */ | |
748 | minLevel|=1; | |
749 | ||
750 | /* loop maxLevel..minLevel */ | |
751 | do { | |
752 | start=0; | |
753 | ||
754 | /* loop for all sequences of levels to reorder at the current maxLevel */ | |
755 | for(;;) { | |
756 | /* look for a sequence of levels that are all at >=maxLevel */ | |
757 | /* look for the first index of such a sequence */ | |
758 | while(start<length && levels[start]<maxLevel) { | |
759 | ++start; | |
760 | } | |
761 | if(start>=length) { | |
762 | break; /* no more such runs */ | |
763 | } | |
764 | ||
765 | /* look for the limit of such a sequence (the index behind it) */ | |
766 | for(limit=start; ++limit<length && levels[limit]>=maxLevel;) {} | |
767 | ||
768 | /* | |
769 | * Swap the entire interval of indexes from start to limit-1. | |
770 | * We don't need to swap the levels for the purpose of this | |
771 | * algorithm: the sequence of levels that we look at does not | |
772 | * move anyway. | |
773 | */ | |
774 | end=limit-1; | |
775 | while(start<end) { | |
776 | temp=indexMap[start]; | |
777 | indexMap[start]=indexMap[end]; | |
778 | indexMap[end]=temp; | |
779 | ||
780 | ++start; | |
781 | --end; | |
782 | } | |
783 | ||
784 | if(limit==length) { | |
785 | break; /* no more such sequences */ | |
786 | } else { | |
787 | start=limit+1; | |
788 | } | |
789 | } | |
790 | } while(--maxLevel>=minLevel); | |
791 | } | |
792 | ||
b75a7d8f A |
793 | /* API functions for logical<->visual mapping ------------------------------- */ |
794 | ||
795 | U_CAPI int32_t U_EXPORT2 | |
796 | ubidi_getVisualIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode) { | |
797 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | |
798 | return 0; | |
799 | } else if(pBiDi==NULL) { | |
800 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
801 | return 0; | |
802 | } else if(logicalIndex<0 || pBiDi->length<=logicalIndex) { | |
803 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; | |
804 | return 0; | |
805 | } else { | |
806 | /* we can do the trivial cases without the runs array */ | |
807 | switch(pBiDi->direction) { | |
808 | case UBIDI_LTR: | |
809 | return logicalIndex; | |
810 | case UBIDI_RTL: | |
811 | return pBiDi->length-logicalIndex-1; | |
812 | default: | |
813 | if(pBiDi->runCount<0 && !ubidi_getRuns(pBiDi)) { | |
814 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; | |
815 | return 0; | |
816 | } else { | |
817 | Run *runs=pBiDi->runs; | |
818 | int32_t i, visualStart=0, offset, length; | |
819 | ||
820 | /* linear search for the run, search on the visual runs */ | |
821 | for(i=0;; ++i) { | |
822 | length=runs[i].visualLimit-visualStart; | |
823 | offset=logicalIndex-GET_INDEX(runs[i].logicalStart); | |
824 | if(offset>=0 && offset<length) { | |
825 | if(IS_EVEN_RUN(runs[i].logicalStart)) { | |
826 | /* LTR */ | |
827 | return visualStart+offset; | |
828 | } else { | |
829 | /* RTL */ | |
830 | return visualStart+length-offset-1; | |
831 | } | |
832 | } | |
833 | visualStart+=length; | |
834 | } | |
835 | } | |
836 | } | |
837 | } | |
838 | } | |
839 | ||
840 | U_CAPI int32_t U_EXPORT2 | |
841 | ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode) { | |
842 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | |
843 | return 0; | |
844 | } else if(pBiDi==NULL) { | |
845 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
846 | return 0; | |
847 | } else if(visualIndex<0 || pBiDi->length<=visualIndex) { | |
848 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; | |
849 | return 0; | |
850 | } else { | |
851 | /* we can do the trivial cases without the runs array */ | |
852 | switch(pBiDi->direction) { | |
853 | case UBIDI_LTR: | |
854 | return visualIndex; | |
855 | case UBIDI_RTL: | |
856 | return pBiDi->length-visualIndex-1; | |
857 | default: | |
858 | if(pBiDi->runCount<0 && !ubidi_getRuns(pBiDi)) { | |
859 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; | |
860 | return 0; | |
861 | } else { | |
862 | Run *runs=pBiDi->runs; | |
863 | int32_t i, runCount=pBiDi->runCount, start; | |
864 | ||
865 | if(runCount<=10) { | |
866 | /* linear search for the run */ | |
867 | for(i=0; visualIndex>=runs[i].visualLimit; ++i) {} | |
868 | } else { | |
869 | /* binary search for the run */ | |
870 | int32_t begin=0, limit=runCount; | |
871 | ||
872 | /* the middle if() will guaranteed find the run, we don't need a loop limit */ | |
873 | for(;;) { | |
874 | i=(begin+limit)/2; | |
875 | if(visualIndex>=runs[i].visualLimit) { | |
876 | begin=i+1; | |
877 | } else if(i==0 || visualIndex>=runs[i-1].visualLimit) { | |
878 | break; | |
879 | } else { | |
880 | limit=i; | |
881 | } | |
882 | } | |
883 | } | |
884 | ||
885 | start=runs[i].logicalStart; | |
886 | if(IS_EVEN_RUN(start)) { | |
887 | /* LTR */ | |
888 | /* the offset in runs[i] is visualIndex-runs[i-1].visualLimit */ | |
889 | if(i>0) { | |
890 | visualIndex-=runs[i-1].visualLimit; | |
891 | } | |
892 | return GET_INDEX(start)+visualIndex; | |
893 | } else { | |
894 | /* RTL */ | |
895 | return GET_INDEX(start)+runs[i].visualLimit-visualIndex-1; | |
896 | } | |
897 | } | |
898 | } | |
899 | } | |
900 | } | |
901 | ||
902 | U_CAPI void U_EXPORT2 | |
903 | ubidi_getLogicalMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) { | |
904 | UBiDiLevel *levels; | |
905 | ||
906 | /* ubidi_getLevels() checks all of its and our arguments */ | |
907 | if((levels=(UBiDiLevel *)ubidi_getLevels(pBiDi, pErrorCode))==NULL) { | |
908 | /* no op */ | |
909 | } else if(indexMap==NULL) { | |
910 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
911 | } else { | |
912 | ubidi_reorderLogical(levels, pBiDi->length, indexMap); | |
913 | } | |
914 | } | |
915 | ||
916 | U_CAPI void U_EXPORT2 | |
917 | ubidi_getVisualMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) { | |
918 | /* ubidi_countRuns() checks all of its and our arguments */ | |
919 | if(ubidi_countRuns(pBiDi, pErrorCode)<=0) { | |
920 | /* no op */ | |
921 | } else if(indexMap==NULL) { | |
922 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
923 | } else { | |
924 | /* fill a visual-to-logical index map using the runs[] */ | |
925 | Run *runs=pBiDi->runs, *runsLimit=runs+pBiDi->runCount; | |
926 | int32_t logicalStart, visualStart, visualLimit; | |
927 | ||
928 | visualStart=0; | |
929 | for(; runs<runsLimit; ++runs) { | |
930 | logicalStart=runs->logicalStart; | |
931 | visualLimit=runs->visualLimit; | |
932 | if(IS_EVEN_RUN(logicalStart)) { | |
933 | do { /* LTR */ | |
934 | *indexMap++ = logicalStart++; | |
935 | } while(++visualStart<visualLimit); | |
936 | } else { | |
937 | REMOVE_ODD_BIT(logicalStart); | |
938 | logicalStart+=visualLimit-visualStart; /* logicalLimit */ | |
939 | do { /* RTL */ | |
940 | *indexMap++ = --logicalStart; | |
941 | } while(++visualStart<visualLimit); | |
942 | } | |
943 | /* visualStart==visualLimit; */ | |
944 | } | |
945 | } | |
946 | } | |
947 | ||
948 | U_CAPI void U_EXPORT2 | |
949 | ubidi_invertMap(const int32_t *srcMap, int32_t *destMap, int32_t length) { | |
950 | if(srcMap!=NULL && destMap!=NULL) { | |
951 | srcMap+=length; | |
952 | while(length>0) { | |
953 | destMap[*--srcMap]=--length; | |
954 | } | |
955 | } | |
956 | } |