]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/ubidi.c
ICU-461.12.tar.gz
[apple/icu.git] / icuSources / common / ubidi.c
1 /*
2 ******************************************************************************
3 *
4 * Copyright (C) 1999-2010, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 ******************************************************************************
8 * file name: ubidi.c
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 1999jul27
14 * created by: Markus W. Scherer, updated by Matitiahu Allouche
15 */
16
17 #include "cmemory.h"
18 #include "unicode/utypes.h"
19 #include "unicode/ustring.h"
20 #include "unicode/uchar.h"
21 #include "unicode/ubidi.h"
22 #include "ubidi_props.h"
23 #include "ubidiimp.h"
24 #include "uassert.h"
25
26 /*
27 * General implementation notes:
28 *
29 * Throughout the implementation, there are comments like (W2) that refer to
30 * rules of the BiDi algorithm in its version 5, in this example to the second
31 * rule of the resolution of weak types.
32 *
33 * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
34 * character according to UTF-16, the second UChar gets the directional property of
35 * the entire character assigned, while the first one gets a BN, a boundary
36 * neutral, type, which is ignored by most of the algorithm according to
37 * rule (X9) and the implementation suggestions of the BiDi algorithm.
38 *
39 * Later, adjustWSLevels() will set the level for each BN to that of the
40 * following character (UChar), which results in surrogate pairs getting the
41 * same level on each of their surrogates.
42 *
43 * In a UTF-8 implementation, the same thing could be done: the last byte of
44 * a multi-byte sequence would get the "real" property, while all previous
45 * bytes of that sequence would get BN.
46 *
47 * It is not possible to assign all those parts of a character the same real
48 * property because this would fail in the resolution of weak types with rules
49 * that look at immediately surrounding types.
50 *
51 * As a related topic, this implementation does not remove Boundary Neutral
52 * types from the input, but ignores them wherever this is relevant.
53 * For example, the loop for the resolution of the weak types reads
54 * types until it finds a non-BN.
55 * Also, explicit embedding codes are neither changed into BN nor removed.
56 * They are only treated the same way real BNs are.
57 * As stated before, adjustWSLevels() takes care of them at the end.
58 * For the purpose of conformance, the levels of all these codes
59 * do not matter.
60 *
61 * Note that this implementation never modifies the dirProps
62 * after the initial setup.
63 *
64 *
65 * In this implementation, the resolution of weak types (Wn),
66 * neutrals (Nn), and the assignment of the resolved level (In)
67 * are all done in one single loop, in resolveImplicitLevels().
68 * Changes of dirProp values are done on the fly, without writing
69 * them back to the dirProps array.
70 *
71 *
72 * This implementation contains code that allows bypassing steps of the
73 * algorithm that are not needed for the specific paragraph
74 * in order to speed up the most common cases considerably,
75 * like text that is entirely LTR, or RTL text without numbers.
76 *
77 * Most of this is done by setting a bit for each directional property
78 * in a flags variable and later checking for whether there are
79 * any LTR characters or any RTL characters, or both, whether
80 * there are any explicit embedding codes, etc.
81 *
82 * If the (Xn) steps are performed, then the flags are re-evaluated,
83 * because they will then not contain the embedding codes any more
84 * and will be adjusted for override codes, so that subsequently
85 * more bypassing may be possible than what the initial flags suggested.
86 *
87 * If the text is not mixed-directional, then the
88 * algorithm steps for the weak type resolution are not performed,
89 * and all levels are set to the paragraph level.
90 *
91 * If there are no explicit embedding codes, then the (Xn) steps
92 * are not performed.
93 *
94 * If embedding levels are supplied as a parameter, then all
95 * explicit embedding codes are ignored, and the (Xn) steps
96 * are not performed.
97 *
98 * White Space types could get the level of the run they belong to,
99 * and are checked with a test of (flags&MASK_EMBEDDING) to
100 * consider if the paragraph direction should be considered in
101 * the flags variable.
102 *
103 * If there are no White Space types in the paragraph, then
104 * (L1) is not necessary in adjustWSLevels().
105 */
106
107 /* to avoid some conditional statements, use tiny constant arrays */
108 static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) };
109 static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) };
110 static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) };
111
112 #define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
113 #define DIRPROP_FLAG_E(level) flagE[(level)&1]
114 #define DIRPROP_FLAG_O(level) flagO[(level)&1]
115
116 /* UBiDi object management -------------------------------------------------- */
117
118 U_CAPI UBiDi * U_EXPORT2
119 ubidi_open(void)
120 {
121 UErrorCode errorCode=U_ZERO_ERROR;
122 return ubidi_openSized(0, 0, &errorCode);
123 }
124
125 U_CAPI UBiDi * U_EXPORT2
126 ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) {
127 UBiDi *pBiDi;
128
129 /* check the argument values */
130 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
131 return NULL;
132 } else if(maxLength<0 || maxRunCount<0) {
133 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
134 return NULL; /* invalid arguments */
135 }
136
137 /* allocate memory for the object */
138 pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi));
139 if(pBiDi==NULL) {
140 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
141 return NULL;
142 }
143
144 /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */
145 uprv_memset(pBiDi, 0, sizeof(UBiDi));
146
147 /* get BiDi properties */
148 pBiDi->bdp=ubidi_getSingleton();
149
150 /* allocate memory for arrays as requested */
151 if(maxLength>0) {
152 if( !getInitialDirPropsMemory(pBiDi, maxLength) ||
153 !getInitialLevelsMemory(pBiDi, maxLength)
154 ) {
155 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
156 }
157 } else {
158 pBiDi->mayAllocateText=TRUE;
159 }
160
161 if(maxRunCount>0) {
162 if(maxRunCount==1) {
163 /* use simpleRuns[] */
164 pBiDi->runsSize=sizeof(Run);
165 } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) {
166 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
167 }
168 } else {
169 pBiDi->mayAllocateRuns=TRUE;
170 }
171
172 if(U_SUCCESS(*pErrorCode)) {
173 return pBiDi;
174 } else {
175 ubidi_close(pBiDi);
176 return NULL;
177 }
178 }
179
180 /*
181 * We are allowed to allocate memory if memory==NULL or
182 * mayAllocate==TRUE for each array that we need.
183 * We also try to grow memory as needed if we
184 * allocate it.
185 *
186 * Assume sizeNeeded>0.
187 * If *pMemory!=NULL, then assume *pSize>0.
188 *
189 * ### this realloc() may unnecessarily copy the old data,
190 * which we know we don't need any more;
191 * is this the best way to do this??
192 */
193 U_CFUNC UBool
194 ubidi_getMemory(BidiMemoryForAllocation *bidiMem, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded) {
195 void **pMemory = (void **)bidiMem;
196 /* check for existing memory */
197 if(*pMemory==NULL) {
198 /* we need to allocate memory */
199 if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=NULL) {
200 *pSize=sizeNeeded;
201 return TRUE;
202 } else {
203 return FALSE;
204 }
205 } else {
206 if(sizeNeeded<=*pSize) {
207 /* there is already enough memory */
208 return TRUE;
209 }
210 else if(!mayAllocate) {
211 /* not enough memory, and we must not allocate */
212 return FALSE;
213 } else {
214 /* we try to grow */
215 void *memory;
216 /* in most cases, we do not need the copy-old-data part of
217 * realloc, but it is needed when adding runs using getRunsMemory()
218 * in setParaRunsOnly()
219 */
220 if((memory=uprv_realloc(*pMemory, sizeNeeded))!=NULL) {
221 *pMemory=memory;
222 *pSize=sizeNeeded;
223 return TRUE;
224 } else {
225 /* we failed to grow */
226 return FALSE;
227 }
228 }
229 }
230 }
231
232 U_CAPI void U_EXPORT2
233 ubidi_close(UBiDi *pBiDi) {
234 if(pBiDi!=NULL) {
235 pBiDi->pParaBiDi=NULL; /* in case one tries to reuse this block */
236 if(pBiDi->dirPropsMemory!=NULL) {
237 uprv_free(pBiDi->dirPropsMemory);
238 }
239 if(pBiDi->levelsMemory!=NULL) {
240 uprv_free(pBiDi->levelsMemory);
241 }
242 if(pBiDi->runsMemory!=NULL) {
243 uprv_free(pBiDi->runsMemory);
244 }
245 if(pBiDi->parasMemory!=NULL) {
246 uprv_free(pBiDi->parasMemory);
247 }
248 if(pBiDi->insertPoints.points!=NULL) {
249 uprv_free(pBiDi->insertPoints.points);
250 }
251
252 uprv_free(pBiDi);
253 }
254 }
255
256 /* set to approximate "inverse BiDi" ---------------------------------------- */
257
258 U_CAPI void U_EXPORT2
259 ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) {
260 if(pBiDi!=NULL) {
261 pBiDi->isInverse=isInverse;
262 pBiDi->reorderingMode = isInverse ? UBIDI_REORDER_INVERSE_NUMBERS_AS_L
263 : UBIDI_REORDER_DEFAULT;
264 }
265 }
266
267 U_CAPI UBool U_EXPORT2
268 ubidi_isInverse(UBiDi *pBiDi) {
269 if(pBiDi!=NULL) {
270 return pBiDi->isInverse;
271 } else {
272 return FALSE;
273 }
274 }
275
276 /* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
277 * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre
278 * concept of RUNS_ONLY which is a double operation.
279 * It could be advantageous to divide this into 3 concepts:
280 * a) Operation: direct / inverse / RUNS_ONLY
281 * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R
282 * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL
283 * This would allow combinations not possible today like RUNS_ONLY with
284 * NUMBERS_SPECIAL.
285 * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and
286 * REMOVE_CONTROLS for the inverse step.
287 * Not all combinations would be supported, and probably not all do make sense.
288 * This would need to document which ones are supported and what are the
289 * fallbacks for unsupported combinations.
290 */
291 U_CAPI void U_EXPORT2
292 ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) {
293 if ((pBiDi!=NULL) && (reorderingMode >= UBIDI_REORDER_DEFAULT)
294 && (reorderingMode < UBIDI_REORDER_COUNT)) {
295 pBiDi->reorderingMode = reorderingMode;
296 pBiDi->isInverse = (UBool)(reorderingMode == UBIDI_REORDER_INVERSE_NUMBERS_AS_L);
297 }
298 }
299
300 U_CAPI UBiDiReorderingMode U_EXPORT2
301 ubidi_getReorderingMode(UBiDi *pBiDi) {
302 if (pBiDi!=NULL) {
303 return pBiDi->reorderingMode;
304 } else {
305 return UBIDI_REORDER_DEFAULT;
306 }
307 }
308
309 U_CAPI void U_EXPORT2
310 ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions) {
311 if (reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
312 reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
313 }
314 if (pBiDi!=NULL) {
315 pBiDi->reorderingOptions=reorderingOptions;
316 }
317 }
318
319 U_CAPI uint32_t U_EXPORT2
320 ubidi_getReorderingOptions(UBiDi *pBiDi) {
321 if (pBiDi!=NULL) {
322 return pBiDi->reorderingOptions;
323 } else {
324 return 0;
325 }
326 }
327
328 U_CAPI UBiDiDirection U_EXPORT2
329 ubidi_getBaseDirection(const UChar *text,
330 int32_t length){
331
332 int32_t i;
333 UChar32 uchar;
334 UCharDirection dir;
335
336 if( text==NULL || length<-1 ){
337 return UBIDI_NEUTRAL;
338 }
339
340 if(length==-1) {
341 length=u_strlen(text);
342 }
343
344 for( i = 0 ; i < length; ) {
345 /* i is incremented by U16_NEXT */
346 U16_NEXT(text, i, length, uchar);
347 dir = u_charDirection(uchar);
348 if( dir == U_LEFT_TO_RIGHT )
349 return UBIDI_LTR;
350 if( dir == U_RIGHT_TO_LEFT || dir ==U_RIGHT_TO_LEFT_ARABIC )
351 return UBIDI_RTL;
352 }
353 return UBIDI_NEUTRAL;
354 }
355
356 /* perform (P2)..(P3) ------------------------------------------------------- */
357
358 /*
359 * Get the directional properties for the text,
360 * calculate the flags bit-set, and
361 * determine the paragraph level if necessary.
362 */
/*
 * Overview of getDirProps():
 * - walks pBiDi->text code point by code point up to pBiDi->originalLength,
 * - stores each code point's directional class (combined with the current
 *   paragraph-direction bit) into pBiDi->dirPropsMemory,
 * - accumulates one flag bit per class that occurs in the text,
 * - for a default ("contextual") paragraph level, derives each paragraph's
 *   direction from its first strong character,
 * - increments pBiDi->paraCount at paragraph separators that are not the
 *   last character (a CR immediately followed by LF is not counted),
 * - finally stores controlCount, flags and lastArabicPos in the object.
 * NOTE(review): pBiDi->paraCount is only incremented here; its initial value
 * is presumably set by the caller -- confirm.
 */
363 static void
364 getDirProps(UBiDi *pBiDi) {
365 const UChar *text=pBiDi->text;
366 DirProp *dirProps=pBiDi->dirPropsMemory; /* pBiDi->dirProps is const */
367
368 int32_t i=0, i1, length=pBiDi->originalLength;
369 Flags flags=0; /* collect all directionalities in the text */
370 UChar32 uchar;
371 DirProp dirProp=0, paraDirDefault=0;/* initialize to avoid compiler warnings */
372 UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel);
373 /* for inverse BiDi, the default para level is set to RTL if there is a
374 strong R or AL character at either end of the text */
375 UBool isDefaultLevelInverse=isDefaultLevel && (UBool)
376 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
377 pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL);
378 int32_t lastArabicPos=-1;
379 int32_t controlCount=0;
380 UBool removeBiDiControls = (UBool)(pBiDi->reorderingOptions &
381 UBIDI_OPTION_REMOVE_CONTROLS);
382
383 typedef enum {
384 NOT_CONTEXTUAL, /* 0: not contextual paraLevel */
385 LOOKING_FOR_STRONG, /* 1: looking for first strong char */
386 FOUND_STRONG_CHAR /* 2: found first strong char */
387 } State;
388 State state;
389 int32_t paraStart=0; /* index of first char in paragraph */
390 DirProp paraDir; /* == CONTEXT_RTL within paragraphs
391 starting with strong R char */
392 DirProp lastStrongDir=0; /* for default level & inverse BiDi */
393 int32_t lastStrongLTR=0; /* for STREAMING option */
394
/* in streaming mode the processed length is recomputed below; start from 0 */
395 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
396 pBiDi->length=0;
397 lastStrongLTR=0;
398 }
/* a default paragraph level starts from the direction given by its bit 0 and
   is refined once the first strong character of the paragraph is seen */
399 if(isDefaultLevel) {
400 paraDirDefault=pBiDi->paraLevel&1 ? CONTEXT_RTL : 0;
401 paraDir=paraDirDefault;
402 lastStrongDir=paraDirDefault;
403 state=LOOKING_FOR_STRONG;
404 } else {
405 state=NOT_CONTEXTUAL;
406 paraDir=0;
407 }
408 /* count paragraphs and determine the paragraph level (P2..P3) */
409 /*
410 * see comment in ubidi.h:
411 * the DEFAULT_XXX values are designed so that
412 * their bit 0 alone yields the intended default
413 */
414 for( /* i=0 above */ ; i<length; ) {
415 /* i is incremented by U16_NEXT */
416 U16_NEXT(text, i, length, uchar);
/* record the class of this code point and store it, together with the
   paragraph-direction bit, at the index of its last code unit */
417 flags|=DIRPROP_FLAG(dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar));
418 dirProps[i-1]=dirProp|paraDir;
419 if(uchar>0xffff) { /* set the lead surrogate's property to BN */
420 flags|=DIRPROP_FLAG(BN);
421 dirProps[i-2]=(DirProp)(BN|paraDir);
422 }
/* first strong character of a contextual paragraph: fix the paragraph
   direction and, if it differs from the provisional one, rewrite the
   direction bit of everything stored so far for this paragraph */
423 if(state==LOOKING_FOR_STRONG) {
424 if(dirProp==L) {
425 state=FOUND_STRONG_CHAR;
426 if(paraDir) {
427 paraDir=0;
428 for(i1=paraStart; i1<i; i1++) {
429 dirProps[i1]&=~CONTEXT_RTL;
430 }
431 }
432 continue;
433 }
434 if(dirProp==R || dirProp==AL) {
435 state=FOUND_STRONG_CHAR;
436 if(paraDir==0) {
437 paraDir=CONTEXT_RTL;
438 for(i1=paraStart; i1<i; i1++) {
439 dirProps[i1]|=CONTEXT_RTL;
440 }
441 }
442 continue;
443 }
444 }
/* keep track of the most recent strong character and the last AL position */
445 if(dirProp==L) {
446 lastStrongDir=0;
447 lastStrongLTR=i; /* i is index to next character */
448 }
449 else if(dirProp==R) {
450 lastStrongDir=CONTEXT_RTL;
451 }
452 else if(dirProp==AL) {
453 lastStrongDir=CONTEXT_RTL;
454 lastArabicPos=i-1;
455 }
456 else if(dirProp==B) {
/* paragraph separator: close the current paragraph */
457 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
458 pBiDi->length=i; /* i is index to next character */
459 }
/* inverse BiDi with default level: a paragraph whose last strong character
   is RTL is marked RTL as a whole */
460 if(isDefaultLevelInverse && (lastStrongDir==CONTEXT_RTL) &&(paraDir!=lastStrongDir)) {
461 for( ; paraStart<i; paraStart++) {
462 dirProps[paraStart]|=CONTEXT_RTL;
463 }
464 }
465 if(i<length) { /* B not last char in text */
/* CR immediately followed by LF is not counted as a paragraph end here;
   the following LF is a B itself and is counted when it is reached */
466 if(!((uchar==CR) && (text[i]==LF))) {
467 pBiDi->paraCount++;
468 }
469 if(isDefaultLevel) {
470 state=LOOKING_FOR_STRONG;
471 paraStart=i; /* i is index to next character */
472 paraDir=paraDirDefault;
473 lastStrongDir=paraDirDefault;
474 }
475 }
476 }
/* count BiDi control characters only when they are to be removed */
477 if(removeBiDiControls && IS_BIDI_CONTROL_CHAR(uchar)) {
478 controlCount++;
479 }
480 }
/* same inverse-BiDi adjustment as above, for the last paragraph of the text */
481 if(isDefaultLevelInverse && (lastStrongDir==CONTEXT_RTL) &&(paraDir!=lastStrongDir)) {
482 for(i1=paraStart; i1<length; i1++) {
483 dirProps[i1]|=CONTEXT_RTL;
484 }
485 }
/* replace the default level by the level resolved for the first paragraph */
486 if(isDefaultLevel) {
487 pBiDi->paraLevel=GET_PARALEVEL(pBiDi, 0);
488 }
/* streaming: possibly extend the processed length up to the last strong L
   (only if the paragraph level there is 0), and decrement paraCount when
   the processed length ends up shorter than the original text */
489 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
490 if((lastStrongLTR>pBiDi->length) &&
491 (GET_PARALEVEL(pBiDi, lastStrongLTR)==0)) {
492 pBiDi->length = lastStrongLTR;
493 }
494 if(pBiDi->length<pBiDi->originalLength) {
495 pBiDi->paraCount--;
496 }
497 }
498 /* The following line does nothing new for contextual paraLevel, but is
499 needed for absolute paraLevel. */
500 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
501
502 if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
503 flags|=DIRPROP_FLAG(L);
504 }
505
/* publish the results in the object */
506 pBiDi->controlCount = controlCount;
507 pBiDi->flags=flags;
508 pBiDi->lastArabicPos=lastArabicPos;
509 }
510
511 /* perform (X1)..(X9) ------------------------------------------------------- */
512
513 /* determine if the text is mixed-directional or single-directional */
514 static UBiDiDirection
515 directionFromFlags(UBiDi *pBiDi) {
516 Flags flags=pBiDi->flags;
517 /* if the text contains AN and neutrals, then some neutrals may become RTL */
518 if(!(flags&MASK_RTL || ((flags&DIRPROP_FLAG(AN)) && (flags&MASK_POSSIBLE_N)))) {
519 return UBIDI_LTR;
520 } else if(!(flags&MASK_LTR)) {
521 return UBIDI_RTL;
522 } else {
523 return UBIDI_MIXED;
524 }
525 }
526
527 /*
528 * Resolve the explicit levels as specified by explicit embedding codes.
529 * Recalculate the flags to have them reflect the real properties
530 * after taking the explicit embeddings into account.
531 *
532 * The BiDi algorithm is designed to result in the same behavior whether embedding
533 * levels are externally specified (from "styled text", supposedly the preferred
534 * method) or set by explicit embedding codes (LRx, RLx, PDF) in the plain text.
535 * That is why (X9) instructs to remove all explicit codes (and BN).
536 * However, in a real implementation, this removal of these codes and their index
537 * positions in the plain text is undesirable since it would result in
538 * reallocated, reindexed text.
539 * Instead, this implementation leaves the codes in there and just ignores them
540 * in the subsequent processing.
541 * In order to get the same reordering behavior, positions with a BN or an
542 * explicit embedding code just get the same level assigned as the last "real"
543 * character.
544 *
545 * Some implementations, not this one, then overwrite some of these
546 * directionality properties at "real" same-level-run boundaries by
547 * L or R codes so that the resolution of weak types can be performed on the
548 * entire paragraph at once instead of having to parse it once more and
549 * perform that resolution on same-level-runs.
550 * This limits the scope of the implicit rules in effectively
551 * the same way as the run limits.
552 *
553 * Instead, this implementation does not modify these codes.
554 * On one hand, the paragraph has to be scanned for same-level-runs, but
555 * on the other hand, this saves another loop to reset these codes,
556 * or saves making and modifying a copy of dirProps[].
557 *
558 *
559 * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm.
560 *
561 *
562 * Handling the stack of explicit levels (Xn):
563 *
564 * With the BiDi stack of explicit levels,
565 * as pushed with each LRE, RLE, LRO, and RLO and popped with each PDF,
566 * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL==61.
567 *
568 * In order to have a correct push-pop semantics even in the case of overflows,
569 * there are two overflow counters:
570 * - countOver60 is incremented with each LRx at level 60
571 * - from level 60, one RLx increases the level to 61
572 * - countOver61 is incremented with each LRx and RLx at level 61
573 *
574 * Popping levels with PDF must work in the opposite order so that level 61
575 * is correct at the correct point. Underflows (too many PDFs) must be checked.
576 *
577 * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
578 */
/*
 * resolveExplicitLevels() in brief:
 * - returns the overall direction as computed by directionFromFlags();
 * - single paragraph, not mixed: the levels array is not written at all;
 * - single paragraph, mixed, but no explicit codes (or a reordering mode
 *   beyond UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL): every position gets the
 *   paragraph level;
 * - otherwise: one pass over the text that applies the explicit codes,
 *   writes levels[i] for every position, records paragraph starts in
 *   pBiDi->paras[], and recomputes pBiDi->flags.
 */
579 static UBiDiDirection
580 resolveExplicitLevels(UBiDi *pBiDi) {
581 const DirProp *dirProps=pBiDi->dirProps;
582 UBiDiLevel *levels=pBiDi->levels;
583 const UChar *text=pBiDi->text;
584
585 int32_t i=0, length=pBiDi->length;
586 Flags flags=pBiDi->flags; /* collect all directionalities in the text */
587 DirProp dirProp;
588 UBiDiLevel level=GET_PARALEVEL(pBiDi, 0);
589
590 UBiDiDirection direction;
591 int32_t paraIndex=0;
592
593 /* determine if the text is mixed-directional or single-directional */
594 direction=directionFromFlags(pBiDi);
595
596 /* we may not need to resolve any explicit levels, but for multiple
597 paragraphs we want to loop on all chars to set the para boundaries */
598 if((direction!=UBIDI_MIXED) && (pBiDi->paraCount==1)) {
599 /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
600 } else if((pBiDi->paraCount==1) &&
601 (!(flags&MASK_EXPLICIT) ||
602 (pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL))) {
603 /* mixed, but all characters are at the same embedding level */
604 /* or we are in "inverse BiDi" */
605 /* and we don't have contextual multiple paragraphs with some B char */
606 /* set all levels to the paragraph level */
607 for(i=0; i<length; ++i) {
608 levels[i]=level;
609 }
610 } else {
611 /* continue to perform (Xn) */
612
613 /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
614 /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */
615 UBiDiLevel embeddingLevel=level, newLevel, stackTop=0;
616
617 UBiDiLevel stack[UBIDI_MAX_EXPLICIT_LEVEL]; /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL */
618 uint32_t countOver60=0, countOver61=0; /* count overflows of explicit levels */
619
620 /* recalculate the flags */
621 flags=0;
622
623 for(i=0; i<length; ++i) {
624 dirProp=NO_CONTEXT_RTL(dirProps[i]);
625 switch(dirProp) {
626 case LRE:
627 case LRO:
628 /* (X3, X5) */
629 newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1)); /* least greater even level */
630 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL) {
/* push the current embedding level and enter the new even level */
631 stack[stackTop]=embeddingLevel;
632 ++stackTop;
633 embeddingLevel=newLevel;
634 if(dirProp==LRO) {
635 embeddingLevel|=UBIDI_LEVEL_OVERRIDE;
636 }
637 /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE
638 since this has already been done for newLevel which is
639 the source for embeddingLevel.
640 */
641 } else if((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)==UBIDI_MAX_EXPLICIT_LEVEL) {
642 ++countOver61;
643 } else /* (embeddingLevel&~UBIDI_LEVEL_OVERRIDE)==UBIDI_MAX_EXPLICIT_LEVEL-1 */ {
644 ++countOver60;
645 }
646 flags|=DIRPROP_FLAG(BN);
647 break;
648 case RLE:
649 case RLO:
650 /* (X2, X4) */
651 newLevel=(UBiDiLevel)(((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)+1)|1); /* least greater odd level */
652 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL) {
/* push the current embedding level and enter the new odd level */
653 stack[stackTop]=embeddingLevel;
654 ++stackTop;
655 embeddingLevel=newLevel;
656 if(dirProp==RLO) {
657 embeddingLevel|=UBIDI_LEVEL_OVERRIDE;
658 }
659 /* we don't need to set UBIDI_LEVEL_OVERRIDE off for RLE
660 since this has already been done for newLevel which is
661 the source for embeddingLevel.
662 */
663 } else {
664 ++countOver61;
665 }
666 flags|=DIRPROP_FLAG(BN);
667 break;
668 case PDF:
669 /* (X7) */
670 /* handle all the overflow cases first */
671 if(countOver61>0) {
672 --countOver61;
673 } else if(countOver60>0 && (embeddingLevel&~UBIDI_LEVEL_OVERRIDE)!=UBIDI_MAX_EXPLICIT_LEVEL) {
674 /* handle LRx overflows from level 60 */
675 --countOver60;
676 } else if(stackTop>0) {
677 /* this is the pop operation; it also pops level 61 while countOver60>0 */
678 --stackTop;
679 embeddingLevel=stack[stackTop];
680 /* } else { (underflow) */
681 }
682 flags|=DIRPROP_FLAG(BN);
683 break;
684 case B:
/* paragraph separator: discard all pending embeddings; the separator itself
   gets the level of its own paragraph, and the embedding level restarts
   from the level of the next paragraph (if there is one) */
685 stackTop=0;
686 countOver60=countOver61=0;
687 level=GET_PARALEVEL(pBiDi, i);
688 if((i+1)<length) {
689 embeddingLevel=GET_PARALEVEL(pBiDi, i+1);
/* record the start of the next paragraph, except between CR and LF */
690 if(!((text[i]==CR) && (text[i+1]==LF))) {
691 pBiDi->paras[paraIndex++]=i+1;
692 }
693 }
694 flags|=DIRPROP_FLAG(B);
695 break;
696 case BN:
697 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
698 /* they will get their levels set correctly in adjustWSLevels() */
699 flags|=DIRPROP_FLAG(BN);
700 break;
701 default:
702 /* all other types get the "real" level */
703 if(level!=embeddingLevel) {
704 level=embeddingLevel;
705 if(level&UBIDI_LEVEL_OVERRIDE) {
706 flags|=DIRPROP_FLAG_O(level)|DIRPROP_FLAG_MULTI_RUNS;
707 } else {
708 flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG_MULTI_RUNS;
709 }
710 }
/* under an override the character's own class is not added to the flags */
711 if(!(level&UBIDI_LEVEL_OVERRIDE)) {
712 flags|=DIRPROP_FLAG(dirProp);
713 }
714 break;
715 }
716
717 /*
718 * We need to set reasonable levels even on BN codes and
719 * explicit codes because we will later look at same-level runs (X10).
720 */
721 levels[i]=level;
722 }
723 if(flags&MASK_EMBEDDING) {
724 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
725 }
726 if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
727 flags|=DIRPROP_FLAG(L);
728 }
729
730 /* subsequently, ignore the explicit codes and BN (X9) */
731
732 /* again, determine if the text is mixed-directional or single-directional */
733 pBiDi->flags=flags;
734 direction=directionFromFlags(pBiDi);
735 }
736
737 return direction;
738 }
739
740 /*
741 * Use a pre-specified embedding levels array:
742 *
743 * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
744 * ignore all explicit codes (X9),
745 * and check all the preset levels.
746 *
747 * Recalculate the flags to have them reflect the real properties
748 * after taking the explicit embeddings into account.
749 */
750 static UBiDiDirection
751 checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
752 const DirProp *dirProps=pBiDi->dirProps;
753 DirProp dirProp;
754 UBiDiLevel *levels=pBiDi->levels;
755 const UChar *text=pBiDi->text;
756
757 int32_t i, length=pBiDi->length;
758 Flags flags=0; /* collect all directionalities in the text */
759 UBiDiLevel level;
760 uint32_t paraIndex=0;
761
762 for(i=0; i<length; ++i) {
763 level=levels[i];
764 dirProp=NO_CONTEXT_RTL(dirProps[i]);
765 if(level&UBIDI_LEVEL_OVERRIDE) {
766 /* keep the override flag in levels[i] but adjust the flags */
767 level&=~UBIDI_LEVEL_OVERRIDE; /* make the range check below simpler */
768 flags|=DIRPROP_FLAG_O(level);
769 } else {
770 /* set the flags */
771 flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp);
772 }
773 if((level<GET_PARALEVEL(pBiDi, i) &&
774 !((0==level)&&(dirProp==B))) ||
775 (UBIDI_MAX_EXPLICIT_LEVEL<level)) {
776 /* level out of bounds */
777 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
778 return UBIDI_LTR;
779 }
780 if((dirProp==B) && ((i+1)<length)) {
781 if(!((text[i]==CR) && (text[i+1]==LF))) {
782 pBiDi->paras[paraIndex++]=i+1;
783 }
784 }
785 }
786 if(flags&MASK_EMBEDDING) {
787 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
788 }
789
790 /* determine if the text is mixed-directional or single-directional */
791 pBiDi->flags=flags;
792 return directionFromFlags(pBiDi);
793 }
794
795 /******************************************************************
796 The Properties state machine table
797 *******************************************************************
798
799 All table cells are 8 bits:
800 bits 0..4: next state
801 bits 5..7: action to perform (if > 0)
802
803 Cells may be of format "n" where n represents the next state
804 (except for the rightmost column).
805 Cells may also be of format "s(x,y)" where x represents an action
806 to perform and y represents the next state.
807
808 *******************************************************************
809 Definitions and type for properties state table
810 *******************************************************************
811 */
/*
 * Cell layout of the properties state table:
 *   bits 0..4  next state  (GET_STATEPROPS)
 *   bits 5..7  action      (GET_ACTIONPROPS)
 * s(action, newState) builds such a cell. Its parameters are fully
 * parenthesized so that expression arguments (e.g. s(a|b, n)) combine as
 * intended instead of being split by operator precedence.
 * The last column of each row (index IMPTABPROPS_RES) is the result column.
 */
#define IMPTABPROPS_COLUMNS 14
#define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1)
#define GET_STATEPROPS(cell) ((cell)&0x1f)
#define GET_ACTIONPROPS(cell) ((cell)>>5)
#define s(action, newState) ((uint8_t)((newState)+((action)<<5)))
817
/*
 * dirProp regrouped: maps each directional property (in the order listed
 * in the per-entry comments) to its column group.
 */
static const uint8_t groupProp[] =
{
    0,      /* L   */
    1,      /* R   */
    2,      /* EN  */
    7,      /* ES  */
    8,      /* ET  */
    3,      /* AN  */
    9,      /* CS  */
    6,      /* B   */
    5,      /* S   */
    4,      /* WS  */
    4,      /* ON  */
    10,     /* LRE */
    10,     /* LRO */
    12,     /* AL  */
    10,     /* RLE */
    10,     /* RLO */
    10,     /* PDF */
    11,     /* NSM */
    10      /* BN  */
};

/* reduced dirProp */
enum {
    DirProp_L=0,
    DirProp_R=1,
    DirProp_EN=2,
    DirProp_AN=3,
    DirProp_ON=4,
    DirProp_S=5,
    DirProp_B=6
};
824
825 /******************************************************************
826
827 PROPERTIES STATE TABLE
828
829 In table impTabProps,
830 - the ON column regroups ON and WS
831 - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF
832 - the Res column is the reduced property assigned to a run
833
834 Action 1: process current run1, init new run1
835 2: init new run2
836 3: process run1, process run2, init new run1
837 4: process run1, set run1=run2, init new run2
838
839 Notes:
840 1) This table is used in resolveImplicitLevels().
841 2) This table triggers actions when there is a change in the Bidi
842 property of incoming characters (action 1).
843 3) Most such property sequences are processed immediately (in
844 fact, passed to processPropertySeq()).
845 4) However, numbers are assembled as one sequence. This means
846 that undefined situations (like CS following digits, until
847 it is known if the next char will be a digit) are held until
848 following chars define them.
849 Example: digits followed by CS, then comes another CS or ON;
850 the digits will be processed, then the CS assigned
851 as the start of an ON sequence (action 3).
852 5) There are cases where more than one sequence must be
853 processed, for instance digits followed by CS followed by L:
854 the digits must be processed as one sequence, and the CS
855 must be processed as an ON sequence, all this before starting
856 assembling chars for the opening L sequence.
857
858
859 */
/*
 * Properties state table: one row per state, one column per property group
 * in the order given by the header row below; the column order matches the
 * values stored in groupProp[]. A plain number is the next state; s(x,y)
 * means "perform action x, then go to state y". The final column (Res)
 * holds the reduced property assigned to a run, as a DirProp_xxx value.
 * NOTE(review): presumably indexed as impTabProps[state][groupProp[dirProp]]
 * by resolveImplicitLevels() -- that code is not visible here; confirm.
 */
860 static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] =
861 {
862 /* L , R , EN , AN , ON , S , B , ES , ET , CS , BN , NSM , AL , Res */
863 /* 0 Init */ { 1 , 2 , 4 , 5 , 7 , 15 , 17 , 7 , 9 , 7 , 0 , 7 , 3 , DirProp_ON },
864 /* 1 L */ { 1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 1 , 1 , s(1,3), DirProp_L },
865 /* 2 R */ { s(1,1), 2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 2 , 2 , s(1,3), DirProp_R },
866 /* 3 AL */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8), 3 , 3 , 3 , DirProp_R },
867 /* 4 EN */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10), 11 ,s(2,10), 4 , 4 , s(1,3), DirProp_EN },
868 /* 5 AN */ { s(1,1), s(1,2), s(1,4), 5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12), 5 , 5 , s(1,3), DirProp_AN },
869 /* 6 AL:EN/AN */ { s(1,1), s(1,2), 6 , 6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13), 6 , 6 , s(1,3), DirProp_AN },
870 /* 7 ON */ { s(1,1), s(1,2), s(1,4), s(1,5), 7 ,s(1,15),s(1,17), 7 ,s(2,14), 7 , 7 , 7 , s(1,3), DirProp_ON },
871 /* 8 AL:ON */ { s(1,1), s(1,2), s(1,6), s(1,6), 8 ,s(1,16),s(1,17), 8 , 8 , 8 , 8 , 8 , s(1,3), DirProp_ON },
872 /* 9 ET */ { s(1,1), s(1,2), 4 , s(1,5), 7 ,s(1,15),s(1,17), 7 , 9 , 7 , 9 , 9 , s(1,3), DirProp_ON },
873 /*10 EN+ES/CS */ { s(3,1), s(3,2), 4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 10 , s(4,7), s(3,3), DirProp_EN },
874 /*11 EN+ET */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 11 , s(1,7), 11 , 11 , s(1,3), DirProp_EN },
875 /*12 AN+CS */ { s(3,1), s(3,2), s(3,4), 5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 12 , s(4,7), s(3,3), DirProp_AN },
876 /*13 AL:EN/AN+CS */ { s(3,1), s(3,2), 6 , 6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8), 13 , s(4,8), s(3,3), DirProp_AN },
877 /*14 ON+ET */ { s(1,1), s(1,2), s(4,4), s(1,5), 7 ,s(1,15),s(1,17), 7 , 14 , 7 , 14 , 14 , s(1,3), DirProp_ON },
878 /*15 S */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7), 15 ,s(1,17), s(1,7), s(1,9), s(1,7), 15 , s(1,7), s(1,3), DirProp_S },
879 /*16 AL:S */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8), 16 ,s(1,17), s(1,8), s(1,8), s(1,8), 16 , s(1,8), s(1,3), DirProp_S },
880 /*17 B */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15), 17 , s(1,7), s(1,9), s(1,7), 17 , s(1,7), s(1,3), DirProp_B }
881 };
882
/* We must undef macro s because the levels tables have a different
 * structure (4 bits for action and 4 bits for next state).
 */
886 #undef s
887
888 /******************************************************************
889 The levels state machine tables
890 *******************************************************************
891
892 All table cells are 8 bits:
893 bits 0..3: next state
894 bits 4..7: action to perform (if > 0)
895
896 Cells may be of format "n" where n represents the next state
897 (except for the rightmost column).
898 Cells may also be of format "s(x,y)" where x represents an action
899 to perform and y represents the next state.
900
901 This format limits each table to 16 states each and to 15 actions.
902
903 *******************************************************************
904 Definitions and type for levels state tables
905 *******************************************************************
906 */
/*
 * Geometry of the levels state tables and accessors for their 8-bit cells.
 *
 * IMPTABLEVELS_COLUMNS  number of columns in a levels table row: one per
 *                       property value up to DirProp_B, plus the Res column.
 * IMPTABLEVELS_RES      index of the rightmost (Res) column.
 * GET_STATE(cell)       low 4 bits of a cell: the next state.
 * GET_ACTION(cell)      high 4 bits of a cell: the table-local action number.
 * s(action, newState)   builds a cell from an action and a next state.
 *
 * The parameters of s() are parenthesized so that arguments which are
 * themselves expressions (e.g. "a|b" or "c ? x : y") expand correctly.
 */
#define IMPTABLEVELS_COLUMNS (DirProp_B + 2)
#define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1)
#define GET_STATE(cell) ((cell)&0x0f)
#define GET_ACTION(cell) ((cell)>>4)
#define s(action, newState) ((uint8_t)((newState)+((action)<<4)))
912
/* A levels state table: any number of rows (one per state), each row made
 * of IMPTABLEVELS_COLUMNS 8-bit cells.
 */
typedef uint8_t ImpTab[][IMPTABLEVELS_COLUMNS];
/* An action map: entry [n] is the action number that processPropertySeq()
 * executes for table-local action n.
 */
typedef uint8_t ImpAct[];

/* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct,
 * instead of having a pair of ImpTab and a pair of ImpAct.
 */
/* A pair of levels tables with their action maps. resolveImplicitLevels()
 * indexes both arrays with (run level & 1): entry 0 serves even-level runs,
 * entry 1 serves odd-level runs. The pointers are stored as void * and cast
 * back to ImpTab / ImpAct pointers where they are used.
 */
typedef struct ImpTabPair {
    const void * pImpTab[2];    /* levels state tables */
    const void * pImpAct[2];    /* matching action maps */
} ImpTabPair;
923
924 /******************************************************************
925
926 LEVELS STATE TABLES
927
928 In all levels state tables,
929 - state 0 is the initial state
930 - the Res column is the increment to add to the text level
931 for this property sequence.
932
933 The impAct arrays for each table of a pair map the local action
934 numbers of the table to the total list of actions. For instance,
935 action 2 in a given table corresponds to the action number which
936 appears in entry [2] of the impAct array for that table.
937 The first entry of all impAct arrays must be 0.
938
939 Action 1: init conditional sequence
940 2: prepend conditional sequence to current sequence
941 3: set ON sequence to new level - 1
942 4: init EN/AN/ON sequence
943 5: fix EN/AN/ON sequence followed by R
944 6: set previous level sequence to level 2
945
946 Notes:
947 1) These tables are used in processPropertySeq(). The input
948 is property sequences as determined by resolveImplicitLevels.
949 2) Most such property sequences are processed immediately
950 (levels are assigned).
951 3) However, some sequences cannot be assigned a final level till
952 one or more following sequences are received. For instance,
953 ON following an R sequence within an even-level paragraph.
954 If the following sequence is R, the ON sequence will be
955 assigned basic run level+1, and so will the R sequence.
956 4) S is generally handled like ON, since its level will be fixed
957 to paragraph level in adjustWSLevels().
958
959 */
960
/*
 * Default levels tables.
 * Each row is a state; each column is the property of the incoming sequence.
 * A plain number n is the next state; s(x,y) means "perform table-local
 * action x, then go to state y". The Res column is the increment added to
 * the run level for a sequence processed in that state.
 */
static const ImpTab impTabL_DEFAULT =   /* Even paragraph level */
/*  In this table, conditional sequences receive the higher possible level
    until proven otherwise.
*/
{
                     /*  L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
    /* 0 : init    */ {  0 ,     1 ,     0 ,     2 ,     0 ,     0 ,     0 ,  0 },
    /* 1 : R       */ {  0 ,     1 ,     3 ,     3 , s(1,4), s(1,4),     0 ,  1 },
    /* 2 : AN      */ {  0 ,     1 ,     0 ,     2 , s(1,5), s(1,5),     0 ,  2 },
    /* 3 : R+EN/AN */ {  0 ,     1 ,     3 ,     3 , s(1,4), s(1,4),     0 ,  2 },
    /* 4 : R+ON    */ { s(2,0),  1 ,     3 ,     3 ,     4 ,     4 , s(2,0),  1 },
    /* 5 : AN+ON   */ { s(2,0),  1 , s(2,0),     2 ,     5 ,     5 , s(2,0),  1 }
};
static const ImpTab impTabR_DEFAULT =   /* Odd paragraph level */
/*  In this table, conditional sequences receive the lower possible level
    until proven otherwise.
*/
{
                     /*  L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
    /* 0 : init    */ {  1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  0 },
    /* 1 : L       */ {  1 ,     0 ,     1 ,     3 , s(1,4), s(1,4),     0 ,  1 },
    /* 2 : EN/AN   */ {  1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  1 },
    /* 3 : L+AN    */ {  1 ,     0 ,     1 ,     3 ,     5 ,     5 ,     0 ,  1 },
    /* 4 : L+ON    */ { s(2,1),  0 , s(2,1),     3 ,     4 ,     4 ,     0 ,  0 },
    /* 5 : L+AN+ON */ {  1 ,     0 ,     1 ,     3 ,     5 ,     5 ,     0 ,  0 }
};
/* Identity action map: table-local action n is action n. */
static const ImpAct impAct0 = {0,1,2,3,4,5,6};
/* Entry 0 of each array is for even-level runs, entry 1 for odd-level runs. */
static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT,
                                           &impTabR_DEFAULT},
                                          {&impAct0, &impAct0}};
991
/*
 * Levels table for even-level runs in NUMBERS_SPECIAL mode.
 * Cell format: n = next state, s(x,y) = table-local action x then state y;
 * Res = increment added to the run level.
 */
static const ImpTab impTabL_NUMBERS_SPECIAL =   /* Even paragraph level */
/*  In this table, conditional sequences receive the higher possible level
    until proven otherwise.
*/
{
                     /*  L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
    /* 0 : init    */ {  0 ,     2 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
    /* 1 : L+EN/AN */ {  0 ,     2 ,     1 ,     1 ,     0 ,     0 ,     0 ,  2 },
    /* 2 : R       */ {  0 ,     2 ,     4 ,     4 , s(1,3),     0 ,     0 ,  1 },
    /* 3 : R+ON    */ { s(2,0),  2 ,     4 ,     4 ,     3 ,     3 , s(2,0),  1 },
    /* 4 : R+EN/AN */ {  0 ,     2 ,     4 ,     4 , s(1,3), s(1,3),     0 ,  2 }
};
/* Odd-level runs reuse the default RTL table; both sides use the identity
 * action map.
 */
static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL,
                                                   &impTabR_DEFAULT},
                                                  {&impAct0, &impAct0}};
1007
/*
 * Levels tables for GROUP_NUMBERS_WITH_R mode.
 * Cell format: n = next state, s(x,y) = table-local action x then state y;
 * Res = increment added to the run level.
 */
static const ImpTab impTabL_GROUP_NUMBERS_WITH_R =
/*  In this table, EN/AN+ON sequences receive levels as if associated with R
    until proven that there is L or sor/eor on both sides. AN is handled like EN.
*/
{
                      /*  L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
    /* 0 init       */ {  0 ,     3 , s(1,1), s(1,1),     0 ,     0 ,     0 ,  0 },
    /* 1 EN/AN      */ { s(2,0),  3 ,     1 ,     1 ,     2 , s(2,0), s(2,0),  2 },
    /* 2 EN/AN+ON   */ { s(2,0),  3 ,     1 ,     1 ,     2 , s(2,0), s(2,0),  1 },
    /* 3 R          */ {  0 ,     3 ,     5 ,     5 , s(1,4),     0 ,     0 ,  1 },
    /* 4 R+ON       */ { s(2,0),  3 ,     5 ,     5 ,     4 , s(2,0), s(2,0),  1 },
    /* 5 R+EN/AN    */ {  0 ,     3 ,     5 ,     5 , s(1,4),     0 ,     0 ,  2 }
};
static const ImpTab impTabR_GROUP_NUMBERS_WITH_R =
/*  In this table, EN/AN+ON sequences receive levels as if associated with R
    until proven that there is L on both sides. AN is handled like EN.
*/
{
                      /*  L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
    /* 0 init       */ {  2 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
    /* 1 EN/AN      */ {  2 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  1 },
    /* 2 L          */ {  2 ,     0 , s(1,4), s(1,4), s(1,3),     0 ,     0 ,  1 },
    /* 3 L+ON       */ { s(2,2),  0 ,     4 ,     4 ,     3 ,     0 ,     0 ,  0 },
    /* 4 L+EN/AN    */ { s(2,2),  0 ,     4 ,     4 ,     3 ,     0 ,     0 ,  1 }
};
/* Entry 0 is for even-level runs, entry 1 for odd-level runs; both use the
 * identity action map.
 */
static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R = {
                        {&impTabL_GROUP_NUMBERS_WITH_R,
                         &impTabR_GROUP_NUMBERS_WITH_R},
                        {&impAct0, &impAct0}};
1037
1038
/*
 * Levels tables for INVERSE_NUMBERS_AS_L mode.
 * Cell format: n = next state, s(x,y) = table-local action x then state y;
 * Res = increment added to the run level.
 */
static const ImpTab impTabL_INVERSE_NUMBERS_AS_L =
/*  This table is identical to the Default LTR table except that EN and AN are
    handled like L.
*/
{
                     /*  L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
    /* 0 : init    */ {  0 ,     1 ,     0 ,     0 ,     0 ,     0 ,     0 ,  0 },
    /* 1 : R       */ {  0 ,     1 ,     0 ,     0 , s(1,4), s(1,4),     0 ,  1 },
    /* 2 : AN      */ {  0 ,     1 ,     0 ,     0 , s(1,5), s(1,5),     0 ,  2 },
    /* 3 : R+EN/AN */ {  0 ,     1 ,     0 ,     0 , s(1,4), s(1,4),     0 ,  2 },
    /* 4 : R+ON    */ { s(2,0),  1 , s(2,0), s(2,0),     4 ,     4 , s(2,0),  1 },
    /* 5 : AN+ON   */ { s(2,0),  1 , s(2,0), s(2,0),     5 ,     5 , s(2,0),  1 }
};
static const ImpTab impTabR_INVERSE_NUMBERS_AS_L =
/*  This table is identical to the Default RTL table except that EN and AN are
    handled like L.
*/
{
                     /*  L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
    /* 0 : init    */ {  1 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
    /* 1 : L       */ {  1 ,     0 ,     1 ,     1 , s(1,4), s(1,4),     0 ,  1 },
    /* 2 : EN/AN   */ {  1 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  1 },
    /* 3 : L+AN    */ {  1 ,     0 ,     1 ,     1 ,     5 ,     5 ,     0 ,  1 },
    /* 4 : L+ON    */ { s(2,1),  0 , s(2,1), s(2,1),     4 ,     4 ,     0 ,  0 },
    /* 5 : L+AN+ON */ {  1 ,     0 ,     1 ,     1 ,     5 ,     5 ,     0 ,  0 }
};
/* Entry 0 is for even-level runs, entry 1 for odd-level runs; both use the
 * identity action map.
 */
static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L = {
                        {&impTabL_INVERSE_NUMBERS_AS_L,
                         &impTabR_INVERSE_NUMBERS_AS_L},
                        {&impAct0, &impAct0}};
1069
/*
 * Levels table for odd-level runs in INVERSE_LIKE_DIRECT mode.
 * Cell format: n = next state, s(x,y) = table-local action x then state y;
 * Res = increment added to the run level.
 * The table-local actions are translated through impAct1 below.
 */
static const ImpTab impTabR_INVERSE_LIKE_DIRECT =   /* Odd  paragraph level */
/*  In this table, conditional sequences receive the lower possible level
    until proven otherwise.
*/
{
                     /*  L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
    /* 0 : init    */ {  1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  0 },
    /* 1 : L       */ {  1 ,     0 ,     1 ,     2 , s(1,3), s(1,3),     0 ,  1 },
    /* 2 : EN/AN   */ {  1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  1 },
    /* 3 : L+ON    */ { s(2,1), s(3,0),  6 ,     4 ,     3 ,     3 , s(3,0),  0 },
    /* 4 : L+ON+AN */ { s(2,1), s(3,0),  6 ,     4 ,     5 ,     5 , s(3,0),  3 },
    /* 5 : L+AN+ON */ { s(2,1), s(3,0),  6 ,     4 ,     5 ,     5 , s(3,0),  2 },
    /* 6 : L+ON+EN */ { s(2,1), s(3,0),  6 ,     4 ,     3 ,     3 , s(3,0),  1 }
};
/* Action map: local action 1 is action 1, local 2 is action 11,
 * local 3 is action 12.
 */
static const ImpAct impAct1 = {0,1,11,12};
/* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc"
 */
/* Even-level runs use the default LTR table with the identity map;
 * odd-level runs use the table above with impAct1.
 */
static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = {
                        {&impTabL_DEFAULT,
                         &impTabR_INVERSE_LIKE_DIRECT},
                        {&impAct0, &impAct1}};
1091
/*
 * Levels tables for INVERSE_LIKE_DIRECT mode when marks are inserted.
 * Cell format: n = next state, s(x,y) = table-local action x then state y;
 * Res = increment added to the run level.
 * The LTR table is paired with the identity map impAct0, the RTL table
 * with impAct2 (see the pair definition below).
 */
static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS =
/*  The case handled in this table is (visually):  R EN L
*/
{
                     /*  L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
    /* 0 : init    */ {  0 , s(6,3),     0 ,     1 ,     0 ,     0 ,     0 ,  0 },
    /* 1 : L+AN    */ {  0 , s(6,3),     0 ,     1 , s(1,2), s(3,0),     0 ,  4 },
    /* 2 : L+AN+ON */ { s(2,0), s(6,3), s(2,0),  1 ,     2 , s(3,0), s(2,0),  3 },
    /* 3 : R       */ {  0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0),     0 ,  3 },
    /* 4 : R+ON    */ { s(3,0), s(4,3), s(5,5), s(5,6),  4 , s(3,0), s(3,0),  3 },
    /* 5 : R+EN    */ { s(3,0), s(4,3),  5 , s(5,6), s(1,4), s(3,0), s(3,0),  4 },
    /* 6 : R+AN    */ { s(3,0), s(4,3), s(5,5),  6 , s(1,4), s(3,0), s(3,0),  4 }
};
static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS =
/*  The cases handled in this table are (visually):  R EN L
                                                     R L AN L
*/
{
                        /*  L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
    /* 0 : init       */ { s(1,3),  0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
    /* 1 : R+EN/AN    */ { s(2,3),  0 ,     1 ,     1 ,     2 , s(4,0),     0 ,  1 },
    /* 2 : R+EN/AN+ON */ { s(2,3),  0 ,     1 ,     1 ,     2 , s(4,0),     0 ,  0 },
    /* 3 : L          */ {  3 ,     0 ,     3 , s(3,6), s(1,4), s(4,0),     0 ,  1 },
    /* 4 : L+ON       */ { s(5,3), s(4,0),  5 , s(3,6),     4 , s(4,0), s(4,0),  0 },
    /* 5 : L+ON+EN    */ { s(5,3), s(4,0),  5 , s(3,6),     4 , s(4,0), s(4,0),  1 },
    /* 6 : L+AN       */ { s(5,3), s(4,0),  6 ,     6 ,     4 , s(4,0), s(4,0),  3 }
};
/* Action map: local action 1 is action 1; local 2..5 are actions 7..10. */
static const ImpAct impAct2 = {0,1,7,8,9,10};
/* Entry 0 is for even-level runs, entry 1 for odd-level runs. */
static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = {
                        {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
                         &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
                        {&impAct0, &impAct2}};
1124
/* INVERSE_FOR_NUMBERS_SPECIAL defines no tables of its own: even-level runs
 * reuse the NUMBERS_SPECIAL LTR table (identity action map) and odd-level
 * runs reuse the INVERSE_LIKE_DIRECT RTL table (action map impAct1).
 */
static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = {
                        {&impTabL_NUMBERS_SPECIAL,
                         &impTabR_INVERSE_LIKE_DIRECT},
                        {&impAct0, &impAct1}};
1129
/*
 * Levels table for even-level runs in INVERSE_FOR_NUMBERS_SPECIAL mode when
 * marks are inserted.
 * Cell format: n = next state, s(x,y) = table-local action x then state y;
 * Res = increment added to the run level.
 */
static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS =
/*  The case handled in this table is (visually):  R EN L
*/
{
                     /*  L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
    /* 0 : init    */ {  0 , s(6,2),     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
    /* 1 : L+EN/AN */ {  0 , s(6,2),     1 ,     1 ,     0 , s(3,0),     0 ,  4 },
    /* 2 : R       */ {  0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0),     0 ,  3 },
    /* 3 : R+ON    */ { s(3,0), s(4,2), s(5,4), s(5,4),  3 , s(3,0), s(3,0),  3 },
    /* 4 : R+EN/AN */ { s(3,0), s(4,2),  4 ,     4 , s(1,3), s(3,0), s(3,0),  4 }
};
/* Odd-level runs reuse the INVERSE_LIKE_DIRECT_WITH_MARKS RTL table with its
 * action map impAct2; even-level runs use the table above with the identity
 * map.
 */
static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = {
                        {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
                         &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
                        {&impAct0, &impAct2}};
1145
1146 #undef s
1147
/*
 * Working state of the levels state machine for one run.
 * It is initialized in resolveImplicitLevels() and updated by every call
 * to processPropertySeq().
 */
typedef struct {
    const ImpTab * pImpTab;             /* level table pointer          */
    const ImpAct * pImpAct;             /* action map array             */
    int32_t startON;                    /* start of ON sequence         */
    int32_t startL2EN;                  /* start of level 2 sequence;
                                           -1 when none is pending (the
                                           code also stores -2)         */
    int32_t lastStrongRTL;              /* index of last found R or AL;
                                           -1 when none                 */
    int32_t state;                      /* current state                */
    UBiDiLevel runLevel;                /* run level before implicit solving */
} LevState;
1157
1158 /*------------------------------------------------------------------------*/
1159
1160 static void
1161 addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag)
1162 /* param pos: position where to insert
1163 param flag: one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER
1164 */
1165 {
1166 #define FIRSTALLOC 10
1167 Point point;
1168 InsertPoints * pInsertPoints=&(pBiDi->insertPoints);
1169
1170 if (pInsertPoints->capacity == 0)
1171 {
1172 pInsertPoints->points=uprv_malloc(sizeof(Point)*FIRSTALLOC);
1173 if (pInsertPoints->points == NULL)
1174 {
1175 pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
1176 return;
1177 }
1178 pInsertPoints->capacity=FIRSTALLOC;
1179 }
1180 if (pInsertPoints->size >= pInsertPoints->capacity) /* no room for new point */
1181 {
1182 void * savePoints=pInsertPoints->points;
1183 pInsertPoints->points=uprv_realloc(pInsertPoints->points,
1184 pInsertPoints->capacity*2*sizeof(Point));
1185 if (pInsertPoints->points == NULL)
1186 {
1187 pInsertPoints->points=savePoints;
1188 pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
1189 return;
1190 }
1191 else pInsertPoints->capacity*=2;
1192 }
1193 point.pos=pos;
1194 point.flag=flag;
1195 pInsertPoints->points[pInsertPoints->size]=point;
1196 pInsertPoints->size++;
1197 #undef FIRSTALLOC
1198 }
1199
1200 /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
1201
1202 /*
1203 * This implementation of the (Wn) rules applies all rules in one pass.
1204 * In order to do so, it needs a look-ahead of typically 1 character
1205 * (except for W5: sequences of ET) and keeps track of changes
1206 * in a rule Wp that affect a later Wq (p<q).
1207 *
1208 * The (Nn) and (In) rules are also performed in that same single loop,
1209 * but effectively one iteration behind for white space.
1210 *
1211 * Since all implicit rules are performed in one step, it is not necessary
1212 * to actually store the intermediate directional properties in dirProps[].
1213 */
1214
/*
 * Feed one property sequence to the levels state machine and assign levels.
 *
 * pBiDi:     the UBiDi object; its levels[] array and insertPoints are updated
 * pLevState: state machine context (tables, current state, pending positions)
 * _prop:     property of the sequence, used as column index in the levels table
 * start:     index of the first character of the sequence
 * limit:     index just past the last character of the sequence
 *
 * The cell found at [current state][_prop] gives the next state and a
 * table-local action, which is translated through the action map before it
 * is executed. After the action, if the Res value of the new state is
 * non-zero or the action moved start backwards, levels[start..limit-1] are
 * set to runLevel + Res.
 */
static void
processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop,
                   int32_t start, int32_t limit) {
    uint8_t cell, oldStateSeq, actionSeq;
    const ImpTab * pImpTab=pLevState->pImpTab;
    const ImpAct * pImpAct=pLevState->pImpAct;
    UBiDiLevel * levels=pBiDi->levels;
    UBiDiLevel level, addLevel;
    InsertPoints * pInsertPoints;
    int32_t start0, k;

    start0=start;                           /* save original start position */
    oldStateSeq=(uint8_t)pLevState->state;
    cell=(*pImpTab)[oldStateSeq][_prop];
    pLevState->state=GET_STATE(cell);       /* isolate the new state */
    actionSeq=(*pImpAct)[GET_ACTION(cell)]; /* isolate the action */
    /* level increment for the state we are entering */
    addLevel=(*pImpTab)[pLevState->state][IMPTABLEVELS_RES];

    if(actionSeq) {
        switch(actionSeq) {
        case 1:                         /* init ON seq */
            pLevState->startON=start0;
            break;

        case 2:                         /* prepend ON seq to current seq */
            start=pLevState->startON;
            break;

        case 3:                         /* L or S after possible relevant EN/AN */
            /* check if we had EN after R/AL */
            if (pLevState->startL2EN >= 0) {
                addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
            }
            pLevState->startL2EN=-1;  /* not within previous if since could also be -2 */
            /* check if we had any relevant EN/AN after R/AL */
            pInsertPoints=&(pBiDi->insertPoints);
            if ((pInsertPoints->capacity == 0) ||
                (pInsertPoints->size <= pInsertPoints->confirmed))
            {
                /* nothing, just clean up */
                pLevState->lastStrongRTL=-1;
                /* check if we have a pending conditional segment */
                level=(*pImpTab)[oldStateSeq][IMPTABLEVELS_RES];
                if ((level & 1) && (pLevState->startON > 0)) {  /* after ON */
                    start=pLevState->startON;   /* reset to basic run level */
                }
                if (_prop == DirProp_S)                /* add LRM before S */
                {
                    addPoint(pBiDi, start0, LRM_BEFORE);
                    pInsertPoints->confirmed=pInsertPoints->size;
                }
                break;
            }
            /* reset previous RTL cont to level for LTR text */
            for (k=pLevState->lastStrongRTL+1; k<start0; k++)
            {
                /* reset odd level, leave runLevel+2 as is */
                levels[k]=(levels[k] - 2) & ~1;
            }
            /* mark insert points as confirmed */
            pInsertPoints->confirmed=pInsertPoints->size;
            pLevState->lastStrongRTL=-1;
            if (_prop == DirProp_S)            /* add LRM before S */
            {
                addPoint(pBiDi, start0, LRM_BEFORE);
                pInsertPoints->confirmed=pInsertPoints->size;
            }
            break;

        case 4:                         /* R/AL after possible relevant EN/AN */
            /* just clean up */
            pInsertPoints=&(pBiDi->insertPoints);
            if (pInsertPoints->capacity > 0)
                /* remove all non confirmed insert points */
                pInsertPoints->size=pInsertPoints->confirmed;
            pLevState->startON=-1;
            pLevState->startL2EN=-1;
            pLevState->lastStrongRTL=limit - 1;
            break;

        case 5:                         /* EN/AN after R/AL + possible cont */
            /* check for real AN */
            if ((_prop == DirProp_AN) && (NO_CONTEXT_RTL(pBiDi->dirProps[start0]) == AN) &&
                (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))
            {
                /* real AN */
                if (pLevState->startL2EN == -1) /* if no relevant EN already found */
                {
                    /* just note the rightmost digit as a strong RTL */
                    pLevState->lastStrongRTL=limit - 1;
                    break;
                }
                if (pLevState->startL2EN >= 0)  /* after EN, no AN */
                {
                    addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
                    pLevState->startL2EN=-2;
                }
                /* note AN */
                addPoint(pBiDi, start0, LRM_BEFORE);
                break;
            }
            /* if first EN/AN after R/AL */
            if (pLevState->startL2EN == -1) {
                pLevState->startL2EN=start0;
            }
            break;

        case 6:                         /* note location of latest R/AL */
            pLevState->lastStrongRTL=limit - 1;
            pLevState->startON=-1;
            break;

        case 7:                         /* L after R+ON/EN/AN */
            /* include possible adjacent number on the left */
            /* walk left to the nearest character with an odd level (empty loop body) */
            for (k=start0-1; k>=0 && !(levels[k]&1); k--);
            if(k>=0) {
                addPoint(pBiDi, k, RLM_BEFORE);             /* add RLM before */
                pInsertPoints=&(pBiDi->insertPoints);
                pInsertPoints->confirmed=pInsertPoints->size;   /* confirm it */
            }
            pLevState->startON=start0;
            break;

        case 8:                         /* AN after L */
            /* AN numbers between L text on both sides may be trouble. */
            /* tentatively bracket with LRMs; will be confirmed if followed by L */
            addPoint(pBiDi, start0, LRM_BEFORE);    /* add LRM before */
            addPoint(pBiDi, start0, LRM_AFTER);     /* add LRM after  */
            break;

        case 9:                         /* R after L+ON/EN/AN */
            /* false alert, infirm LRMs around previous AN */
            pInsertPoints=&(pBiDi->insertPoints);
            pInsertPoints->size=pInsertPoints->confirmed;
            if (_prop == DirProp_S)            /* add RLM before S */
            {
                addPoint(pBiDi, start0, RLM_BEFORE);
                pInsertPoints->confirmed=pInsertPoints->size;
            }
            break;

        case 10:                        /* L after L+ON/AN */
            /* raise the pending sequence to at least the new level */
            level=pLevState->runLevel + addLevel;
            for(k=pLevState->startON; k<start0; k++) {
                if (levels[k]<level)
                    levels[k]=level;
            }
            pInsertPoints=&(pBiDi->insertPoints);
            pInsertPoints->confirmed=pInsertPoints->size;   /* confirm inserts */
            pLevState->startON=start0;
            break;

        case 11:                        /* L after L+ON+EN/AN/ON */
            /* walk the pending sequence backwards and adjust its levels;
             * note that the inner loops decrement k without re-checking
             * it against startON
             */
            level=pLevState->runLevel;
            for(k=start0-1; k>=pLevState->startON; k--) {
                if(levels[k]==level+3) {
                    while(levels[k]==level+3) {
                        levels[k--]-=2;
                    }
                    while(levels[k]==level) {
                        k--;
                    }
                }
                if(levels[k]==level+2) {
                    levels[k]=level;
                    continue;
                }
                levels[k]=level+1;
            }
            break;

        case 12:                        /* R after L+ON+EN/AN/ON */
            /* lower by 2 every level in the pending sequence above runLevel+1 */
            level=pLevState->runLevel+1;
            for(k=start0-1; k>=pLevState->startON; k--) {
                if(levels[k]>level) {
                    levels[k]-=2;
                }
            }
            break;

        default:                        /* we should never get here */
            U_ASSERT(FALSE);
            break;
        }
    }
    /* assign the resulting level to the (possibly extended) sequence */
    if((addLevel) || (start < start0)) {
        level=pLevState->runLevel + addLevel;
        for(k=start; k<limit; k++) {
            levels[k]=level;
        }
    }
}
1407
1408 static void
1409 resolveImplicitLevels(UBiDi *pBiDi,
1410 int32_t start, int32_t limit,
1411 DirProp sor, DirProp eor) {
1412 const DirProp *dirProps=pBiDi->dirProps;
1413
1414 LevState levState;
1415 int32_t i, start1, start2;
1416 uint8_t oldStateImp, stateImp, actionImp;
1417 uint8_t gprop, resProp, cell;
1418 UBool inverseRTL;
1419 DirProp nextStrongProp=R;
1420 int32_t nextStrongPos=-1;
1421
1422 levState.startON = -1; /* silence gcc flow analysis */
1423
1424 /* check for RTL inverse BiDi mode */
1425 /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to
1426 * loop on the text characters from end to start.
1427 * This would need a different properties state table (at least different
1428 * actions) and different levels state tables (maybe very similar to the
1429 * LTR corresponding ones.
1430 */
1431 inverseRTL=(UBool)
1432 ((start<pBiDi->lastArabicPos) && (GET_PARALEVEL(pBiDi, start) & 1) &&
1433 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
1434 pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL));
1435 /* initialize for levels state table */
1436 levState.startL2EN=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
1437 levState.lastStrongRTL=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
1438 levState.state=0;
1439 levState.runLevel=pBiDi->levels[start];
1440 levState.pImpTab=(const ImpTab*)((pBiDi->pImpTabPair)->pImpTab)[levState.runLevel&1];
1441 levState.pImpAct=(const ImpAct*)((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel&1];
1442 processPropertySeq(pBiDi, &levState, sor, start, start);
1443 /* initialize for property state table */
1444 if(NO_CONTEXT_RTL(dirProps[start])==NSM) {
1445 stateImp = 1 + sor;
1446 } else {
1447 stateImp=0;
1448 }
1449 start1=start;
1450 start2=start;
1451
1452 for(i=start; i<=limit; i++) {
1453 if(i>=limit) {
1454 gprop=eor;
1455 } else {
1456 DirProp prop, prop1;
1457 prop=NO_CONTEXT_RTL(dirProps[i]);
1458 if(inverseRTL) {
1459 if(prop==AL) {
1460 /* AL before EN does not make it AN */
1461 prop=R;
1462 } else if(prop==EN) {
1463 if(nextStrongPos<=i) {
1464 /* look for next strong char (L/R/AL) */
1465 int32_t j;
1466 nextStrongProp=R; /* set default */
1467 nextStrongPos=limit;
1468 for(j=i+1; j<limit; j++) {
1469 prop1=NO_CONTEXT_RTL(dirProps[j]);
1470 if(prop1==L || prop1==R || prop1==AL) {
1471 nextStrongProp=prop1;
1472 nextStrongPos=j;
1473 break;
1474 }
1475 }
1476 }
1477 if(nextStrongProp==AL) {
1478 prop=AN;
1479 }
1480 }
1481 }
1482 gprop=groupProp[prop];
1483 }
1484 oldStateImp=stateImp;
1485 cell=impTabProps[oldStateImp][gprop];
1486 stateImp=GET_STATEPROPS(cell); /* isolate the new state */
1487 actionImp=GET_ACTIONPROPS(cell); /* isolate the action */
1488 if((i==limit) && (actionImp==0)) {
1489 /* there is an unprocessed sequence if its property == eor */
1490 actionImp=1; /* process the last sequence */
1491 }
1492 if(actionImp) {
1493 resProp=impTabProps[oldStateImp][IMPTABPROPS_RES];
1494 switch(actionImp) {
1495 case 1: /* process current seq1, init new seq1 */
1496 processPropertySeq(pBiDi, &levState, resProp, start1, i);
1497 start1=i;
1498 break;
1499 case 2: /* init new seq2 */
1500 start2=i;
1501 break;
1502 case 3: /* process seq1, process seq2, init new seq1 */
1503 processPropertySeq(pBiDi, &levState, resProp, start1, start2);
1504 processPropertySeq(pBiDi, &levState, DirProp_ON, start2, i);
1505 start1=i;
1506 break;
1507 case 4: /* process seq1, set seq1=seq2, init new seq2 */
1508 processPropertySeq(pBiDi, &levState, resProp, start1, start2);
1509 start1=start2;
1510 start2=i;
1511 break;
1512 default: /* we should never get here */
1513 U_ASSERT(FALSE);
1514 break;
1515 }
1516 }
1517 }
1518 /* flush possible pending sequence, e.g. ON */
1519 processPropertySeq(pBiDi, &levState, eor, limit, limit);
1520 }
1521
1522 /* perform (L1) and (X9) ---------------------------------------------------- */
1523
1524 /*
1525 * Reset the embedding levels for some non-graphic characters (L1).
1526 * This function also sets appropriate levels for BN, and
1527 * explicit embedding types that are supposed to have been removed
1528 * from the paragraph in (X9).
1529 */
1530 static void
1531 adjustWSLevels(UBiDi *pBiDi) {
1532 const DirProp *dirProps=pBiDi->dirProps;
1533 UBiDiLevel *levels=pBiDi->levels;
1534 int32_t i;
1535
1536 if(pBiDi->flags&MASK_WS) {
1537 UBool orderParagraphsLTR=pBiDi->orderParagraphsLTR;
1538 Flags flag;
1539
1540 i=pBiDi->trailingWSStart;
1541 while(i>0) {
1542 /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
1543 while(i>0 && (flag=DIRPROP_FLAG_NC(dirProps[--i]))&MASK_WS) {
1544 if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
1545 levels[i]=0;
1546 } else {
1547 levels[i]=GET_PARALEVEL(pBiDi, i);
1548 }
1549 }
1550
1551 /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
1552 /* here, i+1 is guaranteed to be <length */
1553 while(i>0) {
1554 flag=DIRPROP_FLAG_NC(dirProps[--i]);
1555 if(flag&MASK_BN_EXPLICIT) {
1556 levels[i]=levels[i+1];
1557 } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
1558 levels[i]=0;
1559 break;
1560 } else if(flag&MASK_B_S) {
1561 levels[i]=GET_PARALEVEL(pBiDi, i);
1562 break;
1563 }
1564 }
1565 }
1566 }
1567 }
1568
1569 #define BIDI_MIN(x, y) ((x)<(y) ? (x) : (y))
1570 #define BIDI_ABS(x) ((x)>=0 ? (x) : (-(x)))
1571 static void
1572 setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length,
1573 UBiDiLevel paraLevel, UErrorCode *pErrorCode) {
1574 void *runsOnlyMemory;
1575 int32_t *visualMap;
1576 UChar *visualText;
1577 int32_t saveLength, saveTrailingWSStart;
1578 const UBiDiLevel *levels;
1579 UBiDiLevel *saveLevels;
1580 UBiDiDirection saveDirection;
1581 UBool saveMayAllocateText;
1582 Run *runs;
1583 int32_t visualLength, i, j, visualStart, logicalStart,
1584 runCount, runLength, addedRuns, insertRemove,
1585 start, limit, step, indexOddBit, logicalPos,
1586 index0, index1;
1587 uint32_t saveOptions;
1588
1589 pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT;
1590 if(length==0) {
1591 ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
1592 goto cleanup3;
1593 }
1594 /* obtain memory for mapping table and visual text */
1595 runsOnlyMemory=uprv_malloc(length*(sizeof(int32_t)+sizeof(UChar)+sizeof(UBiDiLevel)));
1596 if(runsOnlyMemory==NULL) {
1597 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1598 goto cleanup3;
1599 }
1600 visualMap=runsOnlyMemory;
1601 visualText=(UChar *)&visualMap[length];
1602 saveLevels=(UBiDiLevel *)&visualText[length];
1603 saveOptions=pBiDi->reorderingOptions;
1604 if(saveOptions & UBIDI_OPTION_INSERT_MARKS) {
1605 pBiDi->reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
1606 pBiDi->reorderingOptions|=UBIDI_OPTION_REMOVE_CONTROLS;
1607 }
1608 paraLevel&=1; /* accept only 0 or 1 */
1609 ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
1610 if(U_FAILURE(*pErrorCode)) {
1611 goto cleanup3;
1612 }
1613 /* we cannot access directly pBiDi->levels since it is not yet set if
1614 * direction is not MIXED
1615 */
1616 levels=ubidi_getLevels(pBiDi, pErrorCode);
1617 uprv_memcpy(saveLevels, levels, pBiDi->length*sizeof(UBiDiLevel));
1618 saveTrailingWSStart=pBiDi->trailingWSStart;
1619 saveLength=pBiDi->length;
1620 saveDirection=pBiDi->direction;
1621
1622 /* FOOD FOR THOUGHT: instead of writing the visual text, we could use
1623 * the visual map and the dirProps array to drive the second call
1624 * to ubidi_setPara (but must make provision for possible removal of
1625 * BiDi controls. Alternatively, only use the dirProps array via
1626 * customized classifier callback.
1627 */
1628 visualLength=ubidi_writeReordered(pBiDi, visualText, length,
1629 UBIDI_DO_MIRRORING, pErrorCode);
1630 ubidi_getVisualMap(pBiDi, visualMap, pErrorCode);
1631 if(U_FAILURE(*pErrorCode)) {
1632 goto cleanup2;
1633 }
1634 pBiDi->reorderingOptions=saveOptions;
1635
1636 pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_LIKE_DIRECT;
1637 paraLevel^=1;
1638 /* Because what we did with reorderingOptions, visualText may be shorter
1639 * than the original text. But we don't want the levels memory to be
1640 * reallocated shorter than the original length, since we need to restore
1641 * the levels as after the first call to ubidi_setpara() before returning.
1642 * We will force mayAllocateText to FALSE before the second call to
1643 * ubidi_setpara(), and will restore it afterwards.
1644 */
1645 saveMayAllocateText=pBiDi->mayAllocateText;
1646 pBiDi->mayAllocateText=FALSE;
1647 ubidi_setPara(pBiDi, visualText, visualLength, paraLevel, NULL, pErrorCode);
1648 pBiDi->mayAllocateText=saveMayAllocateText;
1649 ubidi_getRuns(pBiDi, pErrorCode);
1650 if(U_FAILURE(*pErrorCode)) {
1651 goto cleanup1;
1652 }
1653 /* check if some runs must be split, count how many splits */
1654 addedRuns=0;
1655 runCount=pBiDi->runCount;
1656 runs=pBiDi->runs;
1657 visualStart=0;
1658 for(i=0; i<runCount; i++, visualStart+=runLength) {
1659 runLength=runs[i].visualLimit-visualStart;
1660 if(runLength<2) {
1661 continue;
1662 }
1663 logicalStart=GET_INDEX(runs[i].logicalStart);
1664 for(j=logicalStart+1; j<logicalStart+runLength; j++) {
1665 index0=visualMap[j];
1666 index1=visualMap[j-1];
1667 if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
1668 addedRuns++;
1669 }
1670 }
1671 }
1672 if(addedRuns) {
1673 if(getRunsMemory(pBiDi, runCount+addedRuns)) {
1674 if(runCount==1) {
1675 /* because we switch from UBiDi.simpleRuns to UBiDi.runs */
1676 pBiDi->runsMemory[0]=runs[0];
1677 }
1678 runs=pBiDi->runs=pBiDi->runsMemory;
1679 pBiDi->runCount+=addedRuns;
1680 } else {
1681 goto cleanup1;
1682 }
1683 }
1684 /* split runs which are not consecutive in source text */
1685 for(i=runCount-1; i>=0; i--) {
1686 runLength= i==0 ? runs[0].visualLimit :
1687 runs[i].visualLimit-runs[i-1].visualLimit;
1688 logicalStart=runs[i].logicalStart;
1689 indexOddBit=GET_ODD_BIT(logicalStart);
1690 logicalStart=GET_INDEX(logicalStart);
1691 if(runLength<2) {
1692 if(addedRuns) {
1693 runs[i+addedRuns]=runs[i];
1694 }
1695 logicalPos=visualMap[logicalStart];
1696 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
1697 saveLevels[logicalPos]^indexOddBit);
1698 continue;
1699 }
1700 if(indexOddBit) {
1701 start=logicalStart;
1702 limit=logicalStart+runLength-1;
1703 step=1;
1704 } else {
1705 start=logicalStart+runLength-1;
1706 limit=logicalStart;
1707 step=-1;
1708 }
1709 for(j=start; j!=limit; j+=step) {
1710 index0=visualMap[j];
1711 index1=visualMap[j+step];
1712 if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
1713 logicalPos=BIDI_MIN(visualMap[start], index0);
1714 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
1715 saveLevels[logicalPos]^indexOddBit);
1716 runs[i+addedRuns].visualLimit=runs[i].visualLimit;
1717 runs[i].visualLimit-=BIDI_ABS(j-start)+1;
1718 insertRemove=runs[i].insertRemove&(LRM_AFTER|RLM_AFTER);
1719 runs[i+addedRuns].insertRemove=insertRemove;
1720 runs[i].insertRemove&=~insertRemove;
1721 start=j+step;
1722 addedRuns--;
1723 }
1724 }
1725 if(addedRuns) {
1726 runs[i+addedRuns]=runs[i];
1727 }
1728 logicalPos=BIDI_MIN(visualMap[start], visualMap[limit]);
1729 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
1730 saveLevels[logicalPos]^indexOddBit);
1731 }
1732
1733 cleanup1:
1734 /* restore initial paraLevel */
1735 pBiDi->paraLevel^=1;
1736 cleanup2:
1737 /* restore real text */
1738 pBiDi->text=text;
1739 pBiDi->length=saveLength;
1740 pBiDi->originalLength=length;
1741 pBiDi->direction=saveDirection;
1742 /* the saved levels should never excess levelsSize, but we check anyway */
1743 if(saveLength>pBiDi->levelsSize) {
1744 saveLength=pBiDi->levelsSize;
1745 }
1746 uprv_memcpy(pBiDi->levels, saveLevels, saveLength*sizeof(UBiDiLevel));
1747 pBiDi->trailingWSStart=saveTrailingWSStart;
1748 /* free memory for mapping table and visual text */
1749 uprv_free(runsOnlyMemory);
1750 if(pBiDi->runCount>1) {
1751 pBiDi->direction=UBIDI_MIXED;
1752 }
1753 cleanup3:
1754 pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY;
1755 }
1756
1757 /* ubidi_setPara ------------------------------------------------------------ */
1758
1759 U_CAPI void U_EXPORT2
1760 ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
1761 UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
1762 UErrorCode *pErrorCode) {
1763 UBiDiDirection direction;
1764
1765 /* check the argument values */
1766 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
1767 if(pBiDi==NULL || text==NULL || length<-1 ||
1768 (paraLevel>UBIDI_MAX_EXPLICIT_LEVEL && paraLevel<UBIDI_DEFAULT_LTR)) {
1769 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1770 return;
1771 }
1772
1773 if(length==-1) {
1774 length=u_strlen(text);
1775 }
1776
1777 /* special treatment for RUNS_ONLY mode */
1778 if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) {
1779 setParaRunsOnly(pBiDi, text, length, paraLevel, pErrorCode);
1780 return;
1781 }
1782
1783 /* initialize the UBiDi structure */
1784 pBiDi->pParaBiDi=NULL; /* mark unfinished setPara */
1785 pBiDi->text=text;
1786 pBiDi->length=pBiDi->originalLength=pBiDi->resultLength=length;
1787 pBiDi->paraLevel=paraLevel;
1788 pBiDi->direction=UBIDI_LTR;
1789 pBiDi->paraCount=1;
1790
1791 pBiDi->dirProps=NULL;
1792 pBiDi->levels=NULL;
1793 pBiDi->runs=NULL;
1794 pBiDi->insertPoints.size=0; /* clean up from last call */
1795 pBiDi->insertPoints.confirmed=0; /* clean up from last call */
1796
1797 /*
1798 * Save the original paraLevel if contextual; otherwise, set to 0.
1799 */
1800 if(IS_DEFAULT_LEVEL(paraLevel)) {
1801 pBiDi->defaultParaLevel=paraLevel;
1802 } else {
1803 pBiDi->defaultParaLevel=0;
1804 }
1805
1806 if(length==0) {
1807 /*
1808 * For an empty paragraph, create a UBiDi object with the paraLevel and
1809 * the flags and the direction set but without allocating zero-length arrays.
1810 * There is nothing more to do.
1811 */
1812 if(IS_DEFAULT_LEVEL(paraLevel)) {
1813 pBiDi->paraLevel&=1;
1814 pBiDi->defaultParaLevel=0;
1815 }
1816 if(paraLevel&1) {
1817 pBiDi->flags=DIRPROP_FLAG(R);
1818 pBiDi->direction=UBIDI_RTL;
1819 } else {
1820 pBiDi->flags=DIRPROP_FLAG(L);
1821 pBiDi->direction=UBIDI_LTR;
1822 }
1823
1824 pBiDi->runCount=0;
1825 pBiDi->paraCount=0;
1826 pBiDi->pParaBiDi=pBiDi; /* mark successful setPara */
1827 return;
1828 }
1829
1830 pBiDi->runCount=-1;
1831
1832 /*
1833 * Get the directional properties,
1834 * the flags bit-set, and
1835 * determine the paragraph level if necessary.
1836 */
1837 if(getDirPropsMemory(pBiDi, length)) {
1838 pBiDi->dirProps=pBiDi->dirPropsMemory;
1839 getDirProps(pBiDi);
1840 } else {
1841 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1842 return;
1843 }
1844 /* the processed length may have changed if UBIDI_OPTION_STREAMING */
1845 length= pBiDi->length;
1846 pBiDi->trailingWSStart=length; /* the levels[] will reflect the WS run */
1847 /* allocate paras memory */
1848 if(pBiDi->paraCount>1) {
1849 if(getInitialParasMemory(pBiDi, pBiDi->paraCount)) {
1850 pBiDi->paras=pBiDi->parasMemory;
1851 pBiDi->paras[pBiDi->paraCount-1]=length;
1852 } else {
1853 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1854 return;
1855 }
1856 } else {
1857 /* initialize paras for single paragraph */
1858 pBiDi->paras=pBiDi->simpleParas;
1859 pBiDi->simpleParas[0]=length;
1860 }
1861
1862 /* are explicit levels specified? */
1863 if(embeddingLevels==NULL) {
1864 /* no: determine explicit levels according to the (Xn) rules */\
1865 if(getLevelsMemory(pBiDi, length)) {
1866 pBiDi->levels=pBiDi->levelsMemory;
1867 direction=resolveExplicitLevels(pBiDi);
1868 } else {
1869 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1870 return;
1871 }
1872 } else {
1873 /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */
1874 pBiDi->levels=embeddingLevels;
1875 direction=checkExplicitLevels(pBiDi, pErrorCode);
1876 if(U_FAILURE(*pErrorCode)) {
1877 return;
1878 }
1879 }
1880
1881 /*
1882 * The steps after (X9) in the UBiDi algorithm are performed only if
1883 * the paragraph text has mixed directionality!
1884 */
1885 pBiDi->direction=direction;
1886 switch(direction) {
1887 case UBIDI_LTR:
1888 /* make sure paraLevel is even */
1889 pBiDi->paraLevel=(UBiDiLevel)((pBiDi->paraLevel+1)&~1);
1890
1891 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
1892 pBiDi->trailingWSStart=0;
1893 break;
1894 case UBIDI_RTL:
1895 /* make sure paraLevel is odd */
1896 pBiDi->paraLevel|=1;
1897
1898 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
1899 pBiDi->trailingWSStart=0;
1900 break;
1901 default:
1902 /*
1903 * Choose the right implicit state table
1904 */
1905 switch(pBiDi->reorderingMode) {
1906 case UBIDI_REORDER_DEFAULT:
1907 pBiDi->pImpTabPair=&impTab_DEFAULT;
1908 break;
1909 case UBIDI_REORDER_NUMBERS_SPECIAL:
1910 pBiDi->pImpTabPair=&impTab_NUMBERS_SPECIAL;
1911 break;
1912 case UBIDI_REORDER_GROUP_NUMBERS_WITH_R:
1913 pBiDi->pImpTabPair=&impTab_GROUP_NUMBERS_WITH_R;
1914 break;
1915 case UBIDI_REORDER_INVERSE_NUMBERS_AS_L:
1916 pBiDi->pImpTabPair=&impTab_INVERSE_NUMBERS_AS_L;
1917 break;
1918 case UBIDI_REORDER_INVERSE_LIKE_DIRECT:
1919 if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
1920 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS;
1921 } else {
1922 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT;
1923 }
1924 break;
1925 case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL:
1926 if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
1927 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS;
1928 } else {
1929 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL;
1930 }
1931 break;
1932 default:
1933 /* we should never get here */
1934 U_ASSERT(FALSE);
1935 break;
1936 }
1937 /*
1938 * If there are no external levels specified and there
1939 * are no significant explicit level codes in the text,
1940 * then we can treat the entire paragraph as one run.
1941 * Otherwise, we need to perform the following rules on runs of
1942 * the text with the same embedding levels. (X10)
1943 * "Significant" explicit level codes are ones that actually
1944 * affect non-BN characters.
1945 * Examples for "insignificant" ones are empty embeddings
1946 * LRE-PDF, LRE-RLE-PDF-PDF, etc.
1947 */
1948 if(embeddingLevels==NULL && pBiDi->paraCount<=1 &&
1949 !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) {
1950 resolveImplicitLevels(pBiDi, 0, length,
1951 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, 0)),
1952 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, length-1)));
1953 } else {
1954 /* sor, eor: start and end types of same-level-run */
1955 UBiDiLevel *levels=pBiDi->levels;
1956 int32_t start, limit=0;
1957 UBiDiLevel level, nextLevel;
1958 DirProp sor, eor;
1959
1960 /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
1961 level=GET_PARALEVEL(pBiDi, 0);
1962 nextLevel=levels[0];
1963 if(level<nextLevel) {
1964 eor=GET_LR_FROM_LEVEL(nextLevel);
1965 } else {
1966 eor=GET_LR_FROM_LEVEL(level);
1967 }
1968
1969 do {
1970 /* determine start and limit of the run (end points just behind the run) */
1971
1972 /* the values for this run's start are the same as for the previous run's end */
1973 start=limit;
1974 level=nextLevel;
1975 if((start>0) && (NO_CONTEXT_RTL(pBiDi->dirProps[start-1])==B)) {
1976 /* except if this is a new paragraph, then set sor = para level */
1977 sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start));
1978 } else {
1979 sor=eor;
1980 }
1981
1982 /* search for the limit of this run */
1983 while(++limit<length && levels[limit]==level) {}
1984
1985 /* get the correct level of the next run */
1986 if(limit<length) {
1987 nextLevel=levels[limit];
1988 } else {
1989 nextLevel=GET_PARALEVEL(pBiDi, length-1);
1990 }
1991
1992 /* determine eor from max(level, nextLevel); sor is last run's eor */
1993 if((level&~UBIDI_LEVEL_OVERRIDE)<(nextLevel&~UBIDI_LEVEL_OVERRIDE)) {
1994 eor=GET_LR_FROM_LEVEL(nextLevel);
1995 } else {
1996 eor=GET_LR_FROM_LEVEL(level);
1997 }
1998
1999 /* if the run consists of overridden directional types, then there
2000 are no implicit types to be resolved */
2001 if(!(level&UBIDI_LEVEL_OVERRIDE)) {
2002 resolveImplicitLevels(pBiDi, start, limit, sor, eor);
2003 } else {
2004 /* remove the UBIDI_LEVEL_OVERRIDE flags */
2005 do {
2006 levels[start++]&=~UBIDI_LEVEL_OVERRIDE;
2007 } while(start<limit);
2008 }
2009 } while(limit<length);
2010 }
2011 /* check if we got any memory shortage while adding insert points */
2012 if (U_FAILURE(pBiDi->insertPoints.errorCode))
2013 {
2014 *pErrorCode=pBiDi->insertPoints.errorCode;
2015 return;
2016 }
2017 /* reset the embedding levels for some non-graphic characters (L1), (X9) */
2018 adjustWSLevels(pBiDi);
2019 break;
2020 }
2021 /* add RLM for inverse Bidi with contextual orientation resolving
2022 * to RTL which would not round-trip otherwise
2023 */
2024 if((pBiDi->defaultParaLevel>0) &&
2025 (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) &&
2026 ((pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) ||
2027 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) {
2028 int32_t i, j, start, last;
2029 DirProp dirProp;
2030 for(i=0; i<pBiDi->paraCount; i++) {
2031 last=pBiDi->paras[i]-1;
2032 if((pBiDi->dirProps[last] & CONTEXT_RTL)==0) {
2033 continue; /* LTR paragraph */
2034 }
2035 start= i==0 ? 0 : pBiDi->paras[i - 1];
2036 for(j=last; j>=start; j--) {
2037 dirProp=NO_CONTEXT_RTL(pBiDi->dirProps[j]);
2038 if(dirProp==L) {
2039 if(j<last) {
2040 while(NO_CONTEXT_RTL(pBiDi->dirProps[last])==B) {
2041 last--;
2042 }
2043 }
2044 addPoint(pBiDi, last, RLM_BEFORE);
2045 break;
2046 }
2047 if(DIRPROP_FLAG(dirProp) & MASK_R_AL) {
2048 break;
2049 }
2050 }
2051 }
2052 }
2053
2054 if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
2055 pBiDi->resultLength -= pBiDi->controlCount;
2056 } else {
2057 pBiDi->resultLength += pBiDi->insertPoints.size;
2058 }
2059 pBiDi->pParaBiDi=pBiDi; /* mark successful setPara */
2060 }
2061
2062 U_CAPI void U_EXPORT2
2063 ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR) {
2064 if(pBiDi!=NULL) {
2065 pBiDi->orderParagraphsLTR=orderParagraphsLTR;
2066 }
2067 }
2068
2069 U_CAPI UBool U_EXPORT2
2070 ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) {
2071 if(pBiDi!=NULL) {
2072 return pBiDi->orderParagraphsLTR;
2073 } else {
2074 return FALSE;
2075 }
2076 }
2077
2078 U_CAPI UBiDiDirection U_EXPORT2
2079 ubidi_getDirection(const UBiDi *pBiDi) {
2080 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2081 return pBiDi->direction;
2082 } else {
2083 return UBIDI_LTR;
2084 }
2085 }
2086
2087 U_CAPI const UChar * U_EXPORT2
2088 ubidi_getText(const UBiDi *pBiDi) {
2089 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2090 return pBiDi->text;
2091 } else {
2092 return NULL;
2093 }
2094 }
2095
2096 U_CAPI int32_t U_EXPORT2
2097 ubidi_getLength(const UBiDi *pBiDi) {
2098 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2099 return pBiDi->originalLength;
2100 } else {
2101 return 0;
2102 }
2103 }
2104
2105 U_CAPI int32_t U_EXPORT2
2106 ubidi_getProcessedLength(const UBiDi *pBiDi) {
2107 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2108 return pBiDi->length;
2109 } else {
2110 return 0;
2111 }
2112 }
2113
2114 U_CAPI int32_t U_EXPORT2
2115 ubidi_getResultLength(const UBiDi *pBiDi) {
2116 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2117 return pBiDi->resultLength;
2118 } else {
2119 return 0;
2120 }
2121 }
2122
2123 /* paragraphs API functions ------------------------------------------------- */
2124
2125 U_CAPI UBiDiLevel U_EXPORT2
2126 ubidi_getParaLevel(const UBiDi *pBiDi) {
2127 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2128 return pBiDi->paraLevel;
2129 } else {
2130 return 0;
2131 }
2132 }
2133
2134 U_CAPI int32_t U_EXPORT2
2135 ubidi_countParagraphs(UBiDi *pBiDi) {
2136 if(!IS_VALID_PARA_OR_LINE(pBiDi)) {
2137 return 0;
2138 } else {
2139 return pBiDi->paraCount;
2140 }
2141 }
2142
2143 U_CAPI void U_EXPORT2
2144 ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex,
2145 int32_t *pParaStart, int32_t *pParaLimit,
2146 UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
2147 int32_t paraStart;
2148
2149 /* check the argument values */
2150 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2151 RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode);
2152 RETURN_VOID_IF_BAD_RANGE(paraIndex, 0, pBiDi->paraCount, *pErrorCode);
2153
2154 pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */
2155 if(paraIndex) {
2156 paraStart=pBiDi->paras[paraIndex-1];
2157 } else {
2158 paraStart=0;
2159 }
2160 if(pParaStart!=NULL) {
2161 *pParaStart=paraStart;
2162 }
2163 if(pParaLimit!=NULL) {
2164 *pParaLimit=pBiDi->paras[paraIndex];
2165 }
2166 if(pParaLevel!=NULL) {
2167 *pParaLevel=GET_PARALEVEL(pBiDi, paraStart);
2168 }
2169 }
2170
2171 U_CAPI int32_t U_EXPORT2
2172 ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex,
2173 int32_t *pParaStart, int32_t *pParaLimit,
2174 UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
2175 uint32_t paraIndex;
2176
2177 /* check the argument values */
2178 /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */
2179 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
2180 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
2181 pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */
2182 RETURN_IF_BAD_RANGE(charIndex, 0, pBiDi->length, *pErrorCode, -1);
2183
2184 for(paraIndex=0; charIndex>=pBiDi->paras[paraIndex]; paraIndex++);
2185 ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode);
2186 return paraIndex;
2187 }
2188
2189 U_CAPI void U_EXPORT2
2190 ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn,
2191 const void *newContext, UBiDiClassCallback **oldFn,
2192 const void **oldContext, UErrorCode *pErrorCode)
2193 {
2194 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2195 if(pBiDi==NULL) {
2196 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2197 return;
2198 }
2199 if( oldFn )
2200 {
2201 *oldFn = pBiDi->fnClassCallback;
2202 }
2203 if( oldContext )
2204 {
2205 *oldContext = pBiDi->coClassCallback;
2206 }
2207 pBiDi->fnClassCallback = newFn;
2208 pBiDi->coClassCallback = newContext;
2209 }
2210
2211 U_CAPI void U_EXPORT2
2212 ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context)
2213 {
2214 if(pBiDi==NULL) {
2215 return;
2216 }
2217 if( fn )
2218 {
2219 *fn = pBiDi->fnClassCallback;
2220 }
2221 if( context )
2222 {
2223 *context = pBiDi->coClassCallback;
2224 }
2225 }
2226
2227 U_CAPI UCharDirection U_EXPORT2
2228 ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c)
2229 {
2230 UCharDirection dir;
2231
2232 if( pBiDi->fnClassCallback == NULL ||
2233 (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT )
2234 {
2235 return ubidi_getClass(pBiDi->bdp, c);
2236 } else {
2237 return dir;
2238 }
2239 }
2240