]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/ubidi.c
ICU-400.37.tar.gz
[apple/icu.git] / icuSources / common / ubidi.c
CommitLineData
73c04bcf 1/*
b75a7d8f
A
2******************************************************************************
3*
46f4442e 4* Copyright (C) 1999-2008, International Business Machines
b75a7d8f
A
5* Corporation and others. All Rights Reserved.
6*
7******************************************************************************
8* file name: ubidi.c
9* encoding: US-ASCII
10* tab size: 8 (not used)
11* indentation:4
12*
13* created on: 1999jul27
46f4442e 14* created by: Markus W. Scherer, updated by Matitiahu Allouche
b75a7d8f
A
15*/
16
b75a7d8f
A
17#include "cmemory.h"
18#include "unicode/utypes.h"
19#include "unicode/ustring.h"
20#include "unicode/uchar.h"
21#include "unicode/ubidi.h"
73c04bcf 22#include "ubidi_props.h"
b75a7d8f 23#include "ubidiimp.h"
46f4442e 24#include "uassert.h"
b75a7d8f
A
25
26/*
27 * General implementation notes:
28 *
29 * Throughout the implementation, there are comments like (W2) that refer to
30 * rules of the BiDi algorithm in its version 5, in this example to the second
31 * rule of the resolution of weak types.
32 *
33 * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
34 * character according to UTF-16, the second UChar gets the directional property of
35 * the entire character assigned, while the first one gets a BN, a boundary
36 * neutral, type, which is ignored by most of the algorithm according to
37 * rule (X9) and the implementation suggestions of the BiDi algorithm.
38 *
39 * Later, adjustWSLevels() will set the level for each BN to that of the
40 * following character (UChar), which results in surrogate pairs getting the
41 * same level on each of their surrogates.
42 *
43 * In a UTF-8 implementation, the same thing could be done: the last byte of
44 * a multi-byte sequence would get the "real" property, while all previous
45 * bytes of that sequence would get BN.
46 *
47 * It is not possible to assign all those parts of a character the same real
48 * property because this would fail in the resolution of weak types with rules
49 * that look at immediately surrounding types.
50 *
51 * As a related topic, this implementation does not remove Boundary Neutral
73c04bcf 52 * types from the input, but ignores them wherever this is relevant.
b75a7d8f
A
53 * For example, the loop for the resolution of the weak types reads
54 * types until it finds a non-BN.
55 * Also, explicit embedding codes are neither changed into BN nor removed.
56 * They are only treated the same way real BNs are.
57 * As stated before, adjustWSLevels() takes care of them at the end.
58 * For the purpose of conformance, the levels of all these codes
59 * do not matter.
60 *
61 * Note that this implementation never modifies the dirProps
62 * after the initial setup.
63 *
64 *
65 * In this implementation, the resolution of weak types (Wn),
66 * neutrals (Nn), and the assignment of the resolved level (In)
67 * are all done in one single loop, in resolveImplicitLevels().
68 * Changes of dirProp values are done on the fly, without writing
69 * them back to the dirProps array.
70 *
71 *
72 * This implementation contains code that allows to bypass steps of the
73 * algorithm that are not needed on the specific paragraph
74 * in order to speed up the most common cases considerably,
75 * like text that is entirely LTR, or RTL text without numbers.
76 *
77 * Most of this is done by setting a bit for each directional property
78 * in a flags variable and later checking for whether there are
79 * any LTR characters or any RTL characters, or both, whether
80 * there are any explicit embedding codes, etc.
81 *
82 * If the (Xn) steps are performed, then the flags are re-evaluated,
83 * because they will then not contain the embedding codes any more
84 * and will be adjusted for override codes, so that subsequently
85 * more bypassing may be possible than what the initial flags suggested.
86 *
87 * If the text is not mixed-directional, then the
88 * algorithm steps for the weak type resolution are not performed,
89 * and all levels are set to the paragraph level.
90 *
91 * If there are no explicit embedding codes, then the (Xn) steps
92 * are not performed.
93 *
94 * If embedding levels are supplied as a parameter, then all
95 * explicit embedding codes are ignored, and the (Xn) steps
96 * are not performed.
97 *
98 * White Space types could get the level of the run they belong to,
99 * and are checked with a test of (flags&MASK_EMBEDDING) to
100 * consider if the paragraph direction should be considered in
101 * the flags variable.
102 *
103 * If there are no White Space types in the paragraph, then
104 * (L1) is not necessary in adjustWSLevels().
105 */
106
b75a7d8f
A
107/* to avoid some conditional statements, use tiny constant arrays */
108static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) };
109static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) };
110static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) };
111
112#define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
113#define DIRPROP_FLAG_E(level) flagE[(level)&1]
114#define DIRPROP_FLAG_O(level) flagO[(level)&1]
115
116/* UBiDi object management -------------------------------------------------- */
117
118U_CAPI UBiDi * U_EXPORT2
73c04bcf 119ubidi_open(void)
b75a7d8f
A
120{
121 UErrorCode errorCode=U_ZERO_ERROR;
122 return ubidi_openSized(0, 0, &errorCode);
123}
124
125U_CAPI UBiDi * U_EXPORT2
126ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) {
127 UBiDi *pBiDi;
128
129 /* check the argument values */
130 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
131 return NULL;
132 } else if(maxLength<0 || maxRunCount<0) {
133 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
134 return NULL; /* invalid arguments */
135 }
136
137 /* allocate memory for the object */
138 pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi));
139 if(pBiDi==NULL) {
140 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
141 return NULL;
142 }
143
144 /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */
145 uprv_memset(pBiDi, 0, sizeof(UBiDi));
146
73c04bcf
A
147 /* get BiDi properties */
148 pBiDi->bdp=ubidi_getSingleton(pErrorCode);
149 if(U_FAILURE(*pErrorCode)) {
150 uprv_free(pBiDi);
151 return NULL;
152 }
153
b75a7d8f
A
154 /* allocate memory for arrays as requested */
155 if(maxLength>0) {
156 if( !getInitialDirPropsMemory(pBiDi, maxLength) ||
157 !getInitialLevelsMemory(pBiDi, maxLength)
158 ) {
159 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
160 }
161 } else {
162 pBiDi->mayAllocateText=TRUE;
163 }
164
165 if(maxRunCount>0) {
166 if(maxRunCount==1) {
167 /* use simpleRuns[] */
168 pBiDi->runsSize=sizeof(Run);
169 } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) {
170 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
171 }
172 } else {
173 pBiDi->mayAllocateRuns=TRUE;
174 }
175
176 if(U_SUCCESS(*pErrorCode)) {
177 return pBiDi;
178 } else {
179 ubidi_close(pBiDi);
180 return NULL;
181 }
182}
183
184/*
185 * We are allowed to allocate memory if memory==NULL or
186 * mayAllocate==TRUE for each array that we need.
46f4442e 187 * We also try to grow memory as needed if we
b75a7d8f
A
188 * allocate it.
189 *
190 * Assume sizeNeeded>0.
191 * If *pMemory!=NULL, then assume *pSize>0.
192 *
193 * ### this realloc() may unnecessarily copy the old data,
194 * which we know we don't need any more;
195 * is this the best way to do this??
196 */
197U_CFUNC UBool
46f4442e
A
198ubidi_getMemory(BidiMemoryForAllocation *bidiMem, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded) {
199 void **pMemory = (void **)bidiMem;
b75a7d8f
A
200 /* check for existing memory */
201 if(*pMemory==NULL) {
202 /* we need to allocate memory */
203 if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=NULL) {
204 *pSize=sizeNeeded;
205 return TRUE;
206 } else {
207 return FALSE;
208 }
209 } else {
46f4442e
A
210 if(sizeNeeded<=*pSize) {
211 /* there is already enough memory */
212 return TRUE;
213 }
214 else if(!mayAllocate) {
b75a7d8f
A
215 /* not enough memory, and we must not allocate */
216 return FALSE;
46f4442e
A
217 } else {
218 /* we try to grow */
b75a7d8f 219 void *memory;
46f4442e
A
220 /* in most cases, we do not need the copy-old-data part of
221 * realloc, but it is needed when adding runs using getRunsMemory()
222 * in setParaRunsOnly()
223 */
b75a7d8f
A
224 if((memory=uprv_realloc(*pMemory, sizeNeeded))!=NULL) {
225 *pMemory=memory;
226 *pSize=sizeNeeded;
227 return TRUE;
228 } else {
229 /* we failed to grow */
230 return FALSE;
231 }
b75a7d8f
A
232 }
233 }
234}
235
236U_CAPI void U_EXPORT2
237ubidi_close(UBiDi *pBiDi) {
238 if(pBiDi!=NULL) {
73c04bcf 239 pBiDi->pParaBiDi=NULL; /* in case one tries to reuse this block */
b75a7d8f
A
240 if(pBiDi->dirPropsMemory!=NULL) {
241 uprv_free(pBiDi->dirPropsMemory);
242 }
243 if(pBiDi->levelsMemory!=NULL) {
244 uprv_free(pBiDi->levelsMemory);
245 }
246 if(pBiDi->runsMemory!=NULL) {
247 uprv_free(pBiDi->runsMemory);
248 }
73c04bcf
A
249 if(pBiDi->parasMemory!=NULL) {
250 uprv_free(pBiDi->parasMemory);
251 }
252 if(pBiDi->insertPoints.points!=NULL) {
253 uprv_free(pBiDi->insertPoints.points);
254 }
255
b75a7d8f
A
256 uprv_free(pBiDi);
257 }
258}
259
260/* set to approximate "inverse BiDi" ---------------------------------------- */
261
262U_CAPI void U_EXPORT2
263ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) {
264 if(pBiDi!=NULL) {
265 pBiDi->isInverse=isInverse;
73c04bcf
A
266 pBiDi->reorderingMode = isInverse ? UBIDI_REORDER_INVERSE_NUMBERS_AS_L
267 : UBIDI_REORDER_DEFAULT;
b75a7d8f
A
268 }
269}
270
271U_CAPI UBool U_EXPORT2
272ubidi_isInverse(UBiDi *pBiDi) {
273 if(pBiDi!=NULL) {
274 return pBiDi->isInverse;
275 } else {
276 return FALSE;
277 }
278}
279
73c04bcf
A
280/* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
281 * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre
282 * concept of RUNS_ONLY which is a double operation.
283 * It could be advantageous to divide this into 3 concepts:
284 * a) Operation: direct / inverse / RUNS_ONLY
46f4442e 285 * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R
73c04bcf
A
286 * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL
287 * This would allow combinations not possible today like RUNS_ONLY with
288 * NUMBERS_SPECIAL.
289 * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and
290 * REMOVE_CONTROLS for the inverse step.
291 * Not all combinations would be supported, and probably not all do make sense.
292 * This would need to document which ones are supported and what are the
293 * fallbacks for unsupported combinations.
294 */
295U_CAPI void U_EXPORT2
296ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) {
46f4442e 297 if ((pBiDi!=NULL) && (reorderingMode >= UBIDI_REORDER_DEFAULT)
73c04bcf
A
298 && (reorderingMode < UBIDI_REORDER_COUNT)) {
299 pBiDi->reorderingMode = reorderingMode;
46f4442e 300 pBiDi->isInverse = (UBool)(reorderingMode == UBIDI_REORDER_INVERSE_NUMBERS_AS_L);
73c04bcf
A
301 }
302}
303
304U_CAPI UBiDiReorderingMode U_EXPORT2
305ubidi_getReorderingMode(UBiDi *pBiDi) {
46f4442e 306 if (pBiDi!=NULL) {
73c04bcf
A
307 return pBiDi->reorderingMode;
308 } else {
309 return UBIDI_REORDER_DEFAULT;
310 }
311}
312
313U_CAPI void U_EXPORT2
314ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions) {
315 if (reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
316 reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
317 }
46f4442e
A
318 if (pBiDi!=NULL) {
319 pBiDi->reorderingOptions=reorderingOptions;
73c04bcf
A
320 }
321}
322
323U_CAPI uint32_t U_EXPORT2
324ubidi_getReorderingOptions(UBiDi *pBiDi) {
46f4442e 325 if (pBiDi!=NULL) {
73c04bcf
A
326 return pBiDi->reorderingOptions;
327 } else {
328 return 0;
329 }
330}
331
b75a7d8f
A
332/* perform (P2)..(P3) ------------------------------------------------------- */
333
334/*
335 * Get the directional properties for the text,
336 * calculate the flags bit-set, and
73c04bcf 337 * determine the paragraph level if necessary.
b75a7d8f
A
338 */
339static void
73c04bcf
A
340getDirProps(UBiDi *pBiDi) {
341 const UChar *text=pBiDi->text;
b75a7d8f
A
342 DirProp *dirProps=pBiDi->dirPropsMemory; /* pBiDi->dirProps is const */
343
73c04bcf 344 int32_t i=0, i0, i1, length=pBiDi->originalLength;
b75a7d8f
A
345 Flags flags=0; /* collect all directionalities in the text */
346 UChar32 uchar;
73c04bcf
A
347 DirProp dirProp=0, paraDirDefault=0;/* initialize to avoid compiler warnings */
348 UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel);
349 /* for inverse BiDi, the default para level is set to RTL if there is a
46f4442e
A
350 strong R or AL character at either end of the text */
351 UBool isDefaultLevelInverse=isDefaultLevel && (UBool)
73c04bcf
A
352 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
353 pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL);
354 int32_t lastArabicPos=-1;
355 int32_t controlCount=0;
46f4442e
A
356 UBool removeBiDiControls = (UBool)(pBiDi->reorderingOptions &
357 UBIDI_OPTION_REMOVE_CONTROLS);
73c04bcf
A
358
359 typedef enum {
360 NOT_CONTEXTUAL, /* 0: not contextual paraLevel */
361 LOOKING_FOR_STRONG, /* 1: looking for first strong char */
362 FOUND_STRONG_CHAR /* 2: found first strong char */
363 } State;
364 State state;
365 int32_t paraStart=0; /* index of first char in paragraph */
366 DirProp paraDir; /* == CONTEXT_RTL within paragraphs
367 starting with strong R char */
368 DirProp lastStrongDir=0; /* for default level & inverse BiDi */
369 int32_t lastStrongLTR=0; /* for STREAMING option */
370
371 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
372 pBiDi->length=0;
373 lastStrongLTR=0;
374 }
375 if(isDefaultLevel) {
376 paraDirDefault=pBiDi->paraLevel&1 ? CONTEXT_RTL : 0;
377 paraDir=paraDirDefault;
378 lastStrongDir=paraDirDefault;
379 state=LOOKING_FOR_STRONG;
374ca955 380 } else {
73c04bcf
A
381 state=NOT_CONTEXTUAL;
382 paraDir=0;
b75a7d8f 383 }
73c04bcf
A
384 /* count paragraphs and determine the paragraph level (P2..P3) */
385 /*
386 * see comment in ubidi.h:
387 * the DEFAULT_XXX values are designed so that
388 * their bit 0 alone yields the intended default
389 */
390 for( /* i=0 above */ ; i<length; ) {
391 /* i is incremented by UTF_NEXT_CHAR */
b75a7d8f
A
392 i0=i; /* index of first code unit */
393 UTF_NEXT_CHAR(text, i, length, uchar);
394 i1=i-1; /* index of last code unit, gets the directional property */
46f4442e 395 flags|=DIRPROP_FLAG(dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar));
73c04bcf 396 dirProps[i1]=dirProp|paraDir;
b75a7d8f
A
397 if(i1>i0) { /* set previous code units' properties to BN */
398 flags|=DIRPROP_FLAG(BN);
399 do {
46f4442e 400 dirProps[--i1]=(DirProp)(BN|paraDir);
b75a7d8f
A
401 } while(i1>i0);
402 }
73c04bcf
A
403 if(state==LOOKING_FOR_STRONG) {
404 if(dirProp==L) {
405 state=FOUND_STRONG_CHAR;
406 if(paraDir) {
407 paraDir=0;
408 for(i1=paraStart; i1<i; i1++) {
409 dirProps[i1]&=~CONTEXT_RTL;
410 }
411 }
412 continue;
413 }
414 if(dirProp==R || dirProp==AL) {
415 state=FOUND_STRONG_CHAR;
416 if(paraDir==0) {
417 paraDir=CONTEXT_RTL;
418 for(i1=paraStart; i1<i; i1++) {
419 dirProps[i1]|=CONTEXT_RTL;
420 }
421 }
422 continue;
423 }
424 }
425 if(dirProp==L) {
426 lastStrongDir=0;
427 lastStrongLTR=i; /* i is index to next character */
428 }
429 else if(dirProp==R) {
430 lastStrongDir=CONTEXT_RTL;
431 }
432 else if(dirProp==AL) {
433 lastStrongDir=CONTEXT_RTL;
434 lastArabicPos=i-1;
435 }
436 else if(dirProp==B) {
437 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
438 pBiDi->length=i; /* i is index to next character */
439 }
440 if(isDefaultLevelInverse && (lastStrongDir==CONTEXT_RTL) &&(paraDir!=lastStrongDir)) {
441 for( ; paraStart<i; paraStart++) {
442 dirProps[paraStart]|=CONTEXT_RTL;
443 }
444 }
445 if(i<length) { /* B not last char in text */
446 if(!((uchar==CR) && (text[i]==LF))) {
447 pBiDi->paraCount++;
448 }
449 if(isDefaultLevel) {
450 state=LOOKING_FOR_STRONG;
451 paraStart=i; /* i is index to next character */
452 paraDir=paraDirDefault;
453 lastStrongDir=paraDirDefault;
454 }
455 }
456 }
457 if(removeBiDiControls && IS_BIDI_CONTROL_CHAR(uchar)) {
458 controlCount++;
459 }
b75a7d8f 460 }
73c04bcf
A
461 if(isDefaultLevelInverse && (lastStrongDir==CONTEXT_RTL) &&(paraDir!=lastStrongDir)) {
462 for(i1=paraStart; i1<length; i1++) {
463 dirProps[i1]|=CONTEXT_RTL;
464 }
465 }
466 if(isDefaultLevel) {
467 pBiDi->paraLevel=GET_PARALEVEL(pBiDi, 0);
468 }
469 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
470 if((lastStrongLTR>pBiDi->length) &&
471 (GET_PARALEVEL(pBiDi, lastStrongLTR)==0)) {
472 pBiDi->length = lastStrongLTR;
473 }
474 if(pBiDi->length<pBiDi->originalLength) {
475 pBiDi->paraCount--;
476 }
477 }
478 /* The following line does nothing new for contextual paraLevel, but is
479 needed for absolute paraLevel. */
480 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
481
482 if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
483 flags|=DIRPROP_FLAG(L);
b75a7d8f
A
484 }
485
73c04bcf 486 pBiDi->controlCount = controlCount;
b75a7d8f 487 pBiDi->flags=flags;
73c04bcf 488 pBiDi->lastArabicPos=lastArabicPos;
b75a7d8f
A
489}
490
491/* perform (X1)..(X9) ------------------------------------------------------- */
492
374ca955
A
493/* determine if the text is mixed-directional or single-directional */
494static UBiDiDirection
73c04bcf
A
495directionFromFlags(UBiDi *pBiDi) {
496 Flags flags=pBiDi->flags;
374ca955
A
497 /* if the text contains AN and neutrals, then some neutrals may become RTL */
498 if(!(flags&MASK_RTL || ((flags&DIRPROP_FLAG(AN)) && (flags&MASK_POSSIBLE_N)))) {
499 return UBIDI_LTR;
500 } else if(!(flags&MASK_LTR)) {
501 return UBIDI_RTL;
502 } else {
503 return UBIDI_MIXED;
504 }
505}
506
b75a7d8f
A
507/*
508 * Resolve the explicit levels as specified by explicit embedding codes.
509 * Recalculate the flags to have them reflect the real properties
510 * after taking the explicit embeddings into account.
511 *
512 * The BiDi algorithm is designed to result in the same behavior whether embedding
513 * levels are externally specified (from "styled text", supposedly the preferred
514 * method) or set by explicit embedding codes (LRx, RLx, PDF) in the plain text.
515 * That is why (X9) instructs to remove all explicit codes (and BN).
516 * However, in a real implementation, this removal of these codes and their index
517 * positions in the plain text is undesirable since it would result in
518 * reallocated, reindexed text.
519 * Instead, this implementation leaves the codes in there and just ignores them
520 * in the subsequent processing.
521 * In order to get the same reordering behavior, positions with a BN or an
522 * explicit embedding code just get the same level assigned as the last "real"
523 * character.
524 *
525 * Some implementations, not this one, then overwrite some of these
526 * directionality properties at "real" same-level-run boundaries by
527 * L or R codes so that the resolution of weak types can be performed on the
528 * entire paragraph at once instead of having to parse it once more and
529 * perform that resolution on same-level-runs.
530 * This limits the scope of the implicit rules in effectively
531 * the same way as the run limits.
532 *
533 * Instead, this implementation does not modify these codes.
534 * On one hand, the paragraph has to be scanned for same-level-runs, but
535 * on the other hand, this saves another loop to reset these codes,
536 * or saves making and modifying a copy of dirProps[].
537 *
538 *
539 * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm.
540 *
541 *
542 * Handling the stack of explicit levels (Xn):
543 *
544 * With the BiDi stack of explicit levels,
545 * as pushed with each LRE, RLE, LRO, and RLO and popped with each PDF,
546 * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL==61.
547 *
548 * In order to have a correct push-pop semantics even in the case of overflows,
549 * there are two overflow counters:
550 * - countOver60 is incremented with each LRx at level 60
551 * - from level 60, one RLx increases the level to 61
552 * - countOver61 is incremented with each LRx and RLx at level 61
553 *
554 * Popping levels with PDF must work in the opposite order so that level 61
555 * is correct at the correct point. Underflows (too many PDFs) must be checked.
556 *
557 * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
558 */
b75a7d8f
A
559static UBiDiDirection
560resolveExplicitLevels(UBiDi *pBiDi) {
561 const DirProp *dirProps=pBiDi->dirProps;
562 UBiDiLevel *levels=pBiDi->levels;
73c04bcf
A
563 const UChar *text=pBiDi->text;
564
b75a7d8f
A
565 int32_t i=0, length=pBiDi->length;
566 Flags flags=pBiDi->flags; /* collect all directionalities in the text */
567 DirProp dirProp;
73c04bcf 568 UBiDiLevel level=GET_PARALEVEL(pBiDi, 0);
b75a7d8f
A
569
570 UBiDiDirection direction;
73c04bcf 571 int32_t paraIndex=0;
b75a7d8f
A
572
573 /* determine if the text is mixed-directional or single-directional */
73c04bcf 574 direction=directionFromFlags(pBiDi);
b75a7d8f 575
73c04bcf
A
576 /* we may not need to resolve any explicit levels, but for multiple
577 paragraphs we want to loop on all chars to set the para boundaries */
578 if((direction!=UBIDI_MIXED) && (pBiDi->paraCount==1)) {
b75a7d8f 579 /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
73c04bcf
A
580 } else if((pBiDi->paraCount==1) &&
581 (!(flags&MASK_EXPLICIT) ||
582 (pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL))) {
b75a7d8f
A
583 /* mixed, but all characters are at the same embedding level */
584 /* or we are in "inverse BiDi" */
73c04bcf 585 /* and we don't have contextual multiple paragraphs with some B char */
b75a7d8f
A
586 /* set all levels to the paragraph level */
587 for(i=0; i<length; ++i) {
588 levels[i]=level;
589 }
590 } else {
591 /* continue to perform (Xn) */
592
593 /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
594 /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */
595 UBiDiLevel embeddingLevel=level, newLevel, stackTop=0;
596
597 UBiDiLevel stack[UBIDI_MAX_EXPLICIT_LEVEL]; /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL */
598 uint32_t countOver60=0, countOver61=0; /* count overflows of explicit levels */
599
600 /* recalculate the flags */
601 flags=0;
602
b75a7d8f 603 for(i=0; i<length; ++i) {
73c04bcf 604 dirProp=NO_CONTEXT_RTL(dirProps[i]);
b75a7d8f
A
605 switch(dirProp) {
606 case LRE:
607 case LRO:
608 /* (X3, X5) */
609 newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1)); /* least greater even level */
610 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL) {
611 stack[stackTop]=embeddingLevel;
612 ++stackTop;
613 embeddingLevel=newLevel;
614 if(dirProp==LRO) {
615 embeddingLevel|=UBIDI_LEVEL_OVERRIDE;
b75a7d8f 616 }
73c04bcf
A
617 /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE
618 since this has already been done for newLevel which is
619 the source for embeddingLevel.
620 */
b75a7d8f
A
621 } else if((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)==UBIDI_MAX_EXPLICIT_LEVEL) {
622 ++countOver61;
623 } else /* (embeddingLevel&~UBIDI_LEVEL_OVERRIDE)==UBIDI_MAX_EXPLICIT_LEVEL-1 */ {
624 ++countOver60;
625 }
626 flags|=DIRPROP_FLAG(BN);
627 break;
628 case RLE:
629 case RLO:
630 /* (X2, X4) */
631 newLevel=(UBiDiLevel)(((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)+1)|1); /* least greater odd level */
632 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL) {
633 stack[stackTop]=embeddingLevel;
634 ++stackTop;
635 embeddingLevel=newLevel;
636 if(dirProp==RLO) {
637 embeddingLevel|=UBIDI_LEVEL_OVERRIDE;
b75a7d8f 638 }
73c04bcf
A
639 /* we don't need to set UBIDI_LEVEL_OVERRIDE off for RLE
640 since this has already been done for newLevel which is
641 the source for embeddingLevel.
642 */
b75a7d8f
A
643 } else {
644 ++countOver61;
645 }
646 flags|=DIRPROP_FLAG(BN);
647 break;
648 case PDF:
649 /* (X7) */
650 /* handle all the overflow cases first */
651 if(countOver61>0) {
652 --countOver61;
653 } else if(countOver60>0 && (embeddingLevel&~UBIDI_LEVEL_OVERRIDE)!=UBIDI_MAX_EXPLICIT_LEVEL) {
654 /* handle LRx overflows from level 60 */
655 --countOver60;
656 } else if(stackTop>0) {
657 /* this is the pop operation; it also pops level 61 while countOver60>0 */
658 --stackTop;
659 embeddingLevel=stack[stackTop];
660 /* } else { (underflow) */
661 }
662 flags|=DIRPROP_FLAG(BN);
663 break;
664 case B:
b75a7d8f
A
665 stackTop=0;
666 countOver60=countOver61=0;
73c04bcf
A
667 level=GET_PARALEVEL(pBiDi, i);
668 if((i+1)<length) {
669 embeddingLevel=GET_PARALEVEL(pBiDi, i+1);
670 if(!((text[i]==CR) && (text[i+1]==LF))) {
671 pBiDi->paras[paraIndex++]=i+1;
672 }
673 }
b75a7d8f
A
674 flags|=DIRPROP_FLAG(B);
675 break;
676 case BN:
677 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
678 /* they will get their levels set correctly in adjustWSLevels() */
679 flags|=DIRPROP_FLAG(BN);
680 break;
681 default:
682 /* all other types get the "real" level */
683 if(level!=embeddingLevel) {
684 level=embeddingLevel;
685 if(level&UBIDI_LEVEL_OVERRIDE) {
686 flags|=DIRPROP_FLAG_O(level)|DIRPROP_FLAG_MULTI_RUNS;
687 } else {
688 flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG_MULTI_RUNS;
689 }
690 }
691 if(!(level&UBIDI_LEVEL_OVERRIDE)) {
692 flags|=DIRPROP_FLAG(dirProp);
693 }
694 break;
695 }
696
697 /*
698 * We need to set reasonable levels even on BN codes and
699 * explicit codes because we will later look at same-level runs (X10).
700 */
701 levels[i]=level;
702 }
703 if(flags&MASK_EMBEDDING) {
704 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
705 }
73c04bcf
A
706 if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
707 flags|=DIRPROP_FLAG(L);
708 }
b75a7d8f
A
709
710 /* subsequently, ignore the explicit codes and BN (X9) */
711
712 /* again, determine if the text is mixed-directional or single-directional */
713 pBiDi->flags=flags;
73c04bcf 714 direction=directionFromFlags(pBiDi);
b75a7d8f 715 }
73c04bcf 716
b75a7d8f
A
717 return direction;
718}
719
720/*
721 * Use a pre-specified embedding levels array:
722 *
723 * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
724 * ignore all explicit codes (X9),
725 * and check all the preset levels.
726 *
727 * Recalculate the flags to have them reflect the real properties
728 * after taking the explicit embeddings into account.
729 */
730static UBiDiDirection
731checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
732 const DirProp *dirProps=pBiDi->dirProps;
73c04bcf 733 DirProp dirProp;
b75a7d8f 734 UBiDiLevel *levels=pBiDi->levels;
73c04bcf
A
735 const UChar *text=pBiDi->text;
736
b75a7d8f
A
737 int32_t i, length=pBiDi->length;
738 Flags flags=0; /* collect all directionalities in the text */
73c04bcf
A
739 UBiDiLevel level;
740 uint32_t paraIndex=0;
b75a7d8f
A
741
742 for(i=0; i<length; ++i) {
743 level=levels[i];
73c04bcf 744 dirProp=NO_CONTEXT_RTL(dirProps[i]);
b75a7d8f
A
745 if(level&UBIDI_LEVEL_OVERRIDE) {
746 /* keep the override flag in levels[i] but adjust the flags */
747 level&=~UBIDI_LEVEL_OVERRIDE; /* make the range check below simpler */
748 flags|=DIRPROP_FLAG_O(level);
749 } else {
750 /* set the flags */
73c04bcf 751 flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp);
b75a7d8f 752 }
73c04bcf
A
753 if((level<GET_PARALEVEL(pBiDi, i) &&
754 !((0==level)&&(dirProp==B))) ||
755 (UBIDI_MAX_EXPLICIT_LEVEL<level)) {
b75a7d8f
A
756 /* level out of bounds */
757 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
758 return UBIDI_LTR;
759 }
73c04bcf
A
760 if((dirProp==B) && ((i+1)<length)) {
761 if(!((text[i]==CR) && (text[i+1]==LF))) {
762 pBiDi->paras[paraIndex++]=i+1;
763 }
764 }
b75a7d8f
A
765 }
766 if(flags&MASK_EMBEDDING) {
767 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
768 }
769
770 /* determine if the text is mixed-directional or single-directional */
771 pBiDi->flags=flags;
73c04bcf
A
772 return directionFromFlags(pBiDi);
773}
774
46f4442e
A
775/******************************************************************
776 The Properties state machine table
777*******************************************************************
778
779 All table cells are 8 bits:
780 bits 0..4: next state
781 bits 5..7: action to perform (if > 0)
782
783 Cells may be of format "n" where n represents the next state
784 (except for the rightmost column).
785 Cells may also be of format "s(x,y)" where x represents an action
786 to perform and y represents the next state.
787
788*******************************************************************
789 Definitions and type for properties state table
790*******************************************************************
791*/
73c04bcf
A
792#define IMPTABPROPS_COLUMNS 14
793#define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1)
794#define GET_STATEPROPS(cell) ((cell)&0x1f)
795#define GET_ACTIONPROPS(cell) ((cell)>>5)
46f4442e 796#define s(action, newState) ((uint8_t)(newState+(action<<5)))
73c04bcf
A
797
798static const uint8_t groupProp[] = /* dirProp regrouped */
799{
800/* L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN */
801 0, 1, 2, 7, 8, 3, 9, 6, 5, 4, 4, 10, 10, 12, 10, 10, 10, 11, 10
802};
46f4442e
A
803enum { DirProp_L=0, DirProp_R=1, DirProp_EN=2, DirProp_AN=3, DirProp_ON=4, DirProp_S=5, DirProp_B=6 }; /* reduced dirProp */
804
805/******************************************************************
806
807 PROPERTIES STATE TABLE
808
809 In table impTabProps,
810 - the ON column regroups ON and WS
811 - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF
812 - the Res column is the reduced property assigned to a run
813
814 Action 1: process current run1, init new run1
815 2: init new run2
816 3: process run1, process run2, init new run1
817 4: process run1, set run1=run2, init new run2
818
819 Notes:
820 1) This table is used in resolveImplicitLevels().
821 2) This table triggers actions when there is a change in the Bidi
822 property of incoming characters (action 1).
823 3) Most such property sequences are processed immediately (in
824 fact, passed to processPropertySeq().
825 4) However, numbers are assembled as one sequence. This means
826 that undefined situations (like CS following digits, until
827 it is known if the next char will be a digit) are held until
828 following chars define them.
829 Example: digits followed by CS, then comes another CS or ON;
830 the digits will be processed, then the CS assigned
831 as the start of an ON sequence (action 3).
832 5) There are cases where more than one sequence must be
833 processed, for instance digits followed by CS followed by L:
834 the digits must be processed as one sequence, and the CS
835 must be processed as an ON sequence, all this before starting
836 assembling chars for the opening L sequence.
837
838
839*/
73c04bcf
A
840static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] =
841{
842/* L , R , EN , AN , ON , S , B , ES , ET , CS , BN , NSM , AL , Res */
46f4442e
A
843/* 0 Init */ { 1 , 2 , 4 , 5 , 7 , 15 , 17 , 7 , 9 , 7 , 0 , 7 , 3 , DirProp_ON },
844/* 1 L */ { 1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 1 , 1 , s(1,3), DirProp_L },
845/* 2 R */ { s(1,1), 2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 2 , 2 , s(1,3), DirProp_R },
846/* 3 AL */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8), 3 , 3 , 3 , DirProp_R },
847/* 4 EN */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10), 11 ,s(2,10), 4 , 4 , s(1,3), DirProp_EN },
848/* 5 AN */ { s(1,1), s(1,2), s(1,4), 5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12), 5 , 5 , s(1,3), DirProp_AN },
849/* 6 AL:EN/AN */ { s(1,1), s(1,2), 6 , 6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13), 6 , 6 , s(1,3), DirProp_AN },
850/* 7 ON */ { s(1,1), s(1,2), s(1,4), s(1,5), 7 ,s(1,15),s(1,17), 7 ,s(2,14), 7 , 7 , 7 , s(1,3), DirProp_ON },
851/* 8 AL:ON */ { s(1,1), s(1,2), s(1,6), s(1,6), 8 ,s(1,16),s(1,17), 8 , 8 , 8 , 8 , 8 , s(1,3), DirProp_ON },
852/* 9 ET */ { s(1,1), s(1,2), 4 , s(1,5), 7 ,s(1,15),s(1,17), 7 , 9 , 7 , 9 , 9 , s(1,3), DirProp_ON },
853/*10 EN+ES/CS */ { s(3,1), s(3,2), 4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 10 , s(4,7), s(3,3), DirProp_EN },
854/*11 EN+ET */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 11 , s(1,7), 11 , 11 , s(1,3), DirProp_EN },
855/*12 AN+CS */ { s(3,1), s(3,2), s(3,4), 5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 12 , s(4,7), s(3,3), DirProp_AN },
856/*13 AL:EN/AN+CS */ { s(3,1), s(3,2), 6 , 6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8), 13 , s(4,8), s(3,3), DirProp_AN },
857/*14 ON+ET */ { s(1,1), s(1,2), s(4,4), s(1,5), 7 ,s(1,15),s(1,17), 7 , 14 , 7 , 14 , 14 , s(1,3), DirProp_ON },
858/*15 S */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7), 15 ,s(1,17), s(1,7), s(1,9), s(1,7), 15 , s(1,7), s(1,3), DirProp_S },
859/*16 AL:S */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8), 16 ,s(1,17), s(1,8), s(1,8), s(1,8), 16 , s(1,8), s(1,3), DirProp_S },
860/*17 B */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15), 17 , s(1,7), s(1,9), s(1,7), 17 , s(1,7), s(1,3), DirProp_B }
73c04bcf
A
861};
862
/* We must undef the macro s because the levels tables have a different
 * cell structure (4 bits for the action and 4 bits for the next state).
 */
46f4442e
A
866#undef s
867
868/******************************************************************
869 The levels state machine tables
870*******************************************************************
871
872 All table cells are 8 bits:
873 bits 0..3: next state
874 bits 4..7: action to perform (if > 0)
875
876 Cells may be of format "n" where n represents the next state
877 (except for the rightmost column).
878 Cells may also be of format "s(x,y)" where x represents an action
879 to perform and y represents the next state.
880
881 This format limits each table to 16 states each and to 15 actions.
882
883*******************************************************************
884 Definitions and type for levels state tables
885*******************************************************************
886*/
/* Number of columns in a levels state table: one per property up to
 * DirProp_B, plus the trailing "Res" column.
 */
#define IMPTABLEVELS_COLUMNS (DirProp_B + 2)
/* Index of the "Res" (level increment) column, i.e. the last column. */
#define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1)
/* Low nibble of a table cell: the next state. */
#define GET_STATE(cell) ((cell)&0x0f)
/* High nibble of a table cell: the (table-local) action number. */
#define GET_ACTION(cell) ((cell)>>4)
/* Build a table cell from an action number and a next state.
 * Both arguments are fully parenthesized so that expression arguments
 * (e.g. s(a|b, n)) expand with the intended precedence.
 */
#define s(action, newState) ((uint8_t)((newState)+((action)<<4)))
73c04bcf
A
892
893typedef uint8_t ImpTab[][IMPTABLEVELS_COLUMNS];
894typedef uint8_t ImpAct[];
895
896/* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct,
897 * instead of having a pair of ImpTab and a pair of ImpAct.
898 */
/* A pair of levels state tables together with their action maps.
 * Both arrays are indexed by (run level & 1), see resolveImplicitLevels().
 */
typedef struct ImpTabPair ImpTabPair;
struct ImpTabPair {
    const void *pImpTab[2];     /* levels tables, stored untyped */
    const void *pImpAct[2];     /* matching action maps, stored untyped */
};
903
46f4442e
A
904/******************************************************************
905
906 LEVELS STATE TABLES
907
908 In all levels state tables,
909 - state 0 is the initial state
910 - the Res column is the increment to add to the text level
911 for this property sequence.
912
913 The impAct arrays for each table of a pair map the local action
914 numbers of the table to the total list of actions. For instance,
915 action 2 in a given table corresponds to the action number which
916 appears in entry [2] of the impAct array for that table.
917 The first entry of all impAct arrays must be 0.
918
919 Action 1: init conditional sequence
920 2: prepend conditional sequence to current sequence
921 3: set ON sequence to new level - 1
922 4: init EN/AN/ON sequence
923 5: fix EN/AN/ON sequence followed by R
924 6: set previous level sequence to level 2
925
926 Notes:
927 1) These tables are used in processPropertySeq(). The input
928 is property sequences as determined by resolveImplicitLevels.
929 2) Most such property sequences are processed immediately
930 (levels are assigned).
931 3) However, some sequences cannot be assigned a final level till
932 one or more following sequences are received. For instance,
933 ON following an R sequence within an even-level paragraph.
934 If the following sequence is R, the ON sequence will be
935 assigned basic run level+1, and so will the R sequence.
936 4) S is generally handled like ON, since its level will be fixed
937 to paragraph level in adjustWSLevels().
938
939*/
73c04bcf
A
940
941static const ImpTab impTabL_DEFAULT = /* Even paragraph level */
942/* In this table, conditional sequences receive the higher possible level
943 until proven otherwise.
944*/
945{
946/* L , R , EN , AN , ON , S , B , Res */
947/* 0 : init */ { 0 , 1 , 0 , 2 , 0 , 0 , 0 , 0 },
46f4442e
A
948/* 1 : R */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 1 },
949/* 2 : AN */ { 0 , 1 , 0 , 2 , s(1,5), s(1,5), 0 , 2 },
950/* 3 : R+EN/AN */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 2 },
951/* 4 : R+ON */ { s(2,0), 1 , 3 , 3 , 4 , 4 , s(2,0), 1 },
952/* 5 : AN+ON */ { s(2,0), 1 , s(2,0), 2 , 5 , 5 , s(2,0), 1 }
73c04bcf
A
953};
954static const ImpTab impTabR_DEFAULT = /* Odd paragraph level */
955/* In this table, conditional sequences receive the lower possible level
956 until proven otherwise.
957*/
958{
959/* L , R , EN , AN , ON , S , B , Res */
960/* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 },
46f4442e 961/* 1 : L */ { 1 , 0 , 1 , 3 , s(1,4), s(1,4), 0 , 1 },
73c04bcf
A
962/* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 },
963/* 3 : L+AN */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 1 },
46f4442e 964/* 4 : L+ON */ { s(2,1), 0 , s(2,1), 3 , 4 , 4 , 0 , 0 },
73c04bcf
A
965/* 5 : L+AN+ON */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 0 }
966};
967static const ImpAct impAct0 = {0,1,2,3,4,5,6};
46f4442e
A
968static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT,
969 &impTabR_DEFAULT},
970 {&impAct0, &impAct0}};
73c04bcf
A
971
972static const ImpTab impTabL_NUMBERS_SPECIAL = /* Even paragraph level */
973/* In this table, conditional sequences receive the higher possible level
974 until proven otherwise.
975*/
976{
977/* L , R , EN , AN , ON , S , B , Res */
978/* 0 : init */ { 0 , 2 , 1 , 1 , 0 , 0 , 0 , 0 },
979/* 1 : L+EN/AN */ { 0 , 2 , 1 , 1 , 0 , 0 , 0 , 2 },
46f4442e
A
980/* 2 : R */ { 0 , 2 , 4 , 4 , s(1,3), 0 , 0 , 1 },
981/* 3 : R+ON */ { s(2,0), 2 , 4 , 4 , 3 , 3 , s(2,0), 1 },
982/* 4 : R+EN/AN */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 2 }
73c04bcf 983 };
46f4442e
A
984static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL,
985 &impTabR_DEFAULT},
986 {&impAct0, &impAct0}};
73c04bcf
A
987
988static const ImpTab impTabL_GROUP_NUMBERS_WITH_R =
989/* In this table, EN/AN+ON sequences receive levels as if associated with R
990 until proven that there is L or sor/eor on both sides. AN is handled like EN.
991*/
992{
993/* L , R , EN , AN , ON , S , B , Res */
46f4442e
A
994/* 0 init */ { 0 , 3 , s(1,1), s(1,1), 0 , 0 , 0 , 0 },
995/* 1 EN/AN */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 2 },
996/* 2 EN/AN+ON */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 1 },
997/* 3 R */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 1 },
998/* 4 R+ON */ { s(2,0), 3 , 5 , 5 , 4 , s(2,0), s(2,0), 1 },
999/* 5 R+EN/AN */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 2 }
73c04bcf
A
1000};
1001static const ImpTab impTabR_GROUP_NUMBERS_WITH_R =
1002/* In this table, EN/AN+ON sequences receive levels as if associated with R
1003 until proven that there is L on both sides. AN is handled like EN.
1004*/
1005{
1006/* L , R , EN , AN , ON , S , B , Res */
1007/* 0 init */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 0 },
1008/* 1 EN/AN */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 1 },
46f4442e
A
1009/* 2 L */ { 2 , 0 , s(1,4), s(1,4), s(1,3), 0 , 0 , 1 },
1010/* 3 L+ON */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 0 },
1011/* 4 L+EN/AN */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 1 }
73c04bcf
A
1012};
1013static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R = {
46f4442e
A
1014 {&impTabL_GROUP_NUMBERS_WITH_R,
1015 &impTabR_GROUP_NUMBERS_WITH_R},
1016 {&impAct0, &impAct0}};
73c04bcf
A
1017
1018
1019static const ImpTab impTabL_INVERSE_NUMBERS_AS_L =
1020/* This table is identical to the Default LTR table except that EN and AN are
1021 handled like L.
1022*/
1023{
1024/* L , R , EN , AN , ON , S , B , Res */
1025/* 0 : init */ { 0 , 1 , 0 , 0 , 0 , 0 , 0 , 0 },
46f4442e
A
1026/* 1 : R */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 1 },
1027/* 2 : AN */ { 0 , 1 , 0 , 0 , s(1,5), s(1,5), 0 , 2 },
1028/* 3 : R+EN/AN */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 2 },
1029/* 4 : R+ON */ { s(2,0), 1 , s(2,0), s(2,0), 4 , 4 , s(2,0), 1 },
1030/* 5 : AN+ON */ { s(2,0), 1 , s(2,0), s(2,0), 5 , 5 , s(2,0), 1 }
73c04bcf
A
1031};
1032static const ImpTab impTabR_INVERSE_NUMBERS_AS_L =
1033/* This table is identical to the Default RTL table except that EN and AN are
1034 handled like L.
1035*/
1036{
1037/* L , R , EN , AN , ON , S , B , Res */
1038/* 0 : init */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 0 },
46f4442e 1039/* 1 : L */ { 1 , 0 , 1 , 1 , s(1,4), s(1,4), 0 , 1 },
73c04bcf
A
1040/* 2 : EN/AN */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 1 },
1041/* 3 : L+AN */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 1 },
46f4442e 1042/* 4 : L+ON */ { s(2,1), 0 , s(2,1), s(2,1), 4 , 4 , 0 , 0 },
73c04bcf
A
1043/* 5 : L+AN+ON */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 0 }
1044};
1045static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L = {
46f4442e
A
1046 {&impTabL_INVERSE_NUMBERS_AS_L,
1047 &impTabR_INVERSE_NUMBERS_AS_L},
1048 {&impAct0, &impAct0}};
73c04bcf
A
1049
1050static const ImpTab impTabR_INVERSE_LIKE_DIRECT = /* Odd paragraph level */
1051/* In this table, conditional sequences receive the lower possible level
1052 until proven otherwise.
1053*/
1054{
1055/* L , R , EN , AN , ON , S , B , Res */
1056/* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 },
46f4442e 1057/* 1 : L */ { 1 , 0 , 1 , 2 , s(1,3), s(1,3), 0 , 1 },
73c04bcf 1058/* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 },
46f4442e
A
1059/* 3 : L+ON */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 0 },
1060/* 4 : L+ON+AN */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 3 },
1061/* 5 : L+AN+ON */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 2 },
1062/* 6 : L+ON+EN */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 1 }
73c04bcf
A
1063};
1064static const ImpAct impAct1 = {0,1,11,12};
1065/* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc"
1066 */
1067static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = {
46f4442e
A
1068 {&impTabL_DEFAULT,
1069 &impTabR_INVERSE_LIKE_DIRECT},
1070 {&impAct0, &impAct1}};
73c04bcf
A
1071
1072static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS =
1073/* The case handled in this table is (visually): R EN L
1074*/
1075{
1076/* L , R , EN , AN , ON , S , B , Res */
46f4442e
A
1077/* 0 : init */ { 0 , s(6,3), 0 , 1 , 0 , 0 , 0 , 0 },
1078/* 1 : L+AN */ { 0 , s(6,3), 0 , 1 , s(1,2), s(3,0), 0 , 4 },
1079/* 2 : L+AN+ON */ { s(2,0), s(6,3), s(2,0), 1 , 2 , s(3,0), s(2,0), 3 },
1080/* 3 : R */ { 0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0), 0 , 3 },
1081/* 4 : R+ON */ { s(3,0), s(4,3), s(5,5), s(5,6), 4 , s(3,0), s(3,0), 3 },
1082/* 5 : R+EN */ { s(3,0), s(4,3), 5 , s(5,6), s(1,4), s(3,0), s(3,0), 4 },
1083/* 6 : R+AN */ { s(3,0), s(4,3), s(5,5), 6 , s(1,4), s(3,0), s(3,0), 4 }
73c04bcf
A
1084};
1085static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS =
1086/* The cases handled in this table are (visually): R EN L
1087 R L AN L
1088*/
1089{
1090/* L , R , EN , AN , ON , S , B , Res */
46f4442e
A
1091/* 0 : init */ { s(1,3), 0 , 1 , 1 , 0 , 0 , 0 , 0 },
1092/* 1 : R+EN/AN */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 1 },
1093/* 2 : R+EN/AN+ON */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 0 },
1094/* 3 : L */ { 3 , 0 , 3 , s(3,6), s(1,4), s(4,0), 0 , 1 },
1095/* 4 : L+ON */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 0 },
1096/* 5 : L+ON+EN */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 1 },
1097/* 6 : L+AN */ { s(5,3), s(4,0), 6 , 6 , 4 , s(4,0), s(4,0), 3 }
73c04bcf
A
1098};
1099static const ImpAct impAct2 = {0,1,7,8,9,10};
1100static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = {
46f4442e
A
1101 {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
1102 &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
1103 {&impAct0, &impAct2}};
73c04bcf
A
1104
1105static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = {
46f4442e
A
1106 {&impTabL_NUMBERS_SPECIAL,
1107 &impTabR_INVERSE_LIKE_DIRECT},
1108 {&impAct0, &impAct1}};
73c04bcf
A
1109
1110static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS =
1111/* The case handled in this table is (visually): R EN L
1112*/
1113{
1114/* L , R , EN , AN , ON , S , B , Res */
46f4442e
A
1115/* 0 : init */ { 0 , s(6,2), 1 , 1 , 0 , 0 , 0 , 0 },
1116/* 1 : L+EN/AN */ { 0 , s(6,2), 1 , 1 , 0 , s(3,0), 0 , 4 },
1117/* 2 : R */ { 0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0), 0 , 3 },
1118/* 3 : R+ON */ { s(3,0), s(4,2), s(5,4), s(5,4), 3 , s(3,0), s(3,0), 3 },
1119/* 4 : R+EN/AN */ { s(3,0), s(4,2), 4 , 4 , s(1,3), s(3,0), s(3,0), 4 }
73c04bcf
A
1120};
1121static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = {
46f4442e
A
1122 {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
1123 &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
1124 {&impAct0, &impAct2}};
73c04bcf 1125
46f4442e 1126#undef s
73c04bcf
A
1127
1128typedef struct {
46f4442e
A
1129 const ImpTab * pImpTab; /* level table pointer */
1130 const ImpAct * pImpAct; /* action map array */
73c04bcf
A
1131 int32_t startON; /* start of ON sequence */
1132 int32_t startL2EN; /* start of level 2 sequence */
1133 int32_t lastStrongRTL; /* index of last found R or AL */
1134 int32_t state; /* current state */
1135 UBiDiLevel runLevel; /* run level before implicit solving */
1136} LevState;
1137
1138/*------------------------------------------------------------------------*/
1139
1140static void
1141addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag)
1142 /* param pos: position where to insert
1143 param flag: one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER
1144 */
1145{
1146#define FIRSTALLOC 10
1147 Point point;
1148 InsertPoints * pInsertPoints=&(pBiDi->insertPoints);
1149
1150 if (pInsertPoints->capacity == 0)
1151 {
1152 pInsertPoints->points=uprv_malloc(sizeof(Point)*FIRSTALLOC);
1153 if (pInsertPoints->points == NULL)
1154 {
1155 pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
1156 return;
1157 }
1158 pInsertPoints->capacity=FIRSTALLOC;
1159 }
1160 if (pInsertPoints->size >= pInsertPoints->capacity) /* no room for new point */
1161 {
1162 void * savePoints=pInsertPoints->points;
1163 pInsertPoints->points=uprv_realloc(pInsertPoints->points,
1164 pInsertPoints->capacity*2*sizeof(Point));
1165 if (pInsertPoints->points == NULL)
1166 {
1167 pInsertPoints->points=savePoints;
1168 pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
1169 return;
1170 }
1171 else pInsertPoints->capacity*=2;
1172 }
1173 point.pos=pos;
1174 point.flag=flag;
1175 pInsertPoints->points[pInsertPoints->size]=point;
1176 pInsertPoints->size++;
1177#undef FIRSTALLOC
b75a7d8f
A
1178}
1179
b75a7d8f
A
1180/* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
1181
1182/*
1183 * This implementation of the (Wn) rules applies all rules in one pass.
1184 * In order to do so, it needs a look-ahead of typically 1 character
1185 * (except for W5: sequences of ET) and keeps track of changes
1186 * in a rule Wp that affect a later Wq (p<q).
1187 *
b75a7d8f
A
1188 * The (Nn) and (In) rules are also performed in that same single loop,
1189 * but effectively one iteration behind for white space.
1190 *
1191 * Since all implicit rules are performed in one step, it is not necessary
1192 * to actually store the intermediate directional properties in dirProps[].
1193 */
1194
b75a7d8f 1195static void
73c04bcf
A
1196processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop,
1197 int32_t start, int32_t limit) {
1198 uint8_t cell, oldStateSeq, actionSeq;
46f4442e
A
1199 const ImpTab * pImpTab=pLevState->pImpTab;
1200 const ImpAct * pImpAct=pLevState->pImpAct;
73c04bcf
A
1201 UBiDiLevel * levels=pBiDi->levels;
1202 UBiDiLevel level, addLevel;
1203 InsertPoints * pInsertPoints;
1204 int32_t start0, k;
1205
1206 start0=start; /* save original start position */
46f4442e 1207 oldStateSeq=(uint8_t)pLevState->state;
73c04bcf
A
1208 cell=(*pImpTab)[oldStateSeq][_prop];
1209 pLevState->state=GET_STATE(cell); /* isolate the new state */
1210 actionSeq=(*pImpAct)[GET_ACTION(cell)]; /* isolate the action */
1211 addLevel=(*pImpTab)[pLevState->state][IMPTABLEVELS_RES];
1212
1213 if(actionSeq) {
1214 switch(actionSeq) {
1215 case 1: /* init ON seq */
1216 pLevState->startON=start0;
b75a7d8f 1217 break;
b75a7d8f 1218
73c04bcf
A
1219 case 2: /* prepend ON seq to current seq */
1220 start=pLevState->startON;
1221 break;
b75a7d8f 1222
73c04bcf
A
1223 case 3: /* L or S after possible relevant EN/AN */
1224 /* check if we had EN after R/AL */
1225 if (pLevState->startL2EN >= 0) {
1226 addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
b75a7d8f 1227 }
73c04bcf
A
1228 pLevState->startL2EN=-1; /* not within previous if since could also be -2 */
1229 /* check if we had any relevant EN/AN after R/AL */
1230 pInsertPoints=&(pBiDi->insertPoints);
1231 if ((pInsertPoints->capacity == 0) ||
1232 (pInsertPoints->size <= pInsertPoints->confirmed))
1233 {
1234 /* nothing, just clean up */
1235 pLevState->lastStrongRTL=-1;
1236 /* check if we have a pending conditional segment */
1237 level=(*pImpTab)[oldStateSeq][IMPTABLEVELS_RES];
1238 if ((level & 1) && (pLevState->startON > 0)) { /* after ON */
1239 start=pLevState->startON; /* reset to basic run level */
b75a7d8f 1240 }
46f4442e 1241 if (_prop == DirProp_S) /* add LRM before S */
73c04bcf
A
1242 {
1243 addPoint(pBiDi, start0, LRM_BEFORE);
1244 pInsertPoints->confirmed=pInsertPoints->size;
b75a7d8f 1245 }
73c04bcf 1246 break;
b75a7d8f 1247 }
73c04bcf
A
1248 /* reset previous RTL cont to level for LTR text */
1249 for (k=pLevState->lastStrongRTL+1; k<start0; k++)
1250 {
1251 /* reset odd level, leave runLevel+2 as is */
1252 levels[k]=(levels[k] - 2) & ~1;
b75a7d8f 1253 }
73c04bcf
A
1254 /* mark insert points as confirmed */
1255 pInsertPoints->confirmed=pInsertPoints->size;
1256 pLevState->lastStrongRTL=-1;
46f4442e 1257 if (_prop == DirProp_S) /* add LRM before S */
73c04bcf
A
1258 {
1259 addPoint(pBiDi, start0, LRM_BEFORE);
1260 pInsertPoints->confirmed=pInsertPoints->size;
b75a7d8f 1261 }
73c04bcf 1262 break;
b75a7d8f 1263
73c04bcf
A
1264 case 4: /* R/AL after possible relevant EN/AN */
1265 /* just clean up */
1266 pInsertPoints=&(pBiDi->insertPoints);
1267 if (pInsertPoints->capacity > 0)
1268 /* remove all non confirmed insert points */
1269 pInsertPoints->size=pInsertPoints->confirmed;
1270 pLevState->startON=-1;
1271 pLevState->startL2EN=-1;
1272 pLevState->lastStrongRTL=limit - 1;
1273 break;
1274
1275 case 5: /* EN/AN after R/AL + possible cont */
1276 /* check for real AN */
46f4442e 1277 if ((_prop == DirProp_AN) && (NO_CONTEXT_RTL(pBiDi->dirProps[start0]) == AN) &&
73c04bcf
A
1278 (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))
1279 {
1280 /* real AN */
1281 if (pLevState->startL2EN == -1) /* if no relevant EN already found */
1282 {
1283 /* just note the righmost digit as a strong RTL */
1284 pLevState->lastStrongRTL=limit - 1;
1285 break;
b75a7d8f 1286 }
73c04bcf
A
1287 if (pLevState->startL2EN >= 0) /* after EN, no AN */
1288 {
1289 addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
1290 pLevState->startL2EN=-2;
1291 }
1292 /* note AN */
1293 addPoint(pBiDi, start0, LRM_BEFORE);
1294 break;
1295 }
1296 /* if first EN/AN after R/AL */
1297 if (pLevState->startL2EN == -1) {
1298 pLevState->startL2EN=start0;
b75a7d8f 1299 }
73c04bcf 1300 break;
b75a7d8f 1301
73c04bcf
A
1302 case 6: /* note location of latest R/AL */
1303 pLevState->lastStrongRTL=limit - 1;
1304 pLevState->startON=-1;
b75a7d8f 1305 break;
73c04bcf
A
1306
1307 case 7: /* L after R+ON/EN/AN */
1308 /* include possible adjacent number on the left */
1309 for (k=start0-1; k>=0 && !(levels[k]&1); k--);
1310 if(k>=0) {
1311 addPoint(pBiDi, k, RLM_BEFORE); /* add RLM before */
1312 pInsertPoints=&(pBiDi->insertPoints);
1313 pInsertPoints->confirmed=pInsertPoints->size; /* confirm it */
1314 }
1315 pLevState->startON=start0;
b75a7d8f 1316 break;
73c04bcf
A
1317
1318 case 8: /* AN after L */
1319 /* AN numbers between L text on both sides may be trouble. */
1320 /* tentatively bracket with LRMs; will be confirmed if followed by L */
1321 addPoint(pBiDi, start0, LRM_BEFORE); /* add LRM before */
1322 addPoint(pBiDi, start0, LRM_AFTER); /* add LRM after */
b75a7d8f 1323 break;
b75a7d8f 1324
73c04bcf
A
1325 case 9: /* R after L+ON/EN/AN */
1326 /* false alert, infirm LRMs around previous AN */
1327 pInsertPoints=&(pBiDi->insertPoints);
1328 pInsertPoints->size=pInsertPoints->confirmed;
46f4442e 1329 if (_prop == DirProp_S) /* add RLM before S */
73c04bcf
A
1330 {
1331 addPoint(pBiDi, start0, RLM_BEFORE);
1332 pInsertPoints->confirmed=pInsertPoints->size;
1333 }
1334 break;
b75a7d8f 1335
73c04bcf
A
1336 case 10: /* L after L+ON/AN */
1337 level=pLevState->runLevel + addLevel;
1338 for(k=pLevState->startON; k<start0; k++) {
1339 if (levels[k]<level)
1340 levels[k]=level;
b75a7d8f 1341 }
73c04bcf
A
1342 pInsertPoints=&(pBiDi->insertPoints);
1343 pInsertPoints->confirmed=pInsertPoints->size; /* confirm inserts */
1344 pLevState->startON=start0;
1345 break;
1346
1347 case 11: /* L after L+ON+EN/AN/ON */
1348 level=pLevState->runLevel;
1349 for(k=start0-1; k>=pLevState->startON; k--) {
1350 if(levels[k]==level+3) {
1351 while(levels[k]==level+3) {
1352 levels[k--]-=2;
b75a7d8f 1353 }
73c04bcf
A
1354 while(levels[k]==level) {
1355 k--;
b75a7d8f
A
1356 }
1357 }
73c04bcf
A
1358 if(levels[k]==level+2) {
1359 levels[k]=level;
1360 continue;
b75a7d8f 1361 }
73c04bcf 1362 levels[k]=level+1;
b75a7d8f 1363 }
73c04bcf 1364 break;
b75a7d8f 1365
73c04bcf
A
1366 case 12: /* R after L+ON+EN/AN/ON */
1367 level=pLevState->runLevel+1;
1368 for(k=start0-1; k>=pLevState->startON; k--) {
1369 if(levels[k]>level) {
1370 levels[k]-=2;
b75a7d8f 1371 }
b75a7d8f 1372 }
73c04bcf 1373 break;
b75a7d8f 1374
73c04bcf 1375 default: /* we should never get here */
46f4442e 1376 U_ASSERT(FALSE);
73c04bcf 1377 break;
b75a7d8f
A
1378 }
1379 }
73c04bcf
A
1380 if((addLevel) || (start < start0)) {
1381 level=pLevState->runLevel + addLevel;
1382 for(k=start; k<limit; k++) {
1383 levels[k]=level;
1384 }
1385 }
1386}
b75a7d8f 1387
73c04bcf
A
1388static void
1389resolveImplicitLevels(UBiDi *pBiDi,
1390 int32_t start, int32_t limit,
1391 DirProp sor, DirProp eor) {
1392 const DirProp *dirProps=pBiDi->dirProps;
b75a7d8f 1393
73c04bcf
A
1394 LevState levState;
1395 int32_t i, start1, start2;
1396 uint8_t oldStateImp, stateImp, actionImp;
1397 uint8_t gprop, resProp, cell;
1398 UBool inverseRTL;
1399 DirProp nextStrongProp=R;
1400 int32_t nextStrongPos=-1;
1401
1402 /* check for RTL inverse BiDi mode */
1403 /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to
1404 * loop on the text characters from end to start.
1405 * This would need a different properties state table (at least different
1406 * actions) and different levels state tables (maybe very similar to the
1407 * LTR corresponding ones.
1408 */
46f4442e
A
1409 inverseRTL=(UBool)
1410 ((start<pBiDi->lastArabicPos) && (GET_PARALEVEL(pBiDi, start) & 1) &&
1411 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
1412 pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL));
73c04bcf
A
1413 /* initialize for levels state table */
1414 levState.startL2EN=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
1415 levState.lastStrongRTL=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
1416 levState.state=0;
1417 levState.runLevel=pBiDi->levels[start];
46f4442e
A
1418 levState.pImpTab=(const ImpTab*)((pBiDi->pImpTabPair)->pImpTab)[levState.runLevel&1];
1419 levState.pImpAct=(const ImpAct*)((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel&1];
73c04bcf
A
1420 processPropertySeq(pBiDi, &levState, sor, start, start);
1421 /* initialize for property state table */
1422 if(dirProps[start]==NSM) {
1423 stateImp = 1 + sor;
1424 } else {
1425 stateImp=0;
1426 }
1427 start1=start;
1428 start2=start;
1429
1430 for(i=start; i<=limit; i++) {
1431 if(i>=limit) {
1432 gprop=eor;
b75a7d8f 1433 } else {
73c04bcf
A
1434 DirProp prop, prop1;
1435 prop=NO_CONTEXT_RTL(dirProps[i]);
1436 if(inverseRTL) {
1437 if(prop==AL) {
1438 /* AL before EN does not make it AN */
1439 prop=R;
1440 } else if(prop==EN) {
1441 if(nextStrongPos<=i) {
1442 /* look for next strong char (L/R/AL) */
1443 int32_t j;
1444 nextStrongProp=R; /* set default */
1445 nextStrongPos=limit;
1446 for(j=i+1; j<limit; j++) {
1447 prop1=NO_CONTEXT_RTL(dirProps[j]);
1448 if(prop1==L || prop1==R || prop1==AL) {
1449 nextStrongProp=prop1;
1450 nextStrongPos=j;
1451 break;
1452 }
1453 }
1454 }
1455 if(nextStrongProp==AL) {
1456 prop=AN;
1457 }
b75a7d8f
A
1458 }
1459 }
73c04bcf 1460 gprop=groupProp[prop];
b75a7d8f 1461 }
73c04bcf
A
1462 oldStateImp=stateImp;
1463 cell=impTabProps[oldStateImp][gprop];
1464 stateImp=GET_STATEPROPS(cell); /* isolate the new state */
1465 actionImp=GET_ACTIONPROPS(cell); /* isolate the action */
1466 if((i==limit) && (actionImp==0)) {
1467 /* there is an unprocessed sequence if its property == eor */
1468 actionImp=1; /* process the last sequence */
1469 }
1470 if(actionImp) {
1471 resProp=impTabProps[oldStateImp][IMPTABPROPS_RES];
1472 switch(actionImp) {
1473 case 1: /* process current seq1, init new seq1 */
1474 processPropertySeq(pBiDi, &levState, resProp, start1, i);
1475 start1=i;
1476 break;
1477 case 2: /* init new seq2 */
1478 start2=i;
1479 break;
1480 case 3: /* process seq1, process seq2, init new seq1 */
1481 processPropertySeq(pBiDi, &levState, resProp, start1, start2);
46f4442e 1482 processPropertySeq(pBiDi, &levState, DirProp_ON, start2, i);
73c04bcf
A
1483 start1=i;
1484 break;
1485 case 4: /* process seq1, set seq1=seq2, init new seq2 */
1486 processPropertySeq(pBiDi, &levState, resProp, start1, start2);
1487 start1=start2;
1488 start2=i;
1489 break;
1490 default: /* we should never get here */
46f4442e 1491 U_ASSERT(FALSE);
73c04bcf
A
1492 break;
1493 }
b75a7d8f
A
1494 }
1495 }
73c04bcf
A
1496 /* flush possible pending sequence, e.g. ON */
1497 processPropertySeq(pBiDi, &levState, eor, limit, limit);
b75a7d8f
A
1498}
1499
1500/* perform (L1) and (X9) ---------------------------------------------------- */
1501
1502/*
1503 * Reset the embedding levels for some non-graphic characters (L1).
1504 * This function also sets appropriate levels for BN, and
1505 * explicit embedding types that are supposed to have been removed
1506 * from the paragraph in (X9).
1507 */
1508static void
1509adjustWSLevels(UBiDi *pBiDi) {
1510 const DirProp *dirProps=pBiDi->dirProps;
1511 UBiDiLevel *levels=pBiDi->levels;
1512 int32_t i;
1513
1514 if(pBiDi->flags&MASK_WS) {
73c04bcf 1515 UBool orderParagraphsLTR=pBiDi->orderParagraphsLTR;
b75a7d8f
A
1516 Flags flag;
1517
1518 i=pBiDi->trailingWSStart;
1519 while(i>0) {
1520 /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
73c04bcf
A
1521 while(i>0 && (flag=DIRPROP_FLAG_NC(dirProps[--i]))&MASK_WS) {
1522 if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
1523 levels[i]=0;
1524 } else {
1525 levels[i]=GET_PARALEVEL(pBiDi, i);
1526 }
b75a7d8f
A
1527 }
1528
1529 /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
1530 /* here, i+1 is guaranteed to be <length */
1531 while(i>0) {
73c04bcf 1532 flag=DIRPROP_FLAG_NC(dirProps[--i]);
b75a7d8f
A
1533 if(flag&MASK_BN_EXPLICIT) {
1534 levels[i]=levels[i+1];
73c04bcf
A
1535 } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
1536 levels[i]=0;
1537 break;
b75a7d8f 1538 } else if(flag&MASK_B_S) {
73c04bcf 1539 levels[i]=GET_PARALEVEL(pBiDi, i);
b75a7d8f
A
1540 break;
1541 }
1542 }
1543 }
1544 }
1545}
1546
73c04bcf
A
/* Smaller of two values; each argument may be evaluated twice. */
#define BIDI_MIN(x, y) ((x)>=(y) ? (y) : (x))
/* Absolute value; the argument is evaluated twice. */
#define BIDI_ABS(x) ((x)<0 ? (-(x)) : (x))
1549static void
1550setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length,
1551 UBiDiLevel paraLevel, UErrorCode *pErrorCode) {
1552 void *runsOnlyMemory;
1553 int32_t *visualMap;
1554 UChar *visualText;
46f4442e 1555 int32_t saveLength, saveTrailingWSStart;
73c04bcf
A
1556 const UBiDiLevel *levels;
1557 UBiDiLevel *saveLevels;
46f4442e
A
1558 UBiDiDirection saveDirection;
1559 UBool saveMayAllocateText;
73c04bcf
A
1560 Run *runs;
1561 int32_t visualLength, i, j, visualStart, logicalStart,
1562 runCount, runLength, addedRuns, insertRemove,
1563 start, limit, step, indexOddBit, logicalPos,
1564 index, index1;
1565 uint32_t saveOptions;
1566
1567 pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT;
1568 if(length==0) {
1569 ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
1570 goto cleanup3;
1571 }
1572 /* obtain memory for mapping table and visual text */
1573 runsOnlyMemory=uprv_malloc(length*(sizeof(int32_t)+sizeof(UChar)+sizeof(UBiDiLevel)));
1574 if(runsOnlyMemory==NULL) {
1575 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1576 goto cleanup3;
1577 }
1578 visualMap=runsOnlyMemory;
1579 visualText=(UChar *)&visualMap[length];
1580 saveLevels=(UBiDiLevel *)&visualText[length];
1581 saveOptions=pBiDi->reorderingOptions;
1582 if(saveOptions & UBIDI_OPTION_INSERT_MARKS) {
1583 pBiDi->reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
1584 pBiDi->reorderingOptions|=UBIDI_OPTION_REMOVE_CONTROLS;
1585 }
46f4442e 1586 paraLevel&=1; /* accept only 0 or 1 */
73c04bcf 1587 ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
46f4442e
A
1588 if(U_FAILURE(*pErrorCode)) {
1589 goto cleanup3;
1590 }
1591 /* we cannot access directly pBiDi->levels since it is not yet set if
1592 * direction is not MIXED
1593 */
73c04bcf 1594 levels=ubidi_getLevels(pBiDi, pErrorCode);
46f4442e
A
1595 uprv_memcpy(saveLevels, levels, pBiDi->length*sizeof(UBiDiLevel));
1596 saveTrailingWSStart=pBiDi->trailingWSStart;
1597 saveLength=pBiDi->length;
1598 saveDirection=pBiDi->direction;
73c04bcf
A
1599
1600 /* FOOD FOR THOUGHT: instead of writing the visual text, we could use
1601 * the visual map and the dirProps array to drive the second call
1602 * to ubidi_setPara (but must make provision for possible removal of
1603 * BiDi controls. Alternatively, only use the dirProps array via
1604 * customized classifier callback.
1605 */
1606 visualLength=ubidi_writeReordered(pBiDi, visualText, length,
1607 UBIDI_DO_MIRRORING, pErrorCode);
73c04bcf
A
1608 ubidi_getVisualMap(pBiDi, visualMap, pErrorCode);
1609 if(U_FAILURE(*pErrorCode)) {
1610 goto cleanup2;
1611 }
46f4442e 1612 pBiDi->reorderingOptions=saveOptions;
73c04bcf
A
1613
1614 pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_LIKE_DIRECT;
46f4442e
A
1615 paraLevel^=1;
1616 /* Because what we did with reorderingOptions, visualText may be shorter
1617 * than the original text. But we don't want the levels memory to be
1618 * reallocated shorter than the original length, since we need to restore
1619 * the levels as after the first call to ubidi_setpara() before returning.
1620 * We will force mayAllocateText to FALSE before the second call to
1621 * ubidi_setpara(), and will restore it afterwards.
1622 */
1623 saveMayAllocateText=pBiDi->mayAllocateText;
1624 pBiDi->mayAllocateText=FALSE;
73c04bcf 1625 ubidi_setPara(pBiDi, visualText, visualLength, paraLevel, NULL, pErrorCode);
46f4442e
A
1626 pBiDi->mayAllocateText=saveMayAllocateText;
1627 ubidi_getRuns(pBiDi, pErrorCode);
73c04bcf
A
1628 if(U_FAILURE(*pErrorCode)) {
1629 goto cleanup1;
1630 }
73c04bcf
A
1631 /* check if some runs must be split, count how many splits */
1632 addedRuns=0;
1633 runCount=pBiDi->runCount;
1634 runs=pBiDi->runs;
1635 visualStart=0;
1636 for(i=0; i<runCount; i++, visualStart+=runLength) {
1637 runLength=runs[i].visualLimit-visualStart;
1638 if(runLength<2) {
1639 continue;
1640 }
1641 logicalStart=GET_INDEX(runs[i].logicalStart);
1642 for(j=logicalStart+1; j<logicalStart+runLength; j++) {
1643 index=visualMap[j];
1644 index1=visualMap[j-1];
1645 if((BIDI_ABS(index-index1)!=1) || (saveLevels[index]!=saveLevels[index1])) {
1646 addedRuns++;
1647 }
1648 }
1649 }
1650 if(addedRuns) {
1651 if(getRunsMemory(pBiDi, runCount+addedRuns)) {
1652 if(runCount==1) {
1653 /* because we switch from UBiDi.simpleRuns to UBiDi.runs */
1654 pBiDi->runsMemory[0]=runs[0];
1655 }
1656 runs=pBiDi->runs=pBiDi->runsMemory;
1657 pBiDi->runCount+=addedRuns;
1658 } else {
1659 goto cleanup1;
1660 }
1661 }
1662 /* split runs which are not consecutive in source text */
1663 for(i=runCount-1; i>=0; i--) {
1664 runLength= i==0 ? runs[0].visualLimit :
1665 runs[i].visualLimit-runs[i-1].visualLimit;
1666 logicalStart=runs[i].logicalStart;
1667 indexOddBit=GET_ODD_BIT(logicalStart);
1668 logicalStart=GET_INDEX(logicalStart);
1669 if(runLength<2) {
1670 if(addedRuns) {
1671 runs[i+addedRuns]=runs[i];
1672 }
1673 logicalPos=visualMap[logicalStart];
1674 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
1675 saveLevels[logicalPos]^indexOddBit);
1676 continue;
1677 }
1678 if(indexOddBit) {
1679 start=logicalStart;
1680 limit=logicalStart+runLength-1;
1681 step=1;
1682 } else {
1683 start=logicalStart+runLength-1;
1684 limit=logicalStart;
1685 step=-1;
1686 }
1687 for(j=start; j!=limit; j+=step) {
1688 index=visualMap[j];
1689 index1=visualMap[j+step];
1690 if((BIDI_ABS(index-index1)!=1) || (saveLevels[index]!=saveLevels[index1])) {
1691 logicalPos=BIDI_MIN(visualMap[start], index);
1692 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
1693 saveLevels[logicalPos]^indexOddBit);
1694 runs[i+addedRuns].visualLimit=runs[i].visualLimit;
1695 runs[i].visualLimit-=BIDI_ABS(j-start)+1;
1696 insertRemove=runs[i].insertRemove&(LRM_AFTER|RLM_AFTER);
1697 runs[i+addedRuns].insertRemove=insertRemove;
1698 runs[i].insertRemove&=~insertRemove;
1699 start=j+step;
1700 addedRuns--;
1701 }
1702 }
1703 if(addedRuns) {
1704 runs[i+addedRuns]=runs[i];
1705 }
1706 logicalPos=BIDI_MIN(visualMap[start], visualMap[limit]);
1707 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
1708 saveLevels[logicalPos]^indexOddBit);
1709 }
1710
1711 cleanup1:
1712 /* restore initial paraLevel */
1713 pBiDi->paraLevel^=1;
1714 cleanup2:
1715 /* restore real text */
1716 pBiDi->text=text;
46f4442e
A
1717 pBiDi->length=saveLength;
1718 pBiDi->originalLength=length;
1719 pBiDi->direction=saveDirection;
1720 /* the saved levels should never exceed levelsSize, but we check anyway */
1721 if(saveLength>pBiDi->levelsSize) {
1722 saveLength=pBiDi->levelsSize;
1723 }
1724 uprv_memcpy(pBiDi->levels, saveLevels, saveLength*sizeof(UBiDiLevel));
1725 pBiDi->trailingWSStart=saveTrailingWSStart;
73c04bcf
A
1726 /* free memory for mapping table and visual text */
1727 uprv_free(runsOnlyMemory);
46f4442e
A
1728 if(pBiDi->runCount>1) {
1729 pBiDi->direction=UBIDI_MIXED;
1730 }
73c04bcf
A
1731 cleanup3:
1732 pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY;
1733}
1734
374ca955
A
1735/* ubidi_setPara ------------------------------------------------------------ */
1736
/*
 * Analyze a piece of text and fill in the UBiDi object: paragraph level,
 * overall direction, directional properties, levels and result length.
 *
 * pBiDi            object to fill in; must not be NULL.
 * text             text to analyze; must not be NULL.
 * length           number of UChars in text, or -1 to have it computed
 *                  with u_strlen(); values below -1 are rejected.
 * paraLevel        requested paragraph level; values above
 *                  UBIDI_MAX_EXPLICIT_LEVEL and below UBIDI_DEFAULT_LTR
 *                  are rejected.
 * embeddingLevels  caller-supplied levels, or NULL to have the explicit
 *                  levels resolved here. When non-NULL the array itself
 *                  becomes pBiDi->levels (it is not copied).
 * pErrorCode       in/out error code. Set to U_ILLEGAL_ARGUMENT_ERROR for
 *                  bad arguments, to U_MEMORY_ALLOCATION_ERROR when a
 *                  working array cannot be obtained, or to the error
 *                  recorded in pBiDi->insertPoints.errorCode.
 *
 * In UBIDI_REORDER_RUNS_ONLY mode all the work is delegated to
 * setParaRunsOnly(). Otherwise pBiDi->pParaBiDi is set to NULL while the
 * analysis is in progress and to pBiDi only when it completes, so an
 * error return after initialization leaves the object marked unfinished.
 */
1737U_CAPI void U_EXPORT2
1738ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
1739 UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
1740 UErrorCode *pErrorCode) {
1741 UBiDiDirection direction;
1742
1743 /* check the argument values */
46f4442e
A
1744 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
1745 if(pBiDi==NULL || text==NULL || length<-1 ||
1746 (paraLevel>UBIDI_MAX_EXPLICIT_LEVEL && paraLevel<UBIDI_DEFAULT_LTR)) {
374ca955
A
1747 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1748 return;
1749 }
1750
/* length==-1 means the text length is to be computed here */
1751 if(length==-1) {
1752 length=u_strlen(text);
1753 }
1754
73c04bcf
A
1755 /* special treatment for RUNS_ONLY mode */
1756 if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) {
1757 setParaRunsOnly(pBiDi, text, length, paraLevel, pErrorCode);
1758 return;
1759 }
1760
374ca955 1761 /* initialize the UBiDi structure */
73c04bcf 1762 pBiDi->pParaBiDi=NULL; /* mark unfinished setPara */
374ca955 1763 pBiDi->text=text;
73c04bcf 1764 pBiDi->length=pBiDi->originalLength=pBiDi->resultLength=length;
374ca955
A
1765 pBiDi->paraLevel=paraLevel;
1766 pBiDi->direction=UBIDI_LTR;
73c04bcf 1767 pBiDi->paraCount=1;
374ca955
A
1768
/* the working arrays are attached further down, once their memory is available */
1769 pBiDi->dirProps=NULL;
1770 pBiDi->levels=NULL;
1771 pBiDi->runs=NULL;
73c04bcf
A
1772 pBiDi->insertPoints.size=0; /* clean up from last call */
1773 pBiDi->insertPoints.confirmed=0; /* clean up from last call */
1774
1775 /*
1776 * Save the original paraLevel if contextual; otherwise, set to 0.
1777 */
1778 if(IS_DEFAULT_LEVEL(paraLevel)) {
1779 pBiDi->defaultParaLevel=paraLevel;
1780 } else {
1781 pBiDi->defaultParaLevel=0;
1782 }
374ca955
A
1783
1784 if(length==0) {
1785 /*
1786 * For an empty paragraph, create a UBiDi object with the paraLevel and
1787 * the flags and the direction set but without allocating zero-length arrays.
1788 * There is nothing more to do.
1789 */
1790 if(IS_DEFAULT_LEVEL(paraLevel)) {
1791 pBiDi->paraLevel&=1;
73c04bcf 1792 pBiDi->defaultParaLevel=0;
374ca955
A
1793 }
/* the low bit of the requested level selects the direction of the empty paragraph */
1794 if(paraLevel&1) {
1795 pBiDi->flags=DIRPROP_FLAG(R);
1796 pBiDi->direction=UBIDI_RTL;
1797 } else {
1798 pBiDi->flags=DIRPROP_FLAG(L);
1799 pBiDi->direction=UBIDI_LTR;
1800 }
1801
1802 pBiDi->runCount=0;
46f4442e 1803 pBiDi->paraCount=0;
73c04bcf 1804 pBiDi->pParaBiDi=pBiDi; /* mark successful setPara */
374ca955
A
1805 return;
1806 }
1807
/* NOTE(review): -1 presumably marks the runs as not yet computed -- confirm against ubidi_getRuns() */
1808 pBiDi->runCount=-1;
1809
1810 /*
1811 * Get the directional properties,
1812 * the flags bit-set, and
73c04bcf 1813 * determine the paragraph level if necessary.
374ca955
A
1814 */
1815 if(getDirPropsMemory(pBiDi, length)) {
1816 pBiDi->dirProps=pBiDi->dirPropsMemory;
73c04bcf 1817 getDirProps(pBiDi);
374ca955
A
1818 } else {
1819 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1820 return;
1821 }
73c04bcf
A
1822 /* the processed length may have changed if UBIDI_OPTION_STREAMING */
1823 length= pBiDi->length;
1824 pBiDi->trailingWSStart=length; /* the levels[] will reflect the WS run */
1825 /* allocate paras memory */
1826 if(pBiDi->paraCount>1) {
1827 if(getInitialParasMemory(pBiDi, pBiDi->paraCount)) {
1828 pBiDi->paras=pBiDi->parasMemory;
/* the last paragraph ends at the end of the processed text */
1829 pBiDi->paras[pBiDi->paraCount-1]=length;
1830 } else {
1831 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1832 return;
1833 }
1834 } else {
1835 /* initialize paras for single paragraph */
1836 pBiDi->paras=pBiDi->simpleParas;
1837 pBiDi->simpleParas[0]=length;
1838 }
374ca955
A
1839
1840 /* are explicit levels specified? */
1841 if(embeddingLevels==NULL) {
/* NOTE(review): the comment line below ends in a stray backslash; it only splices that line with the next one */
1842 /* no: determine explicit levels according to the (Xn) rules */\
1843 if(getLevelsMemory(pBiDi, length)) {
1844 pBiDi->levels=pBiDi->levelsMemory;
1845 direction=resolveExplicitLevels(pBiDi);
1846 } else {
1847 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1848 return;
1849 }
1850 } else {
73c04bcf 1851 /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */
374ca955
A
1852 pBiDi->levels=embeddingLevels;
1853 direction=checkExplicitLevels(pBiDi, pErrorCode);
1854 if(U_FAILURE(*pErrorCode)) {
1855 return;
1856 }
1857 }
1858
1859 /*
1860 * The steps after (X9) in the UBiDi algorithm are performed only if
1861 * the paragraph text has mixed directionality!
1862 */
1863 pBiDi->direction=direction;
1864 switch(direction) {
1865 case UBIDI_LTR:
1866 /* make sure paraLevel is even */
1867 pBiDi->paraLevel=(UBiDiLevel)((pBiDi->paraLevel+1)&~1);
1868
1869 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
1870 pBiDi->trailingWSStart=0;
1871 break;
1872 case UBIDI_RTL:
1873 /* make sure paraLevel is odd */
1874 pBiDi->paraLevel|=1;
1875
1876 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
1877 pBiDi->trailingWSStart=0;
1878 break;
1879 default:
73c04bcf
A
1880 /*
1881 * Choose the right implicit state table
1882 */
1883 switch(pBiDi->reorderingMode) {
1884 case UBIDI_REORDER_DEFAULT:
1885 pBiDi->pImpTabPair=&impTab_DEFAULT;
1886 break;
1887 case UBIDI_REORDER_NUMBERS_SPECIAL:
1888 pBiDi->pImpTabPair=&impTab_NUMBERS_SPECIAL;
1889 break;
1890 case UBIDI_REORDER_GROUP_NUMBERS_WITH_R:
1891 pBiDi->pImpTabPair=&impTab_GROUP_NUMBERS_WITH_R;
1892 break;
73c04bcf
A
1893 case UBIDI_REORDER_INVERSE_NUMBERS_AS_L:
1894 pBiDi->pImpTabPair=&impTab_INVERSE_NUMBERS_AS_L;
1895 break;
1896 case UBIDI_REORDER_INVERSE_LIKE_DIRECT:
/* the two inverse modes below use a separate table when marks are to be inserted */
1897 if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
1898 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS;
1899 } else {
1900 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT;
1901 }
1902 break;
1903 case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL:
1904 if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
1905 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS;
1906 } else {
1907 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL;
1908 }
1909 break;
1910 default:
46f4442e
A
1911 /* we should never get here */
1912 U_ASSERT(FALSE);
73c04bcf
A
1913 break;
1914 }
374ca955
A
1915 /*
1916 * If there are no external levels specified and there
1917 * are no significant explicit level codes in the text,
1918 * then we can treat the entire paragraph as one run.
1919 * Otherwise, we need to perform the following rules on runs of
1920 * the text with the same embedding levels. (X10)
1921 * "Significant" explicit level codes are ones that actually
1922 * affect non-BN characters.
1923 * Examples for "insignificant" ones are empty embeddings
1924 * LRE-PDF, LRE-RLE-PDF-PDF, etc.
1925 */
46f4442e
A
1926 if(embeddingLevels==NULL && pBiDi->paraCount<=1 &&
1927 !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) {
374ca955 1928 resolveImplicitLevels(pBiDi, 0, length,
73c04bcf
A
1929 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, 0)),
1930 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, length-1)));
374ca955
A
1931 } else {
1932 /* sor, eor: start and end types of same-level-run */
1933 UBiDiLevel *levels=pBiDi->levels;
1934 int32_t start, limit=0;
1935 UBiDiLevel level, nextLevel;
1936 DirProp sor, eor;
1937
1938 /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
73c04bcf 1939 level=GET_PARALEVEL(pBiDi, 0);
374ca955
A
1940 nextLevel=levels[0];
1941 if(level<nextLevel) {
1942 eor=GET_LR_FROM_LEVEL(nextLevel);
1943 } else {
1944 eor=GET_LR_FROM_LEVEL(level);
1945 }
1946
/* one iteration per run of equal levels[] values, from the start of the text to its end */
1947 do {
1948 /* determine start and limit of the run (end points just behind the run) */
1949
1950 /* the values for this run's start are the same as for the previous run's end */
374ca955
A
1951 start=limit;
1952 level=nextLevel;
73c04bcf
A
1953 if((start>0) && (NO_CONTEXT_RTL(pBiDi->dirProps[start-1])==B)) {
1954 /* except if this is a new paragraph, then set sor = para level */
1955 sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start));
1956 } else {
1957 sor=eor;
1958 }
374ca955
A
1959
1960 /* search for the limit of this run */
1961 while(++limit<length && levels[limit]==level) {}
1962
1963 /* get the correct level of the next run */
1964 if(limit<length) {
1965 nextLevel=levels[limit];
1966 } else {
73c04bcf 1967 nextLevel=GET_PARALEVEL(pBiDi, length-1);
374ca955
A
1968 }
1969
1970 /* determine eor from max(level, nextLevel); sor is last run's eor */
1971 if((level&~UBIDI_LEVEL_OVERRIDE)<(nextLevel&~UBIDI_LEVEL_OVERRIDE)) {
1972 eor=GET_LR_FROM_LEVEL(nextLevel);
1973 } else {
1974 eor=GET_LR_FROM_LEVEL(level);
1975 }
1976
1977 /* if the run consists of overridden directional types, then there
1978 are no implicit types to be resolved */
1979 if(!(level&UBIDI_LEVEL_OVERRIDE)) {
1980 resolveImplicitLevels(pBiDi, start, limit, sor, eor);
1981 } else {
1982 /* remove the UBIDI_LEVEL_OVERRIDE flags */
1983 do {
1984 levels[start++]&=~UBIDI_LEVEL_OVERRIDE;
1985 } while(start<limit);
1986 }
1987 } while(limit<length);
1988 }
73c04bcf
A
1989 /* check if we got any memory shortage while adding insert points */
1990 if (U_FAILURE(pBiDi->insertPoints.errorCode))
1991 {
1992 *pErrorCode=pBiDi->insertPoints.errorCode;
1993 return;
1994 }
374ca955
A
1995 /* reset the embedding levels for some non-graphic characters (L1), (X9) */
1996 adjustWSLevels(pBiDi);
374ca955
A
1997 break;
1998 }
46f4442e
A
1999 /* add RLM for inverse Bidi with contextual orientation resolving
2000 * to RTL which would not round-trip otherwise
2001 */
2002 if((pBiDi->defaultParaLevel>0) &&
2003 (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) &&
2004 ((pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) ||
2005 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) {
2006 int32_t i, j, start, last;
2007 DirProp dirProp;
/* examine each paragraph; paras[i] is the limit of paragraph i */
2008 for(i=0; i<pBiDi->paraCount; i++) {
2009 last=pBiDi->paras[i]-1;
2010 if((pBiDi->dirProps[last] & CONTEXT_RTL)==0) {
2011 continue; /* LTR paragraph */
2012 }
2013 start= i==0 ? 0 : pBiDi->paras[i - 1];
/* scan backwards for the last L, R or AL character of the paragraph */
2014 for(j=last; j>=start; j--) {
2015 dirProp=NO_CONTEXT_RTL(pBiDi->dirProps[j]);
2016 if(dirProp==L) {
2017 if(j<last) {
/* step back over trailing B characters so the point lands before them */
2018 while(NO_CONTEXT_RTL(pBiDi->dirProps[last])==B) {
2019 last--;
2020 }
2021 }
2022 addPoint(pBiDi, last, RLM_BEFORE);
2023 break;
2024 }
/* an R or AL character comes last: no mark is added for this paragraph */
2025 if(DIRPROP_FLAG(dirProp) & MASK_R_AL) {
2026 break;
2027 }
2028 }
2029 }
2030 }
2031
73c04bcf
A
/* adjust resultLength: subtract controlCount when controls are removed, otherwise add the number of insert points */
2032 if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
2033 pBiDi->resultLength -= pBiDi->controlCount;
2034 } else {
2035 pBiDi->resultLength += pBiDi->insertPoints.size;
2036 }
2037 pBiDi->pParaBiDi=pBiDi; /* mark successful setPara */
2038}
2039
2040U_CAPI void U_EXPORT2
2041ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR) {
2042 if(pBiDi!=NULL) {
2043 pBiDi->orderParagraphsLTR=orderParagraphsLTR;
2044 }
2045}
2046
2047U_CAPI UBool U_EXPORT2
2048ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) {
2049 if(pBiDi!=NULL) {
2050 return pBiDi->orderParagraphsLTR;
2051 } else {
2052 return FALSE;
2053 }
374ca955 2054}
b75a7d8f
A
2055
2056U_CAPI UBiDiDirection U_EXPORT2
2057ubidi_getDirection(const UBiDi *pBiDi) {
73c04bcf 2058 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
b75a7d8f
A
2059 return pBiDi->direction;
2060 } else {
2061 return UBIDI_LTR;
2062 }
2063}
2064
2065U_CAPI const UChar * U_EXPORT2
2066ubidi_getText(const UBiDi *pBiDi) {
73c04bcf 2067 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
b75a7d8f
A
2068 return pBiDi->text;
2069 } else {
2070 return NULL;
2071 }
2072}
2073
2074U_CAPI int32_t U_EXPORT2
2075ubidi_getLength(const UBiDi *pBiDi) {
73c04bcf
A
2076 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2077 return pBiDi->originalLength;
2078 } else {
2079 return 0;
2080 }
2081}
2082
2083U_CAPI int32_t U_EXPORT2
2084ubidi_getProcessedLength(const UBiDi *pBiDi) {
2085 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
b75a7d8f
A
2086 return pBiDi->length;
2087 } else {
2088 return 0;
2089 }
2090}
2091
73c04bcf
A
2092U_CAPI int32_t U_EXPORT2
2093ubidi_getResultLength(const UBiDi *pBiDi) {
2094 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
2095 return pBiDi->resultLength;
2096 } else {
2097 return 0;
2098 }
2099}
2100
2101/* paragraphs API functions ------------------------------------------------- */
2102
b75a7d8f
A
2103U_CAPI UBiDiLevel U_EXPORT2
2104ubidi_getParaLevel(const UBiDi *pBiDi) {
73c04bcf 2105 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
b75a7d8f
A
2106 return pBiDi->paraLevel;
2107 } else {
2108 return 0;
2109 }
2110}
2111
73c04bcf
A
2112U_CAPI int32_t U_EXPORT2
2113ubidi_countParagraphs(UBiDi *pBiDi) {
2114 if(!IS_VALID_PARA_OR_LINE(pBiDi)) {
2115 return 0;
2116 } else {
2117 return pBiDi->paraCount;
2118 }
2119}
b75a7d8f 2120
73c04bcf
A
2121U_CAPI void U_EXPORT2
2122ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex,
2123 int32_t *pParaStart, int32_t *pParaLimit,
2124 UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
2125 int32_t paraStart;
b75a7d8f 2126
73c04bcf 2127 /* check the argument values */
46f4442e
A
2128 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2129 RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode);
2130 RETURN_VOID_IF_BAD_RANGE(paraIndex, 0, pBiDi->paraCount, *pErrorCode);
2131
73c04bcf
A
2132 pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */
2133 if(paraIndex) {
2134 paraStart=pBiDi->paras[paraIndex-1];
2135 } else {
2136 paraStart=0;
2137 }
2138 if(pParaStart!=NULL) {
2139 *pParaStart=paraStart;
2140 }
2141 if(pParaLimit!=NULL) {
2142 *pParaLimit=pBiDi->paras[paraIndex];
2143 }
2144 if(pParaLevel!=NULL) {
2145 *pParaLevel=GET_PARALEVEL(pBiDi, paraStart);
2146 }
73c04bcf 2147}
b75a7d8f 2148
73c04bcf
A
2149U_CAPI int32_t U_EXPORT2
2150ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex,
2151 int32_t *pParaStart, int32_t *pParaLimit,
2152 UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
2153 uint32_t paraIndex;
b75a7d8f 2154
73c04bcf
A
2155 /* check the argument values */
2156 /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */
46f4442e
A
2157 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
2158 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
73c04bcf 2159 pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */
46f4442e
A
2160 RETURN_IF_BAD_RANGE(charIndex, 0, pBiDi->length, *pErrorCode, -1);
2161
73c04bcf
A
2162 for(paraIndex=0; charIndex>=pBiDi->paras[paraIndex]; paraIndex++);
2163 ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode);
2164 return paraIndex;
2165}
b75a7d8f 2166
73c04bcf
A
2167U_CAPI void U_EXPORT2
2168ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn,
2169 const void *newContext, UBiDiClassCallback **oldFn,
2170 const void **oldContext, UErrorCode *pErrorCode)
2171{
46f4442e
A
2172 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2173 if(pBiDi==NULL) {
73c04bcf
A
2174 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2175 return;
2176 }
2177 if( oldFn )
2178 {
2179 *oldFn = pBiDi->fnClassCallback;
2180 }
2181 if( oldContext )
2182 {
2183 *oldContext = pBiDi->coClassCallback;
2184 }
2185 pBiDi->fnClassCallback = newFn;
2186 pBiDi->coClassCallback = newContext;
2187}
b75a7d8f 2188
73c04bcf
A
2189U_CAPI void U_EXPORT2
2190ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context)
2191{
46f4442e
A
2192 if(pBiDi==NULL) {
2193 return;
2194 }
73c04bcf
A
2195 if( fn )
2196 {
2197 *fn = pBiDi->fnClassCallback;
2198 }
2199 if( context )
2200 {
2201 *context = pBiDi->coClassCallback;
2202 }
2203}
b75a7d8f 2204
73c04bcf
A
2205U_CAPI UCharDirection U_EXPORT2
2206ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c)
2207{
2208 UCharDirection dir;
b75a7d8f 2209
73c04bcf
A
2210 if( pBiDi->fnClassCallback == NULL ||
2211 (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT )
2212 {
2213 return ubidi_getClass(pBiDi->bdp, c);
2214 } else {
2215 return dir;
b75a7d8f 2216 }
b75a7d8f 2217}
73c04bcf 2218