]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/ubidi.cpp
ICU-64260.0.1.tar.gz
[apple/icu.git] / icuSources / common / ubidi.cpp
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 *
6 * Copyright (C) 1999-2015, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 ******************************************************************************
10 * file name: ubidi.c
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 1999jul27
16 * created by: Markus W. Scherer, updated by Matitiahu Allouche
17 *
18 */
19
20 #include "cmemory.h"
21 #include "unicode/utypes.h"
22 #include "unicode/ustring.h"
23 #include "unicode/uchar.h"
24 #include "unicode/ubidi.h"
25 #include "unicode/utf16.h"
26 #include "ubidi_props.h"
27 #include "ubidiimp.h"
28 #include "uassert.h"
29
30 /*
31 * General implementation notes:
32 *
33 * Throughout the implementation, there are comments like (W2) that refer to
34 * rules of the BiDi algorithm, in this example to the second rule of the
35 * resolution of weak types.
36 *
37 * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
38 * character according to UTF-16, the second UChar gets the directional property of
39 * the entire character assigned, while the first one gets a BN, a boundary
40 * neutral, type, which is ignored by most of the algorithm according to
41 * rule (X9) and the implementation suggestions of the BiDi algorithm.
42 *
43 * Later, adjustWSLevels() will set the level for each BN to that of the
44 * following character (UChar), which results in surrogate pairs getting the
45 * same level on each of their surrogates.
46 *
47 * In a UTF-8 implementation, the same thing could be done: the last byte of
48 * a multi-byte sequence would get the "real" property, while all previous
49 * bytes of that sequence would get BN.
50 *
51 * It is not possible to assign all those parts of a character the same real
52 * property because this would fail in the resolution of weak types with rules
53 * that look at immediately surrounding types.
54 *
55 * As a related topic, this implementation does not remove Boundary Neutral
56 * types from the input, but ignores them wherever this is relevant.
57 * For example, the loop for the resolution of the weak types reads
58 * types until it finds a non-BN.
59 * Also, explicit embedding codes are neither changed into BN nor removed.
60 * They are only treated the same way real BNs are.
61 * As stated before, adjustWSLevels() takes care of them at the end.
62 * For the purpose of conformance, the levels of all these codes
63 * do not matter.
64 *
65 * Note that this implementation modifies the dirProps
66 * after the initial setup, when applying X5c (replace FSI by LRI or RLI),
67 * X6, N0 (replace paired brackets by L or R).
68 *
69 * In this implementation, the resolution of weak types (W1 to W6),
70 * neutrals (N1 and N2), and the assignment of the resolved level (In)
71 * are all done in one single loop, in resolveImplicitLevels().
72 * Changes of dirProp values are done on the fly, without writing
73 * them back to the dirProps array.
74 *
75 *
76 * This implementation contains code that bypasses steps of the
77 * algorithm that are not needed for the specific paragraph,
78 * in order to speed up the most common cases considerably,
79 * such as text that is entirely LTR, or RTL text without numbers.
80 *
81 * Most of this is done by setting a bit for each directional property
82 * in a flags variable and later checking for whether there are
83 * any LTR characters or any RTL characters, or both, whether
84 * there are any explicit embedding codes, etc.
85 *
86 * If the (Xn) steps are performed, then the flags are re-evaluated,
87 * because they will then not contain the embedding codes any more
88 * and will be adjusted for override codes, so that subsequently
89 * more bypassing may be possible than what the initial flags suggested.
90 *
91 * If the text is not mixed-directional, then the
92 * algorithm steps for the weak type resolution are not performed,
93 * and all levels are set to the paragraph level.
94 *
95 * If there are no explicit embedding codes, then the (Xn) steps
96 * are not performed.
97 *
98 * If embedding levels are supplied as a parameter, then all
99 * explicit embedding codes are ignored, and the (Xn) steps
100 * are not performed.
101 *
102 * White Space types could get the level of the run they belong to,
103 * and are checked with a test of (flags&MASK_EMBEDDING) to
104 * consider if the paragraph direction should be considered in
105 * the flags variable.
106 *
107 * If there are no White Space types in the paragraph, then
108 * (L1) is not necessary in adjustWSLevels().
109 */
110
111 /* to avoid some conditional statements, use tiny constant arrays */
112 static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) };
113 static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) };
114 static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) };
115
116 #define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
117 #define DIRPROP_FLAG_E(level) flagE[(level)&1]
118 #define DIRPROP_FLAG_O(level) flagO[(level)&1]
119
120 #define DIR_FROM_STRONG(strong) ((strong)==L ? L : R)
121
122 #define NO_OVERRIDE(level) ((level)&~UBIDI_LEVEL_OVERRIDE)
123
124 /* UBiDi object management -------------------------------------------------- */
125
126 U_CAPI UBiDi * U_EXPORT2
127 ubidi_open(void)
128 {
129 UErrorCode errorCode=U_ZERO_ERROR;
130 return ubidi_openSized(0, 0, &errorCode);
131 }
132
133 U_CAPI UBiDi * U_EXPORT2
134 ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) {
135 UBiDi *pBiDi;
136
137 /* check the argument values */
138 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
139 return NULL;
140 } else if(maxLength<0 || maxRunCount<0) {
141 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
142 return NULL; /* invalid arguments */
143 }
144
145 /* allocate memory for the object */
146 pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi));
147 if(pBiDi==NULL) {
148 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
149 return NULL;
150 }
151
152 /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */
153 uprv_memset(pBiDi, 0, sizeof(UBiDi));
154
155 /* allocate memory for arrays as requested */
156 if(maxLength>0) {
157 if( !getInitialDirPropsMemory(pBiDi, maxLength) ||
158 !getInitialLevelsMemory(pBiDi, maxLength)
159 ) {
160 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
161 }
162 } else {
163 pBiDi->mayAllocateText=TRUE;
164 }
165
166 if(maxRunCount>0) {
167 if(maxRunCount==1) {
168 /* use simpleRuns[] */
169 pBiDi->runsSize=sizeof(Run);
170 } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) {
171 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
172 }
173 } else {
174 pBiDi->mayAllocateRuns=TRUE;
175 }
176
177 if(U_SUCCESS(*pErrorCode)) {
178 return pBiDi;
179 } else {
180 ubidi_close(pBiDi);
181 return NULL;
182 }
183 }
184
185 /*
186 * We are allowed to allocate memory if memory==NULL or
187 * mayAllocate==TRUE for each array that we need.
188 * We also try to grow memory as needed if we
189 * allocate it.
190 *
191 * Assume sizeNeeded>0.
192 * If *pMemory!=NULL, then assume *pSize>0.
193 *
194 * ### this realloc() may unnecessarily copy the old data,
195 * which we know we don't need any more;
196 * is this the best way to do this??
197 */
198 U_CFUNC UBool
199 ubidi_getMemory(BidiMemoryForAllocation *bidiMem, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded) {
200 void **pMemory = (void **)bidiMem;
201 /* check for existing memory */
202 if(*pMemory==NULL) {
203 /* we need to allocate memory */
204 if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=NULL) {
205 *pSize=sizeNeeded;
206 return TRUE;
207 } else {
208 return FALSE;
209 }
210 } else {
211 if(sizeNeeded<=*pSize) {
212 /* there is already enough memory */
213 return TRUE;
214 }
215 else if(!mayAllocate) {
216 /* not enough memory, and we must not allocate */
217 return FALSE;
218 } else {
219 /* we try to grow */
220 void *memory;
221 /* in most cases, we do not need the copy-old-data part of
222 * realloc, but it is needed when adding runs using getRunsMemory()
223 * in setParaRunsOnly()
224 */
225 if((memory=uprv_realloc(*pMemory, sizeNeeded))!=NULL) {
226 *pMemory=memory;
227 *pSize=sizeNeeded;
228 return TRUE;
229 } else {
230 /* we failed to grow */
231 return FALSE;
232 }
233 }
234 }
235 }
236
237 U_CAPI void U_EXPORT2
238 ubidi_close(UBiDi *pBiDi) {
239 if(pBiDi!=NULL) {
240 pBiDi->pParaBiDi=NULL; /* in case one tries to reuse this block */
241 if(pBiDi->dirInsertMemory!=NULL) {
242 uprv_free(pBiDi->dirInsertMemory);
243 }
244 if(pBiDi->dirPropsMemory!=NULL) {
245 uprv_free(pBiDi->dirPropsMemory);
246 }
247 if(pBiDi->levelsMemory!=NULL) {
248 uprv_free(pBiDi->levelsMemory);
249 }
250 if(pBiDi->openingsMemory!=NULL) {
251 uprv_free(pBiDi->openingsMemory);
252 }
253 if(pBiDi->parasMemory!=NULL) {
254 uprv_free(pBiDi->parasMemory);
255 }
256 if(pBiDi->runsMemory!=NULL) {
257 uprv_free(pBiDi->runsMemory);
258 }
259 if(pBiDi->isolatesMemory!=NULL) {
260 uprv_free(pBiDi->isolatesMemory);
261 }
262 if(pBiDi->insertPoints.points!=NULL) {
263 uprv_free(pBiDi->insertPoints.points);
264 }
265
266 uprv_free(pBiDi);
267 }
268 }
269
270 /* set to approximate "inverse BiDi" ---------------------------------------- */
271
272 U_CAPI void U_EXPORT2
273 ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) {
274 if(pBiDi!=NULL) {
275 pBiDi->isInverse=isInverse;
276 pBiDi->reorderingMode = isInverse ? UBIDI_REORDER_INVERSE_NUMBERS_AS_L
277 : UBIDI_REORDER_DEFAULT;
278 }
279 }
280
281 U_CAPI UBool U_EXPORT2
282 ubidi_isInverse(UBiDi *pBiDi) {
283 if(pBiDi!=NULL) {
284 return pBiDi->isInverse;
285 } else {
286 return FALSE;
287 }
288 }
289
290 /* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
291 * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre
292 * concept of RUNS_ONLY which is a double operation.
293 * It could be advantageous to divide this into 3 concepts:
294 * a) Operation: direct / inverse / RUNS_ONLY
295 * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R
296 * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL
297 * This would allow combinations not possible today like RUNS_ONLY with
298 * NUMBERS_SPECIAL.
299 * It would also allow setting INSERT_MARKS for the direct step of RUNS_ONLY
300 * and REMOVE_CONTROLS for the inverse step.
301 * Not all combinations would be supported, and probably not all make sense.
302 * The documentation would need to state which ones are supported and what
303 * the fallbacks for unsupported combinations are.
304 */
305 U_CAPI void U_EXPORT2
306 ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) {
307 if ((pBiDi!=NULL) && (reorderingMode >= UBIDI_REORDER_DEFAULT)
308 && (reorderingMode < UBIDI_REORDER_COUNT)) {
309 pBiDi->reorderingMode = reorderingMode;
310 pBiDi->isInverse = (UBool)(reorderingMode == UBIDI_REORDER_INVERSE_NUMBERS_AS_L);
311 }
312 }
313
314 U_CAPI UBiDiReorderingMode U_EXPORT2
315 ubidi_getReorderingMode(UBiDi *pBiDi) {
316 if (pBiDi!=NULL) {
317 return pBiDi->reorderingMode;
318 } else {
319 return UBIDI_REORDER_DEFAULT;
320 }
321 }
322
323 U_CAPI void U_EXPORT2
324 ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions) {
325 if (reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
326 reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
327 }
328 if (pBiDi!=NULL) {
329 pBiDi->reorderingOptions=reorderingOptions;
330 }
331 }
332
333 U_CAPI uint32_t U_EXPORT2
334 ubidi_getReorderingOptions(UBiDi *pBiDi) {
335 if (pBiDi!=NULL) {
336 return pBiDi->reorderingOptions;
337 } else {
338 return 0;
339 }
340 }
341
342 U_CAPI UBiDiDirection U_EXPORT2
343 ubidi_getBaseDirection(const UChar *text,
344 int32_t length){
345
346 int32_t i;
347 UChar32 uchar;
348 UCharDirection dir;
349
350 if( text==NULL || length<-1 ){
351 return UBIDI_NEUTRAL;
352 }
353
354 if(length==-1) {
355 length=u_strlen(text);
356 }
357
358 for( i = 0 ; i < length; ) {
359 /* i is incremented by U16_NEXT */
360 U16_NEXT(text, i, length, uchar);
361 dir = u_charDirection(uchar);
362 if( dir == U_LEFT_TO_RIGHT )
363 return UBIDI_LTR;
364 if( dir == U_RIGHT_TO_LEFT || dir ==U_RIGHT_TO_LEFT_ARABIC )
365 return UBIDI_RTL;
366 }
367 return UBIDI_NEUTRAL;
368 }
369
370 /* perform (P2)..(P3) ------------------------------------------------------- */
371
372 /**
373 * Returns the directionality of the first strong character
374 * after the last B in prologue, if any.
375 * Requires prologue!=null.
376 */
377 static DirProp
378 firstL_R_AL(UBiDi *pBiDi) {
379 const UChar *text=pBiDi->prologue;
380 int32_t length=pBiDi->proLength;
381 int32_t i;
382 UChar32 uchar;
383 DirProp dirProp, result=ON;
384 for(i=0; i<length; ) {
385 /* i is incremented by U16_NEXT */
386 U16_NEXT(text, i, length, uchar);
387 dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
388 if(result==ON) {
389 if(dirProp==L || dirProp==R || dirProp==AL) {
390 result=dirProp;
391 }
392 } else {
393 if(dirProp==B) {
394 result=ON;
395 }
396 }
397 }
398 return result;
399 }
400
401 /*
402 * Check that there are enough entries in the array pointed to by pBiDi->paras
403 */
404 static UBool
405 checkParaCount(UBiDi *pBiDi) {
406 int32_t count=pBiDi->paraCount;
407 if(pBiDi->paras==pBiDi->simpleParas) {
408 if(count<=SIMPLE_PARAS_COUNT)
409 return TRUE;
410 if(!getInitialParasMemory(pBiDi, SIMPLE_PARAS_COUNT * 2))
411 return FALSE;
412 pBiDi->paras=pBiDi->parasMemory;
413 uprv_memcpy(pBiDi->parasMemory, pBiDi->simpleParas, SIMPLE_PARAS_COUNT * sizeof(Para));
414 return TRUE;
415 }
416 if(!getInitialParasMemory(pBiDi, count * 2))
417 return FALSE;
418 pBiDi->paras=pBiDi->parasMemory;
419 return TRUE;
420 }
421
422 /*
423 * Get the directional properties for the inserted bidi controls.
424 */
425
/*
 * Compact codes for the bidi classes that can be recorded for inserted
 * controls. There are 16 codes, so each one fits in 4 bits.
 */
enum {
    Insert_none  =  0,  /* all classes without a code of their own */
    Insert_L     =  1,  /* L   = U_LEFT_TO_RIGHT */
    Insert_R     =  2,  /* R   = U_RIGHT_TO_LEFT */
    Insert_AL    =  3,  /* AL  = U_RIGHT_TO_LEFT_ARABIC */
    Insert_LRE   =  4,  /* LRE = U_LEFT_TO_RIGHT_EMBEDDING */
    Insert_LRO   =  5,  /* LRO = U_LEFT_TO_RIGHT_OVERRIDE */
    Insert_RLE   =  6,  /* RLE = U_RIGHT_TO_LEFT_EMBEDDING */
    Insert_RLO   =  7,  /* RLO = U_RIGHT_TO_LEFT_OVERRIDE */
    Insert_PDF   =  8,  /* PDF = U_POP_DIRECTIONAL_FORMAT */
    Insert_FSI   =  9,  /* FSI = U_FIRST_STRONG_ISOLATE */
    Insert_LRI   = 10,  /* LRI = U_LEFT_TO_RIGHT_ISOLATE */
    Insert_RLI   = 11,  /* RLI = U_RIGHT_TO_LEFT_ISOLATE */
    Insert_PDI   = 12,  /* PDI = U_POP_DIRECTIONAL_ISOLATE */
    Insert_B     = 13,  /* B   = U_BLOCK_SEPARATOR */
    Insert_S     = 14,  /* S   = U_SEGMENT_SEPARATOR */
    Insert_WS    = 15,  /* WS  = U_WHITE_SPACE_NEUTRAL */
    Insert_count = 16   /* number of codes above */
};
446
447 /* map standard dir class to special 4-bit insert value (Insert_none as default) */
448 static const uint16_t insertDirFromStdDir[dirPropCount] = {
449 Insert_none, /* L= U_LEFT_TO_RIGHT */
450 Insert_none, /* R= U_RIGHT_TO_LEFT, */
451 Insert_none, /* EN= U_EUROPEAN_NUMBER */
452 Insert_none, /* ES= U_EUROPEAN_NUMBER_SEPARATOR */
453 Insert_none, /* ET= U_EUROPEAN_NUMBER_TERMINATOR */
454 Insert_none, /* AN= U_ARABIC_NUMBER */
455 Insert_none, /* CS= U_COMMON_NUMBER_SEPARATOR */
456 Insert_none, /* B= U_BLOCK_SEPARATOR */
457 Insert_none, /* S= U_SEGMENT_SEPARATOR */
458 Insert_none, /* WS= U_WHITE_SPACE_NEUTRAL */
459 Insert_none, /* ON= U_OTHER_NEUTRAL */
460 Insert_LRE, /* LRE=U_LEFT_TO_RIGHT_EMBEDDING */
461 Insert_LRO, /* LRO=U_LEFT_TO_RIGHT_OVERRIDE */
462 Insert_none, /* AL= U_RIGHT_TO_LEFT_ARABIC */
463 Insert_RLE, /* RLE=U_RIGHT_TO_LEFT_EMBEDDING */
464 Insert_RLO, /* RLO=U_RIGHT_TO_LEFT_OVERRIDE */
465 Insert_PDF, /* PDF=U_POP_DIRECTIONAL_FORMAT */
466 Insert_none, /* NSM=U_DIR_NON_SPACING_MARK */
467 Insert_none, /* BN= U_BOUNDARY_NEUTRAL */
468 Insert_FSI, /* FSI=U_FIRST_STRONG_ISOLATE */
469 Insert_LRI, /* LRI=U_LEFT_TO_RIGHT_ISOLATE */
470 Insert_RLI, /* RLI=U_RIGHT_TO_LEFT_ISOLATE */
471 Insert_PDI, /* PDI=U_POP_DIRECTIONAL_ISOLATE */
472 Insert_none, /* ENL */
473 Insert_none, /* ENR */
474 };
475
476 /* map special 4-bit insert direction class to standard dir class (ON as default) */
477 static const uint8_t stdDirFromInsertDir[Insert_count] = {
478 ON, /* Insert_none > ON */
479 L, /* Insert_L */
480 R, /* Insert_R */
481 AL, /* Insert_AL */
482 LRE, /* Insert_LRE */
483 LRO, /* Insert_LRO */
484 RLE, /* Insert_RLE */
485 RLO, /* Insert_RLO */
486 PDF, /* Insert_PDF */
487 FSI, /* Insert_FSI */
488 LRI, /* Insert_LRI */
489 RLI, /* Insert_RLI */
490 PDI, /* Insert_PDI */
491 B, /* Insert_B */
492 S, /* Insert_S */
493 WS, /* Insert_WS */
494 };
495
496 enum { kMaxControlStringLen = 4 };
497
498 static UBool
499 getDirInsert(UBiDi *pBiDi,
500 const int32_t *offsets, int32_t offsetCount,
501 const int32_t *controlStringIndices,
502 const UChar * const * controlStrings) {
503 int32_t offset, offsetsIndex;
504 uint16_t *dirInsert = pBiDi->dirInsert;
505 /* initialize dirInsert */
506 for (offset = 0; offset < pBiDi->length; offset++) {
507 dirInsert[offset] = 0;
508 }
509 for (offsetsIndex = 0; offsetsIndex < offsetCount; offsetsIndex++) {
510 const UChar * controlString;
511 UChar uchar;
512 int32_t controlStringIndex, dirInsertIndex = 0;
513 uint16_t dirInsertValue = 0;
514 offset = offsets[offsetsIndex];
515 if (offset < 0 || offset >= pBiDi->length) {
516 return FALSE; /* param err in offsets array */
517 }
518 controlStringIndex = (controlStringIndices == NULL)? offsetsIndex: controlStringIndices[offsetsIndex];
519 controlString = controlStrings[controlStringIndex];
520 if (controlString == NULL) {
521 return FALSE; /* param err in controlStrings array */
522 }
523 while ((uchar = *controlString++) != 0) {
524 uint16_t insertValue = (U16_IS_SURROGATE(uchar))? Insert_none:
525 insertDirFromStdDir[(uint32_t)ubidi_getCustomizedClass(pBiDi, uchar)];
526 if (dirInsertIndex >= kMaxControlStringLen || insertValue == Insert_none) {
527 return FALSE; /* param err in controlStrings array */
528 }
529 dirInsertValue |= (insertValue << (4 * dirInsertIndex++));
530 }
531 dirInsert[offset] = dirInsertValue;
532 }
533 return TRUE;
534 }
535
536 /*
537 * Get the directional properties for the text, calculate the flags bit-set, and
538 * determine the paragraph level if necessary (in pBiDi->paras[i].level).
539 * FSI initiators are also resolved and their dirProp replaced with LRI or RLI.
540 * When encountering an FSI, it is initially replaced with an LRI, which is the
541 * default. Only if a strong R or AL is found within its scope will the LRI be
542 * replaced by an RLI.
543 */
544 static UBool
545 getDirProps(UBiDi *pBiDi) {
546 const UChar *text=pBiDi->text;
547 DirProp *dirProps=pBiDi->dirPropsMemory; /* pBiDi->dirProps is const */
548 uint16_t *dirInsert = pBiDi->dirInsert; /* may be NULL */
549
550 int32_t i=0, originalLength=pBiDi->originalLength;
551 Flags flags=0; /* collect all directionalities in the text */
552 UChar32 uchar;
553 DirProp dirProp=0, defaultParaLevel=0; /* initialize to avoid compiler warnings */
554 int32_t dirInsertValue;
555 int8_t dirInsertIndex; /* position within dirInsertValue, if any */
556 UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel);
557 /* for inverse BiDi, the default para level is set to RTL if there is a
558 strong R or AL character at either end of the text */
559 UBool isDefaultLevelInverse=isDefaultLevel && (UBool)
560 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
561 pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL);
562 int32_t lastArabicPos=-1;
563 int32_t controlCount=0;
564 UBool removeBiDiControls = (UBool)(pBiDi->reorderingOptions &
565 UBIDI_OPTION_REMOVE_CONTROLS);
566
567 enum State {
568 NOT_SEEKING_STRONG, /* 0: not contextual paraLevel, not after FSI */
569 SEEKING_STRONG_FOR_PARA, /* 1: looking for first strong char in para */
570 SEEKING_STRONG_FOR_FSI, /* 2: looking for first strong after FSI */
571 LOOKING_FOR_PDI /* 3: found strong after FSI, looking for PDI */
572 };
573 State state;
574 DirProp lastStrong=ON; /* for default level & inverse BiDi */
575 /* The following stacks are used to manage isolate sequences. Those
576 sequences may be nested, but obviously never more deeply than the
577 maximum explicit embedding level.
578 lastStack is the index of the last used entry in the stack. A value of -1
579 means that there is no open isolate sequence.
580 lastStack is reset to -1 on paragraph boundaries. */
581 /* The following stack contains the position of the initiator of
582 each open isolate sequence */
583 int32_t isolateStartStack[UBIDI_MAX_EXPLICIT_LEVEL+1];
584 int8_t isolateStartInsertIndex[UBIDI_MAX_EXPLICIT_LEVEL+1];
585 /* The following stack contains the last known state before
586 encountering the initiator of an isolate sequence */
587 State previousStateStack[UBIDI_MAX_EXPLICIT_LEVEL+1];
588 int32_t stackLast=-1;
589
590 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING)
591 pBiDi->length=0;
592 defaultParaLevel=pBiDi->paraLevel&1;
593 if(isDefaultLevel) {
594 pBiDi->paras[0].level=defaultParaLevel;
595 lastStrong=defaultParaLevel;
596 if(pBiDi->proLength>0 && /* there is a prologue */
597 (dirProp=firstL_R_AL(pBiDi))!=ON) { /* with a strong character */
598 if(dirProp==L)
599 pBiDi->paras[0].level=0; /* set the default para level */
600 else
601 pBiDi->paras[0].level=1; /* set the default para level */
602 state=NOT_SEEKING_STRONG;
603 } else {
604 state=SEEKING_STRONG_FOR_PARA;
605 }
606 } else {
607 pBiDi->paras[0].level=pBiDi->paraLevel;
608 state=NOT_SEEKING_STRONG;
609 }
610 /* count paragraphs and determine the paragraph level (P2..P3) */
611 /*
612 * see comment in ubidi.h:
613 * the UBIDI_DEFAULT_XXX values are designed so that
614 * their bit 0 alone yields the intended default
615 */
616 dirInsertValue = 0;
617 dirInsertIndex = -1; /* indicate that we have not checked dirInsert yet */
618 for( /* i=0 above */ ; i<originalLength; ) {
619 if (dirInsert != NULL && dirInsertIndex < 0) {
620 dirInsertValue = dirInsert[i];
621 }
622 if (dirInsertValue > 0) {
623 dirInsertIndex++;
624 dirProp = (DirProp)stdDirFromInsertDir[dirInsertValue & 0x000F];
625 dirInsertValue >>= 4;
626 flags|=DIRPROP_FLAG(dirProp);
627 uchar = 0;
628 } else {
629 dirInsertIndex = -1;
630 /* i is incremented by U16_NEXT */
631 U16_NEXT(text, i, originalLength, uchar);
632 flags|=DIRPROP_FLAG(dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar));
633 dirProps[i-1]=dirProp;
634 if(uchar>0xffff) { /* set the lead surrogate's property to BN */
635 flags|=DIRPROP_FLAG(BN);
636 dirProps[i-2]=BN;
637 }
638 }
639 if(removeBiDiControls && IS_BIDI_CONTROL_CHAR(uchar))
640 controlCount++;
641 if(dirProp==L) {
642 if(state==SEEKING_STRONG_FOR_PARA) {
643 pBiDi->paras[pBiDi->paraCount-1].level=0;
644 state=NOT_SEEKING_STRONG;
645 }
646 else if(state==SEEKING_STRONG_FOR_FSI) {
647 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
648 /* no need for next statement, already set by default */
649 /* dirProps[isolateStartStack[stackLast]]=LRI; */
650 flags|=DIRPROP_FLAG(LRI);
651 }
652 state=LOOKING_FOR_PDI;
653 }
654 lastStrong=L;
655 continue;
656 }
657 if(dirProp==R || dirProp==AL) {
658 if(state==SEEKING_STRONG_FOR_PARA) {
659 pBiDi->paras[pBiDi->paraCount-1].level=1;
660 state=NOT_SEEKING_STRONG;
661 }
662 else if(state==SEEKING_STRONG_FOR_FSI) {
663 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
664 if (isolateStartInsertIndex[stackLast] < 0) {
665 dirProps[isolateStartStack[stackLast]]=RLI;
666 } else {
667 dirInsert[stackLast] &= ~(0x000F << (4*isolateStartInsertIndex[stackLast]));
668 dirInsert[stackLast] |= (Insert_RLI << (4*isolateStartInsertIndex[stackLast]));
669 }
670 flags|=DIRPROP_FLAG(RLI);
671 }
672 state=LOOKING_FOR_PDI;
673 }
674 lastStrong=R;
675 if(dirProp==AL)
676 lastArabicPos=i-1;
677 continue;
678 }
679 if(dirProp>=FSI && dirProp<=RLI) { /* FSI, LRI or RLI */
680 stackLast++;
681 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
682 isolateStartStack[stackLast]= (dirInsertIndex < 0)? i-1: i /* we have not incremented with U16_NEXT yet */;
683 isolateStartInsertIndex[stackLast] = dirInsertIndex;
684 previousStateStack[stackLast]=state;
685 }
686 if(dirProp==FSI) {
687 if (dirInsertIndex < 0) {
688 dirProps[i-1]=LRI; /* default if no strong char */
689 } else {
690 dirInsert[i] &= ~(0x000F << (4*dirInsertIndex));
691 dirInsert[i] |= (Insert_LRI << (4*dirInsertIndex));
692 }
693 state=SEEKING_STRONG_FOR_FSI;
694 }
695 else
696 state=LOOKING_FOR_PDI;
697 continue;
698 }
699 if(dirProp==PDI) {
700 if(state==SEEKING_STRONG_FOR_FSI) {
701 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
702 /* no need for next statement, already set by default */
703 /* dirProps[isolateStartStack[stackLast]]=LRI; */
704 flags|=DIRPROP_FLAG(LRI);
705 }
706 }
707 if(stackLast>=0) {
708 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL)
709 state=previousStateStack[stackLast];
710 stackLast--;
711 }
712 continue;
713 }
714 if(dirProp==B) {
715 if(i<originalLength && uchar==CR && text[i]==LF) /* do nothing on the CR */
716 continue;
717 pBiDi->paras[pBiDi->paraCount-1].limit=i;
718 if(isDefaultLevelInverse && lastStrong==R)
719 pBiDi->paras[pBiDi->paraCount-1].level=1;
720 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
721 /* When streaming, we only process whole paragraphs
722 thus some updates are only done on paragraph boundaries */
723 pBiDi->length=i; /* i is index to next character */
724 pBiDi->controlCount=controlCount;
725 }
726 if(i<originalLength) { /* B not last char in text */
727 pBiDi->paraCount++;
728 if(checkParaCount(pBiDi)==FALSE) /* not enough memory for a new para entry */
729 return FALSE;
730 if(isDefaultLevel) {
731 pBiDi->paras[pBiDi->paraCount-1].level=defaultParaLevel;
732 state=SEEKING_STRONG_FOR_PARA;
733 lastStrong=defaultParaLevel;
734 } else {
735 pBiDi->paras[pBiDi->paraCount-1].level=pBiDi->paraLevel;
736 state=NOT_SEEKING_STRONG;
737 }
738 stackLast=-1;
739 }
740 continue;
741 }
742 }
743 /* Ignore still open isolate sequences with overflow */
744 if(stackLast>UBIDI_MAX_EXPLICIT_LEVEL) {
745 stackLast=UBIDI_MAX_EXPLICIT_LEVEL;
746 state=SEEKING_STRONG_FOR_FSI; /* to be on the safe side */
747 }
748 /* Resolve direction of still unresolved open FSI sequences */
749 while(stackLast>=0) {
750 if(state==SEEKING_STRONG_FOR_FSI) {
751 /* no need for next statement, already set by default */
752 /* dirProps[isolateStartStack[stackLast]]=LRI; */
753 flags|=DIRPROP_FLAG(LRI);
754 break;
755 }
756 state=previousStateStack[stackLast];
757 stackLast--;
758 }
759 /* When streaming, ignore text after the last paragraph separator */
760 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
761 if(pBiDi->length<originalLength)
762 pBiDi->paraCount--;
763 } else {
764 pBiDi->paras[pBiDi->paraCount-1].limit=originalLength;
765 pBiDi->controlCount=controlCount;
766 }
767 /* For inverse bidi, default para direction is RTL if there is
768 a strong R or AL at either end of the paragraph */
769 if(isDefaultLevelInverse && lastStrong==R) {
770 pBiDi->paras[pBiDi->paraCount-1].level=1;
771 }
772 if(isDefaultLevel) {
773 pBiDi->paraLevel=static_cast<UBiDiLevel>(pBiDi->paras[0].level);
774 }
775 /* The following is needed to resolve the text direction for default level
776 paragraphs containing no strong character */
777 for(i=0; i<pBiDi->paraCount; i++)
778 flags|=DIRPROP_FLAG_LR(pBiDi->paras[i].level);
779
780 if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
781 flags|=DIRPROP_FLAG(L);
782 }
783 pBiDi->flags=flags;
784 pBiDi->lastArabicPos=lastArabicPos;
785 return TRUE;
786 }
787
788 /* determine the paragraph level at position index */
789 U_CFUNC UBiDiLevel
790 ubidi_getParaLevelAtIndex(const UBiDi *pBiDi, int32_t pindex) {
791 int32_t i;
792 for(i=0; i<pBiDi->paraCount; i++)
793 if(pindex<pBiDi->paras[i].limit)
794 break;
795 if(i>=pBiDi->paraCount)
796 i=pBiDi->paraCount-1;
797 return (UBiDiLevel)(pBiDi->paras[i].level);
798 }
799
800 /* Functions for handling paired brackets ----------------------------------- */
801
802 /* In the isoRuns array, the first entry is used for text outside of any
803 isolate sequence. Higher entries are used for each more deeply nested
804 isolate sequence. isoRunLast is the index of the last used entry. The
805 openings array is used to note the data of opening brackets not yet
806 matched by a closing bracket, or matched but still susceptible to change
807 level.
808 Each isoRun entry contains the index of the first and
809 one-after-last openings entries for pending opening brackets it
810 contains. The next openings entry to use is the one-after-last of the
811 most deeply nested isoRun entry.
812 isoRun entries also contain their current embedding level and the last
813 encountered strong character, since these will be needed to resolve
814 the level of paired brackets. */
815
816 static void
817 bracketInit(UBiDi *pBiDi, BracketData *bd) {
818 bd->pBiDi=pBiDi;
819 bd->isoRunLast=0;
820 bd->isoRuns[0].start=0;
821 bd->isoRuns[0].limit=0;
822 bd->isoRuns[0].level=GET_PARALEVEL(pBiDi, 0);
823 UBiDiLevel t = GET_PARALEVEL(pBiDi, 0) & 1;
824 bd->isoRuns[0].lastStrong = bd->isoRuns[0].lastBase = t;
825 bd->isoRuns[0].contextDir = (UBiDiDirection)t;
826 bd->isoRuns[0].contextPos=0;
827 if(pBiDi->openingsMemory) {
828 bd->openings=pBiDi->openingsMemory;
829 bd->openingsCount=pBiDi->openingsSize / sizeof(Opening);
830 } else {
831 bd->openings=bd->simpleOpenings;
832 bd->openingsCount=SIMPLE_OPENINGS_COUNT;
833 }
834 bd->isNumbersSpecial=bd->pBiDi->reorderingMode==UBIDI_REORDER_NUMBERS_SPECIAL ||
835 bd->pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL;
836 }
837
838 /* paragraph boundary */
839 static void
840 bracketProcessB(BracketData *bd, UBiDiLevel level) {
841 bd->isoRunLast=0;
842 bd->isoRuns[0].limit=0;
843 bd->isoRuns[0].level=level;
844 bd->isoRuns[0].lastStrong=bd->isoRuns[0].lastBase=level&1;
845 bd->isoRuns[0].contextDir=(UBiDiDirection)(level&1);
846 bd->isoRuns[0].contextPos=0;
847 }
848
849 /* LRE, LRO, RLE, RLO, PDF */
850 static void
851 bracketProcessBoundary(BracketData *bd, int32_t lastCcPos, DirProp lastCcDirProp,
852 UBiDiLevel contextLevel, UBiDiLevel embeddingLevel) {
853 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
854 if(DIRPROP_FLAG(lastCcDirProp)&MASK_ISO) /* after an isolate */
855 return;
856 if(NO_OVERRIDE(embeddingLevel)>NO_OVERRIDE(contextLevel)) /* not a PDF */
857 contextLevel=embeddingLevel;
858 pLastIsoRun->limit=pLastIsoRun->start;
859 pLastIsoRun->level=embeddingLevel;
860 pLastIsoRun->lastStrong=pLastIsoRun->lastBase=contextLevel&1;
861 pLastIsoRun->contextDir=(UBiDiDirection)(contextLevel&1);
862 pLastIsoRun->contextPos=(UBiDiDirection)lastCcPos;
863 }
864
865 /* LRI or RLI */
866 static void
867 bracketProcessLRI_RLI(BracketData *bd, UBiDiLevel level) {
868 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
869 int16_t lastLimit;
870 pLastIsoRun->lastBase=ON;
871 lastLimit=pLastIsoRun->limit;
872 bd->isoRunLast++;
873 pLastIsoRun++;
874 pLastIsoRun->start=pLastIsoRun->limit=lastLimit;
875 pLastIsoRun->level=level;
876 pLastIsoRun->lastStrong=pLastIsoRun->lastBase=level&1;
877 pLastIsoRun->contextDir=(UBiDiDirection)(level&1);
878 pLastIsoRun->contextPos=0;
879 }
880
881 /* PDI */
882 static void
883 bracketProcessPDI(BracketData *bd) {
884 IsoRun *pLastIsoRun;
885 bd->isoRunLast--;
886 pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
887 pLastIsoRun->lastBase=ON;
888 }
889
890 /* newly found opening bracket: create an openings entry */
891 static UBool /* return TRUE if success */
892 bracketAddOpening(BracketData *bd, UChar match, int32_t position) {
893 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
894 Opening *pOpening;
895 if(pLastIsoRun->limit>=bd->openingsCount) { /* no available new entry */
896 UBiDi *pBiDi=bd->pBiDi;
897 if(!getInitialOpeningsMemory(pBiDi, pLastIsoRun->limit * 2))
898 return FALSE;
899 if(bd->openings==bd->simpleOpenings)
900 uprv_memcpy(pBiDi->openingsMemory, bd->simpleOpenings,
901 SIMPLE_OPENINGS_COUNT * sizeof(Opening));
902 bd->openings=pBiDi->openingsMemory; /* may have changed */
903 bd->openingsCount=pBiDi->openingsSize / sizeof(Opening);
904 }
905 pOpening=&bd->openings[pLastIsoRun->limit];
906 pOpening->position=position;
907 pOpening->match=match;
908 pOpening->contextDir=pLastIsoRun->contextDir;
909 pOpening->contextPos=pLastIsoRun->contextPos;
910 pOpening->flags=0;
911 pLastIsoRun->limit++;
912 return TRUE;
913 }
914
915 /* change N0c1 to N0c2 when a preceding bracket is assigned the embedding level */
916 static void
917 fixN0c(BracketData *bd, int32_t openingIndex, int32_t newPropPosition, DirProp newProp) {
918 /* This function calls itself recursively */
919 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
920 Opening *qOpening;
921 DirProp *dirProps=bd->pBiDi->dirProps;
922 int32_t k, openingPosition, closingPosition;
923 for(k=openingIndex+1, qOpening=&bd->openings[k]; k<pLastIsoRun->limit; k++, qOpening++) {
924 if(qOpening->match>=0) /* not an N0c match */
925 continue;
926 if(newPropPosition<qOpening->contextPos)
927 break;
928 if(newPropPosition>=qOpening->position)
929 continue;
930 if(newProp==qOpening->contextDir)
931 break;
932 openingPosition=qOpening->position;
933 dirProps[openingPosition]=newProp;
934 closingPosition=-(qOpening->match);
935 dirProps[closingPosition]=newProp;
936 qOpening->match=0; /* prevent further changes */
937 fixN0c(bd, k, openingPosition, newProp);
938 fixN0c(bd, k, closingPosition, newProp);
939 }
940 }
941
942 /* process closing bracket */
943 static DirProp /* return L or R if N0b or N0c, ON if N0d */
944 bracketProcessClosing(BracketData *bd, int32_t openIdx, int32_t position) {
945 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
946 Opening *pOpening, *qOpening;
947 UBiDiDirection direction;
948 UBool stable;
949 DirProp newProp;
950 pOpening=&bd->openings[openIdx];
951 direction=(UBiDiDirection)(pLastIsoRun->level&1);
952 stable=TRUE; /* assume stable until proved otherwise */
953
954 /* The stable flag is set when brackets are paired and their
955 level is resolved and cannot be changed by what will be
956 found later in the source string.
957 An unstable match can occur only when applying N0c, where
958 the resolved level depends on the preceding context, and
959 this context may be affected by text occurring later.
960 Example: RTL paragraph containing: abc[(latin) HEBREW]
961 When the closing parenthesis is encountered, it appears
962 that N0c1 must be applied since 'abc' sets an opposite
963 direction context and both parentheses receive level 2.
964 However, when the closing square bracket is processed,
965 N0b applies because of 'HEBREW' being included within the
966 brackets, thus the square brackets are treated like R and
967 receive level 1. However, this changes the preceding
968 context of the opening parenthesis, and it now appears
969 that N0c2 must be applied to the parentheses rather than
970 N0c1. */
971
972 if((direction==0 && pOpening->flags&FOUND_L) ||
973 (direction==1 && pOpening->flags&FOUND_R)) { /* N0b */
974 newProp=static_cast<DirProp>(direction);
975 }
976 else if(pOpening->flags&(FOUND_L|FOUND_R)) { /* N0c */
977 /* it is stable if there is no containing pair or in
978 conditions too complicated and not worth checking */
979 stable=(openIdx==pLastIsoRun->start);
980 if(direction!=pOpening->contextDir)
981 newProp= static_cast<DirProp>(pOpening->contextDir); /* N0c1 */
982 else
983 newProp= static_cast<DirProp>(direction); /* N0c2 */
984 } else {
985 /* forget this and any brackets nested within this pair */
986 pLastIsoRun->limit= static_cast<uint16_t>(openIdx);
987 return ON; /* N0d */
988 }
989 bd->pBiDi->dirProps[pOpening->position]=newProp;
990 bd->pBiDi->dirProps[position]=newProp;
991 /* Update nested N0c pairs that may be affected */
992 fixN0c(bd, openIdx, pOpening->position, newProp);
993 if(stable) {
994 pLastIsoRun->limit= static_cast<uint16_t>(openIdx); /* forget any brackets nested within this pair */
995 /* remove lower located synonyms if any */
996 while(pLastIsoRun->limit>pLastIsoRun->start &&
997 bd->openings[pLastIsoRun->limit-1].position==pOpening->position)
998 pLastIsoRun->limit--;
999 } else {
1000 int32_t k;
1001 pOpening->match=-position;
1002 /* neutralize lower located synonyms if any */
1003 k=openIdx-1;
1004 while(k>=pLastIsoRun->start &&
1005 bd->openings[k].position==pOpening->position)
1006 bd->openings[k--].match=0;
1007 /* neutralize any unmatched opening between the current pair;
1008 this will also neutralize higher located synonyms if any */
1009 for(k=openIdx+1; k<pLastIsoRun->limit; k++) {
1010 qOpening=&bd->openings[k];
1011 if(qOpening->position>=position)
1012 break;
1013 if(qOpening->match>0)
1014 qOpening->match=0;
1015 }
1016 }
1017 return newProp;
1018 }
1019
1020 /* handle strong characters, digits and candidates for closing brackets */
1021 static UBool /* return TRUE if success */
1022 bracketProcessChar(BracketData *bd, int32_t position) {
1023 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
1024 DirProp *dirProps, dirProp, newProp;
1025 UBiDiLevel level;
1026 dirProps=bd->pBiDi->dirProps;
1027 dirProp=dirProps[position];
1028 if(dirProp==ON) {
1029 UChar c, match;
1030 int32_t idx;
1031 /* First see if it is a matching closing bracket. Hopefully, this is
1032 more efficient than checking if it is a closing bracket at all */
1033 c=bd->pBiDi->text[position];
1034 for(idx=pLastIsoRun->limit-1; idx>=pLastIsoRun->start; idx--) {
1035 if(bd->openings[idx].match!=c)
1036 continue;
1037 /* We have a match */
1038 newProp=bracketProcessClosing(bd, idx, position);
1039 if(newProp==ON) { /* N0d */
1040 c=0; /* prevent handling as an opening */
1041 break;
1042 }
1043 pLastIsoRun->lastBase=ON;
1044 pLastIsoRun->contextDir=(UBiDiDirection)newProp;
1045 pLastIsoRun->contextPos=position;
1046 level=bd->pBiDi->levels[position];
1047 if(level&UBIDI_LEVEL_OVERRIDE) { /* X4, X5 */
1048 uint16_t flag;
1049 int32_t i;
1050 newProp=level&1;
1051 pLastIsoRun->lastStrong=newProp;
1052 flag=DIRPROP_FLAG(newProp);
1053 for(i=pLastIsoRun->start; i<idx; i++)
1054 bd->openings[i].flags|=flag;
1055 /* matching brackets are not overridden by LRO/RLO */
1056 bd->pBiDi->levels[position]&=~UBIDI_LEVEL_OVERRIDE;
1057 }
1058 /* matching brackets are not overridden by LRO/RLO */
1059 bd->pBiDi->levels[bd->openings[idx].position]&=~UBIDI_LEVEL_OVERRIDE;
1060 return TRUE;
1061 }
1062 /* We get here only if the ON character is not a matching closing
1063 bracket or it is a case of N0d */
1064 /* Now see if it is an opening bracket */
1065 if(c)
1066 match= static_cast<UChar>(u_getBidiPairedBracket(c)); /* get the matching char */
1067 else
1068 match=0;
1069 if(match!=c && /* has a matching char */
1070 ubidi_getPairedBracketType(c)==U_BPT_OPEN) { /* opening bracket */
1071 /* special case: process synonyms
1072 create an opening entry for each synonym */
1073 if(match==0x232A) { /* RIGHT-POINTING ANGLE BRACKET */
1074 if(!bracketAddOpening(bd, 0x3009, position))
1075 return FALSE;
1076 }
1077 else if(match==0x3009) { /* RIGHT ANGLE BRACKET */
1078 if(!bracketAddOpening(bd, 0x232A, position))
1079 return FALSE;
1080 }
1081 if(!bracketAddOpening(bd, match, position))
1082 return FALSE;
1083 }
1084 }
1085 level=bd->pBiDi->levels[position];
1086 if(level&UBIDI_LEVEL_OVERRIDE) { /* X4, X5 */
1087 newProp=level&1;
1088 if(dirProp!=S && dirProp!=WS && dirProp!=ON)
1089 dirProps[position]=newProp;
1090 pLastIsoRun->lastBase=newProp;
1091 pLastIsoRun->lastStrong=newProp;
1092 pLastIsoRun->contextDir=(UBiDiDirection)newProp;
1093 pLastIsoRun->contextPos=position;
1094 }
1095 else if(dirProp<=R || dirProp==AL) {
1096 newProp= static_cast<DirProp>(DIR_FROM_STRONG(dirProp));
1097 pLastIsoRun->lastBase=dirProp;
1098 pLastIsoRun->lastStrong=dirProp;
1099 pLastIsoRun->contextDir=(UBiDiDirection)newProp;
1100 pLastIsoRun->contextPos=position;
1101 }
1102 else if(dirProp==EN) {
1103 pLastIsoRun->lastBase=EN;
1104 if(pLastIsoRun->lastStrong==L) {
1105 newProp=L; /* W7 */
1106 if(!bd->isNumbersSpecial)
1107 dirProps[position]=ENL;
1108 pLastIsoRun->contextDir=(UBiDiDirection)L;
1109 pLastIsoRun->contextPos=position;
1110 }
1111 else {
1112 newProp=R; /* N0 */
1113 if(pLastIsoRun->lastStrong==AL)
1114 dirProps[position]=AN; /* W2 */
1115 else
1116 dirProps[position]=ENR;
1117 pLastIsoRun->contextDir=(UBiDiDirection)R;
1118 pLastIsoRun->contextPos=position;
1119 }
1120 }
1121 else if(dirProp==AN) {
1122 newProp=R; /* N0 */
1123 pLastIsoRun->lastBase=AN;
1124 pLastIsoRun->contextDir=(UBiDiDirection)R;
1125 pLastIsoRun->contextPos=position;
1126 }
1127 else if(dirProp==NSM) {
1128 /* if the last real char was ON, change NSM to ON so that it
1129 will stay ON even if the last real char is a bracket which
1130 may be changed to L or R */
1131 newProp=pLastIsoRun->lastBase;
1132 if(newProp==ON)
1133 dirProps[position]=newProp;
1134 }
1135 else {
1136 newProp=dirProp;
1137 pLastIsoRun->lastBase=dirProp;
1138 }
1139 if(newProp<=R || newProp==AL) {
1140 int32_t i;
1141 uint16_t flag=DIRPROP_FLAG(DIR_FROM_STRONG(newProp));
1142 for(i=pLastIsoRun->start; i<pLastIsoRun->limit; i++)
1143 if(position>bd->openings[i].position)
1144 bd->openings[i].flags|=flag;
1145 }
1146 return TRUE;
1147 }
1148
1149 /* perform (X1)..(X9) ------------------------------------------------------- */
1150
1151 /* determine if the text is mixed-directional or single-directional */
1152 static UBiDiDirection
1153 directionFromFlags(UBiDi *pBiDi) {
1154 Flags flags=pBiDi->flags;
1155 /* if the text contains AN and neutrals, then some neutrals may become RTL */
1156 if(!(flags&MASK_RTL || ((flags&DIRPROP_FLAG(AN)) && (flags&MASK_POSSIBLE_N)))) {
1157 return UBIDI_LTR;
1158 } else if(!(flags&MASK_LTR)) {
1159 return UBIDI_RTL;
1160 } else {
1161 return UBIDI_MIXED;
1162 }
1163 }
1164
1165 /*
1166 * Resolve the explicit levels as specified by explicit embedding codes.
1167 * Recalculate the flags to have them reflect the real properties
1168 * after taking the explicit embeddings into account.
1169 *
1170 * The BiDi algorithm is designed to result in the same behavior whether embedding
1171 * levels are externally specified (from "styled text", supposedly the preferred
1172 * method) or set by explicit embedding codes (LRx, RLx, PDF, FSI, PDI) in the plain text.
1173 * That is why (X9) instructs to remove all not-isolate explicit codes (and BN).
1174 * However, in a real implementation, the removal of these codes and their index
1175 * positions in the plain text is undesirable since it would result in
1176 * reallocated, reindexed text.
1177 * Instead, this implementation leaves the codes in there and just ignores them
1178 * in the subsequent processing.
1179 * In order to get the same reordering behavior, positions with a BN or a not-isolate
1180 * explicit embedding code just get the same level assigned as the last "real"
1181 * character.
1182 *
1183 * Some implementations, not this one, then overwrite some of these
1184 * directionality properties at "real" same-level-run boundaries by
1185 * L or R codes so that the resolution of weak types can be performed on the
1186 * entire paragraph at once instead of having to parse it once more and
1187 * perform that resolution on same-level-runs.
1188 * This limits the scope of the implicit rules in effectively
1189 * the same way as the run limits.
1190 *
1191 * Instead, this implementation does not modify these codes, except for
1192 * paired brackets whose properties (ON) may be replaced by L or R.
1193 * On one hand, the paragraph has to be scanned for same-level-runs, but
1194 * on the other hand, this saves another loop to reset these codes,
1195 * or saves making and modifying a copy of dirProps[].
1196 *
1197 *
1198 * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm.
1199 *
1200 *
1201 * Handling the stack of explicit levels (Xn):
1202 *
1203 * With the BiDi stack of explicit levels, as pushed with each
1204 * LRE, RLE, LRO, RLO, LRI, RLI and FSI and popped with each PDF and PDI,
1205 * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL.
1206 *
1207 * In order to have a correct push-pop semantics even in the case of overflows,
1208 * overflow counters and a valid isolate counter are used as described in UAX#9
1209 * section 3.3.2 "Explicit Levels and Directions".
1210 *
1211 * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
1212 *
1213 * Returns normally the direction; -1 if there was a memory shortage
1214 *
1215 */
1216 static UBiDiDirection
1217 resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
1218 DirProp *dirProps=pBiDi->dirProps;
1219 uint16_t *dirInsert = pBiDi->dirInsert; /* may be NULL */
1220 UBiDiLevel *levels=pBiDi->levels;
1221 const UChar *text=pBiDi->text;
1222
1223 int32_t i=0, length=pBiDi->length;
1224 Flags flags=pBiDi->flags; /* collect all directionalities in the text */
1225 DirProp dirProp;
1226 int32_t dirInsertValue;
1227 int8_t dirInsertIndex; /* position within dirInsertValue, if any */
1228 UBiDiLevel level=GET_PARALEVEL(pBiDi, 0);
1229 UBiDiDirection direction;
1230 pBiDi->isolateCount=0;
1231
1232 if(U_FAILURE(*pErrorCode)) { return UBIDI_LTR; }
1233
1234 /* determine if the text is mixed-directional or single-directional */
1235 direction=directionFromFlags(pBiDi);
1236
1237 /* we may not need to resolve any explicit levels */
1238 if((direction!=UBIDI_MIXED)) {
1239 /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
1240 return direction;
1241 }
1242 if(pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL) {
1243 /* inverse BiDi: mixed, but all characters are at the same embedding level */
1244 /* set all levels to the paragraph level */
1245 int32_t paraIndex, start, limit;
1246 for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) {
1247 if(paraIndex==0)
1248 start=0;
1249 else
1250 start=pBiDi->paras[paraIndex-1].limit;
1251 limit=pBiDi->paras[paraIndex].limit;
1252 level= static_cast<UBiDiLevel>(pBiDi->paras[paraIndex].level);
1253 for(i=start; i<limit; i++)
1254 levels[i]=level;
1255 }
1256 return direction; /* no bracket matching for inverse BiDi */
1257 }
1258 if(!(flags&(MASK_EXPLICIT|MASK_ISO))) {
1259 /* no embeddings, set all levels to the paragraph level */
1260 /* we still have to perform bracket matching */
1261 int32_t paraIndex, start, limit;
1262 BracketData bracketData;
1263 bracketInit(pBiDi, &bracketData);
1264 for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) {
1265 if(paraIndex==0)
1266 start=0;
1267 else
1268 start=pBiDi->paras[paraIndex-1].limit;
1269 limit=pBiDi->paras[paraIndex].limit;
1270 level= static_cast<UBiDiLevel>(pBiDi->paras[paraIndex].level);
1271 for(i=start; i<limit; i++) {
1272 levels[i]=level;
1273 dirProp=dirProps[i];
1274 if(dirProp==BN)
1275 continue;
1276 if(dirProp==B) {
1277 if((i+1)<length) {
1278 if(text[i]==CR && text[i+1]==LF)
1279 continue; /* skip CR when followed by LF */
1280 bracketProcessB(&bracketData, level);
1281 }
1282 continue;
1283 }
1284 if(!bracketProcessChar(&bracketData, i)) {
1285 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1286 return UBIDI_LTR;
1287 }
1288 }
1289 }
1290 return direction;
1291 }
1292 {
1293 /* continue to perform (Xn) */
1294
1295 /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
1296 /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */
1297 UBiDiLevel embeddingLevel=level, newLevel;
1298 UBiDiLevel previousLevel=level; /* previous level for regular (not CC) characters */
1299 int32_t lastCcPos=0; /* index of last effective LRx,RLx, PDx */
1300 DirProp lastCcDirProp=0; /* dirProp of last effective LRx,RLx, PDx */
1301
1302 /* The following stack remembers the embedding level and the ISOLATE flag of level runs.
1303 stackLast points to its current entry. */
1304 uint16_t stack[UBIDI_MAX_EXPLICIT_LEVEL+2]; /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL
1305 but we need one more entry as base */
1306 uint32_t stackLast=0;
1307 int32_t overflowIsolateCount=0;
1308 int32_t overflowEmbeddingCount=0;
1309 int32_t validIsolateCount=0;
1310 BracketData bracketData;
1311 bracketInit(pBiDi, &bracketData);
1312 stack[0]=level; /* initialize base entry to para level, no override, no isolate */
1313
1314 /* recalculate the flags */
1315 flags=0;
1316
1317 dirInsertValue = 0;
1318 dirInsertIndex = -1; /* indicate that we have not checked dirInsert yet */
1319 for(i=0; i<length; ) { /* now conditionally increment at end */
1320 if (dirInsert != NULL && dirInsertIndex < 0) {
1321 dirInsertValue = dirInsert[i];
1322 }
1323 if (dirInsertValue > 0) {
1324 dirInsertIndex++;
1325 dirProp = (DirProp)stdDirFromInsertDir[dirInsertValue & 0x000F];
1326 dirInsertValue >>= 4;
1327 } else {
1328 dirInsertIndex = -1;
1329 dirProp=dirProps[i];
1330 }
1331 switch(dirProp) {
1332 case LRE:
1333 case RLE:
1334 case LRO:
1335 case RLO:
1336 /* (X2, X3, X4, X5) */
1337 flags|=DIRPROP_FLAG(BN);
1338 levels[i]=previousLevel;
1339 if (dirProp==LRE || dirProp==LRO)
1340 /* least greater even level */
1341 newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1));
1342 else
1343 /* least greater odd level */
1344 newLevel=(UBiDiLevel)((NO_OVERRIDE(embeddingLevel)+1)|1);
1345 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 &&
1346 overflowEmbeddingCount==0) {
1347 lastCcPos=i;
1348 lastCcDirProp = dirProp;
1349 embeddingLevel=newLevel;
1350 if(dirProp==LRO || dirProp==RLO)
1351 embeddingLevel|=UBIDI_LEVEL_OVERRIDE;
1352 stackLast++;
1353 stack[stackLast]=embeddingLevel;
1354 /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE and RLE
1355 since this has already been done for newLevel which is
1356 the source for embeddingLevel.
1357 */
1358 } else {
1359 if(overflowIsolateCount==0)
1360 overflowEmbeddingCount++;
1361 }
1362 break;
1363 case PDF:
1364 /* (X7) */
1365 flags|=DIRPROP_FLAG(BN);
1366 levels[i]=previousLevel;
1367 /* handle all the overflow cases first */
1368 if(overflowIsolateCount) {
1369 break;
1370 }
1371 if(overflowEmbeddingCount) {
1372 overflowEmbeddingCount--;
1373 break;
1374 }
1375 if(stackLast>0 && stack[stackLast]<ISOLATE) { /* not an isolate entry */
1376 lastCcPos=i;
1377 lastCcDirProp = dirProp;
1378 stackLast--;
1379 embeddingLevel=(UBiDiLevel)stack[stackLast];
1380 }
1381 break;
1382 case LRI:
1383 case RLI:
1384 flags|=(DIRPROP_FLAG(ON)|DIRPROP_FLAG_LR(embeddingLevel));
1385 levels[i]=NO_OVERRIDE(embeddingLevel);
1386 if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
1387 bracketProcessBoundary(&bracketData, lastCcPos, lastCcDirProp,
1388 previousLevel, embeddingLevel);
1389 flags|=DIRPROP_FLAG_MULTI_RUNS;
1390 }
1391 previousLevel=embeddingLevel;
1392 /* (X5a, X5b) */
1393 if(dirProp==LRI)
1394 /* least greater even level */
1395 newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1));
1396 else
1397 /* least greater odd level */
1398 newLevel=(UBiDiLevel)((NO_OVERRIDE(embeddingLevel)+1)|1);
1399 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 &&
1400 overflowEmbeddingCount==0) {
1401 flags|=DIRPROP_FLAG(dirProp);
1402 lastCcPos=i;
1403 lastCcDirProp = dirProp;
1404 validIsolateCount++;
1405 if(validIsolateCount>pBiDi->isolateCount)
1406 pBiDi->isolateCount=validIsolateCount;
1407 embeddingLevel=newLevel;
1408 /* we can increment stackLast without checking because newLevel
1409 will exceed UBIDI_MAX_EXPLICIT_LEVEL before stackLast overflows */
1410 stackLast++;
1411 stack[stackLast]=embeddingLevel+ISOLATE;
1412 bracketProcessLRI_RLI(&bracketData, embeddingLevel);
1413 } else {
1414 /* make it WS so that it is handled by adjustWSLevels() */
1415 if (dirInsertIndex < 0) {
1416 dirProps[i]=WS;
1417 } else {
1418 dirInsert[i] &= ~(0x000F << (4*dirInsertIndex));
1419 dirInsert[i] |= (Insert_WS << (4*dirInsertIndex));
1420 }
1421 overflowIsolateCount++;
1422 }
1423 break;
1424 case PDI:
1425 if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
1426 bracketProcessBoundary(&bracketData, lastCcPos, lastCcDirProp,
1427 previousLevel, embeddingLevel);
1428 flags|=DIRPROP_FLAG_MULTI_RUNS;
1429 }
1430 /* (X6a) */
1431 if(overflowIsolateCount) {
1432 overflowIsolateCount--;
1433 /* make it WS so that it is handled by adjustWSLevels() */
1434 if (dirInsertIndex < 0) {
1435 dirProps[i]=WS;
1436 } else {
1437 dirInsert[i] &= ~(0x000F << (4*dirInsertIndex));
1438 dirInsert[i] |= (Insert_WS << (4*dirInsertIndex));
1439 }
1440 }
1441 else if(validIsolateCount) {
1442 flags|=DIRPROP_FLAG(PDI);
1443 lastCcPos=i;
1444 lastCcDirProp = dirProp;
1445 overflowEmbeddingCount=0;
1446 while(stack[stackLast]<ISOLATE) /* pop embedding entries */
1447 stackLast--; /* until the last isolate entry */
1448 stackLast--; /* pop also the last isolate entry */
1449 validIsolateCount--;
1450 bracketProcessPDI(&bracketData);
1451 } else
1452 /* make it WS so that it is handled by adjustWSLevels() */
1453 if (dirInsertIndex < 0) {
1454 dirProps[i]=WS;
1455 } else {
1456 dirInsert[i] &= ~(0x000F << (4*dirInsertIndex));
1457 dirInsert[i] |= (Insert_WS << (4*dirInsertIndex));
1458 }
1459 embeddingLevel=(UBiDiLevel)stack[stackLast]&~ISOLATE;
1460 flags|=(DIRPROP_FLAG(ON)|DIRPROP_FLAG_LR(embeddingLevel));
1461 previousLevel=embeddingLevel;
1462 levels[i]=NO_OVERRIDE(embeddingLevel);
1463 break;
1464 case B:
1465 flags|=DIRPROP_FLAG(B);
1466 levels[i]=GET_PARALEVEL(pBiDi, i);
1467 if((i+1)<length) {
1468 if(text[i]==CR && text[i+1]==LF)
1469 break; /* skip CR when followed by LF */
1470 overflowEmbeddingCount=overflowIsolateCount=0;
1471 validIsolateCount=0;
1472 stackLast=0;
1473 previousLevel=embeddingLevel=GET_PARALEVEL(pBiDi, i+1);
1474 stack[0]=embeddingLevel; /* initialize base entry to para level, no override, no isolate */
1475 bracketProcessB(&bracketData, embeddingLevel);
1476 }
1477 break;
1478 case BN:
1479 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
1480 /* they will get their levels set correctly in adjustWSLevels() */
1481 levels[i]=previousLevel;
1482 flags|=DIRPROP_FLAG(BN);
1483 break;
1484 default:
1485 /* all other types are normal characters and get the "real" level */
1486 if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
1487 bracketProcessBoundary(&bracketData, lastCcPos, lastCcDirProp,
1488 previousLevel, embeddingLevel);
1489 flags|=DIRPROP_FLAG_MULTI_RUNS;
1490 if(embeddingLevel&UBIDI_LEVEL_OVERRIDE)
1491 flags|=DIRPROP_FLAG_O(embeddingLevel);
1492 else
1493 flags|=DIRPROP_FLAG_E(embeddingLevel);
1494 }
1495 previousLevel=embeddingLevel;
1496 levels[i]=embeddingLevel;
1497 if(!bracketProcessChar(&bracketData, i))
1498 return (UBiDiDirection)-1;
1499 /* the dirProp may have been changed in bracketProcessChar() */
1500 flags|=DIRPROP_FLAG(dirProps[i]);
1501 break;
1502 }
1503 if (dirInsertIndex < 0) {
1504 ++i;
1505 }
1506 }
1507 if(flags&MASK_EMBEDDING)
1508 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
1509 if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B)))
1510 flags|=DIRPROP_FLAG(L);
1511 /* again, determine if the text is mixed-directional or single-directional */
1512 pBiDi->flags=flags;
1513 direction=directionFromFlags(pBiDi);
1514 }
1515 return direction;
1516 }
1517
1518 /*
1519 * Use a pre-specified embedding levels array:
1520 *
1521 * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
1522 * ignore all explicit codes (X9),
1523 * and check all the preset levels.
1524 *
1525 * Recalculate the flags to have them reflect the real properties
1526 * after taking the explicit embeddings into account.
1527 */
1528 static UBiDiDirection
1529 checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
1530 DirProp *dirProps=pBiDi->dirProps;
1531 UBiDiLevel *levels=pBiDi->levels;
1532 int32_t isolateCount=0;
1533
1534 int32_t length=pBiDi->length;
1535 Flags flags=0; /* collect all directionalities in the text */
1536 pBiDi->isolateCount=0;
1537
1538 int32_t currentParaIndex = 0;
1539 int32_t currentParaLimit = pBiDi->paras[0].limit;
1540 int32_t currentParaLevel = pBiDi->paraLevel;
1541
1542 for(int32_t i=0; i<length; ++i) {
1543 UBiDiLevel level=levels[i];
1544 DirProp dirProp=dirProps[i];
1545 if(dirProp==LRI || dirProp==RLI) {
1546 isolateCount++;
1547 if(isolateCount>pBiDi->isolateCount)
1548 pBiDi->isolateCount=isolateCount;
1549 }
1550 else if(dirProp==PDI)
1551 isolateCount--;
1552 else if(dirProp==B)
1553 isolateCount=0;
1554
1555 // optimized version of int32_t currentParaLevel = GET_PARALEVEL(pBiDi, i);
1556 if (pBiDi->defaultParaLevel != 0 &&
1557 i == currentParaLimit && (currentParaIndex + 1) < pBiDi->paraCount) {
1558 currentParaLevel = pBiDi->paras[++currentParaIndex].level;
1559 currentParaLimit = pBiDi->paras[currentParaIndex].limit;
1560 }
1561
1562 UBiDiLevel overrideFlag = level & UBIDI_LEVEL_OVERRIDE;
1563 level &= ~UBIDI_LEVEL_OVERRIDE;
1564 if (level < currentParaLevel || UBIDI_MAX_EXPLICIT_LEVEL < level) {
1565 if (level == 0) {
1566 if (dirProp == B) {
1567 // Paragraph separators are ok with explicit level 0.
1568 // Prevents reordering of paragraphs.
1569 } else {
1570 // Treat explicit level 0 as a wildcard for the paragraph level.
1571 // Avoid making the caller guess what the paragraph level would be.
1572 level = (UBiDiLevel)currentParaLevel;
1573 levels[i] = level | overrideFlag;
1574 }
1575 } else {
1576 // 1 <= level < currentParaLevel or UBIDI_MAX_EXPLICIT_LEVEL < level
1577 /* level out of bounds */
1578 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1579 return UBIDI_LTR;
1580 }
1581 }
1582 if (overrideFlag != 0) {
1583 /* keep the override flag in levels[i] but adjust the flags */
1584 flags|=DIRPROP_FLAG_O(level);
1585 } else {
1586 /* set the flags */
1587 flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp);
1588 }
1589 }
1590 if(flags&MASK_EMBEDDING)
1591 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
1592 /* determine if the text is mixed-directional or single-directional */
1593 pBiDi->flags=flags;
1594 return directionFromFlags(pBiDi);
1595 }
1596
1597 /******************************************************************
1598 The Properties state machine table
1599 *******************************************************************
1600
1601 All table cells are 8 bits:
1602 bits 0..4: next state
1603 bits 5..7: action to perform (if > 0)
1604
1605 Cells may be of format "n" where n represents the next state
1606 (except for the rightmost column).
1607 Cells may also be of format "s(x,y)" where x represents an action
1608 to perform and y represents the next state.
1609
1610 *******************************************************************
1611 Definitions and type for properties state table
1612 *******************************************************************
1613 */
/* number of columns of the properties state table (last one is Res) */
#define IMPTABPROPS_COLUMNS 16
/* index of the Res column */
#define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1)
/* extract the next state (bits 0..4) from a table cell */
#define GET_STATEPROPS(cell) ((cell)&0x1f)
/* extract the action (bits 5..7) from a table cell */
#define GET_ACTIONPROPS(cell) ((cell)>>5)
/* build a table cell from an action and a next state;
   the arguments are parenthesized so that expressions expand correctly */
#define s(action, newState) ((uint8_t)((newState)+((action)<<5)))
1619
/*
 * dirProp regrouped: maps each directional property (used as the index)
 * to the column of impTabProps which handles it.
 */
static const uint8_t groupProp[] =
{
    0,      /* L   */
    1,      /* R   */
    2,      /* EN  */
    7,      /* ES  */
    8,      /* ET  */
    3,      /* AN  */
    9,      /* CS  */
    6,      /* B   */
    5,      /* S   */
    4,      /* WS  */
    4,      /* ON  */
    10,     /* LRE */
    10,     /* LRO */
    12,     /* AL  */
    10,     /* RLE */
    10,     /* RLO */
    10,     /* PDF */
    11,     /* NSM */
    10,     /* BN  */
    4,      /* FSI */
    4,      /* LRI */
    4,      /* RLI */
    4,      /* PDI */
    13,     /* ENL */
    14      /* ENR */
};

/* reduced dirProp */
enum {
    DirProp_L=0,
    DirProp_R=1,
    DirProp_EN=2,
    DirProp_AN=3,
    DirProp_ON=4,
    DirProp_S=5,
    DirProp_B=6
};
1626
1627 /******************************************************************
1628
1629 PROPERTIES STATE TABLE
1630
1631 In table impTabProps,
1632 - the ON column regroups ON and WS, FSI, RLI, LRI and PDI
1633 - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF
1634 - the Res column is the reduced property assigned to a run
1635
1636 Action 1: process current run1, init new run1
1637 2: init new run2
1638 3: process run1, process run2, init new run1
1639 4: process run1, set run1=run2, init new run2
1640
1641 Notes:
1642 1) This table is used in resolveImplicitLevels().
1643 2) This table triggers actions when there is a change in the Bidi
1644 property of incoming characters (action 1).
1645 3) Most such property sequences are processed immediately (in
1646 fact, passed to processPropertySeq().
1647 4) However, numbers are assembled as one sequence. This means
1648 that undefined situations (like CS following digits, until
1649 it is known if the next char will be a digit) are held until
1650 following chars define them.
1651 Example: digits followed by CS, then comes another CS or ON;
1652 the digits will be processed, then the CS assigned
1653 as the start of an ON sequence (action 3).
1654 5) There are cases where more than one sequence must be
1655 processed, for instance digits followed by CS followed by L:
1656 the digits must be processed as one sequence, and the CS
1657 must be processed as an ON sequence, all this before starting
1658 assembling chars for the opening L sequence.
1659
1660
1661 */
/*
 * impTabProps: the properties state table.
 * One row per state (the state number and name are given in the comment
 * which starts each row); one column per regrouped directional property,
 * in the order listed in the header row, followed by the Res column.
 * A plain number n is the next state with no action; s(x,y) requests
 * action x and next state y. The Res cell of a row holds the reduced
 * property (DirProp_xxx) assigned to a run which ends in that state.
 */
1662 static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] =
1663 {
1664 /* L , R , EN , AN , ON , S , B , ES , ET , CS , BN , NSM , AL , ENL , ENR , Res */
1665 /* 0 Init */ { 1 , 2 , 4 , 5 , 7 , 15 , 17 , 7 , 9 , 7 , 0 , 7 , 3 , 18 , 21 , DirProp_ON },
1666 /* 1 L */ { 1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 1 , 1 , s(1,3),s(1,18),s(1,21), DirProp_L },
1667 /* 2 R */ { s(1,1), 2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 2 , 2 , s(1,3),s(1,18),s(1,21), DirProp_R },
1668 /* 3 AL */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8), 3 , 3 , 3 ,s(1,18),s(1,21), DirProp_R },
1669 /* 4 EN */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10), 11 ,s(2,10), 4 , 4 , s(1,3), 18 , 21 , DirProp_EN },
1670 /* 5 AN */ { s(1,1), s(1,2), s(1,4), 5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12), 5 , 5 , s(1,3),s(1,18),s(1,21), DirProp_AN },
1671 /* 6 AL:EN/AN */ { s(1,1), s(1,2), 6 , 6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13), 6 , 6 , s(1,3), 18 , 21 , DirProp_AN },
1672 /* 7 ON */ { s(1,1), s(1,2), s(1,4), s(1,5), 7 ,s(1,15),s(1,17), 7 ,s(2,14), 7 , 7 , 7 , s(1,3),s(1,18),s(1,21), DirProp_ON },
1673 /* 8 AL:ON */ { s(1,1), s(1,2), s(1,6), s(1,6), 8 ,s(1,16),s(1,17), 8 , 8 , 8 , 8 , 8 , s(1,3),s(1,18),s(1,21), DirProp_ON },
1674 /* 9 ET */ { s(1,1), s(1,2), 4 , s(1,5), 7 ,s(1,15),s(1,17), 7 , 9 , 7 , 9 , 9 , s(1,3), 18 , 21 , DirProp_ON },
1675 /*10 EN+ES/CS */ { s(3,1), s(3,2), 4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 10 , s(4,7), s(3,3), 18 , 21 , DirProp_EN },
1676 /*11 EN+ET */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 11 , s(1,7), 11 , 11 , s(1,3), 18 , 21 , DirProp_EN },
1677 /*12 AN+CS */ { s(3,1), s(3,2), s(3,4), 5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 12 , s(4,7), s(3,3),s(3,18),s(3,21), DirProp_AN },
1678 /*13 AL:EN/AN+CS */ { s(3,1), s(3,2), 6 , 6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8), 13 , s(4,8), s(3,3), 18 , 21 , DirProp_AN },
1679 /*14 ON+ET */ { s(1,1), s(1,2), s(4,4), s(1,5), 7 ,s(1,15),s(1,17), 7 , 14 , 7 , 14 , 14 , s(1,3),s(4,18),s(4,21), DirProp_ON },
1680 /*15 S */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7), 15 ,s(1,17), s(1,7), s(1,9), s(1,7), 15 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_S },
1681 /*16 AL:S */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8), 16 ,s(1,17), s(1,8), s(1,8), s(1,8), 16 , s(1,8), s(1,3),s(1,18),s(1,21), DirProp_S },
1682 /*17 B */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15), 17 , s(1,7), s(1,9), s(1,7), 17 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_B },
1683 /*18 ENL */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,19), 20 ,s(2,19), 18 , 18 , s(1,3), 18 , 21 , DirProp_L },
1684 /*19 ENL+ES/CS */ { s(3,1), s(3,2), 18 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 19 , s(4,7), s(3,3), 18 , 21 , DirProp_L },
1685 /*20 ENL+ET */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 20 , s(1,7), 20 , 20 , s(1,3), 18 , 21 , DirProp_L },
1686 /*21 ENR */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,22), 23 ,s(2,22), 21 , 21 , s(1,3), 18 , 21 , DirProp_AN },
1687 /*22 ENR+ES/CS */ { s(3,1), s(3,2), 21 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 22 , s(4,7), s(3,3), 18 , 21 , DirProp_AN },
1688 /*23 ENR+ET */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 23 , s(1,7), 23 , 23 , s(1,3), 18 , 21 , DirProp_AN }
1689 };
1690
1691 /* we must undef macro s because the levels tables have a different
1692 * structure (4 bits for action and 4 bits for next state).
1693 */
1694 #undef s
1695
1696 /******************************************************************
1697 The levels state machine tables
1698 *******************************************************************
1699
1700 All table cells are 8 bits:
1701 bits 0..3: next state
1702 bits 4..7: action to perform (if > 0)
1703
1704 Cells may be of format "n" where n represents the next state
1705 (except for the rightmost column).
1706 Cells may also be of format "s(x,y)" where x represents an action
1707 to perform and y represents the next state.
1708
1709 This format limits each table to 16 states and to 15 actions.
1710
1711 *******************************************************************
1712 Definitions and type for levels state tables
1713 *******************************************************************
1714 */
/* Number of cells per row of a levels state table. The table headers in this
 * file show seven property columns (L, R, EN, AN, ON, S, B) followed by the
 * Res column. */
#define IMPTABLEVELS_COLUMNS (DirProp_B + 2)
/* Index of the rightmost (Res) column of a levels state table row. */
#define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1)
/* Bits 0..3 of a cell: the next state. */
#define GET_STATE(cell) ((cell)&0x0f)
/* Bits 4..7 of a cell: the table-local action number (0 means no action). */
#define GET_ACTION(cell) ((cell)>>4)
/* Packs an action number and a next state into one 8-bit cell.
 * Both arguments are parenthesized so that expression arguments
 * (e.g. s(a|b, n)) are packed as written; plain integer arguments yield
 * exactly the same values as before. */
#define s(action, newState) ((uint8_t)((newState)+((action)<<4)))
1720
/* A levels state table: one row per state, IMPTABLEVELS_COLUMNS cells per row. */
1721 typedef uint8_t ImpTab[][IMPTABLEVELS_COLUMNS];
/* An action map: indexed by a table-local action number, yields the action
 * number that processPropertySeq() switches on. */
1722 typedef uint8_t ImpAct[];
1723
1724 /* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct,
1725 * instead of having a pair of ImpTab and a pair of ImpAct.
1726 */
/* Bundles two levels state tables with their two action maps.
 * The pointers are stored untyped; resolveImplicitLevels() casts them back to
 * ImpTab / ImpAct and indexes both arrays with (runLevel & 1), so element [0]
 * serves even run levels and element [1] serves odd run levels. */
1727 typedef struct ImpTabPair {
1728 const void * pImpTab[2];
1729 const void * pImpAct[2];
1730 } ImpTabPair;
1731
1732 /******************************************************************
1733
1734 LEVELS STATE TABLES
1735
1736 In all levels state tables,
1737 - state 0 is the initial state
1738 - the Res column is the increment to add to the text level
1739 for this property sequence.
1740
1741 The impAct arrays for each table of a pair map the local action
1742 numbers of the table to the total list of actions. For instance,
1743 action 2 in a given table corresponds to the action number which
1744 appears in entry [2] of the impAct array for that table.
1745 The first entry of all impAct arrays must be 0.
1746
1747 Action 1: init conditional sequence
1748 2: prepend conditional sequence to current sequence
1749 3: set ON sequence to new level - 1
1750 4: init EN/AN/ON sequence
1751 5: fix EN/AN/ON sequence followed by R
1752 6: set previous level sequence to level 2
1753
1754 Notes:
1755 1) These tables are used in processPropertySeq(). The input
1756 is property sequences as determined by resolveImplicitLevels.
1757 2) Most such property sequences are processed immediately
1758 (levels are assigned).
1759 3) However, some sequences cannot be assigned a final level till
1760 one or more following sequences are received. For instance,
1761 ON following an R sequence within an even-level paragraph.
1762 If the following sequence is R, the ON sequence will be
1763 assigned basic run level+1, and so will the R sequence.
1764 4) S is generally handled like ON, since its level will be fixed
1765 to paragraph level in adjustWSLevels().
1766
1767 */
1768
/* Default levels state tables.
 * Each row is a state; the first seven cells give the transition for the
 * property named in the column header, written either as a bare next state
 * or as s(action, nextState); the last cell (Res) is the level increment. */
1769 static const ImpTab impTabL_DEFAULT = /* Even paragraph level */
1770 /* In this table, conditional sequences receive the lower possible level
1771 until proven otherwise.
1772 */
1773 {
1774 /* L , R , EN , AN , ON , S , B , Res */
1775 /* 0 : init */ { 0 , 1 , 0 , 2 , 0 , 0 , 0 , 0 },
1776 /* 1 : R */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 1 },
1777 /* 2 : AN */ { 0 , 1 , 0 , 2 , s(1,5), s(1,5), 0 , 2 },
1778 /* 3 : R+EN/AN */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 2 },
1779 /* 4 : R+ON */ { 0 , s(2,1), s(3,3), s(3,3), 4 , 4 , 0 , 0 },
1780 /* 5 : AN+ON */ { 0 , s(2,1), 0 , s(3,2), 5 , 5 , 0 , 0 }
1781 };
1782 static const ImpTab impTabR_DEFAULT = /* Odd paragraph level */
1783 /* In this table, conditional sequences receive the lower possible level
1784 until proven otherwise.
1785 */
1786 {
1787 /* L , R , EN , AN , ON , S , B , Res */
1788 /* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 },
1789 /* 1 : L */ { 1 , 0 , 1 , 3 , s(1,4), s(1,4), 0 , 1 },
1790 /* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 },
1791 /* 3 : L+AN */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 1 },
1792 /* 4 : L+ON */ { s(2,1), 0 , s(2,1), 3 , 4 , 4 , 0 , 0 },
1793 /* 5 : L+AN+ON */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 0 }
1794 };
/* Identity action map: table-local action n is action n (n = 0..4). */
1795 static const ImpAct impAct0 = {0,1,2,3,4};
/* Default pair: the first table is used for even run levels, the second for
 * odd run levels (selected with runLevel & 1 in resolveImplicitLevels());
 * both use the identity action map. */
1796 static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT,
1797 &impTabR_DEFAULT},
1798 {&impAct0, &impAct0}};
1799
/* Levels state table for even run levels in the NUMBERS_SPECIAL pair below.
 * Cells are a bare next state or s(action, nextState); Res is the level
 * increment for the state. */
1800 static const ImpTab impTabL_NUMBERS_SPECIAL = /* Even paragraph level */
1801 /* In this table, conditional sequences receive the lower possible level
1802 until proven otherwise.
1803 */
1804 {
1805 /* L , R , EN , AN , ON , S , B , Res */
1806 /* 0 : init */ { 0 , 2 , s(1,1), s(1,1), 0 , 0 , 0 , 0 },
1807 /* 1 : L+EN/AN */ { 0 , s(4,2), 1 , 1 , 0 , 0 , 0 , 0 },
1808 /* 2 : R */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 1 },
1809 /* 3 : R+ON */ { 0 , s(2,2), s(3,4), s(3,4), 3 , 3 , 0 , 0 },
1810 /* 4 : R+EN/AN */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 2 }
1811 };
/* Pair for NUMBERS_SPECIAL: a dedicated even-level table, while odd run
 * levels reuse the default odd-level table; both use the identity action map. */
1812 static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL,
1813 &impTabR_DEFAULT},
1814 {&impAct0, &impAct0}};
1815
/* Levels state tables for the GROUP_NUMBERS_WITH_R pair below.
 * Cells are a bare next state or s(action, nextState); Res is the level
 * increment for the state. */
1816 static const ImpTab impTabL_GROUP_NUMBERS_WITH_R =
1817 /* In this table, EN/AN+ON sequences receive levels as if associated with R
1818 until proven that there is L or sor/eor on both sides. AN is handled like EN.
1819 */
1820 {
1821 /* L , R , EN , AN , ON , S , B , Res */
1822 /* 0 init */ { 0 , 3 , s(1,1), s(1,1), 0 , 0 , 0 , 0 },
1823 /* 1 EN/AN */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 2 },
1824 /* 2 EN/AN+ON */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 1 },
1825 /* 3 R */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 1 },
1826 /* 4 R+ON */ { s(2,0), 3 , 5 , 5 , 4 , s(2,0), s(2,0), 1 },
1827 /* 5 R+EN/AN */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 2 }
1828 };
1829 static const ImpTab impTabR_GROUP_NUMBERS_WITH_R =
1830 /* In this table, EN/AN+ON sequences receive levels as if associated with R
1831 until proven that there is L on both sides. AN is handled like EN.
1832 */
1833 {
1834 /* L , R , EN , AN , ON , S , B , Res */
1835 /* 0 init */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 0 },
1836 /* 1 EN/AN */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 1 },
1837 /* 2 L */ { 2 , 0 , s(1,4), s(1,4), s(1,3), 0 , 0 , 1 },
1838 /* 3 L+ON */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 0 },
1839 /* 4 L+EN/AN */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 1 }
1840 };
/* Pair for GROUP_NUMBERS_WITH_R: even-level table first, odd-level table
 * second; both use the identity action map. */
1841 static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R = {
1842 {&impTabL_GROUP_NUMBERS_WITH_R,
1843 &impTabR_GROUP_NUMBERS_WITH_R},
1844 {&impAct0, &impAct0}};
1845
1846
/* Levels state tables for the INVERSE_NUMBERS_AS_L pair below.
 * Cells are a bare next state or s(action, nextState); Res is the level
 * increment for the state. */
1847 static const ImpTab impTabL_INVERSE_NUMBERS_AS_L =
1848 /* This table is identical to the Default LTR table except that EN and AN are
1849 handled like L.
1850 */
1851 {
1852 /* L , R , EN , AN , ON , S , B , Res */
1853 /* 0 : init */ { 0 , 1 , 0 , 0 , 0 , 0 , 0 , 0 },
1854 /* 1 : R */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 1 },
1855 /* 2 : AN */ { 0 , 1 , 0 , 0 , s(1,5), s(1,5), 0 , 2 },
1856 /* 3 : R+EN/AN */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 2 },
1857 /* 4 : R+ON */ { s(2,0), 1 , s(2,0), s(2,0), 4 , 4 , s(2,0), 1 },
1858 /* 5 : AN+ON */ { s(2,0), 1 , s(2,0), s(2,0), 5 , 5 , s(2,0), 1 }
1859 };
1860 static const ImpTab impTabR_INVERSE_NUMBERS_AS_L =
1861 /* This table is identical to the Default RTL table except that EN and AN are
1862 handled like L.
1863 */
1864 {
1865 /* L , R , EN , AN , ON , S , B , Res */
1866 /* 0 : init */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 0 },
1867 /* 1 : L */ { 1 , 0 , 1 , 1 , s(1,4), s(1,4), 0 , 1 },
1868 /* 2 : EN/AN */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 1 },
1869 /* 3 : L+AN */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 1 },
1870 /* 4 : L+ON */ { s(2,1), 0 , s(2,1), s(2,1), 4 , 4 , 0 , 0 },
1871 /* 5 : L+AN+ON */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 0 }
1872 };
/* Pair for INVERSE_NUMBERS_AS_L: even-level table first, odd-level table
 * second; both use the identity action map. */
1873 static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L = {
1874 {&impTabL_INVERSE_NUMBERS_AS_L,
1875 &impTabR_INVERSE_NUMBERS_AS_L},
1876 {&impAct0, &impAct0}};
1877
/* Odd-level levels state table for the INVERSE_LIKE_DIRECT pair below.
 * Its local actions 2 and 3 are translated through impAct1. */
1878 static const ImpTab impTabR_INVERSE_LIKE_DIRECT = /* Odd paragraph level */
1879 /* In this table, conditional sequences receive the lower possible level
1880 until proven otherwise.
1881 */
1882 {
1883 /* L , R , EN , AN , ON , S , B , Res */
1884 /* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 },
1885 /* 1 : L */ { 1 , 0 , 1 , 2 , s(1,3), s(1,3), 0 , 1 },
1886 /* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 },
1887 /* 3 : L+ON */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 0 },
1888 /* 4 : L+ON+AN */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 3 },
1889 /* 5 : L+AN+ON */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 2 },
1890 /* 6 : L+ON+EN */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 1 }
1891 };
/* Action map: local action 1 stays 1, local 2 becomes action 13 and
 * local 3 becomes action 14 of processPropertySeq(). */
1892 static const ImpAct impAct1 = {0,1,13,14};
1893 /* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc"
1894 */
/* Pair for INVERSE_LIKE_DIRECT: even run levels use the default even-level
 * table with the identity map; odd run levels use the table above with impAct1. */
1895 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = {
1896 {&impTabL_DEFAULT,
1897 &impTabR_INVERSE_LIKE_DIRECT},
1898 {&impAct0, &impAct1}};
1899
/* Levels state tables for the INVERSE_LIKE_DIRECT_WITH_MARKS pair below.
 * Local action numbers in the even-level table are translated through
 * impAct2, those in the odd-level table through impAct3. */
1900 static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS =
1901 /* The case handled in this table is (visually): R EN L
1902 */
1903 {
1904 /* L , R , EN , AN , ON , S , B , Res */
1905 /* 0 : init */ { 0 , s(6,3), 0 , 1 , 0 , 0 , 0 , 0 },
1906 /* 1 : L+AN */ { 0 , s(6,3), 0 , 1 , s(1,2), s(3,0), 0 , 4 },
1907 /* 2 : L+AN+ON */ { s(2,0), s(6,3), s(2,0), 1 , 2 , s(3,0), s(2,0), 3 },
1908 /* 3 : R */ { 0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0), 0 , 3 },
1909 /* 4 : R+ON */ { s(3,0), s(4,3), s(5,5), s(5,6), 4 , s(3,0), s(3,0), 3 },
1910 /* 5 : R+EN */ { s(3,0), s(4,3), 5 , s(5,6), s(1,4), s(3,0), s(3,0), 4 },
1911 /* 6 : R+AN */ { s(3,0), s(4,3), s(5,5), 6 , s(1,4), s(3,0), s(3,0), 4 }
1912 };
1913 static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS =
1914 /* The cases handled in this table are (visually): R EN L
1915 R L AN L
1916 */
1917 {
1918 /* L , R , EN , AN , ON , S , B , Res */
1919 /* 0 : init */ { s(1,3), 0 , 1 , 1 , 0 , 0 , 0 , 0 },
1920 /* 1 : R+EN/AN */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 1 },
1921 /* 2 : R+EN/AN+ON */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 0 },
1922 /* 3 : L */ { 3 , 0 , 3 , s(3,6), s(1,4), s(4,0), 0 , 1 },
1923 /* 4 : L+ON */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 0 },
1924 /* 5 : L+ON+EN */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 1 },
1925 /* 6 : L+AN */ { s(5,3), s(4,0), 6 , 6 , 4 , s(4,0), s(4,0), 3 }
1926 };
/* Action map for the even-level table: local actions 1..6 become
 * actions 1, 2, 5, 6, 7, 8 of processPropertySeq(). */
1927 static const ImpAct impAct2 = {0,1,2,5,6,7,8};
/* Action map for the odd-level table: local actions 1..5 become
 * actions 1, 9, 10, 11, 12 of processPropertySeq(). */
1928 static const ImpAct impAct3 = {0,1,9,10,11,12};
/* Pair for INVERSE_LIKE_DIRECT_WITH_MARKS: even-level table with impAct2,
 * odd-level table with impAct3. */
1929 static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = {
1930 {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
1931 &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
1932 {&impAct2, &impAct3}};
1933
/* Pair for INVERSE_FOR_NUMBERS_SPECIAL, built entirely from tables defined
 * above: even run levels use impTabL_NUMBERS_SPECIAL with the identity action
 * map; odd run levels use impTabR_INVERSE_LIKE_DIRECT with impAct1. */
1934 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = {
1935 {&impTabL_NUMBERS_SPECIAL,
1936 &impTabR_INVERSE_LIKE_DIRECT},
1937 {&impAct0, &impAct1}};
1938
/* Even-level levels state table for the pair below; its local action numbers
 * are translated through impAct2. */
1939 static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS =
1940 /* The case handled in this table is (visually): R EN L
1941 */
1942 {
1943 /* L , R , EN , AN , ON , S , B , Res */
1944 /* 0 : init */ { 0 , s(6,2), 1 , 1 , 0 , 0 , 0 , 0 },
1945 /* 1 : L+EN/AN */ { 0 , s(6,2), 1 , 1 , 0 , s(3,0), 0 , 4 },
1946 /* 2 : R */ { 0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0), 0 , 3 },
1947 /* 3 : R+ON */ { s(3,0), s(4,2), s(5,4), s(5,4), 3 , s(3,0), s(3,0), 3 },
1948 /* 4 : R+EN/AN */ { s(3,0), s(4,2), 4 , 4 , s(1,3), s(3,0), s(3,0), 4 }
1949 };
/* Pair for INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS: the even-level table above
 * with impAct2; odd run levels reuse impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS
 * with impAct3. */
1950 static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = {
1951 {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
1952 &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
1953 {&impAct2, &impAct3}};
1954
1955 #undef s
1956
/* Working state of the levels state machine for one run.
 * It is initialized in resolveImplicitLevels() and updated by each call to
 * processPropertySeq(). */
1957 typedef struct {
1958 const ImpTab * pImpTab; /* levels state table selected for this run (by runLevel & 1) */
1959 const ImpAct * pImpAct; /* action map that goes with pImpTab */
1960 int32_t startON; /* start of the pending ON sequence; set to -1 when there is none */
1961 int32_t startL2EN; /* start of level 2 sequence; -1 and -2 are used as markers by actions 5-7 */
1962 int32_t lastStrongRTL; /* index of last found R or AL; -1 when none */
1963 int32_t state; /* current state (row index into *pImpTab) */
1964 int32_t runStart; /* start position of the run */
1965 UBiDiLevel runLevel; /* run level before implicit solving */
1966 } LevState;
1967
1968 /*------------------------------------------------------------------------*/
1969
1970 static void
1971 addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag)
1972 /* param pos: position where to insert
1973 param flag: one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER
1974 */
1975 {
1976 #define FIRSTALLOC 10
1977 Point point;
1978 InsertPoints * pInsertPoints=&(pBiDi->insertPoints);
1979
1980 if (pInsertPoints->capacity == 0)
1981 {
1982 pInsertPoints->points=static_cast<Point *>(uprv_malloc(sizeof(Point)*FIRSTALLOC));
1983 if (pInsertPoints->points == NULL)
1984 {
1985 pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
1986 return;
1987 }
1988 pInsertPoints->capacity=FIRSTALLOC;
1989 }
1990 if (pInsertPoints->size >= pInsertPoints->capacity) /* no room for new point */
1991 {
1992 Point * savePoints=pInsertPoints->points;
1993 pInsertPoints->points=static_cast<Point *>(uprv_realloc(pInsertPoints->points,
1994 pInsertPoints->capacity*2*sizeof(Point)));
1995 if (pInsertPoints->points == NULL)
1996 {
1997 pInsertPoints->points=savePoints;
1998 pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
1999 return;
2000 }
2001 else pInsertPoints->capacity*=2;
2002 }
2003 point.pos=pos;
2004 point.flag=flag;
2005 pInsertPoints->points[pInsertPoints->size]=point;
2006 pInsertPoints->size++;
2007 #undef FIRSTALLOC
2008 }
2009
2010 static void
2011 setLevelsOutsideIsolates(UBiDi *pBiDi, int32_t start, int32_t limit, UBiDiLevel level)
2012 {
2013 DirProp *dirProps=pBiDi->dirProps, dirProp;
2014 uint16_t *dirInsert = pBiDi->dirInsert; /* may be NULL */
2015 UBiDiLevel *levels=pBiDi->levels;
2016 int32_t dirInsertValue;
2017 int8_t dirInsertIndex; /* position within dirInsertValue, if any */
2018 int32_t isolateCount=0, k;
2019 dirInsertValue = 0;
2020 dirInsertIndex = -1; /* indicate that we have not checked dirInsert yet */
2021 for(k=start; k<limit; k++) {
2022 if (dirInsert != NULL && dirInsertIndex < 0) {
2023 dirInsertValue = dirInsert[k];
2024 }
2025 if (dirInsertValue > 0) {
2026 dirInsertIndex++;
2027 dirProp = (DirProp)stdDirFromInsertDir[dirInsertValue & 0x000F];
2028 dirInsertValue >>= 4;
2029 } else {
2030 dirInsertIndex = -1;
2031 dirProp=dirProps[k];
2032 }
2033 if(dirProp==PDI)
2034 isolateCount--;
2035 if(isolateCount==0)
2036 levels[k]=level;
2037 if(dirProp==LRI || dirProp==RLI)
2038 isolateCount++;
2039 }
2040 }
2041
2042 /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
2043
2044 /*
2045 * This implementation of the (Wn) rules applies all rules in one pass.
2046 * In order to do so, it needs a look-ahead of typically 1 character
2047 * (except for W5: sequences of ET) and keeps track of changes
2048 * in a rule Wp that affect a later Wq (p<q).
2049 *
2050 * The (Nn) and (In) rules are also performed in that same single loop,
2051 * but effectively one iteration behind for white space.
2052 *
2053 * Since all implicit rules are performed in one step, it is not necessary
2054 * to actually store the intermediate directional properties in dirProps[].
2055 */
2056
/*
 * Runs one step of the levels state machine for the property sequence
 * [start, limit) whose resolved property is _prop.
 *
 * The cell (*pImpTab)[current state][_prop] supplies the next state and a
 * table-local action number; the action number is translated through
 * *pImpAct into the action executed by the switch below. After the action,
 * if the new state's Res value (addLevel) is nonzero, or an action moved
 * start back before the original start, levels[start..limit) is set to
 * runLevel + addLevel.
 *
 * resolveImplicitLevels() also calls this with start == limit (an empty
 * sequence) to feed sor and eor into the machine.
 */
2057 static void
2058 processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop,
2059 int32_t start, int32_t limit) {
2060 uint8_t cell, oldStateSeq, actionSeq;
2061 const ImpTab * pImpTab=pLevState->pImpTab;
2062 const ImpAct * pImpAct=pLevState->pImpAct;
2063 UBiDiLevel * levels=pBiDi->levels;
2064 UBiDiLevel level, addLevel;
2065 InsertPoints * pInsertPoints;
2066 int32_t start0, k;
2067
2068 start0=start; /* save original start position */
2069 oldStateSeq=(uint8_t)pLevState->state;
2070 cell=(*pImpTab)[oldStateSeq][_prop];
2071 pLevState->state=GET_STATE(cell); /* isolate the new state */
2072 actionSeq=(*pImpAct)[GET_ACTION(cell)]; /* isolate the action */
/* level increment of the NEW state (Res column) */
2073 addLevel=(*pImpTab)[pLevState->state][IMPTABLEVELS_RES];
2074
2075 if(actionSeq) {
2076 switch(actionSeq) {
2077 case 1: /* init ON seq */
2078 pLevState->startON=start0;
2079 break;
2080
2081 case 2: /* prepend ON seq to current seq */
2082 start=pLevState->startON;
2083 break;
2084
2085 case 3: /* EN/AN after R+ON */
2086 level=pLevState->runLevel+1;
2087 setLevelsOutsideIsolates(pBiDi, pLevState->startON, start0, level);
2088 break;
2089
2090 case 4: /* EN/AN before R for NUMBERS_SPECIAL */
2091 level=pLevState->runLevel+2;
2092 setLevelsOutsideIsolates(pBiDi, pLevState->startON, start0, level);
2093 break;
2094
2095 case 5: /* L or S after possible relevant EN/AN */
2096 /* check if we had EN after R/AL */
2097 if (pLevState->startL2EN >= 0) {
2098 addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
2099 }
2100 pLevState->startL2EN=-1; /* not within previous if since could also be -2 */
2101 /* check if we had any relevant EN/AN after R/AL */
2102 pInsertPoints=&(pBiDi->insertPoints);
2103 if ((pInsertPoints->capacity == 0) ||
2104 (pInsertPoints->size <= pInsertPoints->confirmed))
2105 {
2106 /* nothing, just clean up */
2107 pLevState->lastStrongRTL=-1;
2108 /* check if we have a pending conditional segment */
2109 level=(*pImpTab)[oldStateSeq][IMPTABLEVELS_RES];
2110 if ((level & 1) && (pLevState->startON > 0)) { /* after ON */
2111 start=pLevState->startON; /* reset to basic run level */
2112 }
2113 if (_prop == DirProp_S) /* add LRM before S */
2114 {
2115 addPoint(pBiDi, start0, LRM_BEFORE);
2116 pInsertPoints->confirmed=pInsertPoints->size;
2117 }
2118 break;
2119 }
2120 /* reset previous RTL cont to level for LTR text */
2121 for (k=pLevState->lastStrongRTL+1; k<start0; k++)
2122 {
2123 /* reset odd level, leave runLevel+2 as is */
2124 levels[k]=(levels[k] - 2) & ~1;
2125 }
2126 /* mark insert points as confirmed */
2127 pInsertPoints->confirmed=pInsertPoints->size;
2128 pLevState->lastStrongRTL=-1;
2129 if (_prop == DirProp_S) /* add LRM before S */
2130 {
2131 addPoint(pBiDi, start0, LRM_BEFORE);
2132 pInsertPoints->confirmed=pInsertPoints->size;
2133 }
2134 break;
2135
2136 case 6: /* R/AL after possible relevant EN/AN */
2137 /* just clean up */
2138 pInsertPoints=&(pBiDi->insertPoints);
2139 if (pInsertPoints->capacity > 0)
2140 /* remove all non confirmed insert points */
2141 pInsertPoints->size=pInsertPoints->confirmed;
2142 pLevState->startON=-1;
2143 pLevState->startL2EN=-1;
2144 pLevState->lastStrongRTL=limit - 1;
2145 break;
2146
2147 case 7: /* EN/AN after R/AL + possible cont */
2148 /* check for real AN */
2149 if ((_prop == DirProp_AN) && (pBiDi->dirProps[start0] == AN) &&
2150 (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))
2151 {
2152 /* real AN */
2153 if (pLevState->startL2EN == -1) /* if no relevant EN already found */
2154 {
2155 /* just note the rightmost digit as a strong RTL */
2156 pLevState->lastStrongRTL=limit - 1;
2157 break;
2158 }
2159 if (pLevState->startL2EN >= 0) /* after EN, no AN */
2160 {
2161 addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
2162 pLevState->startL2EN=-2;
2163 }
2164 /* note AN */
2165 addPoint(pBiDi, start0, LRM_BEFORE);
2166 break;
2167 }
2168 /* if first EN/AN after R/AL */
2169 if (pLevState->startL2EN == -1) {
2170 pLevState->startL2EN=start0;
2171 }
2172 break;
2173
2174 case 8: /* note location of latest R/AL */
2175 pLevState->lastStrongRTL=limit - 1;
2176 pLevState->startON=-1;
2177 break;
2178
2179 case 9: /* L after R+ON/EN/AN */
2180 /* include possible adjacent number on the left */
/* empty-bodied loop: scans left for the nearest position with an odd level */
2181 for (k=start0-1; k>=0 && !(levels[k]&1); k--);
2182 if(k>=0) {
2183 addPoint(pBiDi, k, RLM_BEFORE); /* add RLM before */
2184 pInsertPoints=&(pBiDi->insertPoints);
2185 pInsertPoints->confirmed=pInsertPoints->size; /* confirm it */
2186 }
2187 pLevState->startON=start0;
2188 break;
2189
2190 case 10: /* AN after L */
2191 /* AN numbers between L text on both sides may be trouble. */
2192 /* tentatively bracket with LRMs; will be confirmed if followed by L */
2193 addPoint(pBiDi, start0, LRM_BEFORE); /* add LRM before */
2194 addPoint(pBiDi, start0, LRM_AFTER); /* add LRM after */
2195 break;
2196
2197 case 11: /* R after L+ON/EN/AN */
2198 /* false alert, infirm LRMs around previous AN */
2199 pInsertPoints=&(pBiDi->insertPoints);
2200 pInsertPoints->size=pInsertPoints->confirmed;
2201 if (_prop == DirProp_S) /* add RLM before S */
2202 {
2203 addPoint(pBiDi, start0, RLM_BEFORE);
2204 pInsertPoints->confirmed=pInsertPoints->size;
2205 }
2206 break;
2207
2208 case 12: /* L after L+ON/AN */
2209 level=pLevState->runLevel + addLevel;
/* raise (never lower) the levels of the pending sequence to the new level */
2210 for(k=pLevState->startON; k<start0; k++) {
2211 if (levels[k]<level)
2212 levels[k]=level;
2213 }
2214 pInsertPoints=&(pBiDi->insertPoints);
2215 pInsertPoints->confirmed=pInsertPoints->size; /* confirm inserts */
2216 pLevState->startON=start0;
2217 break;
2218
2219 case 13: /* L after L+ON+EN/AN/ON */
2220 level=pLevState->runLevel;
/* NOTE(review): the two inner while loops below decrement k without
 * comparing it against startON or 0; they appear to rely on the level
 * values found at and before startON to terminate -- confirm. */
2221 for(k=start0-1; k>=pLevState->startON; k--) {
2222 if(levels[k]==level+3) {
2223 while(levels[k]==level+3) {
2224 levels[k--]-=2;
2225 }
2226 while(levels[k]==level) {
2227 k--;
2228 }
2229 }
2230 if(levels[k]==level+2) {
2231 levels[k]=level;
2232 continue;
2233 }
2234 levels[k]=level+1;
2235 }
2236 break;
2237
2238 case 14: /* R after L+ON+EN/AN/ON */
2239 level=pLevState->runLevel+1;
2240 for(k=start0-1; k>=pLevState->startON; k--) {
2241 if(levels[k]>level) {
2242 levels[k]-=2;
2243 }
2244 }
2245 break;
2246
2247 default: /* we should never get here */
2248 UPRV_UNREACHABLE;
2249 }
2250 }
/* assign the level of the new state to the sequence (possibly extended
 * backwards by an action above) */
2251 if((addLevel) || (start < start0)) {
2252 level=pLevState->runLevel + addLevel;
/* start inside the current run: plain fill; otherwise the range begins
 * before runStart and is filled via setLevelsOutsideIsolates(), which
 * leaves positions nested inside isolates untouched */
2253 if(start>=pLevState->runStart) {
2254 for(k=start; k<limit; k++) {
2255 levels[k]=level;
2256 }
2257 } else {
2258 setLevelsOutsideIsolates(pBiDi, start, limit, level);
2259 }
2260 }
2261 }
2262
2263 /**
2264 * Returns the directionality of the last strong character at the end of the prologue, if any.
2265 * Requires prologue!=null.
2266 */
2267 static DirProp
2268 lastL_R_AL(UBiDi *pBiDi) {
2269 const UChar *text=pBiDi->prologue;
2270 int32_t length=pBiDi->proLength;
2271 int32_t i;
2272 UChar32 uchar;
2273 DirProp dirProp;
2274 for(i=length; i>0; ) {
2275 /* i is decremented by U16_PREV */
2276 U16_PREV(text, 0, i, uchar);
2277 dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
2278 if(dirProp==L) {
2279 return DirProp_L;
2280 }
2281 if(dirProp==R || dirProp==AL) {
2282 return DirProp_R;
2283 }
2284 if(dirProp==B) {
2285 return DirProp_ON;
2286 }
2287 }
2288 return DirProp_ON;
2289 }
2290
2291 /**
2292 * Returns the directionality of the first strong character, or digit, in the epilogue, if any.
2293 * Requires epilogue!=null.
2294 */
2295 static DirProp
2296 firstL_R_AL_EN_AN(UBiDi *pBiDi) {
2297 const UChar *text=pBiDi->epilogue;
2298 int32_t length=pBiDi->epiLength;
2299 int32_t i;
2300 UChar32 uchar;
2301 DirProp dirProp;
2302 for(i=0; i<length; ) {
2303 /* i is incremented by U16_NEXT */
2304 U16_NEXT(text, i, length, uchar);
2305 dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
2306 if(dirProp==L) {
2307 return DirProp_L;
2308 }
2309 if(dirProp==R || dirProp==AL) {
2310 return DirProp_R;
2311 }
2312 if(dirProp==EN) {
2313 return DirProp_EN;
2314 }
2315 if(dirProp==AN) {
2316 return DirProp_AN;
2317 }
2318 }
2319 return DirProp_ON;
2320 }
2321
/*
 * Resolves the implicit levels of the run [start, limit); sor and eor are the
 * properties assumed at the start and end of the run.
 *
 * The function walks dirProps[start..limit) through the properties state
 * table (impTabProps). Whenever that table signals an action, one or two
 * completed property sequences are handed to processPropertySeq(), which
 * drives the levels state machine and writes pBiDi->levels.
 *
 * If the run starts at a PDI (or dirInsert holds an Insert_PDI for it) and
 * pBiDi->isolateCount >= 0, the state saved in pBiDi->isolates[] is restored
 * instead of starting from sor. If the run ends with LRI/RLI before the end
 * of the text, the state is saved into pBiDi->isolates[] instead of closing
 * the run with eor.
 *
 * When start==0 and a prologue is set, its last strong character (if any)
 * replaces sor; when limit is the text length and an epilogue is set, its
 * first strong character or digit (if any) replaces eor for the final
 * processPropertySeq() call.
 */
2322 static void
2323 resolveImplicitLevels(UBiDi *pBiDi,
2324 int32_t start, int32_t limit,
2325 DirProp sor, DirProp eor) {
2326 const DirProp *dirProps=pBiDi->dirProps;
2327 uint16_t *dirInsert = pBiDi->dirInsert; /* may be NULL */
2328 DirProp dirProp;
2329 int32_t dirInsertValue;
2330 LevState levState;
2331 int32_t i, start1, start2;
2332 uint16_t oldStateImp, stateImp, actionImp;
2333 uint8_t gprop, resProp, cell;
2334 UBool inverseRTL;
2335 DirProp nextStrongProp=R;
2336 int32_t nextStrongPos=-1;
2337
2338 /* check for RTL inverse BiDi mode */
2339 /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to
2340 * loop on the text characters from end to start.
2341 * This would need a different properties state table (at least different
2342 * actions) and different levels state tables (maybe very similar to the
2343 * LTR corresponding ones).
2344 */
2345 inverseRTL=(UBool)
2346 ((start<pBiDi->lastArabicPos) && (GET_PARALEVEL(pBiDi, start) & 1) &&
2347 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
2348 pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL));
2349
2350 /* initialize for property and levels state tables */
2351 levState.startL2EN=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
2352 levState.lastStrongRTL=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
2353 levState.runStart=start;
2354 levState.runLevel=pBiDi->levels[start];
/* pick the even- or odd-level table and action map of the active pair */
2355 levState.pImpTab=(const ImpTab*)((pBiDi->pImpTabPair)->pImpTab)[levState.runLevel&1];
2356 levState.pImpAct=(const ImpAct*)((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel&1];
2357 if(start==0 && pBiDi->proLength>0) {
2358 DirProp lastStrong=lastL_R_AL(pBiDi);
2359 if(lastStrong!=DirProp_ON) {
2360 sor=lastStrong;
2361 }
2362 }
2363 /* The isolates[] entries contain enough information to
2364 resume the bidi algorithm in the same state as it was
2365 when it was interrupted by an isolate sequence. */
2366 dirInsertValue = 0;
/* look through the 4-bit entries of dirInsert[start] for an inserted PDI;
 * dirInsertValue stays > 0 only if one is found */
2367 if (dirInsert != NULL) {
2368 dirInsertValue = dirInsert[start];
2369 while (dirInsertValue > 0) {
2370 if ((dirInsertValue & 0x000F) == Insert_PDI) {
2371 break;
2372 }
2373 dirInsertValue >>= 4;
2374 }
2375 }
2376 if((dirProps[start]==PDI || dirInsertValue>0) && pBiDi->isolateCount >= 0) {
2377 levState.startON=pBiDi->isolates[pBiDi->isolateCount].startON;
2378 start1=pBiDi->isolates[pBiDi->isolateCount].start1;
2379 stateImp=pBiDi->isolates[pBiDi->isolateCount].stateImp;
2380 levState.state=pBiDi->isolates[pBiDi->isolateCount].state;
2381 pBiDi->isolateCount--;
2382 } else {
2383 levState.startON=-1;
2384 start1=start;
2385 if(dirProps[start]==NSM)
2386 stateImp = 1 + sor;
2387 else
2388 stateImp=0;
2389 levState.state=0;
/* feed sor into the levels state machine as an empty sequence */
2390 processPropertySeq(pBiDi, &levState, sor, start, start);
2391 }
2392 start2=start; /* to make Java compiler happy */
2393
/* the loop runs one step past the last character (i==limit) so that eor
 * can be fed to the properties state machine */
2394 for(i=start; i<=limit; i++) {
2395 if(i>=limit) {
2396 int32_t k;
2397 dirInsertValue = 0;
/* find the property of the last character that is not BN/explicit
 * (or of an inserted control at such a position) */
2398 for(k=limit-1; k>start && dirInsertValue <= 0; k--) {
2399 dirProp = dirProps[k];
2400 if ((DIRPROP_FLAG(dirProp)&MASK_BN_EXPLICIT) == 0) {
2401 break;
2402 }
2403 dirProp = ON;
2404 if (dirInsert != NULL) {
2405 dirInsertValue = dirInsert[k];
2406 while (dirInsertValue > 0) {
2407 dirProp = (DirProp)stdDirFromInsertDir[dirInsertValue & 0x000F];
2408 if ((DIRPROP_FLAG(dirProp)&MASK_BN_EXPLICIT) == 0) {
2409 break;
2410 }
2411 dirInsertValue >>= 4;
2412 }
2413 }
2414 }
2415 if (k == start) {
2416 dirProp = dirProps[k];
2417 }
2418 if(dirProp==LRI || dirProp==RLI)
2419 break; /* no forced closing for sequence ending with LRI/RLI */
2420 gprop=eor;
2421 } else {
2422 DirProp prop, prop1;
2423 prop=dirProps[i];
2424 if(prop==B) {
2425 pBiDi->isolateCount=-1; /* current isolates stack entry == none */
2426 }
2427 if(inverseRTL) {
2428 if(prop==AL) {
2429 /* AL before EN does not make it AN */
2430 prop=R;
2431 } else if(prop==EN) {
/* the result of the look-ahead is cached in nextStrongProp/nextStrongPos
 * and only recomputed once i has reached the cached position */
2432 if(nextStrongPos<=i) {
2433 /* look for next strong char (L/R/AL) */
2434 int32_t j;
2435 nextStrongProp=R; /* set default */
2436 nextStrongPos=limit;
2437 for(j=i+1; j<limit; j++) {
2438 prop1=dirProps[j];
2439 if(prop1==L || prop1==R || prop1==AL) {
2440 nextStrongProp=prop1;
2441 nextStrongPos=j;
2442 break;
2443 }
2444 }
2445 }
2446 if(nextStrongProp==AL) {
2447 prop=AN;
2448 }
2449 }
2450 }
2451 gprop=groupProp[prop];
2452 }
2453 oldStateImp=stateImp;
2454 cell=impTabProps[oldStateImp][gprop];
2455 stateImp=GET_STATEPROPS(cell); /* isolate the new state */
2456 actionImp=GET_ACTIONPROPS(cell); /* isolate the action */
2457 if((i==limit) && (actionImp==0)) {
2458 /* there is an unprocessed sequence if its property == eor */
2459 actionImp=1; /* process the last sequence */
2460 }
2461 if(actionImp) {
/* resolved property of the sequence that is being closed (old state) */
2462 resProp=impTabProps[oldStateImp][IMPTABPROPS_RES];
2463 switch(actionImp) {
2464 case 1: /* process current seq1, init new seq1 */
2465 processPropertySeq(pBiDi, &levState, resProp, start1, i);
2466 start1=i;
2467 break;
2468 case 2: /* init new seq2 */
2469 start2=i;
2470 break;
2471 case 3: /* process seq1, process seq2, init new seq1 */
2472 processPropertySeq(pBiDi, &levState, resProp, start1, start2);
2473 processPropertySeq(pBiDi, &levState, DirProp_ON, start2, i);
2474 start1=i;
2475 break;
2476 case 4: /* process seq1, set seq1=seq2, init new seq2 */
2477 processPropertySeq(pBiDi, &levState, resProp, start1, start2);
2478 start1=start2;
2479 start2=i;
2480 break;
2481 default: /* we should never get here */
2482 UPRV_UNREACHABLE;
2483 }
2484 }
2485 }
2486
2487 /* flush possible pending sequence, e.g. ON */
2488 if(limit==pBiDi->length && pBiDi->epiLength>0) {
2489 DirProp firstStrong=firstL_R_AL_EN_AN(pBiDi);
2490 if(firstStrong!=DirProp_ON) {
2491 eor=firstStrong;
2492 }
2493 }
2494
2495 /* look for the last char not a BN or LRE/RLE/LRO/RLO/PDF */
2496 dirInsertValue = 0;
2497 for(i=limit-1; i>start && dirInsertValue <= 0; i--) {
2498 dirProp=dirProps[i];
2499 if ((DIRPROP_FLAG(dirProp)&MASK_BN_EXPLICIT) == 0) {
2500 break;
2501 }
2502 dirProp = ON;
2503 if (dirInsert != NULL) {
2504 dirInsertValue = dirInsert[i];
2505 while (dirInsertValue > 0) {
2506 dirProp = (DirProp)stdDirFromInsertDir[dirInsertValue & 0x000F];
2507 if ((DIRPROP_FLAG(dirProp)&MASK_BN_EXPLICIT) == 0) {
2508 break;
2509 }
2510 dirInsertValue >>= 4;
2511 }
2512 }
2513 }
2514 if (i == start) {
2515 dirProp=dirProps[i];
2516 }
/* run ends with an isolate initiator before the end of the text: save the
 * state so that the run starting at the matching PDI can resume from it */
2517 if((dirProp==LRI || dirProp==RLI) && limit<pBiDi->length) {
2518 pBiDi->isolateCount++;
2519 pBiDi->isolates[pBiDi->isolateCount].stateImp=stateImp;
2520 pBiDi->isolates[pBiDi->isolateCount].state=levState.state;
2521 pBiDi->isolates[pBiDi->isolateCount].start1=start1;
2522 pBiDi->isolates[pBiDi->isolateCount].startON=levState.startON;
2523 }
2524 else
2525 processPropertySeq(pBiDi, &levState, eor, limit, limit);
2526 }
2527
2528 /* perform (L1) and (X9) ---------------------------------------------------- */
2529
2530 /*
2531 * Reset the embedding levels for some non-graphic characters (L1).
2532 * This function also sets appropriate levels for BN, and
2533 * explicit embedding types that are supposed to have been removed
2534 * from the paragraph in (X9).
2535 */
2536 static void
2537 adjustWSLevels(UBiDi *pBiDi) {
2538 const DirProp *dirProps=pBiDi->dirProps;
2539 UBiDiLevel *levels=pBiDi->levels;
2540 int32_t i;
2541
2542 if(pBiDi->flags&MASK_WS) {
2543 UBool orderParagraphsLTR=pBiDi->orderParagraphsLTR;
2544 Flags flag;
2545
2546 i=pBiDi->trailingWSStart;
2547 while(i>0) {
2548 /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
2549 while(i>0 && (flag=DIRPROP_FLAG(dirProps[--i]))&MASK_WS) {
2550 if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
2551 levels[i]=0;
2552 } else {
2553 levels[i]=GET_PARALEVEL(pBiDi, i);
2554 }
2555 }
2556
2557 /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
2558 /* here, i+1 is guaranteed to be <length */
2559 while(i>0) {
2560 flag=DIRPROP_FLAG(dirProps[--i]);
2561 if(flag&MASK_BN_EXPLICIT) {
2562 levels[i]=levels[i+1];
2563 } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
2564 levels[i]=0;
2565 break;
2566 } else if(flag&MASK_B_S) {
2567 levels[i]=GET_PARALEVEL(pBiDi, i);
2568 break;
2569 }
2570 }
2571 }
2572 }
2573 }
2574
2575 U_CAPI void U_EXPORT2
2576 ubidi_setContext(UBiDi *pBiDi,
2577 const UChar *prologue, int32_t proLength,
2578 const UChar *epilogue, int32_t epiLength,
2579 UErrorCode *pErrorCode) {
2580 /* check the argument values */
2581 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2582 if(pBiDi==NULL || proLength<-1 || epiLength<-1 ||
2583 (prologue==NULL && proLength!=0) || (epilogue==NULL && epiLength!=0)) {
2584 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2585 return;
2586 }
2587
2588 if(proLength==-1) {
2589 pBiDi->proLength=u_strlen(prologue);
2590 } else {
2591 pBiDi->proLength=proLength;
2592 }
2593 if(epiLength==-1) {
2594 pBiDi->epiLength=u_strlen(epilogue);
2595 } else {
2596 pBiDi->epiLength=epiLength;
2597 }
2598 pBiDi->prologue=prologue;
2599 pBiDi->epilogue=epilogue;
2600 }
2601
2602 static void
2603 setParaSuccess(UBiDi *pBiDi) {
2604 pBiDi->proLength=0; /* forget the last context */
2605 pBiDi->epiLength=0;
2606 pBiDi->pParaBiDi=pBiDi; /* mark successful setPara */
2607 }
2608
/*
 * Minimum of two values and absolute value of one value.
 * Both are plain macros: an argument may be evaluated more than once,
 * so do not pass expressions with side effects.
 */
#define BIDI_MIN(x, y)  ((y)>(x) ? (x) : (y))
#define BIDI_ABS(x)     ((x)<0 ? (-(x)) : (x))
2611
2612 static void
2613 setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length,
2614 UBiDiLevel paraLevel, UErrorCode *pErrorCode) {
2615 int32_t *runsOnlyMemory = NULL;
2616 int32_t *visualMap;
2617 UChar *visualText;
2618 int32_t saveLength, saveTrailingWSStart;
2619 const UBiDiLevel *levels;
2620 UBiDiLevel *saveLevels;
2621 UBiDiDirection saveDirection;
2622 UBool saveMayAllocateText;
2623 Run *runs;
2624 int32_t visualLength, i, j, visualStart, logicalStart,
2625 runCount, runLength, addedRuns, insertRemove,
2626 start, limit, step, indexOddBit, logicalPos,
2627 index0, index1;
2628 uint32_t saveOptions;
2629
2630 pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT;
2631 if(length==0) {
2632 ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
2633 goto cleanup3;
2634 }
2635 /* obtain memory for mapping table and visual text */
2636 runsOnlyMemory=static_cast<int32_t *>(uprv_malloc(length*(sizeof(int32_t)+sizeof(UChar)+sizeof(UBiDiLevel))));
2637 if(runsOnlyMemory==NULL) {
2638 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2639 goto cleanup3;
2640 }
2641 visualMap=runsOnlyMemory;
2642 visualText=(UChar *)&visualMap[length];
2643 saveLevels=(UBiDiLevel *)&visualText[length];
2644 saveOptions=pBiDi->reorderingOptions;
2645 if(saveOptions & UBIDI_OPTION_INSERT_MARKS) {
2646 pBiDi->reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
2647 pBiDi->reorderingOptions|=UBIDI_OPTION_REMOVE_CONTROLS;
2648 }
2649 paraLevel&=1; /* accept only 0 or 1 */
2650 ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
2651 if(U_FAILURE(*pErrorCode)) {
2652 goto cleanup3;
2653 }
2654 /* we cannot access directly pBiDi->levels since it is not yet set if
2655 * direction is not MIXED
2656 */
2657 levels=ubidi_getLevels(pBiDi, pErrorCode);
2658 uprv_memcpy(saveLevels, levels, (size_t)pBiDi->length*sizeof(UBiDiLevel));
2659 saveTrailingWSStart=pBiDi->trailingWSStart;
2660 saveLength=pBiDi->length;
2661 saveDirection=pBiDi->direction;
2662
2663 /* FOOD FOR THOUGHT: instead of writing the visual text, we could use
2664 * the visual map and the dirProps array to drive the second call
2665 * to ubidi_setPara (but must make provision for possible removal of
2666 * BiDi controls. Alternatively, only use the dirProps array via
2667 * customized classifier callback.
2668 */
2669 visualLength=ubidi_writeReordered(pBiDi, visualText, length,
2670 UBIDI_DO_MIRRORING, pErrorCode);
2671 ubidi_getVisualMap(pBiDi, visualMap, pErrorCode);
2672 if(U_FAILURE(*pErrorCode)) {
2673 goto cleanup2;
2674 }
2675 pBiDi->reorderingOptions=saveOptions;
2676
2677 pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_LIKE_DIRECT;
2678 paraLevel^=1;
2679 /* Because what we did with reorderingOptions, visualText may be shorter
2680 * than the original text. But we don't want the levels memory to be
2681 * reallocated shorter than the original length, since we need to restore
2682 * the levels as after the first call to ubidi_setpara() before returning.
2683 * We will force mayAllocateText to FALSE before the second call to
2684 * ubidi_setpara(), and will restore it afterwards.
2685 */
2686 saveMayAllocateText=pBiDi->mayAllocateText;
2687 pBiDi->mayAllocateText=FALSE;
2688 ubidi_setPara(pBiDi, visualText, visualLength, paraLevel, NULL, pErrorCode);
2689 pBiDi->mayAllocateText=saveMayAllocateText;
2690 ubidi_getRuns(pBiDi, pErrorCode);
2691 if(U_FAILURE(*pErrorCode)) {
2692 goto cleanup1;
2693 }
2694 /* check if some runs must be split, count how many splits */
2695 addedRuns=0;
2696 runCount=pBiDi->runCount;
2697 runs=pBiDi->runs;
2698 visualStart=0;
2699 for(i=0; i<runCount; i++, visualStart+=runLength) {
2700 runLength=runs[i].visualLimit-visualStart;
2701 if(runLength<2) {
2702 continue;
2703 }
2704 logicalStart=GET_INDEX(runs[i].logicalStart);
2705 for(j=logicalStart+1; j<logicalStart+runLength; j++) {
2706 index0=visualMap[j];
2707 index1=visualMap[j-1];
2708 if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
2709 addedRuns++;
2710 }
2711 }
2712 }
2713 if(addedRuns) {
2714 if(getRunsMemory(pBiDi, runCount+addedRuns)) {
2715 if(runCount==1) {
2716 /* because we switch from UBiDi.simpleRuns to UBiDi.runs */
2717 pBiDi->runsMemory[0]=runs[0];
2718 }
2719 runs=pBiDi->runs=pBiDi->runsMemory;
2720 pBiDi->runCount+=addedRuns;
2721 } else {
2722 goto cleanup1;
2723 }
2724 }
2725 /* split runs which are not consecutive in source text */
2726 for(i=runCount-1; i>=0; i--) {
2727 runLength= i==0 ? runs[0].visualLimit :
2728 runs[i].visualLimit-runs[i-1].visualLimit;
2729 logicalStart=runs[i].logicalStart;
2730 indexOddBit=GET_ODD_BIT(logicalStart);
2731 logicalStart=GET_INDEX(logicalStart);
2732 if(runLength<2) {
2733 if(addedRuns) {
2734 runs[i+addedRuns]=runs[i];
2735 }
2736 logicalPos=visualMap[logicalStart];
2737 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2738 saveLevels[logicalPos]^indexOddBit);
2739 continue;
2740 }
2741 if(indexOddBit) {
2742 start=logicalStart;
2743 limit=logicalStart+runLength-1;
2744 step=1;
2745 } else {
2746 start=logicalStart+runLength-1;
2747 limit=logicalStart;
2748 step=-1;
2749 }
2750 for(j=start; j!=limit; j+=step) {
2751 index0=visualMap[j];
2752 index1=visualMap[j+step];
2753 if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
2754 logicalPos=BIDI_MIN(visualMap[start], index0);
2755 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2756 saveLevels[logicalPos]^indexOddBit);
2757 runs[i+addedRuns].visualLimit=runs[i].visualLimit;
2758 runs[i].visualLimit-=BIDI_ABS(j-start)+1;
2759 insertRemove=runs[i].insertRemove&(LRM_AFTER|RLM_AFTER);
2760 runs[i+addedRuns].insertRemove=insertRemove;
2761 runs[i].insertRemove&=~insertRemove;
2762 start=j+step;
2763 addedRuns--;
2764 }
2765 }
2766 if(addedRuns) {
2767 runs[i+addedRuns]=runs[i];
2768 }
2769 logicalPos=BIDI_MIN(visualMap[start], visualMap[limit]);
2770 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2771 saveLevels[logicalPos]^indexOddBit);
2772 }
2773
2774 cleanup1:
2775 /* restore initial paraLevel */
2776 pBiDi->paraLevel^=1;
2777 cleanup2:
2778 /* restore real text */
2779 pBiDi->text=text;
2780 pBiDi->length=saveLength;
2781 pBiDi->originalLength=length;
2782 pBiDi->direction=saveDirection;
2783 /* the saved levels should never excess levelsSize, but we check anyway */
2784 if(saveLength>pBiDi->levelsSize) {
2785 saveLength=pBiDi->levelsSize;
2786 }
2787 uprv_memcpy(pBiDi->levels, saveLevels, (size_t)saveLength*sizeof(UBiDiLevel));
2788 pBiDi->trailingWSStart=saveTrailingWSStart;
2789 if(pBiDi->runCount>1) {
2790 pBiDi->direction=UBIDI_MIXED;
2791 }
2792 cleanup3:
2793 /* free memory for mapping table and visual text */
2794 uprv_free(runsOnlyMemory);
2795
2796 pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY;
2797 }
2798
2799 /* -------------------------------------------------------------------------- */
2800 /* internal prototype */
2801
2802 static void
2803 ubidi_setParaInternal(UBiDi *pBiDi,
2804 const UChar *text, int32_t length,
2805 UBiDiLevel paraLevel,
2806 UBiDiLevel *embeddingLevels,
2807 const int32_t *offsets, int32_t offsetCount,
2808 const int32_t *controlStringIndices,
2809 const UChar * const * controlStrings,
2810 UErrorCode *pErrorCode);
2811
2812 /* ubidi_setPara ------------------------------------------------------------ */
2813
2814 U_CAPI void U_EXPORT2
2815 ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
2816 UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
2817 UErrorCode *pErrorCode) {
2818 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2819 ubidi_setParaInternal(pBiDi, text, length, paraLevel,
2820 embeddingLevels,
2821 NULL, 0, NULL, NULL,
2822 pErrorCode);
2823 }
2824
2825 /* ubidi_setParaWithControls ------------------------------------------------ */
2826
2827 U_CAPI void U_EXPORT2
2828 ubidi_setParaWithControls(UBiDi *pBiDi,
2829 const UChar *text, int32_t length,
2830 UBiDiLevel paraLevel,
2831 const int32_t *offsets, int32_t offsetCount,
2832 const int32_t *controlStringIndices,
2833 const UChar * const * controlStrings,
2834 UErrorCode *pErrorCode) {
2835 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2836 /* check the argument values that are not already checked in ubidi_setParaInternal */
2837 if ( offsetCount < 0 || (offsetCount > 0 && (offsets == NULL || controlStrings == NULL)) ) {
2838 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2839 return;
2840 }
2841 ubidi_setParaInternal(pBiDi, text, length, paraLevel,
2842 NULL,
2843 offsets, offsetCount, controlStringIndices, controlStrings,
2844 pErrorCode);
2845 }
2846
2847 /* ubidi_setParaInternal ---------------------------------------------------- */
2848
2849 void
2850 ubidi_setParaInternal(UBiDi *pBiDi,
2851 const UChar *text, int32_t length,
2852 UBiDiLevel paraLevel,
2853 UBiDiLevel *embeddingLevels,
2854 const int32_t *offsets, int32_t offsetCount,
2855 const int32_t *controlStringIndices,
2856 const UChar * const * controlStrings,
2857 UErrorCode *pErrorCode) {
2858 UBiDiDirection direction;
2859 DirProp *dirProps;
2860
2861 /* check the argument values (pErrorCode status alrecy checked before getting here) */
2862 if(pBiDi==NULL || text==NULL || length<-1 ||
2863 (paraLevel>UBIDI_MAX_EXPLICIT_LEVEL && paraLevel<UBIDI_DEFAULT_LTR)) {
2864 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2865 return;
2866 }
2867
2868 if(length==-1) {
2869 length=u_strlen(text);
2870 }
2871 if (offsetCount > 0 && pBiDi->reorderingMode > UBIDI_REORDER_GROUP_NUMBERS_WITH_R) {
2872 offsetCount = 0;
2873 }
2874
2875 /* special treatment for RUNS_ONLY mode */
2876 if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) {
2877 setParaRunsOnly(pBiDi, text, length, paraLevel, pErrorCode);
2878 return;
2879 }
2880
2881 /* initialize the UBiDi structure */
2882 pBiDi->pParaBiDi=NULL; /* mark unfinished setPara */
2883 pBiDi->text=text;
2884 pBiDi->length=pBiDi->originalLength=pBiDi->resultLength=length;
2885 pBiDi->paraLevel=paraLevel;
2886 pBiDi->direction=(UBiDiDirection)(paraLevel&1);
2887 pBiDi->paraCount=1;
2888
2889 pBiDi->dirInsert=NULL;
2890 pBiDi->dirProps=NULL;
2891 pBiDi->levels=NULL;
2892 pBiDi->runs=NULL;
2893 pBiDi->insertPoints.size=0; /* clean up from last call */
2894 pBiDi->insertPoints.confirmed=0; /* clean up from last call */
2895
2896 /*
2897 * Save the original paraLevel if contextual; otherwise, set to 0.
2898 */
2899 pBiDi->defaultParaLevel=IS_DEFAULT_LEVEL(paraLevel);
2900
2901 if(length==0) {
2902 /*
2903 * For an empty paragraph, create a UBiDi object with the paraLevel and
2904 * the flags and the direction set but without allocating zero-length arrays.
2905 * There is nothing more to do.
2906 */
2907 if(IS_DEFAULT_LEVEL(paraLevel)) {
2908 pBiDi->paraLevel&=1;
2909 pBiDi->defaultParaLevel=0;
2910 }
2911 pBiDi->flags=DIRPROP_FLAG_LR(paraLevel);
2912 pBiDi->runCount=0;
2913 pBiDi->paraCount=0;
2914 setParaSuccess(pBiDi); /* mark successful setPara */
2915 return;
2916 }
2917
2918 pBiDi->runCount=-1;
2919
2920 /* allocate paras memory */
2921 if(pBiDi->parasMemory)
2922 pBiDi->paras=pBiDi->parasMemory;
2923 else
2924 pBiDi->paras=pBiDi->simpleParas;
2925
2926 /*
2927 * Get the inserted directional properties
2928 * if necessary.
2929 */
2930 if (offsetCount > 0) {
2931 if(getDirInsertMemory(pBiDi, length)) {
2932 pBiDi->dirInsert=pBiDi->dirInsertMemory;
2933 if(!getDirInsert(pBiDi, offsets, offsetCount, controlStringIndices, controlStrings)) {
2934 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2935 return;
2936 }
2937 } else {
2938 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2939 return;
2940 }
2941 }
2942
2943 /*
2944 * Get the directional properties,
2945 * the flags bit-set, and
2946 * determine the paragraph level if necessary.
2947 */
2948 if(getDirPropsMemory(pBiDi, length)) {
2949 pBiDi->dirProps=pBiDi->dirPropsMemory;
2950 if(!getDirProps(pBiDi)) {
2951 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2952 return;
2953 }
2954 } else {
2955 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2956 return;
2957 }
2958 dirProps=pBiDi->dirProps;
2959 /* the processed length may have changed if UBIDI_OPTION_STREAMING */
2960 length= pBiDi->length;
2961 pBiDi->trailingWSStart=length; /* the levels[] will reflect the WS run */
2962
2963 /* are explicit levels specified? */
2964 if(embeddingLevels==NULL) {
2965 /* no: determine explicit levels according to the (Xn) rules */\
2966 if(getLevelsMemory(pBiDi, length)) {
2967 pBiDi->levels=pBiDi->levelsMemory;
2968 direction=resolveExplicitLevels(pBiDi, pErrorCode);
2969 if(U_FAILURE(*pErrorCode)) {
2970 return;
2971 }
2972 } else {
2973 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2974 return;
2975 }
2976 } else {
2977 /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */
2978 pBiDi->levels=embeddingLevels;
2979 direction=checkExplicitLevels(pBiDi, pErrorCode);
2980 if(U_FAILURE(*pErrorCode)) {
2981 return;
2982 }
2983 }
2984
2985 /* allocate isolate memory */
2986 if(pBiDi->isolateCount<=SIMPLE_ISOLATES_COUNT)
2987 pBiDi->isolates=pBiDi->simpleIsolates;
2988 else
2989 if((int32_t)(pBiDi->isolateCount*sizeof(Isolate))<=pBiDi->isolatesSize)
2990 pBiDi->isolates=pBiDi->isolatesMemory;
2991 else {
2992 if(getInitialIsolatesMemory(pBiDi, pBiDi->isolateCount)) {
2993 pBiDi->isolates=pBiDi->isolatesMemory;
2994 } else {
2995 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2996 return;
2997 }
2998 }
2999 pBiDi->isolateCount=-1; /* current isolates stack entry == none */
3000
3001 /*
3002 * The steps after (X9) in the UBiDi algorithm are performed only if
3003 * the paragraph text has mixed directionality!
3004 */
3005 pBiDi->direction=direction;
3006 switch(direction) {
3007 case UBIDI_LTR:
3008 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
3009 pBiDi->trailingWSStart=0;
3010 break;
3011 case UBIDI_RTL:
3012 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
3013 pBiDi->trailingWSStart=0;
3014 break;
3015 default:
3016 /*
3017 * Choose the right implicit state table
3018 */
3019 switch(pBiDi->reorderingMode) {
3020 case UBIDI_REORDER_DEFAULT:
3021 pBiDi->pImpTabPair=&impTab_DEFAULT;
3022 break;
3023 case UBIDI_REORDER_NUMBERS_SPECIAL:
3024 pBiDi->pImpTabPair=&impTab_NUMBERS_SPECIAL;
3025 break;
3026 case UBIDI_REORDER_GROUP_NUMBERS_WITH_R:
3027 pBiDi->pImpTabPair=&impTab_GROUP_NUMBERS_WITH_R;
3028 break;
3029 case UBIDI_REORDER_INVERSE_NUMBERS_AS_L:
3030 pBiDi->pImpTabPair=&impTab_INVERSE_NUMBERS_AS_L;
3031 break;
3032 case UBIDI_REORDER_INVERSE_LIKE_DIRECT:
3033 if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
3034 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS;
3035 } else {
3036 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT;
3037 }
3038 break;
3039 case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL:
3040 if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
3041 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS;
3042 } else {
3043 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL;
3044 }
3045 break;
3046 default:
3047 /* we should never get here */
3048 UPRV_UNREACHABLE;
3049 }
3050 /*
3051 * If there are no external levels specified and there
3052 * are no significant explicit level codes in the text,
3053 * then we can treat the entire paragraph as one run.
3054 * Otherwise, we need to perform the following rules on runs of
3055 * the text with the same embedding levels. (X10)
3056 * "Significant" explicit level codes are ones that actually
3057 * affect non-BN characters.
3058 * Examples for "insignificant" ones are empty embeddings
3059 * LRE-PDF, LRE-RLE-PDF-PDF, etc.
3060 */
3061 if(embeddingLevels==NULL && pBiDi->paraCount<=1 &&
3062 !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) {
3063 resolveImplicitLevels(pBiDi, 0, length,
3064 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, 0)),
3065 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, length-1)));
3066 } else {
3067 /* sor, eor: start and end types of same-level-run */
3068 UBiDiLevel *levels=pBiDi->levels;
3069 int32_t start, limit=0;
3070 UBiDiLevel level, nextLevel;
3071 DirProp sor, eor;
3072
3073 /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
3074 level=GET_PARALEVEL(pBiDi, 0);
3075 nextLevel=levels[0];
3076 if(level<nextLevel) {
3077 eor=GET_LR_FROM_LEVEL(nextLevel);
3078 } else {
3079 eor=GET_LR_FROM_LEVEL(level);
3080 }
3081
3082 do {
3083 /* determine start and limit of the run (end points just behind the run) */
3084
3085 /* the values for this run's start are the same as for the previous run's end */
3086 start=limit;
3087 level=nextLevel;
3088 if((start>0) && (dirProps[start-1]==B)) {
3089 /* except if this is a new paragraph, then set sor = para level */
3090 sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start));
3091 } else {
3092 sor=eor;
3093 }
3094
3095 /* search for the limit of this run */
3096 while((++limit<length) &&
3097 ((levels[limit]==level) ||
3098 (DIRPROP_FLAG(dirProps[limit])&MASK_BN_EXPLICIT))) {}
3099
3100 /* get the correct level of the next run */
3101 if(limit<length) {
3102 nextLevel=levels[limit];
3103 } else {
3104 nextLevel=GET_PARALEVEL(pBiDi, length-1);
3105 }
3106
3107 /* determine eor from max(level, nextLevel); sor is last run's eor */
3108 if(NO_OVERRIDE(level)<NO_OVERRIDE(nextLevel)) {
3109 eor=GET_LR_FROM_LEVEL(nextLevel);
3110 } else {
3111 eor=GET_LR_FROM_LEVEL(level);
3112 }
3113
3114 /* if the run consists of overridden directional types, then there
3115 are no implicit types to be resolved */
3116 if(!(level&UBIDI_LEVEL_OVERRIDE)) {
3117 resolveImplicitLevels(pBiDi, start, limit, sor, eor);
3118 } else {
3119 /* remove the UBIDI_LEVEL_OVERRIDE flags */
3120 do {
3121 levels[start++]&=~UBIDI_LEVEL_OVERRIDE;
3122 } while(start<limit);
3123 }
3124 } while(limit<length);
3125 }
3126 /* check if we got any memory shortage while adding insert points */
3127 if (U_FAILURE(pBiDi->insertPoints.errorCode))
3128 {
3129 *pErrorCode=pBiDi->insertPoints.errorCode;
3130 return;
3131 }
3132 /* reset the embedding levels for some non-graphic characters (L1), (X9) */
3133 adjustWSLevels(pBiDi);
3134 break;
3135 }
3136 /* add RLM for inverse Bidi with contextual orientation resolving
3137 * to RTL which would not round-trip otherwise
3138 */
3139 if((pBiDi->defaultParaLevel>0) &&
3140 (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) &&
3141 ((pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) ||
3142 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) {
3143 int32_t i, j, start, last;
3144 UBiDiLevel level;
3145 DirProp dirProp;
3146 for(i=0; i<pBiDi->paraCount; i++) {
3147 last=(pBiDi->paras[i].limit)-1;
3148 level= static_cast<UBiDiLevel>(pBiDi->paras[i].level);
3149 if(level==0)
3150 continue; /* LTR paragraph */
3151 start= i==0 ? 0 : pBiDi->paras[i-1].limit;
3152 for(j=last; j>=start; j--) {
3153 dirProp=dirProps[j];
3154 if(dirProp==L) {
3155 if(j<last) {
3156 while(dirProps[last]==B) {
3157 last--;
3158 }
3159 }
3160 addPoint(pBiDi, last, RLM_BEFORE);
3161 break;
3162 }
3163 if(DIRPROP_FLAG(dirProp) & MASK_R_AL) {
3164 break;
3165 }
3166 }
3167 }
3168 }
3169
3170 if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
3171 pBiDi->resultLength -= pBiDi->controlCount;
3172 } else {
3173 pBiDi->resultLength += pBiDi->insertPoints.size;
3174 }
3175 setParaSuccess(pBiDi); /* mark successful setPara */
3176 }
3177
3178 /* -------------------------------------------------------------------------- */
3179
3180 U_CAPI void U_EXPORT2
3181 ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR) {
3182 if(pBiDi!=NULL) {
3183 pBiDi->orderParagraphsLTR=orderParagraphsLTR;
3184 }
3185 }
3186
3187 U_CAPI UBool U_EXPORT2
3188 ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) {
3189 if(pBiDi!=NULL) {
3190 return pBiDi->orderParagraphsLTR;
3191 } else {
3192 return FALSE;
3193 }
3194 }
3195
3196 U_CAPI UBiDiDirection U_EXPORT2
3197 ubidi_getDirection(const UBiDi *pBiDi) {
3198 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
3199 return pBiDi->direction;
3200 } else {
3201 return UBIDI_LTR;
3202 }
3203 }
3204
3205 U_CAPI const UChar * U_EXPORT2
3206 ubidi_getText(const UBiDi *pBiDi) {
3207 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
3208 return pBiDi->text;
3209 } else {
3210 return NULL;
3211 }
3212 }
3213
3214 U_CAPI int32_t U_EXPORT2
3215 ubidi_getLength(const UBiDi *pBiDi) {
3216 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
3217 return pBiDi->originalLength;
3218 } else {
3219 return 0;
3220 }
3221 }
3222
3223 U_CAPI int32_t U_EXPORT2
3224 ubidi_getProcessedLength(const UBiDi *pBiDi) {
3225 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
3226 return pBiDi->length;
3227 } else {
3228 return 0;
3229 }
3230 }
3231
3232 U_CAPI int32_t U_EXPORT2
3233 ubidi_getResultLength(const UBiDi *pBiDi) {
3234 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
3235 return pBiDi->resultLength;
3236 } else {
3237 return 0;
3238 }
3239 }
3240
3241 /* paragraphs API functions ------------------------------------------------- */
3242
3243 U_CAPI UBiDiLevel U_EXPORT2
3244 ubidi_getParaLevel(const UBiDi *pBiDi) {
3245 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
3246 return pBiDi->paraLevel;
3247 } else {
3248 return 0;
3249 }
3250 }
3251
3252 U_CAPI int32_t U_EXPORT2
3253 ubidi_countParagraphs(UBiDi *pBiDi) {
3254 if(!IS_VALID_PARA_OR_LINE(pBiDi)) {
3255 return 0;
3256 } else {
3257 return pBiDi->paraCount;
3258 }
3259 }
3260
3261 U_CAPI void U_EXPORT2
3262 ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex,
3263 int32_t *pParaStart, int32_t *pParaLimit,
3264 UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
3265 int32_t paraStart;
3266
3267 /* check the argument values */
3268 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
3269 RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode);
3270 RETURN_VOID_IF_BAD_RANGE(paraIndex, 0, pBiDi->paraCount, *pErrorCode);
3271
3272 pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */
3273 if(paraIndex) {
3274 paraStart=pBiDi->paras[paraIndex-1].limit;
3275 } else {
3276 paraStart=0;
3277 }
3278 if(pParaStart!=NULL) {
3279 *pParaStart=paraStart;
3280 }
3281 if(pParaLimit!=NULL) {
3282 *pParaLimit=pBiDi->paras[paraIndex].limit;
3283 }
3284 if(pParaLevel!=NULL) {
3285 *pParaLevel=GET_PARALEVEL(pBiDi, paraStart);
3286 }
3287 }
3288
3289 U_CAPI int32_t U_EXPORT2
3290 ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex,
3291 int32_t *pParaStart, int32_t *pParaLimit,
3292 UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
3293 int32_t paraIndex;
3294
3295 /* check the argument values */
3296 /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */
3297 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
3298 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
3299 pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */
3300 RETURN_IF_BAD_RANGE(charIndex, 0, pBiDi->length, *pErrorCode, -1);
3301
3302 for(paraIndex=0; charIndex>=pBiDi->paras[paraIndex].limit; paraIndex++);
3303 ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode);
3304 return paraIndex;
3305 }
3306
3307 U_CAPI void U_EXPORT2
3308 ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn,
3309 const void *newContext, UBiDiClassCallback **oldFn,
3310 const void **oldContext, UErrorCode *pErrorCode)
3311 {
3312 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
3313 if(pBiDi==NULL) {
3314 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
3315 return;
3316 }
3317 if( oldFn )
3318 {
3319 *oldFn = pBiDi->fnClassCallback;
3320 }
3321 if( oldContext )
3322 {
3323 *oldContext = pBiDi->coClassCallback;
3324 }
3325 pBiDi->fnClassCallback = newFn;
3326 pBiDi->coClassCallback = newContext;
3327 }
3328
3329 U_CAPI void U_EXPORT2
3330 ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context)
3331 {
3332 if(pBiDi==NULL) {
3333 return;
3334 }
3335 if( fn )
3336 {
3337 *fn = pBiDi->fnClassCallback;
3338 }
3339 if( context )
3340 {
3341 *context = pBiDi->coClassCallback;
3342 }
3343 }
3344
3345 U_CAPI UCharDirection U_EXPORT2
3346 ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c)
3347 {
3348 UCharDirection dir;
3349
3350 if( pBiDi->fnClassCallback == NULL ||
3351 (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT )
3352 {
3353 dir = ubidi_getClass(c);
3354 }
3355 if(dir >= U_CHAR_DIRECTION_COUNT) {
3356 dir = (UCharDirection)ON;
3357 }
3358 return dir;
3359 }