]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/ubidi.cpp
ICU-62141.0.1.tar.gz
[apple/icu.git] / icuSources / common / ubidi.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
73c04bcf 3/*
b75a7d8f
A
4******************************************************************************
5*
b331163b 6* Copyright (C) 1999-2015, International Business Machines
b75a7d8f
A
7* Corporation and others. All Rights Reserved.
8*
9******************************************************************************
10* file name: ubidi.c
f3c0d7a5 11* encoding: UTF-8
b75a7d8f
A
12* tab size: 8 (not used)
13* indentation:4
14*
15* created on: 1999jul27
46f4442e 16* created by: Markus W. Scherer, updated by Matitiahu Allouche
57a6839d 17*
b75a7d8f
A
18*/
19
b75a7d8f
A
20#include "cmemory.h"
21#include "unicode/utypes.h"
22#include "unicode/ustring.h"
23#include "unicode/uchar.h"
24#include "unicode/ubidi.h"
4388f060 25#include "unicode/utf16.h"
73c04bcf 26#include "ubidi_props.h"
b75a7d8f 27#include "ubidiimp.h"
46f4442e 28#include "uassert.h"
b75a7d8f
A
29
30/*
31 * General implementation notes:
32 *
33 * Throughout the implementation, there are comments like (W2) that refer to
57a6839d
A
34 * rules of the BiDi algorithm, in this example to the second rule of the
35 * resolution of weak types.
b75a7d8f
A
36 *
37 * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
38 * character according to UTF-16, the second UChar gets the directional property of
39 * the entire character assigned, while the first one gets a BN, a boundary
40 * neutral, type, which is ignored by most of the algorithm according to
41 * rule (X9) and the implementation suggestions of the BiDi algorithm.
42 *
43 * Later, adjustWSLevels() will set the level for each BN to that of the
44 * following character (UChar), which results in surrogate pairs getting the
45 * same level on each of their surrogates.
46 *
47 * In a UTF-8 implementation, the same thing could be done: the last byte of
48 * a multi-byte sequence would get the "real" property, while all previous
49 * bytes of that sequence would get BN.
50 *
51 * It is not possible to assign all those parts of a character the same real
52 * property because this would fail in the resolution of weak types with rules
53 * that look at immediately surrounding types.
54 *
55 * As a related topic, this implementation does not remove Boundary Neutral
73c04bcf 56 * types from the input, but ignores them wherever this is relevant.
b75a7d8f
A
57 * For example, the loop for the resolution of the weak types reads
58 * types until it finds a non-BN.
59 * Also, explicit embedding codes are neither changed into BN nor removed.
60 * They are only treated the same way real BNs are.
61 * As stated before, adjustWSLevels() takes care of them at the end.
62 * For the purpose of conformance, the levels of all these codes
63 * do not matter.
64 *
57a6839d
A
65 * Note that this implementation modifies the dirProps
66 * after the initial setup, when applying X5c (replace FSI by LRI or RLI),
67 * X6, N0 (replace paired brackets by L or R).
b75a7d8f 68 *
57a6839d
A
69 * In this implementation, the resolution of weak types (W1 to W6),
70 * neutrals (N1 and N2), and the assignment of the resolved level (In)
b75a7d8f
A
71 * are all done in one single loop, in resolveImplicitLevels().
72 * Changes of dirProp values are done on the fly, without writing
73 * them back to the dirProps array.
74 *
75 *
76 * This implementation contains code that allows to bypass steps of the
77 * algorithm that are not needed on the specific paragraph
78 * in order to speed up the most common cases considerably,
79 * like text that is entirely LTR, or RTL text without numbers.
80 *
81 * Most of this is done by setting a bit for each directional property
82 * in a flags variable and later checking for whether there are
83 * any LTR characters or any RTL characters, or both, whether
84 * there are any explicit embedding codes, etc.
85 *
86 * If the (Xn) steps are performed, then the flags are re-evaluated,
87 * because they will then not contain the embedding codes any more
88 * and will be adjusted for override codes, so that subsequently
89 * more bypassing may be possible than what the initial flags suggested.
90 *
91 * If the text is not mixed-directional, then the
92 * algorithm steps for the weak type resolution are not performed,
93 * and all levels are set to the paragraph level.
94 *
95 * If there are no explicit embedding codes, then the (Xn) steps
96 * are not performed.
97 *
98 * If embedding levels are supplied as a parameter, then all
99 * explicit embedding codes are ignored, and the (Xn) steps
100 * are not performed.
101 *
102 * White Space types could get the level of the run they belong to,
103 * and are checked with a test of (flags&MASK_EMBEDDING) to
104 * consider if the paragraph direction should be considered in
105 * the flags variable.
106 *
107 * If there are no White Space types in the paragraph, then
108 * (L1) is not necessary in adjustWSLevels().
109 */
110
b75a7d8f
A
111/* to avoid some conditional statements, use tiny constant arrays */
112static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) };
113static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) };
114static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) };
115
116#define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
57a6839d
A
117#define DIRPROP_FLAG_E(level) flagE[(level)&1]
118#define DIRPROP_FLAG_O(level) flagO[(level)&1]
119
120#define DIR_FROM_STRONG(strong) ((strong)==L ? L : R)
121
122#define NO_OVERRIDE(level) ((level)&~UBIDI_LEVEL_OVERRIDE)
b75a7d8f
A
123
124/* UBiDi object management -------------------------------------------------- */
125
126U_CAPI UBiDi * U_EXPORT2
73c04bcf 127ubidi_open(void)
b75a7d8f
A
128{
129 UErrorCode errorCode=U_ZERO_ERROR;
130 return ubidi_openSized(0, 0, &errorCode);
131}
132
133U_CAPI UBiDi * U_EXPORT2
134ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) {
135 UBiDi *pBiDi;
136
137 /* check the argument values */
138 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
139 return NULL;
140 } else if(maxLength<0 || maxRunCount<0) {
141 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
142 return NULL; /* invalid arguments */
143 }
144
145 /* allocate memory for the object */
146 pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi));
147 if(pBiDi==NULL) {
148 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
149 return NULL;
150 }
151
152 /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */
153 uprv_memset(pBiDi, 0, sizeof(UBiDi));
154
155 /* allocate memory for arrays as requested */
156 if(maxLength>0) {
157 if( !getInitialDirPropsMemory(pBiDi, maxLength) ||
158 !getInitialLevelsMemory(pBiDi, maxLength)
159 ) {
160 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
161 }
162 } else {
163 pBiDi->mayAllocateText=TRUE;
164 }
165
166 if(maxRunCount>0) {
167 if(maxRunCount==1) {
168 /* use simpleRuns[] */
169 pBiDi->runsSize=sizeof(Run);
170 } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) {
171 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
172 }
173 } else {
174 pBiDi->mayAllocateRuns=TRUE;
175 }
176
177 if(U_SUCCESS(*pErrorCode)) {
178 return pBiDi;
179 } else {
180 ubidi_close(pBiDi);
181 return NULL;
182 }
183}
184
185/*
186 * We are allowed to allocate memory if memory==NULL or
187 * mayAllocate==TRUE for each array that we need.
46f4442e 188 * We also try to grow memory as needed if we
b75a7d8f
A
189 * allocate it.
190 *
191 * Assume sizeNeeded>0.
192 * If *pMemory!=NULL, then assume *pSize>0.
193 *
194 * ### this realloc() may unnecessarily copy the old data,
195 * which we know we don't need any more;
196 * is this the best way to do this??
197 */
198U_CFUNC UBool
46f4442e
A
199ubidi_getMemory(BidiMemoryForAllocation *bidiMem, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded) {
200 void **pMemory = (void **)bidiMem;
b75a7d8f
A
201 /* check for existing memory */
202 if(*pMemory==NULL) {
203 /* we need to allocate memory */
204 if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=NULL) {
205 *pSize=sizeNeeded;
206 return TRUE;
207 } else {
208 return FALSE;
209 }
210 } else {
46f4442e
A
211 if(sizeNeeded<=*pSize) {
212 /* there is already enough memory */
213 return TRUE;
214 }
215 else if(!mayAllocate) {
b75a7d8f
A
216 /* not enough memory, and we must not allocate */
217 return FALSE;
46f4442e
A
218 } else {
219 /* we try to grow */
b75a7d8f 220 void *memory;
46f4442e
A
221 /* in most cases, we do not need the copy-old-data part of
222 * realloc, but it is needed when adding runs using getRunsMemory()
223 * in setParaRunsOnly()
224 */
b75a7d8f
A
225 if((memory=uprv_realloc(*pMemory, sizeNeeded))!=NULL) {
226 *pMemory=memory;
227 *pSize=sizeNeeded;
228 return TRUE;
229 } else {
230 /* we failed to grow */
231 return FALSE;
232 }
b75a7d8f
A
233 }
234 }
235}
236
237U_CAPI void U_EXPORT2
238ubidi_close(UBiDi *pBiDi) {
239 if(pBiDi!=NULL) {
73c04bcf 240 pBiDi->pParaBiDi=NULL; /* in case one tries to reuse this block */
2ca993e8
A
241 if(pBiDi->dirInsertMemory!=NULL) {
242 uprv_free(pBiDi->dirInsertMemory);
243 }
b75a7d8f
A
244 if(pBiDi->dirPropsMemory!=NULL) {
245 uprv_free(pBiDi->dirPropsMemory);
246 }
247 if(pBiDi->levelsMemory!=NULL) {
248 uprv_free(pBiDi->levelsMemory);
249 }
57a6839d
A
250 if(pBiDi->openingsMemory!=NULL) {
251 uprv_free(pBiDi->openingsMemory);
b75a7d8f 252 }
73c04bcf
A
253 if(pBiDi->parasMemory!=NULL) {
254 uprv_free(pBiDi->parasMemory);
255 }
57a6839d
A
256 if(pBiDi->runsMemory!=NULL) {
257 uprv_free(pBiDi->runsMemory);
258 }
259 if(pBiDi->isolatesMemory!=NULL) {
260 uprv_free(pBiDi->isolatesMemory);
261 }
73c04bcf
A
262 if(pBiDi->insertPoints.points!=NULL) {
263 uprv_free(pBiDi->insertPoints.points);
264 }
265
b75a7d8f
A
266 uprv_free(pBiDi);
267 }
268}
269
270/* set to approximate "inverse BiDi" ---------------------------------------- */
271
272U_CAPI void U_EXPORT2
273ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) {
274 if(pBiDi!=NULL) {
275 pBiDi->isInverse=isInverse;
73c04bcf
A
276 pBiDi->reorderingMode = isInverse ? UBIDI_REORDER_INVERSE_NUMBERS_AS_L
277 : UBIDI_REORDER_DEFAULT;
b75a7d8f
A
278 }
279}
280
281U_CAPI UBool U_EXPORT2
282ubidi_isInverse(UBiDi *pBiDi) {
283 if(pBiDi!=NULL) {
284 return pBiDi->isInverse;
285 } else {
286 return FALSE;
287 }
288}
289
73c04bcf
A
290/* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
291 * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre
292 * concept of RUNS_ONLY which is a double operation.
293 * It could be advantageous to divide this into 3 concepts:
294 * a) Operation: direct / inverse / RUNS_ONLY
46f4442e 295 * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R
73c04bcf
A
296 * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL
297 * This would allow combinations not possible today like RUNS_ONLY with
298 * NUMBERS_SPECIAL.
299 * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and
300 * REMOVE_CONTROLS for the inverse step.
301 * Not all combinations would be supported, and probably not all do make sense.
302 * This would need to document which ones are supported and what are the
303 * fallbacks for unsupported combinations.
304 */
305U_CAPI void U_EXPORT2
306ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) {
46f4442e 307 if ((pBiDi!=NULL) && (reorderingMode >= UBIDI_REORDER_DEFAULT)
73c04bcf
A
308 && (reorderingMode < UBIDI_REORDER_COUNT)) {
309 pBiDi->reorderingMode = reorderingMode;
46f4442e 310 pBiDi->isInverse = (UBool)(reorderingMode == UBIDI_REORDER_INVERSE_NUMBERS_AS_L);
73c04bcf
A
311 }
312}
313
314U_CAPI UBiDiReorderingMode U_EXPORT2
315ubidi_getReorderingMode(UBiDi *pBiDi) {
46f4442e 316 if (pBiDi!=NULL) {
73c04bcf
A
317 return pBiDi->reorderingMode;
318 } else {
319 return UBIDI_REORDER_DEFAULT;
320 }
321}
322
323U_CAPI void U_EXPORT2
324ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions) {
325 if (reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
326 reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
327 }
46f4442e
A
328 if (pBiDi!=NULL) {
329 pBiDi->reorderingOptions=reorderingOptions;
73c04bcf
A
330 }
331}
332
333U_CAPI uint32_t U_EXPORT2
334ubidi_getReorderingOptions(UBiDi *pBiDi) {
46f4442e 335 if (pBiDi!=NULL) {
73c04bcf
A
336 return pBiDi->reorderingOptions;
337 } else {
338 return 0;
339 }
340}
341
729e4ab9
A
342U_CAPI UBiDiDirection U_EXPORT2
343ubidi_getBaseDirection(const UChar *text,
344int32_t length){
345
346 int32_t i;
347 UChar32 uchar;
348 UCharDirection dir;
4388f060 349
729e4ab9
A
350 if( text==NULL || length<-1 ){
351 return UBIDI_NEUTRAL;
352 }
353
354 if(length==-1) {
355 length=u_strlen(text);
356 }
357
358 for( i = 0 ; i < length; ) {
359 /* i is incremented by U16_NEXT */
360 U16_NEXT(text, i, length, uchar);
361 dir = u_charDirection(uchar);
362 if( dir == U_LEFT_TO_RIGHT )
363 return UBIDI_LTR;
364 if( dir == U_RIGHT_TO_LEFT || dir ==U_RIGHT_TO_LEFT_ARABIC )
365 return UBIDI_RTL;
366 }
367 return UBIDI_NEUTRAL;
368}
369
b75a7d8f
A
370/* perform (P2)..(P3) ------------------------------------------------------- */
371
57a6839d
A
372/**
373 * Returns the directionality of the first strong character
374 * after the last B in prologue, if any.
375 * Requires prologue!=null.
376 */
4388f060
A
377static DirProp
378firstL_R_AL(UBiDi *pBiDi) {
4388f060
A
379 const UChar *text=pBiDi->prologue;
380 int32_t length=pBiDi->proLength;
381 int32_t i;
382 UChar32 uchar;
383 DirProp dirProp, result=ON;
384 for(i=0; i<length; ) {
385 /* i is incremented by U16_NEXT */
386 U16_NEXT(text, i, length, uchar);
387 dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
388 if(result==ON) {
389 if(dirProp==L || dirProp==R || dirProp==AL) {
390 result=dirProp;
391 }
392 } else {
393 if(dirProp==B) {
394 result=ON;
395 }
396 }
397 }
398 return result;
399}
400
b75a7d8f 401/*
57a6839d 402 * Check that there are enough entries in the array pointed to by pBiDi->paras
b75a7d8f 403 */
57a6839d
A
404static UBool
405checkParaCount(UBiDi *pBiDi) {
406 int32_t count=pBiDi->paraCount;
407 if(pBiDi->paras==pBiDi->simpleParas) {
b331163b 408 if(count<=SIMPLE_PARAS_COUNT)
57a6839d 409 return TRUE;
b331163b 410 if(!getInitialParasMemory(pBiDi, SIMPLE_PARAS_COUNT * 2))
57a6839d
A
411 return FALSE;
412 pBiDi->paras=pBiDi->parasMemory;
b331163b 413 uprv_memcpy(pBiDi->parasMemory, pBiDi->simpleParas, SIMPLE_PARAS_COUNT * sizeof(Para));
57a6839d
A
414 return TRUE;
415 }
416 if(!getInitialParasMemory(pBiDi, count * 2))
417 return FALSE;
418 pBiDi->paras=pBiDi->parasMemory;
419 return TRUE;
420}
421
2ca993e8
A
/*
 * Compact codes for the directional classes of inserted bidi controls.
 * This is a subset of the bidi classes; every code fits in 4 bits.
 */
enum {                      /* corresponding standard class            */
    Insert_none = 0,        /* all others                              */
    Insert_L    = 1,        /* L   = U_LEFT_TO_RIGHT                   */
    Insert_R    = 2,        /* R   = U_RIGHT_TO_LEFT                   */
    Insert_AL   = 3,        /* AL  = U_RIGHT_TO_LEFT_ARABIC            */
    Insert_LRE  = 4,        /* LRE = U_LEFT_TO_RIGHT_EMBEDDING         */
    Insert_LRO  = 5,        /* LRO = U_LEFT_TO_RIGHT_OVERRIDE          */
    Insert_RLE  = 6,        /* RLE = U_RIGHT_TO_LEFT_EMBEDDING         */
    Insert_RLO  = 7,        /* RLO = U_RIGHT_TO_LEFT_OVERRIDE          */
    Insert_PDF  = 8,        /* PDF = U_POP_DIRECTIONAL_FORMAT          */
    Insert_FSI  = 9,        /* FSI = U_FIRST_STRONG_ISOLATE            */
    Insert_LRI  = 10,       /* LRI = U_LEFT_TO_RIGHT_ISOLATE           */
    Insert_RLI  = 11,       /* RLI = U_RIGHT_TO_LEFT_ISOLATE           */
    Insert_PDI  = 12,       /* PDI = U_POP_DIRECTIONAL_ISOLATE         */
    Insert_B    = 13,       /* B   = U_BLOCK_SEPARATOR                 */
    Insert_S    = 14,       /* S   = U_SEGMENT_SEPARATOR               */
    Insert_WS   = 15,       /* WS  = U_WHITE_SPACE_NEUTRAL             */
    Insert_count = 16       /* number of codes                         */
};
446
447/* map standard dir class to special 4-bit insert value (Insert_none as default) */
448static const uint16_t insertDirFromStdDir[dirPropCount] = {
449 Insert_none, /* L= U_LEFT_TO_RIGHT */
450 Insert_none, /* R= U_RIGHT_TO_LEFT, */
451 Insert_none, /* EN= U_EUROPEAN_NUMBER */
452 Insert_none, /* ES= U_EUROPEAN_NUMBER_SEPARATOR */
453 Insert_none, /* ET= U_EUROPEAN_NUMBER_TERMINATOR */
454 Insert_none, /* AN= U_ARABIC_NUMBER */
455 Insert_none, /* CS= U_COMMON_NUMBER_SEPARATOR */
456 Insert_none, /* B= U_BLOCK_SEPARATOR */
457 Insert_none, /* S= U_SEGMENT_SEPARATOR */
458 Insert_none, /* WS= U_WHITE_SPACE_NEUTRAL */
459 Insert_none, /* ON= U_OTHER_NEUTRAL */
460 Insert_LRE, /* LRE=U_LEFT_TO_RIGHT_EMBEDDING */
461 Insert_LRO, /* LRO=U_LEFT_TO_RIGHT_OVERRIDE */
462 Insert_none, /* AL= U_RIGHT_TO_LEFT_ARABIC */
463 Insert_RLE, /* RLE=U_RIGHT_TO_LEFT_EMBEDDING */
464 Insert_RLO, /* RLO=U_RIGHT_TO_LEFT_OVERRIDE */
465 Insert_PDF, /* PDF=U_POP_DIRECTIONAL_FORMAT */
466 Insert_none, /* NSM=U_DIR_NON_SPACING_MARK */
467 Insert_none, /* BN= U_BOUNDARY_NEUTRAL */
468 Insert_FSI, /* FSI=U_FIRST_STRONG_ISOLATE */
469 Insert_LRI, /* LRI=U_LEFT_TO_RIGHT_ISOLATE */
470 Insert_RLI, /* RLI=U_RIGHT_TO_LEFT_ISOLATE */
471 Insert_PDI, /* PDI=U_POP_DIRECTIONAL_ISOLATE */
472 Insert_none, /* ENL */
473 Insert_none, /* ENR */
474};
475
476/* map special 4-bit insert direction class to standard dir class (ON as default) */
477static const uint8_t stdDirFromInsertDir[Insert_count] = {
478 ON, /* Insert_none > ON */
479 L, /* Insert_L */
480 R, /* Insert_R */
481 AL, /* Insert_AL */
482 LRE, /* Insert_LRE */
483 LRO, /* Insert_LRO */
484 RLE, /* Insert_RLE */
485 RLO, /* Insert_RLO */
486 PDF, /* Insert_PDF */
487 FSI, /* Insert_FSI */
488 LRI, /* Insert_LRI */
489 RLI, /* Insert_RLI */
490 PDI, /* Insert_PDI */
491 B, /* Insert_B */
492 S, /* Insert_S */
493 WS, /* Insert_WS */
494};
495
496enum { kMaxControlStringLen = 4 };
497
498static UBool
499getDirInsert(UBiDi *pBiDi,
500 const int32_t *offsets, int32_t offsetCount,
501 const int32_t *controlStringIndices,
502 const UChar * const * controlStrings) {
503 int32_t offset, offsetsIndex;
504 uint16_t *dirInsert = pBiDi->dirInsert;
505 /* initialize dirInsert */
506 for (offset = 0; offset < pBiDi->length; offset++) {
507 dirInsert[offset] = 0;
508 }
509 for (offsetsIndex = 0; offsetsIndex < offsetCount; offsetsIndex++) {
510 const UChar * controlString;
511 UChar uchar;
512 int32_t controlStringIndex, dirInsertIndex = 0;
513 uint16_t dirInsertValue = 0;
514 offset = offsets[offsetsIndex];
515 if (offset < 0 || offset >= pBiDi->length) {
516 return FALSE; /* param err in offsets array */
517 }
518 controlStringIndex = (controlStringIndices == NULL)? offsetsIndex: controlStringIndices[offsetsIndex];
519 controlString = controlStrings[controlStringIndex];
520 if (controlString == NULL) {
521 return FALSE; /* param err in controlStrings array */
522 }
523 while ((uchar = *controlString++) != 0) {
524 uint16_t insertValue = (U16_IS_SURROGATE(uchar))? Insert_none:
525 insertDirFromStdDir[(uint32_t)ubidi_getCustomizedClass(pBiDi, uchar)];
526 if (dirInsertIndex >= kMaxControlStringLen || insertValue == Insert_none) {
527 return FALSE; /* param err in controlStrings array */
528 }
529 dirInsertValue |= (insertValue << (4 * dirInsertIndex++));
530 }
531 dirInsert[offset] = dirInsertValue;
532 }
533 return TRUE;
534}
535
57a6839d
A
536/*
537 * Get the directional properties for the text, calculate the flags bit-set, and
538 * determine the paragraph level if necessary (in pBiDi->paras[i].level).
539 * FSI initiators are also resolved and their dirProp replaced with LRI or RLI.
540 * When encountering an FSI, it is initially replaced with an LRI, which is the
541 * default. Only if a strong R or AL is found within its scope will the LRI be
542 * replaced by an RLI.
543 */
544static UBool
73c04bcf
A
545getDirProps(UBiDi *pBiDi) {
546 const UChar *text=pBiDi->text;
b75a7d8f 547 DirProp *dirProps=pBiDi->dirPropsMemory; /* pBiDi->dirProps is const */
2ca993e8 548 uint16_t *dirInsert = pBiDi->dirInsert; /* may be NULL */
b75a7d8f 549
57a6839d 550 int32_t i=0, originalLength=pBiDi->originalLength;
b75a7d8f
A
551 Flags flags=0; /* collect all directionalities in the text */
552 UChar32 uchar;
57a6839d 553 DirProp dirProp=0, defaultParaLevel=0; /* initialize to avoid compiler warnings */
2ca993e8
A
554 int32_t dirInsertValue;
555 int8_t dirInsertIndex; /* position within dirInsertValue, if any */
73c04bcf
A
556 UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel);
557 /* for inverse BiDi, the default para level is set to RTL if there is a
57a6839d 558 strong R or AL character at either end of the text */
46f4442e 559 UBool isDefaultLevelInverse=isDefaultLevel && (UBool)
73c04bcf
A
560 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
561 pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL);
562 int32_t lastArabicPos=-1;
563 int32_t controlCount=0;
46f4442e
A
564 UBool removeBiDiControls = (UBool)(pBiDi->reorderingOptions &
565 UBIDI_OPTION_REMOVE_CONTROLS);
73c04bcf 566
f3c0d7a5 567 enum State {
57a6839d
A
568 NOT_SEEKING_STRONG, /* 0: not contextual paraLevel, not after FSI */
569 SEEKING_STRONG_FOR_PARA, /* 1: looking for first strong char in para */
570 SEEKING_STRONG_FOR_FSI, /* 2: looking for first strong after FSI */
571 LOOKING_FOR_PDI /* 3: found strong after FSI, looking for PDI */
f3c0d7a5 572 };
73c04bcf 573 State state;
57a6839d
A
574 DirProp lastStrong=ON; /* for default level & inverse BiDi */
575 /* The following stacks are used to manage isolate sequences. Those
576 sequences may be nested, but obviously never more deeply than the
577 maximum explicit embedding level.
578 lastStack is the index of the last used entry in the stack. A value of -1
579 means that there is no open isolate sequence.
580 lastStack is reset to -1 on paragraph boundaries. */
581 /* The following stack contains the position of the initiator of
582 each open isolate sequence */
583 int32_t isolateStartStack[UBIDI_MAX_EXPLICIT_LEVEL+1];
2ca993e8 584 int8_t isolateStartInsertIndex[UBIDI_MAX_EXPLICIT_LEVEL+1];
57a6839d
A
585 /* The following stack contains the last known state before
586 encountering the initiator of an isolate sequence */
f3c0d7a5 587 State previousStateStack[UBIDI_MAX_EXPLICIT_LEVEL+1];
57a6839d
A
588 int32_t stackLast=-1;
589
590 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING)
73c04bcf 591 pBiDi->length=0;
57a6839d 592 defaultParaLevel=pBiDi->paraLevel&1;
73c04bcf 593 if(isDefaultLevel) {
57a6839d
A
594 pBiDi->paras[0].level=defaultParaLevel;
595 lastStrong=defaultParaLevel;
596 if(pBiDi->proLength>0 && /* there is a prologue */
597 (dirProp=firstL_R_AL(pBiDi))!=ON) { /* with a strong character */
598 if(dirProp==L)
599 pBiDi->paras[0].level=0; /* set the default para level */
600 else
601 pBiDi->paras[0].level=1; /* set the default para level */
602 state=NOT_SEEKING_STRONG;
4388f060 603 } else {
57a6839d 604 state=SEEKING_STRONG_FOR_PARA;
4388f060 605 }
374ca955 606 } else {
57a6839d
A
607 pBiDi->paras[0].level=pBiDi->paraLevel;
608 state=NOT_SEEKING_STRONG;
b75a7d8f 609 }
73c04bcf
A
610 /* count paragraphs and determine the paragraph level (P2..P3) */
611 /*
612 * see comment in ubidi.h:
57a6839d 613 * the UBIDI_DEFAULT_XXX values are designed so that
73c04bcf
A
614 * their bit 0 alone yields the intended default
615 */
2ca993e8
A
616 dirInsertValue = 0;
617 dirInsertIndex = -1; /* indicate that we have not checked dirInsert yet */
57a6839d 618 for( /* i=0 above */ ; i<originalLength; ) {
2ca993e8
A
619 if (dirInsert != NULL && dirInsertIndex < 0) {
620 dirInsertValue = dirInsert[i];
621 }
622 if (dirInsertValue > 0) {
623 dirInsertIndex++;
624 dirProp = (DirProp)stdDirFromInsertDir[dirInsertValue & 0x000F];
625 dirInsertValue >>= 4;
626 flags|=DIRPROP_FLAG(dirProp);
627 uchar = 0;
628 } else {
629 dirInsertIndex = -1;
630 /* i is incremented by U16_NEXT */
631 U16_NEXT(text, i, originalLength, uchar);
632 flags|=DIRPROP_FLAG(dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar));
633 dirProps[i-1]=dirProp;
634 if(uchar>0xffff) { /* set the lead surrogate's property to BN */
635 flags|=DIRPROP_FLAG(BN);
636 dirProps[i-2]=BN;
637 }
57a6839d
A
638 }
639 if(removeBiDiControls && IS_BIDI_CONTROL_CHAR(uchar))
640 controlCount++;
641 if(dirProp==L) {
642 if(state==SEEKING_STRONG_FOR_PARA) {
643 pBiDi->paras[pBiDi->paraCount-1].level=0;
644 state=NOT_SEEKING_STRONG;
73c04bcf 645 }
57a6839d
A
646 else if(state==SEEKING_STRONG_FOR_FSI) {
647 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
648 /* no need for next statement, already set by default */
649 /* dirProps[isolateStartStack[stackLast]]=LRI; */
650 flags|=DIRPROP_FLAG(LRI);
73c04bcf 651 }
57a6839d 652 state=LOOKING_FOR_PDI;
73c04bcf 653 }
57a6839d
A
654 lastStrong=L;
655 continue;
73c04bcf 656 }
57a6839d
A
657 if(dirProp==R || dirProp==AL) {
658 if(state==SEEKING_STRONG_FOR_PARA) {
659 pBiDi->paras[pBiDi->paraCount-1].level=1;
660 state=NOT_SEEKING_STRONG;
661 }
662 else if(state==SEEKING_STRONG_FOR_FSI) {
663 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
2ca993e8
A
664 if (isolateStartInsertIndex[stackLast] < 0) {
665 dirProps[isolateStartStack[stackLast]]=RLI;
666 } else {
667 dirInsert[stackLast] &= ~(0x000F << (4*isolateStartInsertIndex[stackLast]));
668 dirInsert[stackLast] |= (Insert_RLI << (4*isolateStartInsertIndex[stackLast]));
669 }
57a6839d
A
670 flags|=DIRPROP_FLAG(RLI);
671 }
672 state=LOOKING_FOR_PDI;
673 }
674 lastStrong=R;
675 if(dirProp==AL)
676 lastArabicPos=i-1;
677 continue;
73c04bcf 678 }
57a6839d
A
679 if(dirProp>=FSI && dirProp<=RLI) { /* FSI, LRI or RLI */
680 stackLast++;
681 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
2ca993e8
A
682 isolateStartStack[stackLast]= (dirInsertIndex < 0)? i-1: i /* we have not incremented with U16_NEXT yet */;
683 isolateStartInsertIndex[stackLast] = dirInsertIndex;
57a6839d
A
684 previousStateStack[stackLast]=state;
685 }
686 if(dirProp==FSI) {
2ca993e8
A
687 if (dirInsertIndex < 0) {
688 dirProps[i-1]=LRI; /* default if no strong char */
689 } else {
690 dirInsert[i] &= ~(0x000F << (4*dirInsertIndex));
691 dirInsert[i] |= (Insert_LRI << (4*dirInsertIndex));
692 }
57a6839d
A
693 state=SEEKING_STRONG_FOR_FSI;
694 }
695 else
696 state=LOOKING_FOR_PDI;
697 continue;
73c04bcf 698 }
57a6839d
A
699 if(dirProp==PDI) {
700 if(state==SEEKING_STRONG_FOR_FSI) {
701 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
702 /* no need for next statement, already set by default */
703 /* dirProps[isolateStartStack[stackLast]]=LRI; */
704 flags|=DIRPROP_FLAG(LRI);
705 }
706 }
707 if(stackLast>=0) {
708 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL)
709 state=previousStateStack[stackLast];
710 stackLast--;
711 }
712 continue;
73c04bcf 713 }
57a6839d
A
714 if(dirProp==B) {
715 if(i<originalLength && uchar==CR && text[i]==LF) /* do nothing on the CR */
716 continue;
717 pBiDi->paras[pBiDi->paraCount-1].limit=i;
718 if(isDefaultLevelInverse && lastStrong==R)
719 pBiDi->paras[pBiDi->paraCount-1].level=1;
73c04bcf 720 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
57a6839d
A
721 /* When streaming, we only process whole paragraphs
722 thus some updates are only done on paragraph boundaries */
73c04bcf 723 pBiDi->length=i; /* i is index to next character */
57a6839d 724 pBiDi->controlCount=controlCount;
73c04bcf 725 }
57a6839d
A
726 if(i<originalLength) { /* B not last char in text */
727 pBiDi->paraCount++;
728 if(checkParaCount(pBiDi)==FALSE) /* not enough memory for a new para entry */
729 return FALSE;
73c04bcf 730 if(isDefaultLevel) {
57a6839d
A
731 pBiDi->paras[pBiDi->paraCount-1].level=defaultParaLevel;
732 state=SEEKING_STRONG_FOR_PARA;
733 lastStrong=defaultParaLevel;
734 } else {
735 pBiDi->paras[pBiDi->paraCount-1].level=pBiDi->paraLevel;
736 state=NOT_SEEKING_STRONG;
73c04bcf 737 }
57a6839d 738 stackLast=-1;
73c04bcf 739 }
57a6839d 740 continue;
73c04bcf 741 }
b75a7d8f 742 }
57a6839d
A
743 /* Ignore still open isolate sequences with overflow */
744 if(stackLast>UBIDI_MAX_EXPLICIT_LEVEL) {
745 stackLast=UBIDI_MAX_EXPLICIT_LEVEL;
746 state=SEEKING_STRONG_FOR_FSI; /* to be on the safe side */
747 }
748 /* Resolve direction of still unresolved open FSI sequences */
749 while(stackLast>=0) {
750 if(state==SEEKING_STRONG_FOR_FSI) {
751 /* no need for next statement, already set by default */
752 /* dirProps[isolateStartStack[stackLast]]=LRI; */
753 flags|=DIRPROP_FLAG(LRI);
754 break;
73c04bcf 755 }
57a6839d
A
756 state=previousStateStack[stackLast];
757 stackLast--;
73c04bcf 758 }
57a6839d 759 /* When streaming, ignore text after the last paragraph separator */
73c04bcf 760 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
57a6839d 761 if(pBiDi->length<originalLength)
73c04bcf 762 pBiDi->paraCount--;
57a6839d
A
763 } else {
764 pBiDi->paras[pBiDi->paraCount-1].limit=originalLength;
765 pBiDi->controlCount=controlCount;
766 }
767 /* For inverse bidi, default para direction is RTL if there is
768 a strong R or AL at either end of the paragraph */
769 if(isDefaultLevelInverse && lastStrong==R) {
770 pBiDi->paras[pBiDi->paraCount-1].level=1;
73c04bcf 771 }
57a6839d
A
772 if(isDefaultLevel) {
773 pBiDi->paraLevel=pBiDi->paras[0].level;
774 }
775 /* The following is needed to resolve the text direction for default level
776 paragraphs containing no strong character */
777 for(i=0; i<pBiDi->paraCount; i++)
778 flags|=DIRPROP_FLAG_LR(pBiDi->paras[i].level);
73c04bcf
A
779
780 if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
781 flags|=DIRPROP_FLAG(L);
b75a7d8f 782 }
b75a7d8f 783 pBiDi->flags=flags;
73c04bcf 784 pBiDi->lastArabicPos=lastArabicPos;
57a6839d
A
785 return TRUE;
786}
787
788/* determine the paragraph level at position index */
789U_CFUNC UBiDiLevel
790ubidi_getParaLevelAtIndex(const UBiDi *pBiDi, int32_t pindex) {
791 int32_t i;
792 for(i=0; i<pBiDi->paraCount; i++)
793 if(pindex<pBiDi->paras[i].limit)
794 break;
795 if(i>=pBiDi->paraCount)
796 i=pBiDi->paraCount-1;
797 return (UBiDiLevel)(pBiDi->paras[i].level);
798}
799
800/* Functions for handling paired brackets ----------------------------------- */
801
802/* In the isoRuns array, the first entry is used for text outside of any
803 isolate sequence. Higher entries are used for each more deeply nested
804 isolate sequence. isoRunLast is the index of the last used entry. The
805 openings array is used to note the data of opening brackets not yet
806 matched by a closing bracket, or matched but still susceptible to change
807 level.
808 Each isoRun entry contains the index of the first and
809 one-after-last openings entries for pending opening brackets it
810 contains. The next openings entry to use is the one-after-last of the
811 most deeply nested isoRun entry.
812 isoRun entries also contain their current embedding level and the last
813 encountered strong character, since these will be needed to resolve
814 the level of paired brackets. */
815
816static void
817bracketInit(UBiDi *pBiDi, BracketData *bd) {
818 bd->pBiDi=pBiDi;
819 bd->isoRunLast=0;
820 bd->isoRuns[0].start=0;
821 bd->isoRuns[0].limit=0;
822 bd->isoRuns[0].level=GET_PARALEVEL(pBiDi, 0);
f3c0d7a5
A
823 UBiDiLevel t = GET_PARALEVEL(pBiDi, 0) & 1;
824 bd->isoRuns[0].lastStrong = bd->isoRuns[0].lastBase = t;
825 bd->isoRuns[0].contextDir = (UBiDiDirection)t;
57a6839d
A
826 bd->isoRuns[0].contextPos=0;
827 if(pBiDi->openingsMemory) {
828 bd->openings=pBiDi->openingsMemory;
b331163b 829 bd->openingsCount=pBiDi->openingsSize / sizeof(Opening);
57a6839d
A
830 } else {
831 bd->openings=bd->simpleOpenings;
b331163b 832 bd->openingsCount=SIMPLE_OPENINGS_COUNT;
57a6839d
A
833 }
834 bd->isNumbersSpecial=bd->pBiDi->reorderingMode==UBIDI_REORDER_NUMBERS_SPECIAL ||
835 bd->pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL;
836}
837
838/* paragraph boundary */
839static void
840bracketProcessB(BracketData *bd, UBiDiLevel level) {
841 bd->isoRunLast=0;
842 bd->isoRuns[0].limit=0;
843 bd->isoRuns[0].level=level;
f3c0d7a5
A
844 bd->isoRuns[0].lastStrong=bd->isoRuns[0].lastBase=level&1;
845 bd->isoRuns[0].contextDir=(UBiDiDirection)(level&1);
57a6839d
A
846 bd->isoRuns[0].contextPos=0;
847}
848
849/* LRE, LRO, RLE, RLO, PDF */
850static void
2ca993e8 851bracketProcessBoundary(BracketData *bd, int32_t lastCcPos, DirProp lastCcDirProp,
57a6839d
A
852 UBiDiLevel contextLevel, UBiDiLevel embeddingLevel) {
853 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
2ca993e8 854 if(DIRPROP_FLAG(lastCcDirProp)&MASK_ISO) /* after an isolate */
57a6839d
A
855 return;
856 if(NO_OVERRIDE(embeddingLevel)>NO_OVERRIDE(contextLevel)) /* not a PDF */
857 contextLevel=embeddingLevel;
858 pLastIsoRun->limit=pLastIsoRun->start;
859 pLastIsoRun->level=embeddingLevel;
f3c0d7a5
A
860 pLastIsoRun->lastStrong=pLastIsoRun->lastBase=contextLevel&1;
861 pLastIsoRun->contextDir=(UBiDiDirection)(contextLevel&1);
862 pLastIsoRun->contextPos=(UBiDiDirection)lastCcPos;
57a6839d
A
863}
864
865/* LRI or RLI */
866static void
867bracketProcessLRI_RLI(BracketData *bd, UBiDiLevel level) {
868 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
869 int16_t lastLimit;
870 pLastIsoRun->lastBase=ON;
871 lastLimit=pLastIsoRun->limit;
872 bd->isoRunLast++;
873 pLastIsoRun++;
874 pLastIsoRun->start=pLastIsoRun->limit=lastLimit;
875 pLastIsoRun->level=level;
f3c0d7a5
A
876 pLastIsoRun->lastStrong=pLastIsoRun->lastBase=level&1;
877 pLastIsoRun->contextDir=(UBiDiDirection)(level&1);
57a6839d
A
878 pLastIsoRun->contextPos=0;
879}
880
881/* PDI */
882static void
883bracketProcessPDI(BracketData *bd) {
884 IsoRun *pLastIsoRun;
885 bd->isoRunLast--;
886 pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
887 pLastIsoRun->lastBase=ON;
888}
889
890/* newly found opening bracket: create an openings entry */
891static UBool /* return TRUE if success */
892bracketAddOpening(BracketData *bd, UChar match, int32_t position) {
893 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
894 Opening *pOpening;
b331163b 895 if(pLastIsoRun->limit>=bd->openingsCount) { /* no available new entry */
57a6839d
A
896 UBiDi *pBiDi=bd->pBiDi;
897 if(!getInitialOpeningsMemory(pBiDi, pLastIsoRun->limit * 2))
898 return FALSE;
899 if(bd->openings==bd->simpleOpenings)
900 uprv_memcpy(pBiDi->openingsMemory, bd->simpleOpenings,
b331163b 901 SIMPLE_OPENINGS_COUNT * sizeof(Opening));
57a6839d 902 bd->openings=pBiDi->openingsMemory; /* may have changed */
b331163b 903 bd->openingsCount=pBiDi->openingsSize / sizeof(Opening);
57a6839d
A
904 }
905 pOpening=&bd->openings[pLastIsoRun->limit];
906 pOpening->position=position;
907 pOpening->match=match;
908 pOpening->contextDir=pLastIsoRun->contextDir;
909 pOpening->contextPos=pLastIsoRun->contextPos;
910 pOpening->flags=0;
911 pLastIsoRun->limit++;
912 return TRUE;
913}
914
915/* change N0c1 to N0c2 when a preceding bracket is assigned the embedding level */
916static void
917fixN0c(BracketData *bd, int32_t openingIndex, int32_t newPropPosition, DirProp newProp) {
918 /* This function calls itself recursively */
919 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
920 Opening *qOpening;
921 DirProp *dirProps=bd->pBiDi->dirProps;
922 int32_t k, openingPosition, closingPosition;
923 for(k=openingIndex+1, qOpening=&bd->openings[k]; k<pLastIsoRun->limit; k++, qOpening++) {
924 if(qOpening->match>=0) /* not an N0c match */
925 continue;
926 if(newPropPosition<qOpening->contextPos)
927 break;
928 if(newPropPosition>=qOpening->position)
929 continue;
930 if(newProp==qOpening->contextDir)
931 break;
932 openingPosition=qOpening->position;
933 dirProps[openingPosition]=newProp;
934 closingPosition=-(qOpening->match);
935 dirProps[closingPosition]=newProp;
936 qOpening->match=0; /* prevent further changes */
937 fixN0c(bd, k, openingPosition, newProp);
938 fixN0c(bd, k, closingPosition, newProp);
939 }
940}
941
942/* process closing bracket */
943static DirProp /* return L or R if N0b or N0c, ON if N0d */
944bracketProcessClosing(BracketData *bd, int32_t openIdx, int32_t position) {
945 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
946 Opening *pOpening, *qOpening;
947 UBiDiDirection direction;
948 UBool stable;
949 DirProp newProp;
950 pOpening=&bd->openings[openIdx];
f3c0d7a5 951 direction=(UBiDiDirection)(pLastIsoRun->level&1);
57a6839d
A
952 stable=TRUE; /* assume stable until proved otherwise */
953
954 /* The stable flag is set when brackets are paired and their
955 level is resolved and cannot be changed by what will be
956 found later in the source string.
957 An unstable match can occur only when applying N0c, where
958 the resolved level depends on the preceding context, and
959 this context may be affected by text occurring later.
960 Example: RTL paragraph containing: abc[(latin) HEBREW]
961 When the closing parenthesis is encountered, it appears
962 that N0c1 must be applied since 'abc' sets an opposite
963 direction context and both parentheses receive level 2.
964 However, when the closing square bracket is processed,
965 N0b applies because of 'HEBREW' being included within the
966 brackets, thus the square brackets are treated like R and
967 receive level 1. However, this changes the preceding
968 context of the opening parenthesis, and it now appears
969 that N0c2 must be applied to the parentheses rather than
970 N0c1. */
971
972 if((direction==0 && pOpening->flags&FOUND_L) ||
973 (direction==1 && pOpening->flags&FOUND_R)) { /* N0b */
974 newProp=direction;
975 }
976 else if(pOpening->flags&(FOUND_L|FOUND_R)) { /* N0c */
977 /* it is stable if there is no containing pair or in
978 conditions too complicated and not worth checking */
979 stable=(openIdx==pLastIsoRun->start);
980 if(direction!=pOpening->contextDir)
981 newProp=pOpening->contextDir; /* N0c1 */
982 else
983 newProp=direction; /* N0c2 */
984 } else {
985 /* forget this and any brackets nested within this pair */
986 pLastIsoRun->limit=openIdx;
987 return ON; /* N0d */
988 }
989 bd->pBiDi->dirProps[pOpening->position]=newProp;
990 bd->pBiDi->dirProps[position]=newProp;
991 /* Update nested N0c pairs that may be affected */
992 fixN0c(bd, openIdx, pOpening->position, newProp);
993 if(stable) {
994 pLastIsoRun->limit=openIdx; /* forget any brackets nested within this pair */
995 /* remove lower located synonyms if any */
996 while(pLastIsoRun->limit>pLastIsoRun->start &&
997 bd->openings[pLastIsoRun->limit-1].position==pOpening->position)
998 pLastIsoRun->limit--;
999 } else {
1000 int32_t k;
1001 pOpening->match=-position;
1002 /* neutralize lower located synonyms if any */
1003 k=openIdx-1;
1004 while(k>=pLastIsoRun->start &&
1005 bd->openings[k].position==pOpening->position)
1006 bd->openings[k--].match=0;
1007 /* neutralize any unmatched opening between the current pair;
1008 this will also neutralize higher located synonyms if any */
1009 for(k=openIdx+1; k<pLastIsoRun->limit; k++) {
1010 qOpening=&bd->openings[k];
1011 if(qOpening->position>=position)
1012 break;
1013 if(qOpening->match>0)
1014 qOpening->match=0;
1015 }
1016 }
1017 return newProp;
1018}
1019
1020/* handle strong characters, digits and candidates for closing brackets */
1021static UBool /* return TRUE if success */
1022bracketProcessChar(BracketData *bd, int32_t position) {
1023 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
1024 DirProp *dirProps, dirProp, newProp;
1025 UBiDiLevel level;
1026 dirProps=bd->pBiDi->dirProps;
1027 dirProp=dirProps[position];
1028 if(dirProp==ON) {
1029 UChar c, match;
1030 int32_t idx;
1031 /* First see if it is a matching closing bracket. Hopefully, this is
1032 more efficient than checking if it is a closing bracket at all */
1033 c=bd->pBiDi->text[position];
1034 for(idx=pLastIsoRun->limit-1; idx>=pLastIsoRun->start; idx--) {
1035 if(bd->openings[idx].match!=c)
1036 continue;
1037 /* We have a match */
1038 newProp=bracketProcessClosing(bd, idx, position);
1039 if(newProp==ON) { /* N0d */
1040 c=0; /* prevent handling as an opening */
1041 break;
1042 }
1043 pLastIsoRun->lastBase=ON;
f3c0d7a5 1044 pLastIsoRun->contextDir=(UBiDiDirection)newProp;
57a6839d
A
1045 pLastIsoRun->contextPos=position;
1046 level=bd->pBiDi->levels[position];
1047 if(level&UBIDI_LEVEL_OVERRIDE) { /* X4, X5 */
1048 uint16_t flag;
1049 int32_t i;
1050 newProp=level&1;
1051 pLastIsoRun->lastStrong=newProp;
1052 flag=DIRPROP_FLAG(newProp);
1053 for(i=pLastIsoRun->start; i<idx; i++)
1054 bd->openings[i].flags|=flag;
1055 /* matching brackets are not overridden by LRO/RLO */
1056 bd->pBiDi->levels[position]&=~UBIDI_LEVEL_OVERRIDE;
1057 }
1058 /* matching brackets are not overridden by LRO/RLO */
1059 bd->pBiDi->levels[bd->openings[idx].position]&=~UBIDI_LEVEL_OVERRIDE;
1060 return TRUE;
1061 }
1062 /* We get here only if the ON character is not a matching closing
1063 bracket or it is a case of N0d */
1064 /* Now see if it is an opening bracket */
1065 if(c)
1066 match=u_getBidiPairedBracket(c); /* get the matching char */
1067 else
1068 match=0;
1069 if(match!=c && /* has a matching char */
0f5d89e8 1070 ubidi_getPairedBracketType(c)==U_BPT_OPEN) { /* opening bracket */
57a6839d
A
1071 /* special case: process synonyms
1072 create an opening entry for each synonym */
1073 if(match==0x232A) { /* RIGHT-POINTING ANGLE BRACKET */
1074 if(!bracketAddOpening(bd, 0x3009, position))
1075 return FALSE;
1076 }
1077 else if(match==0x3009) { /* RIGHT ANGLE BRACKET */
1078 if(!bracketAddOpening(bd, 0x232A, position))
1079 return FALSE;
1080 }
1081 if(!bracketAddOpening(bd, match, position))
1082 return FALSE;
1083 }
1084 }
1085 level=bd->pBiDi->levels[position];
1086 if(level&UBIDI_LEVEL_OVERRIDE) { /* X4, X5 */
1087 newProp=level&1;
1088 if(dirProp!=S && dirProp!=WS && dirProp!=ON)
1089 dirProps[position]=newProp;
1090 pLastIsoRun->lastBase=newProp;
1091 pLastIsoRun->lastStrong=newProp;
f3c0d7a5 1092 pLastIsoRun->contextDir=(UBiDiDirection)newProp;
57a6839d
A
1093 pLastIsoRun->contextPos=position;
1094 }
1095 else if(dirProp<=R || dirProp==AL) {
1096 newProp=DIR_FROM_STRONG(dirProp);
1097 pLastIsoRun->lastBase=dirProp;
1098 pLastIsoRun->lastStrong=dirProp;
f3c0d7a5 1099 pLastIsoRun->contextDir=(UBiDiDirection)newProp;
57a6839d
A
1100 pLastIsoRun->contextPos=position;
1101 }
1102 else if(dirProp==EN) {
1103 pLastIsoRun->lastBase=EN;
1104 if(pLastIsoRun->lastStrong==L) {
1105 newProp=L; /* W7 */
1106 if(!bd->isNumbersSpecial)
1107 dirProps[position]=ENL;
f3c0d7a5 1108 pLastIsoRun->contextDir=(UBiDiDirection)L;
57a6839d
A
1109 pLastIsoRun->contextPos=position;
1110 }
1111 else {
1112 newProp=R; /* N0 */
1113 if(pLastIsoRun->lastStrong==AL)
1114 dirProps[position]=AN; /* W2 */
1115 else
1116 dirProps[position]=ENR;
f3c0d7a5 1117 pLastIsoRun->contextDir=(UBiDiDirection)R;
57a6839d
A
1118 pLastIsoRun->contextPos=position;
1119 }
1120 }
1121 else if(dirProp==AN) {
1122 newProp=R; /* N0 */
1123 pLastIsoRun->lastBase=AN;
f3c0d7a5 1124 pLastIsoRun->contextDir=(UBiDiDirection)R;
57a6839d
A
1125 pLastIsoRun->contextPos=position;
1126 }
1127 else if(dirProp==NSM) {
1128 /* if the last real char was ON, change NSM to ON so that it
1129 will stay ON even if the last real char is a bracket which
1130 may be changed to L or R */
1131 newProp=pLastIsoRun->lastBase;
1132 if(newProp==ON)
1133 dirProps[position]=newProp;
1134 }
1135 else {
1136 newProp=dirProp;
1137 pLastIsoRun->lastBase=dirProp;
1138 }
1139 if(newProp<=R || newProp==AL) {
1140 int32_t i;
1141 uint16_t flag=DIRPROP_FLAG(DIR_FROM_STRONG(newProp));
1142 for(i=pLastIsoRun->start; i<pLastIsoRun->limit; i++)
1143 if(position>bd->openings[i].position)
1144 bd->openings[i].flags|=flag;
1145 }
1146 return TRUE;
b75a7d8f
A
1147}
1148
1149/* perform (X1)..(X9) ------------------------------------------------------- */
1150
374ca955
A
1151/* determine if the text is mixed-directional or single-directional */
1152static UBiDiDirection
73c04bcf
A
1153directionFromFlags(UBiDi *pBiDi) {
1154 Flags flags=pBiDi->flags;
374ca955
A
1155 /* if the text contains AN and neutrals, then some neutrals may become RTL */
1156 if(!(flags&MASK_RTL || ((flags&DIRPROP_FLAG(AN)) && (flags&MASK_POSSIBLE_N)))) {
1157 return UBIDI_LTR;
1158 } else if(!(flags&MASK_LTR)) {
1159 return UBIDI_RTL;
1160 } else {
1161 return UBIDI_MIXED;
1162 }
1163}
1164
b75a7d8f
A
1165/*
1166 * Resolve the explicit levels as specified by explicit embedding codes.
1167 * Recalculate the flags to have them reflect the real properties
1168 * after taking the explicit embeddings into account.
1169 *
1170 * The BiDi algorithm is designed to result in the same behavior whether embedding
1171 * levels are externally specified (from "styled text", supposedly the preferred
57a6839d
A
1172 * method) or set by explicit embedding codes (LRx, RLx, PDF, FSI, PDI) in the plain text.
1173 * That is why (X9) instructs to remove all not-isolate explicit codes (and BN).
1174 * However, in a real implementation, the removal of these codes and their index
b75a7d8f
A
1175 * positions in the plain text is undesirable since it would result in
1176 * reallocated, reindexed text.
1177 * Instead, this implementation leaves the codes in there and just ignores them
1178 * in the subsequent processing.
57a6839d 1179 * In order to get the same reordering behavior, positions with a BN or a not-isolate
b75a7d8f
A
1180 * explicit embedding code just get the same level assigned as the last "real"
1181 * character.
1182 *
1183 * Some implementations, not this one, then overwrite some of these
1184 * directionality properties at "real" same-level-run boundaries by
1185 * L or R codes so that the resolution of weak types can be performed on the
1186 * entire paragraph at once instead of having to parse it once more and
1187 * perform that resolution on same-level-runs.
1188 * This limits the scope of the implicit rules in effectively
1189 * the same way as the run limits.
1190 *
57a6839d
A
1191 * Instead, this implementation does not modify these codes, except for
1192 * paired brackets whose properties (ON) may be replaced by L or R.
b75a7d8f
A
1193 * On one hand, the paragraph has to be scanned for same-level-runs, but
1194 * on the other hand, this saves another loop to reset these codes,
1195 * or saves making and modifying a copy of dirProps[].
1196 *
1197 *
1198 * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm.
1199 *
1200 *
1201 * Handling the stack of explicit levels (Xn):
1202 *
57a6839d
A
1203 * With the BiDi stack of explicit levels, as pushed with each
1204 * LRE, RLE, LRO, RLO, LRI, RLI and FSI and popped with each PDF and PDI,
1205 * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL.
b75a7d8f
A
1206 *
1207 * In order to have a correct push-pop semantics even in the case of overflows,
57a6839d
A
1208 * overflow counters and a valid isolate counter are used as described in UAX#9
1209 * section 3.3.2 "Explicit Levels and Directions".
b75a7d8f
A
1210 *
1211 * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
57a6839d
A
1212 *
1213 * Returns normally the direction; -1 if there was a memory shortage
1214 *
b75a7d8f 1215 */
b75a7d8f 1216static UBiDiDirection
57a6839d
A
1217resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
1218 DirProp *dirProps=pBiDi->dirProps;
2ca993e8 1219 uint16_t *dirInsert = pBiDi->dirInsert; /* may be NULL */
b75a7d8f 1220 UBiDiLevel *levels=pBiDi->levels;
73c04bcf
A
1221 const UChar *text=pBiDi->text;
1222
b75a7d8f
A
1223 int32_t i=0, length=pBiDi->length;
1224 Flags flags=pBiDi->flags; /* collect all directionalities in the text */
1225 DirProp dirProp;
2ca993e8
A
1226 int32_t dirInsertValue;
1227 int8_t dirInsertIndex; /* position within dirInsertValue, if any */
73c04bcf 1228 UBiDiLevel level=GET_PARALEVEL(pBiDi, 0);
b75a7d8f 1229 UBiDiDirection direction;
57a6839d
A
1230 pBiDi->isolateCount=0;
1231
1232 if(U_FAILURE(*pErrorCode)) { return UBIDI_LTR; }
b75a7d8f
A
1233
1234 /* determine if the text is mixed-directional or single-directional */
73c04bcf 1235 direction=directionFromFlags(pBiDi);
b75a7d8f 1236
57a6839d
A
1237 /* we may not need to resolve any explicit levels */
1238 if((direction!=UBIDI_MIXED)) {
b75a7d8f 1239 /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
57a6839d
A
1240 return direction;
1241 }
1242 if(pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL) {
1243 /* inverse BiDi: mixed, but all characters are at the same embedding level */
b75a7d8f 1244 /* set all levels to the paragraph level */
57a6839d
A
1245 int32_t paraIndex, start, limit;
1246 for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) {
1247 if(paraIndex==0)
1248 start=0;
1249 else
1250 start=pBiDi->paras[paraIndex-1].limit;
1251 limit=pBiDi->paras[paraIndex].limit;
1252 level=pBiDi->paras[paraIndex].level;
1253 for(i=start; i<limit; i++)
1254 levels[i]=level;
b75a7d8f 1255 }
57a6839d
A
1256 return direction; /* no bracket matching for inverse BiDi */
1257 }
1258 if(!(flags&(MASK_EXPLICIT|MASK_ISO))) {
1259 /* no embeddings, set all levels to the paragraph level */
1260 /* we still have to perform bracket matching */
1261 int32_t paraIndex, start, limit;
1262 BracketData bracketData;
1263 bracketInit(pBiDi, &bracketData);
1264 for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) {
1265 if(paraIndex==0)
1266 start=0;
1267 else
1268 start=pBiDi->paras[paraIndex-1].limit;
1269 limit=pBiDi->paras[paraIndex].limit;
1270 level=pBiDi->paras[paraIndex].level;
1271 for(i=start; i<limit; i++) {
1272 levels[i]=level;
1273 dirProp=dirProps[i];
1274 if(dirProp==BN)
1275 continue;
1276 if(dirProp==B) {
1277 if((i+1)<length) {
1278 if(text[i]==CR && text[i+1]==LF)
1279 continue; /* skip CR when followed by LF */
1280 bracketProcessB(&bracketData, level);
1281 }
1282 continue;
1283 }
1284 if(!bracketProcessChar(&bracketData, i)) {
1285 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1286 return UBIDI_LTR;
1287 }
1288 }
1289 }
1290 return direction;
1291 }
1292 {
b75a7d8f
A
1293 /* continue to perform (Xn) */
1294
1295 /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
1296 /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */
57a6839d
A
1297 UBiDiLevel embeddingLevel=level, newLevel;
1298 UBiDiLevel previousLevel=level; /* previous level for regular (not CC) characters */
1299 int32_t lastCcPos=0; /* index of last effective LRx,RLx, PDx */
2ca993e8 1300 DirProp lastCcDirProp=0; /* dirProp of last effective LRx,RLx, PDx */
57a6839d
A
1301
1302 /* The following stack remembers the embedding level and the ISOLATE flag of level runs.
1303 stackLast points to its current entry. */
1304 uint16_t stack[UBIDI_MAX_EXPLICIT_LEVEL+2]; /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL
1305 but we need one more entry as base */
1306 uint32_t stackLast=0;
1307 int32_t overflowIsolateCount=0;
1308 int32_t overflowEmbeddingCount=0;
1309 int32_t validIsolateCount=0;
1310 BracketData bracketData;
1311 bracketInit(pBiDi, &bracketData);
1312 stack[0]=level; /* initialize base entry to para level, no override, no isolate */
b75a7d8f
A
1313
1314 /* recalculate the flags */
1315 flags=0;
1316
2ca993e8
A
1317 dirInsertValue = 0;
1318 dirInsertIndex = -1; /* indicate that we have not checked dirInsert yet */
1319 for(i=0; i<length; ) { /* now conditionally increment at end */
1320 if (dirInsert != NULL && dirInsertIndex < 0) {
1321 dirInsertValue = dirInsert[i];
1322 }
1323 if (dirInsertValue > 0) {
1324 dirInsertIndex++;
1325 dirProp = (DirProp)stdDirFromInsertDir[dirInsertValue & 0x000F];
1326 dirInsertValue >>= 4;
1327 } else {
1328 dirInsertIndex = -1;
1329 dirProp=dirProps[i];
1330 }
b75a7d8f
A
1331 switch(dirProp) {
1332 case LRE:
b75a7d8f 1333 case RLE:
57a6839d 1334 case LRO:
b75a7d8f 1335 case RLO:
57a6839d
A
1336 /* (X2, X3, X4, X5) */
1337 flags|=DIRPROP_FLAG(BN);
1338 levels[i]=previousLevel;
1339 if (dirProp==LRE || dirProp==LRO)
1340 /* least greater even level */
1341 newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1));
1342 else
1343 /* least greater odd level */
1344 newLevel=(UBiDiLevel)((NO_OVERRIDE(embeddingLevel)+1)|1);
1345 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 &&
1346 overflowEmbeddingCount==0) {
1347 lastCcPos=i;
2ca993e8 1348 lastCcDirProp = dirProp;
b75a7d8f 1349 embeddingLevel=newLevel;
57a6839d 1350 if(dirProp==LRO || dirProp==RLO)
b75a7d8f 1351 embeddingLevel|=UBIDI_LEVEL_OVERRIDE;
57a6839d
A
1352 stackLast++;
1353 stack[stackLast]=embeddingLevel;
1354 /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE and RLE
73c04bcf
A
1355 since this has already been done for newLevel which is
1356 the source for embeddingLevel.
1357 */
b75a7d8f 1358 } else {
57a6839d
A
1359 if(overflowIsolateCount==0)
1360 overflowEmbeddingCount++;
b75a7d8f 1361 }
b75a7d8f
A
1362 break;
1363 case PDF:
1364 /* (X7) */
57a6839d
A
1365 flags|=DIRPROP_FLAG(BN);
1366 levels[i]=previousLevel;
b75a7d8f 1367 /* handle all the overflow cases first */
57a6839d
A
1368 if(overflowIsolateCount) {
1369 break;
b75a7d8f 1370 }
57a6839d
A
1371 if(overflowEmbeddingCount) {
1372 overflowEmbeddingCount--;
1373 break;
1374 }
1375 if(stackLast>0 && stack[stackLast]<ISOLATE) { /* not an isolate entry */
1376 lastCcPos=i;
2ca993e8 1377 lastCcDirProp = dirProp;
57a6839d
A
1378 stackLast--;
1379 embeddingLevel=(UBiDiLevel)stack[stackLast];
1380 }
1381 break;
1382 case LRI:
1383 case RLI:
1384 flags|=(DIRPROP_FLAG(ON)|DIRPROP_FLAG_LR(embeddingLevel));
1385 levels[i]=NO_OVERRIDE(embeddingLevel);
1386 if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
2ca993e8 1387 bracketProcessBoundary(&bracketData, lastCcPos, lastCcDirProp,
57a6839d
A
1388 previousLevel, embeddingLevel);
1389 flags|=DIRPROP_FLAG_MULTI_RUNS;
1390 }
1391 previousLevel=embeddingLevel;
1392 /* (X5a, X5b) */
1393 if(dirProp==LRI)
1394 /* least greater even level */
1395 newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1));
1396 else
1397 /* least greater odd level */
1398 newLevel=(UBiDiLevel)((NO_OVERRIDE(embeddingLevel)+1)|1);
1399 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 &&
1400 overflowEmbeddingCount==0) {
1401 flags|=DIRPROP_FLAG(dirProp);
1402 lastCcPos=i;
2ca993e8 1403 lastCcDirProp = dirProp;
57a6839d
A
1404 validIsolateCount++;
1405 if(validIsolateCount>pBiDi->isolateCount)
1406 pBiDi->isolateCount=validIsolateCount;
1407 embeddingLevel=newLevel;
1408 /* we can increment stackLast without checking because newLevel
1409 will exceed UBIDI_MAX_EXPLICIT_LEVEL before stackLast overflows */
1410 stackLast++;
1411 stack[stackLast]=embeddingLevel+ISOLATE;
1412 bracketProcessLRI_RLI(&bracketData, embeddingLevel);
1413 } else {
1414 /* make it WS so that it is handled by adjustWSLevels() */
2ca993e8
A
1415 if (dirInsertIndex < 0) {
1416 dirProps[i]=WS;
1417 } else {
1418 dirInsert[i] &= ~(0x000F << (4*dirInsertIndex));
1419 dirInsert[i] |= (Insert_WS << (4*dirInsertIndex));
1420 }
57a6839d
A
1421 overflowIsolateCount++;
1422 }
1423 break;
1424 case PDI:
1425 if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
2ca993e8 1426 bracketProcessBoundary(&bracketData, lastCcPos, lastCcDirProp,
57a6839d
A
1427 previousLevel, embeddingLevel);
1428 flags|=DIRPROP_FLAG_MULTI_RUNS;
1429 }
1430 /* (X6a) */
1431 if(overflowIsolateCount) {
1432 overflowIsolateCount--;
1433 /* make it WS so that it is handled by adjustWSLevels() */
2ca993e8
A
1434 if (dirInsertIndex < 0) {
1435 dirProps[i]=WS;
1436 } else {
1437 dirInsert[i] &= ~(0x000F << (4*dirInsertIndex));
1438 dirInsert[i] |= (Insert_WS << (4*dirInsertIndex));
1439 }
57a6839d
A
1440 }
1441 else if(validIsolateCount) {
1442 flags|=DIRPROP_FLAG(PDI);
1443 lastCcPos=i;
2ca993e8 1444 lastCcDirProp = dirProp;
57a6839d
A
1445 overflowEmbeddingCount=0;
1446 while(stack[stackLast]<ISOLATE) /* pop embedding entries */
1447 stackLast--; /* until the last isolate entry */
1448 stackLast--; /* pop also the last isolate entry */
1449 validIsolateCount--;
1450 bracketProcessPDI(&bracketData);
1451 } else
1452 /* make it WS so that it is handled by adjustWSLevels() */
2ca993e8
A
1453 if (dirInsertIndex < 0) {
1454 dirProps[i]=WS;
1455 } else {
1456 dirInsert[i] &= ~(0x000F << (4*dirInsertIndex));
1457 dirInsert[i] |= (Insert_WS << (4*dirInsertIndex));
1458 }
57a6839d
A
1459 embeddingLevel=(UBiDiLevel)stack[stackLast]&~ISOLATE;
1460 flags|=(DIRPROP_FLAG(ON)|DIRPROP_FLAG_LR(embeddingLevel));
1461 previousLevel=embeddingLevel;
1462 levels[i]=NO_OVERRIDE(embeddingLevel);
b75a7d8f
A
1463 break;
1464 case B:
57a6839d
A
1465 flags|=DIRPROP_FLAG(B);
1466 levels[i]=GET_PARALEVEL(pBiDi, i);
73c04bcf 1467 if((i+1)<length) {
57a6839d
A
1468 if(text[i]==CR && text[i+1]==LF)
1469 break; /* skip CR when followed by LF */
1470 overflowEmbeddingCount=overflowIsolateCount=0;
1471 validIsolateCount=0;
1472 stackLast=0;
1473 previousLevel=embeddingLevel=GET_PARALEVEL(pBiDi, i+1);
1474 stack[0]=embeddingLevel; /* initialize base entry to para level, no override, no isolate */
1475 bracketProcessB(&bracketData, embeddingLevel);
73c04bcf 1476 }
b75a7d8f
A
1477 break;
1478 case BN:
1479 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
1480 /* they will get their levels set correctly in adjustWSLevels() */
57a6839d 1481 levels[i]=previousLevel;
b75a7d8f
A
1482 flags|=DIRPROP_FLAG(BN);
1483 break;
1484 default:
57a6839d
A
1485 /* all other types are normal characters and get the "real" level */
1486 if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
2ca993e8 1487 bracketProcessBoundary(&bracketData, lastCcPos, lastCcDirProp,
57a6839d
A
1488 previousLevel, embeddingLevel);
1489 flags|=DIRPROP_FLAG_MULTI_RUNS;
1490 if(embeddingLevel&UBIDI_LEVEL_OVERRIDE)
1491 flags|=DIRPROP_FLAG_O(embeddingLevel);
1492 else
1493 flags|=DIRPROP_FLAG_E(embeddingLevel);
b75a7d8f 1494 }
57a6839d
A
1495 previousLevel=embeddingLevel;
1496 levels[i]=embeddingLevel;
1497 if(!bracketProcessChar(&bracketData, i))
f3c0d7a5 1498 return (UBiDiDirection)-1;
57a6839d
A
1499 /* the dirProp may have been changed in bracketProcessChar() */
1500 flags|=DIRPROP_FLAG(dirProps[i]);
b75a7d8f
A
1501 break;
1502 }
2ca993e8
A
1503 if (dirInsertIndex < 0) {
1504 ++i;
1505 }
b75a7d8f 1506 }
57a6839d 1507 if(flags&MASK_EMBEDDING)
b75a7d8f 1508 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
57a6839d 1509 if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B)))
73c04bcf 1510 flags|=DIRPROP_FLAG(L);
b75a7d8f
A
1511 /* again, determine if the text is mixed-directional or single-directional */
1512 pBiDi->flags=flags;
73c04bcf 1513 direction=directionFromFlags(pBiDi);
b75a7d8f
A
1514 }
1515 return direction;
1516}
1517
1518/*
1519 * Use a pre-specified embedding levels array:
1520 *
1521 * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
1522 * ignore all explicit codes (X9),
1523 * and check all the preset levels.
1524 *
1525 * Recalculate the flags to have them reflect the real properties
1526 * after taking the explicit embeddings into account.
1527 */
1528static UBiDiDirection
1529checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
57a6839d 1530 DirProp *dirProps=pBiDi->dirProps;
b75a7d8f 1531 UBiDiLevel *levels=pBiDi->levels;
57a6839d 1532 int32_t isolateCount=0;
73c04bcf 1533
f3c0d7a5 1534 int32_t length=pBiDi->length;
b75a7d8f 1535 Flags flags=0; /* collect all directionalities in the text */
57a6839d 1536 pBiDi->isolateCount=0;
b75a7d8f 1537
f3c0d7a5
A
1538 int32_t currentParaIndex = 0;
1539 int32_t currentParaLimit = pBiDi->paras[0].limit;
1540 int32_t currentParaLevel = pBiDi->paraLevel;
1541
1542 for(int32_t i=0; i<length; ++i) {
1543 UBiDiLevel level=levels[i];
1544 DirProp dirProp=dirProps[i];
57a6839d
A
1545 if(dirProp==LRI || dirProp==RLI) {
1546 isolateCount++;
1547 if(isolateCount>pBiDi->isolateCount)
1548 pBiDi->isolateCount=isolateCount;
1549 }
1550 else if(dirProp==PDI)
1551 isolateCount--;
1552 else if(dirProp==B)
1553 isolateCount=0;
f3c0d7a5
A
1554
1555 // optimized version of int32_t currentParaLevel = GET_PARALEVEL(pBiDi, i);
1556 if (pBiDi->defaultParaLevel != 0 &&
1557 i == currentParaLimit && (currentParaIndex + 1) < pBiDi->paraCount) {
1558 currentParaLevel = pBiDi->paras[++currentParaIndex].level;
1559 currentParaLimit = pBiDi->paras[currentParaIndex].limit;
1560 }
1561
1562 UBiDiLevel overrideFlag = level & UBIDI_LEVEL_OVERRIDE;
1563 level &= ~UBIDI_LEVEL_OVERRIDE;
1564 if (level < currentParaLevel || UBIDI_MAX_EXPLICIT_LEVEL < level) {
1565 if (level == 0) {
1566 if (dirProp == B) {
1567 // Paragraph separators are ok with explicit level 0.
1568 // Prevents reordering of paragraphs.
1569 } else {
1570 // Treat explicit level 0 as a wildcard for the paragraph level.
1571 // Avoid making the caller guess what the paragraph level would be.
1572 level = (UBiDiLevel)currentParaLevel;
1573 levels[i] = level | overrideFlag;
1574 }
1575 } else {
1576 // 1 <= level < currentParaLevel or UBIDI_MAX_EXPLICIT_LEVEL < level
1577 /* level out of bounds */
1578 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1579 return UBIDI_LTR;
1580 }
1581 }
1582 if (overrideFlag != 0) {
b75a7d8f 1583 /* keep the override flag in levels[i] but adjust the flags */
b75a7d8f
A
1584 flags|=DIRPROP_FLAG_O(level);
1585 } else {
1586 /* set the flags */
73c04bcf 1587 flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp);
b75a7d8f 1588 }
b75a7d8f 1589 }
57a6839d 1590 if(flags&MASK_EMBEDDING)
b75a7d8f 1591 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
b75a7d8f
A
1592 /* determine if the text is mixed-directional or single-directional */
1593 pBiDi->flags=flags;
73c04bcf
A
1594 return directionFromFlags(pBiDi);
1595}
1596
46f4442e
A
1597/******************************************************************
1598 The Properties state machine table
1599*******************************************************************
1600
1601 All table cells are 8 bits:
1602 bits 0..4: next state
1603 bits 5..7: action to perform (if > 0)
1604
1605 Cells may be of format "n" where n represents the next state
1606 (except for the rightmost column).
1607 Cells may also be of format "s(x,y)" where x represents an action
1608 to perform and y represents the next state.
1609
1610*******************************************************************
1611 Definitions and type for properties state table
1612*******************************************************************
1613*/
/* number of columns of the properties state table, the result column included */
#define IMPTABPROPS_COLUMNS 16
/* index of the rightmost column, which holds the reduced property of a run */
#define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1)
/* bits 0..4 of a cell: next state */
#define GET_STATEPROPS(cell) ((cell)&0x1f)
/* bits 5..7 of a cell: action to perform (if > 0) */
#define GET_ACTIONPROPS(cell) ((cell)>>5)
/* build a cell from an action and a next state; both arguments are
   parenthesized so that any expression may be passed safely */
#define s(action, newState) ((uint8_t)((newState)+((action)<<5)))
73c04bcf
A
1619
/* dirProp regrouped: maps each directional property, in the order listed
   below, to the index of its column in the properties state table */
static const uint8_t groupProp[] =
{
    /* L   */  0,
    /* R   */  1,
    /* EN  */  2,
    /* ES  */  7,
    /* ET  */  8,
    /* AN  */  3,
    /* CS  */  9,
    /* B   */  6,
    /* S   */  5,
    /* WS  */  4,
    /* ON  */  4,
    /* LRE */ 10,
    /* LRO */ 10,
    /* AL  */ 12,
    /* RLE */ 10,
    /* RLO */ 10,
    /* PDF */ 10,
    /* NSM */ 11,
    /* BN  */ 10,
    /* FSI */  4,
    /* LRI */  4,
    /* RLI */  4,
    /* PDI */  4,
    /* ENL */ 13,
    /* ENR */ 14
};
46f4442e
A
/* reduced dirProp */
enum {
    DirProp_L=0,
    DirProp_R=1,
    DirProp_EN=2,
    DirProp_AN=3,
    DirProp_ON=4,
    DirProp_S=5,
    DirProp_B=6
};
1626
1627/******************************************************************
1628
1629 PROPERTIES STATE TABLE
1630
1631 In table impTabProps,
57a6839d 1632 - the ON column regroups ON and WS, FSI, RLI, LRI and PDI
46f4442e
A
1633 - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF
1634 - the Res column is the reduced property assigned to a run
1635
1636 Action 1: process current run1, init new run1
1637 2: init new run2
1638 3: process run1, process run2, init new run1
1639 4: process run1, set run1=run2, init new run2
1640
1641 Notes:
1642 1) This table is used in resolveImplicitLevels().
1643 2) This table triggers actions when there is a change in the Bidi
1644 property of incoming characters (action 1).
1645 3) Most such property sequences are processed immediately (in
1646 fact, passed to processPropertySeq()).
1647 4) However, numbers are assembled as one sequence. This means
1648 that undefined situations (like CS following digits, until
1649 it is known if the next char will be a digit) are held until
1650 following chars define them.
1651 Example: digits followed by CS, then comes another CS or ON;
1652 the digits will be processed, then the CS assigned
1653 as the start of an ON sequence (action 3).
1654 5) There are cases where more than one sequence must be
1655 processed, for instance digits followed by CS followed by L:
1656 the digits must be processed as one sequence, and the CS
1657 must be processed as an ON sequence, all this before starting
1658 assembling chars for the opening L sequence.
1659
1660
1661*/
73c04bcf
A
1662static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] =
1663{
57a6839d
A
1664/* L , R , EN , AN , ON , S , B , ES , ET , CS , BN , NSM , AL , ENL , ENR , Res */
1665/* 0 Init */ { 1 , 2 , 4 , 5 , 7 , 15 , 17 , 7 , 9 , 7 , 0 , 7 , 3 , 18 , 21 , DirProp_ON },
1666/* 1 L */ { 1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 1 , 1 , s(1,3),s(1,18),s(1,21), DirProp_L },
1667/* 2 R */ { s(1,1), 2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 2 , 2 , s(1,3),s(1,18),s(1,21), DirProp_R },
1668/* 3 AL */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8), 3 , 3 , 3 ,s(1,18),s(1,21), DirProp_R },
1669/* 4 EN */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10), 11 ,s(2,10), 4 , 4 , s(1,3), 18 , 21 , DirProp_EN },
1670/* 5 AN */ { s(1,1), s(1,2), s(1,4), 5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12), 5 , 5 , s(1,3),s(1,18),s(1,21), DirProp_AN },
1671/* 6 AL:EN/AN */ { s(1,1), s(1,2), 6 , 6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13), 6 , 6 , s(1,3), 18 , 21 , DirProp_AN },
1672/* 7 ON */ { s(1,1), s(1,2), s(1,4), s(1,5), 7 ,s(1,15),s(1,17), 7 ,s(2,14), 7 , 7 , 7 , s(1,3),s(1,18),s(1,21), DirProp_ON },
1673/* 8 AL:ON */ { s(1,1), s(1,2), s(1,6), s(1,6), 8 ,s(1,16),s(1,17), 8 , 8 , 8 , 8 , 8 , s(1,3),s(1,18),s(1,21), DirProp_ON },
1674/* 9 ET */ { s(1,1), s(1,2), 4 , s(1,5), 7 ,s(1,15),s(1,17), 7 , 9 , 7 , 9 , 9 , s(1,3), 18 , 21 , DirProp_ON },
1675/*10 EN+ES/CS */ { s(3,1), s(3,2), 4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 10 , s(4,7), s(3,3), 18 , 21 , DirProp_EN },
1676/*11 EN+ET */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 11 , s(1,7), 11 , 11 , s(1,3), 18 , 21 , DirProp_EN },
1677/*12 AN+CS */ { s(3,1), s(3,2), s(3,4), 5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 12 , s(4,7), s(3,3),s(3,18),s(3,21), DirProp_AN },
1678/*13 AL:EN/AN+CS */ { s(3,1), s(3,2), 6 , 6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8), 13 , s(4,8), s(3,3), 18 , 21 , DirProp_AN },
1679/*14 ON+ET */ { s(1,1), s(1,2), s(4,4), s(1,5), 7 ,s(1,15),s(1,17), 7 , 14 , 7 , 14 , 14 , s(1,3),s(4,18),s(4,21), DirProp_ON },
1680/*15 S */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7), 15 ,s(1,17), s(1,7), s(1,9), s(1,7), 15 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_S },
1681/*16 AL:S */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8), 16 ,s(1,17), s(1,8), s(1,8), s(1,8), 16 , s(1,8), s(1,3),s(1,18),s(1,21), DirProp_S },
1682/*17 B */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15), 17 , s(1,7), s(1,9), s(1,7), 17 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_B },
1683/*18 ENL */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,19), 20 ,s(2,19), 18 , 18 , s(1,3), 18 , 21 , DirProp_L },
1684/*19 ENL+ES/CS */ { s(3,1), s(3,2), 18 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 19 , s(4,7), s(3,3), 18 , 21 , DirProp_L },
1685/*20 ENL+ET */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 20 , s(1,7), 20 , 20 , s(1,3), 18 , 21 , DirProp_L },
1686/*21 ENR */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,22), 23 ,s(2,22), 21 , 21 , s(1,3), 18 , 21 , DirProp_AN },
1687/*22 ENR+ES/CS */ { s(3,1), s(3,2), 21 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 22 , s(4,7), s(3,3), 18 , 21 , DirProp_AN },
1688/*23 ENR+ET */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 23 , s(1,7), 23 , 23 , s(1,3), 18 , 21 , DirProp_AN }
73c04bcf
A
1689};
1690
57a6839d 1691/* we must undef macro s because the levels tables have a different
73c04bcf
A
1692 * structure (4 bits for action and 4 bits for next state).
1693 */
46f4442e
A
1694#undef s
1695
1696/******************************************************************
1697 The levels state machine tables
1698*******************************************************************
1699
1700 All table cells are 8 bits:
1701 bits 0..3: next state
1702 bits 4..7: action to perform (if > 0)
1703
1704 Cells may be of format "n" where n represents the next state
1705 (except for the rightmost column).
1706 Cells may also be of format "s(x,y)" where x represents an action
1707 to perform and y represents the next state.
1708
1709 This format limits each table to 16 states each and to 15 actions.
1710
1711*******************************************************************
1712 Definitions and type for levels state tables
1713*******************************************************************
1714*/
/*
 * Geometry and cell accessors for the levels state tables.
 * A table row has one column per reduced dirProp (DirProp_L..DirProp_B)
 * plus a final Res column. Each cell is 8 bits:
 *   bits 0..3: next state
 *   bits 4..7: action to perform (0 = none)
 * s(action, newState) packs a cell; its arguments are fully parenthesized so
 * that any expression may be passed without operator-precedence surprises.
 */
#define IMPTABLEVELS_COLUMNS (DirProp_B + 2)
#define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1)
#define GET_STATE(cell) ((cell)&0x0f)
#define GET_ACTION(cell) ((cell)>>4)
#define s(action, newState) ((uint8_t)((newState)+((action)<<4)))
73c04bcf
A
1720
1721typedef uint8_t ImpTab[][IMPTABLEVELS_COLUMNS];
1722typedef uint8_t ImpAct[];
1723
1724/* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct,
1725 * instead of having a pair of ImpTab and a pair of ImpAct.
1726 */
1727typedef struct ImpTabPair {
46f4442e
A
1728 const void * pImpTab[2];
1729 const void * pImpAct[2];
73c04bcf
A
1730} ImpTabPair;
1731
46f4442e
A
1732/******************************************************************
1733
1734 LEVELS STATE TABLES
1735
1736 In all levels state tables,
1737 - state 0 is the initial state
1738 - the Res column is the increment to add to the text level
1739 for this property sequence.
1740
1741 The impAct arrays for each table of a pair map the local action
1742 numbers of the table to the total list of actions. For instance,
1743 action 2 in a given table corresponds to the action number which
1744 appears in entry [2] of the impAct array for that table.
1745 The first entry of all impAct arrays must be 0.
1746
1747 Action 1: init conditional sequence
1748 2: prepend conditional sequence to current sequence
1749 3: set ON sequence to new level - 1
1750 4: init EN/AN/ON sequence
1751 5: fix EN/AN/ON sequence followed by R
1752 6: set previous level sequence to level 2
1753
1754 Notes:
1755 1) These tables are used in processPropertySeq(). The input
1756 is property sequences as determined by resolveImplicitLevels.
1757 2) Most such property sequences are processed immediately
1758 (levels are assigned).
1759 3) However, some sequences cannot be assigned a final level till
1760 one or more following sequences are received. For instance,
1761 ON following an R sequence within an even-level paragraph.
1762 If the following sequence is R, the ON sequence will be
1763 assigned basic run level+1, and so will the R sequence.
1764 4) S is generally handled like ON, since its level will be fixed
1765 to paragraph level in adjustWSLevels().
1766
1767*/
73c04bcf
A
1768
1769static const ImpTab impTabL_DEFAULT = /* Even paragraph level */
57a6839d 1770/* In this table, conditional sequences receive the lower possible level
73c04bcf
A
1771 until proven otherwise.
1772*/
1773{
1774/* L , R , EN , AN , ON , S , B , Res */
1775/* 0 : init */ { 0 , 1 , 0 , 2 , 0 , 0 , 0 , 0 },
46f4442e
A
1776/* 1 : R */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 1 },
1777/* 2 : AN */ { 0 , 1 , 0 , 2 , s(1,5), s(1,5), 0 , 2 },
1778/* 3 : R+EN/AN */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 2 },
57a6839d
A
1779/* 4 : R+ON */ { 0 , s(2,1), s(3,3), s(3,3), 4 , 4 , 0 , 0 },
1780/* 5 : AN+ON */ { 0 , s(2,1), 0 , s(3,2), 5 , 5 , 0 , 0 }
73c04bcf
A
1781};
1782static const ImpTab impTabR_DEFAULT = /* Odd paragraph level */
1783/* In this table, conditional sequences receive the lower possible level
1784 until proven otherwise.
1785*/
1786{
1787/* L , R , EN , AN , ON , S , B , Res */
1788/* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 },
46f4442e 1789/* 1 : L */ { 1 , 0 , 1 , 3 , s(1,4), s(1,4), 0 , 1 },
73c04bcf
A
1790/* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 },
1791/* 3 : L+AN */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 1 },
46f4442e 1792/* 4 : L+ON */ { s(2,1), 0 , s(2,1), 3 , 4 , 4 , 0 , 0 },
73c04bcf
A
1793/* 5 : L+AN+ON */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 0 }
1794};
57a6839d 1795static const ImpAct impAct0 = {0,1,2,3,4};
46f4442e
A
1796static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT,
1797 &impTabR_DEFAULT},
1798 {&impAct0, &impAct0}};
73c04bcf
A
1799
1800static const ImpTab impTabL_NUMBERS_SPECIAL = /* Even paragraph level */
57a6839d 1801/* In this table, conditional sequences receive the lower possible level
73c04bcf
A
1802 until proven otherwise.
1803*/
1804{
1805/* L , R , EN , AN , ON , S , B , Res */
57a6839d
A
1806/* 0 : init */ { 0 , 2 , s(1,1), s(1,1), 0 , 0 , 0 , 0 },
1807/* 1 : L+EN/AN */ { 0 , s(4,2), 1 , 1 , 0 , 0 , 0 , 0 },
1808/* 2 : R */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 1 },
1809/* 3 : R+ON */ { 0 , s(2,2), s(3,4), s(3,4), 3 , 3 , 0 , 0 },
1810/* 4 : R+EN/AN */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 2 }
1811};
46f4442e
A
1812static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL,
1813 &impTabR_DEFAULT},
1814 {&impAct0, &impAct0}};
73c04bcf
A
1815
1816static const ImpTab impTabL_GROUP_NUMBERS_WITH_R =
1817/* In this table, EN/AN+ON sequences receive levels as if associated with R
1818 until proven that there is L or sor/eor on both sides. AN is handled like EN.
1819*/
1820{
1821/* L , R , EN , AN , ON , S , B , Res */
46f4442e
A
1822/* 0 init */ { 0 , 3 , s(1,1), s(1,1), 0 , 0 , 0 , 0 },
1823/* 1 EN/AN */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 2 },
1824/* 2 EN/AN+ON */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 1 },
1825/* 3 R */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 1 },
1826/* 4 R+ON */ { s(2,0), 3 , 5 , 5 , 4 , s(2,0), s(2,0), 1 },
1827/* 5 R+EN/AN */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 2 }
73c04bcf
A
1828};
1829static const ImpTab impTabR_GROUP_NUMBERS_WITH_R =
1830/* In this table, EN/AN+ON sequences receive levels as if associated with R
1831 until proven that there is L on both sides. AN is handled like EN.
1832*/
1833{
1834/* L , R , EN , AN , ON , S , B , Res */
1835/* 0 init */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 0 },
1836/* 1 EN/AN */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 1 },
46f4442e
A
1837/* 2 L */ { 2 , 0 , s(1,4), s(1,4), s(1,3), 0 , 0 , 1 },
1838/* 3 L+ON */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 0 },
1839/* 4 L+EN/AN */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 1 }
73c04bcf
A
1840};
1841static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R = {
46f4442e
A
1842 {&impTabL_GROUP_NUMBERS_WITH_R,
1843 &impTabR_GROUP_NUMBERS_WITH_R},
1844 {&impAct0, &impAct0}};
73c04bcf
A
1845
1846
1847static const ImpTab impTabL_INVERSE_NUMBERS_AS_L =
1848/* This table is identical to the Default LTR table except that EN and AN are
1849 handled like L.
1850*/
1851{
1852/* L , R , EN , AN , ON , S , B , Res */
1853/* 0 : init */ { 0 , 1 , 0 , 0 , 0 , 0 , 0 , 0 },
46f4442e
A
1854/* 1 : R */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 1 },
1855/* 2 : AN */ { 0 , 1 , 0 , 0 , s(1,5), s(1,5), 0 , 2 },
1856/* 3 : R+EN/AN */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 2 },
1857/* 4 : R+ON */ { s(2,0), 1 , s(2,0), s(2,0), 4 , 4 , s(2,0), 1 },
1858/* 5 : AN+ON */ { s(2,0), 1 , s(2,0), s(2,0), 5 , 5 , s(2,0), 1 }
73c04bcf
A
1859};
1860static const ImpTab impTabR_INVERSE_NUMBERS_AS_L =
1861/* This table is identical to the Default RTL table except that EN and AN are
1862 handled like L.
1863*/
1864{
1865/* L , R , EN , AN , ON , S , B , Res */
1866/* 0 : init */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 0 },
46f4442e 1867/* 1 : L */ { 1 , 0 , 1 , 1 , s(1,4), s(1,4), 0 , 1 },
73c04bcf
A
1868/* 2 : EN/AN */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 1 },
1869/* 3 : L+AN */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 1 },
46f4442e 1870/* 4 : L+ON */ { s(2,1), 0 , s(2,1), s(2,1), 4 , 4 , 0 , 0 },
73c04bcf
A
1871/* 5 : L+AN+ON */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 0 }
1872};
1873static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L = {
46f4442e
A
1874 {&impTabL_INVERSE_NUMBERS_AS_L,
1875 &impTabR_INVERSE_NUMBERS_AS_L},
1876 {&impAct0, &impAct0}};
73c04bcf
A
1877
1878static const ImpTab impTabR_INVERSE_LIKE_DIRECT = /* Odd paragraph level */
1879/* In this table, conditional sequences receive the lower possible level
1880 until proven otherwise.
1881*/
1882{
1883/* L , R , EN , AN , ON , S , B , Res */
1884/* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 },
46f4442e 1885/* 1 : L */ { 1 , 0 , 1 , 2 , s(1,3), s(1,3), 0 , 1 },
73c04bcf 1886/* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 },
46f4442e
A
1887/* 3 : L+ON */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 0 },
1888/* 4 : L+ON+AN */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 3 },
1889/* 5 : L+AN+ON */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 2 },
1890/* 6 : L+ON+EN */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 1 }
73c04bcf 1891};
57a6839d 1892static const ImpAct impAct1 = {0,1,13,14};
73c04bcf
A
1893/* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc"
1894 */
1895static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = {
46f4442e
A
1896 {&impTabL_DEFAULT,
1897 &impTabR_INVERSE_LIKE_DIRECT},
1898 {&impAct0, &impAct1}};
73c04bcf
A
1899
1900static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS =
1901/* The case handled in this table is (visually): R EN L
1902*/
1903{
1904/* L , R , EN , AN , ON , S , B , Res */
46f4442e
A
1905/* 0 : init */ { 0 , s(6,3), 0 , 1 , 0 , 0 , 0 , 0 },
1906/* 1 : L+AN */ { 0 , s(6,3), 0 , 1 , s(1,2), s(3,0), 0 , 4 },
1907/* 2 : L+AN+ON */ { s(2,0), s(6,3), s(2,0), 1 , 2 , s(3,0), s(2,0), 3 },
1908/* 3 : R */ { 0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0), 0 , 3 },
1909/* 4 : R+ON */ { s(3,0), s(4,3), s(5,5), s(5,6), 4 , s(3,0), s(3,0), 3 },
1910/* 5 : R+EN */ { s(3,0), s(4,3), 5 , s(5,6), s(1,4), s(3,0), s(3,0), 4 },
1911/* 6 : R+AN */ { s(3,0), s(4,3), s(5,5), 6 , s(1,4), s(3,0), s(3,0), 4 }
73c04bcf
A
1912};
1913static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS =
1914/* The cases handled in this table are (visually): R EN L
1915 R L AN L
1916*/
1917{
1918/* L , R , EN , AN , ON , S , B , Res */
46f4442e
A
1919/* 0 : init */ { s(1,3), 0 , 1 , 1 , 0 , 0 , 0 , 0 },
1920/* 1 : R+EN/AN */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 1 },
1921/* 2 : R+EN/AN+ON */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 0 },
1922/* 3 : L */ { 3 , 0 , 3 , s(3,6), s(1,4), s(4,0), 0 , 1 },
1923/* 4 : L+ON */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 0 },
1924/* 5 : L+ON+EN */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 1 },
1925/* 6 : L+AN */ { s(5,3), s(4,0), 6 , 6 , 4 , s(4,0), s(4,0), 3 }
73c04bcf 1926};
57a6839d
A
1927static const ImpAct impAct2 = {0,1,2,5,6,7,8};
1928static const ImpAct impAct3 = {0,1,9,10,11,12};
73c04bcf 1929static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = {
46f4442e
A
1930 {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
1931 &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
57a6839d 1932 {&impAct2, &impAct3}};
73c04bcf
A
1933
1934static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = {
46f4442e
A
1935 {&impTabL_NUMBERS_SPECIAL,
1936 &impTabR_INVERSE_LIKE_DIRECT},
1937 {&impAct0, &impAct1}};
73c04bcf
A
1938
1939static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS =
1940/* The case handled in this table is (visually): R EN L
1941*/
1942{
1943/* L , R , EN , AN , ON , S , B , Res */
46f4442e
A
1944/* 0 : init */ { 0 , s(6,2), 1 , 1 , 0 , 0 , 0 , 0 },
1945/* 1 : L+EN/AN */ { 0 , s(6,2), 1 , 1 , 0 , s(3,0), 0 , 4 },
1946/* 2 : R */ { 0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0), 0 , 3 },
1947/* 3 : R+ON */ { s(3,0), s(4,2), s(5,4), s(5,4), 3 , s(3,0), s(3,0), 3 },
1948/* 4 : R+EN/AN */ { s(3,0), s(4,2), 4 , 4 , s(1,3), s(3,0), s(3,0), 4 }
73c04bcf
A
1949};
1950static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = {
46f4442e
A
1951 {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
1952 &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
57a6839d 1953 {&impAct2, &impAct3}};
73c04bcf 1954
46f4442e 1955#undef s
73c04bcf
A
1956
1957typedef struct {
46f4442e
A
1958 const ImpTab * pImpTab; /* level table pointer */
1959 const ImpAct * pImpAct; /* action map array */
73c04bcf
A
1960 int32_t startON; /* start of ON sequence */
1961 int32_t startL2EN; /* start of level 2 sequence */
1962 int32_t lastStrongRTL; /* index of last found R or AL */
1963 int32_t state; /* current state */
57a6839d 1964 int32_t runStart; /* start position of the run */
73c04bcf
A
1965 UBiDiLevel runLevel; /* run level before implicit solving */
1966} LevState;
1967
1968/*------------------------------------------------------------------------*/
1969
1970static void
1971addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag)
1972 /* param pos: position where to insert
1973 param flag: one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER
1974 */
1975{
1976#define FIRSTALLOC 10
1977 Point point;
1978 InsertPoints * pInsertPoints=&(pBiDi->insertPoints);
1979
1980 if (pInsertPoints->capacity == 0)
1981 {
f3c0d7a5 1982 pInsertPoints->points=static_cast<Point *>(uprv_malloc(sizeof(Point)*FIRSTALLOC));
73c04bcf
A
1983 if (pInsertPoints->points == NULL)
1984 {
1985 pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
1986 return;
1987 }
1988 pInsertPoints->capacity=FIRSTALLOC;
1989 }
1990 if (pInsertPoints->size >= pInsertPoints->capacity) /* no room for new point */
1991 {
f3c0d7a5
A
1992 Point * savePoints=pInsertPoints->points;
1993 pInsertPoints->points=static_cast<Point *>(uprv_realloc(pInsertPoints->points,
1994 pInsertPoints->capacity*2*sizeof(Point)));
73c04bcf
A
1995 if (pInsertPoints->points == NULL)
1996 {
1997 pInsertPoints->points=savePoints;
1998 pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
1999 return;
2000 }
2001 else pInsertPoints->capacity*=2;
2002 }
2003 point.pos=pos;
2004 point.flag=flag;
2005 pInsertPoints->points[pInsertPoints->size]=point;
2006 pInsertPoints->size++;
2007#undef FIRSTALLOC
b75a7d8f
A
2008}
2009
57a6839d
A
2010static void
2011setLevelsOutsideIsolates(UBiDi *pBiDi, int32_t start, int32_t limit, UBiDiLevel level)
2012{
2013 DirProp *dirProps=pBiDi->dirProps, dirProp;
2ca993e8 2014 uint16_t *dirInsert = pBiDi->dirInsert; /* may be NULL */
57a6839d 2015 UBiDiLevel *levels=pBiDi->levels;
2ca993e8
A
2016 int32_t dirInsertValue;
2017 int8_t dirInsertIndex; /* position within dirInsertValue, if any */
57a6839d 2018 int32_t isolateCount=0, k;
2ca993e8
A
2019 dirInsertValue = 0;
2020 dirInsertIndex = -1; /* indicate that we have not checked dirInsert yet */
57a6839d 2021 for(k=start; k<limit; k++) {
2ca993e8
A
2022 if (dirInsert != NULL && dirInsertIndex < 0) {
2023 dirInsertValue = dirInsert[k];
2024 }
2025 if (dirInsertValue > 0) {
2026 dirInsertIndex++;
2027 dirProp = (DirProp)stdDirFromInsertDir[dirInsertValue & 0x000F];
2028 dirInsertValue >>= 4;
2029 } else {
2030 dirInsertIndex = -1;
2031 dirProp=dirProps[k];
2032 }
57a6839d
A
2033 if(dirProp==PDI)
2034 isolateCount--;
2035 if(isolateCount==0)
2036 levels[k]=level;
2037 if(dirProp==LRI || dirProp==RLI)
2038 isolateCount++;
2039 }
2040}
2041
b75a7d8f
A
2042/* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
2043
2044/*
2045 * This implementation of the (Wn) rules applies all rules in one pass.
2046 * In order to do so, it needs a look-ahead of typically 1 character
2047 * (except for W5: sequences of ET) and keeps track of changes
2048 * in a rule Wp that affect a later Wq (p<q).
2049 *
b75a7d8f
A
2050 * The (Nn) and (In) rules are also performed in that same single loop,
2051 * but effectively one iteration behind for white space.
2052 *
2053 * Since all implicit rules are performed in one step, it is not necessary
2054 * to actually store the intermediate directional properties in dirProps[].
2055 */
2056
b75a7d8f 2057static void
73c04bcf
A
2058processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop,
2059 int32_t start, int32_t limit) {
2060 uint8_t cell, oldStateSeq, actionSeq;
46f4442e
A
2061 const ImpTab * pImpTab=pLevState->pImpTab;
2062 const ImpAct * pImpAct=pLevState->pImpAct;
73c04bcf
A
2063 UBiDiLevel * levels=pBiDi->levels;
2064 UBiDiLevel level, addLevel;
2065 InsertPoints * pInsertPoints;
2066 int32_t start0, k;
2067
2068 start0=start; /* save original start position */
46f4442e 2069 oldStateSeq=(uint8_t)pLevState->state;
73c04bcf
A
2070 cell=(*pImpTab)[oldStateSeq][_prop];
2071 pLevState->state=GET_STATE(cell); /* isolate the new state */
2072 actionSeq=(*pImpAct)[GET_ACTION(cell)]; /* isolate the action */
2073 addLevel=(*pImpTab)[pLevState->state][IMPTABLEVELS_RES];
2074
2075 if(actionSeq) {
2076 switch(actionSeq) {
2077 case 1: /* init ON seq */
2078 pLevState->startON=start0;
b75a7d8f 2079 break;
b75a7d8f 2080
73c04bcf
A
2081 case 2: /* prepend ON seq to current seq */
2082 start=pLevState->startON;
2083 break;
b75a7d8f 2084
57a6839d
A
2085 case 3: /* EN/AN after R+ON */
2086 level=pLevState->runLevel+1;
2087 setLevelsOutsideIsolates(pBiDi, pLevState->startON, start0, level);
2088 break;
2089
2090 case 4: /* EN/AN before R for NUMBERS_SPECIAL */
2091 level=pLevState->runLevel+2;
2092 setLevelsOutsideIsolates(pBiDi, pLevState->startON, start0, level);
2093 break;
2094
2095 case 5: /* L or S after possible relevant EN/AN */
73c04bcf
A
2096 /* check if we had EN after R/AL */
2097 if (pLevState->startL2EN >= 0) {
2098 addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
b75a7d8f 2099 }
73c04bcf
A
2100 pLevState->startL2EN=-1; /* not within previous if since could also be -2 */
2101 /* check if we had any relevant EN/AN after R/AL */
2102 pInsertPoints=&(pBiDi->insertPoints);
2103 if ((pInsertPoints->capacity == 0) ||
2104 (pInsertPoints->size <= pInsertPoints->confirmed))
2105 {
2106 /* nothing, just clean up */
2107 pLevState->lastStrongRTL=-1;
2108 /* check if we have a pending conditional segment */
2109 level=(*pImpTab)[oldStateSeq][IMPTABLEVELS_RES];
2110 if ((level & 1) && (pLevState->startON > 0)) { /* after ON */
2111 start=pLevState->startON; /* reset to basic run level */
b75a7d8f 2112 }
46f4442e 2113 if (_prop == DirProp_S) /* add LRM before S */
73c04bcf
A
2114 {
2115 addPoint(pBiDi, start0, LRM_BEFORE);
2116 pInsertPoints->confirmed=pInsertPoints->size;
b75a7d8f 2117 }
73c04bcf 2118 break;
b75a7d8f 2119 }
73c04bcf
A
2120 /* reset previous RTL cont to level for LTR text */
2121 for (k=pLevState->lastStrongRTL+1; k<start0; k++)
2122 {
2123 /* reset odd level, leave runLevel+2 as is */
2124 levels[k]=(levels[k] - 2) & ~1;
b75a7d8f 2125 }
73c04bcf
A
2126 /* mark insert points as confirmed */
2127 pInsertPoints->confirmed=pInsertPoints->size;
2128 pLevState->lastStrongRTL=-1;
46f4442e 2129 if (_prop == DirProp_S) /* add LRM before S */
73c04bcf
A
2130 {
2131 addPoint(pBiDi, start0, LRM_BEFORE);
2132 pInsertPoints->confirmed=pInsertPoints->size;
b75a7d8f 2133 }
73c04bcf 2134 break;
b75a7d8f 2135
57a6839d 2136 case 6: /* R/AL after possible relevant EN/AN */
73c04bcf
A
2137 /* just clean up */
2138 pInsertPoints=&(pBiDi->insertPoints);
2139 if (pInsertPoints->capacity > 0)
2140 /* remove all non confirmed insert points */
2141 pInsertPoints->size=pInsertPoints->confirmed;
2142 pLevState->startON=-1;
2143 pLevState->startL2EN=-1;
2144 pLevState->lastStrongRTL=limit - 1;
2145 break;
2146
57a6839d 2147 case 7: /* EN/AN after R/AL + possible cont */
73c04bcf 2148 /* check for real AN */
57a6839d 2149 if ((_prop == DirProp_AN) && (pBiDi->dirProps[start0] == AN) &&
73c04bcf
A
2150 (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))
2151 {
2152 /* real AN */
2153 if (pLevState->startL2EN == -1) /* if no relevant EN already found */
2154 {
2155 /* just note the righmost digit as a strong RTL */
2156 pLevState->lastStrongRTL=limit - 1;
2157 break;
b75a7d8f 2158 }
73c04bcf
A
2159 if (pLevState->startL2EN >= 0) /* after EN, no AN */
2160 {
2161 addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
2162 pLevState->startL2EN=-2;
2163 }
2164 /* note AN */
2165 addPoint(pBiDi, start0, LRM_BEFORE);
2166 break;
2167 }
2168 /* if first EN/AN after R/AL */
2169 if (pLevState->startL2EN == -1) {
2170 pLevState->startL2EN=start0;
b75a7d8f 2171 }
73c04bcf 2172 break;
b75a7d8f 2173
57a6839d 2174 case 8: /* note location of latest R/AL */
73c04bcf
A
2175 pLevState->lastStrongRTL=limit - 1;
2176 pLevState->startON=-1;
b75a7d8f 2177 break;
73c04bcf 2178
57a6839d 2179 case 9: /* L after R+ON/EN/AN */
73c04bcf
A
2180 /* include possible adjacent number on the left */
2181 for (k=start0-1; k>=0 && !(levels[k]&1); k--);
2182 if(k>=0) {
2183 addPoint(pBiDi, k, RLM_BEFORE); /* add RLM before */
2184 pInsertPoints=&(pBiDi->insertPoints);
2185 pInsertPoints->confirmed=pInsertPoints->size; /* confirm it */
2186 }
2187 pLevState->startON=start0;
b75a7d8f 2188 break;
73c04bcf 2189
57a6839d 2190 case 10: /* AN after L */
73c04bcf
A
2191 /* AN numbers between L text on both sides may be trouble. */
2192 /* tentatively bracket with LRMs; will be confirmed if followed by L */
2193 addPoint(pBiDi, start0, LRM_BEFORE); /* add LRM before */
2194 addPoint(pBiDi, start0, LRM_AFTER); /* add LRM after */
b75a7d8f 2195 break;
b75a7d8f 2196
57a6839d 2197 case 11: /* R after L+ON/EN/AN */
73c04bcf
A
2198 /* false alert, infirm LRMs around previous AN */
2199 pInsertPoints=&(pBiDi->insertPoints);
2200 pInsertPoints->size=pInsertPoints->confirmed;
46f4442e 2201 if (_prop == DirProp_S) /* add RLM before S */
73c04bcf
A
2202 {
2203 addPoint(pBiDi, start0, RLM_BEFORE);
2204 pInsertPoints->confirmed=pInsertPoints->size;
2205 }
2206 break;
b75a7d8f 2207
57a6839d 2208 case 12: /* L after L+ON/AN */
73c04bcf
A
2209 level=pLevState->runLevel + addLevel;
2210 for(k=pLevState->startON; k<start0; k++) {
2211 if (levels[k]<level)
2212 levels[k]=level;
b75a7d8f 2213 }
73c04bcf
A
2214 pInsertPoints=&(pBiDi->insertPoints);
2215 pInsertPoints->confirmed=pInsertPoints->size; /* confirm inserts */
2216 pLevState->startON=start0;
2217 break;
2218
57a6839d 2219 case 13: /* L after L+ON+EN/AN/ON */
73c04bcf
A
2220 level=pLevState->runLevel;
2221 for(k=start0-1; k>=pLevState->startON; k--) {
2222 if(levels[k]==level+3) {
2223 while(levels[k]==level+3) {
2224 levels[k--]-=2;
b75a7d8f 2225 }
73c04bcf
A
2226 while(levels[k]==level) {
2227 k--;
b75a7d8f
A
2228 }
2229 }
73c04bcf
A
2230 if(levels[k]==level+2) {
2231 levels[k]=level;
2232 continue;
b75a7d8f 2233 }
73c04bcf 2234 levels[k]=level+1;
b75a7d8f 2235 }
73c04bcf 2236 break;
b75a7d8f 2237
57a6839d 2238 case 14: /* R after L+ON+EN/AN/ON */
73c04bcf
A
2239 level=pLevState->runLevel+1;
2240 for(k=start0-1; k>=pLevState->startON; k--) {
2241 if(levels[k]>level) {
2242 levels[k]-=2;
b75a7d8f 2243 }
b75a7d8f 2244 }
73c04bcf 2245 break;
b75a7d8f 2246
73c04bcf 2247 default: /* we should never get here */
46f4442e 2248 U_ASSERT(FALSE);
73c04bcf 2249 break;
b75a7d8f
A
2250 }
2251 }
73c04bcf
A
2252 if((addLevel) || (start < start0)) {
2253 level=pLevState->runLevel + addLevel;
57a6839d
A
2254 if(start>=pLevState->runStart) {
2255 for(k=start; k<limit; k++) {
2256 levels[k]=level;
2257 }
2258 } else {
2259 setLevelsOutsideIsolates(pBiDi, start, limit, level);
73c04bcf
A
2260 }
2261 }
2262}
b75a7d8f 2263
57a6839d
A
2264/**
2265 * Returns the directionality of the last strong character at the end of the prologue, if any.
2266 * Requires prologue!=null.
2267 */
4388f060
A
2268static DirProp
2269lastL_R_AL(UBiDi *pBiDi) {
4388f060
A
2270 const UChar *text=pBiDi->prologue;
2271 int32_t length=pBiDi->proLength;
2272 int32_t i;
2273 UChar32 uchar;
2274 DirProp dirProp;
2275 for(i=length; i>0; ) {
2276 /* i is decremented by U16_PREV */
2277 U16_PREV(text, 0, i, uchar);
2278 dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
2279 if(dirProp==L) {
2280 return DirProp_L;
2281 }
2282 if(dirProp==R || dirProp==AL) {
2283 return DirProp_R;
2284 }
2285 if(dirProp==B) {
2286 return DirProp_ON;
2287 }
2288 }
2289 return DirProp_ON;
2290}
2291
57a6839d
A
2292/**
2293 * Returns the directionality of the first strong character, or digit, in the epilogue, if any.
2294 * Requires epilogue!=null.
2295 */
4388f060
A
2296static DirProp
2297firstL_R_AL_EN_AN(UBiDi *pBiDi) {
4388f060
A
2298 const UChar *text=pBiDi->epilogue;
2299 int32_t length=pBiDi->epiLength;
2300 int32_t i;
2301 UChar32 uchar;
2302 DirProp dirProp;
2303 for(i=0; i<length; ) {
2304 /* i is incremented by U16_NEXT */
2305 U16_NEXT(text, i, length, uchar);
2306 dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
2307 if(dirProp==L) {
2308 return DirProp_L;
2309 }
2310 if(dirProp==R || dirProp==AL) {
2311 return DirProp_R;
2312 }
2313 if(dirProp==EN) {
2314 return DirProp_EN;
2315 }
2316 if(dirProp==AN) {
2317 return DirProp_AN;
2318 }
2319 }
2320 return DirProp_ON;
2321}
2322
73c04bcf
A
2323static void
2324resolveImplicitLevels(UBiDi *pBiDi,
2325 int32_t start, int32_t limit,
2326 DirProp sor, DirProp eor) {
2327 const DirProp *dirProps=pBiDi->dirProps;
2ca993e8 2328 uint16_t *dirInsert = pBiDi->dirInsert; /* may be NULL */
57a6839d 2329 DirProp dirProp;
2ca993e8
A
2330 int32_t dirInsertValue;
2331 int8_t dirInsertIndex; /* position within dirInsertValue, if any */
73c04bcf
A
2332 LevState levState;
2333 int32_t i, start1, start2;
57a6839d 2334 uint16_t oldStateImp, stateImp, actionImp;
73c04bcf
A
2335 uint8_t gprop, resProp, cell;
2336 UBool inverseRTL;
2337 DirProp nextStrongProp=R;
2338 int32_t nextStrongPos=-1;
2339
2340 /* check for RTL inverse BiDi mode */
2341 /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to
2342 * loop on the text characters from end to start.
2343 * This would need a different properties state table (at least different
2344 * actions) and different levels state tables (maybe very similar to the
2345 * LTR corresponding ones.
2346 */
46f4442e
A
2347 inverseRTL=(UBool)
2348 ((start<pBiDi->lastArabicPos) && (GET_PARALEVEL(pBiDi, start) & 1) &&
2349 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
2350 pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL));
57a6839d
A
2351
2352 /* initialize for property and levels state tables */
73c04bcf
A
2353 levState.startL2EN=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
2354 levState.lastStrongRTL=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
57a6839d 2355 levState.runStart=start;
73c04bcf 2356 levState.runLevel=pBiDi->levels[start];
46f4442e
A
2357 levState.pImpTab=(const ImpTab*)((pBiDi->pImpTabPair)->pImpTab)[levState.runLevel&1];
2358 levState.pImpAct=(const ImpAct*)((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel&1];
4388f060
A
2359 if(start==0 && pBiDi->proLength>0) {
2360 DirProp lastStrong=lastL_R_AL(pBiDi);
2361 if(lastStrong!=DirProp_ON) {
2362 sor=lastStrong;
2363 }
2364 }
57a6839d
A
2365 /* The isolates[] entries contain enough information to
2366 resume the bidi algorithm in the same state as it was
2367 when it was interrupted by an isolate sequence. */
2ca993e8
A
2368 dirInsertValue = 0;
2369 if (dirInsert != NULL) {
2370 dirInsertValue = dirInsert[start];
2371 while (dirInsertValue > 0) {
2372 if ((dirInsertValue & 0x000F) == Insert_PDI) {
2373 break;
2374 }
2375 dirInsertValue >>= 4;
2376 }
2377 }
2378 if((dirProps[start]==PDI || dirInsertValue>0) && pBiDi->isolateCount >= 0) {
57a6839d
A
2379 levState.startON=pBiDi->isolates[pBiDi->isolateCount].startON;
2380 start1=pBiDi->isolates[pBiDi->isolateCount].start1;
2381 stateImp=pBiDi->isolates[pBiDi->isolateCount].stateImp;
2382 levState.state=pBiDi->isolates[pBiDi->isolateCount].state;
2383 pBiDi->isolateCount--;
73c04bcf 2384 } else {
57a6839d
A
2385 levState.startON=-1;
2386 start1=start;
2387 if(dirProps[start]==NSM)
2388 stateImp = 1 + sor;
2389 else
2390 stateImp=0;
2391 levState.state=0;
2392 processPropertySeq(pBiDi, &levState, sor, start, start);
73c04bcf 2393 }
57a6839d 2394 start2=start; /* to make Java compiler happy */
73c04bcf
A
2395
2396 for(i=start; i<=limit; i++) {
2397 if(i>=limit) {
57a6839d 2398 int32_t k;
2ca993e8
A
2399 dirInsertValue = 0;
2400 for(k=limit-1; k>start && dirInsertValue <= 0; k--) {
2401 dirProp = dirProps[k];
2402 if ((DIRPROP_FLAG(dirProp)&MASK_BN_EXPLICIT) == 0) {
2403 break;
2404 }
2405 dirProp = ON;
2406 if (dirInsert != NULL) {
2407 dirInsertValue = dirInsert[k];
2408 while (dirInsertValue > 0) {
2409 dirProp = (DirProp)stdDirFromInsertDir[dirInsertValue & 0x000F];
2410 if ((DIRPROP_FLAG(dirProp)&MASK_BN_EXPLICIT) == 0) {
2411 break;
2412 }
2413 dirInsertValue >>= 4;
2414 }
2415 }
2416 }
2417 if (k == start) {
2418 dirProp = dirProps[k];
2419 }
57a6839d
A
2420 if(dirProp==LRI || dirProp==RLI)
2421 break; /* no forced closing for sequence ending with LRI/RLI */
73c04bcf 2422 gprop=eor;
b75a7d8f 2423 } else {
73c04bcf 2424 DirProp prop, prop1;
57a6839d 2425 prop=dirProps[i];
b331163b
A
2426 if(prop==B) {
2427 pBiDi->isolateCount=-1; /* current isolates stack entry == none */
2428 }
73c04bcf
A
2429 if(inverseRTL) {
2430 if(prop==AL) {
2431 /* AL before EN does not make it AN */
2432 prop=R;
2433 } else if(prop==EN) {
2434 if(nextStrongPos<=i) {
2435 /* look for next strong char (L/R/AL) */
2436 int32_t j;
2437 nextStrongProp=R; /* set default */
2438 nextStrongPos=limit;
2439 for(j=i+1; j<limit; j++) {
57a6839d 2440 prop1=dirProps[j];
73c04bcf
A
2441 if(prop1==L || prop1==R || prop1==AL) {
2442 nextStrongProp=prop1;
2443 nextStrongPos=j;
2444 break;
2445 }
2446 }
2447 }
2448 if(nextStrongProp==AL) {
2449 prop=AN;
2450 }
b75a7d8f
A
2451 }
2452 }
73c04bcf 2453 gprop=groupProp[prop];
b75a7d8f 2454 }
73c04bcf
A
2455 oldStateImp=stateImp;
2456 cell=impTabProps[oldStateImp][gprop];
2457 stateImp=GET_STATEPROPS(cell); /* isolate the new state */
2458 actionImp=GET_ACTIONPROPS(cell); /* isolate the action */
2459 if((i==limit) && (actionImp==0)) {
2460 /* there is an unprocessed sequence if its property == eor */
2461 actionImp=1; /* process the last sequence */
2462 }
2463 if(actionImp) {
2464 resProp=impTabProps[oldStateImp][IMPTABPROPS_RES];
2465 switch(actionImp) {
2466 case 1: /* process current seq1, init new seq1 */
2467 processPropertySeq(pBiDi, &levState, resProp, start1, i);
2468 start1=i;
2469 break;
2470 case 2: /* init new seq2 */
2471 start2=i;
2472 break;
2473 case 3: /* process seq1, process seq2, init new seq1 */
2474 processPropertySeq(pBiDi, &levState, resProp, start1, start2);
46f4442e 2475 processPropertySeq(pBiDi, &levState, DirProp_ON, start2, i);
73c04bcf
A
2476 start1=i;
2477 break;
2478 case 4: /* process seq1, set seq1=seq2, init new seq2 */
2479 processPropertySeq(pBiDi, &levState, resProp, start1, start2);
2480 start1=start2;
2481 start2=i;
2482 break;
2483 default: /* we should never get here */
46f4442e 2484 U_ASSERT(FALSE);
73c04bcf
A
2485 break;
2486 }
b75a7d8f
A
2487 }
2488 }
57a6839d 2489
73c04bcf 2490 /* flush possible pending sequence, e.g. ON */
4388f060
A
2491 if(limit==pBiDi->length && pBiDi->epiLength>0) {
2492 DirProp firstStrong=firstL_R_AL_EN_AN(pBiDi);
2493 if(firstStrong!=DirProp_ON) {
2494 eor=firstStrong;
2495 }
2496 }
57a6839d
A
2497
2498 /* look for the last char not a BN or LRE/RLE/LRO/RLO/PDF */
2ca993e8
A
2499 dirInsertValue = 0;
2500 for(i=limit-1; i>start && dirInsertValue <= 0; i--) {
2501 dirProp=dirProps[i];
2502 if ((DIRPROP_FLAG(dirProp)&MASK_BN_EXPLICIT) == 0) {
2503 break;
2504 }
2505 dirProp = ON;
2506 if (dirInsert != NULL) {
2507 dirInsertValue = dirInsert[i];
2508 while (dirInsertValue > 0) {
2509 dirProp = (DirProp)stdDirFromInsertDir[dirInsertValue & 0x000F];
2510 if ((DIRPROP_FLAG(dirProp)&MASK_BN_EXPLICIT) == 0) {
2511 break;
2512 }
2513 dirInsertValue >>= 4;
2514 }
2515 }
2516 }
2517 if (i == start) {
2518 dirProp=dirProps[i];
2519 }
57a6839d
A
2520 if((dirProp==LRI || dirProp==RLI) && limit<pBiDi->length) {
2521 pBiDi->isolateCount++;
2522 pBiDi->isolates[pBiDi->isolateCount].stateImp=stateImp;
2523 pBiDi->isolates[pBiDi->isolateCount].state=levState.state;
2524 pBiDi->isolates[pBiDi->isolateCount].start1=start1;
2525 pBiDi->isolates[pBiDi->isolateCount].startON=levState.startON;
2526 }
2527 else
2528 processPropertySeq(pBiDi, &levState, eor, limit, limit);
b75a7d8f
A
2529}
2530
2531/* perform (L1) and (X9) ---------------------------------------------------- */
2532
2533/*
2534 * Reset the embedding levels for some non-graphic characters (L1).
2535 * This function also sets appropriate levels for BN, and
2536 * explicit embedding types that are supposed to have been removed
2537 * from the paragraph in (X9).
2538 */
2539static void
2540adjustWSLevels(UBiDi *pBiDi) {
2541 const DirProp *dirProps=pBiDi->dirProps;
2542 UBiDiLevel *levels=pBiDi->levels;
2543 int32_t i;
2544
2545 if(pBiDi->flags&MASK_WS) {
73c04bcf 2546 UBool orderParagraphsLTR=pBiDi->orderParagraphsLTR;
b75a7d8f
A
2547 Flags flag;
2548
2549 i=pBiDi->trailingWSStart;
2550 while(i>0) {
2551 /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
57a6839d 2552 while(i>0 && (flag=DIRPROP_FLAG(dirProps[--i]))&MASK_WS) {
73c04bcf
A
2553 if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
2554 levels[i]=0;
2555 } else {
2556 levels[i]=GET_PARALEVEL(pBiDi, i);
2557 }
b75a7d8f
A
2558 }
2559
2560 /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
2561 /* here, i+1 is guaranteed to be <length */
2562 while(i>0) {
57a6839d 2563 flag=DIRPROP_FLAG(dirProps[--i]);
b75a7d8f
A
2564 if(flag&MASK_BN_EXPLICIT) {
2565 levels[i]=levels[i+1];
73c04bcf
A
2566 } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
2567 levels[i]=0;
2568 break;
b75a7d8f 2569 } else if(flag&MASK_B_S) {
73c04bcf 2570 levels[i]=GET_PARALEVEL(pBiDi, i);
b75a7d8f
A
2571 break;
2572 }
2573 }
2574 }
2575 }
2576}
2577
51004dcb 2578U_CAPI void U_EXPORT2
4388f060
A
2579ubidi_setContext(UBiDi *pBiDi,
2580 const UChar *prologue, int32_t proLength,
2581 const UChar *epilogue, int32_t epiLength,
2582 UErrorCode *pErrorCode) {
2583 /* check the argument values */
2584 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2585 if(pBiDi==NULL || proLength<-1 || epiLength<-1 ||
2586 (prologue==NULL && proLength!=0) || (epilogue==NULL && epiLength!=0)) {
2587 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2588 return;
2589 }
2590
2591 if(proLength==-1) {
2592 pBiDi->proLength=u_strlen(prologue);
2593 } else {
2594 pBiDi->proLength=proLength;
2595 }
2596 if(epiLength==-1) {
2597 pBiDi->epiLength=u_strlen(epilogue);
2598 } else {
2599 pBiDi->epiLength=epiLength;
2600 }
2601 pBiDi->prologue=prologue;
2602 pBiDi->epilogue=epilogue;
2603}
2604
2605static void
2606setParaSuccess(UBiDi *pBiDi) {
2607 pBiDi->proLength=0; /* forget the last context */
2608 pBiDi->epiLength=0;
2609 pBiDi->pParaBiDi=pBiDi; /* mark successful setPara */
2610}
2611
73c04bcf
A
/* Smaller of two values / absolute value. Each argument may be evaluated
 * twice, so do not pass expressions with side effects. */
#define BIDI_MIN(x, y)  ((y)<=(x) ? (y) : (x))
#define BIDI_ABS(x)     ((x)<0 ? (-(x)) : (x))
57a6839d 2614
73c04bcf
A
2615static void
2616setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length,
2617 UBiDiLevel paraLevel, UErrorCode *pErrorCode) {
f3c0d7a5 2618 int32_t *runsOnlyMemory = NULL;
73c04bcf
A
2619 int32_t *visualMap;
2620 UChar *visualText;
46f4442e 2621 int32_t saveLength, saveTrailingWSStart;
73c04bcf
A
2622 const UBiDiLevel *levels;
2623 UBiDiLevel *saveLevels;
46f4442e
A
2624 UBiDiDirection saveDirection;
2625 UBool saveMayAllocateText;
73c04bcf
A
2626 Run *runs;
2627 int32_t visualLength, i, j, visualStart, logicalStart,
2628 runCount, runLength, addedRuns, insertRemove,
2629 start, limit, step, indexOddBit, logicalPos,
729e4ab9 2630 index0, index1;
73c04bcf
A
2631 uint32_t saveOptions;
2632
2633 pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT;
2634 if(length==0) {
2635 ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
2636 goto cleanup3;
2637 }
2638 /* obtain memory for mapping table and visual text */
f3c0d7a5 2639 runsOnlyMemory=static_cast<int32_t *>(uprv_malloc(length*(sizeof(int32_t)+sizeof(UChar)+sizeof(UBiDiLevel))));
73c04bcf
A
2640 if(runsOnlyMemory==NULL) {
2641 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2642 goto cleanup3;
2643 }
2644 visualMap=runsOnlyMemory;
2645 visualText=(UChar *)&visualMap[length];
2646 saveLevels=(UBiDiLevel *)&visualText[length];
2647 saveOptions=pBiDi->reorderingOptions;
2648 if(saveOptions & UBIDI_OPTION_INSERT_MARKS) {
2649 pBiDi->reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
2650 pBiDi->reorderingOptions|=UBIDI_OPTION_REMOVE_CONTROLS;
2651 }
46f4442e 2652 paraLevel&=1; /* accept only 0 or 1 */
73c04bcf 2653 ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
46f4442e
A
2654 if(U_FAILURE(*pErrorCode)) {
2655 goto cleanup3;
2656 }
2657 /* we cannot access directly pBiDi->levels since it is not yet set if
2658 * direction is not MIXED
2659 */
73c04bcf 2660 levels=ubidi_getLevels(pBiDi, pErrorCode);
a62d09fc 2661 uprv_memcpy(saveLevels, levels, (size_t)pBiDi->length*sizeof(UBiDiLevel));
46f4442e
A
2662 saveTrailingWSStart=pBiDi->trailingWSStart;
2663 saveLength=pBiDi->length;
2664 saveDirection=pBiDi->direction;
73c04bcf
A
2665
2666 /* FOOD FOR THOUGHT: instead of writing the visual text, we could use
2667 * the visual map and the dirProps array to drive the second call
2668 * to ubidi_setPara (but must make provision for possible removal of
2669 * BiDi controls. Alternatively, only use the dirProps array via
2670 * customized classifier callback.
2671 */
2672 visualLength=ubidi_writeReordered(pBiDi, visualText, length,
2673 UBIDI_DO_MIRRORING, pErrorCode);
73c04bcf
A
2674 ubidi_getVisualMap(pBiDi, visualMap, pErrorCode);
2675 if(U_FAILURE(*pErrorCode)) {
2676 goto cleanup2;
2677 }
46f4442e 2678 pBiDi->reorderingOptions=saveOptions;
73c04bcf
A
2679
2680 pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_LIKE_DIRECT;
46f4442e
A
2681 paraLevel^=1;
2682 /* Because what we did with reorderingOptions, visualText may be shorter
2683 * than the original text. But we don't want the levels memory to be
2684 * reallocated shorter than the original length, since we need to restore
2685 * the levels as after the first call to ubidi_setpara() before returning.
2686 * We will force mayAllocateText to FALSE before the second call to
2687 * ubidi_setpara(), and will restore it afterwards.
2688 */
2689 saveMayAllocateText=pBiDi->mayAllocateText;
2690 pBiDi->mayAllocateText=FALSE;
73c04bcf 2691 ubidi_setPara(pBiDi, visualText, visualLength, paraLevel, NULL, pErrorCode);
46f4442e
A
2692 pBiDi->mayAllocateText=saveMayAllocateText;
2693 ubidi_getRuns(pBiDi, pErrorCode);
73c04bcf
A
2694 if(U_FAILURE(*pErrorCode)) {
2695 goto cleanup1;
2696 }
73c04bcf
A
2697 /* check if some runs must be split, count how many splits */
2698 addedRuns=0;
2699 runCount=pBiDi->runCount;
2700 runs=pBiDi->runs;
2701 visualStart=0;
2702 for(i=0; i<runCount; i++, visualStart+=runLength) {
2703 runLength=runs[i].visualLimit-visualStart;
2704 if(runLength<2) {
2705 continue;
2706 }
2707 logicalStart=GET_INDEX(runs[i].logicalStart);
2708 for(j=logicalStart+1; j<logicalStart+runLength; j++) {
729e4ab9 2709 index0=visualMap[j];
73c04bcf 2710 index1=visualMap[j-1];
729e4ab9 2711 if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
73c04bcf
A
2712 addedRuns++;
2713 }
2714 }
2715 }
2716 if(addedRuns) {
2717 if(getRunsMemory(pBiDi, runCount+addedRuns)) {
2718 if(runCount==1) {
2719 /* because we switch from UBiDi.simpleRuns to UBiDi.runs */
2720 pBiDi->runsMemory[0]=runs[0];
2721 }
2722 runs=pBiDi->runs=pBiDi->runsMemory;
2723 pBiDi->runCount+=addedRuns;
2724 } else {
2725 goto cleanup1;
2726 }
2727 }
2728 /* split runs which are not consecutive in source text */
2729 for(i=runCount-1; i>=0; i--) {
2730 runLength= i==0 ? runs[0].visualLimit :
2731 runs[i].visualLimit-runs[i-1].visualLimit;
2732 logicalStart=runs[i].logicalStart;
2733 indexOddBit=GET_ODD_BIT(logicalStart);
2734 logicalStart=GET_INDEX(logicalStart);
2735 if(runLength<2) {
2736 if(addedRuns) {
2737 runs[i+addedRuns]=runs[i];
2738 }
2739 logicalPos=visualMap[logicalStart];
2740 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2741 saveLevels[logicalPos]^indexOddBit);
2742 continue;
2743 }
2744 if(indexOddBit) {
2745 start=logicalStart;
2746 limit=logicalStart+runLength-1;
2747 step=1;
2748 } else {
2749 start=logicalStart+runLength-1;
2750 limit=logicalStart;
2751 step=-1;
2752 }
2753 for(j=start; j!=limit; j+=step) {
729e4ab9 2754 index0=visualMap[j];
73c04bcf 2755 index1=visualMap[j+step];
729e4ab9
A
2756 if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
2757 logicalPos=BIDI_MIN(visualMap[start], index0);
73c04bcf
A
2758 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2759 saveLevels[logicalPos]^indexOddBit);
2760 runs[i+addedRuns].visualLimit=runs[i].visualLimit;
2761 runs[i].visualLimit-=BIDI_ABS(j-start)+1;
2762 insertRemove=runs[i].insertRemove&(LRM_AFTER|RLM_AFTER);
2763 runs[i+addedRuns].insertRemove=insertRemove;
2764 runs[i].insertRemove&=~insertRemove;
2765 start=j+step;
2766 addedRuns--;
2767 }
2768 }
2769 if(addedRuns) {
2770 runs[i+addedRuns]=runs[i];
2771 }
2772 logicalPos=BIDI_MIN(visualMap[start], visualMap[limit]);
2773 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2774 saveLevels[logicalPos]^indexOddBit);
2775 }
2776
2777 cleanup1:
2778 /* restore initial paraLevel */
2779 pBiDi->paraLevel^=1;
2780 cleanup2:
2781 /* restore real text */
2782 pBiDi->text=text;
46f4442e
A
2783 pBiDi->length=saveLength;
2784 pBiDi->originalLength=length;
2785 pBiDi->direction=saveDirection;
2786 /* the saved levels should never excess levelsSize, but we check anyway */
2787 if(saveLength>pBiDi->levelsSize) {
2788 saveLength=pBiDi->levelsSize;
2789 }
a62d09fc 2790 uprv_memcpy(pBiDi->levels, saveLevels, (size_t)saveLength*sizeof(UBiDiLevel));
46f4442e 2791 pBiDi->trailingWSStart=saveTrailingWSStart;
46f4442e
A
2792 if(pBiDi->runCount>1) {
2793 pBiDi->direction=UBIDI_MIXED;
2794 }
73c04bcf 2795 cleanup3:
b331163b
A
2796 /* free memory for mapping table and visual text */
2797 uprv_free(runsOnlyMemory);
2798
73c04bcf
A
2799 pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY;
2800}
2801
2ca993e8
A
2802/* -------------------------------------------------------------------------- */
2803/* internal prototype */
2804
/*
 * Forward declaration of the shared implementation that both
 * ubidi_setPara() and ubidi_setParaWithControls() call; the definition
 * follows those two functions.
 */
2805static void
2806ubidi_setParaInternal(UBiDi *pBiDi,
2807 const UChar *text, int32_t length,
2808 UBiDiLevel paraLevel,
2809 UBiDiLevel *embeddingLevels,
2810 const int32_t *offsets, int32_t offsetCount,
2811 const int32_t *controlStringIndices,
2812 const UChar * const * controlStrings,
2813 UErrorCode *pErrorCode);
2814
374ca955
A
2815/* ubidi_setPara ------------------------------------------------------------ */
2816
2817U_CAPI void U_EXPORT2
2818ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
2819 UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
2820 UErrorCode *pErrorCode) {
2ca993e8
A
2821 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2822 ubidi_setParaInternal(pBiDi, text, length, paraLevel,
2823 embeddingLevels,
2824 NULL, 0, NULL, NULL,
2825 pErrorCode);
2826}
2827
2828/* ubidi_setParaWithControls ------------------------------------------------ */
2829
2830U_CAPI void U_EXPORT2
2831ubidi_setParaWithControls(UBiDi *pBiDi,
2832 const UChar *text, int32_t length,
2833 UBiDiLevel paraLevel,
2834 const int32_t *offsets, int32_t offsetCount,
2835 const int32_t *controlStringIndices,
2836 const UChar * const * controlStrings,
2837 UErrorCode *pErrorCode) {
2838 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2839 /* check the argument values that are not already checked in ubidi_setParaInternal */
2840 if ( offsetCount < 0 || (offsetCount > 0 && (offsets == NULL || controlStrings == NULL)) ) {
2841 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2842 return;
2843 }
2844 ubidi_setParaInternal(pBiDi, text, length, paraLevel,
2845 NULL,
2846 offsets, offsetCount, controlStringIndices, controlStrings,
2847 pErrorCode);
2848}
2849
2850/* ubidi_setParaInternal ---------------------------------------------------- */
2851
2852void
2853ubidi_setParaInternal(UBiDi *pBiDi,
2854 const UChar *text, int32_t length,
2855 UBiDiLevel paraLevel,
2856 UBiDiLevel *embeddingLevels,
2857 const int32_t *offsets, int32_t offsetCount,
2858 const int32_t *controlStringIndices,
2859 const UChar * const * controlStrings,
2860 UErrorCode *pErrorCode) {
374ca955 2861 UBiDiDirection direction;
57a6839d 2862 DirProp *dirProps;
374ca955 2863
2ca993e8 2864 /* check the argument values (pErrorCode status alrecy checked before getting here) */
46f4442e
A
2865 if(pBiDi==NULL || text==NULL || length<-1 ||
2866 (paraLevel>UBIDI_MAX_EXPLICIT_LEVEL && paraLevel<UBIDI_DEFAULT_LTR)) {
374ca955
A
2867 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2868 return;
2869 }
2870
2871 if(length==-1) {
2872 length=u_strlen(text);
2873 }
2ca993e8
A
2874 if (offsetCount > 0 && pBiDi->reorderingMode > UBIDI_REORDER_GROUP_NUMBERS_WITH_R) {
2875 offsetCount = 0;
2876 }
374ca955 2877
73c04bcf
A
2878 /* special treatment for RUNS_ONLY mode */
2879 if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) {
2880 setParaRunsOnly(pBiDi, text, length, paraLevel, pErrorCode);
2881 return;
2882 }
2883
374ca955 2884 /* initialize the UBiDi structure */
73c04bcf 2885 pBiDi->pParaBiDi=NULL; /* mark unfinished setPara */
374ca955 2886 pBiDi->text=text;
73c04bcf 2887 pBiDi->length=pBiDi->originalLength=pBiDi->resultLength=length;
374ca955 2888 pBiDi->paraLevel=paraLevel;
f3c0d7a5 2889 pBiDi->direction=(UBiDiDirection)(paraLevel&1);
73c04bcf 2890 pBiDi->paraCount=1;
374ca955 2891
2ca993e8 2892 pBiDi->dirInsert=NULL;
374ca955
A
2893 pBiDi->dirProps=NULL;
2894 pBiDi->levels=NULL;
2895 pBiDi->runs=NULL;
73c04bcf
A
2896 pBiDi->insertPoints.size=0; /* clean up from last call */
2897 pBiDi->insertPoints.confirmed=0; /* clean up from last call */
2898
2899 /*
2900 * Save the original paraLevel if contextual; otherwise, set to 0.
2901 */
57a6839d 2902 pBiDi->defaultParaLevel=IS_DEFAULT_LEVEL(paraLevel);
374ca955
A
2903
2904 if(length==0) {
2905 /*
2906 * For an empty paragraph, create a UBiDi object with the paraLevel and
2907 * the flags and the direction set but without allocating zero-length arrays.
2908 * There is nothing more to do.
2909 */
2910 if(IS_DEFAULT_LEVEL(paraLevel)) {
2911 pBiDi->paraLevel&=1;
73c04bcf 2912 pBiDi->defaultParaLevel=0;
374ca955 2913 }
57a6839d 2914 pBiDi->flags=DIRPROP_FLAG_LR(paraLevel);
374ca955 2915 pBiDi->runCount=0;
46f4442e 2916 pBiDi->paraCount=0;
4388f060 2917 setParaSuccess(pBiDi); /* mark successful setPara */
374ca955
A
2918 return;
2919 }
2920
2921 pBiDi->runCount=-1;
2922
57a6839d
A
2923 /* allocate paras memory */
2924 if(pBiDi->parasMemory)
2925 pBiDi->paras=pBiDi->parasMemory;
2926 else
2927 pBiDi->paras=pBiDi->simpleParas;
2928
2ca993e8
A
2929 /*
2930 * Get the inserted directional properties
2931 * if necessary.
2932 */
2933 if (offsetCount > 0) {
2934 if(getDirInsertMemory(pBiDi, length)) {
2935 pBiDi->dirInsert=pBiDi->dirInsertMemory;
2936 if(!getDirInsert(pBiDi, offsets, offsetCount, controlStringIndices, controlStrings)) {
2937 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2938 return;
2939 }
2940 } else {
2941 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2942 return;
2943 }
2944 }
2945
374ca955
A
2946 /*
2947 * Get the directional properties,
2948 * the flags bit-set, and
73c04bcf 2949 * determine the paragraph level if necessary.
374ca955
A
2950 */
2951 if(getDirPropsMemory(pBiDi, length)) {
2952 pBiDi->dirProps=pBiDi->dirPropsMemory;
57a6839d
A
2953 if(!getDirProps(pBiDi)) {
2954 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2955 return;
2956 }
374ca955
A
2957 } else {
2958 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2959 return;
2960 }
57a6839d 2961 dirProps=pBiDi->dirProps;
73c04bcf
A
2962 /* the processed length may have changed if UBIDI_OPTION_STREAMING */
2963 length= pBiDi->length;
2964 pBiDi->trailingWSStart=length; /* the levels[] will reflect the WS run */
374ca955
A
2965
2966 /* are explicit levels specified? */
2967 if(embeddingLevels==NULL) {
2968 /* no: determine explicit levels according to the (Xn) rules */\
2969 if(getLevelsMemory(pBiDi, length)) {
2970 pBiDi->levels=pBiDi->levelsMemory;
57a6839d
A
2971 direction=resolveExplicitLevels(pBiDi, pErrorCode);
2972 if(U_FAILURE(*pErrorCode)) {
2973 return;
2974 }
374ca955
A
2975 } else {
2976 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2977 return;
2978 }
2979 } else {
73c04bcf 2980 /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */
374ca955
A
2981 pBiDi->levels=embeddingLevels;
2982 direction=checkExplicitLevels(pBiDi, pErrorCode);
2983 if(U_FAILURE(*pErrorCode)) {
2984 return;
2985 }
2986 }
2987
57a6839d 2988 /* allocate isolate memory */
b331163b 2989 if(pBiDi->isolateCount<=SIMPLE_ISOLATES_COUNT)
57a6839d
A
2990 pBiDi->isolates=pBiDi->simpleIsolates;
2991 else
2992 if((int32_t)(pBiDi->isolateCount*sizeof(Isolate))<=pBiDi->isolatesSize)
2993 pBiDi->isolates=pBiDi->isolatesMemory;
2994 else {
2995 if(getInitialIsolatesMemory(pBiDi, pBiDi->isolateCount)) {
2996 pBiDi->isolates=pBiDi->isolatesMemory;
2997 } else {
2998 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2999 return;
3000 }
3001 }
3002 pBiDi->isolateCount=-1; /* current isolates stack entry == none */
3003
374ca955
A
3004 /*
3005 * The steps after (X9) in the UBiDi algorithm are performed only if
3006 * the paragraph text has mixed directionality!
3007 */
3008 pBiDi->direction=direction;
3009 switch(direction) {
3010 case UBIDI_LTR:
374ca955
A
3011 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
3012 pBiDi->trailingWSStart=0;
3013 break;
3014 case UBIDI_RTL:
374ca955
A
3015 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
3016 pBiDi->trailingWSStart=0;
3017 break;
3018 default:
73c04bcf
A
3019 /*
3020 * Choose the right implicit state table
3021 */
3022 switch(pBiDi->reorderingMode) {
3023 case UBIDI_REORDER_DEFAULT:
3024 pBiDi->pImpTabPair=&impTab_DEFAULT;
3025 break;
3026 case UBIDI_REORDER_NUMBERS_SPECIAL:
3027 pBiDi->pImpTabPair=&impTab_NUMBERS_SPECIAL;
3028 break;
3029 case UBIDI_REORDER_GROUP_NUMBERS_WITH_R:
3030 pBiDi->pImpTabPair=&impTab_GROUP_NUMBERS_WITH_R;
3031 break;
73c04bcf
A
3032 case UBIDI_REORDER_INVERSE_NUMBERS_AS_L:
3033 pBiDi->pImpTabPair=&impTab_INVERSE_NUMBERS_AS_L;
3034 break;
3035 case UBIDI_REORDER_INVERSE_LIKE_DIRECT:
3036 if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
3037 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS;
3038 } else {
3039 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT;
3040 }
3041 break;
3042 case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL:
3043 if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
3044 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS;
3045 } else {
3046 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL;
3047 }
3048 break;
3049 default:
46f4442e
A
3050 /* we should never get here */
3051 U_ASSERT(FALSE);
73c04bcf
A
3052 break;
3053 }
374ca955
A
3054 /*
3055 * If there are no external levels specified and there
3056 * are no significant explicit level codes in the text,
3057 * then we can treat the entire paragraph as one run.
3058 * Otherwise, we need to perform the following rules on runs of
3059 * the text with the same embedding levels. (X10)
3060 * "Significant" explicit level codes are ones that actually
3061 * affect non-BN characters.
3062 * Examples for "insignificant" ones are empty embeddings
3063 * LRE-PDF, LRE-RLE-PDF-PDF, etc.
3064 */
46f4442e
A
3065 if(embeddingLevels==NULL && pBiDi->paraCount<=1 &&
3066 !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) {
374ca955 3067 resolveImplicitLevels(pBiDi, 0, length,
73c04bcf
A
3068 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, 0)),
3069 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, length-1)));
374ca955
A
3070 } else {
3071 /* sor, eor: start and end types of same-level-run */
3072 UBiDiLevel *levels=pBiDi->levels;
3073 int32_t start, limit=0;
3074 UBiDiLevel level, nextLevel;
3075 DirProp sor, eor;
3076
3077 /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
73c04bcf 3078 level=GET_PARALEVEL(pBiDi, 0);
374ca955
A
3079 nextLevel=levels[0];
3080 if(level<nextLevel) {
3081 eor=GET_LR_FROM_LEVEL(nextLevel);
3082 } else {
3083 eor=GET_LR_FROM_LEVEL(level);
3084 }
3085
3086 do {
3087 /* determine start and limit of the run (end points just behind the run) */
3088
3089 /* the values for this run's start are the same as for the previous run's end */
374ca955
A
3090 start=limit;
3091 level=nextLevel;
57a6839d 3092 if((start>0) && (dirProps[start-1]==B)) {
73c04bcf
A
3093 /* except if this is a new paragraph, then set sor = para level */
3094 sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start));
3095 } else {
3096 sor=eor;
3097 }
374ca955
A
3098
3099 /* search for the limit of this run */
57a6839d
A
3100 while((++limit<length) &&
3101 ((levels[limit]==level) ||
3102 (DIRPROP_FLAG(dirProps[limit])&MASK_BN_EXPLICIT))) {}
374ca955
A
3103
3104 /* get the correct level of the next run */
3105 if(limit<length) {
3106 nextLevel=levels[limit];
3107 } else {
73c04bcf 3108 nextLevel=GET_PARALEVEL(pBiDi, length-1);
374ca955
A
3109 }
3110
3111 /* determine eor from max(level, nextLevel); sor is last run's eor */
57a6839d 3112 if(NO_OVERRIDE(level)<NO_OVERRIDE(nextLevel)) {
374ca955
A
3113 eor=GET_LR_FROM_LEVEL(nextLevel);
3114 } else {
3115 eor=GET_LR_FROM_LEVEL(level);
3116 }
3117
3118 /* if the run consists of overridden directional types, then there
3119 are no implicit types to be resolved */
3120 if(!(level&UBIDI_LEVEL_OVERRIDE)) {
3121 resolveImplicitLevels(pBiDi, start, limit, sor, eor);
3122 } else {
3123 /* remove the UBIDI_LEVEL_OVERRIDE flags */
3124 do {
3125 levels[start++]&=~UBIDI_LEVEL_OVERRIDE;
3126 } while(start<limit);
3127 }
3128 } while(limit<length);
3129 }
73c04bcf
A
3130 /* check if we got any memory shortage while adding insert points */
3131 if (U_FAILURE(pBiDi->insertPoints.errorCode))
3132 {
3133 *pErrorCode=pBiDi->insertPoints.errorCode;
3134 return;
3135 }
374ca955
A
3136 /* reset the embedding levels for some non-graphic characters (L1), (X9) */
3137 adjustWSLevels(pBiDi);
374ca955
A
3138 break;
3139 }
46f4442e
A
3140 /* add RLM for inverse Bidi with contextual orientation resolving
3141 * to RTL which would not round-trip otherwise
3142 */
3143 if((pBiDi->defaultParaLevel>0) &&
3144 (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) &&
3145 ((pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) ||
3146 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) {
3147 int32_t i, j, start, last;
57a6839d 3148 UBiDiLevel level;
46f4442e
A
3149 DirProp dirProp;
3150 for(i=0; i<pBiDi->paraCount; i++) {
57a6839d
A
3151 last=(pBiDi->paras[i].limit)-1;
3152 level=pBiDi->paras[i].level;
3153 if(level==0)
46f4442e 3154 continue; /* LTR paragraph */
57a6839d 3155 start= i==0 ? 0 : pBiDi->paras[i-1].limit;
46f4442e 3156 for(j=last; j>=start; j--) {
57a6839d 3157 dirProp=dirProps[j];
46f4442e
A
3158 if(dirProp==L) {
3159 if(j<last) {
57a6839d 3160 while(dirProps[last]==B) {
46f4442e
A
3161 last--;
3162 }
3163 }
3164 addPoint(pBiDi, last, RLM_BEFORE);
3165 break;
3166 }
3167 if(DIRPROP_FLAG(dirProp) & MASK_R_AL) {
3168 break;
3169 }
3170 }
3171 }
3172 }
3173
73c04bcf
A
3174 if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
3175 pBiDi->resultLength -= pBiDi->controlCount;
3176 } else {
3177 pBiDi->resultLength += pBiDi->insertPoints.size;
3178 }
4388f060 3179 setParaSuccess(pBiDi); /* mark successful setPara */
73c04bcf
A
3180}
3181
2ca993e8
A
3182/* -------------------------------------------------------------------------- */
3183
73c04bcf
A
3184U_CAPI void U_EXPORT2
3185ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR) {
3186 if(pBiDi!=NULL) {
3187 pBiDi->orderParagraphsLTR=orderParagraphsLTR;
3188 }
3189}
3190
3191U_CAPI UBool U_EXPORT2
3192ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) {
3193 if(pBiDi!=NULL) {
3194 return pBiDi->orderParagraphsLTR;
3195 } else {
3196 return FALSE;
3197 }
374ca955 3198}
b75a7d8f
A
3199
3200U_CAPI UBiDiDirection U_EXPORT2
3201ubidi_getDirection(const UBiDi *pBiDi) {
73c04bcf 3202 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
b75a7d8f
A
3203 return pBiDi->direction;
3204 } else {
3205 return UBIDI_LTR;
3206 }
3207}
3208
3209U_CAPI const UChar * U_EXPORT2
3210ubidi_getText(const UBiDi *pBiDi) {
73c04bcf 3211 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
b75a7d8f
A
3212 return pBiDi->text;
3213 } else {
3214 return NULL;
3215 }
3216}
3217
3218U_CAPI int32_t U_EXPORT2
3219ubidi_getLength(const UBiDi *pBiDi) {
73c04bcf
A
3220 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
3221 return pBiDi->originalLength;
3222 } else {
3223 return 0;
3224 }
3225}
3226
3227U_CAPI int32_t U_EXPORT2
3228ubidi_getProcessedLength(const UBiDi *pBiDi) {
3229 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
b75a7d8f
A
3230 return pBiDi->length;
3231 } else {
3232 return 0;
3233 }
3234}
3235
73c04bcf
A
3236U_CAPI int32_t U_EXPORT2
3237ubidi_getResultLength(const UBiDi *pBiDi) {
3238 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
3239 return pBiDi->resultLength;
3240 } else {
3241 return 0;
3242 }
3243}
3244
3245/* paragraphs API functions ------------------------------------------------- */
3246
b75a7d8f
A
3247U_CAPI UBiDiLevel U_EXPORT2
3248ubidi_getParaLevel(const UBiDi *pBiDi) {
73c04bcf 3249 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
b75a7d8f
A
3250 return pBiDi->paraLevel;
3251 } else {
3252 return 0;
3253 }
3254}
3255
73c04bcf
A
3256U_CAPI int32_t U_EXPORT2
3257ubidi_countParagraphs(UBiDi *pBiDi) {
3258 if(!IS_VALID_PARA_OR_LINE(pBiDi)) {
3259 return 0;
3260 } else {
3261 return pBiDi->paraCount;
3262 }
3263}
b75a7d8f 3264
73c04bcf
A
3265U_CAPI void U_EXPORT2
3266ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex,
3267 int32_t *pParaStart, int32_t *pParaLimit,
3268 UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
3269 int32_t paraStart;
b75a7d8f 3270
73c04bcf 3271 /* check the argument values */
46f4442e
A
3272 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
3273 RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode);
3274 RETURN_VOID_IF_BAD_RANGE(paraIndex, 0, pBiDi->paraCount, *pErrorCode);
3275
73c04bcf
A
3276 pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */
3277 if(paraIndex) {
57a6839d 3278 paraStart=pBiDi->paras[paraIndex-1].limit;
73c04bcf
A
3279 } else {
3280 paraStart=0;
3281 }
3282 if(pParaStart!=NULL) {
3283 *pParaStart=paraStart;
3284 }
3285 if(pParaLimit!=NULL) {
57a6839d 3286 *pParaLimit=pBiDi->paras[paraIndex].limit;
73c04bcf
A
3287 }
3288 if(pParaLevel!=NULL) {
3289 *pParaLevel=GET_PARALEVEL(pBiDi, paraStart);
3290 }
73c04bcf 3291}
b75a7d8f 3292
73c04bcf
A
3293U_CAPI int32_t U_EXPORT2
3294ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex,
3295 int32_t *pParaStart, int32_t *pParaLimit,
3296 UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
57a6839d 3297 int32_t paraIndex;
b75a7d8f 3298
73c04bcf
A
3299 /* check the argument values */
3300 /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */
46f4442e
A
3301 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
3302 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
73c04bcf 3303 pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */
46f4442e
A
3304 RETURN_IF_BAD_RANGE(charIndex, 0, pBiDi->length, *pErrorCode, -1);
3305
57a6839d 3306 for(paraIndex=0; charIndex>=pBiDi->paras[paraIndex].limit; paraIndex++);
73c04bcf
A
3307 ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode);
3308 return paraIndex;
3309}
b75a7d8f 3310
73c04bcf
A
3311U_CAPI void U_EXPORT2
3312ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn,
3313 const void *newContext, UBiDiClassCallback **oldFn,
3314 const void **oldContext, UErrorCode *pErrorCode)
3315{
46f4442e
A
3316 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
3317 if(pBiDi==NULL) {
73c04bcf
A
3318 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
3319 return;
3320 }
3321 if( oldFn )
3322 {
3323 *oldFn = pBiDi->fnClassCallback;
3324 }
3325 if( oldContext )
3326 {
3327 *oldContext = pBiDi->coClassCallback;
3328 }
3329 pBiDi->fnClassCallback = newFn;
3330 pBiDi->coClassCallback = newContext;
3331}
b75a7d8f 3332
73c04bcf
A
3333U_CAPI void U_EXPORT2
3334ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context)
3335{
46f4442e
A
3336 if(pBiDi==NULL) {
3337 return;
3338 }
73c04bcf
A
3339 if( fn )
3340 {
3341 *fn = pBiDi->fnClassCallback;
3342 }
3343 if( context )
3344 {
3345 *context = pBiDi->coClassCallback;
3346 }
3347}
b75a7d8f 3348
73c04bcf
A
3349U_CAPI UCharDirection U_EXPORT2
3350ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c)
3351{
3352 UCharDirection dir;
b75a7d8f 3353
73c04bcf
A
3354 if( pBiDi->fnClassCallback == NULL ||
3355 (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT )
3356 {
0f5d89e8 3357 dir = ubidi_getClass(c);
57a6839d
A
3358 }
3359 if(dir >= U_CHAR_DIRECTION_COUNT) {
f3c0d7a5 3360 dir = (UCharDirection)ON;
b75a7d8f 3361 }
57a6839d 3362 return dir;
b75a7d8f 3363}