]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/ubidi.cpp
ICU-59173.0.1.tar.gz
[apple/icu.git] / icuSources / common / ubidi.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
73c04bcf 3/*
b75a7d8f
A
4******************************************************************************
5*
b331163b 6* Copyright (C) 1999-2015, International Business Machines
b75a7d8f
A
7* Corporation and others. All Rights Reserved.
8*
9******************************************************************************
10* file name: ubidi.c
f3c0d7a5 11* encoding: UTF-8
b75a7d8f
A
12* tab size: 8 (not used)
13* indentation:4
14*
15* created on: 1999jul27
46f4442e 16* created by: Markus W. Scherer, updated by Matitiahu Allouche
57a6839d 17*
b75a7d8f
A
18*/
19
b75a7d8f
A
20#include "cmemory.h"
21#include "unicode/utypes.h"
22#include "unicode/ustring.h"
23#include "unicode/uchar.h"
24#include "unicode/ubidi.h"
4388f060 25#include "unicode/utf16.h"
73c04bcf 26#include "ubidi_props.h"
b75a7d8f 27#include "ubidiimp.h"
46f4442e 28#include "uassert.h"
b75a7d8f
A
29
30/*
31 * General implementation notes:
32 *
33 * Throughout the implementation, there are comments like (W2) that refer to
57a6839d
A
34 * rules of the BiDi algorithm, in this example to the second rule of the
35 * resolution of weak types.
b75a7d8f
A
36 *
37 * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
38 * character according to UTF-16, the second UChar gets the directional property of
39 * the entire character assigned, while the first one gets a BN, a boundary
40 * neutral, type, which is ignored by most of the algorithm according to
41 * rule (X9) and the implementation suggestions of the BiDi algorithm.
42 *
43 * Later, adjustWSLevels() will set the level for each BN to that of the
44 * following character (UChar), which results in surrogate pairs getting the
45 * same level on each of their surrogates.
46 *
47 * In a UTF-8 implementation, the same thing could be done: the last byte of
48 * a multi-byte sequence would get the "real" property, while all previous
49 * bytes of that sequence would get BN.
50 *
51 * It is not possible to assign all those parts of a character the same real
52 * property because this would fail in the resolution of weak types with rules
53 * that look at immediately surrounding types.
54 *
55 * As a related topic, this implementation does not remove Boundary Neutral
73c04bcf 56 * types from the input, but ignores them wherever this is relevant.
b75a7d8f
A
57 * For example, the loop for the resolution of the weak types reads
58 * types until it finds a non-BN.
59 * Also, explicit embedding codes are neither changed into BN nor removed.
60 * They are only treated the same way real BNs are.
61 * As stated before, adjustWSLevels() takes care of them at the end.
62 * For the purpose of conformance, the levels of all these codes
63 * do not matter.
64 *
57a6839d
A
65 * Note that this implementation modifies the dirProps
66 * after the initial setup, when applying X5c (replace FSI by LRI or RLI),
67 * X6, N0 (replace paired brackets by L or R).
b75a7d8f 68 *
57a6839d
A
69 * In this implementation, the resolution of weak types (W1 to W6),
70 * neutrals (N1 and N2), and the assignment of the resolved level (In)
b75a7d8f
A
71 * are all done in one single loop, in resolveImplicitLevels().
72 * Changes of dirProp values are done on the fly, without writing
73 * them back to the dirProps array.
74 *
75 *
76 * This implementation contains code that allows to bypass steps of the
77 * algorithm that are not needed on the specific paragraph
78 * in order to speed up the most common cases considerably,
79 * like text that is entirely LTR, or RTL text without numbers.
80 *
81 * Most of this is done by setting a bit for each directional property
82 * in a flags variable and later checking for whether there are
83 * any LTR characters or any RTL characters, or both, whether
84 * there are any explicit embedding codes, etc.
85 *
86 * If the (Xn) steps are performed, then the flags are re-evaluated,
87 * because they will then not contain the embedding codes any more
88 * and will be adjusted for override codes, so that subsequently
89 * more bypassing may be possible than what the initial flags suggested.
90 *
91 * If the text is not mixed-directional, then the
92 * algorithm steps for the weak type resolution are not performed,
93 * and all levels are set to the paragraph level.
94 *
95 * If there are no explicit embedding codes, then the (Xn) steps
96 * are not performed.
97 *
98 * If embedding levels are supplied as a parameter, then all
99 * explicit embedding codes are ignored, and the (Xn) steps
100 * are not performed.
101 *
102 * White Space types could get the level of the run they belong to,
103 * and are checked with a test of (flags&MASK_EMBEDDING) to
104 * consider if the paragraph direction should be considered in
105 * the flags variable.
106 *
107 * If there are no White Space types in the paragraph, then
108 * (L1) is not necessary in adjustWSLevels().
109 */
110
b75a7d8f
A
111/* to avoid some conditional statements, use tiny constant arrays */
112static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) };
113static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) };
114static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) };
115
116#define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
57a6839d
A
117#define DIRPROP_FLAG_E(level) flagE[(level)&1]
118#define DIRPROP_FLAG_O(level) flagO[(level)&1]
119
120#define DIR_FROM_STRONG(strong) ((strong)==L ? L : R)
121
122#define NO_OVERRIDE(level) ((level)&~UBIDI_LEVEL_OVERRIDE)
b75a7d8f
A
123
124/* UBiDi object management -------------------------------------------------- */
125
126U_CAPI UBiDi * U_EXPORT2
73c04bcf 127ubidi_open(void)
b75a7d8f
A
128{
129 UErrorCode errorCode=U_ZERO_ERROR;
130 return ubidi_openSized(0, 0, &errorCode);
131}
132
133U_CAPI UBiDi * U_EXPORT2
134ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) {
135 UBiDi *pBiDi;
136
137 /* check the argument values */
138 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
139 return NULL;
140 } else if(maxLength<0 || maxRunCount<0) {
141 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
142 return NULL; /* invalid arguments */
143 }
144
145 /* allocate memory for the object */
146 pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi));
147 if(pBiDi==NULL) {
148 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
149 return NULL;
150 }
151
152 /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */
153 uprv_memset(pBiDi, 0, sizeof(UBiDi));
154
73c04bcf 155 /* get BiDi properties */
729e4ab9 156 pBiDi->bdp=ubidi_getSingleton();
73c04bcf 157
b75a7d8f
A
158 /* allocate memory for arrays as requested */
159 if(maxLength>0) {
160 if( !getInitialDirPropsMemory(pBiDi, maxLength) ||
161 !getInitialLevelsMemory(pBiDi, maxLength)
162 ) {
163 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
164 }
165 } else {
166 pBiDi->mayAllocateText=TRUE;
167 }
168
169 if(maxRunCount>0) {
170 if(maxRunCount==1) {
171 /* use simpleRuns[] */
172 pBiDi->runsSize=sizeof(Run);
173 } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) {
174 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
175 }
176 } else {
177 pBiDi->mayAllocateRuns=TRUE;
178 }
179
180 if(U_SUCCESS(*pErrorCode)) {
181 return pBiDi;
182 } else {
183 ubidi_close(pBiDi);
184 return NULL;
185 }
186}
187
188/*
189 * We are allowed to allocate memory if memory==NULL or
190 * mayAllocate==TRUE for each array that we need.
46f4442e 191 * We also try to grow memory as needed if we
b75a7d8f
A
192 * allocate it.
193 *
194 * Assume sizeNeeded>0.
195 * If *pMemory!=NULL, then assume *pSize>0.
196 *
197 * ### this realloc() may unnecessarily copy the old data,
198 * which we know we don't need any more;
199 * is this the best way to do this??
200 */
201U_CFUNC UBool
46f4442e
A
202ubidi_getMemory(BidiMemoryForAllocation *bidiMem, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded) {
203 void **pMemory = (void **)bidiMem;
b75a7d8f
A
204 /* check for existing memory */
205 if(*pMemory==NULL) {
206 /* we need to allocate memory */
207 if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=NULL) {
208 *pSize=sizeNeeded;
209 return TRUE;
210 } else {
211 return FALSE;
212 }
213 } else {
46f4442e
A
214 if(sizeNeeded<=*pSize) {
215 /* there is already enough memory */
216 return TRUE;
217 }
218 else if(!mayAllocate) {
b75a7d8f
A
219 /* not enough memory, and we must not allocate */
220 return FALSE;
46f4442e
A
221 } else {
222 /* we try to grow */
b75a7d8f 223 void *memory;
46f4442e
A
224 /* in most cases, we do not need the copy-old-data part of
225 * realloc, but it is needed when adding runs using getRunsMemory()
226 * in setParaRunsOnly()
227 */
b75a7d8f
A
228 if((memory=uprv_realloc(*pMemory, sizeNeeded))!=NULL) {
229 *pMemory=memory;
230 *pSize=sizeNeeded;
231 return TRUE;
232 } else {
233 /* we failed to grow */
234 return FALSE;
235 }
b75a7d8f
A
236 }
237 }
238}
239
240U_CAPI void U_EXPORT2
241ubidi_close(UBiDi *pBiDi) {
242 if(pBiDi!=NULL) {
73c04bcf 243 pBiDi->pParaBiDi=NULL; /* in case one tries to reuse this block */
2ca993e8
A
244 if(pBiDi->dirInsertMemory!=NULL) {
245 uprv_free(pBiDi->dirInsertMemory);
246 }
b75a7d8f
A
247 if(pBiDi->dirPropsMemory!=NULL) {
248 uprv_free(pBiDi->dirPropsMemory);
249 }
250 if(pBiDi->levelsMemory!=NULL) {
251 uprv_free(pBiDi->levelsMemory);
252 }
57a6839d
A
253 if(pBiDi->openingsMemory!=NULL) {
254 uprv_free(pBiDi->openingsMemory);
b75a7d8f 255 }
73c04bcf
A
256 if(pBiDi->parasMemory!=NULL) {
257 uprv_free(pBiDi->parasMemory);
258 }
57a6839d
A
259 if(pBiDi->runsMemory!=NULL) {
260 uprv_free(pBiDi->runsMemory);
261 }
262 if(pBiDi->isolatesMemory!=NULL) {
263 uprv_free(pBiDi->isolatesMemory);
264 }
73c04bcf
A
265 if(pBiDi->insertPoints.points!=NULL) {
266 uprv_free(pBiDi->insertPoints.points);
267 }
268
b75a7d8f
A
269 uprv_free(pBiDi);
270 }
271}
272
273/* set to approximate "inverse BiDi" ---------------------------------------- */
274
275U_CAPI void U_EXPORT2
276ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) {
277 if(pBiDi!=NULL) {
278 pBiDi->isInverse=isInverse;
73c04bcf
A
279 pBiDi->reorderingMode = isInverse ? UBIDI_REORDER_INVERSE_NUMBERS_AS_L
280 : UBIDI_REORDER_DEFAULT;
b75a7d8f
A
281 }
282}
283
284U_CAPI UBool U_EXPORT2
285ubidi_isInverse(UBiDi *pBiDi) {
286 if(pBiDi!=NULL) {
287 return pBiDi->isInverse;
288 } else {
289 return FALSE;
290 }
291}
292
73c04bcf
A
293/* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
294 * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre
295 * concept of RUNS_ONLY which is a double operation.
296 * It could be advantageous to divide this into 3 concepts:
297 * a) Operation: direct / inverse / RUNS_ONLY
46f4442e 298 * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R
73c04bcf
A
299 * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL
300 * This would allow combinations not possible today like RUNS_ONLY with
301 * NUMBERS_SPECIAL.
302 * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and
303 * REMOVE_CONTROLS for the inverse step.
304 * Not all combinations would be supported, and probably not all do make sense.
305 * This would need to document which ones are supported and what are the
306 * fallbacks for unsupported combinations.
307 */
308U_CAPI void U_EXPORT2
309ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) {
46f4442e 310 if ((pBiDi!=NULL) && (reorderingMode >= UBIDI_REORDER_DEFAULT)
73c04bcf
A
311 && (reorderingMode < UBIDI_REORDER_COUNT)) {
312 pBiDi->reorderingMode = reorderingMode;
46f4442e 313 pBiDi->isInverse = (UBool)(reorderingMode == UBIDI_REORDER_INVERSE_NUMBERS_AS_L);
73c04bcf
A
314 }
315}
316
317U_CAPI UBiDiReorderingMode U_EXPORT2
318ubidi_getReorderingMode(UBiDi *pBiDi) {
46f4442e 319 if (pBiDi!=NULL) {
73c04bcf
A
320 return pBiDi->reorderingMode;
321 } else {
322 return UBIDI_REORDER_DEFAULT;
323 }
324}
325
326U_CAPI void U_EXPORT2
327ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions) {
328 if (reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
329 reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
330 }
46f4442e
A
331 if (pBiDi!=NULL) {
332 pBiDi->reorderingOptions=reorderingOptions;
73c04bcf
A
333 }
334}
335
336U_CAPI uint32_t U_EXPORT2
337ubidi_getReorderingOptions(UBiDi *pBiDi) {
46f4442e 338 if (pBiDi!=NULL) {
73c04bcf
A
339 return pBiDi->reorderingOptions;
340 } else {
341 return 0;
342 }
343}
344
729e4ab9
A
345U_CAPI UBiDiDirection U_EXPORT2
346ubidi_getBaseDirection(const UChar *text,
347int32_t length){
348
349 int32_t i;
350 UChar32 uchar;
351 UCharDirection dir;
4388f060 352
729e4ab9
A
353 if( text==NULL || length<-1 ){
354 return UBIDI_NEUTRAL;
355 }
356
357 if(length==-1) {
358 length=u_strlen(text);
359 }
360
361 for( i = 0 ; i < length; ) {
362 /* i is incremented by U16_NEXT */
363 U16_NEXT(text, i, length, uchar);
364 dir = u_charDirection(uchar);
365 if( dir == U_LEFT_TO_RIGHT )
366 return UBIDI_LTR;
367 if( dir == U_RIGHT_TO_LEFT || dir ==U_RIGHT_TO_LEFT_ARABIC )
368 return UBIDI_RTL;
369 }
370 return UBIDI_NEUTRAL;
371}
372
b75a7d8f
A
373/* perform (P2)..(P3) ------------------------------------------------------- */
374
57a6839d
A
375/**
376 * Returns the directionality of the first strong character
377 * after the last B in prologue, if any.
378 * Requires prologue!=null.
379 */
4388f060
A
380static DirProp
381firstL_R_AL(UBiDi *pBiDi) {
4388f060
A
382 const UChar *text=pBiDi->prologue;
383 int32_t length=pBiDi->proLength;
384 int32_t i;
385 UChar32 uchar;
386 DirProp dirProp, result=ON;
387 for(i=0; i<length; ) {
388 /* i is incremented by U16_NEXT */
389 U16_NEXT(text, i, length, uchar);
390 dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
391 if(result==ON) {
392 if(dirProp==L || dirProp==R || dirProp==AL) {
393 result=dirProp;
394 }
395 } else {
396 if(dirProp==B) {
397 result=ON;
398 }
399 }
400 }
401 return result;
402}
403
b75a7d8f 404/*
57a6839d 405 * Check that there are enough entries in the array pointed to by pBiDi->paras
b75a7d8f 406 */
57a6839d
A
407static UBool
408checkParaCount(UBiDi *pBiDi) {
409 int32_t count=pBiDi->paraCount;
410 if(pBiDi->paras==pBiDi->simpleParas) {
b331163b 411 if(count<=SIMPLE_PARAS_COUNT)
57a6839d 412 return TRUE;
b331163b 413 if(!getInitialParasMemory(pBiDi, SIMPLE_PARAS_COUNT * 2))
57a6839d
A
414 return FALSE;
415 pBiDi->paras=pBiDi->parasMemory;
b331163b 416 uprv_memcpy(pBiDi->parasMemory, pBiDi->simpleParas, SIMPLE_PARAS_COUNT * sizeof(Para));
57a6839d
A
417 return TRUE;
418 }
419 if(!getInitialParasMemory(pBiDi, count * 2))
420 return FALSE;
421 pBiDi->paras=pBiDi->parasMemory;
422 return TRUE;
423}
424
2ca993e8
A
425/*
426 * Get the directional properties for the inserted bidi controls.
427 */
428
/*
 * Subset of the bidi classes used for inserted controls. Every value fits
 * in 4 bits, so several of them can be packed into one 16-bit word.
 */
enum {
    Insert_none  = 0,   /* all other classes */
    Insert_L     = 1,   /* L   = U_LEFT_TO_RIGHT */
    Insert_R     = 2,   /* R   = U_RIGHT_TO_LEFT */
    Insert_AL    = 3,   /* AL  = U_RIGHT_TO_LEFT_ARABIC */
    Insert_LRE   = 4,   /* LRE = U_LEFT_TO_RIGHT_EMBEDDING */
    Insert_LRO   = 5,   /* LRO = U_LEFT_TO_RIGHT_OVERRIDE */
    Insert_RLE   = 6,   /* RLE = U_RIGHT_TO_LEFT_EMBEDDING */
    Insert_RLO   = 7,   /* RLO = U_RIGHT_TO_LEFT_OVERRIDE */
    Insert_PDF   = 8,   /* PDF = U_POP_DIRECTIONAL_FORMAT */
    Insert_FSI   = 9,   /* FSI = U_FIRST_STRONG_ISOLATE */
    Insert_LRI   = 10,  /* LRI = U_LEFT_TO_RIGHT_ISOLATE */
    Insert_RLI   = 11,  /* RLI = U_RIGHT_TO_LEFT_ISOLATE */
    Insert_PDI   = 12,  /* PDI = U_POP_DIRECTIONAL_ISOLATE */
    Insert_B     = 13,  /* B   = U_BLOCK_SEPARATOR */
    Insert_S     = 14,  /* S   = U_SEGMENT_SEPARATOR */
    Insert_WS    = 15,  /* WS  = U_WHITE_SPACE_NEUTRAL */
    Insert_count = 16   /* number of values above */
};
449
450/* map standard dir class to special 4-bit insert value (Insert_none as default) */
451static const uint16_t insertDirFromStdDir[dirPropCount] = {
452 Insert_none, /* L= U_LEFT_TO_RIGHT */
453 Insert_none, /* R= U_RIGHT_TO_LEFT, */
454 Insert_none, /* EN= U_EUROPEAN_NUMBER */
455 Insert_none, /* ES= U_EUROPEAN_NUMBER_SEPARATOR */
456 Insert_none, /* ET= U_EUROPEAN_NUMBER_TERMINATOR */
457 Insert_none, /* AN= U_ARABIC_NUMBER */
458 Insert_none, /* CS= U_COMMON_NUMBER_SEPARATOR */
459 Insert_none, /* B= U_BLOCK_SEPARATOR */
460 Insert_none, /* S= U_SEGMENT_SEPARATOR */
461 Insert_none, /* WS= U_WHITE_SPACE_NEUTRAL */
462 Insert_none, /* ON= U_OTHER_NEUTRAL */
463 Insert_LRE, /* LRE=U_LEFT_TO_RIGHT_EMBEDDING */
464 Insert_LRO, /* LRO=U_LEFT_TO_RIGHT_OVERRIDE */
465 Insert_none, /* AL= U_RIGHT_TO_LEFT_ARABIC */
466 Insert_RLE, /* RLE=U_RIGHT_TO_LEFT_EMBEDDING */
467 Insert_RLO, /* RLO=U_RIGHT_TO_LEFT_OVERRIDE */
468 Insert_PDF, /* PDF=U_POP_DIRECTIONAL_FORMAT */
469 Insert_none, /* NSM=U_DIR_NON_SPACING_MARK */
470 Insert_none, /* BN= U_BOUNDARY_NEUTRAL */
471 Insert_FSI, /* FSI=U_FIRST_STRONG_ISOLATE */
472 Insert_LRI, /* LRI=U_LEFT_TO_RIGHT_ISOLATE */
473 Insert_RLI, /* RLI=U_RIGHT_TO_LEFT_ISOLATE */
474 Insert_PDI, /* PDI=U_POP_DIRECTIONAL_ISOLATE */
475 Insert_none, /* ENL */
476 Insert_none, /* ENR */
477};
478
479/* map special 4-bit insert direction class to standard dir class (ON as default) */
480static const uint8_t stdDirFromInsertDir[Insert_count] = {
481 ON, /* Insert_none > ON */
482 L, /* Insert_L */
483 R, /* Insert_R */
484 AL, /* Insert_AL */
485 LRE, /* Insert_LRE */
486 LRO, /* Insert_LRO */
487 RLE, /* Insert_RLE */
488 RLO, /* Insert_RLO */
489 PDF, /* Insert_PDF */
490 FSI, /* Insert_FSI */
491 LRI, /* Insert_LRI */
492 RLI, /* Insert_RLI */
493 PDI, /* Insert_PDI */
494 B, /* Insert_B */
495 S, /* Insert_S */
496 WS, /* Insert_WS */
497};
498
499enum { kMaxControlStringLen = 4 };
500
501static UBool
502getDirInsert(UBiDi *pBiDi,
503 const int32_t *offsets, int32_t offsetCount,
504 const int32_t *controlStringIndices,
505 const UChar * const * controlStrings) {
506 int32_t offset, offsetsIndex;
507 uint16_t *dirInsert = pBiDi->dirInsert;
508 /* initialize dirInsert */
509 for (offset = 0; offset < pBiDi->length; offset++) {
510 dirInsert[offset] = 0;
511 }
512 for (offsetsIndex = 0; offsetsIndex < offsetCount; offsetsIndex++) {
513 const UChar * controlString;
514 UChar uchar;
515 int32_t controlStringIndex, dirInsertIndex = 0;
516 uint16_t dirInsertValue = 0;
517 offset = offsets[offsetsIndex];
518 if (offset < 0 || offset >= pBiDi->length) {
519 return FALSE; /* param err in offsets array */
520 }
521 controlStringIndex = (controlStringIndices == NULL)? offsetsIndex: controlStringIndices[offsetsIndex];
522 controlString = controlStrings[controlStringIndex];
523 if (controlString == NULL) {
524 return FALSE; /* param err in controlStrings array */
525 }
526 while ((uchar = *controlString++) != 0) {
527 uint16_t insertValue = (U16_IS_SURROGATE(uchar))? Insert_none:
528 insertDirFromStdDir[(uint32_t)ubidi_getCustomizedClass(pBiDi, uchar)];
529 if (dirInsertIndex >= kMaxControlStringLen || insertValue == Insert_none) {
530 return FALSE; /* param err in controlStrings array */
531 }
532 dirInsertValue |= (insertValue << (4 * dirInsertIndex++));
533 }
534 dirInsert[offset] = dirInsertValue;
535 }
536 return TRUE;
537}
538
57a6839d
A
539/*
540 * Get the directional properties for the text, calculate the flags bit-set, and
541 * determine the paragraph level if necessary (in pBiDi->paras[i].level).
542 * FSI initiators are also resolved and their dirProp replaced with LRI or RLI.
543 * When encountering an FSI, it is initially replaced with an LRI, which is the
544 * default. Only if a strong R or AL is found within its scope will the LRI be
545 * replaced by an RLI.
546 */
547static UBool
73c04bcf
A
548getDirProps(UBiDi *pBiDi) {
549 const UChar *text=pBiDi->text;
b75a7d8f 550 DirProp *dirProps=pBiDi->dirPropsMemory; /* pBiDi->dirProps is const */
2ca993e8 551 uint16_t *dirInsert = pBiDi->dirInsert; /* may be NULL */
b75a7d8f 552
57a6839d 553 int32_t i=0, originalLength=pBiDi->originalLength;
b75a7d8f
A
554 Flags flags=0; /* collect all directionalities in the text */
555 UChar32 uchar;
57a6839d 556 DirProp dirProp=0, defaultParaLevel=0; /* initialize to avoid compiler warnings */
2ca993e8
A
557 int32_t dirInsertValue;
558 int8_t dirInsertIndex; /* position within dirInsertValue, if any */
73c04bcf
A
559 UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel);
560 /* for inverse BiDi, the default para level is set to RTL if there is a
57a6839d 561 strong R or AL character at either end of the text */
46f4442e 562 UBool isDefaultLevelInverse=isDefaultLevel && (UBool)
73c04bcf
A
563 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
564 pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL);
565 int32_t lastArabicPos=-1;
566 int32_t controlCount=0;
46f4442e
A
567 UBool removeBiDiControls = (UBool)(pBiDi->reorderingOptions &
568 UBIDI_OPTION_REMOVE_CONTROLS);
73c04bcf 569
f3c0d7a5 570 enum State {
57a6839d
A
571 NOT_SEEKING_STRONG, /* 0: not contextual paraLevel, not after FSI */
572 SEEKING_STRONG_FOR_PARA, /* 1: looking for first strong char in para */
573 SEEKING_STRONG_FOR_FSI, /* 2: looking for first strong after FSI */
574 LOOKING_FOR_PDI /* 3: found strong after FSI, looking for PDI */
f3c0d7a5 575 };
73c04bcf 576 State state;
57a6839d
A
577 DirProp lastStrong=ON; /* for default level & inverse BiDi */
578 /* The following stacks are used to manage isolate sequences. Those
579 sequences may be nested, but obviously never more deeply than the
580 maximum explicit embedding level.
581 lastStack is the index of the last used entry in the stack. A value of -1
582 means that there is no open isolate sequence.
583 lastStack is reset to -1 on paragraph boundaries. */
584 /* The following stack contains the position of the initiator of
585 each open isolate sequence */
586 int32_t isolateStartStack[UBIDI_MAX_EXPLICIT_LEVEL+1];
2ca993e8 587 int8_t isolateStartInsertIndex[UBIDI_MAX_EXPLICIT_LEVEL+1];
57a6839d
A
588 /* The following stack contains the last known state before
589 encountering the initiator of an isolate sequence */
f3c0d7a5 590 State previousStateStack[UBIDI_MAX_EXPLICIT_LEVEL+1];
57a6839d
A
591 int32_t stackLast=-1;
592
593 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING)
73c04bcf 594 pBiDi->length=0;
57a6839d 595 defaultParaLevel=pBiDi->paraLevel&1;
73c04bcf 596 if(isDefaultLevel) {
57a6839d
A
597 pBiDi->paras[0].level=defaultParaLevel;
598 lastStrong=defaultParaLevel;
599 if(pBiDi->proLength>0 && /* there is a prologue */
600 (dirProp=firstL_R_AL(pBiDi))!=ON) { /* with a strong character */
601 if(dirProp==L)
602 pBiDi->paras[0].level=0; /* set the default para level */
603 else
604 pBiDi->paras[0].level=1; /* set the default para level */
605 state=NOT_SEEKING_STRONG;
4388f060 606 } else {
57a6839d 607 state=SEEKING_STRONG_FOR_PARA;
4388f060 608 }
374ca955 609 } else {
57a6839d
A
610 pBiDi->paras[0].level=pBiDi->paraLevel;
611 state=NOT_SEEKING_STRONG;
b75a7d8f 612 }
73c04bcf
A
613 /* count paragraphs and determine the paragraph level (P2..P3) */
614 /*
615 * see comment in ubidi.h:
57a6839d 616 * the UBIDI_DEFAULT_XXX values are designed so that
73c04bcf
A
617 * their bit 0 alone yields the intended default
618 */
2ca993e8
A
619 dirInsertValue = 0;
620 dirInsertIndex = -1; /* indicate that we have not checked dirInsert yet */
57a6839d 621 for( /* i=0 above */ ; i<originalLength; ) {
2ca993e8
A
622 if (dirInsert != NULL && dirInsertIndex < 0) {
623 dirInsertValue = dirInsert[i];
624 }
625 if (dirInsertValue > 0) {
626 dirInsertIndex++;
627 dirProp = (DirProp)stdDirFromInsertDir[dirInsertValue & 0x000F];
628 dirInsertValue >>= 4;
629 flags|=DIRPROP_FLAG(dirProp);
630 uchar = 0;
631 } else {
632 dirInsertIndex = -1;
633 /* i is incremented by U16_NEXT */
634 U16_NEXT(text, i, originalLength, uchar);
635 flags|=DIRPROP_FLAG(dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar));
636 dirProps[i-1]=dirProp;
637 if(uchar>0xffff) { /* set the lead surrogate's property to BN */
638 flags|=DIRPROP_FLAG(BN);
639 dirProps[i-2]=BN;
640 }
57a6839d
A
641 }
642 if(removeBiDiControls && IS_BIDI_CONTROL_CHAR(uchar))
643 controlCount++;
644 if(dirProp==L) {
645 if(state==SEEKING_STRONG_FOR_PARA) {
646 pBiDi->paras[pBiDi->paraCount-1].level=0;
647 state=NOT_SEEKING_STRONG;
73c04bcf 648 }
57a6839d
A
649 else if(state==SEEKING_STRONG_FOR_FSI) {
650 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
651 /* no need for next statement, already set by default */
652 /* dirProps[isolateStartStack[stackLast]]=LRI; */
653 flags|=DIRPROP_FLAG(LRI);
73c04bcf 654 }
57a6839d 655 state=LOOKING_FOR_PDI;
73c04bcf 656 }
57a6839d
A
657 lastStrong=L;
658 continue;
73c04bcf 659 }
57a6839d
A
660 if(dirProp==R || dirProp==AL) {
661 if(state==SEEKING_STRONG_FOR_PARA) {
662 pBiDi->paras[pBiDi->paraCount-1].level=1;
663 state=NOT_SEEKING_STRONG;
664 }
665 else if(state==SEEKING_STRONG_FOR_FSI) {
666 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
2ca993e8
A
667 if (isolateStartInsertIndex[stackLast] < 0) {
668 dirProps[isolateStartStack[stackLast]]=RLI;
669 } else {
670 dirInsert[stackLast] &= ~(0x000F << (4*isolateStartInsertIndex[stackLast]));
671 dirInsert[stackLast] |= (Insert_RLI << (4*isolateStartInsertIndex[stackLast]));
672 }
57a6839d
A
673 flags|=DIRPROP_FLAG(RLI);
674 }
675 state=LOOKING_FOR_PDI;
676 }
677 lastStrong=R;
678 if(dirProp==AL)
679 lastArabicPos=i-1;
680 continue;
73c04bcf 681 }
57a6839d
A
682 if(dirProp>=FSI && dirProp<=RLI) { /* FSI, LRI or RLI */
683 stackLast++;
684 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
2ca993e8
A
685 isolateStartStack[stackLast]= (dirInsertIndex < 0)? i-1: i /* we have not incremented with U16_NEXT yet */;
686 isolateStartInsertIndex[stackLast] = dirInsertIndex;
57a6839d
A
687 previousStateStack[stackLast]=state;
688 }
689 if(dirProp==FSI) {
2ca993e8
A
690 if (dirInsertIndex < 0) {
691 dirProps[i-1]=LRI; /* default if no strong char */
692 } else {
693 dirInsert[i] &= ~(0x000F << (4*dirInsertIndex));
694 dirInsert[i] |= (Insert_LRI << (4*dirInsertIndex));
695 }
57a6839d
A
696 state=SEEKING_STRONG_FOR_FSI;
697 }
698 else
699 state=LOOKING_FOR_PDI;
700 continue;
73c04bcf 701 }
57a6839d
A
702 if(dirProp==PDI) {
703 if(state==SEEKING_STRONG_FOR_FSI) {
704 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) {
705 /* no need for next statement, already set by default */
706 /* dirProps[isolateStartStack[stackLast]]=LRI; */
707 flags|=DIRPROP_FLAG(LRI);
708 }
709 }
710 if(stackLast>=0) {
711 if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL)
712 state=previousStateStack[stackLast];
713 stackLast--;
714 }
715 continue;
73c04bcf 716 }
57a6839d
A
717 if(dirProp==B) {
718 if(i<originalLength && uchar==CR && text[i]==LF) /* do nothing on the CR */
719 continue;
720 pBiDi->paras[pBiDi->paraCount-1].limit=i;
721 if(isDefaultLevelInverse && lastStrong==R)
722 pBiDi->paras[pBiDi->paraCount-1].level=1;
73c04bcf 723 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
57a6839d
A
724 /* When streaming, we only process whole paragraphs
725 thus some updates are only done on paragraph boundaries */
73c04bcf 726 pBiDi->length=i; /* i is index to next character */
57a6839d 727 pBiDi->controlCount=controlCount;
73c04bcf 728 }
57a6839d
A
729 if(i<originalLength) { /* B not last char in text */
730 pBiDi->paraCount++;
731 if(checkParaCount(pBiDi)==FALSE) /* not enough memory for a new para entry */
732 return FALSE;
73c04bcf 733 if(isDefaultLevel) {
57a6839d
A
734 pBiDi->paras[pBiDi->paraCount-1].level=defaultParaLevel;
735 state=SEEKING_STRONG_FOR_PARA;
736 lastStrong=defaultParaLevel;
737 } else {
738 pBiDi->paras[pBiDi->paraCount-1].level=pBiDi->paraLevel;
739 state=NOT_SEEKING_STRONG;
73c04bcf 740 }
57a6839d 741 stackLast=-1;
73c04bcf 742 }
57a6839d 743 continue;
73c04bcf 744 }
b75a7d8f 745 }
57a6839d
A
746 /* Ignore still open isolate sequences with overflow */
747 if(stackLast>UBIDI_MAX_EXPLICIT_LEVEL) {
748 stackLast=UBIDI_MAX_EXPLICIT_LEVEL;
749 state=SEEKING_STRONG_FOR_FSI; /* to be on the safe side */
750 }
751 /* Resolve direction of still unresolved open FSI sequences */
752 while(stackLast>=0) {
753 if(state==SEEKING_STRONG_FOR_FSI) {
754 /* no need for next statement, already set by default */
755 /* dirProps[isolateStartStack[stackLast]]=LRI; */
756 flags|=DIRPROP_FLAG(LRI);
757 break;
73c04bcf 758 }
57a6839d
A
759 state=previousStateStack[stackLast];
760 stackLast--;
73c04bcf 761 }
57a6839d 762 /* When streaming, ignore text after the last paragraph separator */
73c04bcf 763 if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) {
57a6839d 764 if(pBiDi->length<originalLength)
73c04bcf 765 pBiDi->paraCount--;
57a6839d
A
766 } else {
767 pBiDi->paras[pBiDi->paraCount-1].limit=originalLength;
768 pBiDi->controlCount=controlCount;
769 }
770 /* For inverse bidi, default para direction is RTL if there is
771 a strong R or AL at either end of the paragraph */
772 if(isDefaultLevelInverse && lastStrong==R) {
773 pBiDi->paras[pBiDi->paraCount-1].level=1;
73c04bcf 774 }
57a6839d
A
775 if(isDefaultLevel) {
776 pBiDi->paraLevel=pBiDi->paras[0].level;
777 }
778 /* The following is needed to resolve the text direction for default level
779 paragraphs containing no strong character */
780 for(i=0; i<pBiDi->paraCount; i++)
781 flags|=DIRPROP_FLAG_LR(pBiDi->paras[i].level);
73c04bcf
A
782
783 if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) {
784 flags|=DIRPROP_FLAG(L);
b75a7d8f 785 }
b75a7d8f 786 pBiDi->flags=flags;
73c04bcf 787 pBiDi->lastArabicPos=lastArabicPos;
57a6839d
A
788 return TRUE;
789}
790
791/* determine the paragraph level at position index */
792U_CFUNC UBiDiLevel
793ubidi_getParaLevelAtIndex(const UBiDi *pBiDi, int32_t pindex) {
794 int32_t i;
795 for(i=0; i<pBiDi->paraCount; i++)
796 if(pindex<pBiDi->paras[i].limit)
797 break;
798 if(i>=pBiDi->paraCount)
799 i=pBiDi->paraCount-1;
800 return (UBiDiLevel)(pBiDi->paras[i].level);
801}
802
803/* Functions for handling paired brackets ----------------------------------- */
804
805/* In the isoRuns array, the first entry is used for text outside of any
806 isolate sequence. Higher entries are used for each more deeply nested
807 isolate sequence. isoRunLast is the index of the last used entry. The
808 openings array is used to note the data of opening brackets not yet
809 matched by a closing bracket, or matched but still susceptible to change
810 level.
811 Each isoRun entry contains the index of the first and
812 one-after-last openings entries for pending opening brackets it
813 contains. The next openings entry to use is the one-after-last of the
814 most deeply nested isoRun entry.
815 isoRun entries also contain their current embedding level and the last
816 encountered strong character, since these will be needed to resolve
817 the level of paired brackets. */
818
819static void
820bracketInit(UBiDi *pBiDi, BracketData *bd) {
821 bd->pBiDi=pBiDi;
822 bd->isoRunLast=0;
823 bd->isoRuns[0].start=0;
824 bd->isoRuns[0].limit=0;
825 bd->isoRuns[0].level=GET_PARALEVEL(pBiDi, 0);
f3c0d7a5
A
826 UBiDiLevel t = GET_PARALEVEL(pBiDi, 0) & 1;
827 bd->isoRuns[0].lastStrong = bd->isoRuns[0].lastBase = t;
828 bd->isoRuns[0].contextDir = (UBiDiDirection)t;
57a6839d
A
829 bd->isoRuns[0].contextPos=0;
830 if(pBiDi->openingsMemory) {
831 bd->openings=pBiDi->openingsMemory;
b331163b 832 bd->openingsCount=pBiDi->openingsSize / sizeof(Opening);
57a6839d
A
833 } else {
834 bd->openings=bd->simpleOpenings;
b331163b 835 bd->openingsCount=SIMPLE_OPENINGS_COUNT;
57a6839d
A
836 }
837 bd->isNumbersSpecial=bd->pBiDi->reorderingMode==UBIDI_REORDER_NUMBERS_SPECIAL ||
838 bd->pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL;
839}
840
841/* paragraph boundary */
842static void
843bracketProcessB(BracketData *bd, UBiDiLevel level) {
844 bd->isoRunLast=0;
845 bd->isoRuns[0].limit=0;
846 bd->isoRuns[0].level=level;
f3c0d7a5
A
847 bd->isoRuns[0].lastStrong=bd->isoRuns[0].lastBase=level&1;
848 bd->isoRuns[0].contextDir=(UBiDiDirection)(level&1);
57a6839d
A
849 bd->isoRuns[0].contextPos=0;
850}
851
852/* LRE, LRO, RLE, RLO, PDF */
853static void
2ca993e8 854bracketProcessBoundary(BracketData *bd, int32_t lastCcPos, DirProp lastCcDirProp,
57a6839d
A
855 UBiDiLevel contextLevel, UBiDiLevel embeddingLevel) {
856 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
2ca993e8 857 if(DIRPROP_FLAG(lastCcDirProp)&MASK_ISO) /* after an isolate */
57a6839d
A
858 return;
859 if(NO_OVERRIDE(embeddingLevel)>NO_OVERRIDE(contextLevel)) /* not a PDF */
860 contextLevel=embeddingLevel;
861 pLastIsoRun->limit=pLastIsoRun->start;
862 pLastIsoRun->level=embeddingLevel;
f3c0d7a5
A
863 pLastIsoRun->lastStrong=pLastIsoRun->lastBase=contextLevel&1;
864 pLastIsoRun->contextDir=(UBiDiDirection)(contextLevel&1);
865 pLastIsoRun->contextPos=(UBiDiDirection)lastCcPos;
57a6839d
A
866}
867
868/* LRI or RLI */
869static void
870bracketProcessLRI_RLI(BracketData *bd, UBiDiLevel level) {
871 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
872 int16_t lastLimit;
873 pLastIsoRun->lastBase=ON;
874 lastLimit=pLastIsoRun->limit;
875 bd->isoRunLast++;
876 pLastIsoRun++;
877 pLastIsoRun->start=pLastIsoRun->limit=lastLimit;
878 pLastIsoRun->level=level;
f3c0d7a5
A
879 pLastIsoRun->lastStrong=pLastIsoRun->lastBase=level&1;
880 pLastIsoRun->contextDir=(UBiDiDirection)(level&1);
57a6839d
A
881 pLastIsoRun->contextPos=0;
882}
883
884/* PDI */
885static void
886bracketProcessPDI(BracketData *bd) {
887 IsoRun *pLastIsoRun;
888 bd->isoRunLast--;
889 pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
890 pLastIsoRun->lastBase=ON;
891}
892
893/* newly found opening bracket: create an openings entry */
894static UBool /* return TRUE if success */
895bracketAddOpening(BracketData *bd, UChar match, int32_t position) {
896 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
897 Opening *pOpening;
b331163b 898 if(pLastIsoRun->limit>=bd->openingsCount) { /* no available new entry */
57a6839d
A
899 UBiDi *pBiDi=bd->pBiDi;
900 if(!getInitialOpeningsMemory(pBiDi, pLastIsoRun->limit * 2))
901 return FALSE;
902 if(bd->openings==bd->simpleOpenings)
903 uprv_memcpy(pBiDi->openingsMemory, bd->simpleOpenings,
b331163b 904 SIMPLE_OPENINGS_COUNT * sizeof(Opening));
57a6839d 905 bd->openings=pBiDi->openingsMemory; /* may have changed */
b331163b 906 bd->openingsCount=pBiDi->openingsSize / sizeof(Opening);
57a6839d
A
907 }
908 pOpening=&bd->openings[pLastIsoRun->limit];
909 pOpening->position=position;
910 pOpening->match=match;
911 pOpening->contextDir=pLastIsoRun->contextDir;
912 pOpening->contextPos=pLastIsoRun->contextPos;
913 pOpening->flags=0;
914 pLastIsoRun->limit++;
915 return TRUE;
916}
917
918/* change N0c1 to N0c2 when a preceding bracket is assigned the embedding level */
919static void
920fixN0c(BracketData *bd, int32_t openingIndex, int32_t newPropPosition, DirProp newProp) {
921 /* This function calls itself recursively */
922 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
923 Opening *qOpening;
924 DirProp *dirProps=bd->pBiDi->dirProps;
925 int32_t k, openingPosition, closingPosition;
926 for(k=openingIndex+1, qOpening=&bd->openings[k]; k<pLastIsoRun->limit; k++, qOpening++) {
927 if(qOpening->match>=0) /* not an N0c match */
928 continue;
929 if(newPropPosition<qOpening->contextPos)
930 break;
931 if(newPropPosition>=qOpening->position)
932 continue;
933 if(newProp==qOpening->contextDir)
934 break;
935 openingPosition=qOpening->position;
936 dirProps[openingPosition]=newProp;
937 closingPosition=-(qOpening->match);
938 dirProps[closingPosition]=newProp;
939 qOpening->match=0; /* prevent further changes */
940 fixN0c(bd, k, openingPosition, newProp);
941 fixN0c(bd, k, closingPosition, newProp);
942 }
943}
944
945/* process closing bracket */
946static DirProp /* return L or R if N0b or N0c, ON if N0d */
947bracketProcessClosing(BracketData *bd, int32_t openIdx, int32_t position) {
948 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
949 Opening *pOpening, *qOpening;
950 UBiDiDirection direction;
951 UBool stable;
952 DirProp newProp;
953 pOpening=&bd->openings[openIdx];
f3c0d7a5 954 direction=(UBiDiDirection)(pLastIsoRun->level&1);
57a6839d
A
955 stable=TRUE; /* assume stable until proved otherwise */
956
957 /* The stable flag is set when brackets are paired and their
958 level is resolved and cannot be changed by what will be
959 found later in the source string.
960 An unstable match can occur only when applying N0c, where
961 the resolved level depends on the preceding context, and
962 this context may be affected by text occurring later.
963 Example: RTL paragraph containing: abc[(latin) HEBREW]
964 When the closing parenthesis is encountered, it appears
965 that N0c1 must be applied since 'abc' sets an opposite
966 direction context and both parentheses receive level 2.
967 However, when the closing square bracket is processed,
968 N0b applies because of 'HEBREW' being included within the
969 brackets, thus the square brackets are treated like R and
970 receive level 1. However, this changes the preceding
971 context of the opening parenthesis, and it now appears
972 that N0c2 must be applied to the parentheses rather than
973 N0c1. */
974
975 if((direction==0 && pOpening->flags&FOUND_L) ||
976 (direction==1 && pOpening->flags&FOUND_R)) { /* N0b */
977 newProp=direction;
978 }
979 else if(pOpening->flags&(FOUND_L|FOUND_R)) { /* N0c */
980 /* it is stable if there is no containing pair or in
981 conditions too complicated and not worth checking */
982 stable=(openIdx==pLastIsoRun->start);
983 if(direction!=pOpening->contextDir)
984 newProp=pOpening->contextDir; /* N0c1 */
985 else
986 newProp=direction; /* N0c2 */
987 } else {
988 /* forget this and any brackets nested within this pair */
989 pLastIsoRun->limit=openIdx;
990 return ON; /* N0d */
991 }
992 bd->pBiDi->dirProps[pOpening->position]=newProp;
993 bd->pBiDi->dirProps[position]=newProp;
994 /* Update nested N0c pairs that may be affected */
995 fixN0c(bd, openIdx, pOpening->position, newProp);
996 if(stable) {
997 pLastIsoRun->limit=openIdx; /* forget any brackets nested within this pair */
998 /* remove lower located synonyms if any */
999 while(pLastIsoRun->limit>pLastIsoRun->start &&
1000 bd->openings[pLastIsoRun->limit-1].position==pOpening->position)
1001 pLastIsoRun->limit--;
1002 } else {
1003 int32_t k;
1004 pOpening->match=-position;
1005 /* neutralize lower located synonyms if any */
1006 k=openIdx-1;
1007 while(k>=pLastIsoRun->start &&
1008 bd->openings[k].position==pOpening->position)
1009 bd->openings[k--].match=0;
1010 /* neutralize any unmatched opening between the current pair;
1011 this will also neutralize higher located synonyms if any */
1012 for(k=openIdx+1; k<pLastIsoRun->limit; k++) {
1013 qOpening=&bd->openings[k];
1014 if(qOpening->position>=position)
1015 break;
1016 if(qOpening->match>0)
1017 qOpening->match=0;
1018 }
1019 }
1020 return newProp;
1021}
1022
1023/* handle strong characters, digits and candidates for closing brackets */
1024static UBool /* return TRUE if success */
1025bracketProcessChar(BracketData *bd, int32_t position) {
1026 IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast];
1027 DirProp *dirProps, dirProp, newProp;
1028 UBiDiLevel level;
1029 dirProps=bd->pBiDi->dirProps;
1030 dirProp=dirProps[position];
1031 if(dirProp==ON) {
1032 UChar c, match;
1033 int32_t idx;
1034 /* First see if it is a matching closing bracket. Hopefully, this is
1035 more efficient than checking if it is a closing bracket at all */
1036 c=bd->pBiDi->text[position];
1037 for(idx=pLastIsoRun->limit-1; idx>=pLastIsoRun->start; idx--) {
1038 if(bd->openings[idx].match!=c)
1039 continue;
1040 /* We have a match */
1041 newProp=bracketProcessClosing(bd, idx, position);
1042 if(newProp==ON) { /* N0d */
1043 c=0; /* prevent handling as an opening */
1044 break;
1045 }
1046 pLastIsoRun->lastBase=ON;
f3c0d7a5 1047 pLastIsoRun->contextDir=(UBiDiDirection)newProp;
57a6839d
A
1048 pLastIsoRun->contextPos=position;
1049 level=bd->pBiDi->levels[position];
1050 if(level&UBIDI_LEVEL_OVERRIDE) { /* X4, X5 */
1051 uint16_t flag;
1052 int32_t i;
1053 newProp=level&1;
1054 pLastIsoRun->lastStrong=newProp;
1055 flag=DIRPROP_FLAG(newProp);
1056 for(i=pLastIsoRun->start; i<idx; i++)
1057 bd->openings[i].flags|=flag;
1058 /* matching brackets are not overridden by LRO/RLO */
1059 bd->pBiDi->levels[position]&=~UBIDI_LEVEL_OVERRIDE;
1060 }
1061 /* matching brackets are not overridden by LRO/RLO */
1062 bd->pBiDi->levels[bd->openings[idx].position]&=~UBIDI_LEVEL_OVERRIDE;
1063 return TRUE;
1064 }
1065 /* We get here only if the ON character is not a matching closing
1066 bracket or it is a case of N0d */
1067 /* Now see if it is an opening bracket */
1068 if(c)
1069 match=u_getBidiPairedBracket(c); /* get the matching char */
1070 else
1071 match=0;
1072 if(match!=c && /* has a matching char */
1073 ubidi_getPairedBracketType(bd->pBiDi->bdp, c)==U_BPT_OPEN) { /* opening bracket */
1074 /* special case: process synonyms
1075 create an opening entry for each synonym */
1076 if(match==0x232A) { /* RIGHT-POINTING ANGLE BRACKET */
1077 if(!bracketAddOpening(bd, 0x3009, position))
1078 return FALSE;
1079 }
1080 else if(match==0x3009) { /* RIGHT ANGLE BRACKET */
1081 if(!bracketAddOpening(bd, 0x232A, position))
1082 return FALSE;
1083 }
1084 if(!bracketAddOpening(bd, match, position))
1085 return FALSE;
1086 }
1087 }
1088 level=bd->pBiDi->levels[position];
1089 if(level&UBIDI_LEVEL_OVERRIDE) { /* X4, X5 */
1090 newProp=level&1;
1091 if(dirProp!=S && dirProp!=WS && dirProp!=ON)
1092 dirProps[position]=newProp;
1093 pLastIsoRun->lastBase=newProp;
1094 pLastIsoRun->lastStrong=newProp;
f3c0d7a5 1095 pLastIsoRun->contextDir=(UBiDiDirection)newProp;
57a6839d
A
1096 pLastIsoRun->contextPos=position;
1097 }
1098 else if(dirProp<=R || dirProp==AL) {
1099 newProp=DIR_FROM_STRONG(dirProp);
1100 pLastIsoRun->lastBase=dirProp;
1101 pLastIsoRun->lastStrong=dirProp;
f3c0d7a5 1102 pLastIsoRun->contextDir=(UBiDiDirection)newProp;
57a6839d
A
1103 pLastIsoRun->contextPos=position;
1104 }
1105 else if(dirProp==EN) {
1106 pLastIsoRun->lastBase=EN;
1107 if(pLastIsoRun->lastStrong==L) {
1108 newProp=L; /* W7 */
1109 if(!bd->isNumbersSpecial)
1110 dirProps[position]=ENL;
f3c0d7a5 1111 pLastIsoRun->contextDir=(UBiDiDirection)L;
57a6839d
A
1112 pLastIsoRun->contextPos=position;
1113 }
1114 else {
1115 newProp=R; /* N0 */
1116 if(pLastIsoRun->lastStrong==AL)
1117 dirProps[position]=AN; /* W2 */
1118 else
1119 dirProps[position]=ENR;
f3c0d7a5 1120 pLastIsoRun->contextDir=(UBiDiDirection)R;
57a6839d
A
1121 pLastIsoRun->contextPos=position;
1122 }
1123 }
1124 else if(dirProp==AN) {
1125 newProp=R; /* N0 */
1126 pLastIsoRun->lastBase=AN;
f3c0d7a5 1127 pLastIsoRun->contextDir=(UBiDiDirection)R;
57a6839d
A
1128 pLastIsoRun->contextPos=position;
1129 }
1130 else if(dirProp==NSM) {
1131 /* if the last real char was ON, change NSM to ON so that it
1132 will stay ON even if the last real char is a bracket which
1133 may be changed to L or R */
1134 newProp=pLastIsoRun->lastBase;
1135 if(newProp==ON)
1136 dirProps[position]=newProp;
1137 }
1138 else {
1139 newProp=dirProp;
1140 pLastIsoRun->lastBase=dirProp;
1141 }
1142 if(newProp<=R || newProp==AL) {
1143 int32_t i;
1144 uint16_t flag=DIRPROP_FLAG(DIR_FROM_STRONG(newProp));
1145 for(i=pLastIsoRun->start; i<pLastIsoRun->limit; i++)
1146 if(position>bd->openings[i].position)
1147 bd->openings[i].flags|=flag;
1148 }
1149 return TRUE;
b75a7d8f
A
1150}
1151
1152/* perform (X1)..(X9) ------------------------------------------------------- */
1153
374ca955
A
1154/* determine if the text is mixed-directional or single-directional */
1155static UBiDiDirection
73c04bcf
A
1156directionFromFlags(UBiDi *pBiDi) {
1157 Flags flags=pBiDi->flags;
374ca955
A
1158 /* if the text contains AN and neutrals, then some neutrals may become RTL */
1159 if(!(flags&MASK_RTL || ((flags&DIRPROP_FLAG(AN)) && (flags&MASK_POSSIBLE_N)))) {
1160 return UBIDI_LTR;
1161 } else if(!(flags&MASK_LTR)) {
1162 return UBIDI_RTL;
1163 } else {
1164 return UBIDI_MIXED;
1165 }
1166}
1167
b75a7d8f
A
1168/*
1169 * Resolve the explicit levels as specified by explicit embedding codes.
1170 * Recalculate the flags to have them reflect the real properties
1171 * after taking the explicit embeddings into account.
1172 *
1173 * The BiDi algorithm is designed to result in the same behavior whether embedding
1174 * levels are externally specified (from "styled text", supposedly the preferred
57a6839d
A
1175 * method) or set by explicit embedding codes (LRx, RLx, PDF, FSI, PDI) in the plain text.
1176 * That is why (X9) instructs to remove all not-isolate explicit codes (and BN).
1177 * However, in a real implementation, the removal of these codes and their index
b75a7d8f
A
1178 * positions in the plain text is undesirable since it would result in
1179 * reallocated, reindexed text.
1180 * Instead, this implementation leaves the codes in there and just ignores them
1181 * in the subsequent processing.
57a6839d 1182 * In order to get the same reordering behavior, positions with a BN or a not-isolate
b75a7d8f
A
1183 * explicit embedding code just get the same level assigned as the last "real"
1184 * character.
1185 *
1186 * Some implementations, not this one, then overwrite some of these
1187 * directionality properties at "real" same-level-run boundaries by
1188 * L or R codes so that the resolution of weak types can be performed on the
1189 * entire paragraph at once instead of having to parse it once more and
1190 * perform that resolution on same-level-runs.
1191 * This limits the scope of the implicit rules in effectively
1192 * the same way as the run limits.
1193 *
57a6839d
A
1194 * Instead, this implementation does not modify these codes, except for
1195 * paired brackets whose properties (ON) may be replaced by L or R.
b75a7d8f
A
1196 * On one hand, the paragraph has to be scanned for same-level-runs, but
1197 * on the other hand, this saves another loop to reset these codes,
1198 * or saves making and modifying a copy of dirProps[].
1199 *
1200 *
1201 * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm.
1202 *
1203 *
1204 * Handling the stack of explicit levels (Xn):
1205 *
57a6839d
A
1206 * With the BiDi stack of explicit levels, as pushed with each
1207 * LRE, RLE, LRO, RLO, LRI, RLI and FSI and popped with each PDF and PDI,
1208 * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL.
b75a7d8f
A
1209 *
1210 * In order to have a correct push-pop semantics even in the case of overflows,
57a6839d
A
1211 * overflow counters and a valid isolate counter are used as described in UAX#9
1212 * section 3.3.2 "Explicit Levels and Directions".
b75a7d8f
A
1213 *
1214 * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
57a6839d
A
1215 *
1216 * Returns normally the direction; -1 if there was a memory shortage
1217 *
b75a7d8f 1218 */
b75a7d8f 1219static UBiDiDirection
57a6839d
A
1220resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
1221 DirProp *dirProps=pBiDi->dirProps;
2ca993e8 1222 uint16_t *dirInsert = pBiDi->dirInsert; /* may be NULL */
b75a7d8f 1223 UBiDiLevel *levels=pBiDi->levels;
73c04bcf
A
1224 const UChar *text=pBiDi->text;
1225
b75a7d8f
A
1226 int32_t i=0, length=pBiDi->length;
1227 Flags flags=pBiDi->flags; /* collect all directionalities in the text */
1228 DirProp dirProp;
2ca993e8
A
1229 int32_t dirInsertValue;
1230 int8_t dirInsertIndex; /* position within dirInsertValue, if any */
73c04bcf 1231 UBiDiLevel level=GET_PARALEVEL(pBiDi, 0);
b75a7d8f 1232 UBiDiDirection direction;
57a6839d
A
1233 pBiDi->isolateCount=0;
1234
1235 if(U_FAILURE(*pErrorCode)) { return UBIDI_LTR; }
b75a7d8f
A
1236
1237 /* determine if the text is mixed-directional or single-directional */
73c04bcf 1238 direction=directionFromFlags(pBiDi);
b75a7d8f 1239
57a6839d
A
1240 /* we may not need to resolve any explicit levels */
1241 if((direction!=UBIDI_MIXED)) {
b75a7d8f 1242 /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
57a6839d
A
1243 return direction;
1244 }
1245 if(pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL) {
1246 /* inverse BiDi: mixed, but all characters are at the same embedding level */
b75a7d8f 1247 /* set all levels to the paragraph level */
57a6839d
A
1248 int32_t paraIndex, start, limit;
1249 for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) {
1250 if(paraIndex==0)
1251 start=0;
1252 else
1253 start=pBiDi->paras[paraIndex-1].limit;
1254 limit=pBiDi->paras[paraIndex].limit;
1255 level=pBiDi->paras[paraIndex].level;
1256 for(i=start; i<limit; i++)
1257 levels[i]=level;
b75a7d8f 1258 }
57a6839d
A
1259 return direction; /* no bracket matching for inverse BiDi */
1260 }
1261 if(!(flags&(MASK_EXPLICIT|MASK_ISO))) {
1262 /* no embeddings, set all levels to the paragraph level */
1263 /* we still have to perform bracket matching */
1264 int32_t paraIndex, start, limit;
1265 BracketData bracketData;
1266 bracketInit(pBiDi, &bracketData);
1267 for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) {
1268 if(paraIndex==0)
1269 start=0;
1270 else
1271 start=pBiDi->paras[paraIndex-1].limit;
1272 limit=pBiDi->paras[paraIndex].limit;
1273 level=pBiDi->paras[paraIndex].level;
1274 for(i=start; i<limit; i++) {
1275 levels[i]=level;
1276 dirProp=dirProps[i];
1277 if(dirProp==BN)
1278 continue;
1279 if(dirProp==B) {
1280 if((i+1)<length) {
1281 if(text[i]==CR && text[i+1]==LF)
1282 continue; /* skip CR when followed by LF */
1283 bracketProcessB(&bracketData, level);
1284 }
1285 continue;
1286 }
1287 if(!bracketProcessChar(&bracketData, i)) {
1288 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1289 return UBIDI_LTR;
1290 }
1291 }
1292 }
1293 return direction;
1294 }
1295 {
b75a7d8f
A
1296 /* continue to perform (Xn) */
1297
1298 /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
1299 /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */
57a6839d
A
1300 UBiDiLevel embeddingLevel=level, newLevel;
1301 UBiDiLevel previousLevel=level; /* previous level for regular (not CC) characters */
1302 int32_t lastCcPos=0; /* index of last effective LRx,RLx, PDx */
2ca993e8 1303 DirProp lastCcDirProp=0; /* dirProp of last effective LRx,RLx, PDx */
57a6839d
A
1304
1305 /* The following stack remembers the embedding level and the ISOLATE flag of level runs.
1306 stackLast points to its current entry. */
1307 uint16_t stack[UBIDI_MAX_EXPLICIT_LEVEL+2]; /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL
1308 but we need one more entry as base */
1309 uint32_t stackLast=0;
1310 int32_t overflowIsolateCount=0;
1311 int32_t overflowEmbeddingCount=0;
1312 int32_t validIsolateCount=0;
1313 BracketData bracketData;
1314 bracketInit(pBiDi, &bracketData);
1315 stack[0]=level; /* initialize base entry to para level, no override, no isolate */
b75a7d8f
A
1316
1317 /* recalculate the flags */
1318 flags=0;
1319
2ca993e8
A
1320 dirInsertValue = 0;
1321 dirInsertIndex = -1; /* indicate that we have not checked dirInsert yet */
1322 for(i=0; i<length; ) { /* now conditionally increment at end */
1323 if (dirInsert != NULL && dirInsertIndex < 0) {
1324 dirInsertValue = dirInsert[i];
1325 }
1326 if (dirInsertValue > 0) {
1327 dirInsertIndex++;
1328 dirProp = (DirProp)stdDirFromInsertDir[dirInsertValue & 0x000F];
1329 dirInsertValue >>= 4;
1330 } else {
1331 dirInsertIndex = -1;
1332 dirProp=dirProps[i];
1333 }
b75a7d8f
A
1334 switch(dirProp) {
1335 case LRE:
b75a7d8f 1336 case RLE:
57a6839d 1337 case LRO:
b75a7d8f 1338 case RLO:
57a6839d
A
1339 /* (X2, X3, X4, X5) */
1340 flags|=DIRPROP_FLAG(BN);
1341 levels[i]=previousLevel;
1342 if (dirProp==LRE || dirProp==LRO)
1343 /* least greater even level */
1344 newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1));
1345 else
1346 /* least greater odd level */
1347 newLevel=(UBiDiLevel)((NO_OVERRIDE(embeddingLevel)+1)|1);
1348 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 &&
1349 overflowEmbeddingCount==0) {
1350 lastCcPos=i;
2ca993e8 1351 lastCcDirProp = dirProp;
b75a7d8f 1352 embeddingLevel=newLevel;
57a6839d 1353 if(dirProp==LRO || dirProp==RLO)
b75a7d8f 1354 embeddingLevel|=UBIDI_LEVEL_OVERRIDE;
57a6839d
A
1355 stackLast++;
1356 stack[stackLast]=embeddingLevel;
1357 /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE and RLE
73c04bcf
A
1358 since this has already been done for newLevel which is
1359 the source for embeddingLevel.
1360 */
b75a7d8f 1361 } else {
57a6839d
A
1362 if(overflowIsolateCount==0)
1363 overflowEmbeddingCount++;
b75a7d8f 1364 }
b75a7d8f
A
1365 break;
1366 case PDF:
1367 /* (X7) */
57a6839d
A
1368 flags|=DIRPROP_FLAG(BN);
1369 levels[i]=previousLevel;
b75a7d8f 1370 /* handle all the overflow cases first */
57a6839d
A
1371 if(overflowIsolateCount) {
1372 break;
b75a7d8f 1373 }
57a6839d
A
1374 if(overflowEmbeddingCount) {
1375 overflowEmbeddingCount--;
1376 break;
1377 }
1378 if(stackLast>0 && stack[stackLast]<ISOLATE) { /* not an isolate entry */
1379 lastCcPos=i;
2ca993e8 1380 lastCcDirProp = dirProp;
57a6839d
A
1381 stackLast--;
1382 embeddingLevel=(UBiDiLevel)stack[stackLast];
1383 }
1384 break;
1385 case LRI:
1386 case RLI:
1387 flags|=(DIRPROP_FLAG(ON)|DIRPROP_FLAG_LR(embeddingLevel));
1388 levels[i]=NO_OVERRIDE(embeddingLevel);
1389 if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
2ca993e8 1390 bracketProcessBoundary(&bracketData, lastCcPos, lastCcDirProp,
57a6839d
A
1391 previousLevel, embeddingLevel);
1392 flags|=DIRPROP_FLAG_MULTI_RUNS;
1393 }
1394 previousLevel=embeddingLevel;
1395 /* (X5a, X5b) */
1396 if(dirProp==LRI)
1397 /* least greater even level */
1398 newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1));
1399 else
1400 /* least greater odd level */
1401 newLevel=(UBiDiLevel)((NO_OVERRIDE(embeddingLevel)+1)|1);
1402 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 &&
1403 overflowEmbeddingCount==0) {
1404 flags|=DIRPROP_FLAG(dirProp);
1405 lastCcPos=i;
2ca993e8 1406 lastCcDirProp = dirProp;
57a6839d
A
1407 validIsolateCount++;
1408 if(validIsolateCount>pBiDi->isolateCount)
1409 pBiDi->isolateCount=validIsolateCount;
1410 embeddingLevel=newLevel;
1411 /* we can increment stackLast without checking because newLevel
1412 will exceed UBIDI_MAX_EXPLICIT_LEVEL before stackLast overflows */
1413 stackLast++;
1414 stack[stackLast]=embeddingLevel+ISOLATE;
1415 bracketProcessLRI_RLI(&bracketData, embeddingLevel);
1416 } else {
1417 /* make it WS so that it is handled by adjustWSLevels() */
2ca993e8
A
1418 if (dirInsertIndex < 0) {
1419 dirProps[i]=WS;
1420 } else {
1421 dirInsert[i] &= ~(0x000F << (4*dirInsertIndex));
1422 dirInsert[i] |= (Insert_WS << (4*dirInsertIndex));
1423 }
57a6839d
A
1424 overflowIsolateCount++;
1425 }
1426 break;
1427 case PDI:
1428 if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
2ca993e8 1429 bracketProcessBoundary(&bracketData, lastCcPos, lastCcDirProp,
57a6839d
A
1430 previousLevel, embeddingLevel);
1431 flags|=DIRPROP_FLAG_MULTI_RUNS;
1432 }
1433 /* (X6a) */
1434 if(overflowIsolateCount) {
1435 overflowIsolateCount--;
1436 /* make it WS so that it is handled by adjustWSLevels() */
2ca993e8
A
1437 if (dirInsertIndex < 0) {
1438 dirProps[i]=WS;
1439 } else {
1440 dirInsert[i] &= ~(0x000F << (4*dirInsertIndex));
1441 dirInsert[i] |= (Insert_WS << (4*dirInsertIndex));
1442 }
57a6839d
A
1443 }
1444 else if(validIsolateCount) {
1445 flags|=DIRPROP_FLAG(PDI);
1446 lastCcPos=i;
2ca993e8 1447 lastCcDirProp = dirProp;
57a6839d
A
1448 overflowEmbeddingCount=0;
1449 while(stack[stackLast]<ISOLATE) /* pop embedding entries */
1450 stackLast--; /* until the last isolate entry */
1451 stackLast--; /* pop also the last isolate entry */
1452 validIsolateCount--;
1453 bracketProcessPDI(&bracketData);
1454 } else
1455 /* make it WS so that it is handled by adjustWSLevels() */
2ca993e8
A
1456 if (dirInsertIndex < 0) {
1457 dirProps[i]=WS;
1458 } else {
1459 dirInsert[i] &= ~(0x000F << (4*dirInsertIndex));
1460 dirInsert[i] |= (Insert_WS << (4*dirInsertIndex));
1461 }
57a6839d
A
1462 embeddingLevel=(UBiDiLevel)stack[stackLast]&~ISOLATE;
1463 flags|=(DIRPROP_FLAG(ON)|DIRPROP_FLAG_LR(embeddingLevel));
1464 previousLevel=embeddingLevel;
1465 levels[i]=NO_OVERRIDE(embeddingLevel);
b75a7d8f
A
1466 break;
1467 case B:
57a6839d
A
1468 flags|=DIRPROP_FLAG(B);
1469 levels[i]=GET_PARALEVEL(pBiDi, i);
73c04bcf 1470 if((i+1)<length) {
57a6839d
A
1471 if(text[i]==CR && text[i+1]==LF)
1472 break; /* skip CR when followed by LF */
1473 overflowEmbeddingCount=overflowIsolateCount=0;
1474 validIsolateCount=0;
1475 stackLast=0;
1476 previousLevel=embeddingLevel=GET_PARALEVEL(pBiDi, i+1);
1477 stack[0]=embeddingLevel; /* initialize base entry to para level, no override, no isolate */
1478 bracketProcessB(&bracketData, embeddingLevel);
73c04bcf 1479 }
b75a7d8f
A
1480 break;
1481 case BN:
1482 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
1483 /* they will get their levels set correctly in adjustWSLevels() */
57a6839d 1484 levels[i]=previousLevel;
b75a7d8f
A
1485 flags|=DIRPROP_FLAG(BN);
1486 break;
1487 default:
57a6839d
A
1488 /* all other types are normal characters and get the "real" level */
1489 if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) {
2ca993e8 1490 bracketProcessBoundary(&bracketData, lastCcPos, lastCcDirProp,
57a6839d
A
1491 previousLevel, embeddingLevel);
1492 flags|=DIRPROP_FLAG_MULTI_RUNS;
1493 if(embeddingLevel&UBIDI_LEVEL_OVERRIDE)
1494 flags|=DIRPROP_FLAG_O(embeddingLevel);
1495 else
1496 flags|=DIRPROP_FLAG_E(embeddingLevel);
b75a7d8f 1497 }
57a6839d
A
1498 previousLevel=embeddingLevel;
1499 levels[i]=embeddingLevel;
1500 if(!bracketProcessChar(&bracketData, i))
f3c0d7a5 1501 return (UBiDiDirection)-1;
57a6839d
A
1502 /* the dirProp may have been changed in bracketProcessChar() */
1503 flags|=DIRPROP_FLAG(dirProps[i]);
b75a7d8f
A
1504 break;
1505 }
2ca993e8
A
1506 if (dirInsertIndex < 0) {
1507 ++i;
1508 }
b75a7d8f 1509 }
57a6839d 1510 if(flags&MASK_EMBEDDING)
b75a7d8f 1511 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
57a6839d 1512 if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B)))
73c04bcf 1513 flags|=DIRPROP_FLAG(L);
b75a7d8f
A
1514 /* again, determine if the text is mixed-directional or single-directional */
1515 pBiDi->flags=flags;
73c04bcf 1516 direction=directionFromFlags(pBiDi);
b75a7d8f
A
1517 }
1518 return direction;
1519}
1520
1521/*
1522 * Use a pre-specified embedding levels array:
1523 *
1524 * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
1525 * ignore all explicit codes (X9),
1526 * and check all the preset levels.
1527 *
1528 * Recalculate the flags to have them reflect the real properties
1529 * after taking the explicit embeddings into account.
1530 */
1531static UBiDiDirection
1532checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
57a6839d 1533 DirProp *dirProps=pBiDi->dirProps;
b75a7d8f 1534 UBiDiLevel *levels=pBiDi->levels;
57a6839d 1535 int32_t isolateCount=0;
73c04bcf 1536
f3c0d7a5 1537 int32_t length=pBiDi->length;
b75a7d8f 1538 Flags flags=0; /* collect all directionalities in the text */
57a6839d 1539 pBiDi->isolateCount=0;
b75a7d8f 1540
f3c0d7a5
A
1541 int32_t currentParaIndex = 0;
1542 int32_t currentParaLimit = pBiDi->paras[0].limit;
1543 int32_t currentParaLevel = pBiDi->paraLevel;
1544
1545 for(int32_t i=0; i<length; ++i) {
1546 UBiDiLevel level=levels[i];
1547 DirProp dirProp=dirProps[i];
57a6839d
A
1548 if(dirProp==LRI || dirProp==RLI) {
1549 isolateCount++;
1550 if(isolateCount>pBiDi->isolateCount)
1551 pBiDi->isolateCount=isolateCount;
1552 }
1553 else if(dirProp==PDI)
1554 isolateCount--;
1555 else if(dirProp==B)
1556 isolateCount=0;
f3c0d7a5
A
1557
1558 // optimized version of int32_t currentParaLevel = GET_PARALEVEL(pBiDi, i);
1559 if (pBiDi->defaultParaLevel != 0 &&
1560 i == currentParaLimit && (currentParaIndex + 1) < pBiDi->paraCount) {
1561 currentParaLevel = pBiDi->paras[++currentParaIndex].level;
1562 currentParaLimit = pBiDi->paras[currentParaIndex].limit;
1563 }
1564
1565 UBiDiLevel overrideFlag = level & UBIDI_LEVEL_OVERRIDE;
1566 level &= ~UBIDI_LEVEL_OVERRIDE;
1567 if (level < currentParaLevel || UBIDI_MAX_EXPLICIT_LEVEL < level) {
1568 if (level == 0) {
1569 if (dirProp == B) {
1570 // Paragraph separators are ok with explicit level 0.
1571 // Prevents reordering of paragraphs.
1572 } else {
1573 // Treat explicit level 0 as a wildcard for the paragraph level.
1574 // Avoid making the caller guess what the paragraph level would be.
1575 level = (UBiDiLevel)currentParaLevel;
1576 levels[i] = level | overrideFlag;
1577 }
1578 } else {
1579 // 1 <= level < currentParaLevel or UBIDI_MAX_EXPLICIT_LEVEL < level
1580 /* level out of bounds */
1581 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1582 return UBIDI_LTR;
1583 }
1584 }
1585 if (overrideFlag != 0) {
b75a7d8f 1586 /* keep the override flag in levels[i] but adjust the flags */
b75a7d8f
A
1587 flags|=DIRPROP_FLAG_O(level);
1588 } else {
1589 /* set the flags */
73c04bcf 1590 flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp);
b75a7d8f 1591 }
b75a7d8f 1592 }
57a6839d 1593 if(flags&MASK_EMBEDDING)
b75a7d8f 1594 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
b75a7d8f
A
1595 /* determine if the text is mixed-directional or single-directional */
1596 pBiDi->flags=flags;
73c04bcf
A
1597 return directionFromFlags(pBiDi);
1598}
1599
46f4442e
A
/******************************************************************
 The Properties state machine table
*******************************************************************

 All table cells are 8 bits:
      bits 0..4:  next state
      bits 5..7:  action to perform (if > 0)

 Cells may be of format "n" where n represents the next state
 (except for the rightmost column).
 Cells may also be of format "s(x,y)" where x represents an action
 to perform and y represents the next state.

*******************************************************************
 Definitions and type for properties state table
*******************************************************************
*/
/* Number of columns in a properties state table row; the last column
   (IMPTABPROPS_RES) holds the reduced property assigned to a run. */
#define IMPTABPROPS_COLUMNS 16
#define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1)
/* A cell packs the next state in bits 0..4 and the action in bits 5..7. */
#define GET_STATEPROPS(cell) ((cell)&0x1f)
#define GET_ACTIONPROPS(cell) ((cell)>>5)
/* Build a cell from an action and a next state.  Both arguments are
   parenthesized so that any expression can be passed safely. */
#define s(action, newState) ((uint8_t)((newState)+((action)<<5)))

static const uint8_t groupProp[] =          /* dirProp regrouped */
{
/*  L   R   EN  ES  ET  AN  CS  B   S   WS  ON  LRE LRO AL  RLE RLO PDF NSM BN  FSI LRI RLI PDI ENL ENR */
    0,  1,  2,  7,  8,  3,  9,  6,  5,  4,  4,  10, 10, 12, 10, 10, 10, 11, 10, 4,  4,  4,  4,  13, 14
};

/* reduced dirProp */
enum {
    DirProp_L=0,
    DirProp_R=1,
    DirProp_EN=2,
    DirProp_AN=3,
    DirProp_ON=4,
    DirProp_S=5,
    DirProp_B=6
};
1629
/******************************************************************

      PROPERTIES  STATE  TABLE

 In table impTabProps,
      - the ON column regroups ON and WS, FSI, RLI, LRI and PDI
      - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF
      - the Res column is the reduced property assigned to a run

 Action 1: process current run1, init new run1
        2: init new run2
        3: process run1, process run2, init new run1
        4: process run1, set run1=run2, init new run2

 Notes:
  1) This table is used in resolveImplicitLevels().
  2) This table triggers actions when there is a change in the Bidi
     property of incoming characters (action 1).
  3) Most such property sequences are processed immediately (in
     fact, passed to processPropertySeq()).
  4) However, numbers are assembled as one sequence. This means
     that undefined situations (like CS following digits, until
     it is known if the next char will be a digit) are held until
     following chars define them.
     Example: digits followed by CS, then comes another CS or ON;
              the digits will be processed, then the CS assigned
              as the start of an ON sequence (action 3).
  5) There are cases where more than one sequence must be
     processed, for instance digits followed by CS followed by L:
     the digits must be processed as one sequence, and the CS
     must be processed as an ON sequence, all this before starting
     assembling chars for the opening L sequence.


*/
73c04bcf
A
1665static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] =
1666{
57a6839d
A
1667/* L , R , EN , AN , ON , S , B , ES , ET , CS , BN , NSM , AL , ENL , ENR , Res */
1668/* 0 Init */ { 1 , 2 , 4 , 5 , 7 , 15 , 17 , 7 , 9 , 7 , 0 , 7 , 3 , 18 , 21 , DirProp_ON },
1669/* 1 L */ { 1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 1 , 1 , s(1,3),s(1,18),s(1,21), DirProp_L },
1670/* 2 R */ { s(1,1), 2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 2 , 2 , s(1,3),s(1,18),s(1,21), DirProp_R },
1671/* 3 AL */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8), 3 , 3 , 3 ,s(1,18),s(1,21), DirProp_R },
1672/* 4 EN */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10), 11 ,s(2,10), 4 , 4 , s(1,3), 18 , 21 , DirProp_EN },
1673/* 5 AN */ { s(1,1), s(1,2), s(1,4), 5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12), 5 , 5 , s(1,3),s(1,18),s(1,21), DirProp_AN },
1674/* 6 AL:EN/AN */ { s(1,1), s(1,2), 6 , 6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13), 6 , 6 , s(1,3), 18 , 21 , DirProp_AN },
1675/* 7 ON */ { s(1,1), s(1,2), s(1,4), s(1,5), 7 ,s(1,15),s(1,17), 7 ,s(2,14), 7 , 7 , 7 , s(1,3),s(1,18),s(1,21), DirProp_ON },
1676/* 8 AL:ON */ { s(1,1), s(1,2), s(1,6), s(1,6), 8 ,s(1,16),s(1,17), 8 , 8 , 8 , 8 , 8 , s(1,3),s(1,18),s(1,21), DirProp_ON },
1677/* 9 ET */ { s(1,1), s(1,2), 4 , s(1,5), 7 ,s(1,15),s(1,17), 7 , 9 , 7 , 9 , 9 , s(1,3), 18 , 21 , DirProp_ON },
1678/*10 EN+ES/CS */ { s(3,1), s(3,2), 4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 10 , s(4,7), s(3,3), 18 , 21 , DirProp_EN },
1679/*11 EN+ET */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 11 , s(1,7), 11 , 11 , s(1,3), 18 , 21 , DirProp_EN },
1680/*12 AN+CS */ { s(3,1), s(3,2), s(3,4), 5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 12 , s(4,7), s(3,3),s(3,18),s(3,21), DirProp_AN },
1681/*13 AL:EN/AN+CS */ { s(3,1), s(3,2), 6 , 6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8), 13 , s(4,8), s(3,3), 18 , 21 , DirProp_AN },
1682/*14 ON+ET */ { s(1,1), s(1,2), s(4,4), s(1,5), 7 ,s(1,15),s(1,17), 7 , 14 , 7 , 14 , 14 , s(1,3),s(4,18),s(4,21), DirProp_ON },
1683/*15 S */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7), 15 ,s(1,17), s(1,7), s(1,9), s(1,7), 15 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_S },
1684/*16 AL:S */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8), 16 ,s(1,17), s(1,8), s(1,8), s(1,8), 16 , s(1,8), s(1,3),s(1,18),s(1,21), DirProp_S },
1685/*17 B */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15), 17 , s(1,7), s(1,9), s(1,7), 17 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_B },
1686/*18 ENL */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,19), 20 ,s(2,19), 18 , 18 , s(1,3), 18 , 21 , DirProp_L },
1687/*19 ENL+ES/CS */ { s(3,1), s(3,2), 18 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 19 , s(4,7), s(3,3), 18 , 21 , DirProp_L },
1688/*20 ENL+ET */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 20 , s(1,7), 20 , 20 , s(1,3), 18 , 21 , DirProp_L },
1689/*21 ENR */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,22), 23 ,s(2,22), 21 , 21 , s(1,3), 18 , 21 , DirProp_AN },
1690/*22 ENR+ES/CS */ { s(3,1), s(3,2), 21 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 22 , s(4,7), s(3,3), 18 , 21 , DirProp_AN },
1691/*23 ENR+ET */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 23 , s(1,7), 23 , 23 , s(1,3), 18 , 21 , DirProp_AN }
73c04bcf
A
1692};
1693
57a6839d 1694/* we must undef macro s because the levels tables have a different
73c04bcf
A
1695 * structure (4 bits for action and 4 bits for next state).
1696 */
46f4442e
A
1697#undef s
1698
1699/******************************************************************
1700 The levels state machine tables
1701*******************************************************************
1702
1703 All table cells are 8 bits:
1704 bits 0..3: next state
1705 bits 4..7: action to perform (if > 0)
1706
1707 Cells may be of format "n" where n represents the next state
1708 (except for the rightmost column).
1709 Cells may also be of format "s(x,y)" where x represents an action
1710 to perform and y represents the next state.
1711
1712 This format limits each table to 16 states each and to 15 actions.
1713
1714*******************************************************************
1715 Definitions and type for levels state tables
1716*******************************************************************
1717*/
/* One column per reduced dirProp (DirProp_L..DirProp_B) plus the "Res" column. */
#define IMPTABLEVELS_COLUMNS (DirProp_B + 2)
#define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1)

/* Cell layout of the levels tables: bits 0..3 = next state, bits 4..7 = action. */
#define GET_STATE(cell) ((cell)&0x0f)
#define GET_ACTION(cell) ((cell)>>4)

/* Build a cell from an action and a next state.
 * Both arguments are parenthesized so that an argument which is itself an
 * expression (for instance 1|2) is encoded as a whole.
 */
#define s(action, newState) ((uint8_t)((newState)+((action)<<4)))
73c04bcf
A
1723
1724typedef uint8_t ImpTab[][IMPTABLEVELS_COLUMNS];
1725typedef uint8_t ImpAct[];
1726
1727/* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct,
1728 * instead of having a pair of ImpTab and a pair of ImpAct.
1729 */
1730typedef struct ImpTabPair {
46f4442e
A
1731 const void * pImpTab[2];
1732 const void * pImpAct[2];
73c04bcf
A
1733} ImpTabPair;
1734
46f4442e
A
1735/******************************************************************
1736
1737 LEVELS STATE TABLES
1738
1739 In all levels state tables,
1740 - state 0 is the initial state
1741 - the Res column is the increment to add to the text level
1742 for this property sequence.
1743
1744 The impAct arrays for each table of a pair map the local action
1745 numbers of the table to the total list of actions. For instance,
1746 action 2 in a given table corresponds to the action number which
1747 appears in entry [2] of the impAct array for that table.
1748 The first entry of all impAct arrays must be 0.
1749
1750 Action 1: init conditional sequence
1751 2: prepend conditional sequence to current sequence
1752 3: set ON sequence to new level - 1
1753 4: init EN/AN/ON sequence
1754 5: fix EN/AN/ON sequence followed by R
1755 6: set previous level sequence to level 2
1756
1757 Notes:
1758 1) These tables are used in processPropertySeq(). The input
1759 is property sequences as determined by resolveImplicitLevels.
1760 2) Most such property sequences are processed immediately
1761 (levels are assigned).
1762 3) However, some sequences cannot be assigned a final level till
1763 one or more following sequences are received. For instance,
1764 ON following an R sequence within an even-level paragraph.
1765 If the following sequence is R, the ON sequence will be
1766 assigned basic run level+1, and so will the R sequence.
1767 4) S is generally handled like ON, since its level will be fixed
1768 to paragraph level in adjustWSLevels().
1769
1770*/
73c04bcf
A
1771
1772static const ImpTab impTabL_DEFAULT = /* Even paragraph level */
57a6839d 1773/* In this table, conditional sequences receive the lower possible level
73c04bcf
A
1774 until proven otherwise.
1775*/
1776{
1777/* L , R , EN , AN , ON , S , B , Res */
1778/* 0 : init */ { 0 , 1 , 0 , 2 , 0 , 0 , 0 , 0 },
46f4442e
A
1779/* 1 : R */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 1 },
1780/* 2 : AN */ { 0 , 1 , 0 , 2 , s(1,5), s(1,5), 0 , 2 },
1781/* 3 : R+EN/AN */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 2 },
57a6839d
A
1782/* 4 : R+ON */ { 0 , s(2,1), s(3,3), s(3,3), 4 , 4 , 0 , 0 },
1783/* 5 : AN+ON */ { 0 , s(2,1), 0 , s(3,2), 5 , 5 , 0 , 0 }
73c04bcf
A
1784};
1785static const ImpTab impTabR_DEFAULT = /* Odd paragraph level */
1786/* In this table, conditional sequences receive the lower possible level
1787 until proven otherwise.
1788*/
1789{
1790/* L , R , EN , AN , ON , S , B , Res */
1791/* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 },
46f4442e 1792/* 1 : L */ { 1 , 0 , 1 , 3 , s(1,4), s(1,4), 0 , 1 },
73c04bcf
A
1793/* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 },
1794/* 3 : L+AN */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 1 },
46f4442e 1795/* 4 : L+ON */ { s(2,1), 0 , s(2,1), 3 , 4 , 4 , 0 , 0 },
73c04bcf
A
1796/* 5 : L+AN+ON */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 0 }
1797};
57a6839d 1798static const ImpAct impAct0 = {0,1,2,3,4};
46f4442e
A
1799static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT,
1800 &impTabR_DEFAULT},
1801 {&impAct0, &impAct0}};
73c04bcf
A
1802
1803static const ImpTab impTabL_NUMBERS_SPECIAL = /* Even paragraph level */
57a6839d 1804/* In this table, conditional sequences receive the lower possible level
73c04bcf
A
1805 until proven otherwise.
1806*/
1807{
1808/* L , R , EN , AN , ON , S , B , Res */
57a6839d
A
1809/* 0 : init */ { 0 , 2 , s(1,1), s(1,1), 0 , 0 , 0 , 0 },
1810/* 1 : L+EN/AN */ { 0 , s(4,2), 1 , 1 , 0 , 0 , 0 , 0 },
1811/* 2 : R */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 1 },
1812/* 3 : R+ON */ { 0 , s(2,2), s(3,4), s(3,4), 3 , 3 , 0 , 0 },
1813/* 4 : R+EN/AN */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 2 }
1814};
46f4442e
A
1815static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL,
1816 &impTabR_DEFAULT},
1817 {&impAct0, &impAct0}};
73c04bcf
A
1818
1819static const ImpTab impTabL_GROUP_NUMBERS_WITH_R =
1820/* In this table, EN/AN+ON sequences receive levels as if associated with R
1821 until proven that there is L or sor/eor on both sides. AN is handled like EN.
1822*/
1823{
1824/* L , R , EN , AN , ON , S , B , Res */
46f4442e
A
1825/* 0 init */ { 0 , 3 , s(1,1), s(1,1), 0 , 0 , 0 , 0 },
1826/* 1 EN/AN */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 2 },
1827/* 2 EN/AN+ON */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 1 },
1828/* 3 R */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 1 },
1829/* 4 R+ON */ { s(2,0), 3 , 5 , 5 , 4 , s(2,0), s(2,0), 1 },
1830/* 5 R+EN/AN */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 2 }
73c04bcf
A
1831};
1832static const ImpTab impTabR_GROUP_NUMBERS_WITH_R =
1833/* In this table, EN/AN+ON sequences receive levels as if associated with R
1834 until proven that there is L on both sides. AN is handled like EN.
1835*/
1836{
1837/* L , R , EN , AN , ON , S , B , Res */
1838/* 0 init */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 0 },
1839/* 1 EN/AN */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 1 },
46f4442e
A
1840/* 2 L */ { 2 , 0 , s(1,4), s(1,4), s(1,3), 0 , 0 , 1 },
1841/* 3 L+ON */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 0 },
1842/* 4 L+EN/AN */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 1 }
73c04bcf
A
1843};
1844static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R = {
46f4442e
A
1845 {&impTabL_GROUP_NUMBERS_WITH_R,
1846 &impTabR_GROUP_NUMBERS_WITH_R},
1847 {&impAct0, &impAct0}};
73c04bcf
A
1848
1849
1850static const ImpTab impTabL_INVERSE_NUMBERS_AS_L =
1851/* This table is identical to the Default LTR table except that EN and AN are
1852 handled like L.
1853*/
1854{
1855/* L , R , EN , AN , ON , S , B , Res */
1856/* 0 : init */ { 0 , 1 , 0 , 0 , 0 , 0 , 0 , 0 },
46f4442e
A
1857/* 1 : R */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 1 },
1858/* 2 : AN */ { 0 , 1 , 0 , 0 , s(1,5), s(1,5), 0 , 2 },
1859/* 3 : R+EN/AN */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 2 },
1860/* 4 : R+ON */ { s(2,0), 1 , s(2,0), s(2,0), 4 , 4 , s(2,0), 1 },
1861/* 5 : AN+ON */ { s(2,0), 1 , s(2,0), s(2,0), 5 , 5 , s(2,0), 1 }
73c04bcf
A
1862};
1863static const ImpTab impTabR_INVERSE_NUMBERS_AS_L =
1864/* This table is identical to the Default RTL table except that EN and AN are
1865 handled like L.
1866*/
1867{
1868/* L , R , EN , AN , ON , S , B , Res */
1869/* 0 : init */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 0 },
46f4442e 1870/* 1 : L */ { 1 , 0 , 1 , 1 , s(1,4), s(1,4), 0 , 1 },
73c04bcf
A
1871/* 2 : EN/AN */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 1 },
1872/* 3 : L+AN */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 1 },
46f4442e 1873/* 4 : L+ON */ { s(2,1), 0 , s(2,1), s(2,1), 4 , 4 , 0 , 0 },
73c04bcf
A
1874/* 5 : L+AN+ON */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 0 }
1875};
1876static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L = {
46f4442e
A
1877 {&impTabL_INVERSE_NUMBERS_AS_L,
1878 &impTabR_INVERSE_NUMBERS_AS_L},
1879 {&impAct0, &impAct0}};
73c04bcf
A
1880
1881static const ImpTab impTabR_INVERSE_LIKE_DIRECT = /* Odd paragraph level */
1882/* In this table, conditional sequences receive the lower possible level
1883 until proven otherwise.
1884*/
1885{
1886/* L , R , EN , AN , ON , S , B , Res */
1887/* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 },
46f4442e 1888/* 1 : L */ { 1 , 0 , 1 , 2 , s(1,3), s(1,3), 0 , 1 },
73c04bcf 1889/* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 },
46f4442e
A
1890/* 3 : L+ON */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 0 },
1891/* 4 : L+ON+AN */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 3 },
1892/* 5 : L+AN+ON */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 2 },
1893/* 6 : L+ON+EN */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 1 }
73c04bcf 1894};
57a6839d 1895static const ImpAct impAct1 = {0,1,13,14};
73c04bcf
A
1896/* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc"
1897 */
1898static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = {
46f4442e
A
1899 {&impTabL_DEFAULT,
1900 &impTabR_INVERSE_LIKE_DIRECT},
1901 {&impAct0, &impAct1}};
73c04bcf
A
1902
1903static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS =
1904/* The case handled in this table is (visually): R EN L
1905*/
1906{
1907/* L , R , EN , AN , ON , S , B , Res */
46f4442e
A
1908/* 0 : init */ { 0 , s(6,3), 0 , 1 , 0 , 0 , 0 , 0 },
1909/* 1 : L+AN */ { 0 , s(6,3), 0 , 1 , s(1,2), s(3,0), 0 , 4 },
1910/* 2 : L+AN+ON */ { s(2,0), s(6,3), s(2,0), 1 , 2 , s(3,0), s(2,0), 3 },
1911/* 3 : R */ { 0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0), 0 , 3 },
1912/* 4 : R+ON */ { s(3,0), s(4,3), s(5,5), s(5,6), 4 , s(3,0), s(3,0), 3 },
1913/* 5 : R+EN */ { s(3,0), s(4,3), 5 , s(5,6), s(1,4), s(3,0), s(3,0), 4 },
1914/* 6 : R+AN */ { s(3,0), s(4,3), s(5,5), 6 , s(1,4), s(3,0), s(3,0), 4 }
73c04bcf
A
1915};
1916static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS =
1917/* The cases handled in this table are (visually): R EN L
1918 R L AN L
1919*/
1920{
1921/* L , R , EN , AN , ON , S , B , Res */
46f4442e
A
1922/* 0 : init */ { s(1,3), 0 , 1 , 1 , 0 , 0 , 0 , 0 },
1923/* 1 : R+EN/AN */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 1 },
1924/* 2 : R+EN/AN+ON */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 0 },
1925/* 3 : L */ { 3 , 0 , 3 , s(3,6), s(1,4), s(4,0), 0 , 1 },
1926/* 4 : L+ON */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 0 },
1927/* 5 : L+ON+EN */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 1 },
1928/* 6 : L+AN */ { s(5,3), s(4,0), 6 , 6 , 4 , s(4,0), s(4,0), 3 }
73c04bcf 1929};
57a6839d
A
1930static const ImpAct impAct2 = {0,1,2,5,6,7,8};
1931static const ImpAct impAct3 = {0,1,9,10,11,12};
73c04bcf 1932static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = {
46f4442e
A
1933 {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
1934 &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
57a6839d 1935 {&impAct2, &impAct3}};
73c04bcf
A
1936
1937static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = {
46f4442e
A
1938 {&impTabL_NUMBERS_SPECIAL,
1939 &impTabR_INVERSE_LIKE_DIRECT},
1940 {&impAct0, &impAct1}};
73c04bcf
A
1941
1942static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS =
1943/* The case handled in this table is (visually): R EN L
1944*/
1945{
1946/* L , R , EN , AN , ON , S , B , Res */
46f4442e
A
1947/* 0 : init */ { 0 , s(6,2), 1 , 1 , 0 , 0 , 0 , 0 },
1948/* 1 : L+EN/AN */ { 0 , s(6,2), 1 , 1 , 0 , s(3,0), 0 , 4 },
1949/* 2 : R */ { 0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0), 0 , 3 },
1950/* 3 : R+ON */ { s(3,0), s(4,2), s(5,4), s(5,4), 3 , s(3,0), s(3,0), 3 },
1951/* 4 : R+EN/AN */ { s(3,0), s(4,2), 4 , 4 , s(1,3), s(3,0), s(3,0), 4 }
73c04bcf
A
1952};
1953static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = {
46f4442e
A
1954 {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
1955 &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
57a6839d 1956 {&impAct2, &impAct3}};
73c04bcf 1957
46f4442e 1958#undef s
73c04bcf
A
1959
1960typedef struct {
46f4442e
A
1961 const ImpTab * pImpTab; /* level table pointer */
1962 const ImpAct * pImpAct; /* action map array */
73c04bcf
A
1963 int32_t startON; /* start of ON sequence */
1964 int32_t startL2EN; /* start of level 2 sequence */
1965 int32_t lastStrongRTL; /* index of last found R or AL */
1966 int32_t state; /* current state */
57a6839d 1967 int32_t runStart; /* start position of the run */
73c04bcf
A
1968 UBiDiLevel runLevel; /* run level before implicit solving */
1969} LevState;
1970
1971/*------------------------------------------------------------------------*/
1972
1973static void
1974addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag)
1975 /* param pos: position where to insert
1976 param flag: one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER
1977 */
1978{
1979#define FIRSTALLOC 10
1980 Point point;
1981 InsertPoints * pInsertPoints=&(pBiDi->insertPoints);
1982
1983 if (pInsertPoints->capacity == 0)
1984 {
f3c0d7a5 1985 pInsertPoints->points=static_cast<Point *>(uprv_malloc(sizeof(Point)*FIRSTALLOC));
73c04bcf
A
1986 if (pInsertPoints->points == NULL)
1987 {
1988 pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
1989 return;
1990 }
1991 pInsertPoints->capacity=FIRSTALLOC;
1992 }
1993 if (pInsertPoints->size >= pInsertPoints->capacity) /* no room for new point */
1994 {
f3c0d7a5
A
1995 Point * savePoints=pInsertPoints->points;
1996 pInsertPoints->points=static_cast<Point *>(uprv_realloc(pInsertPoints->points,
1997 pInsertPoints->capacity*2*sizeof(Point)));
73c04bcf
A
1998 if (pInsertPoints->points == NULL)
1999 {
2000 pInsertPoints->points=savePoints;
2001 pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR;
2002 return;
2003 }
2004 else pInsertPoints->capacity*=2;
2005 }
2006 point.pos=pos;
2007 point.flag=flag;
2008 pInsertPoints->points[pInsertPoints->size]=point;
2009 pInsertPoints->size++;
2010#undef FIRSTALLOC
b75a7d8f
A
2011}
2012
57a6839d
A
2013static void
2014setLevelsOutsideIsolates(UBiDi *pBiDi, int32_t start, int32_t limit, UBiDiLevel level)
2015{
2016 DirProp *dirProps=pBiDi->dirProps, dirProp;
2ca993e8 2017 uint16_t *dirInsert = pBiDi->dirInsert; /* may be NULL */
57a6839d 2018 UBiDiLevel *levels=pBiDi->levels;
2ca993e8
A
2019 int32_t dirInsertValue;
2020 int8_t dirInsertIndex; /* position within dirInsertValue, if any */
57a6839d 2021 int32_t isolateCount=0, k;
2ca993e8
A
2022 dirInsertValue = 0;
2023 dirInsertIndex = -1; /* indicate that we have not checked dirInsert yet */
57a6839d 2024 for(k=start; k<limit; k++) {
2ca993e8
A
2025 if (dirInsert != NULL && dirInsertIndex < 0) {
2026 dirInsertValue = dirInsert[k];
2027 }
2028 if (dirInsertValue > 0) {
2029 dirInsertIndex++;
2030 dirProp = (DirProp)stdDirFromInsertDir[dirInsertValue & 0x000F];
2031 dirInsertValue >>= 4;
2032 } else {
2033 dirInsertIndex = -1;
2034 dirProp=dirProps[k];
2035 }
57a6839d
A
2036 if(dirProp==PDI)
2037 isolateCount--;
2038 if(isolateCount==0)
2039 levels[k]=level;
2040 if(dirProp==LRI || dirProp==RLI)
2041 isolateCount++;
2042 }
2043}
2044
b75a7d8f
A
2045/* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
2046
2047/*
2048 * This implementation of the (Wn) rules applies all rules in one pass.
2049 * In order to do so, it needs a look-ahead of typically 1 character
2050 * (except for W5: sequences of ET) and keeps track of changes
2051 * in a rule Wp that affect a later Wq (p<q).
2052 *
b75a7d8f
A
2053 * The (Nn) and (In) rules are also performed in that same single loop,
2054 * but effectively one iteration behind for white space.
2055 *
2056 * Since all implicit rules are performed in one step, it is not necessary
2057 * to actually store the intermediate directional properties in dirProps[].
2058 */
2059
b75a7d8f 2060static void
73c04bcf
A
2061processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop,
2062 int32_t start, int32_t limit) {
2063 uint8_t cell, oldStateSeq, actionSeq;
46f4442e
A
2064 const ImpTab * pImpTab=pLevState->pImpTab;
2065 const ImpAct * pImpAct=pLevState->pImpAct;
73c04bcf
A
2066 UBiDiLevel * levels=pBiDi->levels;
2067 UBiDiLevel level, addLevel;
2068 InsertPoints * pInsertPoints;
2069 int32_t start0, k;
2070
2071 start0=start; /* save original start position */
46f4442e 2072 oldStateSeq=(uint8_t)pLevState->state;
73c04bcf
A
2073 cell=(*pImpTab)[oldStateSeq][_prop];
2074 pLevState->state=GET_STATE(cell); /* isolate the new state */
2075 actionSeq=(*pImpAct)[GET_ACTION(cell)]; /* isolate the action */
2076 addLevel=(*pImpTab)[pLevState->state][IMPTABLEVELS_RES];
2077
2078 if(actionSeq) {
2079 switch(actionSeq) {
2080 case 1: /* init ON seq */
2081 pLevState->startON=start0;
b75a7d8f 2082 break;
b75a7d8f 2083
73c04bcf
A
2084 case 2: /* prepend ON seq to current seq */
2085 start=pLevState->startON;
2086 break;
b75a7d8f 2087
57a6839d
A
2088 case 3: /* EN/AN after R+ON */
2089 level=pLevState->runLevel+1;
2090 setLevelsOutsideIsolates(pBiDi, pLevState->startON, start0, level);
2091 break;
2092
2093 case 4: /* EN/AN before R for NUMBERS_SPECIAL */
2094 level=pLevState->runLevel+2;
2095 setLevelsOutsideIsolates(pBiDi, pLevState->startON, start0, level);
2096 break;
2097
2098 case 5: /* L or S after possible relevant EN/AN */
73c04bcf
A
2099 /* check if we had EN after R/AL */
2100 if (pLevState->startL2EN >= 0) {
2101 addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
b75a7d8f 2102 }
73c04bcf
A
2103 pLevState->startL2EN=-1; /* not within previous if since could also be -2 */
2104 /* check if we had any relevant EN/AN after R/AL */
2105 pInsertPoints=&(pBiDi->insertPoints);
2106 if ((pInsertPoints->capacity == 0) ||
2107 (pInsertPoints->size <= pInsertPoints->confirmed))
2108 {
2109 /* nothing, just clean up */
2110 pLevState->lastStrongRTL=-1;
2111 /* check if we have a pending conditional segment */
2112 level=(*pImpTab)[oldStateSeq][IMPTABLEVELS_RES];
2113 if ((level & 1) && (pLevState->startON > 0)) { /* after ON */
2114 start=pLevState->startON; /* reset to basic run level */
b75a7d8f 2115 }
46f4442e 2116 if (_prop == DirProp_S) /* add LRM before S */
73c04bcf
A
2117 {
2118 addPoint(pBiDi, start0, LRM_BEFORE);
2119 pInsertPoints->confirmed=pInsertPoints->size;
b75a7d8f 2120 }
73c04bcf 2121 break;
b75a7d8f 2122 }
73c04bcf
A
2123 /* reset previous RTL cont to level for LTR text */
2124 for (k=pLevState->lastStrongRTL+1; k<start0; k++)
2125 {
2126 /* reset odd level, leave runLevel+2 as is */
2127 levels[k]=(levels[k] - 2) & ~1;
b75a7d8f 2128 }
73c04bcf
A
2129 /* mark insert points as confirmed */
2130 pInsertPoints->confirmed=pInsertPoints->size;
2131 pLevState->lastStrongRTL=-1;
46f4442e 2132 if (_prop == DirProp_S) /* add LRM before S */
73c04bcf
A
2133 {
2134 addPoint(pBiDi, start0, LRM_BEFORE);
2135 pInsertPoints->confirmed=pInsertPoints->size;
b75a7d8f 2136 }
73c04bcf 2137 break;
b75a7d8f 2138
57a6839d 2139 case 6: /* R/AL after possible relevant EN/AN */
73c04bcf
A
2140 /* just clean up */
2141 pInsertPoints=&(pBiDi->insertPoints);
2142 if (pInsertPoints->capacity > 0)
2143 /* remove all non confirmed insert points */
2144 pInsertPoints->size=pInsertPoints->confirmed;
2145 pLevState->startON=-1;
2146 pLevState->startL2EN=-1;
2147 pLevState->lastStrongRTL=limit - 1;
2148 break;
2149
57a6839d 2150 case 7: /* EN/AN after R/AL + possible cont */
73c04bcf 2151 /* check for real AN */
57a6839d 2152 if ((_prop == DirProp_AN) && (pBiDi->dirProps[start0] == AN) &&
73c04bcf
A
2153 (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))
2154 {
2155 /* real AN */
2156 if (pLevState->startL2EN == -1) /* if no relevant EN already found */
2157 {
2158 /* just note the righmost digit as a strong RTL */
2159 pLevState->lastStrongRTL=limit - 1;
2160 break;
b75a7d8f 2161 }
73c04bcf
A
2162 if (pLevState->startL2EN >= 0) /* after EN, no AN */
2163 {
2164 addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
2165 pLevState->startL2EN=-2;
2166 }
2167 /* note AN */
2168 addPoint(pBiDi, start0, LRM_BEFORE);
2169 break;
2170 }
2171 /* if first EN/AN after R/AL */
2172 if (pLevState->startL2EN == -1) {
2173 pLevState->startL2EN=start0;
b75a7d8f 2174 }
73c04bcf 2175 break;
b75a7d8f 2176
57a6839d 2177 case 8: /* note location of latest R/AL */
73c04bcf
A
2178 pLevState->lastStrongRTL=limit - 1;
2179 pLevState->startON=-1;
b75a7d8f 2180 break;
73c04bcf 2181
57a6839d 2182 case 9: /* L after R+ON/EN/AN */
73c04bcf
A
2183 /* include possible adjacent number on the left */
2184 for (k=start0-1; k>=0 && !(levels[k]&1); k--);
2185 if(k>=0) {
2186 addPoint(pBiDi, k, RLM_BEFORE); /* add RLM before */
2187 pInsertPoints=&(pBiDi->insertPoints);
2188 pInsertPoints->confirmed=pInsertPoints->size; /* confirm it */
2189 }
2190 pLevState->startON=start0;
b75a7d8f 2191 break;
73c04bcf 2192
57a6839d 2193 case 10: /* AN after L */
73c04bcf
A
2194 /* AN numbers between L text on both sides may be trouble. */
2195 /* tentatively bracket with LRMs; will be confirmed if followed by L */
2196 addPoint(pBiDi, start0, LRM_BEFORE); /* add LRM before */
2197 addPoint(pBiDi, start0, LRM_AFTER); /* add LRM after */
b75a7d8f 2198 break;
b75a7d8f 2199
57a6839d 2200 case 11: /* R after L+ON/EN/AN */
73c04bcf
A
2201 /* false alert, infirm LRMs around previous AN */
2202 pInsertPoints=&(pBiDi->insertPoints);
2203 pInsertPoints->size=pInsertPoints->confirmed;
46f4442e 2204 if (_prop == DirProp_S) /* add RLM before S */
73c04bcf
A
2205 {
2206 addPoint(pBiDi, start0, RLM_BEFORE);
2207 pInsertPoints->confirmed=pInsertPoints->size;
2208 }
2209 break;
b75a7d8f 2210
57a6839d 2211 case 12: /* L after L+ON/AN */
73c04bcf
A
2212 level=pLevState->runLevel + addLevel;
2213 for(k=pLevState->startON; k<start0; k++) {
2214 if (levels[k]<level)
2215 levels[k]=level;
b75a7d8f 2216 }
73c04bcf
A
2217 pInsertPoints=&(pBiDi->insertPoints);
2218 pInsertPoints->confirmed=pInsertPoints->size; /* confirm inserts */
2219 pLevState->startON=start0;
2220 break;
2221
57a6839d 2222 case 13: /* L after L+ON+EN/AN/ON */
73c04bcf
A
2223 level=pLevState->runLevel;
2224 for(k=start0-1; k>=pLevState->startON; k--) {
2225 if(levels[k]==level+3) {
2226 while(levels[k]==level+3) {
2227 levels[k--]-=2;
b75a7d8f 2228 }
73c04bcf
A
2229 while(levels[k]==level) {
2230 k--;
b75a7d8f
A
2231 }
2232 }
73c04bcf
A
2233 if(levels[k]==level+2) {
2234 levels[k]=level;
2235 continue;
b75a7d8f 2236 }
73c04bcf 2237 levels[k]=level+1;
b75a7d8f 2238 }
73c04bcf 2239 break;
b75a7d8f 2240
57a6839d 2241 case 14: /* R after L+ON+EN/AN/ON */
73c04bcf
A
2242 level=pLevState->runLevel+1;
2243 for(k=start0-1; k>=pLevState->startON; k--) {
2244 if(levels[k]>level) {
2245 levels[k]-=2;
b75a7d8f 2246 }
b75a7d8f 2247 }
73c04bcf 2248 break;
b75a7d8f 2249
73c04bcf 2250 default: /* we should never get here */
46f4442e 2251 U_ASSERT(FALSE);
73c04bcf 2252 break;
b75a7d8f
A
2253 }
2254 }
73c04bcf
A
2255 if((addLevel) || (start < start0)) {
2256 level=pLevState->runLevel + addLevel;
57a6839d
A
2257 if(start>=pLevState->runStart) {
2258 for(k=start; k<limit; k++) {
2259 levels[k]=level;
2260 }
2261 } else {
2262 setLevelsOutsideIsolates(pBiDi, start, limit, level);
73c04bcf
A
2263 }
2264 }
2265}
b75a7d8f 2266
57a6839d
A
2267/**
2268 * Returns the directionality of the last strong character at the end of the prologue, if any.
2269 * Requires prologue!=null.
2270 */
4388f060
A
2271static DirProp
2272lastL_R_AL(UBiDi *pBiDi) {
4388f060
A
2273 const UChar *text=pBiDi->prologue;
2274 int32_t length=pBiDi->proLength;
2275 int32_t i;
2276 UChar32 uchar;
2277 DirProp dirProp;
2278 for(i=length; i>0; ) {
2279 /* i is decremented by U16_PREV */
2280 U16_PREV(text, 0, i, uchar);
2281 dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
2282 if(dirProp==L) {
2283 return DirProp_L;
2284 }
2285 if(dirProp==R || dirProp==AL) {
2286 return DirProp_R;
2287 }
2288 if(dirProp==B) {
2289 return DirProp_ON;
2290 }
2291 }
2292 return DirProp_ON;
2293}
2294
57a6839d
A
2295/**
2296 * Returns the directionality of the first strong character, or digit, in the epilogue, if any.
2297 * Requires epilogue!=null.
2298 */
4388f060
A
2299static DirProp
2300firstL_R_AL_EN_AN(UBiDi *pBiDi) {
4388f060
A
2301 const UChar *text=pBiDi->epilogue;
2302 int32_t length=pBiDi->epiLength;
2303 int32_t i;
2304 UChar32 uchar;
2305 DirProp dirProp;
2306 for(i=0; i<length; ) {
2307 /* i is incremented by U16_NEXT */
2308 U16_NEXT(text, i, length, uchar);
2309 dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar);
2310 if(dirProp==L) {
2311 return DirProp_L;
2312 }
2313 if(dirProp==R || dirProp==AL) {
2314 return DirProp_R;
2315 }
2316 if(dirProp==EN) {
2317 return DirProp_EN;
2318 }
2319 if(dirProp==AN) {
2320 return DirProp_AN;
2321 }
2322 }
2323 return DirProp_ON;
2324}
2325
73c04bcf
A
2326static void
2327resolveImplicitLevels(UBiDi *pBiDi,
2328 int32_t start, int32_t limit,
2329 DirProp sor, DirProp eor) {
2330 const DirProp *dirProps=pBiDi->dirProps;
2ca993e8 2331 uint16_t *dirInsert = pBiDi->dirInsert; /* may be NULL */
57a6839d 2332 DirProp dirProp;
2ca993e8
A
2333 int32_t dirInsertValue;
2334 int8_t dirInsertIndex; /* position within dirInsertValue, if any */
73c04bcf
A
2335 LevState levState;
2336 int32_t i, start1, start2;
57a6839d 2337 uint16_t oldStateImp, stateImp, actionImp;
73c04bcf
A
2338 uint8_t gprop, resProp, cell;
2339 UBool inverseRTL;
2340 DirProp nextStrongProp=R;
2341 int32_t nextStrongPos=-1;
2342
2343 /* check for RTL inverse BiDi mode */
2344 /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to
2345 * loop on the text characters from end to start.
2346 * This would need a different properties state table (at least different
2347 * actions) and different levels state tables (maybe very similar to the
2348 * LTR corresponding ones).
2349 */
46f4442e
A
2350 inverseRTL=(UBool)
2351 ((start<pBiDi->lastArabicPos) && (GET_PARALEVEL(pBiDi, start) & 1) &&
2352 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT ||
2353 pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL));
57a6839d
A
2354
2355 /* initialize for property and levels state tables */
73c04bcf
A
2356 levState.startL2EN=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
2357 levState.lastStrongRTL=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
57a6839d 2358 levState.runStart=start;
73c04bcf 2359 levState.runLevel=pBiDi->levels[start];
46f4442e
A
2360 levState.pImpTab=(const ImpTab*)((pBiDi->pImpTabPair)->pImpTab)[levState.runLevel&1];
2361 levState.pImpAct=(const ImpAct*)((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel&1];
4388f060
A
2362 if(start==0 && pBiDi->proLength>0) {
2363 DirProp lastStrong=lastL_R_AL(pBiDi);
2364 if(lastStrong!=DirProp_ON) {
2365 sor=lastStrong;
2366 }
2367 }
57a6839d
A
2368 /* The isolates[] entries contain enough information to
2369 resume the bidi algorithm in the same state as it was
2370 when it was interrupted by an isolate sequence. */
2ca993e8
A
2371 dirInsertValue = 0;
2372 if (dirInsert != NULL) {
2373 dirInsertValue = dirInsert[start];
2374 while (dirInsertValue > 0) {
2375 if ((dirInsertValue & 0x000F) == Insert_PDI) {
2376 break;
2377 }
2378 dirInsertValue >>= 4;
2379 }
2380 }
2381 if((dirProps[start]==PDI || dirInsertValue>0) && pBiDi->isolateCount >= 0) {
57a6839d
A
2382 levState.startON=pBiDi->isolates[pBiDi->isolateCount].startON;
2383 start1=pBiDi->isolates[pBiDi->isolateCount].start1;
2384 stateImp=pBiDi->isolates[pBiDi->isolateCount].stateImp;
2385 levState.state=pBiDi->isolates[pBiDi->isolateCount].state;
2386 pBiDi->isolateCount--;
73c04bcf 2387 } else {
57a6839d
A
2388 levState.startON=-1;
2389 start1=start;
2390 if(dirProps[start]==NSM)
2391 stateImp = 1 + sor;
2392 else
2393 stateImp=0;
2394 levState.state=0;
2395 processPropertySeq(pBiDi, &levState, sor, start, start);
73c04bcf 2396 }
57a6839d 2397 start2=start; /* to make Java compiler happy */
73c04bcf
A
2398
2399 for(i=start; i<=limit; i++) {
2400 if(i>=limit) {
57a6839d 2401 int32_t k;
2ca993e8
A
2402 dirInsertValue = 0;
2403 for(k=limit-1; k>start && dirInsertValue <= 0; k--) {
2404 dirProp = dirProps[k];
2405 if ((DIRPROP_FLAG(dirProp)&MASK_BN_EXPLICIT) == 0) {
2406 break;
2407 }
2408 dirProp = ON;
2409 if (dirInsert != NULL) {
2410 dirInsertValue = dirInsert[k];
2411 while (dirInsertValue > 0) {
2412 dirProp = (DirProp)stdDirFromInsertDir[dirInsertValue & 0x000F];
2413 if ((DIRPROP_FLAG(dirProp)&MASK_BN_EXPLICIT) == 0) {
2414 break;
2415 }
2416 dirInsertValue >>= 4;
2417 }
2418 }
2419 }
2420 if (k == start) {
2421 dirProp = dirProps[k];
2422 }
57a6839d
A
2423 if(dirProp==LRI || dirProp==RLI)
2424 break; /* no forced closing for sequence ending with LRI/RLI */
73c04bcf 2425 gprop=eor;
b75a7d8f 2426 } else {
73c04bcf 2427 DirProp prop, prop1;
57a6839d 2428 prop=dirProps[i];
b331163b
A
2429 if(prop==B) {
2430 pBiDi->isolateCount=-1; /* current isolates stack entry == none */
2431 }
73c04bcf
A
2432 if(inverseRTL) {
2433 if(prop==AL) {
2434 /* AL before EN does not make it AN */
2435 prop=R;
2436 } else if(prop==EN) {
2437 if(nextStrongPos<=i) {
2438 /* look for next strong char (L/R/AL) */
2439 int32_t j;
2440 nextStrongProp=R; /* set default */
2441 nextStrongPos=limit;
2442 for(j=i+1; j<limit; j++) {
57a6839d 2443 prop1=dirProps[j];
73c04bcf
A
2444 if(prop1==L || prop1==R || prop1==AL) {
2445 nextStrongProp=prop1;
2446 nextStrongPos=j;
2447 break;
2448 }
2449 }
2450 }
2451 if(nextStrongProp==AL) {
2452 prop=AN;
2453 }
b75a7d8f
A
2454 }
2455 }
73c04bcf 2456 gprop=groupProp[prop];
b75a7d8f 2457 }
73c04bcf
A
2458 oldStateImp=stateImp;
2459 cell=impTabProps[oldStateImp][gprop];
2460 stateImp=GET_STATEPROPS(cell); /* isolate the new state */
2461 actionImp=GET_ACTIONPROPS(cell); /* isolate the action */
2462 if((i==limit) && (actionImp==0)) {
2463 /* there is an unprocessed sequence if its property == eor */
2464 actionImp=1; /* process the last sequence */
2465 }
2466 if(actionImp) {
2467 resProp=impTabProps[oldStateImp][IMPTABPROPS_RES];
2468 switch(actionImp) {
2469 case 1: /* process current seq1, init new seq1 */
2470 processPropertySeq(pBiDi, &levState, resProp, start1, i);
2471 start1=i;
2472 break;
2473 case 2: /* init new seq2 */
2474 start2=i;
2475 break;
2476 case 3: /* process seq1, process seq2, init new seq1 */
2477 processPropertySeq(pBiDi, &levState, resProp, start1, start2);
46f4442e 2478 processPropertySeq(pBiDi, &levState, DirProp_ON, start2, i);
73c04bcf
A
2479 start1=i;
2480 break;
2481 case 4: /* process seq1, set seq1=seq2, init new seq2 */
2482 processPropertySeq(pBiDi, &levState, resProp, start1, start2);
2483 start1=start2;
2484 start2=i;
2485 break;
2486 default: /* we should never get here */
46f4442e 2487 U_ASSERT(FALSE);
73c04bcf
A
2488 break;
2489 }
b75a7d8f
A
2490 }
2491 }
57a6839d 2492
73c04bcf 2493 /* flush possible pending sequence, e.g. ON */
4388f060
A
2494 if(limit==pBiDi->length && pBiDi->epiLength>0) {
2495 DirProp firstStrong=firstL_R_AL_EN_AN(pBiDi);
2496 if(firstStrong!=DirProp_ON) {
2497 eor=firstStrong;
2498 }
2499 }
57a6839d
A
2500
2501 /* look for the last char not a BN or LRE/RLE/LRO/RLO/PDF */
2ca993e8
A
2502 dirInsertValue = 0;
2503 for(i=limit-1; i>start && dirInsertValue <= 0; i--) {
2504 dirProp=dirProps[i];
2505 if ((DIRPROP_FLAG(dirProp)&MASK_BN_EXPLICIT) == 0) {
2506 break;
2507 }
2508 dirProp = ON;
2509 if (dirInsert != NULL) {
2510 dirInsertValue = dirInsert[i];
2511 while (dirInsertValue > 0) {
2512 dirProp = (DirProp)stdDirFromInsertDir[dirInsertValue & 0x000F];
2513 if ((DIRPROP_FLAG(dirProp)&MASK_BN_EXPLICIT) == 0) {
2514 break;
2515 }
2516 dirInsertValue >>= 4;
2517 }
2518 }
2519 }
2520 if (i == start) {
2521 dirProp=dirProps[i];
2522 }
57a6839d
A
2523 if((dirProp==LRI || dirProp==RLI) && limit<pBiDi->length) {
2524 pBiDi->isolateCount++;
2525 pBiDi->isolates[pBiDi->isolateCount].stateImp=stateImp;
2526 pBiDi->isolates[pBiDi->isolateCount].state=levState.state;
2527 pBiDi->isolates[pBiDi->isolateCount].start1=start1;
2528 pBiDi->isolates[pBiDi->isolateCount].startON=levState.startON;
2529 }
2530 else
2531 processPropertySeq(pBiDi, &levState, eor, limit, limit);
b75a7d8f
A
2532}
2533
2534/* perform (L1) and (X9) ---------------------------------------------------- */
2535
2536/*
2537 * Reset the embedding levels for some non-graphic characters (L1).
2538 * This function also sets appropriate levels for BN, and
2539 * explicit embedding types that are supposed to have been removed
2540 * from the paragraph in (X9).
2541 */
2542static void
2543adjustWSLevels(UBiDi *pBiDi) {
2544 const DirProp *dirProps=pBiDi->dirProps;
2545 UBiDiLevel *levels=pBiDi->levels;
2546 int32_t i;
2547
2548 if(pBiDi->flags&MASK_WS) {
73c04bcf 2549 UBool orderParagraphsLTR=pBiDi->orderParagraphsLTR;
b75a7d8f
A
2550 Flags flag;
2551
2552 i=pBiDi->trailingWSStart;
2553 while(i>0) {
2554 /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
57a6839d 2555 while(i>0 && (flag=DIRPROP_FLAG(dirProps[--i]))&MASK_WS) {
73c04bcf
A
2556 if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
2557 levels[i]=0;
2558 } else {
2559 levels[i]=GET_PARALEVEL(pBiDi, i);
2560 }
b75a7d8f
A
2561 }
2562
2563 /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
2564 /* here, i+1 is guaranteed to be <length */
2565 while(i>0) {
57a6839d 2566 flag=DIRPROP_FLAG(dirProps[--i]);
b75a7d8f
A
2567 if(flag&MASK_BN_EXPLICIT) {
2568 levels[i]=levels[i+1];
73c04bcf
A
2569 } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) {
2570 levels[i]=0;
2571 break;
b75a7d8f 2572 } else if(flag&MASK_B_S) {
73c04bcf 2573 levels[i]=GET_PARALEVEL(pBiDi, i);
b75a7d8f
A
2574 break;
2575 }
2576 }
2577 }
2578 }
2579}
2580
51004dcb 2581U_CAPI void U_EXPORT2
4388f060
A
2582ubidi_setContext(UBiDi *pBiDi,
2583 const UChar *prologue, int32_t proLength,
2584 const UChar *epilogue, int32_t epiLength,
2585 UErrorCode *pErrorCode) {
2586 /* check the argument values */
2587 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2588 if(pBiDi==NULL || proLength<-1 || epiLength<-1 ||
2589 (prologue==NULL && proLength!=0) || (epilogue==NULL && epiLength!=0)) {
2590 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2591 return;
2592 }
2593
2594 if(proLength==-1) {
2595 pBiDi->proLength=u_strlen(prologue);
2596 } else {
2597 pBiDi->proLength=proLength;
2598 }
2599 if(epiLength==-1) {
2600 pBiDi->epiLength=u_strlen(epilogue);
2601 } else {
2602 pBiDi->epiLength=epiLength;
2603 }
2604 pBiDi->prologue=prologue;
2605 pBiDi->epilogue=epilogue;
2606}
2607
2608static void
2609setParaSuccess(UBiDi *pBiDi) {
2610 pBiDi->proLength=0; /* forget the last context */
2611 pBiDi->epiLength=0;
2612 pBiDi->pParaBiDi=pBiDi; /* mark successful setPara */
2613}
2614
73c04bcf
A
/* smaller of two values; each argument may be evaluated twice */
#define BIDI_MIN(x, y)   (((x)<(y)) ? (x) : (y))
/* absolute value; the argument may be evaluated twice */
#define BIDI_ABS(x)      (((x)<0) ? (-(x)) : (x))
57a6839d 2617
73c04bcf
A
2618static void
2619setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length,
2620 UBiDiLevel paraLevel, UErrorCode *pErrorCode) {
f3c0d7a5 2621 int32_t *runsOnlyMemory = NULL;
73c04bcf
A
2622 int32_t *visualMap;
2623 UChar *visualText;
46f4442e 2624 int32_t saveLength, saveTrailingWSStart;
73c04bcf
A
2625 const UBiDiLevel *levels;
2626 UBiDiLevel *saveLevels;
46f4442e
A
2627 UBiDiDirection saveDirection;
2628 UBool saveMayAllocateText;
73c04bcf
A
2629 Run *runs;
2630 int32_t visualLength, i, j, visualStart, logicalStart,
2631 runCount, runLength, addedRuns, insertRemove,
2632 start, limit, step, indexOddBit, logicalPos,
729e4ab9 2633 index0, index1;
73c04bcf
A
2634 uint32_t saveOptions;
2635
2636 pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT;
2637 if(length==0) {
2638 ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
2639 goto cleanup3;
2640 }
2641 /* obtain memory for mapping table and visual text */
f3c0d7a5 2642 runsOnlyMemory=static_cast<int32_t *>(uprv_malloc(length*(sizeof(int32_t)+sizeof(UChar)+sizeof(UBiDiLevel))));
73c04bcf
A
2643 if(runsOnlyMemory==NULL) {
2644 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2645 goto cleanup3;
2646 }
2647 visualMap=runsOnlyMemory;
2648 visualText=(UChar *)&visualMap[length];
2649 saveLevels=(UBiDiLevel *)&visualText[length];
2650 saveOptions=pBiDi->reorderingOptions;
2651 if(saveOptions & UBIDI_OPTION_INSERT_MARKS) {
2652 pBiDi->reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS;
2653 pBiDi->reorderingOptions|=UBIDI_OPTION_REMOVE_CONTROLS;
2654 }
46f4442e 2655 paraLevel&=1; /* accept only 0 or 1 */
73c04bcf 2656 ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode);
46f4442e
A
2657 if(U_FAILURE(*pErrorCode)) {
2658 goto cleanup3;
2659 }
2660 /* we cannot access directly pBiDi->levels since it is not yet set if
2661 * direction is not MIXED
2662 */
73c04bcf 2663 levels=ubidi_getLevels(pBiDi, pErrorCode);
a62d09fc 2664 uprv_memcpy(saveLevels, levels, (size_t)pBiDi->length*sizeof(UBiDiLevel));
46f4442e
A
2665 saveTrailingWSStart=pBiDi->trailingWSStart;
2666 saveLength=pBiDi->length;
2667 saveDirection=pBiDi->direction;
73c04bcf
A
2668
2669 /* FOOD FOR THOUGHT: instead of writing the visual text, we could use
2670 * the visual map and the dirProps array to drive the second call
2671 * to ubidi_setPara (but must make provision for possible removal of
2672 * BiDi controls. Alternatively, only use the dirProps array via
2673 * customized classifier callback.
2674 */
2675 visualLength=ubidi_writeReordered(pBiDi, visualText, length,
2676 UBIDI_DO_MIRRORING, pErrorCode);
73c04bcf
A
2677 ubidi_getVisualMap(pBiDi, visualMap, pErrorCode);
2678 if(U_FAILURE(*pErrorCode)) {
2679 goto cleanup2;
2680 }
46f4442e 2681 pBiDi->reorderingOptions=saveOptions;
73c04bcf
A
2682
2683 pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_LIKE_DIRECT;
46f4442e
A
2684 paraLevel^=1;
2685 /* Because what we did with reorderingOptions, visualText may be shorter
2686 * than the original text. But we don't want the levels memory to be
2687 * reallocated shorter than the original length, since we need to restore
2688 * the levels as after the first call to ubidi_setpara() before returning.
2689 * We will force mayAllocateText to FALSE before the second call to
2690 * ubidi_setpara(), and will restore it afterwards.
2691 */
2692 saveMayAllocateText=pBiDi->mayAllocateText;
2693 pBiDi->mayAllocateText=FALSE;
73c04bcf 2694 ubidi_setPara(pBiDi, visualText, visualLength, paraLevel, NULL, pErrorCode);
46f4442e
A
2695 pBiDi->mayAllocateText=saveMayAllocateText;
2696 ubidi_getRuns(pBiDi, pErrorCode);
73c04bcf
A
2697 if(U_FAILURE(*pErrorCode)) {
2698 goto cleanup1;
2699 }
73c04bcf
A
2700 /* check if some runs must be split, count how many splits */
2701 addedRuns=0;
2702 runCount=pBiDi->runCount;
2703 runs=pBiDi->runs;
2704 visualStart=0;
2705 for(i=0; i<runCount; i++, visualStart+=runLength) {
2706 runLength=runs[i].visualLimit-visualStart;
2707 if(runLength<2) {
2708 continue;
2709 }
2710 logicalStart=GET_INDEX(runs[i].logicalStart);
2711 for(j=logicalStart+1; j<logicalStart+runLength; j++) {
729e4ab9 2712 index0=visualMap[j];
73c04bcf 2713 index1=visualMap[j-1];
729e4ab9 2714 if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
73c04bcf
A
2715 addedRuns++;
2716 }
2717 }
2718 }
2719 if(addedRuns) {
2720 if(getRunsMemory(pBiDi, runCount+addedRuns)) {
2721 if(runCount==1) {
2722 /* because we switch from UBiDi.simpleRuns to UBiDi.runs */
2723 pBiDi->runsMemory[0]=runs[0];
2724 }
2725 runs=pBiDi->runs=pBiDi->runsMemory;
2726 pBiDi->runCount+=addedRuns;
2727 } else {
2728 goto cleanup1;
2729 }
2730 }
2731 /* split runs which are not consecutive in source text */
2732 for(i=runCount-1; i>=0; i--) {
2733 runLength= i==0 ? runs[0].visualLimit :
2734 runs[i].visualLimit-runs[i-1].visualLimit;
2735 logicalStart=runs[i].logicalStart;
2736 indexOddBit=GET_ODD_BIT(logicalStart);
2737 logicalStart=GET_INDEX(logicalStart);
2738 if(runLength<2) {
2739 if(addedRuns) {
2740 runs[i+addedRuns]=runs[i];
2741 }
2742 logicalPos=visualMap[logicalStart];
2743 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2744 saveLevels[logicalPos]^indexOddBit);
2745 continue;
2746 }
2747 if(indexOddBit) {
2748 start=logicalStart;
2749 limit=logicalStart+runLength-1;
2750 step=1;
2751 } else {
2752 start=logicalStart+runLength-1;
2753 limit=logicalStart;
2754 step=-1;
2755 }
2756 for(j=start; j!=limit; j+=step) {
729e4ab9 2757 index0=visualMap[j];
73c04bcf 2758 index1=visualMap[j+step];
729e4ab9
A
2759 if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) {
2760 logicalPos=BIDI_MIN(visualMap[start], index0);
73c04bcf
A
2761 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2762 saveLevels[logicalPos]^indexOddBit);
2763 runs[i+addedRuns].visualLimit=runs[i].visualLimit;
2764 runs[i].visualLimit-=BIDI_ABS(j-start)+1;
2765 insertRemove=runs[i].insertRemove&(LRM_AFTER|RLM_AFTER);
2766 runs[i+addedRuns].insertRemove=insertRemove;
2767 runs[i].insertRemove&=~insertRemove;
2768 start=j+step;
2769 addedRuns--;
2770 }
2771 }
2772 if(addedRuns) {
2773 runs[i+addedRuns]=runs[i];
2774 }
2775 logicalPos=BIDI_MIN(visualMap[start], visualMap[limit]);
2776 runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos,
2777 saveLevels[logicalPos]^indexOddBit);
2778 }
2779
2780 cleanup1:
2781 /* restore initial paraLevel */
2782 pBiDi->paraLevel^=1;
2783 cleanup2:
2784 /* restore real text */
2785 pBiDi->text=text;
46f4442e
A
2786 pBiDi->length=saveLength;
2787 pBiDi->originalLength=length;
2788 pBiDi->direction=saveDirection;
2789 /* the saved levels should never excess levelsSize, but we check anyway */
2790 if(saveLength>pBiDi->levelsSize) {
2791 saveLength=pBiDi->levelsSize;
2792 }
a62d09fc 2793 uprv_memcpy(pBiDi->levels, saveLevels, (size_t)saveLength*sizeof(UBiDiLevel));
46f4442e 2794 pBiDi->trailingWSStart=saveTrailingWSStart;
46f4442e
A
2795 if(pBiDi->runCount>1) {
2796 pBiDi->direction=UBIDI_MIXED;
2797 }
73c04bcf 2798 cleanup3:
b331163b
A
2799 /* free memory for mapping table and visual text */
2800 uprv_free(runsOnlyMemory);
2801
73c04bcf
A
2802 pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY;
2803}
2804
2ca993e8
A
2805/* -------------------------------------------------------------------------- */
2806/* internal prototype */
2807
2808static void
2809ubidi_setParaInternal(UBiDi *pBiDi,
2810 const UChar *text, int32_t length,
2811 UBiDiLevel paraLevel,
2812 UBiDiLevel *embeddingLevels,
2813 const int32_t *offsets, int32_t offsetCount,
2814 const int32_t *controlStringIndices,
2815 const UChar * const * controlStrings,
2816 UErrorCode *pErrorCode);
2817
374ca955
A
2818/* ubidi_setPara ------------------------------------------------------------ */
2819
2820U_CAPI void U_EXPORT2
2821ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
2822 UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
2823 UErrorCode *pErrorCode) {
2ca993e8
A
2824 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2825 ubidi_setParaInternal(pBiDi, text, length, paraLevel,
2826 embeddingLevels,
2827 NULL, 0, NULL, NULL,
2828 pErrorCode);
2829}
2830
2831/* ubidi_setParaWithControls ------------------------------------------------ */
2832
2833U_CAPI void U_EXPORT2
2834ubidi_setParaWithControls(UBiDi *pBiDi,
2835 const UChar *text, int32_t length,
2836 UBiDiLevel paraLevel,
2837 const int32_t *offsets, int32_t offsetCount,
2838 const int32_t *controlStringIndices,
2839 const UChar * const * controlStrings,
2840 UErrorCode *pErrorCode) {
2841 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
2842 /* check the argument values that are not already checked in ubidi_setParaInternal */
2843 if ( offsetCount < 0 || (offsetCount > 0 && (offsets == NULL || controlStrings == NULL)) ) {
2844 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2845 return;
2846 }
2847 ubidi_setParaInternal(pBiDi, text, length, paraLevel,
2848 NULL,
2849 offsets, offsetCount, controlStringIndices, controlStrings,
2850 pErrorCode);
2851}
2852
2853/* ubidi_setParaInternal ---------------------------------------------------- */
2854
2855void
2856ubidi_setParaInternal(UBiDi *pBiDi,
2857 const UChar *text, int32_t length,
2858 UBiDiLevel paraLevel,
2859 UBiDiLevel *embeddingLevels,
2860 const int32_t *offsets, int32_t offsetCount,
2861 const int32_t *controlStringIndices,
2862 const UChar * const * controlStrings,
2863 UErrorCode *pErrorCode) {
374ca955 2864 UBiDiDirection direction;
57a6839d 2865 DirProp *dirProps;
374ca955 2866
2ca993e8 2867 /* check the argument values (pErrorCode status alrecy checked before getting here) */
46f4442e
A
2868 if(pBiDi==NULL || text==NULL || length<-1 ||
2869 (paraLevel>UBIDI_MAX_EXPLICIT_LEVEL && paraLevel<UBIDI_DEFAULT_LTR)) {
374ca955
A
2870 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2871 return;
2872 }
2873
2874 if(length==-1) {
2875 length=u_strlen(text);
2876 }
2ca993e8
A
2877 if (offsetCount > 0 && pBiDi->reorderingMode > UBIDI_REORDER_GROUP_NUMBERS_WITH_R) {
2878 offsetCount = 0;
2879 }
374ca955 2880
73c04bcf
A
2881 /* special treatment for RUNS_ONLY mode */
2882 if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) {
2883 setParaRunsOnly(pBiDi, text, length, paraLevel, pErrorCode);
2884 return;
2885 }
2886
374ca955 2887 /* initialize the UBiDi structure */
73c04bcf 2888 pBiDi->pParaBiDi=NULL; /* mark unfinished setPara */
374ca955 2889 pBiDi->text=text;
73c04bcf 2890 pBiDi->length=pBiDi->originalLength=pBiDi->resultLength=length;
374ca955 2891 pBiDi->paraLevel=paraLevel;
f3c0d7a5 2892 pBiDi->direction=(UBiDiDirection)(paraLevel&1);
73c04bcf 2893 pBiDi->paraCount=1;
374ca955 2894
2ca993e8 2895 pBiDi->dirInsert=NULL;
374ca955
A
2896 pBiDi->dirProps=NULL;
2897 pBiDi->levels=NULL;
2898 pBiDi->runs=NULL;
73c04bcf
A
2899 pBiDi->insertPoints.size=0; /* clean up from last call */
2900 pBiDi->insertPoints.confirmed=0; /* clean up from last call */
2901
2902 /*
2903 * Save the original paraLevel if contextual; otherwise, set to 0.
2904 */
57a6839d 2905 pBiDi->defaultParaLevel=IS_DEFAULT_LEVEL(paraLevel);
374ca955
A
2906
2907 if(length==0) {
2908 /*
2909 * For an empty paragraph, create a UBiDi object with the paraLevel and
2910 * the flags and the direction set but without allocating zero-length arrays.
2911 * There is nothing more to do.
2912 */
2913 if(IS_DEFAULT_LEVEL(paraLevel)) {
2914 pBiDi->paraLevel&=1;
73c04bcf 2915 pBiDi->defaultParaLevel=0;
374ca955 2916 }
57a6839d 2917 pBiDi->flags=DIRPROP_FLAG_LR(paraLevel);
374ca955 2918 pBiDi->runCount=0;
46f4442e 2919 pBiDi->paraCount=0;
4388f060 2920 setParaSuccess(pBiDi); /* mark successful setPara */
374ca955
A
2921 return;
2922 }
2923
2924 pBiDi->runCount=-1;
2925
57a6839d
A
2926 /* allocate paras memory */
2927 if(pBiDi->parasMemory)
2928 pBiDi->paras=pBiDi->parasMemory;
2929 else
2930 pBiDi->paras=pBiDi->simpleParas;
2931
2ca993e8
A
2932 /*
2933 * Get the inserted directional properties
2934 * if necessary.
2935 */
2936 if (offsetCount > 0) {
2937 if(getDirInsertMemory(pBiDi, length)) {
2938 pBiDi->dirInsert=pBiDi->dirInsertMemory;
2939 if(!getDirInsert(pBiDi, offsets, offsetCount, controlStringIndices, controlStrings)) {
2940 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2941 return;
2942 }
2943 } else {
2944 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2945 return;
2946 }
2947 }
2948
374ca955
A
2949 /*
2950 * Get the directional properties,
2951 * the flags bit-set, and
73c04bcf 2952 * determine the paragraph level if necessary.
374ca955
A
2953 */
2954 if(getDirPropsMemory(pBiDi, length)) {
2955 pBiDi->dirProps=pBiDi->dirPropsMemory;
57a6839d
A
2956 if(!getDirProps(pBiDi)) {
2957 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2958 return;
2959 }
374ca955
A
2960 } else {
2961 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2962 return;
2963 }
57a6839d 2964 dirProps=pBiDi->dirProps;
73c04bcf
A
2965 /* the processed length may have changed if UBIDI_OPTION_STREAMING */
2966 length= pBiDi->length;
2967 pBiDi->trailingWSStart=length; /* the levels[] will reflect the WS run */
374ca955
A
2968
2969 /* are explicit levels specified? */
2970 if(embeddingLevels==NULL) {
2971 /* no: determine explicit levels according to the (Xn) rules */\
2972 if(getLevelsMemory(pBiDi, length)) {
2973 pBiDi->levels=pBiDi->levelsMemory;
57a6839d
A
2974 direction=resolveExplicitLevels(pBiDi, pErrorCode);
2975 if(U_FAILURE(*pErrorCode)) {
2976 return;
2977 }
374ca955
A
2978 } else {
2979 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
2980 return;
2981 }
2982 } else {
73c04bcf 2983 /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */
374ca955
A
2984 pBiDi->levels=embeddingLevels;
2985 direction=checkExplicitLevels(pBiDi, pErrorCode);
2986 if(U_FAILURE(*pErrorCode)) {
2987 return;
2988 }
2989 }
2990
57a6839d 2991 /* allocate isolate memory */
b331163b 2992 if(pBiDi->isolateCount<=SIMPLE_ISOLATES_COUNT)
57a6839d
A
2993 pBiDi->isolates=pBiDi->simpleIsolates;
2994 else
2995 if((int32_t)(pBiDi->isolateCount*sizeof(Isolate))<=pBiDi->isolatesSize)
2996 pBiDi->isolates=pBiDi->isolatesMemory;
2997 else {
2998 if(getInitialIsolatesMemory(pBiDi, pBiDi->isolateCount)) {
2999 pBiDi->isolates=pBiDi->isolatesMemory;
3000 } else {
3001 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
3002 return;
3003 }
3004 }
3005 pBiDi->isolateCount=-1; /* current isolates stack entry == none */
3006
374ca955
A
3007 /*
3008 * The steps after (X9) in the UBiDi algorithm are performed only if
3009 * the paragraph text has mixed directionality!
3010 */
3011 pBiDi->direction=direction;
3012 switch(direction) {
3013 case UBIDI_LTR:
374ca955
A
3014 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
3015 pBiDi->trailingWSStart=0;
3016 break;
3017 case UBIDI_RTL:
374ca955
A
3018 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
3019 pBiDi->trailingWSStart=0;
3020 break;
3021 default:
73c04bcf
A
3022 /*
3023 * Choose the right implicit state table
3024 */
3025 switch(pBiDi->reorderingMode) {
3026 case UBIDI_REORDER_DEFAULT:
3027 pBiDi->pImpTabPair=&impTab_DEFAULT;
3028 break;
3029 case UBIDI_REORDER_NUMBERS_SPECIAL:
3030 pBiDi->pImpTabPair=&impTab_NUMBERS_SPECIAL;
3031 break;
3032 case UBIDI_REORDER_GROUP_NUMBERS_WITH_R:
3033 pBiDi->pImpTabPair=&impTab_GROUP_NUMBERS_WITH_R;
3034 break;
73c04bcf
A
3035 case UBIDI_REORDER_INVERSE_NUMBERS_AS_L:
3036 pBiDi->pImpTabPair=&impTab_INVERSE_NUMBERS_AS_L;
3037 break;
3038 case UBIDI_REORDER_INVERSE_LIKE_DIRECT:
3039 if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
3040 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS;
3041 } else {
3042 pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT;
3043 }
3044 break;
3045 case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL:
3046 if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) {
3047 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS;
3048 } else {
3049 pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL;
3050 }
3051 break;
3052 default:
46f4442e
A
3053 /* we should never get here */
3054 U_ASSERT(FALSE);
73c04bcf
A
3055 break;
3056 }
374ca955
A
3057 /*
3058 * If there are no external levels specified and there
3059 * are no significant explicit level codes in the text,
3060 * then we can treat the entire paragraph as one run.
3061 * Otherwise, we need to perform the following rules on runs of
3062 * the text with the same embedding levels. (X10)
3063 * "Significant" explicit level codes are ones that actually
3064 * affect non-BN characters.
3065 * Examples for "insignificant" ones are empty embeddings
3066 * LRE-PDF, LRE-RLE-PDF-PDF, etc.
3067 */
46f4442e
A
3068 if(embeddingLevels==NULL && pBiDi->paraCount<=1 &&
3069 !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) {
374ca955 3070 resolveImplicitLevels(pBiDi, 0, length,
73c04bcf
A
3071 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, 0)),
3072 GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, length-1)));
374ca955
A
3073 } else {
3074 /* sor, eor: start and end types of same-level-run */
3075 UBiDiLevel *levels=pBiDi->levels;
3076 int32_t start, limit=0;
3077 UBiDiLevel level, nextLevel;
3078 DirProp sor, eor;
3079
3080 /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
73c04bcf 3081 level=GET_PARALEVEL(pBiDi, 0);
374ca955
A
3082 nextLevel=levels[0];
3083 if(level<nextLevel) {
3084 eor=GET_LR_FROM_LEVEL(nextLevel);
3085 } else {
3086 eor=GET_LR_FROM_LEVEL(level);
3087 }
3088
3089 do {
3090 /* determine start and limit of the run (end points just behind the run) */
3091
3092 /* the values for this run's start are the same as for the previous run's end */
374ca955
A
3093 start=limit;
3094 level=nextLevel;
57a6839d 3095 if((start>0) && (dirProps[start-1]==B)) {
73c04bcf
A
3096 /* except if this is a new paragraph, then set sor = para level */
3097 sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start));
3098 } else {
3099 sor=eor;
3100 }
374ca955
A
3101
3102 /* search for the limit of this run */
57a6839d
A
3103 while((++limit<length) &&
3104 ((levels[limit]==level) ||
3105 (DIRPROP_FLAG(dirProps[limit])&MASK_BN_EXPLICIT))) {}
374ca955
A
3106
3107 /* get the correct level of the next run */
3108 if(limit<length) {
3109 nextLevel=levels[limit];
3110 } else {
73c04bcf 3111 nextLevel=GET_PARALEVEL(pBiDi, length-1);
374ca955
A
3112 }
3113
3114 /* determine eor from max(level, nextLevel); sor is last run's eor */
57a6839d 3115 if(NO_OVERRIDE(level)<NO_OVERRIDE(nextLevel)) {
374ca955
A
3116 eor=GET_LR_FROM_LEVEL(nextLevel);
3117 } else {
3118 eor=GET_LR_FROM_LEVEL(level);
3119 }
3120
3121 /* if the run consists of overridden directional types, then there
3122 are no implicit types to be resolved */
3123 if(!(level&UBIDI_LEVEL_OVERRIDE)) {
3124 resolveImplicitLevels(pBiDi, start, limit, sor, eor);
3125 } else {
3126 /* remove the UBIDI_LEVEL_OVERRIDE flags */
3127 do {
3128 levels[start++]&=~UBIDI_LEVEL_OVERRIDE;
3129 } while(start<limit);
3130 }
3131 } while(limit<length);
3132 }
73c04bcf
A
3133 /* check if we got any memory shortage while adding insert points */
3134 if (U_FAILURE(pBiDi->insertPoints.errorCode))
3135 {
3136 *pErrorCode=pBiDi->insertPoints.errorCode;
3137 return;
3138 }
374ca955
A
3139 /* reset the embedding levels for some non-graphic characters (L1), (X9) */
3140 adjustWSLevels(pBiDi);
374ca955
A
3141 break;
3142 }
46f4442e
A
3143 /* add RLM for inverse Bidi with contextual orientation resolving
3144 * to RTL which would not round-trip otherwise
3145 */
3146 if((pBiDi->defaultParaLevel>0) &&
3147 (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) &&
3148 ((pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) ||
3149 (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) {
3150 int32_t i, j, start, last;
57a6839d 3151 UBiDiLevel level;
46f4442e
A
3152 DirProp dirProp;
3153 for(i=0; i<pBiDi->paraCount; i++) {
57a6839d
A
3154 last=(pBiDi->paras[i].limit)-1;
3155 level=pBiDi->paras[i].level;
3156 if(level==0)
46f4442e 3157 continue; /* LTR paragraph */
57a6839d 3158 start= i==0 ? 0 : pBiDi->paras[i-1].limit;
46f4442e 3159 for(j=last; j>=start; j--) {
57a6839d 3160 dirProp=dirProps[j];
46f4442e
A
3161 if(dirProp==L) {
3162 if(j<last) {
57a6839d 3163 while(dirProps[last]==B) {
46f4442e
A
3164 last--;
3165 }
3166 }
3167 addPoint(pBiDi, last, RLM_BEFORE);
3168 break;
3169 }
3170 if(DIRPROP_FLAG(dirProp) & MASK_R_AL) {
3171 break;
3172 }
3173 }
3174 }
3175 }
3176
73c04bcf
A
3177 if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) {
3178 pBiDi->resultLength -= pBiDi->controlCount;
3179 } else {
3180 pBiDi->resultLength += pBiDi->insertPoints.size;
3181 }
4388f060 3182 setParaSuccess(pBiDi); /* mark successful setPara */
73c04bcf
A
3183}
3184
2ca993e8
A
3185/* -------------------------------------------------------------------------- */
3186
73c04bcf
A
3187U_CAPI void U_EXPORT2
3188ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR) {
3189 if(pBiDi!=NULL) {
3190 pBiDi->orderParagraphsLTR=orderParagraphsLTR;
3191 }
3192}
3193
3194U_CAPI UBool U_EXPORT2
3195ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) {
3196 if(pBiDi!=NULL) {
3197 return pBiDi->orderParagraphsLTR;
3198 } else {
3199 return FALSE;
3200 }
374ca955 3201}
b75a7d8f
A
3202
3203U_CAPI UBiDiDirection U_EXPORT2
3204ubidi_getDirection(const UBiDi *pBiDi) {
73c04bcf 3205 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
b75a7d8f
A
3206 return pBiDi->direction;
3207 } else {
3208 return UBIDI_LTR;
3209 }
3210}
3211
3212U_CAPI const UChar * U_EXPORT2
3213ubidi_getText(const UBiDi *pBiDi) {
73c04bcf 3214 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
b75a7d8f
A
3215 return pBiDi->text;
3216 } else {
3217 return NULL;
3218 }
3219}
3220
3221U_CAPI int32_t U_EXPORT2
3222ubidi_getLength(const UBiDi *pBiDi) {
73c04bcf
A
3223 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
3224 return pBiDi->originalLength;
3225 } else {
3226 return 0;
3227 }
3228}
3229
3230U_CAPI int32_t U_EXPORT2
3231ubidi_getProcessedLength(const UBiDi *pBiDi) {
3232 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
b75a7d8f
A
3233 return pBiDi->length;
3234 } else {
3235 return 0;
3236 }
3237}
3238
73c04bcf
A
3239U_CAPI int32_t U_EXPORT2
3240ubidi_getResultLength(const UBiDi *pBiDi) {
3241 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
3242 return pBiDi->resultLength;
3243 } else {
3244 return 0;
3245 }
3246}
3247
3248/* paragraphs API functions ------------------------------------------------- */
3249
b75a7d8f
A
3250U_CAPI UBiDiLevel U_EXPORT2
3251ubidi_getParaLevel(const UBiDi *pBiDi) {
73c04bcf 3252 if(IS_VALID_PARA_OR_LINE(pBiDi)) {
b75a7d8f
A
3253 return pBiDi->paraLevel;
3254 } else {
3255 return 0;
3256 }
3257}
3258
73c04bcf
A
3259U_CAPI int32_t U_EXPORT2
3260ubidi_countParagraphs(UBiDi *pBiDi) {
3261 if(!IS_VALID_PARA_OR_LINE(pBiDi)) {
3262 return 0;
3263 } else {
3264 return pBiDi->paraCount;
3265 }
3266}
b75a7d8f 3267
73c04bcf
A
3268U_CAPI void U_EXPORT2
3269ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex,
3270 int32_t *pParaStart, int32_t *pParaLimit,
3271 UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
3272 int32_t paraStart;
b75a7d8f 3273
73c04bcf 3274 /* check the argument values */
46f4442e
A
3275 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
3276 RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode);
3277 RETURN_VOID_IF_BAD_RANGE(paraIndex, 0, pBiDi->paraCount, *pErrorCode);
3278
73c04bcf
A
3279 pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */
3280 if(paraIndex) {
57a6839d 3281 paraStart=pBiDi->paras[paraIndex-1].limit;
73c04bcf
A
3282 } else {
3283 paraStart=0;
3284 }
3285 if(pParaStart!=NULL) {
3286 *pParaStart=paraStart;
3287 }
3288 if(pParaLimit!=NULL) {
57a6839d 3289 *pParaLimit=pBiDi->paras[paraIndex].limit;
73c04bcf
A
3290 }
3291 if(pParaLevel!=NULL) {
3292 *pParaLevel=GET_PARALEVEL(pBiDi, paraStart);
3293 }
73c04bcf 3294}
b75a7d8f 3295
73c04bcf
A
3296U_CAPI int32_t U_EXPORT2
3297ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex,
3298 int32_t *pParaStart, int32_t *pParaLimit,
3299 UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) {
57a6839d 3300 int32_t paraIndex;
b75a7d8f 3301
73c04bcf
A
3302 /* check the argument values */
3303 /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */
46f4442e
A
3304 RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
3305 RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
73c04bcf 3306 pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */
46f4442e
A
3307 RETURN_IF_BAD_RANGE(charIndex, 0, pBiDi->length, *pErrorCode, -1);
3308
57a6839d 3309 for(paraIndex=0; charIndex>=pBiDi->paras[paraIndex].limit; paraIndex++);
73c04bcf
A
3310 ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode);
3311 return paraIndex;
3312}
b75a7d8f 3313
73c04bcf
A
3314U_CAPI void U_EXPORT2
3315ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn,
3316 const void *newContext, UBiDiClassCallback **oldFn,
3317 const void **oldContext, UErrorCode *pErrorCode)
3318{
46f4442e
A
3319 RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
3320 if(pBiDi==NULL) {
73c04bcf
A
3321 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
3322 return;
3323 }
3324 if( oldFn )
3325 {
3326 *oldFn = pBiDi->fnClassCallback;
3327 }
3328 if( oldContext )
3329 {
3330 *oldContext = pBiDi->coClassCallback;
3331 }
3332 pBiDi->fnClassCallback = newFn;
3333 pBiDi->coClassCallback = newContext;
3334}
b75a7d8f 3335
73c04bcf
A
3336U_CAPI void U_EXPORT2
3337ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context)
3338{
46f4442e
A
3339 if(pBiDi==NULL) {
3340 return;
3341 }
73c04bcf
A
3342 if( fn )
3343 {
3344 *fn = pBiDi->fnClassCallback;
3345 }
3346 if( context )
3347 {
3348 *context = pBiDi->coClassCallback;
3349 }
3350}
b75a7d8f 3351
73c04bcf
A
3352U_CAPI UCharDirection U_EXPORT2
3353ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c)
3354{
3355 UCharDirection dir;
b75a7d8f 3356
73c04bcf
A
3357 if( pBiDi->fnClassCallback == NULL ||
3358 (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT )
3359 {
57a6839d
A
3360 dir = ubidi_getClass(pBiDi->bdp, c);
3361 }
3362 if(dir >= U_CHAR_DIRECTION_COUNT) {
f3c0d7a5 3363 dir = (UCharDirection)ON;
b75a7d8f 3364 }
57a6839d 3365 return dir;
b75a7d8f 3366}