]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/ubidi.c
ICU-6.2.22.tar.gz
[apple/icu.git] / icuSources / common / ubidi.c
1 /*
2 ******************************************************************************
3 *
4 * Copyright (C) 1999-2004, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 ******************************************************************************
8 * file name: ubidi.c
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 1999jul27
14 * created by: Markus W. Scherer
15 */
16
17 /* set import/export definitions */
18 #ifndef U_COMMON_IMPLEMENTATION
19 # define U_COMMON_IMPLEMENTATION
20 #endif
21
22 #include "cmemory.h"
23 #include "unicode/utypes.h"
24 #include "unicode/ustring.h"
25 #include "unicode/uchar.h"
26 #include "unicode/ubidi.h"
27 #include "ubidiimp.h"
28
29 /*
30 * General implementation notes:
31 *
32 * Throughout the implementation, there are comments like (W2) that refer to
33 * rules of the BiDi algorithm in its version 5, in this example to the second
34 * rule of the resolution of weak types.
35 *
36 * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
37 * character according to UTF-16, the second UChar gets the directional property of
38 * the entire character assigned, while the first one gets a BN, a boundary
39 * neutral, type, which is ignored by most of the algorithm according to
40 * rule (X9) and the implementation suggestions of the BiDi algorithm.
41 *
42 * Later, adjustWSLevels() will set the level for each BN to that of the
43 * following character (UChar), which results in surrogate pairs getting the
44 * same level on each of their surrogates.
45 *
46 * In a UTF-8 implementation, the same thing could be done: the last byte of
47 * a multi-byte sequence would get the "real" property, while all previous
48 * bytes of that sequence would get BN.
49 *
50 * It is not possible to assign all those parts of a character the same real
51 * property because this would fail in the resolution of weak types with rules
52 * that look at immediately surrounding types.
53 *
54 * As a related topic, this implementation does not remove Boundary Neutral
55 * types from the input, but ignores them wherever this is relevant.
56 * For example, the loop for the resolution of the weak types reads
57 * types until it finds a non-BN.
58 * Also, explicit embedding codes are neither changed into BN nor removed.
59 * They are only treated the same way real BNs are.
60 * As stated before, adjustWSLevels() takes care of them at the end.
61 * For the purpose of conformance, the levels of all these codes
62 * do not matter.
63 *
64 * Note that this implementation never modifies the dirProps
65 * after the initial setup.
66 *
67 *
68 * In this implementation, the resolution of weak types (Wn),
69 * neutrals (Nn), and the assignment of the resolved level (In)
70 * are all done in one single loop, in resolveImplicitLevels().
71 * Changes of dirProp values are done on the fly, without writing
72 * them back to the dirProps array.
73 *
74 *
75 * This implementation contains code that allows to bypass steps of the
76 * algorithm that are not needed on the specific paragraph
77 * in order to speed up the most common cases considerably,
78 * like text that is entirely LTR, or RTL text without numbers.
79 *
80 * Most of this is done by setting a bit for each directional property
81 * in a flags variable and later checking for whether there are
82 * any LTR characters or any RTL characters, or both, whether
83 * there are any explicit embedding codes, etc.
84 *
85 * If the (Xn) steps are performed, then the flags are re-evaluated,
86 * because they will then not contain the embedding codes any more
87 * and will be adjusted for override codes, so that subsequently
88 * more bypassing may be possible than what the initial flags suggested.
89 *
90 * If the text is not mixed-directional, then the
91 * algorithm steps for the weak type resolution are not performed,
92 * and all levels are set to the paragraph level.
93 *
94 * If there are no explicit embedding codes, then the (Xn) steps
95 * are not performed.
96 *
97 * If embedding levels are supplied as a parameter, then all
98 * explicit embedding codes are ignored, and the (Xn) steps
99 * are not performed.
100 *
101 * White Space types could get the level of the run they belong to,
102 * and are checked with a test of (flags&MASK_EMBEDDING) to
103 * consider if the paragraph direction should be considered in
104 * the flags variable.
105 *
106 * If there are no White Space types in the paragraph, then
107 * (L1) is not necessary in adjustWSLevels().
108 */
109
110 /* to avoid some conditional statements, use tiny constant arrays */
111 static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) };
112 static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) };
113 static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) };
114
115 #define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
116 #define DIRPROP_FLAG_E(level) flagE[(level)&1]
117 #define DIRPROP_FLAG_O(level) flagO[(level)&1]
118
119 /* UBiDi object management -------------------------------------------------- */
120
121 U_CAPI UBiDi * U_EXPORT2
122 ubidi_open(void)
123 {
124 UErrorCode errorCode=U_ZERO_ERROR;
125 return ubidi_openSized(0, 0, &errorCode);
126 }
127
128 U_CAPI UBiDi * U_EXPORT2
129 ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) {
130 UBiDi *pBiDi;
131
132 /* check the argument values */
133 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
134 return NULL;
135 } else if(maxLength<0 || maxRunCount<0) {
136 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
137 return NULL; /* invalid arguments */
138 }
139
140 /* allocate memory for the object */
141 pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi));
142 if(pBiDi==NULL) {
143 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
144 return NULL;
145 }
146
147 /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */
148 uprv_memset(pBiDi, 0, sizeof(UBiDi));
149
150 /* allocate memory for arrays as requested */
151 if(maxLength>0) {
152 if( !getInitialDirPropsMemory(pBiDi, maxLength) ||
153 !getInitialLevelsMemory(pBiDi, maxLength)
154 ) {
155 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
156 }
157 } else {
158 pBiDi->mayAllocateText=TRUE;
159 }
160
161 if(maxRunCount>0) {
162 if(maxRunCount==1) {
163 /* use simpleRuns[] */
164 pBiDi->runsSize=sizeof(Run);
165 } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) {
166 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
167 }
168 } else {
169 pBiDi->mayAllocateRuns=TRUE;
170 }
171
172 if(U_SUCCESS(*pErrorCode)) {
173 return pBiDi;
174 } else {
175 ubidi_close(pBiDi);
176 return NULL;
177 }
178 }
179
180 /*
181 * We are allowed to allocate memory if memory==NULL or
182 * mayAllocate==TRUE for each array that we need.
183 * We also try to grow and shrink memory as needed if we
184 * allocate it.
185 *
186 * Assume sizeNeeded>0.
187 * If *pMemory!=NULL, then assume *pSize>0.
188 *
189 * ### this realloc() may unnecessarily copy the old data,
190 * which we know we don't need any more;
191 * is this the best way to do this??
192 */
193 U_CFUNC UBool
194 ubidi_getMemory(void **pMemory, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded) {
195 /* check for existing memory */
196 if(*pMemory==NULL) {
197 /* we need to allocate memory */
198 if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=NULL) {
199 *pSize=sizeNeeded;
200 return TRUE;
201 } else {
202 return FALSE;
203 }
204 } else {
205 /* there is some memory, is it enough or too much? */
206 if(sizeNeeded>*pSize && !mayAllocate) {
207 /* not enough memory, and we must not allocate */
208 return FALSE;
209 } else if(sizeNeeded!=*pSize && mayAllocate) {
210 /* we may try to grow or shrink */
211 void *memory;
212
213 if((memory=uprv_realloc(*pMemory, sizeNeeded))!=NULL) {
214 *pMemory=memory;
215 *pSize=sizeNeeded;
216 return TRUE;
217 } else {
218 /* we failed to grow */
219 return FALSE;
220 }
221 } else {
222 /* we have at least enough memory and must not allocate */
223 return TRUE;
224 }
225 }
226 }
227
228 U_CAPI void U_EXPORT2
229 ubidi_close(UBiDi *pBiDi) {
230 if(pBiDi!=NULL) {
231 if(pBiDi->dirPropsMemory!=NULL) {
232 uprv_free(pBiDi->dirPropsMemory);
233 }
234 if(pBiDi->levelsMemory!=NULL) {
235 uprv_free(pBiDi->levelsMemory);
236 }
237 if(pBiDi->runsMemory!=NULL) {
238 uprv_free(pBiDi->runsMemory);
239 }
240 uprv_free(pBiDi);
241 }
242 }
243
244 /* set to approximate "inverse BiDi" ---------------------------------------- */
245
246 U_CAPI void U_EXPORT2
247 ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) {
248 if(pBiDi!=NULL) {
249 pBiDi->isInverse=isInverse;
250 }
251 }
252
253 U_CAPI UBool U_EXPORT2
254 ubidi_isInverse(UBiDi *pBiDi) {
255 if(pBiDi!=NULL) {
256 return pBiDi->isInverse;
257 } else {
258 return FALSE;
259 }
260 }
261
262 /* perform (P2)..(P3) ------------------------------------------------------- */
263
264 /*
265 * Get the directional properties for the text,
266 * calculate the flags bit-set, and
267 * determine the paragraph level if necessary.
268 */
/*
 * getDirProps: store one directional property per code unit of
 * text[0..pBiDi->length) into pBiDi->dirPropsMemory, collect all properties
 * seen in a bit-set that ends up in pBiDi->flags, and, if pBiDi->paraLevel
 * is one of the default levels, replace it with the resolved level (P2..P3).
 *
 * pBiDi  object whose length, paraLevel and dirPropsMemory are already set
 * text   the paragraph text
 *
 * NOTE(review): the first loop fetches a character before it tests
 * i>=length, so it relies on length>0 - confirm that callers guarantee this.
 */
269 static void
270 getDirProps(UBiDi *pBiDi, const UChar *text) {
271 DirProp *dirProps=pBiDi->dirPropsMemory; /* pBiDi->dirProps is const */
272
273 int32_t i=0, i0, i1, length=pBiDi->length;
274 Flags flags=0; /* collect all directionalities in the text */
275 UChar32 uchar;
276 DirProp dirProp;
277
278 if(IS_DEFAULT_LEVEL(pBiDi->paraLevel)) {
279 /* determine the paragraph level (P2..P3) */
/* scan only up to the first strong character; the loop further below handles the rest */
280 for(;;) {
281 i0=i; /* index of first code unit */
282 UTF_NEXT_CHAR(text, i, length, uchar);
283 i1=i-1; /* index of last code unit, gets the directional property */
284 flags|=DIRPROP_FLAG(dirProps[i1]=dirProp=u_charDirection(uchar));
285 if(i1>i0) { /* set previous code units' properties to BN */
286 flags|=DIRPROP_FLAG(BN);
287 do {
288 dirProps[--i1]=BN;
289 } while(i1>i0);
290 }
291
/* first strong character decides: L gives level 0, R or AL gives level 1 */
292 if(dirProp==L) {
293 pBiDi->paraLevel=0;
294 break;
295 } else if(dirProp==R || dirProp==AL) {
296 pBiDi->paraLevel=1;
297 break;
298 } else if(i>=length) {
/* end of text without any strong character: fall back to the default */
299 /*
300 * see comment in ubidi.h:
301 * the DEFAULT_XXX values are designed so that
302 * their bit 0 alone yields the intended default
303 */
304 pBiDi->paraLevel&=1;
305 break;
306 }
307 }
308 } else {
/* explicit paragraph level: record its direction in the flags right away */
309 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
310 }
311
312 /* get the rest of the directional properties and the flags bits */
/* i continues where the loop above stopped (or starts at 0 if that loop did not run) */
313 while(i<length) {
314 i0=i; /* index of first code unit */
315 UTF_NEXT_CHAR(text, i, length, uchar);
316 i1=i-1; /* index of last code unit, gets the directional property */
317 flags|=DIRPROP_FLAG(dirProps[i1]=dirProp=u_charDirection(uchar));
318 if(i1>i0) { /* set previous code units' properties to BN */
319 flags|=DIRPROP_FLAG(BN);
320 do {
321 dirProps[--i1]=BN;
322 } while(i1>i0);
323 }
324 }
/* if any MASK_EMBEDDING type was seen, the (now resolved) paragraph direction is added to the flags */
325 if(flags&MASK_EMBEDDING) {
326 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
327 }
328
329 pBiDi->flags=flags;
330 }
331
332 /* perform (X1)..(X9) ------------------------------------------------------- */
333
334 /* determine if the text is mixed-directional or single-directional */
335 static UBiDiDirection
336 directionFromFlags(Flags flags) {
337 /* if the text contains AN and neutrals, then some neutrals may become RTL */
338 if(!(flags&MASK_RTL || ((flags&DIRPROP_FLAG(AN)) && (flags&MASK_POSSIBLE_N)))) {
339 return UBIDI_LTR;
340 } else if(!(flags&MASK_LTR)) {
341 return UBIDI_RTL;
342 } else {
343 return UBIDI_MIXED;
344 }
345 }
346
347 /*
348 * Resolve the explicit levels as specified by explicit embedding codes.
349 * Recalculate the flags to have them reflect the real properties
350 * after taking the explicit embeddings into account.
351 *
352 * The BiDi algorithm is designed to result in the same behavior whether embedding
353 * levels are externally specified (from "styled text", supposedly the preferred
354 * method) or set by explicit embedding codes (LRx, RLx, PDF) in the plain text.
355 * That is why (X9) instructs to remove all explicit codes (and BN).
356 * However, in a real implementation, this removal of these codes and their index
357 * positions in the plain text is undesirable since it would result in
358 * reallocated, reindexed text.
359 * Instead, this implementation leaves the codes in there and just ignores them
360 * in the subsequent processing.
361 * In order to get the same reordering behavior, positions with a BN or an
362 * explicit embedding code just get the same level assigned as the last "real"
363 * character.
364 *
365 * Some implementations, not this one, then overwrite some of these
366 * directionality properties at "real" same-level-run boundaries by
367 * L or R codes so that the resolution of weak types can be performed on the
368 * entire paragraph at once instead of having to parse it once more and
369 * perform that resolution on same-level-runs.
370 * This limits the scope of the implicit rules in effectively
371 * the same way as the run limits.
372 *
373 * Instead, this implementation does not modify these codes.
374 * On one hand, the paragraph has to be scanned for same-level-runs, but
375 * on the other hand, this saves another loop to reset these codes,
376 * or saves making and modifying a copy of dirProps[].
377 *
378 *
379 * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm.
380 *
381 *
382 * Handling the stack of explicit levels (Xn):
383 *
384 * With the BiDi stack of explicit levels,
385 * as pushed with each LRE, RLE, LRO, and RLO and popped with each PDF,
386 * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL==61.
387 *
388 * In order to have a correct push-pop semantics even in the case of overflows,
389 * there are two overflow counters:
390 * - countOver60 is incremented with each LRx at level 60
391 * - from level 60, one RLx increases the level to 61
392 * - countOver61 is incremented with each LRx and RLx at level 61
393 *
394 * Popping levels with PDF must work in the opposite order so that level 61
395 * is correct at the correct point. Underflows (too many PDFs) must be checked.
396 *
397 * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
398 */
/*
 * resolveExplicitLevels: apply the explicit embedding codes found in
 * pBiDi->dirProps and return the resulting overall text direction.
 *
 * Three cases, chosen from pBiDi->flags:
 * - the text is not mixed-directional: nothing is written to levels[]
 * - mixed, but no MASK_EXPLICIT type is present or "inverse BiDi" is active:
 *   every levels[] entry is set to the paragraph level
 * - otherwise: the (Xn) rules are performed, levels[] is filled,
 *   pBiDi->flags is recalculated and the direction is derived again
 *
 * Levels written in the third case may carry the UBIDI_LEVEL_OVERRIDE bit.
 */
399 static UBiDiDirection
400 resolveExplicitLevels(UBiDi *pBiDi) {
401 const DirProp *dirProps=pBiDi->dirProps;
402 UBiDiLevel *levels=pBiDi->levels;
403
404 int32_t i=0, length=pBiDi->length;
405 Flags flags=pBiDi->flags; /* collect all directionalities in the text */
406 DirProp dirProp;
407 UBiDiLevel level=pBiDi->paraLevel;
408
409 UBiDiDirection direction;
410
411 /* determine if the text is mixed-directional or single-directional */
412 direction=directionFromFlags(flags);
413
414 /* we may not need to resolve any explicit levels */
415 if(direction!=UBIDI_MIXED) {
416 /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
417 } else if(!(flags&MASK_EXPLICIT) || pBiDi->isInverse) {
418 /* mixed, but all characters are at the same embedding level */
419 /* or we are in "inverse BiDi" */
420 /* set all levels to the paragraph level */
421 for(i=0; i<length; ++i) {
422 levels[i]=level;
423 }
424 } else {
425 /* continue to perform (Xn) */
426
427 /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
428 /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */
429 UBiDiLevel embeddingLevel=level, newLevel, stackTop=0;
430
431 UBiDiLevel stack[UBIDI_MAX_EXPLICIT_LEVEL]; /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL */
432 uint32_t countOver60=0, countOver61=0; /* count overflows of explicit levels */
433
434 /* recalculate the flags */
435 flags=0;
436
437 /* since we assume that this is a single paragraph, we ignore (X8) */
438 for(i=0; i<length; ++i) {
439 dirProp=dirProps[i];
440 switch(dirProp) {
441 case LRE:
442 case LRO:
443 /* (X3, X5) */
444 newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1)); /* least greater even level */
445 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL) {
/* push the current state (including its override bit) and enter the new level */
446 stack[stackTop]=embeddingLevel;
447 ++stackTop;
448 embeddingLevel=newLevel;
449 if(dirProp==LRO) {
450 embeddingLevel|=UBIDI_LEVEL_OVERRIDE;
451 } else {
452 embeddingLevel&=~UBIDI_LEVEL_OVERRIDE;
453 }
454 } else if((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)==UBIDI_MAX_EXPLICIT_LEVEL) {
/* overflow: no push, only count it so that the matching PDF can be paired up */
455 ++countOver61;
456 } else /* (embeddingLevel&~UBIDI_LEVEL_OVERRIDE)==UBIDI_MAX_EXPLICIT_LEVEL-1 */ {
457 ++countOver60;
458 }
459 flags|=DIRPROP_FLAG(BN);
460 break;
461 case RLE:
462 case RLO:
463 /* (X2, X4) */
464 newLevel=(UBiDiLevel)(((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)+1)|1); /* least greater odd level */
465 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL) {
466 stack[stackTop]=embeddingLevel;
467 ++stackTop;
468 embeddingLevel=newLevel;
469 if(dirProp==RLO) {
470 embeddingLevel|=UBIDI_LEVEL_OVERRIDE;
471 } else {
472 embeddingLevel&=~UBIDI_LEVEL_OVERRIDE;
473 }
474 } else {
/* an odd level can only overflow from the maximum level itself */
475 ++countOver61;
476 }
477 flags|=DIRPROP_FLAG(BN);
478 break;
479 case PDF:
480 /* (X7) */
481 /* handle all the overflow cases first */
482 if(countOver61>0) {
483 --countOver61;
484 } else if(countOver60>0 && (embeddingLevel&~UBIDI_LEVEL_OVERRIDE)!=UBIDI_MAX_EXPLICIT_LEVEL) {
485 /* handle LRx overflows from level 60 */
486 --countOver60;
487 } else if(stackTop>0) {
488 /* this is the pop operation; it also pops level 61 while countOver60>0 */
489 --stackTop;
490 embeddingLevel=stack[stackTop];
491 /* } else { (underflow) */
492 }
493 flags|=DIRPROP_FLAG(BN);
494 break;
495 case B:
496 /*
497 * We do not really expect to see a paragraph separator (B),
498 * but we should do something reasonable with it,
499 * especially at the end of the text.
500 */
/* reset the whole embedding state back to the paragraph level */
501 stackTop=0;
502 countOver60=countOver61=0;
503 embeddingLevel=level=pBiDi->paraLevel;
504 flags|=DIRPROP_FLAG(B);
505 break;
506 case BN:
507 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
508 /* they will get their levels set correctly in adjustWSLevels() */
509 flags|=DIRPROP_FLAG(BN);
510 break;
511 default:
512 /* all other types get the "real" level */
/* level only follows embeddingLevel here, i.e. when a "real" character is seen */
513 if(level!=embeddingLevel) {
514 level=embeddingLevel;
515 if(level&UBIDI_LEVEL_OVERRIDE) {
516 flags|=DIRPROP_FLAG_O(level)|DIRPROP_FLAG_MULTI_RUNS;
517 } else {
518 flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG_MULTI_RUNS;
519 }
520 }
/* under an override, the character's own property is not added to the flags */
521 if(!(level&UBIDI_LEVEL_OVERRIDE)) {
522 flags|=DIRPROP_FLAG(dirProp);
523 }
524 break;
525 }
526
527 /*
528 * We need to set reasonable levels even on BN codes and
529 * explicit codes because we will later look at same-level runs (X10).
530 */
531 levels[i]=level;
532 }
533 if(flags&MASK_EMBEDDING) {
534 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
535 }
536
537 /* subsequently, ignore the explicit codes and BN (X9) */
538
539 /* again, determine if the text is mixed-directional or single-directional */
540 pBiDi->flags=flags;
541 direction=directionFromFlags(flags);
542 }
543 return direction;
544 }
545
546 /*
547 * Use a pre-specified embedding levels array:
548 *
549 * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
550 * ignore all explicit codes (X9),
551 * and check all the preset levels.
552 *
553 * Recalculate the flags to have them reflect the real properties
554 * after taking the explicit embeddings into account.
555 */
556 static UBiDiDirection
557 checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
558 const DirProp *dirProps=pBiDi->dirProps;
559 UBiDiLevel *levels=pBiDi->levels;
560
561 int32_t i, length=pBiDi->length;
562 Flags flags=0; /* collect all directionalities in the text */
563 UBiDiLevel level, paraLevel=pBiDi->paraLevel;
564
565 for(i=0; i<length; ++i) {
566 level=levels[i];
567 if(level&UBIDI_LEVEL_OVERRIDE) {
568 /* keep the override flag in levels[i] but adjust the flags */
569 level&=~UBIDI_LEVEL_OVERRIDE; /* make the range check below simpler */
570 flags|=DIRPROP_FLAG_O(level);
571 } else {
572 /* set the flags */
573 flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProps[i]);
574 }
575 if(level<paraLevel || UBIDI_MAX_EXPLICIT_LEVEL<level) {
576 /* level out of bounds */
577 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
578 return UBIDI_LTR;
579 }
580 }
581 if(flags&MASK_EMBEDDING) {
582 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
583 }
584
585 /* determine if the text is mixed-directional or single-directional */
586 pBiDi->flags=flags;
587 return directionFromFlags(flags);
588 }
589
590 /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
591
592 /*
593 * This implementation of the (Wn) rules applies all rules in one pass.
594 * In order to do so, it needs a look-ahead of typically 1 character
595 * (except for W5: sequences of ET) and keeps track of changes
596 * in a rule Wp that affect a later Wq (p<q).
597 *
598 * historyOfEN is a variable-saver: it contains 4 boolean states;
599 * a bit in it set to 1 means:
600 * bit 0: the current code is an EN after W2
601 * bit 1: the current code is an EN after W4
602 * bit 2: the previous code was an EN after W2
603 * bit 3: the previous code was an EN after W4
604 * In other words, b0..1 have transitions of EN in the current iteration,
605 * while b2..3 have the transitions of EN in the previous iteration.
606 * A simple historyOfEN<<=2 suffices for the propagation.
607 *
608 * The (Nn) and (In) rules are also performed in that same single loop,
609 * but effectively one iteration behind for white space.
610 *
611 * Since all implicit rules are performed in one step, it is not necessary
612 * to actually store the intermediate directional properties in dirProps[].
613 */
614
/*
 * Bit layout of historyOfEN: the two low bits describe the current code,
 * the next two bits hold the same information for the previous code
 * (i.e. the low bits shifted left by EN_SHIFT).
 */
#define EN_SHIFT 2
#define EN_AFTER_W2 1
#define EN_AFTER_W4 2
#define EN_ALL (EN_AFTER_W2|EN_AFTER_W4)
#define PREV_EN_AFTER_W2 (EN_AFTER_W2<<EN_SHIFT)
#define PREV_EN_AFTER_W4 (EN_AFTER_W4<<EN_SHIFT)
621
/*
 * resolveImplicitLevels: perform the weak (Wn), neutral (Nn) and implicit
 * level (In) rules on one run, in a single pass, and update levels[] in place.
 *
 * pBiDi  object providing dirProps[], levels[] and the isInverse switch
 * start  index of the first code unit of the run
 * limit  index just past the last code unit of the run (start<limit)
 * sor    directional property assumed before the run
 * eor    directional property assumed after the run
 *
 * The resolved directional properties exist only in local variables;
 * dirProps[] is read but never written.
 */
622 static void
623 resolveImplicitLevels(UBiDi *pBiDi,
624 int32_t start, int32_t limit,
625 DirProp sor, DirProp eor) {
626 const DirProp *dirProps=pBiDi->dirProps;
627 UBiDiLevel *levels=pBiDi->levels;
628
/* neutralStart<0 means that no sequence of neutrals is currently open */
629 int32_t i, next, neutralStart=-1;
630 DirProp prevDirProp, dirProp, nextDirProp, lastStrong, beforeNeutral=L;
631 UBiDiLevel numberLevel;
632 uint8_t historyOfEN;
633
634 /* initialize: current at sor, next at start (it is start<limit) */
635 next=start;
636 dirProp=lastStrong=sor;
637 nextDirProp=dirProps[next];
638 historyOfEN=0;
639
640 if(pBiDi->isInverse) {
641 /*
642 * For "inverse BiDi", we set the levels of numbers just like for
643 * regular L characters, plus a flag that ubidi_getRuns() will use
644 * to set a similar flag on the corresponding output run.
645 */
646 numberLevel=levels[start];
647 if(numberLevel&1) {
648 ++numberLevel;
649 }
650 } else {
651 /* normal BiDi: least greater even level */
652 numberLevel=(UBiDiLevel)((levels[start]+2)&~1);
653 }
654
655 /*
656 * In all steps of this implementation, BN and explicit embedding codes
657 * must be treated as if they didn't exist (X9).
658 * They will get levels set before a non-neutral character, and remain
659 * undefined before a neutral one, but adjustWSLevels() will take care
660 * of all of them.
661 */
/* skip leading BN/explicit codes so that next is at the first "real" code, or at limit */
662 while(DIRPROP_FLAG(nextDirProp)&MASK_BN_EXPLICIT) {
663 if(++next<limit) {
664 nextDirProp=dirProps[next];
665 } else {
666 nextDirProp=eor;
667 break;
668 }
669 }
670
671 /*
672 * Note: at the end of this file, there is a prototype
673 * of a version of this function that uses a statetable
674 * at the core of this state machine.
675 * If you make changes to this state machine,
676 * please update that prototype as well.
677 */
678
679 /* loop for entire run */
680 while(next<limit) {
681 /* advance */
682 prevDirProp=dirProp;
683 dirProp=nextDirProp;
684 i=next;
/* look ahead: move next to the following non-BN/non-explicit code, or use eor at the end of the run */
685 do {
686 if(++next<limit) {
687 nextDirProp=dirProps[next];
688 } else {
689 nextDirProp=eor;
690 break;
691 }
692 } while(DIRPROP_FLAG(nextDirProp)&MASK_BN_EXPLICIT);
/* the "current" EN bits of the last iteration become the "previous" EN bits */
693 historyOfEN<<=EN_SHIFT;
694
695 /* (W1..W7) */
696 switch(dirProp) {
697 case L:
698 lastStrong=L;
699 break;
700 case R:
701 lastStrong=R;
702 break;
703 case AL:
704 /* (W3) */
705 lastStrong=AL;
706 dirProp=R;
707 break;
708 case EN:
709 /* we have to set historyOfEN correctly */
710 if(lastStrong==AL) {
711 /* (W2) */
712 dirProp=AN;
713 } else {
714 if(lastStrong==L) {
715 /* (W7) */
716 dirProp=L;
717 }
718 /* this EN stays after (W2) and (W4) - at least before (W7) */
719 historyOfEN|=EN_ALL;
720 }
721 break;
722 case ES:
723 if( historyOfEN&PREV_EN_AFTER_W2 && /* previous was EN before (W4) */
724 nextDirProp==EN && lastStrong!=AL /* next is EN and (W2) won't make it AN */
725 ) {
726 /* (W4) */
727 if(lastStrong!=L) {
728 dirProp=EN;
729 } else {
730 /* (W7) */
731 dirProp=L;
732 }
733 historyOfEN|=EN_AFTER_W4;
734 } else {
735 /* (W6) */
736 dirProp=ON;
737 }
738 break;
739 case CS:
740 if( historyOfEN&PREV_EN_AFTER_W2 && /* previous was EN before (W4) */
741 nextDirProp==EN && lastStrong!=AL /* next is EN and (W2) won't make it AN */
742 ) {
743 /* (W4) */
744 if(lastStrong!=L) {
745 dirProp=EN;
746 } else {
747 /* (W7) */
748 dirProp=L;
749 }
750 historyOfEN|=EN_AFTER_W4;
751 } else if(prevDirProp==AN && /* previous was AN */
752 (nextDirProp==AN || /* next is AN */
753 (nextDirProp==EN && lastStrong==AL)) /* or (W2) will make it one */
754 ) {
755 /* (W4) */
756 dirProp=AN;
757 } else {
758 /* (W6) */
759 dirProp=ON;
760 }
761 break;
762 case ET:
763 /* get sequence of ET; advance only next, not current, previous or historyOfEN */
764 if(next<limit) {
765 while(DIRPROP_FLAG(nextDirProp)&MASK_ET_NSM_BN /* (W1), (X9) */) {
766 if(++next<limit) {
767 nextDirProp=dirProps[next];
768 } else {
769 nextDirProp=eor;
770 break;
771 }
772 }
773 }
774
775 /* now process the sequence of ET like a single ET */
776 if((historyOfEN&PREV_EN_AFTER_W4) || /* previous was EN before (W5) */
777 (nextDirProp==EN && lastStrong!=AL) /* next is EN and (W2) won't make it AN */
778 ) {
779 /* (W5) */
780 if(lastStrong!=L) {
781 dirProp=EN;
782 } else {
783 /* (W7) */
784 dirProp=L;
785 }
786 } else {
787 /* (W6) */
788 dirProp=ON;
789 }
790
791 /* apply the result of (W1), (W5)..(W7) to the entire sequence of ET */
792 break;
793 case NSM:
794 /* (W1) */
795 dirProp=prevDirProp;
796 /* set historyOfEN back to prevDirProp's historyOfEN */
797 historyOfEN>>=EN_SHIFT;
798 /*
799 * Technically, this should be done before the switch() in the form
800 * if(nextDirProp==NSM) {
801 * dirProps[next]=nextDirProp=dirProp;
802 * }
803 *
804 * - effectively one iteration ahead.
805 * However, whether the next dirProp is NSM or is equal to the current dirProp
806 * does not change the outcome of any condition in (W2)..(W7).
807 */
808 break;
809 default:
810 break;
811 }
812
813 /* here, it is always [prev,this,next]dirProp!=BN; it may be next>i+1 */
814
815 /* perform (Nn) - here, only L, R, EN, AN, and neutrals are left */
816 /* for "inverse BiDi", treat neutrals like L */
817 /* this is one iteration late for the neutrals */
818 if(DIRPROP_FLAG(dirProp)&MASK_N) {
819 if(neutralStart<0) {
820 /* start of a sequence of neutrals */
821 neutralStart=i;
822 beforeNeutral=prevDirProp;
823 }
824 } else /* not a neutral, can be only one of { L, R, EN, AN } */ {
825 /*
826 * Note that all levels[] values are still the same at this
827 * point because this function is called for an entire
828 * same-level run.
829 * Therefore, we need to read only one actual level.
830 */
831 UBiDiLevel level=levels[i];
832
833 if(neutralStart>=0) {
/* only bit 0 of final is used below: 0 stands for L, 1 for R, level for the embedding direction */
834 UBiDiLevel final;
835 /* end of a sequence of neutrals (dirProp is "afterNeutral") */
836 if(!(pBiDi->isInverse)) {
837 if(beforeNeutral==L) {
838 if(dirProp==L) {
839 final=0; /* make all neutrals L (N1) */
840 } else {
841 final=level; /* make all neutrals "e" (N2) */
842 }
843 } else /* beforeNeutral is one of { R, EN, AN } */ {
844 if(dirProp==L) {
845 final=level; /* make all neutrals "e" (N2) */
846 } else {
847 final=1; /* make all neutrals R (N1) */
848 }
849 }
850 } else {
851 /* "inverse BiDi": collapse [before]dirProps L, EN, AN into L */
852 if(beforeNeutral!=R) {
853 if(dirProp!=R) {
854 final=0; /* make all neutrals L (N1) */
855 } else {
856 final=level; /* make all neutrals "e" (N2) */
857 }
858 } else /* beforeNeutral is R */ {
859 if(dirProp!=R) {
860 final=level; /* make all neutrals "e" (N2) */
861 } else {
862 final=1; /* make all neutrals R (N1) */
863 }
864 }
865 }
866 /* perform (In) on the sequence of neutrals */
867 if((level^final)&1) {
868 /* do something only if we need to _change_ the level */
869 do {
870 ++levels[neutralStart];
871 } while(++neutralStart<i);
872 }
873 neutralStart=-1;
874 }
875
876 /* perform (In) on the non-neutral character */
877 /*
878 * in the cases of (W5), processing a sequence of ET,
879 * and of (X9), skipping BN,
880 * there may be multiple characters from i to <next
881 * that all get (virtually) the same dirProp and (really) the same level
882 */
883 if(dirProp==L) {
884 if(level&1) {
885 ++level;
886 } else {
887 i=next; /* we keep the levels */
888 }
889 } else if(dirProp==R) {
890 if(!(level&1)) {
891 ++level;
892 } else {
893 i=next; /* we keep the levels */
894 }
895 } else /* EN or AN */ {
896 /* this level depends on whether we do "inverse BiDi" */
897 level=numberLevel;
898 }
899
900 /* apply the new level to the sequence, if necessary */
/* (i was set to next above when the levels are to be kept, which makes this loop a no-op) */
901 while(i<next) {
902 levels[i++]=level;
903 }
904 }
905 }
906
907 /* perform (Nn) - here,
908 the character after the neutrals is eor, which is either L or R */
909 /* this is one iteration late for the neutrals */
910 if(neutralStart>=0) {
911 /*
912 * Note that all levels[] values are still the same at this
913 * point because this function is called for an entire
914 * same-level run.
915 * Therefore, we need to read only one actual level.
916 */
917 UBiDiLevel level=levels[neutralStart], final;
918
919 /* end of a sequence of neutrals (eor is "afterNeutral") */
920 if(!(pBiDi->isInverse)) {
921 if(beforeNeutral==L) {
922 if(eor==L) {
923 final=0; /* make all neutrals L (N1) */
924 } else {
925 final=level; /* make all neutrals "e" (N2) */
926 }
927 } else /* beforeNeutral is one of { R, EN, AN } */ {
928 if(eor==L) {
929 final=level; /* make all neutrals "e" (N2) */
930 } else {
931 final=1; /* make all neutrals R (N1) */
932 }
933 }
934 } else {
935 /* "inverse BiDi": collapse [before]dirProps L, EN, AN into L */
936 if(beforeNeutral!=R) {
937 if(eor!=R) {
938 final=0; /* make all neutrals L (N1) */
939 } else {
940 final=level; /* make all neutrals "e" (N2) */
941 }
942 } else /* beforeNeutral is R */ {
943 if(eor!=R) {
944 final=level; /* make all neutrals "e" (N2) */
945 } else {
946 final=1; /* make all neutrals R (N1) */
947 }
948 }
949 }
950 /* perform (In) on the sequence of neutrals */
951 if((level^final)&1) {
952 /* do something only if we need to _change_ the level */
953 do {
954 ++levels[neutralStart];
955 } while(++neutralStart<limit);
956 }
957 }
958 }
959
960 /* perform (L1) and (X9) ---------------------------------------------------- */
961
962 /*
963 * Reset the embedding levels for some non-graphic characters (L1).
964 * This function also sets appropriate levels for BN, and
965 * explicit embedding types that are supposed to have been removed
966 * from the paragraph in (X9).
967 */
/*
 * adjustWSLevels: walk backwards from pBiDi->trailingWSStart and fix up
 * levels[] for the types that the earlier steps skipped or must reset:
 * - a sequence of MASK_WS types is set to the paragraph level,
 * - a MASK_BN_EXPLICIT type copies the level of the code unit after it,
 * - a MASK_B_S type is set to the paragraph level and restarts the
 *   MASK_WS scan for whatever precedes it.
 *
 * Nothing is done unless pBiDi->flags contains at least one MASK_WS type.
 */
968 static void
969 adjustWSLevels(UBiDi *pBiDi) {
970 const DirProp *dirProps=pBiDi->dirProps;
971 UBiDiLevel *levels=pBiDi->levels;
972 int32_t i;
973
974 if(pBiDi->flags&MASK_WS) {
975 UBiDiLevel paraLevel=pBiDi->paraLevel;
976 Flags flag;
977
978 i=pBiDi->trailingWSStart;
979 while(i>0) {
980 /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
/* note: --i happens inside the condition, so on exit i is at the first non-WS code unit (or 0) */
981 while(i>0 && DIRPROP_FLAG(dirProps[--i])&MASK_WS) {
982 levels[i]=paraLevel;
983 }
984
985 /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
986 /* here, i+1 is guaranteed to be <length */
987 while(i>0) {
988 flag=DIRPROP_FLAG(dirProps[--i]);
989 if(flag&MASK_BN_EXPLICIT) {
990 levels[i]=levels[i+1];
991 } else if(flag&MASK_B_S) {
992 levels[i]=paraLevel;
993 break;
994 }
995 }
996 }
997 }
998 }
999
1000 /* ubidi_setPara ------------------------------------------------------------ */
1001
1002 U_CAPI void U_EXPORT2
1003 ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
1004 UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
1005 UErrorCode *pErrorCode) {
1006 UBiDiDirection direction;
1007
1008 /* check the argument values */
1009 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1010 return;
1011 } else if(pBiDi==NULL || text==NULL ||
1012 ((UBIDI_MAX_EXPLICIT_LEVEL<paraLevel) && !IS_DEFAULT_LEVEL(paraLevel)) ||
1013 length<-1
1014 ) {
1015 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1016 return;
1017 }
1018
1019 if(length==-1) {
1020 length=u_strlen(text);
1021 }
1022
1023 /* initialize the UBiDi structure */
1024 pBiDi->text=text;
1025 pBiDi->length=length;
1026 pBiDi->paraLevel=paraLevel;
1027 pBiDi->direction=UBIDI_LTR;
1028 pBiDi->trailingWSStart=length; /* the levels[] will reflect the WS run */
1029
1030 pBiDi->dirProps=NULL;
1031 pBiDi->levels=NULL;
1032 pBiDi->runs=NULL;
1033
1034 if(length==0) {
1035 /*
1036 * For an empty paragraph, create a UBiDi object with the paraLevel and
1037 * the flags and the direction set but without allocating zero-length arrays.
1038 * There is nothing more to do.
1039 */
1040 if(IS_DEFAULT_LEVEL(paraLevel)) {
1041 pBiDi->paraLevel&=1;
1042 }
1043 if(paraLevel&1) {
1044 pBiDi->flags=DIRPROP_FLAG(R);
1045 pBiDi->direction=UBIDI_RTL;
1046 } else {
1047 pBiDi->flags=DIRPROP_FLAG(L);
1048 pBiDi->direction=UBIDI_LTR;
1049 }
1050
1051 pBiDi->runCount=0;
1052 return;
1053 }
1054
1055 pBiDi->runCount=-1;
1056
1057 /*
1058 * Get the directional properties,
1059 * the flags bit-set, and
1060 * determine the partagraph level if necessary.
1061 */
1062 if(getDirPropsMemory(pBiDi, length)) {
1063 pBiDi->dirProps=pBiDi->dirPropsMemory;
1064 getDirProps(pBiDi, text);
1065 } else {
1066 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1067 return;
1068 }
1069
1070 /* are explicit levels specified? */
1071 if(embeddingLevels==NULL) {
1072 /* no: determine explicit levels according to the (Xn) rules */\
1073 if(getLevelsMemory(pBiDi, length)) {
1074 pBiDi->levels=pBiDi->levelsMemory;
1075 direction=resolveExplicitLevels(pBiDi);
1076 } else {
1077 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1078 return;
1079 }
1080 } else {
1081 /* set BN for all explicit codes, check that all levels are paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */
1082 pBiDi->levels=embeddingLevels;
1083 direction=checkExplicitLevels(pBiDi, pErrorCode);
1084 if(U_FAILURE(*pErrorCode)) {
1085 return;
1086 }
1087 }
1088
1089 /*
1090 * The steps after (X9) in the UBiDi algorithm are performed only if
1091 * the paragraph text has mixed directionality!
1092 */
1093 pBiDi->direction=direction;
1094 switch(direction) {
1095 case UBIDI_LTR:
1096 /* make sure paraLevel is even */
1097 pBiDi->paraLevel=(UBiDiLevel)((pBiDi->paraLevel+1)&~1);
1098
1099 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
1100 pBiDi->trailingWSStart=0;
1101 break;
1102 case UBIDI_RTL:
1103 /* make sure paraLevel is odd */
1104 pBiDi->paraLevel|=1;
1105
1106 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
1107 pBiDi->trailingWSStart=0;
1108 break;
1109 default:
1110 /*
1111 * If there are no external levels specified and there
1112 * are no significant explicit level codes in the text,
1113 * then we can treat the entire paragraph as one run.
1114 * Otherwise, we need to perform the following rules on runs of
1115 * the text with the same embedding levels. (X10)
1116 * "Significant" explicit level codes are ones that actually
1117 * affect non-BN characters.
1118 * Examples for "insignificant" ones are empty embeddings
1119 * LRE-PDF, LRE-RLE-PDF-PDF, etc.
1120 */
1121 if(embeddingLevels==NULL && !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) {
1122 resolveImplicitLevels(pBiDi, 0, length,
1123 GET_LR_FROM_LEVEL(pBiDi->paraLevel),
1124 GET_LR_FROM_LEVEL(pBiDi->paraLevel));
1125 } else {
1126 /* sor, eor: start and end types of same-level-run */
1127 UBiDiLevel *levels=pBiDi->levels;
1128 int32_t start, limit=0;
1129 UBiDiLevel level, nextLevel;
1130 DirProp sor, eor;
1131
1132 /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
1133 level=pBiDi->paraLevel;
1134 nextLevel=levels[0];
1135 if(level<nextLevel) {
1136 eor=GET_LR_FROM_LEVEL(nextLevel);
1137 } else {
1138 eor=GET_LR_FROM_LEVEL(level);
1139 }
1140
1141 do {
1142 /* determine start and limit of the run (end points just behind the run) */
1143
1144 /* the values for this run's start are the same as for the previous run's end */
1145 sor=eor;
1146 start=limit;
1147 level=nextLevel;
1148
1149 /* search for the limit of this run */
1150 while(++limit<length && levels[limit]==level) {}
1151
1152 /* get the correct level of the next run */
1153 if(limit<length) {
1154 nextLevel=levels[limit];
1155 } else {
1156 nextLevel=pBiDi->paraLevel;
1157 }
1158
1159 /* determine eor from max(level, nextLevel); sor is last run's eor */
1160 if((level&~UBIDI_LEVEL_OVERRIDE)<(nextLevel&~UBIDI_LEVEL_OVERRIDE)) {
1161 eor=GET_LR_FROM_LEVEL(nextLevel);
1162 } else {
1163 eor=GET_LR_FROM_LEVEL(level);
1164 }
1165
1166 /* if the run consists of overridden directional types, then there
1167 are no implicit types to be resolved */
1168 if(!(level&UBIDI_LEVEL_OVERRIDE)) {
1169 resolveImplicitLevels(pBiDi, start, limit, sor, eor);
1170 } else {
1171 /* remove the UBIDI_LEVEL_OVERRIDE flags */
1172 do {
1173 levels[start++]&=~UBIDI_LEVEL_OVERRIDE;
1174 } while(start<limit);
1175 }
1176 } while(limit<length);
1177 }
1178
1179 /* reset the embedding levels for some non-graphic characters (L1), (X9) */
1180 adjustWSLevels(pBiDi);
1181
1182 /* for "inverse BiDi", ubidi_getRuns() modifies the levels of numeric runs following RTL runs */
1183 if(pBiDi->isInverse) {
1184 if(!ubidi_getRuns(pBiDi)) {
1185 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1186 return;
1187 }
1188 }
1189 break;
1190 }
1191 }
1192
1193 U_CAPI UBiDiDirection U_EXPORT2
1194 ubidi_getDirection(const UBiDi *pBiDi) {
1195 if(pBiDi!=NULL) {
1196 return pBiDi->direction;
1197 } else {
1198 return UBIDI_LTR;
1199 }
1200 }
1201
1202 U_CAPI const UChar * U_EXPORT2
1203 ubidi_getText(const UBiDi *pBiDi) {
1204 if(pBiDi!=NULL) {
1205 return pBiDi->text;
1206 } else {
1207 return NULL;
1208 }
1209 }
1210
1211 U_CAPI int32_t U_EXPORT2
1212 ubidi_getLength(const UBiDi *pBiDi) {
1213 if(pBiDi!=NULL) {
1214 return pBiDi->length;
1215 } else {
1216 return 0;
1217 }
1218 }
1219
1220 U_CAPI UBiDiLevel U_EXPORT2
1221 ubidi_getParaLevel(const UBiDi *pBiDi) {
1222 if(pBiDi!=NULL) {
1223 return pBiDi->paraLevel;
1224 } else {
1225 return 0;
1226 }
1227 }
1228
1229 /* statetable prototype ----------------------------------------------------- */
1230
1231 /*
1232 * This is here for possible future
1233 * performance work and is not compiled right now.
1234 */
1235
1236 #if 0
/*
 * NOTE(review): everything up to the matching #endif is excluded from
 * compilation by the #if 0 above. It is a sketch, not finished code: the
 * table initializers contain placeholders ("...", _X, __X, X) and the
 * function body uses locals that it never declares (next, limit, i,
 * dirProp, prevDirProp, nextDirProp, dirProps).
 */
1237 /*
1238 * This is a piece of code that could be part of ubidi.c/resolveImplicitLevels().
1239 * It replaces in the (Wn) state machine the switch()-if()-cascade with
1240 * just a few if()s and a state table.
1241 */
1242
1243 /* use the state table only for the following dirProp's */
1244 #define MASK_W_TABLE (FLAG(L)|FLAG(R)|FLAG(AL)|FLAG(EN)|FLAG(ES)|FLAG(CS)|FLAG(ET)|FLAG(AN))
1245
1246 /*
1247 * inputs:
1248 *
1249 * 0..1 historyOfEN - 2b
1250 * 2 prevDirProp==AN - 1b
1251 * 3..4 lastStrong, one of { L, R, AL, none } - 2b
1252 * 5..7 dirProp, one of { L, R, AL, EN, ES, CS, ET, AN } - 3b
1253 * 8..9 nextDirProp, one of { EN, AN, other }
1254 *
1255 * total: 10b=1024 states
1256 */
1257 enum { _L, _R, _AL, _EN, _ES, _CS, _ET, _AN, _OTHER }; /* lastStrong, dirProp */
1258 enum { __EN, __AN, __OTHER }; /* nextDirProp */
1259
/* bit positions of the input fields listed above */
1260 #define LAST_STRONG_SHIFT 3
1261 #define DIR_PROP_SHIFT 5
1262 #define NEXT_DIR_PROP_SHIFT 8
1263
1264 /* masks after shifting */
1265 #define LAST_STRONG_MASK 3
1266 #define DIR_PROP_MASK 7
/* 0x1f selects bits 0..4: historyOfEN, the prevDirProp==AN bit and lastStrong */
1267 #define STATE_MASK 0x1f
1268
1269 /* convert dirProp into _dirProp (above enum) */
1270 static DirProp inputDirProp[dirPropCount]={ _X<<DIR_PROP_SHIFT, ... };
1271
1272 /* convert dirProp into __dirProp (above enum) */
1273 static DirProp inputNextDirProp[dirPropCount]={ __X<<NEXT_DIR_PROP_SHIFT, ... };
1274
1275 /*
1276 * outputs:
1277 *
1278 * dirProp, one of { L, R, EN, AN, ON } - 3b
1279 *
1280 * 0..1 historyOfEN - 2b
1281 * 2 prevDirProp==AN - 1b
1282 * 3..4 lastStrong, one of { L, R, AL, none } - 2b
1283 * 5..7 new dirProp, one of { L, R, EN, AN, ON }
1284 *
1285 * total: 8 bits=1 byte per state
1286 */
1287 enum { ___L, ___R, ___EN, ___AN, ___ON, ___count };
1288
1289 /* convert ___dirProp into dirProp (above enum) */
1290 static DirProp outputDirProp[___count]={ X, ... };
1291
1292 /* state table */
1293 static uint8_t wnTable[1024]={ /* calculate with switch()-if()-cascade */ };
1294
/*
 * Sketch of the (Wn) part of resolveImplicitLevels() driven by wnTable.
 * NOTE(review): the third parameter is named "end" but the body tests
 * "limit", and the parameter types (BiDi, Index) are not the UBiDi/int32_t
 * used by the compiled code in this file - to be reconciled before enabling.
 */
1295 static void
1296 resolveImplicitLevels(BiDi *pBiDi,
1297 Index start, Index end,
1298 DirProp sor, DirProp eor) {
1299 /* new variable */
1300 uint8_t state;
1301
1302 /* remove variable lastStrong */
1303
1304 /* set initial state (set lastStrong, the rest is 0) */
1305 state= sor==L ? 0 : _R<<LAST_STRONG_SHIFT;
1306
1307 while(next<limit) {
1308 /* advance */
1309 prevDirProp=dirProp;
1310 dirProp=nextDirProp;
1311 i=next;
/* find the next character that is not BN/explicit; past the run, use eor */
1312 do {
1313 if(++next<limit) {
1314 nextDirProp=dirProps[next];
1315 } else {
1316 nextDirProp=eor;
1317 break;
1318 }
1319 } while(FLAG(nextDirProp)&MASK_BN_EXPLICIT);
1320
1321 /* (W1..W7) */
1322 /* ### This may be more efficient with a switch(dirProp). */
1323 if(FLAG(dirProp)&MASK_W_TABLE) {
/* table index = current state bits | dirProp field | nextDirProp field */
1324 state=wnTable[
1325 ((int)state)|
1326 inputDirProp[dirProp]|
1327 inputNextDirProp[nextDirProp]
1328 ];
/* bits 5..7 of the table entry are the resolved dirProp; the rest is the new state */
1329 dirProp=outputDirProp[state>>DIR_PROP_SHIFT];
1330 state&=STATE_MASK;
/*
 * NOTE(review): MASK_W_TABLE above includes FLAG(ET), so presumably the
 * preceding if() already catches ET and this branch is never taken as
 * written - confirm the intended mask before enabling this code.
 */
1331 } else if(dirProp==ET) {
1332 /* get sequence of ET; advance only next, not current, previous or historyOfEN */
1333 while(next<limit && FLAG(nextDirProp)&MASK_ET_NSM_BN /* (W1), (X9) */) {
1334 if(++next<limit) {
1335 nextDirProp=dirProps[next];
1336 } else {
1337 nextDirProp=eor;
1338 break;
1339 }
1340 }
1341
1342 state=wnTable[
1343 ((int)state)|
1344 _ET<<DIR_PROP_SHIFT|
1345 inputNextDirProp[nextDirProp]
1346 ];
1347 dirProp=outputDirProp[state>>DIR_PROP_SHIFT];
1348 state&=STATE_MASK;
1349
1350 /* apply the result of (W1), (W5)..(W7) to the entire sequence of ET */
1351 } else if(dirProp==NSM) {
1352 /* (W1) */
1353 dirProp=prevDirProp;
1354 /* keep prevDirProp's EN and AN states! */
1355 } else /* other */ {
1356 /* set EN and AN states to 0 */
/* the mask 3<<3 keeps only bits 3..4, i.e. the lastStrong field */
1357 state&=LAST_STRONG_MASK<<LAST_STRONG_SHIFT;
1358 }
1359
1360 /* perform (Nn) and (In) as usual */
1361 }
1362 /* perform (Nn) and (In) as usual */
1363 }
1364 #endif