]>
Commit | Line | Data |
---|---|---|
73c04bcf | 1 | /* |
b75a7d8f A |
2 | ****************************************************************************** |
3 | * | |
46f4442e | 4 | * Copyright (C) 1999-2008, International Business Machines |
b75a7d8f A |
5 | * Corporation and others. All Rights Reserved. |
6 | * | |
7 | ****************************************************************************** | |
8 | * file name: ubidi.c | |
9 | * encoding: US-ASCII | |
10 | * tab size: 8 (not used) | |
11 | * indentation:4 | |
12 | * | |
13 | * created on: 1999jul27 | |
46f4442e | 14 | * created by: Markus W. Scherer, updated by Matitiahu Allouche |
b75a7d8f A |
15 | */ |
16 | ||
b75a7d8f A |
17 | #include "cmemory.h" |
18 | #include "unicode/utypes.h" | |
19 | #include "unicode/ustring.h" | |
20 | #include "unicode/uchar.h" | |
21 | #include "unicode/ubidi.h" | |
73c04bcf | 22 | #include "ubidi_props.h" |
b75a7d8f | 23 | #include "ubidiimp.h" |
46f4442e | 24 | #include "uassert.h" |
b75a7d8f A |
25 | |
26 | /* | |
27 | * General implementation notes: | |
28 | * | |
29 | * Throughout the implementation, there are comments like (W2) that refer to | |
30 | * rules of the BiDi algorithm in its version 5, in this example to the second | |
31 | * rule of the resolution of weak types. | |
32 | * | |
33 | * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32) | |
34 | * character according to UTF-16, the second UChar gets the directional property of | |
35 | * the entire character assigned, while the first one gets a BN, a boundary | |
36 | * neutral, type, which is ignored by most of the algorithm according to | |
37 | * rule (X9) and the implementation suggestions of the BiDi algorithm. | |
38 | * | |
39 | * Later, adjustWSLevels() will set the level for each BN to that of the | |
40 | * following character (UChar), which results in surrogate pairs getting the | |
41 | * same level on each of their surrogates. | |
42 | * | |
43 | * In a UTF-8 implementation, the same thing could be done: the last byte of | |
44 | * a multi-byte sequence would get the "real" property, while all previous | |
45 | * bytes of that sequence would get BN. | |
46 | * | |
47 | * It is not possible to assign all those parts of a character the same real | |
48 | * property because this would fail in the resolution of weak types with rules | |
49 | * that look at immediately surrounding types. | |
50 | * | |
51 | * As a related topic, this implementation does not remove Boundary Neutral | |
73c04bcf | 52 | * types from the input, but ignores them wherever this is relevant. |
b75a7d8f A |
53 | * For example, the loop for the resolution of the weak types reads |
54 | * types until it finds a non-BN. | |
55 | * Also, explicit embedding codes are neither changed into BN nor removed. | |
56 | * They are only treated the same way real BNs are. | |
57 | * As stated before, adjustWSLevels() takes care of them at the end. | |
58 | * For the purpose of conformance, the levels of all these codes | |
59 | * do not matter. | |
60 | * | |
61 | * Note that this implementation never modifies the dirProps | |
62 | * after the initial setup. | |
63 | * | |
64 | * | |
65 | * In this implementation, the resolution of weak types (Wn), | |
66 | * neutrals (Nn), and the assignment of the resolved level (In) | |
67 | * are all done in one single loop, in resolveImplicitLevels(). | |
68 | * Changes of dirProp values are done on the fly, without writing | |
69 | * them back to the dirProps array. | |
70 | * | |
71 | * | |
72 | * This implementation contains code that allows to bypass steps of the | |
73 | * algorithm that are not needed on the specific paragraph | |
74 | * in order to speed up the most common cases considerably, | |
75 | * like text that is entirely LTR, or RTL text without numbers. | |
76 | * | |
77 | * Most of this is done by setting a bit for each directional property | |
78 | * in a flags variable and later checking for whether there are | |
79 | * any LTR characters or any RTL characters, or both, whether | |
80 | * there are any explicit embedding codes, etc. | |
81 | * | |
82 | * If the (Xn) steps are performed, then the flags are re-evaluated, | |
83 | * because they will then not contain the embedding codes any more | |
84 | * and will be adjusted for override codes, so that subsequently | |
85 | * more bypassing may be possible than what the initial flags suggested. | |
86 | * | |
87 | * If the text is not mixed-directional, then the | |
88 | * algorithm steps for the weak type resolution are not performed, | |
89 | * and all levels are set to the paragraph level. | |
90 | * | |
91 | * If there are no explicit embedding codes, then the (Xn) steps | |
92 | * are not performed. | |
93 | * | |
94 | * If embedding levels are supplied as a parameter, then all | |
95 | * explicit embedding codes are ignored, and the (Xn) steps | |
96 | * are not performed. | |
97 | * | |
98 | * White Space types could get the level of the run they belong to, | |
99 | * and are checked with a test of (flags&MASK_EMBEDDING) to | |
100 | * consider if the paragraph direction should be considered in | |
101 | * the flags variable. | |
102 | * | |
103 | * If there are no White Space types in the paragraph, then | |
104 | * (L1) is not necessary in adjustWSLevels(). | |
105 | */ | |
106 | ||
b75a7d8f A |
107 | /* to avoid some conditional statements, use tiny constant arrays */ |
108 | static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) }; | |
109 | static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) }; | |
110 | static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) }; | |
111 | ||
112 | #define DIRPROP_FLAG_LR(level) flagLR[(level)&1] | |
113 | #define DIRPROP_FLAG_E(level) flagE[(level)&1] | |
114 | #define DIRPROP_FLAG_O(level) flagO[(level)&1] | |
115 | ||
116 | /* UBiDi object management -------------------------------------------------- */ | |
117 | ||
118 | U_CAPI UBiDi * U_EXPORT2 | |
73c04bcf | 119 | ubidi_open(void) |
b75a7d8f A |
120 | { |
121 | UErrorCode errorCode=U_ZERO_ERROR; | |
122 | return ubidi_openSized(0, 0, &errorCode); | |
123 | } | |
124 | ||
125 | U_CAPI UBiDi * U_EXPORT2 | |
126 | ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) { | |
127 | UBiDi *pBiDi; | |
128 | ||
129 | /* check the argument values */ | |
130 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | |
131 | return NULL; | |
132 | } else if(maxLength<0 || maxRunCount<0) { | |
133 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
134 | return NULL; /* invalid arguments */ | |
135 | } | |
136 | ||
137 | /* allocate memory for the object */ | |
138 | pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi)); | |
139 | if(pBiDi==NULL) { | |
140 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; | |
141 | return NULL; | |
142 | } | |
143 | ||
144 | /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */ | |
145 | uprv_memset(pBiDi, 0, sizeof(UBiDi)); | |
146 | ||
73c04bcf A |
147 | /* get BiDi properties */ |
148 | pBiDi->bdp=ubidi_getSingleton(pErrorCode); | |
149 | if(U_FAILURE(*pErrorCode)) { | |
150 | uprv_free(pBiDi); | |
151 | return NULL; | |
152 | } | |
153 | ||
b75a7d8f A |
154 | /* allocate memory for arrays as requested */ |
155 | if(maxLength>0) { | |
156 | if( !getInitialDirPropsMemory(pBiDi, maxLength) || | |
157 | !getInitialLevelsMemory(pBiDi, maxLength) | |
158 | ) { | |
159 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; | |
160 | } | |
161 | } else { | |
162 | pBiDi->mayAllocateText=TRUE; | |
163 | } | |
164 | ||
165 | if(maxRunCount>0) { | |
166 | if(maxRunCount==1) { | |
167 | /* use simpleRuns[] */ | |
168 | pBiDi->runsSize=sizeof(Run); | |
169 | } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) { | |
170 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; | |
171 | } | |
172 | } else { | |
173 | pBiDi->mayAllocateRuns=TRUE; | |
174 | } | |
175 | ||
176 | if(U_SUCCESS(*pErrorCode)) { | |
177 | return pBiDi; | |
178 | } else { | |
179 | ubidi_close(pBiDi); | |
180 | return NULL; | |
181 | } | |
182 | } | |
183 | ||
184 | /* | |
185 | * We are allowed to allocate memory if memory==NULL or | |
186 | * mayAllocate==TRUE for each array that we need. | |
46f4442e | 187 | * We also try to grow memory as needed if we |
b75a7d8f A |
188 | * allocate it. |
189 | * | |
190 | * Assume sizeNeeded>0. | |
191 | * If *pMemory!=NULL, then assume *pSize>0. | |
192 | * | |
193 | * ### this realloc() may unnecessarily copy the old data, | |
194 | * which we know we don't need any more; | |
195 | * is this the best way to do this?? | |
196 | */ | |
197 | U_CFUNC UBool | |
46f4442e A |
198 | ubidi_getMemory(BidiMemoryForAllocation *bidiMem, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded) { |
199 | void **pMemory = (void **)bidiMem; | |
b75a7d8f A |
200 | /* check for existing memory */ |
201 | if(*pMemory==NULL) { | |
202 | /* we need to allocate memory */ | |
203 | if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=NULL) { | |
204 | *pSize=sizeNeeded; | |
205 | return TRUE; | |
206 | } else { | |
207 | return FALSE; | |
208 | } | |
209 | } else { | |
46f4442e A |
210 | if(sizeNeeded<=*pSize) { |
211 | /* there is already enough memory */ | |
212 | return TRUE; | |
213 | } | |
214 | else if(!mayAllocate) { | |
b75a7d8f A |
215 | /* not enough memory, and we must not allocate */ |
216 | return FALSE; | |
46f4442e A |
217 | } else { |
218 | /* we try to grow */ | |
b75a7d8f | 219 | void *memory; |
46f4442e A |
220 | /* in most cases, we do not need the copy-old-data part of |
221 | * realloc, but it is needed when adding runs using getRunsMemory() | |
222 | * in setParaRunsOnly() | |
223 | */ | |
b75a7d8f A |
224 | if((memory=uprv_realloc(*pMemory, sizeNeeded))!=NULL) { |
225 | *pMemory=memory; | |
226 | *pSize=sizeNeeded; | |
227 | return TRUE; | |
228 | } else { | |
229 | /* we failed to grow */ | |
230 | return FALSE; | |
231 | } | |
b75a7d8f A |
232 | } |
233 | } | |
234 | } | |
235 | ||
236 | U_CAPI void U_EXPORT2 | |
237 | ubidi_close(UBiDi *pBiDi) { | |
238 | if(pBiDi!=NULL) { | |
73c04bcf | 239 | pBiDi->pParaBiDi=NULL; /* in case one tries to reuse this block */ |
b75a7d8f A |
240 | if(pBiDi->dirPropsMemory!=NULL) { |
241 | uprv_free(pBiDi->dirPropsMemory); | |
242 | } | |
243 | if(pBiDi->levelsMemory!=NULL) { | |
244 | uprv_free(pBiDi->levelsMemory); | |
245 | } | |
246 | if(pBiDi->runsMemory!=NULL) { | |
247 | uprv_free(pBiDi->runsMemory); | |
248 | } | |
73c04bcf A |
249 | if(pBiDi->parasMemory!=NULL) { |
250 | uprv_free(pBiDi->parasMemory); | |
251 | } | |
252 | if(pBiDi->insertPoints.points!=NULL) { | |
253 | uprv_free(pBiDi->insertPoints.points); | |
254 | } | |
255 | ||
b75a7d8f A |
256 | uprv_free(pBiDi); |
257 | } | |
258 | } | |
259 | ||
260 | /* set to approximate "inverse BiDi" ---------------------------------------- */ | |
261 | ||
262 | U_CAPI void U_EXPORT2 | |
263 | ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) { | |
264 | if(pBiDi!=NULL) { | |
265 | pBiDi->isInverse=isInverse; | |
73c04bcf A |
266 | pBiDi->reorderingMode = isInverse ? UBIDI_REORDER_INVERSE_NUMBERS_AS_L |
267 | : UBIDI_REORDER_DEFAULT; | |
b75a7d8f A |
268 | } |
269 | } | |
270 | ||
271 | U_CAPI UBool U_EXPORT2 | |
272 | ubidi_isInverse(UBiDi *pBiDi) { | |
273 | if(pBiDi!=NULL) { | |
274 | return pBiDi->isInverse; | |
275 | } else { | |
276 | return FALSE; | |
277 | } | |
278 | } | |
279 | ||
73c04bcf A |
280 | /* FOOD FOR THOUGHT: currently the reordering modes are a mixture of |
281 | * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre | |
282 | * concept of RUNS_ONLY which is a double operation. | |
283 | * It could be advantageous to divide this into 3 concepts: | |
284 | * a) Operation: direct / inverse / RUNS_ONLY | |
46f4442e | 285 | * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R |
73c04bcf A |
286 | * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL |
287 | * This would allow combinations not possible today like RUNS_ONLY with | |
288 | * NUMBERS_SPECIAL. | |
289 | * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and | |
290 | * REMOVE_CONTROLS for the inverse step. | |
291 | * Not all combinations would be supported, and probably not all do make sense. | |
292 | * This would need to document which ones are supported and what are the | |
293 | * fallbacks for unsupported combinations. | |
294 | */ | |
295 | U_CAPI void U_EXPORT2 | |
296 | ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) { | |
46f4442e | 297 | if ((pBiDi!=NULL) && (reorderingMode >= UBIDI_REORDER_DEFAULT) |
73c04bcf A |
298 | && (reorderingMode < UBIDI_REORDER_COUNT)) { |
299 | pBiDi->reorderingMode = reorderingMode; | |
46f4442e | 300 | pBiDi->isInverse = (UBool)(reorderingMode == UBIDI_REORDER_INVERSE_NUMBERS_AS_L); |
73c04bcf A |
301 | } |
302 | } | |
303 | ||
304 | U_CAPI UBiDiReorderingMode U_EXPORT2 | |
305 | ubidi_getReorderingMode(UBiDi *pBiDi) { | |
46f4442e | 306 | if (pBiDi!=NULL) { |
73c04bcf A |
307 | return pBiDi->reorderingMode; |
308 | } else { | |
309 | return UBIDI_REORDER_DEFAULT; | |
310 | } | |
311 | } | |
312 | ||
313 | U_CAPI void U_EXPORT2 | |
314 | ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions) { | |
315 | if (reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) { | |
316 | reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS; | |
317 | } | |
46f4442e A |
318 | if (pBiDi!=NULL) { |
319 | pBiDi->reorderingOptions=reorderingOptions; | |
73c04bcf A |
320 | } |
321 | } | |
322 | ||
323 | U_CAPI uint32_t U_EXPORT2 | |
324 | ubidi_getReorderingOptions(UBiDi *pBiDi) { | |
46f4442e | 325 | if (pBiDi!=NULL) { |
73c04bcf A |
326 | return pBiDi->reorderingOptions; |
327 | } else { | |
328 | return 0; | |
329 | } | |
330 | } | |
331 | ||
b75a7d8f A |
332 | /* perform (P2)..(P3) ------------------------------------------------------- */ |
333 | ||
334 | /* | |
335 | * Get the directional properties for the text, | |
336 | * calculate the flags bit-set, and | |
73c04bcf | 337 | * determine the paragraph level if necessary. |
b75a7d8f A |
338 | */ |
339 | static void | |
73c04bcf A |
340 | getDirProps(UBiDi *pBiDi) { |
341 | const UChar *text=pBiDi->text; | |
b75a7d8f A |
342 | DirProp *dirProps=pBiDi->dirPropsMemory; /* pBiDi->dirProps is const */ |
343 | ||
73c04bcf | 344 | int32_t i=0, i0, i1, length=pBiDi->originalLength; |
b75a7d8f A |
345 | Flags flags=0; /* collect all directionalities in the text */ |
346 | UChar32 uchar; | |
73c04bcf A |
347 | DirProp dirProp=0, paraDirDefault=0;/* initialize to avoid compiler warnings */ |
348 | UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel); | |
349 | /* for inverse BiDi, the default para level is set to RTL if there is a | |
46f4442e A |
350 | strong R or AL character at either end of the text */ |
351 | UBool isDefaultLevelInverse=isDefaultLevel && (UBool) | |
73c04bcf A |
352 | (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT || |
353 | pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL); | |
354 | int32_t lastArabicPos=-1; | |
355 | int32_t controlCount=0; | |
46f4442e A |
356 | UBool removeBiDiControls = (UBool)(pBiDi->reorderingOptions & |
357 | UBIDI_OPTION_REMOVE_CONTROLS); | |
73c04bcf A |
358 | |
359 | typedef enum { | |
360 | NOT_CONTEXTUAL, /* 0: not contextual paraLevel */ | |
361 | LOOKING_FOR_STRONG, /* 1: looking for first strong char */ | |
362 | FOUND_STRONG_CHAR /* 2: found first strong char */ | |
363 | } State; | |
364 | State state; | |
365 | int32_t paraStart=0; /* index of first char in paragraph */ | |
366 | DirProp paraDir; /* == CONTEXT_RTL within paragraphs | |
367 | starting with strong R char */ | |
368 | DirProp lastStrongDir=0; /* for default level & inverse BiDi */ | |
369 | int32_t lastStrongLTR=0; /* for STREAMING option */ | |
370 | ||
371 | if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) { | |
372 | pBiDi->length=0; | |
373 | lastStrongLTR=0; | |
374 | } | |
375 | if(isDefaultLevel) { | |
376 | paraDirDefault=pBiDi->paraLevel&1 ? CONTEXT_RTL : 0; | |
377 | paraDir=paraDirDefault; | |
378 | lastStrongDir=paraDirDefault; | |
379 | state=LOOKING_FOR_STRONG; | |
374ca955 | 380 | } else { |
73c04bcf A |
381 | state=NOT_CONTEXTUAL; |
382 | paraDir=0; | |
b75a7d8f | 383 | } |
73c04bcf A |
384 | /* count paragraphs and determine the paragraph level (P2..P3) */ |
385 | /* | |
386 | * see comment in ubidi.h: | |
387 | * the DEFAULT_XXX values are designed so that | |
388 | * their bit 0 alone yields the intended default | |
389 | */ | |
390 | for( /* i=0 above */ ; i<length; ) { | |
391 | /* i is incremented by UTF_NEXT_CHAR */ | |
b75a7d8f A |
392 | i0=i; /* index of first code unit */ |
393 | UTF_NEXT_CHAR(text, i, length, uchar); | |
394 | i1=i-1; /* index of last code unit, gets the directional property */ | |
46f4442e | 395 | flags|=DIRPROP_FLAG(dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar)); |
73c04bcf | 396 | dirProps[i1]=dirProp|paraDir; |
b75a7d8f A |
397 | if(i1>i0) { /* set previous code units' properties to BN */ |
398 | flags|=DIRPROP_FLAG(BN); | |
399 | do { | |
46f4442e | 400 | dirProps[--i1]=(DirProp)(BN|paraDir); |
b75a7d8f A |
401 | } while(i1>i0); |
402 | } | |
73c04bcf A |
403 | if(state==LOOKING_FOR_STRONG) { |
404 | if(dirProp==L) { | |
405 | state=FOUND_STRONG_CHAR; | |
406 | if(paraDir) { | |
407 | paraDir=0; | |
408 | for(i1=paraStart; i1<i; i1++) { | |
409 | dirProps[i1]&=~CONTEXT_RTL; | |
410 | } | |
411 | } | |
412 | continue; | |
413 | } | |
414 | if(dirProp==R || dirProp==AL) { | |
415 | state=FOUND_STRONG_CHAR; | |
416 | if(paraDir==0) { | |
417 | paraDir=CONTEXT_RTL; | |
418 | for(i1=paraStart; i1<i; i1++) { | |
419 | dirProps[i1]|=CONTEXT_RTL; | |
420 | } | |
421 | } | |
422 | continue; | |
423 | } | |
424 | } | |
425 | if(dirProp==L) { | |
426 | lastStrongDir=0; | |
427 | lastStrongLTR=i; /* i is index to next character */ | |
428 | } | |
429 | else if(dirProp==R) { | |
430 | lastStrongDir=CONTEXT_RTL; | |
431 | } | |
432 | else if(dirProp==AL) { | |
433 | lastStrongDir=CONTEXT_RTL; | |
434 | lastArabicPos=i-1; | |
435 | } | |
436 | else if(dirProp==B) { | |
437 | if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) { | |
438 | pBiDi->length=i; /* i is index to next character */ | |
439 | } | |
440 | if(isDefaultLevelInverse && (lastStrongDir==CONTEXT_RTL) &&(paraDir!=lastStrongDir)) { | |
441 | for( ; paraStart<i; paraStart++) { | |
442 | dirProps[paraStart]|=CONTEXT_RTL; | |
443 | } | |
444 | } | |
445 | if(i<length) { /* B not last char in text */ | |
446 | if(!((uchar==CR) && (text[i]==LF))) { | |
447 | pBiDi->paraCount++; | |
448 | } | |
449 | if(isDefaultLevel) { | |
450 | state=LOOKING_FOR_STRONG; | |
451 | paraStart=i; /* i is index to next character */ | |
452 | paraDir=paraDirDefault; | |
453 | lastStrongDir=paraDirDefault; | |
454 | } | |
455 | } | |
456 | } | |
457 | if(removeBiDiControls && IS_BIDI_CONTROL_CHAR(uchar)) { | |
458 | controlCount++; | |
459 | } | |
b75a7d8f | 460 | } |
73c04bcf A |
461 | if(isDefaultLevelInverse && (lastStrongDir==CONTEXT_RTL) &&(paraDir!=lastStrongDir)) { |
462 | for(i1=paraStart; i1<length; i1++) { | |
463 | dirProps[i1]|=CONTEXT_RTL; | |
464 | } | |
465 | } | |
466 | if(isDefaultLevel) { | |
467 | pBiDi->paraLevel=GET_PARALEVEL(pBiDi, 0); | |
468 | } | |
469 | if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) { | |
470 | if((lastStrongLTR>pBiDi->length) && | |
471 | (GET_PARALEVEL(pBiDi, lastStrongLTR)==0)) { | |
472 | pBiDi->length = lastStrongLTR; | |
473 | } | |
474 | if(pBiDi->length<pBiDi->originalLength) { | |
475 | pBiDi->paraCount--; | |
476 | } | |
477 | } | |
478 | /* The following line does nothing new for contextual paraLevel, but is | |
479 | needed for absolute paraLevel. */ | |
480 | flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel); | |
481 | ||
482 | if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) { | |
483 | flags|=DIRPROP_FLAG(L); | |
b75a7d8f A |
484 | } |
485 | ||
73c04bcf | 486 | pBiDi->controlCount = controlCount; |
b75a7d8f | 487 | pBiDi->flags=flags; |
73c04bcf | 488 | pBiDi->lastArabicPos=lastArabicPos; |
b75a7d8f A |
489 | } |
490 | ||
491 | /* perform (X1)..(X9) ------------------------------------------------------- */ | |
492 | ||
374ca955 A |
493 | /* determine if the text is mixed-directional or single-directional */ |
494 | static UBiDiDirection | |
73c04bcf A |
495 | directionFromFlags(UBiDi *pBiDi) { |
496 | Flags flags=pBiDi->flags; | |
374ca955 A |
497 | /* if the text contains AN and neutrals, then some neutrals may become RTL */ |
498 | if(!(flags&MASK_RTL || ((flags&DIRPROP_FLAG(AN)) && (flags&MASK_POSSIBLE_N)))) { | |
499 | return UBIDI_LTR; | |
500 | } else if(!(flags&MASK_LTR)) { | |
501 | return UBIDI_RTL; | |
502 | } else { | |
503 | return UBIDI_MIXED; | |
504 | } | |
505 | } | |
506 | ||
b75a7d8f A |
507 | /* |
508 | * Resolve the explicit levels as specified by explicit embedding codes. | |
509 | * Recalculate the flags to have them reflect the real properties | |
510 | * after taking the explicit embeddings into account. | |
511 | * | |
512 | * The BiDi algorithm is designed to result in the same behavior whether embedding | |
513 | * levels are externally specified (from "styled text", supposedly the preferred | |
514 | * method) or set by explicit embedding codes (LRx, RLx, PDF) in the plain text. | |
515 | * That is why (X9) instructs to remove all explicit codes (and BN). | |
516 | * However, in a real implementation, this removal of these codes and their index | |
517 | * positions in the plain text is undesirable since it would result in | |
518 | * reallocated, reindexed text. | |
519 | * Instead, this implementation leaves the codes in there and just ignores them | |
520 | * in the subsequent processing. | |
521 | * In order to get the same reordering behavior, positions with a BN or an | |
522 | * explicit embedding code just get the same level assigned as the last "real" | |
523 | * character. | |
524 | * | |
525 | * Some implementations, not this one, then overwrite some of these | |
526 | * directionality properties at "real" same-level-run boundaries by | |
527 | * L or R codes so that the resolution of weak types can be performed on the | |
528 | * entire paragraph at once instead of having to parse it once more and | |
529 | * perform that resolution on same-level-runs. | |
530 | * This limits the scope of the implicit rules in effectively | |
531 | * the same way as the run limits. | |
532 | * | |
533 | * Instead, this implementation does not modify these codes. | |
534 | * On one hand, the paragraph has to be scanned for same-level-runs, but | |
535 | * on the other hand, this saves another loop to reset these codes, | |
536 | * or saves making and modifying a copy of dirProps[]. | |
537 | * | |
538 | * | |
539 | * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm. | |
540 | * | |
541 | * | |
542 | * Handling the stack of explicit levels (Xn): | |
543 | * | |
544 | * With the BiDi stack of explicit levels, | |
545 | * as pushed with each LRE, RLE, LRO, and RLO and popped with each PDF, | |
546 | * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL==61. | |
547 | * | |
548 | * In order to have a correct push-pop semantics even in the case of overflows, | |
549 | * there are two overflow counters: | |
550 | * - countOver60 is incremented with each LRx at level 60 | |
551 | * - from level 60, one RLx increases the level to 61 | |
552 | * - countOver61 is incremented with each LRx and RLx at level 61 | |
553 | * | |
554 | * Popping levels with PDF must work in the opposite order so that level 61 | |
555 | * is correct at the correct point. Underflows (too many PDFs) must be checked. | |
556 | * | |
557 | * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd. | |
558 | */ | |
b75a7d8f A |
559 | static UBiDiDirection |
560 | resolveExplicitLevels(UBiDi *pBiDi) { | |
561 | const DirProp *dirProps=pBiDi->dirProps; | |
562 | UBiDiLevel *levels=pBiDi->levels; | |
73c04bcf A |
563 | const UChar *text=pBiDi->text; |
564 | ||
b75a7d8f A |
565 | int32_t i=0, length=pBiDi->length; |
566 | Flags flags=pBiDi->flags; /* collect all directionalities in the text */ | |
567 | DirProp dirProp; | |
73c04bcf | 568 | UBiDiLevel level=GET_PARALEVEL(pBiDi, 0); |
b75a7d8f A |
569 | |
570 | UBiDiDirection direction; | |
73c04bcf | 571 | int32_t paraIndex=0; |
b75a7d8f A |
572 | |
573 | /* determine if the text is mixed-directional or single-directional */ | |
73c04bcf | 574 | direction=directionFromFlags(pBiDi); |
b75a7d8f | 575 | |
73c04bcf A |
576 | /* we may not need to resolve any explicit levels, but for multiple |
577 | paragraphs we want to loop on all chars to set the para boundaries */ | |
578 | if((direction!=UBIDI_MIXED) && (pBiDi->paraCount==1)) { | |
b75a7d8f | 579 | /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */ |
73c04bcf A |
580 | } else if((pBiDi->paraCount==1) && |
581 | (!(flags&MASK_EXPLICIT) || | |
582 | (pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL))) { | |
b75a7d8f A |
583 | /* mixed, but all characters are at the same embedding level */ |
584 | /* or we are in "inverse BiDi" */ | |
73c04bcf | 585 | /* and we don't have contextual multiple paragraphs with some B char */ |
b75a7d8f A |
586 | /* set all levels to the paragraph level */ |
587 | for(i=0; i<length; ++i) { | |
588 | levels[i]=level; | |
589 | } | |
590 | } else { | |
591 | /* continue to perform (Xn) */ | |
592 | ||
593 | /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */ | |
594 | /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */ | |
595 | UBiDiLevel embeddingLevel=level, newLevel, stackTop=0; | |
596 | ||
597 | UBiDiLevel stack[UBIDI_MAX_EXPLICIT_LEVEL]; /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL */ | |
598 | uint32_t countOver60=0, countOver61=0; /* count overflows of explicit levels */ | |
599 | ||
600 | /* recalculate the flags */ | |
601 | flags=0; | |
602 | ||
b75a7d8f | 603 | for(i=0; i<length; ++i) { |
73c04bcf | 604 | dirProp=NO_CONTEXT_RTL(dirProps[i]); |
b75a7d8f A |
605 | switch(dirProp) { |
606 | case LRE: | |
607 | case LRO: | |
608 | /* (X3, X5) */ | |
609 | newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1)); /* least greater even level */ | |
610 | if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL) { | |
611 | stack[stackTop]=embeddingLevel; | |
612 | ++stackTop; | |
613 | embeddingLevel=newLevel; | |
614 | if(dirProp==LRO) { | |
615 | embeddingLevel|=UBIDI_LEVEL_OVERRIDE; | |
b75a7d8f | 616 | } |
73c04bcf A |
617 | /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE |
618 | since this has already been done for newLevel which is | |
619 | the source for embeddingLevel. | |
620 | */ | |
b75a7d8f A |
621 | } else if((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)==UBIDI_MAX_EXPLICIT_LEVEL) { |
622 | ++countOver61; | |
623 | } else /* (embeddingLevel&~UBIDI_LEVEL_OVERRIDE)==UBIDI_MAX_EXPLICIT_LEVEL-1 */ { | |
624 | ++countOver60; | |
625 | } | |
626 | flags|=DIRPROP_FLAG(BN); | |
627 | break; | |
628 | case RLE: | |
629 | case RLO: | |
630 | /* (X2, X4) */ | |
631 | newLevel=(UBiDiLevel)(((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)+1)|1); /* least greater odd level */ | |
632 | if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL) { | |
633 | stack[stackTop]=embeddingLevel; | |
634 | ++stackTop; | |
635 | embeddingLevel=newLevel; | |
636 | if(dirProp==RLO) { | |
637 | embeddingLevel|=UBIDI_LEVEL_OVERRIDE; | |
b75a7d8f | 638 | } |
73c04bcf A |
639 | /* we don't need to set UBIDI_LEVEL_OVERRIDE off for RLE |
640 | since this has already been done for newLevel which is | |
641 | the source for embeddingLevel. | |
642 | */ | |
b75a7d8f A |
643 | } else { |
644 | ++countOver61; | |
645 | } | |
646 | flags|=DIRPROP_FLAG(BN); | |
647 | break; | |
648 | case PDF: | |
649 | /* (X7) */ | |
650 | /* handle all the overflow cases first */ | |
651 | if(countOver61>0) { | |
652 | --countOver61; | |
653 | } else if(countOver60>0 && (embeddingLevel&~UBIDI_LEVEL_OVERRIDE)!=UBIDI_MAX_EXPLICIT_LEVEL) { | |
654 | /* handle LRx overflows from level 60 */ | |
655 | --countOver60; | |
656 | } else if(stackTop>0) { | |
657 | /* this is the pop operation; it also pops level 61 while countOver60>0 */ | |
658 | --stackTop; | |
659 | embeddingLevel=stack[stackTop]; | |
660 | /* } else { (underflow) */ | |
661 | } | |
662 | flags|=DIRPROP_FLAG(BN); | |
663 | break; | |
664 | case B: | |
b75a7d8f A |
665 | stackTop=0; |
666 | countOver60=countOver61=0; | |
73c04bcf A |
667 | level=GET_PARALEVEL(pBiDi, i); |
668 | if((i+1)<length) { | |
669 | embeddingLevel=GET_PARALEVEL(pBiDi, i+1); | |
670 | if(!((text[i]==CR) && (text[i+1]==LF))) { | |
671 | pBiDi->paras[paraIndex++]=i+1; | |
672 | } | |
673 | } | |
b75a7d8f A |
674 | flags|=DIRPROP_FLAG(B); |
675 | break; | |
676 | case BN: | |
677 | /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */ | |
678 | /* they will get their levels set correctly in adjustWSLevels() */ | |
679 | flags|=DIRPROP_FLAG(BN); | |
680 | break; | |
681 | default: | |
682 | /* all other types get the "real" level */ | |
683 | if(level!=embeddingLevel) { | |
684 | level=embeddingLevel; | |
685 | if(level&UBIDI_LEVEL_OVERRIDE) { | |
686 | flags|=DIRPROP_FLAG_O(level)|DIRPROP_FLAG_MULTI_RUNS; | |
687 | } else { | |
688 | flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG_MULTI_RUNS; | |
689 | } | |
690 | } | |
691 | if(!(level&UBIDI_LEVEL_OVERRIDE)) { | |
692 | flags|=DIRPROP_FLAG(dirProp); | |
693 | } | |
694 | break; | |
695 | } | |
696 | ||
697 | /* | |
698 | * We need to set reasonable levels even on BN codes and | |
699 | * explicit codes because we will later look at same-level runs (X10). | |
700 | */ | |
701 | levels[i]=level; | |
702 | } | |
703 | if(flags&MASK_EMBEDDING) { | |
704 | flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel); | |
705 | } | |
73c04bcf A |
706 | if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) { |
707 | flags|=DIRPROP_FLAG(L); | |
708 | } | |
b75a7d8f A |
709 | |
710 | /* subsequently, ignore the explicit codes and BN (X9) */ | |
711 | ||
712 | /* again, determine if the text is mixed-directional or single-directional */ | |
713 | pBiDi->flags=flags; | |
73c04bcf | 714 | direction=directionFromFlags(pBiDi); |
b75a7d8f | 715 | } |
73c04bcf | 716 | |
b75a7d8f A |
717 | return direction; |
718 | } | |
719 | ||
720 | /* | |
721 | * Use a pre-specified embedding levels array: | |
722 | * | |
723 | * Adjust the directional properties for overrides (->LEVEL_OVERRIDE), | |
724 | * ignore all explicit codes (X9), | |
725 | * and check all the preset levels. | |
726 | * | |
727 | * Recalculate the flags to have them reflect the real properties | |
728 | * after taking the explicit embeddings into account. | |
729 | */ | |
730 | static UBiDiDirection | |
731 | checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { | |
732 | const DirProp *dirProps=pBiDi->dirProps; | |
73c04bcf | 733 | DirProp dirProp; |
b75a7d8f | 734 | UBiDiLevel *levels=pBiDi->levels; |
73c04bcf A |
735 | const UChar *text=pBiDi->text; |
736 | ||
b75a7d8f A |
737 | int32_t i, length=pBiDi->length; |
738 | Flags flags=0; /* collect all directionalities in the text */ | |
73c04bcf A |
739 | UBiDiLevel level; |
740 | uint32_t paraIndex=0; | |
b75a7d8f A |
741 | |
742 | for(i=0; i<length; ++i) { | |
743 | level=levels[i]; | |
73c04bcf | 744 | dirProp=NO_CONTEXT_RTL(dirProps[i]); |
b75a7d8f A |
745 | if(level&UBIDI_LEVEL_OVERRIDE) { |
746 | /* keep the override flag in levels[i] but adjust the flags */ | |
747 | level&=~UBIDI_LEVEL_OVERRIDE; /* make the range check below simpler */ | |
748 | flags|=DIRPROP_FLAG_O(level); | |
749 | } else { | |
750 | /* set the flags */ | |
73c04bcf | 751 | flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp); |
b75a7d8f | 752 | } |
73c04bcf A |
753 | if((level<GET_PARALEVEL(pBiDi, i) && |
754 | !((0==level)&&(dirProp==B))) || | |
755 | (UBIDI_MAX_EXPLICIT_LEVEL<level)) { | |
b75a7d8f A |
756 | /* level out of bounds */ |
757 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
758 | return UBIDI_LTR; | |
759 | } | |
73c04bcf A |
760 | if((dirProp==B) && ((i+1)<length)) { |
761 | if(!((text[i]==CR) && (text[i+1]==LF))) { | |
762 | pBiDi->paras[paraIndex++]=i+1; | |
763 | } | |
764 | } | |
b75a7d8f A |
765 | } |
766 | if(flags&MASK_EMBEDDING) { | |
767 | flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel); | |
768 | } | |
769 | ||
770 | /* determine if the text is mixed-directional or single-directional */ | |
771 | pBiDi->flags=flags; | |
73c04bcf A |
772 | return directionFromFlags(pBiDi); |
773 | } | |
774 | ||
46f4442e A |
775 | /****************************************************************** |
776 | The Properties state machine table | |
777 | ******************************************************************* | |
778 | ||
779 | All table cells are 8 bits: | |
780 | bits 0..4: next state | |
781 | bits 5..7: action to perform (if > 0) | |
782 | ||
783 | Cells may be of format "n" where n represents the next state | |
784 | (except for the rightmost column). | |
785 | Cells may also be of format "s(x,y)" where x represents an action | |
786 | to perform and y represents the next state. | |
787 | ||
788 | ******************************************************************* | |
789 | Definitions and type for properties state table | |
790 | ******************************************************************* | |
791 | */ | |
73c04bcf A |
/*
 * Geometry of the properties state table and accessors for its cells.
 * A cell packs the next state into bits 0..4 and the action into bits 5..7;
 * column IMPTABPROPS_RES (the last one) is not a packed cell.
 *
 * s(action, newState) builds a packed cell.  Both parameters are
 * parenthesized so that any constant expression, not just a literal,
 * is packed correctly.
 */
#define IMPTABPROPS_COLUMNS 14
#define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1)
#define GET_STATEPROPS(cell) ((cell)&0x1f)
#define GET_ACTIONPROPS(cell) ((cell)>>5)
#define s(action, newState) ((uint8_t)((newState)+((action)<<5)))
73c04bcf A |
797 | |
/*
 * Maps a directional property value to its column in impTabProps; it is
 * indexed with the property of each character in resolveImplicitLevels().
 * The comment row inside the initializer names the property each entry
 * belongs to.  Several properties share a column: WS and ON both map to 4,
 * and LRE, LRO, RLE, RLO, PDF and BN all map to 10.
 */
static const uint8_t groupProp[] =          /* dirProp regrouped */
{
/*  L   R   EN  ES  ET  AN  CS  B   S   WS  ON  LRE LRO AL  RLE RLO PDF NSM BN  */
    0,  1,  2,  7,  8,  3,  9,  6,  5,  4,  4,  10, 10, 12, 10, 10, 10, 11, 10
};
46f4442e A |
803 | enum { DirProp_L=0, DirProp_R=1, DirProp_EN=2, DirProp_AN=3, DirProp_ON=4, DirProp_S=5, DirProp_B=6 }; /* reduced dirProp */ |
804 | ||
/******************************************************************

      PROPERTIES  STATE  TABLE

 In table impTabProps,
      - the ON column regroups ON and WS
      - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF
      - the Res column is the reduced property assigned to a run

 Action 1: process current run1, init new run1
        2: init new run2
        3: process run1, process run2, init new run1
        4: process run1, set run1=run2, init new run2

 Notes:
  1) This table is used in resolveImplicitLevels(), which reads the cell
     impTabProps[current state][groupProp[property]].
  2) This table triggers actions when there is a change in the Bidi
     property of incoming characters (action 1).
  3) Most such property sequences are processed immediately (in
     fact, passed to processPropertySeq()).
  4) However, numbers are assembled as one sequence. This means
     that undefined situations (like CS following digits, until
     it is known if the next char will be a digit) are held until
     following chars define them.
     Example: digits followed by CS, then comes another CS or ON;
              the digits will be processed, then the CS assigned
              as the start of an ON sequence (action 3).
  5) There are cases where more than one sequence must be
     processed, for instance digits followed by CS followed by L:
     the digits must be processed as one sequence, and the CS
     must be processed as an ON sequence, all this before starting
     assembling chars for the opening L sequence.


*/
static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] =
{
/*                        L ,     R ,    EN ,    AN ,    ON ,     S ,     B ,    ES ,    ET ,    CS ,    BN ,   NSM ,    AL ,  Res */
/* 0 Init        */ {     1 ,     2 ,     4 ,     5 ,     7 ,    15 ,    17 ,     7 ,     9 ,     7 ,     0 ,     7 ,     3 ,  DirProp_ON },
/* 1 L           */ {     1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7),     1 ,     1 , s(1,3),   DirProp_L },
/* 2 R           */ { s(1,1),     2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7),     2 ,     2 , s(1,3),   DirProp_R },
/* 3 AL          */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8),     3 ,     3 ,     3 ,   DirProp_R },
/* 4 EN          */ { s(1,1), s(1,2),     4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10),    11 ,s(2,10),     4 ,     4 , s(1,3),  DirProp_EN },
/* 5 AN          */ { s(1,1), s(1,2), s(1,4),     5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12),     5 ,     5 , s(1,3),  DirProp_AN },
/* 6 AL:EN/AN    */ { s(1,1), s(1,2),     6 ,     6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13),     6 ,     6 , s(1,3),  DirProp_AN },
/* 7 ON          */ { s(1,1), s(1,2), s(1,4), s(1,5),     7 ,s(1,15),s(1,17),     7 ,s(2,14),     7 ,     7 ,     7 , s(1,3),  DirProp_ON },
/* 8 AL:ON       */ { s(1,1), s(1,2), s(1,6), s(1,6),     8 ,s(1,16),s(1,17),     8 ,     8 ,     8 ,     8 ,     8 , s(1,3),  DirProp_ON },
/* 9 ET          */ { s(1,1), s(1,2),     4 , s(1,5),     7 ,s(1,15),s(1,17),     7 ,     9 ,     7 ,     9 ,     9 , s(1,3),  DirProp_ON },
/*10 EN+ES/CS    */ { s(3,1), s(3,2),     4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    10 , s(4,7), s(3,3),  DirProp_EN },
/*11 EN+ET       */ { s(1,1), s(1,2),     4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7),    11 , s(1,7),    11 ,    11 , s(1,3),  DirProp_EN },
/*12 AN+CS       */ { s(3,1), s(3,2), s(3,4),     5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7),    12 , s(4,7), s(3,3),  DirProp_AN },
/*13 AL:EN/AN+CS */ { s(3,1), s(3,2),     6 ,     6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8),    13 , s(4,8), s(3,3),  DirProp_AN },
/*14 ON+ET       */ { s(1,1), s(1,2), s(4,4), s(1,5),     7 ,s(1,15),s(1,17),     7 ,    14 ,     7 ,    14 ,    14 , s(1,3),  DirProp_ON },
/*15 S           */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),    15 ,s(1,17), s(1,7), s(1,9), s(1,7),    15 , s(1,7), s(1,3),   DirProp_S },
/*16 AL:S        */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),    16 ,s(1,17), s(1,8), s(1,8), s(1,8),    16 , s(1,8), s(1,3),   DirProp_S },
/*17 B           */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),    17 , s(1,7), s(1,9), s(1,7),    17 , s(1,7), s(1,3),   DirProp_B }
};
862 | ||
/*  We must undef macro s because the levels tables have a different
 *  structure (4 bits for action and 4 bits for next state).
 */
46f4442e A |
866 | #undef s |
867 | ||
868 | /****************************************************************** | |
869 | The levels state machine tables | |
870 | ******************************************************************* | |
871 | ||
872 | All table cells are 8 bits: | |
873 | bits 0..3: next state | |
874 | bits 4..7: action to perform (if > 0) | |
875 | ||
876 | Cells may be of format "n" where n represents the next state | |
877 | (except for the rightmost column). | |
878 | Cells may also be of format "s(x,y)" where x represents an action | |
879 | to perform and y represents the next state. | |
880 | ||
881 | This format limits each table to 16 states each and to 15 actions. | |
882 | ||
883 | ******************************************************************* | |
884 | Definitions and type for levels state tables | |
885 | ******************************************************************* | |
886 | */ | |
/*
 * Geometry of the levels state tables and accessors for their cells.
 * A cell packs the next state into bits 0..3 and the action into bits 4..7;
 * column IMPTABLEVELS_RES (the last one) is not a packed cell.
 *
 * s(action, newState) builds a packed cell.  Both parameters are
 * parenthesized so that any constant expression, not just a literal,
 * is packed correctly.
 */
#define IMPTABLEVELS_COLUMNS (DirProp_B + 2)
#define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1)
#define GET_STATE(cell) ((cell)&0x0f)
#define GET_ACTION(cell) ((cell)>>4)
#define s(action, newState) ((uint8_t)((newState)+((action)<<4)))
73c04bcf A |
892 | |
/* A levels state table: one row per state, IMPTABLEVELS_COLUMNS cells per row. */
typedef uint8_t ImpTab[][IMPTABLEVELS_COLUMNS];
/* An action map: indexed by the action number stored in a table cell,
 * it yields the action number that processPropertySeq() switches on.
 */
typedef uint8_t ImpAct[];

/*  FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct,
 *                    instead of having a pair of ImpTab and a pair of ImpAct.
 */
/*
 * A pair of levels tables with their action maps.
 * resolveImplicitLevels() picks entry [runLevel&1] of each array, i.e.
 * entry 0 for an even run level and entry 1 for an odd one, and casts the
 * void pointers to const ImpTab * and const ImpAct * respectively.
 */
typedef struct ImpTabPair {
    const void * pImpTab[2];
    const void * pImpAct[2];
} ImpTabPair;
903 | ||
46f4442e A |
904 | /****************************************************************** |
905 | ||
906 | LEVELS STATE TABLES | |
907 | ||
908 | In all levels state tables, | |
909 | - state 0 is the initial state | |
910 | - the Res column is the increment to add to the text level | |
911 | for this property sequence. | |
912 | ||
913 | The impAct arrays for each table of a pair map the local action | |
914 | numbers of the table to the total list of actions. For instance, | |
915 | action 2 in a given table corresponds to the action number which | |
916 | appears in entry [2] of the impAct array for that table. | |
917 | The first entry of all impAct arrays must be 0. | |
918 | ||
919 | Action 1: init conditional sequence | |
920 | 2: prepend conditional sequence to current sequence | |
921 | 3: set ON sequence to new level - 1 | |
922 | 4: init EN/AN/ON sequence | |
923 | 5: fix EN/AN/ON sequence followed by R | |
924 | 6: set previous level sequence to level 2 | |
925 | ||
926 | Notes: | |
927 | 1) These tables are used in processPropertySeq(). The input | |
928 | is property sequences as determined by resolveImplicitLevels. | |
929 | 2) Most such property sequences are processed immediately | |
930 | (levels are assigned). | |
931 | 3) However, some sequences cannot be assigned a final level till | |
932 | one or more following sequences are received. For instance, | |
933 | ON following an R sequence within an even-level paragraph. | |
934 | If the following sequence is R, the ON sequence will be | |
935 | assigned basic run level+1, and so will the R sequence. | |
936 | 4) S is generally handled like ON, since its level will be fixed | |
937 | to paragraph level in adjustWSLevels(). | |
938 | ||
939 | */ | |
73c04bcf A |
940 | |
/*
 * Default levels state tables.
 * Rows are states.  Columns L..B are indexed by the reduced property of the
 * incoming sequence and hold either a plain next state or
 * s(action, next state).  Res is the increment that processPropertySeq()
 * adds to the run level once the row's state has been entered.
 */
static const ImpTab impTabL_DEFAULT =   /* Even paragraph level */
/*  In this table, conditional sequences receive the higher possible level
    until proven otherwise.
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ {     0 ,     1 ,     0 ,     2 ,     0 ,     0 ,     0 ,  0 },
/* 1 : R          */ {     0 ,     1 ,     3 ,     3 , s(1,4), s(1,4),     0 ,  1 },
/* 2 : AN         */ {     0 ,     1 ,     0 ,     2 , s(1,5), s(1,5),     0 ,  2 },
/* 3 : R+EN/AN    */ {     0 ,     1 ,     3 ,     3 , s(1,4), s(1,4),     0 ,  2 },
/* 4 : R+ON       */ { s(2,0),     1 ,     3 ,     3 ,     4 ,     4 , s(2,0),  1 },
/* 5 : AN+ON      */ { s(2,0),     1 , s(2,0),     2 ,     5 ,     5 , s(2,0),  1 }
};
static const ImpTab impTabR_DEFAULT =   /* Odd paragraph level */
/*  In this table, conditional sequences receive the lower possible level
    until proven otherwise.
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  0 },
/* 1 : L          */ {     1 ,     0 ,     1 ,     3 , s(1,4), s(1,4),     0 ,  1 },
/* 2 : EN/AN      */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  1 },
/* 3 : L+AN       */ {     1 ,     0 ,     1 ,     3 ,     5 ,     5 ,     0 ,  1 },
/* 4 : L+ON       */ { s(2,1),     0 , s(2,1),     3 ,     4 ,     4 ,     0 ,  0 },
/* 5 : L+AN+ON    */ {     1 ,     0 ,     1 ,     3 ,     5 ,     5 ,     0 ,  0 }
};
/* identity action map: table action n is action n of processPropertySeq() */
static const ImpAct impAct0 = {0,1,2,3,4,5,6};
/* entry 0: table for even levels, entry 1: table for odd levels */
static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT,
                                           &impTabR_DEFAULT},
                                          {&impAct0, &impAct0}};
73c04bcf A |
971 | |
/*
 * Levels state table for even levels in the NUMBERS_SPECIAL pair; the pair
 * reuses impTabR_DEFAULT for odd levels and the identity action map for both.
 */
static const ImpTab impTabL_NUMBERS_SPECIAL =   /* Even paragraph level */
/*  In this table, conditional sequences receive the higher possible level
    until proven otherwise.
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ {     0 ,     2 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
/* 1 : L+EN/AN    */ {     0 ,     2 ,     1 ,     1 ,     0 ,     0 ,     0 ,  2 },
/* 2 : R          */ {     0 ,     2 ,     4 ,     4 , s(1,3),     0 ,     0 ,  1 },
/* 3 : R+ON       */ { s(2,0),     2 ,     4 ,     4 ,     3 ,     3 , s(2,0),  1 },
/* 4 : R+EN/AN    */ {     0 ,     2 ,     4 ,     4 , s(1,3), s(1,3),     0 ,  2 }
};
static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL,
                                                   &impTabR_DEFAULT},
                                                  {&impAct0, &impAct0}};
73c04bcf A |
987 | |
/*
 * Levels state tables of the GROUP_NUMBERS_WITH_R pair.  Both tables use the
 * identity action map impAct0.
 */
static const ImpTab impTabL_GROUP_NUMBERS_WITH_R =
/*  In this table, EN/AN+ON sequences receive levels as if associated with R
    until proven that there is L or sor/eor on both sides. AN is handled like EN.
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 init         */ {     0 ,     3 , s(1,1), s(1,1),     0 ,     0 ,     0 ,  0 },
/* 1 EN/AN        */ { s(2,0),     3 ,     1 ,     1 ,     2 , s(2,0), s(2,0),  2 },
/* 2 EN/AN+ON     */ { s(2,0),     3 ,     1 ,     1 ,     2 , s(2,0), s(2,0),  1 },
/* 3 R            */ {     0 ,     3 ,     5 ,     5 , s(1,4),     0 ,     0 ,  1 },
/* 4 R+ON         */ { s(2,0),     3 ,     5 ,     5 ,     4 , s(2,0), s(2,0),  1 },
/* 5 R+EN/AN      */ {     0 ,     3 ,     5 ,     5 , s(1,4),     0 ,     0 ,  2 }
};
static const ImpTab impTabR_GROUP_NUMBERS_WITH_R =
/*  In this table, EN/AN+ON sequences receive levels as if associated with R
    until proven that there is L on both sides. AN is handled like EN.
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 init         */ {     2 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
/* 1 EN/AN        */ {     2 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  1 },
/* 2 L            */ {     2 ,     0 , s(1,4), s(1,4), s(1,3),     0 ,     0 ,  1 },
/* 3 L+ON         */ { s(2,2),     0 ,     4 ,     4 ,     3 ,     0 ,     0 ,  0 },
/* 4 L+EN/AN      */ { s(2,2),     0 ,     4 ,     4 ,     3 ,     0 ,     0 ,  1 }
};
static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R = {
                        {&impTabL_GROUP_NUMBERS_WITH_R,
                         &impTabR_GROUP_NUMBERS_WITH_R},
                        {&impAct0, &impAct0}};
73c04bcf A |
1017 | |
1018 | ||
/*
 * Levels state tables of the INVERSE_NUMBERS_AS_L pair.  Both tables use the
 * identity action map impAct0.
 */
static const ImpTab impTabL_INVERSE_NUMBERS_AS_L =
/*  This table is identical to the Default LTR table except that EN and AN are
    handled like L.
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ {     0 ,     1 ,     0 ,     0 ,     0 ,     0 ,     0 ,  0 },
/* 1 : R          */ {     0 ,     1 ,     0 ,     0 , s(1,4), s(1,4),     0 ,  1 },
/* 2 : AN         */ {     0 ,     1 ,     0 ,     0 , s(1,5), s(1,5),     0 ,  2 },
/* 3 : R+EN/AN    */ {     0 ,     1 ,     0 ,     0 , s(1,4), s(1,4),     0 ,  2 },
/* 4 : R+ON       */ { s(2,0),     1 , s(2,0), s(2,0),     4 ,     4 , s(2,0),  1 },
/* 5 : AN+ON      */ { s(2,0),     1 , s(2,0), s(2,0),     5 ,     5 , s(2,0),  1 }
};
static const ImpTab impTabR_INVERSE_NUMBERS_AS_L =
/*  This table is identical to the Default RTL table except that EN and AN are
    handled like L.
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ {     1 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
/* 1 : L          */ {     1 ,     0 ,     1 ,     1 , s(1,4), s(1,4),     0 ,  1 },
/* 2 : EN/AN      */ {     1 ,     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  1 },
/* 3 : L+AN       */ {     1 ,     0 ,     1 ,     1 ,     5 ,     5 ,     0 ,  1 },
/* 4 : L+ON       */ { s(2,1),     0 , s(2,1), s(2,1),     4 ,     4 ,     0 ,  0 },
/* 5 : L+AN+ON    */ {     1 ,     0 ,     1 ,     1 ,     5 ,     5 ,     0 ,  0 }
};
static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L = {
                        {&impTabL_INVERSE_NUMBERS_AS_L,
                         &impTabR_INVERSE_NUMBERS_AS_L},
                        {&impAct0, &impAct0}};
73c04bcf A |
1049 | |
/*
 * Odd-level levels state table of the INVERSE_LIKE_DIRECT pair.  Its cells
 * use table actions 1..3, which impAct1 translates to actions 1, 11 and 12
 * of processPropertySeq().
 */
static const ImpTab impTabR_INVERSE_LIKE_DIRECT =   /* Odd  paragraph level */
/*  In this table, conditional sequences receive the lower possible level
    until proven otherwise.
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  0 },
/* 1 : L          */ {     1 ,     0 ,     1 ,     2 , s(1,3), s(1,3),     0 ,  1 },
/* 2 : EN/AN      */ {     1 ,     0 ,     2 ,     2 ,     0 ,     0 ,     0 ,  1 },
/* 3 : L+ON       */ { s(2,1), s(3,0),     6 ,     4 ,     3 ,     3 , s(3,0),  0 },
/* 4 : L+ON+AN    */ { s(2,1), s(3,0),     6 ,     4 ,     5 ,     5 , s(3,0),  3 },
/* 5 : L+AN+ON    */ { s(2,1), s(3,0),     6 ,     4 ,     5 ,     5 , s(3,0),  2 },
/* 6 : L+ON+EN    */ { s(2,1), s(3,0),     6 ,     4 ,     3 ,     3 , s(3,0),  1 }
};
/* action map: table actions 2 and 3 become actions 11 and 12 */
static const ImpAct impAct1 = {0,1,11,12};
/* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc"
 */
/* even levels reuse impTabL_DEFAULT with the identity action map */
static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = {
                        {&impTabL_DEFAULT,
                         &impTabR_INVERSE_LIKE_DIRECT},
                        {&impAct0, &impAct1}};
73c04bcf A |
1071 | |
/*
 * Levels state tables of the INVERSE_LIKE_DIRECT_WITH_MARKS pair.
 * The even-level table is used with the identity action map impAct0 and
 * uses table actions 1..6.  The odd-level table is used with impAct2, which
 * translates its table actions 2..5 to actions 7..10 of processPropertySeq().
 */
static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS =
/*  The case handled in this table is (visually):  R EN L
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ {     0 , s(6,3),     0 ,     1 ,     0 ,     0 ,     0 ,  0 },
/* 1 : L+AN       */ {     0 , s(6,3),     0 ,     1 , s(1,2), s(3,0),     0 ,  4 },
/* 2 : L+AN+ON    */ { s(2,0), s(6,3), s(2,0),     1 ,     2 , s(3,0), s(2,0),  3 },
/* 3 : R          */ {     0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0),     0 ,  3 },
/* 4 : R+ON       */ { s(3,0), s(4,3), s(5,5), s(5,6),     4 , s(3,0), s(3,0),  3 },
/* 5 : R+EN       */ { s(3,0), s(4,3),     5 , s(5,6), s(1,4), s(3,0), s(3,0),  4 },
/* 6 : R+AN       */ { s(3,0), s(4,3), s(5,5),     6 , s(1,4), s(3,0), s(3,0),  4 }
};
static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS =
/*  The cases handled in this table are (visually):  R EN L
                                                     R L AN L
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ { s(1,3),     0 ,     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
/* 1 : R+EN/AN    */ { s(2,3),     0 ,     1 ,     1 ,     2 , s(4,0),     0 ,  1 },
/* 2 : R+EN/AN+ON */ { s(2,3),     0 ,     1 ,     1 ,     2 , s(4,0),     0 ,  0 },
/* 3 : L          */ {     3 ,     0 ,     3 , s(3,6), s(1,4), s(4,0),     0 ,  1 },
/* 4 : L+ON       */ { s(5,3), s(4,0),     5 , s(3,6),     4 , s(4,0), s(4,0),  0 },
/* 5 : L+ON+EN    */ { s(5,3), s(4,0),     5 , s(3,6),     4 , s(4,0), s(4,0),  1 },
/* 6 : L+AN       */ { s(5,3), s(4,0),     6 ,     6 ,     4 , s(4,0), s(4,0),  3 }
};
/* action map: table actions 2..5 become actions 7..10 */
static const ImpAct impAct2 = {0,1,7,8,9,10};
static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = {
                        {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
                         &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
                        {&impAct0, &impAct2}};
73c04bcf A |
1104 | |
/*
 * Pair built only from tables defined for other pairs:
 * impTabL_NUMBERS_SPECIAL (with the identity action map) for even levels and
 * impTabR_INVERSE_LIKE_DIRECT (with impAct1) for odd levels.
 */
static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = {
                        {&impTabL_NUMBERS_SPECIAL,
                         &impTabR_INVERSE_LIKE_DIRECT},
                        {&impAct0, &impAct1}};
73c04bcf A |
1109 | |
/*
 * Even-level levels state table of the
 * INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS pair, used with the identity action
 * map.  The pair reuses impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS (with impAct2)
 * for odd levels.
 */
static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS =
/*  The case handled in this table is (visually):  R EN L
*/
{
/*                         L ,     R ,    EN ,    AN ,    ON ,     S ,     B , Res */
/* 0 : init       */ {     0 , s(6,2),     1 ,     1 ,     0 ,     0 ,     0 ,  0 },
/* 1 : L+EN/AN    */ {     0 , s(6,2),     1 ,     1 ,     0 , s(3,0),     0 ,  4 },
/* 2 : R          */ {     0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0),     0 ,  3 },
/* 3 : R+ON       */ { s(3,0), s(4,2), s(5,4), s(5,4),     3 , s(3,0), s(3,0),  3 },
/* 4 : R+EN/AN    */ { s(3,0), s(4,2),     4 ,     4 , s(1,3), s(3,0), s(3,0),  4 }
};
static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = {
                        {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
                         &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS},
                        {&impAct0, &impAct2}};
73c04bcf | 1125 | |
46f4442e | 1126 | #undef s |
73c04bcf A |
1127 | |
/*
 * Context of the levels state machine for one run.  It is set up in
 * resolveImplicitLevels() and updated by every call to processPropertySeq().
 *
 * NOTE(review): the initialization visible in resolveImplicitLevels() does
 * not assign startON before its first processPropertySeq() call; startON is
 * written by actions 1, 4, 6, 7 and 10 - confirm it cannot be read earlier.
 */
typedef struct {
    const ImpTab * pImpTab;             /* level table pointer          */
    const ImpAct * pImpAct;             /* action map array             */
    int32_t     startON;                /* start of ON sequence         */
    int32_t     startL2EN;              /* start of level 2 sequence    */
    int32_t     lastStrongRTL;          /* index of last found R or AL  */
    int32_t     state;                  /* current state                */
    UBiDiLevel  runLevel;               /* run level before implicit solving */
} LevState;
1137 | ||
1138 | /*------------------------------------------------------------------------*/ | |
1139 | ||
1140 | static void | |
1141 | addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag) | |
1142 | /* param pos: position where to insert | |
1143 | param flag: one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER | |
1144 | */ | |
1145 | { | |
1146 | #define FIRSTALLOC 10 | |
1147 | Point point; | |
1148 | InsertPoints * pInsertPoints=&(pBiDi->insertPoints); | |
1149 | ||
1150 | if (pInsertPoints->capacity == 0) | |
1151 | { | |
1152 | pInsertPoints->points=uprv_malloc(sizeof(Point)*FIRSTALLOC); | |
1153 | if (pInsertPoints->points == NULL) | |
1154 | { | |
1155 | pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR; | |
1156 | return; | |
1157 | } | |
1158 | pInsertPoints->capacity=FIRSTALLOC; | |
1159 | } | |
1160 | if (pInsertPoints->size >= pInsertPoints->capacity) /* no room for new point */ | |
1161 | { | |
1162 | void * savePoints=pInsertPoints->points; | |
1163 | pInsertPoints->points=uprv_realloc(pInsertPoints->points, | |
1164 | pInsertPoints->capacity*2*sizeof(Point)); | |
1165 | if (pInsertPoints->points == NULL) | |
1166 | { | |
1167 | pInsertPoints->points=savePoints; | |
1168 | pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR; | |
1169 | return; | |
1170 | } | |
1171 | else pInsertPoints->capacity*=2; | |
1172 | } | |
1173 | point.pos=pos; | |
1174 | point.flag=flag; | |
1175 | pInsertPoints->points[pInsertPoints->size]=point; | |
1176 | pInsertPoints->size++; | |
1177 | #undef FIRSTALLOC | |
b75a7d8f A |
1178 | } |
1179 | ||
b75a7d8f A |
1180 | /* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */ |
1181 | ||
1182 | /* | |
1183 | * This implementation of the (Wn) rules applies all rules in one pass. | |
1184 | * In order to do so, it needs a look-ahead of typically 1 character | |
1185 | * (except for W5: sequences of ET) and keeps track of changes | |
1186 | * in a rule Wp that affect a later Wq (p<q). | |
1187 | * | |
b75a7d8f A |
1188 | * The (Nn) and (In) rules are also performed in that same single loop, |
1189 | * but effectively one iteration behind for white space. | |
1190 | * | |
1191 | * Since all implicit rules are performed in one step, it is not necessary | |
1192 | * to actually store the intermediate directional properties in dirProps[]. | |
1193 | */ | |
1194 | ||
/*
 * Feed one property sequence into the levels state machine of the current
 * run and assign levels to it.
 *
 * pBiDi      the UBiDi object; its levels[] and insertPoints may be updated
 * pLevState  state machine context (tables, current state, bookkeeping
 *            indexes); updated in place
 * _prop      column index into the levels table (compared below against
 *            DirProp_AN and DirProp_S)
 * start      index of the first character of the sequence
 * limit      index following the last character of the sequence; may equal
 *            start, as in the initial call from resolveImplicitLevels()
 *
 * The cell selected by the current state and _prop yields the next state and
 * a table-local action number.  The action number is translated through
 * *pImpAct into one of the cases of the switch below.  Afterwards
 * levels[start..limit-1] is set to runLevel + Res(new state) if that
 * increment is non-zero or if an action moved start to the left.
 */
static void
processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop,
                   int32_t start, int32_t limit) {
    uint8_t cell, oldStateSeq, actionSeq;
    const ImpTab * pImpTab=pLevState->pImpTab;
    const ImpAct * pImpAct=pLevState->pImpAct;
    UBiDiLevel * levels=pBiDi->levels;
    UBiDiLevel level, addLevel;
    InsertPoints * pInsertPoints;
    int32_t start0, k;

    start0=start;                           /* save original start position */
    oldStateSeq=(uint8_t)pLevState->state;
    cell=(*pImpTab)[oldStateSeq][_prop];
    pLevState->state=GET_STATE(cell);       /* isolate the new state */
    actionSeq=(*pImpAct)[GET_ACTION(cell)]; /* isolate the action */
    /* level increment of the state just entered */
    addLevel=(*pImpTab)[pLevState->state][IMPTABLEVELS_RES];

    if(actionSeq) {
        switch(actionSeq) {
        case 1:                         /* init ON seq */
            pLevState->startON=start0;
            break;

        case 2:                         /* prepend ON seq to current seq */
            start=pLevState->startON;
            break;

        case 3:                         /* L or S after possible relevant EN/AN */
            /* check if we had EN after R/AL */
            if (pLevState->startL2EN >= 0) {
                addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
            }
            pLevState->startL2EN=-1;  /* not within previous if since could also be -2 */
            /* check if we had any relevant EN/AN after R/AL */
            pInsertPoints=&(pBiDi->insertPoints);
            if ((pInsertPoints->capacity == 0) ||
                (pInsertPoints->size <= pInsertPoints->confirmed))
            {
                /* nothing, just clean up */
                pLevState->lastStrongRTL=-1;
                /* check if we have a pending conditional segment */
                level=(*pImpTab)[oldStateSeq][IMPTABLEVELS_RES];
                if ((level & 1) && (pLevState->startON > 0)) {  /* after ON */
                    start=pLevState->startON;   /* reset to basic run level */
                }
                if (_prop == DirProp_S)                /* add LRM before S */
                {
                    addPoint(pBiDi, start0, LRM_BEFORE);
                    pInsertPoints->confirmed=pInsertPoints->size;
                }
                break;
            }
            /* reset previous RTL cont to level for LTR text */
            for (k=pLevState->lastStrongRTL+1; k<start0; k++)
            {
                /* reset odd level, leave runLevel+2 as is */
                levels[k]=(levels[k] - 2) & ~1;
            }
            /* mark insert points as confirmed */
            pInsertPoints->confirmed=pInsertPoints->size;
            pLevState->lastStrongRTL=-1;
            if (_prop == DirProp_S)            /* add LRM before S */
            {
                addPoint(pBiDi, start0, LRM_BEFORE);
                pInsertPoints->confirmed=pInsertPoints->size;
            }
            break;

        case 4:                         /* R/AL after possible relevant EN/AN */
            /* just clean up */
            pInsertPoints=&(pBiDi->insertPoints);
            if (pInsertPoints->capacity > 0)
                /* remove all non confirmed insert points */
                pInsertPoints->size=pInsertPoints->confirmed;
            pLevState->startON=-1;
            pLevState->startL2EN=-1;
            pLevState->lastStrongRTL=limit - 1;
            break;

        case 5:                         /* EN/AN after R/AL + possible cont */
            /* check for real AN */
            if ((_prop == DirProp_AN) && (NO_CONTEXT_RTL(pBiDi->dirProps[start0]) == AN) &&
                (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))
            {
                /* real AN */
                if (pLevState->startL2EN == -1) /* if no relevant EN already found */
                {
                    /* just note the rightmost digit as a strong RTL */
                    pLevState->lastStrongRTL=limit - 1;
                    break;
                }
                if (pLevState->startL2EN >= 0)  /* after EN, no AN */
                {
                    addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE);
                    pLevState->startL2EN=-2;
                }
                /* note AN */
                addPoint(pBiDi, start0, LRM_BEFORE);
                break;
            }
            /* if first EN/AN after R/AL */
            if (pLevState->startL2EN == -1) {
                pLevState->startL2EN=start0;
            }
            break;

        case 6:                         /* note location of latest R/AL */
            pLevState->lastStrongRTL=limit - 1;
            pLevState->startON=-1;
            break;

        case 7:                         /* L after R+ON/EN/AN */
            /* include possible adjacent number on the left */
            /* empty loop body: scan left for the nearest odd-level position */
            for (k=start0-1; k>=0 && !(levels[k]&1); k--);
            if(k>=0) {
                addPoint(pBiDi, k, RLM_BEFORE);             /* add RLM before */
                pInsertPoints=&(pBiDi->insertPoints);
                pInsertPoints->confirmed=pInsertPoints->size;   /* confirm it */
            }
            pLevState->startON=start0;
            break;

        case 8:                         /* AN after L */
            /* AN numbers between L text on both sides may be trouble. */
            /* tentatively bracket with LRMs; will be confirmed if followed by L */
            addPoint(pBiDi, start0, LRM_BEFORE);    /* add LRM before */
            addPoint(pBiDi, start0, LRM_AFTER);     /* add LRM after  */
            break;

        case 9:                         /* R after L+ON/EN/AN */
            /* false alert, infirm LRMs around previous AN */
            pInsertPoints=&(pBiDi->insertPoints);
            pInsertPoints->size=pInsertPoints->confirmed;
            if (_prop == DirProp_S)            /* add RLM before S */
            {
                addPoint(pBiDi, start0, RLM_BEFORE);
                pInsertPoints->confirmed=pInsertPoints->size;
            }
            break;

        case 10:                        /* L after L+ON/AN */
            level=pLevState->runLevel + addLevel;
            /* raise, but never lower, the levels since the start of the ON seq */
            for(k=pLevState->startON; k<start0; k++) {
                if (levels[k]<level)
                    levels[k]=level;
            }
            pInsertPoints=&(pBiDi->insertPoints);
            pInsertPoints->confirmed=pInsertPoints->size;   /* confirm inserts */
            pLevState->startON=start0;
            break;

        case 11:                        /* L after L+ON+EN/AN/ON */
            level=pLevState->runLevel;
            for(k=start0-1; k>=pLevState->startON; k--) {
                if(levels[k]==level+3) {
                    while(levels[k]==level+3) {
                        levels[k--]-=2;
                    }
                    while(levels[k]==level) {
                        k--;
                    }
                }
                if(levels[k]==level+2) {
                    levels[k]=level;
                    continue;
                }
                levels[k]=level+1;
            }
            break;

        case 12:                        /* R after L+ON+EN/AN/ON */
            level=pLevState->runLevel+1;
            for(k=start0-1; k>=pLevState->startON; k--) {
                if(levels[k]>level) {
                    levels[k]-=2;
                }
            }
            break;

        default:                        /* we should never get here */
            U_ASSERT(FALSE);
            break;
        }
    }
    /* assign the level; start is below start0 if an action prepended an earlier seq */
    if((addLevel) || (start < start0)) {
        level=pLevState->runLevel + addLevel;
        for(k=start; k<limit; k++) {
            levels[k]=level;
        }
    }
}
b75a7d8f | 1387 | |
73c04bcf A |
1388 | static void |
1389 | resolveImplicitLevels(UBiDi *pBiDi, | |
1390 | int32_t start, int32_t limit, | |
1391 | DirProp sor, DirProp eor) { | |
1392 | const DirProp *dirProps=pBiDi->dirProps; | |
b75a7d8f | 1393 | |
73c04bcf A |
1394 | LevState levState; |
1395 | int32_t i, start1, start2; | |
1396 | uint8_t oldStateImp, stateImp, actionImp; | |
1397 | uint8_t gprop, resProp, cell; | |
1398 | UBool inverseRTL; | |
1399 | DirProp nextStrongProp=R; | |
1400 | int32_t nextStrongPos=-1; | |
1401 | ||
1402 | /* check for RTL inverse BiDi mode */ | |
1403 | /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to | |
1404 | * loop on the text characters from end to start. | |
1405 | * This would need a different properties state table (at least different | |
1406 | * actions) and different levels state tables (maybe very similar to the | |
1407 | * LTR corresponding ones. | |
1408 | */ | |
46f4442e A |
1409 | inverseRTL=(UBool) |
1410 | ((start<pBiDi->lastArabicPos) && (GET_PARALEVEL(pBiDi, start) & 1) && | |
1411 | (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT || | |
1412 | pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL)); | |
73c04bcf A |
1413 | /* initialize for levels state table */ |
1414 | levState.startL2EN=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */ | |
1415 | levState.lastStrongRTL=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */ | |
1416 | levState.state=0; | |
1417 | levState.runLevel=pBiDi->levels[start]; | |
46f4442e A |
1418 | levState.pImpTab=(const ImpTab*)((pBiDi->pImpTabPair)->pImpTab)[levState.runLevel&1]; |
1419 | levState.pImpAct=(const ImpAct*)((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel&1]; | |
73c04bcf A |
1420 | processPropertySeq(pBiDi, &levState, sor, start, start); |
1421 | /* initialize for property state table */ | |
1422 | if(dirProps[start]==NSM) { | |
1423 | stateImp = 1 + sor; | |
1424 | } else { | |
1425 | stateImp=0; | |
1426 | } | |
1427 | start1=start; | |
1428 | start2=start; | |
1429 | ||
1430 | for(i=start; i<=limit; i++) { | |
1431 | if(i>=limit) { | |
1432 | gprop=eor; | |
b75a7d8f | 1433 | } else { |
73c04bcf A |
1434 | DirProp prop, prop1; |
1435 | prop=NO_CONTEXT_RTL(dirProps[i]); | |
1436 | if(inverseRTL) { | |
1437 | if(prop==AL) { | |
1438 | /* AL before EN does not make it AN */ | |
1439 | prop=R; | |
1440 | } else if(prop==EN) { | |
1441 | if(nextStrongPos<=i) { | |
1442 | /* look for next strong char (L/R/AL) */ | |
1443 | int32_t j; | |
1444 | nextStrongProp=R; /* set default */ | |
1445 | nextStrongPos=limit; | |
1446 | for(j=i+1; j<limit; j++) { | |
1447 | prop1=NO_CONTEXT_RTL(dirProps[j]); | |
1448 | if(prop1==L || prop1==R || prop1==AL) { | |
1449 | nextStrongProp=prop1; | |
1450 | nextStrongPos=j; | |
1451 | break; | |
1452 | } | |
1453 | } | |
1454 | } | |
1455 | if(nextStrongProp==AL) { | |
1456 | prop=AN; | |
1457 | } | |
b75a7d8f A |
1458 | } |
1459 | } | |
73c04bcf | 1460 | gprop=groupProp[prop]; |
b75a7d8f | 1461 | } |
73c04bcf A |
1462 | oldStateImp=stateImp; |
1463 | cell=impTabProps[oldStateImp][gprop]; | |
1464 | stateImp=GET_STATEPROPS(cell); /* isolate the new state */ | |
1465 | actionImp=GET_ACTIONPROPS(cell); /* isolate the action */ | |
1466 | if((i==limit) && (actionImp==0)) { | |
1467 | /* there is an unprocessed sequence if its property == eor */ | |
1468 | actionImp=1; /* process the last sequence */ | |
1469 | } | |
1470 | if(actionImp) { | |
1471 | resProp=impTabProps[oldStateImp][IMPTABPROPS_RES]; | |
1472 | switch(actionImp) { | |
1473 | case 1: /* process current seq1, init new seq1 */ | |
1474 | processPropertySeq(pBiDi, &levState, resProp, start1, i); | |
1475 | start1=i; | |
1476 | break; | |
1477 | case 2: /* init new seq2 */ | |
1478 | start2=i; | |
1479 | break; | |
1480 | case 3: /* process seq1, process seq2, init new seq1 */ | |
1481 | processPropertySeq(pBiDi, &levState, resProp, start1, start2); | |
46f4442e | 1482 | processPropertySeq(pBiDi, &levState, DirProp_ON, start2, i); |
73c04bcf A |
1483 | start1=i; |
1484 | break; | |
1485 | case 4: /* process seq1, set seq1=seq2, init new seq2 */ | |
1486 | processPropertySeq(pBiDi, &levState, resProp, start1, start2); | |
1487 | start1=start2; | |
1488 | start2=i; | |
1489 | break; | |
1490 | default: /* we should never get here */ | |
46f4442e | 1491 | U_ASSERT(FALSE); |
73c04bcf A |
1492 | break; |
1493 | } | |
b75a7d8f A |
1494 | } |
1495 | } | |
73c04bcf A |
1496 | /* flush possible pending sequence, e.g. ON */ |
1497 | processPropertySeq(pBiDi, &levState, eor, limit, limit); | |
b75a7d8f A |
1498 | } |
1499 | ||
1500 | /* perform (L1) and (X9) ---------------------------------------------------- */ | |
1501 | ||
1502 | /* | |
1503 | * Reset the embedding levels for some non-graphic characters (L1). | |
1504 | * This function also sets appropriate levels for BN, and | |
1505 | * explicit embedding types that are supposed to have been removed | |
1506 | * from the paragraph in (X9). | |
1507 | */ | |
1508 | static void | |
1509 | adjustWSLevels(UBiDi *pBiDi) { | |
1510 | const DirProp *dirProps=pBiDi->dirProps; | |
1511 | UBiDiLevel *levels=pBiDi->levels; | |
1512 | int32_t i; | |
1513 | ||
1514 | if(pBiDi->flags&MASK_WS) { | |
73c04bcf | 1515 | UBool orderParagraphsLTR=pBiDi->orderParagraphsLTR; |
b75a7d8f A |
1516 | Flags flag; |
1517 | ||
1518 | i=pBiDi->trailingWSStart; | |
1519 | while(i>0) { | |
1520 | /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */ | |
73c04bcf A |
1521 | while(i>0 && (flag=DIRPROP_FLAG_NC(dirProps[--i]))&MASK_WS) { |
1522 | if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) { | |
1523 | levels[i]=0; | |
1524 | } else { | |
1525 | levels[i]=GET_PARALEVEL(pBiDi, i); | |
1526 | } | |
b75a7d8f A |
1527 | } |
1528 | ||
1529 | /* reset BN to the next character's paraLevel until B/S, which restarts above loop */ | |
1530 | /* here, i+1 is guaranteed to be <length */ | |
1531 | while(i>0) { | |
73c04bcf | 1532 | flag=DIRPROP_FLAG_NC(dirProps[--i]); |
b75a7d8f A |
1533 | if(flag&MASK_BN_EXPLICIT) { |
1534 | levels[i]=levels[i+1]; | |
73c04bcf A |
1535 | } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) { |
1536 | levels[i]=0; | |
1537 | break; | |
b75a7d8f | 1538 | } else if(flag&MASK_B_S) { |
73c04bcf | 1539 | levels[i]=GET_PARALEVEL(pBiDi, i); |
b75a7d8f A |
1540 | break; |
1541 | } | |
1542 | } | |
1543 | } | |
1544 | } | |
1545 | } | |
1546 | ||
73c04bcf A |
1547 | #define BIDI_MIN(x, y) ((x)<(y) ? (x) : (y)) |
1548 | #define BIDI_ABS(x) ((x)>=0 ? (x) : (-(x))) | |
1549 | static void | |
1550 | setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length, | |
1551 | UBiDiLevel paraLevel, UErrorCode *pErrorCode) { | |
1552 | void *runsOnlyMemory; | |
1553 | int32_t *visualMap; | |
1554 | UChar *visualText; | |
46f4442e | 1555 | int32_t saveLength, saveTrailingWSStart; |
73c04bcf A |
1556 | const UBiDiLevel *levels; |
1557 | UBiDiLevel *saveLevels; | |
46f4442e A |
1558 | UBiDiDirection saveDirection; |
1559 | UBool saveMayAllocateText; | |
73c04bcf A |
1560 | Run *runs; |
1561 | int32_t visualLength, i, j, visualStart, logicalStart, | |
1562 | runCount, runLength, addedRuns, insertRemove, | |
1563 | start, limit, step, indexOddBit, logicalPos, | |
1564 | index, index1; | |
1565 | uint32_t saveOptions; | |
1566 | ||
1567 | pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT; | |
1568 | if(length==0) { | |
1569 | ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode); | |
1570 | goto cleanup3; | |
1571 | } | |
1572 | /* obtain memory for mapping table and visual text */ | |
1573 | runsOnlyMemory=uprv_malloc(length*(sizeof(int32_t)+sizeof(UChar)+sizeof(UBiDiLevel))); | |
1574 | if(runsOnlyMemory==NULL) { | |
1575 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; | |
1576 | goto cleanup3; | |
1577 | } | |
1578 | visualMap=runsOnlyMemory; | |
1579 | visualText=(UChar *)&visualMap[length]; | |
1580 | saveLevels=(UBiDiLevel *)&visualText[length]; | |
1581 | saveOptions=pBiDi->reorderingOptions; | |
1582 | if(saveOptions & UBIDI_OPTION_INSERT_MARKS) { | |
1583 | pBiDi->reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS; | |
1584 | pBiDi->reorderingOptions|=UBIDI_OPTION_REMOVE_CONTROLS; | |
1585 | } | |
46f4442e | 1586 | paraLevel&=1; /* accept only 0 or 1 */ |
73c04bcf | 1587 | ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode); |
46f4442e A |
1588 | if(U_FAILURE(*pErrorCode)) { |
1589 | goto cleanup3; | |
1590 | } | |
1591 | /* we cannot access directly pBiDi->levels since it is not yet set if | |
1592 | * direction is not MIXED | |
1593 | */ | |
73c04bcf | 1594 | levels=ubidi_getLevels(pBiDi, pErrorCode); |
46f4442e A |
1595 | uprv_memcpy(saveLevels, levels, pBiDi->length*sizeof(UBiDiLevel)); |
1596 | saveTrailingWSStart=pBiDi->trailingWSStart; | |
1597 | saveLength=pBiDi->length; | |
1598 | saveDirection=pBiDi->direction; | |
73c04bcf A |
1599 | |
1600 | /* FOOD FOR THOUGHT: instead of writing the visual text, we could use | |
1601 | * the visual map and the dirProps array to drive the second call | |
1602 | * to ubidi_setPara (but must make provision for possible removal of | |
1603 | * BiDi controls. Alternatively, only use the dirProps array via | |
1604 | * customized classifier callback. | |
1605 | */ | |
1606 | visualLength=ubidi_writeReordered(pBiDi, visualText, length, | |
1607 | UBIDI_DO_MIRRORING, pErrorCode); | |
73c04bcf A |
1608 | ubidi_getVisualMap(pBiDi, visualMap, pErrorCode); |
1609 | if(U_FAILURE(*pErrorCode)) { | |
1610 | goto cleanup2; | |
1611 | } | |
46f4442e | 1612 | pBiDi->reorderingOptions=saveOptions; |
73c04bcf A |
1613 | |
1614 | pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_LIKE_DIRECT; | |
46f4442e A |
1615 | paraLevel^=1; |
1616 | /* Because what we did with reorderingOptions, visualText may be shorter | |
1617 | * than the original text. But we don't want the levels memory to be | |
1618 | * reallocated shorter than the original length, since we need to restore | |
1619 | * the levels as after the first call to ubidi_setpara() before returning. | |
1620 | * We will force mayAllocateText to FALSE before the second call to | |
1621 | * ubidi_setpara(), and will restore it afterwards. | |
1622 | */ | |
1623 | saveMayAllocateText=pBiDi->mayAllocateText; | |
1624 | pBiDi->mayAllocateText=FALSE; | |
73c04bcf | 1625 | ubidi_setPara(pBiDi, visualText, visualLength, paraLevel, NULL, pErrorCode); |
46f4442e A |
1626 | pBiDi->mayAllocateText=saveMayAllocateText; |
1627 | ubidi_getRuns(pBiDi, pErrorCode); | |
73c04bcf A |
1628 | if(U_FAILURE(*pErrorCode)) { |
1629 | goto cleanup1; | |
1630 | } | |
73c04bcf A |
1631 | /* check if some runs must be split, count how many splits */ |
1632 | addedRuns=0; | |
1633 | runCount=pBiDi->runCount; | |
1634 | runs=pBiDi->runs; | |
1635 | visualStart=0; | |
1636 | for(i=0; i<runCount; i++, visualStart+=runLength) { | |
1637 | runLength=runs[i].visualLimit-visualStart; | |
1638 | if(runLength<2) { | |
1639 | continue; | |
1640 | } | |
1641 | logicalStart=GET_INDEX(runs[i].logicalStart); | |
1642 | for(j=logicalStart+1; j<logicalStart+runLength; j++) { | |
1643 | index=visualMap[j]; | |
1644 | index1=visualMap[j-1]; | |
1645 | if((BIDI_ABS(index-index1)!=1) || (saveLevels[index]!=saveLevels[index1])) { | |
1646 | addedRuns++; | |
1647 | } | |
1648 | } | |
1649 | } | |
1650 | if(addedRuns) { | |
1651 | if(getRunsMemory(pBiDi, runCount+addedRuns)) { | |
1652 | if(runCount==1) { | |
1653 | /* because we switch from UBiDi.simpleRuns to UBiDi.runs */ | |
1654 | pBiDi->runsMemory[0]=runs[0]; | |
1655 | } | |
1656 | runs=pBiDi->runs=pBiDi->runsMemory; | |
1657 | pBiDi->runCount+=addedRuns; | |
1658 | } else { | |
1659 | goto cleanup1; | |
1660 | } | |
1661 | } | |
1662 | /* split runs which are not consecutive in source text */ | |
1663 | for(i=runCount-1; i>=0; i--) { | |
1664 | runLength= i==0 ? runs[0].visualLimit : | |
1665 | runs[i].visualLimit-runs[i-1].visualLimit; | |
1666 | logicalStart=runs[i].logicalStart; | |
1667 | indexOddBit=GET_ODD_BIT(logicalStart); | |
1668 | logicalStart=GET_INDEX(logicalStart); | |
1669 | if(runLength<2) { | |
1670 | if(addedRuns) { | |
1671 | runs[i+addedRuns]=runs[i]; | |
1672 | } | |
1673 | logicalPos=visualMap[logicalStart]; | |
1674 | runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos, | |
1675 | saveLevels[logicalPos]^indexOddBit); | |
1676 | continue; | |
1677 | } | |
1678 | if(indexOddBit) { | |
1679 | start=logicalStart; | |
1680 | limit=logicalStart+runLength-1; | |
1681 | step=1; | |
1682 | } else { | |
1683 | start=logicalStart+runLength-1; | |
1684 | limit=logicalStart; | |
1685 | step=-1; | |
1686 | } | |
1687 | for(j=start; j!=limit; j+=step) { | |
1688 | index=visualMap[j]; | |
1689 | index1=visualMap[j+step]; | |
1690 | if((BIDI_ABS(index-index1)!=1) || (saveLevels[index]!=saveLevels[index1])) { | |
1691 | logicalPos=BIDI_MIN(visualMap[start], index); | |
1692 | runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos, | |
1693 | saveLevels[logicalPos]^indexOddBit); | |
1694 | runs[i+addedRuns].visualLimit=runs[i].visualLimit; | |
1695 | runs[i].visualLimit-=BIDI_ABS(j-start)+1; | |
1696 | insertRemove=runs[i].insertRemove&(LRM_AFTER|RLM_AFTER); | |
1697 | runs[i+addedRuns].insertRemove=insertRemove; | |
1698 | runs[i].insertRemove&=~insertRemove; | |
1699 | start=j+step; | |
1700 | addedRuns--; | |
1701 | } | |
1702 | } | |
1703 | if(addedRuns) { | |
1704 | runs[i+addedRuns]=runs[i]; | |
1705 | } | |
1706 | logicalPos=BIDI_MIN(visualMap[start], visualMap[limit]); | |
1707 | runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos, | |
1708 | saveLevels[logicalPos]^indexOddBit); | |
1709 | } | |
1710 | ||
1711 | cleanup1: | |
1712 | /* restore initial paraLevel */ | |
1713 | pBiDi->paraLevel^=1; | |
1714 | cleanup2: | |
1715 | /* restore real text */ | |
1716 | pBiDi->text=text; | |
46f4442e A |
1717 | pBiDi->length=saveLength; |
1718 | pBiDi->originalLength=length; | |
1719 | pBiDi->direction=saveDirection; | |
1720 | /* the saved levels should never excess levelsSize, but we check anyway */ | |
1721 | if(saveLength>pBiDi->levelsSize) { | |
1722 | saveLength=pBiDi->levelsSize; | |
1723 | } | |
1724 | uprv_memcpy(pBiDi->levels, saveLevels, saveLength*sizeof(UBiDiLevel)); | |
1725 | pBiDi->trailingWSStart=saveTrailingWSStart; | |
73c04bcf A |
1726 | /* free memory for mapping table and visual text */ |
1727 | uprv_free(runsOnlyMemory); | |
46f4442e A |
1728 | if(pBiDi->runCount>1) { |
1729 | pBiDi->direction=UBIDI_MIXED; | |
1730 | } | |
73c04bcf A |
1731 | cleanup3: |
1732 | pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY; | |
1733 | } | |
1734 | ||
374ca955 A |
1735 | /* ubidi_setPara ------------------------------------------------------------ */ |
1736 | ||
1737 | U_CAPI void U_EXPORT2 | |
1738 | ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, | |
1739 | UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels, | |
1740 | UErrorCode *pErrorCode) { | |
1741 | UBiDiDirection direction; | |
1742 | ||
1743 | /* check the argument values */ | |
46f4442e A |
1744 | RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); |
1745 | if(pBiDi==NULL || text==NULL || length<-1 || | |
1746 | (paraLevel>UBIDI_MAX_EXPLICIT_LEVEL && paraLevel<UBIDI_DEFAULT_LTR)) { | |
374ca955 A |
1747 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
1748 | return; | |
1749 | } | |
1750 | ||
1751 | if(length==-1) { | |
1752 | length=u_strlen(text); | |
1753 | } | |
1754 | ||
73c04bcf A |
1755 | /* special treatment for RUNS_ONLY mode */ |
1756 | if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) { | |
1757 | setParaRunsOnly(pBiDi, text, length, paraLevel, pErrorCode); | |
1758 | return; | |
1759 | } | |
1760 | ||
374ca955 | 1761 | /* initialize the UBiDi structure */ |
73c04bcf | 1762 | pBiDi->pParaBiDi=NULL; /* mark unfinished setPara */ |
374ca955 | 1763 | pBiDi->text=text; |
73c04bcf | 1764 | pBiDi->length=pBiDi->originalLength=pBiDi->resultLength=length; |
374ca955 A |
1765 | pBiDi->paraLevel=paraLevel; |
1766 | pBiDi->direction=UBIDI_LTR; | |
73c04bcf | 1767 | pBiDi->paraCount=1; |
374ca955 A |
1768 | |
1769 | pBiDi->dirProps=NULL; | |
1770 | pBiDi->levels=NULL; | |
1771 | pBiDi->runs=NULL; | |
73c04bcf A |
1772 | pBiDi->insertPoints.size=0; /* clean up from last call */ |
1773 | pBiDi->insertPoints.confirmed=0; /* clean up from last call */ | |
1774 | ||
1775 | /* | |
1776 | * Save the original paraLevel if contextual; otherwise, set to 0. | |
1777 | */ | |
1778 | if(IS_DEFAULT_LEVEL(paraLevel)) { | |
1779 | pBiDi->defaultParaLevel=paraLevel; | |
1780 | } else { | |
1781 | pBiDi->defaultParaLevel=0; | |
1782 | } | |
374ca955 A |
1783 | |
1784 | if(length==0) { | |
1785 | /* | |
1786 | * For an empty paragraph, create a UBiDi object with the paraLevel and | |
1787 | * the flags and the direction set but without allocating zero-length arrays. | |
1788 | * There is nothing more to do. | |
1789 | */ | |
1790 | if(IS_DEFAULT_LEVEL(paraLevel)) { | |
1791 | pBiDi->paraLevel&=1; | |
73c04bcf | 1792 | pBiDi->defaultParaLevel=0; |
374ca955 A |
1793 | } |
1794 | if(paraLevel&1) { | |
1795 | pBiDi->flags=DIRPROP_FLAG(R); | |
1796 | pBiDi->direction=UBIDI_RTL; | |
1797 | } else { | |
1798 | pBiDi->flags=DIRPROP_FLAG(L); | |
1799 | pBiDi->direction=UBIDI_LTR; | |
1800 | } | |
1801 | ||
1802 | pBiDi->runCount=0; | |
46f4442e | 1803 | pBiDi->paraCount=0; |
73c04bcf | 1804 | pBiDi->pParaBiDi=pBiDi; /* mark successful setPara */ |
374ca955 A |
1805 | return; |
1806 | } | |
1807 | ||
1808 | pBiDi->runCount=-1; | |
1809 | ||
1810 | /* | |
1811 | * Get the directional properties, | |
1812 | * the flags bit-set, and | |
73c04bcf | 1813 | * determine the paragraph level if necessary. |
374ca955 A |
1814 | */ |
1815 | if(getDirPropsMemory(pBiDi, length)) { | |
1816 | pBiDi->dirProps=pBiDi->dirPropsMemory; | |
73c04bcf | 1817 | getDirProps(pBiDi); |
374ca955 A |
1818 | } else { |
1819 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; | |
1820 | return; | |
1821 | } | |
73c04bcf A |
1822 | /* the processed length may have changed if UBIDI_OPTION_STREAMING */ |
1823 | length= pBiDi->length; | |
1824 | pBiDi->trailingWSStart=length; /* the levels[] will reflect the WS run */ | |
1825 | /* allocate paras memory */ | |
1826 | if(pBiDi->paraCount>1) { | |
1827 | if(getInitialParasMemory(pBiDi, pBiDi->paraCount)) { | |
1828 | pBiDi->paras=pBiDi->parasMemory; | |
1829 | pBiDi->paras[pBiDi->paraCount-1]=length; | |
1830 | } else { | |
1831 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; | |
1832 | return; | |
1833 | } | |
1834 | } else { | |
1835 | /* initialize paras for single paragraph */ | |
1836 | pBiDi->paras=pBiDi->simpleParas; | |
1837 | pBiDi->simpleParas[0]=length; | |
1838 | } | |
374ca955 A |
1839 | |
1840 | /* are explicit levels specified? */ | |
1841 | if(embeddingLevels==NULL) { | |
1842 | /* no: determine explicit levels according to the (Xn) rules */\ | |
1843 | if(getLevelsMemory(pBiDi, length)) { | |
1844 | pBiDi->levels=pBiDi->levelsMemory; | |
1845 | direction=resolveExplicitLevels(pBiDi); | |
1846 | } else { | |
1847 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; | |
1848 | return; | |
1849 | } | |
1850 | } else { | |
73c04bcf | 1851 | /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */ |
374ca955 A |
1852 | pBiDi->levels=embeddingLevels; |
1853 | direction=checkExplicitLevels(pBiDi, pErrorCode); | |
1854 | if(U_FAILURE(*pErrorCode)) { | |
1855 | return; | |
1856 | } | |
1857 | } | |
1858 | ||
1859 | /* | |
1860 | * The steps after (X9) in the UBiDi algorithm are performed only if | |
1861 | * the paragraph text has mixed directionality! | |
1862 | */ | |
1863 | pBiDi->direction=direction; | |
1864 | switch(direction) { | |
1865 | case UBIDI_LTR: | |
1866 | /* make sure paraLevel is even */ | |
1867 | pBiDi->paraLevel=(UBiDiLevel)((pBiDi->paraLevel+1)&~1); | |
1868 | ||
1869 | /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ | |
1870 | pBiDi->trailingWSStart=0; | |
1871 | break; | |
1872 | case UBIDI_RTL: | |
1873 | /* make sure paraLevel is odd */ | |
1874 | pBiDi->paraLevel|=1; | |
1875 | ||
1876 | /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ | |
1877 | pBiDi->trailingWSStart=0; | |
1878 | break; | |
1879 | default: | |
73c04bcf A |
1880 | /* |
1881 | * Choose the right implicit state table | |
1882 | */ | |
1883 | switch(pBiDi->reorderingMode) { | |
1884 | case UBIDI_REORDER_DEFAULT: | |
1885 | pBiDi->pImpTabPair=&impTab_DEFAULT; | |
1886 | break; | |
1887 | case UBIDI_REORDER_NUMBERS_SPECIAL: | |
1888 | pBiDi->pImpTabPair=&impTab_NUMBERS_SPECIAL; | |
1889 | break; | |
1890 | case UBIDI_REORDER_GROUP_NUMBERS_WITH_R: | |
1891 | pBiDi->pImpTabPair=&impTab_GROUP_NUMBERS_WITH_R; | |
1892 | break; | |
73c04bcf A |
1893 | case UBIDI_REORDER_INVERSE_NUMBERS_AS_L: |
1894 | pBiDi->pImpTabPair=&impTab_INVERSE_NUMBERS_AS_L; | |
1895 | break; | |
1896 | case UBIDI_REORDER_INVERSE_LIKE_DIRECT: | |
1897 | if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) { | |
1898 | pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS; | |
1899 | } else { | |
1900 | pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT; | |
1901 | } | |
1902 | break; | |
1903 | case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL: | |
1904 | if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) { | |
1905 | pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS; | |
1906 | } else { | |
1907 | pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL; | |
1908 | } | |
1909 | break; | |
1910 | default: | |
46f4442e A |
1911 | /* we should never get here */ |
1912 | U_ASSERT(FALSE); | |
73c04bcf A |
1913 | break; |
1914 | } | |
374ca955 A |
1915 | /* |
1916 | * If there are no external levels specified and there | |
1917 | * are no significant explicit level codes in the text, | |
1918 | * then we can treat the entire paragraph as one run. | |
1919 | * Otherwise, we need to perform the following rules on runs of | |
1920 | * the text with the same embedding levels. (X10) | |
1921 | * "Significant" explicit level codes are ones that actually | |
1922 | * affect non-BN characters. | |
1923 | * Examples for "insignificant" ones are empty embeddings | |
1924 | * LRE-PDF, LRE-RLE-PDF-PDF, etc. | |
1925 | */ | |
46f4442e A |
1926 | if(embeddingLevels==NULL && pBiDi->paraCount<=1 && |
1927 | !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) { | |
374ca955 | 1928 | resolveImplicitLevels(pBiDi, 0, length, |
73c04bcf A |
1929 | GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, 0)), |
1930 | GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, length-1))); | |
374ca955 A |
1931 | } else { |
1932 | /* sor, eor: start and end types of same-level-run */ | |
1933 | UBiDiLevel *levels=pBiDi->levels; | |
1934 | int32_t start, limit=0; | |
1935 | UBiDiLevel level, nextLevel; | |
1936 | DirProp sor, eor; | |
1937 | ||
1938 | /* determine the first sor and set eor to it because of the loop body (sor=eor there) */ | |
73c04bcf | 1939 | level=GET_PARALEVEL(pBiDi, 0); |
374ca955 A |
1940 | nextLevel=levels[0]; |
1941 | if(level<nextLevel) { | |
1942 | eor=GET_LR_FROM_LEVEL(nextLevel); | |
1943 | } else { | |
1944 | eor=GET_LR_FROM_LEVEL(level); | |
1945 | } | |
1946 | ||
1947 | do { | |
1948 | /* determine start and limit of the run (end points just behind the run) */ | |
1949 | ||
1950 | /* the values for this run's start are the same as for the previous run's end */ | |
374ca955 A |
1951 | start=limit; |
1952 | level=nextLevel; | |
73c04bcf A |
1953 | if((start>0) && (NO_CONTEXT_RTL(pBiDi->dirProps[start-1])==B)) { |
1954 | /* except if this is a new paragraph, then set sor = para level */ | |
1955 | sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start)); | |
1956 | } else { | |
1957 | sor=eor; | |
1958 | } | |
374ca955 A |
1959 | |
1960 | /* search for the limit of this run */ | |
1961 | while(++limit<length && levels[limit]==level) {} | |
1962 | ||
1963 | /* get the correct level of the next run */ | |
1964 | if(limit<length) { | |
1965 | nextLevel=levels[limit]; | |
1966 | } else { | |
73c04bcf | 1967 | nextLevel=GET_PARALEVEL(pBiDi, length-1); |
374ca955 A |
1968 | } |
1969 | ||
1970 | /* determine eor from max(level, nextLevel); sor is last run's eor */ | |
1971 | if((level&~UBIDI_LEVEL_OVERRIDE)<(nextLevel&~UBIDI_LEVEL_OVERRIDE)) { | |
1972 | eor=GET_LR_FROM_LEVEL(nextLevel); | |
1973 | } else { | |
1974 | eor=GET_LR_FROM_LEVEL(level); | |
1975 | } | |
1976 | ||
1977 | /* if the run consists of overridden directional types, then there | |
1978 | are no implicit types to be resolved */ | |
1979 | if(!(level&UBIDI_LEVEL_OVERRIDE)) { | |
1980 | resolveImplicitLevels(pBiDi, start, limit, sor, eor); | |
1981 | } else { | |
1982 | /* remove the UBIDI_LEVEL_OVERRIDE flags */ | |
1983 | do { | |
1984 | levels[start++]&=~UBIDI_LEVEL_OVERRIDE; | |
1985 | } while(start<limit); | |
1986 | } | |
1987 | } while(limit<length); | |
1988 | } | |
73c04bcf A |
1989 | /* check if we got any memory shortage while adding insert points */ |
1990 | if (U_FAILURE(pBiDi->insertPoints.errorCode)) | |
1991 | { | |
1992 | *pErrorCode=pBiDi->insertPoints.errorCode; | |
1993 | return; | |
1994 | } | |
374ca955 A |
1995 | /* reset the embedding levels for some non-graphic characters (L1), (X9) */ |
1996 | adjustWSLevels(pBiDi); | |
374ca955 A |
1997 | break; |
1998 | } | |
46f4442e A |
1999 | /* add RLM for inverse Bidi with contextual orientation resolving |
2000 | * to RTL which would not round-trip otherwise | |
2001 | */ | |
2002 | if((pBiDi->defaultParaLevel>0) && | |
2003 | (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) && | |
2004 | ((pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) || | |
2005 | (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) { | |
2006 | int32_t i, j, start, last; | |
2007 | DirProp dirProp; | |
2008 | for(i=0; i<pBiDi->paraCount; i++) { | |
2009 | last=pBiDi->paras[i]-1; | |
2010 | if((pBiDi->dirProps[last] & CONTEXT_RTL)==0) { | |
2011 | continue; /* LTR paragraph */ | |
2012 | } | |
2013 | start= i==0 ? 0 : pBiDi->paras[i - 1]; | |
2014 | for(j=last; j>=start; j--) { | |
2015 | dirProp=NO_CONTEXT_RTL(pBiDi->dirProps[j]); | |
2016 | if(dirProp==L) { | |
2017 | if(j<last) { | |
2018 | while(NO_CONTEXT_RTL(pBiDi->dirProps[last])==B) { | |
2019 | last--; | |
2020 | } | |
2021 | } | |
2022 | addPoint(pBiDi, last, RLM_BEFORE); | |
2023 | break; | |
2024 | } | |
2025 | if(DIRPROP_FLAG(dirProp) & MASK_R_AL) { | |
2026 | break; | |
2027 | } | |
2028 | } | |
2029 | } | |
2030 | } | |
2031 | ||
73c04bcf A |
2032 | if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) { |
2033 | pBiDi->resultLength -= pBiDi->controlCount; | |
2034 | } else { | |
2035 | pBiDi->resultLength += pBiDi->insertPoints.size; | |
2036 | } | |
2037 | pBiDi->pParaBiDi=pBiDi; /* mark successful setPara */ | |
2038 | } | |
2039 | ||
2040 | U_CAPI void U_EXPORT2 | |
2041 | ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR) { | |
2042 | if(pBiDi!=NULL) { | |
2043 | pBiDi->orderParagraphsLTR=orderParagraphsLTR; | |
2044 | } | |
2045 | } | |
2046 | ||
2047 | U_CAPI UBool U_EXPORT2 | |
2048 | ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) { | |
2049 | if(pBiDi!=NULL) { | |
2050 | return pBiDi->orderParagraphsLTR; | |
2051 | } else { | |
2052 | return FALSE; | |
2053 | } | |
374ca955 | 2054 | } |
b75a7d8f A |
2055 | |
2056 | U_CAPI UBiDiDirection U_EXPORT2 | |
2057 | ubidi_getDirection(const UBiDi *pBiDi) { | |
73c04bcf | 2058 | if(IS_VALID_PARA_OR_LINE(pBiDi)) { |
b75a7d8f A |
2059 | return pBiDi->direction; |
2060 | } else { | |
2061 | return UBIDI_LTR; | |
2062 | } | |
2063 | } | |
2064 | ||
2065 | U_CAPI const UChar * U_EXPORT2 | |
2066 | ubidi_getText(const UBiDi *pBiDi) { | |
73c04bcf | 2067 | if(IS_VALID_PARA_OR_LINE(pBiDi)) { |
b75a7d8f A |
2068 | return pBiDi->text; |
2069 | } else { | |
2070 | return NULL; | |
2071 | } | |
2072 | } | |
2073 | ||
2074 | U_CAPI int32_t U_EXPORT2 | |
2075 | ubidi_getLength(const UBiDi *pBiDi) { | |
73c04bcf A |
2076 | if(IS_VALID_PARA_OR_LINE(pBiDi)) { |
2077 | return pBiDi->originalLength; | |
2078 | } else { | |
2079 | return 0; | |
2080 | } | |
2081 | } | |
2082 | ||
2083 | U_CAPI int32_t U_EXPORT2 | |
2084 | ubidi_getProcessedLength(const UBiDi *pBiDi) { | |
2085 | if(IS_VALID_PARA_OR_LINE(pBiDi)) { | |
b75a7d8f A |
2086 | return pBiDi->length; |
2087 | } else { | |
2088 | return 0; | |
2089 | } | |
2090 | } | |
2091 | ||
73c04bcf A |
2092 | U_CAPI int32_t U_EXPORT2 |
2093 | ubidi_getResultLength(const UBiDi *pBiDi) { | |
2094 | if(IS_VALID_PARA_OR_LINE(pBiDi)) { | |
2095 | return pBiDi->resultLength; | |
2096 | } else { | |
2097 | return 0; | |
2098 | } | |
2099 | } | |
2100 | ||
2101 | /* paragraphs API functions ------------------------------------------------- */ | |
2102 | ||
b75a7d8f A |
2103 | U_CAPI UBiDiLevel U_EXPORT2 |
2104 | ubidi_getParaLevel(const UBiDi *pBiDi) { | |
73c04bcf | 2105 | if(IS_VALID_PARA_OR_LINE(pBiDi)) { |
b75a7d8f A |
2106 | return pBiDi->paraLevel; |
2107 | } else { | |
2108 | return 0; | |
2109 | } | |
2110 | } | |
2111 | ||
73c04bcf A |
2112 | U_CAPI int32_t U_EXPORT2 |
2113 | ubidi_countParagraphs(UBiDi *pBiDi) { | |
2114 | if(!IS_VALID_PARA_OR_LINE(pBiDi)) { | |
2115 | return 0; | |
2116 | } else { | |
2117 | return pBiDi->paraCount; | |
2118 | } | |
2119 | } | |
b75a7d8f | 2120 | |
73c04bcf A |
2121 | U_CAPI void U_EXPORT2 |
2122 | ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex, | |
2123 | int32_t *pParaStart, int32_t *pParaLimit, | |
2124 | UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) { | |
2125 | int32_t paraStart; | |
b75a7d8f | 2126 | |
73c04bcf | 2127 | /* check the argument values */ |
46f4442e A |
2128 | RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); |
2129 | RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode); | |
2130 | RETURN_VOID_IF_BAD_RANGE(paraIndex, 0, pBiDi->paraCount, *pErrorCode); | |
2131 | ||
73c04bcf A |
2132 | pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */ |
2133 | if(paraIndex) { | |
2134 | paraStart=pBiDi->paras[paraIndex-1]; | |
2135 | } else { | |
2136 | paraStart=0; | |
2137 | } | |
2138 | if(pParaStart!=NULL) { | |
2139 | *pParaStart=paraStart; | |
2140 | } | |
2141 | if(pParaLimit!=NULL) { | |
2142 | *pParaLimit=pBiDi->paras[paraIndex]; | |
2143 | } | |
2144 | if(pParaLevel!=NULL) { | |
2145 | *pParaLevel=GET_PARALEVEL(pBiDi, paraStart); | |
2146 | } | |
73c04bcf | 2147 | } |
b75a7d8f | 2148 | |
73c04bcf A |
2149 | U_CAPI int32_t U_EXPORT2 |
2150 | ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex, | |
2151 | int32_t *pParaStart, int32_t *pParaLimit, | |
2152 | UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) { | |
2153 | uint32_t paraIndex; | |
b75a7d8f | 2154 | |
73c04bcf A |
2155 | /* check the argument values */ |
2156 | /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */ | |
46f4442e A |
2157 | RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1); |
2158 | RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1); | |
73c04bcf | 2159 | pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */ |
46f4442e A |
2160 | RETURN_IF_BAD_RANGE(charIndex, 0, pBiDi->length, *pErrorCode, -1); |
2161 | ||
73c04bcf A |
2162 | for(paraIndex=0; charIndex>=pBiDi->paras[paraIndex]; paraIndex++); |
2163 | ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode); | |
2164 | return paraIndex; | |
2165 | } | |
b75a7d8f | 2166 | |
73c04bcf A |
2167 | U_CAPI void U_EXPORT2 |
2168 | ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn, | |
2169 | const void *newContext, UBiDiClassCallback **oldFn, | |
2170 | const void **oldContext, UErrorCode *pErrorCode) | |
2171 | { | |
46f4442e A |
2172 | RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); |
2173 | if(pBiDi==NULL) { | |
73c04bcf A |
2174 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
2175 | return; | |
2176 | } | |
2177 | if( oldFn ) | |
2178 | { | |
2179 | *oldFn = pBiDi->fnClassCallback; | |
2180 | } | |
2181 | if( oldContext ) | |
2182 | { | |
2183 | *oldContext = pBiDi->coClassCallback; | |
2184 | } | |
2185 | pBiDi->fnClassCallback = newFn; | |
2186 | pBiDi->coClassCallback = newContext; | |
2187 | } | |
b75a7d8f | 2188 | |
73c04bcf A |
2189 | U_CAPI void U_EXPORT2 |
2190 | ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context) | |
2191 | { | |
46f4442e A |
2192 | if(pBiDi==NULL) { |
2193 | return; | |
2194 | } | |
73c04bcf A |
2195 | if( fn ) |
2196 | { | |
2197 | *fn = pBiDi->fnClassCallback; | |
2198 | } | |
2199 | if( context ) | |
2200 | { | |
2201 | *context = pBiDi->coClassCallback; | |
2202 | } | |
2203 | } | |
b75a7d8f | 2204 | |
73c04bcf A |
2205 | U_CAPI UCharDirection U_EXPORT2 |
2206 | ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c) | |
2207 | { | |
2208 | UCharDirection dir; | |
b75a7d8f | 2209 | |
73c04bcf A |
2210 | if( pBiDi->fnClassCallback == NULL || |
2211 | (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT ) | |
2212 | { | |
2213 | return ubidi_getClass(pBiDi->bdp, c); | |
2214 | } else { | |
2215 | return dir; | |
b75a7d8f | 2216 | } |
b75a7d8f | 2217 | } |
73c04bcf | 2218 |