]>
Commit | Line | Data |
---|---|---|
/*
******************************************************************************
*
*   Copyright (C) 1999-2007, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*   file name:  ubidiimp.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 1999aug06
*   created by: Markus W. Scherer, updated by Matitiahu Allouche
*/
16 | ||
17 | #ifndef UBIDIIMP_H | |
18 | #define UBIDIIMP_H | |
19 | ||
20 | /* set import/export definitions */ | |
21 | #ifdef U_COMMON_IMPLEMENTATION | |
22 | ||
23 | #include "unicode/utypes.h" | |
24 | #include "unicode/uchar.h" | |
73c04bcf | 25 | #include "ubidi_props.h" |
b75a7d8f A |
26 | |
/* miscellaneous definitions ---------------------------------------------- */

/* One directional property (BiDi class) value; the UBiDi structure keeps
 * one DirProp per text character in its dirProps array.
 */
typedef uint8_t DirProp;

/* A bit set of directional properties (see the DIRPROP_FLAG() macros). */
typedef uint32_t Flags;
31 | ||
32 | /* Comparing the description of the BiDi algorithm with this implementation | |
33 | is easier with the same names for the BiDi types in the code as there. | |
34 | See UCharDirection in uchar.h . | |
35 | */ | |
73c04bcf | 36 | enum { |
b75a7d8f A |
37 | L= U_LEFT_TO_RIGHT, |
38 | R= U_RIGHT_TO_LEFT, | |
39 | EN= U_EUROPEAN_NUMBER, | |
40 | ES= U_EUROPEAN_NUMBER_SEPARATOR, | |
41 | ET= U_EUROPEAN_NUMBER_TERMINATOR, | |
42 | AN= U_ARABIC_NUMBER, | |
43 | CS= U_COMMON_NUMBER_SEPARATOR, | |
44 | B= U_BLOCK_SEPARATOR, | |
45 | S= U_SEGMENT_SEPARATOR, | |
46 | WS= U_WHITE_SPACE_NEUTRAL, | |
47 | ON= U_OTHER_NEUTRAL, | |
48 | LRE=U_LEFT_TO_RIGHT_EMBEDDING, | |
49 | LRO=U_LEFT_TO_RIGHT_OVERRIDE, | |
50 | AL= U_RIGHT_TO_LEFT_ARABIC, | |
51 | RLE=U_RIGHT_TO_LEFT_EMBEDDING, | |
52 | RLO=U_RIGHT_TO_LEFT_OVERRIDE, | |
53 | PDF=U_POP_DIRECTIONAL_FORMAT, | |
54 | NSM=U_DIR_NON_SPACING_MARK, | |
55 | BN= U_BOUNDARY_NEUTRAL, | |
56 | dirPropCount | |
57 | }; | |
58 | ||
/*
 * Sometimes, bit values are more appropriate
 * to deal with directionality properties.
 * Abbreviations in these macro names refer to names
 * used in the BiDi algorithm.
 */
/* the single-bit mask for directional property dir (bit number == dir) */
#define DIRPROP_FLAG(dir) (1UL<<(dir))

/* special flag for multiple runs from explicit embedding codes */
#define DIRPROP_FLAG_MULTI_RUNS (1UL<<31)

/* are there any characters that are LTR or RTL? */
#define MASK_LTR (DIRPROP_FLAG(L)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(AN)|DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO))
#define MASK_RTL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO))
#define MASK_R_AL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL))

/* explicit embedding codes */
#define MASK_LRX (DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO))
#define MASK_RLX (DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO))
#define MASK_OVERRIDE (DIRPROP_FLAG(LRO)|DIRPROP_FLAG(RLO))

#define MASK_EXPLICIT (MASK_LRX|MASK_RLX|DIRPROP_FLAG(PDF))
#define MASK_BN_EXPLICIT (DIRPROP_FLAG(BN)|MASK_EXPLICIT)

/* paragraph and segment separators */
#define MASK_B_S (DIRPROP_FLAG(B)|DIRPROP_FLAG(S))

/* all types that are counted as White Space or Neutral in some steps */
#define MASK_WS (MASK_B_S|DIRPROP_FLAG(WS)|MASK_BN_EXPLICIT)
#define MASK_N (DIRPROP_FLAG(ON)|MASK_WS)

/* all types that are included in a sequence of European Terminators for (W5) */
#define MASK_ET_NSM_BN (DIRPROP_FLAG(ET)|DIRPROP_FLAG(NSM)|MASK_BN_EXPLICIT)

/* types that are neutrals or could become neutrals in (Wn) */
#define MASK_POSSIBLE_N (DIRPROP_FLAG(CS)|DIRPROP_FLAG(ES)|DIRPROP_FLAG(ET)|MASK_N)

/*
 * These types may be changed to "e",
 * the embedding type (L or R) of the run,
 * in the BiDi algorithm (N2)
 */
#define MASK_EMBEDDING (DIRPROP_FLAG(NSM)|MASK_POSSIBLE_N)
102 | ||
/* the dirProp's L and R are defined to 0 and 1 values in UCharDirection */
#define GET_LR_FROM_LEVEL(level) ((DirProp)((level)&1))

/* true for the level values 0xfe and 0xff */
#define IS_DEFAULT_LEVEL(level) ((level)>=0xfe)

/*
 * The following bit is ORed to the property of characters in paragraphs
 * with contextual RTL direction when paraLevel is contextual.
 */
#define CONTEXT_RTL 0x80
/* dir with the CONTEXT_RTL bit cleared */
#define NO_CONTEXT_RTL(dir) ((dir)&~CONTEXT_RTL)
/*
 * The following is a variant of DIRPROP_FLAG which ignores the CONTEXT_RTL bit.
 */
#define DIRPROP_FLAG_NC(dir) (1UL<<(NO_CONTEXT_RTL(dir)))

/*
 * Paragraph level that applies at text position index:
 * when defaultParaLevel is non-zero it is bit 7 (the CONTEXT_RTL bit)
 * of dirProps[index], otherwise it is the object's paraLevel.
 * The whole expansion is parenthesized so that it can be used safely
 * inside larger expressions.
 */
#define GET_PARALEVEL(ubidi, index) \
        ((UBiDiLevel)((ubidi)->defaultParaLevel ? (ubidi)->dirProps[index]>>7 \
                                                : (ubidi)->paraLevel))
122 | ||
/* Paragraph type for multiple paragraph support ---------------------------- */
typedef int32_t Para;

/* code points of carriage return and line feed */
#define CR  0x000D
#define LF  0x000A
b75a7d8f A |
128 | |
/* Run structure for reordering --------------------------------------------- */

/* Distinct single-bit flags: which mark (LRM or RLM) and on which side.
 * NOTE(review): presumably the values stored in Run.insertRemove (when >0)
 * and in Point.flag - confirm against the code that sets them.
 */
enum {
    LRM_BEFORE=1,
    LRM_AFTER=2,
    RLM_BEFORE=4,
    RLM_AFTER=8
};

typedef struct Run {
    int32_t logicalStart;   /* first character of the run; b31 indicates even/odd level */
    int32_t visualLimit;    /* last visual position of the run +1 */
    int32_t insertRemove;   /* if >0, flags for inserting LRM/RLM before/after run,
                               if <0, count of bidi controls within run */
} Run;
143 | ||
/* in a Run, logicalStart will get this bit set if the run level is odd */
#define INDEX_ODD_BIT (1UL<<31)

/*
 * Combine an index with the odd/even bit of a level.
 * Only the lowest bit of level is used, and the shift is done on an
 * unsigned value: left-shifting a signed 1 into bit 31 (or shifting the
 * higher bits of a level out of an int32_t) is undefined behavior in C.
 */
#define MAKE_INDEX_ODD_PAIR(index, level) ((index)|(int32_t)((uint32_t)((level)&1)<<31))
/* same as above, but ORs the bit into the lvalue x */
#define ADD_ODD_BIT_FROM_LEVEL(x, level)  ((x)|=(int32_t)((uint32_t)((level)&1)<<31))
/* clears the odd bit in the lvalue x */
#define REMOVE_ODD_BIT(x)                 ((x)&=~INDEX_ODD_BIT)

/*
 * The index without the odd bit.
 * The cast back to int32_t matters where unsigned long is wider than
 * 32 bits: a negative x (odd bit set) is sign-extended by the & with
 * INDEX_ODD_BIT, and without the cast the result would keep the upper
 * bits set and compare unequal to the plain index.
 */
#define GET_INDEX(x)   ((int32_t)((x)&~INDEX_ODD_BIT))
/* 1 if the odd bit is set in x, else 0 */
#define GET_ODD_BIT(x) ((uint32_t)(x)>>31)
#define IS_ODD_RUN(x)  ((UBool)(((x)&INDEX_ODD_BIT)!=0))
#define IS_EVEN_RUN(x) ((UBool)(((x)&INDEX_ODD_BIT)==0))
b75a7d8f A |
155 | |
156 | U_CFUNC UBool | |
46f4442e | 157 | ubidi_getRuns(UBiDi *pBiDi, UErrorCode *pErrorCode); |
b75a7d8f | 158 | |
73c04bcf A |
/** BiDi control code points */
enum {
    ZWNJ_CHAR=0x200c,
    ZWJ_CHAR,           /* 0x200d */
    LRM_CHAR,           /* 0x200e */
    RLM_CHAR,           /* 0x200f */
    LRE_CHAR=0x202a,
    RLE_CHAR,           /* 0x202b */
    PDF_CHAR,           /* 0x202c */
    LRO_CHAR,           /* 0x202d */
    RLO_CHAR            /* 0x202e */
};

/*
 * True if c is one of the code points enumerated above:
 * - masking off the two lowest bits matches the aligned group of four
 *   ZWNJ_CHAR..RLM_CHAR (0x200c..0x200f);
 * - the unsigned difference from LRE_CHAR is below 5 exactly for
 *   LRE_CHAR..RLO_CHAR (0x202a..0x202e); smaller values of c wrap around
 *   to a large unsigned number and fail the test.
 * Note that c is evaluated twice.
 */
#define IS_BIDI_CONTROL_CHAR(c) (((uint32_t)(c)&0xfffffffc)==ZWNJ_CHAR || (uint32_t)((c)-LRE_CHAR)<5)
173 | ||
/* InsertPoints structure for noting where to put BiDi marks ---------------- */

/* one place in the text where a mark is to be inserted */
typedef struct Point {
    int32_t pos;    /* position in text */
    int32_t flag;   /* flag for LRM/RLM, before/after */
} Point;
180 | ||
181 | typedef struct InsertPoints { | |
182 | int32_t capacity; /* number of points allocated */ | |
183 | int32_t size; /* number of points used */ | |
184 | int32_t confirmed; /* number of points confirmed */ | |
185 | UErrorCode errorCode; /* for eventual memory shortage */ | |
186 | Point *points; /* pointer to array of points */ | |
187 | } InsertPoints; | |
188 | ||
189 | ||
b75a7d8f A |
190 | /* UBiDi structure ----------------------------------------------------------- */ |
191 | ||
192 | struct UBiDi { | |
73c04bcf A |
193 | /* pointer to parent paragraph object (pointer to self if this object is |
194 | * a paragraph object); set to NULL in a newly opened object; set to a | |
195 | * real value after a successful execution of ubidi_setPara or ubidi_setLine | |
196 | */ | |
197 | const UBiDi * pParaBiDi; | |
198 | ||
199 | const UBiDiProps *bdp; | |
200 | ||
b75a7d8f A |
201 | /* alias pointer to the current text */ |
202 | const UChar *text; | |
203 | ||
204 | /* length of the current text */ | |
73c04bcf A |
205 | int32_t originalLength; |
206 | ||
207 | /* if the UBIDI_OPTION_STREAMING option is set, this is the length | |
208 | * of text actually processed by ubidi_setPara, which may be shorter than | |
209 | * the original length. | |
210 | * Otherwise, it is identical to the original length. | |
211 | */ | |
b75a7d8f A |
212 | int32_t length; |
213 | ||
73c04bcf A |
214 | /* if the UBIDI_OPTION_REMOVE_CONTROLS option is set, and/or |
215 | * marks are allowed to be inserted in one of the reordering mode, the | |
216 | * length of the result string may be different from the processed length. | |
217 | */ | |
218 | int32_t resultLength; | |
219 | ||
b75a7d8f | 220 | /* memory sizes in bytes */ |
73c04bcf | 221 | int32_t dirPropsSize, levelsSize, parasSize, runsSize; |
b75a7d8f A |
222 | |
223 | /* allocated memory */ | |
224 | DirProp *dirPropsMemory; | |
225 | UBiDiLevel *levelsMemory; | |
73c04bcf | 226 | Para *parasMemory; |
b75a7d8f A |
227 | Run *runsMemory; |
228 | ||
229 | /* indicators for whether memory may be allocated after ubidi_open() */ | |
230 | UBool mayAllocateText, mayAllocateRuns; | |
231 | ||
232 | /* arrays with one value per text-character */ | |
233 | const DirProp *dirProps; | |
234 | UBiDiLevel *levels; | |
235 | ||
236 | /* are we performing an approximation of the "inverse BiDi" algorithm? */ | |
237 | UBool isInverse; | |
238 | ||
73c04bcf A |
239 | /* are we using the basic algorithm or its variation? */ |
240 | UBiDiReorderingMode reorderingMode; | |
241 | ||
242 | /* UBIDI_REORDER_xxx values must be ordered so that all the regular | |
243 | * logical to visual modes come first, and all inverse BiDi modes | |
244 | * come last. | |
245 | */ | |
246 | #define UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL UBIDI_REORDER_NUMBERS_SPECIAL | |
247 | ||
248 | /* bitmask for reordering options */ | |
249 | uint32_t reorderingOptions; | |
250 | ||
251 | /* must block separators receive level 0? */ | |
252 | UBool orderParagraphsLTR; | |
253 | ||
b75a7d8f A |
254 | /* the paragraph level */ |
255 | UBiDiLevel paraLevel; | |
73c04bcf A |
256 | /* original paraLevel when contextual */ |
257 | /* must be one of UBIDI_DEFAULT_xxx or 0 if not contextual */ | |
258 | UBiDiLevel defaultParaLevel; | |
259 | ||
260 | /* the following is set in ubidi_setPara, used in processPropertySeq */ | |
261 | const struct ImpTabPair * pImpTabPair; /* pointer to levels state table pair */ | |
b75a7d8f A |
262 | |
263 | /* the overall paragraph or line directionality - see UBiDiDirection */ | |
264 | UBiDiDirection direction; | |
265 | ||
266 | /* flags is a bit set for which directional properties are in the text */ | |
267 | Flags flags; | |
268 | ||
73c04bcf A |
269 | /* lastArabicPos is index to the last AL in the text, -1 if none */ |
270 | int32_t lastArabicPos; | |
271 | ||
b75a7d8f A |
272 | /* characters after trailingWSStart are WS and are */ |
273 | /* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */ | |
274 | int32_t trailingWSStart; | |
275 | ||
73c04bcf A |
276 | /* fields for paragraph handling */ |
277 | int32_t paraCount; /* set in getDirProps() */ | |
278 | Para *paras; /* limits of paragraphs, filled in | |
279 | ResolveExplicitLevels() or CheckExplicitLevels() */ | |
280 | ||
281 | /* for single paragraph text, we only need a tiny array of paras (no malloc()) */ | |
282 | Para simpleParas[1]; | |
283 | ||
b75a7d8f A |
284 | /* fields for line reordering */ |
285 | int32_t runCount; /* ==-1: runs not set up yet */ | |
286 | Run *runs; | |
287 | ||
288 | /* for non-mixed text, we only need a tiny array of runs (no malloc()) */ | |
289 | Run simpleRuns[1]; | |
73c04bcf A |
290 | |
291 | /* for inverse Bidi with insertion of directional marks */ | |
292 | InsertPoints insertPoints; | |
293 | ||
294 | /* for option UBIDI_OPTION_REMOVE_CONTROLS */ | |
295 | int32_t controlCount; | |
296 | ||
297 | /* for Bidi class callback */ | |
298 | UBiDiClassCallback *fnClassCallback; /* action pointer */ | |
299 | const void *coClassCallback; /* context pointer */ | |
b75a7d8f A |
300 | }; |
301 | ||
/* non-zero if x is non-NULL and is its own parent paragraph object
 * (x->pParaBiDi points back at x)
 */
#define IS_VALID_PARA(x) ((x) && ((x)->pParaBiDi==(x)))
/* non-zero if x passes IS_VALID_PARA, or if x has a non-NULL parent
 * which is itself its own parent paragraph object; x is evaluated several times
 */
#define IS_VALID_PARA_OR_LINE(x) ((x) && ((x)->pParaBiDi==(x) || (((x)->pParaBiDi) && (x)->pParaBiDi->pParaBiDi==(x)->pParaBiDi)))
304 | ||
46f4442e A |
305 | typedef union { |
306 | DirProp *dirPropsMemory; | |
307 | UBiDiLevel *levelsMemory; | |
308 | Para *parasMemory; | |
309 | Run *runsMemory; | |
310 | } BidiMemoryForAllocation; | |
311 | ||
/*
 * Macros for initial checks at function entry.
 *
 * Each macro expands to a plain if-statement that returns from the calling
 * function (with retvalue, or without a value for the _VOID_ variants).
 * The "errcode" arguments are lvalues which receive the error code.
 * All macro arguments are parenthesized in the expansions so that
 * expression arguments are handled correctly.
 *
 * The expansions are deliberately kept as bare if-statements so that
 * existing call sites expand as before; do not use one of these macros
 * as the unbraced body of an if/else.
 */
#define RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrcode, retvalue)   \
        if((pErrcode)==NULL || U_FAILURE(*(pErrcode))) return (retvalue)
#define RETURN_IF_NOT_VALID_PARA(bidi, errcode, retvalue)   \
        if(!IS_VALID_PARA(bidi)) {  \
            (errcode)=U_INVALID_STATE_ERROR;  \
            return (retvalue);                \
        }
#define RETURN_IF_NOT_VALID_PARA_OR_LINE(bidi, errcode, retvalue)   \
        if(!IS_VALID_PARA_OR_LINE(bidi)) {  \
            (errcode)=U_INVALID_STATE_ERROR;  \
            return (retvalue);                \
        }
/* valid range is start<=arg<limit */
#define RETURN_IF_BAD_RANGE(arg, start, limit, errcode, retvalue)   \
        if((arg)<(start) || (arg)>=(limit)) {       \
            (errcode)=U_ILLEGAL_ARGUMENT_ERROR;     \
            return (retvalue);                      \
        }

#define RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrcode)   \
        if((pErrcode)==NULL || U_FAILURE(*(pErrcode))) return
#define RETURN_VOID_IF_NOT_VALID_PARA(bidi, errcode)   \
        if(!IS_VALID_PARA(bidi)) {  \
            (errcode)=U_INVALID_STATE_ERROR;  \
            return;                           \
        }
#define RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(bidi, errcode)   \
        if(!IS_VALID_PARA_OR_LINE(bidi)) {  \
            (errcode)=U_INVALID_STATE_ERROR;  \
            return;                           \
        }
/* valid range is start<=arg<limit */
#define RETURN_VOID_IF_BAD_RANGE(arg, start, limit, errcode)   \
        if((arg)<(start) || (arg)>=(limit)) {       \
            (errcode)=U_ILLEGAL_ARGUMENT_ERROR;     \
            return;                                 \
        }
348 | ||
b75a7d8f A |
349 | /* helper function to (re)allocate memory if allowed */ |
350 | U_CFUNC UBool | |
46f4442e | 351 | ubidi_getMemory(BidiMemoryForAllocation *pMemory, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded); |
b75a7d8f A |
352 | |
353 | /* helper macros for each allocated array in UBiDi */ | |
354 | #define getDirPropsMemory(pBiDi, length) \ | |
46f4442e | 355 | ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->dirPropsMemory, &(pBiDi)->dirPropsSize, \ |
b75a7d8f A |
356 | (pBiDi)->mayAllocateText, (length)) |
357 | ||
358 | #define getLevelsMemory(pBiDi, length) \ | |
46f4442e | 359 | ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->levelsMemory, &(pBiDi)->levelsSize, \ |
b75a7d8f A |
360 | (pBiDi)->mayAllocateText, (length)) |
361 | ||
362 | #define getRunsMemory(pBiDi, length) \ | |
46f4442e | 363 | ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->runsMemory, &(pBiDi)->runsSize, \ |
b75a7d8f A |
364 | (pBiDi)->mayAllocateRuns, (length)*sizeof(Run)) |
365 | ||
366 | /* additional macros used by ubidi_open() - always allow allocation */ | |
367 | #define getInitialDirPropsMemory(pBiDi, length) \ | |
46f4442e | 368 | ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->dirPropsMemory, &(pBiDi)->dirPropsSize, \ |
b75a7d8f A |
369 | TRUE, (length)) |
370 | ||
371 | #define getInitialLevelsMemory(pBiDi, length) \ | |
46f4442e | 372 | ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->levelsMemory, &(pBiDi)->levelsSize, \ |
b75a7d8f A |
373 | TRUE, (length)) |
374 | ||
73c04bcf | 375 | #define getInitialParasMemory(pBiDi, length) \ |
46f4442e | 376 | ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->parasMemory, &(pBiDi)->parasSize, \ |
73c04bcf A |
377 | TRUE, (length)*sizeof(Para)) |
378 | ||
b75a7d8f | 379 | #define getInitialRunsMemory(pBiDi, length) \ |
46f4442e | 380 | ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->runsMemory, &(pBiDi)->runsSize, \ |
b75a7d8f A |
381 | TRUE, (length)*sizeof(Run)) |
382 | ||
383 | #endif | |
384 | ||
385 | #endif |