]>
Commit | Line | Data |
---|---|---|
73c04bcf | 1 | /* |
b75a7d8f A |
2 | ****************************************************************************** |
3 | * | |
73c04bcf | 4 | * Copyright (C) 1999-2006, International Business Machines |
b75a7d8f A |
5 | * Corporation and others. All Rights Reserved. |
6 | * | |
7 | ****************************************************************************** | |
8 | * file name: ubidiimp.h | |
9 | * encoding: US-ASCII | |
10 | * tab size: 8 (not used) | |
11 | * indentation:4 | |
12 | * | |
13 | * created on: 1999aug06 | |
14 | * created by: Markus W. Scherer | |
15 | */ | |
16 | ||
17 | #ifndef UBIDIIMP_H | |
18 | #define UBIDIIMP_H | |
19 | ||
20 | /* set import/export definitions */ | |
21 | #ifdef U_COMMON_IMPLEMENTATION | |
22 | ||
23 | #include "unicode/utypes.h" | |
24 | #include "unicode/uchar.h" | |
73c04bcf | 25 | #include "ubidi_props.h" |
b75a7d8f A |
26 | |
27 | /* miscellaneous definitions ---------------------------------------------- */ | |
28 | ||
29 | typedef uint8_t DirProp; | |
30 | typedef uint32_t Flags; | |
31 | ||
32 | /* Comparing the description of the BiDi algorithm with this implementation | |
33 | is easier with the same names for the BiDi types in the code as there. | |
34 | See UCharDirection in uchar.h . | |
35 | */ | |
73c04bcf | 36 | enum { |
b75a7d8f A |
37 | L= U_LEFT_TO_RIGHT, |
38 | R= U_RIGHT_TO_LEFT, | |
39 | EN= U_EUROPEAN_NUMBER, | |
40 | ES= U_EUROPEAN_NUMBER_SEPARATOR, | |
41 | ET= U_EUROPEAN_NUMBER_TERMINATOR, | |
42 | AN= U_ARABIC_NUMBER, | |
43 | CS= U_COMMON_NUMBER_SEPARATOR, | |
44 | B= U_BLOCK_SEPARATOR, | |
45 | S= U_SEGMENT_SEPARATOR, | |
46 | WS= U_WHITE_SPACE_NEUTRAL, | |
47 | ON= U_OTHER_NEUTRAL, | |
48 | LRE=U_LEFT_TO_RIGHT_EMBEDDING, | |
49 | LRO=U_LEFT_TO_RIGHT_OVERRIDE, | |
50 | AL= U_RIGHT_TO_LEFT_ARABIC, | |
51 | RLE=U_RIGHT_TO_LEFT_EMBEDDING, | |
52 | RLO=U_RIGHT_TO_LEFT_OVERRIDE, | |
53 | PDF=U_POP_DIRECTIONAL_FORMAT, | |
54 | NSM=U_DIR_NON_SPACING_MARK, | |
55 | BN= U_BOUNDARY_NEUTRAL, | |
56 | dirPropCount | |
57 | }; | |
58 | ||
/*
 * Directional properties are sometimes easier to handle as bit values.
 * The abbreviations in these macro names are the names used in the
 * BiDi algorithm.
 */

/* the single bit that stands for one directional property */
#define DIRPROP_FLAG(dir) (1UL<<(dir))

/* special flag for multiple runs from explicit embedding codes */
#define DIRPROP_FLAG_MULTI_RUNS (1UL<<31)

/* explicit embedding codes */
#define MASK_LRX \
    (DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO))
#define MASK_RLX \
    (DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO))
#define MASK_OVERRIDE \
    (DIRPROP_FLAG(LRO)|DIRPROP_FLAG(RLO))

#define MASK_EXPLICIT \
    (MASK_LRX|MASK_RLX|DIRPROP_FLAG(PDF))
#define MASK_BN_EXPLICIT \
    (DIRPROP_FLAG(BN)|MASK_EXPLICIT)

/* are there any characters that are LTR or RTL? */
#define MASK_LTR \
    (DIRPROP_FLAG(L)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(AN)|MASK_LRX)
#define MASK_RTL \
    (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|MASK_RLX)

/* paragraph and segment separators */
#define MASK_B_S \
    (DIRPROP_FLAG(B)|DIRPROP_FLAG(S))

/* all types that are counted as White Space or Neutral in some steps */
#define MASK_WS \
    (MASK_B_S|DIRPROP_FLAG(WS)|MASK_BN_EXPLICIT)
#define MASK_N \
    (DIRPROP_FLAG(ON)|MASK_WS)

/* all types that are included in a sequence of European Terminators for (W5) */
#define MASK_ET_NSM_BN \
    (DIRPROP_FLAG(ET)|DIRPROP_FLAG(NSM)|MASK_BN_EXPLICIT)

/* types that are neutrals or could become neutrals in (Wn) */
#define MASK_POSSIBLE_N \
    (DIRPROP_FLAG(CS)|DIRPROP_FLAG(ES)|DIRPROP_FLAG(ET)|MASK_N)

/*
 * Types that may be changed to "e", the embedding type (L or R) of the run,
 * in the BiDi algorithm (N2).
 */
#define MASK_EMBEDDING \
    (DIRPROP_FLAG(NSM)|MASK_POSSIBLE_N)

/*
 * Maps a level to L or R by its lowest bit.
 * This works because L and R are defined as 0 and 1 in UCharDirection.
 */
#define GET_LR_FROM_LEVEL(level) ((DirProp)((level)&1))
104 | ||
73c04bcf A |
/*
 * True for level values of 0xfe and above.
 * NOTE(review): presumably these are the UBIDI_DEFAULT_xxx pseudo-levels -
 * confirm against ubidi.h.
 */
#define IS_DEFAULT_LEVEL(level) ((level)>=0xfe)

/*
 * The following bit is ORed to the property of characters in paragraphs
 * with contextual RTL direction when paraLevel is contextual.
 */
#define CONTEXT_RTL 0x80

/* the property value with the CONTEXT_RTL bit cleared */
#define NO_CONTEXT_RTL(dir) ((dir)&~CONTEXT_RTL)

/*
 * A variant of DIRPROP_FLAG which ignores the CONTEXT_RTL bit.
 */
#define DIRPROP_FLAG_NC(dir) (1UL<<((dir)&~CONTEXT_RTL))

/*
 * Paragraph level that applies at a text index:
 * if defaultParaLevel is non-zero, it is bit 7 (the CONTEXT_RTL bit)
 * of the character's dirProps entry, otherwise it is paraLevel.
 */
#define GET_PARALEVEL(ubidi, index) \
    ((UBiDiLevel)((ubidi)->defaultParaLevel!=0 \
        ? (ubidi)->dirProps[index]>>7 \
        : (ubidi)->paraLevel))

/* Paragraph type for multiple paragraph support ---------------------------- */
typedef int32_t Para;

/* code points of carriage return and line feed */
#define CR 0x000D
#define LF 0x000A
b75a7d8f A |
127 | |
/* Run structure for reordering --------------------------------------------- */

/*
 * Single-bit flags that can be combined with bitwise OR; the LRM/RLM
 * before/after flags that Run.insertRemove holds when it is >0.
 */
enum {
    LRM_BEFORE = 1 << 0,
    LRM_AFTER  = 1 << 1,
    RLM_BEFORE = 1 << 2,
    RLM_AFTER  = 1 << 3
};

typedef struct Run {
    /* first character of the run; b31 indicates even/odd level */
    int32_t logicalStart;
    /* last visual position of the run +1 */
    int32_t visualLimit;
    /*
     * if >0, flags for inserting LRM/RLM before/after run,
     * if <0, count of bidi controls within run
     */
    int32_t insertRemove;
} Run;
142 | ||
/*
 * In a Run, logicalStart packs two values into one int32_t:
 * bits 0..30 hold the logical index, and bit 31 is set if the run level
 * is odd.
 */
#define INDEX_ODD_BIT (1UL<<31)

/*
 * Combine an index with the odd bit taken from a level, or add that bit to
 * an existing value x. The shift is done in uint32_t because shifting a
 * signed 1 into bit 31 is undefined behavior; only bit 0 of level survives
 * the shift. The conversion back to int32_t relies on the usual
 * two's-complement representation.
 */
#define MAKE_INDEX_ODD_PAIR(index, level) ((int32_t)((uint32_t)(index)|((uint32_t)(level)<<31)))
#define ADD_ODD_BIT_FROM_LEVEL(x, level) ((x)|=(int32_t)((uint32_t)(level)<<31))

/*
 * Clear the odd bit of x in place. The mask is narrowed to 32 bits first so
 * that the result does not depend on the width of unsigned long.
 */
#define REMOVE_ODD_BIT(x) ((x)&=(int32_t)~(uint32_t)INDEX_ODD_BIT)

/*
 * The index part of x, always as an int32_t in the range 0..0x7fffffff.
 * Masking in uint32_t keeps a negative (odd-run) x from being sign-extended
 * into a huge value where unsigned long is wider than 32 bits.
 */
#define GET_INDEX(x) ((int32_t)((uint32_t)(x)&~(uint32_t)INDEX_ODD_BIT))

/* the odd bit of x as 0 or 1 */
#define GET_ODD_BIT(x) ((uint32_t)(x)>>31)

/* tests on the odd bit of x */
#define IS_ODD_RUN(x) (((x)&INDEX_ODD_BIT)!=0)
#define IS_EVEN_RUN(x) (((x)&INDEX_ODD_BIT)==0)
b75a7d8f A |
154 | |
155 | U_CFUNC UBool | |
156 | ubidi_getRuns(UBiDi *pBiDi); | |
157 | ||
73c04bcf A |
/** BiDi control code points */
enum {
    ZWNJ_CHAR = 0x200c,     /* U+200C zero width non-joiner */
    ZWJ_CHAR  = 0x200d,     /* U+200D zero width joiner */
    LRM_CHAR  = 0x200e,     /* U+200E left-to-right mark */
    RLM_CHAR  = 0x200f,     /* U+200F right-to-left mark */
    LRE_CHAR  = 0x202a,     /* U+202A left-to-right embedding */
    RLE_CHAR  = 0x202b,     /* U+202B right-to-left embedding */
    PDF_CHAR  = 0x202c,     /* U+202C pop directional formatting */
    LRO_CHAR  = 0x202d,     /* U+202D left-to-right override */
    RLO_CHAR  = 0x202e      /* U+202E right-to-left override */
};

/*
 * True if c is one of the code points listed above, that is, if c is in
 * ZWNJ_CHAR..RLM_CHAR (4 values) or in LRE_CHAR..RLO_CHAR (5 values).
 * Each range is tested with a single unsigned comparison.
 */
#define IS_BIDI_CONTROL_CHAR(c) \
    (((uint32_t)(c)-(uint32_t)ZWNJ_CHAR)<4 || (uint32_t)((c)-LRE_CHAR)<5)
172 | ||
/* InsertPoints structure for noting where to put BiDi marks ---------------- */

/* one noted place for a BiDi mark */
struct Point {
    /* position in text */
    int32_t pos;
    /* flag for LRM/RLM, before/after */
    int32_t flag;
};
typedef struct Point Point;
179 | ||
180 | typedef struct InsertPoints { | |
181 | int32_t capacity; /* number of points allocated */ | |
182 | int32_t size; /* number of points used */ | |
183 | int32_t confirmed; /* number of points confirmed */ | |
184 | UErrorCode errorCode; /* for eventual memory shortage */ | |
185 | Point *points; /* pointer to array of points */ | |
186 | } InsertPoints; | |
187 | ||
188 | ||
b75a7d8f A |
189 | /* UBiDi structure ----------------------------------------------------------- */ |
190 | ||
191 | struct UBiDi { | |
73c04bcf A |
192 | /* pointer to parent paragraph object (pointer to self if this object is |
193 | * a paragraph object); set to NULL in a newly opened object; set to a | |
194 | * real value after a successful execution of ubidi_setPara or ubidi_setLine | |
195 | */ | |
196 | const UBiDi * pParaBiDi; | |
197 | ||
198 | const UBiDiProps *bdp; | |
199 | ||
b75a7d8f A |
200 | /* alias pointer to the current text */ |
201 | const UChar *text; | |
202 | ||
203 | /* length of the current text */ | |
73c04bcf A |
204 | int32_t originalLength; |
205 | ||
206 | /* if the UBIDI_OPTION_STREAMING option is set, this is the length | |
207 | * of text actually processed by ubidi_setPara, which may be shorter than | |
208 | * the original length. | |
209 | * Otherwise, it is identical to the original length. | |
210 | */ | |
b75a7d8f A |
211 | int32_t length; |
212 | ||
73c04bcf A |
213 | /* if the UBIDI_OPTION_REMOVE_CONTROLS option is set, and/or |
214 | * marks are allowed to be inserted in one of the reordering mode, the | |
215 | * length of the result string may be different from the processed length. | |
216 | */ | |
217 | int32_t resultLength; | |
218 | ||
b75a7d8f | 219 | /* memory sizes in bytes */ |
73c04bcf | 220 | int32_t dirPropsSize, levelsSize, parasSize, runsSize; |
b75a7d8f A |
221 | |
222 | /* allocated memory */ | |
223 | DirProp *dirPropsMemory; | |
224 | UBiDiLevel *levelsMemory; | |
73c04bcf | 225 | Para *parasMemory; |
b75a7d8f A |
226 | Run *runsMemory; |
227 | ||
228 | /* indicators for whether memory may be allocated after ubidi_open() */ | |
229 | UBool mayAllocateText, mayAllocateRuns; | |
230 | ||
231 | /* arrays with one value per text-character */ | |
232 | const DirProp *dirProps; | |
233 | UBiDiLevel *levels; | |
234 | ||
235 | /* are we performing an approximation of the "inverse BiDi" algorithm? */ | |
236 | UBool isInverse; | |
237 | ||
73c04bcf A |
238 | /* are we using the basic algorithm or its variation? */ |
239 | UBiDiReorderingMode reorderingMode; | |
240 | ||
241 | /* UBIDI_REORDER_xxx values must be ordered so that all the regular | |
242 | * logical to visual modes come first, and all inverse BiDi modes | |
243 | * come last. | |
244 | */ | |
245 | #define UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL UBIDI_REORDER_NUMBERS_SPECIAL | |
246 | ||
247 | /* bitmask for reordering options */ | |
248 | uint32_t reorderingOptions; | |
249 | ||
250 | /* must block separators receive level 0? */ | |
251 | UBool orderParagraphsLTR; | |
252 | ||
b75a7d8f A |
253 | /* the paragraph level */ |
254 | UBiDiLevel paraLevel; | |
73c04bcf A |
255 | /* original paraLevel when contextual */ |
256 | /* must be one of UBIDI_DEFAULT_xxx or 0 if not contextual */ | |
257 | UBiDiLevel defaultParaLevel; | |
258 | ||
259 | /* the following is set in ubidi_setPara, used in processPropertySeq */ | |
260 | const struct ImpTabPair * pImpTabPair; /* pointer to levels state table pair */ | |
b75a7d8f A |
261 | |
262 | /* the overall paragraph or line directionality - see UBiDiDirection */ | |
263 | UBiDiDirection direction; | |
264 | ||
265 | /* flags is a bit set for which directional properties are in the text */ | |
266 | Flags flags; | |
267 | ||
73c04bcf A |
268 | /* lastArabicPos is index to the last AL in the text, -1 if none */ |
269 | int32_t lastArabicPos; | |
270 | ||
b75a7d8f A |
271 | /* characters after trailingWSStart are WS and are */ |
272 | /* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */ | |
273 | int32_t trailingWSStart; | |
274 | ||
73c04bcf A |
275 | /* fields for paragraph handling */ |
276 | int32_t paraCount; /* set in getDirProps() */ | |
277 | Para *paras; /* limits of paragraphs, filled in | |
278 | ResolveExplicitLevels() or CheckExplicitLevels() */ | |
279 | ||
280 | /* for single paragraph text, we only need a tiny array of paras (no malloc()) */ | |
281 | Para simpleParas[1]; | |
282 | ||
b75a7d8f A |
283 | /* fields for line reordering */ |
284 | int32_t runCount; /* ==-1: runs not set up yet */ | |
285 | Run *runs; | |
286 | ||
287 | /* for non-mixed text, we only need a tiny array of runs (no malloc()) */ | |
288 | Run simpleRuns[1]; | |
73c04bcf A |
289 | |
290 | /* for inverse Bidi with insertion of directional marks */ | |
291 | InsertPoints insertPoints; | |
292 | ||
293 | /* for option UBIDI_OPTION_REMOVE_CONTROLS */ | |
294 | int32_t controlCount; | |
295 | ||
296 | /* for Bidi class callback */ | |
297 | UBiDiClassCallback *fnClassCallback; /* action pointer */ | |
298 | const void *coClassCallback; /* context pointer */ | |
b75a7d8f A |
299 | }; |
300 | ||
73c04bcf A |
/*
 * Validity checks on a UBiDi pointer x, based on its pParaBiDi field.
 * IS_VALID_PARA: x is non-null and is its own paragraph object.
 * IS_VALID_LINE: x is non-null and has a parent that is its own
 *                paragraph object.
 * IS_VALID_PARA_OR_LINE: either of the above.
 * All three evaluate x more than once.
 */
#define IS_VALID_PARA(x) \
    ((x)!=0 && (x)->pParaBiDi==(x))
#define IS_VALID_LINE(x) \
    ((x)!=0 && (x)->pParaBiDi!=0 && (x)->pParaBiDi->pParaBiDi==(x)->pParaBiDi)
#define IS_VALID_PARA_OR_LINE(x) \
    (IS_VALID_PARA(x) || IS_VALID_LINE(x))
304 | ||
b75a7d8f A |
305 | /* helper function to (re)allocate memory if allowed */ |
306 | U_CFUNC UBool | |
307 | ubidi_getMemory(void **pMemory, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded); | |
308 | ||
/*
 * Helper macros for each allocated array in UBiDi.
 * Each one passes ubidi_getMemory() the array's memory pointer, its size
 * field, the matching mayAllocate flag and the size needed. For the runs
 * array the size needed is length*sizeof(Run); the conversion of that
 * size_t value to the int32_t parameter is written out explicitly.
 */
#define getDirPropsMemory(pBiDi, length) \
    ubidi_getMemory((void **)&(pBiDi)->dirPropsMemory, \
                    &(pBiDi)->dirPropsSize, \
                    (pBiDi)->mayAllocateText, \
                    (length))

#define getLevelsMemory(pBiDi, length) \
    ubidi_getMemory((void **)&(pBiDi)->levelsMemory, \
                    &(pBiDi)->levelsSize, \
                    (pBiDi)->mayAllocateText, \
                    (length))

#define getRunsMemory(pBiDi, length) \
    ubidi_getMemory((void **)&(pBiDi)->runsMemory, \
                    &(pBiDi)->runsSize, \
                    (pBiDi)->mayAllocateRuns, \
                    (int32_t)((length)*sizeof(Run)))

/* additional macros used by ubidi_open() - always allow allocation */
#define getInitialDirPropsMemory(pBiDi, length) \
    ubidi_getMemory((void **)&(pBiDi)->dirPropsMemory, \
                    &(pBiDi)->dirPropsSize, \
                    TRUE, \
                    (length))

#define getInitialLevelsMemory(pBiDi, length) \
    ubidi_getMemory((void **)&(pBiDi)->levelsMemory, \
                    &(pBiDi)->levelsSize, \
                    TRUE, \
                    (length))

#define getInitialParasMemory(pBiDi, length) \
    ubidi_getMemory((void **)&(pBiDi)->parasMemory, \
                    &(pBiDi)->parasSize, \
                    TRUE, \
                    (int32_t)((length)*sizeof(Para)))

#define getInitialRunsMemory(pBiDi, length) \
    ubidi_getMemory((void **)&(pBiDi)->runsMemory, \
                    &(pBiDi)->runsSize, \
                    TRUE, \
                    (int32_t)((length)*sizeof(Run)))
338 | ||
339 | #endif | |
340 | ||
341 | #endif |