]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/ubidiimp.h
ICU-8.11.4.tar.gz
[apple/icu.git] / icuSources / common / ubidiimp.h
CommitLineData
/*
******************************************************************************
*
*   Copyright (C) 1999-2006, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*   file name:  ubidiimp.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 1999aug06
*   created by: Markus W. Scherer
*/
16
17#ifndef UBIDIIMP_H
18#define UBIDIIMP_H
19
20/* set import/export definitions */
21#ifdef U_COMMON_IMPLEMENTATION
22
23#include "unicode/utypes.h"
24#include "unicode/uchar.h"
73c04bcf 25#include "ubidi_props.h"
b75a7d8f
A
26
/* miscellaneous definitions ---------------------------------------------- */

/* one directional property value per text character (see UBiDi.dirProps) */
typedef uint8_t DirProp;

/* bit set with one bit per directional property (see DIRPROP_FLAG) */
typedef uint32_t Flags;
31
32/* Comparing the description of the BiDi algorithm with this implementation
33 is easier with the same names for the BiDi types in the code as there.
34 See UCharDirection in uchar.h .
35*/
73c04bcf 36enum {
b75a7d8f
A
37 L= U_LEFT_TO_RIGHT,
38 R= U_RIGHT_TO_LEFT,
39 EN= U_EUROPEAN_NUMBER,
40 ES= U_EUROPEAN_NUMBER_SEPARATOR,
41 ET= U_EUROPEAN_NUMBER_TERMINATOR,
42 AN= U_ARABIC_NUMBER,
43 CS= U_COMMON_NUMBER_SEPARATOR,
44 B= U_BLOCK_SEPARATOR,
45 S= U_SEGMENT_SEPARATOR,
46 WS= U_WHITE_SPACE_NEUTRAL,
47 ON= U_OTHER_NEUTRAL,
48 LRE=U_LEFT_TO_RIGHT_EMBEDDING,
49 LRO=U_LEFT_TO_RIGHT_OVERRIDE,
50 AL= U_RIGHT_TO_LEFT_ARABIC,
51 RLE=U_RIGHT_TO_LEFT_EMBEDDING,
52 RLO=U_RIGHT_TO_LEFT_OVERRIDE,
53 PDF=U_POP_DIRECTIONAL_FORMAT,
54 NSM=U_DIR_NON_SPACING_MARK,
55 BN= U_BOUNDARY_NEUTRAL,
56 dirPropCount
57};
58
/*
 * Sometimes, bit values are more appropriate
 * to deal with directionality properties.
 * Abbreviations in these macro names refer to names
 * used in the BiDi algorithm.
 */
#define DIRPROP_FLAG(dir) (1UL<<(dir))

/* special flag for multiple runs from explicit embedding codes */
#define DIRPROP_FLAG_MULTI_RUNS (1UL<<31)

/* are there any characters that are LTR or RTL? */
#define MASK_LTR (DIRPROP_FLAG(L)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(AN)|DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO))
#define MASK_RTL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO))

/* explicit embedding codes */
#define MASK_LRX (DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO))
#define MASK_RLX (DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO))
#define MASK_OVERRIDE (DIRPROP_FLAG(LRO)|DIRPROP_FLAG(RLO))

#define MASK_EXPLICIT (MASK_LRX|MASK_RLX|DIRPROP_FLAG(PDF))
#define MASK_BN_EXPLICIT (DIRPROP_FLAG(BN)|MASK_EXPLICIT)

/* paragraph and segment separators */
#define MASK_B_S (DIRPROP_FLAG(B)|DIRPROP_FLAG(S))

/* all types that are counted as White Space or Neutral in some steps */
#define MASK_WS (MASK_B_S|DIRPROP_FLAG(WS)|MASK_BN_EXPLICIT)
#define MASK_N (DIRPROP_FLAG(ON)|MASK_WS)

/* all types that are included in a sequence of European Terminators for (W5) */
#define MASK_ET_NSM_BN (DIRPROP_FLAG(ET)|DIRPROP_FLAG(NSM)|MASK_BN_EXPLICIT)

/* types that are neutrals or could become neutrals in (Wn) */
#define MASK_POSSIBLE_N (DIRPROP_FLAG(CS)|DIRPROP_FLAG(ES)|DIRPROP_FLAG(ET)|MASK_N)

/*
 * These types may be changed to "e",
 * the embedding type (L or R) of the run,
 * in the BiDi algorithm (N2)
 */
#define MASK_EMBEDDING (DIRPROP_FLAG(NSM)|MASK_POSSIBLE_N)
101
/* the dirProp's L and R are defined to 0 and 1 values in UCharDirection */
/* so the low bit of a level (even/odd) maps directly to L/R */
#define GET_LR_FROM_LEVEL(level) ((DirProp)((level)&1))

/* true for the level values 0xfe and 0xff */
#define IS_DEFAULT_LEVEL(level) ((level)>=0xfe)
106
/*
 * The following bit is ORed to the property of characters in paragraphs
 * with contextual RTL direction when paraLevel is contextual.
 */
#define CONTEXT_RTL 0x80
#define NO_CONTEXT_RTL(dir) ((dir)&~CONTEXT_RTL)
/*
 * The following is a variant of DIRPROP_FLAG which ignores the CONTEXT_RTL bit.
 */
#define DIRPROP_FLAG_NC(dir) (1UL<<(NO_CONTEXT_RTL(dir)))

/*
 * Paragraph level that applies at text position index:
 * when defaultParaLevel is non-zero, it is the CONTEXT_RTL bit (bit 7)
 * of dirProps[index]; otherwise it is the object's paraLevel.
 */
#define GET_PARALEVEL(ubidi, index) \
            (UBiDiLevel)((ubidi)->defaultParaLevel ? (ubidi)->dirProps[index]>>7 \
                                                   : (ubidi)->paraLevel)
121
/* Paragraph type for multiple paragraph support ---------------------------- */
typedef int32_t Para;

/* carriage return and line feed code points */
#define CR  0x000D
#define LF  0x000A
b75a7d8f
A
127
/* Run structure for reordering --------------------------------------------- */

/* bit flags for Run.insertRemove when it is >0 */
enum {
    LRM_BEFORE=1,
    LRM_AFTER=2,
    RLM_BEFORE=4,
    RLM_AFTER=8
};

typedef struct Run {
    int32_t logicalStart;   /* first character of the run; b31 indicates even/odd level */
    int32_t visualLimit;    /* last visual position of the run +1 */
    int32_t insertRemove;   /* if >0, flags for inserting LRM/RLM before/after run,
                               if <0, count of bidi controls within run            */
} Run;
142
/* in a Run, logicalStart will get this bit set if the run level is odd */
#define INDEX_ODD_BIT (1UL<<31)

/*
 * Move the low (odd/even) bit of level into bit 31 of an index.
 * The shift is done in uint32_t: shifting a signed value into the sign bit
 * is undefined behavior in C, while the unsigned shift is well defined and
 * discards every bit of level except the lowest one.
 */
#define MAKE_INDEX_ODD_PAIR(index, level) ((index)|(int32_t)((uint32_t)(level)<<31))
#define ADD_ODD_BIT_FROM_LEVEL(x, level)  ((x)|=(int32_t)((uint32_t)(level)<<31))
#define REMOVE_ODD_BIT(x)                 ((x)&=~INDEX_ODD_BIT)

/*
 * GET_INDEX masks in 32 bits and yields an int32_t. Masking with the
 * unsigned long INDEX_ODD_BIT directly would, where long is 64 bits,
 * sign-extend an odd-run value and leave the upper 32 bits set, so the
 * result would only be correct after truncation back to 32 bits.
 */
#define GET_INDEX(x)   ((int32_t)((uint32_t)(x)&~(uint32_t)INDEX_ODD_BIT))
#define GET_ODD_BIT(x) ((uint32_t)(x)>>31)
#define IS_ODD_RUN(x)  (((x)&INDEX_ODD_BIT)!=0)
#define IS_EVEN_RUN(x) (((x)&INDEX_ODD_BIT)==0)
b75a7d8f
A
154
155U_CFUNC UBool
156ubidi_getRuns(UBiDi *pBiDi);
157
73c04bcf
A
/** BiDi control code points */
enum {
    ZWNJ_CHAR=0x200c,
    ZWJ_CHAR =0x200d,
    LRM_CHAR =0x200e,
    RLM_CHAR =0x200f,
    LRE_CHAR =0x202a,
    RLE_CHAR =0x202b,
    PDF_CHAR =0x202c,
    LRO_CHAR =0x202d,
    RLO_CHAR =0x202e
};

/*
 * True for the two contiguous ranges listed above:
 * ZWNJ_CHAR..RLM_CHAR (four values sharing all bits but the lowest two) and
 * LRE_CHAR..RLO_CHAR (five values; the unsigned subtraction wraps to a large
 * number for anything below LRE_CHAR).
 * Note: c is evaluated twice, so it must not have side effects.
 */
#define IS_BIDI_CONTROL_CHAR(c) (((uint32_t)(c)&0xfffffffc)==ZWNJ_CHAR || (uint32_t)((c)-LRE_CHAR)<5)
172
/* InsertPoints structure for noting where to put BiDi marks ---------------- */

/* one place in the text where a mark is to be put */
typedef struct Point {
    int32_t pos;    /* position in text */
    int32_t flag;   /* flag for LRM/RLM, before/after */
} Point;
179
180typedef struct InsertPoints {
181 int32_t capacity; /* number of points allocated */
182 int32_t size; /* number of points used */
183 int32_t confirmed; /* number of points confirmed */
184 UErrorCode errorCode; /* for eventual memory shortage */
185 Point *points; /* pointer to array of points */
186} InsertPoints;
187
188
b75a7d8f
A
189/* UBiDi structure ----------------------------------------------------------- */
190
191struct UBiDi {
73c04bcf
A
192 /* pointer to parent paragraph object (pointer to self if this object is
193 * a paragraph object); set to NULL in a newly opened object; set to a
194 * real value after a successful execution of ubidi_setPara or ubidi_setLine
195 */
196 const UBiDi * pParaBiDi;
197
198 const UBiDiProps *bdp;
199
b75a7d8f
A
200 /* alias pointer to the current text */
201 const UChar *text;
202
203 /* length of the current text */
73c04bcf
A
204 int32_t originalLength;
205
206 /* if the UBIDI_OPTION_STREAMING option is set, this is the length
207 * of text actually processed by ubidi_setPara, which may be shorter than
208 * the original length.
209 * Otherwise, it is identical to the original length.
210 */
b75a7d8f
A
211 int32_t length;
212
73c04bcf
A
213 /* if the UBIDI_OPTION_REMOVE_CONTROLS option is set, and/or
214 * marks are allowed to be inserted in one of the reordering mode, the
215 * length of the result string may be different from the processed length.
216 */
217 int32_t resultLength;
218
b75a7d8f 219 /* memory sizes in bytes */
73c04bcf 220 int32_t dirPropsSize, levelsSize, parasSize, runsSize;
b75a7d8f
A
221
222 /* allocated memory */
223 DirProp *dirPropsMemory;
224 UBiDiLevel *levelsMemory;
73c04bcf 225 Para *parasMemory;
b75a7d8f
A
226 Run *runsMemory;
227
228 /* indicators for whether memory may be allocated after ubidi_open() */
229 UBool mayAllocateText, mayAllocateRuns;
230
231 /* arrays with one value per text-character */
232 const DirProp *dirProps;
233 UBiDiLevel *levels;
234
235 /* are we performing an approximation of the "inverse BiDi" algorithm? */
236 UBool isInverse;
237
73c04bcf
A
238 /* are we using the basic algorithm or its variation? */
239 UBiDiReorderingMode reorderingMode;
240
241 /* UBIDI_REORDER_xxx values must be ordered so that all the regular
242 * logical to visual modes come first, and all inverse BiDi modes
243 * come last.
244 */
245 #define UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL UBIDI_REORDER_NUMBERS_SPECIAL
246
247 /* bitmask for reordering options */
248 uint32_t reorderingOptions;
249
250 /* must block separators receive level 0? */
251 UBool orderParagraphsLTR;
252
b75a7d8f
A
253 /* the paragraph level */
254 UBiDiLevel paraLevel;
73c04bcf
A
255 /* original paraLevel when contextual */
256 /* must be one of UBIDI_DEFAULT_xxx or 0 if not contextual */
257 UBiDiLevel defaultParaLevel;
258
259 /* the following is set in ubidi_setPara, used in processPropertySeq */
260 const struct ImpTabPair * pImpTabPair; /* pointer to levels state table pair */
b75a7d8f
A
261
262 /* the overall paragraph or line directionality - see UBiDiDirection */
263 UBiDiDirection direction;
264
265 /* flags is a bit set for which directional properties are in the text */
266 Flags flags;
267
73c04bcf
A
268 /* lastArabicPos is index to the last AL in the text, -1 if none */
269 int32_t lastArabicPos;
270
b75a7d8f
A
271 /* characters after trailingWSStart are WS and are */
272 /* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */
273 int32_t trailingWSStart;
274
73c04bcf
A
275 /* fields for paragraph handling */
276 int32_t paraCount; /* set in getDirProps() */
277 Para *paras; /* limits of paragraphs, filled in
278 ResolveExplicitLevels() or CheckExplicitLevels() */
279
280 /* for single paragraph text, we only need a tiny array of paras (no malloc()) */
281 Para simpleParas[1];
282
b75a7d8f
A
283 /* fields for line reordering */
284 int32_t runCount; /* ==-1: runs not set up yet */
285 Run *runs;
286
287 /* for non-mixed text, we only need a tiny array of runs (no malloc()) */
288 Run simpleRuns[1];
73c04bcf
A
289
290 /* for inverse Bidi with insertion of directional marks */
291 InsertPoints insertPoints;
292
293 /* for option UBIDI_OPTION_REMOVE_CONTROLS */
294 int32_t controlCount;
295
296 /* for Bidi class callback */
297 UBiDiClassCallback *fnClassCallback; /* action pointer */
298 const void *coClassCallback; /* context pointer */
b75a7d8f
A
299};
300
73c04bcf
A
/*
 * Validity checks based on the pParaBiDi link; all of them accept NULL
 * and then evaluate to false.
 *  - IS_VALID_PARA: x points to itself as its paragraph object.
 *  - IS_VALID_LINE: x has a paragraph object and that object points to
 *    itself (this also holds when x is itself such a paragraph object).
 *  - IS_VALID_PARA_OR_LINE: either of the two conditions above.
 * The argument is evaluated several times, so it must not have side effects.
 */
#define IS_VALID_PARA(x)            ((x) && ((x)->pParaBiDi==(x)))
#define IS_VALID_LINE(x)            ((x) && ((x)->pParaBiDi) && ((x)->pParaBiDi->pParaBiDi==(x)->pParaBiDi))
#define IS_VALID_PARA_OR_LINE(x)    ((x) && ((x)->pParaBiDi==(x) || (((x)->pParaBiDi) && (x)->pParaBiDi->pParaBiDi==(x)->pParaBiDi)))
304
b75a7d8f
A
305/* helper function to (re)allocate memory if allowed */
306U_CFUNC UBool
307ubidi_getMemory(void **pMemory, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded);
308
/* helper macros for each allocated array in UBiDi */
/* (sizes passed on are in bytes; Run and Para counts are scaled by sizeof) */
#define getDirPropsMemory(pBiDi, length) \
        ubidi_getMemory((void **)&(pBiDi)->dirPropsMemory, &(pBiDi)->dirPropsSize, \
                        (pBiDi)->mayAllocateText, (length))

#define getLevelsMemory(pBiDi, length) \
        ubidi_getMemory((void **)&(pBiDi)->levelsMemory, &(pBiDi)->levelsSize, \
                        (pBiDi)->mayAllocateText, (length))

#define getRunsMemory(pBiDi, length) \
        ubidi_getMemory((void **)&(pBiDi)->runsMemory, &(pBiDi)->runsSize, \
                        (pBiDi)->mayAllocateRuns, (length)*sizeof(Run))

/* additional macros used by ubidi_open() - always allow allocation */
#define getInitialDirPropsMemory(pBiDi, length) \
        ubidi_getMemory((void **)&(pBiDi)->dirPropsMemory, &(pBiDi)->dirPropsSize, \
                        TRUE, (length))

#define getInitialLevelsMemory(pBiDi, length) \
        ubidi_getMemory((void **)&(pBiDi)->levelsMemory, &(pBiDi)->levelsSize, \
                        TRUE, (length))

#define getInitialParasMemory(pBiDi, length) \
        ubidi_getMemory((void **)&(pBiDi)->parasMemory, &(pBiDi)->parasSize, \
                        TRUE, (length)*sizeof(Para))

#define getInitialRunsMemory(pBiDi, length) \
        ubidi_getMemory((void **)&(pBiDi)->runsMemory, &(pBiDi)->runsSize, \
                        TRUE, (length)*sizeof(Run))
338
339#endif
340
341#endif