]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/ubidiimp.h
ICU-59131.0.1.tar.gz
[apple/icu.git] / icuSources / common / ubidiimp.h
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 *
6 * Copyright (C) 1999-2016, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 ******************************************************************************
10 * file name: ubidiimp.h
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 1999aug06
16 * created by: Markus W. Scherer, updated by Matitiahu Allouche
17 */
18
19 #ifndef UBIDIIMP_H
20 #define UBIDIIMP_H
21
22 #include "unicode/utypes.h"
23 #include "unicode/ubidi.h"
24 #include "unicode/uchar.h"
25 #include "ubidi_props.h"
26
/* miscellaneous definitions ---------------------------------------------- */

/* storage type for one directional property (bidi class) value */
typedef uint8_t DirProp;

/* storage type for a bit set of directional properties */
typedef uint32_t Flags;
31
32 /* Comparing the description of the BiDi algorithm with this implementation
33 is easier with the same names for the BiDi types in the code as there.
34 See UCharDirection in uchar.h .
35 */
36 enum {
37 L= U_LEFT_TO_RIGHT, /* 0 */
38 R= U_RIGHT_TO_LEFT, /* 1 */
39 EN= U_EUROPEAN_NUMBER, /* 2 */
40 ES= U_EUROPEAN_NUMBER_SEPARATOR, /* 3 */
41 ET= U_EUROPEAN_NUMBER_TERMINATOR, /* 4 */
42 AN= U_ARABIC_NUMBER, /* 5 */
43 CS= U_COMMON_NUMBER_SEPARATOR, /* 6 */
44 B= U_BLOCK_SEPARATOR, /* 7 */
45 S= U_SEGMENT_SEPARATOR, /* 8 */
46 WS= U_WHITE_SPACE_NEUTRAL, /* 9 */
47 ON= U_OTHER_NEUTRAL, /* 10 */
48 LRE=U_LEFT_TO_RIGHT_EMBEDDING, /* 11 */
49 LRO=U_LEFT_TO_RIGHT_OVERRIDE, /* 12 */
50 AL= U_RIGHT_TO_LEFT_ARABIC, /* 13 */
51 RLE=U_RIGHT_TO_LEFT_EMBEDDING, /* 14 */
52 RLO=U_RIGHT_TO_LEFT_OVERRIDE, /* 15 */
53 PDF=U_POP_DIRECTIONAL_FORMAT, /* 16 */
54 NSM=U_DIR_NON_SPACING_MARK, /* 17 */
55 BN= U_BOUNDARY_NEUTRAL, /* 18 */
56 FSI=U_FIRST_STRONG_ISOLATE, /* 19 */
57 LRI=U_LEFT_TO_RIGHT_ISOLATE, /* 20 */
58 RLI=U_RIGHT_TO_LEFT_ISOLATE, /* 21 */
59 PDI=U_POP_DIRECTIONAL_ISOLATE, /* 22 */
60 ENL, /* EN after W7 */ /* 23 */
61 ENR, /* EN not subject to W7 */ /* 24 */
62 dirPropCount
63 };
64
/*
 * Sometimes, bit values are more appropriate
 * to deal with directionality properties.
 * Abbreviations in these macro names refer to names
 * used in the BiDi algorithm.
 */

/* single-bit mask with bit number dir set (type unsigned long) */
#define DIRPROP_FLAG(dir) (1UL<<(dir))

/*
 * prop with bits 5..7 (mask 0xE0) cleared.
 * The definition used to be followed by a stray run of '?' characters which
 * made every expansion of the macro a syntax error; they have been removed.
 */
#define PURE_DIRPROP(prop) ((prop)&~0xE0)
72
/* special flag for multiple runs from explicit embedding codes */
#define DIRPROP_FLAG_MULTI_RUNS (1UL<<31)

/* are there any characters that are LTR or RTL? */
#define MASK_LTR \
    (DIRPROP_FLAG(L)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(ENL)|DIRPROP_FLAG(ENR)| \
     DIRPROP_FLAG(AN)|DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO)|DIRPROP_FLAG(LRI))
#define MASK_RTL \
    (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO)| \
     DIRPROP_FLAG(RLI))
#define MASK_R_AL \
    (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL))
#define MASK_STRONG_EN_AN \
    (DIRPROP_FLAG(L)|DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)| \
     DIRPROP_FLAG(EN)|DIRPROP_FLAG(AN))

/* explicit embedding codes */
#define MASK_EXPLICIT \
    (DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO)| \
     DIRPROP_FLAG(PDF))

/* explicit isolate codes */
#define MASK_ISO \
    (DIRPROP_FLAG(LRI)|DIRPROP_FLAG(RLI)|DIRPROP_FLAG(FSI)|DIRPROP_FLAG(PDI))

/* boundary neutrals together with the explicit embedding codes */
#define MASK_BN_EXPLICIT \
    (DIRPROP_FLAG(BN)|MASK_EXPLICIT)

/* paragraph and segment separators */
#define MASK_B_S \
    (DIRPROP_FLAG(B)|DIRPROP_FLAG(S))

/* all types that are counted as White Space or Neutral in some steps */
#define MASK_WS \
    (MASK_B_S|DIRPROP_FLAG(WS)|MASK_BN_EXPLICIT|MASK_ISO)

/* types that are neutrals or could become neutrals in (Wn) */
#define MASK_POSSIBLE_N \
    (DIRPROP_FLAG(ON)|DIRPROP_FLAG(CS)|DIRPROP_FLAG(ES)|DIRPROP_FLAG(ET)| \
     MASK_WS)

/*
 * These types may be changed to "e",
 * the embedding type (L or R) of the run,
 * in the BiDi algorithm (N2)
 */
#define MASK_EMBEDDING \
    (DIRPROP_FLAG(NSM)|MASK_POSSIBLE_N)
105
/*
 * L and R are defined to the values 0 and 1 in UCharDirection,
 * so the lowest bit of a level is directly usable as a DirProp.
 */
#define GET_LR_FROM_LEVEL(level) ((DirProp)((level)&1))

/*
 * True for level values of 0xfe and above
 * (presumably the UBIDI_DEFAULT_xxx pseudo-levels - see ubidi.h).
 */
#define IS_DEFAULT_LEVEL(level) ((level)>=0xfe)

/*
 * The following bit is used for the directional isolate status.
 * Stack entries corresponding to isolate sequences are greater than ISOLATE.
 */
#define ISOLATE 0x0100
116
117 U_CFUNC UBiDiLevel
118 ubidi_getParaLevelAtIndex(const UBiDi *pBiDi, int32_t index);
119
120 #define GET_PARALEVEL(ubidi, index) \
121 ((UBiDiLevel)(!(ubidi)->defaultParaLevel || (index)<(ubidi)->paras[0].limit ? \
122 (ubidi)->paraLevel : ubidi_getParaLevelAtIndex((ubidi), (index))))
123
/*
 * Sizes of the small arrays that are used first, without malloc,
 * before any larger array has to be allocated.
 */
/* number of paras entries */
#define SIMPLE_PARAS_COUNT 10
/* number of isolate entries */
#define SIMPLE_ISOLATES_COUNT 5
/* number of isolate run entries for paired brackets */
#define SIMPLE_OPENINGS_COUNT 20

/* carriage return and line feed code points */
#define CR 0x000D
#define LF 0x000A
133
/* Run structure for reordering --------------------------------------------- */

/* single-bit flags for inserting an LRM or RLM before or after a run */
enum {
    LRM_BEFORE = 1<<0,
    LRM_AFTER  = 1<<1,
    RLM_BEFORE = 1<<2,
    RLM_AFTER  = 1<<3
};
141
/* one entry of the paras array: a limit and a level */
typedef struct Para {
    int32_t limit;  /* compared with text indexes, see GET_PARALEVEL() */
    int32_t level;
} Para;
146
147 enum { /* flags for Opening.flags */
148 FOUND_L=DIRPROP_FLAG(L),
149 FOUND_R=DIRPROP_FLAG(R)
150 };
151
152 typedef struct Opening {
153 int32_t position; /* position of opening bracket */
154 int32_t match; /* matching char or -position of closing bracket */
155 int32_t contextPos; /* position of last strong char found before opening */
156 uint16_t flags; /* bits for L or R/AL found within the pair */
157 UBiDiDirection contextDir; /* L or R according to last strong char before opening */
158 uint8_t filler; /* to complete a nice multiple of 4 chars */
159 } Opening;
160
161 typedef struct IsoRun {
162 int32_t contextPos; /* position of char determining context */
163 uint16_t start; /* index of first opening entry for this run */
164 uint16_t limit; /* index after last opening entry for this run */
165 UBiDiLevel level; /* level of this run */
166 DirProp lastStrong; /* bidi class of last strong char found in this run */
167 DirProp lastBase; /* bidi class of last base char found in this run */
168 UBiDiDirection contextDir; /* L or R to use as context for following openings */
169 } IsoRun;
170
171 typedef struct BracketData {
172 UBiDi *pBiDi;
173 /* array of opening entries which should be enough in most cases; no malloc() */
174 Opening simpleOpenings[SIMPLE_OPENINGS_COUNT];
175 Opening *openings; /* pointer to current array of entries */
176 int32_t openingsCount; /* number of allocated entries */
177 int32_t isoRunLast; /* index of last used entry */
178 /* array of nested isolated sequence entries; can never excess UBIDI_MAX_EXPLICIT_LEVEL
179 + 1 for index 0, + 1 for before the first isolated sequence */
180 IsoRun isoRuns[UBIDI_MAX_EXPLICIT_LEVEL+2];
181 UBool isNumbersSpecial; /* reordering mode for NUMBERS_SPECIAL */
182 } BracketData;
183
/*
 * Element type of the isolates arrays in UBiDi (isolates, simpleIsolates).
 * The meaning of the individual fields is defined by the code that uses
 * them; it is not described in this header.
 */
typedef struct Isolate {
    int32_t startON;
    int32_t start1;
    int32_t state;
    int16_t stateImp;
} Isolate;
190
/* one run of text, as used for reordering */
typedef struct Run {
    /* first character of the run; b31 indicates even/odd level */
    int32_t logicalStart;
    /* last visual position of the run +1 */
    int32_t visualLimit;
    /*
     * if >0, flags for inserting LRM/RLM before/after run,
     * if <0, count of bidi controls within run
     */
    int32_t insertRemove;
} Run;
197
/* in a Run, logicalStart will get this bit set if the run level is odd */
#define INDEX_ODD_BIT (1UL<<31)

/*
 * Combine an index with the odd/even bit of a level.
 * Only the lowest bit of the level is used, and the shift is done on an
 * unsigned value: shifting a signed 1 into bit 31 is undefined behavior in C.
 */
#define MAKE_INDEX_ODD_PAIR(index, level) ((index)|(int32_t)((uint32_t)((level)&1)<<31))
/* set bit 31 of the lvalue x if the level is odd */
#define ADD_ODD_BIT_FROM_LEVEL(x, level) ((x)|=(int32_t)((uint32_t)((level)&1)<<31))
/* clear bit 31 of the lvalue x */
#define REMOVE_ODD_BIT(x) ((x)&=~INDEX_ODD_BIT)

/* x without bit 31 */
#define GET_INDEX(x) ((x)&~INDEX_ODD_BIT)
/* bit 31 of x as 0 or 1 */
#define GET_ODD_BIT(x) ((uint32_t)(x)>>31)
#define IS_ODD_RUN(x) ((UBool)(((x)&INDEX_ODD_BIT)!=0))
#define IS_EVEN_RUN(x) ((UBool)(((x)&INDEX_ODD_BIT)==0))
209
210 U_CFUNC UBool
211 ubidi_getRuns(UBiDi *pBiDi, UErrorCode *pErrorCode);
212
/** BiDi control code points */
enum {
    ZWNJ_CHAR = 0x200c,
    ZWJ_CHAR  = 0x200d,
    LRM_CHAR  = 0x200e,
    RLM_CHAR  = 0x200f,

    LRE_CHAR  = 0x202a,
    RLE_CHAR  = 0x202b,
    PDF_CHAR  = 0x202c,
    LRO_CHAR  = 0x202d,
    RLO_CHAR  = 0x202e,

    LRI_CHAR  = 0x2066,
    RLI_CHAR  = 0x2067,
    FSI_CHAR  = 0x2068,
    PDI_CHAR  = 0x2069
};

/*
 * True if c is in one of the three ranges listed above:
 * ZWNJ_CHAR..RLM_CHAR, LRE_CHAR..RLO_CHAR or LRI_CHAR..PDI_CHAR.
 * The argument may be evaluated up to three times.
 */
#define IS_BIDI_CONTROL_CHAR(c) \
    (((uint32_t)(c)&0xfffffffc)==ZWNJ_CHAR || \
     (uint32_t)((c)-LRE_CHAR)<5 || \
     (uint32_t)((c)-LRI_CHAR)<4)
231
/* InsertPoints structure for noting where to put BiDi marks ---------------- */

/* one insertion point */
typedef struct Point {
    /* position in text */
    int32_t pos;
    /* flag for LRM/RLM, before/after */
    int32_t flag;
} Point;
238
239 typedef struct InsertPoints {
240 int32_t capacity; /* number of points allocated */
241 int32_t size; /* number of points used */
242 int32_t confirmed; /* number of points confirmed */
243 UErrorCode errorCode; /* for eventual memory shortage */
244 Point *points; /* pointer to array of points */
245 } InsertPoints;
246
247
248 /* UBiDi structure ----------------------------------------------------------- */
249
250 struct UBiDi {
251 /* pointer to parent paragraph object (pointer to self if this object is
252 * a paragraph object); set to NULL in a newly opened object; set to a
253 * real value after a successful execution of ubidi_setPara or ubidi_setLine
254 */
255 const UBiDi * pParaBiDi;
256
257 const UBiDiProps *bdp;
258
259 /* alias pointer to the current text */
260 const UChar *text;
261
262 /* length of the current text */
263 int32_t originalLength;
264
265 /* if the UBIDI_OPTION_STREAMING option is set, this is the length
266 * of text actually processed by ubidi_setPara, which may be shorter than
267 * the original length.
268 * Otherwise, it is identical to the original length.
269 */
270 int32_t length;
271
272 /* if the UBIDI_OPTION_REMOVE_CONTROLS option is set, and/or
273 * marks are allowed to be inserted in one of the reordering mode, the
274 * length of the result string may be different from the processed length.
275 */
276 int32_t resultLength;
277
278 /* memory sizes in bytes */
279 int32_t dirInsertSize, dirPropsSize, levelsSize, openingsSize, parasSize, runsSize, isolatesSize;
280
281 /* allocated memory */
282 uint16_t *dirInsertMemory;
283 DirProp *dirPropsMemory;
284 UBiDiLevel *levelsMemory;
285 Opening *openingsMemory;
286 Para *parasMemory;
287 Run *runsMemory;
288 Isolate *isolatesMemory;
289
290 /* indicators for whether memory may be allocated after ubidi_open() */
291 UBool mayAllocateText, mayAllocateRuns;
292
293 /* arrays with one value per text-character */
294 uint16_t *dirInsert;
295 DirProp *dirProps;
296 UBiDiLevel *levels;
297
298 /* are we performing an approximation of the "inverse BiDi" algorithm? */
299 UBool isInverse;
300
301 /* are we using the basic algorithm or its variation? */
302 UBiDiReorderingMode reorderingMode;
303
304 /* UBIDI_REORDER_xxx values must be ordered so that all the regular
305 * logical to visual modes come first, and all inverse BiDi modes
306 * come last.
307 */
308 #define UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL UBIDI_REORDER_NUMBERS_SPECIAL
309
310 /* bitmask for reordering options */
311 uint32_t reorderingOptions;
312
313 /* must block separators receive level 0? */
314 UBool orderParagraphsLTR;
315
316 /* the paragraph level */
317 UBiDiLevel paraLevel;
318 /* original paraLevel when contextual */
319 /* must be one of UBIDI_DEFAULT_xxx or 0 if not contextual */
320 UBiDiLevel defaultParaLevel;
321
322 /* context data */
323 const UChar *prologue;
324 int32_t proLength;
325 const UChar *epilogue;
326 int32_t epiLength;
327
328 /* the following is set in ubidi_setPara, used in processPropertySeq */
329 const struct ImpTabPair * pImpTabPair; /* pointer to levels state table pair */
330
331 /* the overall paragraph or line directionality - see UBiDiDirection */
332 UBiDiDirection direction;
333
334 /* flags is a bit set for which directional properties are in the text */
335 Flags flags;
336
337 /* lastArabicPos is index to the last AL in the text, -1 if none */
338 int32_t lastArabicPos;
339
340 /* characters after trailingWSStart are WS and are */
341 /* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */
342 int32_t trailingWSStart;
343
344 /* fields for paragraph handling */
345 int32_t paraCount; /* set in getDirProps() */
346 /* filled in getDirProps() */
347 Para *paras;
348
349 /* for relatively short text, we only need a tiny array of paras (no malloc()) */
350 Para simpleParas[SIMPLE_PARAS_COUNT];
351
352 /* fields for line reordering */
353 int32_t runCount; /* ==-1: runs not set up yet */
354 Run *runs;
355
356 /* for non-mixed text, we only need a tiny array of runs (no malloc()) */
357 Run simpleRuns[1];
358
359 /* maximum or current nesting depth of isolate sequences */
360 /* Within resolveExplicitLevels() and checkExplicitLevels(), this is the maximal
361 nesting encountered.
362 Within resolveImplicitLevels(), this is the index of the current isolates
363 stack entry. */
364 int32_t isolateCount;
365 Isolate *isolates;
366
367 /* for simple text, have a small stack (no malloc()) */
368 Isolate simpleIsolates[SIMPLE_ISOLATES_COUNT];
369
370 /* for inverse Bidi with insertion of directional marks */
371 InsertPoints insertPoints;
372
373 /* for option UBIDI_OPTION_REMOVE_CONTROLS */
374 int32_t controlCount;
375
376 /* for Bidi class callback */
377 UBiDiClassCallback *fnClassCallback; /* action pointer */
378 const void *coClassCallback; /* context pointer */
379 };
380
/* x is non-null and is its own paragraph object */
#define IS_VALID_PARA(x) \
    ((x) && ((x)->pParaBiDi==(x)))

/*
 * x is non-null and either is its own paragraph object, or points to a
 * parent object which is its own paragraph object
 */
#define IS_VALID_PARA_OR_LINE(x) \
    ((x) && ((x)->pParaBiDi==(x) || \
             (((x)->pParaBiDi) && (x)->pParaBiDi->pParaBiDi==(x)->pParaBiDi)))
383
384 typedef union {
385 uint16_t *dirInsertMemory;
386 DirProp *dirPropsMemory;
387 UBiDiLevel *levelsMemory;
388 Opening *openingsMemory;
389 Para *parasMemory;
390 Run *runsMemory;
391 Isolate *isolatesMemory;
392 } BidiMemoryForAllocation;
393
/*
 * Macros for initial checks at function entry.
 * Each macro expands to an if statement which returns from the calling
 * function when its check fails; the RETURN_VOID_ variants are for functions
 * without a return value.
 * All macro parameters are parenthesized in the expansions so that
 * expressions such as p+1 or *pErrorCode can be passed safely.
 */

/* return retvalue if pErrcode is NULL or *pErrcode is a failure code */
#define RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrcode, retvalue) \
    if((pErrcode)==NULL || U_FAILURE(*(pErrcode))) return retvalue
/* set errcode to U_INVALID_STATE_ERROR and return retvalue unless IS_VALID_PARA(bidi) */
#define RETURN_IF_NOT_VALID_PARA(bidi, errcode, retvalue) \
    if(!IS_VALID_PARA(bidi)) { \
        (errcode)=U_INVALID_STATE_ERROR; \
        return retvalue; \
    }
/* set errcode to U_INVALID_STATE_ERROR and return retvalue unless IS_VALID_PARA_OR_LINE(bidi) */
#define RETURN_IF_NOT_VALID_PARA_OR_LINE(bidi, errcode, retvalue) \
    if(!IS_VALID_PARA_OR_LINE(bidi)) { \
        (errcode)=U_INVALID_STATE_ERROR; \
        return retvalue; \
    }
/* set errcode to U_ILLEGAL_ARGUMENT_ERROR and return retvalue unless start<=arg<limit */
#define RETURN_IF_BAD_RANGE(arg, start, limit, errcode, retvalue) \
    if((arg)<(start) || (arg)>=(limit)) { \
        (errcode)=U_ILLEGAL_ARGUMENT_ERROR; \
        return retvalue; \
    }

/* the same checks for functions without a return value */
#define RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrcode) \
    if((pErrcode)==NULL || U_FAILURE(*(pErrcode))) return
#define RETURN_VOID_IF_NOT_VALID_PARA(bidi, errcode) \
    if(!IS_VALID_PARA(bidi)) { \
        (errcode)=U_INVALID_STATE_ERROR; \
        return; \
    }
#define RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(bidi, errcode) \
    if(!IS_VALID_PARA_OR_LINE(bidi)) { \
        (errcode)=U_INVALID_STATE_ERROR; \
        return; \
    }
#define RETURN_VOID_IF_BAD_RANGE(arg, start, limit, errcode) \
    if((arg)<(start) || (arg)>=(limit)) { \
        (errcode)=U_ILLEGAL_ARGUMENT_ERROR; \
        return; \
    }
430
431 /* helper function to (re)allocate memory if allowed */
432 U_CFUNC UBool
433 ubidi_getMemory(BidiMemoryForAllocation *pMemory, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded);
434
/*
 * Helper macros for each allocated array in UBiDi.
 * length is a number of array elements; it is converted to a size in bytes
 * here. Allocation is only permitted if the corresponding mayAllocate...
 * field of the UBiDi object is set.
 */
#define getDirInsertMemory(pBiDi, length) \
    ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->dirInsertMemory, \
                    &(pBiDi)->dirInsertSize, \
                    (pBiDi)->mayAllocateText, \
                    (length)*sizeof(uint16_t))

#define getDirPropsMemory(pBiDi, length) \
    ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->dirPropsMemory, \
                    &(pBiDi)->dirPropsSize, \
                    (pBiDi)->mayAllocateText, \
                    (length))

#define getLevelsMemory(pBiDi, length) \
    ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->levelsMemory, \
                    &(pBiDi)->levelsSize, \
                    (pBiDi)->mayAllocateText, \
                    (length))

#define getRunsMemory(pBiDi, length) \
    ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->runsMemory, \
                    &(pBiDi)->runsSize, \
                    (pBiDi)->mayAllocateRuns, \
                    (length)*sizeof(Run))
451
/*
 * Additional macros used by ubidi_open() - always allow allocation.
 * length is a number of array elements; the size passed on to
 * ubidi_getMemory() is in bytes, like the *Size fields of UBiDi.
 */

/*
 * The element type is uint16_t, so the size is length*sizeof(uint16_t),
 * consistent with getDirInsertMemory(). (Passing the bare element count
 * requested only half of the bytes needed.)
 */
#define getInitialDirInsertMemory(pBiDi, length) \
    ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->dirInsertMemory, &(pBiDi)->dirInsertSize, \
                    TRUE, (length)*sizeof(uint16_t))

/* DirProp is a one-byte type, so the element count is used as the size */
#define getInitialDirPropsMemory(pBiDi, length) \
    ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->dirPropsMemory, &(pBiDi)->dirPropsSize, \
                    TRUE, (length))

/* as in getLevelsMemory(), the element count is used as the size */
#define getInitialLevelsMemory(pBiDi, length) \
    ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->levelsMemory, &(pBiDi)->levelsSize, \
                    TRUE, (length))

#define getInitialOpeningsMemory(pBiDi, length) \
    ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->openingsMemory, &(pBiDi)->openingsSize, \
                    TRUE, (length)*sizeof(Opening))

#define getInitialParasMemory(pBiDi, length) \
    ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->parasMemory, &(pBiDi)->parasSize, \
                    TRUE, (length)*sizeof(Para))

#define getInitialRunsMemory(pBiDi, length) \
    ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->runsMemory, &(pBiDi)->runsSize, \
                    TRUE, (length)*sizeof(Run))

#define getInitialIsolatesMemory(pBiDi, length) \
    ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->isolatesMemory, &(pBiDi)->isolatesSize, \
                    TRUE, (length)*sizeof(Isolate))
480
481 #endif