1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 ******************************************************************************
6 * Copyright (C) 1999-2016, International Business Machines
7 * Corporation and others. All Rights Reserved.
9 ******************************************************************************
10 * file name: ubidiimp.h
12 * tab size: 8 (not used)
15 * created on: 1999aug06
16 * created by: Markus W. Scherer, updated by Matitiahu Allouche
22 #include "unicode/utypes.h"
23 #include "unicode/ubidi.h"
24 #include "unicode/uchar.h"
25 #include "ubidi_props.h"
27 /* miscellaneous definitions ---------------------------------------------- */
29 typedef uint8_t DirProp
;
30 typedef uint32_t Flags
;
32 /* Comparing the description of the BiDi algorithm with this implementation
33 is easier with the same names for the BiDi types in the code as there.
34 See UCharDirection in uchar.h .
37 L
= U_LEFT_TO_RIGHT
, /* 0 */
38 R
= U_RIGHT_TO_LEFT
, /* 1 */
39 EN
= U_EUROPEAN_NUMBER
, /* 2 */
40 ES
= U_EUROPEAN_NUMBER_SEPARATOR
, /* 3 */
41 ET
= U_EUROPEAN_NUMBER_TERMINATOR
, /* 4 */
42 AN
= U_ARABIC_NUMBER
, /* 5 */
43 CS
= U_COMMON_NUMBER_SEPARATOR
, /* 6 */
44 B
= U_BLOCK_SEPARATOR
, /* 7 */
45 S
= U_SEGMENT_SEPARATOR
, /* 8 */
46 WS
= U_WHITE_SPACE_NEUTRAL
, /* 9 */
47 ON
= U_OTHER_NEUTRAL
, /* 10 */
48 LRE
=U_LEFT_TO_RIGHT_EMBEDDING
, /* 11 */
49 LRO
=U_LEFT_TO_RIGHT_OVERRIDE
, /* 12 */
50 AL
= U_RIGHT_TO_LEFT_ARABIC
, /* 13 */
51 RLE
=U_RIGHT_TO_LEFT_EMBEDDING
, /* 14 */
52 RLO
=U_RIGHT_TO_LEFT_OVERRIDE
, /* 15 */
53 PDF
=U_POP_DIRECTIONAL_FORMAT
, /* 16 */
54 NSM
=U_DIR_NON_SPACING_MARK
, /* 17 */
55 BN
= U_BOUNDARY_NEUTRAL
, /* 18 */
56 FSI
=U_FIRST_STRONG_ISOLATE
, /* 19 */
57 LRI
=U_LEFT_TO_RIGHT_ISOLATE
, /* 20 */
58 RLI
=U_RIGHT_TO_LEFT_ISOLATE
, /* 21 */
59 PDI
=U_POP_DIRECTIONAL_ISOLATE
, /* 22 */
60 ENL
, /* EN after W7 */ /* 23 */
61 ENR
, /* EN not subject to W7 */ /* 24 */
65 /* Sometimes, bit values are more appropriate
66 to deal with directionality properties.
67 Abbreviations in these macro names refer to names
68 used in the BiDi algorithm.
70 #define DIRPROP_FLAG(dir) (1UL<<(dir))
71 #define PURE_DIRPROP(prop) ((prop)&~0xE0) ?????????????????????????
73 /* special flag for multiple runs from explicit embedding codes */
74 #define DIRPROP_FLAG_MULTI_RUNS (1UL<<31)
76 /* are there any characters that are LTR or RTL? */
77 #define MASK_LTR (DIRPROP_FLAG(L)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(ENL)|DIRPROP_FLAG(ENR)|DIRPROP_FLAG(AN)|DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO)|DIRPROP_FLAG(LRI))
78 #define MASK_RTL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO)|DIRPROP_FLAG(RLI))
79 #define MASK_R_AL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL))
80 #define MASK_STRONG_EN_AN (DIRPROP_FLAG(L)|DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(AN))
82 /* explicit embedding codes */
83 #define MASK_EXPLICIT (DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO)|DIRPROP_FLAG(PDF))
85 /* explicit isolate codes */
86 #define MASK_ISO (DIRPROP_FLAG(LRI)|DIRPROP_FLAG(RLI)|DIRPROP_FLAG(FSI)|DIRPROP_FLAG(PDI))
88 #define MASK_BN_EXPLICIT (DIRPROP_FLAG(BN)|MASK_EXPLICIT)
90 /* paragraph and segment separators */
91 #define MASK_B_S (DIRPROP_FLAG(B)|DIRPROP_FLAG(S))
93 /* all types that are counted as White Space or Neutral in some steps */
94 #define MASK_WS (MASK_B_S|DIRPROP_FLAG(WS)|MASK_BN_EXPLICIT|MASK_ISO)
96 /* types that are neutrals or could becomes neutrals in (Wn) */
97 #define MASK_POSSIBLE_N (DIRPROP_FLAG(ON)|DIRPROP_FLAG(CS)|DIRPROP_FLAG(ES)|DIRPROP_FLAG(ET)|MASK_WS)
100 * These types may be changed to "e",
101 * the embedding type (L or R) of the run,
102 * in the BiDi algorithm (N2)
104 #define MASK_EMBEDDING (DIRPROP_FLAG(NSM)|MASK_POSSIBLE_N)
106 /* the dirProp's L and R are defined to 0 and 1 values in UCharDirection */
107 #define GET_LR_FROM_LEVEL(level) ((DirProp)((level)&1))
109 #define IS_DEFAULT_LEVEL(level) ((level)>=0xfe)
112 * The following bit is used for the directional isolate status.
113 * Stack entries corresponding to isolate sequences are greater than ISOLATE.
115 #define ISOLATE 0x0100
118 ubidi_getParaLevelAtIndex(const UBiDi
*pBiDi
, int32_t index
);
120 #define GET_PARALEVEL(ubidi, index) \
121 ((UBiDiLevel)(!(ubidi)->defaultParaLevel || (index)<(ubidi)->paras[0].limit ? \
122 (ubidi)->paraLevel : ubidi_getParaLevelAtIndex((ubidi), (index))))
124 /* number of paras entries allocated initially without malloc */
125 #define SIMPLE_PARAS_COUNT 10
126 /* number of isolate entries allocated initially without malloc */
127 #define SIMPLE_ISOLATES_COUNT 5
128 /* number of isolate run entries for paired brackets allocated initially without malloc */
129 #define SIMPLE_OPENINGS_COUNT 20
134 /* Run structure for reordering --------------------------------------------- */
142 typedef struct Para
{
147 enum { /* flags for Opening.flags */
148 FOUND_L
=DIRPROP_FLAG(L
),
149 FOUND_R
=DIRPROP_FLAG(R
)
152 typedef struct Opening
{
153 int32_t position
; /* position of opening bracket */
154 int32_t match
; /* matching char or -position of closing bracket */
155 int32_t contextPos
; /* position of last strong char found before opening */
156 uint16_t flags
; /* bits for L or R/AL found within the pair */
157 UBiDiDirection contextDir
; /* L or R according to last strong char before opening */
158 uint8_t filler
; /* to complete a nice multiple of 4 chars */
161 typedef struct IsoRun
{
162 int32_t contextPos
; /* position of char determining context */
163 uint16_t start
; /* index of first opening entry for this run */
164 uint16_t limit
; /* index after last opening entry for this run */
165 UBiDiLevel level
; /* level of this run */
166 DirProp lastStrong
; /* bidi class of last strong char found in this run */
167 DirProp lastBase
; /* bidi class of last base char found in this run */
168 UBiDiDirection contextDir
; /* L or R to use as context for following openings */
171 typedef struct BracketData
{
173 /* array of opening entries which should be enough in most cases; no malloc() */
174 Opening simpleOpenings
[SIMPLE_OPENINGS_COUNT
];
175 Opening
*openings
; /* pointer to current array of entries */
176 int32_t openingsCount
; /* number of allocated entries */
177 int32_t isoRunLast
; /* index of last used entry */
178 /* array of nested isolated sequence entries; can never excess UBIDI_MAX_EXPLICIT_LEVEL
179 + 1 for index 0, + 1 for before the first isolated sequence */
180 IsoRun isoRuns
[UBIDI_MAX_EXPLICIT_LEVEL
+2];
181 UBool isNumbersSpecial
; /* reordering mode for NUMBERS_SPECIAL */
184 typedef struct Isolate
{
192 int32_t logicalStart
, /* first character of the run; b31 indicates even/odd level */
193 visualLimit
, /* last visual position of the run +1 */
194 insertRemove
; /* if >0, flags for inserting LRM/RLM before/after run,
195 if <0, count of bidi controls within run */
198 /* in a Run, logicalStart will get this bit set if the run level is odd */
199 #define INDEX_ODD_BIT (1UL<<31)
201 #define MAKE_INDEX_ODD_PAIR(index, level) ((index)|((int32_t)(level)<<31))
202 #define ADD_ODD_BIT_FROM_LEVEL(x, level) ((x)|=((int32_t)(level)<<31))
203 #define REMOVE_ODD_BIT(x) ((x)&=~INDEX_ODD_BIT)
205 #define GET_INDEX(x) ((x)&~INDEX_ODD_BIT)
206 #define GET_ODD_BIT(x) ((uint32_t)(x)>>31)
207 #define IS_ODD_RUN(x) ((UBool)(((x)&INDEX_ODD_BIT)!=0))
208 #define IS_EVEN_RUN(x) ((UBool)(((x)&INDEX_ODD_BIT)==0))
211 ubidi_getRuns(UBiDi
*pBiDi
, UErrorCode
*pErrorCode
);
213 /** BiDi control code points */
230 #define IS_BIDI_CONTROL_CHAR(c) (((uint32_t)(c)&0xfffffffc)==ZWNJ_CHAR || (uint32_t)((c)-LRE_CHAR)<5 || (uint32_t)((c)-LRI_CHAR)<4)
232 /* InsertPoints structure for noting where to put BiDi marks ---------------- */
234 typedef struct Point
{
235 int32_t pos
; /* position in text */
236 int32_t flag
; /* flag for LRM/RLM, before/after */
239 typedef struct InsertPoints
{
240 int32_t capacity
; /* number of points allocated */
241 int32_t size
; /* number of points used */
242 int32_t confirmed
; /* number of points confirmed */
243 UErrorCode errorCode
; /* for eventual memory shortage */
244 Point
*points
; /* pointer to array of points */
248 /* UBiDi structure ----------------------------------------------------------- */
251 /* pointer to parent paragraph object (pointer to self if this object is
252 * a paragraph object); set to NULL in a newly opened object; set to a
253 * real value after a successful execution of ubidi_setPara or ubidi_setLine
255 const UBiDi
* pParaBiDi
;
257 /* alias pointer to the current text */
260 /* length of the current text */
261 int32_t originalLength
;
263 /* if the UBIDI_OPTION_STREAMING option is set, this is the length
264 * of text actually processed by ubidi_setPara, which may be shorter than
265 * the original length.
266 * Otherwise, it is identical to the original length.
270 /* if the UBIDI_OPTION_REMOVE_CONTROLS option is set, and/or
271 * marks are allowed to be inserted in one of the reordering mode, the
272 * length of the result string may be different from the processed length.
274 int32_t resultLength
;
276 /* memory sizes in bytes */
277 int32_t dirInsertSize
, dirPropsSize
, levelsSize
, openingsSize
, parasSize
, runsSize
, isolatesSize
;
279 /* allocated memory */
280 uint16_t *dirInsertMemory
;
281 DirProp
*dirPropsMemory
;
282 UBiDiLevel
*levelsMemory
;
283 Opening
*openingsMemory
;
286 Isolate
*isolatesMemory
;
288 /* indicators for whether memory may be allocated after ubidi_open() */
289 UBool mayAllocateText
, mayAllocateRuns
;
291 /* arrays with one value per text-character */
296 /* are we performing an approximation of the "inverse BiDi" algorithm? */
299 /* are we using the basic algorithm or its variation? */
300 UBiDiReorderingMode reorderingMode
;
302 /* UBIDI_REORDER_xxx values must be ordered so that all the regular
303 * logical to visual modes come first, and all inverse BiDi modes
306 #define UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL UBIDI_REORDER_NUMBERS_SPECIAL
308 /* bitmask for reordering options */
309 uint32_t reorderingOptions
;
311 /* must block separators receive level 0? */
312 UBool orderParagraphsLTR
;
314 /* the paragraph level */
315 UBiDiLevel paraLevel
;
316 /* original paraLevel when contextual */
317 /* must be one of UBIDI_DEFAULT_xxx or 0 if not contextual */
318 UBiDiLevel defaultParaLevel
;
321 const UChar
*prologue
;
323 const UChar
*epilogue
;
326 /* the following is set in ubidi_setPara, used in processPropertySeq */
327 const struct ImpTabPair
* pImpTabPair
; /* pointer to levels state table pair */
329 /* the overall paragraph or line directionality - see UBiDiDirection */
330 UBiDiDirection direction
;
332 /* flags is a bit set for which directional properties are in the text */
335 /* lastArabicPos is index to the last AL in the text, -1 if none */
336 int32_t lastArabicPos
;
338 /* characters after trailingWSStart are WS and are */
339 /* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */
340 int32_t trailingWSStart
;
342 /* fields for paragraph handling */
343 int32_t paraCount
; /* set in getDirProps() */
344 /* filled in getDirProps() */
347 /* for relatively short text, we only need a tiny array of paras (no malloc()) */
348 Para simpleParas
[SIMPLE_PARAS_COUNT
];
350 /* fields for line reordering */
351 int32_t runCount
; /* ==-1: runs not set up yet */
354 /* for non-mixed text, we only need a tiny array of runs (no malloc()) */
357 /* maximum or current nesting depth of isolate sequences */
358 /* Within resolveExplicitLevels() and checkExplicitLevels(), this is the maximal
360 Within resolveImplicitLevels(), this is the index of the current isolates
362 int32_t isolateCount
;
365 /* for simple text, have a small stack (no malloc()) */
366 Isolate simpleIsolates
[SIMPLE_ISOLATES_COUNT
];
368 /* for inverse Bidi with insertion of directional marks */
369 InsertPoints insertPoints
;
371 /* for option UBIDI_OPTION_REMOVE_CONTROLS */
372 int32_t controlCount
;
374 /* for Bidi class callback */
375 UBiDiClassCallback
*fnClassCallback
; /* action pointer */
376 const void *coClassCallback
; /* context pointer */
379 #define IS_VALID_PARA(x) ((x) && ((x)->pParaBiDi==(x)))
380 #define IS_VALID_PARA_OR_LINE(x) ((x) && ((x)->pParaBiDi==(x) || (((x)->pParaBiDi) && (x)->pParaBiDi->pParaBiDi==(x)->pParaBiDi)))
383 uint16_t *dirInsertMemory
;
384 DirProp
*dirPropsMemory
;
385 UBiDiLevel
*levelsMemory
;
386 Opening
*openingsMemory
;
389 Isolate
*isolatesMemory
;
390 } BidiMemoryForAllocation
;
392 /* Macros for initial checks at function entry */
393 #define RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrcode, retvalue) \
394 if((pErrcode)==NULL || U_FAILURE(*pErrcode)) return retvalue
395 #define RETURN_IF_NOT_VALID_PARA(bidi, errcode, retvalue) \
396 if(!IS_VALID_PARA(bidi)) { \
397 errcode=U_INVALID_STATE_ERROR; \
400 #define RETURN_IF_NOT_VALID_PARA_OR_LINE(bidi, errcode, retvalue) \
401 if(!IS_VALID_PARA_OR_LINE(bidi)) { \
402 errcode=U_INVALID_STATE_ERROR; \
405 #define RETURN_IF_BAD_RANGE(arg, start, limit, errcode, retvalue) \
406 if((arg)<(start) || (arg)>=(limit)) { \
407 (errcode)=U_ILLEGAL_ARGUMENT_ERROR; \
411 #define RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrcode) \
412 if((pErrcode)==NULL || U_FAILURE(*pErrcode)) return
413 #define RETURN_VOID_IF_NOT_VALID_PARA(bidi, errcode) \
414 if(!IS_VALID_PARA(bidi)) { \
415 errcode=U_INVALID_STATE_ERROR; \
418 #define RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(bidi, errcode) \
419 if(!IS_VALID_PARA_OR_LINE(bidi)) { \
420 errcode=U_INVALID_STATE_ERROR; \
423 #define RETURN_VOID_IF_BAD_RANGE(arg, start, limit, errcode) \
424 if((arg)<(start) || (arg)>=(limit)) { \
425 (errcode)=U_ILLEGAL_ARGUMENT_ERROR; \
429 /* helper function to (re)allocate memory if allowed */
431 ubidi_getMemory(BidiMemoryForAllocation
*pMemory
, int32_t *pSize
, UBool mayAllocate
, int32_t sizeNeeded
);
433 /* helper macros for each allocated array in UBiDi */
434 #define getDirInsertMemory(pBiDi, length) \
435 ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->dirInsertMemory, &(pBiDi)->dirInsertSize, \
436 (pBiDi)->mayAllocateText, (length)*sizeof(uint16_t))
438 #define getDirPropsMemory(pBiDi, length) \
439 ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->dirPropsMemory, &(pBiDi)->dirPropsSize, \
440 (pBiDi)->mayAllocateText, (length))
442 #define getLevelsMemory(pBiDi, length) \
443 ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->levelsMemory, &(pBiDi)->levelsSize, \
444 (pBiDi)->mayAllocateText, (length))
446 #define getRunsMemory(pBiDi, length) \
447 ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->runsMemory, &(pBiDi)->runsSize, \
448 (pBiDi)->mayAllocateRuns, (length)*sizeof(Run))
450 /* additional macros used by ubidi_open() - always allow allocation */
451 #define getInitialDirInsertMemory(pBiDi, length) \
452 ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->dirInsertMemory, &(pBiDi)->dirInsertSize, \
455 #define getInitialDirPropsMemory(pBiDi, length) \
456 ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->dirPropsMemory, &(pBiDi)->dirPropsSize, \
459 #define getInitialLevelsMemory(pBiDi, length) \
460 ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->levelsMemory, &(pBiDi)->levelsSize, \
463 #define getInitialOpeningsMemory(pBiDi, length) \
464 ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->openingsMemory, &(pBiDi)->openingsSize, \
465 TRUE, (length)*sizeof(Opening))
467 #define getInitialParasMemory(pBiDi, length) \
468 ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->parasMemory, &(pBiDi)->parasSize, \
469 TRUE, (length)*sizeof(Para))
471 #define getInitialRunsMemory(pBiDi, length) \
472 ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->runsMemory, &(pBiDi)->runsSize, \
473 TRUE, (length)*sizeof(Run))
475 #define getInitialIsolatesMemory(pBiDi, length) \
476 ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->isolatesMemory, &(pBiDi)->isolatesSize, \
477 TRUE, (length)*sizeof(Isolate))