2 **********************************************************************
3 * Copyright (C) 1999-2016, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
9 * Modification History:
11 * Date Name Description
12 * 07/08/2002 Eric Mader Creation.
13 ******************************************************************************
16 #include "unicode/uscript.h"
/* Number of slots in the circular stack of open paired characters. */
#define PAREN_STACK_DEPTH 32

/* Reduce a stack index modulo the stack depth. */
#define MOD(idx) ((idx) % PAREN_STACK_DEPTH)

/* Count up by one, but never beyond PAREN_STACK_DEPTH. */
#define LIMIT_INC(n) (((n) >= PAREN_STACK_DEPTH) ? PAREN_STACK_DEPTH : (n) + 1)

/* Move a stack index forward by `step` slots (or by one), wrapping around. */
#define INC(idx,step) (MOD((idx) + (step)))
#define INC1(idx) (INC((idx), 1))

/*
 * Move a stack index back by `step` slots (or by one), wrapping around.
 * PAREN_STACK_DEPTH is added first so that the operand of % stays
 * non-negative for steps of up to one full stack depth.
 */
#define DEC(idx,step) (MOD((idx) + PAREN_STACK_DEPTH - (step)))
#define DEC1(idx) (DEC((idx), 1))

/* The stack is empty when no pushes are outstanding. */
#define STACK_IS_EMPTY(run) ((run)->pushCount <= 0)
#define STACK_IS_NOT_EMPTY(run) ((run)->pushCount > 0)

/* The entry that parenSP currently designates. */
#define TOP(run) ((run)->parenStack[(run)->parenSP])

/* Forget any pending fix-ups. */
#define SYNC_FIXUP(run) ((run)->fixupCount = 0)
/*
 * One slot of the paren stack: the script code recorded for an open
 * paired character.
 *
 * NOTE(review): push() also stores a pairIndex member in each entry and
 * uscript_nextRun() reads it back through TOP(); the declaration of that
 * member is not visible in this view.
 */
33 struct ParenStackEntry
36 UScriptCode scriptCode
;
/*
 * NOTE(review): these look like members of the UScriptRun state that the
 * functions below reach through scriptRun->...; the struct header and the
 * remaining members (textLength, scriptStart, scriptLimit, parenSP,
 * pushCount, fixupCount are all used below) are not visible in this view.
 *
 * textArray   - the UTF-16 text being scanned; stored by
 *               uscript_setRunText() and indexed by scriptLimit in
 *               uscript_nextRun().
 * scriptCode  - script of the run currently being built.
 * parenStack  - PAREN_STACK_DEPTH entries, indexed by parenSP through the
 *               wrapping INC/DEC macros.
 */
42 const UChar
*textArray
;
46 UScriptCode scriptCode
;
48 struct ParenStackEntry parenStack
[PAREN_STACK_DEPTH
];
/* Forward declaration; the definition appears further down in this file. */
static int8_t highBit(int32_t value);
/*
 * Table of paired punctuation code points.  In every visible row an even
 * index holds the opening character and the following odd index holds its
 * closing partner; uscript_nextRun() depends on that layout when it tests
 * (pairIndex & 1).
 *
 * NOTE(review): getPairIndex() probes this table with >= comparisons, so it
 * presumably has to stay sorted in ascending order (the visible rows are).
 * Some rows and the closing brace are not visible in this view.
 */
56 static const UChar32 pairedChars
[] = {
57 0x0028, 0x0029, /* ascii paired punctuation */
61 0x00ab, 0x00bb, /* guillemets */
62 0x2018, 0x2019, /* general punctuation */
65 0x3008, 0x3009, /* chinese paired punctuation */
76 static void push(UScriptRun
*scriptRun
, int32_t pairIndex
, UScriptCode scriptCode
)
78 scriptRun
->pushCount
= LIMIT_INC(scriptRun
->pushCount
);
79 scriptRun
->fixupCount
= LIMIT_INC(scriptRun
->fixupCount
);
81 scriptRun
->parenSP
= INC1(scriptRun
->parenSP
);
82 scriptRun
->parenStack
[scriptRun
->parenSP
].pairIndex
= pairIndex
;
83 scriptRun
->parenStack
[scriptRun
->parenSP
].scriptCode
= scriptCode
;
/*
 * pop: discard the most recent entry of scriptRun's paren stack.
 *
 * Visible steps, in order: test STACK_IS_EMPTY; decrement fixupCount, but
 * only while it is positive; decrement pushCount; step parenSP back one
 * slot with DEC1 (wrapping modulo PAREN_STACK_DEPTH); and, once the stack
 * has become empty, put parenSP back to -1, the same value that
 * uscript_resetRun() assigns.
 *
 * NOTE(review): the body of the first STACK_IS_EMPTY test is not visible in
 * this view - presumably an early return so that an empty stack is left
 * untouched; confirm.
 */
86 static void pop(UScriptRun
*scriptRun
)
88 if (STACK_IS_EMPTY(scriptRun
)) {
92 if (scriptRun
->fixupCount
> 0) {
93 scriptRun
->fixupCount
-= 1;
96 scriptRun
->pushCount
-= 1;
97 scriptRun
->parenSP
= DEC1(scriptRun
->parenSP
);
99 /* If the stack is now empty, reset the stack
100 pointers to their initial values.
102 if (STACK_IS_EMPTY(scriptRun
)) {
103 scriptRun
->parenSP
= -1;
107 static void fixup(UScriptRun
*scriptRun
, UScriptCode scriptCode
)
109 int32_t fixupSP
= DEC(scriptRun
->parenSP
, scriptRun
->fixupCount
);
111 while (scriptRun
->fixupCount
-- > 0) {
112 fixupSP
= INC1(fixupSP
);
113 scriptRun
->parenStack
[fixupSP
].scriptCode
= scriptCode
;
/*
 * highBit: the visible lines compare value against 1 << 16, 1 << 8,
 * 1 << 4, 1 << 2 and 1 << 1 in turn.  The prototype earlier in the file
 * gives the return type as int8_t.
 *
 * NOTE(review): the bodies of these tests and the return statement are not
 * visible in this view.  From the name and from getPairIndex(), which
 * computes 1 << highBit(count), this presumably yields the index of the
 * highest set bit of value - confirm, including what happens for
 * value <= 0.
 */
118 highBit(int32_t value
)
126 if (value
>= 1 << 16) {
131 if (value
>= 1 << 8) {
136 if (value
>= 1 << 4) {
141 if (value
>= 1 << 2) {
146 if (value
>= 1 << 1) {
/*
 * getPairIndex: look ch up in pairedChars.
 *
 * Visible set-up: pairedCharCount is the number of table entries,
 * pairedCharPower is 1 << highBit(pairedCharCount), and pairedCharExtra is
 * the difference between the two.  The search starts at index 0, or at
 * pairedCharExtra when ch is not below the entry stored there, and then
 * probes pairedChars[pairIndex + probe] while probe is greater than 1.
 * The last visible test compares the entry found with ch.
 *
 * NOTE(review): the loop body (how probe shrinks and pairIndex moves) and
 * the return statements are not visible in this view.  uscript_nextRun()
 * treats a result >= 0 as "ch is a paired character", so a miss presumably
 * yields a negative value - confirm.
 */
155 getPairIndex(UChar32 ch
)
157 int32_t pairedCharCount
= UPRV_LENGTHOF(pairedChars
);
158 int32_t pairedCharPower
= 1 << highBit(pairedCharCount
);
159 int32_t pairedCharExtra
= pairedCharCount
- pairedCharPower
;
161 int32_t probe
= pairedCharPower
;
162 int32_t pairIndex
= 0;
164 if (ch
>= pairedChars
[pairedCharExtra
]) {
165 pairIndex
= pairedCharExtra
;
168 while (probe
> (1 << 0)) {
171 if (ch
>= pairedChars
[pairIndex
+ probe
]) {
176 if (pairedChars
[pairIndex
] != ch
) {
/*
 * sameScript: decide whether two script codes may share one run.
 *
 * The result is true when either code is <= USCRIPT_INHERITED, or when the
 * two codes are equal.
 *
 * NOTE(review): the "<= USCRIPT_INHERITED" test presumably covers the
 * script-neutral codes (uscript_nextRun() starts every run as
 * USCRIPT_COMMON) - confirm against the UScriptCode enum.  The return type
 * line is not visible in this view.
 */
184 sameScript(UScriptCode scriptOne
, UScriptCode scriptTwo
)
186 return scriptOne
<= USCRIPT_INHERITED
|| scriptTwo
<= USCRIPT_INHERITED
|| scriptOne
== scriptTwo
;
/*
 * uscript_openRun: allocate a UScriptRun and attach text to it.
 *
 * src, length - text handed on unchanged to uscript_setRunText(), which
 *               validates it.
 * pErrorCode  - in/out error code; the first visible test checks for a
 *               NULL pointer or an existing failure before anything is
 *               allocated.
 *
 * The object comes from uprv_malloc(); a NULL result is reported as
 * U_MEMORY_ALLOCATION_ERROR.  After uscript_setRunText() the error code is
 * checked again so that the new object can be released on failure.
 *
 * NOTE(review): the return statements and the statement that releases the
 * object are not visible in this view.  Presumably NULL is returned on
 * every failure path and the caller frees a successful result with
 * uscript_closeRun() - confirm.
 */
189 U_CAPI UScriptRun
* U_EXPORT2
190 uscript_openRun(const UChar
*src
, int32_t length
, UErrorCode
*pErrorCode
)
192 UScriptRun
*result
= NULL
;
194 if (pErrorCode
== NULL
|| U_FAILURE(*pErrorCode
)) {
198 result
= uprv_malloc(sizeof (UScriptRun
));
200 if (result
== NULL
) {
201 *pErrorCode
= U_MEMORY_ALLOCATION_ERROR
;
205 uscript_setRunText(result
, src
, length
, pErrorCode
);
207 /* Release the UScriptRun if uscript_setRunText() returns an error */
208 if (U_FAILURE(*pErrorCode
)) {
216 U_CAPI
void U_EXPORT2
217 uscript_closeRun(UScriptRun
*scriptRun
)
219 if (scriptRun
!= NULL
) {
220 uprv_free(scriptRun
);
224 U_CAPI
void U_EXPORT2
225 uscript_resetRun(UScriptRun
*scriptRun
)
227 if (scriptRun
!= NULL
) {
228 scriptRun
->scriptStart
= 0;
229 scriptRun
->scriptLimit
= 0;
230 scriptRun
->scriptCode
= USCRIPT_INVALID_CODE
;
231 scriptRun
->parenSP
= -1;
232 scriptRun
->pushCount
= 0;
233 scriptRun
->fixupCount
= 0;
/*
 * uscript_setRunText: point an existing UScriptRun at new text and rewind
 * it.
 *
 * scriptRun  - iterator to update; NULL is rejected.
 * src        - UTF-16 text.  Only the pointer is stored in textArray.
 * length     - number of UChars in src; negative values are rejected.
 * pErrorCode - in/out error code; the first visible test checks for a
 *              NULL pointer or an existing failure.
 *
 * The argument check also requires that src is NULL exactly when length
 * is 0: a NULL src with a non-zero length, or a non-NULL src with length 0,
 * is reported as U_ILLEGAL_ARGUMENT_ERROR.  On success the pointer and
 * length are stored and uscript_resetRun() clears the iteration state.
 *
 * NOTE(review): the body of the first test and whatever follows the error
 * assignment in the second are not visible in this view - presumably both
 * return early; confirm.
 */
237 U_CAPI
void U_EXPORT2
238 uscript_setRunText(UScriptRun
*scriptRun
, const UChar
*src
, int32_t length
, UErrorCode
*pErrorCode
)
240 if (pErrorCode
== NULL
|| U_FAILURE(*pErrorCode
)) {
244 if (scriptRun
== NULL
|| length
< 0 || ((src
== NULL
) != (length
== 0))) {
245 *pErrorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
249 scriptRun
->textArray
= src
;
250 scriptRun
->textLength
= length
;
252 uscript_resetRun(scriptRun
);
/*
 * uscript_nextRun: advance to the next run of text sharing one script.
 *
 * scriptRun  - iterator state; the first test handles NULL and the case
 *              where scriptLimit has already reached textLength.
 * pRunStart  - if not NULL, receives scriptStart of the run.
 * pRunLimit  - if not NULL, receives scriptLimit of the run.
 * pRunScript - if not NULL, receives the script code of the run.
 *
 * Outline of the visible logic:
 *  - Each run begins at the previous scriptLimit with fixupCount cleared
 *    and the run script set to USCRIPT_COMMON.
 *  - The loop advances scriptLimit one UChar at a time.  A high surrogate
 *    (0xD800..0xDBFF) that is not the last UChar and is followed by a low
 *    surrogate (0xDC00..0xDFFF) is combined into one supplementary code
 *    point, and scriptLimit is advanced once more to consume the pair.
 *  - The script of the character comes from uscript_getScript() and its
 *    table position from getPairIndex().
 *  - For a paired character (pairIndex >= 0), an even index means an
 *    opener, which is pushed together with the current run script.  An odd
 *    index means a closer: pi is the index of the matching opener, the
 *    stack is searched from the top for an entry with that index, and if
 *    the stack is still non-empty the script stored in the top entry
 *    replaces the character's own script.
 *  - When sameScript() accepts the character and the run script is still
 *    <= USCRIPT_INHERITED while the character's is above it, the run takes
 *    over the character's script and fixup() writes it into the stack
 *    entries counted by fixupCount.
 *  - scriptLimit is stepped back by one where a run breaks on a surrogate
 *    pair, so that the run ends before the high surrogate.
 *
 * NOTE(review): the declarations of ch, sc and pairIndex, the bodies of the
 * stack-search loop and of the closer test, the branch taken when
 * sameScript() rejects the character, and every return statement are not
 * visible in this view.  The value written to error by uscript_getScript()
 * is not examined in any visible line.
 */
255 U_CAPI UBool U_EXPORT2
256 uscript_nextRun(UScriptRun
*scriptRun
, int32_t *pRunStart
, int32_t *pRunLimit
, UScriptCode
*pRunScript
)
258 UErrorCode error
= U_ZERO_ERROR
;
260 /* if we've fallen off the end of the text, we're done */
261 if (scriptRun
== NULL
|| scriptRun
->scriptLimit
>= scriptRun
->textLength
) {
/* start of a new run: nothing is pending fix-up and the script is still neutral */
265 SYNC_FIXUP(scriptRun
);
266 scriptRun
->scriptCode
= USCRIPT_COMMON
;
/* the new run starts where the previous one ended */
268 for (scriptRun
->scriptStart
= scriptRun
->scriptLimit
; scriptRun
->scriptLimit
< scriptRun
->textLength
; scriptRun
->scriptLimit
+= 1) {
269 UChar high
= scriptRun
->textArray
[scriptRun
->scriptLimit
];
275 * if the character is a high surrogate and it's not the last one
276 * in the text, see if it's followed by a low surrogate
278 if (high
>= 0xD800 && high
<= 0xDBFF && scriptRun
->scriptLimit
< scriptRun
->textLength
- 1) {
279 UChar low
= scriptRun
->textArray
[scriptRun
->scriptLimit
+ 1];
282 * if it is followed by a low surrogate,
283 * consume it and form the full character
285 if (low
>= 0xDC00 && low
<= 0xDFFF) {
286 ch
= (high
- 0xD800) * 0x0400 + low
- 0xDC00 + 0x10000;
287 scriptRun
->scriptLimit
+= 1;
291 sc
= uscript_getScript(ch
, &error
);
292 pairIndex
= getPairIndex(ch
);
295 * Paired character handling:
297 * if it's an open character, push it onto the stack.
298 * if it's a close character, find the matching open on the
299 * stack, and use that script code. Any non-matching open
300 * characters above it on the stack will be poped.
302 if (pairIndex
>= 0) {
303 if ((pairIndex
& 1) == 0) {
304 push(scriptRun
, pairIndex
, scriptRun
->scriptCode
);
306 int32_t pi
= pairIndex
& ~1;
308 while (STACK_IS_NOT_EMPTY(scriptRun
) && TOP(scriptRun
).pairIndex
!= pi
) {
312 if (STACK_IS_NOT_EMPTY(scriptRun
)) {
313 sc
= TOP(scriptRun
).scriptCode
;
318 if (sameScript(scriptRun
->scriptCode
, sc
)) {
319 if (scriptRun
->scriptCode
<= USCRIPT_INHERITED
&& sc
> USCRIPT_INHERITED
) {
320 scriptRun
->scriptCode
= sc
;
322 fixup(scriptRun
, scriptRun
->scriptCode
);
326 * if this character is a close paired character,
327 * pop the matching open character from the stack
329 if (pairIndex
>= 0 && (pairIndex
& 1) != 0) {
334 * if the run broke on a surrogate pair,
335 * end it before the high surrogate
338 scriptRun
->scriptLimit
-= 1;
346 if (pRunStart
!= NULL
) {
347 *pRunStart
= scriptRun
->scriptStart
;
350 if (pRunLimit
!= NULL
) {
351 *pRunLimit
= scriptRun
->scriptLimit
;
354 if (pRunScript
!= NULL
) {
355 *pRunScript
= scriptRun
->scriptCode
;