]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/usc_impl.c
ICU-57131.0.1.tar.gz
[apple/icu.git] / icuSources / common / usc_impl.c
CommitLineData
b75a7d8f
A
/*
**********************************************************************
*   Copyright (C) 1999-2016, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*
* File USC_IMPL.C
*
* Modification History:
*
*   Date        Name        Description
*   07/08/2002  Eric Mader  Creation.
******************************************************************************
*/
15
16#include "unicode/uscript.h"
17#include "usc_impl.h"
18#include "cmemory.h"
19
374ca955
A
/* Number of entries in the circular stack of open paired characters. */
#define PAREN_STACK_DEPTH 32

/* Reduce a stack index modulo the stack depth. */
#define MOD(sp) ((sp) % PAREN_STACK_DEPTH)
/* Increment a counter, saturating at PAREN_STACK_DEPTH. */
#define LIMIT_INC(sp) (((sp) < PAREN_STACK_DEPTH)? (sp) + 1 : PAREN_STACK_DEPTH)
/* Advance a stack index by count positions, wrapping around. */
#define INC(sp,count) (MOD((sp) + (count)))
#define INC1(sp) (INC(sp, 1))
/*
 * Move a stack index back by count positions, wrapping around.
 * PAREN_STACK_DEPTH is added before the reduction so that stepping
 * back from index 0 wraps to the last slot.
 */
#define DEC(sp,count) (MOD((sp) + PAREN_STACK_DEPTH - (count)))
#define DEC1(sp) (DEC(sp, 1))
/* The stack is empty when no pushed entries remain. */
#define STACK_IS_EMPTY(scriptRun) ((scriptRun)->pushCount <= 0)
#define STACK_IS_NOT_EMPTY(scriptRun) (! STACK_IS_EMPTY(scriptRun))
/* The entry at the top of the stack; only meaningful when the stack is not empty. */
#define TOP(scriptRun) ((scriptRun)->parenStack[(scriptRun)->parenSP])
/* Forget any pending fixups; nothing pushed so far will be rewritten by fixup(). */
#define SYNC_FIXUP(scriptRun) ((scriptRun)->fixupCount = 0)
b75a7d8f
A
32
33struct ParenStackEntry
34{
35 int32_t pairIndex;
36 UScriptCode scriptCode;
37};
38
39struct UScriptRun
40{
41 int32_t textLength;
42 const UChar *textArray;
43
44 int32_t scriptStart;
45 int32_t scriptLimit;
46 UScriptCode scriptCode;
47
48 struct ParenStackEntry parenStack[PAREN_STACK_DEPTH];
49 int32_t parenSP;
374ca955
A
50 int32_t pushCount;
51 int32_t fixupCount;
b75a7d8f
A
52};
53
/* Forward declaration; the definition appears further down in this file. */
static int8_t highBit(int32_t value);
55
56static const UChar32 pairedChars[] = {
57 0x0028, 0x0029, /* ascii paired punctuation */
58 0x003c, 0x003e,
59 0x005b, 0x005d,
60 0x007b, 0x007d,
61 0x00ab, 0x00bb, /* guillemets */
62 0x2018, 0x2019, /* general punctuation */
63 0x201c, 0x201d,
64 0x2039, 0x203a,
65 0x3008, 0x3009, /* chinese paired punctuation */
66 0x300a, 0x300b,
67 0x300c, 0x300d,
68 0x300e, 0x300f,
69 0x3010, 0x3011,
70 0x3014, 0x3015,
71 0x3016, 0x3017,
72 0x3018, 0x3019,
73 0x301a, 0x301b
74};
75
374ca955
A
76static void push(UScriptRun *scriptRun, int32_t pairIndex, UScriptCode scriptCode)
77{
78 scriptRun->pushCount = LIMIT_INC(scriptRun->pushCount);
79 scriptRun->fixupCount = LIMIT_INC(scriptRun->fixupCount);
80
81 scriptRun->parenSP = INC1(scriptRun->parenSP);
82 scriptRun->parenStack[scriptRun->parenSP].pairIndex = pairIndex;
83 scriptRun->parenStack[scriptRun->parenSP].scriptCode = scriptCode;
84}
85
86static void pop(UScriptRun *scriptRun)
87{
88 if (STACK_IS_EMPTY(scriptRun)) {
89 return;
90 }
91
92 if (scriptRun->fixupCount > 0) {
93 scriptRun->fixupCount -= 1;
94 }
95
96 scriptRun->pushCount -= 1;
97 scriptRun->parenSP = DEC1(scriptRun->parenSP);
98
99 /* If the stack is now empty, reset the stack
100 pointers to their initial values.
101 */
102 if (STACK_IS_EMPTY(scriptRun)) {
103 scriptRun->parenSP = -1;
104 }
105}
106
107static void fixup(UScriptRun *scriptRun, UScriptCode scriptCode)
108{
109 int32_t fixupSP = DEC(scriptRun->parenSP, scriptRun->fixupCount);
110
111 while (scriptRun->fixupCount-- > 0) {
112 fixupSP = INC1(fixupSP);
113 scriptRun->parenStack[fixupSP].scriptCode = scriptCode;
114 }
115}
116
b75a7d8f
A
/*
 * Return the zero-based position of the highest set bit in value,
 * i.e. floor(log2(value)).
 *
 * value - the number to examine
 *
 * Returns -32 when value is zero or negative.
 */
static int8_t
highBit(int32_t value)
{
    int32_t bit = 0;
    int32_t shift;

    if (value <= 0) {
        return -32;
    }

    /* Binary search over the bit positions: test 16, 8, 4, 2, then 1. */
    for (shift = 16; shift > 0; shift >>= 1) {
        if (value >= ((int32_t)1 << shift)) {
            value >>= shift;
            bit += shift;
        }
    }

    return (int8_t)bit;
}
153
154static int32_t
155getPairIndex(UChar32 ch)
156{
2ca993e8 157 int32_t pairedCharCount = UPRV_LENGTHOF(pairedChars);
b75a7d8f
A
158 int32_t pairedCharPower = 1 << highBit(pairedCharCount);
159 int32_t pairedCharExtra = pairedCharCount - pairedCharPower;
160
161 int32_t probe = pairedCharPower;
729e4ab9 162 int32_t pairIndex = 0;
b75a7d8f
A
163
164 if (ch >= pairedChars[pairedCharExtra]) {
729e4ab9 165 pairIndex = pairedCharExtra;
b75a7d8f
A
166 }
167
168 while (probe > (1 << 0)) {
169 probe >>= 1;
170
729e4ab9
A
171 if (ch >= pairedChars[pairIndex + probe]) {
172 pairIndex += probe;
b75a7d8f
A
173 }
174 }
175
729e4ab9
A
176 if (pairedChars[pairIndex] != ch) {
177 pairIndex = -1;
b75a7d8f
A
178 }
179
729e4ab9 180 return pairIndex;
b75a7d8f
A
181}
182
183static UBool
184sameScript(UScriptCode scriptOne, UScriptCode scriptTwo)
185{
186 return scriptOne <= USCRIPT_INHERITED || scriptTwo <= USCRIPT_INHERITED || scriptOne == scriptTwo;
187}
188
189U_CAPI UScriptRun * U_EXPORT2
190uscript_openRun(const UChar *src, int32_t length, UErrorCode *pErrorCode)
191{
192 UScriptRun *result = NULL;
193
194 if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) {
195 return NULL;
196 }
197
198 result = uprv_malloc(sizeof (UScriptRun));
199
200 if (result == NULL) {
201 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
202 return NULL;
203 }
204
205 uscript_setRunText(result, src, length, pErrorCode);
206
207 /* Release the UScriptRun if uscript_setRunText() returns an error */
208 if (U_FAILURE(*pErrorCode)) {
209 uprv_free(result);
210 result = NULL;
211 }
212
213 return result;
214}
215
216U_CAPI void U_EXPORT2
217uscript_closeRun(UScriptRun *scriptRun)
218{
219 if (scriptRun != NULL) {
220 uprv_free(scriptRun);
221 }
222}
223
224U_CAPI void U_EXPORT2
225uscript_resetRun(UScriptRun *scriptRun)
226{
227 if (scriptRun != NULL) {
228 scriptRun->scriptStart = 0;
229 scriptRun->scriptLimit = 0;
230 scriptRun->scriptCode = USCRIPT_INVALID_CODE;
231 scriptRun->parenSP = -1;
374ca955
A
232 scriptRun->pushCount = 0;
233 scriptRun->fixupCount = 0;
b75a7d8f
A
234 }
235}
236
237U_CAPI void U_EXPORT2
238uscript_setRunText(UScriptRun *scriptRun, const UChar *src, int32_t length, UErrorCode *pErrorCode)
239{
240 if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) {
241 return;
242 }
243
244 if (scriptRun == NULL || length < 0 || ((src == NULL) != (length == 0))) {
245 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
246 return;
247 }
248
249 scriptRun->textArray = src;
250 scriptRun->textLength = length;
251
252 uscript_resetRun(scriptRun);
253}
254
255U_CAPI UBool U_EXPORT2
256uscript_nextRun(UScriptRun *scriptRun, int32_t *pRunStart, int32_t *pRunLimit, UScriptCode *pRunScript)
257{
b75a7d8f
A
258 UErrorCode error = U_ZERO_ERROR;
259
260 /* if we've fallen off the end of the text, we're done */
261 if (scriptRun == NULL || scriptRun->scriptLimit >= scriptRun->textLength) {
262 return FALSE;
263 }
264
374ca955 265 SYNC_FIXUP(scriptRun);
b75a7d8f
A
266 scriptRun->scriptCode = USCRIPT_COMMON;
267
268 for (scriptRun->scriptStart = scriptRun->scriptLimit; scriptRun->scriptLimit < scriptRun->textLength; scriptRun->scriptLimit += 1) {
269 UChar high = scriptRun->textArray[scriptRun->scriptLimit];
270 UChar32 ch = high;
271 UScriptCode sc;
272 int32_t pairIndex;
273
274 /*
275 * if the character is a high surrogate and it's not the last one
276 * in the text, see if it's followed by a low surrogate
277 */
278 if (high >= 0xD800 && high <= 0xDBFF && scriptRun->scriptLimit < scriptRun->textLength - 1) {
279 UChar low = scriptRun->textArray[scriptRun->scriptLimit + 1];
280
281 /*
282 * if it is followed by a low surrogate,
283 * consume it and form the full character
284 */
285 if (low >= 0xDC00 && low <= 0xDFFF) {
286 ch = (high - 0xD800) * 0x0400 + low - 0xDC00 + 0x10000;
287 scriptRun->scriptLimit += 1;
288 }
289 }
290
291 sc = uscript_getScript(ch, &error);
292 pairIndex = getPairIndex(ch);
293
294 /*
295 * Paired character handling:
296 *
297 * if it's an open character, push it onto the stack.
298 * if it's a close character, find the matching open on the
299 * stack, and use that script code. Any non-matching open
300 * characters above it on the stack will be poped.
301 */
302 if (pairIndex >= 0) {
303 if ((pairIndex & 1) == 0) {
374ca955
A
304 push(scriptRun, pairIndex, scriptRun->scriptCode);
305 } else {
b75a7d8f
A
306 int32_t pi = pairIndex & ~1;
307
374ca955
A
308 while (STACK_IS_NOT_EMPTY(scriptRun) && TOP(scriptRun).pairIndex != pi) {
309 pop(scriptRun);
b75a7d8f
A
310 }
311
374ca955
A
312 if (STACK_IS_NOT_EMPTY(scriptRun)) {
313 sc = TOP(scriptRun).scriptCode;
b75a7d8f
A
314 }
315 }
316 }
317
318 if (sameScript(scriptRun->scriptCode, sc)) {
319 if (scriptRun->scriptCode <= USCRIPT_INHERITED && sc > USCRIPT_INHERITED) {
320 scriptRun->scriptCode = sc;
321
374ca955 322 fixup(scriptRun, scriptRun->scriptCode);
b75a7d8f
A
323 }
324
325 /*
326 * if this character is a close paired character,
374ca955 327 * pop the matching open character from the stack
b75a7d8f 328 */
374ca955
A
329 if (pairIndex >= 0 && (pairIndex & 1) != 0) {
330 pop(scriptRun);
b75a7d8f
A
331 }
332 } else {
333 /*
334 * if the run broke on a surrogate pair,
335 * end it before the high surrogate
336 */
337 if (ch >= 0x10000) {
338 scriptRun->scriptLimit -= 1;
339 }
340
341 break;
342 }
343 }
344
345
346 if (pRunStart != NULL) {
347 *pRunStart = scriptRun->scriptStart;
348 }
349
350 if (pRunLimit != NULL) {
351 *pRunLimit = scriptRun->scriptLimit;
352 }
353
354 if (pRunScript != NULL) {
355 *pRunScript = scriptRun->scriptCode;
356 }
357
358 return TRUE;
359}