]>
Commit | Line | Data |
---|---|---|
b37bf2e1 | 1 | /* |
b37bf2e1 A |
2 | * Copyright (C) 2006 George Staikos <staikos@kde.org> |
3 | * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> | |
9dae56ea | 4 | * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. |
b37bf2e1 A |
5 | * |
6 | * This library is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU Library General Public | |
8 | * License as published by the Free Software Foundation; either | |
9 | * version 2 of the License, or (at your option) any later version. | |
10 | * | |
11 | * This library is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | * Library General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU Library General Public License | |
17 | * along with this library; see the file COPYING.LIB. If not, write to | |
18 | * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | |
19 | * Boston, MA 02110-1301, USA. | |
20 | * | |
21 | */ | |
22 | ||
9dae56ea A |
23 | #ifndef WTF_UNICODE_QT4_H |
24 | #define WTF_UNICODE_QT4_H | |
b37bf2e1 A |
25 | |
26 | #include <QChar> | |
27 | #include <QString> | |
28 | ||
29 | #include <config.h> | |
30 | ||
31 | #include <stdint.h> | |
32 | ||
9dae56ea | 33 | QT_BEGIN_NAMESPACE |
b37bf2e1 A |
34 | namespace QUnicodeTables { |
35 | struct Properties { | |
36 | ushort category : 8; | |
37 | ushort line_break_class : 8; | |
38 | ushort direction : 8; | |
39 | ushort combiningClass :8; | |
40 | ushort joining : 2; | |
41 | signed short digitValue : 6; /* 5 needed */ | |
42 | ushort unicodeVersion : 4; | |
43 | ushort lowerCaseSpecial : 1; | |
44 | ushort upperCaseSpecial : 1; | |
45 | ushort titleCaseSpecial : 1; | |
46 | ushort caseFoldSpecial : 1; /* currently unused */ | |
47 | signed short mirrorDiff : 16; | |
48 | signed short lowerCaseDiff : 16; | |
49 | signed short upperCaseDiff : 16; | |
50 | signed short titleCaseDiff : 16; | |
51 | signed short caseFoldDiff : 16; | |
52 | }; | |
53 | Q_CORE_EXPORT const Properties * QT_FASTCALL properties(uint ucs4); | |
54 | Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2); | |
55 | } | |
9dae56ea | 56 | QT_END_NAMESPACE |
b37bf2e1 A |
57 | |
58 | // ugly hack to make UChar compatible with JSChar in API/JSStringRef.h | |
f9bf01c6 | 59 | #if defined(Q_OS_WIN) || COMPILER(WINSCW) |
b37bf2e1 A |
60 | typedef wchar_t UChar; |
61 | #else | |
62 | typedef uint16_t UChar; | |
63 | #endif | |
64 | typedef uint32_t UChar32; | |
65 | ||
66 | // some defines from ICU | |
67 | ||
// --- Surrogate classification ------------------------------------------------
// A lead (high) surrogate lies in 0xD800..0xDBFF, a trail (low) one in 0xDC00..0xDFFF.
#define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
#define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)

// Constant subtracted when recombining a lead/trail pair into one code point.
#define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
#define U16_GET_SUPPLEMENTARY(lead, trail) \
    (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET)

// Split a supplementary code point (>= 0x10000) into its two UTF-16 units.
#define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0)
#define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00)

// True for any value in 0xD800..0xDFFF.
#define U_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800)
#define U16_IS_SINGLE(c) !U_IS_SURROGATE(c)
#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c)
// Only meaningful when c is already known to be a surrogate: bit 10 clear means lead.
#define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)

// --- Iteration -----------------------------------------------------------------
// Reads the code point starting at s[i] into c and advances i past it.
// A lead surrogate is combined with the following unit only when that unit is
// inside [i, length) and is a trail surrogate; otherwise c is the lone unit.
#define U16_NEXT(s, i, length, c) { \
    (c)=(s)[(i)++]; \
    if(U16_IS_LEAD(c)) { \
        uint16_t __c2; \
        if((i)<(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
            ++(i); \
            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
        } \
    } \
}

// Reads the code point ending just before s[i] into c and moves i back to its
// first unit. A trail surrogate is combined with the preceding unit only when
// that unit is not before start and is a lead surrogate.
#define U16_PREV(s, start, i, c) { \
    (c)=(s)[--(i)]; \
    if(U16_IS_TRAIL(c)) { \
        uint16_t __c2; \
        if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
            --(i); \
            (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
        } \
    } \
}

// Single-bit mask with bit number x set.
#define U_MASK(x) ((uint32_t)1<<(x))
105 | ||
106 | namespace WTF { | |
9dae56ea A |
107 | namespace Unicode { |
108 | ||
109 | enum Direction { | |
110 | LeftToRight = QChar::DirL, | |
111 | RightToLeft = QChar::DirR, | |
112 | EuropeanNumber = QChar::DirEN, | |
113 | EuropeanNumberSeparator = QChar::DirES, | |
114 | EuropeanNumberTerminator = QChar::DirET, | |
115 | ArabicNumber = QChar::DirAN, | |
116 | CommonNumberSeparator = QChar::DirCS, | |
117 | BlockSeparator = QChar::DirB, | |
118 | SegmentSeparator = QChar::DirS, | |
119 | WhiteSpaceNeutral = QChar::DirWS, | |
120 | OtherNeutral = QChar::DirON, | |
121 | LeftToRightEmbedding = QChar::DirLRE, | |
122 | LeftToRightOverride = QChar::DirLRO, | |
123 | RightToLeftArabic = QChar::DirAL, | |
124 | RightToLeftEmbedding = QChar::DirRLE, | |
125 | RightToLeftOverride = QChar::DirRLO, | |
126 | PopDirectionalFormat = QChar::DirPDF, | |
127 | NonSpacingMark = QChar::DirNSM, | |
128 | BoundaryNeutral = QChar::DirBN | |
129 | }; | |
130 | ||
131 | enum DecompositionType { | |
132 | DecompositionNone = QChar::NoDecomposition, | |
133 | DecompositionCanonical = QChar::Canonical, | |
134 | DecompositionCompat = QChar::Compat, | |
135 | DecompositionCircle = QChar::Circle, | |
136 | DecompositionFinal = QChar::Final, | |
137 | DecompositionFont = QChar::Font, | |
138 | DecompositionFraction = QChar::Fraction, | |
139 | DecompositionInitial = QChar::Initial, | |
140 | DecompositionIsolated = QChar::Isolated, | |
141 | DecompositionMedial = QChar::Medial, | |
142 | DecompositionNarrow = QChar::Narrow, | |
143 | DecompositionNoBreak = QChar::NoBreak, | |
144 | DecompositionSmall = QChar::Small, | |
145 | DecompositionSquare = QChar::Square, | |
146 | DecompositionSub = QChar::Sub, | |
147 | DecompositionSuper = QChar::Super, | |
148 | DecompositionVertical = QChar::Vertical, | |
149 | DecompositionWide = QChar::Wide | |
150 | }; | |
151 | ||
152 | enum CharCategory { | |
153 | NoCategory = 0, | |
154 | Mark_NonSpacing = U_MASK(QChar::Mark_NonSpacing), | |
155 | Mark_SpacingCombining = U_MASK(QChar::Mark_SpacingCombining), | |
156 | Mark_Enclosing = U_MASK(QChar::Mark_Enclosing), | |
157 | Number_DecimalDigit = U_MASK(QChar::Number_DecimalDigit), | |
158 | Number_Letter = U_MASK(QChar::Number_Letter), | |
159 | Number_Other = U_MASK(QChar::Number_Other), | |
160 | Separator_Space = U_MASK(QChar::Separator_Space), | |
161 | Separator_Line = U_MASK(QChar::Separator_Line), | |
162 | Separator_Paragraph = U_MASK(QChar::Separator_Paragraph), | |
163 | Other_Control = U_MASK(QChar::Other_Control), | |
164 | Other_Format = U_MASK(QChar::Other_Format), | |
165 | Other_Surrogate = U_MASK(QChar::Other_Surrogate), | |
166 | Other_PrivateUse = U_MASK(QChar::Other_PrivateUse), | |
167 | Other_NotAssigned = U_MASK(QChar::Other_NotAssigned), | |
168 | Letter_Uppercase = U_MASK(QChar::Letter_Uppercase), | |
169 | Letter_Lowercase = U_MASK(QChar::Letter_Lowercase), | |
170 | Letter_Titlecase = U_MASK(QChar::Letter_Titlecase), | |
171 | Letter_Modifier = U_MASK(QChar::Letter_Modifier), | |
172 | Letter_Other = U_MASK(QChar::Letter_Other), | |
173 | Punctuation_Connector = U_MASK(QChar::Punctuation_Connector), | |
174 | Punctuation_Dash = U_MASK(QChar::Punctuation_Dash), | |
175 | Punctuation_Open = U_MASK(QChar::Punctuation_Open), | |
176 | Punctuation_Close = U_MASK(QChar::Punctuation_Close), | |
177 | Punctuation_InitialQuote = U_MASK(QChar::Punctuation_InitialQuote), | |
178 | Punctuation_FinalQuote = U_MASK(QChar::Punctuation_FinalQuote), | |
179 | Punctuation_Other = U_MASK(QChar::Punctuation_Other), | |
180 | Symbol_Math = U_MASK(QChar::Symbol_Math), | |
181 | Symbol_Currency = U_MASK(QChar::Symbol_Currency), | |
182 | Symbol_Modifier = U_MASK(QChar::Symbol_Modifier), | |
183 | Symbol_Other = U_MASK(QChar::Symbol_Other) | |
184 | }; | |
b37bf2e1 A |
185 | |
186 | ||
9dae56ea | 187 | // FIXME: handle surrogates correctly in all methods |
b37bf2e1 | 188 | |
9dae56ea A |
189 | inline UChar32 toLower(UChar32 ch) |
190 | { | |
191 | return QChar::toLower(ch); | |
192 | } | |
193 | ||
194 | inline int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) | |
195 | { | |
196 | const UChar *e = src + srcLength; | |
197 | const UChar *s = src; | |
198 | UChar *r = result; | |
199 | uint rindex = 0; | |
200 | ||
201 | // this avoids one out of bounds check in the loop | |
202 | if (s < e && QChar(*s).isLowSurrogate()) { | |
203 | if (r) | |
204 | r[rindex] = *s++; | |
205 | ++rindex; | |
206 | } | |
207 | ||
208 | int needed = 0; | |
209 | while (s < e && (rindex < uint(resultLength) || !r)) { | |
210 | uint c = *s; | |
211 | if (QChar(c).isLowSurrogate() && QChar(*(s - 1)).isHighSurrogate()) | |
212 | c = QChar::surrogateToUcs4(*(s - 1), c); | |
213 | const QUnicodeTables::Properties *prop = QUnicodeTables::properties(c); | |
214 | if (prop->lowerCaseSpecial) { | |
215 | QString qstring; | |
216 | if (c < 0x10000) { | |
217 | qstring += QChar(c); | |
b37bf2e1 | 218 | } else { |
9dae56ea A |
219 | qstring += QChar(*(s-1)); |
220 | qstring += QChar(*s); | |
b37bf2e1 | 221 | } |
9dae56ea A |
222 | qstring = qstring.toLower(); |
223 | for (int i = 0; i < qstring.length(); ++i) { | |
224 | if (rindex >= uint(resultLength)) { | |
225 | needed += qstring.length() - i; | |
226 | break; | |
227 | } | |
228 | if (r) | |
229 | r[rindex] = qstring.at(i).unicode(); | |
230 | ++rindex; | |
231 | } | |
232 | } else { | |
233 | if (r) | |
234 | r[rindex] = *s + prop->lowerCaseDiff; | |
235 | ++rindex; | |
b37bf2e1 | 236 | } |
9dae56ea A |
237 | ++s; |
238 | } | |
239 | if (s < e) | |
240 | needed += e - s; | |
241 | *error = (needed != 0); | |
242 | if (rindex < uint(resultLength)) | |
243 | r[rindex] = 0; | |
244 | return rindex + needed; | |
245 | } | |
b37bf2e1 | 246 | |
9dae56ea A |
247 | inline UChar32 toUpper(UChar32 ch) |
248 | { | |
249 | return QChar::toUpper(ch); | |
250 | } | |
b37bf2e1 | 251 | |
9dae56ea A |
252 | inline int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) |
253 | { | |
254 | const UChar *e = src + srcLength; | |
255 | const UChar *s = src; | |
256 | UChar *r = result; | |
257 | int rindex = 0; | |
258 | ||
259 | // this avoids one out of bounds check in the loop | |
260 | if (s < e && QChar(*s).isLowSurrogate()) { | |
261 | if (r) | |
262 | r[rindex] = *s++; | |
263 | ++rindex; | |
264 | } | |
265 | ||
266 | int needed = 0; | |
267 | while (s < e && (rindex < resultLength || !r)) { | |
268 | uint c = *s; | |
269 | if (QChar(c).isLowSurrogate() && QChar(*(s - 1)).isHighSurrogate()) | |
270 | c = QChar::surrogateToUcs4(*(s - 1), c); | |
271 | const QUnicodeTables::Properties *prop = QUnicodeTables::properties(c); | |
272 | if (prop->upperCaseSpecial) { | |
273 | QString qstring; | |
274 | if (c < 0x10000) { | |
275 | qstring += QChar(c); | |
b37bf2e1 | 276 | } else { |
9dae56ea A |
277 | qstring += QChar(*(s-1)); |
278 | qstring += QChar(*s); | |
279 | } | |
280 | qstring = qstring.toUpper(); | |
281 | for (int i = 0; i < qstring.length(); ++i) { | |
282 | if (rindex >= resultLength) { | |
283 | needed += qstring.length() - i; | |
284 | break; | |
285 | } | |
286 | if (r) | |
287 | r[rindex] = qstring.at(i).unicode(); | |
288 | ++rindex; | |
b37bf2e1 | 289 | } |
9dae56ea A |
290 | } else { |
291 | if (r) | |
292 | r[rindex] = *s + prop->upperCaseDiff; | |
293 | ++rindex; | |
b37bf2e1 | 294 | } |
9dae56ea A |
295 | ++s; |
296 | } | |
297 | if (s < e) | |
298 | needed += e - s; | |
299 | *error = (needed != 0); | |
300 | if (rindex < resultLength) | |
301 | r[rindex] = 0; | |
302 | return rindex + needed; | |
303 | } | |
b37bf2e1 | 304 | |
9dae56ea A |
305 | inline int toTitleCase(UChar32 c) |
306 | { | |
307 | return QChar::toTitleCase(c); | |
308 | } | |
b37bf2e1 | 309 | |
9dae56ea A |
310 | inline UChar32 foldCase(UChar32 c) |
311 | { | |
312 | return QChar::toCaseFolded(c); | |
313 | } | |
b37bf2e1 | 314 | |
9dae56ea A |
315 | inline int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) |
316 | { | |
317 | // FIXME: handle special casing. Easiest with some low level API in Qt | |
318 | *error = false; | |
319 | if (resultLength < srcLength) { | |
b37bf2e1 A |
320 | *error = true; |
321 | return srcLength; | |
b37bf2e1 | 322 | } |
9dae56ea A |
323 | for (int i = 0; i < srcLength; ++i) |
324 | result[i] = QChar::toCaseFolded(ushort(src[i])); | |
325 | return srcLength; | |
326 | } | |
b37bf2e1 | 327 | |
9dae56ea A |
328 | inline bool isArabicChar(UChar32 c) |
329 | { | |
330 | return c >= 0x0600 && c <= 0x06FF; | |
331 | } | |
b37bf2e1 | 332 | |
9dae56ea A |
333 | inline bool isPrintableChar(UChar32 c) |
334 | { | |
335 | const uint test = U_MASK(QChar::Other_Control) | | |
336 | U_MASK(QChar::Other_NotAssigned); | |
337 | return !(U_MASK(QChar::category(c)) & test); | |
338 | } | |
b37bf2e1 | 339 | |
9dae56ea A |
340 | inline bool isSeparatorSpace(UChar32 c) |
341 | { | |
342 | return QChar::category(c) == QChar::Separator_Space; | |
343 | } | |
b37bf2e1 | 344 | |
9dae56ea A |
345 | inline bool isPunct(UChar32 c) |
346 | { | |
347 | const uint test = U_MASK(QChar::Punctuation_Connector) | | |
348 | U_MASK(QChar::Punctuation_Dash) | | |
349 | U_MASK(QChar::Punctuation_Open) | | |
350 | U_MASK(QChar::Punctuation_Close) | | |
351 | U_MASK(QChar::Punctuation_InitialQuote) | | |
352 | U_MASK(QChar::Punctuation_FinalQuote) | | |
353 | U_MASK(QChar::Punctuation_Other); | |
354 | return U_MASK(QChar::category(c)) & test; | |
355 | } | |
b37bf2e1 | 356 | |
9dae56ea A |
357 | inline bool isLower(UChar32 c) |
358 | { | |
359 | return QChar::category(c) == QChar::Letter_Lowercase; | |
360 | } | |
b37bf2e1 | 361 | |
9dae56ea A |
362 | inline bool hasLineBreakingPropertyComplexContext(UChar32) |
363 | { | |
364 | // FIXME: Implement this to return whether the character has line breaking property SA (Complex Context). | |
365 | return false; | |
366 | } | |
b37bf2e1 | 367 | |
9dae56ea A |
368 | inline UChar32 mirroredChar(UChar32 c) |
369 | { | |
370 | return QChar::mirroredChar(c); | |
371 | } | |
b37bf2e1 | 372 | |
9dae56ea A |
373 | inline uint8_t combiningClass(UChar32 c) |
374 | { | |
375 | return QChar::combiningClass(c); | |
376 | } | |
b37bf2e1 | 377 | |
9dae56ea A |
378 | inline DecompositionType decompositionType(UChar32 c) |
379 | { | |
380 | return (DecompositionType)QChar::decompositionTag(c); | |
381 | } | |
b37bf2e1 | 382 | |
9dae56ea A |
383 | inline int umemcasecmp(const UChar* a, const UChar* b, int len) |
384 | { | |
385 | // handle surrogates correctly | |
386 | for (int i = 0; i < len; ++i) { | |
387 | uint c1 = QChar::toCaseFolded(ushort(a[i])); | |
388 | uint c2 = QChar::toCaseFolded(ushort(b[i])); | |
389 | if (c1 != c2) | |
390 | return c1 - c2; | |
b37bf2e1 | 391 | } |
9dae56ea A |
392 | return 0; |
393 | } | |
b37bf2e1 | 394 | |
9dae56ea A |
395 | inline Direction direction(UChar32 c) |
396 | { | |
397 | return (Direction)QChar::direction(c); | |
398 | } | |
b37bf2e1 | 399 | |
9dae56ea A |
400 | inline CharCategory category(UChar32 c) |
401 | { | |
402 | return (CharCategory) U_MASK(QChar::category(c)); | |
403 | } | |
b37bf2e1 | 404 | |
9dae56ea | 405 | } } |
b37bf2e1 | 406 | |
9dae56ea | 407 | #endif // WTF_UNICODE_QT4_H |