]> git.saurik.com Git - apple/javascriptcore.git/blob - wtf/unicode/qt4/UnicodeQt4.h
f65e2920fd2f57871c631ad84d97fb2290777234
[apple/javascriptcore.git] / wtf / unicode / qt4 / UnicodeQt4.h
1 /*
2 * Copyright (C) 2006 George Staikos <staikos@kde.org>
3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
4 * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 *
21 */
22
23 #ifndef WTF_UNICODE_QT4_H
24 #define WTF_UNICODE_QT4_H
25
26 #include <QChar>
27 #include <QString>
28
29 #include <config.h>
30
31 #include <stdint.h>
32
#if QT_VERSION >= 0x040300
// Local declaration of Qt's QUnicodeTables property record and its lookup
// functions, so that the buffer case-conversion helpers in this header can
// read the per-character casing data directly.
// NOTE(review): presumably this has to stay layout-compatible with the
// definition inside the Qt version being linked against -- confirm before
// touching any field or bit width.
QT_BEGIN_NAMESPACE
namespace QUnicodeTables {
    struct Properties {
        ushort category : 8;
        ushort line_break_class : 8;
        ushort direction : 8;
        ushort combiningClass :8;
        ushort joining : 2;
        signed short digitValue : 6; /* 5 needed */
        ushort unicodeVersion : 4;
        // Flags: set when the simple *CaseDiff offset is not sufficient.
        ushort lowerCaseSpecial : 1;
        ushort upperCaseSpecial : 1;
        ushort titleCaseSpecial : 1;
        ushort caseFoldSpecial : 1; /* currently unused */
        // Signed offsets added to a code unit to obtain the mapped value.
        signed short mirrorDiff : 16;
        signed short lowerCaseDiff : 16;
        signed short upperCaseDiff : 16;
        signed short titleCaseDiff : 16;
        signed short caseFoldDiff : 16;
    };

    // Property lookup by UCS-4 code point and by UCS-2 code unit.
    Q_CORE_EXPORT const Properties * QT_FASTCALL properties(uint ucs4);
    Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2);
}
QT_END_NAMESPACE
#endif
59
// ugly hack to make UChar compatible with JSChar in API/JSStringRef.h
#if defined(Q_OS_WIN)
typedef wchar_t UChar;
#else
typedef uint16_t UChar;
#endif
typedef uint32_t UChar32;

// A handful of UTF-16 helper macros taken over from ICU.

// Is c a lead (high) surrogate, 0xD800..0xDBFF?
#define U16_IS_LEAD(c) (((c) & 0xfffffc00) == 0xd800)
// Is c a trail (low) surrogate, 0xDC00..0xDFFF?
#define U16_IS_TRAIL(c) (((c) & 0xfffffc00) == 0xdc00)
// Constant subtracted when combining a lead/trail pair into a code point.
#define U16_SURROGATE_OFFSET ((0xd800 << 10UL) + 0xdc00 - 0x10000)
// Combines a lead and a trail surrogate into the supplementary code point.
#define U16_GET_SUPPLEMENTARY(lead, trail) \
    (((UChar32)(lead) << 10UL) + (UChar32)(trail) - U16_SURROGATE_OFFSET)

// Lead / trail surrogate for a supplementary code point.
#define U16_LEAD(supplementary) (UChar)(((supplementary) >> 10) + 0xd7c0)
#define U16_TRAIL(supplementary) (UChar)(((supplementary) & 0x3ff) | 0xdc00)

// Is c any surrogate code unit, 0xD800..0xDFFF?
#define U_IS_SURROGATE(c) (((c) & 0xfffff800) == 0xd800)
#define U16_IS_SINGLE(c) !U_IS_SURROGATE(c)
#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c)
// Only meaningful when c is already known to be a surrogate.
#define U16_IS_SURROGATE_LEAD(c) (((c) & 0x400) == 0)

// Reads the code point at s[i] into c and advances i past it. A lead
// surrogate is combined with the following unit only when that unit lies
// before length and is a trail surrogate; otherwise c is the lone unit.
#define U16_NEXT(s, i, length, c) { \
    (c) = (s)[(i)++]; \
    if (U16_IS_LEAD(c)) { \
        uint16_t __c2; \
        if ((i) < (length) && U16_IS_TRAIL(__c2 = (s)[(i)])) { \
            ++(i); \
            (c) = U16_GET_SUPPLEMENTARY((c), __c2); \
        } \
    } \
}

// Single-bit mask with bit number x set.
#define U_MASK(x) ((uint32_t)1 << (x))
96
97 namespace WTF {
98 namespace Unicode {
99
100 enum Direction {
101 LeftToRight = QChar::DirL,
102 RightToLeft = QChar::DirR,
103 EuropeanNumber = QChar::DirEN,
104 EuropeanNumberSeparator = QChar::DirES,
105 EuropeanNumberTerminator = QChar::DirET,
106 ArabicNumber = QChar::DirAN,
107 CommonNumberSeparator = QChar::DirCS,
108 BlockSeparator = QChar::DirB,
109 SegmentSeparator = QChar::DirS,
110 WhiteSpaceNeutral = QChar::DirWS,
111 OtherNeutral = QChar::DirON,
112 LeftToRightEmbedding = QChar::DirLRE,
113 LeftToRightOverride = QChar::DirLRO,
114 RightToLeftArabic = QChar::DirAL,
115 RightToLeftEmbedding = QChar::DirRLE,
116 RightToLeftOverride = QChar::DirRLO,
117 PopDirectionalFormat = QChar::DirPDF,
118 NonSpacingMark = QChar::DirNSM,
119 BoundaryNeutral = QChar::DirBN
120 };
121
122 enum DecompositionType {
123 DecompositionNone = QChar::NoDecomposition,
124 DecompositionCanonical = QChar::Canonical,
125 DecompositionCompat = QChar::Compat,
126 DecompositionCircle = QChar::Circle,
127 DecompositionFinal = QChar::Final,
128 DecompositionFont = QChar::Font,
129 DecompositionFraction = QChar::Fraction,
130 DecompositionInitial = QChar::Initial,
131 DecompositionIsolated = QChar::Isolated,
132 DecompositionMedial = QChar::Medial,
133 DecompositionNarrow = QChar::Narrow,
134 DecompositionNoBreak = QChar::NoBreak,
135 DecompositionSmall = QChar::Small,
136 DecompositionSquare = QChar::Square,
137 DecompositionSub = QChar::Sub,
138 DecompositionSuper = QChar::Super,
139 DecompositionVertical = QChar::Vertical,
140 DecompositionWide = QChar::Wide
141 };
142
143 enum CharCategory {
144 NoCategory = 0,
145 Mark_NonSpacing = U_MASK(QChar::Mark_NonSpacing),
146 Mark_SpacingCombining = U_MASK(QChar::Mark_SpacingCombining),
147 Mark_Enclosing = U_MASK(QChar::Mark_Enclosing),
148 Number_DecimalDigit = U_MASK(QChar::Number_DecimalDigit),
149 Number_Letter = U_MASK(QChar::Number_Letter),
150 Number_Other = U_MASK(QChar::Number_Other),
151 Separator_Space = U_MASK(QChar::Separator_Space),
152 Separator_Line = U_MASK(QChar::Separator_Line),
153 Separator_Paragraph = U_MASK(QChar::Separator_Paragraph),
154 Other_Control = U_MASK(QChar::Other_Control),
155 Other_Format = U_MASK(QChar::Other_Format),
156 Other_Surrogate = U_MASK(QChar::Other_Surrogate),
157 Other_PrivateUse = U_MASK(QChar::Other_PrivateUse),
158 Other_NotAssigned = U_MASK(QChar::Other_NotAssigned),
159 Letter_Uppercase = U_MASK(QChar::Letter_Uppercase),
160 Letter_Lowercase = U_MASK(QChar::Letter_Lowercase),
161 Letter_Titlecase = U_MASK(QChar::Letter_Titlecase),
162 Letter_Modifier = U_MASK(QChar::Letter_Modifier),
163 Letter_Other = U_MASK(QChar::Letter_Other),
164 Punctuation_Connector = U_MASK(QChar::Punctuation_Connector),
165 Punctuation_Dash = U_MASK(QChar::Punctuation_Dash),
166 Punctuation_Open = U_MASK(QChar::Punctuation_Open),
167 Punctuation_Close = U_MASK(QChar::Punctuation_Close),
168 Punctuation_InitialQuote = U_MASK(QChar::Punctuation_InitialQuote),
169 Punctuation_FinalQuote = U_MASK(QChar::Punctuation_FinalQuote),
170 Punctuation_Other = U_MASK(QChar::Punctuation_Other),
171 Symbol_Math = U_MASK(QChar::Symbol_Math),
172 Symbol_Currency = U_MASK(QChar::Symbol_Currency),
173 Symbol_Modifier = U_MASK(QChar::Symbol_Modifier),
174 Symbol_Other = U_MASK(QChar::Symbol_Other)
175 };
176
177
178 #if QT_VERSION >= 0x040300
179
180 // FIXME: handle surrogates correctly in all methods
181
182 inline UChar32 toLower(UChar32 ch)
183 {
184 return QChar::toLower(ch);
185 }
186
187 inline int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
188 {
189 const UChar *e = src + srcLength;
190 const UChar *s = src;
191 UChar *r = result;
192 uint rindex = 0;
193
194 // this avoids one out of bounds check in the loop
195 if (s < e && QChar(*s).isLowSurrogate()) {
196 if (r)
197 r[rindex] = *s++;
198 ++rindex;
199 }
200
201 int needed = 0;
202 while (s < e && (rindex < uint(resultLength) || !r)) {
203 uint c = *s;
204 if (QChar(c).isLowSurrogate() && QChar(*(s - 1)).isHighSurrogate())
205 c = QChar::surrogateToUcs4(*(s - 1), c);
206 const QUnicodeTables::Properties *prop = QUnicodeTables::properties(c);
207 if (prop->lowerCaseSpecial) {
208 QString qstring;
209 if (c < 0x10000) {
210 qstring += QChar(c);
211 } else {
212 qstring += QChar(*(s-1));
213 qstring += QChar(*s);
214 }
215 qstring = qstring.toLower();
216 for (int i = 0; i < qstring.length(); ++i) {
217 if (rindex >= uint(resultLength)) {
218 needed += qstring.length() - i;
219 break;
220 }
221 if (r)
222 r[rindex] = qstring.at(i).unicode();
223 ++rindex;
224 }
225 } else {
226 if (r)
227 r[rindex] = *s + prop->lowerCaseDiff;
228 ++rindex;
229 }
230 ++s;
231 }
232 if (s < e)
233 needed += e - s;
234 *error = (needed != 0);
235 if (rindex < uint(resultLength))
236 r[rindex] = 0;
237 return rindex + needed;
238 }
239
240 inline UChar32 toUpper(UChar32 ch)
241 {
242 return QChar::toUpper(ch);
243 }
244
245 inline int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
246 {
247 const UChar *e = src + srcLength;
248 const UChar *s = src;
249 UChar *r = result;
250 int rindex = 0;
251
252 // this avoids one out of bounds check in the loop
253 if (s < e && QChar(*s).isLowSurrogate()) {
254 if (r)
255 r[rindex] = *s++;
256 ++rindex;
257 }
258
259 int needed = 0;
260 while (s < e && (rindex < resultLength || !r)) {
261 uint c = *s;
262 if (QChar(c).isLowSurrogate() && QChar(*(s - 1)).isHighSurrogate())
263 c = QChar::surrogateToUcs4(*(s - 1), c);
264 const QUnicodeTables::Properties *prop = QUnicodeTables::properties(c);
265 if (prop->upperCaseSpecial) {
266 QString qstring;
267 if (c < 0x10000) {
268 qstring += QChar(c);
269 } else {
270 qstring += QChar(*(s-1));
271 qstring += QChar(*s);
272 }
273 qstring = qstring.toUpper();
274 for (int i = 0; i < qstring.length(); ++i) {
275 if (rindex >= resultLength) {
276 needed += qstring.length() - i;
277 break;
278 }
279 if (r)
280 r[rindex] = qstring.at(i).unicode();
281 ++rindex;
282 }
283 } else {
284 if (r)
285 r[rindex] = *s + prop->upperCaseDiff;
286 ++rindex;
287 }
288 ++s;
289 }
290 if (s < e)
291 needed += e - s;
292 *error = (needed != 0);
293 if (rindex < resultLength)
294 r[rindex] = 0;
295 return rindex + needed;
296 }
297
298 inline int toTitleCase(UChar32 c)
299 {
300 return QChar::toTitleCase(c);
301 }
302
303 inline UChar32 foldCase(UChar32 c)
304 {
305 return QChar::toCaseFolded(c);
306 }
307
308 inline int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
309 {
310 // FIXME: handle special casing. Easiest with some low level API in Qt
311 *error = false;
312 if (resultLength < srcLength) {
313 *error = true;
314 return srcLength;
315 }
316 for (int i = 0; i < srcLength; ++i)
317 result[i] = QChar::toCaseFolded(ushort(src[i]));
318 return srcLength;
319 }
320
321 inline bool isArabicChar(UChar32 c)
322 {
323 return c >= 0x0600 && c <= 0x06FF;
324 }
325
326 inline bool isPrintableChar(UChar32 c)
327 {
328 const uint test = U_MASK(QChar::Other_Control) |
329 U_MASK(QChar::Other_NotAssigned);
330 return !(U_MASK(QChar::category(c)) & test);
331 }
332
333 inline bool isSeparatorSpace(UChar32 c)
334 {
335 return QChar::category(c) == QChar::Separator_Space;
336 }
337
338 inline bool isPunct(UChar32 c)
339 {
340 const uint test = U_MASK(QChar::Punctuation_Connector) |
341 U_MASK(QChar::Punctuation_Dash) |
342 U_MASK(QChar::Punctuation_Open) |
343 U_MASK(QChar::Punctuation_Close) |
344 U_MASK(QChar::Punctuation_InitialQuote) |
345 U_MASK(QChar::Punctuation_FinalQuote) |
346 U_MASK(QChar::Punctuation_Other);
347 return U_MASK(QChar::category(c)) & test;
348 }
349
350 inline bool isLower(UChar32 c)
351 {
352 return QChar::category(c) == QChar::Letter_Lowercase;
353 }
354
355 inline bool hasLineBreakingPropertyComplexContext(UChar32)
356 {
357 // FIXME: Implement this to return whether the character has line breaking property SA (Complex Context).
358 return false;
359 }
360
361 inline UChar32 mirroredChar(UChar32 c)
362 {
363 return QChar::mirroredChar(c);
364 }
365
366 inline uint8_t combiningClass(UChar32 c)
367 {
368 return QChar::combiningClass(c);
369 }
370
371 inline DecompositionType decompositionType(UChar32 c)
372 {
373 return (DecompositionType)QChar::decompositionTag(c);
374 }
375
376 inline int umemcasecmp(const UChar* a, const UChar* b, int len)
377 {
378 // handle surrogates correctly
379 for (int i = 0; i < len; ++i) {
380 uint c1 = QChar::toCaseFolded(ushort(a[i]));
381 uint c2 = QChar::toCaseFolded(ushort(b[i]));
382 if (c1 != c2)
383 return c1 - c2;
384 }
385 return 0;
386 }
387
388 inline Direction direction(UChar32 c)
389 {
390 return (Direction)QChar::direction(c);
391 }
392
393 inline CharCategory category(UChar32 c)
394 {
395 return (CharCategory) U_MASK(QChar::category(c));
396 }
397
398 #else
399
400 inline UChar32 toLower(UChar32 ch)
401 {
402 if (ch > 0xffff)
403 return ch;
404 return QChar((unsigned short)ch).toLower().unicode();
405 }
406
407 inline int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
408 {
409 *error = false;
410 if (resultLength < srcLength) {
411 *error = true;
412 return srcLength;
413 }
414 for (int i = 0; i < srcLength; ++i)
415 result[i] = QChar(src[i]).toLower().unicode();
416 return srcLength;
417 }
418
419 inline UChar32 toUpper(UChar32 ch)
420 {
421 if (ch > 0xffff)
422 return ch;
423 return QChar((unsigned short)ch).toUpper().unicode();
424 }
425
426 inline int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
427 {
428 *error = false;
429 if (resultLength < srcLength) {
430 *error = true;
431 return srcLength;
432 }
433 for (int i = 0; i < srcLength; ++i)
434 result[i] = QChar(src[i]).toUpper().unicode();
435 return srcLength;
436 }
437
438 inline int toTitleCase(UChar32 c)
439 {
440 if (c > 0xffff)
441 return c;
442 return QChar((unsigned short)c).toUpper().unicode();
443 }
444
445 inline UChar32 foldCase(UChar32 c)
446 {
447 if (c > 0xffff)
448 return c;
449 return QChar((unsigned short)c).toLower().unicode();
450 }
451
452 inline int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
453 {
454 return toLower(result, resultLength, src, srcLength, error);
455 }
456
457 inline bool isPrintableChar(UChar32 c)
458 {
459 return (c & 0xffff0000) == 0 && QChar((unsigned short)c).isPrint();
460 }
461
462 inline bool isArabicChar(UChar32 c)
463 {
464 return c >= 0x0600 && c <= 0x06FF;
465 }
466
467 inline bool isSeparatorSpace(UChar32 c)
468 {
469 return (c & 0xffff0000) == 0 && QChar((unsigned short)c).category() == QChar::Separator_Space;
470 }
471
472 inline bool isPunct(UChar32 c)
473 {
474 return (c & 0xffff0000) == 0 && QChar((unsigned short)c).isPunct();
475 }
476
477 inline bool isLower(UChar32 c)
478 {
479 return (c & 0xffff0000) == 0 && QChar((unsigned short)c).category() == QChar::Letter_Lowercase;
480 }
481
482 inline UChar32 mirroredChar(UChar32 c)
483 {
484 if (c > 0xffff)
485 return c;
486 return QChar(c).mirroredChar().unicode();
487 }
488
489 inline uint8_t combiningClass(UChar32 c)
490 {
491 if (c > 0xffff)
492 return 0;
493 return QChar((unsigned short)c).combiningClass();
494 }
495
496 inline DecompositionType decompositionType(UChar32 c)
497 {
498 if (c > 0xffff)
499 return DecompositionNone;
500 return (DecompositionType)QChar(c).decompositionTag();
501 }
502
503 inline int umemcasecmp(const UChar* a, const UChar* b, int len)
504 {
505 for (int i = 0; i < len; ++i) {
506 QChar c1 = QChar(a[i]).toLower();
507 QChar c2 = QChar(b[i]).toLower();
508 if (c1 != c2)
509 return c1.unicode() - c2.unicode();
510 }
511 return 0;
512 }
513
514 inline Direction direction(UChar32 c)
515 {
516 if (c > 0xffff)
517 return LeftToRight;
518 return (Direction)QChar(c).direction();
519 }
520
521 inline CharCategory category(UChar32 c)
522 {
523 if (c > 0xffff)
524 return NoCategory;
525 return (CharCategory) U_MASK(QChar(c).category());
526 }
527
528 #endif
529
530 } }
531
532 #endif // WTF_UNICODE_QT4_H