]> git.saurik.com Git - apple/javascriptcore.git/blob - wtf/unicode/qt4/UnicodeQt4.h
JavaScriptCore-466.1.tar.gz
[apple/javascriptcore.git] / wtf / unicode / qt4 / UnicodeQt4.h
1 /*
2 * This file is part of the KDE libraries
3 * Copyright (C) 2006 George Staikos <staikos@kde.org>
4 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 *
21 */
22
23 #ifndef KJS_UNICODE_QT4_H
24 #define KJS_UNICODE_QT4_H
25
26 #include <QChar>
27 #include <QString>
28
29 #include <config.h>
30
31 #include <stdint.h>
32
#if QT_VERSION >= 0x040300
// Local declaration of the per-character property record and of the two
// exported properties() lookups in namespace QUnicodeTables, so that the
// string case-mapping helpers further down can read the table entries.
// NOTE(review): presumably this has to match, field for field, the
// declaration inside the Qt version being linked against -- confirm before
// touching any width or the field order.
namespace QUnicodeTables {
    struct Properties {
        ushort category         : 8;
        ushort line_break_class : 8;
        ushort direction        : 8;
        ushort combiningClass   : 8;
        ushort joining          : 2;
        signed short digitValue : 6; /* 5 needed */
        ushort unicodeVersion   : 4;
        ushort lowerCaseSpecial : 1;
        ushort upperCaseSpecial : 1;
        ushort titleCaseSpecial : 1;
        ushort caseFoldSpecial  : 1; /* currently unused */
        signed short mirrorDiff    : 16;
        signed short lowerCaseDiff : 16;
        signed short upperCaseDiff : 16;
        signed short titleCaseDiff : 16;
        signed short caseFoldDiff  : 16;
    };

    // Table lookups, one keyed by a UCS-4 value and one by a UCS-2 value.
    Q_CORE_EXPORT const Properties * QT_FASTCALL properties(uint ucs4);
    Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2);
}
#endif
57
// UChar is chosen so that it stays compatible with JSChar from
// API/JSStringRef.h (an admitted hack): wchar_t when Q_OS_WIN is defined,
// uint16_t otherwise.
#ifdef Q_OS_WIN
typedef wchar_t UChar;
#else
typedef uint16_t UChar;
#endif

// 32-bit unsigned type used wherever a whole code point is passed around.
typedef uint32_t UChar32;
65
// A handful of UTF-16 helper macros borrowed from ICU.

// Surrogate classification. A lead unit lies in 0xD800..0xDBFF and a trail
// unit in 0xDC00..0xDFFF; U_IS_SURROGATE covers the whole 0xD800..0xDFFF span.
#define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
#define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
#define U_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800)
#define U16_IS_SINGLE(c) !U_IS_SURROGATE(c)
#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c)
// Tests bit 0x400 only, so the answer is meaningful just for values already
// known to be surrogates.
#define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)

// Combining a surrogate pair into one code point: (lead << 10) + trail minus
// the constant below.
#define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
#define U16_GET_SUPPLEMENTARY(lead, trail) \
    (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET)

// Splitting a supplementary code point back into its two UTF-16 units.
#define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0)
#define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00)

// Reads s[i] into c and post-increments i. When that unit is a lead surrogate
// and the next unit (still below length) is a trail surrogate, the trail is
// consumed as well and c becomes the combined code point. All arguments may be
// evaluated more than once.
#define U16_NEXT(s, i, length, c) { \
    (c)=(s)[(i)++]; \
    if(U16_IS_LEAD(c)) { \
        uint16_t __c2; \
        if((i)<(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
            ++(i); \
            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
        } \
    } \
}

// Single-bit 32-bit mask with bit number x set.
#define U_MASK(x) ((uint32_t)1<<(x))
94
95 namespace WTF {
96 namespace Unicode {
97
98 enum Direction {
99 LeftToRight = QChar::DirL,
100 RightToLeft = QChar::DirR,
101 EuropeanNumber = QChar::DirEN,
102 EuropeanNumberSeparator = QChar::DirES,
103 EuropeanNumberTerminator = QChar::DirET,
104 ArabicNumber = QChar::DirAN,
105 CommonNumberSeparator = QChar::DirCS,
106 BlockSeparator = QChar::DirB,
107 SegmentSeparator = QChar::DirS,
108 WhiteSpaceNeutral = QChar::DirWS,
109 OtherNeutral = QChar::DirON,
110 LeftToRightEmbedding = QChar::DirLRE,
111 LeftToRightOverride = QChar::DirLRO,
112 RightToLeftArabic = QChar::DirAL,
113 RightToLeftEmbedding = QChar::DirRLE,
114 RightToLeftOverride = QChar::DirRLO,
115 PopDirectionalFormat = QChar::DirPDF,
116 NonSpacingMark = QChar::DirNSM,
117 BoundaryNeutral = QChar::DirBN
118 };
119
120 enum DecompositionType {
121 DecompositionNone = QChar::NoDecomposition,
122 DecompositionCanonical = QChar::Canonical,
123 DecompositionCompat = QChar::Compat,
124 DecompositionCircle = QChar::Circle,
125 DecompositionFinal = QChar::Final,
126 DecompositionFont = QChar::Font,
127 DecompositionFraction = QChar::Fraction,
128 DecompositionInitial = QChar::Initial,
129 DecompositionIsolated = QChar::Isolated,
130 DecompositionMedial = QChar::Medial,
131 DecompositionNarrow = QChar::Narrow,
132 DecompositionNoBreak = QChar::NoBreak,
133 DecompositionSmall = QChar::Small,
134 DecompositionSquare = QChar::Square,
135 DecompositionSub = QChar::Sub,
136 DecompositionSuper = QChar::Super,
137 DecompositionVertical = QChar::Vertical,
138 DecompositionWide = QChar::Wide
139 };
140
141 enum CharCategory {
142 NoCategory = 0,
143 Mark_NonSpacing = U_MASK(QChar::Mark_NonSpacing),
144 Mark_SpacingCombining = U_MASK(QChar::Mark_SpacingCombining),
145 Mark_Enclosing = U_MASK(QChar::Mark_Enclosing),
146 Number_DecimalDigit = U_MASK(QChar::Number_DecimalDigit),
147 Number_Letter = U_MASK(QChar::Number_Letter),
148 Number_Other = U_MASK(QChar::Number_Other),
149 Separator_Space = U_MASK(QChar::Separator_Space),
150 Separator_Line = U_MASK(QChar::Separator_Line),
151 Separator_Paragraph = U_MASK(QChar::Separator_Paragraph),
152 Other_Control = U_MASK(QChar::Other_Control),
153 Other_Format = U_MASK(QChar::Other_Format),
154 Other_Surrogate = U_MASK(QChar::Other_Surrogate),
155 Other_PrivateUse = U_MASK(QChar::Other_PrivateUse),
156 Other_NotAssigned = U_MASK(QChar::Other_NotAssigned),
157 Letter_Uppercase = U_MASK(QChar::Letter_Uppercase),
158 Letter_Lowercase = U_MASK(QChar::Letter_Lowercase),
159 Letter_Titlecase = U_MASK(QChar::Letter_Titlecase),
160 Letter_Modifier = U_MASK(QChar::Letter_Modifier),
161 Letter_Other = U_MASK(QChar::Letter_Other),
162 Punctuation_Connector = U_MASK(QChar::Punctuation_Connector),
163 Punctuation_Dash = U_MASK(QChar::Punctuation_Dash),
164 Punctuation_Open = U_MASK(QChar::Punctuation_Open),
165 Punctuation_Close = U_MASK(QChar::Punctuation_Close),
166 Punctuation_InitialQuote = U_MASK(QChar::Punctuation_InitialQuote),
167 Punctuation_FinalQuote = U_MASK(QChar::Punctuation_FinalQuote),
168 Punctuation_Other = U_MASK(QChar::Punctuation_Other),
169 Symbol_Math = U_MASK(QChar::Symbol_Math),
170 Symbol_Currency = U_MASK(QChar::Symbol_Currency),
171 Symbol_Modifier = U_MASK(QChar::Symbol_Modifier),
172 Symbol_Other = U_MASK(QChar::Symbol_Other),
173 };
174
175
176 #if QT_VERSION >= 0x040300
177 // FIXME: handle surrogates correctly in all methods
178
179 inline UChar32 toLower(UChar32 ch)
180 {
181 return QChar::toLower(ch);
182 }
183
184 inline int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
185 {
186 const UChar *e = src + srcLength;
187 const UChar *s = src;
188 UChar *r = result;
189 UChar *re = result + resultLength;
190
191 // this avoids one out of bounds check in the loop
192 if (QChar(*s).isLowSurrogate())
193 *r++ = *s++;
194
195 int needed = 0;
196 while (s < e && r < re) {
197 uint c = *s;
198 if (QChar(c).isLowSurrogate() && QChar(*(s - 1)).isHighSurrogate())
199 c = QChar::surrogateToUcs4(*(s - 1), c);
200 const QUnicodeTables::Properties *prop = QUnicodeTables::properties(c);
201 if (prop->lowerCaseSpecial) {
202 QString qstring;
203 if (c < 0x10000) {
204 qstring += QChar(c);
205 } else {
206 qstring += QChar(*(s-1));
207 qstring += QChar(*s);
208 }
209 qstring = qstring.toLower();
210 for (int i = 0; i < qstring.length(); ++i) {
211 if (r == re) {
212 needed += qstring.length() - i;
213 break;
214 }
215 *r = qstring.at(i).unicode();
216 ++r;
217 }
218 } else {
219 *r = *s + prop->lowerCaseDiff;
220 ++r;
221 }
222 ++s;
223 }
224 if (s < e)
225 needed += e - s;
226 *error = (needed != 0);
227 if (r < re)
228 *r = 0;
229 return (r - result) + needed;
230 }
231
232 inline UChar32 toUpper(UChar32 ch)
233 {
234 return QChar::toUpper(ch);
235 }
236
237 inline int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
238 {
239 const UChar *e = src + srcLength;
240 const UChar *s = src;
241 UChar *r = result;
242 UChar *re = result + resultLength;
243
244 // this avoids one out of bounds check in the loop
245 if (QChar(*s).isLowSurrogate())
246 *r++ = *s++;
247
248 int needed = 0;
249 while (s < e && r < re) {
250 uint c = *s;
251 if (QChar(c).isLowSurrogate() && QChar(*(s - 1)).isHighSurrogate())
252 c = QChar::surrogateToUcs4(*(s - 1), c);
253 const QUnicodeTables::Properties *prop = QUnicodeTables::properties(c);
254 if (prop->upperCaseSpecial) {
255 QString qstring;
256 if (c < 0x10000) {
257 qstring += QChar(c);
258 } else {
259 qstring += QChar(*(s-1));
260 qstring += QChar(*s);
261 }
262 qstring = qstring.toUpper();
263 for (int i = 0; i < qstring.length(); ++i) {
264 if (r == re) {
265 needed += qstring.length() - i;
266 break;
267 }
268 *r = qstring.at(i).unicode();
269 ++r;
270 }
271 } else {
272 *r = *s + prop->upperCaseDiff;
273 ++r;
274 }
275 ++s;
276 }
277 if (s < e)
278 needed += e - s;
279 *error = (needed != 0);
280 if (r < re)
281 *r = 0;
282 return (r - result) + needed;
283 }
284
285 inline int toTitleCase(UChar32 c)
286 {
287 return QChar::toTitleCase(c);
288 }
289
290 inline UChar32 foldCase(UChar32 c)
291 {
292 return QChar::toCaseFolded(c);
293 }
294
295 inline int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
296 {
297 // FIXME: handle special casing. Easiest with some low level API in Qt
298 *error = false;
299 if (resultLength < srcLength) {
300 *error = true;
301 return srcLength;
302 }
303 for (int i = 0; i < srcLength; ++i)
304 result[i] = QChar::toCaseFolded(src[i]);
305 return srcLength;
306 }
307
308 inline bool isFormatChar(UChar32 c)
309 {
310 return QChar::category(c) == QChar::Other_Format;
311 }
312
313 inline bool isArabicChar(UChar32 c)
314 {
315 return c >= 0x0600 && c <= 0x06FF;
316 }
317
318 inline bool isPrintableChar(UChar32 c)
319 {
320 const uint test = U_MASK(QChar::Other_Control) |
321 U_MASK(QChar::Other_NotAssigned);
322 return !(U_MASK(QChar::category(c)) & test);
323 }
324
325 inline bool isSeparatorSpace(UChar32 c)
326 {
327 return QChar::category(c) == QChar::Separator_Space;
328 }
329
330 inline bool isPunct(UChar32 c)
331 {
332 const uint test = U_MASK(QChar::Punctuation_Connector) |
333 U_MASK(QChar::Punctuation_Dash) |
334 U_MASK(QChar::Punctuation_Open) |
335 U_MASK(QChar::Punctuation_Close) |
336 U_MASK(QChar::Punctuation_InitialQuote) |
337 U_MASK(QChar::Punctuation_FinalQuote) |
338 U_MASK(QChar::Punctuation_Other);
339 return U_MASK(QChar::category(c)) & test;
340 }
341
342 inline bool isDigit(UChar32 c)
343 {
344 return QChar::category(c) == QChar::Number_DecimalDigit;
345 }
346
347 inline bool isLower(UChar32 c)
348 {
349 return QChar::category(c) == QChar::Letter_Lowercase;
350 }
351
352 inline int digitValue(UChar32 c)
353 {
354 return QChar::digitValue(c);
355 }
356
357 inline UChar32 mirroredChar(UChar32 c)
358 {
359 return QChar::mirroredChar(c);
360 }
361
362 inline uint8_t combiningClass(UChar32 c)
363 {
364 return QChar::combiningClass(c);
365 }
366
367 inline DecompositionType decompositionType(UChar32 c)
368 {
369 return (DecompositionType)QChar::decompositionTag(c);
370 }
371
372 inline int umemcasecmp(const UChar* a, const UChar* b, int len)
373 {
374 // handle surrogates correctly
375 for (int i = 0; i < len; ++i) {
376 uint c1 = QChar::toCaseFolded(a[i]);
377 uint c2 = QChar::toCaseFolded(b[i]);
378 if (c1 != c2)
379 return c1 - c2;
380 }
381 return 0;
382 }
383
384 inline Direction direction(UChar32 c)
385 {
386 return (Direction)QChar::direction(c);
387 }
388
389 inline CharCategory category(UChar32 c)
390 {
391 return (CharCategory) U_MASK(QChar::category(c));
392 }
393
394 #else
395
396 inline UChar32 toLower(UChar32 ch)
397 {
398 if (ch > 0xffff)
399 return ch;
400 return QChar((unsigned short)ch).toLower().unicode();
401 }
402
403 inline int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
404 {
405 *error = false;
406 if (resultLength < srcLength) {
407 *error = true;
408 return srcLength;
409 }
410 for (int i = 0; i < srcLength; ++i)
411 result[i] = QChar(src[i]).toLower().unicode();
412 return srcLength;
413 }
414
415 inline UChar32 toUpper(UChar32 ch)
416 {
417 if (ch > 0xffff)
418 return ch;
419 return QChar((unsigned short)ch).toUpper().unicode();
420 }
421
422 inline int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
423 {
424 *error = false;
425 if (resultLength < srcLength) {
426 *error = true;
427 return srcLength;
428 }
429 for (int i = 0; i < srcLength; ++i)
430 result[i] = QChar(src[i]).toUpper().unicode();
431 return srcLength;
432 }
433
434 inline int toTitleCase(UChar32 c)
435 {
436 if (c > 0xffff)
437 return c;
438 return QChar((unsigned short)c).toUpper().unicode();
439 }
440
441 inline UChar32 foldCase(UChar32 c)
442 {
443 if (c > 0xffff)
444 return c;
445 return QChar((unsigned short)c).toLower().unicode();
446 }
447
448 inline int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
449 {
450 return toLower(result, resultLength, src, srcLength, error);
451 }
452
453 inline bool isFormatChar(UChar32 c)
454 {
455 return (c & 0xffff0000) == 0 && QChar((unsigned short)c).category() == QChar::Other_Format;
456 }
457
458 inline bool isPrintableChar(UChar32 c)
459 {
460 return (c & 0xffff0000) == 0 && QChar((unsigned short)c).isPrint();
461 }
462
463 inline bool isArabicChar(UChar32 c)
464 {
465 return c >= 0x0600 && c <= 0x06FF;
466 }
467
468 inline bool isSeparatorSpace(UChar32 c)
469 {
470 return (c & 0xffff0000) == 0 && QChar((unsigned short)c).category() == QChar::Separator_Space;
471 }
472
473 inline bool isPunct(UChar32 c)
474 {
475 return (c & 0xffff0000) == 0 && QChar((unsigned short)c).isPunct();
476 }
477
478 inline bool isDigit(UChar32 c)
479 {
480 return (c & 0xffff0000) == 0 && QChar((unsigned short)c).isDigit();
481 }
482
483 inline bool isLower(UChar32 c)
484 {
485 return (c & 0xffff0000) == 0 && QChar((unsigned short)c).category() == QChar::Letter_Lowercase;
486 }
487
488 inline int digitValue(UChar32 c)
489 {
490 if (c > 0xffff)
491 return 0;
492 return QChar(c).digitValue();
493 }
494
495 inline UChar32 mirroredChar(UChar32 c)
496 {
497 if (c > 0xffff)
498 return c;
499 return QChar(c).mirroredChar().unicode();
500 }
501
502 inline uint8_t combiningClass(UChar32 c)
503 {
504 if (c > 0xffff)
505 return 0;
506 return QChar((unsigned short)c).combiningClass();
507 }
508
509 inline DecompositionType decompositionType(UChar32 c)
510 {
511 if (c > 0xffff)
512 return DecompositionNone;
513 return (DecompositionType)QChar(c).decompositionTag();
514 }
515
516 inline int umemcasecmp(const UChar* a, const UChar* b, int len)
517 {
518 for (int i = 0; i < len; ++i) {
519 QChar c1 = QChar(a[i]).toLower();
520 QChar c2 = QChar(b[i]).toLower();
521 if (c1 != c2)
522 return c1.unicode() - c2.unicode();
523 }
524 return 0;
525 }
526
527 inline Direction direction(UChar32 c)
528 {
529 if (c > 0xffff)
530 return LeftToRight;
531 return (Direction)QChar(c).direction();
532 }
533
534 inline CharCategory category(UChar32 c)
535 {
536 if (c > 0xffff)
537 return NoCategory;
538 return (CharCategory) U_MASK(QChar(c).category());
539 }
540
541 #endif
542
543 }
544 }
545
546 #endif
547 // vim: ts=2 sw=2 et