]>
Commit | Line | Data |
---|---|---|
b37bf2e1 A |
1 | /* |
2 | * This file is part of the KDE libraries | |
3 | * Copyright (C) 2006 George Staikos <staikos@kde.org> | |
4 | * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> | |
5 | * | |
6 | * This library is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU Library General Public | |
8 | * License as published by the Free Software Foundation; either | |
9 | * version 2 of the License, or (at your option) any later version. | |
10 | * | |
11 | * This library is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | * Library General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU Library General Public License | |
17 | * along with this library; see the file COPYING.LIB. If not, write to | |
18 | * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | |
19 | * Boston, MA 02110-1301, USA. | |
20 | * | |
21 | */ | |
22 | ||
23 | #ifndef KJS_UNICODE_QT4_H | |
24 | #define KJS_UNICODE_QT4_H | |
25 | ||
26 | #include <QChar> | |
27 | #include <QString> | |
28 | ||
29 | #include <config.h> | |
30 | ||
31 | #include <stdint.h> | |
32 | ||
33 | #if QT_VERSION >= 0x040300 | |
34 | namespace QUnicodeTables { | |
35 | struct Properties { | |
36 | ushort category : 8; | |
37 | ushort line_break_class : 8; | |
38 | ushort direction : 8; | |
39 | ushort combiningClass :8; | |
40 | ushort joining : 2; | |
41 | signed short digitValue : 6; /* 5 needed */ | |
42 | ushort unicodeVersion : 4; | |
43 | ushort lowerCaseSpecial : 1; | |
44 | ushort upperCaseSpecial : 1; | |
45 | ushort titleCaseSpecial : 1; | |
46 | ushort caseFoldSpecial : 1; /* currently unused */ | |
47 | signed short mirrorDiff : 16; | |
48 | signed short lowerCaseDiff : 16; | |
49 | signed short upperCaseDiff : 16; | |
50 | signed short titleCaseDiff : 16; | |
51 | signed short caseFoldDiff : 16; | |
52 | }; | |
53 | Q_CORE_EXPORT const Properties * QT_FASTCALL properties(uint ucs4); | |
54 | Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2); | |
55 | } | |
56 | #endif | |
57 | ||
// Code-unit and code-point types used throughout this header.
// UChar is wchar_t on Windows and uint16_t everywhere else: an ugly hack to
// make UChar compatible with JSChar in API/JSStringRef.h.
#if !defined(Q_OS_WIN)
typedef uint16_t UChar;
#else
typedef wchar_t UChar;
#endif
typedef uint32_t UChar32;
65 | ||
// UTF-16 helper macros taken over from ICU.

// Non-zero when c is a lead surrogate (0xd800..0xdbff).
#define U16_IS_LEAD(c) (((c) & 0xfffffc00) == 0xd800)
// Non-zero when c is a trail surrogate (0xdc00..0xdfff).
#define U16_IS_TRAIL(c) (((c) & 0xfffffc00) == 0xdc00)
// Constant subtracted by U16_GET_SUPPLEMENTARY: strips both surrogate bases
// and applies the 0x10000 offset in a single step.
#define U16_SURROGATE_OFFSET ((0xd800 << 10UL) + 0xdc00 - 0x10000)
// Combines a lead/trail surrogate pair into one code point.
#define U16_GET_SUPPLEMENTARY(lead, trail) \
  (((UChar32)(lead) << 10UL) + (UChar32)(trail) - U16_SURROGATE_OFFSET)

// Lead / trail code unit for a code point above 0xffff.
#define U16_LEAD(supplementary) (UChar)(((supplementary) >> 10) + 0xd7c0)
#define U16_TRAIL(supplementary) (UChar)(((supplementary) & 0x3ff) | 0xdc00)

// Non-zero when c lies anywhere in the surrogate range 0xd800..0xdfff.
#define U_IS_SURROGATE(c) (((c) & 0xfffff800) == 0xd800)
#define U16_IS_SINGLE(c) (!U_IS_SURROGATE(c))
#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c)
// Only meaningful when c is already known to be a surrogate.
#define U16_IS_SURROGATE_LEAD(c) (((c) & 0x400) == 0)

// Reads the code point starting at s[i] into c and advances i past it.
// A lead surrogate is only combined when i is still below length and the
// following unit is a trail surrogate; otherwise c is the single unit read.
#define U16_NEXT(s, i, length, c) { \
  (c) = (s)[(i)++]; \
  if (U16_IS_LEAD(c)) { \
    uint16_t __c2; \
    if ((i) < (length) && U16_IS_TRAIL(__c2 = (s)[(i)])) { \
      ++(i); \
      (c) = U16_GET_SUPPLEMENTARY((c), __c2); \
    } \
  } \
}

// Single-bit mask with bit number x set.
#define U_MASK(x) ((uint32_t)1 << (x))
94 | ||
95 | namespace WTF { | |
96 | namespace Unicode { | |
97 | ||
98 | enum Direction { | |
99 | LeftToRight = QChar::DirL, | |
100 | RightToLeft = QChar::DirR, | |
101 | EuropeanNumber = QChar::DirEN, | |
102 | EuropeanNumberSeparator = QChar::DirES, | |
103 | EuropeanNumberTerminator = QChar::DirET, | |
104 | ArabicNumber = QChar::DirAN, | |
105 | CommonNumberSeparator = QChar::DirCS, | |
106 | BlockSeparator = QChar::DirB, | |
107 | SegmentSeparator = QChar::DirS, | |
108 | WhiteSpaceNeutral = QChar::DirWS, | |
109 | OtherNeutral = QChar::DirON, | |
110 | LeftToRightEmbedding = QChar::DirLRE, | |
111 | LeftToRightOverride = QChar::DirLRO, | |
112 | RightToLeftArabic = QChar::DirAL, | |
113 | RightToLeftEmbedding = QChar::DirRLE, | |
114 | RightToLeftOverride = QChar::DirRLO, | |
115 | PopDirectionalFormat = QChar::DirPDF, | |
116 | NonSpacingMark = QChar::DirNSM, | |
117 | BoundaryNeutral = QChar::DirBN | |
118 | }; | |
119 | ||
120 | enum DecompositionType { | |
121 | DecompositionNone = QChar::NoDecomposition, | |
122 | DecompositionCanonical = QChar::Canonical, | |
123 | DecompositionCompat = QChar::Compat, | |
124 | DecompositionCircle = QChar::Circle, | |
125 | DecompositionFinal = QChar::Final, | |
126 | DecompositionFont = QChar::Font, | |
127 | DecompositionFraction = QChar::Fraction, | |
128 | DecompositionInitial = QChar::Initial, | |
129 | DecompositionIsolated = QChar::Isolated, | |
130 | DecompositionMedial = QChar::Medial, | |
131 | DecompositionNarrow = QChar::Narrow, | |
132 | DecompositionNoBreak = QChar::NoBreak, | |
133 | DecompositionSmall = QChar::Small, | |
134 | DecompositionSquare = QChar::Square, | |
135 | DecompositionSub = QChar::Sub, | |
136 | DecompositionSuper = QChar::Super, | |
137 | DecompositionVertical = QChar::Vertical, | |
138 | DecompositionWide = QChar::Wide | |
139 | }; | |
140 | ||
141 | enum CharCategory { | |
142 | NoCategory = 0, | |
143 | Mark_NonSpacing = U_MASK(QChar::Mark_NonSpacing), | |
144 | Mark_SpacingCombining = U_MASK(QChar::Mark_SpacingCombining), | |
145 | Mark_Enclosing = U_MASK(QChar::Mark_Enclosing), | |
146 | Number_DecimalDigit = U_MASK(QChar::Number_DecimalDigit), | |
147 | Number_Letter = U_MASK(QChar::Number_Letter), | |
148 | Number_Other = U_MASK(QChar::Number_Other), | |
149 | Separator_Space = U_MASK(QChar::Separator_Space), | |
150 | Separator_Line = U_MASK(QChar::Separator_Line), | |
151 | Separator_Paragraph = U_MASK(QChar::Separator_Paragraph), | |
152 | Other_Control = U_MASK(QChar::Other_Control), | |
153 | Other_Format = U_MASK(QChar::Other_Format), | |
154 | Other_Surrogate = U_MASK(QChar::Other_Surrogate), | |
155 | Other_PrivateUse = U_MASK(QChar::Other_PrivateUse), | |
156 | Other_NotAssigned = U_MASK(QChar::Other_NotAssigned), | |
157 | Letter_Uppercase = U_MASK(QChar::Letter_Uppercase), | |
158 | Letter_Lowercase = U_MASK(QChar::Letter_Lowercase), | |
159 | Letter_Titlecase = U_MASK(QChar::Letter_Titlecase), | |
160 | Letter_Modifier = U_MASK(QChar::Letter_Modifier), | |
161 | Letter_Other = U_MASK(QChar::Letter_Other), | |
162 | Punctuation_Connector = U_MASK(QChar::Punctuation_Connector), | |
163 | Punctuation_Dash = U_MASK(QChar::Punctuation_Dash), | |
164 | Punctuation_Open = U_MASK(QChar::Punctuation_Open), | |
165 | Punctuation_Close = U_MASK(QChar::Punctuation_Close), | |
166 | Punctuation_InitialQuote = U_MASK(QChar::Punctuation_InitialQuote), | |
167 | Punctuation_FinalQuote = U_MASK(QChar::Punctuation_FinalQuote), | |
168 | Punctuation_Other = U_MASK(QChar::Punctuation_Other), | |
169 | Symbol_Math = U_MASK(QChar::Symbol_Math), | |
170 | Symbol_Currency = U_MASK(QChar::Symbol_Currency), | |
171 | Symbol_Modifier = U_MASK(QChar::Symbol_Modifier), | |
172 | Symbol_Other = U_MASK(QChar::Symbol_Other), | |
173 | }; | |
174 | ||
175 | ||
176 | #if QT_VERSION >= 0x040300 | |
177 | // FIXME: handle surrogates correctly in all methods | |
178 | ||
179 | inline UChar32 toLower(UChar32 ch) | |
180 | { | |
181 | return QChar::toLower(ch); | |
182 | } | |
183 | ||
184 | inline int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) | |
185 | { | |
186 | const UChar *e = src + srcLength; | |
187 | const UChar *s = src; | |
188 | UChar *r = result; | |
189 | UChar *re = result + resultLength; | |
190 | ||
191 | // this avoids one out of bounds check in the loop | |
192 | if (QChar(*s).isLowSurrogate()) | |
193 | *r++ = *s++; | |
194 | ||
195 | int needed = 0; | |
196 | while (s < e && r < re) { | |
197 | uint c = *s; | |
198 | if (QChar(c).isLowSurrogate() && QChar(*(s - 1)).isHighSurrogate()) | |
199 | c = QChar::surrogateToUcs4(*(s - 1), c); | |
200 | const QUnicodeTables::Properties *prop = QUnicodeTables::properties(c); | |
201 | if (prop->lowerCaseSpecial) { | |
202 | QString qstring; | |
203 | if (c < 0x10000) { | |
204 | qstring += QChar(c); | |
205 | } else { | |
206 | qstring += QChar(*(s-1)); | |
207 | qstring += QChar(*s); | |
208 | } | |
209 | qstring = qstring.toLower(); | |
210 | for (int i = 0; i < qstring.length(); ++i) { | |
211 | if (r == re) { | |
212 | needed += qstring.length() - i; | |
213 | break; | |
214 | } | |
215 | *r = qstring.at(i).unicode(); | |
216 | ++r; | |
217 | } | |
218 | } else { | |
219 | *r = *s + prop->lowerCaseDiff; | |
220 | ++r; | |
221 | } | |
222 | ++s; | |
223 | } | |
224 | if (s < e) | |
225 | needed += e - s; | |
226 | *error = (needed != 0); | |
227 | if (r < re) | |
228 | *r = 0; | |
229 | return (r - result) + needed; | |
230 | } | |
231 | ||
232 | inline UChar32 toUpper(UChar32 ch) | |
233 | { | |
234 | return QChar::toUpper(ch); | |
235 | } | |
236 | ||
237 | inline int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) | |
238 | { | |
239 | const UChar *e = src + srcLength; | |
240 | const UChar *s = src; | |
241 | UChar *r = result; | |
242 | UChar *re = result + resultLength; | |
243 | ||
244 | // this avoids one out of bounds check in the loop | |
245 | if (QChar(*s).isLowSurrogate()) | |
246 | *r++ = *s++; | |
247 | ||
248 | int needed = 0; | |
249 | while (s < e && r < re) { | |
250 | uint c = *s; | |
251 | if (QChar(c).isLowSurrogate() && QChar(*(s - 1)).isHighSurrogate()) | |
252 | c = QChar::surrogateToUcs4(*(s - 1), c); | |
253 | const QUnicodeTables::Properties *prop = QUnicodeTables::properties(c); | |
254 | if (prop->upperCaseSpecial) { | |
255 | QString qstring; | |
256 | if (c < 0x10000) { | |
257 | qstring += QChar(c); | |
258 | } else { | |
259 | qstring += QChar(*(s-1)); | |
260 | qstring += QChar(*s); | |
261 | } | |
262 | qstring = qstring.toUpper(); | |
263 | for (int i = 0; i < qstring.length(); ++i) { | |
264 | if (r == re) { | |
265 | needed += qstring.length() - i; | |
266 | break; | |
267 | } | |
268 | *r = qstring.at(i).unicode(); | |
269 | ++r; | |
270 | } | |
271 | } else { | |
272 | *r = *s + prop->upperCaseDiff; | |
273 | ++r; | |
274 | } | |
275 | ++s; | |
276 | } | |
277 | if (s < e) | |
278 | needed += e - s; | |
279 | *error = (needed != 0); | |
280 | if (r < re) | |
281 | *r = 0; | |
282 | return (r - result) + needed; | |
283 | } | |
284 | ||
285 | inline int toTitleCase(UChar32 c) | |
286 | { | |
287 | return QChar::toTitleCase(c); | |
288 | } | |
289 | ||
290 | inline UChar32 foldCase(UChar32 c) | |
291 | { | |
292 | return QChar::toCaseFolded(c); | |
293 | } | |
294 | ||
295 | inline int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) | |
296 | { | |
297 | // FIXME: handle special casing. Easiest with some low level API in Qt | |
298 | *error = false; | |
299 | if (resultLength < srcLength) { | |
300 | *error = true; | |
301 | return srcLength; | |
302 | } | |
303 | for (int i = 0; i < srcLength; ++i) | |
304 | result[i] = QChar::toCaseFolded(src[i]); | |
305 | return srcLength; | |
306 | } | |
307 | ||
308 | inline bool isFormatChar(UChar32 c) | |
309 | { | |
310 | return QChar::category(c) == QChar::Other_Format; | |
311 | } | |
312 | ||
313 | inline bool isArabicChar(UChar32 c) | |
314 | { | |
315 | return c >= 0x0600 && c <= 0x06FF; | |
316 | } | |
317 | ||
318 | inline bool isPrintableChar(UChar32 c) | |
319 | { | |
320 | const uint test = U_MASK(QChar::Other_Control) | | |
321 | U_MASK(QChar::Other_NotAssigned); | |
322 | return !(U_MASK(QChar::category(c)) & test); | |
323 | } | |
324 | ||
325 | inline bool isSeparatorSpace(UChar32 c) | |
326 | { | |
327 | return QChar::category(c) == QChar::Separator_Space; | |
328 | } | |
329 | ||
330 | inline bool isPunct(UChar32 c) | |
331 | { | |
332 | const uint test = U_MASK(QChar::Punctuation_Connector) | | |
333 | U_MASK(QChar::Punctuation_Dash) | | |
334 | U_MASK(QChar::Punctuation_Open) | | |
335 | U_MASK(QChar::Punctuation_Close) | | |
336 | U_MASK(QChar::Punctuation_InitialQuote) | | |
337 | U_MASK(QChar::Punctuation_FinalQuote) | | |
338 | U_MASK(QChar::Punctuation_Other); | |
339 | return U_MASK(QChar::category(c)) & test; | |
340 | } | |
341 | ||
342 | inline bool isDigit(UChar32 c) | |
343 | { | |
344 | return QChar::category(c) == QChar::Number_DecimalDigit; | |
345 | } | |
346 | ||
347 | inline bool isLower(UChar32 c) | |
348 | { | |
349 | return QChar::category(c) == QChar::Letter_Lowercase; | |
350 | } | |
351 | ||
352 | inline int digitValue(UChar32 c) | |
353 | { | |
354 | return QChar::digitValue(c); | |
355 | } | |
356 | ||
357 | inline UChar32 mirroredChar(UChar32 c) | |
358 | { | |
359 | return QChar::mirroredChar(c); | |
360 | } | |
361 | ||
362 | inline uint8_t combiningClass(UChar32 c) | |
363 | { | |
364 | return QChar::combiningClass(c); | |
365 | } | |
366 | ||
367 | inline DecompositionType decompositionType(UChar32 c) | |
368 | { | |
369 | return (DecompositionType)QChar::decompositionTag(c); | |
370 | } | |
371 | ||
372 | inline int umemcasecmp(const UChar* a, const UChar* b, int len) | |
373 | { | |
374 | // handle surrogates correctly | |
375 | for (int i = 0; i < len; ++i) { | |
376 | uint c1 = QChar::toCaseFolded(a[i]); | |
377 | uint c2 = QChar::toCaseFolded(b[i]); | |
378 | if (c1 != c2) | |
379 | return c1 - c2; | |
380 | } | |
381 | return 0; | |
382 | } | |
383 | ||
384 | inline Direction direction(UChar32 c) | |
385 | { | |
386 | return (Direction)QChar::direction(c); | |
387 | } | |
388 | ||
389 | inline CharCategory category(UChar32 c) | |
390 | { | |
391 | return (CharCategory) U_MASK(QChar::category(c)); | |
392 | } | |
393 | ||
394 | #else | |
395 | ||
396 | inline UChar32 toLower(UChar32 ch) | |
397 | { | |
398 | if (ch > 0xffff) | |
399 | return ch; | |
400 | return QChar((unsigned short)ch).toLower().unicode(); | |
401 | } | |
402 | ||
403 | inline int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) | |
404 | { | |
405 | *error = false; | |
406 | if (resultLength < srcLength) { | |
407 | *error = true; | |
408 | return srcLength; | |
409 | } | |
410 | for (int i = 0; i < srcLength; ++i) | |
411 | result[i] = QChar(src[i]).toLower().unicode(); | |
412 | return srcLength; | |
413 | } | |
414 | ||
415 | inline UChar32 toUpper(UChar32 ch) | |
416 | { | |
417 | if (ch > 0xffff) | |
418 | return ch; | |
419 | return QChar((unsigned short)ch).toUpper().unicode(); | |
420 | } | |
421 | ||
422 | inline int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) | |
423 | { | |
424 | *error = false; | |
425 | if (resultLength < srcLength) { | |
426 | *error = true; | |
427 | return srcLength; | |
428 | } | |
429 | for (int i = 0; i < srcLength; ++i) | |
430 | result[i] = QChar(src[i]).toUpper().unicode(); | |
431 | return srcLength; | |
432 | } | |
433 | ||
434 | inline int toTitleCase(UChar32 c) | |
435 | { | |
436 | if (c > 0xffff) | |
437 | return c; | |
438 | return QChar((unsigned short)c).toUpper().unicode(); | |
439 | } | |
440 | ||
441 | inline UChar32 foldCase(UChar32 c) | |
442 | { | |
443 | if (c > 0xffff) | |
444 | return c; | |
445 | return QChar((unsigned short)c).toLower().unicode(); | |
446 | } | |
447 | ||
448 | inline int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) | |
449 | { | |
450 | return toLower(result, resultLength, src, srcLength, error); | |
451 | } | |
452 | ||
453 | inline bool isFormatChar(UChar32 c) | |
454 | { | |
455 | return (c & 0xffff0000) == 0 && QChar((unsigned short)c).category() == QChar::Other_Format; | |
456 | } | |
457 | ||
458 | inline bool isPrintableChar(UChar32 c) | |
459 | { | |
460 | return (c & 0xffff0000) == 0 && QChar((unsigned short)c).isPrint(); | |
461 | } | |
462 | ||
463 | inline bool isArabicChar(UChar32 c) | |
464 | { | |
465 | return c >= 0x0600 && c <= 0x06FF; | |
466 | } | |
467 | ||
468 | inline bool isSeparatorSpace(UChar32 c) | |
469 | { | |
470 | return (c & 0xffff0000) == 0 && QChar((unsigned short)c).category() == QChar::Separator_Space; | |
471 | } | |
472 | ||
473 | inline bool isPunct(UChar32 c) | |
474 | { | |
475 | return (c & 0xffff0000) == 0 && QChar((unsigned short)c).isPunct(); | |
476 | } | |
477 | ||
478 | inline bool isDigit(UChar32 c) | |
479 | { | |
480 | return (c & 0xffff0000) == 0 && QChar((unsigned short)c).isDigit(); | |
481 | } | |
482 | ||
483 | inline bool isLower(UChar32 c) | |
484 | { | |
485 | return (c & 0xffff0000) == 0 && QChar((unsigned short)c).category() == QChar::Letter_Lowercase; | |
486 | } | |
487 | ||
488 | inline int digitValue(UChar32 c) | |
489 | { | |
490 | if (c > 0xffff) | |
491 | return 0; | |
492 | return QChar(c).digitValue(); | |
493 | } | |
494 | ||
495 | inline UChar32 mirroredChar(UChar32 c) | |
496 | { | |
497 | if (c > 0xffff) | |
498 | return c; | |
499 | return QChar(c).mirroredChar().unicode(); | |
500 | } | |
501 | ||
502 | inline uint8_t combiningClass(UChar32 c) | |
503 | { | |
504 | if (c > 0xffff) | |
505 | return 0; | |
506 | return QChar((unsigned short)c).combiningClass(); | |
507 | } | |
508 | ||
509 | inline DecompositionType decompositionType(UChar32 c) | |
510 | { | |
511 | if (c > 0xffff) | |
512 | return DecompositionNone; | |
513 | return (DecompositionType)QChar(c).decompositionTag(); | |
514 | } | |
515 | ||
516 | inline int umemcasecmp(const UChar* a, const UChar* b, int len) | |
517 | { | |
518 | for (int i = 0; i < len; ++i) { | |
519 | QChar c1 = QChar(a[i]).toLower(); | |
520 | QChar c2 = QChar(b[i]).toLower(); | |
521 | if (c1 != c2) | |
522 | return c1.unicode() - c2.unicode(); | |
523 | } | |
524 | return 0; | |
525 | } | |
526 | ||
527 | inline Direction direction(UChar32 c) | |
528 | { | |
529 | if (c > 0xffff) | |
530 | return LeftToRight; | |
531 | return (Direction)QChar(c).direction(); | |
532 | } | |
533 | ||
534 | inline CharCategory category(UChar32 c) | |
535 | { | |
536 | if (c > 0xffff) | |
537 | return NoCategory; | |
538 | return (CharCategory) U_MASK(QChar(c).category()); | |
539 | } | |
540 | ||
541 | #endif | |
542 | ||
543 | } | |
544 | } | |
545 | ||
546 | #endif | |
547 | // vim: ts=2 sw=2 et |