]>
Commit | Line | Data |
---|---|---|
ba379fdc A |
1 | /* |
2 | * Copyright (C) 2008 Jürg Billeter <j@bitron.ch> | |
3 | * Copyright (C) 2008 Dominik Röttsches <dominik.roettsches@access-company.com> | |
14957cd0 | 4 | * Copyright (C) 2010 Igalia S.L. |
ba379fdc A |
5 | * |
6 | * This library is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU Library General Public | |
8 | * License as published by the Free Software Foundation; either | |
9 | * version 2 of the License, or (at your option) any later version. | |
10 | * | |
11 | * This library is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | * Library General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU Library General Public License | |
17 | * along with this library; see the file COPYING.LIB. If not, write to | |
18 | * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | |
19 | * Boston, MA 02110-1301, USA. | |
20 | * | |
21 | */ | |
22 | ||
f9bf01c6 | 23 | #include "config.h" |
ba379fdc A |
24 | #include "UnicodeGLib.h" |
25 | ||
14957cd0 A |
26 | #include <wtf/Vector.h> |
27 | #include <wtf/unicode/UTF8.h> | |
28 | ||
// Despite its name, this macro inspects a decoded code point, not a UTF-8 byte
// or a UTF-16 surrogate code unit: it is true for code points in the range
// U+10000..U+10FFFF, i.e. those that occupy two UTF-16 code units.
// The argument is parenthesized so that expression arguments expand correctly;
// note that it is still evaluated twice.
#define UTF8_IS_SURROGATE(character) ((character) >= 0x10000 && (character) <= 0x10FFFF)
30 | ||
ba379fdc A |
31 | namespace WTF { |
32 | namespace Unicode { | |
33 | ||
34 | UChar32 foldCase(UChar32 ch) | |
35 | { | |
36 | GOwnPtr<GError> gerror; | |
37 | ||
38 | GOwnPtr<char> utf8char; | |
39 | utf8char.set(g_ucs4_to_utf8(reinterpret_cast<gunichar*>(&ch), 1, 0, 0, &gerror.outPtr())); | |
40 | if (gerror) | |
41 | return ch; | |
42 | ||
43 | GOwnPtr<char> utf8caseFolded; | |
44 | utf8caseFolded.set(g_utf8_casefold(utf8char.get(), -1)); | |
45 | ||
46 | GOwnPtr<gunichar> ucs4Result; | |
47 | ucs4Result.set(g_utf8_to_ucs4_fast(utf8caseFolded.get(), -1, 0)); | |
48 | ||
49 | return *ucs4Result; | |
50 | } | |
51 | ||
14957cd0 | 52 | static int getUTF16LengthFromUTF8(const gchar* utf8String, int length) |
ba379fdc | 53 | { |
14957cd0 A |
54 | int utf16Length = 0; |
55 | const gchar* inputString = utf8String; | |
ba379fdc | 56 | |
14957cd0 A |
57 | while ((utf8String + length - inputString > 0) && *inputString) { |
58 | gunichar character = g_utf8_get_char(inputString); | |
ba379fdc | 59 | |
14957cd0 A |
60 | utf16Length += UTF8_IS_SURROGATE(character) ? 2 : 1; |
61 | inputString = g_utf8_next_char(inputString); | |
ba379fdc A |
62 | } |
63 | ||
14957cd0 | 64 | return utf16Length; |
ba379fdc A |
65 | } |
66 | ||
14957cd0 A |
67 | typedef gchar* (*UTF8CaseFunction)(const gchar*, gssize length); |
68 | ||
69 | static int convertCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error, UTF8CaseFunction caseFunction) | |
ba379fdc A |
70 | { |
71 | *error = false; | |
ba379fdc | 72 | |
14957cd0 A |
73 | // Allocate a buffer big enough to hold all the characters. |
74 | Vector<char> buffer(srcLength * 3); | |
75 | char* utf8Target = buffer.data(); | |
76 | const UChar* utf16Source = src; | |
77 | ConversionResult conversionResult = convertUTF16ToUTF8(&utf16Source, utf16Source + srcLength, &utf8Target, utf8Target + buffer.size(), true); | |
78 | if (conversionResult != conversionOK) { | |
ba379fdc A |
79 | *error = true; |
80 | return -1; | |
81 | } | |
14957cd0 | 82 | buffer.shrink(utf8Target - buffer.data()); |
ba379fdc | 83 | |
14957cd0 A |
84 | GOwnPtr<char> utf8Result(caseFunction(buffer.data(), buffer.size())); |
85 | long utf8ResultLength = strlen(utf8Result.get()); | |
ba379fdc | 86 | |
14957cd0 A |
87 | // Calculate the destination buffer size. |
88 | int realLength = getUTF16LengthFromUTF8(utf8Result.get(), utf8ResultLength); | |
89 | if (realLength > resultLength) { | |
ba379fdc | 90 | *error = true; |
14957cd0 | 91 | return realLength; |
ba379fdc A |
92 | } |
93 | ||
14957cd0 A |
94 | // Convert the result to UTF-16. |
95 | UChar* utf16Target = result; | |
96 | const char* utf8Source = utf8Result.get(); | |
97 | conversionResult = convertUTF8ToUTF16(&utf8Source, utf8Source + utf8ResultLength, &utf16Target, utf16Target + resultLength, true); | |
98 | long utf16ResultLength = utf16Target - result; | |
99 | if (conversionResult != conversionOK) | |
ba379fdc | 100 | *error = true; |
ba379fdc | 101 | |
14957cd0 | 102 | return utf16ResultLength <= 0 ? -1 : utf16ResultLength; |
ba379fdc | 103 | } |
14957cd0 | 104 | int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) |
ba379fdc | 105 | { |
14957cd0 A |
106 | return convertCase(result, resultLength, src, srcLength, error, g_utf8_casefold); |
107 | } | |
ba379fdc | 108 | |
14957cd0 A |
109 | int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) |
110 | { | |
111 | return convertCase(result, resultLength, src, srcLength, error, g_utf8_strdown); | |
112 | } | |
ba379fdc | 113 | |
14957cd0 A |
114 | int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) |
115 | { | |
116 | return convertCase(result, resultLength, src, srcLength, error, g_utf8_strup); | |
ba379fdc A |
117 | } |
118 | ||
119 | Direction direction(UChar32 c) | |
120 | { | |
121 | PangoBidiType type = pango_bidi_type_for_unichar(c); | |
122 | switch (type) { | |
123 | case PANGO_BIDI_TYPE_L: | |
124 | return LeftToRight; | |
125 | case PANGO_BIDI_TYPE_R: | |
126 | return RightToLeft; | |
127 | case PANGO_BIDI_TYPE_AL: | |
128 | return RightToLeftArabic; | |
129 | case PANGO_BIDI_TYPE_LRE: | |
130 | return LeftToRightEmbedding; | |
131 | case PANGO_BIDI_TYPE_RLE: | |
132 | return RightToLeftEmbedding; | |
133 | case PANGO_BIDI_TYPE_LRO: | |
134 | return LeftToRightOverride; | |
135 | case PANGO_BIDI_TYPE_RLO: | |
136 | return RightToLeftOverride; | |
137 | case PANGO_BIDI_TYPE_PDF: | |
138 | return PopDirectionalFormat; | |
139 | case PANGO_BIDI_TYPE_EN: | |
140 | return EuropeanNumber; | |
141 | case PANGO_BIDI_TYPE_AN: | |
142 | return ArabicNumber; | |
143 | case PANGO_BIDI_TYPE_ES: | |
144 | return EuropeanNumberSeparator; | |
145 | case PANGO_BIDI_TYPE_ET: | |
146 | return EuropeanNumberTerminator; | |
147 | case PANGO_BIDI_TYPE_CS: | |
148 | return CommonNumberSeparator; | |
149 | case PANGO_BIDI_TYPE_NSM: | |
150 | return NonSpacingMark; | |
151 | case PANGO_BIDI_TYPE_BN: | |
152 | return BoundaryNeutral; | |
153 | case PANGO_BIDI_TYPE_B: | |
154 | return BlockSeparator; | |
155 | case PANGO_BIDI_TYPE_S: | |
156 | return SegmentSeparator; | |
157 | case PANGO_BIDI_TYPE_WS: | |
158 | return WhiteSpaceNeutral; | |
159 | default: | |
160 | return OtherNeutral; | |
161 | } | |
162 | } | |
163 | ||
164 | int umemcasecmp(const UChar* a, const UChar* b, int len) | |
165 | { | |
166 | GOwnPtr<char> utf8a; | |
167 | GOwnPtr<char> utf8b; | |
168 | ||
169 | utf8a.set(g_utf16_to_utf8(a, len, 0, 0, 0)); | |
170 | utf8b.set(g_utf16_to_utf8(b, len, 0, 0, 0)); | |
171 | ||
172 | GOwnPtr<char> foldedA; | |
173 | GOwnPtr<char> foldedB; | |
174 | ||
175 | foldedA.set(g_utf8_casefold(utf8a.get(), -1)); | |
176 | foldedB.set(g_utf8_casefold(utf8b.get(), -1)); | |
177 | ||
178 | // FIXME: umemcasecmp needs to mimic u_memcasecmp of icu | |
179 | // from the ICU docs: | |
180 | // "Compare two strings case-insensitively using full case folding. | |
181 | // his is equivalent to u_strcmp(u_strFoldCase(s1, n, options), u_strFoldCase(s2, n, options))." | |
182 | // | |
183 | // So it looks like we don't need the full g_utf8_collate here, | |
184 | // but really a bitwise comparison of casefolded unicode chars (not utf-8 bytes). | |
185 | // As there is no direct equivalent to this icu function in GLib, for now | |
186 | // we'll use g_utf8_collate(): | |
187 | ||
188 | return g_utf8_collate(foldedA.get(), foldedB.get()); | |
189 | } | |
190 | ||
191 | } | |
192 | } |