2 * Copyright (C) 2008 Jürg Billeter <j@bitron.ch>
3 * Copyright (C) 2008 Dominik Röttsches <dominik.roettsches@access-company.com>
4 * Copyright (C) 2010 Igalia S.L.
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
24 #include "UnicodeGLib.h"
26 #include <wtf/Vector.h>
27 #include <wtf/unicode/UTF8.h>
// True when |character| is a supplementary-plane code point, i.e. lies in
// U+10000..U+10FFFF. Such a code point needs two UTF-16 code units (a
// surrogate pair); getUTF16LengthFromUTF8 in this file counts 2 for it and 1
// otherwise. Despite the name, this does NOT test whether the value is itself
// a surrogate code point (U+D800..U+DFFF).
// The argument is parenthesized so expressions such as `c & mask` or
// `cond ? a : b` are compared as a whole. It is still evaluated twice, so do
// not pass an expression with side effects.
#define UTF8_IS_SURROGATE(character) ((character) >= 0x10000 && (character) <= 0x10FFFF)
// Case-folds a single code point by round-tripping through GLib's UTF-8 API:
// ch -> UTF-8 (g_ucs4_to_utf8) -> folded UTF-8 (g_utf8_casefold) -> UCS-4
// (g_utf8_to_ucs4_fast).
// NOTE(review): this listing is a damaged extraction. The original line number
// is fused in front of every statement and tokens are split across lines. The
// embedded numbers skip 35, 40-42 and 48-50, so the braces, whatever follows
// the first conversion, and the return statement are not visible here.
34 UChar32
foldCase(UChar32 ch
)
// Receives any GError reported by g_ucs4_to_utf8 below.
36 GOwnPtr
<GError
> gerror
;
38 GOwnPtr
<char> utf8char
;
// Encode exactly one code point (length argument 1) as UTF-8. The address of
// ch is reinterpreted as a gunichar* to satisfy the GLib signature.
39 utf8char
.set(g_ucs4_to_utf8(reinterpret_cast<gunichar
*>(&ch
), 1, 0, 0, &gerror
.outPtr()));
// NOTE(review): numbers 40-42 are absent from this extract. gerror is never
// inspected in the visible text; presumably it is checked there. Confirm
// against the full file.
43 GOwnPtr
<char> utf8caseFolded
;
// Fold the UTF-8 form; -1 is passed as the length argument.
44 utf8caseFolded
.set(g_utf8_casefold(utf8char
.get(), -1));
46 GOwnPtr
<gunichar
> ucs4Result
;
// Decode the folded UTF-8 back to UCS-4. The folded text may hold more than
// one code point; which one is returned is not visible in this extract.
47 ucs4Result
.set(g_utf8_to_ucs4_fast(utf8caseFolded
.get(), -1, 0));
// Counts how many UTF-16 code units the UTF-8 text at utf8String would need.
// Scanning stops after `length` bytes or at the first NUL byte, whichever
// comes first.
// NOTE(review): damaged extraction. Embedded numbers 53, 54, 56, 59 and 62-65
// are absent, so the declaration/initialisation of utf16Length, the end of the
// loop and the return statement are not visible here.
52 static int getUTF16LengthFromUTF8(const gchar
* utf8String
, int length
)
// Cursor that walks the input one UTF-8 sequence at a time.
55 const gchar
* inputString
= utf8String
;
// Continue while the cursor is still inside the first `length` bytes and has
// not reached a NUL terminator.
57 while ((utf8String
+ length
- inputString
> 0) && *inputString
) {
58 gunichar character
= g_utf8_get_char(inputString
);
// Code points in U+10000..U+10FFFF (what UTF8_IS_SURROGATE matches) count as
// two UTF-16 code units; everything else counts as one.
60 utf16Length
+= UTF8_IS_SURROGATE(character
) ? 2 : 1;
// Advance to the start of the next UTF-8 sequence.
61 inputString
= g_utf8_next_char(inputString
);
67 typedef gchar
* (*UTF8CaseFunction
)(const gchar
*, gssize length
);
// Shared implementation behind foldCase/toLower/toUpper for UTF-16 strings:
//   1. convert src (srcLength UTF-16 code units) to UTF-8,
//   2. run caseFunction over the UTF-8 text,
//   3. convert the result back to UTF-16 into result (room for resultLength
//      code units).
// The final statement returns the number of UTF-16 code units written, or -1
// when nothing was written.
// NOTE(review): damaged extraction. Embedded numbers 70-72, 79-81, 83, 86,
// 90-93 and 100-101 are absent, so the opening of the body, the bodies of both
// `if (...) {` error branches and the statement governed by the last `if` are
// not visible here. How *error is set must be confirmed against the full file.
69 static int convertCase(UChar
* result
, int resultLength
, const UChar
* src
, int srcLength
, bool* error
, UTF8CaseFunction caseFunction
)
73 // Allocate a buffer big enough to hold all the characters.
// (Three bytes per UTF-16 code unit is the UTF-8 worst case.)
74 Vector
<char> buffer(srcLength
* 3);
75 char* utf8Target
= buffer
.data();
76 const UChar
* utf16Source
= src
;
// Both cursors are passed by address; utf8Target is used afterwards as the
// end of the written UTF-8 data. The final `true` is a flag of
// convertUTF16ToUTF8 whose meaning is not shown in this file.
77 ConversionResult conversionResult
= convertUTF16ToUTF8(&utf16Source
, utf16Source
+ srcLength
, &utf8Target
, utf8Target
+ buffer
.size(), true);
// Failure branch; its body (numbers 79-81) is missing from this extract.
78 if (conversionResult
!= conversionOK
) {
// Trim the buffer to the bytes actually produced so buffer.size() is the
// UTF-8 length handed to caseFunction.
82 buffer
.shrink(utf8Target
- buffer
.data());
84 GOwnPtr
<char> utf8Result(caseFunction(buffer
.data(), buffer
.size()));
// NOTE(review): strlen is applied to the result without a null check in the
// visible text. Confirm caseFunction cannot return null here (e.g. for an
// empty source).
85 long utf8ResultLength
= strlen(utf8Result
.get());
87 // Calculate the destination buffer size.
88 int realLength
= getUTF16LengthFromUTF8(utf8Result
.get(), utf8ResultLength
);
// Destination too small for the converted text; the branch body (numbers
// 90-93) is missing from this extract.
89 if (realLength
> resultLength
) {
94 // Convert the result to UTF-16.
95 UChar
* utf16Target
= result
;
96 const char* utf8Source
= utf8Result
.get();
97 conversionResult
= convertUTF8ToUTF16(&utf8Source
, utf8Source
+ utf8ResultLength
, &utf16Target
, utf16Target
+ resultLength
, true);
// Number of UTF-16 code units written to result.
98 long utf16ResultLength
= utf16Target
- result
;
// The statement controlled by this `if` (numbers 100-101) is missing from this
// extract.
99 if (conversionResult
!= conversionOK
)
// -1 signals that no output was produced; otherwise the written length.
102 return utf16ResultLength
<= 0 ? -1 : utf16ResultLength
;
104 int foldCase(UChar
* result
, int resultLength
, const UChar
* src
, int srcLength
, bool* error
)
106 return convertCase(result
, resultLength
, src
, srcLength
, error
, g_utf8_casefold
);
109 int toLower(UChar
* result
, int resultLength
, const UChar
* src
, int srcLength
, bool* error
)
111 return convertCase(result
, resultLength
, src
, srcLength
, error
, g_utf8_strdown
);
114 int toUpper(UChar
* result
, int resultLength
, const UChar
* src
, int srcLength
, bool* error
)
116 return convertCase(result
, resultLength
, src
, srcLength
, error
, g_utf8_strup
);
// Maps the Pango bidi type of code point c (pango_bidi_type_for_unichar) onto
// this library's Direction enumeration, one case label per Pango type.
// NOTE(review): damaged extraction. Embedded numbers 120, 122, 124, 126, 142
// and 159 onwards are absent, so the braces, the `switch` header, three
// `return` lines and the tail of the function (any remaining cases or default
// handling) are not visible here. Do NOT read the consecutive case labels
// below as intentional fall-through.
119 Direction
direction(UChar32 c
)
121 PangoBidiType type
= pango_bidi_type_for_unichar(c
);
// Number 124 (the statement for the L case) is missing from this extract.
123 case PANGO_BIDI_TYPE_L
:
// Number 126 (the statement for the R case) is missing from this extract.
125 case PANGO_BIDI_TYPE_R
:
127 case PANGO_BIDI_TYPE_AL
:
128 return RightToLeftArabic
;
129 case PANGO_BIDI_TYPE_LRE
:
130 return LeftToRightEmbedding
;
131 case PANGO_BIDI_TYPE_RLE
:
132 return RightToLeftEmbedding
;
133 case PANGO_BIDI_TYPE_LRO
:
134 return LeftToRightOverride
;
135 case PANGO_BIDI_TYPE_RLO
:
136 return RightToLeftOverride
;
137 case PANGO_BIDI_TYPE_PDF
:
138 return PopDirectionalFormat
;
139 case PANGO_BIDI_TYPE_EN
:
140 return EuropeanNumber
;
// Number 142 (the statement for the AN case) is missing from this extract.
141 case PANGO_BIDI_TYPE_AN
:
143 case PANGO_BIDI_TYPE_ES
:
144 return EuropeanNumberSeparator
;
145 case PANGO_BIDI_TYPE_ET
:
146 return EuropeanNumberTerminator
;
147 case PANGO_BIDI_TYPE_CS
:
148 return CommonNumberSeparator
;
149 case PANGO_BIDI_TYPE_NSM
:
150 return NonSpacingMark
;
151 case PANGO_BIDI_TYPE_BN
:
152 return BoundaryNeutral
;
153 case PANGO_BIDI_TYPE_B
:
154 return BlockSeparator
;
155 case PANGO_BIDI_TYPE_S
:
156 return SegmentSeparator
;
157 case PANGO_BIDI_TYPE_WS
:
158 return WhiteSpaceNeutral
;
// Case-insensitive comparison of the first len UTF-16 code units of a and b.
// Both inputs are converted to UTF-8, case-folded with g_utf8_casefold, and
// the folded strings are compared with g_utf8_collate, whose result is
// returned.
// NOTE(review): damaged extraction. Embedded numbers 165-168, 171, 174, 177,
// 182, 187 and 189 onwards are absent, so the braces and the declarations of
// utf8a/utf8b are not visible here.
164 int umemcasecmp(const UChar
* a
, const UChar
* b
, int len
)
// UTF-16 -> UTF-8. The items-read, items-written and error out-parameters are
// all passed as 0, so a failed conversion is not detected in the visible code.
// NOTE(review): confirm how a null result is handled for malformed UTF-16.
169 utf8a
.set(g_utf16_to_utf8(a
, len
, 0, 0, 0));
170 utf8b
.set(g_utf16_to_utf8(b
, len
, 0, 0, 0));
172 GOwnPtr
<char> foldedA
;
173 GOwnPtr
<char> foldedB
;
// Case-fold both strings; -1 is passed as the length argument.
175 foldedA
.set(g_utf8_casefold(utf8a
.get(), -1));
176 foldedB
.set(g_utf8_casefold(utf8b
.get(), -1));
178 // FIXME: umemcasecmp needs to mimic u_memcasecmp of icu
179 // from the ICU docs:
180 // "Compare two strings case-insensitively using full case folding.
181 // This is equivalent to u_strcmp(u_strFoldCase(s1, n, options), u_strFoldCase(s2, n, options))."
183 // So it looks like we don't need the full g_utf8_collate here,
184 // but really a bitwise comparison of casefolded unicode chars (not utf-8 bytes).
185 // As there is no direct equivalent to this icu function in GLib, for now
186 // we'll use g_utf8_collate():
// NOTE(review): per the GLib documentation g_utf8_collate orders strings by
// the current locale's collation rules, so the result can differ from the
// code-point comparison the FIXME above asks for.
188 return g_utf8_collate(foldedA
.get(), foldedB
.get());