[apple/javascriptcore.git] / wtf / unicode / glib / UnicodeGLib.cpp

/*
 *  Copyright (C) 2008 Jürg Billeter <j@bitron.ch>
 *  Copyright (C) 2008 Dominik Röttsches <dominik.roettsches@access-company.com>
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Library General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Library General Public License for more details.
 *
 *  You should have received a copy of the GNU Library General Public License
 *  along with this library; see the file COPYING.LIB.  If not, write to
 *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 *  Boston, MA 02110-1301, USA.
 *
 */

#include "config.h"
#include "UnicodeGLib.h"

namespace WTF {
namespace Unicode {

// Case-folds a single code point.
//
// The code point is round-tripped through UTF-8 so that g_utf8_casefold() can be
// used. Returns the folded code point, or ch unchanged when ch cannot be converted
// to UTF-8 or when its folding is not exactly one code point.
UChar32 foldCase(UChar32 ch)
{
    GOwnPtr<GError> gerror;

    GOwnPtr<char> utf8char;
    utf8char.set(g_ucs4_to_utf8(reinterpret_cast<gunichar*>(&ch), 1, 0, 0, &gerror.outPtr()));
    if (gerror)
        return ch;

    GOwnPtr<char> utf8caseFolded;
    utf8caseFolded.set(g_utf8_casefold(utf8char.get(), -1));

    long foldedLength = 0;
    GOwnPtr<gunichar> ucs4Result;
    ucs4Result.set(g_utf8_to_ucs4_fast(utf8caseFolded.get(), -1, &foldedLength));

    // g_utf8_casefold() performs full case folding, which can expand one code point
    // into several (e.g. U+00DF folds to "ss"). A single UChar32 cannot carry such a
    // result, and returning only its first code point would make U+00DF fold to 's'.
    // Leave the character unchanged instead. A zero-length result (ch == 0) also
    // takes this path and yields 0, as before.
    // NOTE(review): a few characters have both a multi-code-point full folding and a
    // distinct single-code-point simple folding; those are left unchanged here too.
    if (foldedLength != 1)
        return ch;

    return *ucs4Result;
}

int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
{
    *error = false;
    GOwnPtr<GError> gerror;

    GOwnPtr<char> utf8src;
    utf8src.set(g_utf16_to_utf8(src, srcLength, 0, 0, &gerror.outPtr()));
    if (gerror) {
        *error = true;
        return -1;
    }

    GOwnPtr<char> utf8result;
    utf8result.set(g_utf8_casefold(utf8src.get(), -1));

    long utf16resultLength = -1;
    GOwnPtr<UChar> utf16result;
    utf16result.set(g_utf8_to_utf16(utf8result.get(), -1, 0, &utf16resultLength, &gerror.outPtr()));
    if (gerror) {
        *error = true;
        return -1;
    }

    if (utf16resultLength > resultLength) {
        *error = true;
        return utf16resultLength;
    }
    memcpy(result, utf16result.get(), utf16resultLength * sizeof(UChar));

    return utf16resultLength;
}

int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
{
    *error = false;
    GOwnPtr<GError> gerror;

    GOwnPtr<char> utf8src;
    utf8src.set(g_utf16_to_utf8(src, srcLength, 0, 0, &gerror.outPtr()));
    if (gerror) {
        *error = true;
        return -1;
    }

    GOwnPtr<char> utf8result;
    utf8result.set(g_utf8_strdown(utf8src.get(), -1));

    long utf16resultLength = -1;
    GOwnPtr<UChar> utf16result;
    utf16result.set(g_utf8_to_utf16(utf8result.get(), -1, 0, &utf16resultLength, &gerror.outPtr()));
    if (gerror) {
        *error = true;
        return -1;
    }

    if (utf16resultLength > resultLength) {
        *error = true;
        return utf16resultLength;
    }
    memcpy(result, utf16result.get(), utf16resultLength * sizeof(UChar));

    return utf16resultLength;
}

int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
{
    *error = false;
    GOwnPtr<GError> gerror;

    GOwnPtr<char> utf8src;
    utf8src.set(g_utf16_to_utf8(src, srcLength, 0, 0, &gerror.outPtr()));
    if (gerror) {
        *error = true;
        return -1;
    }

    GOwnPtr<char> utf8result;
    utf8result.set(g_utf8_strup(utf8src.get(), -1));

    long utf16resultLength = -1;
    GOwnPtr<UChar> utf16result;
    utf16result.set(g_utf8_to_utf16(utf8result.get(), -1, 0, &utf16resultLength, &gerror.outPtr()));
    if (gerror) {
        *error = true;
        return -1;
    }

    if (utf16resultLength > resultLength) {
        *error = true;
        return utf16resultLength;
    }
    memcpy(result, utf16result.get(), utf16resultLength * sizeof(UChar));

    return utf16resultLength;
}

// Translates the Pango bidirectional type of a code point into the matching
// Direction enumerator. Any Pango type without an explicit case below is
// reported as OtherNeutral.
Direction direction(UChar32 c)
{
    Direction bidiClass = OtherNeutral;

    switch (pango_bidi_type_for_unichar(c)) {
    // Strong types.
    case PANGO_BIDI_TYPE_L:
        bidiClass = LeftToRight;
        break;
    case PANGO_BIDI_TYPE_R:
        bidiClass = RightToLeft;
        break;
    case PANGO_BIDI_TYPE_AL:
        bidiClass = RightToLeftArabic;
        break;

    // Explicit embedding and override controls.
    case PANGO_BIDI_TYPE_LRE:
        bidiClass = LeftToRightEmbedding;
        break;
    case PANGO_BIDI_TYPE_RLE:
        bidiClass = RightToLeftEmbedding;
        break;
    case PANGO_BIDI_TYPE_LRO:
        bidiClass = LeftToRightOverride;
        break;
    case PANGO_BIDI_TYPE_RLO:
        bidiClass = RightToLeftOverride;
        break;
    case PANGO_BIDI_TYPE_PDF:
        bidiClass = PopDirectionalFormat;
        break;

    // Numbers and the marks attached to them.
    case PANGO_BIDI_TYPE_EN:
        bidiClass = EuropeanNumber;
        break;
    case PANGO_BIDI_TYPE_AN:
        bidiClass = ArabicNumber;
        break;
    case PANGO_BIDI_TYPE_ES:
        bidiClass = EuropeanNumberSeparator;
        break;
    case PANGO_BIDI_TYPE_ET:
        bidiClass = EuropeanNumberTerminator;
        break;
    case PANGO_BIDI_TYPE_CS:
        bidiClass = CommonNumberSeparator;
        break;
    case PANGO_BIDI_TYPE_NSM:
        bidiClass = NonSpacingMark;
        break;
    case PANGO_BIDI_TYPE_BN:
        bidiClass = BoundaryNeutral;
        break;

    // Separators and whitespace.
    case PANGO_BIDI_TYPE_B:
        bidiClass = BlockSeparator;
        break;
    case PANGO_BIDI_TYPE_S:
        bidiClass = SegmentSeparator;
        break;
    case PANGO_BIDI_TYPE_WS:
        bidiClass = WhiteSpaceNeutral;
        break;

    default:
        break;
    }

    return bidiClass;
}

int umemcasecmp(const UChar* a, const UChar* b, int len)
{
    GOwnPtr<char> utf8a;
    GOwnPtr<char> utf8b;

    utf8a.set(g_utf16_to_utf8(a, len, 0, 0, 0));
    utf8b.set(g_utf16_to_utf8(b, len, 0, 0, 0));

    GOwnPtr<char> foldedA;
    GOwnPtr<char> foldedB;

    foldedA.set(g_utf8_casefold(utf8a.get(), -1));
    foldedB.set(g_utf8_casefold(utf8b.get(), -1));

    // FIXME: umemcasecmp needs to mimic u_memcasecmp of icu
    // from the ICU docs:
    // "Compare two strings case-insensitively using full case folding.
    // his is equivalent to u_strcmp(u_strFoldCase(s1, n, options), u_strFoldCase(s2, n, options))."
    //
    // So it looks like we don't need the full g_utf8_collate here,
    // but really a bitwise comparison of casefolded unicode chars (not utf-8 bytes).
    // As there is no direct equivalent to this icu function in GLib, for now
    // we'll use g_utf8_collate():

    return g_utf8_collate(foldedA.get(), foldedB.get());
}

}
}
Commit	Line	Data
ba379fdc A	1	/*
	2	* Copyright (C) 2008 Jürg Billeter <j@bitron.ch>
	3	* Copyright (C) 2008 Dominik Röttsches <dominik.roettsches@access-company.com>
	4	*
	5	* This library is free software; you can redistribute it and/or
	6	* modify it under the terms of the GNU Library General Public
	7	* License as published by the Free Software Foundation; either
	8	* version 2 of the License, or (at your option) any later version.
	9	*
	10	* This library is distributed in the hope that it will be useful,
	11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	13	* Library General Public License for more details.
	14	*
	15	* You should have received a copy of the GNU Library General Public License
	16	* along with this library; see the file COPYING.LIB. If not, write to
	17	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
	18	* Boston, MA 02110-1301, USA.
	19	*
	20	*/
	21
f9bf01c6	22	#include "config.h"
ba379fdc A	23	#include "UnicodeGLib.h"
	24
	25	namespace WTF {
	26	namespace Unicode {
	27
	28	UChar32 foldCase(UChar32 ch)
	29	{
	30	GOwnPtr<GError> gerror;
	31
	32	GOwnPtr<char> utf8char;
	33	utf8char.set(g_ucs4_to_utf8(reinterpret_cast<gunichar*>(&ch), 1, 0, 0, &gerror.outPtr()));
	34	if (gerror)
	35	return ch;
	36
	37	GOwnPtr<char> utf8caseFolded;
	38	utf8caseFolded.set(g_utf8_casefold(utf8char.get(), -1));
	39
	40	GOwnPtr<gunichar> ucs4Result;
	41	ucs4Result.set(g_utf8_to_ucs4_fast(utf8caseFolded.get(), -1, 0));
	42
	43	return *ucs4Result;
	44	}
	45
	46	int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
	47	{
	48	*error = false;
	49	GOwnPtr<GError> gerror;
	50
	51	GOwnPtr<char> utf8src;
	52	utf8src.set(g_utf16_to_utf8(src, srcLength, 0, 0, &gerror.outPtr()));
	53	if (gerror) {
	54	*error = true;
	55	return -1;
	56	}
	57
	58	GOwnPtr<char> utf8result;
	59	utf8result.set(g_utf8_casefold(utf8src.get(), -1));
	60
	61	long utf16resultLength = -1;
	62	GOwnPtr<UChar> utf16result;
	63	utf16result.set(g_utf8_to_utf16(utf8result.get(), -1, 0, &utf16resultLength, &gerror.outPtr()));
	64	if (gerror) {
	65	*error = true;
	66	return -1;
	67	}
	68
	69	if (utf16resultLength > resultLength) {
	70	*error = true;
	71	return utf16resultLength;
	72	}
	73	memcpy(result, utf16result.get(), utf16resultLength * sizeof(UChar));
	74
	75	return utf16resultLength;
	76	}
	77
	78	int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
	79	{
	80	*error = false;
	81	GOwnPtr<GError> gerror;
	82
	83	GOwnPtr<char> utf8src;
	84	utf8src.set(g_utf16_to_utf8(src, srcLength, 0, 0, &gerror.outPtr()));
	85	if (gerror) {
	86	*error = true;
87	return -1;
88	}
89
90	GOwnPtr<char> utf8result;
91	utf8result.set(g_utf8_strdown(utf8src.get(), -1));
92
93	long utf16resultLength = -1;
94	GOwnPtr<UChar> utf16result;
95	utf16result.set(g_utf8_to_utf16(utf8result.get(), -1, 0, &utf16resultLength, &gerror.outPtr()));
96	if (gerror) {
97	*error = true;
98	return -1;
99	}
100
101	if (utf16resultLength > resultLength) {
102	*error = true;
103	return utf16resultLength;
104	}
105	memcpy(result, utf16result.get(), utf16resultLength * sizeof(UChar));
106
107	return utf16resultLength;
108	}
109
110	int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
111	{
112	*error = false;
113	GOwnPtr<GError> gerror;
114
115	GOwnPtr<char> utf8src;
116	utf8src.set(g_utf16_to_utf8(src, srcLength, 0, 0, &gerror.outPtr()));
117	if (gerror) {
118	*error = true;
119	return -1;
120	}
121
122	GOwnPtr<char> utf8result;
123	utf8result.set(g_utf8_strup(utf8src.get(), -1));
124
125	long utf16resultLength = -1;
126	GOwnPtr<UChar> utf16result;
127	utf16result.set(g_utf8_to_utf16(utf8result.get(), -1, 0, &utf16resultLength, &gerror.outPtr()));
128	if (gerror) {
129	*error = true;
130	return -1;
131	}
132
133	if (utf16resultLength > resultLength) {
134	*error = true;
135	return utf16resultLength;
136	}
137	memcpy(result, utf16result.get(), utf16resultLength * sizeof(UChar));
138
139	return utf16resultLength;
140	}
141
142	Direction direction(UChar32 c)
143	{
144	PangoBidiType type = pango_bidi_type_for_unichar(c);
145	switch (type) {
146	case PANGO_BIDI_TYPE_L:
147	return LeftToRight;
148	case PANGO_BIDI_TYPE_R:
149	return RightToLeft;
150	case PANGO_BIDI_TYPE_AL:
151	return RightToLeftArabic;
152	case PANGO_BIDI_TYPE_LRE:
153	return LeftToRightEmbedding;
154	case PANGO_BIDI_TYPE_RLE:
155	return RightToLeftEmbedding;
156	case PANGO_BIDI_TYPE_LRO:
157	return LeftToRightOverride;
158	case PANGO_BIDI_TYPE_RLO:
159	return RightToLeftOverride;
160	case PANGO_BIDI_TYPE_PDF:
161	return PopDirectionalFormat;
162	case PANGO_BIDI_TYPE_EN:
163	return EuropeanNumber;
164	case PANGO_BIDI_TYPE_AN:
165	return ArabicNumber;
166	case PANGO_BIDI_TYPE_ES:
167	return EuropeanNumberSeparator;
168	case PANGO_BIDI_TYPE_ET:
169	return EuropeanNumberTerminator;
170	case PANGO_BIDI_TYPE_CS:
171	return CommonNumberSeparator;
172	case PANGO_BIDI_TYPE_NSM:
173	return NonSpacingMark;
174	case PANGO_BIDI_TYPE_BN:
175	return BoundaryNeutral;
176	case PANGO_BIDI_TYPE_B:
177	return BlockSeparator;
178	case PANGO_BIDI_TYPE_S:
179	return SegmentSeparator;
180	case PANGO_BIDI_TYPE_WS:
181	return WhiteSpaceNeutral;
182	default:
183	return OtherNeutral;
184	}
185	}
186
187	int umemcasecmp(const UChar* a, const UChar* b, int len)
188	{
189	GOwnPtr<char> utf8a;
190	GOwnPtr<char> utf8b;
191
192	utf8a.set(g_utf16_to_utf8(a, len, 0, 0, 0));
193	utf8b.set(g_utf16_to_utf8(b, len, 0, 0, 0));
194
195	GOwnPtr<char> foldedA;
196	GOwnPtr<char> foldedB;
197
198	foldedA.set(g_utf8_casefold(utf8a.get(), -1));
199	foldedB.set(g_utf8_casefold(utf8b.get(), -1));
200
201	// FIXME: umemcasecmp needs to mimic u_memcasecmp of icu
202	// from the ICU docs:
203	// "Compare two strings case-insensitively using full case folding.
204	// his is equivalent to u_strcmp(u_strFoldCase(s1, n, options), u_strFoldCase(s2, n, options))."
205	//
206	// So it looks like we don't need the full g_utf8_collate here,
207	// but really a bitwise comparison of casefolded unicode chars (not utf-8 bytes).
208	// As there is no direct equivalent to this icu function in GLib, for now
209	// we'll use g_utf8_collate():
210
211	return g_utf8_collate(foldedA.get(), foldedB.get());
212	}
213
214	}
215	}