wtf/unicode/glib/UnicodeGLib.cpp

   1 /*
   2  *  Copyright (C) 2008 Jürg Billeter <j@bitron.ch>
   3  *  Copyright (C) 2008 Dominik Röttsches <dominik.roettsches@access-company.com>
   4  *
   5  *  This library is free software; you can redistribute it and/or
   6  *  modify it under the terms of the GNU Library General Public
   7  *  License as published by the Free Software Foundation; either
   8  *  version 2 of the License, or (at your option) any later version.
   9  *
  10  *  This library is distributed in the hope that it will be useful,
  11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  *  Library General Public License for more details.
  14  *
  15  *  You should have received a copy of the GNU Library General Public License
  16  *  along with this library; see the file COPYING.LIB.  If not, write to
  17  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  18  *  Boston, MA 02110-1301, USA.
  19  *
  20  */
  21
#include "UnicodeGLib.h"

#include <string.h>
  23
  24 namespace WTF {
  25 namespace Unicode {
  26
  27 UChar32 foldCase(UChar32 ch)
  28 {
  29     GOwnPtr<GError> gerror;
  30
  31     GOwnPtr<char> utf8char;
  32     utf8char.set(g_ucs4_to_utf8(reinterpret_cast<gunichar*>(&ch), 1, 0, 0, &gerror.outPtr()));
  33     if (gerror)
  34         return ch;
  35
  36     GOwnPtr<char> utf8caseFolded;
  37     utf8caseFolded.set(g_utf8_casefold(utf8char.get(), -1));
  38
  39     GOwnPtr<gunichar> ucs4Result;
  40     ucs4Result.set(g_utf8_to_ucs4_fast(utf8caseFolded.get(), -1, 0));
  41
  42     return *ucs4Result;
  43 }
  44
  45 int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
  46 {
  47     *error = false;
  48     GOwnPtr<GError> gerror;
  49
  50     GOwnPtr<char> utf8src;
  51     utf8src.set(g_utf16_to_utf8(src, srcLength, 0, 0, &gerror.outPtr()));
  52     if (gerror) {
  53         *error = true;
  54         return -1;
  55     }
  56
  57     GOwnPtr<char> utf8result;
  58     utf8result.set(g_utf8_casefold(utf8src.get(), -1));
  59
  60     long utf16resultLength = -1;
  61     GOwnPtr<UChar> utf16result;
  62     utf16result.set(g_utf8_to_utf16(utf8result.get(), -1, 0, &utf16resultLength, &gerror.outPtr()));
  63     if (gerror) {
  64         *error = true;
  65         return -1;
  66     }
  67
  68     if (utf16resultLength > resultLength) {
  69         *error = true;
  70         return utf16resultLength;
  71     }
  72     memcpy(result, utf16result.get(), utf16resultLength * sizeof(UChar));
  73
  74     return utf16resultLength;
  75 }
  76
  77 int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
  78 {
  79     *error = false;
  80     GOwnPtr<GError> gerror;
  81
  82     GOwnPtr<char> utf8src;
  83     utf8src.set(g_utf16_to_utf8(src, srcLength, 0, 0, &gerror.outPtr()));
  84     if (gerror) {
  85         *error = true;
  86         return -1;
  87     }
  88
  89     GOwnPtr<char> utf8result;
  90     utf8result.set(g_utf8_strdown(utf8src.get(), -1));
  91
  92     long utf16resultLength = -1;
  93     GOwnPtr<UChar> utf16result;
  94     utf16result.set(g_utf8_to_utf16(utf8result.get(), -1, 0, &utf16resultLength, &gerror.outPtr()));
  95     if (gerror) {
  96         *error = true;
  97         return -1;
  98     }
  99
 100     if (utf16resultLength > resultLength) {
 101         *error = true;
 102         return utf16resultLength;
 103     }
 104     memcpy(result, utf16result.get(), utf16resultLength * sizeof(UChar));
 105
 106     return utf16resultLength;
 107 }
 108
 109 int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
 110 {
 111     *error = false;
 112     GOwnPtr<GError> gerror;
 113
 114     GOwnPtr<char> utf8src;
 115     utf8src.set(g_utf16_to_utf8(src, srcLength, 0, 0, &gerror.outPtr()));
 116     if (gerror) {
 117         *error = true;
 118         return -1;
 119     }
 120
 121     GOwnPtr<char> utf8result;
 122     utf8result.set(g_utf8_strup(utf8src.get(), -1));
 123
 124     long utf16resultLength = -1;
 125     GOwnPtr<UChar> utf16result;
 126     utf16result.set(g_utf8_to_utf16(utf8result.get(), -1, 0, &utf16resultLength, &gerror.outPtr()));
 127     if (gerror) {
 128         *error = true;
 129         return -1;
 130     }
 131
 132     if (utf16resultLength > resultLength) {
 133         *error = true;
 134         return utf16resultLength;
 135     }
 136     memcpy(result, utf16result.get(), utf16resultLength * sizeof(UChar));
 137
 138     return utf16resultLength;
 139 }
 140
 141 Direction direction(UChar32 c)
 142 {
 143     PangoBidiType type = pango_bidi_type_for_unichar(c);
 144     switch (type) {
 145     case PANGO_BIDI_TYPE_L:
 146         return LeftToRight;
 147     case PANGO_BIDI_TYPE_R:
 148         return RightToLeft;
 149     case PANGO_BIDI_TYPE_AL:
 150         return RightToLeftArabic;
 151     case PANGO_BIDI_TYPE_LRE:
 152         return LeftToRightEmbedding;
 153     case PANGO_BIDI_TYPE_RLE:
 154         return RightToLeftEmbedding;
 155     case PANGO_BIDI_TYPE_LRO:
 156         return LeftToRightOverride;
 157     case PANGO_BIDI_TYPE_RLO:
 158         return RightToLeftOverride;
 159     case PANGO_BIDI_TYPE_PDF:
 160         return PopDirectionalFormat;
 161     case PANGO_BIDI_TYPE_EN:
 162         return EuropeanNumber;
 163     case PANGO_BIDI_TYPE_AN:
 164         return ArabicNumber;
 165     case PANGO_BIDI_TYPE_ES:
 166         return EuropeanNumberSeparator;
 167     case PANGO_BIDI_TYPE_ET:
 168         return EuropeanNumberTerminator;
 169     case PANGO_BIDI_TYPE_CS:
 170         return CommonNumberSeparator;
 171     case PANGO_BIDI_TYPE_NSM:
 172         return NonSpacingMark;
 173     case PANGO_BIDI_TYPE_BN:
 174         return BoundaryNeutral;
 175     case PANGO_BIDI_TYPE_B:
 176         return BlockSeparator;
 177     case PANGO_BIDI_TYPE_S:
 178         return SegmentSeparator;
 179     case PANGO_BIDI_TYPE_WS:
 180         return WhiteSpaceNeutral;
 181     default:
 182         return OtherNeutral;
 183     }
 184 }
 185
 186 int umemcasecmp(const UChar* a, const UChar* b, int len)
 187 {
 188     GOwnPtr<char> utf8a;
 189     GOwnPtr<char> utf8b;
 190
 191     utf8a.set(g_utf16_to_utf8(a, len, 0, 0, 0));
 192     utf8b.set(g_utf16_to_utf8(b, len, 0, 0, 0));
 193
 194     GOwnPtr<char> foldedA;
 195     GOwnPtr<char> foldedB;
 196
 197     foldedA.set(g_utf8_casefold(utf8a.get(), -1));
 198     foldedB.set(g_utf8_casefold(utf8b.get(), -1));
 199
 200     // FIXME: umemcasecmp needs to mimic u_memcasecmp of icu
 201     // from the ICU docs:
 202     // "Compare two strings case-insensitively using full case folding.
 203     // his is equivalent to u_strcmp(u_strFoldCase(s1, n, options), u_strFoldCase(s2, n, options))."
 204     //
 205     // So it looks like we don't need the full g_utf8_collate here,
 206     // but really a bitwise comparison of casefolded unicode chars (not utf-8 bytes).
 207     // As there is no direct equivalent to this icu function in GLib, for now
 208     // we'll use g_utf8_collate():
 209
 210     return g_utf8_collate(foldedA.get(), foldedB.get());
 211 }
 212
 213 }
 214 }