/*
 * Copyright (C) 2006 George Staikos <staikos@kde.org>
 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
 * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public License
 * along with this library; see the file COPYING.LIB. If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 */
23 #ifndef WTF_UNICODE_ICU_H
24 #define WTF_UNICODE_ICU_H
27 #include <unicode/uchar.h>
28 #include <unicode/ustring.h>
29 #include <unicode/utf16.h>
35 LeftToRight
= U_LEFT_TO_RIGHT
,
36 RightToLeft
= U_RIGHT_TO_LEFT
,
37 EuropeanNumber
= U_EUROPEAN_NUMBER
,
38 EuropeanNumberSeparator
= U_EUROPEAN_NUMBER_SEPARATOR
,
39 EuropeanNumberTerminator
= U_EUROPEAN_NUMBER_TERMINATOR
,
40 ArabicNumber
= U_ARABIC_NUMBER
,
41 CommonNumberSeparator
= U_COMMON_NUMBER_SEPARATOR
,
42 BlockSeparator
= U_BLOCK_SEPARATOR
,
43 SegmentSeparator
= U_SEGMENT_SEPARATOR
,
44 WhiteSpaceNeutral
= U_WHITE_SPACE_NEUTRAL
,
45 OtherNeutral
= U_OTHER_NEUTRAL
,
46 LeftToRightEmbedding
= U_LEFT_TO_RIGHT_EMBEDDING
,
47 LeftToRightOverride
= U_LEFT_TO_RIGHT_OVERRIDE
,
48 RightToLeftArabic
= U_RIGHT_TO_LEFT_ARABIC
,
49 RightToLeftEmbedding
= U_RIGHT_TO_LEFT_EMBEDDING
,
50 RightToLeftOverride
= U_RIGHT_TO_LEFT_OVERRIDE
,
51 PopDirectionalFormat
= U_POP_DIRECTIONAL_FORMAT
,
52 NonSpacingMark
= U_DIR_NON_SPACING_MARK
,
53 BoundaryNeutral
= U_BOUNDARY_NEUTRAL
56 enum DecompositionType
{
57 DecompositionNone
= U_DT_NONE
,
58 DecompositionCanonical
= U_DT_CANONICAL
,
59 DecompositionCompat
= U_DT_COMPAT
,
60 DecompositionCircle
= U_DT_CIRCLE
,
61 DecompositionFinal
= U_DT_FINAL
,
62 DecompositionFont
= U_DT_FONT
,
63 DecompositionFraction
= U_DT_FRACTION
,
64 DecompositionInitial
= U_DT_INITIAL
,
65 DecompositionIsolated
= U_DT_ISOLATED
,
66 DecompositionMedial
= U_DT_MEDIAL
,
67 DecompositionNarrow
= U_DT_NARROW
,
68 DecompositionNoBreak
= U_DT_NOBREAK
,
69 DecompositionSmall
= U_DT_SMALL
,
70 DecompositionSquare
= U_DT_SQUARE
,
71 DecompositionSub
= U_DT_SUB
,
72 DecompositionSuper
= U_DT_SUPER
,
73 DecompositionVertical
= U_DT_VERTICAL
,
74 DecompositionWide
= U_DT_WIDE
,
79 Other_NotAssigned
= U_MASK(U_GENERAL_OTHER_TYPES
),
80 Letter_Uppercase
= U_MASK(U_UPPERCASE_LETTER
),
81 Letter_Lowercase
= U_MASK(U_LOWERCASE_LETTER
),
82 Letter_Titlecase
= U_MASK(U_TITLECASE_LETTER
),
83 Letter_Modifier
= U_MASK(U_MODIFIER_LETTER
),
84 Letter_Other
= U_MASK(U_OTHER_LETTER
),
86 Mark_NonSpacing
= U_MASK(U_NON_SPACING_MARK
),
87 Mark_Enclosing
= U_MASK(U_ENCLOSING_MARK
),
88 Mark_SpacingCombining
= U_MASK(U_COMBINING_SPACING_MARK
),
90 Number_DecimalDigit
= U_MASK(U_DECIMAL_DIGIT_NUMBER
),
91 Number_Letter
= U_MASK(U_LETTER_NUMBER
),
92 Number_Other
= U_MASK(U_OTHER_NUMBER
),
94 Separator_Space
= U_MASK(U_SPACE_SEPARATOR
),
95 Separator_Line
= U_MASK(U_LINE_SEPARATOR
),
96 Separator_Paragraph
= U_MASK(U_PARAGRAPH_SEPARATOR
),
98 Other_Control
= U_MASK(U_CONTROL_CHAR
),
99 Other_Format
= U_MASK(U_FORMAT_CHAR
),
100 Other_PrivateUse
= U_MASK(U_PRIVATE_USE_CHAR
),
101 Other_Surrogate
= U_MASK(U_SURROGATE
),
103 Punctuation_Dash
= U_MASK(U_DASH_PUNCTUATION
),
104 Punctuation_Open
= U_MASK(U_START_PUNCTUATION
),
105 Punctuation_Close
= U_MASK(U_END_PUNCTUATION
),
106 Punctuation_Connector
= U_MASK(U_CONNECTOR_PUNCTUATION
),
107 Punctuation_Other
= U_MASK(U_OTHER_PUNCTUATION
),
109 Symbol_Math
= U_MASK(U_MATH_SYMBOL
),
110 Symbol_Currency
= U_MASK(U_CURRENCY_SYMBOL
),
111 Symbol_Modifier
= U_MASK(U_MODIFIER_SYMBOL
),
112 Symbol_Other
= U_MASK(U_OTHER_SYMBOL
),
114 Punctuation_InitialQuote
= U_MASK(U_INITIAL_PUNCTUATION
),
115 Punctuation_FinalQuote
= U_MASK(U_FINAL_PUNCTUATION
)
118 inline UChar32
foldCase(UChar32 c
)
120 return u_foldCase(c
, U_FOLD_CASE_DEFAULT
);
123 inline int foldCase(UChar
* result
, int resultLength
, const UChar
* src
, int srcLength
, bool* error
)
125 UErrorCode status
= U_ZERO_ERROR
;
126 int realLength
= u_strFoldCase(result
, resultLength
, src
, srcLength
, U_FOLD_CASE_DEFAULT
, &status
);
127 *error
= !U_SUCCESS(status
);
131 inline int toLower(UChar
* result
, int resultLength
, const UChar
* src
, int srcLength
, bool* error
)
133 UErrorCode status
= U_ZERO_ERROR
;
134 int realLength
= u_strToLower(result
, resultLength
, src
, srcLength
, "", &status
);
135 *error
= !!U_FAILURE(status
);
139 inline UChar32
toLower(UChar32 c
)
144 inline UChar32
toUpper(UChar32 c
)
149 inline int toUpper(UChar
* result
, int resultLength
, const UChar
* src
, int srcLength
, bool* error
)
151 UErrorCode status
= U_ZERO_ERROR
;
152 int realLength
= u_strToUpper(result
, resultLength
, src
, srcLength
, "", &status
);
153 *error
= !!U_FAILURE(status
);
157 inline UChar32
toTitleCase(UChar32 c
)
162 inline bool isArabicChar(UChar32 c
)
164 return ublock_getCode(c
) == UBLOCK_ARABIC
;
167 inline bool isSeparatorSpace(UChar32 c
)
169 return u_charType(c
) == U_SPACE_SEPARATOR
;
172 inline bool isPrintableChar(UChar32 c
)
174 return !!u_isprint(c
);
177 inline bool isPunct(UChar32 c
)
179 return !!u_ispunct(c
);
182 inline bool hasLineBreakingPropertyComplexContext(UChar32 c
)
184 return u_getIntPropertyValue(c
, UCHAR_LINE_BREAK
) == U_LB_COMPLEX_CONTEXT
;
187 inline bool hasLineBreakingPropertyComplexContextOrIdeographic(UChar32 c
)
189 int32_t prop
= u_getIntPropertyValue(c
, UCHAR_LINE_BREAK
);
190 return prop
== U_LB_COMPLEX_CONTEXT
|| prop
== U_LB_IDEOGRAPHIC
;
193 inline UChar32
mirroredChar(UChar32 c
)
195 return u_charMirror(c
);
198 inline CharCategory
category(UChar32 c
)
200 return static_cast<CharCategory
>(U_GET_GC_MASK(c
));
203 inline Direction
direction(UChar32 c
)
205 return static_cast<Direction
>(u_charDirection(c
));
208 inline bool isLower(UChar32 c
)
210 return !!u_islower(c
);
213 inline uint8_t combiningClass(UChar32 c
)
215 return u_getCombiningClass(c
);
218 inline DecompositionType
decompositionType(UChar32 c
)
220 return static_cast<DecompositionType
>(u_getIntPropertyValue(c
, UCHAR_DECOMPOSITION_TYPE
));
223 inline int umemcasecmp(const UChar
* a
, const UChar
* b
, int len
)
225 return u_memcasecmp(a
, b
, len
, U_FOLD_CASE_DEFAULT
);
230 #endif // WTF_UNICODE_ICU_H