]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/scriptset.cpp
ICU-57131.0.1.tar.gz
[apple/icu.git] / icuSources / i18n / scriptset.cpp
CommitLineData
51004dcb
A
1/*
2**********************************************************************
b331163b 3* Copyright (C) 2014, International Business Machines
51004dcb
A
4* Corporation and others. All Rights Reserved.
5**********************************************************************
6*
7* scriptset.cpp
8*
9* created on: 2013 Jan 7
10* created by: Andy Heninger
11*/
12
13#include "unicode/utypes.h"
14
15#include "unicode/uchar.h"
16#include "unicode/unistr.h"
17
18#include "scriptset.h"
19#include "uassert.h"
b331163b 20#include "cmemory.h"
51004dcb
A
21
22U_NAMESPACE_BEGIN
23
51004dcb
A
24//----------------------------------------------------------------------------
25//
26// ScriptSet implementation
27//
28//----------------------------------------------------------------------------
29ScriptSet::ScriptSet() {
b331163b 30 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
51004dcb
A
31 bits[i] = 0;
32 }
33}
34
35ScriptSet::~ScriptSet() {
36}
37
38ScriptSet::ScriptSet(const ScriptSet &other) {
39 *this = other;
40}
41
42
43ScriptSet & ScriptSet::operator =(const ScriptSet &other) {
b331163b 44 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
51004dcb
A
45 bits[i] = other.bits[i];
46 }
47 return *this;
48}
49
50
51UBool ScriptSet::operator == (const ScriptSet &other) const {
b331163b 52 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
51004dcb
A
53 if (bits[i] != other.bits[i]) {
54 return FALSE;
55 }
56 }
57 return TRUE;
58}
59
60UBool ScriptSet::test(UScriptCode script, UErrorCode &status) const {
61 if (U_FAILURE(status)) {
62 return FALSE;
63 }
64 if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
65 status = U_ILLEGAL_ARGUMENT_ERROR;
66 return FALSE;
67 }
68 uint32_t index = script / 32;
69 uint32_t bit = 1 << (script & 31);
70 return ((bits[index] & bit) != 0);
71}
72
73
74ScriptSet &ScriptSet::set(UScriptCode script, UErrorCode &status) {
75 if (U_FAILURE(status)) {
76 return *this;
77 }
78 if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
79 status = U_ILLEGAL_ARGUMENT_ERROR;
80 return *this;
81 }
82 uint32_t index = script / 32;
83 uint32_t bit = 1 << (script & 31);
84 bits[index] |= bit;
85 return *this;
86}
87
88ScriptSet &ScriptSet::reset(UScriptCode script, UErrorCode &status) {
89 if (U_FAILURE(status)) {
90 return *this;
91 }
92 if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
93 status = U_ILLEGAL_ARGUMENT_ERROR;
94 return *this;
95 }
96 uint32_t index = script / 32;
97 uint32_t bit = 1 << (script & 31);
98 bits[index] &= ~bit;
99 return *this;
100}
101
102
103
104ScriptSet &ScriptSet::Union(const ScriptSet &other) {
b331163b 105 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
51004dcb
A
106 bits[i] |= other.bits[i];
107 }
108 return *this;
109}
110
111ScriptSet &ScriptSet::intersect(const ScriptSet &other) {
b331163b 112 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
51004dcb
A
113 bits[i] &= other.bits[i];
114 }
115 return *this;
116}
117
118ScriptSet &ScriptSet::intersect(UScriptCode script, UErrorCode &status) {
119 ScriptSet t;
120 t.set(script, status);
121 if (U_SUCCESS(status)) {
122 this->intersect(t);
123 }
124 return *this;
125}
126
127UBool ScriptSet::intersects(const ScriptSet &other) const {
b331163b 128 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
51004dcb
A
129 if ((bits[i] & other.bits[i]) != 0) {
130 return true;
131 }
132 }
133 return false;
134}
135
136UBool ScriptSet::contains(const ScriptSet &other) const {
137 ScriptSet t(*this);
138 t.intersect(other);
139 return (t == other);
140}
141
142
143ScriptSet &ScriptSet::setAll() {
b331163b 144 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
51004dcb
A
145 bits[i] = 0xffffffffu;
146 }
147 return *this;
148}
149
150
151ScriptSet &ScriptSet::resetAll() {
b331163b 152 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
51004dcb
A
153 bits[i] = 0;
154 }
155 return *this;
156}
157
158int32_t ScriptSet::countMembers() const {
159 // This bit counter is good for sparse numbers of '1's, which is
160 // very much the case that we will usually have.
161 int32_t count = 0;
b331163b 162 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
51004dcb
A
163 uint32_t x = bits[i];
164 while (x > 0) {
165 count++;
166 x &= (x - 1); // and off the least significant one bit.
167 }
168 }
169 return count;
170}
171
172int32_t ScriptSet::hashCode() const {
173 int32_t hash = 0;
b331163b 174 for (int32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
51004dcb
A
175 hash ^= bits[i];
176 }
177 return hash;
178}
179
180int32_t ScriptSet::nextSetBit(int32_t fromIndex) const {
181 // TODO: Wants a better implementation.
182 if (fromIndex < 0) {
183 return -1;
184 }
185 UErrorCode status = U_ZERO_ERROR;
186 for (int32_t scriptIndex = fromIndex; scriptIndex < (int32_t)sizeof(bits)*8; scriptIndex++) {
187 if (test((UScriptCode)scriptIndex, status)) {
188 return scriptIndex;
189 }
190 }
191 return -1;
192}
193
194UnicodeString &ScriptSet::displayScripts(UnicodeString &dest) const {
195 UBool firstTime = TRUE;
196 for (int32_t i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) {
197 if (!firstTime) {
57a6839d 198 dest.append((UChar)0x20);
51004dcb
A
199 }
200 firstTime = FALSE;
201 const char *scriptName = uscript_getShortName((UScriptCode(i)));
202 dest.append(UnicodeString(scriptName, -1, US_INV));
203 }
204 return dest;
205}
206
207ScriptSet &ScriptSet::parseScripts(const UnicodeString &scriptString, UErrorCode &status) {
208 resetAll();
209 if (U_FAILURE(status)) {
210 return *this;
211 }
212 UnicodeString oneScriptName;
213 for (int32_t i=0; i<scriptString.length();) {
214 UChar32 c = scriptString.char32At(i);
215 i = scriptString.moveIndex32(i, 1);
216 if (!u_isUWhiteSpace(c)) {
217 oneScriptName.append(c);
218 if (i < scriptString.length()) {
219 continue;
220 }
221 }
222 if (oneScriptName.length() > 0) {
223 char buf[40];
224 oneScriptName.extract(0, oneScriptName.length(), buf, sizeof(buf)-1, US_INV);
225 buf[sizeof(buf)-1] = 0;
226 int32_t sc = u_getPropertyValueEnum(UCHAR_SCRIPT, buf);
227 if (sc == UCHAR_INVALID_CODE) {
228 status = U_ILLEGAL_ARGUMENT_ERROR;
229 } else {
230 this->set((UScriptCode)sc, status);
231 }
232 if (U_FAILURE(status)) {
233 return *this;
234 }
235 oneScriptName.remove();
236 }
237 }
238 return *this;
239}
240
241U_NAMESPACE_END
242
243U_CAPI UBool U_EXPORT2
244uhash_equalsScriptSet(const UElement key1, const UElement key2) {
245 icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
246 icu::ScriptSet *s2 = static_cast<icu::ScriptSet *>(key2.pointer);
247 return (*s1 == *s2);
248}
249
250U_CAPI int8_t U_EXPORT2
251uhash_compareScriptSet(UElement key0, UElement key1) {
252 icu::ScriptSet *s0 = static_cast<icu::ScriptSet *>(key0.pointer);
253 icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
254 int32_t diff = s0->countMembers() - s1->countMembers();
255 if (diff != 0) return diff;
256 int32_t i0 = s0->nextSetBit(0);
257 int32_t i1 = s1->nextSetBit(0);
258 while ((diff = i0-i1) == 0 && i0 > 0) {
259 i0 = s0->nextSetBit(i0+1);
260 i1 = s1->nextSetBit(i1+1);
261 }
262 return (int8_t)diff;
263}
264
265U_CAPI int32_t U_EXPORT2
266uhash_hashScriptSet(const UElement key) {
267 icu::ScriptSet *s = static_cast<icu::ScriptSet *>(key.pointer);
268 return s->hashCode();
269}
270
271U_CAPI void U_EXPORT2
272uhash_deleteScriptSet(void *obj) {
273 icu::ScriptSet *s = static_cast<icu::ScriptSet *>(obj);
274 delete s;
275}