2 **********************************************************************
3 * Copyright (C) 2013, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
9 * created on: 2013 Jan 7
10 * created by: Andy Heninger
13 #include "unicode/utypes.h"
15 #include "unicode/uchar.h"
16 #include "unicode/unistr.h"
18 #include "scriptset.h"
23 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
25 //----------------------------------------------------------------------------
27 // ScriptSet implementation
29 //----------------------------------------------------------------------------
30 ScriptSet::ScriptSet() {
31 for (uint32_t i
=0; i
<LENGTHOF(bits
); i
++) {
36 ScriptSet::~ScriptSet() {
39 ScriptSet::ScriptSet(const ScriptSet
&other
) {
44 ScriptSet
& ScriptSet::operator =(const ScriptSet
&other
) {
45 for (uint32_t i
=0; i
<LENGTHOF(bits
); i
++) {
46 bits
[i
] = other
.bits
[i
];
52 UBool
ScriptSet::operator == (const ScriptSet
&other
) const {
53 for (uint32_t i
=0; i
<LENGTHOF(bits
); i
++) {
54 if (bits
[i
] != other
.bits
[i
]) {
61 UBool
ScriptSet::test(UScriptCode script
, UErrorCode
&status
) const {
62 if (U_FAILURE(status
)) {
65 if (script
< 0 || script
>= (int32_t)sizeof(bits
) * 8) {
66 status
= U_ILLEGAL_ARGUMENT_ERROR
;
69 uint32_t index
= script
/ 32;
70 uint32_t bit
= 1 << (script
& 31);
71 return ((bits
[index
] & bit
) != 0);
75 ScriptSet
&ScriptSet::set(UScriptCode script
, UErrorCode
&status
) {
76 if (U_FAILURE(status
)) {
79 if (script
< 0 || script
>= (int32_t)sizeof(bits
) * 8) {
80 status
= U_ILLEGAL_ARGUMENT_ERROR
;
83 uint32_t index
= script
/ 32;
84 uint32_t bit
= 1 << (script
& 31);
89 ScriptSet
&ScriptSet::reset(UScriptCode script
, UErrorCode
&status
) {
90 if (U_FAILURE(status
)) {
93 if (script
< 0 || script
>= (int32_t)sizeof(bits
) * 8) {
94 status
= U_ILLEGAL_ARGUMENT_ERROR
;
97 uint32_t index
= script
/ 32;
98 uint32_t bit
= 1 << (script
& 31);
105 ScriptSet
&ScriptSet::Union(const ScriptSet
&other
) {
106 for (uint32_t i
=0; i
<LENGTHOF(bits
); i
++) {
107 bits
[i
] |= other
.bits
[i
];
112 ScriptSet
&ScriptSet::intersect(const ScriptSet
&other
) {
113 for (uint32_t i
=0; i
<LENGTHOF(bits
); i
++) {
114 bits
[i
] &= other
.bits
[i
];
119 ScriptSet
&ScriptSet::intersect(UScriptCode script
, UErrorCode
&status
) {
121 t
.set(script
, status
);
122 if (U_SUCCESS(status
)) {
128 UBool
ScriptSet::intersects(const ScriptSet
&other
) const {
129 for (uint32_t i
=0; i
<LENGTHOF(bits
); i
++) {
130 if ((bits
[i
] & other
.bits
[i
]) != 0) {
// contains(): const query that takes another ScriptSet and returns a UBool.
// NOTE(review): only the signature is visible here; presumably it reports
// whether every script in `other` is also a member of this set -- confirm
// against the full definition.
137 UBool
ScriptSet::contains(const ScriptSet
&other
) const {
144 ScriptSet
&ScriptSet::setAll() {
145 for (uint32_t i
=0; i
<LENGTHOF(bits
); i
++) {
146 bits
[i
] = 0xffffffffu
;
152 ScriptSet
&ScriptSet::resetAll() {
153 for (uint32_t i
=0; i
<LENGTHOF(bits
); i
++) {
159 int32_t ScriptSet::countMembers() const {
160 // This bit counter is good for sparse numbers of '1's, which is
161 // very much the case that we will usually have.
163 for (uint32_t i
=0; i
<LENGTHOF(bits
); i
++) {
164 uint32_t x
= bits
[i
];
167 x
&= (x
- 1); // and off the least significant one bit.
// hashCode(): returns an int32_t computed by a loop that visits every word of
// `bits` (index 0 up to LENGTHOF(bits)).
// NOTE(review): the statement that folds each word into the result is not
// visible here -- confirm how the words are combined.
173 int32_t ScriptSet::hashCode() const {
175 for (int32_t i
=0; i
<LENGTHOF(bits
); i
++) {
181 int32_t ScriptSet::nextSetBit(int32_t fromIndex
) const {
182 // TODO: Wants a better implementation.
186 UErrorCode status
= U_ZERO_ERROR
;
187 for (int32_t scriptIndex
= fromIndex
; scriptIndex
< (int32_t)sizeof(bits
)*8; scriptIndex
++) {
188 if (test((UScriptCode
)scriptIndex
, status
)) {
// displayScripts(): appends a textual form of this set to `dest`.
//
// Iterates the members in increasing bit order via nextSetBit() (the loop ends
// when it returns a negative value). For each member the short script name
// from uscript_getShortName() is converted with the invariant-character
// UnicodeString constructor (US_INV) and appended to `dest`.
// NOTE(review): `firstTime` presumably controls a separator between names, but
// the statements that use it are not visible here -- confirm.
195 UnicodeString
&ScriptSet::displayScripts(UnicodeString
&dest
) const {
196 UBool firstTime
= TRUE
;
197 for (int32_t i
= nextSetBit(0); i
>= 0; i
= nextSetBit(i
+ 1)) {
202 const char *scriptName
= uscript_getShortName((UScriptCode(i
)));
203 dest
.append(UnicodeString(scriptName
, -1, US_INV
));
// parseScripts(): adds to this set the scripts named in `scriptString`.
//
// The string is walked one code point at a time (char32At / moveIndex32).
// Code points for which u_isUWhiteSpace() is false are accumulated into
// `oneScriptName`. A non-empty accumulated name is extracted into the char
// buffer `buf` as invariant characters, forcibly NUL-terminated, and looked up
// with u_getPropertyValueEnum(UCHAR_SCRIPT, buf). UCHAR_INVALID_CODE sets
// `status` to U_ILLEGAL_ARGUMENT_ERROR; the looked-up value is passed to set().
// `oneScriptName` is emptied with remove() before the next name.
//
// @param scriptString  Script names; white space delimits the names.
// @param status        ICU error code; checked on entry and after each name.
// NOTE(review): the declaration (and size) of `buf`, the bodies of the
// U_FAILURE branches, and any statement before the first status check are not
// visible here -- confirm them against the full definition.
208 ScriptSet
&ScriptSet::parseScripts(const UnicodeString
&scriptString
, UErrorCode
&status
) {
210 if (U_FAILURE(status
)) {
213 UnicodeString oneScriptName
;
214 for (int32_t i
=0; i
<scriptString
.length();) {
215 UChar32 c
= scriptString
.char32At(i
);
216 i
= scriptString
.moveIndex32(i
, 1);
217 if (!u_isUWhiteSpace(c
)) {
218 oneScriptName
.append(c
);
219 if (i
< scriptString
.length()) {
223 if (oneScriptName
.length() > 0) {
225 oneScriptName
.extract(0, oneScriptName
.length(), buf
, sizeof(buf
)-1, US_INV
);
226 buf
[sizeof(buf
)-1] = 0;
227 int32_t sc
= u_getPropertyValueEnum(UCHAR_SCRIPT
, buf
);
228 if (sc
== UCHAR_INVALID_CODE
) {
229 status
= U_ILLEGAL_ARGUMENT_ERROR
;
231 this->set((UScriptCode
)sc
, status
);
233 if (U_FAILURE(status
)) {
236 oneScriptName
.remove();
244 U_CAPI UBool U_EXPORT2
245 uhash_equalsScriptSet(const UElement key1
, const UElement key2
) {
246 icu::ScriptSet
*s1
= static_cast<icu::ScriptSet
*>(key1
.pointer
);
247 icu::ScriptSet
*s2
= static_cast<icu::ScriptSet
*>(key2
.pointer
);
251 U_CAPI
int8_t U_EXPORT2
252 uhash_compareScriptSet(UElement key0
, UElement key1
) {
253 icu::ScriptSet
*s0
= static_cast<icu::ScriptSet
*>(key0
.pointer
);
254 icu::ScriptSet
*s1
= static_cast<icu::ScriptSet
*>(key1
.pointer
);
255 int32_t diff
= s0
->countMembers() - s1
->countMembers();
256 if (diff
!= 0) return diff
;
257 int32_t i0
= s0
->nextSetBit(0);
258 int32_t i1
= s1
->nextSetBit(0);
259 while ((diff
= i0
-i1
) == 0 && i0
> 0) {
260 i0
= s0
->nextSetBit(i0
+1);
261 i1
= s1
->nextSetBit(i1
+1);
266 U_CAPI
int32_t U_EXPORT2
267 uhash_hashScriptSet(const UElement key
) {
268 icu::ScriptSet
*s
= static_cast<icu::ScriptSet
*>(key
.pointer
);
269 return s
->hashCode();
272 U_CAPI
void U_EXPORT2
273 uhash_deleteScriptSet(void *obj
) {
274 icu::ScriptSet
*s
= static_cast<icu::ScriptSet
*>(obj
);