// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (C) 2014, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*
* created on: 2013 Jan 7
* created by: Andy Heninger
*/
15 #include "unicode/utypes.h"
17 #include "unicode/uchar.h"
18 #include "unicode/unistr.h"
20 #include "scriptset.h"
//----------------------------------------------------------------------------
//
//  ScriptSet implementation
//
//----------------------------------------------------------------------------
31 ScriptSet::ScriptSet() {
32 for (uint32_t i
=0; i
<UPRV_LENGTHOF(bits
); i
++) {
37 ScriptSet::~ScriptSet() {
40 ScriptSet::ScriptSet(const ScriptSet
&other
) {
45 ScriptSet
& ScriptSet::operator =(const ScriptSet
&other
) {
46 for (uint32_t i
=0; i
<UPRV_LENGTHOF(bits
); i
++) {
47 bits
[i
] = other
.bits
[i
];
53 UBool
ScriptSet::operator == (const ScriptSet
&other
) const {
54 for (uint32_t i
=0; i
<UPRV_LENGTHOF(bits
); i
++) {
55 if (bits
[i
] != other
.bits
[i
]) {
62 UBool
ScriptSet::test(UScriptCode script
, UErrorCode
&status
) const {
63 if (U_FAILURE(status
)) {
66 if (script
< 0 || script
>= (int32_t)sizeof(bits
) * 8) {
67 status
= U_ILLEGAL_ARGUMENT_ERROR
;
70 uint32_t index
= script
/ 32;
71 uint32_t bit
= 1 << (script
& 31);
72 return ((bits
[index
] & bit
) != 0);
76 ScriptSet
&ScriptSet::set(UScriptCode script
, UErrorCode
&status
) {
77 if (U_FAILURE(status
)) {
80 if (script
< 0 || script
>= (int32_t)sizeof(bits
) * 8) {
81 status
= U_ILLEGAL_ARGUMENT_ERROR
;
84 uint32_t index
= script
/ 32;
85 uint32_t bit
= 1 << (script
& 31);
90 ScriptSet
&ScriptSet::reset(UScriptCode script
, UErrorCode
&status
) {
91 if (U_FAILURE(status
)) {
94 if (script
< 0 || script
>= (int32_t)sizeof(bits
) * 8) {
95 status
= U_ILLEGAL_ARGUMENT_ERROR
;
98 uint32_t index
= script
/ 32;
99 uint32_t bit
= 1 << (script
& 31);
106 ScriptSet
&ScriptSet::Union(const ScriptSet
&other
) {
107 for (uint32_t i
=0; i
<UPRV_LENGTHOF(bits
); i
++) {
108 bits
[i
] |= other
.bits
[i
];
113 ScriptSet
&ScriptSet::intersect(const ScriptSet
&other
) {
114 for (uint32_t i
=0; i
<UPRV_LENGTHOF(bits
); i
++) {
115 bits
[i
] &= other
.bits
[i
];
120 ScriptSet
&ScriptSet::intersect(UScriptCode script
, UErrorCode
&status
) {
122 t
.set(script
, status
);
123 if (U_SUCCESS(status
)) {
129 UBool
ScriptSet::intersects(const ScriptSet
&other
) const {
130 for (uint32_t i
=0; i
<UPRV_LENGTHOF(bits
); i
++) {
131 if ((bits
[i
] & other
.bits
[i
]) != 0) {
138 UBool
ScriptSet::contains(const ScriptSet
&other
) const {
145 ScriptSet
&ScriptSet::setAll() {
146 for (uint32_t i
=0; i
<UPRV_LENGTHOF(bits
); i
++) {
147 bits
[i
] = 0xffffffffu
;
153 ScriptSet
&ScriptSet::resetAll() {
154 for (uint32_t i
=0; i
<UPRV_LENGTHOF(bits
); i
++) {
160 int32_t ScriptSet::countMembers() const {
161 // This bit counter is good for sparse numbers of '1's, which is
162 // very much the case that we will usually have.
164 for (uint32_t i
=0; i
<UPRV_LENGTHOF(bits
); i
++) {
165 uint32_t x
= bits
[i
];
168 x
&= (x
- 1); // and off the least significant one bit.
174 int32_t ScriptSet::hashCode() const {
176 for (int32_t i
=0; i
<UPRV_LENGTHOF(bits
); i
++) {
182 int32_t ScriptSet::nextSetBit(int32_t fromIndex
) const {
183 // TODO: Wants a better implementation.
187 UErrorCode status
= U_ZERO_ERROR
;
188 for (int32_t scriptIndex
= fromIndex
; scriptIndex
< (int32_t)sizeof(bits
)*8; scriptIndex
++) {
189 if (test((UScriptCode
)scriptIndex
, status
)) {
196 UBool
ScriptSet::isEmpty() const {
197 for (uint32_t i
=0; i
<UPRV_LENGTHOF(bits
); i
++) {
205 UnicodeString
&ScriptSet::displayScripts(UnicodeString
&dest
) const {
206 UBool firstTime
= TRUE
;
207 for (int32_t i
= nextSetBit(0); i
>= 0; i
= nextSetBit(i
+ 1)) {
209 dest
.append((UChar
)0x20);
212 const char *scriptName
= uscript_getShortName((UScriptCode(i
)));
213 dest
.append(UnicodeString(scriptName
, -1, US_INV
));
218 ScriptSet
&ScriptSet::parseScripts(const UnicodeString
&scriptString
, UErrorCode
&status
) {
220 if (U_FAILURE(status
)) {
223 UnicodeString oneScriptName
;
224 for (int32_t i
=0; i
<scriptString
.length();) {
225 UChar32 c
= scriptString
.char32At(i
);
226 i
= scriptString
.moveIndex32(i
, 1);
227 if (!u_isUWhiteSpace(c
)) {
228 oneScriptName
.append(c
);
229 if (i
< scriptString
.length()) {
233 if (oneScriptName
.length() > 0) {
235 oneScriptName
.extract(0, oneScriptName
.length(), buf
, sizeof(buf
)-1, US_INV
);
236 buf
[sizeof(buf
)-1] = 0;
237 int32_t sc
= u_getPropertyValueEnum(UCHAR_SCRIPT
, buf
);
238 if (sc
== UCHAR_INVALID_CODE
) {
239 status
= U_ILLEGAL_ARGUMENT_ERROR
;
241 this->set((UScriptCode
)sc
, status
);
243 if (U_FAILURE(status
)) {
246 oneScriptName
.remove();
252 void ScriptSet::setScriptExtensions(UChar32 codePoint
, UErrorCode
& status
) {
253 if (U_FAILURE(status
)) { return; }
254 static const int32_t FIRST_GUESS_SCRIPT_CAPACITY
= 5;
255 MaybeStackArray
<UScriptCode
,FIRST_GUESS_SCRIPT_CAPACITY
> scripts
;
256 UErrorCode internalStatus
= U_ZERO_ERROR
;
257 int32_t script_count
= -1;
260 script_count
= uscript_getScriptExtensions(
261 codePoint
, scripts
.getAlias(), scripts
.getCapacity(), &internalStatus
);
262 if (internalStatus
== U_BUFFER_OVERFLOW_ERROR
) {
263 // Need to allocate more space
264 if (scripts
.resize(script_count
) == NULL
) {
265 status
= U_MEMORY_ALLOCATION_ERROR
;
268 internalStatus
= U_ZERO_ERROR
;
274 // Check if we failed for some reason other than buffer overflow
275 if (U_FAILURE(internalStatus
)) {
276 status
= internalStatus
;
280 // Load the scripts into the ScriptSet and return
281 for (int32_t i
= 0; i
< script_count
; i
++) {
282 this->set(scripts
[i
], status
);
283 if (U_FAILURE(status
)) { return; }
289 U_CAPI UBool U_EXPORT2
290 uhash_equalsScriptSet(const UElement key1
, const UElement key2
) {
291 icu::ScriptSet
*s1
= static_cast<icu::ScriptSet
*>(key1
.pointer
);
292 icu::ScriptSet
*s2
= static_cast<icu::ScriptSet
*>(key2
.pointer
);
296 U_CAPI
int8_t U_EXPORT2
297 uhash_compareScriptSet(UElement key0
, UElement key1
) {
298 icu::ScriptSet
*s0
= static_cast<icu::ScriptSet
*>(key0
.pointer
);
299 icu::ScriptSet
*s1
= static_cast<icu::ScriptSet
*>(key1
.pointer
);
300 int32_t diff
= s0
->countMembers() - s1
->countMembers();
301 if (diff
!= 0) return diff
;
302 int32_t i0
= s0
->nextSetBit(0);
303 int32_t i1
= s1
->nextSetBit(0);
304 while ((diff
= i0
-i1
) == 0 && i0
> 0) {
305 i0
= s0
->nextSetBit(i0
+1);
306 i1
= s1
->nextSetBit(i1
+1);
311 U_CAPI
int32_t U_EXPORT2
312 uhash_hashScriptSet(const UElement key
) {
313 icu::ScriptSet
*s
= static_cast<icu::ScriptSet
*>(key
.pointer
);
314 return s
->hashCode();
317 U_CAPI
void U_EXPORT2
318 uhash_deleteScriptSet(void *obj
) {
319 icu::ScriptSet
*s
= static_cast<icu::ScriptSet
*>(obj
);