]> git.saurik.com Git - apple/cf.git/blob - StringEncodings.subproj/CFUniChar.c
CF-368.26.tar.gz
[apple/cf.git] / StringEncodings.subproj / CFUniChar.c
1 /*
2 * Copyright (c) 2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23 /* CFUniChar.c
24 Copyright 2001-2002, Apple, Inc. All rights reserved.
25 Responsibility: Aki Inoue
26 */
27
28 #include <CoreFoundation/CFByteOrder.h>
29 #include "CFInternal.h"
30 #include "CFUniChar.h"
31 #include "CFStringEncodingConverterExt.h"
32 #include "CFUnicodeDecomposition.h"
33 #include "CFUniCharPriv.h"
34 #if defined(__MACOS8__)
35 #include <stdio.h>
36 #elif defined(__WIN32__)
37 #include <windows.h>
38 #include <sys/stat.h>
39 #include <fcntl.h>
40 #include <io.h>
41 #elif defined(__MACH__) || defined(__LINUX__) || defined(__FREEBSD__)
42 #if defined(__MACH__)
43 #include <mach/mach.h>
44 #endif
45 #include <fcntl.h>
46 #include <sys/types.h>
47 #include <sys/stat.h>
48 #include <sys/param.h>
49 #include <sys/mman.h>
50 #include <unistd.h>
51 #include <stdlib.h>
52 #endif
53
54 #if defined(__MACOS8__)
55 #define MAXPATHLEN FILENAME_MAX
56 #elif defined WIN32
57 #define MAXPATHLEN MAX_PATH
58 #endif
59
60 // Memory map the file
61 #if !defined(__MACOS8__)
62
63 CF_INLINE void __CFUniCharCharacterSetPath(char *cpath) {
64 #if defined(__MACH__)
65 strlcpy(cpath, __kCFCharacterSetDir, MAXPATHLEN);
66 #elif defined(__WIN32__)
67 strlcpy(cpath, _CFDLLPath(), MAXPATHLEN);
68 #else
69 strlcpy(cpath, __kCFCharacterSetDir, MAXPATHLEN);
70 #endif
71
72 #if defined(__WIN32__)
73 strlcat(cpath, "\\CharacterSets\\", MAXPATHLEN);
74 #else
75 strlcat(cpath, "/CharacterSets/", MAXPATHLEN);
76 #endif
77 }
78
/* Maps the whole file named fileName read-only into memory.
 *
 * fileName: path of the file to map.
 * bytes:    receives the base address of the mapping on success.
 *
 * Returns true on success.  Returns false when the file cannot be opened,
 * stat'ed or mapped; in that case *bytes is left untouched (the previous code
 * stored the failed mmap result, (void *)-1, through it).
 * The file handle/descriptor is closed before returning; the mapping itself is
 * never released by this function. */
static bool __CFUniCharLoadBytesFromFile(const char *fileName, const void **bytes) {
#if defined(__WIN32__)
    HANDLE bitmapFileHandle;
    HANDLE mappingHandle;
    const void *view;

    bitmapFileHandle = CreateFile(fileName, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
    if (bitmapFileHandle == INVALID_HANDLE_VALUE) return false;

    mappingHandle = CreateFileMapping(bitmapFileHandle, NULL, PAGE_READONLY, 0, 0, NULL);
    CloseHandle(bitmapFileHandle);
    if (!mappingHandle) return false;

    view = MapViewOfFileEx(mappingHandle, FILE_MAP_READ, 0, 0, 0, NULL);
    CloseHandle(mappingHandle);
    if (!view) return false;

    *bytes = view;
    return true;
#else
    struct stat statBuf;
    void *mapped;
    int fd = open(fileName, O_RDONLY, 0);

    if (fd < 0) return false;

    if (fstat(fd, &statBuf) < 0) {
        close(fd);
        return false;
    }

    mapped = mmap(NULL, (size_t)statBuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
    close(fd); // the descriptor is no longer needed once mmap has been attempted
    if (mapped == MAP_FAILED) return false;

    *bytes = mapped;
    return true;
#endif
}
108
109 static bool __CFUniCharLoadFile(const char *bitmapName, const void **bytes) {
110 char cpath[MAXPATHLEN];
111
112 __CFUniCharCharacterSetPath(cpath);
113 strlcat(cpath, bitmapName, MAXPATHLEN);
114
115 return __CFUniCharLoadBytesFromFile(cpath, bytes);
116 }
117 #endif !defined(__MACOS8__)
118
119 // Bitmap functions
120 CF_INLINE bool isControl(UTF32Char theChar, uint16_t charset, const void *data) { // ISO Control
121 if ((theChar <= 0x001F) || (theChar >= 0x007F && theChar <= 0x009F)) return true;
122 return false;
123 }
124
125 CF_INLINE bool isWhitespace(UTF32Char theChar, uint16_t charset, const void *data) { // Space
126 if ((theChar == 0x0020) || (theChar == 0x0009) || (theChar == 0x00A0) || (theChar == 0x1680) || (theChar >= 0x2000 && theChar <= 0x200B) || (theChar == 0x202F) || (theChar == 0x205F) || (theChar == 0x3000)) return true;
127 return false;
128 }
129
130 CF_INLINE bool isWhitespaceAndNewLine(UTF32Char theChar, uint16_t charset, const void *data) { // White space
131 if (isWhitespace(theChar, charset, data) || (theChar >= 0x000A && theChar <= 0x000D) || (theChar == 0x0085) || (theChar == 0x2028) || (theChar == 0x2029)) return true;
132 return false;
133 }
134
135 #if defined(__MACOS8__)
/* This structure MUST match the sets in NSRulebook.h.  The
 * "__CFCSetIsMemberSet()" function is a modified version of the one in the
 * Text shlib.  The field order and types define the data layout and must not
 * be changed.
 */
typedef struct _CFCharSetPrivateStruct {
    /* 1 = sorted, 0 = unsorted, 2 = is_property_table */
    int issorted;
    /* bitmap (each bit is a 1k range in space of 2^17) */
    int bitrange[4];
    /* number of single elements */
    int nsingles;
    /* number of ranges */
    int nranges;
    /* minimum single element */
    int singmin;
    /* maximum single element */
    int singmax;
    /* actually a bunch of singles followed by ranges (first/last pairs) */
    int array[1];
} CFCharSetPrivateStruct;
147
148 /* Membership function for complex sets
149 */
150 CF_INLINE bool __CFCSetIsMemberSet(const CFCharSetPrivateStruct *set, UTF16Char theChar) {
151 int *tmp, *tmp2;
152 int i, nel;
153 int *p, *q, *wari;
154
155 if (set->issorted != 1) {
156 return false;
157 }
158 theChar &= 0x0001FFFF; /* range 1-131k */
159 if (__CFCSetBitsInRange(theChar, set->bitrange)) {
160 if (theChar >= set->singmin && theChar <= set->singmax) {
161 tmp = (int *) &(set->array[0]);
162 if ((nel = set->nsingles) < __kCFSetBreakeven) {
163 for (i = 0; i < nel; i++) {
164 if (*tmp == theChar) return true;
165 ++tmp;
166 }
167 }
168 else { // this does a binary search
169 p = tmp; q = tmp + (nel-1);
170 while (p <= q) {
171 wari = (p + ((q-p)>>1));
172 if (theChar < *wari) q = wari - 1;
173 else if (theChar > *wari) p = wari + 1;
174 else return true;
175 }
176 }
177 }
178 tmp = (int *) &(set->array[0]) + set->nsingles;
179 if ((nel = set->nranges) < __kCFSetBreakeven) {
180 i = nel;
181 tmp2 = tmp+1;
182 while (i) {
183 if (theChar <= *tmp2) {
184 if (theChar >= *tmp) return true;
185 }
186 tmp += 2;
187 tmp2 = tmp+1;
188 --i;
189 }
190 } else { /* binary search the ranges */
191 p = tmp; q = tmp + (2*nel-2);
192 while (p <= q) {
193 i = (q - p) >> 1; /* >>1 means divide by 2 */
194 wari = p + (i & 0xFFFFFFFE); /* &fffffffe make it an even num */
195 if (theChar < *wari) q = wari - 2;
196 else if (theChar > *(wari + 1)) p = wari + 2;
197 else return true;
198 }
199 }
200 return false;
201 /* fall through & return zero */
202 }
203 return false; /* not a member */
204 }
205
206 /* Take a private "set" structure and make a bitmap from it. Return the bitmap. THE CALLER MUST RELEASE THE RETURNED MEMORY as necessary.
207 */
208
209 CF_INLINE void __CFCSetBitmapProcessManyCharacters(unsigned char *map, unsigned n, unsigned m) {
210 unsigned tmp;
211 for (tmp = n; tmp <= m; tmp++) CFUniCharAddCharacterToBitmap(tmp, map);
212 }
213
214 CF_INLINE void __CFCSetMakeSetBitmapFromSet(const CFCharSetPrivateStruct *theSet, uint8_t *map)
215 {
216 int *ip;
217 UTF16Char ctmp;
218 int cnt;
219
220 for (cnt = 0; cnt < theSet->nsingles; cnt++) {
221 ctmp = theSet->array[cnt];
222 CFUniCharAddCharacterToBitmap(tmp, map);
223 }
224 ip = (int *) (&(theSet->array[0]) + theSet->nsingles);
225 cnt = theSet->nranges;
226 while (cnt) {
227 /* This could be more efficient: turn on whole bytes at a time
228 when there are such cases as 8 characters in a row... */
229 __CFCSetBitmapProcessManyCharacters((unsigned char *)map, *ip, *(ip+1));
230 ip += 2;
231 --cnt;
232 }
233 }
234
235 extern const CFCharSetPrivateStruct *_CFdecimalDigitCharacterSetData;
236 extern const CFCharSetPrivateStruct *_CFletterCharacterSetData;
237 extern const CFCharSetPrivateStruct *_CFlowercaseLetterCharacterSetData;
238 extern const CFCharSetPrivateStruct *_CFuppercaseLetterCharacterSetData;
239 extern const CFCharSetPrivateStruct *_CFnonBaseCharacterSetData;
240 extern const CFCharSetPrivateStruct *_CFdecomposableCharacterSetData;
241 extern const CFCharSetPrivateStruct *_CFpunctuationCharacterSetData;
242 extern const CFCharSetPrivateStruct *_CFalphanumericCharacterSetData;
243 extern const CFCharSetPrivateStruct *_CFillegalCharacterSetData;
244 extern const CFCharSetPrivateStruct *_CFhasNonSelfLowercaseMappingData;
245 extern const CFCharSetPrivateStruct *_CFhasNonSelfUppercaseMappingData;
246 extern const CFCharSetPrivateStruct *_CFhasNonSelfTitlecaseMappingData;
247
248 #else __MACOS8__
/* Bitmap table for one character set: _planes holds _numPlanes entries, one
 * per Unicode plane; an entry is NULL when no bitmap was loaded for that plane. */
typedef struct {
    uint32_t _numPlanes;     // number of entries in _planes
    const uint8_t **_planes; // per-plane bitmap pointers (8KB each), or NULL
} __CFUniCharBitmapData;
253
254 static char __CFUniCharUnicodeVersionString[8] = {0, 0, 0, 0, 0, 0, 0, 0};
255
256 static uint32_t __CFUniCharNumberOfBitmaps = 0;
257 static __CFUniCharBitmapData *__CFUniCharBitmapDataArray = NULL;
258
259 static CFSpinLock_t __CFUniCharBitmapLock = 0;
260
261 #ifndef CF_UNICHAR_BITMAP_FILE
262 #define CF_UNICHAR_BITMAP_FILE "CFCharacterSetBitmaps.bitmap"
263 #endif CF_UNICHAR_BITMAP_FILE
264
265 static bool __CFUniCharLoadBitmapData(void) {
266 uint32_t headerSize;
267 uint32_t bitmapSize;
268 int numPlanes;
269 uint8_t currentPlane;
270 const void *bytes;
271 const void *bitmapBase;
272 const void *bitmap;
273 int idx, bitmapIndex;
274
275 __CFSpinLock(&__CFUniCharBitmapLock);
276
277 if (__CFUniCharBitmapDataArray || !__CFUniCharLoadFile(CF_UNICHAR_BITMAP_FILE, &bytes)) {
278 __CFSpinUnlock(&__CFUniCharBitmapLock);
279 return false;
280 }
281
282 for (idx = 0;idx < 4 && ((const uint8_t *)bytes)[idx];idx++) {
283 __CFUniCharUnicodeVersionString[idx * 2] = ((const uint8_t *)bytes)[idx];
284 __CFUniCharUnicodeVersionString[idx * 2 + 1] = '.';
285 }
286 __CFUniCharUnicodeVersionString[(idx < 4 ? idx * 2 - 1 : 7)] = '\0';
287
288 headerSize = CFSwapInt32BigToHost(*((uint32_t *)((char *)bytes + 4)));
289
290 bitmapBase = (char *)bytes + headerSize;
291 (char *)bytes += (sizeof(uint32_t) * 2);
292 headerSize -= (sizeof(uint32_t) * 2);
293
294 __CFUniCharNumberOfBitmaps = headerSize / (sizeof(uint32_t) * 2);
295
296 __CFUniCharBitmapDataArray = (__CFUniCharBitmapData *)CFAllocatorAllocate(NULL, sizeof(__CFUniCharBitmapData) * __CFUniCharNumberOfBitmaps, 0);
297
298 for (idx = 0;idx < (int)__CFUniCharNumberOfBitmaps;idx++) {
299 bitmap = (char *)bitmapBase + CFSwapInt32BigToHost(*(((uint32_t *)bytes)++));
300 bitmapSize = CFSwapInt32BigToHost(*(((uint32_t *)bytes)++));
301
302 numPlanes = bitmapSize / (8 * 1024);
303 numPlanes = *(const uint8_t *)((char *)bitmap + (((numPlanes - 1) * ((8 * 1024) + 1)) - 1)) + 1;
304 __CFUniCharBitmapDataArray[idx]._planes = (const uint8_t **)CFAllocatorAllocate(NULL, sizeof(const void *) * numPlanes, 0);
305 __CFUniCharBitmapDataArray[idx]._numPlanes = numPlanes;
306
307 currentPlane = 0;
308 for (bitmapIndex = 0;bitmapIndex < numPlanes;bitmapIndex++) {
309 if (bitmapIndex == currentPlane) {
310 __CFUniCharBitmapDataArray[idx]._planes[bitmapIndex] = bitmap;
311 (char *)bitmap += (8 * 1024);
312 currentPlane = *(((const uint8_t *)bitmap)++);
313 } else {
314 __CFUniCharBitmapDataArray[idx]._planes[bitmapIndex] = NULL;
315 }
316 }
317 }
318
319 __CFSpinUnlock(&__CFUniCharBitmapLock);
320
321 return true;
322 }
323
324 __private_extern__ const char *__CFUniCharGetUnicodeVersionString(void) {
325 if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData();
326 return __CFUniCharUnicodeVersionString;
327 }
328
329 #endif __MACOS8__
330
331 #define CONTROLSET_HAS_FORMATTER 1
332
333 bool CFUniCharIsMemberOf(UTF32Char theChar, uint32_t charset) {
334 #if CONTROLSET_HAS_FORMATTER
335 if (charset == kCFUniCharControlCharacterSet) charset = kCFUniCharControlAndFormatterCharacterSet;
336 #endif CONTROLSET_HAS_FORMATTER
337
338 switch (charset) {
339 case kCFUniCharControlCharacterSet:
340 return isControl(theChar, charset, NULL);
341
342 case kCFUniCharWhitespaceCharacterSet:
343 return isWhitespace(theChar, charset, NULL);
344
345 case kCFUniCharWhitespaceAndNewlineCharacterSet:
346 return isWhitespaceAndNewLine(theChar, charset, NULL);
347
348 #if defined(__MACOS8__)
349 case kCFUniCharDecimalDigitCharacterSet:
350 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct *)&_CFdecimalDigitCharacterSetData, theChar);
351 case kCFUniCharLetterCharacterSet:
352 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct *)&_CFletterCharacterSetData, theChar);
353 case kCFUniCharLowercaseLetterCharacterSet:
354 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct *)&_CFlowercaseLetterCharacterSetData, theChar);
355 case kCFUniCharUppercaseLetterCharacterSet:
356 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct *)&_CFuppercaseLetterCharacterSetData, theChar);
357 case kCFUniCharNonBaseCharacterSet:
358 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct *)&_CFnonBaseCharacterSetData, theChar);
359 case kCFUniCharAlphaNumericCharacterSet:
360 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct *)&_CFalphanumericCharacterSetData, theChar);
361 case kCFUniCharDecomposableCharacterSet:
362 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct *)&_CFdecomposableCharacterSetData, theChar);
363 case kCFUniCharPunctuationCharacterSet:
364 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct *)&_CFpunctuationCharacterSetData, theChar);
365 case kCFUniCharIllegalCharacterSet:
366 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct *)&_CFillegalCharacterSetData, theChar);
367 case kCFUniCharHasNonSelfLowercaseMapping:
368 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct *)&_CFhasNonSelfLowercaseMappingData, theChar);
369 case kCFUniCharHasNonSelfUppercaseMapping:
370 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct *)&_CFhasNonSelfUppercaseMappingData, theChar);
371 case kCFUniCharHasNonSelfTitlecaseMapping:
372 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct *)&_CFhasNonSelfTitlecaseMappingData, theChar);
373 default:
374 return false;
375 #else
376 default:
377 if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData();
378
379 if ((charset - kCFUniCharDecimalDigitCharacterSet) < __CFUniCharNumberOfBitmaps) {
380 __CFUniCharBitmapData *data = __CFUniCharBitmapDataArray + (charset - kCFUniCharDecimalDigitCharacterSet);
381 uint8_t planeNo = (theChar >> 16) & 0xFF;
382
383 // The bitmap data for kCFUniCharIllegalCharacterSet is actually LEGAL set less Plane 14 ~ 16
384 if (charset == kCFUniCharIllegalCharacterSet) {
385 if (planeNo == 0x0E) { // Plane 14
386 theChar &= 0xFF;
387 return (((theChar == 0x01) || ((theChar > 0x1F) && (theChar < 0x80))) ? false : true);
388 } else if (planeNo == 0x0F || planeNo == 0x10) { // Plane 15 & 16
389 return ((theChar & 0xFF) > 0xFFFD ? true : false);
390 } else {
391 return (planeNo < data->_numPlanes && data->_planes[planeNo] ? !CFUniCharIsMemberOfBitmap(theChar, data->_planes[planeNo]) : true);
392 }
393 } else if (charset == kCFUniCharControlAndFormatterCharacterSet) {
394 if (planeNo == 0x0E) { // Plane 14
395 theChar &= 0xFF;
396 return (((theChar == 0x01) || ((theChar > 0x1F) && (theChar < 0x80))) ? true : false);
397 } else {
398 return (planeNo < data->_numPlanes && data->_planes[planeNo] ? CFUniCharIsMemberOfBitmap(theChar, data->_planes[planeNo]) : false);
399 }
400 } else {
401 return (planeNo < data->_numPlanes && data->_planes[planeNo] ? CFUniCharIsMemberOfBitmap(theChar, data->_planes[planeNo]) : false);
402 }
403 }
404 return false;
405 #endif
406 }
407 }
408
409 const uint8_t *CFUniCharGetBitmapPtrForPlane(uint32_t charset, uint32_t plane) {
410 if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData();
411
412 #if CONTROLSET_HAS_FORMATTER
413 if (charset == kCFUniCharControlCharacterSet) charset = kCFUniCharControlAndFormatterCharacterSet;
414 #endif CONTROLSET_HAS_FORMATTER
415
416 if (charset > kCFUniCharWhitespaceAndNewlineCharacterSet && (charset - kCFUniCharDecimalDigitCharacterSet) < __CFUniCharNumberOfBitmaps && charset != kCFUniCharIllegalCharacterSet) {
417 __CFUniCharBitmapData *data = __CFUniCharBitmapDataArray + (charset - kCFUniCharDecimalDigitCharacterSet);
418
419 return (plane < data->_numPlanes ? data->_planes[plane] : NULL);
420 }
421 return NULL;
422 }
423
424 __private_extern__ uint8_t CFUniCharGetBitmapForPlane(uint32_t charset, uint32_t plane, void *bitmap, bool isInverted) {
425 const uint8_t *src = CFUniCharGetBitmapPtrForPlane(charset, plane);
426 int numBytes = (8 * 1024);
427
428 if (src) {
429 if (isInverted) {
430 while (numBytes-- > 0) *(((uint8_t *)bitmap)++) = ~(*(src++));
431 } else {
432 while (numBytes-- > 0) *(((uint8_t *)bitmap)++) = *(src++);
433 }
434 return kCFUniCharBitmapFilled;
435 } else if (charset == kCFUniCharIllegalCharacterSet) {
436 __CFUniCharBitmapData *data = __CFUniCharBitmapDataArray + (charset - kCFUniCharDecimalDigitCharacterSet);
437
438 if (plane < data->_numPlanes && (src = data->_planes[plane])) {
439 if (isInverted) {
440 while (numBytes-- > 0) *(((uint8_t *)bitmap)++) = *(src++);
441 } else {
442 while (numBytes-- > 0) *(((uint8_t *)bitmap)++) = ~(*(src++));
443 }
444 return kCFUniCharBitmapFilled;
445 } else if (plane == 0x0E) { // Plane 14
446 int idx;
447 uint8_t asciiRange = (isInverted ? (uint8_t)0xFF : (uint8_t)0);
448 uint8_t otherRange = (isInverted ? (uint8_t)0 : (uint8_t)0xFF);
449
450 *(((uint8_t *)bitmap)++) = 0x02; // UE0001 LANGUAGE TAG
451 for (idx = 1;idx < numBytes;idx++) {
452 *(((uint8_t *)bitmap)++) = ((idx >= (0x20 / 8) && (idx < (0x80 / 8))) ? asciiRange : otherRange);
453 }
454 return kCFUniCharBitmapFilled;
455 } else if (plane == 0x0F || plane == 0x10) { // Plane 15 & 16
456 uint32_t value = (isInverted ? 0xFFFFFFFF : 0);
457 numBytes /= 4; // for 32bit
458
459 while (numBytes-- > 0) *(((uint32_t *)bitmap)++) = value;
460 *(((uint8_t *)bitmap) - 5) = (isInverted ? 0x3F : 0xC0); // 0xFFFE & 0xFFFF
461 return kCFUniCharBitmapFilled;
462 }
463 return (isInverted ? kCFUniCharBitmapEmpty : kCFUniCharBitmapAll);
464 #if CONTROLSET_HAS_FORMATTER
465 } else if ((charset == kCFUniCharControlCharacterSet) && (plane == 0x0E)) { // Language tags
466 int idx;
467 uint8_t asciiRange = (isInverted ? (uint8_t)0 : (uint8_t)0xFF);
468 uint8_t otherRange = (isInverted ? (uint8_t)0xFF : (uint8_t)0);
469
470 *(((uint8_t *)bitmap)++) = 0x02; // UE0001 LANGUAGE TAG
471 for (idx = 1;idx < numBytes;idx++) {
472 *(((uint8_t *)bitmap)++) = ((idx >= (0x20 / 8) && (idx < (0x80 / 8))) ? asciiRange : otherRange);
473 }
474 return kCFUniCharBitmapFilled;
475 #endif CONTROLSET_HAS_FORMATTER
476 } else if (charset < kCFUniCharDecimalDigitCharacterSet) {
477 if (plane) return (isInverted ? kCFUniCharBitmapAll : kCFUniCharBitmapEmpty);
478
479 if (charset == kCFUniCharControlCharacterSet) {
480 int idx;
481 uint8_t nonFillValue = (isInverted ? (uint8_t)0xFF : (uint8_t)0);
482 uint8_t fillValue = (isInverted ? (uint8_t)0 : (uint8_t)0xFF);
483 uint8_t *bitmapP = (uint8_t *)bitmap;
484
485 for (idx = 0;idx < numBytes;idx++) {
486 *(bitmapP++) = (idx < (0x20 / 8) || (idx >= (0x80 / 8) && idx < (0xA0 / 8)) ? fillValue : nonFillValue);
487 }
488
489 // DEL
490 if (isInverted) {
491 CFUniCharRemoveCharacterFromBitmap(0x007F, bitmap);
492 } else {
493 CFUniCharAddCharacterToBitmap(0x007F, bitmap);
494 }
495 } else {
496 uint8_t *bitmapBase = (uint8_t *)bitmap;
497 int idx;
498 uint8_t nonFillValue = (isInverted ? (uint8_t)0xFF : (uint8_t)0);
499
500 while (numBytes-- > 0) *(((uint8_t *)bitmap)++) = nonFillValue;
501
502 if (charset == kCFUniCharWhitespaceAndNewlineCharacterSet) {
503 static const UniChar newlines[] = {0x000A, 0x000B, 0x000C, 0x000D, 0x0085, 0x2028, 0x2029};
504
505 for (idx = 0;idx < (int)(sizeof(newlines) / sizeof(*newlines)); idx++) {
506 if (isInverted) {
507 CFUniCharRemoveCharacterFromBitmap(newlines[idx], bitmapBase);
508 } else {
509 CFUniCharAddCharacterToBitmap(newlines[idx], bitmapBase);
510 }
511 }
512 }
513
514 if (isInverted) {
515 CFUniCharRemoveCharacterFromBitmap(0x0009, bitmapBase);
516 CFUniCharRemoveCharacterFromBitmap(0x0020, bitmapBase);
517 CFUniCharRemoveCharacterFromBitmap(0x00A0, bitmapBase);
518 CFUniCharRemoveCharacterFromBitmap(0x1680, bitmapBase);
519 CFUniCharRemoveCharacterFromBitmap(0x202F, bitmapBase);
520 CFUniCharRemoveCharacterFromBitmap(0x205F, bitmapBase);
521 CFUniCharRemoveCharacterFromBitmap(0x3000, bitmapBase);
522 } else {
523 CFUniCharAddCharacterToBitmap(0x0009, bitmapBase);
524 CFUniCharAddCharacterToBitmap(0x0020, bitmapBase);
525 CFUniCharAddCharacterToBitmap(0x00A0, bitmapBase);
526 CFUniCharAddCharacterToBitmap(0x1680, bitmapBase);
527 CFUniCharAddCharacterToBitmap(0x202F, bitmapBase);
528 CFUniCharAddCharacterToBitmap(0x205F, bitmapBase);
529 CFUniCharAddCharacterToBitmap(0x3000, bitmapBase);
530 }
531
532 for (idx = 0x2000;idx <= 0x200B;idx++) {
533 if (isInverted) {
534 CFUniCharRemoveCharacterFromBitmap(idx, bitmapBase);
535 } else {
536 CFUniCharAddCharacterToBitmap(idx, bitmapBase);
537 }
538 }
539 }
540 return kCFUniCharBitmapFilled;
541 }
542 return (isInverted ? kCFUniCharBitmapAll : kCFUniCharBitmapEmpty);
543 }
544
545 __private_extern__ uint32_t CFUniCharGetNumberOfPlanes(uint32_t charset) {
546 #if defined(__MACOS8__)
547 return 1;
548 #else __MACOS8__
549 #if CONTROLSET_HAS_FORMATTER
550 if (charset == kCFUniCharControlCharacterSet) return 15; // 0 to 14
551 #endif CONTROLSET_HAS_FORMATTER
552
553 if (charset < kCFUniCharDecimalDigitCharacterSet) {
554 return 1;
555 } else if (charset == kCFUniCharIllegalCharacterSet) {
556 return 17;
557 } else {
558 uint32_t numPlanes;
559
560 if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData();
561
562 numPlanes = __CFUniCharBitmapDataArray[charset - kCFUniCharDecimalDigitCharacterSet]._numPlanes;
563
564 return numPlanes;
565 }
566 #endif __MACOS8__
567 }
568
569 // Mapping data loading
570 static const void **__CFUniCharMappingTables = NULL;
571
572 static CFSpinLock_t __CFUniCharMappingTableLock = 0;
573
574 #if defined(__BIG_ENDIAN__)
575 #define MAPPING_TABLE_FILE "CFUnicodeData-B.mapping"
576 #else __BIG_ENDIAN__
577 #define MAPPING_TABLE_FILE "CFUnicodeData-L.mapping"
578 #endif __BIG_ENDIAN__
579
580 __private_extern__ const void *CFUniCharGetMappingData(uint32_t type) {
581
582 __CFSpinLock(&__CFUniCharMappingTableLock);
583
584 if (NULL == __CFUniCharMappingTables) {
585 const void *bytes;
586 const void *bodyBase;
587 int headerSize;
588 int idx, count;
589
590 if (!__CFUniCharLoadFile(MAPPING_TABLE_FILE, &bytes)) {
591 __CFSpinUnlock(&__CFUniCharMappingTableLock);
592 return NULL;
593 }
594
595 (char *)bytes += 4; // Skip Unicode version
596 headerSize = *(((uint32_t *)bytes)++);
597 headerSize -= (sizeof(uint32_t) * 2);
598 bodyBase = (char *)bytes + headerSize;
599
600 count = headerSize / sizeof(uint32_t);
601
602 __CFUniCharMappingTables = (const void **)CFAllocatorAllocate(NULL, sizeof(const void *) * count, 0);
603
604 for (idx = 0;idx < count;idx++) {
605 __CFUniCharMappingTables[idx] = (char *)bodyBase + *(((uint32_t *)bytes)++);
606 }
607 }
608
609 __CFSpinUnlock(&__CFUniCharMappingTableLock);
610
611 return __CFUniCharMappingTables[type];
612 }
613
614 // Case mapping functions
615 #define DO_SPECIAL_CASE_MAPPING 1
616
/* Case-mapping lookup state, filled in by __CFUniCharLoadCaseMappingTable()
 * and indexed by case-mapping type: number of key/value pairs, the pair
 * tables themselves, and the extra (multi-character) mapping data. */
static uint32_t *__CFUniCharCaseMappingTableCounts = NULL;
static uint32_t **__CFUniCharCaseMappingTable = NULL;
static const uint32_t **__CFUniCharCaseMappingExtraTable = NULL;

/* One entry of a case-mapping table; tables are searched by _key. */
typedef struct {
    uint32_t _key;   // source character
    uint32_t _value; // encoded mapping, 0 is used by the lookup as "not found"
} __CFUniCharCaseMappings;
625
626 /* Binary searches CFStringEncodingUnicodeTo8BitCharMap */
627 static uint32_t __CFUniCharGetMappedCase(const __CFUniCharCaseMappings *theTable, uint32_t numElem, UTF32Char character) {
628 const __CFUniCharCaseMappings *p, *q, *divider;
629
630 if ((character < theTable[0]._key) || (character > theTable[numElem-1]._key)) {
631 return 0;
632 }
633 p = theTable;
634 q = p + (numElem-1);
635 while (p <= q) {
636 divider = p + ((q - p) >> 1); /* divide by 2 */
637 if (character < divider->_key) { q = divider - 1; }
638 else if (character > divider->_key) { p = divider + 1; }
639 else { return divider->_value; }
640 }
641 return 0;
642 }
643
644 #define NUM_CASE_MAP_DATA (kCFUniCharCaseFold + 1)
645
646 static bool __CFUniCharLoadCaseMappingTable(void) {
647 int idx;
648
649 if (NULL == __CFUniCharMappingTables) (void)CFUniCharGetMappingData(kCFUniCharToLowercase);
650 if (NULL == __CFUniCharMappingTables) return false;
651
652 __CFSpinLock(&__CFUniCharMappingTableLock);
653
654 if (__CFUniCharCaseMappingTableCounts) {
655 __CFSpinUnlock(&__CFUniCharMappingTableLock);
656 return true;
657 }
658
659 __CFUniCharCaseMappingTableCounts = (uint32_t *)CFAllocatorAllocate(NULL, sizeof(uint32_t) * NUM_CASE_MAP_DATA + sizeof(uint32_t *) * NUM_CASE_MAP_DATA * 2, 0);
660 __CFUniCharCaseMappingTable = (uint32_t **)((char *)__CFUniCharCaseMappingTableCounts + sizeof(uint32_t) * NUM_CASE_MAP_DATA);
661 __CFUniCharCaseMappingExtraTable = (const uint32_t **)__CFUniCharCaseMappingTable + NUM_CASE_MAP_DATA;
662
663 for (idx = 0;idx < NUM_CASE_MAP_DATA;idx++) {
664 __CFUniCharCaseMappingTableCounts[idx] = *((uint32_t *)__CFUniCharMappingTables[idx]) / (sizeof(uint32_t) * 2);
665 __CFUniCharCaseMappingTable[idx] = ((uint32_t *)__CFUniCharMappingTables[idx]) + 1;
666 __CFUniCharCaseMappingExtraTable[idx] = (const uint32_t *)((char *)__CFUniCharCaseMappingTable[idx] + *((uint32_t *)__CFUniCharMappingTables[idx]));
667 }
668
669 __CFSpinUnlock(&__CFUniCharMappingTableLock);
670 return true;
671 }
672
/* Two-letter language codes as they appear when the first two bytes of a
 * language code string are read as one host-order 16-bit value. */
#if __BIG_ENDIAN__
#define TURKISH_LANG_CODE       (0x7472) // tr
#define LITHUANIAN_LANG_CODE    (0x6C74) // lt
#define AZERI_LANG_CODE         (0x617A) // az
#else /* __BIG_ENDIAN__ */
#define TURKISH_LANG_CODE       (0x7274) // tr
#define LITHUANIAN_LANG_CODE    (0x746C) // lt
#define AZERI_LANG_CODE         (0x7A61) // az
#endif /* __BIG_ENDIAN__ */
682
683 uint32_t CFUniCharMapCaseTo(UTF32Char theChar, UTF16Char *convertedChar, uint32_t maxLength, uint32_t ctype, uint32_t flags, const uint8_t *langCode) {
684 __CFUniCharBitmapData *data;
685 uint8_t planeNo = (theChar >> 16) & 0xFF;
686
687 caseFoldRetry:
688
689 #if DO_SPECIAL_CASE_MAPPING
690 if (flags & kCFUniCharCaseMapFinalSigma) {
691 if (theChar == 0x03A3) { // Final sigma
692 *convertedChar = (ctype == kCFUniCharToLowercase ? 0x03C2 : 0x03A3);
693 return 1;
694 }
695 }
696
697 if (langCode) {
698 switch (*(uint16_t *)langCode) {
699 case LITHUANIAN_LANG_CODE:
700 if (theChar == 0x0307 && (flags & kCFUniCharCaseMapAfter_i)) {
701 return 0;
702 } else if (ctype == kCFUniCharToLowercase) {
703 if (flags & kCFUniCharCaseMapMoreAbove) {
704 switch (theChar) {
705 case 0x0049: // LATIN CAPITAL LETTER I
706 *(convertedChar++) = 0x0069;
707 *(convertedChar++) = 0x0307;
708 return 2;
709
710 case 0x004A: // LATIN CAPITAL LETTER J
711 *(convertedChar++) = 0x006A;
712 *(convertedChar++) = 0x0307;
713 return 2;
714
715 case 0x012E: // LATIN CAPITAL LETTER I WITH OGONEK
716 *(convertedChar++) = 0x012F;
717 *(convertedChar++) = 0x0307;
718 return 2;
719
720 default: break;
721 }
722 }
723 switch (theChar) {
724 case 0x00CC: // LATIN CAPITAL LETTER I WITH GRAVE
725 *(convertedChar++) = 0x0069;
726 *(convertedChar++) = 0x0307;
727 *(convertedChar++) = 0x0300;
728 return 3;
729
730 case 0x00CD: // LATIN CAPITAL LETTER I WITH ACUTE
731 *(convertedChar++) = 0x0069;
732 *(convertedChar++) = 0x0307;
733 *(convertedChar++) = 0x0301;
734 return 3;
735
736 case 0x0128: // LATIN CAPITAL LETTER I WITH TILDE
737 *(convertedChar++) = 0x0069;
738 *(convertedChar++) = 0x0307;
739 *(convertedChar++) = 0x0303;
740 return 3;
741
742 default: break;
743 }
744 }
745 break;
746
747 case TURKISH_LANG_CODE:
748 case AZERI_LANG_CODE:
749 if ((theChar == 0x0049) || (theChar == 0x0131)) { // LATIN CAPITAL LETTER I & LATIN SMALL LETTER DOTLESS I
750 *convertedChar = (((ctype == kCFUniCharToLowercase) || (ctype == kCFUniCharCaseFold)) ? ((kCFUniCharCaseMapMoreAbove & flags) ? 0x0069 : 0x0131) : 0x0049);
751 return 1;
752 } else if ((theChar == 0x0069) || (theChar == 0x0130)) { // LATIN SMALL LETTER I & LATIN CAPITAL LETTER I WITH DOT ABOVE
753 *convertedChar = (((ctype == kCFUniCharToLowercase) || (ctype == kCFUniCharCaseFold)) ? 0x0069 : 0x0130);
754 return 1;
755 } else if (theChar == 0x0307 && (kCFUniCharCaseMapAfter_i & flags)) { // COMBINING DOT ABOVE AFTER_i
756 if (ctype == kCFUniCharToLowercase) {
757 return 0;
758 } else {
759 *convertedChar = 0x0307;
760 return 1;
761 }
762 }
763 break;
764
765 default: break;
766 }
767 }
768 #endif DO_SPECIAL_CASE_MAPPING
769
770 if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData();
771
772 data = __CFUniCharBitmapDataArray + ((ctype + kCFUniCharHasNonSelfLowercaseCharacterSet) - kCFUniCharDecimalDigitCharacterSet);
773
774 if (planeNo < data->_numPlanes && data->_planes[planeNo] && CFUniCharIsMemberOfBitmap(theChar, data->_planes[planeNo]) && (__CFUniCharCaseMappingTableCounts || __CFUniCharLoadCaseMappingTable())) {
775 uint32_t value = __CFUniCharGetMappedCase((const __CFUniCharCaseMappings *)__CFUniCharCaseMappingTable[ctype], __CFUniCharCaseMappingTableCounts[ctype], theChar);
776
777 if (!value && ctype == kCFUniCharToTitlecase) {
778 value = __CFUniCharGetMappedCase((const __CFUniCharCaseMappings *)__CFUniCharCaseMappingTable[kCFUniCharToUppercase], __CFUniCharCaseMappingTableCounts[kCFUniCharToUppercase], theChar);
779 if (value) ctype = kCFUniCharToUppercase;
780 }
781
782 if (value) {
783 int count = CFUniCharConvertFlagToCount(value);
784
785 if (count == 1) {
786 if (value & kCFUniCharNonBmpFlag) {
787 if (maxLength > 1) {
788 value = (value & 0xFFFFFF) - 0x10000;
789 *(convertedChar++) = (value >> 10) + 0xD800UL;
790 *(convertedChar++) = (value & 0x3FF) + 0xDC00UL;
791 return 2;
792 }
793 } else {
794 *convertedChar = (UTF16Char)value;
795 return 1;
796 }
797 } else if (count < (int)maxLength) {
798 const uint32_t *extraMapping = __CFUniCharCaseMappingExtraTable[ctype] + (value & 0xFFFFFF);
799
800 if (value & kCFUniCharNonBmpFlag) {
801 int copiedLen = 0;
802
803 while (count-- > 0) {
804 value = *(extraMapping++);
805 if (value > 0xFFFF) {
806 if (copiedLen + 2 >= (int)maxLength) break;
807 value = (value & 0xFFFFFF) - 0x10000;
808 convertedChar[copiedLen++] = (value >> 10) + 0xD800UL;
809 convertedChar[copiedLen++] = (value & 0x3FF) + 0xDC00UL;
810 } else {
811 if (copiedLen + 1 >= (int)maxLength) break;
812 convertedChar[copiedLen++] = value;
813 }
814 }
815 if (!count) return copiedLen;
816 } else {
817 int idx;
818
819 for (idx = 0;idx < count;idx++) *(convertedChar++) = (UTF16Char)*(extraMapping++);
820 return count;
821 }
822 }
823 }
824 } else if (ctype == kCFUniCharCaseFold) {
825 ctype = kCFUniCharToLowercase;
826 goto caseFoldRetry;
827 }
828
829 if (theChar > 0xFFFF) { // non-BMP
830 theChar = (theChar & 0xFFFFFF) - 0x10000;
831 *(convertedChar++) = (theChar >> 10) + 0xD800UL;
832 *(convertedChar++) = (theChar & 0x3FF) + 0xDC00UL;
833 return 2;
834 } else {
835 *convertedChar = theChar;
836 return 1;
837 }
838 }
839
840 UInt32 CFUniCharMapTo(UniChar theChar, UniChar *convertedChar, UInt32 maxLength, uint16_t ctype, UInt32 flags) {
841 if (ctype == kCFUniCharCaseFold + 1) { // kCFUniCharDecompose
842 if (CFUniCharIsDecomposableCharacter(theChar, false)) {
843 UTF32Char buffer[MAX_DECOMPOSED_LENGTH];
844 CFIndex usedLength = CFUniCharDecomposeCharacter(theChar, buffer, MAX_DECOMPOSED_LENGTH);
845 CFIndex idx;
846
847 for (idx = 0;idx < usedLength;idx++) *(convertedChar++) = buffer[idx];
848 return usedLength;
849 } else {
850 *convertedChar = theChar;
851 return 1;
852 }
853 } else {
854 return CFUniCharMapCaseTo(theChar, convertedChar, maxLength, ctype, flags, NULL);
855 }
856 }
857
858 CF_INLINE bool __CFUniCharIsMoreAbove(UTF16Char *buffer, uint32_t length) {
859 UTF32Char currentChar;
860 uint32_t property;
861
862 while (length-- > 0) {
863 currentChar = *(buffer)++;
864 if (CFUniCharIsSurrogateHighCharacter(currentChar) && (length > 0) && CFUniCharIsSurrogateLowCharacter(*(buffer + 1))) {
865 currentChar = CFUniCharGetLongCharacterForSurrogatePair(currentChar, *(buffer++));
866 --length;
867 }
868 if (!CFUniCharIsMemberOf(currentChar, kCFUniCharNonBaseCharacterSet)) break;
869
870 property = CFUniCharGetCombiningPropertyForCharacter(currentChar, CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16) & 0xFF));
871
872 if (property == 230) return true; // Above priority
873 }
874 return false;
875 }
876
877 CF_INLINE bool __CFUniCharIsAfter_i(UTF16Char *buffer, uint32_t length) {
878 UTF32Char currentChar = 0;
879 uint32_t property;
880 UTF32Char decomposed[MAX_DECOMPOSED_LENGTH];
881 uint32_t decompLength;
882 uint32_t idx;
883
884 if (length < 1) return 0;
885
886 buffer += length;
887 while (length-- > 1) {
888 currentChar = *(--buffer);
889 if (CFUniCharIsSurrogateLowCharacter(currentChar)) {
890 if ((length > 1) && CFUniCharIsSurrogateHighCharacter(*(buffer - 1))) {
891 currentChar = CFUniCharGetLongCharacterForSurrogatePair(*(--buffer), currentChar);
892 --length;
893 } else {
894 break;
895 }
896 }
897 if (!CFUniCharIsMemberOf(currentChar, kCFUniCharNonBaseCharacterSet)) break;
898
899 property = CFUniCharGetCombiningPropertyForCharacter(currentChar, CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16) & 0xFF));
900
901 if (property == 230) return false; // Above priority
902 }
903 if (length == 0) {
904 currentChar = *(--buffer);
905 } else if (CFUniCharIsSurrogateLowCharacter(currentChar) && CFUniCharIsSurrogateHighCharacter(*(--buffer))) {
906 currentChar = CFUniCharGetLongCharacterForSurrogatePair(*buffer, currentChar);
907 }
908
909 decompLength = CFUniCharDecomposeCharacter(currentChar, decomposed, MAX_DECOMPOSED_LENGTH);
910 currentChar = *decomposed;
911
912
913 for (idx = 1;idx < decompLength;idx++) {
914 currentChar = decomposed[idx];
915 property = CFUniCharGetCombiningPropertyForCharacter(currentChar, CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16) & 0xFF));
916
917 if (property == 230) return false; // Above priority
918 }
919 return true;
920 }
921
922 __private_extern__ uint32_t CFUniCharGetConditionalCaseMappingFlags(UTF32Char theChar, UTF16Char *buffer, uint32_t currentIndex, uint32_t length, uint32_t type, const uint8_t *langCode, uint32_t lastFlags) {
923 if (theChar == 0x03A3) { // GREEK CAPITAL LETTER SIGMA
924 if ((type == kCFUniCharToLowercase) && (currentIndex > 0)) {
925 UTF16Char *start = buffer;
926 UTF16Char *end = buffer + length;
927 UTF32Char otherChar;
928
929 // First check if we're after a cased character
930 buffer += (currentIndex - 1);
931 while (start <= buffer) {
932 otherChar = *(buffer--);
933 if (CFUniCharIsSurrogateLowCharacter(otherChar) && (start <= buffer) && CFUniCharIsSurrogateHighCharacter(*buffer)) {
934 otherChar = CFUniCharGetLongCharacterForSurrogatePair(*(buffer--), otherChar);
935 }
936 if (!CFUniCharIsMemberOf(otherChar, kCFUniCharCaseIgnorableCharacterSet)) {
937 if (!CFUniCharIsMemberOf(otherChar, kCFUniCharUppercaseLetterCharacterSet) && !CFUniCharIsMemberOf(otherChar, kCFUniCharLowercaseLetterCharacterSet)) return 0; // Uppercase set contains titlecase
938 break;
939 }
940 }
941
942 // Next check if we're before a cased character
943 buffer = start + currentIndex + 1;
944 while (buffer < end) {
945 otherChar = *(buffer++);
946 if (CFUniCharIsSurrogateHighCharacter(otherChar) && (buffer < end) && CFUniCharIsSurrogateLowCharacter(*buffer)) {
947 otherChar = CFUniCharGetLongCharacterForSurrogatePair(otherChar, *(buffer++));
948 }
949 if (!CFUniCharIsMemberOf(otherChar, kCFUniCharCaseIgnorableCharacterSet)) {
950 if (CFUniCharIsMemberOf(otherChar, kCFUniCharUppercaseLetterCharacterSet) || CFUniCharIsMemberOf(otherChar, kCFUniCharLowercaseLetterCharacterSet)) return 0; // Uppercase set contains titlecase
951 break;
952 }
953 }
954 return kCFUniCharCaseMapFinalSigma;
955 }
956 } else if (langCode) {
957 if (*((const uint16_t *)langCode) == LITHUANIAN_LANG_CODE) {
958 if ((theChar == 0x0307) && ((kCFUniCharCaseMapAfter_i|kCFUniCharCaseMapMoreAbove) & lastFlags) == (kCFUniCharCaseMapAfter_i|kCFUniCharCaseMapMoreAbove)) {
959 return (__CFUniCharIsAfter_i(buffer, currentIndex) ? kCFUniCharCaseMapAfter_i : 0);
960 } else if (type == kCFUniCharToLowercase) {
961 if ((theChar == 0x0049) || (theChar == 0x004A) || (theChar == 0x012E)) {
962 return (__CFUniCharIsMoreAbove(buffer + (++currentIndex), length - currentIndex) ? kCFUniCharCaseMapMoreAbove : 0);
963 }
964 } else if ((theChar == 'i') || (theChar == 'j')) {
965 return (__CFUniCharIsMoreAbove(buffer + (++currentIndex), length - currentIndex) ? (kCFUniCharCaseMapAfter_i|kCFUniCharCaseMapMoreAbove) : 0);
966 }
967 } else if ((*((const uint16_t *)langCode) == TURKISH_LANG_CODE) || (*((const uint16_t *)langCode) == AZERI_LANG_CODE)) {
968 if (type == kCFUniCharToLowercase) {
969 if (theChar == 0x0307) {
970 return (kCFUniCharCaseMapMoreAbove & lastFlags ? kCFUniCharCaseMapAfter_i : 0);
971 } else if (theChar == 0x0049) {
972 return (((++currentIndex < length) && (buffer[currentIndex] == 0x0307)) ? kCFUniCharCaseMapMoreAbove : 0);
973 }
974 }
975 }
976 }
977 return 0;
978 }
979
980 // Unicode property database
981 static __CFUniCharBitmapData *__CFUniCharUnicodePropertyTable = NULL;
982 static int __CFUniCharUnicodePropertyTableCount = 0;
983
984 static CFSpinLock_t __CFUniCharPropTableLock = 0;
985
986 #define PROP_DB_FILE "CFUniCharPropertyDatabase.data"
987
988 const void *CFUniCharGetUnicodePropertyDataForPlane(uint32_t propertyType, uint32_t plane) {
989
990 __CFSpinLock(&__CFUniCharPropTableLock);
991
992 if (NULL == __CFUniCharUnicodePropertyTable) {
993 const void *bytes;
994 const void *bodyBase;
995 const void *planeBase;
996 int headerSize;
997 int idx, count;
998 int planeIndex, planeCount;
999 int planeSize;
1000
1001 if (!__CFUniCharLoadFile(PROP_DB_FILE, &bytes)) {
1002 __CFSpinUnlock(&__CFUniCharPropTableLock);
1003 return NULL;
1004 }
1005
1006 (char *)bytes += 4; // Skip Unicode version
1007 headerSize = CFSwapInt32BigToHost(*(((uint32_t *)bytes)++));
1008 headerSize -= (sizeof(uint32_t) * 2);
1009 bodyBase = (char *)bytes + headerSize;
1010
1011 count = headerSize / sizeof(uint32_t);
1012 __CFUniCharUnicodePropertyTableCount = count;
1013
1014 __CFUniCharUnicodePropertyTable = (__CFUniCharBitmapData *)CFAllocatorAllocate(NULL, sizeof(__CFUniCharBitmapData) * count, 0);
1015
1016 for (idx = 0;idx < count;idx++) {
1017 planeCount = *((const uint8_t *)bodyBase);
1018 (char *)planeBase = (char *)bodyBase + planeCount + (planeCount % 4 ? 4 - (planeCount % 4) : 0);
1019 __CFUniCharUnicodePropertyTable[idx]._planes = (const uint8_t **)CFAllocatorAllocate(NULL, sizeof(const void *) * planeCount, 0);
1020
1021 for (planeIndex = 0;planeIndex < planeCount;planeIndex++) {
1022 if ((planeSize = ((const uint8_t *)bodyBase)[planeIndex + 1])) {
1023 __CFUniCharUnicodePropertyTable[idx]._planes[planeIndex] = planeBase;
1024 (char *)planeBase += (planeSize * 256);
1025 } else {
1026 __CFUniCharUnicodePropertyTable[idx]._planes[planeIndex] = NULL;
1027 }
1028 }
1029
1030 __CFUniCharUnicodePropertyTable[idx]._numPlanes = planeCount;
1031 (char *)bodyBase += (CFSwapInt32BigToHost(*(((uint32_t *)bytes)++)));
1032 }
1033 }
1034
1035 __CFSpinUnlock(&__CFUniCharPropTableLock);
1036
1037 return (plane < __CFUniCharUnicodePropertyTable[propertyType]._numPlanes ? __CFUniCharUnicodePropertyTable[propertyType]._planes[plane] : NULL);
1038 }
1039
1040 __private_extern__ uint32_t CFUniCharGetNumberOfPlanesForUnicodePropertyData(uint32_t propertyType) {
1041 (void)CFUniCharGetUnicodePropertyDataForPlane(propertyType, 0);
1042 return __CFUniCharUnicodePropertyTable[propertyType]._numPlanes;
1043 }
1044
1045 __private_extern__ uint32_t CFUniCharGetUnicodeProperty(UTF32Char character, uint32_t propertyType) {
1046 if (propertyType == kCFUniCharCombiningProperty) {
1047 return CFUniCharGetCombiningPropertyForCharacter(character, CFUniCharGetUnicodePropertyDataForPlane(propertyType, (character >> 16) & 0xFF));
1048 } else if (propertyType == kCFUniCharBidiProperty) {
1049 return CFUniCharGetBidiPropertyForCharacter(character, CFUniCharGetUnicodePropertyDataForPlane(propertyType, (character >> 16) & 0xFF));
1050 } else {
1051 return 0;
1052 }
1053 }
1054
1055
1056
1057 /*
1058 The UTF8 conversion in the following function is derived from ConvertUTF.c
1059 */
1060 /*
1061 * Copyright 2001 Unicode, Inc.
1062 *
1063 * Disclaimer
1064 *
1065 * This source code is provided as is by Unicode, Inc. No claims are
1066 * made as to fitness for any particular purpose. No warranties of any
1067 * kind are expressed or implied. The recipient agrees to determine
1068 * applicability of information provided. If this file has been
1069 * purchased on magnetic or optical media from Unicode, Inc., the
1070 * sole remedy for any claim will be exchange of defective media
1071 * within 90 days of receipt.
1072 *
1073 * Limitations on Rights to Redistribute This Code
1074 *
1075 * Unicode, Inc. hereby grants the right to freely use the information
1076 * supplied in this file in the creation of products supporting the
1077 * Unicode Standard, and to make copies of this file in any form
1078 * for internal or external distribution as long as this notice
1079 * remains attached.
1080 */
1081 #define UNI_REPLACEMENT_CHAR (0x0000FFFDUL)
1082
1083 bool CFUniCharFillDestinationBuffer(const UTF32Char *src, uint32_t srcLength, void **dst, uint32_t dstLength, uint32_t *filledLength, uint32_t dstFormat) {
1084 UTF32Char currentChar;
1085 uint32_t usedLength = *filledLength;
1086
1087 if (dstFormat == kCFUniCharUTF16Format) {
1088 UTF16Char *dstBuffer = (UTF16Char *)*dst;
1089
1090 while (srcLength-- > 0) {
1091 currentChar = *(src++);
1092
1093 if (currentChar > 0xFFFF) { // Non-BMP
1094 usedLength += 2;
1095 if (dstLength) {
1096 if (usedLength > dstLength) return false;
1097 currentChar -= 0x10000;
1098 *(dstBuffer++) = (UTF16Char)((currentChar >> 10) + 0xD800UL);
1099 *(dstBuffer++) = (UTF16Char)((currentChar & 0x3FF) + 0xDC00UL);
1100 }
1101 } else {
1102 ++usedLength;
1103 if (dstLength) {
1104 if (usedLength > dstLength) return false;
1105 *(dstBuffer++) = (UTF16Char)currentChar;
1106 }
1107 }
1108 }
1109
1110 *dst = dstBuffer;
1111 } else if (dstFormat == kCFUniCharUTF8Format) {
1112 uint8_t *dstBuffer = (uint8_t *)*dst;
1113 uint16_t bytesToWrite = 0;
1114 const UTF32Char byteMask = 0xBF;
1115 const UTF32Char byteMark = 0x80;
1116 static const uint8_t firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
1117
1118 while (srcLength-- > 0) {
1119 currentChar = *(src++);
1120
1121 /* Figure out how many bytes the result will require */
1122 if (currentChar < (UTF32Char)0x80) {
1123 bytesToWrite = 1;
1124 } else if (currentChar < (UTF32Char)0x800) {
1125 bytesToWrite = 2;
1126 } else if (currentChar < (UTF32Char)0x10000) {
1127 bytesToWrite = 3;
1128 } else if (currentChar < (UTF32Char)0x200000) {
1129 bytesToWrite = 4;
1130 } else {
1131 bytesToWrite = 2;
1132 currentChar = UNI_REPLACEMENT_CHAR;
1133 }
1134
1135 usedLength += bytesToWrite;
1136
1137 if (dstLength) {
1138 if (usedLength > dstLength) return false;
1139
1140 dstBuffer += bytesToWrite;
1141 switch (bytesToWrite) { /* note: everything falls through. */
1142 case 4: *--dstBuffer = (currentChar | byteMark) & byteMask; currentChar >>= 6;
1143 case 3: *--dstBuffer = (currentChar | byteMark) & byteMask; currentChar >>= 6;
1144 case 2: *--dstBuffer = (currentChar | byteMark) & byteMask; currentChar >>= 6;
1145 case 1: *--dstBuffer = currentChar | firstByteMark[bytesToWrite];
1146 }
1147 dstBuffer += bytesToWrite;
1148 }
1149 }
1150
1151 *dst = dstBuffer;
1152 } else {
1153 UTF32Char *dstBuffer = (UTF32Char *)*dst;
1154
1155 while (srcLength-- > 0) {
1156 currentChar = *(src++);
1157
1158 ++usedLength;
1159 if (dstLength) {
1160 if (usedLength > dstLength) return false;
1161 *(dstBuffer++) = currentChar;
1162 }
1163 }
1164
1165 *dst = dstBuffer;
1166 }
1167
1168 *filledLength = usedLength;
1169
1170 return true;
1171 }
1172
#if defined(__WIN32__)
/*
 Releases the tables this file allocates lazily and resets the corresponding
 globals, taking the lock that guards each group while doing so.
 Compiled only when __WIN32__ is defined.
*/
void __CFUniCharCleanup(void) {
    int slot;

    // Memory allocated by __CFUniCharLoadBitmapData()
    __CFSpinLock(&__CFUniCharBitmapLock);
    if (NULL != __CFUniCharBitmapDataArray) {
        slot = 0;
        while (slot < __CFUniCharNumberOfBitmaps) {
            CFAllocatorDeallocate(NULL, __CFUniCharBitmapDataArray[slot]._planes);
            __CFUniCharBitmapDataArray[slot]._planes = NULL;
            ++slot;
        }

        CFAllocatorDeallocate(NULL, __CFUniCharBitmapDataArray);
        __CFUniCharBitmapDataArray = NULL;
        __CFUniCharNumberOfBitmaps = 0;
    }
    __CFSpinUnlock(&__CFUniCharBitmapLock);

    __CFSpinLock(&__CFUniCharMappingTableLock);
    // Memory allocated by CFUniCharGetMappingData()
    if (NULL != __CFUniCharMappingTables) {
        CFAllocatorDeallocate(NULL, __CFUniCharMappingTables);
        __CFUniCharMappingTables = NULL;
    }
    // Memory allocated by __CFUniCharLoadCaseMappingTable()
    if (NULL != __CFUniCharCaseMappingTableCounts) {
        CFAllocatorDeallocate(NULL, __CFUniCharCaseMappingTableCounts);
        __CFUniCharCaseMappingTableCounts = NULL;

        // These are only reset here, not deallocated
        __CFUniCharCaseMappingTable = NULL;
        __CFUniCharCaseMappingExtraTable = NULL;
    }
    __CFSpinUnlock(&__CFUniCharMappingTableLock);

    // Memory allocated by CFUniCharGetUnicodePropertyDataForPlane()
    __CFSpinLock(&__CFUniCharPropTableLock);
    if (NULL != __CFUniCharUnicodePropertyTable) {
        slot = 0;
        while (slot < __CFUniCharUnicodePropertyTableCount) {
            CFAllocatorDeallocate(NULL, __CFUniCharUnicodePropertyTable[slot]._planes);
            __CFUniCharUnicodePropertyTable[slot]._planes = NULL;
            ++slot;
        }

        CFAllocatorDeallocate(NULL, __CFUniCharUnicodePropertyTable);
        __CFUniCharUnicodePropertyTable = NULL;
        __CFUniCharUnicodePropertyTableCount = 0;
    }
    __CFSpinUnlock(&__CFUniCharPropTableLock);
}
#endif // __WIN32__
1230