]> git.saurik.com Git - apple/cf.git/blob - StringEncodings.subproj/CFUniChar.c
CF-299.35.tar.gz
[apple/cf.git] / StringEncodings.subproj / CFUniChar.c
1 /*
2 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
7 *
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25 /* CFUniChar.c
26 Copyright 2001-2002, Apple, Inc. All rights reserved.
27 Responsibility: Aki Inoue
28 */
29
30 #include <CoreFoundation/CFByteOrder.h>
31 #include "CFInternal.h"
32 #include "CFUniChar.h"
33 #include "CFStringEncodingConverterExt.h"
34 #include "CFUnicodeDecomposition.h"
35 #include "CFUniCharPriv.h"
36 #if defined(__MACOS8__)
37 #include <stdio.h>
38 #elif defined(__WIN32__)
39 #include <windows.h>
40 #include <sys/stat.h>
41 #include <fcntl.h>
42 #include <io.h>
43 #elif defined(__MACH__) || defined(__LINUX__) || defined(__FREEBSD__)
44 #if defined(__MACH__)
45 #include <mach/mach.h>
46 #endif
47 #include <fcntl.h>
48 #include <sys/types.h>
49 #include <sys/stat.h>
50 #include <sys/param.h>
51 #include <sys/mman.h>
52 #include <unistd.h>
53 #include <stdlib.h>
54 #endif
55
56 #if defined(__MACOS8__)
57 #define MAXPATHLEN FILENAME_MAX
58 #elif defined WIN32
59 #define MAXPATHLEN MAX_PATH
60 #endif
61
62 // Memory map the file
63 #if !defined(__MACOS8__)
64
65 CF_INLINE void __CFUniCharCharacterSetPath(char *cpath) {
66 strlcpy(cpath, __kCFCharacterSetDir, MAXPATHLEN);
67 strlcat(cpath, "/CharacterSets/", MAXPATHLEN);
68 }
69
/* Maps the whole file `fileName` read-only into memory.
   On success stores the base address of the mapping in *bytes and returns
   true; returns false if the file cannot be opened, sized or mapped.
   The mapping is never released by this function. */
static bool __CFUniCharLoadBytesFromFile(const char *fileName, const void **bytes) {
#if defined(__WIN32__)
    HANDLE fileHandle = CreateFile(fileName, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
    HANDLE viewMapping;

    if (fileHandle == INVALID_HANDLE_VALUE) return false;

    /* The file handle is not needed once the mapping object exists. */
    viewMapping = CreateFileMapping(fileHandle, NULL, PAGE_READONLY, 0, 0, NULL);
    CloseHandle(fileHandle);
    if (!viewMapping) return false;

    *bytes = MapViewOfFileEx(viewMapping, FILE_MAP_READ, 0, 0, 0, NULL);
    CloseHandle(viewMapping);

    return (*bytes ? true : false);
#else
    struct stat fileInfo;
    bool isMapped = false;
    int fd = open(fileName, O_RDONLY, 0);

    if (fd < 0) return false;

    if (fstat(fd, &fileInfo) >= 0) {
#if defined(__MACH__)
        /* map_fd() reports success with a zero return value. */
        isMapped = (map_fd(fd, 0, (vm_offset_t *)bytes, true, (vm_size_t)fileInfo.st_size) ? false : true);
#else
        *bytes = mmap(0, fileInfo.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
        isMapped = (*bytes == (void *)-1 ? false : true);
#endif
    }

    /* The descriptor is closed on both the success and the failure path. */
    close(fd);

    return isMapped;
#endif
}
107
108 static bool __CFUniCharLoadFile(const char *bitmapName, const void **bytes) {
109 char cpath[MAXPATHLEN];
110
111 __CFUniCharCharacterSetPath(cpath);
112 strlcat(cpath, bitmapName, MAXPATHLEN);
113
114 return __CFUniCharLoadBytesFromFile(cpath, bytes);
115 }
116 #endif !defined(__MACOS8__)
117
118 // Bitmap functions
119 CF_INLINE bool isControl(UTF32Char theChar, uint16_t charset, const void *data) { // ISO Control
120 if ((theChar <= 0x001F) || (theChar >= 0x007F && theChar <= 0x009F)) return true;
121 return false;
122 }
123
124 CF_INLINE bool isWhitespace(UTF32Char theChar, uint16_t charset, const void *data) { // Space
125 if ((theChar == 0x0020) || (theChar == 0x0009) || (theChar == 0x00A0) || (theChar == 0x1680) || (theChar >= 0x2000 && theChar <= 0x200B) || (theChar == 0x202F) || (theChar == 0x205F) || (theChar == 0x3000)) return true;
126 return false;
127 }
128
129 CF_INLINE bool isWhitespaceAndNewLine(UTF32Char theChar, uint16_t charset, const void *data) { // White space
130 if (isWhitespace(theChar, charset, data) || (theChar >= 0x000A && theChar <= 0x000D) || (theChar == 0x0085) || (theChar == 0x2028) || (theChar == 0x2029)) return true;
131 return false;
132 }
133
134 #if defined(__MACOS8__)
/* This structure MUST match the sets in NSRulebook.h.
   The "__CFCSetIsMemberSet()" function is a modified version of the one in
   the Text shlib.
*/
typedef struct _CFCharSetPrivateStruct {
    int issorted;    /* 1 = sorted, 0 = unsorted, 2 = is_property_table */
    int bitrange[4]; /* bitmap (each bit is a 1k range in space of 2^17) */
    int nsingles;    /* number of single elements */
    int nranges;     /* number of ranges */
    int singmin;     /* minimum single element */
    int singmax;     /* maximum single element */
    int array[1];    /* actually nsingles singles followed by nranges (first, last) pairs */
} CFCharSetPrivateStruct;
146
147 /* Membership function for complex sets
148 */
149 CF_INLINE bool __CFCSetIsMemberSet(const CFCharSetPrivateStruct *set, UTF16Char theChar) {
150 int *tmp, *tmp2;
151 int i, nel;
152 int *p, *q, *wari;
153
154 if (set->issorted != 1) {
155 return false;
156 }
157 theChar &= 0x0001FFFF; /* range 1-131k */
158 if (__CFCSetBitsInRange(theChar, set->bitrange)) {
159 if (theChar >= set->singmin && theChar <= set->singmax) {
160 tmp = (int *) &(set->array[0]);
161 if ((nel = set->nsingles) < __kCFSetBreakeven) {
162 for (i = 0; i < nel; i++) {
163 if (*tmp == theChar) return true;
164 ++tmp;
165 }
166 }
167 else { // this does a binary search
168 p = tmp; q = tmp + (nel-1);
169 while (p <= q) {
170 wari = (p + ((q-p)>>1));
171 if (theChar < *wari) q = wari - 1;
172 else if (theChar > *wari) p = wari + 1;
173 else return true;
174 }
175 }
176 }
177 tmp = (int *) &(set->array[0]) + set->nsingles;
178 if ((nel = set->nranges) < __kCFSetBreakeven) {
179 i = nel;
180 tmp2 = tmp+1;
181 while (i) {
182 if (theChar <= *tmp2) {
183 if (theChar >= *tmp) return true;
184 }
185 tmp += 2;
186 tmp2 = tmp+1;
187 --i;
188 }
189 } else { /* binary search the ranges */
190 p = tmp; q = tmp + (2*nel-2);
191 while (p <= q) {
192 i = (q - p) >> 1; /* >>1 means divide by 2 */
193 wari = p + (i & 0xFFFFFFFE); /* &fffffffe make it an even num */
194 if (theChar < *wari) q = wari - 2;
195 else if (theChar > *(wari + 1)) p = wari + 2;
196 else return true;
197 }
198 }
199 return false;
200 /* fall through & return zero */
201 }
202 return false; /* not a member */
203 }
204
205 /* Take a private "set" structure and make a bitmap from it. Return the bitmap. THE CALLER MUST RELEASE THE RETURNED MEMORY as necessary.
206 */
207
208 CF_INLINE void __CFCSetBitmapProcessManyCharacters(unsigned char *map, unsigned n, unsigned m) {
209 unsigned tmp;
210 for (tmp = n; tmp <= m; tmp++) CFUniCharAddCharacterToBitmap(tmp, map);
211 }
212
213 CF_INLINE void __CFCSetMakeSetBitmapFromSet(const CFCharSetPrivateStruct *theSet, uint8_t *map)
214 {
215 int *ip;
216 UTF16Char ctmp;
217 int cnt;
218
219 for (cnt = 0; cnt < theSet->nsingles; cnt++) {
220 ctmp = theSet->array[cnt];
221 CFUniCharAddCharacterToBitmap(tmp, map);
222 }
223 ip = (int *) (&(theSet->array[0]) + theSet->nsingles);
224 cnt = theSet->nranges;
225 while (cnt) {
226 /* This could be more efficient: turn on whole bytes at a time
227 when there are such cases as 8 characters in a row... */
228 __CFCSetBitmapProcessManyCharacters((unsigned char *)map, *ip, *(ip+1));
229 ip += 2;
230 --cnt;
231 }
232 }
233
234 extern const CFCharSetPrivateStruct *_CFdecimalDigitCharacterSetData;
235 extern const CFCharSetPrivateStruct *_CFletterCharacterSetData;
236 extern const CFCharSetPrivateStruct *_CFlowercaseLetterCharacterSetData;
237 extern const CFCharSetPrivateStruct *_CFuppercaseLetterCharacterSetData;
238 extern const CFCharSetPrivateStruct *_CFnonBaseCharacterSetData;
239 extern const CFCharSetPrivateStruct *_CFdecomposableCharacterSetData;
240 extern const CFCharSetPrivateStruct *_CFpunctuationCharacterSetData;
241 extern const CFCharSetPrivateStruct *_CFalphanumericCharacterSetData;
242 extern const CFCharSetPrivateStruct *_CFillegalCharacterSetData;
243 extern const CFCharSetPrivateStruct *_CFhasNonSelfLowercaseMappingData;
244 extern const CFCharSetPrivateStruct *_CFhasNonSelfUppercaseMappingData;
245 extern const CFCharSetPrivateStruct *_CFhasNonSelfTitlecaseMappingData;
246
247 #else __MACOS8__
/* Per-character-set table of plane bitmaps. */
typedef struct {
    uint32_t _numPlanes;     /* number of entries in _planes */
    const uint8_t **_planes; /* one bitmap pointer per plane; NULL for a plane without stored data */
} __CFUniCharBitmapData;
252
253 static char __CFUniCharUnicodeVersionString[8] = {0, 0, 0, 0, 0, 0, 0, 0};
254
255 static uint32_t __CFUniCharNumberOfBitmaps = 0;
256 static __CFUniCharBitmapData *__CFUniCharBitmapDataArray = NULL;
257
258 static CFSpinLock_t __CFUniCharBitmapLock = 0;
259
260 #ifndef CF_UNICHAR_BITMAP_FILE
261 #define CF_UNICHAR_BITMAP_FILE "CFCharacterSetBitmaps.bitmap"
262 #endif CF_UNICHAR_BITMAP_FILE
263
264 static bool __CFUniCharLoadBitmapData(void) {
265 uint32_t headerSize;
266 uint32_t bitmapSize;
267 int numPlanes;
268 uint8_t currentPlane;
269 const void *bytes;
270 const void *bitmapBase;
271 const void *bitmap;
272 int idx, bitmapIndex;
273
274 __CFSpinLock(&__CFUniCharBitmapLock);
275
276 if (__CFUniCharBitmapDataArray || !__CFUniCharLoadFile(CF_UNICHAR_BITMAP_FILE, &bytes)) {
277 __CFSpinUnlock(&__CFUniCharBitmapLock);
278 return false;
279 }
280
281 for (idx = 0;idx < 4 && ((const uint8_t *)bytes)[idx];idx++) {
282 __CFUniCharUnicodeVersionString[idx * 2] = ((const uint8_t *)bytes)[idx];
283 __CFUniCharUnicodeVersionString[idx * 2 + 1] = '.';
284 }
285 __CFUniCharUnicodeVersionString[(idx < 4 ? idx * 2 - 1 : 7)] = '\0';
286
287 headerSize = CFSwapInt32BigToHost(*((uint32_t *)((char *)bytes + 4)));
288
289 bitmapBase = (char *)bytes + headerSize;
290 (char *)bytes += (sizeof(uint32_t) * 2);
291 headerSize -= (sizeof(uint32_t) * 2);
292
293 __CFUniCharNumberOfBitmaps = headerSize / (sizeof(uint32_t) * 2);
294
295 __CFUniCharBitmapDataArray = (__CFUniCharBitmapData *)CFAllocatorAllocate(NULL, sizeof(__CFUniCharBitmapData) * __CFUniCharNumberOfBitmaps, 0);
296
297 for (idx = 0;idx < (int)__CFUniCharNumberOfBitmaps;idx++) {
298 bitmap = (char *)bitmapBase + CFSwapInt32BigToHost(*(((uint32_t *)bytes)++));
299 bitmapSize = CFSwapInt32BigToHost(*(((uint32_t *)bytes)++));
300
301 numPlanes = bitmapSize / (8 * 1024);
302 numPlanes = *(const uint8_t *)((char *)bitmap + (((numPlanes - 1) * ((8 * 1024) + 1)) - 1)) + 1;
303 __CFUniCharBitmapDataArray[idx]._planes = (const uint8_t **)CFAllocatorAllocate(NULL, sizeof(const void *) * numPlanes, NULL);
304 __CFUniCharBitmapDataArray[idx]._numPlanes = numPlanes;
305
306 currentPlane = 0;
307 for (bitmapIndex = 0;bitmapIndex < numPlanes;bitmapIndex++) {
308 if (bitmapIndex == currentPlane) {
309 __CFUniCharBitmapDataArray[idx]._planes[bitmapIndex] = bitmap;
310 (char *)bitmap += (8 * 1024);
311 currentPlane = *(((const uint8_t *)bitmap)++);
312 } else {
313 __CFUniCharBitmapDataArray[idx]._planes[bitmapIndex] = NULL;
314 }
315 }
316 }
317
318 __CFSpinUnlock(&__CFUniCharBitmapLock);
319
320 return true;
321 }
322
323 __private_extern__ const char *__CFUniCharGetUnicodeVersionString(void) {
324 if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData();
325 return __CFUniCharUnicodeVersionString;
326 }
327
328 #endif __MACOS8__
329
330 #define CONTROLSET_HAS_FORMATTER 1
331
332 bool CFUniCharIsMemberOf(UTF32Char theChar, uint32_t charset) {
333 #if CONTROLSET_HAS_FORMATTER
334 if (charset == kCFUniCharControlCharacterSet) charset = kCFUniCharControlAndFormatterCharacterSet;
335 #endif CONTROLSET_HAS_FORMATTER
336
337 switch (charset) {
338 case kCFUniCharControlCharacterSet:
339 return isControl(theChar, charset, NULL);
340
341 case kCFUniCharWhitespaceCharacterSet:
342 return isWhitespace(theChar, charset, NULL);
343
344 case kCFUniCharWhitespaceAndNewlineCharacterSet:
345 return isWhitespaceAndNewLine(theChar, charset, NULL);
346
347 #if defined(__MACOS8__)
348 case kCFUniCharDecimalDigitCharacterSet:
349 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct *)&_CFdecimalDigitCharacterSetData, theChar);
350 case kCFUniCharLetterCharacterSet:
351 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct *)&_CFletterCharacterSetData, theChar);
352 case kCFUniCharLowercaseLetterCharacterSet:
353 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct *)&_CFlowercaseLetterCharacterSetData, theChar);
354 case kCFUniCharUppercaseLetterCharacterSet:
355 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct *)&_CFuppercaseLetterCharacterSetData, theChar);
356 case kCFUniCharNonBaseCharacterSet:
357 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct *)&_CFnonBaseCharacterSetData, theChar);
358 case kCFUniCharAlphaNumericCharacterSet:
359 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct *)&_CFalphanumericCharacterSetData, theChar);
360 case kCFUniCharDecomposableCharacterSet:
361 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct *)&_CFdecomposableCharacterSetData, theChar);
362 case kCFUniCharPunctuationCharacterSet:
363 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct *)&_CFpunctuationCharacterSetData, theChar);
364 case kCFUniCharIllegalCharacterSet:
365 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct *)&_CFillegalCharacterSetData, theChar);
366 case kCFUniCharHasNonSelfLowercaseMapping:
367 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct *)&_CFhasNonSelfLowercaseMappingData, theChar);
368 case kCFUniCharHasNonSelfUppercaseMapping:
369 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct *)&_CFhasNonSelfUppercaseMappingData, theChar);
370 case kCFUniCharHasNonSelfTitlecaseMapping:
371 return __CFCSetIsMemberSet((const CFCharSetPrivateStruct *)&_CFhasNonSelfTitlecaseMappingData, theChar);
372 default:
373 return false;
374 #else
375 default:
376 if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData();
377
378 if ((charset - kCFUniCharDecimalDigitCharacterSet) < __CFUniCharNumberOfBitmaps) {
379 __CFUniCharBitmapData *data = __CFUniCharBitmapDataArray + (charset - kCFUniCharDecimalDigitCharacterSet);
380 uint8_t planeNo = (theChar >> 16) & 0xFF;
381
382 // The bitmap data for kCFUniCharIllegalCharacterSet is actually LEGAL set less Plane 14 ~ 16
383 if (charset == kCFUniCharIllegalCharacterSet) {
384 if (planeNo == 0x0E) { // Plane 14
385 theChar &= 0xFF;
386 return (((theChar == 0x01) || ((theChar > 0x1F) && (theChar < 0x80))) ? false : true);
387 } else if (planeNo == 0x0F || planeNo == 0x10) { // Plane 15 & 16
388 return ((theChar & 0xFF) > 0xFFFD ? true : false);
389 } else {
390 return (planeNo < data->_numPlanes && data->_planes[planeNo] ? !CFUniCharIsMemberOfBitmap(theChar, data->_planes[planeNo]) : true);
391 }
392 } else if (charset == kCFUniCharControlAndFormatterCharacterSet) {
393 if (planeNo == 0x0E) { // Plane 14
394 theChar &= 0xFF;
395 return (((theChar == 0x01) || ((theChar > 0x1F) && (theChar < 0x80))) ? true : false);
396 } else {
397 return (planeNo < data->_numPlanes && data->_planes[planeNo] ? CFUniCharIsMemberOfBitmap(theChar, data->_planes[planeNo]) : false);
398 }
399 } else {
400 return (planeNo < data->_numPlanes && data->_planes[planeNo] ? CFUniCharIsMemberOfBitmap(theChar, data->_planes[planeNo]) : false);
401 }
402 }
403 return false;
404 #endif
405 }
406 }
407
408 const uint8_t *CFUniCharGetBitmapPtrForPlane(uint32_t charset, uint32_t plane) {
409 if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData();
410
411 #if CONTROLSET_HAS_FORMATTER
412 if (charset == kCFUniCharControlCharacterSet) charset = kCFUniCharControlAndFormatterCharacterSet;
413 #endif CONTROLSET_HAS_FORMATTER
414
415 if (charset > kCFUniCharWhitespaceAndNewlineCharacterSet && (charset - kCFUniCharDecimalDigitCharacterSet) < __CFUniCharNumberOfBitmaps && charset != kCFUniCharIllegalCharacterSet) {
416 __CFUniCharBitmapData *data = __CFUniCharBitmapDataArray + (charset - kCFUniCharDecimalDigitCharacterSet);
417
418 return (plane < data->_numPlanes ? data->_planes[plane] : NULL);
419 }
420 return NULL;
421 }
422
423 __private_extern__ uint8_t CFUniCharGetBitmapForPlane(uint32_t charset, uint32_t plane, void *bitmap, bool isInverted) {
424 const uint8_t *src = CFUniCharGetBitmapPtrForPlane(charset, plane);
425 int numBytes = (8 * 1024);
426
427 if (src) {
428 if (isInverted) {
429 while (numBytes-- > 0) *(((uint8_t *)bitmap)++) = ~(*(src++));
430 } else {
431 while (numBytes-- > 0) *(((uint8_t *)bitmap)++) = *(src++);
432 }
433 return kCFUniCharBitmapFilled;
434 } else if (charset == kCFUniCharIllegalCharacterSet) {
435 __CFUniCharBitmapData *data = __CFUniCharBitmapDataArray + (charset - kCFUniCharDecimalDigitCharacterSet);
436
437 if (plane < data->_numPlanes && (src = data->_planes[plane])) {
438 if (isInverted) {
439 while (numBytes-- > 0) *(((uint8_t *)bitmap)++) = *(src++);
440 } else {
441 while (numBytes-- > 0) *(((uint8_t *)bitmap)++) = ~(*(src++));
442 }
443 return kCFUniCharBitmapFilled;
444 } else if (plane == 0x0E) { // Plane 14
445 int idx;
446 uint8_t asciiRange = (isInverted ? (uint8_t)0xFF : (uint8_t)0);
447 uint8_t otherRange = (isInverted ? (uint8_t)0 : (uint8_t)0xFF);
448
449 *(((uint8_t *)bitmap)++) = 0x02; // UE0001 LANGUAGE TAG
450 for (idx = 1;idx < numBytes;idx++) {
451 *(((uint8_t *)bitmap)++) = ((idx >= (0x20 / 8) && (idx < (0x80 / 8))) ? asciiRange : otherRange);
452 }
453 return kCFUniCharBitmapFilled;
454 } else if (plane == 0x0F || plane == 0x10) { // Plane 15 & 16
455 uint32_t value = (isInverted ? 0xFFFFFFFF : 0);
456 numBytes /= 4; // for 32bit
457
458 while (numBytes-- > 0) *(((uint32_t *)bitmap)++) = value;
459 *(((uint8_t *)bitmap) - 5) = (isInverted ? 0x3F : 0xC0); // 0xFFFE & 0xFFFF
460 return kCFUniCharBitmapFilled;
461 }
462 return (isInverted ? kCFUniCharBitmapEmpty : kCFUniCharBitmapAll);
463 #if CONTROLSET_HAS_FORMATTER
464 } else if ((charset == kCFUniCharControlCharacterSet) && (plane == 0x0E)) { // Language tags
465 int idx;
466 uint8_t asciiRange = (isInverted ? (uint8_t)0 : (uint8_t)0xFF);
467 uint8_t otherRange = (isInverted ? (uint8_t)0xFF : (uint8_t)0);
468
469 *(((uint8_t *)bitmap)++) = 0x02; // UE0001 LANGUAGE TAG
470 for (idx = 1;idx < numBytes;idx++) {
471 *(((uint8_t *)bitmap)++) = ((idx >= (0x20 / 8) && (idx < (0x80 / 8))) ? asciiRange : otherRange);
472 }
473 return kCFUniCharBitmapFilled;
474 #endif CONTROLSET_HAS_FORMATTER
475 } else if (charset < kCFUniCharDecimalDigitCharacterSet) {
476 if (plane) return (isInverted ? kCFUniCharBitmapAll : kCFUniCharBitmapEmpty);
477
478 if (charset == kCFUniCharControlCharacterSet) {
479 int idx;
480 uint8_t nonFillValue = (isInverted ? (uint8_t)0xFF : (uint8_t)0);
481 uint8_t fillValue = (isInverted ? (uint8_t)0 : (uint8_t)0xFF);
482 uint8_t *bitmapP = (uint8_t *)bitmap;
483
484 for (idx = 0;idx < numBytes;idx++) {
485 *(bitmapP++) = (idx < (0x20 / 8) || (idx >= (0x80 / 8) && idx < (0xA0 / 8)) ? fillValue : nonFillValue);
486 }
487
488 // DEL
489 if (isInverted) {
490 CFUniCharRemoveCharacterFromBitmap(0x007F, bitmap);
491 } else {
492 CFUniCharAddCharacterToBitmap(0x007F, bitmap);
493 }
494 } else {
495 uint8_t *bitmapBase = (uint8_t *)bitmap;
496 int idx;
497 uint8_t nonFillValue = (isInverted ? (uint8_t)0xFF : (uint8_t)0);
498
499 while (numBytes-- > 0) *(((uint8_t *)bitmap)++) = nonFillValue;
500
501 if (charset == kCFUniCharWhitespaceAndNewlineCharacterSet) {
502 static const UniChar newlines[] = {0x000A, 0x000B, 0x000C, 0x000D, 0x0085, 0x2028, 0x2029};
503
504 for (idx = 0;idx < (int)(sizeof(newlines) / sizeof(*newlines)); idx++) {
505 if (isInverted) {
506 CFUniCharRemoveCharacterFromBitmap(newlines[idx], bitmapBase);
507 } else {
508 CFUniCharAddCharacterToBitmap(newlines[idx], bitmapBase);
509 }
510 }
511 }
512
513 if (isInverted) {
514 CFUniCharRemoveCharacterFromBitmap(0x0009, bitmapBase);
515 CFUniCharRemoveCharacterFromBitmap(0x0020, bitmapBase);
516 CFUniCharRemoveCharacterFromBitmap(0x00A0, bitmapBase);
517 CFUniCharRemoveCharacterFromBitmap(0x1680, bitmapBase);
518 CFUniCharRemoveCharacterFromBitmap(0x202F, bitmapBase);
519 CFUniCharRemoveCharacterFromBitmap(0x205F, bitmapBase);
520 CFUniCharRemoveCharacterFromBitmap(0x3000, bitmapBase);
521 } else {
522 CFUniCharAddCharacterToBitmap(0x0009, bitmapBase);
523 CFUniCharAddCharacterToBitmap(0x0020, bitmapBase);
524 CFUniCharAddCharacterToBitmap(0x00A0, bitmapBase);
525 CFUniCharAddCharacterToBitmap(0x1680, bitmapBase);
526 CFUniCharAddCharacterToBitmap(0x202F, bitmapBase);
527 CFUniCharAddCharacterToBitmap(0x205F, bitmapBase);
528 CFUniCharAddCharacterToBitmap(0x3000, bitmapBase);
529 }
530
531 for (idx = 0x2000;idx <= 0x200B;idx++) {
532 if (isInverted) {
533 CFUniCharRemoveCharacterFromBitmap(idx, bitmapBase);
534 } else {
535 CFUniCharAddCharacterToBitmap(idx, bitmapBase);
536 }
537 }
538 }
539 return kCFUniCharBitmapFilled;
540 }
541 return (isInverted ? kCFUniCharBitmapAll : kCFUniCharBitmapEmpty);
542 }
543
544 __private_extern__ uint32_t CFUniCharGetNumberOfPlanes(uint32_t charset) {
545 #if defined(__MACOS8__)
546 return 1;
547 #else __MACOS8__
548 #if CONTROLSET_HAS_FORMATTER
549 if (charset == kCFUniCharControlCharacterSet) return 15; // 0 to 14
550 #endif CONTROLSET_HAS_FORMATTER
551
552 if (charset < kCFUniCharDecimalDigitCharacterSet) {
553 return 1;
554 } else if (charset == kCFUniCharIllegalCharacterSet) {
555 return 17;
556 } else {
557 uint32_t numPlanes;
558
559 if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData();
560
561 numPlanes = __CFUniCharBitmapDataArray[charset - kCFUniCharDecimalDigitCharacterSet]._numPlanes;
562
563 return numPlanes;
564 }
565 #endif __MACOS8__
566 }
567
568 // Mapping data loading
569 static const void **__CFUniCharMappingTables = NULL;
570
571 static CFSpinLock_t __CFUniCharMappingTableLock = 0;
572
573 #if defined(__BIG_ENDIAN__)
574 #define MAPPING_TABLE_FILE "CFUnicodeData-B.mapping"
575 #else __BIG_ENDIAN__
576 #define MAPPING_TABLE_FILE "CFUnicodeData-L.mapping"
577 #endif __BIG_ENDIAN__
578
579 __private_extern__ const void *CFUniCharGetMappingData(uint32_t type) {
580
581 __CFSpinLock(&__CFUniCharMappingTableLock);
582
583 if (NULL == __CFUniCharMappingTables) {
584 const void *bytes;
585 const void *bodyBase;
586 int headerSize;
587 int idx, count;
588
589 if (!__CFUniCharLoadFile(MAPPING_TABLE_FILE, &bytes)) {
590 __CFSpinUnlock(&__CFUniCharMappingTableLock);
591 return NULL;
592 }
593
594 (char *)bytes += 4; // Skip Unicode version
595 headerSize = *(((uint32_t *)bytes)++);
596 headerSize -= (sizeof(uint32_t) * 2);
597 bodyBase = (char *)bytes + headerSize;
598
599 count = headerSize / sizeof(uint32_t);
600
601 __CFUniCharMappingTables = (const void **)CFAllocatorAllocate(NULL, sizeof(const void *) * count, 0);
602
603 for (idx = 0;idx < count;idx++) {
604 __CFUniCharMappingTables[idx] = (char *)bodyBase + *(((uint32_t *)bytes)++);
605 }
606 }
607
608 __CFSpinUnlock(&__CFUniCharMappingTableLock);
609
610 return __CFUniCharMappingTables[type];
611 }
612
613 // Case mapping functions
614 #define DO_SPECIAL_CASE_MAPPING 1
615
/* Built by __CFUniCharLoadCaseMappingTable(), one entry per case mapping type. */
static uint32_t *__CFUniCharCaseMappingTableCounts = NULL;     /* number of (key, value) pairs */
static uint32_t **__CFUniCharCaseMappingTable = NULL;          /* start of the (key, value) pairs */
static const uint32_t **__CFUniCharCaseMappingExtraTable = NULL; /* data located right after the pairs */

/* One entry of a case mapping table; tables are binary searched on _key. */
typedef struct {
    uint32_t _key;
    uint32_t _value;
} __CFUniCharCaseMappings;
624
625 /* Binary searches CFStringEncodingUnicodeTo8BitCharMap */
626 static uint32_t __CFUniCharGetMappedCase(const __CFUniCharCaseMappings *theTable, uint32_t numElem, UTF32Char character) {
627 const __CFUniCharCaseMappings *p, *q, *divider;
628
629 if ((character < theTable[0]._key) || (character > theTable[numElem-1]._key)) {
630 return 0;
631 }
632 p = theTable;
633 q = p + (numElem-1);
634 while (p <= q) {
635 divider = p + ((q - p) >> 1); /* divide by 2 */
636 if (character < divider->_key) { q = divider - 1; }
637 else if (character > divider->_key) { p = divider + 1; }
638 else { return divider->_value; }
639 }
640 return 0;
641 }
642
643 #define NUM_CASE_MAP_DATA (kCFUniCharCaseFold + 1)
644
645 static bool __CFUniCharLoadCaseMappingTable(void) {
646 int idx;
647
648 if (NULL == __CFUniCharMappingTables) (void)CFUniCharGetMappingData(kCFUniCharToLowercase);
649 if (NULL == __CFUniCharMappingTables) return false;
650
651 __CFSpinLock(&__CFUniCharMappingTableLock);
652
653 if (__CFUniCharCaseMappingTableCounts) {
654 __CFSpinUnlock(&__CFUniCharMappingTableLock);
655 return true;
656 }
657
658 __CFUniCharCaseMappingTableCounts = (uint32_t *)CFAllocatorAllocate(NULL, sizeof(uint32_t) * NUM_CASE_MAP_DATA + sizeof(uint32_t *) * NUM_CASE_MAP_DATA * 2, 0);
659 __CFUniCharCaseMappingTable = (uint32_t **)((char *)__CFUniCharCaseMappingTableCounts + sizeof(uint32_t) * NUM_CASE_MAP_DATA);
660 __CFUniCharCaseMappingExtraTable = (const uint32_t **)__CFUniCharCaseMappingTable + NUM_CASE_MAP_DATA;
661
662 for (idx = 0;idx < NUM_CASE_MAP_DATA;idx++) {
663 __CFUniCharCaseMappingTableCounts[idx] = *((uint32_t *)__CFUniCharMappingTables[idx]) / (sizeof(uint32_t) * 2);
664 __CFUniCharCaseMappingTable[idx] = ((uint32_t *)__CFUniCharMappingTables[idx]) + 1;
665 __CFUniCharCaseMappingExtraTable[idx] = (const uint32_t *)((char *)__CFUniCharCaseMappingTable[idx] + *((uint32_t *)__CFUniCharMappingTables[idx]));
666 }
667
668 __CFSpinUnlock(&__CFUniCharMappingTableLock);
669 return true;
670 }
671
672 #if __BIG_ENDIAN__
673 #define TURKISH_LANG_CODE (0x7472) // tr
674 #define LITHUANIAN_LANG_CODE (0x6C74) // lt
675 #define AZERI_LANG_CODE (0x617A) // az
676 #else __BIG_ENDIAN__
677 #define TURKISH_LANG_CODE (0x7274) // tr
678 #define LITHUANIAN_LANG_CODE (0x746C) // lt
679 #define AZERI_LANG_CODE (0x7A61) // az
680 #endif __BIG_ENDIAN__
681
682 uint32_t CFUniCharMapCaseTo(UTF32Char theChar, UTF16Char *convertedChar, uint32_t maxLength, uint32_t ctype, uint32_t flags, const uint8_t *langCode) {
683 __CFUniCharBitmapData *data;
684 uint8_t planeNo = (theChar >> 16) & 0xFF;
685
686 caseFoldRetry:
687
688 #if DO_SPECIAL_CASE_MAPPING
689 if (flags & kCFUniCharCaseMapFinalSigma) {
690 if (theChar == 0x03A3) { // Final sigma
691 *convertedChar = (ctype == kCFUniCharToLowercase ? 0x03C2 : 0x03A3);
692 return 1;
693 }
694 }
695
696 if (langCode) {
697 switch (*(uint16_t *)langCode) {
698 case LITHUANIAN_LANG_CODE:
699 if (theChar == 0x0307 && (flags & kCFUniCharCaseMapAfter_i)) {
700 return 0;
701 } else if (ctype == kCFUniCharToLowercase) {
702 if (flags & kCFUniCharCaseMapMoreAbove) {
703 switch (theChar) {
704 case 0x0049: // LATIN CAPITAL LETTER I
705 *(convertedChar++) = 0x0069;
706 *(convertedChar++) = 0x0307;
707 return 2;
708
709 case 0x004A: // LATIN CAPITAL LETTER J
710 *(convertedChar++) = 0x006A;
711 *(convertedChar++) = 0x0307;
712 return 2;
713
714 case 0x012E: // LATIN CAPITAL LETTER I WITH OGONEK
715 *(convertedChar++) = 0x012F;
716 *(convertedChar++) = 0x0307;
717 return 2;
718
719 default: break;
720 }
721 }
722 switch (theChar) {
723 case 0x00CC: // LATIN CAPITAL LETTER I WITH GRAVE
724 *(convertedChar++) = 0x0069;
725 *(convertedChar++) = 0x0307;
726 *(convertedChar++) = 0x0300;
727 return 3;
728
729 case 0x00CD: // LATIN CAPITAL LETTER I WITH ACUTE
730 *(convertedChar++) = 0x0069;
731 *(convertedChar++) = 0x0307;
732 *(convertedChar++) = 0x0301;
733 return 3;
734
735 case 0x0128: // LATIN CAPITAL LETTER I WITH TILDE
736 *(convertedChar++) = 0x0069;
737 *(convertedChar++) = 0x0307;
738 *(convertedChar++) = 0x0303;
739 return 3;
740
741 default: break;
742 }
743 }
744 break;
745
746 case TURKISH_LANG_CODE:
747 case AZERI_LANG_CODE:
748 if (theChar == 0x0049) { // LATIN CAPITAL LETTER I
749 *convertedChar = (ctype == kCFUniCharToLowercase ? ((kCFUniCharCaseMapMoreAbove & flags) ? 0x0069 : 0x0131) : 0x0049);
750 return 1;
751 } else if ((theChar == 0x0069) || (theChar == 0x0130)) { // LATIN SMALL LETTER I & LATIN CAPITAL LETTER I WITH DOT ABOVE
752 *convertedChar = (ctype == kCFUniCharToLowercase ? 0x0069 : 0x0130);
753 return 1;
754 } else if (theChar == 0x0307 && (kCFUniCharCaseMapAfter_i & flags)) { // COMBINING DOT ABOVE AFTER_i
755 if (ctype == kCFUniCharToLowercase) {
756 return 0;
757 } else {
758 *convertedChar = 0x0307;
759 return 1;
760 }
761 }
762 break;
763
764 default: break;
765 }
766 }
767 #endif DO_SPECIAL_CASE_MAPPING
768
769 if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData();
770
771 data = __CFUniCharBitmapDataArray + ((ctype + kCFUniCharHasNonSelfLowercaseCharacterSet) - kCFUniCharDecimalDigitCharacterSet);
772
773 if (planeNo < data->_numPlanes && data->_planes[planeNo] && CFUniCharIsMemberOfBitmap(theChar, data->_planes[planeNo]) && (__CFUniCharCaseMappingTableCounts || __CFUniCharLoadCaseMappingTable())) {
774 uint32_t value = __CFUniCharGetMappedCase((const __CFUniCharCaseMappings *)__CFUniCharCaseMappingTable[ctype], __CFUniCharCaseMappingTableCounts[ctype], theChar);
775
776 if (!value && ctype == kCFUniCharToTitlecase) {
777 value = __CFUniCharGetMappedCase((const __CFUniCharCaseMappings *)__CFUniCharCaseMappingTable[kCFUniCharToUppercase], __CFUniCharCaseMappingTableCounts[kCFUniCharToUppercase], theChar);
778 if (value) ctype = kCFUniCharToUppercase;
779 }
780
781 if (value) {
782 int count = CFUniCharConvertFlagToCount(value);
783
784 if (count == 1) {
785 if (value & kCFUniCharNonBmpFlag) {
786 if (maxLength > 1) {
787 value = (value & 0xFFFFFF) - 0x10000;
788 *(convertedChar++) = (value >> 10) + 0xD800UL;
789 *(convertedChar++) = (value & 0x3FF) + 0xDC00UL;
790 return 2;
791 }
792 } else {
793 *convertedChar = (UTF16Char)value;
794 return 1;
795 }
796 } else if (count < (int)maxLength) {
797 const uint32_t *extraMapping = __CFUniCharCaseMappingExtraTable[ctype] + (value & 0xFFFFFF);
798
799 if (value & kCFUniCharNonBmpFlag) {
800 int copiedLen = 0;
801
802 while (count-- > 0) {
803 value = *(extraMapping++);
804 if (value > 0xFFFF) {
805 if (copiedLen + 2 >= (int)maxLength) break;
806 value = (value & 0xFFFFFF) - 0x10000;
807 convertedChar[copiedLen++] = (value >> 10) + 0xD800UL;
808 convertedChar[copiedLen++] = (value & 0x3FF) + 0xDC00UL;
809 } else {
810 if (copiedLen + 1 >= (int)maxLength) break;
811 convertedChar[copiedLen++] = value;
812 }
813 }
814 if (!count) return copiedLen;
815 } else {
816 int idx;
817
818 for (idx = 0;idx < count;idx++) *(convertedChar++) = (UTF16Char)*(extraMapping++);
819 return count;
820 }
821 }
822 }
823 } else if (ctype == kCFUniCharCaseFold) {
824 ctype = kCFUniCharToLowercase;
825 goto caseFoldRetry;
826 }
827
828 *convertedChar = theChar;
829 return 1;
830 }
831
832 UInt32 CFUniCharMapTo(UniChar theChar, UniChar *convertedChar, UInt32 maxLength, uint16_t ctype, UInt32 flags) {
833 if (ctype == kCFUniCharCaseFold + 1) { // kCFUniCharDecompose
834 if (CFUniCharIsDecomposableCharacter(theChar, false)) {
835 UTF32Char buffer[MAX_DECOMPOSED_LENGTH];
836 CFIndex usedLength = CFUniCharDecomposeCharacter(theChar, buffer, MAX_DECOMPOSED_LENGTH);
837 CFIndex idx;
838
839 for (idx = 0;idx < usedLength;idx++) *(convertedChar++) = buffer[idx];
840 return usedLength;
841 } else {
842 *convertedChar = theChar;
843 return 1;
844 }
845 } else {
846 return CFUniCharMapCaseTo(theChar, convertedChar, maxLength, ctype, flags, NULL);
847 }
848 }
849
850 CF_INLINE bool __CFUniCharIsMoreAbove(UTF16Char *buffer, uint32_t length) {
851 UTF32Char currentChar;
852 uint32_t property;
853
854 while (length-- > 0) {
855 currentChar = *(buffer)++;
856 if (CFUniCharIsSurrogateHighCharacter(currentChar) && (length > 0) && CFUniCharIsSurrogateLowCharacter(*(buffer + 1))) {
857 currentChar = CFUniCharGetLongCharacterForSurrogatePair(currentChar, *(buffer++));
858 --length;
859 }
860 if (!CFUniCharIsMemberOf(currentChar, kCFUniCharNonBaseCharacterSet)) break;
861
862 property = CFUniCharGetCombiningPropertyForCharacter(currentChar, CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16) & 0xFF));
863
864 if (property == 230) return true; // Above priority
865 }
866 return false;
867 }
868
869 CF_INLINE bool __CFUniCharIsAfter_i(UTF16Char *buffer, uint32_t length) {
870 UTF32Char currentChar = 0;
871 uint32_t property;
872 UTF32Char decomposed[MAX_DECOMPOSED_LENGTH];
873 uint32_t decompLength;
874 uint32_t idx;
875
876 if (length < 1) return 0;
877
878 buffer += length;
879 while (length-- > 1) {
880 currentChar = *(--buffer);
881 if (CFUniCharIsSurrogateLowCharacter(currentChar)) {
882 if ((length > 1) && CFUniCharIsSurrogateHighCharacter(*(buffer - 1))) {
883 currentChar = CFUniCharGetLongCharacterForSurrogatePair(*(--buffer), currentChar);
884 --length;
885 } else {
886 break;
887 }
888 }
889 if (!CFUniCharIsMemberOf(currentChar, kCFUniCharNonBaseCharacterSet)) break;
890
891 property = CFUniCharGetCombiningPropertyForCharacter(currentChar, CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16) & 0xFF));
892
893 if (property == 230) return false; // Above priority
894 }
895 if (length == 0) {
896 currentChar = *(--buffer);
897 } else if (CFUniCharIsSurrogateLowCharacter(currentChar) && CFUniCharIsSurrogateHighCharacter(*(--buffer))) {
898 currentChar = CFUniCharGetLongCharacterForSurrogatePair(*buffer, currentChar);
899 }
900
901 decompLength = CFUniCharDecomposeCharacter(currentChar, decomposed, MAX_DECOMPOSED_LENGTH);
902 currentChar = *decomposed;
903
904
905 for (idx = 1;idx < decompLength;idx++) {
906 currentChar = decomposed[idx];
907 property = CFUniCharGetCombiningPropertyForCharacter(currentChar, CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16) & 0xFF));
908
909 if (property == 230) return false; // Above priority
910 }
911 return true;
912 }
913
914 __private_extern__ uint32_t CFUniCharGetConditionalCaseMappingFlags(UTF32Char theChar, UTF16Char *buffer, uint32_t currentIndex, uint32_t length, uint32_t type, const uint8_t *langCode, uint32_t lastFlags) {
915 if (theChar == 0x03A3) { // GREEK CAPITAL LETTER SIGMA
916 if ((type == kCFUniCharToLowercase) && (currentIndex > 0)) {
917 UTF16Char *start = buffer;
918 UTF16Char *end = buffer + length;
919 UTF32Char otherChar;
920
921 // First check if we're after a cased character
922 buffer += (currentIndex - 1);
923 while (start <= buffer) {
924 otherChar = *(buffer--);
925 if (CFUniCharIsSurrogateLowCharacter(otherChar) && (start <= buffer) && CFUniCharIsSurrogateHighCharacter(*buffer)) {
926 otherChar = CFUniCharGetLongCharacterForSurrogatePair(*(buffer--), otherChar);
927 }
928 if (!CFUniCharIsMemberOf(otherChar, kCFUniCharCaseIgnorableCharacterSet)) {
929 if (!CFUniCharIsMemberOf(otherChar, kCFUniCharUppercaseLetterCharacterSet) && !CFUniCharIsMemberOf(otherChar, kCFUniCharLowercaseLetterCharacterSet)) return 0; // Uppercase set contains titlecase
930 break;
931 }
932 }
933
934 // Next check if we're before a cased character
935 buffer = start + currentIndex + 1;
936 while (buffer < end) {
937 otherChar = *(buffer++);
938 if (CFUniCharIsSurrogateHighCharacter(otherChar) && (buffer < end) && CFUniCharIsSurrogateLowCharacter(*buffer)) {
939 otherChar = CFUniCharGetLongCharacterForSurrogatePair(otherChar, *(buffer++));
940 }
941 if (!CFUniCharIsMemberOf(otherChar, kCFUniCharCaseIgnorableCharacterSet)) {
942 if (CFUniCharIsMemberOf(otherChar, kCFUniCharUppercaseLetterCharacterSet) || CFUniCharIsMemberOf(otherChar, kCFUniCharLowercaseLetterCharacterSet)) return 0; // Uppercase set contains titlecase
943 break;
944 }
945 }
946 return kCFUniCharCaseMapFinalSigma;
947 }
948 } else if (langCode) {
949 if (*((const uint16_t *)langCode) == LITHUANIAN_LANG_CODE) {
950 if ((theChar == 0x0307) && ((kCFUniCharCaseMapAfter_i|kCFUniCharCaseMapMoreAbove) & lastFlags) == (kCFUniCharCaseMapAfter_i|kCFUniCharCaseMapMoreAbove)) {
951 return (__CFUniCharIsAfter_i(buffer, currentIndex) ? kCFUniCharCaseMapAfter_i : 0);
952 } else if (type == kCFUniCharToLowercase) {
953 if ((theChar == 0x0049) || (theChar == 0x004A) || (theChar == 0x012E)) {
954 return (__CFUniCharIsMoreAbove(buffer + (++currentIndex), length - currentIndex) ? kCFUniCharCaseMapMoreAbove : 0);
955 }
956 } else if ((theChar == 'i') || (theChar == 'j')) {
957 return (__CFUniCharIsMoreAbove(buffer + (++currentIndex), length - currentIndex) ? (kCFUniCharCaseMapAfter_i|kCFUniCharCaseMapMoreAbove) : 0);
958 }
959 } else if ((*((const uint16_t *)langCode) == TURKISH_LANG_CODE) || (*((const uint16_t *)langCode) == AZERI_LANG_CODE)) {
960 if (type == kCFUniCharToLowercase) {
961 if (theChar == 0x0307) {
962 return (kCFUniCharCaseMapMoreAbove & lastFlags ? kCFUniCharCaseMapAfter_i : 0);
963 } else if (theChar == 0x0049) {
964 return (((++currentIndex < length) && (buffer[currentIndex] == 0x0307)) ? kCFUniCharCaseMapMoreAbove : 0);
965 }
966 }
967 }
968 }
969 return 0;
970 }
971
972 // Unicode property database
973 static __CFUniCharBitmapData *__CFUniCharUnicodePropertyTable = NULL;
974
975 static CFSpinLock_t __CFUniCharPropTableLock = 0;
976
977 #define PROP_DB_FILE "CFUniCharPropertyDatabase.data"
978
979 const void *CFUniCharGetUnicodePropertyDataForPlane(uint32_t propertyType, uint32_t plane) {
980
981 __CFSpinLock(&__CFUniCharPropTableLock);
982
983 if (NULL == __CFUniCharUnicodePropertyTable) {
984 const void *bytes;
985 const void *bodyBase;
986 const void *planeBase;
987 int headerSize;
988 int idx, count;
989 int planeIndex, planeCount;
990 int planeSize;
991
992 if (!__CFUniCharLoadFile(PROP_DB_FILE, &bytes)) {
993 __CFSpinUnlock(&__CFUniCharPropTableLock);
994 return NULL;
995 }
996
997 (char *)bytes += 4; // Skip Unicode version
998 headerSize = CFSwapInt32BigToHost(*(((uint32_t *)bytes)++));
999 headerSize -= (sizeof(uint32_t) * 2);
1000 bodyBase = (char *)bytes + headerSize;
1001
1002 count = headerSize / sizeof(uint32_t);
1003
1004 __CFUniCharUnicodePropertyTable = (__CFUniCharBitmapData *)CFAllocatorAllocate(NULL, sizeof(__CFUniCharBitmapData) * count, 0);
1005
1006 for (idx = 0;idx < count;idx++) {
1007 planeCount = *((const uint8_t *)bodyBase);
1008 (char *)planeBase = (char *)bodyBase + planeCount + (planeCount % 4 ? 4 - (planeCount % 4) : 0);
1009 __CFUniCharUnicodePropertyTable[idx]._planes = (const uint8_t **)CFAllocatorAllocate(NULL, sizeof(const void *) * planeCount, 0);
1010
1011 for (planeIndex = 0;planeIndex < planeCount;planeIndex++) {
1012 if ((planeSize = ((const uint8_t *)bodyBase)[planeIndex + 1])) {
1013 __CFUniCharUnicodePropertyTable[idx]._planes[planeIndex] = planeBase;
1014 (char *)planeBase += (planeSize * 256);
1015 } else {
1016 __CFUniCharUnicodePropertyTable[idx]._planes[planeIndex] = NULL;
1017 }
1018 }
1019
1020 __CFUniCharUnicodePropertyTable[idx]._numPlanes = planeCount;
1021 (char *)bodyBase += (CFSwapInt32BigToHost(*(((uint32_t *)bytes)++)));
1022 }
1023 }
1024
1025 __CFSpinUnlock(&__CFUniCharPropTableLock);
1026
1027 return (plane < __CFUniCharUnicodePropertyTable[propertyType]._numPlanes ? __CFUniCharUnicodePropertyTable[propertyType]._planes[plane] : NULL);
1028 }
1029
1030 __private_extern__ uint32_t CFUniCharGetNumberOfPlanesForUnicodePropertyData(uint32_t propertyType) {
1031 (void)CFUniCharGetUnicodePropertyDataForPlane(propertyType, 0);
1032 return __CFUniCharUnicodePropertyTable[propertyType]._numPlanes;
1033 }
1034
1035 __private_extern__ uint32_t CFUniCharGetUnicodeProperty(UTF32Char character, uint32_t propertyType) {
1036 if (propertyType == kCFUniCharCombiningProperty) {
1037 return CFUniCharGetCombiningPropertyForCharacter(character, CFUniCharGetUnicodePropertyDataForPlane(propertyType, (character >> 16) & 0xFF));
1038 } else if (propertyType == kCFUniCharBidiProperty) {
1039 return CFUniCharGetBidiPropertyForCharacter(character, CFUniCharGetUnicodePropertyDataForPlane(propertyType, (character >> 16) & 0xFF));
1040 } else {
1041 return 0;
1042 }
1043 }
1044
1045
1046
1047 /*
1048 The UTF8 conversion in the following function is derived from ConvertUTF.c
1049 */
1050 /*
1051 * Copyright 2001 Unicode, Inc.
1052 *
1053 * Disclaimer
1054 *
1055 * This source code is provided as is by Unicode, Inc. No claims are
1056 * made as to fitness for any particular purpose. No warranties of any
1057 * kind are expressed or implied. The recipient agrees to determine
1058 * applicability of information provided. If this file has been
1059 * purchased on magnetic or optical media from Unicode, Inc., the
1060 * sole remedy for any claim will be exchange of defective media
1061 * within 90 days of receipt.
1062 *
1063 * Limitations on Rights to Redistribute This Code
1064 *
1065 * Unicode, Inc. hereby grants the right to freely use the information
1066 * supplied in this file in the creation of products supporting the
1067 * Unicode Standard, and to make copies of this file in any form
1068 * for internal or external distribution as long as this notice
1069 * remains attached.
1070 */
1071 #define UNI_REPLACEMENT_CHAR (0x0000FFFDUL)
1072
1073 bool CFUniCharFillDestinationBuffer(const UTF32Char *src, uint32_t srcLength, void **dst, uint32_t dstLength, uint32_t *filledLength, uint32_t dstFormat) {
1074 UTF32Char currentChar;
1075 uint32_t usedLength = *filledLength;
1076
1077 if (dstFormat == kCFUniCharUTF16Format) {
1078 UTF16Char *dstBuffer = (UTF16Char *)*dst;
1079
1080 while (srcLength-- > 0) {
1081 currentChar = *(src++);
1082
1083 if (currentChar > 0xFFFF) { // Non-BMP
1084 usedLength += 2;
1085 if (dstLength) {
1086 if (usedLength > dstLength) return false;
1087 currentChar -= 0x10000;
1088 *(dstBuffer++) = (UTF16Char)((currentChar >> 10) + 0xD800UL);
1089 *(dstBuffer++) = (UTF16Char)((currentChar & 0x3FF) + 0xDC00UL);
1090 }
1091 } else {
1092 ++usedLength;
1093 if (dstLength) {
1094 if (usedLength > dstLength) return false;
1095 *(dstBuffer++) = (UTF16Char)currentChar;
1096 }
1097 }
1098 }
1099
1100 *dst = dstBuffer;
1101 } else if (dstFormat == kCFUniCharUTF8Format) {
1102 uint8_t *dstBuffer = (uint8_t *)*dst;
1103 uint16_t bytesToWrite = 0;
1104 const UTF32Char byteMask = 0xBF;
1105 const UTF32Char byteMark = 0x80;
1106 static const uint8_t firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
1107
1108 while (srcLength-- > 0) {
1109 currentChar = *(src++);
1110
1111 /* Figure out how many bytes the result will require */
1112 if (currentChar < (UTF32Char)0x80) {
1113 bytesToWrite = 1;
1114 } else if (currentChar < (UTF32Char)0x800) {
1115 bytesToWrite = 2;
1116 } else if (currentChar < (UTF32Char)0x10000) {
1117 bytesToWrite = 3;
1118 } else if (currentChar < (UTF32Char)0x200000) {
1119 bytesToWrite = 4;
1120 } else {
1121 bytesToWrite = 2;
1122 currentChar = UNI_REPLACEMENT_CHAR;
1123 }
1124
1125 usedLength += bytesToWrite;
1126
1127 if (dstLength) {
1128 if (usedLength > dstLength) return false;
1129
1130 dstBuffer += bytesToWrite;
1131 switch (bytesToWrite) { /* note: everything falls through. */
1132 case 4: *--dstBuffer = (currentChar | byteMark) & byteMask; currentChar >>= 6;
1133 case 3: *--dstBuffer = (currentChar | byteMark) & byteMask; currentChar >>= 6;
1134 case 2: *--dstBuffer = (currentChar | byteMark) & byteMask; currentChar >>= 6;
1135 case 1: *--dstBuffer = currentChar | firstByteMark[bytesToWrite];
1136 }
1137 dstBuffer += bytesToWrite;
1138 }
1139 }
1140
1141 *dst = dstBuffer;
1142 } else {
1143 UTF32Char *dstBuffer = (UTF32Char *)*dst;
1144
1145 while (srcLength-- > 0) {
1146 currentChar = *(src++);
1147
1148 ++usedLength;
1149 if (dstLength) {
1150 if (usedLength > dstLength) return false;
1151 *(dstBuffer++) = currentChar;
1152 }
1153 }
1154
1155 *dst = dstBuffer;
1156 }
1157
1158 *filledLength = usedLength;
1159
1160 return true;
1161 }