]>
Commit | Line | Data |
---|---|---|
9ce05555 | 1 | /* |
8ca704e1 | 2 | * Copyright (c) 2011 Apple Inc. All rights reserved. |
9ce05555 A |
3 | * |
4 | * @APPLE_LICENSE_HEADER_START@ | |
5 | * | |
9ce05555 A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. Please obtain a copy of the License at | |
10 | * http://www.opensource.apple.com/apsl/ and read it before using this | |
11 | * file. | |
12 | * | |
13 | * The Original Code and all software distributed under the License are | |
14 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
15 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
16 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
17 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
18 | * Please see the License for the specific language governing rights and | |
19 | * limitations under the License. | |
20 | * | |
21 | * @APPLE_LICENSE_HEADER_END@ | |
22 | */ | |
f64f9b69 | 23 | |
9ce05555 | 24 | /* CFUniChar.c |
8ca704e1 | 25 | Copyright (c) 2001-2011, Apple Inc. All rights reserved. |
9ce05555 A |
26 | Responsibility: Aki Inoue |
27 | */ | |
28 | ||
29 | #include <CoreFoundation/CFByteOrder.h> | |
30 | #include "CFInternal.h" | |
31 | #include "CFUniChar.h" | |
32 | #include "CFStringEncodingConverterExt.h" | |
33 | #include "CFUnicodeDecomposition.h" | |
34 | #include "CFUniCharPriv.h" | |
cf7d2af9 | 35 | #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD |
9ce05555 A |
36 | #include <fcntl.h> |
37 | #include <sys/types.h> | |
38 | #include <sys/stat.h> | |
39 | #include <sys/param.h> | |
40 | #include <sys/mman.h> | |
41 | #include <unistd.h> | |
42 | #include <stdlib.h> | |
43 | #endif | |
cf7d2af9 | 44 | #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED |
bd5b749c A |
45 | #include <mach/mach.h> |
46 | #endif | |
9ce05555 | 47 | |
cf7d2af9 A |
48 | #if DEPLOYMENT_TARGET_WINDOWS |
49 | extern void _CFGetFrameworkPath(wchar_t *path, int maxLength); | |
50 | #endif | |
51 | ||
52 | #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED | |
bd5b749c A |
53 | #define __kCFCharacterSetDir "/System/Library/CoreServices" |
54 | #elif DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD | |
55 | #define __kCFCharacterSetDir "/usr/local/share/CoreFoundation" | |
cf7d2af9 | 56 | #elif DEPLOYMENT_TARGET_WINDOWS |
bd5b749c | 57 | #define __kCFCharacterSetDir "\\Windows\\CoreFoundation" |
9ce05555 A |
58 | #endif |
59 | ||
cf7d2af9 | 60 | #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED |
bd5b749c | 61 | #define USE_MACHO_SEGMENT 1 |
cf7d2af9 | 62 | #endif |
bd5b749c A |
63 | |
// Local aliases for the boundary character-set IDs. They are consumed by the
// two set-ID mapping helpers that follow this enum, which translate a
// character-set ID into an index into the loaded bitmap table.
64 | enum { | |
// Alias of the newline set ID; treated by the helpers as the last ID of the first (external) range.
65 | kCFUniCharLastExternalSet = kCFUniCharNewlineCharacterSet, | |
// First ID of the second (internal) range.
66 | kCFUniCharFirstInternalSet = kCFUniCharCompatibilityDecomposableCharacterSet, | |
// Last ID of the internal range (not referenced in the visible part of this file).
67 | kCFUniCharLastInternalSet = kCFUniCharGraphemeExtendCharacterSet, | |
// ID that corresponds to index 0 of the bitmap table.
68 | kCFUniCharFirstBitmapSet = kCFUniCharDecimalDigitCharacterSet | |
69 | }; | |
70 | ||
71 | CF_INLINE uint32_t __CFUniCharMapExternalSetToInternalIndex(uint32_t cset) { return ((kCFUniCharFirstInternalSet <= cset) ? ((cset - kCFUniCharFirstInternalSet) + kCFUniCharLastExternalSet) : cset) - kCFUniCharFirstBitmapSet; } | |
72 | CF_INLINE uint32_t __CFUniCharMapCompatibilitySetID(uint32_t cset) { return ((cset == kCFUniCharControlCharacterSet) ? kCFUniCharControlAndFormatterCharacterSet : (((cset > kCFUniCharLastExternalSet) && (cset < kCFUniCharFirstInternalSet)) ? ((cset - kCFUniCharLastExternalSet) + kCFUniCharFirstInternalSet) : cset)); } | |
73 | ||
cf7d2af9 | 74 | #if (DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED) && USE_MACHO_SEGMENT |
bd5b749c A |
75 | #include <mach-o/getsect.h> |
76 | #include <mach-o/dyld.h> | |
77 | #include <mach-o/ldsyms.h> | |
78 | ||
79 | static const void *__CFGetSectDataPtr(const char *segname, const char *sectname, uint64_t *sizep) { | |
80 | uint32_t idx, cnt = _dyld_image_count(); | |
81 | for (idx = 0; idx < cnt; idx++) { | |
82 | void *mh = (void *)_dyld_get_image_header(idx); | |
83 | if (mh != &_mh_dylib_header) continue; | |
84 | #if __LP64__ | |
85 | const struct section_64 *sect = getsectbynamefromheader_64((struct mach_header_64 *)mh, segname, sectname); | |
86 | #else | |
87 | const struct section *sect = getsectbynamefromheader((struct mach_header *)mh, segname, sectname); | |
88 | #endif | |
89 | if (!sect) break; | |
90 | if (sizep) *sizep = (uint64_t)sect->size; | |
91 | return (char *)sect->addr + _dyld_get_image_vmaddr_slide(idx); | |
92 | } | |
93 | if (sizep) *sizep = 0ULL; | |
94 | return NULL; | |
95 | } | |
bd5b749c A |
96 | #endif |
97 | ||
98 | #if !USE_MACHO_SEGMENT | |
99 | ||
9ce05555 | 100 | // Memory map the file |
9ce05555 | 101 | |
8ca704e1 | 102 | #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX |
9ce05555 | 103 | CF_INLINE void __CFUniCharCharacterSetPath(char *cpath) { |
cf7d2af9 A |
104 | #elif DEPLOYMENT_TARGET_WINDOWS |
105 | CF_INLINE void __CFUniCharCharacterSetPath(wchar_t *wpath) { | |
bd5b749c | 106 | #else |
cf7d2af9 | 107 | #error Unknown or unspecified DEPLOYMENT_TARGET |
bd5b749c | 108 | #endif |
cf7d2af9 A |
109 | #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED |
110 | strlcpy(cpath, __kCFCharacterSetDir, MAXPATHLEN); | |
8ca704e1 A |
111 | #elif DEPLOYMENT_TARGET_LINUX |
112 | strlcpy(cpath, __kCFCharacterSetDir, MAXPATHLEN); | |
cf7d2af9 A |
113 | #elif DEPLOYMENT_TARGET_WINDOWS |
114 | wchar_t frameworkPath[MAXPATHLEN]; | |
115 | _CFGetFrameworkPath(frameworkPath, MAXPATHLEN); | |
116 | wcsncpy(wpath, frameworkPath, MAXPATHLEN); | |
117 | wcsncat(wpath, L"\\CoreFoundation.resources\\", MAXPATHLEN - wcslen(wpath)); | |
d8925383 | 118 | #else |
cf7d2af9 | 119 | strlcpy(cpath, __kCFCharacterSetDir, MAXPATHLEN); |
df7f3a2a | 120 | strlcat(cpath, "/CharacterSets/", MAXPATHLEN); |
d8925383 | 121 | #endif |
9ce05555 A |
122 | } |
123 | ||
cf7d2af9 | 124 | #if DEPLOYMENT_TARGET_WINDOWS |
bd5b749c A |
125 | #define MAX_BITMAP_STATE 512 |
126 | // | |
127 | // If a string is placed into this array, then it has been previously | |
128 | // determined that the bitmap file cannot be found. Thus, we make | |
129 | // the assumption it won't be there in future calls and we avoid | |
130 | // hitting the disk unnecessarily. This assumption isn't 100% | |
131 | // correct, as bitmap files can be added later. We would have to restart | |
132 | // the application in order to pick up the new bitmap info. | |
133 | // | |
134 | // We should probably revisit this. | |
135 | // | |
cf7d2af9 | 136 | static wchar_t *mappedBitmapState[MAX_BITMAP_STATE]; |
bd5b749c A |
137 | static int __nNumStateEntries = -1; |
138 | CRITICAL_SECTION __bitmapStateLock = {0}; | |
139 | ||
cf7d2af9 | 140 | bool __GetBitmapStateForName(const wchar_t *bitmapName) { |
bd5b749c A |
141 | if (NULL == __bitmapStateLock.DebugInfo) |
142 | InitializeCriticalSection(&__bitmapStateLock); | |
143 | EnterCriticalSection(&__bitmapStateLock); | |
144 | if (__nNumStateEntries >= 0) { | |
145 | for (int i = 0; i < __nNumStateEntries; i++) { | |
cf7d2af9 | 146 | if (wcscmp(mappedBitmapState[i], bitmapName) == 0) { |
bd5b749c A |
147 | LeaveCriticalSection(&__bitmapStateLock); |
148 | return true; | |
149 | } | |
150 | } | |
151 | } | |
152 | LeaveCriticalSection(&__bitmapStateLock); | |
153 | return false; | |
154 | } | |
cf7d2af9 | 155 | void __AddBitmapStateForName(const wchar_t *bitmapName) { |
bd5b749c A |
156 | if (NULL == __bitmapStateLock.DebugInfo) |
157 | InitializeCriticalSection(&__bitmapStateLock); | |
158 | EnterCriticalSection(&__bitmapStateLock); | |
159 | __nNumStateEntries++; | |
cf7d2af9 A |
160 | mappedBitmapState[__nNumStateEntries] = (wchar_t *)malloc((lstrlenW(bitmapName)+1) * sizeof(wchar_t)); |
161 | lstrcpyW(mappedBitmapState[__nNumStateEntries], bitmapName); | |
bd5b749c A |
162 | LeaveCriticalSection(&__bitmapStateLock); |
163 | } | |
cf7d2af9 | 164 | #endif |
9ce05555 | 165 | |
8ca704e1 A |
/*
 * Maps the file at fileName read-only into memory.
 *
 * fileName - path of the file (wide string on Windows).
 * bytes    - receives the base address of the mapping; set to NULL on failure.
 * fileSize - optional. POSIX: receives st_size of the file. Windows: receives
 *            the size of the mapped region as reported by VirtualQuery, or 0
 *            when it cannot be determined. Untouched when the function fails
 *            before the file is mapped.
 *
 * Returns true when the file was mapped. The mapping is never released by
 * this function; the descriptor/handles used to create it are closed.
 */
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX
static bool __CFUniCharLoadBytesFromFile(const char *fileName, const void **bytes, int64_t *fileSize) {
#elif DEPLOYMENT_TARGET_WINDOWS
static bool __CFUniCharLoadBytesFromFile(const wchar_t *fileName, const void **bytes, int64_t *fileSize) {
#else
#error Unknown or unspecified DEPLOYMENT_TARGET
#endif
#if DEPLOYMENT_TARGET_WINDOWS
    HANDLE bitmapFileHandle = NULL;
    HANDLE mappingHandle = NULL;

    *bytes = NULL;

    if (__GetBitmapStateForName(fileName)) {
        // The fileName has been tried in the past and was not found,
        // so just return false and move on.
        return false;
    }
    mappingHandle = OpenFileMappingW(FILE_MAP_READ, TRUE, fileName);
    if (NULL == mappingHandle) {
        if ((bitmapFileHandle = CreateFileW(fileName, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL)) == INVALID_HANDLE_VALUE) {
            // The bitmap file is not there. Add it to the list of non-existent
            // bitmap files so we don't have to try this again in the future.
            __AddBitmapStateForName(fileName);
            return false;
        }
        mappingHandle = CreateFileMapping(bitmapFileHandle, NULL, PAGE_READONLY, 0, 0, NULL);
        CloseHandle(bitmapFileHandle);
        if (!mappingHandle) return false;
    }

    *bytes = MapViewOfFileEx(mappingHandle, FILE_MAP_READ, 0, 0, 0, 0);

    if (NULL != fileSize) {
        MEMORY_BASIC_INFORMATION memoryInfo;

        // VirtualQueryEx expects a process handle; passing the mapping handle
        // made the query fail. Query the view in the current process instead.
        if ((NULL == *bytes) || (0 == VirtualQuery(*bytes, &memoryInfo, sizeof(memoryInfo)))) {
            *fileSize = 0; // This indicates no checking.
        } else {
            *fileSize = memoryInfo.RegionSize;
        }
    }

    CloseHandle(mappingHandle);

    return (*bytes ? true : false);
#else
    struct stat statBuf;
    void *mapped;
    int fd;

    *bytes = NULL;

    if ((fd = open(fileName, O_RDONLY, 0)) < 0) {
        return false;
    }
    if (fstat(fd, &statBuf) < 0) {
        close(fd);
        return false;
    }
    mapped = mmap(NULL, statBuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
    close(fd); // the mapping stays valid after the descriptor is closed

    if (MAP_FAILED == mapped) {
        return false;
    }
    *bytes = mapped;

    if (NULL != fileSize) *fileSize = statBuf.st_size;

    return true;
#endif
}
229 | ||
bd5b749c A |
230 | #endif // USE_MACHO_SEGMENT |
231 | ||
8ca704e1 A |
232 | #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX |
233 | static bool __CFUniCharLoadFile(const char *bitmapName, const void **bytes, int64_t *fileSize) { | |
cf7d2af9 | 234 | #elif DEPLOYMENT_TARGET_WINDOWS |
8ca704e1 | 235 | static bool __CFUniCharLoadFile(const wchar_t *bitmapName, const void **bytes, int64_t *fileSize) { |
cf7d2af9 A |
236 | #else |
237 | #error Unknown or unspecified DEPLOYMENT_TARGET | |
238 | #endif | |
bd5b749c A |
239 | #if USE_MACHO_SEGMENT |
240 | *bytes = __CFGetSectDataPtr("__UNICODE", bitmapName, NULL); | |
8ca704e1 A |
241 | |
242 | if (NULL != fileSize) *fileSize = 0; | |
243 | ||
bd5b749c A |
244 | return *bytes ? true : false; |
245 | #else | |
8ca704e1 | 246 | #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX |
9ce05555 | 247 | char cpath[MAXPATHLEN]; |
9ce05555 | 248 | __CFUniCharCharacterSetPath(cpath); |
8ca704e1 A |
249 | strlcat(cpath, bitmapName, MAXPATHLEN); |
250 | return __CFUniCharLoadBytesFromFile(cpath, bytes, fileSize); | |
cf7d2af9 A |
251 | #elif DEPLOYMENT_TARGET_WINDOWS |
252 | wchar_t wpath[MAXPATHLEN]; | |
253 | __CFUniCharCharacterSetPath(wpath); | |
8ca704e1 A |
254 | wcsncat(wpath, bitmapName, MAXPATHLEN); |
255 | return __CFUniCharLoadBytesFromFile(wpath, bytes, fileSize); | |
cf7d2af9 A |
256 | #else |
257 | #error Unknown or unspecified DEPLOYMENT_TARGET | |
258 | #endif | |
bd5b749c | 259 | #endif |
9ce05555 | 260 | } |
9ce05555 A |
261 | |
262 | // Bitmap functions | |
263 | CF_INLINE bool isControl(UTF32Char theChar, uint16_t charset, const void *data) { // ISO Control | |
bd5b749c | 264 | return (((theChar <= 0x001F) || (theChar >= 0x007F && theChar <= 0x009F)) ? true : false); |
9ce05555 A |
265 | } |
266 | ||
267 | CF_INLINE bool isWhitespace(UTF32Char theChar, uint16_t charset, const void *data) { // Space | |
bd5b749c | 268 | return (((theChar == 0x0020) || (theChar == 0x0009) || (theChar == 0x00A0) || (theChar == 0x1680) || (theChar >= 0x2000 && theChar <= 0x200B) || (theChar == 0x202F) || (theChar == 0x205F) || (theChar == 0x3000)) ? true : false); |
9ce05555 A |
269 | } |
270 | ||
bd5b749c A |
271 | CF_INLINE bool isNewline(UTF32Char theChar, uint16_t charset, const void *data) { // White space |
272 | return (((theChar >= 0x000A && theChar <= 0x000D) || (theChar == 0x0085) || (theChar == 0x2028) || (theChar == 0x2029)) ? true : false); | |
9ce05555 A |
273 | } |
274 | ||
bd5b749c A |
275 | CF_INLINE bool isWhitespaceAndNewline(UTF32Char theChar, uint16_t charset, const void *data) { // White space |
276 | return ((isWhitespace(theChar, charset, data) || isNewline(theChar, charset, data)) ? true : false); | |
9ce05555 A |
277 | } |
278 | ||
8ca704e1 A |
// Sanity check hook for a loaded Unicode data blob. Three variants are
// selected by the preprocessor; the two that can currently be compiled both
// accept every input.
279 | #if USE_MACHO_SEGMENT |
// Mach-O build: no check is performed (the loader reports a size of 0 in this configuration).
280 | CF_INLINE bool __CFSimpleFileSizeVerification(const void *bytes, int64_t fileSize) { return true; } | |
// "#elif 1" forces this stub for every non-Mach-O build, which disables the
// real check in the #else branch below.
281 | #elif 1 | |
282 | // <rdar://problem/8961744> __CFSimpleFileSizeVerification is broken | |
283 | static bool __CFSimpleFileSizeVerification(const void *bytes, int64_t fileSize) { return true; } | |
284 | #else | |
// Disabled implementation, kept for reference. A fileSize <= 0 skips all checks.
285 | static bool __CFSimpleFileSizeVerification(const void *bytes, int64_t fileSize) { | |
286 | bool result = true; | |
287 | ||
288 | if (fileSize > 0) { | |
// The file must at least hold the two leading 32-bit words.
289 | if ((sizeof(uint32_t) * 2) > fileSize) { | |
290 | result = false; | |
291 | } else { | |
// Header size is the big-endian 32-bit word at byte offset 4.
292 | uint32_t headerSize = CFSwapInt32BigToHost(*((uint32_t *)((char *)bytes + 4))); | |
293 | ||
294 | if ((headerSize < (sizeof(uint32_t) * 4)) || (headerSize > fileSize)) { | |
295 | result = false; | |
296 | } else { | |
// The last two 32-bit words of the header.
297 | const uint32_t *lastElement = (uint32_t *)(((uint8_t *)bytes) + headerSize) - 2; | |
298 | ||
// NOTE(review): the sum is compared against headerSize, so any non-zero
// last entry fails the check; a comparison against fileSize was presumably
// intended. This may be the breakage the radar refers to -- confirm.
299 | if ((headerSize + CFSwapInt32BigToHost(lastElement[0]) + CFSwapInt32BigToHost(lastElement[1])) > headerSize) result = false; | |
300 | } | |
301 | } | |
302 | } | |
303 | ||
304 | if (!result) CFLog(kCFLogLevelCritical, CFSTR("File size verification for Unicode database file failed.")); | |
305 | ||
306 | return result; | |
307 | } | |
308 | #endif // USE_MACHO_SEGMENT | |
309 | ||
9ce05555 A |
// Per-character-set entry of the bitmap table built by __CFUniCharLoadBitmapData.
310 | typedef struct { |
// Number of slots in _planes.
311 | uint32_t _numPlanes; | |
// One pointer per plane to an 8 KB bitmap inside the loaded data, or NULL when no bitmap is stored for that plane.
312 | const uint8_t **_planes; | |
313 | } __CFUniCharBitmapData; | |
314 | ||
315 | static char __CFUniCharUnicodeVersionString[8] = {0, 0, 0, 0, 0, 0, 0, 0}; | |
316 | ||
317 | static uint32_t __CFUniCharNumberOfBitmaps = 0; | |
318 | static __CFUniCharBitmapData *__CFUniCharBitmapDataArray = NULL; | |
319 | ||
bd5b749c | 320 | static CFSpinLock_t __CFUniCharBitmapLock = CFSpinLockInit; |
9ce05555 | 321 | |
8ca704e1 | 322 | #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX |
bd5b749c A |
323 | #if !defined(CF_UNICHAR_BITMAP_FILE) |
324 | #if USE_MACHO_SEGMENT | |
325 | #define CF_UNICHAR_BITMAP_FILE "__csbitmaps" | |
326 | #else | |
8ca704e1 | 327 | #define CF_UNICHAR_BITMAP_FILE "/CFCharacterSetBitmaps.bitmap" |
bd5b749c A |
328 | #endif |
329 | #endif | |
cf7d2af9 A |
330 | #elif DEPLOYMENT_TARGET_WINDOWS |
331 | #if !defined(CF_UNICHAR_BITMAP_FILE) | |
332 | #define CF_UNICHAR_BITMAP_FILE L"CFCharacterSetBitmaps.bitmap" | |
333 | #endif | |
334 | #else | |
335 | #error Unknown or unspecified DEPLOYMENT_TARGET | |
336 | #endif | |
9ce05555 A |
337 | |
338 | static bool __CFUniCharLoadBitmapData(void) { | |
bd5b749c | 339 | __CFUniCharBitmapData *array; |
9ce05555 A |
340 | uint32_t headerSize; |
341 | uint32_t bitmapSize; | |
342 | int numPlanes; | |
343 | uint8_t currentPlane; | |
344 | const void *bytes; | |
345 | const void *bitmapBase; | |
346 | const void *bitmap; | |
347 | int idx, bitmapIndex; | |
8ca704e1 | 348 | int64_t fileSize; |
9ce05555 A |
349 | |
350 | __CFSpinLock(&__CFUniCharBitmapLock); | |
351 | ||
8ca704e1 | 352 | if (__CFUniCharBitmapDataArray || !__CFUniCharLoadFile(CF_UNICHAR_BITMAP_FILE, &bytes, &fileSize) || !__CFSimpleFileSizeVerification(bytes, fileSize)) { |
9ce05555 A |
353 | __CFSpinUnlock(&__CFUniCharBitmapLock); |
354 | return false; | |
355 | } | |
356 | ||
357 | for (idx = 0;idx < 4 && ((const uint8_t *)bytes)[idx];idx++) { | |
358 | __CFUniCharUnicodeVersionString[idx * 2] = ((const uint8_t *)bytes)[idx]; | |
359 | __CFUniCharUnicodeVersionString[idx * 2 + 1] = '.'; | |
360 | } | |
361 | __CFUniCharUnicodeVersionString[(idx < 4 ? idx * 2 - 1 : 7)] = '\0'; | |
362 | ||
363 | headerSize = CFSwapInt32BigToHost(*((uint32_t *)((char *)bytes + 4))); | |
364 | ||
bd5b749c A |
365 | bitmapBase = (uint8_t *)bytes + headerSize; |
366 | bytes = (uint8_t *)bytes + (sizeof(uint32_t) * 2); | |
9ce05555 A |
367 | headerSize -= (sizeof(uint32_t) * 2); |
368 | ||
369 | __CFUniCharNumberOfBitmaps = headerSize / (sizeof(uint32_t) * 2); | |
370 | ||
bd5b749c | 371 | array = (__CFUniCharBitmapData *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(__CFUniCharBitmapData) * __CFUniCharNumberOfBitmaps, 0); |
9ce05555 A |
372 | |
373 | for (idx = 0;idx < (int)__CFUniCharNumberOfBitmaps;idx++) { | |
bd5b749c A |
374 | bitmap = (uint8_t *)bitmapBase + CFSwapInt32BigToHost(*((uint32_t *)bytes)); bytes = (uint8_t *)bytes + sizeof(uint32_t); |
375 | bitmapSize = CFSwapInt32BigToHost(*((uint32_t *)bytes)); bytes = (uint8_t *)bytes + sizeof(uint32_t); | |
9ce05555 A |
376 | |
377 | numPlanes = bitmapSize / (8 * 1024); | |
378 | numPlanes = *(const uint8_t *)((char *)bitmap + (((numPlanes - 1) * ((8 * 1024) + 1)) - 1)) + 1; | |
bd5b749c A |
379 | array[idx]._planes = (const uint8_t **)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(const void *) * numPlanes, 0); |
380 | array[idx]._numPlanes = numPlanes; | |
9ce05555 A |
381 | |
382 | currentPlane = 0; | |
383 | for (bitmapIndex = 0;bitmapIndex < numPlanes;bitmapIndex++) { | |
384 | if (bitmapIndex == currentPlane) { | |
bd5b749c A |
385 | array[idx]._planes[bitmapIndex] = (const uint8_t *)bitmap; |
386 | bitmap = (uint8_t *)bitmap + (8 * 1024); | |
387 | #if defined (__cplusplus) | |
388 | currentPlane = *(((const uint8_t*&)bitmap)++); | |
389 | #else | |
390 | currentPlane = *((const uint8_t *)bitmap++); | |
cf7d2af9 | 391 | #endif |
bd5b749c | 392 | |
9ce05555 | 393 | } else { |
bd5b749c | 394 | array[idx]._planes[bitmapIndex] = NULL; |
9ce05555 A |
395 | } |
396 | } | |
397 | } | |
398 | ||
bd5b749c A |
399 | __CFUniCharBitmapDataArray = array; |
400 | ||
9ce05555 A |
401 | __CFSpinUnlock(&__CFUniCharBitmapLock); |
402 | ||
403 | return true; | |
404 | } | |
405 | ||
406 | __private_extern__ const char *__CFUniCharGetUnicodeVersionString(void) { | |
407 | if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData(); | |
408 | return __CFUniCharUnicodeVersionString; | |
409 | } | |
410 | ||
9ce05555 | 411 | bool CFUniCharIsMemberOf(UTF32Char theChar, uint32_t charset) { |
bd5b749c | 412 | charset = __CFUniCharMapCompatibilitySetID(charset); |
9ce05555 A |
413 | |
414 | switch (charset) { | |
9ce05555 A |
415 | case kCFUniCharWhitespaceCharacterSet: |
416 | return isWhitespace(theChar, charset, NULL); | |
417 | ||
418 | case kCFUniCharWhitespaceAndNewlineCharacterSet: | |
bd5b749c A |
419 | return isWhitespaceAndNewline(theChar, charset, NULL); |
420 | ||
421 | case kCFUniCharNewlineCharacterSet: | |
422 | return isNewline(theChar, charset, NULL); | |
423 | ||
424 | default: { | |
425 | uint32_t tableIndex = __CFUniCharMapExternalSetToInternalIndex(charset); | |
426 | ||
9ce05555 A |
427 | if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData(); |
428 | ||
bd5b749c A |
429 | if (tableIndex < __CFUniCharNumberOfBitmaps) { |
430 | __CFUniCharBitmapData *data = __CFUniCharBitmapDataArray + tableIndex; | |
9ce05555 A |
431 | uint8_t planeNo = (theChar >> 16) & 0xFF; |
432 | ||
433 | // The bitmap data for kCFUniCharIllegalCharacterSet is actually LEGAL set less Plane 14 ~ 16 | |
434 | if (charset == kCFUniCharIllegalCharacterSet) { | |
435 | if (planeNo == 0x0E) { // Plane 14 | |
436 | theChar &= 0xFF; | |
437 | return (((theChar == 0x01) || ((theChar > 0x1F) && (theChar < 0x80))) ? false : true); | |
438 | } else if (planeNo == 0x0F || planeNo == 0x10) { // Plane 15 & 16 | |
439 | return ((theChar & 0xFF) > 0xFFFD ? true : false); | |
440 | } else { | |
441 | return (planeNo < data->_numPlanes && data->_planes[planeNo] ? !CFUniCharIsMemberOfBitmap(theChar, data->_planes[planeNo]) : true); | |
442 | } | |
443 | } else if (charset == kCFUniCharControlAndFormatterCharacterSet) { | |
444 | if (planeNo == 0x0E) { // Plane 14 | |
445 | theChar &= 0xFF; | |
446 | return (((theChar == 0x01) || ((theChar > 0x1F) && (theChar < 0x80))) ? true : false); | |
447 | } else { | |
448 | return (planeNo < data->_numPlanes && data->_planes[planeNo] ? CFUniCharIsMemberOfBitmap(theChar, data->_planes[planeNo]) : false); | |
449 | } | |
450 | } else { | |
451 | return (planeNo < data->_numPlanes && data->_planes[planeNo] ? CFUniCharIsMemberOfBitmap(theChar, data->_planes[planeNo]) : false); | |
452 | } | |
453 | } | |
454 | return false; | |
bd5b749c | 455 | } |
9ce05555 A |
456 | } |
457 | } | |
458 | ||
459 | const uint8_t *CFUniCharGetBitmapPtrForPlane(uint32_t charset, uint32_t plane) { | |
460 | if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData(); | |
461 | ||
bd5b749c | 462 | charset = __CFUniCharMapCompatibilitySetID(charset); |
9ce05555 | 463 | |
bd5b749c A |
464 | if ((charset > kCFUniCharWhitespaceAndNewlineCharacterSet) && (charset != kCFUniCharIllegalCharacterSet) && (charset != kCFUniCharNewlineCharacterSet)) { |
465 | uint32_t tableIndex = __CFUniCharMapExternalSetToInternalIndex(charset); | |
9ce05555 | 466 | |
bd5b749c A |
467 | if (tableIndex < __CFUniCharNumberOfBitmaps) { |
468 | __CFUniCharBitmapData *data = __CFUniCharBitmapDataArray + tableIndex; | |
469 | ||
470 | return (plane < data->_numPlanes ? data->_planes[plane] : NULL); | |
471 | } | |
9ce05555 A |
472 | } |
473 | return NULL; | |
474 | } | |
475 | ||
476 | __private_extern__ uint8_t CFUniCharGetBitmapForPlane(uint32_t charset, uint32_t plane, void *bitmap, bool isInverted) { | |
477 | const uint8_t *src = CFUniCharGetBitmapPtrForPlane(charset, plane); | |
478 | int numBytes = (8 * 1024); | |
479 | ||
480 | if (src) { | |
481 | if (isInverted) { | |
bd5b749c A |
482 | #if defined (__cplusplus) |
483 | while (numBytes-- > 0) *(((uint8_t *&)bitmap)++) = ~(*(src++)); | |
484 | #else | |
485 | while (numBytes-- > 0) *((uint8_t *)bitmap++) = ~(*(src++)); | |
cf7d2af9 | 486 | #endif |
9ce05555 | 487 | } else { |
bd5b749c A |
488 | #if defined (__cplusplus) |
489 | while (numBytes-- > 0) *(((uint8_t *&)bitmap)++) = *(src++); | |
490 | #else | |
491 | while (numBytes-- > 0) *((uint8_t *)bitmap++) = *(src++); | |
cf7d2af9 | 492 | #endif |
9ce05555 A |
493 | } |
494 | return kCFUniCharBitmapFilled; | |
495 | } else if (charset == kCFUniCharIllegalCharacterSet) { | |
bd5b749c | 496 | __CFUniCharBitmapData *data = __CFUniCharBitmapDataArray + __CFUniCharMapExternalSetToInternalIndex(__CFUniCharMapCompatibilitySetID(charset)); |
9ce05555 A |
497 | |
498 | if (plane < data->_numPlanes && (src = data->_planes[plane])) { | |
499 | if (isInverted) { | |
bd5b749c A |
500 | #if defined (__cplusplus) |
501 | while (numBytes-- > 0) *(((uint8_t *&)bitmap)++) = *(src++); | |
502 | #else | |
503 | while (numBytes-- > 0) *((uint8_t *)bitmap++) = *(src++); | |
cf7d2af9 | 504 | #endif |
9ce05555 | 505 | } else { |
bd5b749c A |
506 | #if defined (__cplusplus) |
507 | while (numBytes-- > 0) *(((uint8_t *&)bitmap)++) = ~(*(src++)); | |
508 | #else | |
509 | while (numBytes-- > 0) *((uint8_t *)bitmap++) = ~(*(src++)); | |
cf7d2af9 | 510 | #endif |
9ce05555 A |
511 | } |
512 | return kCFUniCharBitmapFilled; | |
513 | } else if (plane == 0x0E) { // Plane 14 | |
514 | int idx; | |
515 | uint8_t asciiRange = (isInverted ? (uint8_t)0xFF : (uint8_t)0); | |
516 | uint8_t otherRange = (isInverted ? (uint8_t)0 : (uint8_t)0xFF); | |
517 | ||
bd5b749c A |
518 | #if defined (__cplusplus) |
519 | *(((uint8_t *&)bitmap)++) = 0x02; // UE0001 LANGUAGE TAG | |
520 | #else | |
521 | *((uint8_t *)bitmap++) = 0x02; // UE0001 LANGUAGE TAG | |
cf7d2af9 | 522 | #endif |
9ce05555 | 523 | for (idx = 1;idx < numBytes;idx++) { |
bd5b749c A |
524 | #if defined (__cplusplus) |
525 | *(((uint8_t *&)bitmap)++) = ((idx >= (0x20 / 8) && (idx < (0x80 / 8))) ? asciiRange : otherRange); | |
526 | #else | |
527 | *((uint8_t *)bitmap++) = ((idx >= (0x20 / 8) && (idx < (0x80 / 8))) ? asciiRange : otherRange); | |
cf7d2af9 | 528 | #endif |
9ce05555 A |
529 | } |
530 | return kCFUniCharBitmapFilled; | |
531 | } else if (plane == 0x0F || plane == 0x10) { // Plane 15 & 16 | |
bd5b749c | 532 | uint32_t value = (isInverted ? ~0 : 0); |
9ce05555 A |
533 | numBytes /= 4; // for 32bit |
534 | ||
bd5b749c A |
535 | while (numBytes-- > 0) { |
536 | *((uint32_t *)bitmap) = value; | |
537 | #if defined (__cplusplus) | |
538 | bitmap = (uint8_t *)bitmap + sizeof(uint32_t); | |
539 | #else | |
540 | bitmap += sizeof(uint32_t); | |
cf7d2af9 | 541 | #endif |
bd5b749c | 542 | } |
9ce05555 A |
543 | *(((uint8_t *)bitmap) - 5) = (isInverted ? 0x3F : 0xC0); // 0xFFFE & 0xFFFF |
544 | return kCFUniCharBitmapFilled; | |
545 | } | |
546 | return (isInverted ? kCFUniCharBitmapEmpty : kCFUniCharBitmapAll); | |
bd5b749c | 547 | } else if ((charset < kCFUniCharDecimalDigitCharacterSet) || (charset == kCFUniCharNewlineCharacterSet)) { |
9ce05555 A |
548 | if (plane) return (isInverted ? kCFUniCharBitmapAll : kCFUniCharBitmapEmpty); |
549 | ||
bd5b749c A |
550 | uint8_t *bitmapBase = (uint8_t *)bitmap; |
551 | CFIndex idx; | |
552 | uint8_t nonFillValue = (isInverted ? (uint8_t)0xFF : (uint8_t)0); | |
9ce05555 | 553 | |
bd5b749c A |
554 | #if defined (__cplusplus) |
555 | while (numBytes-- > 0) *(((uint8_t *&)bitmap)++) = nonFillValue; | |
556 | #else | |
557 | while (numBytes-- > 0) *((uint8_t *)bitmap++) = nonFillValue; | |
cf7d2af9 | 558 | #endif |
9ce05555 | 559 | |
bd5b749c A |
560 | if ((charset == kCFUniCharWhitespaceAndNewlineCharacterSet) || (charset == kCFUniCharNewlineCharacterSet)) { |
561 | const UniChar newlines[] = {0x000A, 0x000B, 0x000C, 0x000D, 0x0085, 0x2028, 0x2029}; | |
9ce05555 | 562 | |
bd5b749c A |
563 | for (idx = 0;idx < (int)(sizeof(newlines) / sizeof(*newlines)); idx++) { |
564 | if (isInverted) { | |
565 | CFUniCharRemoveCharacterFromBitmap(newlines[idx], bitmapBase); | |
566 | } else { | |
567 | CFUniCharAddCharacterToBitmap(newlines[idx], bitmapBase); | |
9ce05555 A |
568 | } |
569 | } | |
570 | ||
bd5b749c A |
571 | if (charset == kCFUniCharNewlineCharacterSet) return kCFUniCharBitmapFilled; |
572 | } | |
573 | ||
574 | if (isInverted) { | |
575 | CFUniCharRemoveCharacterFromBitmap(0x0009, bitmapBase); | |
576 | CFUniCharRemoveCharacterFromBitmap(0x0020, bitmapBase); | |
577 | CFUniCharRemoveCharacterFromBitmap(0x00A0, bitmapBase); | |
578 | CFUniCharRemoveCharacterFromBitmap(0x1680, bitmapBase); | |
579 | CFUniCharRemoveCharacterFromBitmap(0x202F, bitmapBase); | |
580 | CFUniCharRemoveCharacterFromBitmap(0x205F, bitmapBase); | |
581 | CFUniCharRemoveCharacterFromBitmap(0x3000, bitmapBase); | |
582 | } else { | |
583 | CFUniCharAddCharacterToBitmap(0x0009, bitmapBase); | |
584 | CFUniCharAddCharacterToBitmap(0x0020, bitmapBase); | |
585 | CFUniCharAddCharacterToBitmap(0x00A0, bitmapBase); | |
586 | CFUniCharAddCharacterToBitmap(0x1680, bitmapBase); | |
587 | CFUniCharAddCharacterToBitmap(0x202F, bitmapBase); | |
588 | CFUniCharAddCharacterToBitmap(0x205F, bitmapBase); | |
589 | CFUniCharAddCharacterToBitmap(0x3000, bitmapBase); | |
590 | } | |
591 | ||
592 | for (idx = 0x2000;idx <= 0x200B;idx++) { | |
9ce05555 | 593 | if (isInverted) { |
bd5b749c | 594 | CFUniCharRemoveCharacterFromBitmap(idx, bitmapBase); |
9ce05555 | 595 | } else { |
bd5b749c | 596 | CFUniCharAddCharacterToBitmap(idx, bitmapBase); |
9ce05555 A |
597 | } |
598 | } | |
599 | return kCFUniCharBitmapFilled; | |
600 | } | |
601 | return (isInverted ? kCFUniCharBitmapAll : kCFUniCharBitmapEmpty); | |
602 | } | |
603 | ||
604 | __private_extern__ uint32_t CFUniCharGetNumberOfPlanes(uint32_t charset) { | |
bd5b749c A |
605 | if ((charset == kCFUniCharControlCharacterSet) || (charset == kCFUniCharControlAndFormatterCharacterSet)) { |
606 | return 15; // 0 to 14 | |
607 | } else if (charset < kCFUniCharDecimalDigitCharacterSet) { | |
9ce05555 A |
608 | return 1; |
609 | } else if (charset == kCFUniCharIllegalCharacterSet) { | |
610 | return 17; | |
611 | } else { | |
612 | uint32_t numPlanes; | |
613 | ||
614 | if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData(); | |
615 | ||
bd5b749c | 616 | numPlanes = __CFUniCharBitmapDataArray[__CFUniCharMapExternalSetToInternalIndex(__CFUniCharMapCompatibilitySetID(charset))]._numPlanes; |
9ce05555 A |
617 | |
618 | return numPlanes; | |
619 | } | |
9ce05555 A |
620 | } |
621 | ||
622 | // Mapping data loading | |
623 | static const void **__CFUniCharMappingTables = NULL; | |
624 | ||
bd5b749c | 625 | static CFSpinLock_t __CFUniCharMappingTableLock = CFSpinLockInit; |
9ce05555 | 626 | |
8ca704e1 | 627 | #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX |
bd5b749c A |
628 | #if __CF_BIG_ENDIAN__ |
629 | #if USE_MACHO_SEGMENT | |
630 | #define MAPPING_TABLE_FILE "__data" | |
631 | #else | |
8ca704e1 | 632 | #define MAPPING_TABLE_FILE "/CFUnicodeData-B.mapping" |
bd5b749c A |
633 | #endif |
634 | #else | |
635 | #if USE_MACHO_SEGMENT | |
636 | #define MAPPING_TABLE_FILE "__data" | |
637 | #else | |
8ca704e1 | 638 | #define MAPPING_TABLE_FILE "/CFUnicodeData-L.mapping" |
bd5b749c A |
639 | #endif |
640 | #endif | |
cf7d2af9 A |
641 | #elif DEPLOYMENT_TARGET_WINDOWS |
642 | #if __CF_BIG_ENDIAN__ | |
643 | #if USE_MACHO_SEGMENT | |
644 | #define MAPPING_TABLE_FILE "__data" | |
645 | #else | |
646 | #define MAPPING_TABLE_FILE L"CFUnicodeData-B.mapping" | |
647 | #endif | |
648 | #else | |
649 | #if USE_MACHO_SEGMENT | |
650 | #define MAPPING_TABLE_FILE "__data" | |
651 | #else | |
652 | #define MAPPING_TABLE_FILE L"CFUnicodeData-L.mapping" | |
653 | #endif | |
654 | #endif | |
655 | #else | |
656 | #error Unknown or unspecified DEPLOYMENT_TARGET | |
657 | #endif | |
9ce05555 A |
658 | |
659 | __private_extern__ const void *CFUniCharGetMappingData(uint32_t type) { | |
660 | ||
661 | __CFSpinLock(&__CFUniCharMappingTableLock); | |
662 | ||
663 | if (NULL == __CFUniCharMappingTables) { | |
664 | const void *bytes; | |
665 | const void *bodyBase; | |
666 | int headerSize; | |
667 | int idx, count; | |
8ca704e1 | 668 | int64_t fileSize; |
9ce05555 | 669 | |
8ca704e1 | 670 | if (!__CFUniCharLoadFile(MAPPING_TABLE_FILE, &bytes, &fileSize) || !__CFSimpleFileSizeVerification(bytes, fileSize)) { |
9ce05555 A |
671 | __CFSpinUnlock(&__CFUniCharMappingTableLock); |
672 | return NULL; | |
673 | } | |
674 | ||
bd5b749c A |
675 | #if defined (__cplusplus) |
676 | bytes = (uint8_t *)bytes + 4; // Skip Unicode version | |
677 | headerSize = *((uint8_t *)bytes); bytes = (uint8_t *)bytes + sizeof(uint32_t); | |
678 | #else | |
679 | bytes += 4; // Skip Unicode version | |
680 | headerSize = *((uint32_t *)bytes); bytes += sizeof(uint32_t); | |
cf7d2af9 | 681 | #endif |
9ce05555 A |
682 | headerSize -= (sizeof(uint32_t) * 2); |
683 | bodyBase = (char *)bytes + headerSize; | |
684 | ||
685 | count = headerSize / sizeof(uint32_t); | |
686 | ||
bd5b749c | 687 | __CFUniCharMappingTables = (const void **)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(const void *) * count, 0); |
9ce05555 A |
688 | |
689 | for (idx = 0;idx < count;idx++) { | |
bd5b749c A |
690 | #if defined (__cplusplus) |
691 | __CFUniCharMappingTables[idx] = (char *)bodyBase + *((uint32_t *)bytes); bytes = (uint8_t *)bytes + sizeof(uint32_t); | |
692 | #else | |
693 | __CFUniCharMappingTables[idx] = (char *)bodyBase + *((uint32_t *)bytes); bytes += sizeof(uint32_t); | |
cf7d2af9 | 694 | #endif |
9ce05555 A |
695 | } |
696 | } | |
697 | ||
698 | __CFSpinUnlock(&__CFUniCharMappingTableLock); | |
699 | ||
700 | return __CFUniCharMappingTables[type]; | |
701 | } | |
702 | ||
703 | // Case mapping functions | |
// Case mapping functions

// Set to 0 to compile out the final-sigma and language specific handling in CFUniCharMapCaseTo()
#define DO_SPECIAL_CASE_MAPPING 1

// Per case-mapping-type views into the mapping data; all three are set up
// together by __CFUniCharLoadCaseMappingTable().
static uint32_t *__CFUniCharCaseMappingTableCounts = NULL;       // number of key/value pairs per type
static uint32_t **__CFUniCharCaseMappingTable = NULL;            // start of the key/value pairs per type
static const uint32_t **__CFUniCharCaseMappingExtraTable = NULL; // data following the pairs per type

// One entry of a case mapping table
typedef struct {
    uint32_t _key;   // character being looked up
    uint32_t _value; // mapping information stored for _key
} __CFUniCharCaseMappings;
714 | ||
715 | /* Binary searches CFStringEncodingUnicodeTo8BitCharMap */ | |
716 | static uint32_t __CFUniCharGetMappedCase(const __CFUniCharCaseMappings *theTable, uint32_t numElem, UTF32Char character) { | |
717 | const __CFUniCharCaseMappings *p, *q, *divider; | |
718 | ||
719 | if ((character < theTable[0]._key) || (character > theTable[numElem-1]._key)) { | |
720 | return 0; | |
721 | } | |
722 | p = theTable; | |
723 | q = p + (numElem-1); | |
724 | while (p <= q) { | |
725 | divider = p + ((q - p) >> 1); /* divide by 2 */ | |
726 | if (character < divider->_key) { q = divider - 1; } | |
727 | else if (character > divider->_key) { p = divider + 1; } | |
728 | else { return divider->_value; } | |
729 | } | |
730 | return 0; | |
731 | } | |
732 | ||
733 | #define NUM_CASE_MAP_DATA (kCFUniCharCaseFold + 1) | |
734 | ||
735 | static bool __CFUniCharLoadCaseMappingTable(void) { | |
bd5b749c | 736 | uint32_t *countArray; |
9ce05555 A |
737 | int idx; |
738 | ||
739 | if (NULL == __CFUniCharMappingTables) (void)CFUniCharGetMappingData(kCFUniCharToLowercase); | |
740 | if (NULL == __CFUniCharMappingTables) return false; | |
741 | ||
742 | __CFSpinLock(&__CFUniCharMappingTableLock); | |
743 | ||
744 | if (__CFUniCharCaseMappingTableCounts) { | |
745 | __CFSpinUnlock(&__CFUniCharMappingTableLock); | |
746 | return true; | |
747 | } | |
748 | ||
bd5b749c A |
749 | countArray = (uint32_t *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(uint32_t) * NUM_CASE_MAP_DATA + sizeof(uint32_t *) * NUM_CASE_MAP_DATA * 2, 0); |
750 | __CFUniCharCaseMappingTable = (uint32_t **)((char *)countArray + sizeof(uint32_t) * NUM_CASE_MAP_DATA); | |
9ce05555 A |
751 | __CFUniCharCaseMappingExtraTable = (const uint32_t **)__CFUniCharCaseMappingTable + NUM_CASE_MAP_DATA; |
752 | ||
753 | for (idx = 0;idx < NUM_CASE_MAP_DATA;idx++) { | |
bd5b749c | 754 | countArray[idx] = *((uint32_t *)__CFUniCharMappingTables[idx]) / (sizeof(uint32_t) * 2); |
9ce05555 A |
755 | __CFUniCharCaseMappingTable[idx] = ((uint32_t *)__CFUniCharMappingTables[idx]) + 1; |
756 | __CFUniCharCaseMappingExtraTable[idx] = (const uint32_t *)((char *)__CFUniCharCaseMappingTable[idx] + *((uint32_t *)__CFUniCharMappingTables[idx])); | |
757 | } | |
758 | ||
bd5b749c A |
759 | __CFUniCharCaseMappingTableCounts = countArray; |
760 | ||
9ce05555 A |
761 | __CFSpinUnlock(&__CFUniCharMappingTableLock); |
762 | return true; | |
763 | } | |
764 | ||
// Two-letter language codes expressed as the value obtained when the first two
// bytes of the language string are loaded as one uint16_t in host byte order.
#if __CF_BIG_ENDIAN__
#define __CFUniCharLangCodeFromBytes(first, second) (((first) << 8) | (second))
#else
#define __CFUniCharLangCodeFromBytes(first, second) (((second) << 8) | (first))
#endif

#define TURKISH_LANG_CODE (__CFUniCharLangCodeFromBytes(0x74, 0x72)) // tr
#define LITHUANIAN_LANG_CODE (__CFUniCharLangCodeFromBytes(0x6C, 0x74)) // lt
#define AZERI_LANG_CODE (__CFUniCharLangCodeFromBytes(0x61, 0x7A)) // az
#define DUTCH_LANG_CODE (__CFUniCharLangCodeFromBytes(0x6E, 0x6C)) // nl
9ce05555 | 776 | |
bd5b749c | 777 | CFIndex CFUniCharMapCaseTo(UTF32Char theChar, UTF16Char *convertedChar, CFIndex maxLength, uint32_t ctype, uint32_t flags, const uint8_t *langCode) { |
9ce05555 A |
778 | __CFUniCharBitmapData *data; |
779 | uint8_t planeNo = (theChar >> 16) & 0xFF; | |
780 | ||
781 | caseFoldRetry: | |
782 | ||
783 | #if DO_SPECIAL_CASE_MAPPING | |
784 | if (flags & kCFUniCharCaseMapFinalSigma) { | |
785 | if (theChar == 0x03A3) { // Final sigma | |
786 | *convertedChar = (ctype == kCFUniCharToLowercase ? 0x03C2 : 0x03A3); | |
787 | return 1; | |
788 | } | |
789 | } | |
790 | ||
791 | if (langCode) { | |
792 | switch (*(uint16_t *)langCode) { | |
793 | case LITHUANIAN_LANG_CODE: | |
794 | if (theChar == 0x0307 && (flags & kCFUniCharCaseMapAfter_i)) { | |
795 | return 0; | |
796 | } else if (ctype == kCFUniCharToLowercase) { | |
797 | if (flags & kCFUniCharCaseMapMoreAbove) { | |
798 | switch (theChar) { | |
799 | case 0x0049: // LATIN CAPITAL LETTER I | |
800 | *(convertedChar++) = 0x0069; | |
801 | *(convertedChar++) = 0x0307; | |
802 | return 2; | |
803 | ||
804 | case 0x004A: // LATIN CAPITAL LETTER J | |
805 | *(convertedChar++) = 0x006A; | |
806 | *(convertedChar++) = 0x0307; | |
807 | return 2; | |
808 | ||
809 | case 0x012E: // LATIN CAPITAL LETTER I WITH OGONEK | |
810 | *(convertedChar++) = 0x012F; | |
811 | *(convertedChar++) = 0x0307; | |
812 | return 2; | |
813 | ||
814 | default: break; | |
815 | } | |
816 | } | |
817 | switch (theChar) { | |
818 | case 0x00CC: // LATIN CAPITAL LETTER I WITH GRAVE | |
819 | *(convertedChar++) = 0x0069; | |
820 | *(convertedChar++) = 0x0307; | |
821 | *(convertedChar++) = 0x0300; | |
822 | return 3; | |
823 | ||
824 | case 0x00CD: // LATIN CAPITAL LETTER I WITH ACUTE | |
825 | *(convertedChar++) = 0x0069; | |
826 | *(convertedChar++) = 0x0307; | |
827 | *(convertedChar++) = 0x0301; | |
828 | return 3; | |
829 | ||
830 | case 0x0128: // LATIN CAPITAL LETTER I WITH TILDE | |
831 | *(convertedChar++) = 0x0069; | |
832 | *(convertedChar++) = 0x0307; | |
833 | *(convertedChar++) = 0x0303; | |
834 | return 3; | |
835 | ||
836 | default: break; | |
837 | } | |
838 | } | |
839 | break; | |
840 | ||
841 | case TURKISH_LANG_CODE: | |
842 | case AZERI_LANG_CODE: | |
d8925383 A |
843 | if ((theChar == 0x0049) || (theChar == 0x0131)) { // LATIN CAPITAL LETTER I & LATIN SMALL LETTER DOTLESS I |
844 | *convertedChar = (((ctype == kCFUniCharToLowercase) || (ctype == kCFUniCharCaseFold)) ? ((kCFUniCharCaseMapMoreAbove & flags) ? 0x0069 : 0x0131) : 0x0049); | |
9ce05555 A |
845 | return 1; |
846 | } else if ((theChar == 0x0069) || (theChar == 0x0130)) { // LATIN SMALL LETTER I & LATIN CAPITAL LETTER I WITH DOT ABOVE | |
d8925383 | 847 | *convertedChar = (((ctype == kCFUniCharToLowercase) || (ctype == kCFUniCharCaseFold)) ? 0x0069 : 0x0130); |
9ce05555 A |
848 | return 1; |
849 | } else if (theChar == 0x0307 && (kCFUniCharCaseMapAfter_i & flags)) { // COMBINING DOT ABOVE AFTER_i | |
850 | if (ctype == kCFUniCharToLowercase) { | |
851 | return 0; | |
852 | } else { | |
853 | *convertedChar = 0x0307; | |
854 | return 1; | |
855 | } | |
856 | } | |
857 | break; | |
858 | ||
8ca704e1 A |
859 | case DUTCH_LANG_CODE: |
860 | if ((theChar == 0x004A) || (theChar == 0x006A)) { | |
861 | *convertedChar = (((ctype == kCFUniCharToUppercase) || (ctype == kCFUniCharToTitlecase) || (kCFUniCharCaseMapDutchDigraph & flags)) ? 0x004A : 0x006A); | |
862 | return 1; | |
863 | } | |
864 | break; | |
865 | ||
9ce05555 A |
866 | default: break; |
867 | } | |
868 | } | |
8ca704e1 | 869 | #endif // DO_SPECIAL_CASE_MAPPING |
9ce05555 A |
870 | |
871 | if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData(); | |
872 | ||
bd5b749c | 873 | data = __CFUniCharBitmapDataArray + __CFUniCharMapExternalSetToInternalIndex(__CFUniCharMapCompatibilitySetID(ctype + kCFUniCharHasNonSelfLowercaseCharacterSet)); |
9ce05555 A |
874 | |
875 | if (planeNo < data->_numPlanes && data->_planes[planeNo] && CFUniCharIsMemberOfBitmap(theChar, data->_planes[planeNo]) && (__CFUniCharCaseMappingTableCounts || __CFUniCharLoadCaseMappingTable())) { | |
876 | uint32_t value = __CFUniCharGetMappedCase((const __CFUniCharCaseMappings *)__CFUniCharCaseMappingTable[ctype], __CFUniCharCaseMappingTableCounts[ctype], theChar); | |
877 | ||
878 | if (!value && ctype == kCFUniCharToTitlecase) { | |
879 | value = __CFUniCharGetMappedCase((const __CFUniCharCaseMappings *)__CFUniCharCaseMappingTable[kCFUniCharToUppercase], __CFUniCharCaseMappingTableCounts[kCFUniCharToUppercase], theChar); | |
880 | if (value) ctype = kCFUniCharToUppercase; | |
881 | } | |
882 | ||
883 | if (value) { | |
bd5b749c | 884 | CFIndex count = CFUniCharConvertFlagToCount(value); |
9ce05555 A |
885 | |
886 | if (count == 1) { | |
887 | if (value & kCFUniCharNonBmpFlag) { | |
888 | if (maxLength > 1) { | |
889 | value = (value & 0xFFFFFF) - 0x10000; | |
bd5b749c A |
890 | *(convertedChar++) = (UTF16Char)(value >> 10) + 0xD800UL; |
891 | *(convertedChar++) = (UTF16Char)(value & 0x3FF) + 0xDC00UL; | |
9ce05555 A |
892 | return 2; |
893 | } | |
894 | } else { | |
895 | *convertedChar = (UTF16Char)value; | |
896 | return 1; | |
897 | } | |
bd5b749c | 898 | } else if (count < maxLength) { |
9ce05555 A |
899 | const uint32_t *extraMapping = __CFUniCharCaseMappingExtraTable[ctype] + (value & 0xFFFFFF); |
900 | ||
901 | if (value & kCFUniCharNonBmpFlag) { | |
bd5b749c | 902 | CFIndex copiedLen = 0; |
9ce05555 A |
903 | |
904 | while (count-- > 0) { | |
905 | value = *(extraMapping++); | |
906 | if (value > 0xFFFF) { | |
bd5b749c | 907 | if (copiedLen + 2 >= maxLength) break; |
9ce05555 | 908 | value = (value & 0xFFFFFF) - 0x10000; |
bd5b749c A |
909 | convertedChar[copiedLen++] = (UTF16Char)(value >> 10) + 0xD800UL; |
910 | convertedChar[copiedLen++] = (UTF16Char)(value & 0x3FF) + 0xDC00UL; | |
9ce05555 | 911 | } else { |
bd5b749c | 912 | if (copiedLen + 1 >= maxLength) break; |
9ce05555 A |
913 | convertedChar[copiedLen++] = value; |
914 | } | |
915 | } | |
916 | if (!count) return copiedLen; | |
917 | } else { | |
bd5b749c | 918 | CFIndex idx; |
9ce05555 A |
919 | |
920 | for (idx = 0;idx < count;idx++) *(convertedChar++) = (UTF16Char)*(extraMapping++); | |
921 | return count; | |
922 | } | |
923 | } | |
924 | } | |
925 | } else if (ctype == kCFUniCharCaseFold) { | |
926 | ctype = kCFUniCharToLowercase; | |
927 | goto caseFoldRetry; | |
928 | } | |
929 | ||
d8925383 A |
930 | if (theChar > 0xFFFF) { // non-BMP |
931 | theChar = (theChar & 0xFFFFFF) - 0x10000; | |
bd5b749c A |
932 | *(convertedChar++) = (UTF16Char)(theChar >> 10) + 0xD800UL; |
933 | *(convertedChar++) = (UTF16Char)(theChar & 0x3FF) + 0xDC00UL; | |
d8925383 A |
934 | return 2; |
935 | } else { | |
936 | *convertedChar = theChar; | |
937 | return 1; | |
938 | } | |
9ce05555 A |
939 | } |
940 | ||
bd5b749c | 941 | CFIndex CFUniCharMapTo(UniChar theChar, UniChar *convertedChar, CFIndex maxLength, uint16_t ctype, uint32_t flags) { |
9ce05555 A |
942 | if (ctype == kCFUniCharCaseFold + 1) { // kCFUniCharDecompose |
943 | if (CFUniCharIsDecomposableCharacter(theChar, false)) { | |
944 | UTF32Char buffer[MAX_DECOMPOSED_LENGTH]; | |
945 | CFIndex usedLength = CFUniCharDecomposeCharacter(theChar, buffer, MAX_DECOMPOSED_LENGTH); | |
946 | CFIndex idx; | |
947 | ||
948 | for (idx = 0;idx < usedLength;idx++) *(convertedChar++) = buffer[idx]; | |
949 | return usedLength; | |
950 | } else { | |
951 | *convertedChar = theChar; | |
952 | return 1; | |
953 | } | |
954 | } else { | |
955 | return CFUniCharMapCaseTo(theChar, convertedChar, maxLength, ctype, flags, NULL); | |
956 | } | |
957 | } | |
958 | ||
bd5b749c | 959 | CF_INLINE bool __CFUniCharIsMoreAbove(UTF16Char *buffer, CFIndex length) { |
9ce05555 A |
960 | UTF32Char currentChar; |
961 | uint32_t property; | |
962 | ||
963 | while (length-- > 0) { | |
964 | currentChar = *(buffer)++; | |
965 | if (CFUniCharIsSurrogateHighCharacter(currentChar) && (length > 0) && CFUniCharIsSurrogateLowCharacter(*(buffer + 1))) { | |
966 | currentChar = CFUniCharGetLongCharacterForSurrogatePair(currentChar, *(buffer++)); | |
967 | --length; | |
968 | } | |
969 | if (!CFUniCharIsMemberOf(currentChar, kCFUniCharNonBaseCharacterSet)) break; | |
970 | ||
bd5b749c | 971 | property = CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16) & 0xFF)); |
9ce05555 A |
972 | |
973 | if (property == 230) return true; // Above priority | |
974 | } | |
975 | return false; | |
976 | } | |
977 | ||
bd5b749c | 978 | CF_INLINE bool __CFUniCharIsAfter_i(UTF16Char *buffer, CFIndex length) { |
9ce05555 A |
979 | UTF32Char currentChar = 0; |
980 | uint32_t property; | |
981 | UTF32Char decomposed[MAX_DECOMPOSED_LENGTH]; | |
bd5b749c A |
982 | CFIndex decompLength; |
983 | CFIndex idx; | |
9ce05555 A |
984 | |
985 | if (length < 1) return 0; | |
986 | ||
987 | buffer += length; | |
988 | while (length-- > 1) { | |
989 | currentChar = *(--buffer); | |
990 | if (CFUniCharIsSurrogateLowCharacter(currentChar)) { | |
991 | if ((length > 1) && CFUniCharIsSurrogateHighCharacter(*(buffer - 1))) { | |
992 | currentChar = CFUniCharGetLongCharacterForSurrogatePair(*(--buffer), currentChar); | |
993 | --length; | |
994 | } else { | |
995 | break; | |
996 | } | |
997 | } | |
998 | if (!CFUniCharIsMemberOf(currentChar, kCFUniCharNonBaseCharacterSet)) break; | |
999 | ||
bd5b749c | 1000 | property = CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16) & 0xFF)); |
9ce05555 A |
1001 | |
1002 | if (property == 230) return false; // Above priority | |
1003 | } | |
1004 | if (length == 0) { | |
1005 | currentChar = *(--buffer); | |
1006 | } else if (CFUniCharIsSurrogateLowCharacter(currentChar) && CFUniCharIsSurrogateHighCharacter(*(--buffer))) { | |
1007 | currentChar = CFUniCharGetLongCharacterForSurrogatePair(*buffer, currentChar); | |
1008 | } | |
1009 | ||
1010 | decompLength = CFUniCharDecomposeCharacter(currentChar, decomposed, MAX_DECOMPOSED_LENGTH); | |
1011 | currentChar = *decomposed; | |
1012 | ||
1013 | ||
1014 | for (idx = 1;idx < decompLength;idx++) { | |
1015 | currentChar = decomposed[idx]; | |
bd5b749c | 1016 | property = CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16) & 0xFF)); |
9ce05555 A |
1017 | |
1018 | if (property == 230) return false; // Above priority | |
1019 | } | |
1020 | return true; | |
1021 | } | |
1022 | ||
bd5b749c | 1023 | __private_extern__ uint32_t CFUniCharGetConditionalCaseMappingFlags(UTF32Char theChar, UTF16Char *buffer, CFIndex currentIndex, CFIndex length, uint32_t type, const uint8_t *langCode, uint32_t lastFlags) { |
9ce05555 A |
1024 | if (theChar == 0x03A3) { // GREEK CAPITAL LETTER SIGMA |
1025 | if ((type == kCFUniCharToLowercase) && (currentIndex > 0)) { | |
1026 | UTF16Char *start = buffer; | |
1027 | UTF16Char *end = buffer + length; | |
1028 | UTF32Char otherChar; | |
1029 | ||
1030 | // First check if we're after a cased character | |
1031 | buffer += (currentIndex - 1); | |
1032 | while (start <= buffer) { | |
1033 | otherChar = *(buffer--); | |
1034 | if (CFUniCharIsSurrogateLowCharacter(otherChar) && (start <= buffer) && CFUniCharIsSurrogateHighCharacter(*buffer)) { | |
1035 | otherChar = CFUniCharGetLongCharacterForSurrogatePair(*(buffer--), otherChar); | |
1036 | } | |
1037 | if (!CFUniCharIsMemberOf(otherChar, kCFUniCharCaseIgnorableCharacterSet)) { | |
1038 | if (!CFUniCharIsMemberOf(otherChar, kCFUniCharUppercaseLetterCharacterSet) && !CFUniCharIsMemberOf(otherChar, kCFUniCharLowercaseLetterCharacterSet)) return 0; // Uppercase set contains titlecase | |
1039 | break; | |
1040 | } | |
1041 | } | |
1042 | ||
1043 | // Next check if we're before a cased character | |
1044 | buffer = start + currentIndex + 1; | |
1045 | while (buffer < end) { | |
1046 | otherChar = *(buffer++); | |
1047 | if (CFUniCharIsSurrogateHighCharacter(otherChar) && (buffer < end) && CFUniCharIsSurrogateLowCharacter(*buffer)) { | |
1048 | otherChar = CFUniCharGetLongCharacterForSurrogatePair(otherChar, *(buffer++)); | |
1049 | } | |
1050 | if (!CFUniCharIsMemberOf(otherChar, kCFUniCharCaseIgnorableCharacterSet)) { | |
1051 | if (CFUniCharIsMemberOf(otherChar, kCFUniCharUppercaseLetterCharacterSet) || CFUniCharIsMemberOf(otherChar, kCFUniCharLowercaseLetterCharacterSet)) return 0; // Uppercase set contains titlecase | |
1052 | break; | |
1053 | } | |
1054 | } | |
1055 | return kCFUniCharCaseMapFinalSigma; | |
1056 | } | |
1057 | } else if (langCode) { | |
1058 | if (*((const uint16_t *)langCode) == LITHUANIAN_LANG_CODE) { | |
1059 | if ((theChar == 0x0307) && ((kCFUniCharCaseMapAfter_i|kCFUniCharCaseMapMoreAbove) & lastFlags) == (kCFUniCharCaseMapAfter_i|kCFUniCharCaseMapMoreAbove)) { | |
1060 | return (__CFUniCharIsAfter_i(buffer, currentIndex) ? kCFUniCharCaseMapAfter_i : 0); | |
1061 | } else if (type == kCFUniCharToLowercase) { | |
1062 | if ((theChar == 0x0049) || (theChar == 0x004A) || (theChar == 0x012E)) { | |
1063 | return (__CFUniCharIsMoreAbove(buffer + (++currentIndex), length - currentIndex) ? kCFUniCharCaseMapMoreAbove : 0); | |
1064 | } | |
1065 | } else if ((theChar == 'i') || (theChar == 'j')) { | |
1066 | return (__CFUniCharIsMoreAbove(buffer + (++currentIndex), length - currentIndex) ? (kCFUniCharCaseMapAfter_i|kCFUniCharCaseMapMoreAbove) : 0); | |
1067 | } | |
1068 | } else if ((*((const uint16_t *)langCode) == TURKISH_LANG_CODE) || (*((const uint16_t *)langCode) == AZERI_LANG_CODE)) { | |
1069 | if (type == kCFUniCharToLowercase) { | |
1070 | if (theChar == 0x0307) { | |
1071 | return (kCFUniCharCaseMapMoreAbove & lastFlags ? kCFUniCharCaseMapAfter_i : 0); | |
1072 | } else if (theChar == 0x0049) { | |
1073 | return (((++currentIndex < length) && (buffer[currentIndex] == 0x0307)) ? kCFUniCharCaseMapMoreAbove : 0); | |
1074 | } | |
1075 | } | |
8ca704e1 A |
1076 | } else if (*((const uint16_t *)langCode) == DUTCH_LANG_CODE) { |
1077 | if (kCFUniCharCaseMapDutchDigraph & lastFlags) { | |
1078 | return (((theChar == 0x006A) || (theChar == 0x004A)) ? kCFUniCharCaseMapDutchDigraph : 0); | |
1079 | } else { | |
1080 | if ((type == kCFUniCharToTitlecase) && ((theChar == 0x0069) || (theChar == 0x0049))) { | |
1081 | return (((++currentIndex < length) && ((buffer[currentIndex] == 0x006A) || (buffer[currentIndex] == 0x004A))) ? kCFUniCharCaseMapDutchDigraph : 0); | |
1082 | } | |
1083 | } | |
1084 | } | |
9ce05555 A |
1085 | } |
1086 | return 0; | |
1087 | } | |
1088 | ||
1089 | // Unicode property database | |
1090 | static __CFUniCharBitmapData *__CFUniCharUnicodePropertyTable = NULL; | |
d8925383 | 1091 | static int __CFUniCharUnicodePropertyTableCount = 0; |
9ce05555 | 1092 | |
bd5b749c | 1093 | static CFSpinLock_t __CFUniCharPropTableLock = CFSpinLockInit; |
9ce05555 | 1094 | |
8ca704e1 | 1095 | #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX |
bd5b749c A |
1096 | #if USE_MACHO_SEGMENT |
1097 | #define PROP_DB_FILE "__properties" | |
1098 | #else | |
8ca704e1 | 1099 | #define PROP_DB_FILE "/CFUniCharPropertyDatabase.data" |
bd5b749c | 1100 | #endif |
cf7d2af9 A |
1101 | #elif DEPLOYMENT_TARGET_WINDOWS |
1102 | #if USE_MACHO_SEGMENT | |
1103 | #define PROP_DB_FILE "__properties" | |
1104 | #else | |
1105 | #define PROP_DB_FILE L"CFUniCharPropertyDatabase.data" | |
1106 | #endif | |
1107 | #else | |
1108 | #error Unknown or unspecified DEPLOYMENT_TARGET | |
1109 | #endif | |
9ce05555 A |
1110 | |
1111 | const void *CFUniCharGetUnicodePropertyDataForPlane(uint32_t propertyType, uint32_t plane) { | |
1112 | ||
1113 | __CFSpinLock(&__CFUniCharPropTableLock); | |
1114 | ||
1115 | if (NULL == __CFUniCharUnicodePropertyTable) { | |
bd5b749c | 1116 | __CFUniCharBitmapData *table; |
9ce05555 A |
1117 | const void *bytes; |
1118 | const void *bodyBase; | |
1119 | const void *planeBase; | |
1120 | int headerSize; | |
1121 | int idx, count; | |
1122 | int planeIndex, planeCount; | |
1123 | int planeSize; | |
8ca704e1 | 1124 | int64_t fileSize; |
9ce05555 | 1125 | |
8ca704e1 | 1126 | if (!__CFUniCharLoadFile(PROP_DB_FILE, &bytes, &fileSize) || !__CFSimpleFileSizeVerification(bytes, fileSize)) { |
9ce05555 A |
1127 | __CFSpinUnlock(&__CFUniCharPropTableLock); |
1128 | return NULL; | |
1129 | } | |
1130 | ||
bd5b749c A |
1131 | #if defined (__cplusplus) |
1132 | bytes = (uint8_t*)bytes + 4; // Skip Unicode version | |
1133 | headerSize = CFSwapInt32BigToHost(*((uint32_t *)bytes)); bytes = (uint8_t *)bytes + sizeof(uint32_t); | |
1134 | #else | |
1135 | bytes += 4; // Skip Unicode version | |
1136 | headerSize = CFSwapInt32BigToHost(*((uint32_t *)bytes)); bytes += sizeof(uint32_t); | |
cf7d2af9 | 1137 | #endif |
bd5b749c | 1138 | |
9ce05555 A |
1139 | headerSize -= (sizeof(uint32_t) * 2); |
1140 | bodyBase = (char *)bytes + headerSize; | |
1141 | ||
1142 | count = headerSize / sizeof(uint32_t); | |
d8925383 | 1143 | __CFUniCharUnicodePropertyTableCount = count; |
9ce05555 | 1144 | |
bd5b749c | 1145 | table = (__CFUniCharBitmapData *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(__CFUniCharBitmapData) * count, 0); |
9ce05555 A |
1146 | |
1147 | for (idx = 0;idx < count;idx++) { | |
1148 | planeCount = *((const uint8_t *)bodyBase); | |
bd5b749c A |
1149 | planeBase = (char *)bodyBase + planeCount + (planeCount % 4 ? 4 - (planeCount % 4) : 0); |
1150 | table[idx]._planes = (const uint8_t **)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(const void *) * planeCount, 0); | |
9ce05555 A |
1151 | |
1152 | for (planeIndex = 0;planeIndex < planeCount;planeIndex++) { | |
1153 | if ((planeSize = ((const uint8_t *)bodyBase)[planeIndex + 1])) { | |
bd5b749c A |
1154 | table[idx]._planes[planeIndex] = (const uint8_t *)planeBase; |
1155 | #if defined (__cplusplus) | |
1156 | planeBase = (char*)planeBase + (planeSize * 256); | |
1157 | #else | |
1158 | planeBase += (planeSize * 256); | |
cf7d2af9 | 1159 | #endif |
9ce05555 | 1160 | } else { |
bd5b749c | 1161 | table[idx]._planes[planeIndex] = NULL; |
9ce05555 A |
1162 | } |
1163 | } | |
1164 | ||
bd5b749c A |
1165 | table[idx]._numPlanes = planeCount; |
1166 | #if defined (__cplusplus) | |
1167 | bodyBase = (const uint8_t *)bodyBase + (CFSwapInt32BigToHost(*(uint32_t *)bytes)); | |
1168 | ((uint32_t *&)bytes) ++; | |
1169 | #else | |
1170 | bodyBase += (CFSwapInt32BigToHost(*((uint32_t *)bytes++))); | |
cf7d2af9 | 1171 | #endif |
9ce05555 | 1172 | } |
bd5b749c A |
1173 | |
1174 | __CFUniCharUnicodePropertyTable = table; | |
9ce05555 A |
1175 | } |
1176 | ||
1177 | __CFSpinUnlock(&__CFUniCharPropTableLock); | |
1178 | ||
1179 | return (plane < __CFUniCharUnicodePropertyTable[propertyType]._numPlanes ? __CFUniCharUnicodePropertyTable[propertyType]._planes[plane] : NULL); | |
1180 | } | |
1181 | ||
1182 | __private_extern__ uint32_t CFUniCharGetNumberOfPlanesForUnicodePropertyData(uint32_t propertyType) { | |
1183 | (void)CFUniCharGetUnicodePropertyDataForPlane(propertyType, 0); | |
1184 | return __CFUniCharUnicodePropertyTable[propertyType]._numPlanes; | |
1185 | } | |
1186 | ||
1187 | __private_extern__ uint32_t CFUniCharGetUnicodeProperty(UTF32Char character, uint32_t propertyType) { | |
1188 | if (propertyType == kCFUniCharCombiningProperty) { | |
bd5b749c | 1189 | return CFUniCharGetCombiningPropertyForCharacter(character, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(propertyType, (character >> 16) & 0xFF)); |
9ce05555 | 1190 | } else if (propertyType == kCFUniCharBidiProperty) { |
bd5b749c | 1191 | return CFUniCharGetBidiPropertyForCharacter(character, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(propertyType, (character >> 16) & 0xFF)); |
9ce05555 A |
1192 | } else { |
1193 | return 0; | |
1194 | } | |
1195 | } | |
1196 | ||
1197 | ||
1198 | ||
1199 | /* | |
1200 | The UTF8 conversion in the following function is derived from ConvertUTF.c | |
1201 | */ | |
1202 | /* | |
1203 | * Copyright 2001 Unicode, Inc. | |
1204 | * | |
1205 | * Disclaimer | |
1206 | * | |
1207 | * This source code is provided as is by Unicode, Inc. No claims are | |
1208 | * made as to fitness for any particular purpose. No warranties of any | |
1209 | * kind are expressed or implied. The recipient agrees to determine | |
1210 | * applicability of information provided. If this file has been | |
1211 | * purchased on magnetic or optical media from Unicode, Inc., the | |
1212 | * sole remedy for any claim will be exchange of defective media | |
1213 | * within 90 days of receipt. | |
1214 | * | |
1215 | * Limitations on Rights to Redistribute This Code | |
1216 | * | |
1217 | * Unicode, Inc. hereby grants the right to freely use the information | |
1218 | * supplied in this file in the creation of products supporting the | |
1219 | * Unicode Standard, and to make copies of this file in any form | |
1220 | * for internal or external distribution as long as this notice | |
1221 | * remains attached. | |
1222 | */ | |
1223 | #define UNI_REPLACEMENT_CHAR (0x0000FFFDUL) | |
1224 | ||
bd5b749c | 1225 | bool CFUniCharFillDestinationBuffer(const UTF32Char *src, CFIndex srcLength, void **dst, CFIndex dstLength, CFIndex *filledLength, uint32_t dstFormat) { |
9ce05555 | 1226 | UTF32Char currentChar; |
bd5b749c | 1227 | CFIndex usedLength = *filledLength; |
9ce05555 A |
1228 | |
1229 | if (dstFormat == kCFUniCharUTF16Format) { | |
1230 | UTF16Char *dstBuffer = (UTF16Char *)*dst; | |
1231 | ||
1232 | while (srcLength-- > 0) { | |
1233 | currentChar = *(src++); | |
1234 | ||
1235 | if (currentChar > 0xFFFF) { // Non-BMP | |
1236 | usedLength += 2; | |
1237 | if (dstLength) { | |
1238 | if (usedLength > dstLength) return false; | |
1239 | currentChar -= 0x10000; | |
1240 | *(dstBuffer++) = (UTF16Char)((currentChar >> 10) + 0xD800UL); | |
1241 | *(dstBuffer++) = (UTF16Char)((currentChar & 0x3FF) + 0xDC00UL); | |
1242 | } | |
1243 | } else { | |
1244 | ++usedLength; | |
1245 | if (dstLength) { | |
1246 | if (usedLength > dstLength) return false; | |
1247 | *(dstBuffer++) = (UTF16Char)currentChar; | |
1248 | } | |
1249 | } | |
1250 | } | |
1251 | ||
1252 | *dst = dstBuffer; | |
1253 | } else if (dstFormat == kCFUniCharUTF8Format) { | |
1254 | uint8_t *dstBuffer = (uint8_t *)*dst; | |
1255 | uint16_t bytesToWrite = 0; | |
1256 | const UTF32Char byteMask = 0xBF; | |
1257 | const UTF32Char byteMark = 0x80; | |
1258 | static const uint8_t firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; | |
1259 | ||
1260 | while (srcLength-- > 0) { | |
1261 | currentChar = *(src++); | |
1262 | ||
1263 | /* Figure out how many bytes the result will require */ | |
1264 | if (currentChar < (UTF32Char)0x80) { | |
1265 | bytesToWrite = 1; | |
1266 | } else if (currentChar < (UTF32Char)0x800) { | |
1267 | bytesToWrite = 2; | |
1268 | } else if (currentChar < (UTF32Char)0x10000) { | |
1269 | bytesToWrite = 3; | |
1270 | } else if (currentChar < (UTF32Char)0x200000) { | |
1271 | bytesToWrite = 4; | |
1272 | } else { | |
1273 | bytesToWrite = 2; | |
1274 | currentChar = UNI_REPLACEMENT_CHAR; | |
1275 | } | |
1276 | ||
1277 | usedLength += bytesToWrite; | |
1278 | ||
1279 | if (dstLength) { | |
1280 | if (usedLength > dstLength) return false; | |
1281 | ||
1282 | dstBuffer += bytesToWrite; | |
1283 | switch (bytesToWrite) { /* note: everything falls through. */ | |
1284 | case 4: *--dstBuffer = (currentChar | byteMark) & byteMask; currentChar >>= 6; | |
1285 | case 3: *--dstBuffer = (currentChar | byteMark) & byteMask; currentChar >>= 6; | |
1286 | case 2: *--dstBuffer = (currentChar | byteMark) & byteMask; currentChar >>= 6; | |
1287 | case 1: *--dstBuffer = currentChar | firstByteMark[bytesToWrite]; | |
1288 | } | |
1289 | dstBuffer += bytesToWrite; | |
1290 | } | |
1291 | } | |
1292 | ||
1293 | *dst = dstBuffer; | |
1294 | } else { | |
1295 | UTF32Char *dstBuffer = (UTF32Char *)*dst; | |
1296 | ||
1297 | while (srcLength-- > 0) { | |
1298 | currentChar = *(src++); | |
1299 | ||
1300 | ++usedLength; | |
1301 | if (dstLength) { | |
1302 | if (usedLength > dstLength) return false; | |
1303 | *(dstBuffer++) = currentChar; | |
1304 | } | |
1305 | } | |
1306 | ||
1307 | *dst = dstBuffer; | |
1308 | } | |
1309 | ||
1310 | *filledLength = usedLength; | |
1311 | ||
1312 | return true; | |
1313 | } | |
d8925383 | 1314 | |
#if DEPLOYMENT_TARGET_WINDOWS
/*
 Releases the tables this file allocates lazily and resets the corresponding
 globals. Each group is released while holding the lock that protects it.
*/
void __CFUniCharCleanup(void)
{
    int bitmapIndex;
    int propertyIndex;

    // Memory allocated by __CFUniCharLoadBitmapData()
    __CFSpinLock(&__CFUniCharBitmapLock);

    if (NULL != __CFUniCharBitmapDataArray) {
        for (bitmapIndex = 0; bitmapIndex < (int)__CFUniCharNumberOfBitmaps; ++bitmapIndex) {
            CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharBitmapDataArray[bitmapIndex]._planes);
            __CFUniCharBitmapDataArray[bitmapIndex]._planes = NULL;
        }

        CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharBitmapDataArray);
        __CFUniCharBitmapDataArray = NULL;
        __CFUniCharNumberOfBitmaps = 0;
    }

    __CFSpinUnlock(&__CFUniCharBitmapLock);

    __CFSpinLock(&__CFUniCharMappingTableLock);

    // Memory allocated by CFUniCharGetMappingData()
    if (NULL != __CFUniCharMappingTables) {
        CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharMappingTables);
        __CFUniCharMappingTables = NULL;
    }

    // Memory allocated by __CFUniCharLoadCaseMappingTable(); the two pointer
    // tables live inside the counts allocation, so they are only reset.
    if (NULL != __CFUniCharCaseMappingTableCounts) {
        CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharCaseMappingTableCounts);
        __CFUniCharCaseMappingTableCounts = NULL;

        __CFUniCharCaseMappingTable = NULL;
        __CFUniCharCaseMappingExtraTable = NULL;
    }

    __CFSpinUnlock(&__CFUniCharMappingTableLock);

    // Memory allocated by CFUniCharGetUnicodePropertyDataForPlane()
    __CFSpinLock(&__CFUniCharPropTableLock);

    if (NULL != __CFUniCharUnicodePropertyTable) {
        for (propertyIndex = 0; propertyIndex < __CFUniCharUnicodePropertyTableCount; ++propertyIndex) {
            CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharUnicodePropertyTable[propertyIndex]._planes);
            __CFUniCharUnicodePropertyTable[propertyIndex]._planes = NULL;
        }

        CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharUnicodePropertyTable);
        __CFUniCharUnicodePropertyTable = NULL;
        __CFUniCharUnicodePropertyTableCount = 0;
    }

    __CFSpinUnlock(&__CFUniCharPropTableLock);
}
#endif

#undef USE_MACHO_SEGMENT
1374 |