]> git.saurik.com Git - apple/cf.git/blob - CFStringEncodingConverter.c
CF-635.19.tar.gz
[apple/cf.git] / CFStringEncodingConverter.c
1 /*
2 * Copyright (c) 2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /* CFStringEncodingConverter.c
25 Copyright (c) 1998-2011, Apple Inc. All rights reserved.
26 Responsibility: Aki Inoue
27 */
28
29 #include "CFInternal.h"
30 #include <CoreFoundation/CFArray.h>
31 #include <CoreFoundation/CFDictionary.h>
32 #include "CFICUConverters.h"
33 #include <CoreFoundation/CFUniChar.h>
34 #include <CoreFoundation/CFPriv.h>
35 #include "CFUnicodeDecomposition.h"
36 #include "CFStringEncodingConverterExt.h"
37 #include "CFStringEncodingConverterPriv.h"
38 #include <stdlib.h>
39 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
40 #include <pthread.h>
41 #endif
42
/* Uniform signature used by the internal Unicode -> bytes wrapper functions in this file.
   Unlike the definition-level procs, the wrapper receives the owning converter record as its first argument. */
typedef CFIndex (*_CFToBytesProc)(const void *converter, uint32_t flags, const UniChar *characters, CFIndex numChars, uint8_t *bytes, CFIndex maxByteLen, CFIndex *usedByteLen);
/* Uniform signature used by the internal bytes -> Unicode wrapper functions in this file. */
typedef CFIndex (*_CFToUnicodeProc)(const void *converter, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen);

/* Runtime record built from a CFStringEncodingConverter definition by __CFEncodingConverterFromDefinition(). */
typedef struct {
    const CFStringEncodingConverter *definition; // the definition this record was built from
    _CFToBytesProc toBytes; // wrapper, or NULL to call definition->toBytes directly (see TO_BYTE); for the ICU class this slot holds the ICU name pointer instead
    _CFToUnicodeProc toUnicode; // wrapper, or NULL to call definition->toUnicode directly (see TO_UNICODE)
    _CFToUnicodeProc toCanonicalUnicode; // wrapper used when a canonical (decomposing) flag is set
    CFStringEncodingToBytesFallbackProc toBytesFallback; // definition's fallback, or the default one from this file
    CFStringEncodingToUnicodeFallbackProc toUnicodeFallback; // definition's fallback, or the default one from this file
} _CFEncodingConverter;
54
/* Macros
*/
/* TO_BYTE: convert Unicode to bytes through `conv`. Uses the wrapper in conv->toBytes when it is non-NULL,
   otherwise calls definition->toBytes directly as a CFStringEncodingToBytesProc.
   Note: the arguments are not parenthesized and `conv` is expanded more than once; pass simple expressions only. */
#define TO_BYTE(conv,flags,chars,numChars,bytes,max,used) (conv->toBytes ? conv->toBytes(conv,flags,chars,numChars,bytes,max,used) : ((CFStringEncodingToBytesProc)conv->definition->toBytes)(flags,chars,numChars,bytes,max,used))
/* TO_UNICODE: convert bytes to Unicode through `conv`. When a wrapper is installed, the canonical wrapper is chosen
   if either kCFStringEncodingUseCanonical or kCFStringEncodingUseHFSPlusCanonical is set in `flags`;
   with no wrapper, definition->toUnicode is called directly as a CFStringEncodingToUnicodeProc. */
#define TO_UNICODE(conv,flags,bytes,numBytes,chars,max,used) (conv->toUnicode ? (flags & (kCFStringEncodingUseCanonical|kCFStringEncodingUseHFSPlusCanonical) ? conv->toCanonicalUnicode(conv,flags,bytes,numBytes,chars,max,used) : conv->toUnicode(conv,flags,bytes,numBytes,chars,max,used)) : ((CFStringEncodingToUnicodeProc)conv->definition->toUnicode)(flags,bytes,numBytes,chars,max,used))

#define ASCIINewLine 0x0a
/* UTF-16 surrogate code unit ranges (high/leading: D800-DBFF, low/trailing: DC00-DFFF) */
#define kSurrogateHighStart 0xD800
#define kSurrogateHighEnd 0xDBFF
#define kSurrogateLowStart 0xDC00
#define kSurrogateLowEnd 0xDFFF

/* Size of the scratch buffers the wrapper functions below hand to a per-character conversion proc */
static const uint8_t __CFMaximumConvertedLength = 20;
67
/* Mapping 0xA0..0xFF (the Latin-1 Supplement, 160..255) to lossy ASCII.
   Entry i holds the replacement for U+00A0 + i: up to four ASCII bytes, zero-terminated when shorter.
   An entry whose first byte is 0 means "no ASCII replacement".
*/
static const struct {
    unsigned char chars[4];
} _toLossyASCIITable[] = {
    {{' ', 0, 0, 0}}, // NO-BREAK SPACE
    {{'!', 0, 0, 0}}, // INVERTED EXCLAMATION MARK
    {{'c', 0, 0, 0}}, // CENT SIGN
    {{'L', 0, 0, 0}}, // POUND SIGN
    {{'$', 0, 0, 0}}, // CURRENCY SIGN
    {{'Y', 0, 0, 0}}, // YEN SIGN
    {{'|', 0, 0, 0}}, // BROKEN BAR
    {{0, 0, 0, 0}}, // SECTION SIGN
    {{0, 0, 0, 0}}, // DIAERESIS
    {{'(', 'C', ')', 0}}, // COPYRIGHT SIGN
    {{'a', 0, 0, 0}}, // FEMININE ORDINAL INDICATOR
    {{'<', '<', 0, 0}}, // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
    {{0, 0, 0, 0}}, // NOT SIGN
    {{'-', 0, 0, 0}}, // SOFT HYPHEN
    {{'(', 'R', ')', 0}}, // REGISTERED SIGN
    {{0, 0, 0, 0}}, // MACRON
    {{0, 0, 0, 0}}, // DEGREE SIGN
    {{'+', '-', 0, 0}}, // PLUS-MINUS SIGN
    {{'2', 0, 0, 0}}, // SUPERSCRIPT TWO
    {{'3', 0, 0, 0}}, // SUPERSCRIPT THREE
    {{0, 0, 0, 0}}, // ACUTE ACCENT
    {{0, 0, 0, 0}}, // MICRO SIGN
    {{0, 0, 0, 0}}, // PILCROW SIGN
    {{0, 0, 0, 0}}, // MIDDLE DOT
    {{0, 0, 0, 0}}, // CEDILLA
    {{'1', 0, 0, 0}}, // SUPERSCRIPT ONE
    {{'o', 0, 0, 0}}, // MASCULINE ORDINAL INDICATOR
    {{'>', '>', 0, 0}}, // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
    {{'1', '/', '4', 0}}, // VULGAR FRACTION ONE QUARTER
    {{'1', '/', '2', 0}}, // VULGAR FRACTION ONE HALF
    {{'3', '/', '4', 0}}, // VULGAR FRACTION THREE QUARTERS
    {{'?', 0, 0, 0}}, // INVERTED QUESTION MARK
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH GRAVE
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH ACUTE
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH TILDE
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH DIAERESIS
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH RING ABOVE
    {{'A', 'E', 0, 0}}, // LATIN CAPITAL LETTER AE
    {{'C', 0, 0, 0}}, // LATIN CAPITAL LETTER C WITH CEDILLA
    {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH GRAVE
    {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH ACUTE
    {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX
    {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH DIAERESIS
    {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH GRAVE
    {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH ACUTE
    {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH CIRCUMFLEX
    {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH DIAERESIS
    {{'T', 'H', 0, 0}}, // LATIN CAPITAL LETTER ETH (Icelandic)
    {{'N', 0, 0, 0}}, // LATIN CAPITAL LETTER N WITH TILDE
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH GRAVE
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH ACUTE
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH TILDE
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH DIAERESIS
    {{'X', 0, 0, 0}}, // MULTIPLICATION SIGN
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH STROKE
    {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH GRAVE
    {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH ACUTE
    {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH CIRCUMFLEX
    {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH DIAERESIS
    {{'Y', 0, 0, 0}}, // LATIN CAPITAL LETTER Y WITH ACUTE
    {{'T', 'H', 0, 0}}, // LATIN CAPITAL LETTER THORN (Icelandic) -- upper case, matching the other capital letters
    {{'s', 0, 0, 0}}, // LATIN SMALL LETTER SHARP S (German)
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH GRAVE
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH ACUTE
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH CIRCUMFLEX
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH TILDE
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH DIAERESIS
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH RING ABOVE
    {{'a', 'e', 0, 0}}, // LATIN SMALL LETTER AE
    {{'c', 0, 0, 0}}, // LATIN SMALL LETTER C WITH CEDILLA
    {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH GRAVE
    {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH ACUTE
    {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH CIRCUMFLEX
    {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH DIAERESIS
    {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH GRAVE
    {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH ACUTE
    {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH CIRCUMFLEX
    {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH DIAERESIS
    {{'t', 'h', 0, 0}}, // LATIN SMALL LETTER ETH (Icelandic) -- lower case, matching the other small letters
    {{'n', 0, 0, 0}}, // LATIN SMALL LETTER N WITH TILDE
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH GRAVE
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH ACUTE
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH CIRCUMFLEX
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH TILDE
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH DIAERESIS
    {{'/', 0, 0, 0}}, // DIVISION SIGN
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH STROKE
    {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH GRAVE
    {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH ACUTE
    {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH CIRCUMFLEX
    {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH DIAERESIS
    {{'y', 0, 0, 0}}, // LATIN SMALL LETTER Y WITH ACUTE
    {{'t', 'h', 0, 0}}, // LATIN SMALL LETTER THORN (Icelandic)
    {{'y', 0, 0, 0}}, // LATIN SMALL LETTER Y WITH DIAERESIS
};
170
171 CF_INLINE CFIndex __CFToASCIILatin1Fallback(UniChar character, uint8_t *bytes, CFIndex maxByteLen) {
172 const uint8_t *losChars = (const uint8_t*)_toLossyASCIITable + (character - 0xA0) * sizeof(uint8_t[4]);
173 CFIndex numBytes = 0;
174 CFIndex idx, max = (maxByteLen && (maxByteLen < 4) ? maxByteLen : 4);
175
176 for (idx = 0;idx < max;idx++) {
177 if (losChars[idx]) {
178 if (maxByteLen) bytes[idx] = losChars[idx];
179 ++numBytes;
180 } else {
181 break;
182 }
183 }
184
185 return numBytes;
186 }
187
188 static CFIndex __CFDefaultToBytesFallbackProc(const UniChar *characters, CFIndex numChars, uint8_t *bytes, CFIndex maxByteLen, CFIndex *usedByteLen) {
189 CFIndex processCharLen = 1, filledBytesLen = 1;
190 uint8_t byte = '?';
191
192 if (*characters < 0xA0) { // 0x80 to 0x9F maps to ASCII C0 range
193 byte = (uint8_t)(*characters - 0x80);
194 } else if (*characters < 0x100) {
195 *usedByteLen = __CFToASCIILatin1Fallback(*characters, bytes, maxByteLen);
196 return 1;
197 } else if (*characters >= kSurrogateHighStart && *characters <= kSurrogateLowEnd) {
198 processCharLen = (numChars > 1 && *characters <= kSurrogateLowStart && *(characters + 1) >= kSurrogateLowStart && *(characters + 1) <= kSurrogateLowEnd ? 2 : 1);
199 } else if (CFUniCharIsMemberOf(*characters, kCFUniCharWhitespaceCharacterSet)) {
200 byte = ' ';
201 } else if (CFUniCharIsMemberOf(*characters, kCFUniCharWhitespaceAndNewlineCharacterSet)) {
202 byte = ASCIINewLine;
203 } else if (*characters == 0x2026) { // ellipsis
204 if (0 == maxByteLen) {
205 filledBytesLen = 3;
206 } else if (maxByteLen > 2) {
207 memset(bytes, '.', 3);
208 *usedByteLen = 3;
209 return processCharLen;
210 }
211 } else if (CFUniCharIsMemberOf(*characters, kCFUniCharDecomposableCharacterSet)) {
212 UTF32Char decomposed[MAX_DECOMPOSED_LENGTH];
213
214 (void)CFUniCharDecomposeCharacter(*characters, decomposed, MAX_DECOMPOSED_LENGTH);
215 if (*decomposed < 0x80) {
216 byte = (uint8_t)(*decomposed);
217 } else {
218 UTF16Char theChar = *decomposed;
219
220 return __CFDefaultToBytesFallbackProc(&theChar, 1, bytes, maxByteLen, usedByteLen);
221 }
222 }
223
224 if (maxByteLen) *bytes = byte;
225 *usedByteLen = filledBytesLen;
226 return processCharLen;
227 }
228
229 static CFIndex __CFDefaultToUnicodeFallbackProc(const uint8_t *bytes, CFIndex numBytes, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
230 if (maxCharLen) *characters = (UniChar)'?';
231 *usedCharLen = 1;
232 return 1;
233 }
234
/* Invoke the converter record's fallback procs (set up in __CFEncodingConverterFromDefinition). */
#define TO_BYTE_FALLBACK(conv,chars,numChars,bytes,max,used) (conv->toBytesFallback(chars,numChars,bytes,max,used))
#define TO_UNICODE_FALLBACK(conv,bytes,numBytes,chars,max,used) (conv->toUnicodeFallback(bytes,numBytes,chars,max,used))

/* NOTE(review): not referenced in the portion of the file visible here -- confirm usage before changing. */
#define EXTRA_BASE (0x0F00)
239
240 /* Wrapper funcs for non-standard converters
241 */
242 static CFIndex __CFToBytesCheapEightBitWrapper(const void *converter, uint32_t flags, const UniChar *characters, CFIndex numChars, uint8_t *bytes, CFIndex maxByteLen, CFIndex *usedByteLen) {
243 CFIndex processedCharLen = 0;
244 CFIndex length = (maxByteLen && (maxByteLen < numChars) ? maxByteLen : numChars);
245 uint8_t byte;
246
247 while (processedCharLen < length) {
248 if (!((CFStringEncodingCheapEightBitToBytesProc)((const _CFEncodingConverter*)converter)->definition->toBytes)(flags, characters[processedCharLen], &byte)) break;
249
250 if (maxByteLen) bytes[processedCharLen] = byte;
251 processedCharLen++;
252 }
253
254 *usedByteLen = processedCharLen;
255 return processedCharLen;
256 }
257
258 static CFIndex __CFToUnicodeCheapEightBitWrapper(const void *converter, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
259 CFIndex processedByteLen = 0;
260 CFIndex length = (maxCharLen && (maxCharLen < numBytes) ? maxCharLen : numBytes);
261 UniChar character;
262
263 while (processedByteLen < length) {
264 if (!((CFStringEncodingCheapEightBitToUnicodeProc)((const _CFEncodingConverter*)converter)->definition->toUnicode)(flags, bytes[processedByteLen], &character)) break;
265
266 if (maxCharLen) characters[processedByteLen] = character;
267 processedByteLen++;
268 }
269
270 *usedCharLen = processedByteLen;
271 return processedByteLen;
272 }
273
274 static CFIndex __CFToCanonicalUnicodeCheapEightBitWrapper(const void *converter, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
275 CFIndex processedByteLen = 0;
276 CFIndex theUsedCharLen = 0;
277 UTF32Char charBuffer[MAX_DECOMPOSED_LENGTH];
278 CFIndex usedLen;
279 UniChar character;
280 bool isHFSPlus = (flags & kCFStringEncodingUseHFSPlusCanonical ? true : false);
281
282 while ((processedByteLen < numBytes) && (!maxCharLen || (theUsedCharLen < maxCharLen))) {
283 if (!((CFStringEncodingCheapEightBitToUnicodeProc)((const _CFEncodingConverter*)converter)->definition->toUnicode)(flags, bytes[processedByteLen], &character)) break;
284
285 if (CFUniCharIsDecomposableCharacter(character, isHFSPlus)) {
286 CFIndex idx;
287
288 usedLen = CFUniCharDecomposeCharacter(character, charBuffer, MAX_DECOMPOSED_LENGTH);
289 *usedCharLen = theUsedCharLen;
290
291 for (idx = 0;idx < usedLen;idx++) {
292 if (charBuffer[idx] > 0xFFFF) { // Non-BMP
293 if (theUsedCharLen + 2 > maxCharLen) return processedByteLen;
294 theUsedCharLen += 2;
295 if (maxCharLen) {
296 charBuffer[idx] = charBuffer[idx] - 0x10000;
297 *(characters++) = (UniChar)(charBuffer[idx] >> 10) + 0xD800UL;
298 *(characters++) = (UniChar)(charBuffer[idx] & 0x3FF) + 0xDC00UL;
299 }
300 } else {
301 if (theUsedCharLen + 1 > maxCharLen) return processedByteLen;
302 ++theUsedCharLen;
303 *(characters++) = charBuffer[idx];
304 }
305 }
306 } else {
307 if (maxCharLen) *(characters++) = character;
308 ++theUsedCharLen;
309 }
310 processedByteLen++;
311 }
312
313 *usedCharLen = theUsedCharLen;
314 return processedByteLen;
315 }
316
317 static CFIndex __CFToBytesStandardEightBitWrapper(const void *converter, uint32_t flags, const UniChar *characters, CFIndex numChars, uint8_t *bytes, CFIndex maxByteLen, CFIndex *usedByteLen) {
318 CFIndex processedCharLen = 0;
319 uint8_t byte;
320 CFIndex usedLen;
321
322 *usedByteLen = 0;
323
324 while (numChars && (!maxByteLen || (*usedByteLen < maxByteLen))) {
325 if (!(usedLen = ((CFStringEncodingStandardEightBitToBytesProc)((const _CFEncodingConverter*)converter)->definition->toBytes)(flags, characters, numChars, &byte))) break;
326
327 if (maxByteLen) bytes[*usedByteLen] = byte;
328 (*usedByteLen)++;
329 characters += usedLen;
330 numChars -= usedLen;
331 processedCharLen += usedLen;
332 }
333
334 return processedCharLen;
335 }
336
337 static CFIndex __CFToUnicodeStandardEightBitWrapper(const void *converter, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
338 CFIndex processedByteLen = 0;
339 UniChar charBuffer[__CFMaximumConvertedLength];
340 CFIndex usedLen;
341
342 *usedCharLen = 0;
343
344 while ((processedByteLen < numBytes) && (!maxCharLen || (*usedCharLen < maxCharLen))) {
345 if (!(usedLen = ((CFStringEncodingCheapEightBitToUnicodeProc)((const _CFEncodingConverter*)converter)->definition->toUnicode)(flags, bytes[processedByteLen], charBuffer))) break;
346
347 if (maxCharLen) {
348 CFIndex idx;
349
350 if (*usedCharLen + usedLen > maxCharLen) break;
351
352 for (idx = 0;idx < usedLen;idx++) {
353 characters[*usedCharLen + idx] = charBuffer[idx];
354 }
355 }
356 *usedCharLen += usedLen;
357 processedByteLen++;
358 }
359
360 return processedByteLen;
361 }
362
363 static CFIndex __CFToCanonicalUnicodeStandardEightBitWrapper(const void *converter, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
364 CFIndex processedByteLen = 0;
365 UniChar charBuffer[__CFMaximumConvertedLength];
366 UTF32Char decompBuffer[MAX_DECOMPOSED_LENGTH];
367 CFIndex usedLen;
368 CFIndex decompedLen;
369 CFIndex idx, decompIndex;
370 bool isHFSPlus = (flags & kCFStringEncodingUseHFSPlusCanonical ? true : false);
371 CFIndex theUsedCharLen = 0;
372
373 while ((processedByteLen < numBytes) && (!maxCharLen || (theUsedCharLen < maxCharLen))) {
374 if (!(usedLen = ((CFStringEncodingCheapEightBitToUnicodeProc)((const _CFEncodingConverter*)converter)->definition->toUnicode)(flags, bytes[processedByteLen], charBuffer))) break;
375
376 for (idx = 0;idx < usedLen;idx++) {
377 if (CFUniCharIsDecomposableCharacter(charBuffer[idx], isHFSPlus)) {
378 decompedLen = CFUniCharDecomposeCharacter(charBuffer[idx], decompBuffer, MAX_DECOMPOSED_LENGTH);
379 *usedCharLen = theUsedCharLen;
380
381 for (decompIndex = 0;decompIndex < decompedLen;decompIndex++) {
382 if (decompBuffer[decompIndex] > 0xFFFF) { // Non-BMP
383 if (theUsedCharLen + 2 > maxCharLen) return processedByteLen;
384 theUsedCharLen += 2;
385 if (maxCharLen) {
386 charBuffer[idx] = charBuffer[idx] - 0x10000;
387 *(characters++) = (charBuffer[idx] >> 10) + 0xD800UL;
388 *(characters++) = (charBuffer[idx] & 0x3FF) + 0xDC00UL;
389 }
390 } else {
391 if (theUsedCharLen + 1 > maxCharLen) return processedByteLen;
392 ++theUsedCharLen;
393 *(characters++) = charBuffer[idx];
394 }
395 }
396 } else {
397 if (maxCharLen) *(characters++) = charBuffer[idx];
398 ++theUsedCharLen;
399 }
400 }
401 processedByteLen++;
402 }
403
404 *usedCharLen = theUsedCharLen;
405 return processedByteLen;
406 }
407
408 static CFIndex __CFToBytesCheapMultiByteWrapper(const void *converter, uint32_t flags, const UniChar *characters, CFIndex numChars, uint8_t *bytes, CFIndex maxByteLen, CFIndex *usedByteLen) {
409 CFIndex processedCharLen = 0;
410 uint8_t byteBuffer[__CFMaximumConvertedLength];
411 CFIndex usedLen;
412
413 *usedByteLen = 0;
414
415 while ((processedCharLen < numChars) && (!maxByteLen || (*usedByteLen < maxByteLen))) {
416 if (!(usedLen = ((CFStringEncodingCheapMultiByteToBytesProc)((const _CFEncodingConverter*)converter)->definition->toBytes)(flags, characters[processedCharLen], byteBuffer))) break;
417
418 if (maxByteLen) {
419 CFIndex idx;
420
421 if (*usedByteLen + usedLen > maxByteLen) break;
422
423 for (idx = 0;idx <usedLen;idx++) {
424 bytes[*usedByteLen + idx] = byteBuffer[idx];
425 }
426 }
427
428 *usedByteLen += usedLen;
429 processedCharLen++;
430 }
431
432 return processedCharLen;
433 }
434
435 static CFIndex __CFToUnicodeCheapMultiByteWrapper(const void *converter, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
436 CFIndex processedByteLen = 0;
437 UniChar character;
438 CFIndex usedLen;
439
440 *usedCharLen = 0;
441
442 while (numBytes && (!maxCharLen || (*usedCharLen < maxCharLen))) {
443 if (!(usedLen = ((CFStringEncodingCheapMultiByteToUnicodeProc)((const _CFEncodingConverter*)converter)->definition->toUnicode)(flags, bytes, numBytes, &character))) break;
444
445 if (maxCharLen) *(characters++) = character;
446 (*usedCharLen)++;
447 processedByteLen += usedLen;
448 bytes += usedLen;
449 numBytes -= usedLen;
450 }
451
452 return processedByteLen;
453 }
454
455 static CFIndex __CFToCanonicalUnicodeCheapMultiByteWrapper(const void *converter, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
456 CFIndex processedByteLen = 0;
457 UTF32Char charBuffer[MAX_DECOMPOSED_LENGTH];
458 UniChar character;
459 CFIndex usedLen;
460 CFIndex decomposedLen;
461 CFIndex theUsedCharLen = 0;
462 bool isHFSPlus = (flags & kCFStringEncodingUseHFSPlusCanonical ? true : false);
463
464 while (numBytes && (!maxCharLen || (theUsedCharLen < maxCharLen))) {
465 if (!(usedLen = ((CFStringEncodingCheapMultiByteToUnicodeProc)((const _CFEncodingConverter*)converter)->definition->toUnicode)(flags, bytes, numBytes, &character))) break;
466
467 if (CFUniCharIsDecomposableCharacter(character, isHFSPlus)) {
468 CFIndex idx;
469
470 decomposedLen = CFUniCharDecomposeCharacter(character, charBuffer, MAX_DECOMPOSED_LENGTH);
471 *usedCharLen = theUsedCharLen;
472
473 for (idx = 0;idx < decomposedLen;idx++) {
474 if (charBuffer[idx] > 0xFFFF) { // Non-BMP
475 if (theUsedCharLen + 2 > maxCharLen) return processedByteLen;
476 theUsedCharLen += 2;
477 if (maxCharLen) {
478 charBuffer[idx] = charBuffer[idx] - 0x10000;
479 *(characters++) = (UniChar)(charBuffer[idx] >> 10) + 0xD800UL;
480 *(characters++) = (UniChar)(charBuffer[idx] & 0x3FF) + 0xDC00UL;
481 }
482 } else {
483 if (theUsedCharLen + 1 > maxCharLen) return processedByteLen;
484 ++theUsedCharLen;
485 *(characters++) = charBuffer[idx];
486 }
487 }
488 } else {
489 if (maxCharLen) *(characters++) = character;
490 ++theUsedCharLen;
491 }
492
493 processedByteLen += usedLen;
494 bytes += usedLen;
495 numBytes -= usedLen;
496 }
497 *usedCharLen = theUsedCharLen;
498 return processedByteLen;
499 }
500
501 /* static functions
502 */
503 CF_INLINE _CFEncodingConverter *__CFEncodingConverterFromDefinition(const CFStringEncodingConverter *definition, CFStringEncoding encoding) {
504 #define NUM_OF_ENTRIES_CYCLE (10)
505 static uint32_t _currentIndex = 0;
506 static uint32_t _allocatedSize = 0;
507 static _CFEncodingConverter *_allocatedEntries = NULL;
508 _CFEncodingConverter *converter;
509
510
511 if ((_currentIndex + 1) >= _allocatedSize) {
512 _currentIndex = 0;
513 _allocatedSize = 0;
514 _allocatedEntries = NULL;
515 }
516 if (_allocatedEntries == NULL) { // Not allocated yet
517 _allocatedEntries = (_CFEncodingConverter *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(_CFEncodingConverter) * NUM_OF_ENTRIES_CYCLE, 0);
518 _allocatedSize = NUM_OF_ENTRIES_CYCLE;
519 converter = &(_allocatedEntries[_currentIndex]);
520 } else {
521 converter = &(_allocatedEntries[++_currentIndex]);
522 }
523
524 memset(converter, 0, sizeof(_CFEncodingConverter));
525
526 converter->definition = definition;
527
528 switch (definition->encodingClass) {
529 case kCFStringEncodingConverterStandard:
530 converter->toBytes = NULL;
531 converter->toUnicode = NULL;
532 converter->toCanonicalUnicode = NULL;
533 break;
534
535 case kCFStringEncodingConverterCheapEightBit:
536 converter->toBytes = __CFToBytesCheapEightBitWrapper;
537 converter->toUnicode = __CFToUnicodeCheapEightBitWrapper;
538 converter->toCanonicalUnicode = __CFToCanonicalUnicodeCheapEightBitWrapper;
539 break;
540
541 case kCFStringEncodingConverterStandardEightBit:
542 converter->toBytes = __CFToBytesStandardEightBitWrapper;
543 converter->toUnicode = __CFToUnicodeStandardEightBitWrapper;
544 converter->toCanonicalUnicode = __CFToCanonicalUnicodeStandardEightBitWrapper;
545 break;
546
547 case kCFStringEncodingConverterCheapMultiByte:
548 converter->toBytes = __CFToBytesCheapMultiByteWrapper;
549 converter->toUnicode = __CFToUnicodeCheapMultiByteWrapper;
550 converter->toCanonicalUnicode = __CFToCanonicalUnicodeCheapMultiByteWrapper;
551 break;
552
553 case kCFStringEncodingConverterICU:
554 converter->toBytes = (_CFToBytesProc)__CFStringEncodingGetICUName(encoding);
555 break;
556
557 case kCFStringEncodingConverterPlatformSpecific:
558 break;
559
560 default: // Shouln't be here
561 return NULL;
562 }
563
564 converter->toBytesFallback = (definition->toBytesFallback ? definition->toBytesFallback : __CFDefaultToBytesFallbackProc);
565 converter->toUnicodeFallback = (definition->toUnicodeFallback ? definition->toUnicodeFallback : __CFDefaultToUnicodeFallbackProc);
566
567 return converter;
568 }
569
570 CF_INLINE const CFStringEncodingConverter *__CFStringEncodingConverterGetDefinition(CFStringEncoding encoding) {
571 switch (encoding) {
572 case kCFStringEncodingUTF8:
573 return &__CFConverterUTF8;
574
575 case kCFStringEncodingMacRoman:
576 return &__CFConverterMacRoman;
577
578 case kCFStringEncodingWindowsLatin1:
579 return &__CFConverterWinLatin1;
580
581 case kCFStringEncodingASCII:
582 return &__CFConverterASCII;
583
584 case kCFStringEncodingISOLatin1:
585 return &__CFConverterISOLatin1;
586
587
588 case kCFStringEncodingNextStepLatin:
589 return &__CFConverterNextStepLatin;
590
591
592 default:
593 return __CFStringEncodingGetExternalConverter(encoding);
594 }
595 }
596
597 static const _CFEncodingConverter *__CFGetConverter(uint32_t encoding) {
598 const _CFEncodingConverter *converter = NULL;
599 const _CFEncodingConverter **commonConverterSlot = NULL;
600 static _CFEncodingConverter *commonConverters[3] = {NULL, NULL, NULL}; // UTF8, MacRoman/WinLatin1, and the default encoding*
601 static CFMutableDictionaryRef mappingTable = NULL;
602 static CFSpinLock_t lock = CFSpinLockInit;
603
604 switch (encoding) {
605 case kCFStringEncodingUTF8: commonConverterSlot = (const _CFEncodingConverter **)&(commonConverters[0]); break;
606
607 /* the swith here should avoid possible bootstrap issues in the default: case below when invoked from CFStringGetSystemEncoding() */
608 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX
609 case kCFStringEncodingMacRoman: commonConverterSlot = (const _CFEncodingConverter **)&(commonConverters[1]); break;
610 #elif DEPLOYMENT_TARGET_WINDOWS
611 case kCFStringEncodingWindowsLatin1: commonConverterSlot = (const _CFEncodingConverter **)(&(commonConverters[1])); break;
612 #else
613 #warning This case must match __defaultEncoding value defined in CFString.c
614 case kCFStringEncodingISOLatin1: commonConverterSlot = (const _CFEncodingConverter **)(&(commonConverters[1])); break;
615 #endif /* DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED */
616
617 default: if (CFStringGetSystemEncoding() == encoding) commonConverterSlot = (const _CFEncodingConverter **)&(commonConverters[2]); break;
618 }
619
620 __CFSpinLock(&lock);
621 converter = ((NULL == commonConverterSlot) ? ((NULL == mappingTable) ? NULL : (const _CFEncodingConverter *)CFDictionaryGetValue(mappingTable, (const void *)(uintptr_t)encoding)) : *commonConverterSlot);
622 __CFSpinUnlock(&lock);
623
624 if (NULL == converter) {
625 const CFStringEncodingConverter *definition = __CFStringEncodingConverterGetDefinition(encoding);
626
627 if (NULL != definition) {
628 __CFSpinLock(&lock);
629 converter = ((NULL == commonConverterSlot) ? ((NULL == mappingTable) ? NULL : (const _CFEncodingConverter *)CFDictionaryGetValue(mappingTable, (const void *)(uintptr_t)encoding)) : *commonConverterSlot);
630
631 if (NULL == converter) {
632 converter = __CFEncodingConverterFromDefinition(definition, encoding);
633
634 if (NULL == commonConverterSlot) {
635 if (NULL == mappingTable) mappingTable = CFDictionaryCreateMutable(NULL, 0, NULL, NULL);
636
637 CFDictionarySetValue(mappingTable, (const void *)(uintptr_t)encoding, converter);
638 } else {
639 *commonConverterSlot = converter;
640 }
641 }
642 __CFSpinUnlock(&lock);
643 }
644 }
645
646 return converter;
647 }
648
649 /* Public API
650 */
651 uint32_t CFStringEncodingUnicodeToBytes(uint32_t encoding, uint32_t flags, const UniChar *characters, CFIndex numChars, CFIndex *usedCharLen, uint8_t *bytes, CFIndex maxByteLen, CFIndex *usedByteLen) {
652 if (encoding == kCFStringEncodingUTF8) {
653 static CFStringEncodingToBytesProc __CFToUTF8 = NULL;
654 CFIndex convertedCharLen;
655 CFIndex usedLen;
656
657
658 if ((flags & kCFStringEncodingUseCanonical) || (flags & kCFStringEncodingUseHFSPlusCanonical)) {
659 (void)CFUniCharDecompose(characters, numChars, &convertedCharLen, (void *)bytes, maxByteLen, &usedLen, true, kCFUniCharUTF8Format, (flags & kCFStringEncodingUseHFSPlusCanonical ? true : false));
660 } else {
661 if (!__CFToUTF8) {
662 const CFStringEncodingConverter *utf8Converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);
663 __CFToUTF8 = (CFStringEncodingToBytesProc)utf8Converter->toBytes;
664 }
665 convertedCharLen = __CFToUTF8(0, characters, numChars, bytes, maxByteLen, &usedLen);
666 }
667 if (usedCharLen) *usedCharLen = convertedCharLen;
668 if (usedByteLen) *usedByteLen = usedLen;
669
670 if (convertedCharLen == numChars) {
671 return kCFStringEncodingConversionSuccess;
672 } else if ((maxByteLen > 0) && ((maxByteLen - usedLen) < 10)) { // could be filled outbuf
673 UTF16Char character = characters[convertedCharLen];
674
675 if (((character >= kSurrogateLowStart) && (character <= kSurrogateLowEnd)) || ((character >= kSurrogateHighStart) && (character <= kSurrogateHighEnd) && ((1 == (numChars - convertedCharLen)) || (characters[convertedCharLen + 1] < kSurrogateLowStart) || (characters[convertedCharLen + 1] > kSurrogateLowEnd)))) return kCFStringEncodingInvalidInputStream;
676
677 return kCFStringEncodingInsufficientOutputBufferLength;
678 } else {
679 return kCFStringEncodingInvalidInputStream;
680 }
681 } else {
682 const _CFEncodingConverter *converter = __CFGetConverter(encoding);
683 CFIndex usedLen = 0;
684 CFIndex localUsedByteLen;
685 CFIndex theUsedByteLen = 0;
686 uint32_t theResult = kCFStringEncodingConversionSuccess;
687 CFStringEncodingToBytesPrecomposeProc toBytesPrecompose = NULL;
688 CFStringEncodingIsValidCombiningCharacterProc isValidCombiningChar = NULL;
689
690 if (!converter) return kCFStringEncodingConverterUnavailable;
691
692 if (flags & kCFStringEncodingSubstituteCombinings) {
693 if (!(flags & kCFStringEncodingAllowLossyConversion)) isValidCombiningChar = converter->definition->isValidCombiningChar;
694 } else {
695 isValidCombiningChar = converter->definition->isValidCombiningChar;
696 if (!(flags & kCFStringEncodingIgnoreCombinings)) {
697 toBytesPrecompose = converter->definition->toBytesPrecompose;
698 flags |= kCFStringEncodingComposeCombinings;
699 }
700 }
701
702 if (kCFStringEncodingConverterICU == converter->definition->encodingClass) return __CFStringEncodingICUToBytes((const char *)converter->toBytes, flags, characters, numChars, usedCharLen, bytes, maxByteLen, usedByteLen);
703
704 /* Platform converter */
705 if (kCFStringEncodingConverterPlatformSpecific == converter->definition->encodingClass) return __CFStringEncodingPlatformUnicodeToBytes(encoding, flags, characters, numChars, usedCharLen, bytes, maxByteLen, usedByteLen);
706
707 while ((usedLen < numChars) && (!maxByteLen || (theUsedByteLen < maxByteLen))) {
708 if ((usedLen += TO_BYTE(converter, flags, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen)) < numChars) {
709 CFIndex dummy;
710
711 if (isValidCombiningChar && (usedLen > 0) && isValidCombiningChar(characters[usedLen])) {
712 if (toBytesPrecompose) {
713 CFIndex localUsedLen = usedLen;
714
715 while (isValidCombiningChar(characters[--usedLen]));
716 theUsedByteLen += localUsedByteLen;
717 if (converter->definition->maxBytesPerChar > 1) {
718 TO_BYTE(converter, flags, characters + usedLen, localUsedLen - usedLen, NULL, 0, &localUsedByteLen);
719 theUsedByteLen -= localUsedByteLen;
720 } else {
721 theUsedByteLen--;
722 }
723 if ((localUsedLen = toBytesPrecompose(flags, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen)) > 0) {
724 usedLen += localUsedLen;
725 if ((usedLen < numChars) && isValidCombiningChar(characters[usedLen])) { // There is a non-base char not combined remaining
726 theUsedByteLen += localUsedByteLen;
727 theResult = kCFStringEncodingInvalidInputStream;
728 break;
729 }
730 } else if (flags & kCFStringEncodingAllowLossyConversion) {
731 uint8_t lossyByte = CFStringEncodingMaskToLossyByte(flags);
732
733 if (lossyByte) {
734 while (isValidCombiningChar(characters[++usedLen]));
735 localUsedByteLen = 1;
736 if (maxByteLen) *(bytes + theUsedByteLen) = lossyByte;
737 } else {
738 ++usedLen;
739 usedLen += TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen);
740 }
741 } else {
742 theResult = kCFStringEncodingInvalidInputStream;
743 break;
744 }
745 } else if (maxByteLen && ((maxByteLen == theUsedByteLen + localUsedByteLen) || TO_BYTE(converter, flags, characters + usedLen, numChars - usedLen, NULL, 0, &dummy))) { // buffer was filled up
746 theUsedByteLen += localUsedByteLen;
747 theResult = kCFStringEncodingInsufficientOutputBufferLength;
748 break;
749 } else if (flags & kCFStringEncodingIgnoreCombinings) {
750 while ((++usedLen < numChars) && isValidCombiningChar(characters[usedLen]));
751 } else {
752 uint8_t lossyByte = CFStringEncodingMaskToLossyByte(flags);
753
754 theUsedByteLen += localUsedByteLen;
755 if (lossyByte) {
756 ++usedLen;
757 localUsedByteLen = 1;
758 if (maxByteLen) *(bytes + theUsedByteLen) = lossyByte;
759 } else {
760 usedLen += TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen);
761 }
762 }
763 } else if (maxByteLen && ((maxByteLen == theUsedByteLen + localUsedByteLen) || TO_BYTE(converter, flags, characters + usedLen, numChars - usedLen, NULL, 0, &dummy))) { // buffer was filled up
764 theUsedByteLen += localUsedByteLen;
765
766 if (flags & kCFStringEncodingAllowLossyConversion && !CFStringEncodingMaskToLossyByte(flags)) {
767 CFIndex localUsedLen;
768
769 localUsedByteLen = 0;
770 while ((usedLen < numChars) && !localUsedByteLen && (localUsedLen = TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, NULL, 0, &localUsedByteLen))) usedLen += localUsedLen;
771 }
772 if (usedLen < numChars) theResult = kCFStringEncodingInsufficientOutputBufferLength;
773 break;
774 } else if (flags & kCFStringEncodingAllowLossyConversion) {
775 uint8_t lossyByte = CFStringEncodingMaskToLossyByte(flags);
776
777 theUsedByteLen += localUsedByteLen;
778 if (lossyByte) {
779 ++usedLen;
780 localUsedByteLen = 1;
781 if (maxByteLen) *(bytes + theUsedByteLen) = lossyByte;
782 } else {
783 usedLen += TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen);
784 }
785 } else {
786 theUsedByteLen += localUsedByteLen;
787 theResult = kCFStringEncodingInvalidInputStream;
788 break;
789 }
790 }
791 theUsedByteLen += localUsedByteLen;
792 }
793
794 if (usedLen < numChars && maxByteLen && theResult == kCFStringEncodingConversionSuccess) {
795 if (flags & kCFStringEncodingAllowLossyConversion && !CFStringEncodingMaskToLossyByte(flags)) {
796 CFIndex localUsedLen;
797
798 localUsedByteLen = 0;
799 while ((usedLen < numChars) && !localUsedByteLen && (localUsedLen = TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, NULL, 0, &localUsedByteLen))) usedLen += localUsedLen;
800 }
801 if (usedLen < numChars) theResult = kCFStringEncodingInsufficientOutputBufferLength;
802 }
803 if (usedByteLen) *usedByteLen = theUsedByteLen;
804 if (usedCharLen) *usedCharLen = usedLen;
805
806 return theResult;
807 }
808 }
809
810 uint32_t CFStringEncodingBytesToUnicode(uint32_t encoding, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, CFIndex *usedByteLen, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
811 const _CFEncodingConverter *converter = __CFGetConverter(encoding);
812 CFIndex usedLen = 0;
813 CFIndex theUsedCharLen = 0;
814 CFIndex localUsedCharLen;
815 uint32_t theResult = kCFStringEncodingConversionSuccess;
816
817 if (!converter) return kCFStringEncodingConverterUnavailable;
818
819 if (kCFStringEncodingConverterICU == converter->definition->encodingClass) return __CFStringEncodingICUToUnicode((const char *)converter->toBytes, flags, bytes, numBytes, usedByteLen, characters, maxCharLen, usedCharLen);
820
821 /* Platform converter */
822 if (kCFStringEncodingConverterPlatformSpecific == converter->definition->encodingClass) return __CFStringEncodingPlatformBytesToUnicode(encoding, flags, bytes, numBytes, usedByteLen, characters, maxCharLen, usedCharLen);
823
824 while ((usedLen < numBytes) && (!maxCharLen || (theUsedCharLen < maxCharLen))) {
825 if ((usedLen += TO_UNICODE(converter, flags, bytes + usedLen, numBytes - usedLen, characters + theUsedCharLen, (maxCharLen ? maxCharLen - theUsedCharLen : 0), &localUsedCharLen)) < numBytes) {
826 CFIndex tempUsedCharLen;
827
828 if (maxCharLen && ((maxCharLen == theUsedCharLen + localUsedCharLen) || (((flags & (kCFStringEncodingUseCanonical|kCFStringEncodingUseHFSPlusCanonical)) || (maxCharLen == theUsedCharLen + localUsedCharLen + 1)) && TO_UNICODE(converter, flags, bytes + usedLen, numBytes - usedLen, NULL, 0, &tempUsedCharLen)))) { // buffer was filled up
829 theUsedCharLen += localUsedCharLen;
830 theResult = kCFStringEncodingInsufficientOutputBufferLength;
831 break;
832 } else if (flags & kCFStringEncodingAllowLossyConversion) {
833 theUsedCharLen += localUsedCharLen;
834 usedLen += TO_UNICODE_FALLBACK(converter, bytes + usedLen, numBytes - usedLen, characters + theUsedCharLen, (maxCharLen ? maxCharLen - theUsedCharLen : 0), &localUsedCharLen);
835 } else {
836 theUsedCharLen += localUsedCharLen;
837 theResult = kCFStringEncodingInvalidInputStream;
838 break;
839 }
840 }
841 theUsedCharLen += localUsedCharLen;
842 }
843
844 if (usedLen < numBytes && maxCharLen && theResult == kCFStringEncodingConversionSuccess) {
845 theResult = kCFStringEncodingInsufficientOutputBufferLength;
846 }
847 if (usedCharLen) *usedCharLen = theUsedCharLen;
848 if (usedByteLen) *usedByteLen = usedLen;
849
850 return theResult;
851 }
852
853 __private_extern__ bool CFStringEncodingIsValidEncoding(uint32_t encoding) {
854 return (CFStringEncodingGetConverter(encoding) ? true : false);
855 }
856
857 __private_extern__ CFIndex CFStringEncodingCharLengthForBytes(uint32_t encoding, uint32_t flags, const uint8_t *bytes, CFIndex numBytes) {
858 const _CFEncodingConverter *converter = __CFGetConverter(encoding);
859
860 if (converter) {
861 if (kCFStringEncodingConverterICU == converter->definition->encodingClass) return __CFStringEncodingICUCharLength((const char *)converter->toBytes, flags, bytes, numBytes);
862
863 if (kCFStringEncodingConverterPlatformSpecific == converter->definition->encodingClass) return __CFStringEncodingPlatformCharLengthForBytes(encoding, flags, bytes, numBytes);
864
865 if (1 == converter->definition->maxBytesPerChar) return numBytes;
866
867 if (NULL == converter->definition->toUnicodeLen) {
868 CFIndex usedByteLen = 0;
869 CFIndex totalLength = 0;
870 CFIndex usedCharLen;
871
872 while (numBytes > 0) {
873 usedByteLen = TO_UNICODE(converter, flags, bytes, numBytes, NULL, 0, &usedCharLen);
874
875 bytes += usedByteLen;
876 numBytes -= usedByteLen;
877 totalLength += usedCharLen;
878
879 if (numBytes > 0) {
880 if (0 == (flags & kCFStringEncodingAllowLossyConversion)) return 0;
881
882 usedByteLen = TO_UNICODE_FALLBACK(converter, bytes, numBytes, NULL, 0, &usedCharLen);
883
884 bytes += usedByteLen;
885 numBytes -= usedByteLen;
886 totalLength += usedCharLen;
887 }
888 }
889
890 return totalLength;
891 } else {
892 return converter->definition->toUnicodeLen(flags, bytes, numBytes);
893 }
894 }
895
896 return 0;
897 }
898
899 __private_extern__ CFIndex CFStringEncodingByteLengthForCharacters(uint32_t encoding, uint32_t flags, const UniChar *characters, CFIndex numChars) {
900 const _CFEncodingConverter *converter = __CFGetConverter(encoding);
901
902 if (converter) {
903 if (kCFStringEncodingConverterICU == converter->definition->encodingClass) return __CFStringEncodingICUByteLength((const char *)converter->toBytes, flags, characters, numChars);
904
905 if (kCFStringEncodingConverterPlatformSpecific == converter->definition->encodingClass) return __CFStringEncodingPlatformByteLengthForCharacters(encoding, flags, characters, numChars);
906
907 if (1 == converter->definition->maxBytesPerChar) return numChars;
908
909 if (NULL == converter->definition->toBytesLen) {
910 CFIndex usedByteLen;
911
912 return ((kCFStringEncodingConversionSuccess == CFStringEncodingUnicodeToBytes(encoding, flags, characters, numChars, NULL, NULL, 0, &usedByteLen)) ? usedByteLen : 0);
913 } else {
914 return converter->definition->toBytesLen(flags, characters, numChars);
915 }
916 }
917
918 return 0;
919 }
920
921 __private_extern__ void CFStringEncodingRegisterFallbackProcedures(uint32_t encoding, CFStringEncodingToBytesFallbackProc toBytes, CFStringEncodingToUnicodeFallbackProc toUnicode) {
922 _CFEncodingConverter *converter = (_CFEncodingConverter *)__CFGetConverter(encoding);
923
924 if (NULL != converter) {
925 const CFStringEncodingConverter *body = CFStringEncodingGetConverter(encoding);
926
927 converter->toBytesFallback = ((NULL == toBytes) ? ((NULL == body) ? __CFDefaultToBytesFallbackProc : body->toBytesFallback) : toBytes);
928 converter->toUnicodeFallback = ((NULL == toUnicode) ? ((NULL == body) ? __CFDefaultToUnicodeFallbackProc : body->toUnicodeFallback) : toUnicode);
929 }
930 }
931
932 __private_extern__ const CFStringEncodingConverter *CFStringEncodingGetConverter(uint32_t encoding) {
933 const _CFEncodingConverter *converter = __CFGetConverter(encoding);
934
935 return ((NULL == converter) ? NULL : converter->definition);
936 }
937
938 static const CFStringEncoding __CFBuiltinEncodings[] = {
939 kCFStringEncodingMacRoman,
940 kCFStringEncodingWindowsLatin1,
941 kCFStringEncodingISOLatin1,
942 kCFStringEncodingNextStepLatin,
943 kCFStringEncodingASCII,
944 kCFStringEncodingUTF8,
945 /* These seven are available only in CFString-level */
946 kCFStringEncodingNonLossyASCII,
947
948 kCFStringEncodingUTF16,
949 kCFStringEncodingUTF16BE,
950 kCFStringEncodingUTF16LE,
951
952 kCFStringEncodingUTF32,
953 kCFStringEncodingUTF32BE,
954 kCFStringEncodingUTF32LE,
955
956 kCFStringEncodingInvalidId,
957 };
958
959 static CFComparisonResult __CFStringEncodingComparator(const void *v1, const void *v2, void *context) {
960 CFComparisonResult val1 = (*(const CFStringEncoding *)v1) & 0xFFFF;
961 CFComparisonResult val2 = (*(const CFStringEncoding *)v2) & 0xFFFF;
962
963 return ((val1 == val2) ? ((CFComparisonResult)(*(const CFStringEncoding *)v1) - (CFComparisonResult)(*(const CFStringEncoding *)v2)) : val1 - val2);
964 }
965
966 static void __CFStringEncodingFliterDupes(CFStringEncoding *encodings, CFIndex numSlots) {
967 CFStringEncoding last = kCFStringEncodingInvalidId;
968 const CFStringEncoding *limitEncodings = encodings + numSlots;
969
970 while (encodings < limitEncodings) {
971 if (last == *encodings) {
972 if ((encodings + 1) < limitEncodings) memmove(encodings, encodings + 1, sizeof(CFStringEncoding) * (limitEncodings - encodings - 1));
973 --limitEncodings;
974 } else {
975 last = *(encodings++);
976 }
977 }
978 }
979
980 __private_extern__ const CFStringEncoding *CFStringEncodingListOfAvailableEncodings(void) {
981 static const CFStringEncoding *encodings = NULL;
982
983 if (NULL == encodings) {
984 CFStringEncoding *list = (CFStringEncoding *)__CFBuiltinEncodings;
985 CFIndex numICUConverters = 0, numPlatformConverters = 0;
986 CFStringEncoding *icuConverters = __CFStringEncodingCreateICUEncodings(NULL, &numICUConverters);
987 CFStringEncoding *platformConverters = __CFStringEncodingCreateListOfAvailablePlatformConverters(NULL, &numPlatformConverters);
988
989 if ((NULL != icuConverters) || (NULL != platformConverters)) {
990 CFIndex numSlots = (sizeof(__CFBuiltinEncodings) / sizeof(*__CFBuiltinEncodings)) + numICUConverters + numPlatformConverters;
991
992 list = (CFStringEncoding *)CFAllocatorAllocate(NULL, sizeof(CFStringEncoding) * numSlots, 0);
993
994 memcpy(list, __CFBuiltinEncodings, sizeof(__CFBuiltinEncodings));
995
996 if (NULL != icuConverters) {
997 memcpy(list + (sizeof(__CFBuiltinEncodings) / sizeof(*__CFBuiltinEncodings)), icuConverters, sizeof(CFStringEncoding) * numICUConverters);
998 CFAllocatorDeallocate(NULL, icuConverters);
999 }
1000
1001 if (NULL != platformConverters) {
1002 memcpy(list + (sizeof(__CFBuiltinEncodings) / sizeof(*__CFBuiltinEncodings)) + numICUConverters, platformConverters, sizeof(CFStringEncoding) * numPlatformConverters);
1003 CFAllocatorDeallocate(NULL, platformConverters);
1004 }
1005
1006 CFQSortArray(list, numSlots, sizeof(CFStringEncoding), (CFComparatorFunction)__CFStringEncodingComparator, NULL);
1007 __CFStringEncodingFliterDupes(list, numSlots);
1008 }
1009 if (!OSAtomicCompareAndSwapPtrBarrier(NULL, list, (void * volatile *)&encodings) && (list != __CFBuiltinEncodings)) CFAllocatorDeallocate(NULL, list);
1010 }
1011
1012 return encodings;
1013 }
1014
/* Remove the helper macros used above so they are not visible to anything that follows. */

/* converter dispatch helpers */
#undef TO_BYTE
#undef TO_BYTE_FALLBACK
#undef TO_UNICODE
#undef TO_UNICODE_FALLBACK

/* character constants */
#undef ASCIINewLine
#undef kSurrogateHighStart
#undef kSurrogateHighEnd
#undef kSurrogateLowStart
#undef kSurrogateLowEnd

/* remaining helper constants */
#undef EXTRA_BASE
#undef NUM_OF_ENTRIES_CYCLE
1026