]> git.saurik.com Git - apple/cf.git/blame - CFStringEncodingConverter.c
CF-476.19.tar.gz
[apple/cf.git] / CFStringEncodingConverter.c
CommitLineData
9ce05555 1/*
bd5b749c 2 * Copyright (c) 2008 Apple Inc. All rights reserved.
9ce05555
A
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
9ce05555
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23/* CFStringEncodingConverter.c
24 Copyright 1998-2002, Apple, Inc. All rights reserved.
25 Responsibility: Aki Inoue
26*/
27
28#include "CFInternal.h"
29#include <CoreFoundation/CFArray.h>
30#include <CoreFoundation/CFDictionary.h>
31#include "CFUniChar.h"
bd5b749c 32#include "CFPriv.h"
9ce05555
A
33#include "CFUnicodeDecomposition.h"
34#include "CFStringEncodingConverterExt.h"
35#include "CFStringEncodingConverterPriv.h"
36#include <stdlib.h>
bd5b749c 37#if !defined(__WIN32__)
9ce05555
A
38#include <pthread.h>
39#endif
9ce05555
A
40
41
42/* Macros
43*/
/* Dispatch helpers for a _CFEncodingConverter.
   When the converter carries a private proc (_toBytes / _toUnicode is non-NULL), the
   function stored in toBytes / toUnicode / toCanonicalUnicode is called with the
   converter itself as first argument.  Otherwise the stored toBytes / toUnicode pointer
   is cast to CFStringEncodingToBytesProc / CFStringEncodingToUnicodeProc and called
   without the converter argument.
   TO_UNICODE selects toCanonicalUnicode when either canonical flag is set in flags.
   Every argument is parenthesized so that expression arguments (e.g. "a | b" for flags)
   expand with the intended precedence.  conv and flags are evaluated more than once,
   so do not pass expressions with side effects. */
#define TO_BYTE(conv,flags,chars,numChars,bytes,max,used) ((conv)->_toBytes ? (conv)->toBytes((conv),(flags),(chars),(numChars),(bytes),(max),(used)) : ((CFStringEncodingToBytesProc)((conv)->toBytes))((flags),(chars),(numChars),(bytes),(max),(used)))
#define TO_UNICODE(conv,flags,bytes,numBytes,chars,max,used) ((conv)->_toUnicode ? (((flags) & (kCFStringEncodingUseCanonical|kCFStringEncodingUseHFSPlusCanonical)) ? (conv)->toCanonicalUnicode((conv),(flags),(bytes),(numBytes),(chars),(max),(used)) : (conv)->toUnicode((conv),(flags),(bytes),(numBytes),(chars),(max),(used))) : ((CFStringEncodingToUnicodeProc)((conv)->toUnicode))((flags),(bytes),(numBytes),(chars),(max),(used)))
46
9ce05555
A
47#define ASCIINewLine 0x0a
48#define kSurrogateHighStart 0xD800
49#define kSurrogateHighEnd 0xDBFF
50#define kSurrogateLowStart 0xDC00
51#define kSurrogateLowEnd 0xDFFF
52
53/* Mapping 128..255 to lossy ASCII
54*/
/* Lossy ASCII replacements for U+00A0..U+00FF, indexed by (character - 0xA0).
   Each entry holds up to three replacement bytes followed by 0 padding; an entry whose
   first byte is 0 has no replacement.
   The replacement case follows the case of the source letter: capital ETH and capital
   THORN map to "TH", small eth and small thorn map to "th". */
static const struct {
    unsigned char chars[4];
} _toLossyASCIITable[] = {
    {{' ', 0, 0, 0}}, // NO-BREAK SPACE
    {{'!', 0, 0, 0}}, // INVERTED EXCLAMATION MARK
    {{'c', 0, 0, 0}}, // CENT SIGN
    {{'L', 0, 0, 0}}, // POUND SIGN
    {{'$', 0, 0, 0}}, // CURRENCY SIGN
    {{'Y', 0, 0, 0}}, // YEN SIGN
    {{'|', 0, 0, 0}}, // BROKEN BAR
    {{0, 0, 0, 0}}, // SECTION SIGN
    {{0, 0, 0, 0}}, // DIAERESIS
    {{'(', 'C', ')', 0}}, // COPYRIGHT SIGN
    {{'a', 0, 0, 0}}, // FEMININE ORDINAL INDICATOR
    {{'<', '<', 0, 0}}, // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
    {{0, 0, 0, 0}}, // NOT SIGN
    {{'-', 0, 0, 0}}, // SOFT HYPHEN
    {{'(', 'R', ')', 0}}, // REGISTERED SIGN
    {{0, 0, 0, 0}}, // MACRON
    {{0, 0, 0, 0}}, // DEGREE SIGN
    {{'+', '-', 0, 0}}, // PLUS-MINUS SIGN
    {{'2', 0, 0, 0}}, // SUPERSCRIPT TWO
    {{'3', 0, 0, 0}}, // SUPERSCRIPT THREE
    {{0, 0, 0, 0}}, // ACUTE ACCENT
    {{0, 0, 0, 0}}, // MICRO SIGN
    {{0, 0, 0, 0}}, // PILCROW SIGN
    {{0, 0, 0, 0}}, // MIDDLE DOT
    {{0, 0, 0, 0}}, // CEDILLA
    {{'1', 0, 0, 0}}, // SUPERSCRIPT ONE
    {{'o', 0, 0, 0}}, // MASCULINE ORDINAL INDICATOR
    {{'>', '>', 0, 0}}, // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
    {{'1', '/', '4', 0}}, // VULGAR FRACTION ONE QUARTER
    {{'1', '/', '2', 0}}, // VULGAR FRACTION ONE HALF
    {{'3', '/', '4', 0}}, // VULGAR FRACTION THREE QUARTERS
    {{'?', 0, 0, 0}}, // INVERTED QUESTION MARK
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH GRAVE
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH ACUTE
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH TILDE
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH DIAERESIS
    {{'A', 0, 0, 0}}, // LATIN CAPITAL LETTER A WITH RING ABOVE
    {{'A', 'E', 0, 0}}, // LATIN CAPITAL LETTER AE
    {{'C', 0, 0, 0}}, // LATIN CAPITAL LETTER C WITH CEDILLA
    {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH GRAVE
    {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH ACUTE
    {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX
    {{'E', 0, 0, 0}}, // LATIN CAPITAL LETTER E WITH DIAERESIS
    {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH GRAVE
    {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH ACUTE
    {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH CIRCUMFLEX
    {{'I', 0, 0, 0}}, // LATIN CAPITAL LETTER I WITH DIAERESIS
    {{'T', 'H', 0, 0}}, // LATIN CAPITAL LETTER ETH (Icelandic)
    {{'N', 0, 0, 0}}, // LATIN CAPITAL LETTER N WITH TILDE
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH GRAVE
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH ACUTE
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH TILDE
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH DIAERESIS
    {{'X', 0, 0, 0}}, // MULTIPLICATION SIGN
    {{'O', 0, 0, 0}}, // LATIN CAPITAL LETTER O WITH STROKE
    {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH GRAVE
    {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH ACUTE
    {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH CIRCUMFLEX
    {{'U', 0, 0, 0}}, // LATIN CAPITAL LETTER U WITH DIAERESIS
    {{'Y', 0, 0, 0}}, // LATIN CAPITAL LETTER Y WITH ACUTE
    {{'T', 'H', 0, 0}}, // LATIN CAPITAL LETTER THORN (Icelandic)
    {{'s', 0, 0, 0}}, // LATIN SMALL LETTER SHARP S (German)
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH GRAVE
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH ACUTE
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH CIRCUMFLEX
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH TILDE
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH DIAERESIS
    {{'a', 0, 0, 0}}, // LATIN SMALL LETTER A WITH RING ABOVE
    {{'a', 'e', 0, 0}}, // LATIN SMALL LETTER AE
    {{'c', 0, 0, 0}}, // LATIN SMALL LETTER C WITH CEDILLA
    {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH GRAVE
    {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH ACUTE
    {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH CIRCUMFLEX
    {{'e', 0, 0, 0}}, // LATIN SMALL LETTER E WITH DIAERESIS
    {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH GRAVE
    {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH ACUTE
    {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH CIRCUMFLEX
    {{'i', 0, 0, 0}}, // LATIN SMALL LETTER I WITH DIAERESIS
    {{'t', 'h', 0, 0}}, // LATIN SMALL LETTER ETH (Icelandic)
    {{'n', 0, 0, 0}}, // LATIN SMALL LETTER N WITH TILDE
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH GRAVE
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH ACUTE
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH CIRCUMFLEX
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH TILDE
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH DIAERESIS
    {{'/', 0, 0, 0}}, // DIVISION SIGN
    {{'o', 0, 0, 0}}, // LATIN SMALL LETTER O WITH STROKE
    {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH GRAVE
    {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH ACUTE
    {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH CIRCUMFLEX
    {{'u', 0, 0, 0}}, // LATIN SMALL LETTER U WITH DIAERESIS
    {{'y', 0, 0, 0}}, // LATIN SMALL LETTER Y WITH ACUTE
    {{'t', 'h', 0, 0}}, // LATIN SMALL LETTER THORN (Icelandic)
    {{'y', 0, 0, 0}}, // LATIN SMALL LETTER Y WITH DIAERESIS
};
155
bd5b749c
A
156CF_INLINE CFIndex __CFToASCIILatin1Fallback(UniChar character, uint8_t *bytes, CFIndex maxByteLen) {
157 const uint8_t *losChars = (const uint8_t*)_toLossyASCIITable + (character - 0xA0) * sizeof(uint8_t[4]);
158 CFIndex numBytes = 0;
159 CFIndex idx, max = (maxByteLen && (maxByteLen < 4) ? maxByteLen : 4);
9ce05555
A
160
161 for (idx = 0;idx < max;idx++) {
162 if (losChars[idx]) {
163 if (maxByteLen) bytes[idx] = losChars[idx];
164 ++numBytes;
165 } else {
166 break;
167 }
168 }
169
170 return numBytes;
171}
172
bd5b749c
A
173static CFIndex __CFDefaultToBytesFallbackProc(const UniChar *characters, CFIndex numChars, uint8_t *bytes, CFIndex maxByteLen, CFIndex *usedByteLen) {
174 CFIndex processCharLen = 1, filledBytesLen = 1;
175 uint8_t byte = '?';
176
9ce05555 177 if (*characters < 0xA0) { // 0x80 to 0x9F maps to ASCII C0 range
bd5b749c 178 byte = (uint8_t)(*characters - 0x80);
9ce05555
A
179 } else if (*characters < 0x100) {
180 *usedByteLen = __CFToASCIILatin1Fallback(*characters, bytes, maxByteLen);
181 return 1;
182 } else if (*characters >= kSurrogateHighStart && *characters <= kSurrogateLowEnd) {
bd5b749c 183 processCharLen = (numChars > 1 && *characters <= kSurrogateLowStart && *(characters + 1) >= kSurrogateLowStart && *(characters + 1) <= kSurrogateLowEnd ? 2 : 1);
9ce05555 184 } else if (CFUniCharIsMemberOf(*characters, kCFUniCharWhitespaceCharacterSet)) {
bd5b749c 185 byte = ' ';
9ce05555 186 } else if (CFUniCharIsMemberOf(*characters, kCFUniCharWhitespaceAndNewlineCharacterSet)) {
bd5b749c
A
187 byte = ASCIINewLine;
188 } else if (*characters == 0x2026) { // ellipsis
189 if (0 == maxByteLen) {
190 filledBytesLen = 3;
191 } else if (maxByteLen > 2) {
192 memset(bytes, '.', 3);
193 *usedByteLen = 3;
194 return processCharLen;
195 }
9ce05555
A
196 } else if (CFUniCharIsMemberOf(*characters, kCFUniCharDecomposableCharacterSet)) {
197 UTF32Char decomposed[MAX_DECOMPOSED_LENGTH];
198
199 (void)CFUniCharDecomposeCharacter(*characters, decomposed, MAX_DECOMPOSED_LENGTH);
200 if (*decomposed < 0x80) {
bd5b749c 201 byte = (uint8_t)(*decomposed);
9ce05555
A
202 } else {
203 UTF16Char theChar = *decomposed;
204
205 return __CFDefaultToBytesFallbackProc(&theChar, 1, bytes, maxByteLen, usedByteLen);
206 }
9ce05555 207 }
bd5b749c
A
208
209 if (maxByteLen) *bytes = byte;
210 *usedByteLen = filledBytesLen;
211 return processCharLen;
9ce05555
A
212}
213
bd5b749c 214static CFIndex __CFDefaultToUnicodeFallbackProc(const uint8_t *bytes, CFIndex numBytes, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
9ce05555
A
215 if (maxCharLen) *characters = (UniChar)'?';
216 *usedCharLen = 1;
217 return 1;
218}
219
/* Invoke the converter's fallback procs.  Arguments are parenthesized so that
   expression arguments expand with the intended precedence. */
#define TO_BYTE_FALLBACK(conv,chars,numChars,bytes,max,used) ((conv)->toBytesFallback((chars),(numChars),(bytes),(max),(used)))
#define TO_UNICODE_FALLBACK(conv,bytes,numBytes,chars,max,used) ((conv)->toUnicodeFallback((bytes),(numBytes),(chars),(max),(used)))
222
223#define EXTRA_BASE (0x0F00)
224
225/* Wrapper funcs for non-standard converters
226*/
bd5b749c
A
227static CFIndex __CFToBytesCheapEightBitWrapper(const void *converter, uint32_t flags, const UniChar *characters, CFIndex numChars, uint8_t *bytes, CFIndex maxByteLen, CFIndex *usedByteLen) {
228 CFIndex processedCharLen = 0;
229 CFIndex length = (maxByteLen && (maxByteLen < numChars) ? maxByteLen : numChars);
9ce05555
A
230 uint8_t byte;
231
232 while (processedCharLen < length) {
233 if (!((CFStringEncodingCheapEightBitToBytesProc)((const _CFEncodingConverter*)converter)->_toBytes)(flags, characters[processedCharLen], &byte)) break;
234
235 if (maxByteLen) bytes[processedCharLen] = byte;
236 processedCharLen++;
237 }
238
239 *usedByteLen = processedCharLen;
240 return processedCharLen;
241}
242
bd5b749c
A
243static CFIndex __CFToUnicodeCheapEightBitWrapper(const void *converter, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
244 CFIndex processedByteLen = 0;
245 CFIndex length = (maxCharLen && (maxCharLen < numBytes) ? maxCharLen : numBytes);
9ce05555
A
246 UniChar character;
247
248 while (processedByteLen < length) {
249 if (!((CFStringEncodingCheapEightBitToUnicodeProc)((const _CFEncodingConverter*)converter)->_toUnicode)(flags, bytes[processedByteLen], &character)) break;
250
251 if (maxCharLen) characters[processedByteLen] = character;
252 processedByteLen++;
253 }
254
255 *usedCharLen = processedByteLen;
256 return processedByteLen;
257}
258
bd5b749c
A
259static CFIndex __CFToCanonicalUnicodeCheapEightBitWrapper(const void *converter, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
260 CFIndex processedByteLen = 0;
261 CFIndex theUsedCharLen = 0;
9ce05555 262 UTF32Char charBuffer[MAX_DECOMPOSED_LENGTH];
bd5b749c 263 CFIndex usedLen;
9ce05555
A
264 UniChar character;
265 bool isHFSPlus = (flags & kCFStringEncodingUseHFSPlusCanonical ? true : false);
266
267 while ((processedByteLen < numBytes) && (!maxCharLen || (theUsedCharLen < maxCharLen))) {
268 if (!((CFStringEncodingCheapEightBitToUnicodeProc)((const _CFEncodingConverter*)converter)->_toUnicode)(flags, bytes[processedByteLen], &character)) break;
269
270 if (CFUniCharIsDecomposableCharacter(character, isHFSPlus)) {
bd5b749c 271 CFIndex idx;
9ce05555
A
272
273 usedLen = CFUniCharDecomposeCharacter(character, charBuffer, MAX_DECOMPOSED_LENGTH);
274 *usedCharLen = theUsedCharLen;
275
276 for (idx = 0;idx < usedLen;idx++) {
277 if (charBuffer[idx] > 0xFFFF) { // Non-BMP
278 if (theUsedCharLen + 2 > maxCharLen) return processedByteLen;
279 theUsedCharLen += 2;
280 if (maxCharLen) {
281 charBuffer[idx] = charBuffer[idx] - 0x10000;
bd5b749c
A
282 *(characters++) = (UniChar)(charBuffer[idx] >> 10) + 0xD800UL;
283 *(characters++) = (UniChar)(charBuffer[idx] & 0x3FF) + 0xDC00UL;
9ce05555
A
284 }
285 } else {
286 if (theUsedCharLen + 1 > maxCharLen) return processedByteLen;
287 ++theUsedCharLen;
288 *(characters++) = charBuffer[idx];
289 }
290 }
291 } else {
292 if (maxCharLen) *(characters++) = character;
293 ++theUsedCharLen;
294 }
295 processedByteLen++;
296 }
297
298 *usedCharLen = theUsedCharLen;
299 return processedByteLen;
300}
301
bd5b749c
A
302static CFIndex __CFToBytesStandardEightBitWrapper(const void *converter, uint32_t flags, const UniChar *characters, CFIndex numChars, uint8_t *bytes, CFIndex maxByteLen, CFIndex *usedByteLen) {
303 CFIndex processedCharLen = 0;
9ce05555 304 uint8_t byte;
bd5b749c 305 CFIndex usedLen;
9ce05555
A
306
307 *usedByteLen = 0;
308
309 while (numChars && (!maxByteLen || (*usedByteLen < maxByteLen))) {
310 if (!(usedLen = ((CFStringEncodingStandardEightBitToBytesProc)((const _CFEncodingConverter*)converter)->_toBytes)(flags, characters, numChars, &byte))) break;
311
312 if (maxByteLen) bytes[*usedByteLen] = byte;
313 (*usedByteLen)++;
314 characters += usedLen;
315 numChars -= usedLen;
316 processedCharLen += usedLen;
317 }
318
319 return processedCharLen;
320}
321
bd5b749c
A
322static CFIndex __CFToUnicodeStandardEightBitWrapper(const void *converter, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
323 CFIndex processedByteLen = 0;
324#if 0 || 0
9ce05555
A
325 UniChar charBuffer[20]; // Dynamic stack allocation is GNU specific
326#else
327 UniChar charBuffer[((const _CFEncodingConverter*)converter)->maxLen];
328#endif
bd5b749c 329 CFIndex usedLen;
9ce05555
A
330
331 *usedCharLen = 0;
332
333 while ((processedByteLen < numBytes) && (!maxCharLen || (*usedCharLen < maxCharLen))) {
334 if (!(usedLen = ((CFStringEncodingCheapEightBitToUnicodeProc)((const _CFEncodingConverter*)converter)->_toUnicode)(flags, bytes[processedByteLen], charBuffer))) break;
335
336 if (maxCharLen) {
bd5b749c 337 CFIndex idx;
9ce05555
A
338
339 if (*usedCharLen + usedLen > maxCharLen) break;
340
341 for (idx = 0;idx < usedLen;idx++) {
342 characters[*usedCharLen + idx] = charBuffer[idx];
343 }
344 }
345 *usedCharLen += usedLen;
346 processedByteLen++;
347 }
348
349 return processedByteLen;
350}
351
bd5b749c
A
352static CFIndex __CFToCanonicalUnicodeStandardEightBitWrapper(const void *converter, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
353 CFIndex processedByteLen = 0;
354#if 0 || 0
9ce05555
A
355 UniChar charBuffer[20]; // Dynamic stack allocation is GNU specific
356#else
357 UniChar charBuffer[((const _CFEncodingConverter*)converter)->maxLen];
358#endif
359 UTF32Char decompBuffer[MAX_DECOMPOSED_LENGTH];
bd5b749c
A
360 CFIndex usedLen;
361 CFIndex decompedLen;
362 CFIndex idx, decompIndex;
9ce05555 363 bool isHFSPlus = (flags & kCFStringEncodingUseHFSPlusCanonical ? true : false);
bd5b749c 364 CFIndex theUsedCharLen = 0;
9ce05555
A
365
366 while ((processedByteLen < numBytes) && (!maxCharLen || (theUsedCharLen < maxCharLen))) {
367 if (!(usedLen = ((CFStringEncodingCheapEightBitToUnicodeProc)((const _CFEncodingConverter*)converter)->_toUnicode)(flags, bytes[processedByteLen], charBuffer))) break;
368
369 for (idx = 0;idx < usedLen;idx++) {
370 if (CFUniCharIsDecomposableCharacter(charBuffer[idx], isHFSPlus)) {
371 decompedLen = CFUniCharDecomposeCharacter(charBuffer[idx], decompBuffer, MAX_DECOMPOSED_LENGTH);
372 *usedCharLen = theUsedCharLen;
373
374 for (decompIndex = 0;decompIndex < decompedLen;decompIndex++) {
375 if (decompBuffer[decompIndex] > 0xFFFF) { // Non-BMP
376 if (theUsedCharLen + 2 > maxCharLen) return processedByteLen;
377 theUsedCharLen += 2;
378 if (maxCharLen) {
379 charBuffer[idx] = charBuffer[idx] - 0x10000;
380 *(characters++) = (charBuffer[idx] >> 10) + 0xD800UL;
381 *(characters++) = (charBuffer[idx] & 0x3FF) + 0xDC00UL;
382 }
383 } else {
384 if (theUsedCharLen + 1 > maxCharLen) return processedByteLen;
385 ++theUsedCharLen;
386 *(characters++) = charBuffer[idx];
387 }
388 }
389 } else {
390 if (maxCharLen) *(characters++) = charBuffer[idx];
391 ++theUsedCharLen;
392 }
393 }
394 processedByteLen++;
395 }
396
397 *usedCharLen = theUsedCharLen;
398 return processedByteLen;
399}
400
bd5b749c
A
401static CFIndex __CFToBytesCheapMultiByteWrapper(const void *converter, uint32_t flags, const UniChar *characters, CFIndex numChars, uint8_t *bytes, CFIndex maxByteLen, CFIndex *usedByteLen) {
402 CFIndex processedCharLen = 0;
403#if 0 || 0
9ce05555
A
404 uint8_t byteBuffer[20]; // Dynamic stack allocation is GNU specific
405#else
406 uint8_t byteBuffer[((const _CFEncodingConverter*)converter)->maxLen];
407#endif
bd5b749c 408 CFIndex usedLen;
9ce05555
A
409
410 *usedByteLen = 0;
411
412 while ((processedCharLen < numChars) && (!maxByteLen || (*usedByteLen < maxByteLen))) {
413 if (!(usedLen = ((CFStringEncodingCheapMultiByteToBytesProc)((const _CFEncodingConverter*)converter)->_toBytes)(flags, characters[processedCharLen], byteBuffer))) break;
414
415 if (maxByteLen) {
bd5b749c 416 CFIndex idx;
9ce05555
A
417
418 if (*usedByteLen + usedLen > maxByteLen) break;
419
420 for (idx = 0;idx <usedLen;idx++) {
421 bytes[*usedByteLen + idx] = byteBuffer[idx];
422 }
423 }
424
425 *usedByteLen += usedLen;
426 processedCharLen++;
427 }
428
429 return processedCharLen;
430}
431
bd5b749c
A
432static CFIndex __CFToUnicodeCheapMultiByteWrapper(const void *converter, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
433 CFIndex processedByteLen = 0;
9ce05555 434 UniChar character;
bd5b749c 435 CFIndex usedLen;
9ce05555
A
436
437 *usedCharLen = 0;
438
439 while (numBytes && (!maxCharLen || (*usedCharLen < maxCharLen))) {
440 if (!(usedLen = ((CFStringEncodingCheapMultiByteToUnicodeProc)((const _CFEncodingConverter*)converter)->_toUnicode)(flags, bytes, numBytes, &character))) break;
441
442 if (maxCharLen) *(characters++) = character;
443 (*usedCharLen)++;
444 processedByteLen += usedLen;
445 bytes += usedLen;
446 numBytes -= usedLen;
447 }
448
449 return processedByteLen;
450}
451
bd5b749c
A
452static CFIndex __CFToCanonicalUnicodeCheapMultiByteWrapper(const void *converter, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
453 CFIndex processedByteLen = 0;
9ce05555
A
454 UTF32Char charBuffer[MAX_DECOMPOSED_LENGTH];
455 UniChar character;
bd5b749c
A
456 CFIndex usedLen;
457 CFIndex decomposedLen;
458 CFIndex theUsedCharLen = 0;
9ce05555
A
459 bool isHFSPlus = (flags & kCFStringEncodingUseHFSPlusCanonical ? true : false);
460
461 while (numBytes && (!maxCharLen || (theUsedCharLen < maxCharLen))) {
462 if (!(usedLen = ((CFStringEncodingCheapMultiByteToUnicodeProc)((const _CFEncodingConverter*)converter)->_toUnicode)(flags, bytes, numBytes, &character))) break;
463
464 if (CFUniCharIsDecomposableCharacter(character, isHFSPlus)) {
bd5b749c 465 CFIndex idx;
9ce05555
A
466
467 decomposedLen = CFUniCharDecomposeCharacter(character, charBuffer, MAX_DECOMPOSED_LENGTH);
468 *usedCharLen = theUsedCharLen;
469
470 for (idx = 0;idx < decomposedLen;idx++) {
471 if (charBuffer[idx] > 0xFFFF) { // Non-BMP
472 if (theUsedCharLen + 2 > maxCharLen) return processedByteLen;
473 theUsedCharLen += 2;
474 if (maxCharLen) {
475 charBuffer[idx] = charBuffer[idx] - 0x10000;
bd5b749c
A
476 *(characters++) = (UniChar)(charBuffer[idx] >> 10) + 0xD800UL;
477 *(characters++) = (UniChar)(charBuffer[idx] & 0x3FF) + 0xDC00UL;
9ce05555
A
478 }
479 } else {
480 if (theUsedCharLen + 1 > maxCharLen) return processedByteLen;
481 ++theUsedCharLen;
482 *(characters++) = charBuffer[idx];
483 }
484 }
485 } else {
486 if (maxCharLen) *(characters++) = character;
487 ++theUsedCharLen;
488 }
489
490 processedByteLen += usedLen;
491 bytes += usedLen;
492 numBytes -= usedLen;
493 }
494 *usedCharLen = theUsedCharLen;
495 return processedByteLen;
496}
497
498/* static functions
499*/
500static _CFConverterEntry __CFConverterEntryASCII = {
501 kCFStringEncodingASCII, NULL,
502 "Western (ASCII)", {"us-ascii", "ascii", "iso-646-us", NULL}, NULL, NULL, NULL, NULL,
503 kCFStringEncodingMacRoman // We use string encoding's script range here
504};
505
506static _CFConverterEntry __CFConverterEntryISOLatin1 = {
507 kCFStringEncodingISOLatin1, NULL,
508 "Western (ISO Latin 1)", {"iso-8859-1", "latin1","iso-latin-1", NULL}, NULL, NULL, NULL, NULL,
509 kCFStringEncodingMacRoman // We use string encoding's script range here
510};
511
512static _CFConverterEntry __CFConverterEntryMacRoman = {
513 kCFStringEncodingMacRoman, NULL,
514 "Western (Mac OS Roman)", {"macintosh", "mac", "x-mac-roman", NULL}, NULL, NULL, NULL, NULL,
515 kCFStringEncodingMacRoman // We use string encoding's script range here
516};
517
518static _CFConverterEntry __CFConverterEntryWinLatin1 = {
519 kCFStringEncodingWindowsLatin1, NULL,
520 "Western (Windows Latin 1)", {"windows-1252", "cp1252", "windows latin1", NULL}, NULL, NULL, NULL, NULL,
521 kCFStringEncodingMacRoman // We use string encoding's script range here
522};
523
524static _CFConverterEntry __CFConverterEntryNextStepLatin = {
525 kCFStringEncodingNextStepLatin, NULL,
526 "Western (NextStep)", {"x-nextstep", NULL, NULL, NULL}, NULL, NULL, NULL, NULL,
527 kCFStringEncodingMacRoman // We use string encoding's script range here
528};
529
530static _CFConverterEntry __CFConverterEntryUTF8 = {
531 kCFStringEncodingUTF8, NULL,
532 "UTF-8", {"utf-8", "unicode-1-1-utf8", NULL, NULL}, NULL, NULL, NULL, NULL,
533 kCFStringEncodingUnicode // We use string encoding's script range here
534};
535
bd5b749c 536CF_INLINE _CFConverterEntry *__CFStringEncodingConverterGetEntry(uint32_t encoding) {
9ce05555
A
537 switch (encoding) {
538 case kCFStringEncodingInvalidId:
539 case kCFStringEncodingASCII:
540 return &__CFConverterEntryASCII;
541
542 case kCFStringEncodingISOLatin1:
543 return &__CFConverterEntryISOLatin1;
544
545 case kCFStringEncodingMacRoman:
546 return &__CFConverterEntryMacRoman;
547
548 case kCFStringEncodingWindowsLatin1:
549 return &__CFConverterEntryWinLatin1;
550
551 case kCFStringEncodingNextStepLatin:
552 return &__CFConverterEntryNextStepLatin;
553
554 case kCFStringEncodingUTF8:
555 return &__CFConverterEntryUTF8;
556
d8925383
A
557 default: {
558 return NULL;
559 }
9ce05555
A
560 }
561}
562
563CF_INLINE _CFEncodingConverter *__CFEncodingConverterFromDefinition(const CFStringEncodingConverter *definition) {
564#define NUM_OF_ENTRIES_CYCLE (10)
bd5b749c
A
565 static CFSpinLock_t _indexLock = CFSpinLockInit;
566 static uint32_t _currentIndex = 0;
567 static uint32_t _allocatedSize = 0;
9ce05555
A
568 static _CFEncodingConverter *_allocatedEntries = NULL;
569 _CFEncodingConverter *converter;
570
571
572 __CFSpinLock(&_indexLock);
573 if ((_currentIndex + 1) >= _allocatedSize) {
574 _currentIndex = 0;
575 _allocatedSize = 0;
576 _allocatedEntries = NULL;
577 }
578 if (_allocatedEntries == NULL) { // Not allocated yet
bd5b749c 579 _allocatedEntries = (_CFEncodingConverter *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(_CFEncodingConverter) * NUM_OF_ENTRIES_CYCLE, 0);
9ce05555
A
580 _allocatedSize = NUM_OF_ENTRIES_CYCLE;
581 converter = &(_allocatedEntries[_currentIndex]);
582 } else {
583 converter = &(_allocatedEntries[++_currentIndex]);
584 }
585 __CFSpinUnlock(&_indexLock);
586
587 switch (definition->encodingClass) {
588 case kCFStringEncodingConverterStandard:
bd5b749c
A
589 converter->toBytes = (_CFToBytesProc)definition->toBytes;
590 converter->toUnicode = (_CFToUnicodeProc)definition->toUnicode;
591 converter->toCanonicalUnicode = (_CFToUnicodeProc)definition->toUnicode;
9ce05555
A
592 converter->_toBytes = NULL;
593 converter->_toUnicode = NULL;
594 converter->maxLen = 2;
595 break;
596
597 case kCFStringEncodingConverterCheapEightBit:
598 converter->toBytes = __CFToBytesCheapEightBitWrapper;
599 converter->toUnicode = __CFToUnicodeCheapEightBitWrapper;
600 converter->toCanonicalUnicode = __CFToCanonicalUnicodeCheapEightBitWrapper;
601 converter->_toBytes = definition->toBytes;
602 converter->_toUnicode = definition->toUnicode;
603 converter->maxLen = 1;
604 break;
605
606 case kCFStringEncodingConverterStandardEightBit:
607 converter->toBytes = __CFToBytesStandardEightBitWrapper;
608 converter->toUnicode = __CFToUnicodeStandardEightBitWrapper;
609 converter->toCanonicalUnicode = __CFToCanonicalUnicodeStandardEightBitWrapper;
610 converter->_toBytes = definition->toBytes;
611 converter->_toUnicode = definition->toUnicode;
612 converter->maxLen = definition->maxDecomposedCharLen;
613 break;
614
615 case kCFStringEncodingConverterCheapMultiByte:
616 converter->toBytes = __CFToBytesCheapMultiByteWrapper;
617 converter->toUnicode = __CFToUnicodeCheapMultiByteWrapper;
618 converter->toCanonicalUnicode = __CFToCanonicalUnicodeCheapMultiByteWrapper;
619 converter->_toBytes = definition->toBytes;
620 converter->_toUnicode = definition->toUnicode;
621 converter->maxLen = definition->maxBytesPerChar;
622 break;
623
624 case kCFStringEncodingConverterPlatformSpecific:
625 converter->toBytes = NULL;
626 converter->toUnicode = NULL;
627 converter->toCanonicalUnicode = NULL;
628 converter->_toBytes = NULL;
629 converter->_toUnicode = NULL;
630 converter->maxLen = 0;
631 converter->toBytesLen = NULL;
632 converter->toUnicodeLen = NULL;
633 converter->toBytesFallback = NULL;
634 converter->toUnicodeFallback = NULL;
635 converter->toBytesPrecompose = NULL;
636 converter->isValidCombiningChar = NULL;
637 return converter;
638
639 default: // Shouln't be here
640 return NULL;
641 }
642
bd5b749c
A
643 converter->toBytesLen = (definition->toBytesLen ? definition->toBytesLen : (CFStringEncodingToBytesLenProc)(uintptr_t)definition->maxBytesPerChar);
644 converter->toUnicodeLen = (definition->toUnicodeLen ? definition->toUnicodeLen : (CFStringEncodingToUnicodeLenProc)(uintptr_t)definition->maxDecomposedCharLen);
9ce05555
A
645 converter->toBytesFallback = (definition->toBytesFallback ? definition->toBytesFallback : __CFDefaultToBytesFallbackProc);
646 converter->toUnicodeFallback = (definition->toUnicodeFallback ? definition->toUnicodeFallback : __CFDefaultToUnicodeFallbackProc);
647 converter->toBytesPrecompose = (definition->toBytesPrecompose ? definition->toBytesPrecompose : NULL);
648 converter->isValidCombiningChar = (definition->isValidCombiningChar ? definition->isValidCombiningChar : NULL);
649
650 return converter;
651}
652
653CF_INLINE const CFStringEncodingConverter *__CFStringEncodingConverterGetDefinition(_CFConverterEntry *entry) {
654 if (!entry) return NULL;
655
656 switch (entry->encoding) {
657 case kCFStringEncodingASCII:
658 return &__CFConverterASCII;
659
660 case kCFStringEncodingISOLatin1:
661 return &__CFConverterISOLatin1;
662
663 case kCFStringEncodingMacRoman:
664 return &__CFConverterMacRoman;
665
666 case kCFStringEncodingWindowsLatin1:
667 return &__CFConverterWinLatin1;
668
669 case kCFStringEncodingNextStepLatin:
670 return &__CFConverterNextStepLatin;
671
672 case kCFStringEncodingUTF8:
673 return &__CFConverterUTF8;
674
675 default:
bd5b749c 676 return NULL;
9ce05555
A
677 }
678}
679
bd5b749c 680static const _CFEncodingConverter *__CFGetConverter(uint32_t encoding) {
9ce05555
A
681 _CFConverterEntry *entry = __CFStringEncodingConverterGetEntry(encoding);
682
683 if (!entry) return NULL;
684
685 if (!entry->converter) {
686 const CFStringEncodingConverter *definition = __CFStringEncodingConverterGetDefinition(entry);
687
688 if (definition) {
689 entry->converter = __CFEncodingConverterFromDefinition(definition);
690 entry->toBytesFallback = definition->toBytesFallback;
691 entry->toUnicodeFallback = definition->toUnicodeFallback;
692 }
693 }
694
695 return (_CFEncodingConverter *)entry->converter;
696}
697
698/* Public API
699*/
bd5b749c 700uint32_t CFStringEncodingUnicodeToBytes(uint32_t encoding, uint32_t flags, const UniChar *characters, CFIndex numChars, CFIndex *usedCharLen, uint8_t *bytes, CFIndex maxByteLen, CFIndex *usedByteLen) {
9ce05555
A
701 if (encoding == kCFStringEncodingUTF8) {
702 static CFStringEncodingToBytesProc __CFToUTF8 = NULL;
bd5b749c
A
703 CFIndex convertedCharLen;
704 CFIndex usedLen;
9ce05555
A
705
706
707 if ((flags & kCFStringEncodingUseCanonical) || (flags & kCFStringEncodingUseHFSPlusCanonical)) {
708 (void)CFUniCharDecompose(characters, numChars, &convertedCharLen, (void *)bytes, maxByteLen, &usedLen, true, kCFUniCharUTF8Format, (flags & kCFStringEncodingUseHFSPlusCanonical ? true : false));
709 } else {
710 if (!__CFToUTF8) {
711 const CFStringEncodingConverter *utf8Converter = CFStringEncodingGetConverter(kCFStringEncodingUTF8);
712 __CFToUTF8 = (CFStringEncodingToBytesProc)utf8Converter->toBytes;
713 }
bd5b749c 714 convertedCharLen = __CFToUTF8(0, characters, numChars, bytes, maxByteLen, &usedLen);
9ce05555
A
715 }
716 if (usedCharLen) *usedCharLen = convertedCharLen;
717 if (usedByteLen) *usedByteLen = usedLen;
718
719 if (convertedCharLen == numChars) {
720 return kCFStringEncodingConversionSuccess;
721 } else if (maxByteLen && (maxByteLen == usedLen)) {
722 return kCFStringEncodingInsufficientOutputBufferLength;
723 } else {
724 return kCFStringEncodingInvalidInputStream;
725 }
726 } else {
727 const _CFEncodingConverter *converter = __CFGetConverter(encoding);
bd5b749c
A
728 CFIndex usedLen = 0;
729 CFIndex localUsedByteLen;
730 CFIndex theUsedByteLen = 0;
731 uint32_t theResult = kCFStringEncodingConversionSuccess;
9ce05555
A
732 CFStringEncodingToBytesPrecomposeProc toBytesPrecompose = NULL;
733 CFStringEncodingIsValidCombiningCharacterProc isValidCombiningChar = NULL;
734
735 if (!converter) return kCFStringEncodingConverterUnavailable;
736
737 if (flags & kCFStringEncodingSubstituteCombinings) {
738 if (!(flags & kCFStringEncodingAllowLossyConversion)) isValidCombiningChar = converter->isValidCombiningChar;
739 } else {
740 isValidCombiningChar = converter->isValidCombiningChar;
741 if (!(flags & kCFStringEncodingIgnoreCombinings)) {
742 toBytesPrecompose = converter->toBytesPrecompose;
743 flags |= kCFStringEncodingComposeCombinings;
744 }
745 }
746
747
748 while ((usedLen < numChars) && (!maxByteLen || (theUsedByteLen < maxByteLen))) {
749 if ((usedLen += TO_BYTE(converter, flags, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen)) < numChars) {
bd5b749c 750 CFIndex dummy;
9ce05555
A
751
752 if (isValidCombiningChar && (usedLen > 0) && isValidCombiningChar(characters[usedLen])) {
753 if (toBytesPrecompose) {
bd5b749c 754 CFIndex localUsedLen = usedLen;
9ce05555
A
755
756 while (isValidCombiningChar(characters[--usedLen]));
757 theUsedByteLen += localUsedByteLen;
758 if (converter->maxLen > 1) {
759 TO_BYTE(converter, flags, characters + usedLen, localUsedLen - usedLen, NULL, 0, &localUsedByteLen);
760 theUsedByteLen -= localUsedByteLen;
761 } else {
762 theUsedByteLen--;
763 }
764 if ((localUsedLen = toBytesPrecompose(flags, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen)) > 0) {
765 usedLen += localUsedLen;
766 if ((usedLen < numChars) && isValidCombiningChar(characters[usedLen])) { // There is a non-base char not combined remaining
767 theUsedByteLen += localUsedByteLen;
768 theResult = kCFStringEncodingInvalidInputStream;
769 break;
770 }
771 } else if (flags & kCFStringEncodingAllowLossyConversion) {
772 uint8_t lossyByte = CFStringEncodingMaskToLossyByte(flags);
773
774 if (lossyByte) {
775 while (isValidCombiningChar(characters[++usedLen]));
776 localUsedByteLen = 1;
777 if (maxByteLen) *(bytes + theUsedByteLen) = lossyByte;
778 } else {
779 ++usedLen;
780 usedLen += TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen);
781 }
782 } else {
783 theResult = kCFStringEncodingInvalidInputStream;
784 break;
785 }
786 } else if (maxByteLen && ((maxByteLen == theUsedByteLen + localUsedByteLen) || TO_BYTE(converter, flags, characters + usedLen, numChars - usedLen, NULL, 0, &dummy))) { // buffer was filled up
787 theUsedByteLen += localUsedByteLen;
788 theResult = kCFStringEncodingInsufficientOutputBufferLength;
789 break;
790 } else if (flags & kCFStringEncodingIgnoreCombinings) {
791 while ((++usedLen < numChars) && isValidCombiningChar(characters[usedLen]));
792 } else {
793 uint8_t lossyByte = CFStringEncodingMaskToLossyByte(flags);
794
795 theUsedByteLen += localUsedByteLen;
796 if (lossyByte) {
797 ++usedLen;
798 localUsedByteLen = 1;
799 if (maxByteLen) *(bytes + theUsedByteLen) = lossyByte;
800 } else {
801 usedLen += TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen);
802 }
803 }
804 } else if (maxByteLen && ((maxByteLen == theUsedByteLen + localUsedByteLen) || TO_BYTE(converter, flags, characters + usedLen, numChars - usedLen, NULL, 0, &dummy))) { // buffer was filled up
805 theUsedByteLen += localUsedByteLen;
806
807 if (flags & kCFStringEncodingAllowLossyConversion && !CFStringEncodingMaskToLossyByte(flags)) {
bd5b749c 808 CFIndex localUsedLen;
9ce05555
A
809
810 localUsedByteLen = 0;
811 while ((usedLen < numChars) && !localUsedByteLen && (localUsedLen = TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, NULL, 0, &localUsedByteLen))) usedLen += localUsedLen;
812 }
813 if (usedLen < numChars) theResult = kCFStringEncodingInsufficientOutputBufferLength;
814 break;
815 } else if (flags & kCFStringEncodingAllowLossyConversion) {
816 uint8_t lossyByte = CFStringEncodingMaskToLossyByte(flags);
817
818 theUsedByteLen += localUsedByteLen;
819 if (lossyByte) {
820 ++usedLen;
821 localUsedByteLen = 1;
822 if (maxByteLen) *(bytes + theUsedByteLen) = lossyByte;
823 } else {
824 usedLen += TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, bytes + theUsedByteLen, (maxByteLen ? maxByteLen - theUsedByteLen : 0), &localUsedByteLen);
825 }
826 } else {
827 theUsedByteLen += localUsedByteLen;
828 theResult = kCFStringEncodingInvalidInputStream;
829 break;
830 }
831 }
832 theUsedByteLen += localUsedByteLen;
833 }
834
835 if (usedLen < numChars && maxByteLen && theResult == kCFStringEncodingConversionSuccess) {
836 if (flags & kCFStringEncodingAllowLossyConversion && !CFStringEncodingMaskToLossyByte(flags)) {
bd5b749c 837 CFIndex localUsedLen;
9ce05555
A
838
839 localUsedByteLen = 0;
840 while ((usedLen < numChars) && !localUsedByteLen && (localUsedLen = TO_BYTE_FALLBACK(converter, characters + usedLen, numChars - usedLen, NULL, 0, &localUsedByteLen))) usedLen += localUsedLen;
841 }
842 if (usedLen < numChars) theResult = kCFStringEncodingInsufficientOutputBufferLength;
843 }
844 if (usedByteLen) *usedByteLen = theUsedByteLen;
845 if (usedCharLen) *usedCharLen = usedLen;
846
847 return theResult;
848 }
849}
850
bd5b749c 851uint32_t CFStringEncodingBytesToUnicode(uint32_t encoding, uint32_t flags, const uint8_t *bytes, CFIndex numBytes, CFIndex *usedByteLen, UniChar *characters, CFIndex maxCharLen, CFIndex *usedCharLen) {
9ce05555 852 const _CFEncodingConverter *converter = __CFGetConverter(encoding);
bd5b749c
A
853 CFIndex usedLen = 0;
854 CFIndex theUsedCharLen = 0;
855 CFIndex localUsedCharLen;
856 uint32_t theResult = kCFStringEncodingConversionSuccess;
9ce05555
A
857
858 if (!converter) return kCFStringEncodingConverterUnavailable;
859
860
861 while ((usedLen < numBytes) && (!maxCharLen || (theUsedCharLen < maxCharLen))) {
862 if ((usedLen += TO_UNICODE(converter, flags, bytes + usedLen, numBytes - usedLen, characters + theUsedCharLen, (maxCharLen ? maxCharLen - theUsedCharLen : 0), &localUsedCharLen)) < numBytes) {
bd5b749c 863 CFIndex tempUsedCharLen;
9ce05555 864
bd5b749c 865 if (maxCharLen && ((maxCharLen == theUsedCharLen + localUsedCharLen) || (((flags & (kCFStringEncodingUseCanonical|kCFStringEncodingUseHFSPlusCanonical)) || (maxCharLen == theUsedCharLen + localUsedCharLen + 1)) && TO_UNICODE(converter, flags, bytes + usedLen, numBytes - usedLen, NULL, 0, &tempUsedCharLen)))) { // buffer was filled up
9ce05555
A
866 theUsedCharLen += localUsedCharLen;
867 theResult = kCFStringEncodingInsufficientOutputBufferLength;
868 break;
869 } else if (flags & kCFStringEncodingAllowLossyConversion) {
870 theUsedCharLen += localUsedCharLen;
871 usedLen += TO_UNICODE_FALLBACK(converter, bytes + usedLen, numBytes - usedLen, characters + theUsedCharLen, (maxCharLen ? maxCharLen - theUsedCharLen : 0), &localUsedCharLen);
872 } else {
873 theUsedCharLen += localUsedCharLen;
874 theResult = kCFStringEncodingInvalidInputStream;
875 break;
876 }
877 }
878 theUsedCharLen += localUsedCharLen;
879 }
880
881 if (usedLen < numBytes && maxCharLen && theResult == kCFStringEncodingConversionSuccess) {
882 theResult = kCFStringEncodingInsufficientOutputBufferLength;
883 }
884 if (usedCharLen) *usedCharLen = theUsedCharLen;
885 if (usedByteLen) *usedByteLen = usedLen;
886
887 return theResult;
888}
889
bd5b749c 890__private_extern__ bool CFStringEncodingIsValidEncoding(uint32_t encoding) {
9ce05555
A
891 return (CFStringEncodingGetConverter(encoding) ? true : false);
892}
893
bd5b749c 894__private_extern__ const char *CFStringEncodingName(uint32_t encoding) {
9ce05555
A
895 _CFConverterEntry *entry = __CFStringEncodingConverterGetEntry(encoding);
896 if (entry) return entry->encodingName;
897 return NULL;
898}
899
bd5b749c 900__private_extern__ const char **CFStringEncodingCanonicalCharsetNames(uint32_t encoding) {
9ce05555
A
901 _CFConverterEntry *entry = __CFStringEncodingConverterGetEntry(encoding);
902 if (entry) return entry->ianaNames;
903 return NULL;
904}
905
bd5b749c 906__private_extern__ uint32_t CFStringEncodingGetScriptCodeForEncoding(CFStringEncoding encoding) {
9ce05555
A
907 _CFConverterEntry *entry = __CFStringEncodingConverterGetEntry(encoding);
908
d8925383 909 return (entry ? entry->scriptCode : ((encoding & 0x0FFF) == kCFStringEncodingUnicode ? kCFStringEncodingUnicode : (encoding < 0xFF ? encoding : kCFStringEncodingInvalidId)));
9ce05555
A
910}
911
bd5b749c 912__private_extern__ CFIndex CFStringEncodingCharLengthForBytes(uint32_t encoding, uint32_t flags, const uint8_t *bytes, CFIndex numBytes) {
9ce05555
A
913 const _CFEncodingConverter *converter = __CFGetConverter(encoding);
914
915 if (converter) {
bd5b749c 916 uintptr_t switchVal = (uintptr_t)(converter->toUnicodeLen);
9ce05555 917
bd5b749c 918 if (switchVal < 0xFFFF) {
9ce05555 919 return switchVal * numBytes;
bd5b749c 920 } else {
9ce05555 921 return converter->toUnicodeLen(flags, bytes, numBytes);
bd5b749c 922 }
9ce05555
A
923 }
924
925 return 0;
926}
927
bd5b749c 928__private_extern__ CFIndex CFStringEncodingByteLengthForCharacters(uint32_t encoding, uint32_t flags, const UniChar *characters, CFIndex numChars) {
9ce05555
A
929 const _CFEncodingConverter *converter = __CFGetConverter(encoding);
930
931 if (converter) {
bd5b749c 932 uintptr_t switchVal = (uintptr_t)(converter->toBytesLen);
9ce05555 933
bd5b749c 934 if (switchVal < 0xFFFF) {
9ce05555 935 return switchVal * numChars;
bd5b749c 936 } else {
9ce05555 937 return converter->toBytesLen(flags, characters, numChars);
bd5b749c 938 }
9ce05555
A
939 }
940
941 return 0;
942}
943
bd5b749c 944__private_extern__ void CFStringEncodingRegisterFallbackProcedures(uint32_t encoding, CFStringEncodingToBytesFallbackProc toBytes, CFStringEncodingToUnicodeFallbackProc toUnicode) {
9ce05555
A
945 _CFConverterEntry *entry = __CFStringEncodingConverterGetEntry(encoding);
946
947 if (entry && __CFGetConverter(encoding)) {
948 ((_CFEncodingConverter*)entry->converter)->toBytesFallback = (toBytes ? toBytes : entry->toBytesFallback);
949 ((_CFEncodingConverter*)entry->converter)->toUnicodeFallback = (toUnicode ? toUnicode : entry->toUnicodeFallback);
950 }
951}
952
bd5b749c 953__private_extern__ const CFStringEncodingConverter *CFStringEncodingGetConverter(uint32_t encoding) {
9ce05555
A
954 return __CFStringEncodingConverterGetDefinition(__CFStringEncodingConverterGetEntry(encoding));
955}
956
bd5b749c 957static const uint32_t __CFBuiltinEncodings[] = {
9ce05555
A
958 kCFStringEncodingMacRoman,
959 kCFStringEncodingWindowsLatin1,
960 kCFStringEncodingISOLatin1,
961 kCFStringEncodingNextStepLatin,
962 kCFStringEncodingASCII,
963 kCFStringEncodingUTF8,
d8925383 964 /* These seven are available only in CFString-level */
9ce05555 965 kCFStringEncodingNonLossyASCII,
d8925383
A
966
967 kCFStringEncodingUTF16,
968 kCFStringEncodingUTF16BE,
969 kCFStringEncodingUTF16LE,
970
971 kCFStringEncodingUTF32,
972 kCFStringEncodingUTF32BE,
973 kCFStringEncodingUTF32LE,
974
9ce05555
A
975 kCFStringEncodingInvalidId,
976};
977
978
bd5b749c 979__private_extern__ const uint32_t *CFStringEncodingListOfAvailableEncodings(void) {
9ce05555
A
980 return __CFBuiltinEncodings;
981}
982
bd5b749c
A
983
/* Remove the macro names used by this file now that its code is complete. */

/* Converter dispatch helpers */
#undef TO_BYTE
#undef TO_UNICODE
#undef TO_BYTE_FALLBACK
#undef TO_UNICODE_FALLBACK

/* Character constants */
#undef ASCIINewLine
#undef kSurrogateHighStart
#undef kSurrogateHighEnd
#undef kSurrogateLowStart
#undef kSurrogateLowEnd

/* Remaining file-local constants */
#undef EXTRA_BASE
#undef NUM_OF_ENTRIES_CYCLE
995