]> git.saurik.com Git - apple/cf.git/blob - String.subproj/CFStringScanner.c
CF-299.tar.gz
[apple/cf.git] / String.subproj / CFStringScanner.c
1 /*
2 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
7 *
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25 /* CFStringScanner.c
26 Copyright 1999-2002, Apple, Inc. All rights reserved.
27 Responsibility: Ali Ozer
28 */
29
30 #include "CFInternal.h"
31 #include <CoreFoundation/CFString.h>
32 #if !defined(__MACOS8__)
33 #include <sys/types.h>
34 #endif
35 #include <limits.h>
36 #include <stdlib.h>
37 #include <string.h>
38
39 CF_INLINE Boolean __CFCharacterIsADigit(UniChar ch) {
40 return (ch >= '0' && ch <= '9') ? true : false;
41 }
42
43 /* Returns -1 on illegal value */
44 CF_INLINE SInt32 __CFCharacterNumericOrHexValue (UniChar ch) {
45 if (ch >= '0' && ch <= '9') {
46 return ch - '0';
47 } else if (ch >= 'A' && ch <= 'F') {
48 return ch + 10 - 'A';
49 } else if (ch >= 'a' && ch <= 'f') {
50 return ch + 10 - 'a';
51 } else {
52 return -1;
53 }
54 }
55
56 /* Returns -1 on illegal value */
57 CF_INLINE SInt32 __CFCharacterNumericValue(UniChar ch) {
58 return (ch >= '0' && ch <= '9') ? (ch - '0') : -1;
59 }
60
61 CF_INLINE UniChar __CFStringGetFirstNonSpaceCharacterFromInlineBuffer(CFStringInlineBuffer *buf, SInt32 *indexPtr) {
62 UniChar ch;
63 while (__CFIsWhitespace(ch = __CFStringGetCharacterFromInlineBufferAux(buf, *indexPtr))) (*indexPtr)++;
64 return ch;
65 }
66
67 /* result is int64_t or int, depending on doLonglong
68 */
69 __private_extern__ Boolean __CFStringScanInteger(CFStringInlineBuffer *buf, CFDictionaryRef locale, SInt32 *indexPtr, Boolean doLonglong, void *result) {
70 Boolean doingLonglong = false; /* Set to true if doLonglong, and we overflow an int... */
71 Boolean neg = false;
72 int intResult = 0;
73 register int64_t longlongResult = 0; /* ??? int64_t is slow when not in regs; I hope this does the right thing. */
74 UniChar ch;
75
76 ch = __CFStringGetFirstNonSpaceCharacterFromInlineBuffer(buf, indexPtr);
77
78 if (ch == '-' || ch == '+') {
79 neg = (ch == '-');
80 (*indexPtr)++;
81 ch = __CFStringGetFirstNonSpaceCharacterFromInlineBuffer(buf, indexPtr);
82 }
83
84 if (! __CFCharacterIsADigit(ch)) return false; /* No digits, bail out... */
85 do {
86 if (doingLonglong) {
87 if ((longlongResult >= LLONG_MAX / 10) && ((longlongResult > LLONG_MAX / 10) || (__CFCharacterNumericValue(ch) - (neg ? 1 : 0) >= LLONG_MAX - longlongResult * 10))) {
88 /* ??? This might not handle LLONG_MIN correctly... */
89 longlongResult = neg ? LLONG_MIN : LLONG_MAX;
90 neg = false;
91 while (__CFCharacterIsADigit(ch = __CFStringGetCharacterFromInlineBufferAux(buf, ++(*indexPtr)))); /* Skip remaining digits */
92 } else {
93 longlongResult = longlongResult * 10 + __CFCharacterNumericValue(ch);
94 ch = __CFStringGetCharacterFromInlineBufferAux(buf, ++(*indexPtr));
95 }
96 } else {
97 if ((intResult >= INT_MAX / 10) && ((intResult > INT_MAX / 10) || (__CFCharacterNumericValue(ch) - (neg ? 1 : 0) >= INT_MAX - intResult * 10))) {
98 // Overflow, check for int64_t...
99 if (doLonglong) {
100 longlongResult = intResult;
101 doingLonglong = true;
102 } else {
103 /* ??? This might not handle INT_MIN correctly... */
104 intResult = neg ? INT_MIN : INT_MAX;
105 neg = false;
106 while (__CFCharacterIsADigit(ch = __CFStringGetCharacterFromInlineBufferAux(buf, ++(*indexPtr)))); /* Skip remaining digits */
107 }
108 } else {
109 intResult = intResult * 10 + __CFCharacterNumericValue(ch);
110 ch = __CFStringGetCharacterFromInlineBufferAux(buf, ++(*indexPtr));
111 }
112 }
113 } while (__CFCharacterIsADigit(ch));
114
115 if (result) {
116 if (doLonglong) {
117 if (!doingLonglong) longlongResult = intResult;
118 *(int64_t *)result = neg ? -longlongResult : longlongResult;
119 } else {
120 *(int *)result = neg ? -intResult : intResult;
121 }
122 }
123
124 return true;
125 }
126
127 __private_extern__ Boolean __CFStringScanHex(CFStringInlineBuffer *buf, SInt32 *indexPtr, unsigned *result) {
128 UInt32 value = 0;
129 SInt32 curDigit;
130 UniChar ch;
131
132 ch = __CFStringGetFirstNonSpaceCharacterFromInlineBuffer(buf, indexPtr);
133 /* Ignore the optional "0x" or "0X"; if it's followed by a non-hex, just parse the "0" and leave pointer at "x" */
134 if (ch == '0') {
135 ch = __CFStringGetCharacterFromInlineBufferAux(buf, ++(*indexPtr));
136 if (ch == 'x' || ch == 'X') ch = __CFStringGetCharacterFromInlineBufferAux(buf, ++(*indexPtr));
137 curDigit = __CFCharacterNumericOrHexValue(ch);
138 if (curDigit == -1) {
139 (*indexPtr)--; /* Go back over the "x" or "X" */
140 if (result) *result = 0;
141 return true; /* We just saw "0" */
142 }
143 } else {
144 curDigit = __CFCharacterNumericOrHexValue(ch);
145 if (curDigit == -1) return false;
146 }
147
148 do {
149 if (value > (UINT_MAX >> 4)) {
150 value = UINT_MAX; /* We do this over and over again, but it's an error case anyway */
151 } else {
152 value = (value << 4) + curDigit;
153 }
154 curDigit = __CFCharacterNumericOrHexValue(__CFStringGetCharacterFromInlineBufferAux(buf, ++(*indexPtr)));
155 } while (curDigit != -1);
156
157 if (result) *result = value;
158 return true;
159 }
160
/* Bitmap over the 128 ASCII codes, one bit per code.  Code c is a member when
       (__CFNumberSet[c >> 3] & (1 << (c & 7))) != 0
   Members are '+', '-', '0' through '9', 'E' and 'e'. */
static const char __CFNumberSet[16] = {
    0x00,	/* 0x00-0x07 */
    0x00,	/* 0x08-0x0F */
    0x00,	/* 0x10-0x17 */
    0x00,	/* 0x18-0x1F */
    0x00,	/* 0x20-0x27 */
    0x28,	/* 0x28-0x2F: '+' (bit 3) and '-' (bit 5) */
    0xFF,	/* 0x30-0x37: '0' through '7' */
    0x03,	/* 0x38-0x3F: '8' (bit 0) and '9' (bit 1) */
    0x20,	/* 0x40-0x47: 'E' (bit 5) */
    0x00,	/* 0x48-0x4F */
    0x00,	/* 0x50-0x57 */
    0x00,	/* 0x58-0x5F */
    0x20,	/* 0x60-0x67: 'e' (bit 5) */
    0x00,	/* 0x68-0x6F */
    0x00,	/* 0x70-0x77 */
    0x00	/* 0x78-0x7F */
};
180
181 __private_extern__ Boolean __CFStringScanDouble(CFStringInlineBuffer *buf, CFDictionaryRef locale, SInt32 *indexPtr, double *resultPtr) {
182 #define STACK_BUFFER_SIZE 256
183 #define ALLOC_CHUNK_SIZE 256 // first and subsequent malloc size. Should be greater than STACK_BUFFER_SIZE
184 char localCharBuffer[STACK_BUFFER_SIZE];
185 char *charPtr = localCharBuffer;
186 char *endCharPtr;
187 UniChar decimalChar = '.';
188 SInt32 numChars = 0;
189 SInt32 capacity = STACK_BUFFER_SIZE; // in chars
190 double result;
191 UniChar ch;
192 CFAllocatorRef tmpAlloc = NULL;
193
194 #if 0
195 if (locale != NULL) {
196 CFStringRef decimalSeparator = [locale objectForKey: NSDecimalSeparator];
197 if (decimalSeparator != nil) decimalChar = [decimalSeparator characterAtIndex:0];
198 }
199 #endif
200 ch = __CFStringGetFirstNonSpaceCharacterFromInlineBuffer(buf, indexPtr);
201 // At this point indexPtr points at the first non-space char
202 #if 0
203 #warning need to allow, case insensitively, all of: "nan", "inf", "-inf", "+inf", "-infinity", "+infinity", "infinity";
204 #warning -- strtod() will actually do most or all of that for us
205 #define BITSFORDOUBLENAN ((uint64_t)0x7ff8000000000000)
206 #define BITSFORDOUBLEPOSINF ((uint64_t)0x7ff0000000000000)
207 #define BITSFORDOUBLENEGINF ((uint64_t)0xfff0000000000000)
208 if ('N' == ch || 'n' == ch) { // check for "NaN", case insensitively
209 UniChar next1 = __CFStringGetCharacterFromInlineBufferAux(buf, *indexPtr + 1);
210 UniChar next2 = __CFStringGetCharacterFromInlineBufferAux(buf, *indexPtr + 2);
211 if (('a' == next1 || 'A' == next1) &&
212 ('N' == next2 || 'n' == next2)) {
213 *indexPtr += 3;
214 if (resultPtr) *(uint64_t *)resultPtr = BITSFORDOUBLENAN;
215 return true;
216 }
217 }
218 if ('I' == ch || 'i' == ch) { // check for "Inf", case insensitively
219 UniChar next1 = __CFStringGetCharacterFromInlineBufferAux(buf, *indexPtr + 1);
220 UniChar next2 = __CFStringGetCharacterFromInlineBufferAux(buf, *indexPtr + 2);
221 if (('n' == next1 || 'N' == next1) &&
222 ('f' == next2 || 'F' == next2)) {
223 *indexPtr += 3;
224 if (resultPtr) *(uint64_t *)resultPtr = BITSFORDOUBLEPOSINF;
225 return true;
226 }
227 }
228 if ('+' == ch || '-' == ch) { // check for "+/-Inf", case insensitively
229 UniChar next1 = __CFStringGetCharacterFromInlineBufferAux(buf, *indexPtr + 1);
230 UniChar next2 = __CFStringGetCharacterFromInlineBufferAux(buf, *indexPtr + 2);
231 UniChar next3 = __CFStringGetCharacterFromInlineBufferAux(buf, *indexPtr + 3);
232 if (('I' == next1 || 'i' == next1) &&
233 ('n' == next2 || 'N' == next2) &&
234 ('f' == next3 || 'F' == next3)) {
235 *indexPtr += 4;
236 if (resultPtr) *(uint64_t *)resultPtr = ('-' == ch) ? BITSFORDOUBLENEGINF : BITSFORDOUBLEPOSINF;
237 return true;
238 }
239 }
240 #endif 0
241 do {
242 if (ch >= 128 || (__CFNumberSet[ch >> 3] & (1 << (ch & 7))) == 0) {
243 // Not in __CFNumberSet
244 if (ch != decimalChar) break;
245 ch = '.'; // Replace the decimal character with something strtod will understand
246 }
247 if (numChars >= capacity - 1) {
248 capacity += ALLOC_CHUNK_SIZE;
249 if (tmpAlloc == NULL) tmpAlloc = __CFGetDefaultAllocator();
250 if (charPtr == localCharBuffer) {
251 charPtr = CFAllocatorAllocate(tmpAlloc, capacity * sizeof(char), 0);
252 memmove(charPtr, localCharBuffer, numChars * sizeof(char));
253 } else {
254 charPtr = CFAllocatorReallocate(tmpAlloc, charPtr, capacity * sizeof(char), 0);
255 }
256 }
257 charPtr[numChars++] = (char)ch;
258 ch = __CFStringGetCharacterFromInlineBufferAux(buf, *indexPtr + numChars);
259 } while (true);
260 charPtr[numChars] = 0; // Null byte for strtod
261 result = strtod(charPtr, &endCharPtr);
262 if (tmpAlloc) CFAllocatorDeallocate(tmpAlloc, charPtr);
263 if (charPtr == endCharPtr) return false;
264 *indexPtr += (endCharPtr - charPtr);
265 if (resultPtr) *resultPtr = result; // only store result if we succeed
266
267 return true;
268 }
269
270
271 #undef STACK_BUFFER_SIZE
272 #undef ALLOC_CHUNK_SIZE
273
274