]> git.saurik.com Git - apple/xnu.git/blame_incremental - libkern/stdio/scanf.c
xnu-792.13.8.tar.gz
[apple/xnu.git] / libkern / stdio / scanf.c
... / ...
CommitLineData
1/*
2 * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the
10 * License may not be used to create, or enable the creation or
11 * redistribution of, unlawful or unlicensed copies of an Apple operating
12 * system, or to circumvent, violate, or enable the circumvention or
13 * violation of, any terms of an Apple operating system software license
14 * agreement.
15 *
16 * Please obtain a copy of the License at
17 * http://www.opensource.apple.com/apsl/ and read it before using this
18 * file.
19 *
20 * The Original Code and all software distributed under the License are
21 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
22 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
23 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
25 * Please see the License for the specific language governing rights and
26 * limitations under the License.
27 *
28 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
29 */
30/*-
31 * Copyright (c) 1990, 1993
32 * The Regents of the University of California. All rights reserved.
33 *
34 * This code is derived from software contributed to Berkeley by
35 * Chris Torek.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 */
65
66#include <sys/cdefs.h>
67
68#if 0 /* XXX coming soon */
69#include <ctype.h>
70#else
/*
 * Local stand-in for the <ctype.h> isspace(): returns nonzero when `c' is
 * one of the six white-space characters of the "C" locale (space,
 * horizontal tab, newline, vertical tab, form feed, carriage return),
 * and zero otherwise.
 *
 * The previous version tested '\12', which is just the octal spelling of
 * '\n', so carriage return, form feed and vertical tab were never treated
 * as white space.
 */
static inline int
isspace(char c)
{
	return (c == ' ' || c == '\t' || c == '\n' ||
	    c == '\v' || c == '\f' || c == '\r');
}
76#endif
77#include <stdarg.h>
78#include <string.h>
79#include <sys/param.h>
80#include <sys/systm.h>
81
#define	BUF		32	/* Maximum length of numeric string. */

/*
 * Flags used during conversion.
 * Each flag is a distinct bit so that they can be OR-ed together in the
 * scanner's `flags' word.
 */
#define	LONG		0x01	/* l: long or double */
#define	SHORT		0x04	/* h: short */
#define	SUPPRESS	0x08	/* *: suppress assignment */
#define	POINTER		0x10	/* p: void * (as hex) */
#define	NOSKIP		0x20	/* [ or c: do not skip blanks */
#define	LONGLONG	0x400	/* ll: long long (+ deprecated q: quad) */
#define	SHORTSHORT	0x4000	/* hh: char */
#define	UNSIGNED	0x8000	/* %[oupxX] conversions */

/*
 * The following are used in numeric conversions only:
 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point;
 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral.
 *
 * DPTOK/EXPOK deliberately reuse the bit values of PFXOK/NZDIGITS: a
 * single conversion is either floating point or integral, never both.
 * The scanner in this file only performs integral conversions and does
 * not reference DPTOK or EXPOK.
 */
#define	SIGNOK		0x40	/* +/- is (still) legal */
#define	NDIGITS		0x80	/* no digits detected */

#define	DPTOK		0x100	/* (float) decimal point is still legal */
#define	EXPOK		0x200	/* (float) exponent (e+3, etc) still legal */

#define	PFXOK		0x100	/* 0x prefix is (still) legal */
#define	NZDIGITS	0x200	/* no zero digits detected */

/*
 * Conversion types.
 */
#define	CT_CHAR		0	/* %c conversion */
#define	CT_CCL		1	/* %[...] conversion */
#define	CT_STRING	2	/* %s conversion */
#define	CT_INT		3	/* %[dioupxX] conversion */
117
118static const u_char *__sccl(char *, const u_char *);
119
/*
 * sscanf --
 *	Variadic front end for vsscanf(): scan the NUL-terminated string
 *	`ibuf' according to the format `fmt', storing results through the
 *	pointer arguments that follow.
 *
 *	Returns whatever vsscanf() returns for the same input and format.
 */
int
sscanf(const char *ibuf, const char *fmt, ...)
{
	va_list ap;
	int ret;

	va_start(ap, fmt);
	ret = vsscanf(ibuf, fmt, ap);
	va_end(ap);	/* must pair with va_start before returning */
	return (ret);
}
131
132int
133vsscanf(const char *inp, char const *fmt0, va_list ap)
134{
135 int inr;
136 const u_char *fmt = (const u_char *)fmt0;
137 int c; /* character from format, or conversion */
138 size_t width; /* field width, or 0 */
139 char *p; /* points into all kinds of strings */
140 int n; /* handy integer */
141 int flags; /* flags as defined above */
142 char *p0; /* saves original value of p when necessary */
143 int nassigned; /* number of fields assigned */
144 int nconversions; /* number of conversions */
145 int nread; /* number of characters consumed from fp */
146 int base; /* base argument to conversion function */
147 char ccltab[256]; /* character class table for %[...] */
148 char buf[BUF]; /* buffer for numeric conversions */
149
150 /* `basefix' is used to avoid `if' tests in the integer scanner */
151 static short basefix[17] =
152 { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
153
154 inr = strlen(inp);
155
156 nassigned = 0;
157 nconversions = 0;
158 nread = 0;
159 base = 0; /* XXX just to keep gcc happy */
160 for (;;) {
161 c = *fmt++;
162 if (c == 0)
163 return (nassigned);
164 if (isspace(c)) {
165 while (inr > 0 && isspace(*inp))
166 nread++, inr--, inp++;
167 continue;
168 }
169 if (c != '%')
170 goto literal;
171 width = 0;
172 flags = 0;
173 /*
174 * switch on the format. continue if done;
175 * break once format type is derived.
176 */
177again: c = *fmt++;
178 switch (c) {
179 case '%':
180literal:
181 if (inr <= 0)
182 goto input_failure;
183 if (*inp != c)
184 goto match_failure;
185 inr--, inp++;
186 nread++;
187 continue;
188
189 case '*':
190 flags |= SUPPRESS;
191 goto again;
192 case 'l':
193 if (flags & LONG) {
194 flags &= ~LONG;
195 flags |= LONGLONG;
196 } else
197 flags |= LONG;
198 goto again;
199 case 'q':
200 flags |= LONGLONG; /* not quite */
201 goto again;
202 case 'h':
203 if (flags & SHORT) {
204 flags &= ~SHORT;
205 flags |= SHORTSHORT;
206 } else
207 flags |= SHORT;
208 goto again;
209
210 case '0': case '1': case '2': case '3': case '4':
211 case '5': case '6': case '7': case '8': case '9':
212 width = width * 10 + c - '0';
213 goto again;
214
215 /*
216 * Conversions.
217 */
218 case 'd':
219 c = CT_INT;
220 base = 10;
221 break;
222
223 case 'i':
224 c = CT_INT;
225 base = 0;
226 break;
227
228 case 'o':
229 c = CT_INT;
230 flags |= UNSIGNED;
231 base = 8;
232 break;
233
234 case 'u':
235 c = CT_INT;
236 flags |= UNSIGNED;
237 base = 10;
238 break;
239
240 case 'X':
241 case 'x':
242 flags |= PFXOK; /* enable 0x prefixing */
243 c = CT_INT;
244 flags |= UNSIGNED;
245 base = 16;
246 break;
247
248 case 's':
249 c = CT_STRING;
250 break;
251
252 case '[':
253 fmt = __sccl(ccltab, fmt);
254 flags |= NOSKIP;
255 c = CT_CCL;
256 break;
257
258 case 'c':
259 flags |= NOSKIP;
260 c = CT_CHAR;
261 break;
262
263 case 'p': /* pointer format is like hex */
264 flags |= POINTER | PFXOK;
265 c = CT_INT;
266 flags |= UNSIGNED;
267 base = 16;
268 break;
269
270 case 'n':
271 nconversions++;
272 if (flags & SUPPRESS) /* ??? */
273 continue;
274 if (flags & SHORTSHORT)
275 *va_arg(ap, char *) = nread;
276 else if (flags & SHORT)
277 *va_arg(ap, short *) = nread;
278 else if (flags & LONG)
279 *va_arg(ap, long *) = nread;
280 else if (flags & LONGLONG)
281 *va_arg(ap, long long *) = nread;
282 else
283 *va_arg(ap, int *) = nread;
284 continue;
285 }
286
287 /*
288 * We have a conversion that requires input.
289 */
290 if (inr <= 0)
291 goto input_failure;
292
293 /*
294 * Consume leading white space, except for formats
295 * that suppress this.
296 */
297 if ((flags & NOSKIP) == 0) {
298 while (isspace(*inp)) {
299 nread++;
300 if (--inr > 0)
301 inp++;
302 else
303 goto input_failure;
304 }
305 /*
306 * Note that there is at least one character in
307 * the buffer, so conversions that do not set NOSKIP
308 * can no longer result in an input failure.
309 */
310 }
311
312 /*
313 * Do the conversion.
314 */
315 switch (c) {
316
317 case CT_CHAR:
318 /* scan arbitrary characters (sets NOSKIP) */
319 if (width == 0)
320 width = 1;
321 if (flags & SUPPRESS) {
322 size_t sum = 0;
323 for (;;) {
324 if ((n = inr) < (int)width) {
325 sum += n;
326 width -= n;
327 inp += n;
328 if (sum == 0)
329 goto input_failure;
330 break;
331 } else {
332 sum += width;
333 inr -= width;
334 inp += width;
335 break;
336 }
337 }
338 nread += sum;
339 } else {
340 bcopy(inp, va_arg(ap, char *), width);
341 inr -= width;
342 inp += width;
343 nread += width;
344 nassigned++;
345 }
346 nconversions++;
347 break;
348
349 case CT_CCL:
350 /* scan a (nonempty) character class (sets NOSKIP) */
351 if (width == 0)
352 width = (size_t)~0; /* `infinity' */
353 /* take only those things in the class */
354 if (flags & SUPPRESS) {
355 n = 0;
356 while (ccltab[(unsigned char)*inp]) {
357 n++, inr--, inp++;
358 if (--width == 0)
359 break;
360 if (inr <= 0) {
361 if (n == 0)
362 goto input_failure;
363 break;
364 }
365 }
366 if (n == 0)
367 goto match_failure;
368 } else {
369 p0 = p = va_arg(ap, char *);
370 while (ccltab[(unsigned char)*inp]) {
371 inr--;
372 *p++ = *inp++;
373 if (--width == 0)
374 break;
375 if (inr <= 0) {
376 if (p == p0)
377 goto input_failure;
378 break;
379 }
380 }
381 n = p - p0;
382 if (n == 0)
383 goto match_failure;
384 *p = 0;
385 nassigned++;
386 }
387 nread += n;
388 nconversions++;
389 break;
390
391 case CT_STRING:
392 /* like CCL, but zero-length string OK, & no NOSKIP */
393 if (width == 0)
394 width = (size_t)~0;
395 if (flags & SUPPRESS) {
396 n = 0;
397 while (!isspace(*inp)) {
398 n++, inr--, inp++;
399 if (--width == 0)
400 break;
401 if (inr <= 0)
402 break;
403 }
404 nread += n;
405 } else {
406 p0 = p = va_arg(ap, char *);
407 while (!isspace(*inp)) {
408 inr--;
409 *p++ = *inp++;
410 if (--width == 0)
411 break;
412 if (inr <= 0)
413 break;
414 }
415 *p = 0;
416 nread += p - p0;
417 nassigned++;
418 }
419 nconversions++;
420 continue;
421
422 case CT_INT:
423 /* scan an integer as if by the conversion function */
424#ifdef hardway
425 if (width == 0 || width > sizeof(buf) - 1)
426 width = sizeof(buf) - 1;
427#else
428 /* size_t is unsigned, hence this optimisation */
429 if (--width > sizeof(buf) - 2)
430 width = sizeof(buf) - 2;
431 width++;
432#endif
433 flags |= SIGNOK | NDIGITS | NZDIGITS;
434 for (p = buf; width; width--) {
435 c = *inp;
436 /*
437 * Switch on the character; `goto ok'
438 * if we accept it as a part of number.
439 */
440 switch (c) {
441
442 /*
443 * The digit 0 is always legal, but is
444 * special. For %i conversions, if no
445 * digits (zero or nonzero) have been
446 * scanned (only signs), we will have
447 * base==0. In that case, we should set
448 * it to 8 and enable 0x prefixing.
449 * Also, if we have not scanned zero digits
450 * before this, do not turn off prefixing
451 * (someone else will turn it off if we
452 * have scanned any nonzero digits).
453 */
454 case '0':
455 if (base == 0) {
456 base = 8;
457 flags |= PFXOK;
458 }
459 if (flags & NZDIGITS)
460 flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
461 else
462 flags &= ~(SIGNOK|PFXOK|NDIGITS);
463 goto ok;
464
465 /* 1 through 7 always legal */
466 case '1': case '2': case '3':
467 case '4': case '5': case '6': case '7':
468 base = basefix[base];
469 flags &= ~(SIGNOK | PFXOK | NDIGITS);
470 goto ok;
471
472 /* digits 8 and 9 ok iff decimal or hex */
473 case '8': case '9':
474 base = basefix[base];
475 if (base <= 8)
476 break; /* not legal here */
477 flags &= ~(SIGNOK | PFXOK | NDIGITS);
478 goto ok;
479
480 /* letters ok iff hex */
481 case 'A': case 'B': case 'C':
482 case 'D': case 'E': case 'F':
483 case 'a': case 'b': case 'c':
484 case 'd': case 'e': case 'f':
485 /* no need to fix base here */
486 if (base <= 10)
487 break; /* not legal here */
488 flags &= ~(SIGNOK | PFXOK | NDIGITS);
489 goto ok;
490
491 /* sign ok only as first character */
492 case '+': case '-':
493 if (flags & SIGNOK) {
494 flags &= ~SIGNOK;
495 goto ok;
496 }
497 break;
498
499 /* x ok iff flag still set & 2nd char */
500 case 'x': case 'X':
501 if (flags & PFXOK && p == buf + 1) {
502 base = 16; /* if %i */
503 flags &= ~PFXOK;
504 goto ok;
505 }
506 break;
507 }
508
509 /*
510 * If we got here, c is not a legal character
511 * for a number. Stop accumulating digits.
512 */
513 break;
514 ok:
515 /*
516 * c is legal: store it and look at the next.
517 */
518 *p++ = c;
519 if (--inr > 0)
520 inp++;
521 else
522 break; /* end of input */
523 }
524 /*
525 * If we had only a sign, it is no good; push
526 * back the sign. If the number ends in `x',
527 * it was [sign] '0' 'x', so push back the x
528 * and treat it as [sign] '0'.
529 */
530 if (flags & NDIGITS) {
531 if (p > buf) {
532 inp--;
533 inr++;
534 }
535 goto match_failure;
536 }
537 c = ((u_char *)p)[-1];
538 if (c == 'x' || c == 'X') {
539 --p;
540 inp--;
541 inr++;
542 }
543 if ((flags & SUPPRESS) == 0) {
544 u_quad_t res;
545
546 *p = 0;
547 if ((flags & UNSIGNED) == 0)
548 res = strtoq(buf, (char **)NULL, base);
549 else
550 res = strtouq(buf, (char **)NULL, base);
551 if (flags & POINTER)
552 *va_arg(ap, void **) =
553 (void *)(uintptr_t)res;
554 else if (flags & SHORTSHORT)
555 *va_arg(ap, char *) = res;
556 else if (flags & SHORT)
557 *va_arg(ap, short *) = res;
558 else if (flags & LONG)
559 *va_arg(ap, long *) = res;
560 else if (flags & LONGLONG)
561 *va_arg(ap, long long *) = res;
562 else
563 *va_arg(ap, int *) = res;
564 nassigned++;
565 }
566 nread += p - buf;
567 nconversions++;
568 break;
569
570 }
571 }
572input_failure:
573 return (nconversions != 0 ? nassigned : -1);
574match_failure:
575 return (nassigned);
576}
577
/*
 * Fill in the given table from the scanset at the given format
 * (just after `[').  Return a pointer to the character past the
 * closing `]', or to the terminating NUL if the format ends before
 * the scanset is closed.  The table (256 entries, one per character
 * value) has a 1 wherever characters should be considered part of
 * the scanset and a 0 everywhere else.
 */
static const unsigned char *
__sccl(char *tab, const unsigned char *fmt)
{
	int c, n, v;

	/* first `clear' the whole table */
	c = *fmt++;		/* first char hat => negated scanset */
	if (c == '^') {
		v = 1;		/* default => accept */
		c = *fmt++;	/* get new first char */
	} else
		v = 0;		/* default => reject */

	/* tab is an array of char, so one byte per entry */
	(void) memset(tab, v, 256);

	if (c == 0)
		return (fmt - 1);/* format ended before closing ] */

	/*
	 * Now set the entries corresponding to the actual scanset
	 * to the opposite of the above.
	 *
	 * The first character may be ']' (or '-') without being special;
	 * the last character may be '-'.
	 */
	v = 1 - v;
	for (;;) {
		tab[c] = v;		/* take character c */
doswitch:
		n = *fmt++;		/* and examine the next */
		switch (n) {

		case 0:			/* format ended too soon */
			return (fmt - 1);

		case '-':
			/*
			 * A scanset of the form
			 *	[01+-]
			 * is defined as `the digit 0, the digit 1,
			 * the character +, the character -', but
			 * the effect of a scanset such as
			 *	[a-zA-Z0-9]
			 * is implementation defined.  The V7 Unix
			 * scanf treats `a-z' as `the letters a through
			 * z', but treats `a-a' as `the letter a, the
			 * character -, and the letter a'.
			 *
			 * For compatibility, the `-' is not considered
			 * to define a range if the character following
			 * it is either a close bracket (required by ANSI)
			 * or is not numerically greater than the character
			 * we just stored in the table (c).
			 *
			 * The test must be `<=': with `<' an `a-a' would
			 * enter the fill loop below, which pre-increments
			 * and would therefore mark the character after `a'
			 * (and index past the table for character 255).
			 */
			n = *fmt;
			if (n == ']' || n <= c) {
				c = '-';
				break;	/* resume the for(;;) */
			}
			fmt++;
			/* fill in the range; n > c, so this ends with c == n */
			do {
				tab[++c] = v;
			} while (c < n);
			/*
			 * Alas, the V7 Unix scanf also treats formats
			 * such as [a-c-e] as `the letters a through e'.
			 * This too is permitted by the standard....
			 */
			goto doswitch;

		case ']':		/* end of scanset */
			return (fmt);

		default:		/* just another character */
			c = n;
			break;
		}
	}
	/* NOTREACHED */
}