]> git.saurik.com Git - apple/xnu.git/blame - libkern/stdio/scanf.c
xnu-7195.101.1.tar.gz
[apple/xnu.git] / libkern / stdio / scanf.c
CommitLineData
91447636 1/*
39037602 2 * Copyright (c) 2004-2016 Apple Computer, Inc. All rights reserved.
91447636 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
0a7de745 5 *
8f6c56a5
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
2d21ac55
A
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
0a7de745 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
0a7de745 17 *
8f6c56a5
A
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
91447636 24 * limitations under the License.
0a7de745 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
91447636
A
27 */
28/*-
29 * Copyright (c) 1990, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * This code is derived from software contributed to Berkeley by
33 * Chris Torek.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 */
63
f427ee49
A
64#include <stdarg.h>
65#include <stddef.h>
66#include <string.h>
91447636 67#include <sys/cdefs.h>
f427ee49
A
68#include <sys/param.h>
69
70quad_t strtoq(const char *, char **, int);
71u_quad_t strtouq(const char *, char **, int);
91447636 72
91447636
A
/*
 * Return non-zero if `c' is one of the white-space characters of the
 * "C" locale: space, horizontal tab, newline, vertical tab, form feed
 * or carriage return.
 *
 * The previous test compared against both '\n' and '\12', which are the
 * same character, so '\v', '\f' and '\r' were never treated as white
 * space.
 */
static inline int
isspace(char c)
{
	switch (c) {
	case ' ':
	case '\t':
	case '\n':
	case '\v':
	case '\f':
	case '\r':
		return 1;
	default:
		return 0;
	}
}
91447636 78
0a7de745 79#define BUF 32 /* Size of buf[] in vsscanf: at most BUF - 1 numeric characters plus a terminating NUL. */
91447636
A
80
81/*
82 * Flags used during conversion.
83 */
0a7de745
A
84#define LONG 0x01 /* l: long (vsscanf in this file has no floating-point conversions) */
85#define SHORT 0x04 /* h: short */
86#define SUPPRESS 0x08 /* *: suppress assignment */
87#define POINTER 0x10 /* p: void * (as hex) */
88#define NOSKIP 0x20 /* [ or c: do not skip blanks */
89#define LONGLONG 0x400 /* ll: long long (+ deprecated q: quad) */
90#define SHORTSHORT 0x4000 /* hh: char */
91#define UNSIGNED 0x8000 /* %[oupxX] conversions */
91447636
A
92
93/*
94 * The following are used in numeric conversions only:
95 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point;
96 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral.
97 */
0a7de745
A
98#define SIGNOK 0x40 /* +/- is (still) legal */
99#define NDIGITS 0x80 /* no digits detected */
91447636 100
0a7de745
A
101#define DPTOK 0x100 /* (float) decimal point is still legal; same bit as PFXOK, not referenced by vsscanf */
102#define EXPOK 0x200 /* (float) exponent (e+3, etc) still legal; same bit as NZDIGITS, not referenced by vsscanf */
91447636 103
0a7de745
A
104#define PFXOK 0x100 /* 0x prefix is (still) legal */
105#define NZDIGITS 0x200 /* no zero digits detected */
91447636
A
106
107/*
108 * Conversion types.
109 */
0a7de745
A
110#define CT_CHAR 0 /* %c conversion */
111#define CT_CCL 1 /* %[...] conversion */
112#define CT_STRING 2 /* %s conversion */
113#define CT_INT 3 /* %[dioupxX] conversion */
91447636
A
114
115static const u_char *__sccl(char *, const u_char *);
116
f427ee49
A
117int sscanf(const char *, const char *, ...);
118int vsscanf(const char *, char const *, va_list);
119
91447636
A
/*
 * sscanf --
 *	Variadic front end for vsscanf(): collect the trailing arguments
 *	into a va_list, hand them to vsscanf() together with the input
 *	string `ibuf' and the format `fmt', and return vsscanf()'s result
 *	unchanged.
 */
int
sscanf(const char *ibuf, const char *fmt, ...)
{
	va_list args;
	int result;

	va_start(args, fmt);
	result = vsscanf(ibuf, fmt, args);
	va_end(args);

	return result;
}
131
132int
133vsscanf(const char *inp, char const *fmt0, va_list ap)
134{
f427ee49 135 ssize_t inr;
91447636 136 const u_char *fmt = (const u_char *)fmt0;
f427ee49 137 ssize_t width; /* field width, or 0 */
0a7de745 138 char *p; /* points into all kinds of strings */
0a7de745
A
139 int flags; /* flags as defined above */
140 char *p0; /* saves original value of p when necessary */
f427ee49
A
141 int nassigned = 0; /* number of fields assigned */
142 int nconversions = 0; /* number of conversions */
143 int nread = 0; /* number of characters consumed from fp */
144 int base = 0; /* base argument to conversion function */
0a7de745
A
145 char ccltab[256]; /* character class table for %[...] */
146 char buf[BUF]; /* buffer for numeric conversions */
91447636
A
147
148 /* `basefix' is used to avoid `if' tests in the integer scanner */
149 static short basefix[17] =
0a7de745 150 { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
91447636 151
f427ee49 152 inr = (ssize_t)strlen(inp);
0a7de745 153
91447636 154 for (;;) {
f427ee49 155 char c = (char)*fmt++; /* character from format, or conversion */
0a7de745
A
156 if (c == 0) {
157 return nassigned;
158 }
91447636 159 if (isspace(c)) {
39037602
A
160 while (inr > 0 && isspace(*inp)) {
161 nread++;
162 inr--;
163 inp++;
164 }
91447636
A
165 continue;
166 }
0a7de745 167 if (c != '%') {
91447636 168 goto literal;
0a7de745 169 }
91447636
A
170 width = 0;
171 flags = 0;
172 /*
173 * switch on the format. continue if done;
174 * break once format type is derived.
175 */
f427ee49
A
176again:
177 c = (char)*fmt++;
91447636
A
178 switch (c) {
179 case '%':
180literal:
0a7de745 181 if (inr <= 0) {
91447636 182 goto input_failure;
0a7de745
A
183 }
184 if (*inp != c) {
91447636 185 goto match_failure;
0a7de745 186 }
39037602
A
187 inr--;
188 inp++;
91447636
A
189 nread++;
190 continue;
191
192 case '*':
193 flags |= SUPPRESS;
194 goto again;
195 case 'l':
196 if (flags & LONG) {
197 flags &= ~LONG;
198 flags |= LONGLONG;
0a7de745 199 } else {
91447636 200 flags |= LONG;
0a7de745 201 }
91447636
A
202 goto again;
203 case 'q':
0a7de745 204 flags |= LONGLONG; /* not quite */
91447636
A
205 goto again;
206 case 'h':
207 if (flags & SHORT) {
208 flags &= ~SHORT;
209 flags |= SHORTSHORT;
0a7de745 210 } else {
91447636 211 flags |= SHORT;
0a7de745 212 }
91447636
A
213 goto again;
214
215 case '0': case '1': case '2': case '3': case '4':
216 case '5': case '6': case '7': case '8': case '9':
217 width = width * 10 + c - '0';
218 goto again;
219
220 /*
221 * Conversions.
222 */
223 case 'd':
224 c = CT_INT;
225 base = 10;
226 break;
227
228 case 'i':
229 c = CT_INT;
230 base = 0;
231 break;
232
233 case 'o':
234 c = CT_INT;
235 flags |= UNSIGNED;
236 base = 8;
237 break;
238
239 case 'u':
240 c = CT_INT;
241 flags |= UNSIGNED;
242 base = 10;
243 break;
244
245 case 'X':
246 case 'x':
0a7de745 247 flags |= PFXOK; /* enable 0x prefixing */
91447636
A
248 c = CT_INT;
249 flags |= UNSIGNED;
250 base = 16;
251 break;
252
253 case 's':
254 c = CT_STRING;
255 break;
256
257 case '[':
258 fmt = __sccl(ccltab, fmt);
259 flags |= NOSKIP;
260 c = CT_CCL;
261 break;
262
263 case 'c':
264 flags |= NOSKIP;
265 c = CT_CHAR;
266 break;
267
0a7de745 268 case 'p': /* pointer format is like hex */
91447636
A
269 flags |= POINTER | PFXOK;
270 c = CT_INT;
271 flags |= UNSIGNED;
272 base = 16;
273 break;
274
275 case 'n':
276 nconversions++;
0a7de745 277 if (flags & SUPPRESS) { /* ??? */
91447636 278 continue;
0a7de745
A
279 }
280 if (flags & SHORTSHORT) {
f427ee49 281 *va_arg(ap, char *) = (char)nread;
0a7de745 282 } else if (flags & SHORT) {
f427ee49 283 *va_arg(ap, short *) = (short)nread;
0a7de745 284 } else if (flags & LONG) {
f427ee49 285 *va_arg(ap, long *) = (long)nread;
0a7de745 286 } else if (flags & LONGLONG) {
f427ee49 287 *va_arg(ap, long long *) = (long long)nread;
0a7de745 288 } else {
f427ee49 289 *va_arg(ap, int *) = (int)nread;
0a7de745 290 }
91447636
A
291 continue;
292 }
293
294 /*
295 * We have a conversion that requires input.
296 */
0a7de745 297 if (inr <= 0) {
91447636 298 goto input_failure;
0a7de745 299 }
91447636
A
300
301 /*
302 * Consume leading white space, except for formats
303 * that suppress this.
304 */
305 if ((flags & NOSKIP) == 0) {
306 while (isspace(*inp)) {
307 nread++;
0a7de745 308 if (--inr > 0) {
91447636 309 inp++;
0a7de745 310 } else {
91447636 311 goto input_failure;
0a7de745 312 }
91447636
A
313 }
314 /*
315 * Note that there is at least one character in
316 * the buffer, so conversions that do not set NOSKIP
317 * can no longer result in an input failure.
318 */
319 }
320
321 /*
322 * Do the conversion.
323 */
324 switch (c) {
91447636
A
325 case CT_CHAR:
326 /* scan arbitrary characters (sets NOSKIP) */
0a7de745 327 if (width == 0) {
91447636 328 width = 1;
0a7de745 329 }
91447636
A
330 if (flags & SUPPRESS) {
331 size_t sum = 0;
332 for (;;) {
f427ee49
A
333 ssize_t n = inr;
334 if (n < width) {
335 sum += (size_t)n;
91447636
A
336 width -= n;
337 inp += n;
0a7de745 338 if (sum == 0) {
91447636 339 goto input_failure;
0a7de745 340 }
91447636
A
341 break;
342 } else {
f427ee49 343 sum += (size_t)width;
91447636
A
344 inr -= width;
345 inp += width;
346 break;
347 }
348 }
349 nread += sum;
350 } else {
351 bcopy(inp, va_arg(ap, char *), width);
352 inr -= width;
353 inp += width;
354 nread += width;
355 nassigned++;
356 }
357 nconversions++;
358 break;
359
f427ee49 360 case CT_CCL: {
91447636 361 /* scan a (nonempty) character class (sets NOSKIP) */
0a7de745 362 if (width == 0) {
f427ee49 363 width = SSIZE_MAX; /* `infinity' */
0a7de745 364 }
91447636 365 /* take only those things in the class */
f427ee49 366 ptrdiff_t n;
91447636
A
367 if (flags & SUPPRESS) {
368 n = 0;
369 while (ccltab[(unsigned char)*inp]) {
39037602
A
370 n++;
371 inr--;
372 inp++;
0a7de745 373 if (--width == 0) {
91447636 374 break;
0a7de745 375 }
91447636 376 if (inr <= 0) {
0a7de745 377 if (n == 0) {
91447636 378 goto input_failure;
0a7de745 379 }
91447636
A
380 break;
381 }
382 }
0a7de745 383 if (n == 0) {
91447636 384 goto match_failure;
0a7de745 385 }
91447636
A
386 } else {
387 p0 = p = va_arg(ap, char *);
388 while (ccltab[(unsigned char)*inp]) {
389 inr--;
390 *p++ = *inp++;
0a7de745 391 if (--width == 0) {
91447636 392 break;
0a7de745 393 }
91447636 394 if (inr <= 0) {
0a7de745 395 if (p == p0) {
91447636 396 goto input_failure;
0a7de745 397 }
91447636
A
398 break;
399 }
400 }
401 n = p - p0;
0a7de745 402 if (n == 0) {
91447636 403 goto match_failure;
0a7de745 404 }
91447636
A
405 *p = 0;
406 nassigned++;
407 }
408 nread += n;
409 nconversions++;
410 break;
f427ee49 411 }
91447636
A
412
413 case CT_STRING:
414 /* like CCL, but zero-length string OK, & no NOSKIP */
0a7de745 415 if (width == 0) {
f427ee49 416 width = SSIZE_MAX;
0a7de745 417 }
91447636 418 if (flags & SUPPRESS) {
f427ee49 419 size_t n = 0;
91447636 420 while (!isspace(*inp)) {
39037602
A
421 n++;
422 inr--;
423 inp++;
0a7de745 424 if (--width == 0) {
91447636 425 break;
0a7de745
A
426 }
427 if (inr <= 0) {
91447636 428 break;
0a7de745 429 }
91447636
A
430 }
431 nread += n;
432 } else {
433 p0 = p = va_arg(ap, char *);
434 while (!isspace(*inp)) {
435 inr--;
436 *p++ = *inp++;
0a7de745 437 if (--width == 0) {
91447636 438 break;
0a7de745
A
439 }
440 if (inr <= 0) {
91447636 441 break;
0a7de745 442 }
91447636
A
443 }
444 *p = 0;
445 nread += p - p0;
446 nassigned++;
447 }
448 nconversions++;
449 continue;
450
451 case CT_INT:
452 /* scan an integer as if by the conversion function */
f427ee49 453 if (width <= 0 || width > (ssize_t)(sizeof(buf) - 1)) {
91447636 454 width = sizeof(buf) - 1;
0a7de745 455 }
91447636
A
456 flags |= SIGNOK | NDIGITS | NZDIGITS;
457 for (p = buf; width; width--) {
458 c = *inp;
459 /*
460 * Switch on the character; `goto ok'
461 * if we accept it as a part of number.
462 */
463 switch (c) {
91447636
A
464 /*
465 * The digit 0 is always legal, but is
466 * special. For %i conversions, if no
467 * digits (zero or nonzero) have been
468 * scanned (only signs), we will have
469 * base==0. In that case, we should set
470 * it to 8 and enable 0x prefixing.
471 * Also, if we have not scanned zero digits
472 * before this, do not turn off prefixing
473 * (someone else will turn it off if we
474 * have scanned any nonzero digits).
475 */
476 case '0':
477 if (base == 0) {
478 base = 8;
479 flags |= PFXOK;
480 }
0a7de745
A
481 if (flags & NZDIGITS) {
482 flags &= ~(SIGNOK | NZDIGITS | NDIGITS);
483 } else {
484 flags &= ~(SIGNOK | PFXOK | NDIGITS);
485 }
91447636
A
486 goto ok;
487
488 /* 1 through 7 always legal */
489 case '1': case '2': case '3':
490 case '4': case '5': case '6': case '7':
491 base = basefix[base];
492 flags &= ~(SIGNOK | PFXOK | NDIGITS);
493 goto ok;
494
495 /* digits 8 and 9 ok iff decimal or hex */
496 case '8': case '9':
497 base = basefix[base];
0a7de745
A
498 if (base <= 8) {
499 break; /* not legal here */
500 }
91447636
A
501 flags &= ~(SIGNOK | PFXOK | NDIGITS);
502 goto ok;
503
504 /* letters ok iff hex */
505 case 'A': case 'B': case 'C':
506 case 'D': case 'E': case 'F':
507 case 'a': case 'b': case 'c':
508 case 'd': case 'e': case 'f':
509 /* no need to fix base here */
0a7de745
A
510 if (base <= 10) {
511 break; /* not legal here */
512 }
91447636
A
513 flags &= ~(SIGNOK | PFXOK | NDIGITS);
514 goto ok;
515
516 /* sign ok only as first character */
517 case '+': case '-':
518 if (flags & SIGNOK) {
519 flags &= ~SIGNOK;
520 goto ok;
521 }
522 break;
523
524 /* x ok iff flag still set & 2nd char */
525 case 'x': case 'X':
526 if (flags & PFXOK && p == buf + 1) {
0a7de745 527 base = 16; /* if %i */
91447636
A
528 flags &= ~PFXOK;
529 goto ok;
530 }
531 break;
532 }
533
534 /*
535 * If we got here, c is not a legal character
536 * for a number. Stop accumulating digits.
537 */
538 break;
0a7de745 539ok:
91447636
A
540 /*
541 * c is legal: store it and look at the next.
542 */
543 *p++ = c;
0a7de745 544 if (--inr > 0) {
91447636 545 inp++;
0a7de745
A
546 } else {
547 break; /* end of input */
548 }
91447636
A
549 }
550 /*
551 * If we had only a sign, it is no good; push
552 * back the sign. If the number ends in `x',
553 * it was [sign] '0' 'x', so push back the x
554 * and treat it as [sign] '0'.
555 */
556 if (flags & NDIGITS) {
557 if (p > buf) {
558 inp--;
559 inr++;
560 }
561 goto match_failure;
562 }
f427ee49 563 c = p[-1];
91447636
A
564 if (c == 'x' || c == 'X') {
565 --p;
566 inp--;
567 inr++;
568 }
569 if ((flags & SUPPRESS) == 0) {
570 u_quad_t res;
571
572 *p = 0;
0a7de745 573 if ((flags & UNSIGNED) == 0) {
f427ee49 574 res = (u_quad_t)strtoq(buf, (char **)NULL, base);
0a7de745
A
575 } else {
576 res = strtouq(buf, (char **)NULL, base);
577 }
578 if (flags & POINTER) {
91447636 579 *va_arg(ap, void **) =
0a7de745
A
580 (void *)(uintptr_t)res;
581 } else if (flags & SHORTSHORT) {
f427ee49 582 *va_arg(ap, char *) = (char)res;
0a7de745 583 } else if (flags & SHORT) {
f427ee49 584 *va_arg(ap, short *) = (short)res;
0a7de745 585 } else if (flags & LONG) {
f427ee49 586 *va_arg(ap, long *) = (long)res;
0a7de745 587 } else if (flags & LONGLONG) {
f427ee49 588 *va_arg(ap, long long *) = (long long)res;
0a7de745 589 } else {
f427ee49 590 *va_arg(ap, int *) = (int)res;
0a7de745 591 }
91447636
A
592 nassigned++;
593 }
594 nread += p - buf;
595 nconversions++;
596 break;
91447636
A
597 }
598 }
599input_failure:
0a7de745 600 return nconversions != 0 ? nassigned : -1;
91447636 601match_failure:
0a7de745 602 return nassigned;
91447636
A
603}
604
/*
 * Fill in the given table from the scanset at the given format
 * (just after `[').  Return a pointer to the character past the
 * closing `]', or to the terminating NUL if the format ends before
 * the scanset is closed.  The table has a 1 wherever characters
 * should be considered part of the scanset.
 *
 * `tab' must have room for 256 entries, one per unsigned char value.
 */
static const u_char *
__sccl(char *tab, const u_char *fmt)
{
	char v;                         /* value stored for scanset members */
	int c = *fmt++;                 /* first char hat => negated scanset */

	if (c == '^') {
		v = 1;                  /* default => accept */
		c = *fmt++;             /* get new first char */
	} else {
		v = 0;                  /* default => reject */
	}

	/* first set the whole table to the default */
	/* XXX: Will not work if sizeof(tab*) > sizeof(char) */
	(void) memset(tab, v, 256);

	if (c == 0) {
		return fmt - 1;         /* format ended before closing ] */
	}

	/*
	 * Now set the entries corresponding to the actual scanset
	 * to the opposite of the above.
	 *
	 * The first character may be ']' (or '-') without being special;
	 * the last character may be '-'.
	 */
	v = (char)(1 - v);
	for (;;) {
		int n;

		tab[c] = v;             /* take character c */
doswitch:
		n = *fmt++;
		switch (n) {
		case 0:                 /* format ended too soon */
			return fmt - 1;

		case '-':
			/*
			 * A scanset of the form
			 *	[01+-]
			 * is defined as `the digit 0, the digit 1,
			 * the character +, the character -', but
			 * the effect of a scanset such as
			 *	[a-zA-Z0-9]
			 * is implementation defined.  The V7 Unix
			 * scanf treats `a-z' as `the letters a through
			 * z', but treats `a-a' as `the letter a, the
			 * character -, and the letter a'.
			 *
			 * For compatibility, the `-' is not considered
			 * to define a range if the character following
			 * it is either a close bracket (required by ANSI)
			 * or is not numerically greater than the character
			 * we just stored in the table (c).
			 *
			 * The test must be `n <= c': with n == c the fill
			 * loop below would mark c + 1, which for c == 255
			 * lies one past the end of the table.
			 */
			n = *fmt;
			if (n == ']' || n <= c) {
				c = '-';
				break;          /* resume the for(;;) */
			}
			fmt++;
			/* fill in the range (c < n here, so ++c <= 255) */
			do {
				tab[++c] = v;
			} while (c < n);
			c = n;
			/*
			 * Alas, the V7 Unix scanf also treats formats
			 * such as [a-c-e] as `the letters a through e'.
			 * This too is permitted by the standard....
			 */
			goto doswitch;

		case ']':               /* end of scanset */
			return fmt;

		default:                /* just another character */
			c = n;
			break;
		}
	}
	/* NOTREACHED */
}