file_cmds-60.tar.gz
[apple/file_cmds.git] / file / apprentice.c
1 /*
2 * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * Portions Copyright (c) 1999 Apple Computer, Inc. All Rights
7 * Reserved. This file contains Original Code and/or Modifications of
8 * Original Code as defined in and that are subject to the Apple Public
9 * Source License Version 1.1 (the "License"). You may not use this file
10 * except in compliance with the License. Please obtain a copy of the
11 * License at http://www.apple.com/publicsource and read it before using
12 * this file.
13 *
14 * The Original Code and all software distributed under the License are
15 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE OR NON- INFRINGEMENT. Please see the
19 * License for the specific language governing rights and limitations
20 * under the License.
21 *
22 * @APPLE_LICENSE_HEADER_END@
23 */
24 /* $OpenBSD: apprentice.c,v 1.4 1997/02/09 23:58:16 millert Exp $ */
25
26 /*
27 * apprentice - make one pass through /etc/magic, learning its secrets.
28 *
29 * Copyright (c) Ian F. Darwin, 1987.
30 * Written by Ian F. Darwin.
31 *
32 * This software is not subject to any license of the American Telephone
33 * and Telegraph Company or of the Regents of the University of California.
34 *
35 * Permission is granted to anyone to use this software for any purpose on
36 * any computer system, and to alter it and redistribute it freely, subject
37 * to the following restrictions:
38 *
39 * 1. The author is not responsible for the consequences of use of this
40 * software, no matter how awful, even if they arise from flaws in it.
41 *
42 * 2. The origin of this software must not be misrepresented, either by
43 * explicit claim or by omission. Since few users ever read sources,
44 * credits must appear in the documentation.
45 *
46 * 3. Altered versions must be plainly marked as such, and must not be
47 * misrepresented as being the original software. Since few users
48 * ever read sources, credits must appear in the documentation.
49 *
50 * 4. This notice may not be removed or altered.
51 */
52
53 #include <stdio.h>
54 #include <stdlib.h>
55 #include <string.h>
56 #include <ctype.h>
57 #include <errno.h>
58 #include "file.h"
59
60 #ifndef lint
61 #if 0
62 static char *moduleid = "$OpenBSD: apprentice.c,v 1.4 1997/02/09 23:58:16 millert Exp $";
63 #endif
64 #endif /* lint */
65
/*
 * EATAB - advance the pointer named `l' that is in scope at the point of
 * use past any run of 7-bit ASCII white space.  Expands to a braced
 * statement, so it is written as `EATAB;' on a line of its own.
 */
#define	EATAB	{ \
	while ((((unsigned char) *l) & ~0x7f) == 0 && \
	    isspace((unsigned char) *l)) \
		++l; \
}

/*
 * LOWCASE - lower-case form of an upper-case character; every other value
 * is handed back unchanged.  The argument is evaluated more than once, so
 * it must be free of side effects.
 */
#define	LOWCASE(l)	(!isupper((unsigned char) (l)) ? (l) : \
			    tolower((unsigned char) (l)))
70
71
72 static int getvalue __P((struct magic *, char **));
73 static int hextoint __P((int));
74 static char *getstr __P((char *, char *, int, int *));
75 static int parse __P((char *, int *, int));
76 static void eatsize __P((char **));
77
78 static int maxmagic = 0;
79
80 static int apprentice_1 __P((char *, int));
81
82 int
83 apprentice(fn, check)
84 char *fn; /* list of magic files */
85 int check; /* non-zero? checking-only run. */
86 {
87 char *p, *mfn;
88 int file_err, errs = -1;
89
90 maxmagic = MAXMAGIS;
91 magic = (struct magic *) calloc(sizeof(struct magic), maxmagic);
92 mfn = malloc(strlen(fn)+1);
93 if (magic == NULL || mfn == NULL) {
94 (void) fprintf(stderr, "%s: Out of memory.\n", progname);
95 if (check)
96 return -1;
97 else
98 exit(1);
99 }
100 fn = strcpy(mfn, fn);
101
102 while (fn) {
103 p = strchr(fn, ':');
104 if (p)
105 *p++ = '\0';
106 file_err = apprentice_1(fn, check);
107 if (file_err > errs)
108 errs = file_err;
109 fn = p;
110 }
111 if (errs == -1)
112 (void) fprintf(stderr, "%s: couldn't find any magic files!\n",
113 progname);
114 if (!check && errs)
115 exit(1);
116
117 free(mfn);
118 return errs;
119 }
120
121 static int
122 apprentice_1(fn, check)
123 char *fn; /* name of magic file */
124 int check; /* non-zero? checking-only run. */
125 {
126 static const char hdr[] =
127 "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
128 FILE *f;
129 char line[BUFSIZ+1];
130 int errs = 0;
131
132 f = fopen(fn, "r");
133 if (f==NULL) {
134 if (errno != ENOENT)
135 (void) fprintf(stderr,
136 "%s: can't read magic file %s (%s)\n",
137 progname, fn, strerror(errno));
138 return -1;
139 }
140
141 /* parse it */
142 if (check) /* print silly verbose header for USG compat. */
143 (void) printf("%s\n", hdr);
144
145 for (lineno = 1;fgets(line, BUFSIZ, f) != NULL; lineno++) {
146 if (line[0]=='#') /* comment, do not parse */
147 continue;
148 if (strlen(line) <= (unsigned)1) /* null line, garbage, etc */
149 continue;
150 line[strlen(line)-1] = '\0'; /* delete newline */
151 if (parse(line, &nmagic, check) != 0)
152 errs = 1;
153 }
154
155 (void) fclose(f);
156 return errs;
157 }
158
159 /*
160 * extend the sign bit if the comparison is to be signed
161 */
162 uint32
163 signextend(m, v)
164 struct magic *m;
165 uint32 v;
166 {
167 if (!(m->flag & UNSIGNED))
168 switch(m->type) {
169 /*
170 * Do not remove the casts below. They are
171 * vital. When later compared with the data,
172 * the sign extension must have happened.
173 */
174 case BYTE:
175 v = (char) v;
176 break;
177 case SHORT:
178 case BESHORT:
179 case LESHORT:
180 v = (short) v;
181 break;
182 case DATE:
183 case BEDATE:
184 case LEDATE:
185 case LONG:
186 case BELONG:
187 case LELONG:
188 v = (int32) v;
189 break;
190 case STRING:
191 break;
192 default:
193 magwarn("can't happen: m->type=%d\n",
194 m->type);
195 return -1;
196 }
197 return v;
198 }
199
200 /*
201 * parse one line from magic file, put into magic[index++] if valid
202 */
203 static int
204 parse(l, ndx, check)
205 char *l;
206 int *ndx, check;
207 {
208 int i = 0, nd = *ndx;
209 struct magic *m;
210 char *t, *s;
211
212 #define ALLOC_INCR 20
213 if (nd+1 >= maxmagic){
214 maxmagic += ALLOC_INCR;
215 if ((magic = (struct magic *) realloc(magic,
216 sizeof(struct magic) *
217 maxmagic)) == NULL) {
218 (void) fprintf(stderr, "%s: Out of memory.\n", progname);
219 if (check)
220 return -1;
221 else
222 exit(1);
223 }
224 memset(&magic[*ndx], 0, sizeof(struct magic) * ALLOC_INCR);
225 }
226 m = &magic[*ndx];
227 m->flag = 0;
228 m->cont_level = 0;
229
230 while (*l == '>') {
231 ++l; /* step over */
232 m->cont_level++;
233 }
234
235 if (m->cont_level != 0 && *l == '(') {
236 ++l; /* step over */
237 m->flag |= INDIR;
238 }
239 if (m->cont_level != 0 && *l == '&') {
240 ++l; /* step over */
241 m->flag |= ADD;
242 }
243
244 /* get offset, then skip over it */
245 m->offset = (int) strtoul(l,&t,0);
246 if (l == t)
247 magwarn("offset %s invalid", l);
248 l = t;
249
250 if (m->flag & INDIR) {
251 m->in.type = LONG;
252 m->in.offset = 0;
253 /*
254 * read [.lbs][+-]nnnnn)
255 */
256 if (*l == '.') {
257 l++;
258 switch (LOWCASE(*l)) {
259 case 'l':
260 m->in.type = LONG;
261 break;
262 case 'h':
263 case 's':
264 m->in.type = SHORT;
265 break;
266 case 'c':
267 case 'b':
268 m->in.type = BYTE;
269 break;
270 default:
271 magwarn("indirect offset type %c invalid", *l);
272 break;
273 }
274 l++;
275 }
276 s = l;
277 if (*l == '+' || *l == '-') l++;
278 if (isdigit((unsigned char)*l)) {
279 m->in.offset = strtoul(l, &t, 0);
280 if (*s == '-') m->in.offset = - m->in.offset;
281 }
282 else
283 t = l;
284 if (*t++ != ')')
285 magwarn("missing ')' in indirect offset");
286 l = t;
287 }
288
289
290 while (isascii((unsigned char)*l) && isdigit((unsigned char)*l))
291 ++l;
292 EATAB;
293
294 #define NBYTE 4
295 #define NSHORT 5
296 #define NLONG 4
297 #define NSTRING 6
298 #define NDATE 4
299 #define NBESHORT 7
300 #define NBELONG 6
301 #define NBEDATE 6
302 #define NLESHORT 7
303 #define NLELONG 6
304 #define NLEDATE 6
305
306 if (*l == 'u') {
307 ++l;
308 m->flag |= UNSIGNED;
309 }
310
311 /* get type, skip it */
312 if (strncmp(l, "byte", NBYTE)==0) {
313 m->type = BYTE;
314 l += NBYTE;
315 } else if (strncmp(l, "short", NSHORT)==0) {
316 m->type = SHORT;
317 l += NSHORT;
318 } else if (strncmp(l, "long", NLONG)==0) {
319 m->type = LONG;
320 l += NLONG;
321 } else if (strncmp(l, "string", NSTRING)==0) {
322 m->type = STRING;
323 l += NSTRING;
324 } else if (strncmp(l, "date", NDATE)==0) {
325 m->type = DATE;
326 l += NDATE;
327 } else if (strncmp(l, "beshort", NBESHORT)==0) {
328 m->type = BESHORT;
329 l += NBESHORT;
330 } else if (strncmp(l, "belong", NBELONG)==0) {
331 m->type = BELONG;
332 l += NBELONG;
333 } else if (strncmp(l, "bedate", NBEDATE)==0) {
334 m->type = BEDATE;
335 l += NBEDATE;
336 } else if (strncmp(l, "leshort", NLESHORT)==0) {
337 m->type = LESHORT;
338 l += NLESHORT;
339 } else if (strncmp(l, "lelong", NLELONG)==0) {
340 m->type = LELONG;
341 l += NLELONG;
342 } else if (strncmp(l, "ledate", NLEDATE)==0) {
343 m->type = LEDATE;
344 l += NLEDATE;
345 } else {
346 magwarn("type %s invalid", l);
347 return -1;
348 }
349 /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
350 if (*l == '&') {
351 ++l;
352 m->mask = signextend(m, strtoul(l, &l, 0));
353 eatsize(&l);
354 } else
355 m->mask = ~0L;
356 EATAB;
357
358 switch (*l) {
359 case '>':
360 case '<':
361 /* Old-style anding: "0 byte &0x80 dynamically linked" */
362 case '&':
363 case '^':
364 case '=':
365 m->reln = *l;
366 ++l;
367 break;
368 case '!':
369 if (m->type != STRING) {
370 m->reln = *l;
371 ++l;
372 break;
373 }
374 /* FALL THROUGH */
375 default:
376 if (*l == 'x' && isascii((unsigned char)l[1]) &&
377 isspace((unsigned char)l[1])) {
378 m->reln = *l;
379 ++l;
380 goto GetDesc; /* Bill The Cat */
381 }
382 m->reln = '=';
383 break;
384 }
385 EATAB;
386
387 if (getvalue(m, &l))
388 return -1;
389 /*
390 * TODO finish this macro and start using it!
391 * #define offsetcheck {if (offset > HOWMANY-1)
392 * magwarn("offset too big"); }
393 */
394
395 /*
396 * now get last part - the description
397 */
398 GetDesc:
399 EATAB;
400 if (l[0] == '\b') {
401 ++l;
402 m->nospflag = 1;
403 } else if ((l[0] == '\\') && (l[1] == 'b')) {
404 ++l;
405 ++l;
406 m->nospflag = 1;
407 } else
408 m->nospflag = 0;
409 while ((m->desc[i++] = *l++) != '\0' && i<MAXDESC)
410 /* NULLBODY */;
411
412 if (check) {
413 mdump(m);
414 }
415 ++(*ndx); /* make room for next */
416 return 0;
417 }
418
419 /*
420 * Read a numeric value from a pointer, into the value union of a magic
421 * pointer, according to the magic type. Update the string pointer to point
422 * just after the number read. Return 0 for success, non-zero for failure.
423 */
424 static int
425 getvalue(m, p)
426 struct magic *m;
427 char **p;
428 {
429 int slen;
430
431 if (m->type == STRING) {
432 *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen);
433 m->vallen = slen;
434 } else
435 if (m->reln != 'x') {
436 m->value.l = signextend(m, strtoul(*p, p, 0));
437 eatsize(p);
438 }
439 return 0;
440 }
441
/*
 * hextoint - value of a single hexadecimal digit character.
 *
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for every other
 * value.  The argument is compared as an int, so values outside the
 * character range are rejected rather than truncated.
 */
static int
hextoint(c)
	int c;
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c + 10 - 'a';
	if (c >= 'A' && c <= 'F')
		return c + 10 - 'A';
	return -1;
}

/*
 * getstr - convert a string containing C character escapes.
 *
 * s     text to scan; conversion stops at an unescaped white-space
 *       character or at the end of the string
 * p     destination buffer for the converted bytes
 * plen  size of the destination buffer, including room for the NUL
 * slen  receives the number of bytes stored, not counting the NUL
 *
 * Recognised escapes are \n \r \b \t \f \v, \ followed by up to three
 * octal digits, and \x followed by up to two hex digits ("\x" with no
 * digit yields a plain 'x').  Any other escaped character stands for
 * itself.  If the destination fills up, "String too long" is reported on
 * stderr and the conversion stops.
 *
 * Returns the updated scan pointer: just past the white-space character
 * that ended the value, or on the terminating NUL when the input ran out
 * (including after a dangling backslash).  It never points beyond the
 * terminator.
 */
static char *
getstr(s, p, plen, slen)
	register char	*s;
	register char	*p;
	int	plen, *slen;
{
	char	*origs = s, *origp = p;
	char	*pmax = p + plen - 1;
	register int	c;
	register int	val;

	/* Look at each character before consuming it so the NUL is kept. */
	while ((c = *s) != '\0') {
		s++;
		if (isspace((unsigned char) c))
			break;
		if (p >= pmax) {
			fprintf(stderr, "String too long: %s\n", origs);
			break;
		}
		if (c != '\\') {
			*p++ = (char) c;
			continue;
		}

		c = *s;
		if (c == '\0')		/* dangling backslash: drop it */
			break;
		s++;

		switch (c) {
		default:
			*p++ = (char) c;
			break;

		case 'n':
			*p++ = '\n';
			break;

		case 'r':
			*p++ = '\r';
			break;

		case 'b':
			*p++ = '\b';
			break;

		case 't':
			*p++ = '\t';
			break;

		case 'f':
			*p++ = '\f';
			break;

		case 'v':
			*p++ = '\v';
			break;

		/* \ and up to 3 octal digits */
		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
			val = c - '0';
			if (*s >= '0' && *s <= '7') {		/* try for 2 */
				val = (val << 3) | (*s++ - '0');
				if (*s >= '0' && *s <= '7')	/* try for 3 */
					val = (val << 3) | (*s++ - '0');
			}
			*p++ = (char) val;
			break;

		/* \x and up to 2 hex digits */
		case 'x':
			val = 'x';	/* Default if no digits */
			c = hextoint(*s);
			if (c >= 0) {
				s++;
				val = c;
				c = hextoint(*s);
				if (c >= 0) {
					s++;
					val = (val << 4) + c;
				}
			}
			*p++ = (char) val;
			break;
		}
	}
	*p = '\0';
	*slen = p - origp;
	return s;
}
561
562
/*
 * showstr - print a string, rendering unprintable bytes as C escapes.
 *
 * fp   stream to write to
 * s    bytes to print
 * len  number of bytes to print, or -1 to print up to (not including)
 *      the terminating NUL
 *
 * Bytes in the range 040..0176 are written as they are.  Newline,
 * carriage return, backspace, tab, form feed and vertical tab are written
 * as \n \r \b \t \f \v; anything else as a backslash followed by three
 * octal digits.
 */
void
showstr(fp, s, len)
	FILE *fp;
	const char *s;
	int len;
{
	register char	c;

	for (;;) {
		/*
		 * Check the remaining count before fetching, so that no
		 * byte beyond the requested length is ever read.
		 */
		if (len == -1) {
			c = *s++;
			if (c == '\0')
				break;
		}
		else  {
			if (len-- == 0)
				break;
			c = *s++;
		}
		if (c >= 040 && c <= 0176) {	/* TODO isprint && !iscntrl */
			(void) fputc(c, fp);
			continue;
		}

		(void) fputc('\\', fp);
		switch (c) {

		case '\n':
			(void) fputc('n', fp);
			break;

		case '\r':
			(void) fputc('r', fp);
			break;

		case '\b':
			(void) fputc('b', fp);
			break;

		case '\t':
			(void) fputc('t', fp);
			break;

		case '\f':
			(void) fputc('f', fp);
			break;

		case '\v':
			(void) fputc('v', fp);
			break;

		default:
			/* Mask to one byte so a negative char prints as 3 digits. */
			(void) fprintf(fp, "%.3o", (unsigned int) (c & 0377));
			break;
		}
	}
}
621
/*
 * eatsize(): Eat the size spec from a number [eg. 10UL]
 *
 * p  in/out scan pointer.  An optional 'u' and then an optional one of
 *    'l', 's', 'h', 'b' or 'c' (long, short, short, byte, byte) are
 *    skipped, in either letter case; anything else is left in place.
 */
static void
eatsize(p)
	char **p;
{
	char *cur = *p;
	int ch;

	/* optional unsigned marker */
	if (tolower((unsigned char) *cur) == 'u')
		cur++;

	/* optional width letter */
	ch = tolower((unsigned char) *cur);
	if (ch == 'l' || ch == 's' || ch == 'h' || ch == 'b' || ch == 'c')
		cur++;

	*p = cur;
}