hexdump/parse.c

   1 /*
   2  * Copyright (c) 1989, 1993
   3  *      The Regents of the University of California.  All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  * 3. All advertising materials mentioning features or use of this software
  14  *    must display the following acknowledgement:
  15  *      This product includes software developed by the University of
  16  *      California, Berkeley and its contributors.
  17  * 4. Neither the name of the University nor the names of its contributors
  18  *    may be used to endorse or promote products derived from this software
  19  *    without specific prior written permission.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  31  * SUCH DAMAGE.
  32  */
  33
  34 #ifndef lint
  35 #if 0
  36 static char sccsid[] = "@(#)parse.c     8.1 (Berkeley) 6/6/93";
  37 #endif
  38 #endif /* not lint */
  39 #include <sys/cdefs.h>
  40 __FBSDID("$FreeBSD: src/usr.bin/hexdump/parse.c,v 1.12 2002/09/04 23:29:01 dwmalone Exp $");
  41
#include <sys/types.h>

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "hexdump.h"
  51
/*
 * Format unit to apply at end-of-data.  rewrite() points this at the
 * format unit that contains a %_A or %_n conversion.
 */
FU *endfu;
  53
  54 void
  55 addfile(char *name)
  56 {
  57         unsigned char *p;
  58         FILE *fp;
  59         int ch;
  60         char buf[2048 + 1];
  61
  62         if ((fp = fopen(name, "r")) == NULL)
  63                 err(1, "%s", name);
  64         while (fgets(buf, sizeof(buf), fp)) {
  65                 if (!(p = (unsigned char *)index(buf, '\n'))) {
  66                         warnx("line too long");
  67                         while ((ch = getchar()) != '\n' && ch != EOF);
  68                         continue;
  69                 }
  70                 *p = '\0';
  71                 for (p = (unsigned char *)buf; *p && isspace(*p); ++p);
  72                 if (!*p || *p == '#')
  73                         continue;
  74                 add((const char *)p);
  75         }
  76         (void)fclose(fp);
  77 }
  78
  79 void
  80 add(const char *fmt)
  81 {
  82         unsigned const char *p, *savep;
  83         static FS **nextfs;
  84         FS *tfs;
  85         FU *tfu, **nextfu;
  86
  87         /* start new linked list of format units */
  88         if ((tfs = calloc(1, sizeof(FS))) == NULL)
  89                 err(1, NULL);
  90         if (!fshead)
  91                 fshead = tfs;
  92         else
  93                 *nextfs = tfs;
  94         nextfs = &tfs->nextfs;
  95         nextfu = &tfs->nextfu;
  96
  97         /* take the format string and break it up into format units */
  98         for (p = (unsigned const char *)fmt;;) {
  99                 /* skip leading white space */
 100                 for (; isspace(*p); ++p);
 101                 if (!*p)
 102                         break;
 103
 104                 /* allocate a new format unit and link it in */
 105                 if ((tfu = calloc(1, sizeof(FU))) == NULL)
 106                         err(1, NULL);
 107                 *nextfu = tfu;
 108                 nextfu = &tfu->nextfu;
 109                 tfu->reps = 1;
 110
 111                 /* if leading digit, repetition count */
 112                 if (isdigit(*p)) {
 113                         for (savep = p; isdigit(*p); ++p);
 114                         if (!isspace(*p) && *p != '/')
 115                                 badfmt(fmt);
 116                         /* may overwrite either white space or slash */
 117                         tfu->reps = atoi((const char *)savep);
 118                         tfu->flags = F_SETREP;
 119                         /* skip trailing white space */
 120                         for (++p; isspace(*p); ++p);
 121                 }
 122
 123                 /* skip slash and trailing white space */
 124                 if (*p == '/')
 125                         while (isspace(*++p));
 126
 127                 /* byte count */
 128                 if (isdigit(*p)) {
 129                         for (savep = p; isdigit(*p); ++p);
 130                         if (!isspace(*p))
 131                                 badfmt(fmt);
 132                         tfu->bcnt = atoi((const char *)savep);
 133                         /* skip trailing white space */
 134                         for (++p; isspace(*p); ++p);
 135                 }
 136
 137                 /* format */
 138                 if (*p != '"')
 139                         badfmt(fmt);
 140                 for (savep = ++p; *p != '"';)
 141                         if (*p++ == 0)
 142                                 badfmt(fmt);
 143                 if (!(tfu->fmt = malloc(p - savep + 1)))
 144                         err(1, NULL);
 145                 (void) strncpy(tfu->fmt, (const char *)savep, p - savep);
 146                 tfu->fmt[p - savep] = '\0';
 147                 escape(tfu->fmt);
 148                 p++;
 149         }
 150 }
 151
/*
 * Characters that may sit between '%' and the conversion character.
 * '.' is deliberately first: scanning with spec + 1 skips flags and the
 * field width but stops at the precision dot, while scanning with spec
 * skips the precision as well.
 */
static const char *spec = ".#-+ 0123456789";
 153
 154 int
 155 size(FS *fs)
 156 {
 157         FU *fu;
 158         int bcnt, cursize;
 159         unsigned char *fmt;
 160         int prec;
 161
 162         /* figure out the data block size needed for each format unit */
 163         for (cursize = 0, fu = fs->nextfu; fu; fu = fu->nextfu) {
 164                 if (fu->bcnt) {
 165                         cursize += fu->bcnt * fu->reps;
 166                         continue;
 167                 }
 168                 for (bcnt = prec = 0, fmt = (unsigned char *)fu->fmt; *fmt; ++fmt) {
 169                         if (*fmt != '%')
 170                                 continue;
 171                         /*
 172                          * skip any special chars -- save precision in
 173                          * case it's a %s format.
 174                          */
 175                         while (index(spec + 1, *++fmt));
 176                         if (*fmt == '.' && isdigit(*++fmt)) {
 177                                 prec = atoi((const char *)fmt);
 178                                 while (isdigit(*++fmt));
 179                         }
 180                         switch(*fmt) {
 181                         case 'c':
 182                                 bcnt += 1;
 183                                 break;
 184                         case 'd': case 'i': case 'o': case 'u':
 185                         case 'x': case 'X':
 186                                 bcnt += 4;
 187                                 break;
 188                         case 'e': case 'E': case 'f': case 'g': case 'G':
 189                                 bcnt += 8;
 190                                 break;
 191                         case 's':
 192                                 bcnt += prec;
 193                                 break;
 194                         case '_':
 195                                 switch(*++fmt) {
 196                                 case 'c': case 'p': case 'u':
 197                                         bcnt += 1;
 198                                         break;
 199                                 }
 200                         }
 201                 }
 202                 cursize += bcnt * fu->reps;
 203         }
 204         return (cursize);
 205 }
 206
 207 void
 208 rewrite(FS *fs)
 209 {
 210         enum { NOTOKAY, USEBCNT, USEPREC } sokay;
 211         PR *pr, **nextpr = NULL;
 212         FU *fu;
 213         unsigned char *p1, *p2, *fmtp;
 214         char savech, cs[3];
 215         int nconv, prec = 0;
 216
 217         for (fu = fs->nextfu; fu; fu = fu->nextfu) {
 218                 /*
 219                  * Break each format unit into print units; each conversion
 220                  * character gets its own.
 221                  */
 222                 for (nconv = 0, fmtp = (unsigned char *)fu->fmt; *fmtp; nextpr = &pr->nextpr) {
 223                         if ((pr = calloc(1, sizeof(PR))) == NULL)
 224                                 err(1, NULL);
 225                         if (!fu->nextpr)
 226                                 fu->nextpr = pr;
 227                         else
 228                                 *nextpr = pr;
 229
 230                         /* Skip preceding text and up to the next % sign. */
 231                         for (p1 = fmtp; *p1 && *p1 != '%'; ++p1);
 232
 233                         /* Only text in the string. */
 234                         if (!*p1) {
 235                                 pr->fmt = (char *)fmtp;
 236                                 pr->flags = F_TEXT;
 237                                 break;
 238                         }
 239
 240                         /*
 241                          * Get precision for %s -- if have a byte count, don't
 242                          * need it.
 243                          */
 244                         if (fu->bcnt) {
 245                                 sokay = USEBCNT;
 246                                 /* Skip to conversion character. */
 247                                 for (++p1; index(spec, *p1); ++p1);
 248                         } else {
 249                                 /* Skip any special chars, field width. */
 250                                 while (index(spec + 1, *++p1));
 251                                 if (*p1 == '.' && isdigit(*++p1)) {
 252                                         sokay = USEPREC;
 253                                         prec = atoi((const char *)p1);
 254                                         while (isdigit(*++p1));
 255                                 } else
 256                                         sokay = NOTOKAY;
 257                         }
 258
 259                         p2 = p1 + 1;            /* Set end pointer. */
 260                         cs[0] = *p1;            /* Set conversion string. */
 261                         cs[1] = '\0';
 262
 263                         /*
 264                          * Figure out the byte count for each conversion;
 265                          * rewrite the format as necessary, set up blank-
 266                          * padding for end of data.
 267                          */
 268                         switch(cs[0]) {
 269                         case 'c':
 270                                 pr->flags = F_CHAR;
 271                                 switch(fu->bcnt) {
 272                                 case 0: case 1:
 273                                         pr->bcnt = 1;
 274                                         break;
 275                                 default:
 276                                         p1[1] = '\0';
 277                                         badcnt((char *)p1);
 278                                 }
 279                                 break;
 280                         case 'd': case 'i':
 281                                 pr->flags = F_INT;
 282                                 goto isint;
 283                         case 'o': case 'u': case 'x': case 'X':
 284                                 pr->flags = F_UINT;
 285 isint:                          cs[2] = '\0';
 286                                 cs[1] = cs[0];
 287                                 cs[0] = 'q';
 288                                 switch(fu->bcnt) {
 289                                 case 0: case 4:
 290                                         pr->bcnt = 4;
 291                                         break;
 292                                 case 1:
 293                                         pr->bcnt = 1;
 294                                         break;
 295                                 case 2:
 296                                         pr->bcnt = 2;
 297                                         break;
 298 #ifdef __APPLE__
 299                                 case 8:
 300                                         pr->bcnt = 8;
 301                                         break;
 302 #endif /* __APPLE__ */
 303                                 default:
 304                                         p1[1] = '\0';
 305                                         badcnt((char *)p1);
 306                                 }
 307                                 break;
 308                         case 'e': case 'E': case 'f': case 'g': case 'G':
 309                                 pr->flags = F_DBL;
 310                                 switch(fu->bcnt) {
 311                                 case 0: case 8:
 312                                         pr->bcnt = 8;
 313                                         break;
 314                                 case 4:
 315                                         pr->bcnt = 4;
 316                                         break;
 317                                 default:
 318                                         if (fu->bcnt == sizeof(long double)) {
 319                                                 cs[2] = '\0';
 320                                                 cs[1] = cs[0];
 321                                                 cs[0] = 'L';
 322                                                 pr->bcnt = sizeof(long double);
 323                                         } else {
 324                                                 p1[1] = '\0';
 325                                                 badcnt((char *)p1);
 326                                         }
 327                                 }
 328                                 break;
 329                         case 's':
 330                                 pr->flags = F_STR;
 331                                 switch(sokay) {
 332                                 case NOTOKAY:
 333                                         badsfmt();
 334                                 case USEBCNT:
 335                                         pr->bcnt = fu->bcnt;
 336                                         break;
 337                                 case USEPREC:
 338                                         pr->bcnt = prec;
 339                                         break;
 340                                 }
 341                                 break;
 342                         case '_':
 343                                 ++p2;
 344                                 switch(p1[1]) {
 345                                 case 'A':
 346                                         endfu = fu;
 347                                         fu->flags |= F_IGNORE;
 348                                         /* FALLTHROUGH */
 349                                 case 'a':
 350                                         pr->flags = F_ADDRESS;
 351                                         ++p2;
 352                                         switch(p1[2]) {
 353                                         case 'd': case 'o': case'x':
 354                                                 cs[0] = 'q';
 355                                                 cs[1] = p1[2];
 356                                                 cs[2] = '\0';
 357                                                 break;
 358                                         default:
 359                                                 p1[3] = '\0';
 360                                                 badconv((char *)p1);
 361                                         }
 362                                         break;
 363                                 case 'c':
 364                                         pr->flags = F_C;
 365                                         /* cs[0] = 'c'; set in conv_c */
 366                                         goto isint2;
 367                                 case 'p':
 368                                         pr->flags = F_P;
 369                                         cs[0] = 'c';
 370                                         goto isint2;
 371                                 case 'u':
 372                                         pr->flags = F_U;
 373                                         /* cs[0] = 'c'; set in conv_u */
 374 isint2:                                 switch(fu->bcnt) {
 375                                         case 0: case 1:
 376                                                 pr->bcnt = 1;
 377                                                 break;
 378                                         default:
 379                                                 p1[2] = '\0';
 380                                                 badcnt((char *)p1);
 381                                         }
 382                                         break;
 383                                 case 'n': /* Force -A n to dump extra blank line like default od behavior */
 384                                         endfu = fu;
 385                                         fu->flags = F_IGNORE;
 386                                         pr->flags = F_TEXT;
 387                                         fmtp = (unsigned char *)"\n";
 388                                         cs[0] = '\0';
 389                                         break;
 390                                 default:
 391                                         p1[2] = '\0';
 392                                         badconv((char *)p1);
 393                                 }
 394                                 break;
 395                         default:
 396                                 p1[1] = '\0';
 397                                 badconv((char *)p1);
 398                         }
 399
 400                         /*
 401                          * Copy to PR format string, set conversion character
 402                          * pointer, update original.
 403                          */
 404                         savech = *p2;
 405                         p1[0] = '\0';
 406                         if ((pr->fmt = calloc(1, strlen((const char *)fmtp) + 2)) == NULL)
 407                                 err(1, NULL);
 408                         (void)strcpy(pr->fmt, (const char *)fmtp);
 409                         (void)strcat(pr->fmt, cs);
 410                         *p2 = savech;
 411                         pr->cchar = pr->fmt + (p1 - fmtp);
 412                         fmtp = p2;
 413
 414                         /* Only one conversion character if byte count. */
 415                         if (!(pr->flags&F_ADDRESS) && fu->bcnt && nconv++)
 416                                 errx(1, "byte count with multiple conversion characters");
 417                 }
 418                 /*
 419                  * If format unit byte count not specified, figure it out
 420                  * so can adjust rep count later.
 421                  */
 422                 if (!fu->bcnt)
 423                         for (pr = fu->nextpr; pr; pr = pr->nextpr)
 424                                 fu->bcnt += pr->bcnt;
 425         }
 426         /*
 427          * If the format string interprets any data at all, and it's
 428          * not the same as the blocksize, and its last format unit
 429          * interprets any data at all, and has no iteration count,
 430          * repeat it as necessary.
 431          *
 432          * If, rep count is greater than 1, no trailing whitespace
 433          * gets output from the last iteration of the format unit.
 434          */
 435         for (fu = fs->nextfu; fu; fu = fu->nextfu) {
 436                 if (!fu->nextfu && fs->bcnt < blocksize &&
 437                     !(fu->flags&F_SETREP) && fu->bcnt)
 438                         fu->reps += (blocksize - fs->bcnt) / fu->bcnt;
 439                 if (fu->reps > 1) {
 440                         for (pr = fu->nextpr;; pr = pr->nextpr)
 441                                 if (!pr->nextpr)
 442                                         break;
 443                         for (p1 = (unsigned char *)pr->fmt, p2 = NULL; *p1; ++p1)
 444                                 p2 = isspace(*p1) ? p1 : NULL;
 445                         if (p2)
 446                                 pr->nospace = (char *)p2;
 447                 }
 448         }
 449 #ifdef DEBUG
 450         for (fu = fs->nextfu; fu; fu = fu->nextfu) {
 451                 (void)printf("fmt:");
 452                 for (pr = fu->nextpr; pr; pr = pr->nextpr)
 453                         (void)printf(" {%s}", pr->fmt);
 454                 (void)printf("\n");
 455         }
 456 #endif
 457 }
 458
 459 void
 460 escape(char *p1)
 461 {
 462         char *p2;
 463
 464         /* alphabetic escape sequences have to be done in place */
 465         for (p2 = p1;; ++p1, ++p2) {
 466                 if (!*p1) {
 467                         *p2 = *p1;
 468                         break;
 469                 }
 470                 if (*p1 == '\\')
 471                         switch(*++p1) {
 472                         case 'a':
 473                              /* *p2 = '\a'; */
 474                                 *p2 = '\007';
 475                                 break;
 476                         case 'b':
 477                                 *p2 = '\b';
 478                                 break;
 479                         case 'f':
 480                                 *p2 = '\f';
 481                                 break;
 482                         case 'n':
 483                                 *p2 = '\n';
 484                                 break;
 485                         case 'r':
 486                                 *p2 = '\r';
 487                                 break;
 488                         case 't':
 489                                 *p2 = '\t';
 490                                 break;
 491                         case 'v':
 492                                 *p2 = '\v';
 493                                 break;
 494                         default:
 495                                 *p2 = *p1;
 496                                 break;
 497                         }
 498         }
 499 }
 500
/*
 * badcnt --
 *	Report that the byte count is not valid for the conversion `s'
 *	and exit with status 1 (via errx()).
 */
void
badcnt(char *s)
{
	errx(1, "%s: bad byte count", s);
}
 506
/*
 * badsfmt --
 *	Report a %s conversion that has neither a precision nor a byte
 *	count and exit with status 1 (via errx()).
 */
void
badsfmt(void)
{
	errx(1, "%%s: requires a precision or a byte count");
}
 512
/*
 * badfmt --
 *	Report the format string `fmt' (printed in double quotes) as
 *	malformed and exit with status 1 (via errx()).
 */
void
badfmt(const char *fmt)
{
	errx(1, "\"%s\": bad format", fmt);
}
 518
/*
 * badconv --
 *	Report an invalid conversion and exit with status 1 (via errx()).
 *	`ch' is the conversion text without its leading '%'; the message
 *	puts a literal '%' in front of it.
 */
void
badconv(char *ch)
{
	errx(1, "%%%s: bad conversion character", ch);
}