hexdump/parse.c

   1 /*
   2  * Copyright (c) 1989, 1993
   3  *      The Regents of the University of California.  All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  * 3. All advertising materials mentioning features or use of this software
  14  *    must display the following acknowledgement:
  15  *      This product includes software developed by the University of
  16  *      California, Berkeley and its contributors.
  17  * 4. Neither the name of the University nor the names of its contributors
  18  *    may be used to endorse or promote products derived from this software
  19  *    without specific prior written permission.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  31  * SUCH DAMAGE.
  32  */
  33
  34 #ifndef lint
  35 #if 0
  36 static char sccsid[] = "@(#)parse.c     8.1 (Berkeley) 6/6/93";
  37 #endif
  38 #endif /* not lint */
  39 #include <sys/cdefs.h>
  40 __FBSDID("$FreeBSD: src/usr.bin/hexdump/parse.c,v 1.12 2002/09/04 23:29:01 dwmalone Exp $");
  41
  42 #include <sys/types.h>
  43
  44 #include <err.h>
  45 #include <fcntl.h>
  46 #include <stdio.h>
  47 #include <stdlib.h>
  48 #include <ctype.h>
  49 #include <string.h>
  50 #include "hexdump.h"
  51
/*
 * Format unit selected for end-of-data handling.  rewrite() points this at
 * the format unit in which it finds a %_A or %_n conversion.
 */
FU *endfu;                                      /* format at end-of-data */
  53
  54 void
  55 addfile(char *name)
  56 {
  57         unsigned char *p;
  58         FILE *fp;
  59         int ch;
  60         char buf[2048 + 1];
  61
  62         if ((fp = fopen(name, "r")) == NULL)
  63                 err(1, "%s", name);
  64         while (fgets(buf, sizeof(buf), fp)) {
  65                 if (!(p = (unsigned char *)index(buf, '\n'))) {
  66                         warnx("line too long");
  67                         while ((ch = getchar()) != '\n' && ch != EOF);
  68                         continue;
  69                 }
  70                 *p = '\0';
  71                 for (p = (unsigned char *)buf; *p && isspace(*p); ++p);
  72                 if (!*p || *p == '#')
  73                         continue;
  74                 add((const char *)p);
  75         }
  76         (void)fclose(fp);
  77 }
  78
  79 void
  80 add(const char *fmt)
  81 {
  82         unsigned const char *p, *savep;
  83         static FS **nextfs;
  84         FS *tfs;
  85         FU *tfu, **nextfu;
  86
  87         /* start new linked list of format units */
  88         if ((tfs = calloc(1, sizeof(FS))) == NULL)
  89                 err(1, NULL);
  90         if (!fshead)
  91                 fshead = tfs;
  92         else
  93                 *nextfs = tfs;
  94         nextfs = &tfs->nextfs;
  95         nextfu = &tfs->nextfu;
  96
  97         /* take the format string and break it up into format units */
  98         for (p = (unsigned const char *)fmt;;) {
  99                 /* skip leading white space */
 100                 for (; isspace(*p); ++p);
 101                 if (!*p)
 102                         break;
 103
 104                 /* allocate a new format unit and link it in */
 105                 if ((tfu = calloc(1, sizeof(FU))) == NULL)
 106                         err(1, NULL);
 107                 *nextfu = tfu;
 108                 nextfu = &tfu->nextfu;
 109                 tfu->reps = 1;
 110
 111                 /* if leading digit, repetition count */
 112                 if (isdigit(*p)) {
 113                         for (savep = p; isdigit(*p); ++p);
 114                         if (!isspace(*p) && *p != '/')
 115                                 badfmt(fmt);
 116                         /* may overwrite either white space or slash */
 117                         tfu->reps = atoi((const char *)savep);
 118                         tfu->flags = F_SETREP;
 119                         /* skip trailing white space */
 120                         for (++p; isspace(*p); ++p);
 121                 }
 122
 123                 /* skip slash and trailing white space */
 124                 if (*p == '/')
 125                         while (isspace(*++p));
 126
 127                 /* byte count */
 128                 if (isdigit(*p)) {
 129                         for (savep = p; isdigit(*p); ++p);
 130                         if (!isspace(*p))
 131                                 badfmt(fmt);
 132                         tfu->bcnt = atoi((const char *)savep);
 133                         /* skip trailing white space */
 134                         for (++p; isspace(*p); ++p);
 135                 }
 136
 137                 /* format */
 138                 if (*p != '"')
 139                         badfmt(fmt);
 140                 for (savep = ++p; *p != '"';)
 141                         if (*p++ == 0)
 142                                 badfmt(fmt);
 143                 if (!(tfu->fmt = malloc(p - savep + 1)))
 144                         err(1, NULL);
 145                 (void) strncpy(tfu->fmt, (const char *)savep, p - savep);
 146                 tfu->fmt[p - savep] = '\0';
 147                 escape(tfu->fmt);
 148                 p++;
 149         }
 150 }
 151
/*
 * Characters that may appear between the '%' and the conversion character
 * of a format.  size() and rewrite() search this set to skip over them;
 * they search from spec + 1 (i.e. without the leading '.') when they want
 * to stop at the precision and read it themselves.
 */
static const char *spec = ".#-+ 0123456789";
 153
 154 int
 155 size(FS *fs)
 156 {
 157         FU *fu;
 158         int bcnt, cursize;
 159         unsigned char *fmt;
 160         int prec;
 161
 162         /* figure out the data block size needed for each format unit */
 163         for (cursize = 0, fu = fs->nextfu; fu; fu = fu->nextfu) {
 164                 if (fu->bcnt) {
 165                         cursize += fu->bcnt * fu->reps;
 166                         continue;
 167                 }
 168                 for (bcnt = prec = 0, fmt = (unsigned char *)fu->fmt; *fmt; ++fmt) {
 169                         if (*fmt != '%')
 170                                 continue;
 171                         /*
 172                          * skip any special chars -- save precision in
 173                          * case it's a %s format.
 174                          */
 175                         while (index(spec + 1, *++fmt));
 176                         if (*fmt == '.' && isdigit(*++fmt)) {
 177                                 prec = atoi((const char *)fmt);
 178                                 while (isdigit(*++fmt));
 179                         }
 180                         switch(*fmt) {
 181                         case 'c':
 182                                 bcnt += 1;
 183                                 break;
 184                         case 'd': case 'i': case 'o': case 'u':
 185                         case 'x': case 'X':
 186                                 bcnt += 4;
 187                                 break;
 188                         case 'e': case 'E': case 'f': case 'g': case 'G':
 189                                 bcnt += 8;
 190                                 break;
 191                         case 's':
 192                                 bcnt += prec;
 193                                 break;
 194                         case '_':
 195                                 switch(*++fmt) {
 196                                 case 'c': case 'p': case 'u':
 197                                         bcnt += 1;
 198                                         break;
 199                                 }
 200                         }
 201                 }
 202                 cursize += bcnt * fu->reps;
 203         }
 204         return (cursize);
 205 }
 206
 207 void
 208 rewrite(FS *fs)
 209 {
 210         enum { NOTOKAY, USEBCNT, USEPREC } sokay;
 211         PR *pr, **nextpr = NULL;
 212         FU *fu;
 213         unsigned char *p1, *p2, *fmtp;
 214         char savech, cs[3];
 215         int nconv, prec = 0;
 216
 217         for (fu = fs->nextfu; fu; fu = fu->nextfu) {
 218                 /*
 219                  * Break each format unit into print units; each conversion
 220                  * character gets its own.
 221                  */
 222                 for (nconv = 0, fmtp = (unsigned char *)fu->fmt; *fmtp; nextpr = &pr->nextpr) {
 223                         if ((pr = calloc(1, sizeof(PR))) == NULL)
 224                                 err(1, NULL);
 225                         if (!fu->nextpr)
 226                                 fu->nextpr = pr;
 227                         else
 228                                 *nextpr = pr;
 229
 230                         /* Skip preceding text and up to the next % sign. */
 231                         for (p1 = fmtp; *p1 && *p1 != '%'; ++p1);
 232
 233                         /* Only text in the string. */
 234                         if (!*p1) {
 235                                 pr->fmt = (char *)fmtp;
 236                                 pr->flags = F_TEXT;
 237                                 break;
 238                         }
 239
 240                         /*
 241                          * Get precision for %s -- if have a byte count, don't
 242                          * need it.
 243                          */
 244                         if (fu->bcnt) {
 245                                 sokay = USEBCNT;
 246                                 /* Skip to conversion character. */
 247                                 for (++p1; index(spec, *p1); ++p1);
 248                         } else {
 249                                 /* Skip any special chars, field width. */
 250                                 while (index(spec + 1, *++p1));
 251                                 if (*p1 == '.' && isdigit(*++p1)) {
 252                                         sokay = USEPREC;
 253                                         prec = atoi((const char *)p1);
 254                                         while (isdigit(*++p1));
 255                                 } else
 256                                         sokay = NOTOKAY;
 257                         }
 258
 259                         p2 = p1 + 1;            /* Set end pointer. */
 260                         cs[0] = *p1;            /* Set conversion string. */
 261                         cs[1] = '\0';
 262
 263                         /*
 264                          * Figure out the byte count for each conversion;
 265                          * rewrite the format as necessary, set up blank-
 266                          * padding for end of data.
 267                          */
 268                         switch(cs[0]) {
 269                         case 'c':
 270                                 pr->flags = F_CHAR;
 271                                 switch(fu->bcnt) {
 272                                 case 0: case 1:
 273                                         pr->bcnt = 1;
 274                                         break;
 275                                 default:
 276                                         p1[1] = '\0';
 277                                         badcnt((char *)p1);
 278                                 }
 279                                 break;
 280                         case 'd': case 'i':
 281                                 pr->flags = F_INT;
 282                                 goto isint;
 283                         case 'o': case 'u': case 'x': case 'X':
 284                                 pr->flags = F_UINT;
 285 isint:                          cs[2] = '\0';
 286                                 cs[1] = cs[0];
 287                                 cs[0] = 'q';
 288                                 switch(fu->bcnt) {
 289                                 case 0: case 4:
 290                                         pr->bcnt = 4;
 291                                         break;
 292                                 case 1:
 293                                         pr->bcnt = 1;
 294                                         break;
 295                                 case 2:
 296                                         pr->bcnt = 2;
 297                                         break;
 298                                 default:
 299                                         p1[1] = '\0';
 300                                         badcnt((char *)p1);
 301                                 }
 302                                 break;
 303                         case 'e': case 'E': case 'f': case 'g': case 'G':
 304                                 pr->flags = F_DBL;
 305                                 switch(fu->bcnt) {
 306                                 case 0: case 8:
 307                                         pr->bcnt = 8;
 308                                         break;
 309                                 case 4:
 310                                         pr->bcnt = 4;
 311                                         break;
 312                                 default:
 313                                         if (fu->bcnt == sizeof(long double)) {
 314                                                 cs[2] = '\0';
 315                                                 cs[1] = cs[0];
 316                                                 cs[0] = 'L';
 317                                                 pr->bcnt = sizeof(long double);
 318                                         } else {
 319                                                 p1[1] = '\0';
 320                                                 badcnt((char *)p1);
 321                                         }
 322                                 }
 323                                 break;
 324                         case 's':
 325                                 pr->flags = F_STR;
 326                                 switch(sokay) {
 327                                 case NOTOKAY:
 328                                         badsfmt();
 329                                 case USEBCNT:
 330                                         pr->bcnt = fu->bcnt;
 331                                         break;
 332                                 case USEPREC:
 333                                         pr->bcnt = prec;
 334                                         break;
 335                                 }
 336                                 break;
 337                         case '_':
 338                                 ++p2;
 339                                 switch(p1[1]) {
 340                                 case 'A':
 341                                         endfu = fu;
 342                                         fu->flags |= F_IGNORE;
 343                                         /* FALLTHROUGH */
 344                                 case 'a':
 345                                         pr->flags = F_ADDRESS;
 346                                         ++p2;
 347                                         switch(p1[2]) {
 348                                         case 'd': case 'o': case'x':
 349                                                 cs[0] = 'q';
 350                                                 cs[1] = p1[2];
 351                                                 cs[2] = '\0';
 352                                                 break;
 353                                         default:
 354                                                 p1[3] = '\0';
 355                                                 badconv((char *)p1);
 356                                         }
 357                                         break;
 358                                 case 'c':
 359                                         pr->flags = F_C;
 360                                         /* cs[0] = 'c'; set in conv_c */
 361                                         goto isint2;
 362                                 case 'p':
 363                                         pr->flags = F_P;
 364                                         cs[0] = 'c';
 365                                         goto isint2;
 366                                 case 'u':
 367                                         pr->flags = F_U;
 368                                         /* cs[0] = 'c'; set in conv_u */
 369 isint2:                                 switch(fu->bcnt) {
 370                                         case 0: case 1:
 371                                                 pr->bcnt = 1;
 372                                                 break;
 373                                         default:
 374                                                 p1[2] = '\0';
 375                                                 badcnt((char *)p1);
 376                                         }
 377                                         break;
 378                                 case 'n': /* Force -A n to dump extra blank line like default od behavior */
 379                                         endfu = fu;
 380                                         fu->flags = F_IGNORE;
 381                                         pr->flags = F_TEXT;
 382                                         fmtp = (unsigned char *)"\n";
 383                                         cs[0] = '\0';
 384                                         break;
 385                                 default:
 386                                         p1[2] = '\0';
 387                                         badconv((char *)p1);
 388                                 }
 389                                 break;
 390                         default:
 391                                 p1[1] = '\0';
 392                                 badconv((char *)p1);
 393                         }
 394
 395                         /*
 396                          * Copy to PR format string, set conversion character
 397                          * pointer, update original.
 398                          */
 399                         savech = *p2;
 400                         p1[0] = '\0';
 401                         if ((pr->fmt = calloc(1, strlen((const char *)fmtp) + 2)) == NULL)
 402                                 err(1, NULL);
 403                         (void)strcpy(pr->fmt, (const char *)fmtp);
 404                         (void)strcat(pr->fmt, cs);
 405                         *p2 = savech;
 406                         pr->cchar = pr->fmt + (p1 - fmtp);
 407                         fmtp = p2;
 408
 409                         /* Only one conversion character if byte count. */
 410                         if (!(pr->flags&F_ADDRESS) && fu->bcnt && nconv++)
 411                                 errx(1, "byte count with multiple conversion characters");
 412                 }
 413                 /*
 414                  * If format unit byte count not specified, figure it out
 415                  * so can adjust rep count later.
 416                  */
 417                 if (!fu->bcnt)
 418                         for (pr = fu->nextpr; pr; pr = pr->nextpr)
 419                                 fu->bcnt += pr->bcnt;
 420         }
 421         /*
 422          * If the format string interprets any data at all, and it's
 423          * not the same as the blocksize, and its last format unit
 424          * interprets any data at all, and has no iteration count,
 425          * repeat it as necessary.
 426          *
 427          * If, rep count is greater than 1, no trailing whitespace
 428          * gets output from the last iteration of the format unit.
 429          */
 430         for (fu = fs->nextfu; fu; fu = fu->nextfu) {
 431                 if (!fu->nextfu && fs->bcnt < blocksize &&
 432                     !(fu->flags&F_SETREP) && fu->bcnt)
 433                         fu->reps += (blocksize - fs->bcnt) / fu->bcnt;
 434                 if (fu->reps > 1) {
 435                         for (pr = fu->nextpr;; pr = pr->nextpr)
 436                                 if (!pr->nextpr)
 437                                         break;
 438                         for (p1 = (unsigned char *)pr->fmt, p2 = NULL; *p1; ++p1)
 439                                 p2 = isspace(*p1) ? p1 : NULL;
 440                         if (p2)
 441                                 pr->nospace = (char *)p2;
 442                 }
 443         }
 444 #ifdef DEBUG
 445         for (fu = fs->nextfu; fu; fu = fu->nextfu) {
 446                 (void)printf("fmt:");
 447                 for (pr = fu->nextpr; pr; pr = pr->nextpr)
 448                         (void)printf(" {%s}", pr->fmt);
 449                 (void)printf("\n");
 450         }
 451 #endif
 452 }
 453
 454 void
 455 escape(char *p1)
 456 {
 457         char *p2;
 458
 459         /* alphabetic escape sequences have to be done in place */
 460         for (p2 = p1;; ++p1, ++p2) {
 461                 if (!*p1) {
 462                         *p2 = *p1;
 463                         break;
 464                 }
 465                 if (*p1 == '\\')
 466                         switch(*++p1) {
 467                         case 'a':
 468                              /* *p2 = '\a'; */
 469                                 *p2 = '\007';
 470                                 break;
 471                         case 'b':
 472                                 *p2 = '\b';
 473                                 break;
 474                         case 'f':
 475                                 *p2 = '\f';
 476                                 break;
 477                         case 'n':
 478                                 *p2 = '\n';
 479                                 break;
 480                         case 'r':
 481                                 *p2 = '\r';
 482                                 break;
 483                         case 't':
 484                                 *p2 = '\t';
 485                                 break;
 486                         case 'v':
 487                                 *p2 = '\v';
 488                                 break;
 489                         default:
 490                                 *p2 = *p1;
 491                                 break;
 492                         }
 493         }
 494 }
 495
/*
 * badcnt --
 *	Report that the byte count is not valid for the conversion named by
 *	the string `s', and exit with status 1.
 */
void
badcnt(char *s)
{
        errx(1, "%s: bad byte count", s);
}
 501
/*
 * badsfmt --
 *	Report a %s conversion that has neither a precision nor a byte
 *	count, and exit with status 1.
 */
void
badsfmt(void)
{
        errx(1, "%%s: requires a precision or a byte count");
}
 507
/*
 * badfmt --
 *	Report that the format string `fmt' is malformed, quoting it in the
 *	message, and exit with status 1.
 */
void
badfmt(const char *fmt)
{
        errx(1, "\"%s\": bad format", fmt);
}
 513
/*
 * badconv --
 *	Report an unsupported conversion and exit with status 1.  `ch' is the
 *	conversion text without its leading '%'; the message adds the '%'.
 */
void
badconv(char *ch)
{
        errx(1, "%%%s: bad conversion character", ch);
}