]> git.saurik.com Git - bison.git/blob - lib/ansi2knr.c
* lib/hash.c: New file.
[bison.git] / lib / ansi2knr.c
1 /* Copyright (C) 1989, 1997, 1998 Aladdin Enterprises. All rights reserved. */
2
3 /*$Id$*/
4 /* Convert ANSI C function definitions to K&R ("traditional C") syntax */
5
6 /*
7 ansi2knr is distributed in the hope that it will be useful, but WITHOUT ANY
8 WARRANTY. No author or distributor accepts responsibility to anyone for the
9 consequences of using it or for whether it serves any particular purpose or
10 works at all, unless he says so in writing. Refer to the GNU General Public
11 License (the "GPL") for full details.
12
13 Everyone is granted permission to copy, modify and redistribute ansi2knr,
14 but only under the conditions described in the GPL. A copy of this license
15 is supposed to have been given to you along with ansi2knr so you can know
16 your rights and responsibilities. It should be in a file named COPYLEFT,
17 or, if there is no file named COPYLEFT, a file named COPYING. Among other
18 things, the copyright notice and this notice must be preserved on all
19 copies.
20
21 We explicitly state here what we believe is already implied by the GPL: if
22 the ansi2knr program is distributed as a separate set of sources and a
23 separate executable file which are aggregated on a storage medium together
24 with another program, this in itself does not bring the other program under
25 the GPL, nor does the mere fact that such a program or the procedures for
26 constructing it invoke the ansi2knr executable bring any other part of the
27 program under the GPL.
28 */
29
30 /*
31 * Usage:
32 ansi2knr [--filename FILENAME] [INPUT_FILE [OUTPUT_FILE]]
33 * --filename provides the file name for the #line directive in the output,
34 * overriding input_file (if present).
35 * If no input_file is supplied, input is read from stdin.
36 * If no output_file is supplied, output goes to stdout.
37 * Only command-line, file-open and memory-allocation failures are reported (on stderr); the input itself is never diagnosed.
38 *
39 * ansi2knr recognizes function definitions by seeing a non-keyword
40 * identifier at the left margin, followed by a left parenthesis,
41 * with a right parenthesis as the last character on the line,
42 * and with a left brace as the first token on the following line
43 * (ignoring possible intervening comments), except that a line
44 * consisting of only
45 * identifier1(identifier2)
46 * will not be considered a function definition unless identifier2 is
47 * the word "void". ansi2knr will recognize a multi-line header provided
48 * that no intervening line ends with a left or right brace or a semicolon.
49 * These algorithms ignore whitespace and comments, except that
50 * the function name must be the first thing on the line.
51 * The following constructs will confuse it:
52 * - Any other construct that starts at the left margin and
53 * follows the above syntax (such as a macro or function call).
54 * - Some macros that tinker with the syntax of the function header.
55 */
56
57 /*
58 * The original and principal author of ansi2knr is L. Peter Deutsch
59 * <ghost@aladdin.com>. Other authors are noted in the change history
60 * that follows (in reverse chronological order):
61 lpd 1998-11-09 added further hack to recognize identifier(void)
62 as being a procedure
63 lpd 1998-10-23 added hack to recognize lines consisting of
64 identifier1(identifier2) as *not* being procedures
65 lpd 1997-12-08 made input_file optional; only closes input and/or
66 output file if not stdin or stdout respectively; prints
67 usage message on stderr rather than stdout; adds
68 --filename switch (changes suggested by
69 <ceder@lysator.liu.se>)
70 lpd 1996-01-21 added code to cope with not HAVE_CONFIG_H and with
71 compilers that don't understand void, as suggested by
72 Tom Lane
73 lpd 1996-01-15 changed to require that the first non-comment token
74 on the line following a function header be a left brace,
75 to reduce sensitivity to macros, as suggested by Tom Lane
76 <tgl@sss.pgh.pa.us>
77 lpd 1995-06-22 removed #ifndefs whose sole purpose was to define
78 undefined preprocessor symbols as 0; changed all #ifdefs
79 for configuration symbols to #ifs
80 lpd 1995-04-05 changed copyright notice to make it clear that
81 including ansi2knr in a program does not bring the entire
82 program under the GPL
83 lpd 1994-12-18 added conditionals for systems where ctype macros
84 don't handle 8-bit characters properly, suggested by
85 Francois Pinard <pinard@iro.umontreal.ca>;
86 removed --varargs switch (this is now the default)
87 lpd 1994-10-10 removed CONFIG_BROKETS conditional
88 lpd 1994-07-16 added some conditionals to help GNU `configure',
89 suggested by Francois Pinard <pinard@iro.umontreal.ca>;
90 properly erase prototype args in function parameters,
91 contributed by Jim Avera <jima@netcom.com>;
92 correct error in writeblanks (it shouldn't erase EOLs)
93 lpd 1989-xx-xx original version
94 */
95
96 /* Most of the conditionals here are to make ansi2knr work with */
97 /* or without the GNU configure machinery. */
98
99 #if HAVE_CONFIG_H
100 # include <config.h>
101 #endif
102
103 #include <stdio.h>
104 #include <ctype.h>
105
106 #if HAVE_CONFIG_H
107
108 /*
109 For properly autoconfiguring ansi2knr, use AC_CONFIG_HEADER(config.h).
110 This will define HAVE_CONFIG_H and so, activate the following lines.
111 */
112
113 # if STDC_HEADERS || HAVE_STRING_H
114 # include <string.h>
115 # else
116 # include <strings.h>
117 # endif
118
119 #else /* not HAVE_CONFIG_H */
120
121 /* Otherwise do it the hard way */
122
123 # ifdef BSD
124 # include <strings.h>
125 # else
126 # ifdef VMS
127 extern int strlen(), strncmp();
128 # else
129 # include <string.h>
130 # endif
131 # endif
132
133 #endif /* not HAVE_CONFIG_H */
134
135 #if STDC_HEADERS
136 # include <stdlib.h>
137 #else
138 /*
139 malloc and free should be declared in stdlib.h,
140 but if you've got a K&R compiler, they probably aren't.
141 */
142 # ifdef MSDOS
143 # include <malloc.h>
144 # else
145 # ifdef VMS
146 extern char *malloc();
147 extern void free();
148 # else
149 extern char *malloc();
150 extern int free();
151 # endif
152 # endif
153
154 #endif
155
/*
 * The ctype macros don't always handle 8-bit characters correctly.
 * Compensate for this here.
 *
 * is_ascii(c) guards the ctype call on systems that provide isascii()
 * and lack ANSI headers.  Everywhere else it is the constant 1, so the
 * wrappers below must themselves make sure the ctype argument is legal:
 * a plain char holding a value >= 0x80 may be negative, and a negative
 * argument (other than EOF) to isspace/isalpha/isalnum is undefined.
 * The (unsigned char) cast maps such characters into the valid range.
 */
#ifdef isascii
#  undef HAVE_ISASCII		/* just in case */
#  define HAVE_ISASCII 1
#endif
#if STDC_HEADERS || !HAVE_ISASCII
#  define is_ascii(c) 1
#else
#  define is_ascii(c) isascii(c)
#endif

/* Note: each wrapper may evaluate its argument more than once. */
#define is_space(c) (is_ascii(c) && isspace((unsigned char)(c)))
#define is_alpha(c) (is_ascii(c) && isalpha((unsigned char)(c)))
#define is_alnum(c) (is_ascii(c) && isalnum((unsigned char)(c)))

/* Scanning macros */
/* isidchar: ch may appear inside a C identifier. */
#define isidchar(ch) (is_alnum(ch) || (ch) == '_')
/* isidfirstchar: ch may start a C identifier. */
#define isidfirstchar(ch) (is_alpha(ch) || (ch) == '_')
/*
 * Forward references.
 * These are deliberately old-style (unprototyped) declarations, matching
 * the K&R-style definitions further down in this file.
 */
int convert1();
int test1();
int writeblanks();
char *skipspace();
184
185 /* The main program */
186 int
187 main(argc, argv)
188 int argc;
189 char *argv[];
190 { FILE *in = stdin;
191 FILE *out = stdout;
192 char *filename = 0;
193 #define bufsize 5000 /* arbitrary size */
194 char *buf;
195 char *line;
196 char *more;
197 char *usage =
198 "Usage: ansi2knr [--filename FILENAME] [INPUT_FILE [OUTPUT_FILE]]\n";
199 /*
200 * In previous versions, ansi2knr recognized a --varargs switch.
201 * If this switch was supplied, ansi2knr would attempt to convert
202 * a ... argument to va_alist and va_dcl; if this switch was not
203 * supplied, ansi2knr would simply drop any such arguments.
204 * Now, ansi2knr always does this conversion, and we only
205 * check for this switch for backward compatibility.
206 */
207 int convert_varargs = 1;
208
209 while ( argc > 1 && argv[1][0] == '-' ) {
210 if ( !strcmp(argv[1], "--varargs") ) {
211 convert_varargs = 1;
212 argc--;
213 argv++;
214 continue;
215 }
216 if ( !strcmp(argv[1], "--filename") && argc > 2 ) {
217 filename = argv[2];
218 argc -= 2;
219 argv += 2;
220 continue;
221 }
222 fprintf(stderr, "Unrecognized switch: %s\n", argv[1]);
223 fprintf(stderr, usage);
224 exit(1);
225 }
226 switch ( argc )
227 {
228 default:
229 fprintf(stderr, usage);
230 exit(0);
231 case 3:
232 out = fopen(argv[2], "w");
233 if ( out == NULL ) {
234 fprintf(stderr, "Cannot open output file %s\n", argv[2]);
235 exit(1);
236 }
237 /* falls through */
238 case 2:
239 in = fopen(argv[1], "r");
240 if ( in == NULL ) {
241 fprintf(stderr, "Cannot open input file %s\n", argv[1]);
242 exit(1);
243 }
244 if ( filename == 0 )
245 filename = argv[1];
246 /* falls through */
247 case 1:
248 break;
249 }
250 if ( filename )
251 fprintf(out, "#line 1 \"%s\"\n", filename);
252 buf = malloc(bufsize);
253 line = buf;
254 while ( fgets(line, (unsigned)(buf + bufsize - line), in) != NULL )
255 {
256 test: line += strlen(line);
257 switch ( test1(buf) )
258 {
259 case 2: /* a function header */
260 convert1(buf, out, 1, convert_varargs);
261 break;
262 case 1: /* a function */
263 /* Check for a { at the start of the next line. */
264 more = ++line;
265 f: if ( line >= buf + (bufsize - 1) ) /* overflow check */
266 goto wl;
267 if ( fgets(line, (unsigned)(buf + bufsize - line), in) == NULL )
268 goto wl;
269 switch ( *skipspace(more, 1) )
270 {
271 case '{':
272 /* Definitely a function header. */
273 convert1(buf, out, 0, convert_varargs);
274 fputs(more, out);
275 break;
276 case 0:
277 /* The next line was blank or a comment: */
278 /* keep scanning for a non-comment. */
279 line += strlen(line);
280 goto f;
281 default:
282 /* buf isn't a function header, but */
283 /* more might be. */
284 fputs(buf, out);
285 strcpy(buf, more);
286 line = buf;
287 goto test;
288 }
289 break;
290 case -1: /* maybe the start of a function */
291 if ( line != buf + (bufsize - 1) ) /* overflow check */
292 continue;
293 /* falls through */
294 default: /* not a function */
295 wl: fputs(buf, out);
296 break;
297 }
298 line = buf;
299 }
300 if ( line != buf )
301 fputs(buf, out);
302 free(buf);
303 if ( out != stdout )
304 fclose(out);
305 if ( in != stdin )
306 fclose(in);
307 return 0;
308 }
309
/*
 * Step over whitespace and C comments, starting at p and moving in
 * direction dir (1 = forward, -1 = backward).  Returns a pointer to the
 * first character that is neither whitespace nor part of a complete
 * comment.  If a NUL is met while inside a comment (i.e. the comment
 * continues on another line), the pointer to that NUL is returned.
 */
char *
skipspace(p, dir)
    register char *p;
    register int dir;			/* 1 for forward, -1 for backward */
{
	for ( ; ; )
	   {
		/* Whitespace first. */
		for ( ; is_space(*p); p += dir )
		  ;
		/* Anything but a comment delimiter ends the scan. */
		if ( *p != '/' || p[dir] != '*' )
		  return p;
		/* Step over the delimiter, then hunt for the matching one. */
		for ( p += 2 * dir; *p != '*' || p[dir] != '/'; p += dir )
		  if ( *p == 0 )
		    return p;		/* multi-line comment?? */
		p += 2 * dir;
	   }
}
330
/*
 * Replace every character in [start, end) with a space, leaving
 * carriage returns and line feeds untouched so that line structure
 * is preserved.  Always returns 0.
 */
int
writeblanks(start, end)
    char *start;
    char *end;
{
	char *cursor = start;

	while ( cursor < end )
	   {
		switch ( *cursor )
		   {
		case '\r':
		case '\n':
			/* keep end-of-line characters */
			break;
		default:
			*cursor = ' ';
		   }
		cursor++;
	   }
	return 0;
}
345
/*
 * Decide whether the text in buf looks like a function definition.
 * buf may contain embedded newlines and/or end with one.
 * The result is:
 *	0 - certainly not a function definition;
 *	1 - certainly a function definition;
 *	2 - certainly a function prototype (NOT USED);
 *	-1 - possibly the start of a function definition:
 *		the caller should append another line and ask again.
 * Prototypes are not converted because Ghostscript's
 * declaration-generating macros look too much like prototypes
 * and would confuse the algorithms.
 */
int
test1(buf)
    char *buf;
{
	/* Keywords that can be followed by a left parenthesis */
	/* (which, unfortunately, is most of them). */
	static char *keywords[] =
	   {	"asm", "auto", "case", "char", "const", "double",
		"extern", "float", "for", "if", "int", "long",
		"register", "return", "short", "signed", "sizeof",
		"static", "switch", "typedef", "unsigned",
		"void", "volatile", "while", 0
	   };
	register char *scan = buf;
	char *last;		/* last significant character of buf */
	char *name_end;		/* just past the leading identifier */
	char *arg;		/* start of the first thing after the ( */
	char **kw;
	int name_len;
	int arg_len;
	int verdict;

	/* An identifier must start at the left margin. */
	if ( !isidfirstchar(*scan) )
	  return 0;

	/* Classify by the last non-blank, non-comment character. */
	last = skipspace(buf + strlen(buf) - 1, -1);
	if ( *last == '{' || *last == '}' )
	  return 0;		/* not a function */
	if ( *last == ';' )
	  verdict = 0 /*2*/;
	else if ( *last == ')' )
	  verdict = 1;
	else
	  verdict = -1;

	/* The identifier must be followed by a left parenthesis... */
	for ( ; isidchar(*scan); scan++ )
	  ;
	name_end = scan;
	scan = skipspace(scan, 1);
	if ( *scan != '(' )
	  return 0;		/* not a function */
	/* ...and by at least one parameter. */
	scan = skipspace(scan + 1, 1);
	if ( *scan == ')' )
	  return 0;		/* no parameters */

	/* The apparent function name must not be a keyword. */
	name_len = name_end - buf;
	for ( kw = keywords; *kw != 0; kw++ )
	  if ( strlen(*kw) == name_len && !strncmp(*kw, buf, name_len) )
	    return 0;		/* name is a keyword */

	/*
	 * identifier1(identifier2) is not a function definition,
	 * unless identifier2 is "void".
	 */
	arg = scan;
	for ( ; isidchar(*scan); scan++ )
	  ;
	arg_len = scan - arg;
	scan = skipspace(scan, 1);
	if ( *scan == ')' )
	   {	if ( arg_len != 4 || strncmp(arg, "void", 4) != 0 )
		  return 0;	/* not a function */
	   }

	/*
	 * A trailing ) might only close a formal parameter that is
	 * itself a procedure, so count parentheses (outside comments):
	 * if any are still open, more lines are needed.
	 */
	if ( verdict > 0 )
	   {	int depth = 0;

		scan = skipspace(buf, 1);
		while ( *scan )
		   {	if ( *scan == '(' )
			  depth++;
			else if ( *scan == ')' )
			  depth--;
			scan = skipspace(scan + 1, 1);
		   }
		if ( depth > 0 )
		  verdict = -1;
	   }
	return verdict;
}
437
438 /* Convert a recognized function definition or header to K&R syntax. */
439 int
440 convert1(buf, out, header, convert_varargs)
441 char *buf;
442 FILE *out;
443 int header; /* Boolean */
444 int convert_varargs; /* Boolean */
445 { char *endfn;
446 register char *p;
447 /*
448 * The breaks table contains pointers to the beginning and end
449 * of each argument.
450 */
451 char **breaks;
452 unsigned num_breaks = 2; /* for testing */
453 char **btop;
454 char **bp;
455 char **ap;
456 char *vararg = 0;
457
458 /* Pre-ANSI implementations don't agree on whether strchr */
459 /* is called strchr or index, so we open-code it here. */
460 for ( endfn = buf; *(endfn++) != '('; )
461 ;
462 top: p = endfn;
463 breaks = (char **)malloc(sizeof(char *) * num_breaks * 2);
464 if ( breaks == 0 )
465 { /* Couldn't allocate break table, give up */
466 fprintf(stderr, "Unable to allocate break table!\n");
467 fputs(buf, out);
468 return -1;
469 }
470 btop = breaks + num_breaks * 2 - 2;
471 bp = breaks;
472 /* Parse the argument list */
473 do
474 { int level = 0;
475 char *lp = NULL;
476 char *rp;
477 char *end = NULL;
478
479 if ( bp >= btop )
480 { /* Filled up break table. */
481 /* Allocate a bigger one and start over. */
482 free((char *)breaks);
483 num_breaks <<= 1;
484 goto top;
485 }
486 *bp++ = p;
487 /* Find the end of the argument */
488 for ( ; end == NULL; p++ )
489 { switch(*p)
490 {
491 case ',':
492 if ( !level ) end = p;
493 break;
494 case '(':
495 if ( !level ) lp = p;
496 level++;
497 break;
498 case ')':
499 if ( --level < 0 ) end = p;
500 else rp = p;
501 break;
502 case '/':
503 p = skipspace(p, 1) - 1;
504 break;
505 default:
506 ;
507 }
508 }
509 /* Erase any embedded prototype parameters. */
510 if ( lp )
511 writeblanks(lp + 1, rp);
512 p--; /* back up over terminator */
513 /* Find the name being declared. */
514 /* This is complicated because of procedure and */
515 /* array modifiers. */
516 for ( ; ; )
517 { p = skipspace(p - 1, -1);
518 switch ( *p )
519 {
520 case ']': /* skip array dimension(s) */
521 case ')': /* skip procedure args OR name */
522 { int level = 1;
523 while ( level )
524 switch ( *--p )
525 {
526 case ']': case ')': level++; break;
527 case '[': case '(': level--; break;
528 case '/': p = skipspace(p, -1) + 1; break;
529 default: ;
530 }
531 }
532 if ( *p == '(' && *skipspace(p + 1, 1) == '*' )
533 { /* We found the name being declared */
534 while ( !isidfirstchar(*p) )
535 p = skipspace(p, 1) + 1;
536 goto found;
537 }
538 break;
539 default:
540 goto found;
541 }
542 }
543 found: if ( *p == '.' && p[-1] == '.' && p[-2] == '.' )
544 { if ( convert_varargs )
545 { *bp++ = "va_alist";
546 vararg = p-2;
547 }
548 else
549 { p++;
550 if ( bp == breaks + 1 ) /* sole argument */
551 writeblanks(breaks[0], p);
552 else
553 writeblanks(bp[-1] - 1, p);
554 bp--;
555 }
556 }
557 else
558 { while ( isidchar(*p) ) p--;
559 *bp++ = p+1;
560 }
561 p = end;
562 }
563 while ( *p++ == ',' );
564 *bp = p;
565 /* Make a special check for 'void' arglist */
566 if ( bp == breaks+2 )
567 { p = skipspace(breaks[0], 1);
568 if ( !strncmp(p, "void", 4) )
569 { p = skipspace(p+4, 1);
570 if ( p == breaks[2] - 1 )
571 { bp = breaks; /* yup, pretend arglist is empty */
572 writeblanks(breaks[0], p + 1);
573 }
574 }
575 }
576 /* Put out the function name and left parenthesis. */
577 p = buf;
578 while ( p != endfn ) putc(*p, out), p++;
579 /* Put out the declaration. */
580 if ( header )
581 { fputs(");", out);
582 for ( p = breaks[0]; *p; p++ )
583 if ( *p == '\r' || *p == '\n' )
584 putc(*p, out);
585 }
586 else
587 { for ( ap = breaks+1; ap < bp; ap += 2 )
588 { p = *ap;
589 while ( isidchar(*p) )
590 putc(*p, out), p++;
591 if ( ap < bp - 1 )
592 fputs(", ", out);
593 }
594 fputs(") ", out);
595 /* Put out the argument declarations */
596 for ( ap = breaks+2; ap <= bp; ap += 2 )
597 (*ap)[-1] = ';';
598 if ( vararg != 0 )
599 { *vararg = 0;
600 fputs(breaks[0], out); /* any prior args */
601 fputs("va_dcl", out); /* the final arg */
602 fputs(bp[0], out);
603 }
604 else
605 fputs(breaks[0], out);
606 }
607 free((char *)breaks);
608 return 0;
609 }