]> git.saurik.com Git - wxWidgets.git/blame - utils/tex2rtf/tools/lacheck.lex
compilation fixes for gcc-3.4 + Unicode
[wxWidgets.git] / utils / tex2rtf / tools / lacheck.lex
CommitLineData
56d34922
JS
1/* -*- Mode: C -*-
2 *
3 * lacheck.lex - A consistency checker for LaTeX documents
4 *
5 * Copyright (C) 1991 Kresten Krab Thorup (krab@iesd.auc.dk).
6 *
7 * $Locker$
8 * $Revision$
9 * Author : Kresten Krab Thorup
10 * Created On : Sun May 26 18:11:58 1991
11 * Last Modified By: Kresten Krab Thorup
12 * Last Modified On: Thu May 30 02:29:57 1991
13 * Update Count : 16
14 *
15 * HISTORY
16 * 30-May-1991 (Last Mod: Thu May 30 02:22:33 1991 #15) Kresten Krab Thorup
17 * Added check for `$${punct}' and `{punct}$' constructions
18 * 30-May-1991 (Last Mod: Wed May 29 10:31:35 1991 #6) Kresten Krab Thorup
19 * Improved (dynamical) stack management from Andreas Stolcke ...
20 * <stolcke@ICSI.Berkeley.EDU>
21 * 26-May-1991 Kresten Krab Thorup
22 * Initial distribution version.
23 */
24%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/param.h>

/*
 * malloc(), realloc(), free() and exit() are declared by <stdlib.h>.
 * A hand-written `extern char *realloc();' conflicts with the standard
 * `void *realloc(void *, size_t)' declaration, so none is given here.
 */

#ifdef NEED_STRSTR
/* Fallback for C libraries lacking strstr(); defined after the rules. */
char *strstr();
#endif
34
/* Initial capacities of the group stack and the input stack. */
#define GROUP_STACK_SIZE 10
#define INPUT_STACK_SIZE 10

/* Program name printed in diagnostics and in the usage text. */
#define PROGNAME "LaCheck"

/*
 * Accessors for the current group, i.e. the entry on top of the group
 * stack.  They index gstack[gstackp - 1] and therefore must only be
 * used while gstackp > 0.
 */
#define CG_TOP  (gstack[gstackp - 1])
#define CG_NAME (CG_TOP.s_name)
#define CG_TYPE (CG_TOP.s_type)
#define CG_LINE (CG_TOP.s_line)
#define CG_FILE (CG_TOP.s_file)
46
/*
 * Forward declarations (old-style, without parameter lists) of the
 * helpers that are defined in the user code section after the rules.
 */
char *bg_command();
int check_top_level_end();
void push();
void pop();
void g_checkend();
void e_checkend();
void f_checkend();
void print_bad_match();
void input_file();
56
57 /* global variables */
58
char returnval[100];    /* result buffer shared by bg_command()/eg_command() */
char *file_name;        /* name of the file currently being scanned */
int line_count = 1;     /* current line number within file_name */
int warn_count = 0;     /* running total of warnings */
char verb_char;         /* delimiter character of the \verb being skipped */
64
65 /* the group stack */
66
67typedef struct tex_group
68 {
69 unsigned char *s_name;
70 int s_type;
71 int s_line;
72 char *s_file;
73 } tex_group;
74
75tex_group *gstack;
76int gstack_size = GROUP_STACK_SIZE;
77int gstackp = 0;
78
79typedef struct input_
80 {
81 YY_BUFFER_STATE stream;
82 char *name;
83 int linenum;
84 } input_;
85
86input_ *istack;
87int istack_size = INPUT_STACK_SIZE;
88int istackp = 0;
89
90int def_count = 0;
91
92%}
93
%x B_ENVIRONMENT E_ENVIRONMENT VERBATIM INCLUDE MATH COMMENT VERB DEF
%x AFTER_DISPLAY

  /* Group delimiters. */
b_group ("{"|\\bgroup)
e_group ("}"|\\egroup)

  /* Inline math delimiters. */
b_math \\\(
e_math \\\)
math \$

  /* Display math delimiters. */
b_display \\\[
e_display \\\]
display \$\$

  /* White space containing at most one newline (no paragraph break). */
non_par_ws ([ \t]+\n?[ \t]*|[ \t]*\n[ \t]*|[ \t]*\n?[ \t]+)

ws [ \n\t]
space ({ws}|\~|\\space)
hard_space (\~|\\space)

  /* NOTE(review): the trailing FXE / fxe are redundant inside A-Z / a-z;
     they look like mis-decoded non-ASCII letters -- confirm against the
     upstream source before changing them. */
u_letter [A-ZFXE]
l_letter [a-zfxe]
punct [\!\.\?]
atoz [a-zA-Z]
  /* The names must be wrapped in braces to be expanded; without them the
     pattern would match the literal words "u_letter" and "l_letter". */
letter ({u_letter}|{l_letter})

c_bin ("-"|"+"|"\\cdot"|"\\oplus"|"\\otimes"|"\\times")
l_bin (",")

general_abbrev {letter}+{punct}

non_abbrev {u_letter}{u_letter}+{punct}

font_spec (rm|bf|sl|it|tt|em|mediumseries|normalshape)

primitive \\(above|advance|catcode|chardef|closein|closeout|copy|count|countdef|cr|crcr|csname|delcode|dimendef|dimen|divide|expandafter|font|hskip|vskip|openout)

symbol ("$"("\\"{atoz}+|.)"$"|"\\#"|"\\$"|"\\%"|"\\ref")
132
133%%
134
"\\\\"                  { /* double backslash: nothing to check */ }

<DEF,INITIAL>"\\\%"     { /* escaped percent sign, not a comment */ }

<DEF,INITIAL>"%"[^\n]*  { /* comment up to the end of the line */ }

<DEF,INITIAL>\n         { ++line_count; }

<DEF,INITIAL>"\\\{"     { /* escaped brace does not open a group */ }

<DEF,INITIAL>"\\\}"     { /* escaped brace does not close a group */ }

"\\\$"                  { /* escaped dollar does not toggle math */ }
148
{b_group}   {
                /* Type 0 entry; always recorded under the name "{". */
                push("{", 0, line_count);
            }

{e_group}   {
                g_checkend(0);
            }
152
"\\"[exg]?def[^\{]  { BEGIN(DEF); }

<DEF>{b_group}      { def_count++; }

<DEF>{e_group}      {
                        /* Leave DEF once the braces opened inside it balance. */
                        def_count--;
                        if (def_count == 0) {
                            BEGIN(INITIAL);
                        }
                    }

<DEF>.              { /* everything else inside a definition is skipped */ }
162
{b_math}    {
                /* Opening math while already in math (4) or display (5)
                   is reported instead of being pushed. */
                int top = CG_TYPE;

                if (top != 4 && top != 5) {
                    push(yytext, 4, line_count);
                } else {
                    print_bad_match(yytext, 4);
                }
            }

{e_math}    {
                g_checkend(4);
            }

{b_display} {
                int top = CG_TYPE;

                if (top != 4 && top != 5) {
                    push(yytext, 5, line_count);
                } else {
                    print_bad_match(yytext, 5);
                }
            }

{e_display} {
                g_checkend(5);
                BEGIN(AFTER_DISPLAY);
            }
184
<AFTER_DISPLAY>{punct}  {
                /* Punctuation directly after the end of a display. */
                printf("\"%s\", line %d: punctuation mark \"%s\" should be placed before end of displaymath\n",
                       file_name, line_count, yytext);
                ++warn_count;
                BEGIN(INITIAL);
            }

<AFTER_DISPLAY>.        {
                /* Hand the character back so that the INITIAL rules scan
                   it; otherwise a brace or dollar sign directly after the
                   display would be lost. */
                yyless(0);
                BEGIN(INITIAL);
            }

<AFTER_DISPLAY>\n       {
                ++line_count;
                BEGIN(INITIAL);
            }
197
{punct}/("\$"|"\\)")    {
                /* Only warn when the current group is inline math. */
                if (CG_TYPE == 4) {
                    printf("\"%s\", line %d: punctuation mark \"%s\" should be placed after end of math mode\n",
                           file_name, line_count, yytext);
                    ++warn_count;
                }
            }
204
{math}      {
                /* A single dollar closes inline math, is an error inside
                   display math, and opens inline math otherwise. */
                switch (CG_TYPE) {
                case 5:
                    print_bad_match(yytext, 4);
                    break;
                case 4:
                    e_checkend(4, yytext);
                    break;
                default:
                    push(yytext, 4, line_count);
                    break;
                }
            }

{display}   {
                /* A double dollar closes display math, is an error inside
                   inline math, and opens display math otherwise. */
                switch (CG_TYPE) {
                case 4:
                    print_bad_match(yytext, 5);
                    break;
                case 5:
                    e_checkend(5, yytext);
                    BEGIN(AFTER_DISPLAY);
                    break;
                default:
                    push(yytext, 5, line_count);
                    break;
                }
            }
236
\\begingroup/[^a-zA-Z]  {
                /* Type 1 entry for \begingroup ... \endgroup. */
                push((unsigned char *)"\\begingroup", 1, line_count);
            }

\\endgroup/[^a-zA-Z]    {
                g_checkend(1);
            }
247
248
\\begin[ \t]*"{"    { BEGIN(B_ENVIRONMENT); }

\\begin[ \t]*/\n    {
                ++warn_count;
                printf("\"%s\", line %i: {argument} missing for \\begin\n",
                       file_name, line_count);
            }

<B_ENVIRONMENT>[^\}\n]+ {
                /* yytext is the environment name.  It is examined (and
                   pushed) before input() is called to consume the
                   character that ended the name. */
                int is_verbatim = (strcmp(yytext, "verbatim") == 0);

                if (!is_verbatim) {
                    push(yytext, 2, line_count);
                }
                input();
                if (is_verbatim) {
                    BEGIN(VERBATIM);
                } else {
                    BEGIN(INITIAL);
                }
            }
273
<VERBATIM>\\end[ \t]*\{verbatim\}   { BEGIN(INITIAL); }

<VERBATIM>.         { /* verbatim text is not checked */ }

<VERBATIM>\n        { line_count++; }
279
280
\\verb.     {
                /* The character right after \verb delimits the text. */
                verb_char = yytext[yyleng - 1];
                BEGIN(VERB);
            }

<VERB>(.|\n)    {
                /* `.' alone never matches a newline, so the newline is
                   listed explicitly; this keeps line_count right and
                   keeps the newline from falling through to the default
                   echo rule. */
                if (*yytext == verb_char) {
                    BEGIN(INITIAL);
                }
                if (*yytext == '\n') {
                    ++line_count;
                }
            }
292
293
\\end[ \t]*"{"      { BEGIN(E_ENVIRONMENT); }

\\end[ \t]*/\n      {
                ++warn_count;
                printf("\"%s\", line %i: {argument} missing for \\end\n",
                       file_name, line_count);
            }

<E_ENVIRONMENT>[^\}\n]+ {
                /* Check the name against the current group, then consume
                   the character that ended the name. */
                e_checkend(2, yytext);
                input();
                BEGIN(INITIAL);
            }
311
312
{ws}([a-zfxe]".")*[a-zA-ZfxeFXE]*[a-zfxe]"."/{non_par_ws}+[a-zfxe] {
                /* The match starts with one white space character, which
                   is counted if it is a newline and skipped when printing. */
                if (*yytext == '\n') {
                    ++line_count;
                }
                printf("\"%s\", line %d: missing `\\ ' after \"%s\"\n",
                       file_name, line_count, yytext + 1);
                ++warn_count;
            }

([a-zfxe]".")*[a-zA-ZfxeFXE]*[a-zfxe]"."/{non_par_ws}+[a-zfxe] {
                printf("\"%s\", line %d: missing `\\ ' after \"%s\"\n",
                       file_name, line_count, yytext);
                ++warn_count;
            }

{ws}{non_abbrev}/{non_par_ws}{u_letter} {
                if (*yytext == '\n') {
                    ++line_count;
                }
                /* "\\@" prints \@ ; "\@" is not a valid C escape sequence. */
                printf("\"%s\", line %d: missing `\\@' before punctuation mark in \"%s\"\n",
                       file_name, line_count, yytext + 1);
                ++warn_count;
            }

{non_abbrev}/{non_par_ws}{u_letter} {
                printf("\"%s\", line %d: missing `\\@' before `.' in \"%s\"\n",
                       file_name, line_count, yytext);
                ++warn_count;
            }
345
({hard_space}{space}|{space}{hard_space})   {
                /* A hard space next to any other space. */
                warn_count++;
                printf("\"%s\", line %d: double space at \"%s\"\n",
                       file_name, line_count, yytext);
            }
352
{c_bin}{ws}?(\\(\.|\,|\;|\:))*{ws}?\\ldots{ws}?(\\(\.|\,|\;|\:))*{ws}?{c_bin} {
                /* \ldots between two centred binary operators. */
                warn_count++;
                printf("\"%s\", line %d: \\ldots should be \\cdots in \"%s\"\n",
                       file_name, line_count, yytext);
            }

[^\\]{l_bin}{ws}?(\\(\.|\,|\;|\:))*{ws}?\\cdots{ws}?(\\(\.|\,|\;|\:))*{ws}?[^\\]{l_bin} {
                /* \cdots between two commas. */
                warn_count++;
                printf("\"%s\", line %d: \\cdots should be \\ldots in \"%s\"\n",
                       file_name, line_count, yytext);
            }

{c_bin}{ws}?(\\(\.|\,|\;|\:))*{ws}?"."+{ws}?(\\(\.|\,|\;|\:))*{ws}?{c_bin} {
                /* Literal dots between two centred binary operators. */
                warn_count++;
                printf("\"%s\", line %d: Dots should be \\cdots in \"%s\"\n",
                       file_name, line_count, yytext);
            }

[^\\]{l_bin}{ws}?(\\(\.|\,|\;|\:))*{ws}?"."+{ws}?(\\(\.|\,|\;|\:))*{ws}?[^\\]{l_bin} {
                /* Literal dots between two commas. */
                warn_count++;
                printf("\"%s\", line %d: Dots should be \\ldots in \"%s\"\n",
                       file_name, line_count, yytext);
            }

\.\.\.      {
                /* Three literal dots anywhere else. */
                warn_count++;
                printf("\"%s\", line %d: Dots should be ellipsis \"%s\"\n",
                       file_name, line_count, yytext);
            }
383
384 /*
385 *
386 * The `~' one is not too good, perhaps it should be an option.
387 *
388 */
389 /*
390 {l_letter}" "{symbol} {
391
392 printf("\"%s\", line %d: perhaps you should insert a `~' before%s\n",
393 file_name, line_count, ++yytext);
394 }
395 */
396
397 /*
398 {primitive}/[^a-zA-Z] {
399 {
400 printf("\"%s\", line %d: Don't use \"%s\" in LaTeX documents\n",
401 file_name, line_count, yytext);
402 ++warn_count ;
403 }}
404 */
405
\\{font_spec}/[ \t]*"{" {
                /* A font switch directly followed by an opening brace. */
                warn_count++;
                printf("\"%s\", line %d: Fontspecifiers don't take arguments. \"%s\"\n",
                       file_name, line_count, yytext);
                /* (void) input(); */
            }

\\([a-zA-Z\@]+\@[a-zA-Z\@]*|[a-zA-Z\@]*\@[a-zA-Z\@]+)   {
                /* A control sequence whose name contains an at-sign. */
                warn_count++;
                printf("\"%s\", line %d: Do not use @ in LaTeX macro names. \"%s\"\n",
                       file_name, line_count, yytext);
            }
421
422
"%"         { BEGIN(COMMENT); }

<COMMENT>\n {
                /* The comment ends with the line. */
                line_count++;
                BEGIN(INITIAL);
            }

<COMMENT>.  { /* comment text is skipped */ }
428
429
\\(input|include)([ \t]|"{")    { BEGIN(INCLUDE); }

<INCLUDE>[^\}\n]+   {
                /* yytext is the file name.  Names containing ".sty" are
                   reported and skipped, everything else is scanned
                   recursively through input_file().  The literals use
                   plain `.' and ``' because "\." and "\`" are not valid
                   C escape sequences. */
                if (strstr(yytext, ".sty") == NULL) {
                    input_file(yytext);
                } else {
                    printf("\"%s\", line %d: Style file `%s' omitted.\n",
                           file_name,
                           line_count,
                           yytext);
                    /* consume the character that ended the name */
                    input();
                }
                BEGIN(INITIAL);
            }
448
<<EOF>>     {
                /* End of an input file.  With nothing left on the input
                   stack the scan is finished; otherwise return to the
                   file that issued the \input or \include. */
                if (--istackp < 0) {
                    yyterminate();
                } else {
                    fclose(yyin);
                    f_checkend(file_name);
                    /* Release the buffer created for the finished file
                       before switching back; it was leaked otherwise. */
                    yy_delete_buffer(YY_CURRENT_BUFFER);
                    yy_switch_to_buffer(istack[istackp].stream);
                    free(file_name);
                    line_count = istack[istackp].linenum;
                    file_name = istack[istackp].name;
                    /* consume the character that ended the file name in
                       the including file */
                    input();
                    BEGIN(INITIAL);
                }
            }
466
467
.           { /* any other character is of no interest */ }
469%%
470int main( argc, argv )
471int argc;
472char *argv[];
473{
474 /* allocate initial stacks */
475 gstack = (tex_group *)malloc(gstack_size * sizeof(tex_group));
476 istack = (input_ *)malloc(istack_size * sizeof(input_));
477 if ( gstack == NULL || istack == NULL ) {
478 fprintf(stderr, "%s: not enough memory for stacks\n", PROGNAME);
479 exit(3);
480 }
481
482 if(argc > 1)
483 {
484 if ( (file_name = malloc(strlen(argv[1]) + 5)) == NULL ) {
485 fprintf(stderr, "%s: out of memory\n", PROGNAME);
486 exit(3);
487 }
488
489 strcpy(file_name, argv[1]);
490
491 if ((yyin = fopen( file_name, "r")) != NULL )
492 {
493 push(file_name, 3, 1);
494 yylex();
495 f_checkend(file_name);
496 }
497 else {
498 strcat(file_name, ".tex" );
499 if ((yyin = fopen( file_name, "r")) != NULL )
500 {
501 push(file_name, 3, 1);
502 yylex();
503 f_checkend(file_name);
504 }
505 else
506 fprintf(stderr,
507 "%s: Could not open : %s\n",PROGNAME, argv[1]);
508 }
509 }
510 else
511 {
512 printf("\n* %s *\n\n",PROGNAME);
513 printf("\t...a consistency checker for LaTeX documents.\n\n");
514
515 printf("Usage:\n\tlacheck filename[.tex] <return>\n\n\n");
516
517 printf("\tFrom within Emacs:\n\n\t");
518 printf("M-x compile <return>\n\tlacheck filename[.tex] <return>");
519 printf("\n\n\tUse C-x ` to step through the messages.\n\n");
520 printf("\n\tThe found context is displayed in \"double quotes\"\n\n");
521 printf("Remark:\n\tAll messages are only warnings!\n\n");
522 printf("\tYour document may be right though LaCheck tells\n");
523 printf("\tsomthing else.\n\n");
524 }
525 return(0);
526}
527
#ifdef NEED_STRSTR
/*
 * Replacement strstr() for C libraries that lack one.
 *
 * Returns a pointer to the first occurrence of substring in string,
 * string itself when substring is empty, and a null pointer when there
 * is no occurrence.
 */
char *
strstr(string, substring)
    register char *string;      /* String to search. */
    char *substring;            /* Substring to try to find in string. */
{
    register char *hay, *needle;

    if (*substring == '\0') {
        return string;
    }

    while (*string != '\0') {
        /* Only start a full comparison where the first characters agree. */
        if (*string == *substring) {
            hay = string;
            needle = substring;
            for (;;) {
                if (*needle == '\0') {
                    return string;
                }
                if (*hay++ != *needle++) {
                    break;
                }
            }
        }
        string++;
    }
    return (char *) 0;
}
#endif /* NEED_STRSTR */
563
564void push(p_name, p_type, p_line)
565unsigned char *p_name;
566int p_type;
567int p_line;
568{
569 if ( gstackp == gstack_size ) { /* extend stack */
570 gstack_size *= 2;
571 gstack = (tex_group *)realloc(gstack, gstack_size * sizeof(tex_group));
572 if ( gstack == NULL ) {
573 fprintf(stderr, "%s: stack out of memory", PROGNAME);
574 exit(3);
575 }
576 }
577
578 if ( (gstack[gstackp].s_name =
579 (unsigned char *)malloc(strlen(p_name) + 1)) == NULL ||
580 (gstack[gstackp].s_file =
581 (char *)malloc(strlen(file_name) + 1)) == NULL ) {
582 fprintf(stderr, "%s: out of memory\n", PROGNAME);
583 exit(3);
584 }
585
586 strcpy(gstack[gstackp].s_name,p_name);
587 gstack[gstackp].s_type = p_type;
588 gstack[gstackp].s_line = p_line;
589 strcpy(gstack[gstackp].s_file,file_name);
590 ++gstackp;
591
592}
593
594void input_file(file_nam)
595char *file_nam;
596{
597 char *tmp_file_name;
598 FILE *tmp_yyin;
599
600 if ( (tmp_file_name = malloc(strlen(file_nam) + 5)) == NULL ) {
601 fprintf(stderr, "%s: out of memory\n", PROGNAME);
602 exit(3);
603 }
604 strcpy(tmp_file_name,file_nam);
605
606 if (istackp == istack_size) { /* extend stack */
607 istack_size *= 2;
608 istack = (input_ *)realloc(istack, istack_size * sizeof(input_));
609 if ( istack == NULL ) {
610 fprintf(stderr, "%s: \\input stack out of memory\n", PROGNAME);
611 exit(3);
612 }
613 }
614
615 istack[istackp].stream = YY_CURRENT_BUFFER;
616 istack[istackp].linenum = line_count;
617 istack[istackp].name = file_name;
618 ++istackp;
619
620 if ((tmp_yyin = fopen( file_nam, "r")) != NULL )
621 {
622 yyin = tmp_yyin;
623 yy_switch_to_buffer(yy_create_buffer(yyin,YY_BUF_SIZE));
624 file_name = tmp_file_name;
625 push(file_name, 3, 1);
626 line_count = 1;
627 }
628 else {
629 (void) strcat(tmp_file_name, ".tex");
630 if ((tmp_yyin = fopen( tmp_file_name , "r")) != NULL )
631 {
632 yyin = tmp_yyin;
633 yy_switch_to_buffer(yy_create_buffer(yyin,YY_BUF_SIZE));
634 file_name = tmp_file_name;
635 push(file_name, 3, 1);
636 line_count = 1;
637 }
638 else
639 {
640 --istackp;
641 fclose(tmp_yyin);
642 free(tmp_file_name);
643 printf("\"%s\", line %d: Could not open \"%s\"\n",
644 file_name,
645 line_count,
646 file_nam);
647 input();
648 }
649 }
650}
651
652void pop()
653{
654 if ( gstackp == 0 )
655 {
656 fprintf(stderr, "%s: Stack underflow\n", PROGNAME);
657 exit(4);
658 }
659 --gstackp;
660
661 free(gstack[gstackp].s_name);
662 free(gstack[gstackp].s_file);
663}
664
665char *bg_command(name)
666char *name;
667{
668
669 switch (CG_TYPE) {
670
671 case 2:
672 (void) strcpy( returnval, "\\begin\{" );
673 (void) strcat( returnval, (char *) name);
674 (void) strcat( returnval, "}" );
675 break;
676
677 case 3:
678 (void) strcpy( returnval, "beginning of file " );
679 (void) strcat( returnval, (char *) name);
680 break;
681
682 case 4:
683 (void) strcpy( returnval, "math begin " );
684 (void) strcat( returnval, (char *) name);
685 break;
686
687 case 5:
688 (void) strcpy( returnval, "display math begin " );
689 (void) strcat( returnval, (char *) name);
690 break;
691
692 default:
693 (void) strcpy( returnval, name );
694
695 }
696
697 return ((char *)returnval);
698}
699
700char *eg_command(name,type)
701int type;
702char *name;
703{
704
705 switch (type) {
706
707 case 2:
708 (void) strcpy( returnval, "\\end{" );
709 (void) strcat( returnval, (char *) name);
710 (void) strcat( returnval, "}" );
711 break;
712
713 case 3:
714 (void) strcpy( returnval, "end of file " );
715 (void) strcat( returnval, (char *) name);
716 break;
717
718 case 4:
719 (void) strcpy( returnval, "math end " );
720 (void) strcat( returnval, (char *) name);
721 break;
722
723 case 5:
724 (void) strcpy( returnval, "display math end " );
725 (void) strcat( returnval, (char *) name);
726 break;
727
728 default:
729 (void) strcpy( returnval, name );
730 break;
731 }
732
733 return ((char *)returnval);
734}
735
736
737void g_checkend(n)
738int n;
739{
740 if ( check_top_level_end(yytext,n) == 1 )
741 if ( CG_TYPE != n )
742 print_bad_match(yytext,n);
743 else
744 pop();
745}
746
747void e_checkend(n, name)
748int n;
749char *name;
750{
751 if ( check_top_level_end(name,n) == 1 )
752 {
753 if ( CG_TYPE != n || strcmp( CG_NAME, name ) != 0 )
754 print_bad_match(name,n);
755
756 pop();
757
758 }
759}
760
761void f_checkend(name)
762char *name;
763{
764 if ( check_top_level_end(name,3) == 1 )
765 {
766 if ( CG_TYPE != 3 || strcmp( CG_NAME, name ) != 0 )
767
768 while( CG_TYPE != 3 )
769 {
770 print_bad_match(name,3);
771 pop();
772 }
773
774 pop();
775 }
776}
777
778void print_bad_match(end_command,type)
779char *end_command;
780int type;
781{
782 printf("\"%s\", line %i: <- unmatched \"%s\"\n",
783 file_name,
784 line_count,
785 eg_command( end_command , type) ) ;
786
787 printf("\"%s\", line %i: -> unmatched \"%s\"\n",
788 CG_FILE,
789 CG_LINE,
790 bg_command( CG_NAME ) ) ;
791 warn_count += 2;
792}
793
794int check_top_level_end(end_command,type)
795char *end_command;
796int type;
797{
798 if ( gstackp == 0 )
799 {
800 printf("\"%s\", line %i: \"%s\" found at top level\n",
801 file_name,
802 line_count,
803 eg_command( end_command, type )) ;
804 ++warn_count;
805 return(0);
806 }
807 else
808 return(1);
809}
810
811
812