]> git.saurik.com Git - wxWidgets.git/blob - utils/tex2rtf/tools/lacheck.lex
Applied patch [ 809066 ] Cleanup for text2rtf
[wxWidgets.git] / utils / tex2rtf / tools / lacheck.lex
1 /* -*- Mode: C -*-
2 *
3 * lacheck.lex - A consistency checker for LaTeX documents
4 *
5 * Copyright (C) 1991 Kresten Krab Thorup (krab@iesd.auc.dk).
6 *
7 * $Locker$
8 * $Revision$
9 * Author : Kresten Krab Thorup
10 * Created On : Sun May 26 18:11:58 1991
11 * Last Modified By: Kresten Krab Thorup
12 * Last Modified On: Thu May 30 02:29:57 1991
13 * Update Count : 16
14 *
15 * HISTORY
16 * 30-May-1991 (Last Mod: Thu May 30 02:22:33 1991 #15) Kresten Krab Thorup
17 * Added check for `$${punct}' and `{punct}$' constructions
18 * 30-May-1991 (Last Mod: Wed May 29 10:31:35 1991 #6) Kresten Krab Thorup
19 * Improved (dynamical) stack management from Andreas Stolcke ...
20 * <stolcke@ICSI.Berkeley.EDU>
21 * 26-May-1991 Kresten Krab Thorup
22 * Initial distribution version.
23 */
24 %{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/param.h>
30
#ifdef NEED_STRSTR
char *strstr();                 /* replacement is defined in the user code section */
#endif

/* Initial number of slots in the group stack and the \input stack. */
#define GROUP_STACK_SIZE 10
#define INPUT_STACK_SIZE 10

/* Name used as the prefix of messages written to stderr. */
#define PROGNAME "LaCheck"

/* Accessors for the entry on top of the group stack (gstack[gstackp-1]). */

#define CG_NAME gstack[gstackp-1].s_name
#define CG_TYPE gstack[gstackp-1].s_type
#define CG_LINE gstack[gstackp-1].s_line
#define CG_FILE gstack[gstackp-1].s_file

/* Helpers defined after the rules section. */

void push();
void pop();
char *bg_command();
void g_checkend();
void e_checkend();
void f_checkend();
int check_top_level_end();
void print_bad_match();
void input_file();
/* Scanner-wide state shared by the rules and the helper functions. */

char returnval[100];    /* scratch buffer returned by bg_command()/eg_command() */
int line_count = 1;     /* current line in the file being scanned */
int warn_count = 0;     /* number of warnings counted so far */
char *file_name;        /* name of the file being scanned */
char verb_char;         /* delimiter character of the current \verb */
64
65 /* the group stack */
66
67 typedef struct tex_group
68 {
69 unsigned char *s_name;
70 int s_type;
71 int s_line;
72 char *s_file;
73 } tex_group;
74
75 tex_group *gstack;
76 int gstack_size = GROUP_STACK_SIZE;
77 int gstackp = 0;
78
79 typedef struct input_
80 {
81 YY_BUFFER_STATE stream;
82 char *name;
83 int linenum;
84 } input_;
85
86 input_ *istack;
87 int istack_size = INPUT_STACK_SIZE;
88 int istackp = 0;
89
90 int def_count = 0;
91
92 %}
93
%x B_ENVIRONMENT E_ENVIRONMENT VERBATIM INCLUDE MATH COMMENT VERB DEF
%x AFTER_DISPLAY

/* group delimiters */
b_group ("{"|\\bgroup)
e_group ("}"|\\egroup)

/* inline math delimiters */
b_math \\\(
e_math \\\)
math \$

/* display math delimiters */
b_display \\\[
e_display \\\]
display \$\$

/* white space containing at most one newline, i.e. no paragraph break */
non_par_ws ([ \t]+\n?[ \t]*|[ \t]*\n[ \t]*|[ \t]*\n?[ \t]+)

ws [ \n\t]
space ({ws}|\~|\\space)
hard_space (\~|\\space)

/* NOTE(review): the trailing FXE/fxe in the letter classes look like
 * mis-encoded non-ASCII letters -- confirm against the upstream source. */
u_letter [A-ZFXE]
l_letter [a-zfxe]
punct [\!\.\?]
atoz [a-zA-Z]
/* A definition is only expanded when written as {name}; the bare words
 * used previously matched the literal text "u_letter" / "l_letter". */
letter ({u_letter}|{l_letter})

c_bin ("-"|"+"|"\\cdot"|"\\oplus"|"\\otimes"|"\\times")
l_bin (",")

general_abbrev {letter}+{punct}

non_abbrev {u_letter}{u_letter}+{punct}

font_spec (rm|bf|sl|it|tt|em|mediumseries|normalshape)

primitive \\(above|advance|catcode|chardef|closein|closeout|copy|count|countdef|cr|crcr|csname|delcode|dimendef|dimen|divide|expandafter|font|hskip|vskip|openout)

symbol ("$"("\\"{atoz}+|.)"$"|"\\#"|"\\$"|"\\%"|"\\ref")
132
133 %%
134
"\\\\"                  { /* double backslash: nothing to check */ }

<DEF,INITIAL>"\\\%"     { /* escaped percent sign */ }

<DEF,INITIAL>"%"[^\n]*  { /* comment: skipped up to the end of the line */ }

<DEF,INITIAL>\n         { ++line_count; }

<DEF,INITIAL>"\\\{"     { /* escaped opening brace */ }

<DEF,INITIAL>"\\\}"     { /* escaped closing brace */ }

"\\\$"                  { /* escaped dollar sign */ }

{b_group}               { push("{", 0, line_count); }

{e_group}               { g_checkend(0); }

"\\"[exg]?def[^\{]      { BEGIN(DEF); }

<DEF>{b_group}          { ++def_count; }

<DEF>{e_group}          {
                          /* leave DEF once the braces opened in it are closed */
                          --def_count;
                          if (def_count == 0)
                            {
                              BEGIN(INITIAL);
                            }
                        }

<DEF>.                  { /* contents of a definition are not checked */ }
162
{b_math}                {
                          /* group type 4 is math, 5 is display math;
                             neither may be opened inside the other */
                          if (CG_TYPE == 4 || CG_TYPE == 5)
                            print_bad_match(yytext, 4);
                          else
                            push(yytext, 4, line_count);
                        }

{e_math}                { g_checkend(4); }

{b_display}             {
                          if (CG_TYPE == 4 || CG_TYPE == 5)
                            print_bad_match(yytext, 5);
                          else
                            push(yytext, 5, line_count);
                        }

{e_display}             {
                          g_checkend(5);
                          BEGIN(AFTER_DISPLAY);
                        }

<AFTER_DISPLAY>{punct}  {
                          printf("\"%s\", line %d: punctuation mark \"%s\" should be placed before end of displaymath\n",
                                 file_name, line_count, yytext);
                          ++warn_count;
                          BEGIN(INITIAL);
                        }

<AFTER_DISPLAY>.        { BEGIN(INITIAL); }

<AFTER_DISPLAY>\n       {
                          ++line_count;
                          BEGIN(INITIAL);
                        }

{punct}/("\$"|"\\)")    {
                          /* only a warning when the innermost group is math */
                          if (CG_TYPE == 4)
                            {
                              printf("\"%s\", line %d: punctuation mark \"%s\" should be placed after end of math mode\n",
                                     file_name, line_count, yytext);
                              ++warn_count;
                            }
                        }

{math}                  {
                          /* a single $ closes math if math is open, otherwise opens it */
                          if (CG_TYPE == 5)
                            print_bad_match(yytext, 4);
                          else if (CG_TYPE == 4)
                            e_checkend(4, yytext);
                          else
                            push(yytext, 4, line_count);
                        }

{display}               {
                          /* $$ closes display math if it is open, otherwise opens it */
                          if (CG_TYPE == 4)
                            print_bad_match(yytext, 5);
                          else if (CG_TYPE == 5)
                            {
                              e_checkend(5, yytext);
                              BEGIN(AFTER_DISPLAY);
                            }
                          else
                            push(yytext, 5, line_count);
                        }
236
\\begingroup/[^a-zA-Z]  { push((unsigned char *)"\\begingroup", 1, line_count); }

\\endgroup/[^a-zA-Z]    { g_checkend(1); }

\\begin[ \t]*"{"        { BEGIN(B_ENVIRONMENT); }

\\begin[ \t]*/\n        {
                          printf("\"%s\", line %i: {argument} missing for \\begin\n",
                                 file_name, line_count);
                          ++warn_count;
                        }

<B_ENVIRONMENT>[^\}\n]+ {
                          /* yytext is the environment name; input() discards
                             the character that ended it */
                          if (strcmp(yytext, "verbatim") == 0)
                            {
                              input();
                              BEGIN(VERBATIM);
                            }
                          else
                            {
                              push(yytext, 2, line_count);
                              input();
                              BEGIN(INITIAL);
                            }
                        }

<VERBATIM>\\end[ \t]*\{verbatim\}  { BEGIN(INITIAL); }

<VERBATIM>.             { /* verbatim text is not checked */ }

<VERBATIM>\n            { ++line_count; }

\\verb.                 {
                          /* the character right after \verb is the delimiter */
                          verb_char = yytext[yyleng - 1];
                          BEGIN(VERB);
                        }

<VERB>(.|\n)            {
                          /* "." alone never matches a newline, so the newline
                             has to be part of the pattern for the line count
                             below to be reachable */
                          if (*yytext == verb_char)
                            BEGIN(INITIAL);
                          if (*yytext == '\n')
                            ++line_count;
                        }

\\end[ \t]*"{"          { BEGIN(E_ENVIRONMENT); }

\\end[ \t]*/\n          {
                          printf("\"%s\", line %i: {argument} missing for \\end\n",
                                 file_name, line_count);
                          ++warn_count;
                        }

<E_ENVIRONMENT>[^\}\n]+ {
                          e_checkend(2, yytext);
                          input();      /* discard the character that ended the name */
                          BEGIN(INITIAL);
                        }
311
312
{ws}([a-zfxe]".")*[a-zA-ZfxeFXE]*[a-zfxe]"."/{non_par_ws}+[a-zfxe] {
                          /* the match starts with one white space character */
                          if (*yytext == '\n')
                            ++line_count;

                          printf("\"%s\", line %d: missing `\\ ' after \"%s\"\n",
                                 file_name, line_count, yytext + 1);
                          ++warn_count;
                        }

([a-zfxe]".")*[a-zA-ZfxeFXE]*[a-zfxe]"."/{non_par_ws}+[a-zfxe] {
                          printf("\"%s\", line %d: missing `\\ ' after \"%s\"\n",
                                 file_name, line_count, yytext);
                          ++warn_count;
                        }

{ws}{non_abbrev}/{non_par_ws}{u_letter} {
                          /* the match starts with one white space character */
                          if (*yytext == '\n')
                            ++line_count;

                          printf("\"%s\", line %d: missing `\\@' before punctuation mark in \"%s\"\n",
                                 file_name, line_count, yytext + 1);
                          ++warn_count;
                        }

{non_abbrev}/{non_par_ws}{u_letter} {
                          printf("\"%s\", line %d: missing `\\@' before `.' in \"%s\"\n",
                                 file_name, line_count, yytext);
                          ++warn_count;
                        }

({hard_space}{space}|{space}{hard_space}) {
                          printf("\"%s\", line %d: double space at \"%s\"\n",
                                 file_name, line_count, yytext);
                          ++warn_count;
                        }

{c_bin}{ws}?(\\(\.|\,|\;|\:))*{ws}?\\ldots{ws}?(\\(\.|\,|\;|\:))*{ws}?{c_bin} {
                          printf("\"%s\", line %d: \\ldots should be \\cdots in \"%s\"\n",
                                 file_name, line_count, yytext);
                          ++warn_count;
                        }

[^\\]{l_bin}{ws}?(\\(\.|\,|\;|\:))*{ws}?\\cdots{ws}?(\\(\.|\,|\;|\:))*{ws}?[^\\]{l_bin} {
                          printf("\"%s\", line %d: \\cdots should be \\ldots in \"%s\"\n",
                                 file_name, line_count, yytext);
                          ++warn_count;
                        }

{c_bin}{ws}?(\\(\.|\,|\;|\:))*{ws}?"."+{ws}?(\\(\.|\,|\;|\:))*{ws}?{c_bin} {
                          printf("\"%s\", line %d: Dots should be \\cdots in \"%s\"\n",
                                 file_name, line_count, yytext);
                          ++warn_count;
                        }

[^\\]{l_bin}{ws}?(\\(\.|\,|\;|\:))*{ws}?"."+{ws}?(\\(\.|\,|\;|\:))*{ws}?[^\\]{l_bin} {
                          printf("\"%s\", line %d: Dots should be \\ldots in \"%s\"\n",
                                 file_name, line_count, yytext);
                          ++warn_count;
                        }

\.\.\.                  {
                          printf("\"%s\", line %d: Dots should be ellipsis \"%s\"\n",
                                 file_name, line_count, yytext);
                          ++warn_count;
                        }

 /*
  *
  * The `~' one is not too good, perhaps it should be an option.
  *
  */
 /*
 {l_letter}" "{symbol} {

 printf("\"%s\", line %d: perhaps you should insert a `~' before%s\n",
 file_name, line_count, ++yytext);
 }
 */

 /*
 {primitive}/[^a-zA-Z] {
 {
 printf("\"%s\", line %d: Don't use \"%s\" in LaTeX documents\n",
 file_name, line_count, yytext);
 ++warn_count ;
 }}
 */
405
\\{font_spec}/[ \t]*"{" {
                          /* a font switch followed by an opening brace */
                          printf("\"%s\", line %d: Fontspecifiers don't take arguments. \"%s\"\n",
                                 file_name, line_count, yytext);
                          ++warn_count;
                          /* (void) input(); */
                        }

\\([a-zA-Z\@]+\@[a-zA-Z\@]*|[a-zA-Z\@]*\@[a-zA-Z\@]+) {
                          /* a control sequence name containing an @ */
                          printf("\"%s\", line %d: Do not use @ in LaTeX macro names. \"%s\"\n",
                                 file_name, line_count, yytext);
                          ++warn_count;
                        }
421
422
"%"                     { BEGIN(COMMENT); }

<COMMENT>\n             {
                          BEGIN(INITIAL);
                          ++line_count;
                        }

<COMMENT>.              { /* comment text is not checked */ }

\\(input|include)([ \t]|"{")  { BEGIN(INCLUDE); }

<INCLUDE>[^\}\n]+       {
                          /* yytext is the file name argument */
                          if (strstr(yytext, ".sty") == NULL)
                            {
                              input_file(yytext);
                            }
                          else
                            {
                              printf("\"%s\", line %d: Style file `%s' omitted.\n",
                                     file_name,
                                     line_count,
                                     yytext);
                              input();  /* discard the character that ended the name */
                            }
                          BEGIN(INITIAL);
                        }

<<EOF>>                 {
                          if (--istackp < 0)
                            yyterminate();
                          else
                            {
                              /* end of an included file: check it, release
                                 its buffer and resume the including file */
                              fclose(yyin);
                              f_checkend(file_name);
                              yy_delete_buffer(YY_CURRENT_BUFFER);
                              yy_switch_to_buffer(istack[istackp].stream);
                              free(file_name);
                              line_count = istack[istackp].linenum;
                              file_name = istack[istackp].name;
                              input();  /* discard the character left after the file name */
                              BEGIN(INITIAL);
                            }
                        }

.                       { /* anything else is ignored */ }
469 %%
470 int main( argc, argv )
471 int argc;
472 char *argv[];
473 {
474 /* allocate initial stacks */
475 gstack = (tex_group *)malloc(gstack_size * sizeof(tex_group));
476 istack = (input_ *)malloc(istack_size * sizeof(input_));
477 if ( gstack == NULL || istack == NULL ) {
478 fprintf(stderr, "%s: not enough memory for stacks\n", PROGNAME);
479 exit(3);
480 }
481
482 if(argc > 1)
483 {
484 if ( (file_name = malloc(strlen(argv[1]) + 5)) == NULL ) {
485 fprintf(stderr, "%s: out of memory\n", PROGNAME);
486 exit(3);
487 }
488
489 strcpy(file_name, argv[1]);
490
491 if ((yyin = fopen( file_name, "r")) != NULL )
492 {
493 push(file_name, 3, 1);
494 yylex();
495 f_checkend(file_name);
496 }
497 else {
498 strcat(file_name, ".tex" );
499 if ((yyin = fopen( file_name, "r")) != NULL )
500 {
501 push(file_name, 3, 1);
502 yylex();
503 f_checkend(file_name);
504 }
505 else
506 fprintf(stderr,
507 "%s: Could not open : %s\n",PROGNAME, argv[1]);
508 }
509 }
510 else
511 {
512 printf("\n* %s *\n\n",PROGNAME);
513 printf("\t...a consistency checker for LaTeX documents.\n\n");
514
515 printf("Usage:\n\tlacheck filename[.tex] <return>\n\n\n");
516
517 printf("\tFrom within Emacs:\n\n\t");
518 printf("M-x compile <return>\n\tlacheck filename[.tex] <return>");
519 printf("\n\n\tUse C-x ` to step through the messages.\n\n");
520 printf("\n\tThe found context is displayed in \"double quotes\"\n\n");
521 printf("Remark:\n\tAll messages are only warnings!\n\n");
522 printf("\tYour document may be right though LaCheck tells\n");
523 printf("\tsomthing else.\n\n");
524 }
525 return(0);
526 }
527
#ifdef NEED_STRSTR
/*
 * Replacement for the library strstr(), compiled only when NEED_STRSTR is
 * defined.  Returns a pointer to the first occurrence of substring within
 * string, string itself when substring is empty, or a null pointer when
 * there is no occurrence.
 */
char *
strstr(string, substring)
     register char *string;     /* String to search. */
     char *substring;           /* Substring to try to find in string. */
{
    register char *s, *p;

    if (*substring == 0) {
        return string;
    }

    for ( ; *string != 0; string += 1) {
        /* Cheap test on the first character before comparing the rest. */
        if (*string != *substring) {
            continue;
        }

        s = string;
        p = substring;
        while (*p != 0 && *s == *p) {
            s += 1;
            p += 1;
        }
        if (*p == 0) {
            return string;      /* the whole substring was matched */
        }
    }
    return (char *) 0;
}
#endif /* NEED_STRSTR */
563
564 void push(p_name, p_type, p_line)
565 unsigned char *p_name;
566 int p_type;
567 int p_line;
568 {
569 if ( gstackp == gstack_size ) { /* extend stack */
570 gstack_size *= 2;
571 gstack = (tex_group *)realloc(gstack, gstack_size * sizeof(tex_group));
572 if ( gstack == NULL ) {
573 fprintf(stderr, "%s: stack out of memory", PROGNAME);
574 exit(3);
575 }
576 }
577
578 if ( (gstack[gstackp].s_name =
579 (unsigned char *)malloc(strlen(p_name) + 1)) == NULL ||
580 (gstack[gstackp].s_file =
581 (char *)malloc(strlen(file_name) + 1)) == NULL ) {
582 fprintf(stderr, "%s: out of memory\n", PROGNAME);
583 exit(3);
584 }
585
586 strcpy(gstack[gstackp].s_name,p_name);
587 gstack[gstackp].s_type = p_type;
588 gstack[gstackp].s_line = p_line;
589 strcpy(gstack[gstackp].s_file,file_name);
590 ++gstackp;
591
592 }
593
594 void input_file(file_nam)
595 char *file_nam;
596 {
597 char *tmp_file_name;
598 FILE *tmp_yyin;
599
600 if ( (tmp_file_name = malloc(strlen(file_nam) + 5)) == NULL ) {
601 fprintf(stderr, "%s: out of memory\n", PROGNAME);
602 exit(3);
603 }
604 strcpy(tmp_file_name,file_nam);
605
606 if (istackp == istack_size) { /* extend stack */
607 istack_size *= 2;
608 istack = (input_ *)realloc(istack, istack_size * sizeof(input_));
609 if ( istack == NULL ) {
610 fprintf(stderr, "%s: \\input stack out of memory\n", PROGNAME);
611 exit(3);
612 }
613 }
614
615 istack[istackp].stream = YY_CURRENT_BUFFER;
616 istack[istackp].linenum = line_count;
617 istack[istackp].name = file_name;
618 ++istackp;
619
620 if ((tmp_yyin = fopen( file_nam, "r")) != NULL )
621 {
622 yyin = tmp_yyin;
623 yy_switch_to_buffer(yy_create_buffer(yyin,YY_BUF_SIZE));
624 file_name = tmp_file_name;
625 push(file_name, 3, 1);
626 line_count = 1;
627 }
628 else {
629 (void) strcat(tmp_file_name, ".tex");
630 if ((tmp_yyin = fopen( tmp_file_name , "r")) != NULL )
631 {
632 yyin = tmp_yyin;
633 yy_switch_to_buffer(yy_create_buffer(yyin,YY_BUF_SIZE));
634 file_name = tmp_file_name;
635 push(file_name, 3, 1);
636 line_count = 1;
637 }
638 else
639 {
640 --istackp;
641 fclose(tmp_yyin);
642 free(tmp_file_name);
643 printf("\"%s\", line %d: Could not open \"%s\"\n",
644 file_name,
645 line_count,
646 file_nam);
647 input();
648 }
649 }
650 }
651
652 void pop()
653 {
654 if ( gstackp == 0 )
655 {
656 fprintf(stderr, "%s: Stack underflow\n", PROGNAME);
657 exit(4);
658 }
659 --gstackp;
660
661 free(gstack[gstackp].s_name);
662 free(gstack[gstackp].s_file);
663 }
664
665 char *bg_command(name)
666 char *name;
667 {
668
669 switch (CG_TYPE) {
670
671 case 2:
672 (void) strcpy( returnval, "\\begin\{" );
673 (void) strcat( returnval, (char *) name);
674 (void) strcat( returnval, "}" );
675 break;
676
677 case 3:
678 (void) strcpy( returnval, "beginning of file " );
679 (void) strcat( returnval, (char *) name);
680 break;
681
682 case 4:
683 (void) strcpy( returnval, "math begin " );
684 (void) strcat( returnval, (char *) name);
685 break;
686
687 case 5:
688 (void) strcpy( returnval, "display math begin " );
689 (void) strcat( returnval, (char *) name);
690 break;
691
692 default:
693 (void) strcpy( returnval, name );
694
695 }
696
697 return ((char *)returnval);
698 }
699
700 char *eg_command(name,type)
701 int type;
702 char *name;
703 {
704
705 switch (type) {
706
707 case 2:
708 (void) strcpy( returnval, "\\end{" );
709 (void) strcat( returnval, (char *) name);
710 (void) strcat( returnval, "}" );
711 break;
712
713 case 3:
714 (void) strcpy( returnval, "end of file " );
715 (void) strcat( returnval, (char *) name);
716 break;
717
718 case 4:
719 (void) strcpy( returnval, "math end " );
720 (void) strcat( returnval, (char *) name);
721 break;
722
723 case 5:
724 (void) strcpy( returnval, "display math end " );
725 (void) strcat( returnval, (char *) name);
726 break;
727
728 default:
729 (void) strcpy( returnval, name );
730 break;
731 }
732
733 return ((char *)returnval);
734 }
735
736
737 void g_checkend(n)
738 int n;
739 {
740 if ( check_top_level_end(yytext,n) == 1 )
741 if ( CG_TYPE != n )
742 print_bad_match(yytext,n);
743 else
744 pop();
745 }
746
747 void e_checkend(n, name)
748 int n;
749 char *name;
750 {
751 if ( check_top_level_end(name,n) == 1 )
752 {
753 if ( CG_TYPE != n || strcmp( CG_NAME, name ) != 0 )
754 print_bad_match(name,n);
755
756 pop();
757
758 }
759 }
760
761 void f_checkend(name)
762 char *name;
763 {
764 if ( check_top_level_end(name,3) == 1 )
765 {
766 if ( CG_TYPE != 3 || strcmp( CG_NAME, name ) != 0 )
767
768 while( CG_TYPE != 3 )
769 {
770 print_bad_match(name,3);
771 pop();
772 }
773
774 pop();
775 }
776 }
777
778 void print_bad_match(end_command,type)
779 char *end_command;
780 int type;
781 {
782 printf("\"%s\", line %i: <- unmatched \"%s\"\n",
783 file_name,
784 line_count,
785 eg_command( end_command , type) ) ;
786
787 printf("\"%s\", line %i: -> unmatched \"%s\"\n",
788 CG_FILE,
789 CG_LINE,
790 bg_command( CG_NAME ) ) ;
791 warn_count += 2;
792 }
793
794 int check_top_level_end(end_command,type)
795 char *end_command;
796 int type;
797 {
798 if ( gstackp == 0 )
799 {
800 printf("\"%s\", line %i: \"%s\" found at top level\n",
801 file_name,
802 line_count,
803 eg_command( end_command, type )) ;
804 ++warn_count;
805 return(0);
806 }
807 else
808 return(1);
809 }
810
811
812