3 * lacheck.lex - A consistency checker for LaTeX documents
5 * Copyright (C) 1991 Kresten Krab Thorup (krab@iesd.auc.dk).
9 * Author : Kresten Krab Thorup
10 * Created On : Sun May 26 18:11:58 1991
11 * Last Modified By: Kresten Krab Thorup
12 * Last Modified On: Thu May 30 02:29:57 1991
16 * 30-May-1991 (Last Mod: Thu May 30 02:22:33 1991 #15) Kresten Krab Thorup
17 * Added check for `$${punct}' and `{punct}$' constructions
18 * 30-May-1991 (Last Mod: Wed May 29 10:31:35 1991 #6) Kresten Krab Thorup
19 * Improved (dynamical) stack management from Andreas Stolcke ...
20 * <stolcke@ICSI.Berkeley.EDU>
21 * 26-May-1991 Kresten Krab Thorup
22 * Initial distribution version.
27 #include <sys/param.h>
29 extern char *realloc();
35 #define GROUP_STACK_SIZE 10
36 #define INPUT_STACK_SIZE 10
38 #define PROGNAME "LaCheck"
42 #define CG_NAME gstack[gstackp-1].s_name
43 #define CG_TYPE gstack[gstackp-1].s_type
44 #define CG_LINE gstack[gstackp-1].s_line
45 #define CG_FILE gstack[gstackp-1].s_file
54 void print_bad_match();
55 int check_top_level_end();
57 /* global variables */
67 typedef struct tex_group
69 unsigned char *s_name;
76 int gstack_size = GROUP_STACK_SIZE;
81 YY_BUFFER_STATE stream;
87 int istack_size = INPUT_STACK_SIZE;
94 %x B_ENVIRONMENT E_ENVIRONMENT VERBATIM INCLUDE MATH COMMENT VERB DEF
97 b_group ("{"|\\bgroup)
98 e_group ("}"|\\egroup)
108 non_par_ws ([ \t]+\n?[ \t]*|[ \t]*\n[ \t]*|[ \t]*\n?[ \t]+)
111 space ({ws}|\~|\\space)
112 hard_space (\~|\\space)
118 letter (u_letter|l_letter)
120 c_bin ("-"|"+"|"\\cdot"|"\\oplus"|"\\otimes"|"\\times")
123 general_abbrev {letter}+{punct}
125 non_abbrev {u_letter}{u_letter}+{punct}
127 font_spec (rm|bf|sl|it|tt|em|mediumseries|normalshape)
129 primitive \\(above|advance|catcode|chardef|closein|closeout|copy|count|countdef|cr|crcr|csname|delcode|dimendef|dimen|divide|expandafter|font|hskp|vskip|openout)
131 symbol ("$"("\\"{atoz}+|.)"$"|"\\#"|"\\$"|"\\%"|"\\ref")
137 <DEF,INITIAL>"\\\%" { ; }
139 <DEF,INITIAL>"%"[^\n]* { ; }
141 <DEF,INITIAL>\n { line_count++; }
143 <DEF,INITIAL>"\\\{" { ; }
145 <DEF,INITIAL>"\\\}" { ; }
149 {b_group} { push( "{", 0, line_count);}
151 {e_group} { g_checkend(0); }
153 "\\"[exg]?def[^\{] BEGIN(DEF);
155 <DEF>{b_group} { ++def_count; }
157 <DEF>{e_group} { --def_count;
164 if(CG_TYPE == 4 || CG_TYPE == 5)
165 print_bad_match(yytext,4);
168 push( yytext, 4, line_count);
171 {e_math} { g_checkend(4); }
174 if(CG_TYPE == 4 || CG_TYPE == 5)
175 print_bad_match(yytext,5);
178 push( yytext, 5, line_count);
182 {e_display} { g_checkend(5);
183 BEGIN(AFTER_DISPLAY);}
185 <AFTER_DISPLAY>{punct} {
187 printf( "\"%s\", line %d: puctation mark \"%s\" should be placed before end of displaymath\n",
188 file_name, line_count, yytext);
193 <AFTER_DISPLAY>. { BEGIN(INITIAL); }
195 <AFTER_DISPLAY>\n { ++line_count;
198 {punct}/("\$"|"\\)") { if (CG_TYPE == 4)
200 printf( "\"%s\", line %d: puctation mark \"%s\" should be placed after end of math mode\n",
201 file_name, line_count, yytext);
208 print_bad_match(yytext, 4);
213 e_checkend(4, yytext);
217 push( yytext, 4, line_count);
224 print_bad_match(yytext,5);
229 e_checkend(5, yytext);
230 BEGIN(AFTER_DISPLAY);
234 push( yytext, 5, line_count);
237 \\begingroup/[^a-zA-Z] {
239 push((unsigned char *)"\\begingroup", 1, line_count);
243 \\endgroup/[^a-zA-Z] {
249 \\begin[ \t]*"{" { BEGIN(B_ENVIRONMENT); }
254 printf("\"%s\", line %i: {argument} missing for \\begin\n",
255 file_name, line_count) ;
259 <B_ENVIRONMENT>[^\}\n]+ {
261 if (strcmp( yytext, "verbatim" ) == 0 )
268 push(yytext, 2, line_count);
274 <VERBATIM>\\end[ \t]*\{verbatim\} { BEGIN(INITIAL); }
278 <VERBATIM>\n { ++line_count; }
282 sscanf (yytext, "\\verb%c", &verb_char );
287 if ( *yytext == verb_char )
289 if ( *yytext == '\n' )
294 \\end[ \t]*"{" { BEGIN(E_ENVIRONMENT); }
298 printf("\"%s\", line %i: {argument} missing for \\end\n",
299 file_name, line_count) ;
304 <E_ENVIRONMENT>[^\}\n]+ {
306 e_checkend(2, yytext);
313 {ws}([a-zfxe]".")*[a-zA-ZfxeFXE]*[a-zfxe]"."/{non_par_ws}+[a-zfxe] {
315 if ( *yytext == '\n' )
318 printf( "\"%s\", line %d: missing `\\ ' after \"%s\"\n",
319 file_name, line_count, ++yytext);
323 ([a-zfxe]".")*[a-zA-ZfxeFXE]*[a-zfxe]"."/{non_par_ws}+[a-zfxe] {
325 printf( "\"%s\", line %d: missing `\\ ' after \"%s\"\n",
326 file_name, line_count, yytext);
330 {ws}{non_abbrev}/{non_par_ws}{u_letter} {
332 if ( *yytext == '\n' )
334 printf("\"%s\", line %d: missing `\\\@' before punctation mark in \"%s\"\n",
335 file_name, line_count, ++yytext);
339 {non_abbrev}/{non_par_ws}{u_letter} {
341 printf("\"%s\", line %d: missing `\\\@' before `.' in \"%s\"\n",
342 file_name, line_count, yytext);
346 ({hard_space}{space}|{space}{hard_space}) {
348 printf("\"%s\", line %d: double space at \"%s\"\n",
349 file_name, line_count, yytext);
353 {c_bin}{ws}?(\\(\.|\,|\;|\:))*{ws}?\\ldots{ws}?(\\(\.|\,|\;|\:))*{ws}?{c_bin} {
354 printf("\"%s\", line %d: \\ldots should be \\cdots in \"%s\"\n",
355 file_name, line_count, yytext);
359 [^\\]{l_bin}{ws}?(\\(\.|\,|\;|\:))*{ws}?\\cdots{ws}?(\\(\.|\,|\;|\:))*{ws}?[^\\]{l_bin} {
360 printf("\"%s\", line %d: \\cdots should be \\ldots in \"%s\"\n",
361 file_name, line_count, yytext);
365 {c_bin}{ws}?(\\(\.|\,|\;|\:))*{ws}?"."+{ws}?(\\(\.|\,|\;|\:))*{ws}?{c_bin} {
366 printf("\"%s\", line %d: Dots should be \\cdots in \"%s\"\n",
367 file_name, line_count, yytext);
371 [^\\]{l_bin}{ws}?(\\(\.|\,|\;|\:))*{ws}?"."+{ws}?(\\(\.|\,|\;|\:))*{ws}?[^\\]{l_bin} {
372 printf("\"%s\", line %d: Dots should be \\ldots in \"%s\"\n",
373 file_name, line_count, yytext);
379 printf("\"%s\", line %d: Dots should be ellipsis \"%s\"\n",
380 file_name, line_count, yytext);
386 * The `~' one is not too good; perhaps it should be an option.
390 {l_letter}" "{symbol} {
392 printf("\"%s\", line %d: perhaps you should insert a `~' before%s\n",
393 file_name, line_count, ++yytext);
398 {primitive}/[^a-zA-Z] {
400 printf("\"%s\", line %d: Don't use \"%s\" in LaTeX documents\n",
401 file_name, line_count, yytext);
406 \\{font_spec}/[ \t]*"{" {
408 printf("\"%s\", line %d: Fontspecifiers don't take arguments. \"%s\"\n",
409 file_name, line_count, yytext);
411 /* (void) input(); */
415 \\([a-zA-Z\@]+\@[a-zA-Z\@]*|[a-zA-Z\@]*\@[a-zA-Z\@]+) {
417 printf("\"%s\", line %d: Do not use @ in LaTeX macro names. \"%s\"\n",
418 file_name, line_count, yytext);
423 "%" { BEGIN(COMMENT); }
425 <COMMENT>\n { BEGIN(INITIAL); ++line_count; }
430 \\(input|include)([ \t]|"{") { BEGIN(INCLUDE); }
434 if ( strstr(yytext,"\.sty") == NULL )
440 printf("\"%s\", line %d: Style file \`%s\' omitted.\n",
456 f_checkend(file_name);
457 yy_switch_to_buffer(istack[istackp].stream);
459 line_count = istack[istackp].linenum;
460 file_name = istack[istackp].name;
470 int main( argc, argv )
474 /* allocate initial stacks */
475 gstack = (tex_group *)malloc(gstack_size * sizeof(tex_group));
476 istack = (input_ *)malloc(istack_size * sizeof(input_));
477 if ( gstack == NULL || istack == NULL ) {
478 fprintf(stderr, "%s: not enough memory for stacks\n", PROGNAME);
484 if ( (file_name = malloc(strlen(argv[1]) + 5)) == NULL ) {
485 fprintf(stderr, "%s: out of memory\n", PROGNAME);
489 strcpy(file_name, argv[1]);
491 if ((yyin = fopen( file_name, "r")) != NULL )
493 push(file_name, 3, 1);
495 f_checkend(file_name);
498 strcat(file_name, ".tex" );
499 if ((yyin = fopen( file_name, "r")) != NULL )
501 push(file_name, 3, 1);
503 f_checkend(file_name);
507 "%s: Could not open : %s\n",PROGNAME, argv[1]);
512 printf("\n* %s *\n\n",PROGNAME);
513 printf("\t...a consistency checker for LaTeX documents.\n\n");
515 printf("Usage:\n\tlacheck filename[.tex] <return>\n\n\n");
517 printf("\tFrom within Emacs:\n\n\t");
518 printf("M-x compile <return>\n\tlacheck filename[.tex] <return>");
519 printf("\n\n\tUse C-x ` to step through the messages.\n\n");
520 printf("\n\tThe found context is displayed in \"double quotes\"\n\n");
521 printf("Remark:\n\tAll messages are only warnings!\n\n");
522 printf("\tYour document may be right though LaCheck tells\n");
523 printf("\tsomthing else.\n\n");
530 strstr(string, substring)
531 register char *string; /* String to search. */
532 char *substring; /* Substring to try to find in string. */
534 register char *a, *b;
536 /* First scan quickly through the two strings looking for a
537 * single-character match. When it's found, then compare the
538 * rest of the substring.
545 for ( ; *string != 0; string += 1) {
562 #endif /* NEED_STRSTR */
564 void push(p_name, p_type, p_line)
565 unsigned char *p_name;
569 if ( gstackp == gstack_size ) { /* extend stack */
571 gstack = (tex_group *)realloc(gstack, gstack_size * sizeof(tex_group));
572 if ( gstack == NULL ) {
573 fprintf(stderr, "%s: stack out of memory", PROGNAME);
578 if ( (gstack[gstackp].s_name =
579 (unsigned char *)malloc(strlen(p_name) + 1)) == NULL ||
580 (gstack[gstackp].s_file =
581 (char *)malloc(strlen(file_name) + 1)) == NULL ) {
582 fprintf(stderr, "%s: out of memory\n", PROGNAME);
586 strcpy(gstack[gstackp].s_name,p_name);
587 gstack[gstackp].s_type = p_type;
588 gstack[gstackp].s_line = p_line;
589 strcpy(gstack[gstackp].s_file,file_name);
594 void input_file(file_nam)
600 if ( (tmp_file_name = malloc(strlen(file_nam) + 5)) == NULL ) {
601 fprintf(stderr, "%s: out of memory\n", PROGNAME);
604 strcpy(tmp_file_name,file_nam);
606 if (istackp == istack_size) { /* extend stack */
608 istack = (input_ *)realloc(istack, istack_size * sizeof(input_));
609 if ( istack == NULL ) {
610 fprintf(stderr, "%s: \\input stack out of memory\n", PROGNAME);
615 istack[istackp].stream = YY_CURRENT_BUFFER;
616 istack[istackp].linenum = line_count;
617 istack[istackp].name = file_name;
620 if ((tmp_yyin = fopen( file_nam, "r")) != NULL )
623 yy_switch_to_buffer(yy_create_buffer(yyin,YY_BUF_SIZE));
624 file_name = tmp_file_name;
625 push(file_name, 3, 1);
629 (void) strcat(tmp_file_name, ".tex");
630 if ((tmp_yyin = fopen( tmp_file_name , "r")) != NULL )
633 yy_switch_to_buffer(yy_create_buffer(yyin,YY_BUF_SIZE));
634 file_name = tmp_file_name;
635 push(file_name, 3, 1);
643 printf("\"%s\", line %d: Could not open \"%s\"\n",
656 fprintf(stderr, "%s: Stack underflow\n", PROGNAME);
661 free(gstack[gstackp].s_name);
662 free(gstack[gstackp].s_file);
665 char *bg_command(name)
672 (void) strcpy( returnval, "\\begin\{" );
673 (void) strcat( returnval, (char *) name);
674 (void) strcat( returnval, "}" );
678 (void) strcpy( returnval, "beginning of file " );
679 (void) strcat( returnval, (char *) name);
683 (void) strcpy( returnval, "math begin " );
684 (void) strcat( returnval, (char *) name);
688 (void) strcpy( returnval, "display math begin " );
689 (void) strcat( returnval, (char *) name);
693 (void) strcpy( returnval, name );
697 return ((char *)returnval);
700 char *eg_command(name,type)
708 (void) strcpy( returnval, "\\end{" );
709 (void) strcat( returnval, (char *) name);
710 (void) strcat( returnval, "}" );
714 (void) strcpy( returnval, "end of file " );
715 (void) strcat( returnval, (char *) name);
719 (void) strcpy( returnval, "math end " );
720 (void) strcat( returnval, (char *) name);
724 (void) strcpy( returnval, "display math end " );
725 (void) strcat( returnval, (char *) name);
729 (void) strcpy( returnval, name );
733 return ((char *)returnval);
740 if ( check_top_level_end(yytext,n) == 1 )
742 print_bad_match(yytext,n);
747 void e_checkend(n, name)
751 if ( check_top_level_end(name,n) == 1 )
753 if ( CG_TYPE != n || strcmp( CG_NAME, name ) != 0 )
754 print_bad_match(name,n);
761 void f_checkend(name)
764 if ( check_top_level_end(name,3) == 1 )
766 if ( CG_TYPE != 3 || strcmp( CG_NAME, name ) != 0 )
768 while( CG_TYPE != 3 )
770 print_bad_match(name,3);
778 void print_bad_match(end_command,type)
782 printf("\"%s\", line %i: <- unmatched \"%s\"\n",
785 eg_command( end_command , type) ) ;
787 printf("\"%s\", line %i: -> unmatched \"%s\"\n",
790 bg_command( CG_NAME ) ) ;
794 int check_top_level_end(end_command,type)
800 printf("\"%s\", line %i: \"%s\" found at top level\n",
803 eg_command( end_command, type )) ;