]> git.saurik.com Git - wxWidgets.git/blame - utils/tex2rtf/src/tex2any.cpp
free argv in Unicode build
[wxWidgets.git] / utils / tex2rtf / src / tex2any.cpp
CommitLineData
9a29912f
JS
1/////////////////////////////////////////////////////////////////////////////
2// Name: tex2any.cpp
3// Purpose: Utilities for Latex conversion.
4// Author: Julian Smart
5// Modified by:
6// Created: 01/01/99
7// RCS-ID: $Id$
8// Copyright: (c) Julian Smart
9// Licence: wxWindows licence
10/////////////////////////////////////////////////////////////////////////////
11
12#ifdef __GNUG__
13#pragma implementation
14#endif
15
16// For compilers that support precompilation, includes "wx.h".
17#include "wx/wxprec.h"
18
19#ifdef __BORLANDC__
20#pragma hdrstop
21#endif
22
23#ifndef WX_PRECOMP
24#include "wx/wx.h"
25#endif
26
27#include <ctype.h>
28#include "tex2any.h"
29#include <stdlib.h>
30#include <time.h>
31
32/*
33 * Variables accessible from clients
34 *
35 */
36
37TexChunk * DocumentTitle = NULL;
38TexChunk * DocumentAuthor = NULL;
39TexChunk * DocumentDate = NULL;
40
41// Header/footers/pagestyle
42TexChunk * LeftHeaderEven = NULL;
43TexChunk * LeftFooterEven = NULL;
44TexChunk * CentreHeaderEven = NULL;
45TexChunk * CentreFooterEven = NULL;
46TexChunk * RightHeaderEven = NULL;
47TexChunk * RightFooterEven = NULL;
48TexChunk * LeftHeaderOdd = NULL;
49TexChunk * LeftFooterOdd = NULL;
50TexChunk * CentreHeaderOdd = NULL;
51TexChunk * CentreFooterOdd = NULL;
52TexChunk * RightHeaderOdd = NULL;
53TexChunk * RightFooterOdd = NULL;
54char * PageStyle = copystring("plain");
55
56int DocumentStyle = LATEX_REPORT;
57int MinorDocumentStyle = 0;
58wxPathList TexPathList;
59char * BibliographyStyleString = copystring("plain");
60char * DocumentStyleString = copystring("report");
61char * MinorDocumentStyleString = NULL;
62int ParSkip = 0;
63int ParIndent = 0;
64
65int normalFont = 10;
66int smallFont = 8;
67int tinyFont = 6;
68int largeFont1 = 12;
69int LargeFont2 = 14;
70int LARGEFont3 = 18;
71int hugeFont1 = 20;
72int HugeFont2 = 24;
73int HUGEFont3 = 28;
74
fad535ee
GT
75// All of these tokens MUST be found on a line by themselves (no other
76// text) and must start at the first character of the line, or tex2rtf
77// will fail to process them correctly (a limitation of tex2rtf, not TeX)
7cbe4e79 78static const wxString syntaxTokens[] =
fad535ee
GT
79{ "\\begin{verbatim}",
80 "\\begin{toocomplex}",
81 "\\end{verbatim}",
82 "\\end{toocomplex}",
83 "\\verb",
84 "\\begin{comment}",
85 "\\end{comment}",
86 "\\verbatiminput",
341479ff 87// "\\par",
fad535ee
GT
88 "\\input",
89 "\\helpinput",
90 "\\include",
91 wxEmptyString
92};
93
94
9a29912f
JS
95/*
96 * USER-ADJUSTABLE SETTINGS
97 *
98 */
99
100// Section font sizes
101int chapterFont = 12; // LARGEFont3;
102int sectionFont = 12; // LargeFont2;
103int subsectionFont = 12; // largeFont1;
104int titleFont = LARGEFont3;
105int authorFont = LargeFont2;
106int mirrorMargins = TRUE;
107bool winHelp = FALSE; // Output in Windows Help format if TRUE, linear otherwise
108bool isInteractive = FALSE;
109bool runTwice = FALSE;
110int convertMode = TEX_RTF;
fad535ee
GT
111bool checkCurleyBraces = FALSE;
112bool checkSyntax = FALSE;
9a29912f
JS
113bool headerRule = FALSE;
114bool footerRule = FALSE;
115bool compatibilityMode = FALSE; // If TRUE, maximum Latex compatibility
116 // (Quality of RTF generation deteriorates)
117bool generateHPJ; // Generate WinHelp Help Project file
118char *winHelpTitle = NULL; // Windows Help title
119int defaultTableColumnWidth = 2000;
120
121int labelIndentTab = 18; // From left indent to item label (points)
122int itemIndentTab = 40; // From left indent to item (points)
123
124bool useUpButton = TRUE;
125int htmlBrowseButtons = HTML_BUTTONS_TEXT;
126
127bool truncateFilenames = FALSE; // Truncate for DOS
128int winHelpVersion = 3; // WinHelp Version (3 for Windows 3.1, 4 for Win95)
129bool winHelpContents = FALSE; // Generate .cnt file for WinHelp 4
130bool htmlIndex = FALSE; // Generate .htx file for HTML
131bool htmlFrameContents = FALSE; // Use frames for HTML contents page
6d8b260c 132char *htmlStylesheet = NULL; // Use this CSS stylesheet for HTML pages
9a29912f
JS
133bool useHeadingStyles = TRUE; // Insert \s1, s2 etc.
134bool useWord = TRUE; // Insert proper Word table of contents, etc etc
135int contentsDepth = 4; // Depth of Word table of contents
136bool indexSubsections = TRUE; // Index subsections in linear RTF
137// Linear RTF method of including bitmaps. Can be "includepicture", "hex"
138char *bitmapMethod = copystring("includepicture");
139bool upperCaseNames = FALSE;
140// HTML background and text colours
141char *backgroundImageString = NULL;
142char *backgroundColourString = copystring("255;255;255");
143char *textColourString = NULL;
144char *linkColourString = NULL;
145char *followedLinkColourString = NULL;
146bool combineSubSections = FALSE;
14204c7a 147bool htmlWorkshopFiles = FALSE;
bf16085d 148bool ignoreBadRefs = FALSE;
2b5f62a0 149char *htmlFaceName = NULL;
9a29912f 150
3924dd22
GT
151extern int passNumber;
152
153extern wxHashTable TexReferences;
154
9a29912f
JS
155/*
156 * International support
157 */
158
159// Names to help with internationalisation
160char *ContentsNameString = copystring("Contents");
161char *AbstractNameString = copystring("Abstract");
162char *GlossaryNameString = copystring("Glossary");
163char *ReferencesNameString = copystring("References");
164char *FiguresNameString = copystring("List of Figures");
165char *TablesNameString = copystring("List of Tables");
166char *FigureNameString = copystring("Figure");
167char *TableNameString = copystring("Table");
168char *IndexNameString = copystring("Index");
169char *ChapterNameString = copystring("chapter");
170char *SectionNameString = copystring("section");
171char *SubsectionNameString = copystring("subsection");
172char *SubsubsectionNameString = copystring("subsubsection");
173char *UpNameString = copystring("Up");
174
175/*
176 * Section numbering
177 *
178 */
179
180int chapterNo = 0;
181int sectionNo = 0;
182int subsectionNo = 0;
183int subsubsectionNo = 0;
184int figureNo = 0;
185int tableNo = 0;
186
187/*
188 * Other variables
189 *
190 */
191
192FILE *CurrentOutput1 = NULL;
193FILE *CurrentOutput2 = NULL;
194FILE *Inputs[15];
dda2e4fd 195unsigned long LineNumbers[15];
9a29912f
JS
196char *FileNames[15];
197int CurrentInputIndex = 0;
198
199char *TexFileRoot = NULL;
200char *TexBibName = NULL; // Bibliography output file name
201char *TexTmpBibName = NULL; // Temporary bibliography output file name
202bool isSync = FALSE; // If TRUE, should not yield to other processes.
203bool stopRunning = FALSE; // If TRUE, should abort.
204
205static int currentColumn = 0;
206char *currentArgData = NULL;
207bool haveArgData = FALSE; // If TRUE, we're simulating the data.
208TexChunk *currentArgument = NULL;
209TexChunk *nextChunk = NULL;
210bool isArgOptional = FALSE;
446dd881 211int noArgs = 0;
9a29912f
JS
212
213TexChunk *TopLevel = NULL;
214// wxList MacroDefs(wxKEY_STRING);
215wxHashTable MacroDefs(wxKEY_STRING);
216wxStringList IgnorableInputFiles; // Ignorable \input files, e.g. psbox.tex
217char *BigBuffer = NULL; // For reading in large chunks of text
218TexMacroDef *SoloBlockDef = NULL;
219TexMacroDef *VerbatimMacroDef = NULL;
220
221#define IncrementLineNumber() LineNumbers[CurrentInputIndex] ++
222
3924dd22 223
bbd08c6a
GD
224TexRef::TexRef(const char *label, const char *file,
225 const char *section, const char *sectionN)
3924dd22
GT
226{
227 refLabel = copystring(label);
228 refFile = file ? copystring(file) : (char*) NULL;
229 sectionNumber = section ? copystring(section) : copystring("??");
230 sectionName = sectionN ? copystring(sectionN) : copystring("??");
231}
232
233TexRef::~TexRef(void)
234{
235 delete [] refLabel; refLabel = NULL;
236 delete [] refFile; refFile = NULL;
237 delete [] sectionNumber; sectionNumber = NULL;
238 delete [] sectionName; sectionName = NULL;
239}
240
241
242CustomMacro::~CustomMacro()
243{
244 if (macroName)
245 delete [] macroName;
246 if (macroBody)
247 delete [] macroBody;
248}
249
e4a22366 250void TexOutput(const char *s, bool ordinaryText)
9a29912f
JS
251{
252 int len = strlen(s);
253
254 // Update current column, but only if we're guaranteed to
255 // be ordinary text (not mark-up stuff)
256 int i;
257 if (ordinaryText)
258 for (i = 0; i < len; i++)
259 {
260 if (s[i] == 13 || s[i] == 10)
261 currentColumn = 0;
262 else
263 currentColumn ++;
264 }
265
266 if (CurrentOutput1)
267 fprintf(CurrentOutput1, "%s", s);
268 if (CurrentOutput2)
269 fprintf(CurrentOutput2, "%s", s);
270}
271
272/*
273 * Try to find a Latex macro, in one of the following forms:
274 * (1) \begin{} ... \end{}
275 * (2) \macroname{arg1}...{argn}
276 * (3) {\bf arg1}
277 */
278
279void ForbidWarning(TexMacroDef *def)
280{
04b9c5bb 281 wxString informBuf;
9a29912f
JS
282 switch (def->forbidden)
283 {
284 case FORBID_WARN:
285 {
04b9c5bb
GT
286 informBuf.Printf("Warning: it is recommended that command %s is not used.", def->name);
287 OnInform((char *)informBuf.c_str());
9a29912f
JS
288 break;
289 }
290 case FORBID_ABSOLUTELY:
291 {
04b9c5bb
GT
292 informBuf.Printf("Error: command %s cannot be used and will lead to errors.", def->name);
293 OnInform((char *)informBuf.c_str());
9a29912f
JS
294 break;
295 }
296 default:
297 break;
298 }
299}
300
301TexMacroDef *MatchMacro(char *buffer, int *pos, char **env, bool *parseToBrace)
302{
303 *parseToBrace = TRUE;
304 int i = (*pos);
305 TexMacroDef *def = NULL;
306 char macroBuf[40];
307
308 // First, try to find begin{thing}
309 if (strncmp(buffer+i, "begin{", 6) == 0)
310 {
311 i += 6;
312
313 int j = i;
314 while ((isalpha(buffer[j]) || buffer[j] == '*') && ((j - i) < 39))
315 {
316 macroBuf[j-i] = buffer[j];
317 j ++;
318 }
319 macroBuf[j-i] = 0;
320 def = (TexMacroDef *)MacroDefs.Get(macroBuf);
321
322 if (def)
323 {
324 *pos = j + 1; // BUGBUG Should this be + 1???
325 *env = def->name;
326 ForbidWarning(def);
327 return def;
328 }
329 else return NULL;
330 }
331
332 // Failed, so try to find macro from definition list
333 int j = i;
334
335 // First try getting a one-character macro, but ONLY
336 // if these TWO characters are not both alphabetical (could
337 // be a longer macro)
338 if (!(isalpha(buffer[i]) && isalpha(buffer[i+1])))
339 {
340 macroBuf[0] = buffer[i];
341 macroBuf[1] = 0;
342
343 def = (TexMacroDef *)MacroDefs.Get(macroBuf);
344 if (def) j ++;
345 }
346
347 if (!def)
348 {
349 while ((isalpha(buffer[j]) || buffer[j] == '*') && ((j - i) < 39))
350 {
351 macroBuf[j-i] = buffer[j];
352 j ++;
353 }
354 macroBuf[j-i] = 0;
355 def = (TexMacroDef *)MacroDefs.Get(macroBuf);
356 }
357
358 if (def)
359 {
360 i = j;
361
362 // We want to check whether this is a space-consuming macro
363 // (e.g. {\bf word})
364 // No brace, e.g. \input thing.tex instead of \input{thing};
365 // or a numeric argument, such as \parindent0pt
366 if ((def->no_args > 0) && ((buffer[i] == 32) || (buffer[i] == '=') || (isdigit(buffer[i]))))
367 {
368 if ((buffer[i] == 32) || (buffer[i] == '='))
369 i ++;
370
371 *parseToBrace = FALSE;
372 }
373 *pos = i;
374 ForbidWarning(def);
375 return def;
376 }
377 return NULL;
378}
379
380void EatWhiteSpace(char *buffer, int *pos)
381{
382 int len = strlen(buffer);
383 int j = *pos;
384 bool keepGoing = TRUE;
385 bool moreLines = TRUE;
386 while ((j < len) && keepGoing &&
387 (buffer[j] == 10 || buffer[j] == 13 || buffer[j] == ' ' || buffer[j] == 9))
388 {
389 j ++;
390 if (j >= len)
391 {
392 if (moreLines)
393 {
394 moreLines = read_a_line(buffer);
395 len = strlen(buffer);
396 j = 0;
397 }
398 else
399 keepGoing = FALSE;
400 }
401 }
402 *pos = j;
403}
404
/*
 * Tests whether the text at buffer[*pos] is "end{<env>}".
 *
 * buffer - text being parsed
 * pos    - in: index where "end{" is expected; out: on success, the index
 *          just past the closing '}' (unchanged on failure)
 * env    - name of the environment that is expected to be closed
 *
 * Returns true on a match, false otherwise.
 *
 * The closing brace is checked explicitly, so an environment whose name
 * merely starts with env (e.g. "end{verbatimx}" when env is "verbatim")
 * is not treated as a match.
 */
bool FindEndEnvironment(char *buffer, int *pos, char *env)
{
    const char *cursor = buffer + (*pos);

    if (strncmp(cursor, "end{", 4) != 0)
        return false;

    const size_t envLen = strlen(env);
    if (strncmp(cursor + 4, env, envLen) != 0)
        return false;

    // The name matched in full, so this index is at worst the terminating
    // NUL of buffer and is safe to read.
    if (cursor[4 + envLen] != '}')
        return false;

    // 4 for "end{", the name itself, and 1 for the closing '}'
    *pos = (*pos) + 5 + (int)envLen;
    return true;
}
418
419bool readingVerbatim = FALSE;
420bool readInVerbatim = FALSE; // Within a verbatim, but not nec. verbatiminput
421
f6bcfd97
BP
422// Switched this off because e.g. \verb${$ causes it to fail. There is no
423// detection of \verb yet.
fad535ee 424// #define CHECK_BRACES 1
f6bcfd97 425
341479ff
GT
426unsigned long leftCurley = 0;
427unsigned long rightCurley = 0;
f6bcfd97
BP
428static wxString currentFileName = "";
429
9a29912f
JS
430bool read_a_line(char *buf)
431{
432 if (CurrentInputIndex < 0)
433 {
434 buf[0] = 0;
435 return FALSE;
436 }
fad535ee 437
9a29912f 438 int ch = -2;
dda2e4fd 439 unsigned long bufIndex = 0;
9a29912f 440 buf[0] = 0;
f6bcfd97 441
9a29912f
JS
442 while (ch != EOF && ch != 10)
443 {
0e6ca394
GT
444 if (bufIndex >= MAX_LINE_BUFFER_SIZE)
445 {
446 wxString errBuf;
447 errBuf.Printf("Line %lu of file %s is too long. Lines can be no longer than %lu characters. Truncated.",
dda2e4fd 448 LineNumbers[CurrentInputIndex], (const char*) currentFileName.c_str(), MAX_LINE_BUFFER_SIZE);
0e6ca394
GT
449 OnError((char *)errBuf.c_str());
450 return FALSE;
451 }
452
453 if (((bufIndex == 14) && (strncmp(buf, "\\end{verbatim}", 14) == 0)) ||
454 ((bufIndex == 16) && (strncmp(buf, "\\end{toocomplex}", 16) == 0)))
9a29912f
JS
455 readInVerbatim = FALSE;
456
457 ch = getc(Inputs[CurrentInputIndex]);
f6bcfd97 458
fad535ee 459 if (checkCurleyBraces)
f6bcfd97 460 {
fad535ee 461 if (ch == '{' && !readInVerbatim)
341479ff 462 leftCurley++;
fad535ee
GT
463 if (ch == '}' && !readInVerbatim)
464 {
341479ff
GT
465 rightCurley++;
466 if (rightCurley > leftCurley)
fad535ee
GT
467 {
468 wxString errBuf;
dda2e4fd 469 errBuf.Printf("An extra right Curley brace ('}') was detected at line %lu inside file %s", LineNumbers[CurrentInputIndex], (const char*) currentFileName.c_str());
fad535ee
GT
470 OnError((char *)errBuf.c_str());
471
341479ff 472 // Reduce the count of right Curley braces, so the mismatched count
fad535ee 473 // isn't reported on every line that has a '}' after the first mismatch
341479ff 474 rightCurley--;
fad535ee
GT
475 }
476 }
f6bcfd97 477 }
f6bcfd97 478
9a29912f
JS
479 if (ch != EOF)
480 {
481 // Check for 2 consecutive newlines and replace with \par
482 if (ch == 10 && !readInVerbatim)
483 {
484 int ch1 = getc(Inputs[CurrentInputIndex]);
485 if ((ch1 == 10) || (ch1 == 13))
486 {
487 // Eliminate newline (10) following DOS linefeed
0e6ca394
GT
488 if (ch1 == 13)
489 ch1 = getc(Inputs[CurrentInputIndex]);
490 buf[bufIndex] = 0;
9a29912f
JS
491 IncrementLineNumber();
492// strcat(buf, "\\par\n");
493// i += 6;
0e6ca394
GT
494 if (bufIndex+5 >= MAX_LINE_BUFFER_SIZE)
495 {
496 wxString errBuf;
497 errBuf.Printf("Line %lu of file %s is too long. Lines can be no longer than %lu characters. Truncated.",
498 LineNumbers[CurrentInputIndex], (const char*) currentFileName.c_str(),MAX_LINE_BUFFER_SIZE);
499 OnError((char *)errBuf.c_str());
500 return FALSE;
501 }
9a29912f 502 strcat(buf, "\\par");
0e6ca394
GT
503 bufIndex += 5;
504
9a29912f
JS
505 }
506 else
507 {
508 ungetc(ch1, Inputs[CurrentInputIndex]);
0e6ca394
GT
509 if (bufIndex >= MAX_LINE_BUFFER_SIZE)
510 {
511 wxString errBuf;
512 errBuf.Printf("Line %lu of file %s is too long. Lines can be no longer than %lu characters. Truncated.",
513 LineNumbers[CurrentInputIndex], (const char*) currentFileName.c_str(),MAX_LINE_BUFFER_SIZE);
514 OnError((char *)errBuf.c_str());
515 return FALSE;
516 }
517
518 buf[bufIndex] = ch;
519 bufIndex ++;
9a29912f
JS
520 }
521 }
522 else
523 {
524
525 // Convert embedded characters to RTF equivalents
fad535ee
GT
526 switch(ch)
527 {
528