]> git.saurik.com Git - wxWidgets.git/blame - utils/tex2rtf/src/tex2any.cpp
changed ds[pw] files to use CRLF eol style and not native one (this helps if you...
[wxWidgets.git] / utils / tex2rtf / src / tex2any.cpp
CommitLineData
/////////////////////////////////////////////////////////////////////////////
// Name:        tex2any.cpp
// Purpose:     Utilities for Latex conversion.
// Author:      Julian Smart
// Modified by: Wlodzimierz ABX Skiba 2003/2004 Unicode support
//              Ron Lee
// Created:     01/01/99
// RCS-ID:      $Id$
// Copyright:   (c) Julian Smart
// Licence:     wxWindows licence
/////////////////////////////////////////////////////////////////////////////
12
9a29912f
JS
13// For compilers that support precompilation, includes "wx.h".
14#include "wx/wxprec.h"
15
16#ifdef __BORLANDC__
17#pragma hdrstop
18#endif
19
20#ifndef WX_PRECOMP
9a29912f
JS
21#endif
22
23#include <ctype.h>
24#include "tex2any.h"
25#include <stdlib.h>
26#include <time.h>
27
ea172e69
MB
28static inline wxChar* copystring(const wxChar* s)
29 { return wxStrcpy(new wxChar[wxStrlen(s) + 1], s); }
ea172e69 30
9a29912f
JS
31/*
32 * Variables accessible from clients
33 *
34 */
254a2129 35
9a29912f
JS
36TexChunk * DocumentTitle = NULL;
37TexChunk * DocumentAuthor = NULL;
38TexChunk * DocumentDate = NULL;
39
40// Header/footers/pagestyle
41TexChunk * LeftHeaderEven = NULL;
42TexChunk * LeftFooterEven = NULL;
43TexChunk * CentreHeaderEven = NULL;
44TexChunk * CentreFooterEven = NULL;
45TexChunk * RightHeaderEven = NULL;
46TexChunk * RightFooterEven = NULL;
47TexChunk * LeftHeaderOdd = NULL;
48TexChunk * LeftFooterOdd = NULL;
49TexChunk * CentreHeaderOdd = NULL;
50TexChunk * CentreFooterOdd = NULL;
51TexChunk * RightHeaderOdd = NULL;
52TexChunk * RightFooterOdd = NULL;
6c155d33 53wxChar * PageStyle = copystring(_T("plain"));
9a29912f
JS
54
55int DocumentStyle = LATEX_REPORT;
56int MinorDocumentStyle = 0;
57wxPathList TexPathList;
6c155d33
JS
58wxChar * BibliographyStyleString = copystring(_T("plain"));
59wxChar * DocumentStyleString = copystring(_T("report"));
60wxChar * MinorDocumentStyleString = NULL;
9a29912f
JS
61int ParSkip = 0;
62int ParIndent = 0;
63
64int normalFont = 10;
65int smallFont = 8;
66int tinyFont = 6;
67int largeFont1 = 12;
68int LargeFont2 = 14;
69int LARGEFont3 = 18;
70int hugeFont1 = 20;
71int HugeFont2 = 24;
72int HUGEFont3 = 28;
73
fad535ee
GT
74// All of these tokens MUST be found on a line by themselves (no other
75// text) and must start at the first character of the line, or tex2rtf
76// will fail to process them correctly (a limitation of tex2rtf, not TeX)
7cbe4e79 77static const wxString syntaxTokens[] =
6c155d33
JS
78{ _T("\\begin{verbatim}"),
79 _T("\\begin{toocomplex}"),
80 _T("\\end{verbatim}"),
81 _T("\\end{toocomplex}"),
82 _T("\\verb"),
83 _T("\\begin{comment}"),
84 _T("\\end{comment}"),
85 _T("\\verbatiminput"),
86// _T("\\par"),
87 _T("\\input"),
88 _T("\\helpinput"),
89 _T("\\include"),
fad535ee
GT
90 wxEmptyString
91};
92
93
9a29912f
JS
94/*
95 * USER-ADJUSTABLE SETTINGS
96 *
97 */
98
99// Section font sizes
100int chapterFont = 12; // LARGEFont3;
101int sectionFont = 12; // LargeFont2;
102int subsectionFont = 12; // largeFont1;
103int titleFont = LARGEFont3;
104int authorFont = LargeFont2;
b63b07a8 105int mirrorMargins = true;
4fe30bce 106bool winHelp = false; // Output in Windows Help format if true, linear otherwise
b63b07a8
RL
107bool isInteractive = false;
108bool runTwice = false;
9a29912f 109int convertMode = TEX_RTF;
88fd7006 110bool checkCurlyBraces = false;
b63b07a8
RL
111bool checkSyntax = false;
112bool headerRule = false;
113bool footerRule = false;
4fe30bce 114bool compatibilityMode = false; // If true, maximum Latex compatibility
9a29912f
JS
115 // (Quality of RTF generation deteriorate)
116bool generateHPJ; // Generate WinHelp Help Project file
6c155d33 117wxChar *winHelpTitle = NULL; // Windows Help title
9a29912f
JS
118int defaultTableColumnWidth = 2000;
119
120int labelIndentTab = 18; // From left indent to item label (points)
121int itemIndentTab = 40; // From left indent to item (points)
122
b63b07a8 123bool useUpButton = true;
9a29912f
JS
124int htmlBrowseButtons = HTML_BUTTONS_TEXT;
125
b63b07a8 126bool truncateFilenames = false; // Truncate for DOS
9a29912f 127int winHelpVersion = 3; // WinHelp Version (3 for Windows 3.1, 4 for Win95)
b63b07a8
RL
128bool winHelpContents = false; // Generate .cnt file for WinHelp 4
129bool htmlIndex = false; // Generate .htx file for HTML
130bool htmlFrameContents = false; // Use frames for HTML contents page
6c155d33 131wxChar *htmlStylesheet = NULL; // Use this CSS stylesheet for HTML pages
b63b07a8
RL
132bool useHeadingStyles = true; // Insert \s1, s2 etc.
133bool useWord = true; // Insert proper Word table of contents, etc etc
9a29912f 134int contentsDepth = 4; // Depth of Word table of contents
b63b07a8 135bool indexSubsections = true; // Index subsections in linear RTF
9a29912f 136// Linear RTF method of including bitmaps. Can be "includepicture", "hex"
6c155d33 137wxChar *bitmapMethod = copystring(_T("includepicture"));
b63b07a8 138bool upperCaseNames = false;
9a29912f 139// HTML background and text colours
6c155d33
JS
140wxChar *backgroundImageString = NULL;
141wxChar *backgroundColourString = copystring(_T("255;255;255"));
142wxChar *textColourString = NULL;
143wxChar *linkColourString = NULL;
144wxChar *followedLinkColourString = NULL;
b63b07a8
RL
145bool combineSubSections = false;
146bool htmlWorkshopFiles = false;
147bool ignoreBadRefs = false;
4fe30bce 148wxChar *htmlFaceName = NULL;
9a29912f 149
3924dd22
GT
150extern int passNumber;
151
152extern wxHashTable TexReferences;
153
9a29912f
JS
154/*
155 * International support
156 */
157
158// Names to help with internationalisation
6c155d33
JS
159wxChar *ContentsNameString = copystring(_T("Contents"));
160wxChar *AbstractNameString = copystring(_T("Abstract"));
161wxChar *GlossaryNameString = copystring(_T("Glossary"));
162wxChar *ReferencesNameString = copystring(_T("References"));
163wxChar *FiguresNameString = copystring(_T("List of Figures"));
164wxChar *TablesNameString = copystring(_T("List of Tables"));
165wxChar *FigureNameString = copystring(_T("Figure"));
166wxChar *TableNameString = copystring(_T("Table"));
167wxChar *IndexNameString = copystring(_T("Index"));
168wxChar *ChapterNameString = copystring(_T("chapter"));
169wxChar *SectionNameString = copystring(_T("section"));
170wxChar *SubsectionNameString = copystring(_T("subsection"));
171wxChar *SubsubsectionNameString = copystring(_T("subsubsection"));
172wxChar *UpNameString = copystring(_T("Up"));
9a29912f
JS
173
/*
 * Section numbering
 *
 * Running counters, all starting at zero.
 */

// Sectioning levels
int chapterNo = 0;
int sectionNo = 0;
int subsectionNo = 0;
int subsubsectionNo = 0;

// Figures and tables
int figureNo = 0;
int tableNo = 0;
185
186/*
187 * Other variables
188 *
189 */
254a2129 190
9a29912f
JS
191FILE *CurrentOutput1 = NULL;
192FILE *CurrentOutput2 = NULL;
193FILE *Inputs[15];
dda2e4fd 194unsigned long LineNumbers[15];
6c155d33 195wxChar *FileNames[15];
9a29912f
JS
196int CurrentInputIndex = 0;
197
6c155d33
JS
198wxChar *TexFileRoot = NULL;
199wxChar *TexBibName = NULL; // Bibliography output file name
200wxChar *TexTmpBibName = NULL; // Temporary bibliography output file name
4fe30bce
WS
201bool isSync = false; // If true, should not yield to other processes.
202bool stopRunning = false; // If true, should abort.
9a29912f
JS
203
204static int currentColumn = 0;
6c155d33 205wxChar *currentArgData = NULL;
4fe30bce 206bool haveArgData = false; // If true, we're simulating the data.
9a29912f
JS
207TexChunk *currentArgument = NULL;
208TexChunk *nextChunk = NULL;
b63b07a8 209bool isArgOptional = false;
446dd881 210int noArgs = 0;
9a29912f
JS
211
212TexChunk *TopLevel = NULL;
213// wxList MacroDefs(wxKEY_STRING);
214wxHashTable MacroDefs(wxKEY_STRING);
215wxStringList IgnorableInputFiles; // Ignorable \input files, e.g. psbox.tex
6c155d33 216wxChar *BigBuffer = NULL; // For reading in large chunks of text
9a29912f
JS
217TexMacroDef *SoloBlockDef = NULL;
218TexMacroDef *VerbatimMacroDef = NULL;
219
// Advances the line counter of the input that is currently being read
// (LineNumbers[CurrentInputIndex]). The replacement list is parenthesised
// so the macro always expands to one self-contained expression.
// CurrentInputIndex is not range-checked here.
#define IncrementLineNumber() (LineNumbers[CurrentInputIndex]++)
221
3924dd22 222
6c155d33 223TexRef::TexRef(const wxChar *label, const wxChar *file,
4fe30bce 224 const wxChar *section, const wxChar *sectionN)
3924dd22
GT
225{
226 refLabel = copystring(label);
6c155d33
JS
227 refFile = file ? copystring(file) : (wxChar*) NULL;
228 sectionNumber = section ? copystring(section) : copystring(_T("??"));
229 sectionName = sectionN ? copystring(sectionN) : copystring(_T("??"));
3924dd22
GT
230}
231
232TexRef::~TexRef(void)
233{
234 delete [] refLabel; refLabel = NULL;
235 delete [] refFile; refFile = NULL;
236 delete [] sectionNumber; sectionNumber = NULL;
237 delete [] sectionName; sectionName = NULL;
238}
239
240
241CustomMacro::~CustomMacro()
242{
243 if (macroName)
244 delete [] macroName;
245 if (macroBody)
246 delete [] macroBody;
247}
248
bd0b594d 249void TexOutput(const wxString& s, bool ordinaryText)
9a29912f 250{
9a29912f
JS
251 // Update current column, but only if we're guaranteed to
252 // be ordinary text (not mark-up stuff)
253 int i;
254 if (ordinaryText)
bd0b594d 255 for (wxString::const_iterator i = s.begin(); i != s.end(); ++i)
9a29912f 256 {
bd0b594d 257 if (*i == 13 || *i == 10)
9a29912f
JS
258 currentColumn = 0;
259 else
260 currentColumn ++;
261 }
262
263 if (CurrentOutput1)
6c155d33 264 wxFprintf(CurrentOutput1, _T("%s"), s);
9a29912f 265 if (CurrentOutput2)
6c155d33 266 wxFprintf(CurrentOutput2, _T("%s"), s);
9a29912f
JS
267}
268
/*
 * Try to find a Latex macro, in one of the following forms:
 * (1) \begin{} ... \end{}
 * (2) \macroname{arg1}...{argn}
 * (3) {\bf arg1}
 *
 * (This describes MatchMacro(), which is defined further down.)
 */
275
276void ForbidWarning(TexMacroDef *def)
277{
04b9c5bb 278 wxString informBuf;
9a29912f
JS
279 switch (def->forbidden)
280 {
281 case FORBID_WARN:
282 {
6c155d33 283 informBuf.Printf(_T("Warning: it is recommended that command %s is not used."), def->name);
1693108c 284 OnInform(informBuf);
9a29912f
JS
285 break;
286 }
287 case FORBID_ABSOLUTELY:
288 {
6c155d33 289 informBuf.Printf(_T("Error: command %s cannot be used and will lead to errors."), def->name);
1693108c 290 OnInform(informBuf);
9a29912f
JS
291 break;
292 }
293 default:
294 break;
295 }
296}
254a2129 297
6c155d33 298TexMacroDef *MatchMacro(wxChar *buffer, int *pos, wxChar **env, bool *parseToBrace)
9a29912f 299{
9c9691ba
WS
300 *parseToBrace = true;
301 int i = (*pos);
302 TexMacroDef *def = NULL;
303 wxChar macroBuf[40];
9a29912f 304
9c9691ba
WS
305 // First, try to find begin{thing}
306 if (wxStrncmp(buffer+i, _T("begin{"), 6) == 0)
9a29912f 307 {
9c9691ba 308 i += 6;
9a29912f 309
9c9691ba
WS
310 int j = i;
311 while ((isalpha(buffer[j]) || buffer[j] == '*') && ((j - i) < 39))
312 {
313 macroBuf[j-i] = buffer[j];
314 j ++;
315 }
316 macroBuf[j-i] = 0;
317 def = (TexMacroDef *)MacroDefs.Get(macroBuf);
318
319 if (def)
320 {
321 *pos = j + 1; // BUGBUG Should this be + 1???
322 *env = def->name;
323 ForbidWarning(def);
324 return def;
325 }
326 else
327 {
328 return NULL;
329 }
9a29912f 330 }
9a29912f 331
9c9691ba
WS
332 // Failed, so try to find macro from definition list
333 int j = i;
9a29912f 334
9c9691ba
WS
335 // First try getting a one-character macro, but ONLY
336 // if these TWO characters are not both alphabetical (could
337 // be a longer macro)
338 if (!(isalpha(buffer[i]) && isalpha(buffer[i+1])))
339 {
340 macroBuf[0] = buffer[i];
341 macroBuf[1] = 0;
9a29912f 342
9c9691ba
WS
343 def = (TexMacroDef *)MacroDefs.Get(macroBuf);
344 if (def) j ++;
345 }
9a29912f 346
9c9691ba 347 if (!def)
9a29912f 348 {
9c9691ba
WS
349 while ((isalpha(buffer[j]) || buffer[j] == '*') && ((j - i) < 39))
350 {
351 macroBuf[j-i] = buffer[j];
352 j ++;
353 }
354 macroBuf[j-i] = 0;
355 def = (TexMacroDef *)MacroDefs.Get(macroBuf);
9a29912f 356 }
254a2129 357
9c9691ba 358 if (def)
9a29912f 359 {
9c9691ba
WS
360 i = j;
361
362 // We want to check whether this is a space-consuming macro
363 // (e.g. {\bf word})
364 // No brace, e.g. \input thing.tex instead of \input{thing};
365 // or a numeric argument, such as \parindent0pt
366 if ((def->no_args > 0) && ((buffer[i] == 32) || (buffer[i] == '=') || (isdigit(buffer[i]))))
367 {
368 if ((buffer[i] == 32) || (buffer[i] == '='))
369 i ++;
9a29912f 370
9c9691ba
WS
371 *parseToBrace = false;
372 }
373 *pos = i;
374 ForbidWarning(def);
375 return def;
9a29912f 376 }
9c9691ba 377 return NULL;
9a29912f
JS
378}
379
6c155d33 380void EatWhiteSpace(wxChar *buffer, int *pos)
9a29912f 381{
6c155d33 382 int len = wxStrlen(buffer);
9a29912f 383 int j = *pos;
b63b07a8
RL
384 bool keepGoing = true;
385 bool moreLines = true;
9a29912f
JS
386 while ((j < len) && keepGoing &&
387 (buffer[j] == 10 || buffer[j] == 13 || buffer[j] == ' ' || buffer[j] == 9))
388 {
389 j ++;
390 if (j >= len)
391 {
392 if (moreLines)
393 {
394 moreLines = read_a_line(buffer);
6c155d33 395 len = wxStrlen(buffer);
9a29912f
JS
396 j = 0;
397 }
398 else
b63b07a8 399 keepGoing = false;
9a29912f
JS
400 }
401 }
402 *pos = j;
403}
404
6c155d33 405bool FindEndEnvironment(wxChar *buffer, int *pos, wxChar *env)
9a29912f
JS
406{
407 int i = (*pos);
408
409 // Try to find end{thing}
6c155d33
JS
410 if ((wxStrncmp(buffer+i, _T("end{"), 4) == 0) &&
411 (wxStrncmp(buffer+i+4, env, wxStrlen(env)) == 0))
9a29912f 412 {
6c155d33 413 *pos = i + 5 + wxStrlen(env);
b63b07a8 414 return true;
9a29912f 415 }
b63b07a8 416 else return false;
9a29912f
JS
417}
418
b63b07a8
RL
419bool readingVerbatim = false;
420bool readInVerbatim = false; // Within a verbatim, but not nec. verbatiminput
9a29912f 421
f6bcfd97
BP
422// Switched this off because e.g. \verb${$ causes it to fail. There is no
423// detection of \verb yet.
fad535ee 424// #define CHECK_BRACES 1
f6bcfd97 425
88fd7006
VS
426unsigned long leftCurly = 0;
427unsigned long rightCurly = 0;
5c66e5b2 428static wxString currentFileName = wxEmptyString;
f6bcfd97 429
6c155d33 430bool read_a_line(wxChar *buf)
9a29912f
JS
431{
432 if (CurrentInputIndex < 0)
433 {
434 buf[0] = 0;
b63b07a8 435 return false;
9a29912f 436 }
fad535ee 437
9a29912f 438 int ch = -2;
dda2e4fd 439 unsigned long bufIndex = 0;
9a29912f 440 buf[0] = 0;
6644cbe7 441 int lastChar;
f6bcfd97 442
9a29912f
JS
443 while (ch != EOF && ch != 10)
444 {
0e6ca394
GT
445 if (bufIndex >= MAX_LINE_BUFFER_SIZE)
446 {
447 wxString errBuf;
6c155d33
JS
448 errBuf.Printf(_T("Line %lu of file %s is too long. Lines can be no longer than %lu characters. Truncated."),
449 LineNumbers[CurrentInputIndex], (const wxChar*) currentFileName.c_str(), MAX_LINE_BUFFER_SIZE);
1693108c 450 OnError(errBuf);
b63b07a8 451 return false;
0e6ca394
GT
452 }
453
6c155d33
JS
454 if (((bufIndex == 14) && (wxStrncmp(buf, _T("\\end{verbatim}"), 14) == 0)) ||
455 ((bufIndex == 16) && (wxStrncmp(buf, _T("\\end{toocomplex}"), 16) == 0)))
b63b07a8 456 readInVerbatim = false;
9a29912f 457
c22287af 458 lastChar = ch;
9a29912f 459 ch = getc(Inputs[CurrentInputIndex]);
f6bcfd97 460
88fd7006 461 if (checkCurlyBraces)
f6bcfd97 462 {
c22287af 463 if (ch == '{' && !readInVerbatim && lastChar != _T('\\'))
88fd7006 464 leftCurly++;
c22287af 465 if (ch == '}' && !readInVerbatim && lastChar != _T('\\'))
fad535ee 466 {
88fd7006
VS
467 rightCurly++;
468 if (rightCurly > leftCurly)
fad535ee
GT
469 {
470 wxString errBuf;
88fd7006 471 errBuf.Printf(_T("An extra right Curly brace ('}') was detected at line %lu inside file %s"), LineNumbers[CurrentInputIndex], (const wxChar*) currentFileName.c_str());
1693108c 472 OnError(errBuf);
fad535ee 473
88fd7006 474 // Reduce the count of right Curly braces, so the mismatched count
fad535ee 475 // isn't reported on every line that has a '}' after the first mismatch
88fd7006 476 rightCurly--;
fad535ee
GT
477 }
478 }
f6bcfd97 479 }
f6bcfd97 480
9a29912f
JS
481 if (ch != EOF)
482 {
483 // Check for 2 consecutive newlines and replace with \par
484 if (ch == 10 && !readInVerbatim)
485 {
486 int ch1 = getc(Inputs[CurrentInputIndex]);
487 if ((ch1 == 10) || (ch1 == 13))
488 {
489 // Eliminate newline (10) following DOS linefeed
254a2129 490 if (ch1 == 13)
6c155d33 491 getc(Inputs[CurrentInputIndex]);
0e6ca394 492 buf[bufIndex] = 0;
9a29912f 493 IncrementLineNumber();
6c155d33 494// wxStrcat(buf, "\\par\n");
9a29912f 495// i += 6;
0e6ca394
GT
496 if (bufIndex+5 >= MAX_LINE_BUFFER_SIZE)
497 {
498 wxString errBuf;
6c155d33
JS
499 errBuf.Printf(_T("Line %lu of file %s is too long. Lines can be no longer than %lu characters. Truncated."),
500 LineNumbers[CurrentInputIndex], (const wxChar*) currentFileName.c_str(),MAX_LINE_BUFFER_SIZE);
1693108c 501 OnError(errBuf);
b63b07a8 502 return false;
0e6ca394 503 }
6c155d33 504 wxStrcat(buf, _T("\\par"));
0e6ca394
GT
505 bufIndex += 5;
506
9a29912f
JS
507 }
508 else
509 {
510 ungetc(ch1, Inputs[CurrentInputIndex]);
0e6ca394
GT
511 if (bufIndex >= MAX_LINE_BUFFER_SIZE)
512 {
513 wxString errBuf;
6c155d33
JS
514 errBuf.Printf(_T("Line %lu of file %s is too long. Lines can be no longer than %lu characters. Truncated."),
515 LineNumbers[CurrentInputIndex], (const wxChar*) currentFileName.c_str(),MAX_LINE_BUFFER_SIZE);
1693108c 516 OnError(errBuf);
b63b07a8 517 return false;
0e6ca394
GT
518 }
519
254a2129 520 buf[bufIndex] = (wxChar)ch;
0e6ca394 521 bufIndex ++;
9a29912f
JS
522 }
523 }
524 else
525 {
526
527 // Convert embedded characters to RTF equivalents
fad535ee
GT
528 switch(ch)
529 {
530