]>
Commit | Line | Data |
---|---|---|
8e54aaed RD |
1 | // Scintilla source code edit control |
2 | ||
3 | // File: LexTeX.cxx - general context conformant tex coloring scheme | |
4 | // Author: Hans Hagen - PRAGMA ADE - Hasselt NL - www.pragma-ade.com | |
5 | // Version: September 28, 2003 | |
6 | ||
7 | // Copyright: 1998-2003 by Neil Hodgson <neilh@scintilla.org> | |
8 | // The License.txt file describes the conditions under which this software may be distributed. | |
9 | ||
10 | // This lexer is derived from the one written for the texwork environment (1999++) which in | |
11 | // turn is inspired on texedit (1991++) which finds its roots in wdt (1986). | |
12 | ||
13 | // If you run into strange boundary cases, just tell me and I'll look into it. | |
14 | ||
7e0c58e9 RD |
15 | |
16 | // TeX Folding code added by instanton (soft_share@126.com) with borrowed code from VisualTeX source by Alex Romanenko. | |
17 | // Version: June 22, 2007 | |
18 | ||
8e54aaed RD |
19 | #include <stdlib.h> |
20 | #include <string.h> | |
8e54aaed RD |
21 | #include <stdio.h> |
22 | #include <stdarg.h> | |
1dcf666d RD |
23 | #include <assert.h> |
24 | #include <ctype.h> | |
8e54aaed | 25 | |
1dcf666d | 26 | #include "ILexer.h" |
8e54aaed RD |
27 | #include "Scintilla.h" |
28 | #include "SciLexer.h" | |
1dcf666d RD |
29 | |
30 | #include "WordList.h" | |
31 | #include "LexAccessor.h" | |
32 | #include "Accessor.h" | |
8e54aaed | 33 | #include "StyleContext.h" |
1dcf666d RD |
34 | #include "CharacterSet.h" |
35 | #include "LexerModule.h" | |
8e54aaed | 36 | |
7e0c58e9 RD |
37 | #ifdef SCI_NAMESPACE |
38 | using namespace Scintilla; | |
39 | #endif | |
40 | ||
8e54aaed RD |
41 | // val SCE_TEX_DEFAULT = 0 |
42 | // val SCE_TEX_SPECIAL = 1 | |
43 | // val SCE_TEX_GROUP = 2 | |
44 | // val SCE_TEX_SYMBOL = 3 | |
45 | // val SCE_TEX_COMMAND = 4 | |
46 | // val SCE_TEX_TEXT = 5 | |
47 | ||
48 | // Definitions in SciTEGlobal.properties: | |
49 | // | |
50 | // TeX Highlighting | |
51 | // | |
52 | // # Default | |
53 | // style.tex.0=fore:#7F7F00 | |
54 | // # Special | |
55 | // style.tex.1=fore:#007F7F | |
56 | // # Group | |
57 | // style.tex.2=fore:#880000 | |
58 | // # Symbol | |
59 | // style.tex.3=fore:#7F7F00 | |
60 | // # Command | |
61 | // style.tex.4=fore:#008800 | |
62 | // # Text | |
63 | // style.tex.5=fore:#000000 | |
64 | ||
65 | // lexer.tex.interface.default=0 | |
66 | // lexer.tex.comment.process=0 | |
67 | ||
68 | // todo: lexer.tex.auto.if | |
69 | ||
70 | // Auxiliary functions: | |
71 | ||
72 | static inline bool endOfLine(Accessor &styler, unsigned int i) { | |
73 | return | |
74 | (styler[i] == '\n') || ((styler[i] == '\r') && (styler.SafeGetCharAt(i + 1) != '\n')) ; | |
75 | } | |
76 | ||
// Category zero: the percent sign, which ColouriseTeXDoc treats as the
// start of a comment.
static inline bool isTeXzero(int ch) {
    return '%' == ch ;
}
81 | ||
// Category one: brackets, parentheses, angle brackets, '=', '#' and the
// double quote. ColouriseTeXDoc styles these as SCE_TEX_SPECIAL.
static inline bool isTeXone(int ch) {
    switch (ch) {
    case '[': case ']':
    case '(': case ')':
    case '<': case '>':
    case '=': case '#':
    case '"':
        return true ;
    default:
        return false ;
    }
}
88 | ||
// Category two: the grouping characters '{', '}' and '$'.
// ColouriseTeXDoc styles these as SCE_TEX_GROUP.
static inline bool isTeXtwo(int ch) {
    switch (ch) {
    case '{':
    case '}':
    case '$':
        return true ;
    default:
        return false ;
    }
}
93 | ||
// Category three: punctuation that ColouriseTeXDoc styles as
// SCE_TEX_SYMBOL. The set also lists '"' and '%', which the earlier
// categories (isTeXone / isTeXzero) accept as well.
static inline bool isTeXthree(int ch) {
    switch (ch) {
    case '~': case '^': case '_': case '&':
    case '-': case '+': case '"': case '`':
    case '/': case '|': case '%':
        return true ;
    default:
        return false ;
    }
}
100 | ||
// Category four: the backslash, which opens a command
// (SCE_TEX_COMMAND) in ColouriseTeXDoc.
static inline bool isTeXfour(int ch) {
    return '\\' == ch ;
}
105 | ||
// Category five: ASCII letters plus '@', '!' and '?'. While the lexer is
// inside a command, any character outside this set ends the command name.
static inline bool isTeXfive(int ch) {
    const bool lowerCase = (ch >= 'a') && (ch <= 'z') ;
    const bool upperCase = (ch >= 'A') && (ch <= 'Z') ;
    if (lowerCase || upperCase) {
        return true ;
    }
    return (ch == '@') || (ch == '!') || (ch == '?') ;
}
111 | ||
// Category six: the plain space character.
static inline bool isTeXsix(int ch) {
    return ' ' == ch ;
}
116 | ||
// Category seven: the caret. ColouriseTeXDoc tests two of these in a row
// to recognise the "^^" notation.
static inline bool isTeXseven(int ch) {
    return '^' == ch ;
}
121 | ||
122 | // Interface determination | |
123 | ||
124 | static int CheckTeXInterface( | |
125 | unsigned int startPos, | |
126 | int length, | |
127 | Accessor &styler, | |
128 | int defaultInterface) { | |
129 | ||
130 | char lineBuffer[1024] ; | |
131 | unsigned int linePos = 0 ; | |
132 | ||
133 | // some day we can make something lexer.tex.mapping=(all,0)(nl,1)(en,2)... | |
134 | ||
135 | if (styler.SafeGetCharAt(0) == '%') { | |
136 | for (unsigned int i = 0; i < startPos + length; i++) { | |
137 | lineBuffer[linePos++] = styler.SafeGetCharAt(i) ; | |
138 | if (endOfLine(styler, i) || (linePos >= sizeof(lineBuffer) - 1)) { | |
139 | lineBuffer[linePos] = '\0'; | |
140 | if (strstr(lineBuffer, "interface=all")) { | |
141 | return 0 ; | |
142 | } else if (strstr(lineBuffer, "interface=tex")) { | |
143 | return 1 ; | |
144 | } else if (strstr(lineBuffer, "interface=nl")) { | |
145 | return 2 ; | |
146 | } else if (strstr(lineBuffer, "interface=en")) { | |
147 | return 3 ; | |
148 | } else if (strstr(lineBuffer, "interface=de")) { | |
149 | return 4 ; | |
150 | } else if (strstr(lineBuffer, "interface=cz")) { | |
151 | return 5 ; | |
152 | } else if (strstr(lineBuffer, "interface=it")) { | |
153 | return 6 ; | |
154 | } else if (strstr(lineBuffer, "interface=ro")) { | |
155 | return 7 ; | |
156 | } else if (strstr(lineBuffer, "interface=latex")) { | |
157 | // we will move latex cum suis up to 91+ when more keyword lists are supported | |
158 | return 8 ; | |
159 | } else if (styler.SafeGetCharAt(1) == 'D' && strstr(lineBuffer, "%D \\module")) { | |
160 | // better would be to limit the search to just one line | |
161 | return 3 ; | |
162 | } else { | |
163 | return defaultInterface ; | |
164 | } | |
165 | } | |
166 | } | |
167 | } | |
168 | ||
169 | return defaultInterface ; | |
170 | } | |
171 | ||
172 | static void ColouriseTeXDoc( | |
173 | unsigned int startPos, | |
174 | int length, | |
175 | int, | |
176 | WordList *keywordlists[], | |
177 | Accessor &styler) { | |
178 | ||
179 | styler.StartAt(startPos) ; | |
180 | styler.StartSegment(startPos) ; | |
181 | ||
182 | bool processComment = styler.GetPropertyInt("lexer.tex.comment.process", 0) == 1 ; | |
7e0c58e9 | 183 | bool useKeywords = styler.GetPropertyInt("lexer.tex.use.keywords", 1) == 1 ; |
8e54aaed | 184 | bool autoIf = styler.GetPropertyInt("lexer.tex.auto.if", 1) == 1 ; |
7e0c58e9 | 185 | int defaultInterface = styler.GetPropertyInt("lexer.tex.interface.default", 1) ; |
8e54aaed RD |
186 | |
187 | char key[100] ; | |
188 | int k ; | |
189 | bool newifDone = false ; | |
190 | bool inComment = false ; | |
191 | ||
192 | int currentInterface = CheckTeXInterface(startPos,length,styler,defaultInterface) ; | |
193 | ||
194 | if (currentInterface == 0) { | |
195 | useKeywords = false ; | |
196 | currentInterface = 1 ; | |
197 | } | |
198 | ||
199 | WordList &keywords = *keywordlists[currentInterface-1] ; | |
200 | ||
201 | StyleContext sc(startPos, length, SCE_TEX_TEXT, styler); | |
202 | ||
203 | bool going = sc.More() ; // needed because of a fuzzy end of file state | |
204 | ||
205 | for (; going; sc.Forward()) { | |
206 | ||
207 | if (! sc.More()) { going = false ; } // we need to go one behind the end of text | |
208 | ||
209 | if (inComment) { | |
210 | if (sc.atLineEnd) { | |
211 | sc.SetState(SCE_TEX_TEXT) ; | |
212 | newifDone = false ; | |
213 | inComment = false ; | |
214 | } | |
215 | } else { | |
216 | if (! isTeXfive(sc.ch)) { | |
217 | if (sc.state == SCE_TEX_COMMAND) { | |
218 | if (sc.LengthCurrent() == 1) { // \<noncstoken> | |
219 | if (isTeXseven(sc.ch) && isTeXseven(sc.chNext)) { | |
220 | sc.Forward(2) ; // \^^ and \^^<token> | |
221 | } | |
222 | sc.ForwardSetState(SCE_TEX_TEXT) ; | |
223 | } else { | |
224 | sc.GetCurrent(key, sizeof(key)-1) ; | |
1dcf666d | 225 | k = static_cast<int>(strlen(key)) ; |
8e54aaed RD |
226 | memmove(key,key+1,k) ; // shift left over escape token |
227 | key[k] = '\0' ; | |
228 | k-- ; | |
229 | if (! keywords || ! useKeywords) { | |
230 | sc.SetState(SCE_TEX_COMMAND) ; | |
231 | newifDone = false ; | |
232 | } else if (k == 1) { //\<cstoken> | |
233 | sc.SetState(SCE_TEX_COMMAND) ; | |
234 | newifDone = false ; | |
235 | } else if (keywords.InList(key)) { | |
236 | sc.SetState(SCE_TEX_COMMAND) ; | |
237 | newifDone = autoIf && (strcmp(key,"newif") == 0) ; | |
238 | } else if (autoIf && ! newifDone && (key[0] == 'i') && (key[1] == 'f') && keywords.InList("if")) { | |
239 | sc.SetState(SCE_TEX_COMMAND) ; | |
240 | } else { | |
241 | sc.ChangeState(SCE_TEX_TEXT) ; | |
242 | sc.SetState(SCE_TEX_TEXT) ; | |
243 | newifDone = false ; | |
244 | } | |
245 | } | |
246 | } | |
247 | if (isTeXzero(sc.ch)) { | |
9e96e16f RD |
248 | sc.SetState(SCE_TEX_SYMBOL); |
249 | ||
250 | if (!endOfLine(styler,sc.currentPos + 1)) | |
251 | sc.ForwardSetState(SCE_TEX_DEFAULT) ; | |
252 | ||
8e54aaed RD |
253 | inComment = ! processComment ; |
254 | newifDone = false ; | |
255 | } else if (isTeXseven(sc.ch) && isTeXseven(sc.chNext)) { | |
256 | sc.SetState(SCE_TEX_TEXT) ; | |
257 | sc.ForwardSetState(SCE_TEX_TEXT) ; | |
258 | } else if (isTeXone(sc.ch)) { | |
259 | sc.SetState(SCE_TEX_SPECIAL) ; | |
260 | newifDone = false ; | |
261 | } else if (isTeXtwo(sc.ch)) { | |
262 | sc.SetState(SCE_TEX_GROUP) ; | |
263 | newifDone = false ; | |
264 | } else if (isTeXthree(sc.ch)) { | |
265 | sc.SetState(SCE_TEX_SYMBOL) ; | |
266 | newifDone = false ; | |
267 | } else if (isTeXfour(sc.ch)) { | |
268 | sc.SetState(SCE_TEX_COMMAND) ; | |
269 | } else if (isTeXsix(sc.ch)) { | |
270 | sc.SetState(SCE_TEX_TEXT) ; | |
271 | } else if (sc.atLineEnd) { | |
272 | sc.SetState(SCE_TEX_TEXT) ; | |
273 | newifDone = false ; | |
274 | inComment = false ; | |
275 | } else { | |
276 | sc.SetState(SCE_TEX_TEXT) ; | |
277 | } | |
278 | } else if (sc.state != SCE_TEX_COMMAND) { | |
279 | sc.SetState(SCE_TEX_TEXT) ; | |
280 | } | |
281 | } | |
282 | } | |
283 | sc.ChangeState(SCE_TEX_TEXT) ; | |
284 | sc.Complete(); | |
285 | ||
286 | } | |
287 | ||
288 | ||
7e0c58e9 RD |
// True for the ten decimal digit characters '0' through '9'.
static inline bool isNumber(int ch) {
    return (ch >= '0') && (ch <= '9');
}
295 | ||
// True for ASCII letters only (a-z and A-Z).
static inline bool isWordChar(int ch) {
    if ((ch >= 'a') && (ch <= 'z')) {
        return true;
    }
    return (ch >= 'A') && (ch <= 'Z');
}
299 | ||
300 | static int ParseTeXCommand(unsigned int pos, Accessor &styler, char *command) | |
301 | { | |
302 | int length=0; | |
303 | char ch=styler.SafeGetCharAt(pos+1); | |
1dcf666d | 304 | |
7e0c58e9 RD |
305 | if(ch==',' || ch==':' || ch==';' || ch=='%'){ |
306 | command[0]=ch; | |
307 | command[1]=0; | |
308 | return 1; | |
309 | } | |
310 | ||
311 | // find end | |
312 | while(isWordChar(ch) && !isNumber(ch) && ch!='_' && ch!='.' && length<100){ | |
313 | command[length]=ch; | |
314 | length++; | |
315 | ch=styler.SafeGetCharAt(pos+length+1); | |
316 | } | |
1dcf666d RD |
317 | |
318 | command[length]='\0'; | |
7e0c58e9 RD |
319 | if(!length) return 0; |
320 | return length+1; | |
321 | } | |
322 | ||
// Classifies a command name (without the backslash) as one half of a
// paired fold construct.
//
// Returns  1 for openers: begin, FoldStart, abstract, unprotect, title,
//            documentclass, and any name starting with "start", "Start"
//            or "if";
//         -1 for closers: end, FoldStop, maketitle, protect, fi, and any
//            name starting with "stop" or "Stop";
//          0 for everything else, including names that start with a digit
//            or a '.'.
static int classifyFoldPointTeXPaired(const char* s) {
    static const char *const exactOpeners[] = {
        "begin", "FoldStart", "abstract", "unprotect", "title", "documentclass"
    };
    static const char *const prefixOpeners[] = { "start", "Start", "if" };
    static const char *const exactClosers[] = {
        "end", "FoldStop", "maketitle", "protect", "fi"
    };
    static const char *const prefixClosers[] = { "stop", "Stop" };

    // The cast keeps isdigit() well defined for bytes above 0x7F on
    // platforms where plain char is signed.
    if (isdigit((unsigned char) s[0]) || (s[0] == '.')) {
        return 0;
    }

    int lev = 0;
    size_t i;

    for (i = 0; i < sizeof(exactOpeners) / sizeof(exactOpeners[0]); i++) {
        if (strcmp(s, exactOpeners[i]) == 0) {
            lev = 1;
        }
    }
    for (i = 0; i < sizeof(prefixOpeners) / sizeof(prefixOpeners[0]); i++) {
        if (strncmp(s, prefixOpeners[i], strlen(prefixOpeners[i])) == 0) {
            lev = 1;
        }
    }

    // Closers are tested last, as before, so they take precedence.
    for (i = 0; i < sizeof(exactClosers) / sizeof(exactClosers[0]); i++) {
        if (strcmp(s, exactClosers[i]) == 0) {
            lev = -1;
        }
    }
    for (i = 0; i < sizeof(prefixClosers) / sizeof(prefixClosers[0]); i++) {
        if (strncmp(s, prefixClosers[i], strlen(prefixClosers[i])) == 0) {
            lev = -1;
        }
    }

    return lev;
}
341 | ||
// Classifies a command name (without the backslash) as an unpaired fold
// opener, i.e. a command that starts a fold without a matching closing
// command (sectioning commands, \def variants, frames, slides ...).
//
// Returns 1 when s is exactly one of the listed names, otherwise 0.
// Names that start with a digit or a '.' always give 0.
static int classifyFoldPointTeXUnpaired(const char* s) {
    static const char *const unpairedOpeners[] = {
        "part", "chapter", "section", "subsection", "subsubsection",
        "CJKfamily", "appendix",
        "Topic", "topic", "subject", "subsubject",
        "def", "gdef", "edef", "xdef",
        "framed", "frame",
        "foilhead", "overlays", "slide"
    };

    // The cast keeps isdigit() well defined for bytes above 0x7F on
    // platforms where plain char is signed.
    if (isdigit((unsigned char) s[0]) || (s[0] == '.')) {
        return 0;
    }

    size_t i;
    for (i = 0; i < sizeof(unpairedOpeners) / sizeof(unpairedOpeners[0]); i++) {
        if (strcmp(s, unpairedOpeners[i]) == 0) {
            return 1;
        }
    }
    return 0;
}
364 | ||
365 | static bool IsTeXCommentLine(int line, Accessor &styler) { | |
366 | int pos = styler.LineStart(line); | |
367 | int eol_pos = styler.LineStart(line + 1) - 1; | |
1dcf666d | 368 | |
7e0c58e9 RD |
369 | int startpos = pos; |
370 | ||
371 | while (startpos<eol_pos){ | |
372 | char ch = styler[startpos]; | |
373 | if (ch!='%' && ch!=' ') return false; | |
374 | else if (ch=='%') return true; | |
375 | startpos++; | |
1dcf666d | 376 | } |
7e0c58e9 RD |
377 | |
378 | return false; | |
379 | } | |
380 | ||
381 | // FoldTeXDoc: borrowed from VisualTeX with modifications | |
382 | ||
1dcf666d | 383 | static void FoldTexDoc(unsigned int startPos, int length, int, WordList *[], Accessor &styler) |
7e0c58e9 RD |
384 | { |
385 | bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0; | |
386 | unsigned int endPos = startPos+length; | |
387 | int visibleChars=0; | |
388 | int lineCurrent=styler.GetLine(startPos); | |
389 | int levelPrev=styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK; | |
390 | int levelCurrent=levelPrev; | |
391 | char chNext=styler[startPos]; | |
392 | char buffer[100]=""; | |
1dcf666d | 393 | |
7e0c58e9 RD |
394 | for (unsigned int i=startPos; i < endPos; i++) { |
395 | char ch=chNext; | |
396 | chNext=styler.SafeGetCharAt(i+1); | |
397 | bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n'); | |
398 | ||
399 | if(ch=='\\') { | |
400 | ParseTeXCommand(i, styler, buffer); | |
401 | levelCurrent += classifyFoldPointTeXPaired(buffer)+classifyFoldPointTeXUnpaired(buffer); | |
402 | } | |
403 | ||
9e96e16f | 404 | if (levelCurrent > SC_FOLDLEVELBASE && ((ch == '\r' || ch=='\n') && (chNext == '\\'))) { |
7e0c58e9 RD |
405 | ParseTeXCommand(i+1, styler, buffer); |
406 | levelCurrent -= classifyFoldPointTeXUnpaired(buffer); | |
407 | } | |
408 | ||
409 | char chNext2; | |
410 | char chNext3; | |
411 | char chNext4; | |
412 | char chNext5; | |
413 | chNext2=styler.SafeGetCharAt(i+2); | |
414 | chNext3=styler.SafeGetCharAt(i+3); | |
415 | chNext4=styler.SafeGetCharAt(i+4); | |
416 | chNext5=styler.SafeGetCharAt(i+5); | |
417 | ||
1dcf666d RD |
418 | bool atEOfold = (ch == '%') && |
419 | (chNext == '%') && (chNext2=='}') && | |
7e0c58e9 RD |
420 | (chNext3=='}')&& (chNext4=='-')&& (chNext5=='-'); |
421 | ||
1dcf666d RD |
422 | bool atBOfold = (ch == '%') && |
423 | (chNext == '%') && (chNext2=='-') && | |
7e0c58e9 RD |
424 | (chNext3=='-')&& (chNext4=='{')&& (chNext5=='{'); |
425 | ||
426 | if(atBOfold){ | |
427 | levelCurrent+=1; | |
428 | } | |
429 | ||
430 | if(atEOfold){ | |
431 | levelCurrent-=1; | |
432 | } | |
1dcf666d | 433 | |
7e0c58e9 RD |
434 | if(ch=='\\' && chNext=='['){ |
435 | levelCurrent+=1; | |
436 | } | |
1dcf666d | 437 | |
7e0c58e9 RD |
438 | if(ch=='\\' && chNext==']'){ |
439 | levelCurrent-=1; | |
440 | } | |
441 | ||
442 | bool foldComment = styler.GetPropertyInt("fold.comment") != 0; | |
443 | ||
444 | if (foldComment && atEOL && IsTeXCommentLine(lineCurrent, styler)) | |
445 | { | |
446 | if (lineCurrent==0 && IsTeXCommentLine(lineCurrent + 1, styler) | |
447 | ) | |
448 | levelCurrent++; | |
449 | else if (lineCurrent!=0 && !IsTeXCommentLine(lineCurrent - 1, styler) | |
450 | && IsTeXCommentLine(lineCurrent + 1, styler) | |
451 | ) | |
452 | levelCurrent++; | |
453 | else if (lineCurrent!=0 && IsTeXCommentLine(lineCurrent - 1, styler) && | |
454 | !IsTeXCommentLine(lineCurrent+1, styler)) | |
455 | levelCurrent--; | |
456 | } | |
457 | ||
1dcf666d RD |
458 | //--------------------------------------------------------------------------------------------- |
459 | ||
7e0c58e9 RD |
460 | if (atEOL) { |
461 | int lev = levelPrev; | |
462 | if (visibleChars == 0 && foldCompact) | |
463 | lev |= SC_FOLDLEVELWHITEFLAG; | |
464 | if ((levelCurrent > levelPrev) && (visibleChars > 0)) | |
465 | lev |= SC_FOLDLEVELHEADERFLAG; | |
466 | if (lev != styler.LevelAt(lineCurrent)) { | |
467 | styler.SetLevel(lineCurrent, lev); | |
468 | } | |
469 | lineCurrent++; | |
470 | levelPrev = levelCurrent; | |
471 | visibleChars = 0; | |
472 | } | |
473 | ||
474 | if (!isspacechar(ch)) | |
475 | visibleChars++; | |
476 | } | |
477 | ||
478 | // Fill in the real level of the next line, keeping the current flags as they will be filled in later | |
479 | int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK; | |
480 | styler.SetLevel(lineCurrent, levelPrev | flagsNext); | |
481 | } | |
482 | ||
483 | ||
484 | ||
8e54aaed RD |
485 | |
// Descriptions of the keyword lists of the TeX lexer, one per list, in
// list order. The array is terminated by a null entry.
static const char * const texWordListDesc[] = {
    "TeX, eTeX, pdfTeX, Omega",   // list 0
    "ConTeXt Dutch",              // list 1
    "ConTeXt English",            // list 2
    "ConTeXt German",             // list 3
    "ConTeXt Czech",              // list 4
    "ConTeXt Italian",            // list 5
    "ConTeXt Romanian",           // list 6
    0,                            // terminator
} ;
496 | ||
// Module object for the TeX lexer. It is built from the SCLEX_TEX id, the
// colouriser ColouriseTeXDoc, the name "tex", the folder FoldTexDoc and the
// keyword list descriptions texWordListDesc.
// NOTE(review): the role of each argument follows the LexerModule
// constructor declared in LexerModule.h, which is not visible here - confirm.
LexerModule lmTeX(SCLEX_TEX, ColouriseTeXDoc, "tex", FoldTexDoc, texWordListDesc);