]> git.saurik.com Git - wxWidgets.git/blob - contrib/src/stc/scintilla/src/LexHTML.cxx
d2cd30f60e2ee44335d3806f317b9cc63b51e755
[wxWidgets.git] / contrib / src / stc / scintilla / src / LexHTML.cxx
1 // Scintilla source code edit control
2 /** @file LexHTML.cxx
3 ** Lexer for HTML.
4 **/
5 // Copyright 1998-2003 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
7
8 #include <stdlib.h>
9 #include <string.h>
10 #include <ctype.h>
11 #include <stdio.h>
12 #include <stdarg.h>
13
14 #include "Platform.h"
15
16 #include "PropSet.h"
17 #include "Accessor.h"
18 #include "StyleContext.h"
19 #include "KeyWords.h"
20 #include "Scintilla.h"
21 #include "SciLexer.h"
22
// Distance from each scripting language's base style range to its alternate
// range.  statePrintForState() adds the matching offset whenever the script
// mode is anything other than eNonHtmlScript, and stateForPrintState()
// subtracts it again.
#define SCE_HA_PYTHON (SCE_HPA_START - SCE_HP_START)
#define SCE_HA_VBS (SCE_HBA_START - SCE_HB_START)
#define SCE_HA_JS (SCE_HJA_START - SCE_HJ_START)
26
// Language of the text currently being lexed.  ColouriseHyperTextDoc packs
// these values into 4-bit fields of the per-line state, so the numeric values
// must not change and must stay below 16.
enum script_type {
    eScriptNone = 0,
    eScriptJS = 1,
    eScriptVBS = 2,
    eScriptPython = 3,
    eScriptPHP = 4,
    eScriptXML = 5,
    eScriptSGML = 6,
    eScriptSGMLblock = 7
};

// Kind of region the lexer is in.  Stored in the low 2 bits of the per-line
// state, so the numeric values must not change and must stay below 4.
enum script_mode {
    eHtml = 0,
    eNonHtmlScript = 1,
    eNonHtmlPreProc = 2,
    eNonHtmlScriptPreProc = 3
};
29
// Return true when ch may appear inside a word: an ASCII letter or digit,
// '.' or '_'.
// Values outside 0..0x7F are rejected up front.  This includes negative
// values, which a sign-extended char produces for bytes >= 0x80; passing
// those to isalnum() would be undefined behaviour.
static inline bool IsAWordChar(const int ch) {
    if (ch < 0 || ch >= 0x80)
        return false;
    return isalnum(ch) || ch == '.' || ch == '_';
}
33
// Return true when ch may begin a word: an ASCII letter or digit, or '_'.
// Values outside 0..0x7F are rejected up front.  This includes negative
// values, which a sign-extended char produces for bytes >= 0x80; passing
// those to isalnum() would be undefined behaviour.
static inline bool IsAWordStart(const int ch) {
    if (ch < 0 || ch >= 0x80)
        return false;
    return isalnum(ch) || ch == '_';
}
37
38 static script_type segIsScriptingIndicator(Accessor &styler, unsigned int start, unsigned int end, script_type prevValue) {
39 char s[30 + 1];
40 unsigned int i = 0;
41 for (; i < end - start + 1 && i < 30; i++) {
42 s[i] = static_cast<char>(tolower(styler[start + i]));
43 }
44 s[i] = '\0';
45 //Platform::DebugPrintf("Scripting indicator [%s]\n", s);
46 if (strstr(s, "src")) // External script
47 return eScriptNone;
48 if (strstr(s, "vbs"))
49 return eScriptVBS;
50 if (strstr(s, "pyth"))
51 return eScriptPython;
52 if (strstr(s, "javas"))
53 return eScriptJS;
54 if (strstr(s, "jscr"))
55 return eScriptJS;
56 if (strstr(s, "php"))
57 return eScriptPHP;
58 if (strstr(s, "xml"))
59 return eScriptXML;
60
61 return prevValue;
62 }
63
64 static int PrintScriptingIndicatorOffset(Accessor &styler, unsigned int start, unsigned int end) {
65 int iResult = 0;
66 char s[30 + 1];
67 unsigned int i = 0;
68 for (; i < end - start + 1 && i < 30; i++) {
69 s[i] = static_cast<char>(tolower(styler[start + i]));
70 }
71 s[i] = '\0';
72 if (0 == strncmp(s, "php", 3)) {
73 iResult = 3;
74 }
75
76 return iResult;
77 }
78
79 static script_type ScriptOfState(int state) {
80 if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
81 return eScriptPython;
82 } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
83 return eScriptVBS;
84 } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
85 return eScriptJS;
86 } else if ((state >= SCE_HPHP_DEFAULT) && (state <= SCE_HPHP_COMMENTLINE)) {
87 return eScriptPHP;
88 } else if ((state >= SCE_H_SGML_DEFAULT) && (state < SCE_H_SGML_BLOCK_DEFAULT)) {
89 return eScriptSGML;
90 } else if (state == SCE_H_SGML_BLOCK_DEFAULT) {
91 return eScriptSGMLblock;
92 } else {
93 return eScriptNone;
94 }
95 }
96
97 static int statePrintForState(int state, script_mode inScriptType) {
98 int StateToPrint;
99
100 if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
101 StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_PYTHON);
102 } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
103 StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_VBS);
104 } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
105 StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_JS);
106 } else {
107 StateToPrint = state;
108 }
109
110 return StateToPrint;
111 }
112
113 static int stateForPrintState(int StateToPrint) {
114 int state;
115
116 if ((StateToPrint >= SCE_HPA_START) && (StateToPrint <= SCE_HPA_IDENTIFIER)) {
117 state = StateToPrint - SCE_HA_PYTHON;
118 } else if ((StateToPrint >= SCE_HBA_START) && (StateToPrint <= SCE_HBA_STRINGEOL)) {
119 state = StateToPrint - SCE_HA_VBS;
120 } else if ((StateToPrint >= SCE_HJA_START) && (StateToPrint <= SCE_HJA_REGEX)) {
121 state = StateToPrint - SCE_HA_JS;
122 } else {
123 state = StateToPrint;
124 }
125
126 return state;
127 }
128
129 static inline bool IsNumber(unsigned int start, Accessor &styler) {
130 return IsADigit(styler[start]) || (styler[start] == '.') ||
131 (styler[start] == '-') || (styler[start] == '#');
132 }
133
134 static inline bool isStringState(int state) {
135 bool bResult;
136
137 switch (state) {
138 case SCE_HJ_DOUBLESTRING:
139 case SCE_HJ_SINGLESTRING:
140 case SCE_HJA_DOUBLESTRING:
141 case SCE_HJA_SINGLESTRING:
142 case SCE_HB_STRING:
143 case SCE_HBA_STRING:
144 case SCE_HP_STRING:
145 case SCE_HPA_STRING:
146 case SCE_HPHP_HSTRING:
147 case SCE_HPHP_SIMPLESTRING:
148 case SCE_HPHP_HSTRING_VARIABLE:
149 case SCE_HPHP_COMPLEX_VARIABLE:
150 bResult = true;
151 break;
152 default :
153 bResult = false;
154 break;
155 }
156 return bResult;
157 }
158
159 // not really well done, since it's only comments that should lex the %> and <%
160 static inline bool isCommentASPState(int state) {
161 bool bResult;
162
163 switch (state) {
164 case SCE_HJ_COMMENT:
165 case SCE_HJ_COMMENTLINE:
166 case SCE_HJ_COMMENTDOC:
167 case SCE_HB_COMMENTLINE:
168 case SCE_HP_COMMENTLINE:
169 case SCE_HPHP_COMMENT:
170 case SCE_HPHP_COMMENTLINE:
171 bResult = true;
172 break;
173 default :
174 bResult = false;
175 break;
176 }
177 return bResult;
178 }
179
180 static void classifyAttribHTML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
181 bool wordIsNumber = IsNumber(start, styler);
182 char chAttr = SCE_H_ATTRIBUTEUNKNOWN;
183 if (wordIsNumber) {
184 chAttr = SCE_H_NUMBER;
185 } else {
186 char s[30 + 1];
187 unsigned int i = 0;
188 for (; i < end - start + 1 && i < 30; i++) {
189 s[i] = static_cast<char>(tolower(styler[start + i]));
190 }
191 s[i] = '\0';
192 if (keywords.InList(s))
193 chAttr = SCE_H_ATTRIBUTE;
194 }
195 if ((chAttr == SCE_H_ATTRIBUTEUNKNOWN) && !keywords)
196 // No keywords -> all are known
197 chAttr = SCE_H_ATTRIBUTE;
198 styler.ColourTo(end, chAttr);
199 }
200
201 static int classifyTagHTML(unsigned int start, unsigned int end,
202 WordList &keywords, Accessor &styler, bool &tagDontFold,
203 bool caseSensitive) {
204 char s[30 + 2];
205 // Copy after the '<'
206 unsigned int i = 0;
207 for (unsigned int cPos = start; cPos <= end && i < 30; cPos++) {
208 char ch = styler[cPos];
209 if ((ch != '<') && (ch != '/')) {
210 s[i++] = caseSensitive ? ch : static_cast<char>(tolower(ch));
211 }
212 }
213
214 //The following is only a quick hack, to see if this whole thing would work
215 //we first need the tagname with a trailing space...
216 s[i] = ' ';
217 s[i+1] = '\0';
218
219 //...to find it in the list of no-container-tags
220 // (There are many more. We will need a keywordlist in the property file for this)
221 tagDontFold = (NULL != strstr("meta link img area br hr input ",s));
222
223 //now we can remove the trailing space
224 s[i] = '\0';
225
226 bool isScript = false;
227 char chAttr = SCE_H_TAGUNKNOWN;
228 if (s[0] == '!') {
229 chAttr = SCE_H_SGML_DEFAULT;
230 } else if (s[0] == '/') { // Closing tag
231 if (keywords.InList(s + 1))
232 chAttr = SCE_H_TAG;
233 } else {
234 if (keywords.InList(s)) {
235 chAttr = SCE_H_TAG;
236 isScript = 0 == strcmp(s, "script");
237 }
238 }
239 if ((chAttr == SCE_H_TAGUNKNOWN) && !keywords) {
240 // No keywords -> all are known
241 chAttr = SCE_H_TAG;
242 isScript = 0 == strcmp(s, "script");
243 }
244 styler.ColourTo(end, chAttr);
245 return isScript ? SCE_H_SCRIPT : chAttr;
246 }
247
248 static void classifyWordHTJS(unsigned int start, unsigned int end,
249 WordList &keywords, Accessor &styler, script_mode inScriptType) {
250 char chAttr = SCE_HJ_WORD;
251 bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.');
252 if (wordIsNumber)
253 chAttr = SCE_HJ_NUMBER;
254 else {
255 char s[30 + 1];
256 unsigned int i = 0;
257 for (; i < end - start + 1 && i < 30; i++) {
258 s[i] = styler[start + i];
259 }
260 s[i] = '\0';
261 if (keywords.InList(s))
262 chAttr = SCE_HJ_KEYWORD;
263 }
264 styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
265 }
266
267 static int classifyWordHTVB(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, script_mode inScriptType) {
268 char chAttr = SCE_HB_IDENTIFIER;
269 bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.');
270 if (wordIsNumber)
271 chAttr = SCE_HB_NUMBER;
272 else {
273 char s[30 + 1];
274 unsigned int i = 0;
275 for (; i < end - start + 1 && i < 30; i++) {
276 s[i] = static_cast<char>(tolower(styler[start + i]));
277 }
278 s[i] = '\0';
279 if (keywords.InList(s)) {
280 chAttr = SCE_HB_WORD;
281 if (strcmp(s, "rem") == 0)
282 chAttr = SCE_HB_COMMENTLINE;
283 }
284 }
285 styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
286 if (chAttr == SCE_HB_COMMENTLINE)
287 return SCE_HB_COMMENTLINE;
288 else
289 return SCE_HB_DEFAULT;
290 }
291
292 static void classifyWordHTPy(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord, script_mode inScriptType) {
293 bool wordIsNumber = IsADigit(styler[start]);
294 char s[30 + 1];
295 unsigned int i = 0;
296 for (; i < end - start + 1 && i < 30; i++) {
297 s[i] = styler[start + i];
298 }
299 s[i] = '\0';
300 char chAttr = SCE_HP_IDENTIFIER;
301 if (0 == strcmp(prevWord, "class"))
302 chAttr = SCE_HP_CLASSNAME;
303 else if (0 == strcmp(prevWord, "def"))
304 chAttr = SCE_HP_DEFNAME;
305 else if (wordIsNumber)
306 chAttr = SCE_HP_NUMBER;
307 else if (keywords.InList(s))
308 chAttr = SCE_HP_WORD;
309 styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
310 strcpy(prevWord, s);
311 }
312
313 // Update the word colour to default or keyword
314 // Called when in a PHP word
315 static void classifyWordHTPHP(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
316 char chAttr = SCE_HPHP_DEFAULT;
317 bool wordIsNumber = IsADigit(styler[start]);
318 if (wordIsNumber)
319 chAttr = SCE_HPHP_NUMBER;
320 else {
321 char s[100 + 1];
322 unsigned int i = 0;
323 for (; i < end - start + 1 && i < 100; i++) {
324 s[i] = static_cast<char>(tolower(styler[start + i]));
325 }
326 s[i] = '\0';
327 if (keywords.InList(s))
328 chAttr = SCE_HPHP_WORD;
329 }
330 styler.ColourTo(end, chAttr);
331 }
332
333 static bool isWordHSGML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
334 char s[30 + 1];
335 unsigned int i = 0;
336 for (; i < end - start + 1 && i < 30; i++) {
337 s[i] = styler[start + i];
338 }
339 s[i] = '\0';
340 return keywords.InList(s);
341 }
342
343 static bool isWordCdata(unsigned int start, unsigned int end, Accessor &styler) {
344 char s[30 + 1];
345 unsigned int i = 0;
346 for (; i < end - start + 1 && i < 30; i++) {
347 s[i] = styler[start + i];
348 }
349 s[i] = '\0';
350 return (0 == strcmp(s, "[CDATA["));
351 }
352
353 // Return the first state to reach when entering a scripting language
354 static int StateForScript(script_type scriptLanguage) {
355 int Result;
356 switch (scriptLanguage) {
357 case eScriptVBS:
358 Result = SCE_HB_START;
359 break;
360 case eScriptPython:
361 Result = SCE_HP_START;
362 break;
363 case eScriptPHP:
364 Result = SCE_HPHP_DEFAULT;
365 break;
366 case eScriptXML:
367 Result = SCE_H_TAGUNKNOWN;
368 break;
369 case eScriptSGML:
370 Result = SCE_H_SGML_DEFAULT;
371 break;
372 default :
373 Result = SCE_HJ_START;
374 break;
375 }
376 return Result;
377 }
378
// Return true when ch can be part of an HTML tag or attribute name: any
// non-ASCII byte, an ASCII letter or digit, or one of . - _ : ! #
static inline bool ishtmlwordchar(char ch) {
    // Non-ASCII bytes are accepted without consulting <ctype.h>.
    if (!isascii(ch))
        return true;
    switch (ch) {
    case '.':
    case '-':
    case '_':
    case ':':
    case '!':
    case '#':
        return true;
    default:
        return isalnum(ch) != 0;
    }
}
383
// Return true when ch can be part of an SGML word: any non-ASCII byte, an
// ASCII letter or digit, or one of . _ : ! # [
static inline bool issgmlwordchar(char ch) {
    // Non-ASCII bytes are accepted without consulting <ctype.h>.
    if (!isascii(ch))
        return true;
    switch (ch) {
    case '.':
    case '_':
    case ':':
    case '!':
    case '#':
    case '[':
        return true;
    default:
        return isalnum(ch) != 0;
    }
}
388
// Return true when ch can begin a PHP word: a letter, '_', or any byte with
// value 0x7f or above.
static inline bool IsPhpWordStart(const unsigned char ch) {
    if (ch >= 0x7f)
        return true;
    // Below 0x7f the byte is ASCII, so only letters and '_' qualify.
    return ch == '_' || isalpha(ch);
}
392
393 static inline bool IsPhpWordChar(char ch) {
394 return IsADigit(ch) || IsPhpWordStart(ch);
395 }
396
397 static bool InTagState(int state) {
398 return state == SCE_H_TAG || state == SCE_H_TAGUNKNOWN ||
399 state == SCE_H_SCRIPT ||
400 state == SCE_H_ATTRIBUTE || state == SCE_H_ATTRIBUTEUNKNOWN ||
401 state == SCE_H_NUMBER || state == SCE_H_OTHER ||
402 state == SCE_H_DOUBLESTRING || state == SCE_H_SINGLESTRING;
403 }
404
405 static bool IsCommentState(const int state) {
406 return state == SCE_H_COMMENT || state == SCE_H_SGML_COMMENT;
407 }
408
// Return true when ch is a carriage return or a line feed.
static bool isLineEnd(char ch) {
    switch (ch) {
    case '\r':
    case '\n':
        return true;
    default:
        return false;
    }
}
412
// Return true when ch is one of the characters '(', '=' or ','.
// NOTE(review): judging by the name, these are the characters after which a
// '/' may start a regular expression - confirm against the caller.
static bool isOKBeforeRE(char ch) {
    switch (ch) {
    case '(':
    case '=':
    case ',':
        return true;
    default:
        return false;
    }
}
416
417 static bool isPHPStringState(int state) {
418 return
419 (state == SCE_HPHP_HSTRING) ||
420 (state == SCE_HPHP_SIMPLESTRING) ||
421 (state == SCE_HPHP_HSTRING_VARIABLE) ||
422 (state == SCE_HPHP_COMPLEX_VARIABLE);
423 }
424
425 static int FindPhpStringDelimiter(char *phpStringDelimiter, const int phpStringDelimiterSize, int i, const int lengthDoc, Accessor &styler) {
426 int j;
427 phpStringDelimiter[0] = '\n';
428 for (j = i; j < lengthDoc && styler[j] != '\n' && styler[j] != '\r'; j++) {
429 if (j - i < phpStringDelimiterSize - 2)
430 phpStringDelimiter[j-i+1] = styler[j];
431 else
432 i++;
433 }
434 phpStringDelimiter[j-i+1] = '\0';
435 return j;
436 }
437
438 static void ColouriseHyperTextDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
439 Accessor &styler) {
440 WordList &keywords = *keywordlists[0];
441 WordList &keywords2 = *keywordlists[1];
442 WordList &keywords3 = *keywordlists[2];
443 WordList &keywords4 = *keywordlists[3];
444 WordList &keywords5 = *keywordlists[4];
445 WordList &keywords6 = *keywordlists[5]; // SGML (DTD) keywords
446
447 // Lexer for HTML requires more lexical states (7 bits worth) than most lexers
448 styler.StartAt(startPos, STYLE_MAX);
449 char prevWord[200];
450 prevWord[0] = '\0';
451 char phpStringDelimiter[200]; // PHP is not limited in length, we are
452 phpStringDelimiter[0] = '\0';
453 int StateToPrint = initStyle;
454 int state = stateForPrintState(StateToPrint);
455
456 // If inside a tag, it may be a script tag, so reread from the start to ensure any language tags are seen
457 if (InTagState(state)) {
458 while ((startPos > 0) && (InTagState(styler.StyleAt(startPos - 1)))) {
459 startPos--;
460 length++;
461 }
462 state = SCE_H_DEFAULT;
463 }
464 // String can be heredoc, must find a delimiter first
465 while (startPos > 0 && isPHPStringState(state) && state != SCE_HPHP_SIMPLESTRING) {
466 startPos--;
467 length++;
468 state = styler.StyleAt(startPos);
469 }
470 styler.StartAt(startPos, STYLE_MAX);
471
472 int lineCurrent = styler.GetLine(startPos);
473 int lineState;
474 if (lineCurrent > 0) {
475 lineState = styler.GetLineState(lineCurrent);
476 } else {
477 // Default client and ASP scripting language is JavaScript
478 lineState = eScriptJS << 8;
479 lineState |= styler.GetPropertyInt("asp.default.language", eScriptJS) << 4;
480 }
481 script_mode inScriptType = script_mode((lineState >> 0) & 0x03); // 2 bits of scripting mode
482 bool tagOpened = (lineState >> 2) & 0x01; // 1 bit to know if we are in an opened tag
483 bool tagClosing = (lineState >> 3) & 0x01; // 1 bit to know if we are in a closing tag
484 bool tagDontFold = false; //some HTML tags should not be folded
485 script_type aspScript = script_type((lineState >> 4) & 0x0F); // 4 bits of script name
486 script_type clientScript = script_type((lineState >> 8) & 0x0F); // 4 bits of script name
487 int beforePreProc = (lineState >> 12) & 0xFF; // 8 bits of state
488
489 script_type scriptLanguage = ScriptOfState(state);
490
491 const bool foldHTML = styler.GetPropertyInt("fold.html", 0) != 0;
492 const bool fold = foldHTML && styler.GetPropertyInt("fold", 0);
493 const bool foldHTMLPreprocessor = foldHTML && styler.GetPropertyInt("fold.html.preprocessor", 1);
494 const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
495 const bool caseSensitive = styler.GetPropertyInt("html.tags.case.sensitive", 0) != 0;
496
497 int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
498 int levelCurrent = levelPrev;
499 int visibleChars = 0;
500
501 char chPrev = ' ';
502 char ch = ' ';
503 char chPrevNonWhite = ' ';
504 styler.StartSegment(startPos);
505 const int lengthDoc = startPos + length;
506 for (int i = startPos; i < lengthDoc; i++) {
507 const char chPrev2 = chPrev;
508 chPrev = ch;
509 if (ch != ' ' && ch != '\t')
510 chPrevNonWhite = ch;
511 ch = styler[i];
512 char chNext = styler.SafeGetCharAt(i + 1);
513 const char chNext2 = styler.SafeGetCharAt(i + 2);
514
515 // Handle DBCS codepages
516 if (styler.IsLeadByte(ch)) {
517 chPrev = ' ';
518 i += 1;
519 continue;
520 }
521
522 if ((!isspacechar(ch) || !foldCompact) && fold)
523 visibleChars++;
524
525 // decide what is the current state to print (depending of the script tag)
526 StateToPrint = statePrintForState(state, inScriptType);
527
528 // handle script folding
529 if (fold) {
530 switch (scriptLanguage) {
531 case eScriptJS:
532 case eScriptPHP:
533 //not currently supported case eScriptVBS:
534
535 if ((state != SCE_HPHP_COMMENT) && (state != SCE_HPHP_COMMENTLINE) && (state != SCE_HJ_COMMENT) && (state != SCE_HJ_COMMENTLINE) && (state != SCE_HJ_COMMENTDOC) && (!isStringState(state))) {
536 //Platform::DebugPrintf("state=%d, StateToPrint=%d, initStyle=%d\n", state, StateToPrint, initStyle);
537 //if ((state == SCE_HPHP_OPERATOR) || (state == SCE_HPHP_DEFAULT) || (state == SCE_HJ_SYMBOLS) || (state == SCE_HJ_START) || (state == SCE_HJ_DEFAULT)) {
538 if ((ch == '{') || (ch == '}')) {
539 levelCurrent += (ch == '{') ? 1 : -1;
540 }
541 }
542 break;
543 case eScriptPython:
544 if (state != SCE_HP_COMMENTLINE) {
545 if ((ch == ':') && ((chNext == '\n') || (chNext == '\r' && chNext2 == '\n'))) {
546 levelCurrent++;
547 } else if ((ch == '\n') && !((chNext == '\r') && (chNext2 == '\n')) && (chNext != '\n')) {
548 // check if the number of tabs is lower than the level
549 int Findlevel = (levelCurrent & ~SC_FOLDLEVELBASE) * 8;
550 for (int j = 0; Findlevel > 0; j++) {
551 char chTmp = styler.SafeGetCharAt(i + j + 1);
552 if (chTmp == '\t') {
553 Findlevel -= 8;
554 } else if (chTmp == ' ') {
555 Findlevel--;
556 } else {
557 break;
558 }
559 }
560
561 if (Findlevel > 0) {
562 levelCurrent -= Findlevel / 8;
563 if (Findlevel % 8)
564 levelCurrent--;
565 }
566 }
567 }
568 break;
569 default:
570 break;
571 }
572 }
573
574 if ((ch == '\r' && chNext != '\n') || (ch == '\n')) {
575 // Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix)
576 // Avoid triggering two times on Dos/Win
577 // New line -> record any line state onto /next/ line
578 if (fold) {
579 int lev = levelPrev;
580 if (visibleChars == 0)
581 lev |= SC_FOLDLEVELWHITEFLAG;
582 if ((levelCurrent > levelPrev) && (visibleChars > 0))
583 lev |= SC_FOLDLEVELHEADERFLAG;
584
585 styler.SetLevel(lineCurrent, lev);
586 visibleChars = 0;
587 levelPrev = levelCurrent;
588 }
589 lineCurrent++;
590 styler.SetLineState(lineCurrent,
591 ((inScriptType & 0x03) << 0) |
592 ((tagOpened & 0x01) << 2) |
593 ((tagClosing & 0x01) << 3) |
594 ((aspScript & 0x0F) << 4) |
595 ((clientScript & 0x0F) << 8) |
596 ((beforePreProc & 0xFF) << 12));
597 }
598
599 // generic end of script processing
600 else if ((inScriptType == eNonHtmlScript) && (ch == '<') && (chNext == '/')) {
601 // Check if it's the end of the script tag (or any other HTML tag)
602 switch (state) {
603 // in these cases, you can embed HTML tags (to confirm !!!!!!!!!!!!!!!!!!!!!!)
604 case SCE_H_DOUBLESTRING:
605 case SCE_H_SINGLESTRING:
606 case SCE_HJ_COMMENT:
607 case SCE_HJ_COMMENTDOC:
608 // SCE_HJ_COMMENTLINE removed as this is a common thing done to hide
609 // the end of script marker from some JS interpreters.
610 //case SCE_HJ_COMMENTLINE:
611 case SCE_HJ_DOUBLESTRING:
612 case SCE_HJ_SINGLESTRING:
613 case SCE_HJ_REGEX:
614 case SCE_HB_STRING:
615 case SCE_HP_STRING:
616 case SCE_HP_TRIPLE:
617 case SCE_HP_TRIPLEDOUBLE:
618 break;
619 default :
620 // closing tag of the script (it's a closing HTML tag anyway)
621 styler.ColourTo(i - 1, StateToPrint);
622 state = SCE_H_TAGUNKNOWN;
623 inScriptType = eHtml;
624 scriptLanguage = eScriptNone;
625 clientScript = eScriptJS;
626 i += 2;
627 visibleChars += 2;
628 tagClosing = true;
629 continue;
630 }
631 }
632
633 /////////////////////////////////////
634 // handle the start of PHP pre-processor = Non-HTML
635 else if ((state != SCE_H_ASPAT) &&
636 !isPHPStringState(state) &&
637 (state != SCE_HPHP_COMMENT) &&
638 (ch == '<') &&
639 (chNext == '?')) {
640 styler.ColourTo(i - 1, StateToPrint);
641 beforePreProc = state;
642 scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment() + 2, i + 10, eScriptPHP);
643 i++;
644 visibleChars++;
645 i += PrintScriptingIndicatorOffset(styler, styler.GetStartSegment() + 2, i + 10);
646 if (scriptLanguage == eScriptXML)
647 styler.ColourTo(i, SCE_H_XMLSTART);
648 else
649 styler.ColourTo(i, SCE_H_QUESTION);
650 state = StateForScript(scriptLanguage);
651 if (inScriptType == eNonHtmlScript)
652 inScriptType = eNonHtmlScriptPreProc;
653 else
654 inScriptType = eNonHtmlPreProc;
655 // fold whole script
656 if (foldHTMLPreprocessor){
657 levelCurrent++;
658 if (scriptLanguage == eScriptXML)
659 levelCurrent--; // no folding of the XML first tag (all XML-like tags in this case)
660 }
661 // should be better
662 ch = styler.SafeGetCharAt(i);
663 continue;
664 }
665
666 // handle the start of ASP pre-processor = Non-HTML
667 else if (!isCommentASPState(state) && (ch == '<') && (chNext == '%')) {
668 styler.ColourTo(i - 1, StateToPrint);
669 beforePreProc = state;
670 if (inScriptType == eNonHtmlScript)
671 inScriptType = eNonHtmlScriptPreProc;
672 else
673 inScriptType = eNonHtmlPreProc;
674
675 if (chNext2 == '@') {
676 i += 2; // place as if it was the second next char treated
677 visibleChars += 2;
678 state = SCE_H_ASPAT;
679 } else if ((chNext2 == '-') && (styler.SafeGetCharAt(i + 3) == '-')) {
680 styler.ColourTo(i + 3, SCE_H_ASP);
681 state = SCE_H_XCCOMMENT;
682 scriptLanguage = eScriptVBS;
683 continue;
684 } else {
685 if (chNext2 == '=') {
686 i += 2; // place as if it was the second next char treated
687 visibleChars += 2;
688 } else {
689 i++; // place as if it was the next char treated
690 visibleChars++;
691 }
692
693 state = StateForScript(aspScript);
694 }
695 scriptLanguage = eScriptVBS;
696 styler.ColourTo(i, SCE_H_ASP);
697 // fold whole script
698 if (foldHTMLPreprocessor)
699 levelCurrent++;
700 // should be better
701 ch = styler.SafeGetCharAt(i);
702 continue;
703 }
704
705 /////////////////////////////////////
706 // handle the start of SGML language (DTD)
707 else if (((scriptLanguage == eScriptNone) || (scriptLanguage == eScriptXML)) &&
708 (chPrev == '<') &&
709 (ch == '!') &&
710 (StateToPrint != SCE_H_CDATA) && (!IsCommentState(StateToPrint))) {
711 beforePreProc = state;
712 styler.ColourTo(i - 2, StateToPrint);
713 if ((chNext == '-') && (chNext2 == '-')) {
714 state = SCE_H_COMMENT; // wait for a pending command
715 }
716 else if (isWordCdata(i + 1, i + 7, styler)) {
717 state = SCE_H_CDATA;
718 } else {
719 styler.ColourTo(i, SCE_H_SGML_DEFAULT); // <! is default
720 scriptLanguage = eScriptSGML;
721 state = SCE_H_SGML_COMMAND; // wait for a pending command
722 }
723 // fold whole tag (-- when closing the tag)
724 if (foldHTMLPreprocessor)
725 levelCurrent++;
726 continue;
727 }
728
729 // handle the end of a pre-processor = Non-HTML
730 else if ((
731 ((inScriptType == eNonHtmlPreProc)
732 || (inScriptType == eNonHtmlScriptPreProc)) && (
733 ((scriptLanguage == eScriptPHP) && (ch == '?') && !isPHPStringState(state) && (state != SCE_HPHP_COMMENT)) ||
734 ((scriptLanguage != eScriptNone) && !isStringState(state) &&
735 (ch == '%'))
736 ) && (chNext == '>')) ||
737 ((scriptLanguage == eScriptSGML) && (ch == '>') && (state != SCE_H_SGML_COMMENT))) {
738 if (state == SCE_H_ASPAT) {
739 aspScript = segIsScriptingIndicator(styler,
740 styler.GetStartSegment(), i - 1, aspScript);
741 }
742 // Bounce out of any ASP mode
743 switch (state) {
744 case SCE_HJ_WORD:
745 classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
746 break;
747 case SCE_HB_WORD:
748 classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
749 break;
750 case SCE_HP_WORD:
751 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
752 break;
753 case SCE_HPHP_WORD:
754 classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
755 break;
756 case SCE_H_XCCOMMENT:
757 styler.ColourTo(i - 1, state);
758 break;
759 default :
760 styler.ColourTo(i - 1, StateToPrint);
761 break;
762 }
763 if (scriptLanguage != eScriptSGML) {
764 i++;
765 visibleChars++;
766 }
767 if (ch == '%')
768 styler.ColourTo(i, SCE_H_ASP);
769 else if (scriptLanguage == eScriptXML)
770 styler.ColourTo(i, SCE_H_XMLEND);
771 else if (scriptLanguage == eScriptSGML)
772 styler.ColourTo(i, SCE_H_SGML_DEFAULT);
773 else
774 styler.ColourTo(i, SCE_H_QUESTION);
775 state = beforePreProc;
776 if (inScriptType == eNonHtmlScriptPreProc)
777 inScriptType = eNonHtmlScript;
778 else
779 inScriptType = eHtml;
780 scriptLanguage = eScriptNone;
781 // unfold all scripting languages
782 if (foldHTMLPreprocessor)
783 levelCurrent--;
784 continue;
785 }
786 /////////////////////////////////////
787
788 switch (state) {
789 case SCE_H_DEFAULT:
790 if (ch == '<') {
791 // in HTML, fold on tag open and unfold on tag close
792 tagOpened = true;
793 tagClosing = (chNext == '/');
794 styler.ColourTo(i - 1, StateToPrint);
795 if (chNext != '!')
796 state = SCE_H_TAGUNKNOWN;
797 } else if (ch == '&') {
798 styler.ColourTo(i - 1, SCE_H_DEFAULT);
799 state = SCE_H_ENTITY;
800 }
801 break;
802 case SCE_H_SGML_DEFAULT:
803 case SCE_H_SGML_BLOCK_DEFAULT:
804 // if (scriptLanguage == eScriptSGMLblock)
805 // StateToPrint = SCE_H_SGML_BLOCK_DEFAULT;
806
807 if (ch == '\"') {
808 styler.ColourTo(i - 1, StateToPrint);
809 state = SCE_H_SGML_DOUBLESTRING;
810 } else if (ch == '\'') {
811 styler.ColourTo(i - 1, StateToPrint);
812 state = SCE_H_SGML_SIMPLESTRING;
813 } else if ((ch == '-') && (chPrev == '-')) {
814 styler.ColourTo(i - 2, StateToPrint);
815 state = SCE_H_SGML_COMMENT;
816 } else if (isascii(ch) && isalpha(ch) && (chPrev == '%')) {
817 styler.ColourTo(i - 2, StateToPrint);
818 state = SCE_H_SGML_ENTITY;
819 } else if (ch == '#') {
820 styler.ColourTo(i - 1, StateToPrint);
821 state = SCE_H_SGML_SPECIAL;
822 } else if (ch == '[') {
823 styler.ColourTo(i - 1, StateToPrint);
824 scriptLanguage = eScriptSGMLblock;
825 state = SCE_H_SGML_BLOCK_DEFAULT;
826 } else if (ch == ']') {
827 if (scriptLanguage == eScriptSGMLblock) {
828 styler.ColourTo(i, StateToPrint);
829 scriptLanguage = eScriptSGML;
830 } else {
831 styler.ColourTo(i - 1, StateToPrint);
832 styler.ColourTo(i, SCE_H_SGML_ERROR);
833 }
834 state = SCE_H_SGML_DEFAULT;
835 } else if (scriptLanguage == eScriptSGMLblock) {
836 if ((ch == '!') && (chPrev == '<')) {
837 styler.ColourTo(i - 2, StateToPrint);
838 styler.ColourTo(i, SCE_H_SGML_DEFAULT);
839 state = SCE_H_SGML_COMMAND;
840 } else if (ch == '>') {
841 styler.ColourTo(i - 1, StateToPrint);
842 styler.ColourTo(i, SCE_H_SGML_DEFAULT);
843 }
844 }
845 break;
846 case SCE_H_SGML_COMMAND:
847 if ((ch == '-') && (chPrev == '-')) {
848 styler.ColourTo(i - 2, StateToPrint);
849 state = SCE_H_SGML_COMMENT;
850 } else if (!issgmlwordchar(ch)) {
851 if (isWordHSGML(styler.GetStartSegment(), i - 1, keywords6, styler)) {
852 styler.ColourTo(i - 1, StateToPrint);
853 state = SCE_H_SGML_1ST_PARAM;
854 } else {
855 state = SCE_H_SGML_ERROR;
856 }
857 }
858 break;
859 case SCE_H_SGML_1ST_PARAM:
860 // wait for the beginning of the word
861 if ((ch == '-') && (chPrev == '-')) {
862 if (scriptLanguage == eScriptSGMLblock) {
863 styler.ColourTo(i - 2, SCE_H_SGML_BLOCK_DEFAULT);
864 } else {
865 styler.ColourTo(i - 2, SCE_H_SGML_DEFAULT);
866 }
867 state = SCE_H_SGML_1ST_PARAM_COMMENT;
868 } else if (issgmlwordchar(ch)) {
869 if (scriptLanguage == eScriptSGMLblock) {
870 styler.ColourTo(i - 1, SCE_H_SGML_BLOCK_DEFAULT);
871 } else {
872 styler.ColourTo(i - 1, SCE_H_SGML_DEFAULT);
873 }
874 // find the length of the word
875 int size = 1;
876 while (ishtmlwordchar(styler.SafeGetCharAt(i + size)))
877 size++;
878 styler.ColourTo(i + size - 1, StateToPrint);
879 i += size - 1;
880 visibleChars += size - 1;
881 ch = styler.SafeGetCharAt(i);
882 if (scriptLanguage == eScriptSGMLblock) {
883 state = SCE_H_SGML_BLOCK_DEFAULT;
884 } else {
885 state = SCE_H_SGML_DEFAULT;
886 }
887 continue;
888 }
889 break;
890 case SCE_H_SGML_ERROR:
891 if ((ch == '-') && (chPrev == '-')) {
892 styler.ColourTo(i - 2, StateToPrint);
893 state = SCE_H_SGML_COMMENT;
894 }
895 case SCE_H_SGML_DOUBLESTRING:
896 if (ch == '\"') {
897 styler.ColourTo(i, StateToPrint);
898 state = SCE_H_SGML_DEFAULT;
899 }
900 break;
901 case SCE_H_SGML_SIMPLESTRING:
902 if (ch == '\'') {
903 styler.ColourTo(i, StateToPrint);
904 state = SCE_H_SGML_DEFAULT;
905 }
906 break;
907 case SCE_H_SGML_COMMENT:
908 if ((ch == '-') && (chPrev == '-')) {
909 styler.ColourTo(i, StateToPrint);
910 state = SCE_H_SGML_DEFAULT;
911 }
912 break;
913 case SCE_H_CDATA:
914 if ((chPrev2 == ']') && (chPrev == ']') && (ch == '>')) {
915 styler.ColourTo(i, StateToPrint);
916 state = SCE_H_DEFAULT;
917 levelCurrent--;
918 }
919 break;
920 case SCE_H_COMMENT:
921 if ((chPrev2 == '-') && (chPrev == '-') && (ch == '>')) {
922 styler.ColourTo(i, StateToPrint);
923 state = SCE_H_DEFAULT;
924 levelCurrent--;
925 }
926 break;
927 case SCE_H_SGML_1ST_PARAM_COMMENT:
928 if ((ch == '-') && (chPrev == '-')) {
929 styler.ColourTo(i, SCE_H_SGML_COMMENT);
930 state = SCE_H_SGML_1ST_PARAM;
931 }
932 break;
933 case SCE_H_SGML_SPECIAL:
934 if (!(isascii(ch) && isupper(ch))) {
935 styler.ColourTo(i - 1, StateToPrint);
936 if (isalnum(ch)) {
937 state = SCE_H_SGML_ERROR;
938 } else {
939 state = SCE_H_SGML_DEFAULT;
940 }
941 }
942 break;
943 case SCE_H_SGML_ENTITY:
944 if (ch == ';') {
945 styler.ColourTo(i, StateToPrint);
946 state = SCE_H_SGML_DEFAULT;
947 } else if (!(isascii(ch) && isalnum(ch)) && ch != '-' && ch != '.') {
948 styler.ColourTo(i, SCE_H_SGML_ERROR);
949 state = SCE_H_SGML_DEFAULT;
950 }
951 break;
952 case SCE_H_ENTITY:
953 if (ch == ';') {
954 styler.ColourTo(i, StateToPrint);
955 state = SCE_H_DEFAULT;
956 }
957 if (ch != '#' && !(isascii(ch) && isalnum(ch)) // Should check that '#' follows '&', but it is unlikely anyway...
958 && ch != '.' && ch != '-' && ch != '_' && ch != ':') { // valid in XML
959 styler.ColourTo(i, SCE_H_TAGUNKNOWN);
960 state = SCE_H_DEFAULT;
961 }
962 break;
963 case SCE_H_TAGUNKNOWN:
964 if (!ishtmlwordchar(ch) && !((ch == '/') && (chPrev == '<')) && ch != '[') {
965 int eClass = classifyTagHTML(styler.GetStartSegment(),
966 i - 1, keywords, styler, tagDontFold, caseSensitive);
967 if (eClass == SCE_H_SCRIPT) {
968 if (!tagClosing) {
969 inScriptType = eNonHtmlScript;
970 scriptLanguage = clientScript;
971 eClass = SCE_H_TAG;
972 } else {
973 scriptLanguage = eScriptNone;
974 eClass = SCE_H_TAG;
975 }
976 }
977 if (ch == '>') {
978 styler.ColourTo(i, eClass);
979 if (inScriptType == eNonHtmlScript) {
980 state = StateForScript(scriptLanguage);
981 } else {
982 state = SCE_H_DEFAULT;
983 }
984 tagOpened = false;
985 if (!tagDontFold){
986 if (tagClosing) {
987 levelCurrent--;
988 } else {
989 levelCurrent++;
990 }
991 }
992 tagClosing = false;
993 } else if (ch == '/' && chNext == '>') {
994 if (eClass == SCE_H_TAGUNKNOWN) {
995 styler.ColourTo(i + 1, SCE_H_TAGUNKNOWN);
996 } else {
997 styler.ColourTo(i - 1, StateToPrint);
998 styler.ColourTo(i + 1, SCE_H_TAGEND);
999 }
1000 i++;
1001 ch = chNext;
1002 state = SCE_H_DEFAULT;
1003 tagOpened = false;
1004 } else {
1005 if (eClass != SCE_H_TAGUNKNOWN) {
1006 if (eClass == SCE_H_SGML_DEFAULT) {
1007 state = SCE_H_SGML_DEFAULT;
1008 } else {
1009 state = SCE_H_OTHER;
1010 }
1011 }
1012 }
1013 }
1014 break;
1015 case SCE_H_ATTRIBUTE:
1016 if (!ishtmlwordchar(ch) && ch != '/' && ch != '-') {
1017 if (inScriptType == eNonHtmlScript) {
1018 int scriptLanguagePrev = scriptLanguage;
1019 clientScript = segIsScriptingIndicator(styler, styler.GetStartSegment(), i - 1, scriptLanguage);
1020 scriptLanguage = clientScript;
1021 if ((scriptLanguagePrev != scriptLanguage) && (scriptLanguage == eScriptNone))
1022 inScriptType = eHtml;
1023 }
1024 classifyAttribHTML(styler.GetStartSegment(), i - 1, keywords, styler);
1025 if (ch == '>') {
1026 styler.ColourTo(i, SCE_H_TAG);
1027 if (inScriptType == eNonHtmlScript) {
1028 state = StateForScript(scriptLanguage);
1029 } else {
1030 state = SCE_H_DEFAULT;
1031 }
1032 tagOpened = false;
1033 if (!tagDontFold){
1034 if (tagClosing){
1035 levelCurrent--;
1036 } else {
1037 levelCurrent++;
1038 }
1039 }
1040 tagClosing = false;
1041 } else if (ch == '=') {
1042 styler.ColourTo(i, SCE_H_OTHER);
1043 state = SCE_H_VALUE;
1044 } else {
1045 state = SCE_H_OTHER;
1046 }
1047 }
1048 break;
1049 case SCE_H_OTHER:
1050 if (ch == '>') {
1051 styler.ColourTo(i - 1, StateToPrint);
1052 styler.ColourTo(i, SCE_H_TAG);
1053 if (inScriptType == eNonHtmlScript) {
1054 state = StateForScript(scriptLanguage);
1055 } else {
1056 state = SCE_H_DEFAULT;
1057 }
1058 tagOpened = false;
1059 if (!tagDontFold){
1060 if (tagClosing){
1061 levelCurrent--;
1062 } else {
1063 levelCurrent++;
1064 }
1065 }
1066 tagClosing = false;
1067 } else if (ch == '\"') {
1068 styler.ColourTo(i - 1, StateToPrint);
1069 state = SCE_H_DOUBLESTRING;
1070 } else if (ch == '\'') {
1071 styler.ColourTo(i - 1, StateToPrint);
1072 state = SCE_H_SINGLESTRING;
1073 } else if (ch == '=') {
1074 styler.ColourTo(i, StateToPrint);
1075 state = SCE_H_VALUE;
1076 } else if (ch == '/' && chNext == '>') {
1077 styler.ColourTo(i - 1, StateToPrint);
1078 styler.ColourTo(i + 1, SCE_H_TAGEND);
1079 i++;
1080 ch = chNext;
1081 state = SCE_H_DEFAULT;
1082 tagOpened = false;
1083 } else if (ch == '?' && chNext == '>') {
1084 styler.ColourTo(i - 1, StateToPrint);
1085 styler.ColourTo(i + 1, SCE_H_XMLEND);
1086 i++;
1087 ch = chNext;
1088 state = SCE_H_DEFAULT;
1089 } else if (ishtmlwordchar(ch)) {
1090 styler.ColourTo(i - 1, StateToPrint);
1091 state = SCE_H_ATTRIBUTE;
1092 }
1093 break;
1094 case SCE_H_DOUBLESTRING:
1095 if (ch == '\"') {
1096 if (inScriptType == eNonHtmlScript) {
1097 scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
1098 }
1099 styler.ColourTo(i, SCE_H_DOUBLESTRING);
1100 state = SCE_H_OTHER;
1101 }
1102 break;
1103 case SCE_H_SINGLESTRING:
1104 if (ch == '\'') {
1105 if (inScriptType == eNonHtmlScript) {
1106 scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
1107 }
1108 styler.ColourTo(i, SCE_H_SINGLESTRING);
1109 state = SCE_H_OTHER;
1110 }
1111 break;
1112 case SCE_H_VALUE:
1113 if (!ishtmlwordchar(ch)) {
1114 if (ch == '\"' && chPrev == '=') {
1115 // Should really test for being first character
1116 state = SCE_H_DOUBLESTRING;
1117 } else if (ch == '\'' && chPrev == '=') {
1118 state = SCE_H_SINGLESTRING;
1119 } else {
1120 if (IsNumber(styler.GetStartSegment(), styler)) {
1121 styler.ColourTo(i - 1, SCE_H_NUMBER);
1122 } else {
1123 styler.ColourTo(i - 1, StateToPrint);
1124 }
1125 if (ch == '>') {
1126 styler.ColourTo(i, SCE_H_TAG);
1127 if (inScriptType == eNonHtmlScript) {
1128 state = StateForScript(scriptLanguage);
1129 } else {
1130 state = SCE_H_DEFAULT;
1131 }
1132 tagOpened = false;
1133 if (!tagDontFold){
1134 if (tagClosing){
1135 levelCurrent--;
1136 } else {
1137 levelCurrent++;
1138 }
1139 }
1140 tagClosing = false;
1141 } else {
1142 state = SCE_H_OTHER;
1143 }
1144 }
1145 }
1146 break;
1147 case SCE_HJ_DEFAULT:
1148 case SCE_HJ_START:
1149 case SCE_HJ_SYMBOLS:
1150 if (iswordstart(ch)) {
1151 styler.ColourTo(i - 1, StateToPrint);
1152 state = SCE_HJ_WORD;
1153 } else if (ch == '/' && chNext == '*') {
1154 styler.ColourTo(i - 1, StateToPrint);
1155 if (chNext2 == '*')
1156 state = SCE_HJ_COMMENTDOC;
1157 else
1158 state = SCE_HJ_COMMENT;
1159 } else if (ch == '/' && chNext == '/') {
1160 styler.ColourTo(i - 1, StateToPrint);
1161 state = SCE_HJ_COMMENTLINE;
1162 } else if (ch == '/' && isOKBeforeRE(chPrevNonWhite)) {
1163 styler.ColourTo(i - 1, StateToPrint);
1164 state = SCE_HJ_REGEX;
1165 } else if (ch == '\"') {
1166 styler.ColourTo(i - 1, StateToPrint);
1167 state = SCE_HJ_DOUBLESTRING;
1168 } else if (ch == '\'') {
1169 styler.ColourTo(i - 1, StateToPrint);
1170 state = SCE_HJ_SINGLESTRING;
1171 } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1172 styler.SafeGetCharAt(i + 3) == '-') {
1173 styler.ColourTo(i - 1, StateToPrint);
1174 state = SCE_HJ_COMMENTLINE;
1175 } else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1176 styler.ColourTo(i - 1, StateToPrint);
1177 state = SCE_HJ_COMMENTLINE;
1178 i += 2;
1179 } else if (isoperator(ch)) {
1180 styler.ColourTo(i - 1, StateToPrint);
1181 styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1182 state = SCE_HJ_DEFAULT;
1183 } else if ((ch == ' ') || (ch == '\t')) {
1184 if (state == SCE_HJ_START) {
1185 styler.ColourTo(i - 1, StateToPrint);
1186 state = SCE_HJ_DEFAULT;
1187 }
1188 }
1189 break;
1190 case SCE_HJ_WORD:
1191 if (!iswordchar(ch)) {
1192 classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
1193 //styler.ColourTo(i - 1, eHTJSKeyword);
1194 state = SCE_HJ_DEFAULT;
1195 if (ch == '/' && chNext == '*') {
1196 if (chNext2 == '*')
1197 state = SCE_HJ_COMMENTDOC;
1198 else
1199 state = SCE_HJ_COMMENT;
1200 } else if (ch == '/' && chNext == '/') {
1201 state = SCE_HJ_COMMENTLINE;
1202 } else if (ch == '\"') {
1203 state = SCE_HJ_DOUBLESTRING;
1204 } else if (ch == '\'') {
1205 state = SCE_HJ_SINGLESTRING;
1206 } else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1207 styler.ColourTo(i - 1, StateToPrint);
1208 state = SCE_HJ_COMMENTLINE;
1209 i += 2;
1210 } else if (isoperator(ch)) {
1211 styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1212 state = SCE_HJ_DEFAULT;
1213 }
1214 }
1215 break;
1216 case SCE_HJ_COMMENT:
1217 case SCE_HJ_COMMENTDOC:
1218 if (ch == '/' && chPrev == '*') {
1219 styler.ColourTo(i, StateToPrint);
1220 state = SCE_HJ_DEFAULT;
1221 }
1222 break;
1223 case SCE_HJ_COMMENTLINE:
1224 if (ch == '\r' || ch == '\n') {
1225 styler.ColourTo(i - 1, statePrintForState(SCE_HJ_COMMENTLINE, inScriptType));
1226 state = SCE_HJ_DEFAULT;
1227 }
1228 break;
1229 case SCE_HJ_DOUBLESTRING:
1230 if (ch == '\\') {
1231 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1232 i++;
1233 }
1234 } else if (ch == '\"') {
1235 styler.ColourTo(i, statePrintForState(SCE_HJ_DOUBLESTRING, inScriptType));
1236 state = SCE_HJ_DEFAULT;
1237 } else if ((inScriptType == eNonHtmlScript) && (ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1238 styler.ColourTo(i - 1, StateToPrint);
1239 state = SCE_HJ_COMMENTLINE;
1240 i += 2;
1241 } else if (isLineEnd(ch)) {
1242 styler.ColourTo(i - 1, StateToPrint);
1243 state = SCE_HJ_STRINGEOL;
1244 }
1245 break;
1246 case SCE_HJ_SINGLESTRING:
1247 if (ch == '\\') {
1248 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1249 i++;
1250 }
1251 } else if (ch == '\'') {
1252 styler.ColourTo(i, statePrintForState(SCE_HJ_SINGLESTRING, inScriptType));
1253 state = SCE_HJ_DEFAULT;
1254 } else if ((inScriptType == eNonHtmlScript) && (ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1255 styler.ColourTo(i - 1, StateToPrint);
1256 state = SCE_HJ_COMMENTLINE;
1257 i += 2;
1258 } else if (isLineEnd(ch)) {
1259 styler.ColourTo(i - 1, StateToPrint);
1260 state = SCE_HJ_STRINGEOL;
1261 }
1262 break;
1263 case SCE_HJ_STRINGEOL:
1264 if (!isLineEnd(ch)) {
1265 styler.ColourTo(i - 1, StateToPrint);
1266 state = SCE_HJ_DEFAULT;
1267 } else if (!isLineEnd(chNext)) {
1268 styler.ColourTo(i, StateToPrint);
1269 state = SCE_HJ_DEFAULT;
1270 }
1271 break;
1272 case SCE_HJ_REGEX:
1273 if (ch == '\r' || ch == '\n' || ch == '/') {
1274 styler.ColourTo(i, StateToPrint);
1275 state = SCE_HJ_DEFAULT;
1276 } else if (ch == '\\') {
1277 // Gobble up the quoted character
1278 if (chNext == '\\' || chNext == '/') {
1279 i++;
1280 ch = chNext;
1281 chNext = styler.SafeGetCharAt(i + 1);
1282 }
1283 }
1284 break;
1285 case SCE_HB_DEFAULT:
1286 case SCE_HB_START:
1287 if (iswordstart(ch)) {
1288 styler.ColourTo(i - 1, StateToPrint);
1289 state = SCE_HB_WORD;
1290 } else if (ch == '\'') {
1291 styler.ColourTo(i - 1, StateToPrint);
1292 state = SCE_HB_COMMENTLINE;
1293 } else if (ch == '\"') {
1294 styler.ColourTo(i - 1, StateToPrint);
1295 state = SCE_HB_STRING;
1296 } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1297 styler.SafeGetCharAt(i + 3) == '-') {
1298 styler.ColourTo(i - 1, StateToPrint);
1299 state = SCE_HB_COMMENTLINE;
1300 } else if (isoperator(ch)) {
1301 styler.ColourTo(i - 1, StateToPrint);
1302 styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
1303 state = SCE_HB_DEFAULT;
1304 } else if ((ch == ' ') || (ch == '\t')) {
1305 if (state == SCE_HB_START) {
1306 styler.ColourTo(i - 1, StateToPrint);
1307 state = SCE_HB_DEFAULT;
1308 }
1309 }
1310 break;
1311 case SCE_HB_WORD:
1312 if (!iswordchar(ch)) {
1313 state = classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
1314 if (state == SCE_HB_DEFAULT) {
1315 if (ch == '\"') {
1316 state = SCE_HB_STRING;
1317 } else if (ch == '\'') {
1318 state = SCE_HB_COMMENTLINE;
1319 } else if (isoperator(ch)) {
1320 styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
1321 state = SCE_HB_DEFAULT;
1322 }
1323 }
1324 }
1325 break;
1326 case SCE_HB_STRING:
1327 if (ch == '\"') {
1328 styler.ColourTo(i, StateToPrint);
1329 state = SCE_HB_DEFAULT;
1330 } else if (ch == '\r' || ch == '\n') {
1331 styler.ColourTo(i - 1, StateToPrint);
1332 state = SCE_HB_STRINGEOL;
1333 }
1334 break;
1335 case SCE_HB_COMMENTLINE:
1336 if (ch == '\r' || ch == '\n') {
1337 styler.ColourTo(i - 1, StateToPrint);
1338 state = SCE_HB_DEFAULT;
1339 }
1340 break;
1341 case SCE_HB_STRINGEOL:
1342 if (!isLineEnd(ch)) {
1343 styler.ColourTo(i - 1, StateToPrint);
1344 state = SCE_HB_DEFAULT;
1345 } else if (!isLineEnd(chNext)) {
1346 styler.ColourTo(i, StateToPrint);
1347 state = SCE_HB_DEFAULT;
1348 }
1349 break;
1350 case SCE_HP_DEFAULT:
1351 case SCE_HP_START:
1352 if (iswordstart(ch)) {
1353 styler.ColourTo(i - 1, StateToPrint);
1354 state = SCE_HP_WORD;
1355 } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1356 styler.SafeGetCharAt(i + 3) == '-') {
1357 styler.ColourTo(i - 1, StateToPrint);
1358 state = SCE_HP_COMMENTLINE;
1359 } else if (ch == '#') {
1360 styler.ColourTo(i - 1, StateToPrint);
1361 state = SCE_HP_COMMENTLINE;
1362 } else if (ch == '\"') {
1363 styler.ColourTo(i - 1, StateToPrint);
1364 if (chNext == '\"' && chNext2 == '\"') {
1365 i += 2;
1366 state = SCE_HP_TRIPLEDOUBLE;
1367 ch = ' ';
1368 chPrev = ' ';
1369 chNext = styler.SafeGetCharAt(i + 1);
1370 } else {
1371 // state = statePrintForState(SCE_HP_STRING,inScriptType);
1372 state = SCE_HP_STRING;
1373 }
1374 } else if (ch == '\'') {
1375 styler.ColourTo(i - 1, StateToPrint);
1376 if (chNext == '\'' && chNext2 == '\'') {
1377 i += 2;
1378 state = SCE_HP_TRIPLE;
1379 ch = ' ';
1380 chPrev = ' ';
1381 chNext = styler.SafeGetCharAt(i + 1);
1382 } else {
1383 state = SCE_HP_CHARACTER;
1384 }
1385 } else if (isoperator(ch)) {
1386 styler.ColourTo(i - 1, StateToPrint);
1387 styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
1388 } else if ((ch == ' ') || (ch == '\t')) {
1389 if (state == SCE_HP_START) {
1390 styler.ColourTo(i - 1, StateToPrint);
1391 state = SCE_HP_DEFAULT;
1392 }
1393 }
1394 break;
1395 case SCE_HP_WORD:
1396 if (!iswordchar(ch)) {
1397 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
1398 state = SCE_HP_DEFAULT;
1399 if (ch == '#') {
1400 state = SCE_HP_COMMENTLINE;
1401 } else if (ch == '\"') {
1402 if (chNext == '\"' && chNext2 == '\"') {
1403 i += 2;
1404 state = SCE_HP_TRIPLEDOUBLE;
1405 ch = ' ';
1406 chPrev = ' ';
1407 chNext = styler.SafeGetCharAt(i + 1);
1408 } else {
1409 state = SCE_HP_STRING;
1410 }
1411 } else if (ch == '\'') {
1412 if (chNext == '\'' && chNext2 == '\'') {
1413 i += 2;
1414 state = SCE_HP_TRIPLE;
1415 ch = ' ';
1416 chPrev = ' ';
1417 chNext = styler.SafeGetCharAt(i + 1);
1418 } else {
1419 state = SCE_HP_CHARACTER;
1420 }
1421 } else if (isoperator(ch)) {
1422 styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
1423 }
1424 }
1425 break;
1426 case SCE_HP_COMMENTLINE:
1427 if (ch == '\r' || ch == '\n') {
1428 styler.ColourTo(i - 1, StateToPrint);
1429 state = SCE_HP_DEFAULT;
1430 }
1431 break;
1432 case SCE_HP_STRING:
1433 if (ch == '\\') {
1434 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1435 i++;
1436 ch = chNext;
1437 chNext = styler.SafeGetCharAt(i + 1);
1438 }
1439 } else if (ch == '\"') {
1440 styler.ColourTo(i, StateToPrint);
1441 state = SCE_HP_DEFAULT;
1442 }
1443 break;
1444 case SCE_HP_CHARACTER:
1445 if (ch == '\\') {
1446 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1447 i++;
1448 ch = chNext;
1449 chNext = styler.SafeGetCharAt(i + 1);
1450 }
1451 } else if (ch == '\'') {
1452 styler.ColourTo(i, StateToPrint);
1453 state = SCE_HP_DEFAULT;
1454 }
1455 break;
1456 case SCE_HP_TRIPLE:
1457 if (ch == '\'' && chPrev == '\'' && chPrev2 == '\'') {
1458 styler.ColourTo(i, StateToPrint);
1459 state = SCE_HP_DEFAULT;
1460 }
1461 break;
1462 case SCE_HP_TRIPLEDOUBLE:
1463 if (ch == '\"' && chPrev == '\"' && chPrev2 == '\"') {
1464 styler.ColourTo(i, StateToPrint);
1465 state = SCE_HP_DEFAULT;
1466 }
1467 break;
1468 ///////////// start - PHP state handling
1469 case SCE_HPHP_WORD:
1470 if (!iswordchar(ch)) {
1471 classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
1472 if (ch == '/' && chNext == '*') {
1473 i++;
1474 state = SCE_HPHP_COMMENT;
1475 } else if (ch == '/' && chNext == '/') {
1476 i++;
1477 state = SCE_HPHP_COMMENTLINE;
1478 } else if (ch == '#') {
1479 state = SCE_HPHP_COMMENTLINE;
1480 } else if (ch == '\"') {
1481 state = SCE_HPHP_HSTRING;
1482 strcpy(phpStringDelimiter, "\"");
1483 } else if (styler.Match(i, "<<<")) {
1484 state = SCE_HPHP_HSTRING;
1485 i = FindPhpStringDelimiter(phpStringDelimiter, sizeof(phpStringDelimiter), i + 3, lengthDoc, styler);
1486 } else if (ch == '\'') {
1487 state = SCE_HPHP_SIMPLESTRING;
1488 } else if (ch == '$' && IsPhpWordStart(chNext)) {
1489 state = SCE_HPHP_VARIABLE;
1490 } else if (isoperator(ch)) {
1491 state = SCE_HPHP_OPERATOR;
1492 } else {
1493 state = SCE_HPHP_DEFAULT;
1494 }
1495 }
1496 break;
1497 case SCE_HPHP_NUMBER:
1498 if (!IsADigit(ch)) {
1499 styler.ColourTo(i - 1, SCE_HPHP_NUMBER);
1500 if (isoperator(ch))
1501 state = SCE_HPHP_OPERATOR;
1502 else
1503 state = SCE_HPHP_DEFAULT;
1504 }
1505 break;
1506 case SCE_HPHP_VARIABLE:
1507 if (!IsPhpWordChar(ch)) {
1508 styler.ColourTo(i - 1, SCE_HPHP_VARIABLE);
1509 if (isoperator(ch))
1510 state = SCE_HPHP_OPERATOR;
1511 else
1512 state = SCE_HPHP_DEFAULT;
1513 }
1514 break;
1515 case SCE_HPHP_COMMENT:
1516 if (ch == '/' && chPrev == '*') {
1517 styler.ColourTo(i, StateToPrint);
1518 state = SCE_HPHP_DEFAULT;
1519 }
1520 break;
1521 case SCE_HPHP_COMMENTLINE:
1522 if (ch == '\r' || ch == '\n') {
1523 styler.ColourTo(i - 1, StateToPrint);
1524 state = SCE_HPHP_DEFAULT;
1525 }
1526 break;
1527 case SCE_HPHP_HSTRING:
1528 if (ch == '\\' && (phpStringDelimiter[0] == '\"' || chNext == '$' || chNext == '{')) {
1529 // skip the next char
1530 i++;
1531 } else if (((ch == '{' && chNext == '$') || (ch == '$' && chNext == '{'))
1532 && IsPhpWordStart(chNext2)) {
1533 styler.ColourTo(i - 1, StateToPrint);
1534 state = SCE_HPHP_COMPLEX_VARIABLE;
1535 } else if (ch == '$' && IsPhpWordStart(chNext)) {
1536 styler.ColourTo(i - 1, StateToPrint);
1537 state = SCE_HPHP_HSTRING_VARIABLE;
1538 } else if (styler.Match(i, phpStringDelimiter)) {
1539 if (strlen(phpStringDelimiter) > 1)
1540 i += strlen(phpStringDelimiter) - 1;
1541 styler.ColourTo(i, StateToPrint);
1542 state = SCE_HPHP_DEFAULT;
1543 }
1544 break;
1545 case SCE_HPHP_SIMPLESTRING:
1546 if (ch == '\\') {
1547 // skip the next char
1548 i++;
1549 } else if (ch == '\'') {
1550 styler.ColourTo(i, StateToPrint);
1551 state = SCE_HPHP_DEFAULT;
1552 }
1553 break;
1554 case SCE_HPHP_HSTRING_VARIABLE:
1555 if (!IsPhpWordChar(ch)) {
1556 styler.ColourTo(i - 1, StateToPrint);
1557 i--; // strange but it works
1558 state = SCE_HPHP_HSTRING;
1559 }
1560 break;
1561 case SCE_HPHP_COMPLEX_VARIABLE:
1562 if (ch == '}') {
1563 styler.ColourTo(i, StateToPrint);
1564 state = SCE_HPHP_HSTRING;
1565 }
1566 break;
1567 case SCE_HPHP_OPERATOR:
1568 case SCE_HPHP_DEFAULT:
1569 styler.ColourTo(i - 1, StateToPrint);
1570 if (IsADigit(ch)) {
1571 state = SCE_HPHP_NUMBER;
1572 } else if (iswordstart(ch)) {
1573 state = SCE_HPHP_WORD;
1574 } else if (ch == '/' && chNext == '*') {
1575 i++;
1576 state = SCE_HPHP_COMMENT;
1577 } else if (ch == '/' && chNext == '/') {
1578 i++;
1579 state = SCE_HPHP_COMMENTLINE;
1580 } else if (ch == '#') {
1581 state = SCE_HPHP_COMMENTLINE;
1582 } else if (ch == '\"') {
1583 state = SCE_HPHP_HSTRING;
1584 strcpy(phpStringDelimiter, "\"");
1585 } else if (styler.Match(i, "<<<")) {
1586 state = SCE_HPHP_HSTRING;
1587 i = FindPhpStringDelimiter(phpStringDelimiter, sizeof(phpStringDelimiter), i + 3, lengthDoc, styler);
1588 } else if (ch == '\'') {
1589 state = SCE_HPHP_SIMPLESTRING;
1590 } else if (ch == '$' && IsPhpWordStart(chNext)) {
1591 state = SCE_HPHP_VARIABLE;
1592 } else if (isoperator(ch)) {
1593 state = SCE_HPHP_OPERATOR;
1594 } else if ((state == SCE_HPHP_OPERATOR) && (isspacechar(ch))) {
1595 state = SCE_HPHP_DEFAULT;
1596 }
1597 break;
1598 ///////////// end - PHP state handling
1599 }
1600
1601 // Some of the above terminated their lexeme but since the same character starts
1602 // the same class again, only reenter if non empty segment.
1603
1604 bool nonEmptySegment = i >= static_cast<int>(styler.GetStartSegment());
1605 if (state == SCE_HB_DEFAULT) { // One of the above succeeded
1606 if ((ch == '\"') && (nonEmptySegment)) {
1607 state = SCE_HB_STRING;
1608 } else if (ch == '\'') {
1609 state = SCE_HB_COMMENTLINE;
1610 } else if (iswordstart(ch)) {
1611 state = SCE_HB_WORD;
1612 } else if (isoperator(ch)) {
1613 styler.ColourTo(i, SCE_HB_DEFAULT);
1614 }
1615 } else if (state == SCE_HBA_DEFAULT) { // One of the above succeeded
1616 if ((ch == '\"') && (nonEmptySegment)) {
1617 state = SCE_HBA_STRING;
1618 } else if (ch == '\'') {
1619 state = SCE_HBA_COMMENTLINE;
1620 } else if (iswordstart(ch)) {
1621 state = SCE_HBA_WORD;
1622 } else if (isoperator(ch)) {
1623 styler.ColourTo(i, SCE_HBA_DEFAULT);
1624 }
1625 } else if (state == SCE_HJ_DEFAULT) { // One of the above succeeded
1626 if (ch == '/' && chNext == '*') {
1627 if (styler.SafeGetCharAt(i + 2) == '*')
1628 state = SCE_HJ_COMMENTDOC;
1629 else
1630 state = SCE_HJ_COMMENT;
1631 } else if (ch == '/' && chNext == '/') {
1632 state = SCE_HJ_COMMENTLINE;
1633 } else if ((ch == '\"') && (nonEmptySegment)) {
1634 state = SCE_HJ_DOUBLESTRING;
1635 } else if ((ch == '\'') && (nonEmptySegment)) {
1636 state = SCE_HJ_SINGLESTRING;
1637 } else if (iswordstart(ch)) {
1638 state = SCE_HJ_WORD;
1639 } else if (isoperator(ch)) {
1640 styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1641 }
1642 }
1643 }
1644
1645 StateToPrint = statePrintForState(state, inScriptType);
1646 styler.ColourTo(lengthDoc - 1, StateToPrint);
1647
1648 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
1649 if (fold) {
1650 int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
1651 styler.SetLevel(lineCurrent, levelPrev | flagsNext);
1652 }
1653 }
1654
1655 static bool isASPScript(int state) {
1656 return
1657 (state >= SCE_HJA_START && state <= SCE_HJA_REGEX) ||
1658 (state >= SCE_HBA_START && state <= SCE_HBA_STRINGEOL) ||
1659 (state >= SCE_HPA_DEFAULT && state <= SCE_HPA_IDENTIFIER);
1660 }
1661
1662 static void ColouriseHBAPiece(StyleContext &sc, WordList *keywordlists[]) {
1663 WordList &keywordsVBS = *keywordlists[2];
1664 if (sc.state == SCE_HBA_WORD) {
1665 if (!IsAWordChar(sc.ch)) {
1666 char s[100];
1667 sc.GetCurrentLowered(s, sizeof(s));
1668 if (keywordsVBS.InList(s)) {
1669 if (strcmp(s, "rem") == 0) {
1670 sc.ChangeState(SCE_HBA_COMMENTLINE);
1671 if (sc.atLineEnd) {
1672 sc.SetState(SCE_HBA_DEFAULT);
1673 }
1674 } else {
1675 sc.SetState(SCE_HBA_DEFAULT);
1676 }
1677 } else {
1678 sc.ChangeState(SCE_HBA_IDENTIFIER);
1679 sc.SetState(SCE_HBA_DEFAULT);
1680 }
1681 }
1682 } else if (sc.state == SCE_HBA_NUMBER) {
1683 if (!IsAWordChar(sc.ch)) {
1684 sc.SetState(SCE_HBA_DEFAULT);
1685 }
1686 } else if (sc.state == SCE_HBA_STRING) {
1687 if (sc.ch == '\"') {
1688 sc.ForwardSetState(SCE_HBA_DEFAULT);
1689 } else if (sc.ch == '\r' || sc.ch == '\n') {
1690 sc.ChangeState(SCE_HBA_STRINGEOL);
1691 sc.ForwardSetState(SCE_HBA_DEFAULT);
1692 }
1693 } else if (sc.state == SCE_HBA_COMMENTLINE) {
1694 if (sc.ch == '\r' || sc.ch == '\n') {
1695 sc.SetState(SCE_HBA_DEFAULT);
1696 }
1697 }
1698
1699 if (sc.state == SCE_HBA_DEFAULT) {
1700 if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
1701 sc.SetState(SCE_HBA_NUMBER);
1702 } else if (IsAWordStart(sc.ch)) {
1703 sc.SetState(SCE_HBA_WORD);
1704 } else if (sc.ch == '\'') {
1705 sc.SetState(SCE_HBA_COMMENTLINE);
1706 } else if (sc.ch == '\"') {
1707 sc.SetState(SCE_HBA_STRING);
1708 }
1709 }
1710 }
1711
1712 static void ColouriseHTMLPiece(StyleContext &sc, WordList *keywordlists[]) {
1713 WordList &keywordsTags = *keywordlists[0];
1714 if (sc.state == SCE_H_COMMENT) {
1715 if (sc.Match("-->")) {
1716 sc.Forward();
1717 sc.Forward();
1718 sc.ForwardSetState(SCE_H_DEFAULT);
1719 }
1720 } else if (sc.state == SCE_H_ENTITY) {
1721 if (sc.ch == ';') {
1722 sc.ForwardSetState(SCE_H_DEFAULT);
1723 } else if (sc.ch != '#' && (sc.ch < 0x80) && !isalnum(sc.ch) // Should check that '#' follows '&', but it is unlikely anyway...
1724 && sc.ch != '.' && sc.ch != '-' && sc.ch != '_' && sc.ch != ':') { // valid in XML
1725 sc.ChangeState(SCE_H_TAGUNKNOWN);
1726 sc.SetState(SCE_H_DEFAULT);
1727 }
1728 } else if (sc.state == SCE_H_TAGUNKNOWN) {
1729 if (!ishtmlwordchar(static_cast<char>(sc.ch)) && !((sc.ch == '/') && (sc.chPrev == '<')) && sc.ch != '[') {
1730 char s[100];
1731 sc.GetCurrentLowered(s, sizeof(s));
1732 if (s[1] == '/') {
1733 if (keywordsTags.InList(s + 2)) {
1734 sc.ChangeState(SCE_H_TAG);
1735 }
1736 } else {
1737 if (keywordsTags.InList(s + 1)) {
1738 sc.ChangeState(SCE_H_TAG);
1739 }
1740 }
1741 if (sc.ch == '>') {
1742 sc.ForwardSetState(SCE_H_DEFAULT);
1743 } else if (sc.Match('/', '>')) {
1744 sc.SetState(SCE_H_TAGEND);
1745 sc.Forward();
1746 sc.ForwardSetState(SCE_H_DEFAULT);
1747 } else {
1748 sc.SetState(SCE_H_OTHER);
1749 }
1750 }
1751 } else if (sc.state == SCE_H_ATTRIBUTE) {
1752 if (!ishtmlwordchar(static_cast<char>(sc.ch))) {
1753 char s[100];
1754 sc.GetCurrentLowered(s, sizeof(s));
1755 if (!keywordsTags.InList(s)) {
1756 sc.ChangeState(SCE_H_ATTRIBUTEUNKNOWN);
1757 }
1758 sc.SetState(SCE_H_OTHER);
1759 }
1760 } else if (sc.state == SCE_H_OTHER) {
1761 if (sc.ch == '>') {
1762 sc.SetState(SCE_H_TAG);
1763 sc.ForwardSetState(SCE_H_DEFAULT);
1764 } else if (sc.Match('/', '>')) {
1765 sc.SetState(SCE_H_TAG);
1766 sc.Forward();
1767 sc.ForwardSetState(SCE_H_DEFAULT);
1768 } else if (sc.chPrev == '=') {
1769 sc.SetState(SCE_H_VALUE);
1770 }
1771 } else if (sc.state == SCE_H_DOUBLESTRING) {
1772 if (sc.ch == '\"') {
1773 sc.ForwardSetState(SCE_H_OTHER);
1774 }
1775 } else if (sc.state == SCE_H_SINGLESTRING) {
1776 if (sc.ch == '\'') {
1777 sc.ForwardSetState(SCE_H_OTHER);
1778 }
1779 } else if (sc.state == SCE_H_NUMBER) {
1780 if (!IsADigit(sc.ch)) {
1781 sc.SetState(SCE_H_OTHER);
1782 }
1783 }
1784
1785 if (sc.state == SCE_H_DEFAULT) {
1786 if (sc.ch == '<') {
1787 if (sc.Match("<!--"))
1788 sc.SetState(SCE_H_COMMENT);
1789 else
1790 sc.SetState(SCE_H_TAGUNKNOWN);
1791 } else if (sc.ch == '&') {
1792 sc.SetState(SCE_H_ENTITY);
1793 }
1794 } else if ((sc.state == SCE_H_OTHER) || (sc.state == SCE_H_VALUE)) {
1795 if (sc.ch == '\"' && sc.chPrev == '=') {
1796 sc.SetState(SCE_H_DOUBLESTRING);
1797 } else if (sc.ch == '\'' && sc.chPrev == '=') {
1798 sc.SetState(SCE_H_SINGLESTRING);
1799 } else if (IsADigit(sc.ch)) {
1800 sc.SetState(SCE_H_NUMBER);
1801 } else if (sc.ch == '>') {
1802 sc.SetState(SCE_H_TAG);
1803 sc.ForwardSetState(SCE_H_DEFAULT);
1804 } else if (ishtmlwordchar(static_cast<char>(sc.ch))) {
1805 sc.SetState(SCE_H_ATTRIBUTE);
1806 }
1807 }
1808 }
1809
1810 static void ColouriseASPPiece(StyleContext &sc, WordList *keywordlists[]) {
1811 // Possibly exit current state to either SCE_H_DEFAULT or SCE_HBA_DEFAULT
1812 if ((sc.state == SCE_H_ASPAT || isASPScript(sc.state)) && sc.Match('%', '>')) {
1813 sc.SetState(SCE_H_ASP);
1814 sc.Forward();
1815 sc.ForwardSetState(SCE_H_DEFAULT);
1816 }
1817
1818 // Handle some ASP script
1819 if (sc.state >= SCE_HBA_START && sc.state <= SCE_HBA_STRINGEOL) {
1820 ColouriseHBAPiece(sc, keywordlists);
1821 } else if (sc.state >= SCE_H_DEFAULT && sc.state <= SCE_H_SGML_BLOCK_DEFAULT) {
1822 ColouriseHTMLPiece(sc, keywordlists);
1823 }
1824
1825 // Enter new sc.state
1826 if ((sc.state == SCE_H_DEFAULT) || (sc.state == SCE_H_TAGUNKNOWN)) {
1827 if (sc.Match('<', '%')) {
1828 if (sc.state == SCE_H_TAGUNKNOWN)
1829 sc.ChangeState(SCE_H_ASP);
1830 else
1831 sc.SetState(SCE_H_ASP);
1832 sc.Forward();
1833 sc.Forward();
1834 if (sc.ch == '@') {
1835 sc.ForwardSetState(SCE_H_ASPAT);
1836 } else {
1837 if (sc.ch == '=') {
1838 sc.Forward();
1839 }
1840 sc.SetState(SCE_HBA_DEFAULT);
1841 }
1842 }
1843 }
1844 }
1845
1846 static void ColouriseASPDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
1847 Accessor &styler) {
1848 // Lexer for HTML requires more lexical states (7 bits worth) than most lexers
1849 StyleContext sc(startPos, length, initStyle, styler, 0x7f);
1850 for (; sc.More(); sc.Forward()) {
1851 ColouriseASPPiece(sc, keywordlists);
1852 }
1853 sc.Complete();
1854 }
1855
1856 static void ColourisePHPPiece(StyleContext &sc, WordList *keywordlists[]) {
1857 // Possibly exit current state to either SCE_H_DEFAULT or SCE_HBA_DEFAULT
1858 if (sc.state >= SCE_HPHP_DEFAULT && sc.state <= SCE_HPHP_OPERATOR) {
1859 if (!isPHPStringState(sc.state) &&
1860 (sc.state != SCE_HPHP_COMMENT) &&
1861 (sc.Match('?', '>'))) {
1862 sc.SetState(SCE_H_QUESTION);
1863 sc.Forward();
1864 sc.ForwardSetState(SCE_H_DEFAULT);
1865 }
1866 }
1867
1868 if (sc.state >= SCE_H_DEFAULT && sc.state <= SCE_H_SGML_BLOCK_DEFAULT) {
1869 ColouriseHTMLPiece(sc, keywordlists);
1870 }
1871
1872 // Handle some PHP script
1873 if (sc.state == SCE_HPHP_WORD) {
1874 if (!IsPhpWordChar(static_cast<char>(sc.ch))) {
1875 sc.SetState(SCE_HPHP_DEFAULT);
1876 }
1877 } else if (sc.state == SCE_HPHP_COMMENTLINE) {
1878 if (sc.ch == '\r' || sc.ch == '\n') {
1879 sc.SetState(SCE_HPHP_DEFAULT);
1880 }
1881 } else if (sc.state == SCE_HPHP_COMMENT) {
1882 if (sc.Match('*', '/')) {
1883 sc.Forward();
1884 sc.Forward();
1885 sc.SetState(SCE_HPHP_DEFAULT);
1886 }
1887 } else if (sc.state == SCE_HPHP_HSTRING) {
1888 if (sc.ch == '\"') {
1889 sc.ForwardSetState(SCE_HPHP_DEFAULT);
1890 }
1891 } else if (sc.state == SCE_HPHP_SIMPLESTRING) {
1892 if (sc.ch == '\'') {
1893 sc.ForwardSetState(SCE_HPHP_DEFAULT);
1894 }
1895 } else if (sc.state == SCE_HPHP_VARIABLE) {
1896 if (!IsPhpWordChar(static_cast<char>(sc.ch))) {
1897 sc.SetState(SCE_HPHP_DEFAULT);
1898 }
1899 } else if (sc.state == SCE_HPHP_OPERATOR) {
1900 sc.SetState(SCE_HPHP_DEFAULT);
1901 }
1902
1903 // Enter new sc.state
1904 if ((sc.state == SCE_H_DEFAULT) || (sc.state == SCE_H_TAGUNKNOWN)) {
1905 if (sc.Match("<?php")) {
1906 sc.SetState(SCE_H_QUESTION);
1907 sc.Forward();
1908 sc.Forward();
1909 sc.Forward();
1910 sc.Forward();
1911 sc.Forward();
1912 sc.SetState(SCE_HPHP_DEFAULT);
1913 }
1914 }
1915 if (sc.state == SCE_HPHP_DEFAULT) {
1916 if (IsPhpWordStart(static_cast<char>(sc.ch))) {
1917 sc.SetState(SCE_HPHP_WORD);
1918 } else if (sc.ch == '#') {
1919 sc.SetState(SCE_HPHP_COMMENTLINE);
1920 } else if (sc.Match("<!--")) {
1921 sc.SetState(SCE_HPHP_COMMENTLINE);
1922 } else if (sc.Match('/', '/')) {
1923 sc.SetState(SCE_HPHP_COMMENTLINE);
1924 } else if (sc.Match('/', '*')) {
1925 sc.SetState(SCE_HPHP_COMMENT);
1926 } else if (sc.ch == '\"') {
1927 sc.SetState(SCE_HPHP_HSTRING);
1928 } else if (sc.ch == '\'') {
1929 sc.SetState(SCE_HPHP_SIMPLESTRING);
1930 } else if (sc.ch == '$' && IsPhpWordStart(static_cast<char>(sc.chNext))) {
1931 sc.SetState(SCE_HPHP_VARIABLE);
1932 } else if (isoperator(static_cast<char>(sc.ch))) {
1933 sc.SetState(SCE_HPHP_OPERATOR);
1934 }
1935 }
1936 }
1937
1938 static void ColourisePHPDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
1939 Accessor &styler) {
1940 // Lexer for HTML requires more lexical states (7 bits worth) than most lexers
1941 StyleContext sc(startPos, length, initStyle, styler, 0x7f);
1942 for (; sc.More(); sc.Forward()) {
1943 ColourisePHPPiece(sc, keywordlists);
1944 }
1945 sc.Complete();
1946 }
1947
// Descriptions of the keyword lists, in list order, ending with a null entry.
// This table is passed to every LexerModule registered in this file.
static const char * const htmlWordListDesc[] = {
	"HTML elements and attributes",   // list 0
	"JavaScript keywords",            // list 1
	"VBScript keywords",              // list 2
	"Python keywords",                // list 3
	"PHP keywords",                   // list 4
	"SGML and DTD keywords",          // list 5
	0                                 // terminator
};
1957
1958 LexerModule lmHTML(SCLEX_HTML, ColouriseHyperTextDoc, "hypertext", 0, htmlWordListDesc);
1959 LexerModule lmXML(SCLEX_XML, ColouriseHyperTextDoc, "xml", 0, htmlWordListDesc);
1960 LexerModule lmASP(SCLEX_ASP, ColouriseASPDoc, "asp", 0, htmlWordListDesc);
1961 LexerModule lmPHP(SCLEX_PHP, ColourisePHPDoc, "php", 0, htmlWordListDesc);