]> git.saurik.com Git - wxWidgets.git/blob - contrib/src/stc/scintilla/src/LexHTML.cxx
e3546b436fdb6b1674a29fef85e44e92a3cedd71
[wxWidgets.git] / contrib / src / stc / scintilla / src / LexHTML.cxx
1 // Scintilla source code edit control
2 /** @file LexHTML.cxx
3 ** Lexer for HTML.
4 **/
5 // Copyright 1998-2003 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
7
8 #include <stdlib.h>
9 #include <string.h>
10 #include <ctype.h>
11 #include <stdio.h>
12 #include <stdarg.h>
13
14 #include "Platform.h"
15
16 #include "PropSet.h"
17 #include "Accessor.h"
18 #include "StyleContext.h"
19 #include "KeyWords.h"
20 #include "Scintilla.h"
21 #include "SciLexer.h"
22
// Distance between each script language's primary style range and its
// alternate ("A") style range. statePrintForState() adds the matching
// offset and stateForPrintState() subtracts it again.
#define SCE_HA_JS (SCE_HJA_START - SCE_HJ_START)
#define SCE_HA_VBS (SCE_HBA_START - SCE_HB_START)
#define SCE_HA_PYTHON (SCE_HPA_START - SCE_HP_START)

// Language of the script region currently being lexed.
enum script_type {
	eScriptNone = 0,
	eScriptJS,
	eScriptVBS,
	eScriptPython,
	eScriptPHP,
	eScriptXML,
	eScriptSGML,
	eScriptSGMLblock
};

// Kind of region the lexer is in; the value is stored in the low two bits
// of the per-line state.
enum script_mode {
	eHtml = 0,
	eNonHtmlScript,
	eNonHtmlPreProc,
	eNonHtmlScriptPreProc
};
29
// Report whether ch may appear inside a word: an ASCII letter or digit,
// '.' or '_'.
// Anything outside 0..0x7F is rejected up front. That includes negative
// values (for example a promoted signed char holding a high-bit byte),
// which must never reach isalnum(): the <ctype.h> classifiers are only
// defined for EOF and values representable as unsigned char.
static inline bool IsAWordChar(const int ch) {
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || ch == '.' || ch == '_';
}
33
// Report whether ch may begin a word: an ASCII letter or digit, or '_'.
// Values outside 0..0x7F, including negative ones, are rejected before
// isalnum() is consulted because that function is only defined for EOF
// and values representable as unsigned char.
static inline bool IsAWordStart(const int ch) {
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || ch == '_';
}
37
38 static script_type segIsScriptingIndicator(Accessor &styler, unsigned int start, unsigned int end, script_type prevValue) {
39 char s[30 + 1];
40 unsigned int i = 0;
41 for (; i < end - start + 1 && i < 30; i++) {
42 s[i] = static_cast<char>(tolower(styler[start + i]));
43 }
44 s[i] = '\0';
45 //Platform::DebugPrintf("Scripting indicator [%s]\n", s);
46 if (strstr(s, "src")) // External script
47 return eScriptNone;
48 if (strstr(s, "vbs"))
49 return eScriptVBS;
50 if (strstr(s, "pyth"))
51 return eScriptPython;
52 if (strstr(s, "javas"))
53 return eScriptJS;
54 if (strstr(s, "jscr"))
55 return eScriptJS;
56 if (strstr(s, "php"))
57 return eScriptPHP;
58 if (strstr(s, "xml"))
59 return eScriptXML;
60
61 return prevValue;
62 }
63
64 static int PrintScriptingIndicatorOffset(Accessor &styler, unsigned int start, unsigned int end) {
65 int iResult = 0;
66 char s[30 + 1];
67 unsigned int i = 0;
68 for (; i < end - start + 1 && i < 30; i++) {
69 s[i] = static_cast<char>(tolower(styler[start + i]));
70 }
71 s[i] = '\0';
72 if (0 == strncmp(s, "php", 3)) {
73 iResult = 3;
74 }
75
76 return iResult;
77 }
78
79 static script_type ScriptOfState(int state) {
80 if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
81 return eScriptPython;
82 } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
83 return eScriptVBS;
84 } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
85 return eScriptJS;
86 } else if ((state >= SCE_HPHP_DEFAULT) && (state <= SCE_HPHP_COMMENTLINE)) {
87 return eScriptPHP;
88 } else if ((state >= SCE_H_SGML_DEFAULT) && (state < SCE_H_SGML_BLOCK_DEFAULT)) {
89 return eScriptSGML;
90 } else if (state == SCE_H_SGML_BLOCK_DEFAULT) {
91 return eScriptSGMLblock;
92 } else {
93 return eScriptNone;
94 }
95 }
96
97 static int statePrintForState(int state, script_mode inScriptType) {
98 int StateToPrint;
99
100 if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
101 StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_PYTHON);
102 } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
103 StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_VBS);
104 } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
105 StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_JS);
106 } else {
107 StateToPrint = state;
108 }
109
110 return StateToPrint;
111 }
112
113 static int stateForPrintState(int StateToPrint) {
114 int state;
115
116 if ((StateToPrint >= SCE_HPA_START) && (StateToPrint <= SCE_HPA_IDENTIFIER)) {
117 state = StateToPrint - SCE_HA_PYTHON;
118 } else if ((StateToPrint >= SCE_HBA_START) && (StateToPrint <= SCE_HBA_STRINGEOL)) {
119 state = StateToPrint - SCE_HA_VBS;
120 } else if ((StateToPrint >= SCE_HJA_START) && (StateToPrint <= SCE_HJA_REGEX)) {
121 state = StateToPrint - SCE_HA_JS;
122 } else {
123 state = StateToPrint;
124 }
125
126 return state;
127 }
128
129 static inline bool IsNumber(unsigned int start, Accessor &styler) {
130 return IsADigit(styler[start]) || (styler[start] == '.') ||
131 (styler[start] == '-') || (styler[start] == '#');
132 }
133
134 static inline bool isStringState(int state) {
135 bool bResult;
136
137 switch (state) {
138 case SCE_HJ_DOUBLESTRING:
139 case SCE_HJ_SINGLESTRING:
140 case SCE_HJA_DOUBLESTRING:
141 case SCE_HJA_SINGLESTRING:
142 case SCE_HB_STRING:
143 case SCE_HBA_STRING:
144 case SCE_HP_STRING:
145 case SCE_HPA_STRING:
146 case SCE_HPHP_HSTRING:
147 case SCE_HPHP_SIMPLESTRING:
148 case SCE_HPHP_HSTRING_VARIABLE:
149 case SCE_HPHP_COMPLEX_VARIABLE:
150 bResult = true;
151 break;
152 default :
153 bResult = false;
154 break;
155 }
156 return bResult;
157 }
158
159 // not really well done, since it's only comments that should lex the %> and <%
160 static inline bool isCommentASPState(int state) {
161 bool bResult;
162
163 switch (state) {
164 case SCE_HJ_COMMENT:
165 case SCE_HJ_COMMENTLINE:
166 case SCE_HJ_COMMENTDOC:
167 case SCE_HB_COMMENTLINE:
168 case SCE_HP_COMMENTLINE:
169 case SCE_HPHP_COMMENT:
170 case SCE_HPHP_COMMENTLINE:
171 bResult = true;
172 break;
173 default :
174 bResult = false;
175 break;
176 }
177 return bResult;
178 }
179
180 static void classifyAttribHTML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
181 bool wordIsNumber = IsNumber(start, styler);
182 char chAttr = SCE_H_ATTRIBUTEUNKNOWN;
183 if (wordIsNumber) {
184 chAttr = SCE_H_NUMBER;
185 } else {
186 char s[30 + 1];
187 unsigned int i = 0;
188 for (; i < end - start + 1 && i < 30; i++) {
189 s[i] = static_cast<char>(tolower(styler[start + i]));
190 }
191 s[i] = '\0';
192 if (keywords.InList(s))
193 chAttr = SCE_H_ATTRIBUTE;
194 }
195 if ((chAttr == SCE_H_ATTRIBUTEUNKNOWN) && !keywords)
196 // No keywords -> all are known
197 chAttr = SCE_H_ATTRIBUTE;
198 styler.ColourTo(end, chAttr);
199 }
200
201 static int classifyTagHTML(unsigned int start, unsigned int end,
202 WordList &keywords, Accessor &styler, bool &tagDontFold,
203 bool caseSensitive) {
204 char s[30 + 2];
205 // Copy after the '<'
206 unsigned int i = 0;
207 for (unsigned int cPos = start; cPos <= end && i < 30; cPos++) {
208 char ch = styler[cPos];
209 if ((ch != '<') && (ch != '/')) {
210 s[i++] = caseSensitive ? ch : static_cast<char>(tolower(ch));
211 }
212 }
213
214 //The following is only a quick hack, to see if this whole thing would work
215 //we first need the tagname with a trailing space...
216 s[i] = ' ';
217 s[i+1] = '\0';
218
219 //...to find it in the list of no-container-tags
220 // (There are many more. We will need a keywordlist in the property file for this)
221 tagDontFold = (NULL != strstr("meta link img area br hr input ",s));
222
223 //now we can remove the trailing space
224 s[i] = '\0';
225
226 bool isScript = false;
227 char chAttr = SCE_H_TAGUNKNOWN;
228 if (s[0] == '!') {
229 chAttr = SCE_H_SGML_DEFAULT;
230 } else if (s[0] == '/') { // Closing tag
231 if (keywords.InList(s + 1))
232 chAttr = SCE_H_TAG;
233 } else {
234 if (keywords.InList(s)) {
235 chAttr = SCE_H_TAG;
236 isScript = 0 == strcmp(s, "script");
237 }
238 }
239 if ((chAttr == SCE_H_TAGUNKNOWN) && !keywords) {
240 // No keywords -> all are known
241 chAttr = SCE_H_TAG;
242 isScript = 0 == strcmp(s, "script");
243 }
244 styler.ColourTo(end, chAttr);
245 return isScript ? SCE_H_SCRIPT : chAttr;
246 }
247
248 static void classifyWordHTJS(unsigned int start, unsigned int end,
249 WordList &keywords, Accessor &styler, script_mode inScriptType) {
250 char chAttr = SCE_HJ_WORD;
251 bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.');
252 if (wordIsNumber)
253 chAttr = SCE_HJ_NUMBER;
254 else {
255 char s[30 + 1];
256 unsigned int i = 0;
257 for (; i < end - start + 1 && i < 30; i++) {
258 s[i] = styler[start + i];
259 }
260 s[i] = '\0';
261 if (keywords.InList(s))
262 chAttr = SCE_HJ_KEYWORD;
263 }
264 styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
265 }
266
267 static int classifyWordHTVB(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, script_mode inScriptType) {
268 char chAttr = SCE_HB_IDENTIFIER;
269 bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.');
270 if (wordIsNumber)
271 chAttr = SCE_HB_NUMBER;
272 else {
273 char s[30 + 1];
274 unsigned int i = 0;
275 for (; i < end - start + 1 && i < 30; i++) {
276 s[i] = static_cast<char>(tolower(styler[start + i]));
277 }
278 s[i] = '\0';
279 if (keywords.InList(s)) {
280 chAttr = SCE_HB_WORD;
281 if (strcmp(s, "rem") == 0)
282 chAttr = SCE_HB_COMMENTLINE;
283 }
284 }
285 styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
286 if (chAttr == SCE_HB_COMMENTLINE)
287 return SCE_HB_COMMENTLINE;
288 else
289 return SCE_HB_DEFAULT;
290 }
291
292 static void classifyWordHTPy(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord, script_mode inScriptType) {
293 bool wordIsNumber = IsADigit(styler[start]);
294 char s[30 + 1];
295 unsigned int i = 0;
296 for (; i < end - start + 1 && i < 30; i++) {
297 s[i] = styler[start + i];
298 }
299 s[i] = '\0';
300 char chAttr = SCE_HP_IDENTIFIER;
301 if (0 == strcmp(prevWord, "class"))
302 chAttr = SCE_HP_CLASSNAME;
303 else if (0 == strcmp(prevWord, "def"))
304 chAttr = SCE_HP_DEFNAME;
305 else if (wordIsNumber)
306 chAttr = SCE_HP_NUMBER;
307 else if (keywords.InList(s))
308 chAttr = SCE_HP_WORD;
309 styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
310 strcpy(prevWord, s);
311 }
312
313 // Update the word colour to default or keyword
314 // Called when in a PHP word
315 static void classifyWordHTPHP(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
316 char chAttr = SCE_HPHP_DEFAULT;
317 bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.' && start+1 <= end && IsADigit(styler[start+1]));
318 if (wordIsNumber)
319 chAttr = SCE_HPHP_NUMBER;
320 else {
321 char s[100 + 1];
322 unsigned int i = 0;
323 for (; i < end - start + 1 && i < 100; i++) {
324 s[i] = static_cast<char>(tolower(styler[start + i]));
325 }
326 s[i] = '\0';
327 if (keywords.InList(s))
328 chAttr = SCE_HPHP_WORD;
329 }
330 styler.ColourTo(end, chAttr);
331 }
332
333 static bool isWordHSGML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
334 char s[30 + 1];
335 unsigned int i = 0;
336 for (; i < end - start + 1 && i < 30; i++) {
337 s[i] = styler[start + i];
338 }
339 s[i] = '\0';
340 return keywords.InList(s);
341 }
342
343 static bool isWordCdata(unsigned int start, unsigned int end, Accessor &styler) {
344 char s[30 + 1];
345 unsigned int i = 0;
346 for (; i < end - start + 1 && i < 30; i++) {
347 s[i] = styler[start + i];
348 }
349 s[i] = '\0';
350 return (0 == strcmp(s, "[CDATA["));
351 }
352
353 // Return the first state to reach when entering a scripting language
354 static int StateForScript(script_type scriptLanguage) {
355 int Result;
356 switch (scriptLanguage) {
357 case eScriptVBS:
358 Result = SCE_HB_START;
359 break;
360 case eScriptPython:
361 Result = SCE_HP_START;
362 break;
363 case eScriptPHP:
364 Result = SCE_HPHP_DEFAULT;
365 break;
366 case eScriptXML:
367 Result = SCE_H_TAGUNKNOWN;
368 break;
369 case eScriptSGML:
370 Result = SCE_H_SGML_DEFAULT;
371 break;
372 default :
373 Result = SCE_HJ_START;
374 break;
375 }
376 return Result;
377 }
378
// True for characters that may be part of an HTML tag or attribute word:
// any non-ASCII character, ASCII letters and digits, and . - _ : ! #
static inline bool ishtmlwordchar(char ch) {
	if (!isascii(ch))
		return true;
	switch (ch) {
	case '.':
	case '-':
	case '_':
	case ':':
	case '!':
	case '#':
		return true;
	default:
		return isalnum(ch) != 0;
	}
}
383
// True for characters that may be part of an SGML word: any non-ASCII
// character, ASCII letters and digits, and . _ : ! # [
static inline bool issgmlwordchar(char ch) {
	if (!isascii(ch))
		return true;
	switch (ch) {
	case '.':
	case '_':
	case ':':
	case '!':
	case '#':
	case '[':
		return true;
	default:
		return isalnum(ch) != 0;
	}
}
388
// True when ch may begin a PHP word: an ASCII letter, '_', or any byte
// value from 0x7F upwards.
static inline bool IsPhpWordStart(const unsigned char ch) {
	if (ch >= 0x7f)
		return true;
	// Below 0x7F the value is plain ASCII, so isalpha() is safe to call.
	return (ch == '_') || (isalpha(ch) != 0);
}
392
393 static inline bool IsPhpWordChar(char ch) {
394 return IsADigit(ch) || IsPhpWordStart(ch);
395 }
396
397 static bool InTagState(int state) {
398 return state == SCE_H_TAG || state == SCE_H_TAGUNKNOWN ||
399 state == SCE_H_SCRIPT ||
400 state == SCE_H_ATTRIBUTE || state == SCE_H_ATTRIBUTEUNKNOWN ||
401 state == SCE_H_NUMBER || state == SCE_H_OTHER ||
402 state == SCE_H_DOUBLESTRING || state == SCE_H_SINGLESTRING;
403 }
404
405 static bool IsCommentState(const int state) {
406 return state == SCE_H_COMMENT || state == SCE_H_SGML_COMMENT;
407 }
408
// True when ch is a carriage return or a line feed.
static bool isLineEnd(char ch) {
	switch (ch) {
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
412
// True when ch is one of the characters after which a regular expression
// may start: '(', '=' or ','.
static bool isOKBeforeRE(char ch) {
	switch (ch) {
	case '(':
	case '=':
	case ',':
		return true;
	default:
		return false;
	}
}
416
417 static bool isPHPStringState(int state) {
418 return
419 (state == SCE_HPHP_HSTRING) ||
420 (state == SCE_HPHP_SIMPLESTRING) ||
421 (state == SCE_HPHP_HSTRING_VARIABLE) ||
422 (state == SCE_HPHP_COMPLEX_VARIABLE);
423 }
424
425 static int FindPhpStringDelimiter(char *phpStringDelimiter, const int phpStringDelimiterSize, int i, const int lengthDoc, Accessor &styler) {
426 int j;
427 phpStringDelimiter[0] = '\n';
428 for (j = i; j < lengthDoc && styler[j] != '\n' && styler[j] != '\r'; j++) {
429 if (j - i < phpStringDelimiterSize - 2)
430 phpStringDelimiter[j-i+1] = styler[j];
431 else
432 i++;
433 }
434 phpStringDelimiter[j-i+1] = '\0';
435 return j;
436 }
437
438 static void ColouriseHyperTextDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
439 Accessor &styler) {
440 WordList &keywords = *keywordlists[0];
441 WordList &keywords2 = *keywordlists[1];
442 WordList &keywords3 = *keywordlists[2];
443 WordList &keywords4 = *keywordlists[3];
444 WordList &keywords5 = *keywordlists[4];
445 WordList &keywords6 = *keywordlists[5]; // SGML (DTD) keywords
446
447 // Lexer for HTML requires more lexical states (7 bits worth) than most lexers
448 styler.StartAt(startPos, STYLE_MAX);
449 char prevWord[200];
450 prevWord[0] = '\0';
451 char phpStringDelimiter[200]; // PHP is not limited in length, we are
452 phpStringDelimiter[0] = '\0';
453 int StateToPrint = initStyle;
454 int state = stateForPrintState(StateToPrint);
455
456 // If inside a tag, it may be a script tag, so reread from the start to ensure any language tags are seen
457 if (InTagState(state)) {
458 while ((startPos > 0) && (InTagState(styler.StyleAt(startPos - 1)))) {
459 startPos--;
460 length++;
461 }
462 state = SCE_H_DEFAULT;
463 }
464 // String can be heredoc, must find a delimiter first
465 while (startPos > 0 && isPHPStringState(state) && state != SCE_HPHP_SIMPLESTRING) {
466 startPos--;
467 length++;
468 state = styler.StyleAt(startPos);
469 }
470 styler.StartAt(startPos, STYLE_MAX);
471
472 int lineCurrent = styler.GetLine(startPos);
473 int lineState;
474 if (lineCurrent > 0) {
475 lineState = styler.GetLineState(lineCurrent);
476 } else {
477 // Default client and ASP scripting language is JavaScript
478 lineState = eScriptJS << 8;
479 lineState |= styler.GetPropertyInt("asp.default.language", eScriptJS) << 4;
480 }
481 script_mode inScriptType = script_mode((lineState >> 0) & 0x03); // 2 bits of scripting mode
482 bool tagOpened = (lineState >> 2) & 0x01; // 1 bit to know if we are in an opened tag
483 bool tagClosing = (lineState >> 3) & 0x01; // 1 bit to know if we are in a closing tag
484 bool tagDontFold = false; //some HTML tags should not be folded
485 script_type aspScript = script_type((lineState >> 4) & 0x0F); // 4 bits of script name
486 script_type clientScript = script_type((lineState >> 8) & 0x0F); // 4 bits of script name
487 int beforePreProc = (lineState >> 12) & 0xFF; // 8 bits of state
488
489 script_type scriptLanguage = ScriptOfState(state);
490
491 const bool foldHTML = styler.GetPropertyInt("fold.html", 0) != 0;
492 const bool fold = foldHTML && styler.GetPropertyInt("fold", 0);
493 const bool foldHTMLPreprocessor = foldHTML && styler.GetPropertyInt("fold.html.preprocessor", 1);
494 const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
495 const bool caseSensitive = styler.GetPropertyInt("html.tags.case.sensitive", 0) != 0;
496
497 int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
498 int levelCurrent = levelPrev;
499 int visibleChars = 0;
500
501 char chPrev = ' ';
502 char ch = ' ';
503 char chPrevNonWhite = ' ';
504 styler.StartSegment(startPos);
505 const int lengthDoc = startPos + length;
506 for (int i = startPos; i < lengthDoc; i++) {
507 const char chPrev2 = chPrev;
508 chPrev = ch;
509 if (ch != ' ' && ch != '\t')
510 chPrevNonWhite = ch;
511 ch = styler[i];
512 char chNext = styler.SafeGetCharAt(i + 1);
513 const char chNext2 = styler.SafeGetCharAt(i + 2);
514
515 // Handle DBCS codepages
516 if (styler.IsLeadByte(ch)) {
517 chPrev = ' ';
518 i += 1;
519 continue;
520 }
521
522 if ((!isspacechar(ch) || !foldCompact) && fold)
523 visibleChars++;
524
525 // decide what is the current state to print (depending of the script tag)
526 StateToPrint = statePrintForState(state, inScriptType);
527
528 // handle script folding
529 if (fold) {
530 switch (scriptLanguage) {
531 case eScriptJS:
532 case eScriptPHP:
533 //not currently supported case eScriptVBS:
534
535 if ((state != SCE_HPHP_COMMENT) && (state != SCE_HPHP_COMMENTLINE) && (state != SCE_HJ_COMMENT) && (state != SCE_HJ_COMMENTLINE) && (state != SCE_HJ_COMMENTDOC) && (!isStringState(state))) {
536 //Platform::DebugPrintf("state=%d, StateToPrint=%d, initStyle=%d\n", state, StateToPrint, initStyle);
537 //if ((state == SCE_HPHP_OPERATOR) || (state == SCE_HPHP_DEFAULT) || (state == SCE_HJ_SYMBOLS) || (state == SCE_HJ_START) || (state == SCE_HJ_DEFAULT)) {
538 if ((ch == '{') || (ch == '}')) {
539 levelCurrent += (ch == '{') ? 1 : -1;
540 }
541 }
542 break;
543 case eScriptPython:
544 if (state != SCE_HP_COMMENTLINE) {
545 if ((ch == ':') && ((chNext == '\n') || (chNext == '\r' && chNext2 == '\n'))) {
546 levelCurrent++;
547 } else if ((ch == '\n') && !((chNext == '\r') && (chNext2 == '\n')) && (chNext != '\n')) {
548 // check if the number of tabs is lower than the level
549 int Findlevel = (levelCurrent & ~SC_FOLDLEVELBASE) * 8;
550 for (int j = 0; Findlevel > 0; j++) {
551 char chTmp = styler.SafeGetCharAt(i + j + 1);
552 if (chTmp == '\t') {
553 Findlevel -= 8;
554 } else if (chTmp == ' ') {
555 Findlevel--;
556 } else {
557 break;
558 }
559 }
560
561 if (Findlevel > 0) {
562 levelCurrent -= Findlevel / 8;
563 if (Findlevel % 8)
564 levelCurrent--;
565 }
566 }
567 }
568 break;
569 default:
570 break;
571 }
572 }
573
574 if ((ch == '\r' && chNext != '\n') || (ch == '\n')) {
575 // Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix)
576 // Avoid triggering two times on Dos/Win
577 // New line -> record any line state onto /next/ line
578 if (fold) {
579 int lev = levelPrev;
580 if (visibleChars == 0)
581 lev |= SC_FOLDLEVELWHITEFLAG;
582 if ((levelCurrent > levelPrev) && (visibleChars > 0))
583 lev |= SC_FOLDLEVELHEADERFLAG;
584
585 styler.SetLevel(lineCurrent, lev);
586 visibleChars = 0;
587 levelPrev = levelCurrent;
588 }
589 lineCurrent++;
590 styler.SetLineState(lineCurrent,
591 ((inScriptType & 0x03) << 0) |
592 ((tagOpened & 0x01) << 2) |
593 ((tagClosing & 0x01) << 3) |
594 ((aspScript & 0x0F) << 4) |
595 ((clientScript & 0x0F) << 8) |
596 ((beforePreProc & 0xFF) << 12));
597 }
598
599 // generic end of script processing
600 else if ((inScriptType == eNonHtmlScript) && (ch == '<') && (chNext == '/')) {
601 // Check if it's the end of the script tag (or any other HTML tag)
602 switch (state) {
603 // in these cases, you can embed HTML tags (to confirm !!!!!!!!!!!!!!!!!!!!!!)
604 case SCE_H_DOUBLESTRING:
605 case SCE_H_SINGLESTRING:
606 case SCE_HJ_COMMENT:
607 case SCE_HJ_COMMENTDOC:
608 // SCE_HJ_COMMENTLINE removed as this is a common thing done to hide
609 // the end of script marker from some JS interpreters.
610 //case SCE_HJ_COMMENTLINE:
611 case SCE_HJ_DOUBLESTRING:
612 case SCE_HJ_SINGLESTRING:
613 case SCE_HJ_REGEX:
614 case SCE_HB_STRING:
615 case SCE_HP_STRING:
616 case SCE_HP_TRIPLE:
617 case SCE_HP_TRIPLEDOUBLE:
618 break;
619 default :
620 // closing tag of the script (it's a closing HTML tag anyway)
621 styler.ColourTo(i - 1, StateToPrint);
622 state = SCE_H_TAGUNKNOWN;
623 inScriptType = eHtml;
624 scriptLanguage = eScriptNone;
625 clientScript = eScriptJS;
626 i += 2;
627 visibleChars += 2;
628 tagClosing = true;
629 continue;
630 }
631 }
632
633 /////////////////////////////////////
634 // handle the start of PHP pre-processor = Non-HTML
635 else if ((state != SCE_H_ASPAT) &&
636 !isPHPStringState(state) &&
637 (state != SCE_HPHP_COMMENT) &&
638 (ch == '<') &&
639 (chNext == '?')) {
640 scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment() + 2, i + 10, eScriptPHP);
641 if (scriptLanguage != eScriptPHP && isStringState(state)) continue;
642 styler.ColourTo(i - 1, StateToPrint);
643 beforePreProc = state;
644 i++;
645 visibleChars++;
646 i += PrintScriptingIndicatorOffset(styler, styler.GetStartSegment() + 2, i + 10);
647 if (scriptLanguage == eScriptXML)
648 styler.ColourTo(i, SCE_H_XMLSTART);
649 else
650 styler.ColourTo(i, SCE_H_QUESTION);
651 state = StateForScript(scriptLanguage);
652 if (inScriptType == eNonHtmlScript)
653 inScriptType = eNonHtmlScriptPreProc;
654 else
655 inScriptType = eNonHtmlPreProc;
656 // fold whole script
657 if (foldHTMLPreprocessor){
658 levelCurrent++;
659 if (scriptLanguage == eScriptXML)
660 levelCurrent--; // no folding of the XML first tag (all XML-like tags in this case)
661 }
662 // should be better
663 ch = styler.SafeGetCharAt(i);
664 continue;
665 }
666
667 // handle the start of ASP pre-processor = Non-HTML
668 else if (!isCommentASPState(state) && (ch == '<') && (chNext == '%')) {
669 styler.ColourTo(i - 1, StateToPrint);
670 beforePreProc = state;
671 if (inScriptType == eNonHtmlScript)
672 inScriptType = eNonHtmlScriptPreProc;
673 else
674 inScriptType = eNonHtmlPreProc;
675
676 if (chNext2 == '@') {
677 i += 2; // place as if it was the second next char treated
678 visibleChars += 2;
679 state = SCE_H_ASPAT;
680 } else if ((chNext2 == '-') && (styler.SafeGetCharAt(i + 3) == '-')) {
681 styler.ColourTo(i + 3, SCE_H_ASP);
682 state = SCE_H_XCCOMMENT;
683 scriptLanguage = eScriptVBS;
684 continue;
685 } else {
686 if (chNext2 == '=') {
687 i += 2; // place as if it was the second next char treated
688 visibleChars += 2;
689 } else {
690 i++; // place as if it was the next char treated
691 visibleChars++;
692 }
693
694 state = StateForScript(aspScript);
695 }
696 scriptLanguage = eScriptVBS;
697 styler.ColourTo(i, SCE_H_ASP);
698 // fold whole script
699 if (foldHTMLPreprocessor)
700 levelCurrent++;
701 // should be better
702 ch = styler.SafeGetCharAt(i);
703 continue;
704 }
705
706 /////////////////////////////////////
707 // handle the start of SGML language (DTD)
708 else if (((scriptLanguage == eScriptNone) || (scriptLanguage == eScriptXML)) &&
709 (chPrev == '<') &&
710 (ch == '!') &&
711 (StateToPrint != SCE_H_CDATA) && (!IsCommentState(StateToPrint))) {
712 beforePreProc = state;
713 styler.ColourTo(i - 2, StateToPrint);
714 if ((chNext == '-') && (chNext2 == '-')) {
715 state = SCE_H_COMMENT; // wait for a pending command
716 }
717 else if (isWordCdata(i + 1, i + 7, styler)) {
718 state = SCE_H_CDATA;
719 } else {
720 styler.ColourTo(i, SCE_H_SGML_DEFAULT); // <! is default
721 scriptLanguage = eScriptSGML;
722 state = SCE_H_SGML_COMMAND; // wait for a pending command
723 }
724 // fold whole tag (-- when closing the tag)
725 if (foldHTMLPreprocessor)
726 levelCurrent++;
727 continue;
728 }
729
730 // handle the end of a pre-processor = Non-HTML
731 else if ((
732 ((inScriptType == eNonHtmlPreProc)
733 || (inScriptType == eNonHtmlScriptPreProc)) && (
734 ((scriptLanguage == eScriptPHP) && (ch == '?') && !isPHPStringState(state) && (state != SCE_HPHP_COMMENT)) ||
735 ((scriptLanguage != eScriptNone) && !isStringState(state) &&
736 (ch == '%'))
737 ) && (chNext == '>')) ||
738 ((scriptLanguage == eScriptSGML) && (ch == '>') && (state != SCE_H_SGML_COMMENT))) {
739 if (state == SCE_H_ASPAT) {
740 aspScript = segIsScriptingIndicator(styler,
741 styler.GetStartSegment(), i - 1, aspScript);
742 }
743 // Bounce out of any ASP mode
744 switch (state) {
745 case SCE_HJ_WORD:
746 classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
747 break;
748 case SCE_HB_WORD:
749 classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
750 break;
751 case SCE_HP_WORD:
752 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
753 break;
754 case SCE_HPHP_WORD:
755 classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
756 break;
757 case SCE_H_XCCOMMENT:
758 styler.ColourTo(i - 1, state);
759 break;
760 default :
761 styler.ColourTo(i - 1, StateToPrint);
762 break;
763 }
764 if (scriptLanguage != eScriptSGML) {
765 i++;
766 visibleChars++;
767 }
768 if (ch == '%')
769 styler.ColourTo(i, SCE_H_ASP);
770 else if (scriptLanguage == eScriptXML)
771 styler.ColourTo(i, SCE_H_XMLEND);
772 else if (scriptLanguage == eScriptSGML)
773 styler.ColourTo(i, SCE_H_SGML_DEFAULT);
774 else
775 styler.ColourTo(i, SCE_H_QUESTION);
776 state = beforePreProc;
777 if (inScriptType == eNonHtmlScriptPreProc)
778 inScriptType = eNonHtmlScript;
779 else
780 inScriptType = eHtml;
781 scriptLanguage = eScriptNone;
782 // unfold all scripting languages
783 if (foldHTMLPreprocessor)
784 levelCurrent--;
785 continue;
786 }
787 /////////////////////////////////////
788
789 switch (state) {
790 case SCE_H_DEFAULT:
791 if (ch == '<') {
792 // in HTML, fold on tag open and unfold on tag close
793 tagOpened = true;
794 tagClosing = (chNext == '/');
795 styler.ColourTo(i - 1, StateToPrint);
796 if (chNext != '!')
797 state = SCE_H_TAGUNKNOWN;
798 } else if (ch == '&') {
799 styler.ColourTo(i - 1, SCE_H_DEFAULT);
800 state = SCE_H_ENTITY;
801 }
802 break;
803 case SCE_H_SGML_DEFAULT:
804 case SCE_H_SGML_BLOCK_DEFAULT:
805 // if (scriptLanguage == eScriptSGMLblock)
806 // StateToPrint = SCE_H_SGML_BLOCK_DEFAULT;
807
808 if (ch == '\"') {
809 styler.ColourTo(i - 1, StateToPrint);
810 state = SCE_H_SGML_DOUBLESTRING;
811 } else if (ch == '\'') {
812 styler.ColourTo(i - 1, StateToPrint);
813 state = SCE_H_SGML_SIMPLESTRING;
814 } else if ((ch == '-') && (chPrev == '-')) {
815 styler.ColourTo(i - 2, StateToPrint);
816 state = SCE_H_SGML_COMMENT;
817 } else if (isascii(ch) && isalpha(ch) && (chPrev == '%')) {
818 styler.ColourTo(i - 2, StateToPrint);
819 state = SCE_H_SGML_ENTITY;
820 } else if (ch == '#') {
821 styler.ColourTo(i - 1, StateToPrint);
822 state = SCE_H_SGML_SPECIAL;
823 } else if (ch == '[') {
824 styler.ColourTo(i - 1, StateToPrint);
825 scriptLanguage = eScriptSGMLblock;
826 state = SCE_H_SGML_BLOCK_DEFAULT;
827 } else if (ch == ']') {
828 if (scriptLanguage == eScriptSGMLblock) {
829 styler.ColourTo(i, StateToPrint);
830 scriptLanguage = eScriptSGML;
831 } else {
832 styler.ColourTo(i - 1, StateToPrint);
833 styler.ColourTo(i, SCE_H_SGML_ERROR);
834 }
835 state = SCE_H_SGML_DEFAULT;
836 } else if (scriptLanguage == eScriptSGMLblock) {
837 if ((ch == '!') && (chPrev == '<')) {
838 styler.ColourTo(i - 2, StateToPrint);
839 styler.ColourTo(i, SCE_H_SGML_DEFAULT);
840 state = SCE_H_SGML_COMMAND;
841 } else if (ch == '>') {
842 styler.ColourTo(i - 1, StateToPrint);
843 styler.ColourTo(i, SCE_H_SGML_DEFAULT);
844 }
845 }
846 break;
847 case SCE_H_SGML_COMMAND:
848 if ((ch == '-') && (chPrev == '-')) {
849 styler.ColourTo(i - 2, StateToPrint);
850 state = SCE_H_SGML_COMMENT;
851 } else if (!issgmlwordchar(ch)) {
852 if (isWordHSGML(styler.GetStartSegment(), i - 1, keywords6, styler)) {
853 styler.ColourTo(i - 1, StateToPrint);
854 state = SCE_H_SGML_1ST_PARAM;
855 } else {
856 state = SCE_H_SGML_ERROR;
857 }
858 }
859 break;
860 case SCE_H_SGML_1ST_PARAM:
861 // wait for the beginning of the word
862 if ((ch == '-') && (chPrev == '-')) {
863 if (scriptLanguage == eScriptSGMLblock) {
864 styler.ColourTo(i - 2, SCE_H_SGML_BLOCK_DEFAULT);
865 } else {
866 styler.ColourTo(i - 2, SCE_H_SGML_DEFAULT);
867 }
868 state = SCE_H_SGML_1ST_PARAM_COMMENT;
869 } else if (issgmlwordchar(ch)) {
870 if (scriptLanguage == eScriptSGMLblock) {
871 styler.ColourTo(i - 1, SCE_H_SGML_BLOCK_DEFAULT);
872 } else {
873 styler.ColourTo(i - 1, SCE_H_SGML_DEFAULT);
874 }
875 // find the length of the word
876 int size = 1;
877 while (ishtmlwordchar(styler.SafeGetCharAt(i + size)))
878 size++;
879 styler.ColourTo(i + size - 1, StateToPrint);
880 i += size - 1;
881 visibleChars += size - 1;
882 ch = styler.SafeGetCharAt(i);
883 if (scriptLanguage == eScriptSGMLblock) {
884 state = SCE_H_SGML_BLOCK_DEFAULT;
885 } else {
886 state = SCE_H_SGML_DEFAULT;
887 }
888 continue;
889 }
890 break;
891 case SCE_H_SGML_ERROR:
892 if ((ch == '-') && (chPrev == '-')) {
893 styler.ColourTo(i - 2, StateToPrint);
894 state = SCE_H_SGML_COMMENT;
895 }
896 case SCE_H_SGML_DOUBLESTRING:
897 if (ch == '\"') {
898 styler.ColourTo(i, StateToPrint);
899 state = SCE_H_SGML_DEFAULT;
900 }
901 break;
902 case SCE_H_SGML_SIMPLESTRING:
903 if (ch == '\'') {
904 styler.ColourTo(i, StateToPrint);
905 state = SCE_H_SGML_DEFAULT;
906 }
907 break;
908 case SCE_H_SGML_COMMENT:
909 if ((ch == '-') && (chPrev == '-')) {
910 styler.ColourTo(i, StateToPrint);
911 state = SCE_H_SGML_DEFAULT;
912 }
913 break;
914 case SCE_H_CDATA:
915 if ((chPrev2 == ']') && (chPrev == ']') && (ch == '>')) {
916 styler.ColourTo(i, StateToPrint);
917 state = SCE_H_DEFAULT;
918 levelCurrent--;
919 }
920 break;
921 case SCE_H_COMMENT:
922 if ((chPrev2 == '-') && (chPrev == '-') && (ch == '>')) {
923 styler.ColourTo(i, StateToPrint);
924 state = SCE_H_DEFAULT;
925 levelCurrent--;
926 }
927 break;
928 case SCE_H_SGML_1ST_PARAM_COMMENT:
929 if ((ch == '-') && (chPrev == '-')) {
930 styler.ColourTo(i, SCE_H_SGML_COMMENT);
931 state = SCE_H_SGML_1ST_PARAM;
932 }
933 break;
934 case SCE_H_SGML_SPECIAL:
935 if (!(isascii(ch) && isupper(ch))) {
936 styler.ColourTo(i - 1, StateToPrint);
937 if (isalnum(ch)) {
938 state = SCE_H_SGML_ERROR;
939 } else {
940 state = SCE_H_SGML_DEFAULT;
941 }
942 }
943 break;
944 case SCE_H_SGML_ENTITY:
945 if (ch == ';') {
946 styler.ColourTo(i, StateToPrint);
947 state = SCE_H_SGML_DEFAULT;
948 } else if (!(isascii(ch) && isalnum(ch)) && ch != '-' && ch != '.') {
949 styler.ColourTo(i, SCE_H_SGML_ERROR);
950 state = SCE_H_SGML_DEFAULT;
951 }
952 break;
953 case SCE_H_ENTITY:
954 if (ch == ';') {
955 styler.ColourTo(i, StateToPrint);
956 state = SCE_H_DEFAULT;
957 }
958 if (ch != '#' && !(isascii(ch) && isalnum(ch)) // Should check that '#' follows '&', but it is unlikely anyway...
959 && ch != '.' && ch != '-' && ch != '_' && ch != ':') { // valid in XML
960 styler.ColourTo(i, SCE_H_TAGUNKNOWN);
961 state = SCE_H_DEFAULT;
962 }
963 break;
964 case SCE_H_TAGUNKNOWN:
965 if (!ishtmlwordchar(ch) && !((ch == '/') && (chPrev == '<')) && ch != '[') {
966 int eClass = classifyTagHTML(styler.GetStartSegment(),
967 i - 1, keywords, styler, tagDontFold, caseSensitive);
968 if (eClass == SCE_H_SCRIPT) {
969 if (!tagClosing) {
970 inScriptType = eNonHtmlScript;
971 scriptLanguage = clientScript;
972 eClass = SCE_H_TAG;
973 } else {
974 scriptLanguage = eScriptNone;
975 eClass = SCE_H_TAG;
976 }
977 }
978 if (ch == '>') {
979 styler.ColourTo(i, eClass);
980 if (inScriptType == eNonHtmlScript) {
981 state = StateForScript(scriptLanguage);
982 } else {
983 state = SCE_H_DEFAULT;
984 }
985 tagOpened = false;
986 if (!tagDontFold){
987 if (tagClosing) {
988 levelCurrent--;
989 } else {
990 levelCurrent++;
991 }
992 }
993 tagClosing = false;
994 } else if (ch == '/' && chNext == '>') {
995 if (eClass == SCE_H_TAGUNKNOWN) {
996 styler.ColourTo(i + 1, SCE_H_TAGUNKNOWN);
997 } else {
998 styler.ColourTo(i - 1, StateToPrint);
999 styler.ColourTo(i + 1, SCE_H_TAGEND);
1000 }
1001 i++;
1002 ch = chNext;
1003 state = SCE_H_DEFAULT;
1004 tagOpened = false;
1005 } else {
1006 if (eClass != SCE_H_TAGUNKNOWN) {
1007 if (eClass == SCE_H_SGML_DEFAULT) {
1008 state = SCE_H_SGML_DEFAULT;
1009 } else {
1010 state = SCE_H_OTHER;
1011 }
1012 }
1013 }
1014 }
1015 break;
1016 case SCE_H_ATTRIBUTE:
1017 if (!ishtmlwordchar(ch) && ch != '/' && ch != '-') {
1018 if (inScriptType == eNonHtmlScript) {
1019 int scriptLanguagePrev = scriptLanguage;
1020 clientScript = segIsScriptingIndicator(styler, styler.GetStartSegment(), i - 1, scriptLanguage);
1021 scriptLanguage = clientScript;
1022 if ((scriptLanguagePrev != scriptLanguage) && (scriptLanguage == eScriptNone))
1023 inScriptType = eHtml;
1024 }
1025 classifyAttribHTML(styler.GetStartSegment(), i - 1, keywords, styler);
1026 if (ch == '>') {
1027 styler.ColourTo(i, SCE_H_TAG);
1028 if (inScriptType == eNonHtmlScript) {
1029 state = StateForScript(scriptLanguage);
1030 } else {
1031 state = SCE_H_DEFAULT;
1032 }
1033 tagOpened = false;
1034 if (!tagDontFold){
1035 if (tagClosing){
1036 levelCurrent--;
1037 } else {
1038 levelCurrent++;
1039 }
1040 }
1041 tagClosing = false;
1042 } else if (ch == '=') {
1043 styler.ColourTo(i, SCE_H_OTHER);
1044 state = SCE_H_VALUE;
1045 } else {
1046 state = SCE_H_OTHER;
1047 }
1048 }
1049 break;
1050 case SCE_H_OTHER:
1051 if (ch == '>') {
1052 styler.ColourTo(i - 1, StateToPrint);
1053 styler.ColourTo(i, SCE_H_TAG);
1054 if (inScriptType == eNonHtmlScript) {
1055 state = StateForScript(scriptLanguage);
1056 } else {
1057 state = SCE_H_DEFAULT;
1058 }
1059 tagOpened = false;
1060 if (!tagDontFold){
1061 if (tagClosing){
1062 levelCurrent--;
1063 } else {
1064 levelCurrent++;
1065 }
1066 }
1067 tagClosing = false;
1068 } else if (ch == '\"') {
1069 styler.ColourTo(i - 1, StateToPrint);
1070 state = SCE_H_DOUBLESTRING;
1071 } else if (ch == '\'') {
1072 styler.ColourTo(i - 1, StateToPrint);
1073 state = SCE_H_SINGLESTRING;
1074 } else if (ch == '=') {
1075 styler.ColourTo(i, StateToPrint);
1076 state = SCE_H_VALUE;
1077 } else if (ch == '/' && chNext == '>') {
1078 styler.ColourTo(i - 1, StateToPrint);
1079 styler.ColourTo(i + 1, SCE_H_TAGEND);
1080 i++;
1081 ch = chNext;
1082 state = SCE_H_DEFAULT;
1083 tagOpened = false;
1084 } else if (ch == '?' && chNext == '>') {
1085 styler.ColourTo(i - 1, StateToPrint);
1086 styler.ColourTo(i + 1, SCE_H_XMLEND);
1087 i++;
1088 ch = chNext;
1089 state = SCE_H_DEFAULT;
1090 } else if (ishtmlwordchar(ch)) {
1091 styler.ColourTo(i - 1, StateToPrint);
1092 state = SCE_H_ATTRIBUTE;
1093 }
1094 break;
1095 case SCE_H_DOUBLESTRING:
1096 if (ch == '\"') {
1097 if (inScriptType == eNonHtmlScript) {
1098 scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
1099 }
1100 styler.ColourTo(i, SCE_H_DOUBLESTRING);
1101 state = SCE_H_OTHER;
1102 }
1103 break;
1104 case SCE_H_SINGLESTRING:
1105 if (ch == '\'') {
1106 if (inScriptType == eNonHtmlScript) {
1107 scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
1108 }
1109 styler.ColourTo(i, SCE_H_SINGLESTRING);
1110 state = SCE_H_OTHER;
1111 }
1112 break;
1113 case SCE_H_VALUE:
1114 if (!ishtmlwordchar(ch)) {
1115 if (ch == '\"' && chPrev == '=') {
1116 // Should really test for being first character
1117 state = SCE_H_DOUBLESTRING;
1118 } else if (ch == '\'' && chPrev == '=') {
1119 state = SCE_H_SINGLESTRING;
1120 } else {
1121 if (IsNumber(styler.GetStartSegment(), styler)) {
1122 styler.ColourTo(i - 1, SCE_H_NUMBER);
1123 } else {
1124 styler.ColourTo(i - 1, StateToPrint);
1125 }
1126 if (ch == '>') {
1127 styler.ColourTo(i, SCE_H_TAG);
1128 if (inScriptType == eNonHtmlScript) {
1129 state = StateForScript(scriptLanguage);
1130 } else {
1131 state = SCE_H_DEFAULT;
1132 }
1133 tagOpened = false;
1134 if (!tagDontFold){
1135 if (tagClosing){
1136 levelCurrent--;
1137 } else {
1138 levelCurrent++;
1139 }
1140 }
1141 tagClosing = false;
1142 } else {
1143 state = SCE_H_OTHER;
1144 }
1145 }
1146 }
1147 break;
1148 case SCE_HJ_DEFAULT:
1149 case SCE_HJ_START:
1150 case SCE_HJ_SYMBOLS:
1151 if (iswordstart(ch)) {
1152 styler.ColourTo(i - 1, StateToPrint);
1153 state = SCE_HJ_WORD;
1154 } else if (ch == '/' && chNext == '*') {
1155 styler.ColourTo(i - 1, StateToPrint);
1156 if (chNext2 == '*')
1157 state = SCE_HJ_COMMENTDOC;
1158 else
1159 state = SCE_HJ_COMMENT;
1160 } else if (ch == '/' && chNext == '/') {
1161 styler.ColourTo(i - 1, StateToPrint);
1162 state = SCE_HJ_COMMENTLINE;
1163 } else if (ch == '/' && isOKBeforeRE(chPrevNonWhite)) {
1164 styler.ColourTo(i - 1, StateToPrint);
1165 state = SCE_HJ_REGEX;
1166 } else if (ch == '\"') {
1167 styler.ColourTo(i - 1, StateToPrint);
1168 state = SCE_HJ_DOUBLESTRING;
1169 } else if (ch == '\'') {
1170 styler.ColourTo(i - 1, StateToPrint);
1171 state = SCE_HJ_SINGLESTRING;
1172 } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1173 styler.SafeGetCharAt(i + 3) == '-') {
1174 styler.ColourTo(i - 1, StateToPrint);
1175 state = SCE_HJ_COMMENTLINE;
1176 } else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1177 styler.ColourTo(i - 1, StateToPrint);
1178 state = SCE_HJ_COMMENTLINE;
1179 i += 2;
1180 } else if (isoperator(ch)) {
1181 styler.ColourTo(i - 1, StateToPrint);
1182 styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1183 state = SCE_HJ_DEFAULT;
1184 } else if ((ch == ' ') || (ch == '\t')) {
1185 if (state == SCE_HJ_START) {
1186 styler.ColourTo(i - 1, StateToPrint);
1187 state = SCE_HJ_DEFAULT;
1188 }
1189 }
1190 break;
1191 case SCE_HJ_WORD:
1192 if (!iswordchar(ch)) {
1193 classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
1194 //styler.ColourTo(i - 1, eHTJSKeyword);
1195 state = SCE_HJ_DEFAULT;
1196 if (ch == '/' && chNext == '*') {
1197 if (chNext2 == '*')
1198 state = SCE_HJ_COMMENTDOC;
1199 else
1200 state = SCE_HJ_COMMENT;
1201 } else if (ch == '/' && chNext == '/') {
1202 state = SCE_HJ_COMMENTLINE;
1203 } else if (ch == '\"') {
1204 state = SCE_HJ_DOUBLESTRING;
1205 } else if (ch == '\'') {
1206 state = SCE_HJ_SINGLESTRING;
1207 } else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1208 styler.ColourTo(i - 1, StateToPrint);
1209 state = SCE_HJ_COMMENTLINE;
1210 i += 2;
1211 } else if (isoperator(ch)) {
1212 styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1213 state = SCE_HJ_DEFAULT;
1214 }
1215 }
1216 break;
1217 case SCE_HJ_COMMENT:
1218 case SCE_HJ_COMMENTDOC:
1219 if (ch == '/' && chPrev == '*') {
1220 styler.ColourTo(i, StateToPrint);
1221 state = SCE_HJ_DEFAULT;
1222 }
1223 break;
1224 case SCE_HJ_COMMENTLINE:
1225 if (ch == '\r' || ch == '\n') {
1226 styler.ColourTo(i - 1, statePrintForState(SCE_HJ_COMMENTLINE, inScriptType));
1227 state = SCE_HJ_DEFAULT;
1228 }
1229 break;
1230 case SCE_HJ_DOUBLESTRING:
1231 if (ch == '\\') {
1232 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1233 i++;
1234 }
1235 } else if (ch == '\"') {
1236 styler.ColourTo(i, statePrintForState(SCE_HJ_DOUBLESTRING, inScriptType));
1237 state = SCE_HJ_DEFAULT;
1238 } else if ((inScriptType == eNonHtmlScript) && (ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1239 styler.ColourTo(i - 1, StateToPrint);
1240 state = SCE_HJ_COMMENTLINE;
1241 i += 2;
1242 } else if (isLineEnd(ch)) {
1243 styler.ColourTo(i - 1, StateToPrint);
1244 state = SCE_HJ_STRINGEOL;
1245 }
1246 break;
1247 case SCE_HJ_SINGLESTRING:
1248 if (ch == '\\') {
1249 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1250 i++;
1251 }
1252 } else if (ch == '\'') {
1253 styler.ColourTo(i, statePrintForState(SCE_HJ_SINGLESTRING, inScriptType));
1254 state = SCE_HJ_DEFAULT;
1255 } else if ((inScriptType == eNonHtmlScript) && (ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1256 styler.ColourTo(i - 1, StateToPrint);
1257 state = SCE_HJ_COMMENTLINE;
1258 i += 2;
1259 } else if (isLineEnd(ch)) {
1260 styler.ColourTo(i - 1, StateToPrint);
1261 state = SCE_HJ_STRINGEOL;
1262 }
1263 break;
1264 case SCE_HJ_STRINGEOL:
1265 if (!isLineEnd(ch)) {
1266 styler.ColourTo(i - 1, StateToPrint);
1267 state = SCE_HJ_DEFAULT;
1268 } else if (!isLineEnd(chNext)) {
1269 styler.ColourTo(i, StateToPrint);
1270 state = SCE_HJ_DEFAULT;
1271 }
1272 break;
1273 case SCE_HJ_REGEX:
1274 if (ch == '\r' || ch == '\n' || ch == '/') {
1275 styler.ColourTo(i, StateToPrint);
1276 state = SCE_HJ_DEFAULT;
1277 } else if (ch == '\\') {
1278 // Gobble up the quoted character
1279 if (chNext == '\\' || chNext == '/') {
1280 i++;
1281 ch = chNext;
1282 chNext = styler.SafeGetCharAt(i + 1);
1283 }
1284 }
1285 break;
1286 case SCE_HB_DEFAULT:
1287 case SCE_HB_START:
1288 if (iswordstart(ch)) {
1289 styler.ColourTo(i - 1, StateToPrint);
1290 state = SCE_HB_WORD;
1291 } else if (ch == '\'') {
1292 styler.ColourTo(i - 1, StateToPrint);
1293 state = SCE_HB_COMMENTLINE;
1294 } else if (ch == '\"') {
1295 styler.ColourTo(i - 1, StateToPrint);
1296 state = SCE_HB_STRING;
1297 } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1298 styler.SafeGetCharAt(i + 3) == '-') {
1299 styler.ColourTo(i - 1, StateToPrint);
1300 state = SCE_HB_COMMENTLINE;
1301 } else if (isoperator(ch)) {
1302 styler.ColourTo(i - 1, StateToPrint);
1303 styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
1304 state = SCE_HB_DEFAULT;
1305 } else if ((ch == ' ') || (ch == '\t')) {
1306 if (state == SCE_HB_START) {
1307 styler.ColourTo(i - 1, StateToPrint);
1308 state = SCE_HB_DEFAULT;
1309 }
1310 }
1311 break;
1312 case SCE_HB_WORD:
1313 if (!iswordchar(ch)) {
1314 state = classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
1315 if (state == SCE_HB_DEFAULT) {
1316 if (ch == '\"') {
1317 state = SCE_HB_STRING;
1318 } else if (ch == '\'') {
1319 state = SCE_HB_COMMENTLINE;
1320 } else if (isoperator(ch)) {
1321 styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
1322 state = SCE_HB_DEFAULT;
1323 }
1324 }
1325 }
1326 break;
1327 case SCE_HB_STRING:
1328 if (ch == '\"') {
1329 styler.ColourTo(i, StateToPrint);
1330 state = SCE_HB_DEFAULT;
1331 } else if (ch == '\r' || ch == '\n') {
1332 styler.ColourTo(i - 1, StateToPrint);
1333 state = SCE_HB_STRINGEOL;
1334 }
1335 break;
1336 case SCE_HB_COMMENTLINE:
1337 if (ch == '\r' || ch == '\n') {
1338 styler.ColourTo(i - 1, StateToPrint);
1339 state = SCE_HB_DEFAULT;
1340 }
1341 break;
1342 case SCE_HB_STRINGEOL:
1343 if (!isLineEnd(ch)) {
1344 styler.ColourTo(i - 1, StateToPrint);
1345 state = SCE_HB_DEFAULT;
1346 } else if (!isLineEnd(chNext)) {
1347 styler.ColourTo(i, StateToPrint);
1348 state = SCE_HB_DEFAULT;
1349 }
1350 break;
1351 case SCE_HP_DEFAULT:
1352 case SCE_HP_START:
1353 if (iswordstart(ch)) {
1354 styler.ColourTo(i - 1, StateToPrint);
1355 state = SCE_HP_WORD;
1356 } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1357 styler.SafeGetCharAt(i + 3) == '-') {
1358 styler.ColourTo(i - 1, StateToPrint);
1359 state = SCE_HP_COMMENTLINE;
1360 } else if (ch == '#') {
1361 styler.ColourTo(i - 1, StateToPrint);
1362 state = SCE_HP_COMMENTLINE;
1363 } else if (ch == '\"') {
1364 styler.ColourTo(i - 1, StateToPrint);
1365 if (chNext == '\"' && chNext2 == '\"') {
1366 i += 2;
1367 state = SCE_HP_TRIPLEDOUBLE;
1368 ch = ' ';
1369 chPrev = ' ';
1370 chNext = styler.SafeGetCharAt(i + 1);
1371 } else {
1372 // state = statePrintForState(SCE_HP_STRING,inScriptType);
1373 state = SCE_HP_STRING;
1374 }
1375 } else if (ch == '\'') {
1376 styler.ColourTo(i - 1, StateToPrint);
1377 if (chNext == '\'' && chNext2 == '\'') {
1378 i += 2;
1379 state = SCE_HP_TRIPLE;
1380 ch = ' ';
1381 chPrev = ' ';
1382 chNext = styler.SafeGetCharAt(i + 1);
1383 } else {
1384 state = SCE_HP_CHARACTER;
1385 }
1386 } else if (isoperator(ch)) {
1387 styler.ColourTo(i - 1, StateToPrint);
1388 styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
1389 } else if ((ch == ' ') || (ch == '\t')) {
1390 if (state == SCE_HP_START) {
1391 styler.ColourTo(i - 1, StateToPrint);
1392 state = SCE_HP_DEFAULT;
1393 }
1394 }
1395 break;
1396 case SCE_HP_WORD:
1397 if (!iswordchar(ch)) {
1398 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
1399 state = SCE_HP_DEFAULT;
1400 if (ch == '#') {
1401 state = SCE_HP_COMMENTLINE;
1402 } else if (ch == '\"') {
1403 if (chNext == '\"' && chNext2 == '\"') {
1404 i += 2;
1405 state = SCE_HP_TRIPLEDOUBLE;
1406 ch = ' ';
1407 chPrev = ' ';
1408 chNext = styler.SafeGetCharAt(i + 1);
1409 } else {
1410 state = SCE_HP_STRING;
1411 }
1412 } else if (ch == '\'') {
1413 if (chNext == '\'' && chNext2 == '\'') {
1414 i += 2;
1415 state = SCE_HP_TRIPLE;
1416 ch = ' ';
1417 chPrev = ' ';
1418 chNext = styler.SafeGetCharAt(i + 1);
1419 } else {
1420 state = SCE_HP_CHARACTER;
1421 }
1422 } else if (isoperator(ch)) {
1423 styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
1424 }
1425 }
1426 break;
1427 case SCE_HP_COMMENTLINE:
1428 if (ch == '\r' || ch == '\n') {
1429 styler.ColourTo(i - 1, StateToPrint);
1430 state = SCE_HP_DEFAULT;
1431 }
1432 break;
1433 case SCE_HP_STRING:
1434 if (ch == '\\') {
1435 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1436 i++;
1437 ch = chNext;
1438 chNext = styler.SafeGetCharAt(i + 1);
1439 }
1440 } else if (ch == '\"') {
1441 styler.ColourTo(i, StateToPrint);
1442 state = SCE_HP_DEFAULT;
1443 }
1444 break;
1445 case SCE_HP_CHARACTER:
1446 if (ch == '\\') {
1447 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1448 i++;
1449 ch = chNext;
1450 chNext = styler.SafeGetCharAt(i + 1);
1451 }
1452 } else if (ch == '\'') {
1453 styler.ColourTo(i, StateToPrint);
1454 state = SCE_HP_DEFAULT;
1455 }
1456 break;
1457 case SCE_HP_TRIPLE:
1458 if (ch == '\'' && chPrev == '\'' && chPrev2 == '\'') {
1459 styler.ColourTo(i, StateToPrint);
1460 state = SCE_HP_DEFAULT;
1461 }
1462 break;
1463 case SCE_HP_TRIPLEDOUBLE:
1464 if (ch == '\"' && chPrev == '\"' && chPrev2 == '\"') {
1465 styler.ColourTo(i, StateToPrint);
1466 state = SCE_HP_DEFAULT;
1467 }
1468 break;
1469 ///////////// start - PHP state handling
1470 case SCE_HPHP_WORD:
1471 if (!iswordchar(ch)) {
1472 classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
1473 if (ch == '/' && chNext == '*') {
1474 i++;
1475 state = SCE_HPHP_COMMENT;
1476 } else if (ch == '/' && chNext == '/') {
1477 i++;
1478 state = SCE_HPHP_COMMENTLINE;
1479 } else if (ch == '#') {
1480 state = SCE_HPHP_COMMENTLINE;
1481 } else if (ch == '\"') {
1482 state = SCE_HPHP_HSTRING;
1483 strcpy(phpStringDelimiter, "\"");
1484 } else if (styler.Match(i, "<<<")) {
1485 state = SCE_HPHP_HSTRING;
1486 i = FindPhpStringDelimiter(phpStringDelimiter, sizeof(phpStringDelimiter), i + 3, lengthDoc, styler);
1487 } else if (ch == '\'') {
1488 state = SCE_HPHP_SIMPLESTRING;
1489 } else if (ch == '$' && IsPhpWordStart(chNext)) {
1490 state = SCE_HPHP_VARIABLE;
1491 } else if (isoperator(ch)) {
1492 state = SCE_HPHP_OPERATOR;
1493 } else {
1494 state = SCE_HPHP_DEFAULT;
1495 }
1496 }
1497 break;
1498 case SCE_HPHP_NUMBER:
1499 if (!IsADigit(ch) && ch != '.' && ch != 'e' && ch != 'E' && (ch != '-' || (chPrev != 'e' && chPrev != 'E'))) {
1500 styler.ColourTo(i - 1, SCE_HPHP_NUMBER);
1501 if (isoperator(ch))
1502 state = SCE_HPHP_OPERATOR;
1503 else
1504 state = SCE_HPHP_DEFAULT;
1505 }
1506 break;
1507 case SCE_HPHP_VARIABLE:
1508 if (!IsPhpWordChar(ch)) {
1509 styler.ColourTo(i - 1, SCE_HPHP_VARIABLE);
1510 if (isoperator(ch))
1511 state = SCE_HPHP_OPERATOR;
1512 else
1513 state = SCE_HPHP_DEFAULT;
1514 }
1515 break;
1516 case SCE_HPHP_COMMENT:
1517 if (ch == '/' && chPrev == '*') {
1518 styler.ColourTo(i, StateToPrint);
1519 state = SCE_HPHP_DEFAULT;
1520 }
1521 break;
1522 case SCE_HPHP_COMMENTLINE:
1523 if (ch == '\r' || ch == '\n') {
1524 styler.ColourTo(i - 1, StateToPrint);
1525 state = SCE_HPHP_DEFAULT;
1526 }
1527 break;
1528 case SCE_HPHP_HSTRING:
1529 if (ch == '\\' && (phpStringDelimiter[0] == '\"' || chNext == '$' || chNext == '{')) {
1530 // skip the next char
1531 i++;
1532 } else if (((ch == '{' && chNext == '$') || (ch == '$' && chNext == '{'))
1533 && IsPhpWordStart(chNext2)) {
1534 styler.ColourTo(i - 1, StateToPrint);
1535 state = SCE_HPHP_COMPLEX_VARIABLE;
1536 } else if (ch == '$' && IsPhpWordStart(chNext)) {
1537 styler.ColourTo(i - 1, StateToPrint);
1538 state = SCE_HPHP_HSTRING_VARIABLE;
1539 } else if (styler.Match(i, phpStringDelimiter)) {
1540 if (strlen(phpStringDelimiter) > 1)
1541 i += strlen(phpStringDelimiter) - 1;
1542 styler.ColourTo(i, StateToPrint);
1543 state = SCE_HPHP_DEFAULT;
1544 }
1545 break;
1546 case SCE_HPHP_SIMPLESTRING:
1547 if (ch == '\\') {
1548 // skip the next char
1549 i++;
1550 } else if (ch == '\'') {
1551 styler.ColourTo(i, StateToPrint);
1552 state = SCE_HPHP_DEFAULT;
1553 }
1554 break;
1555 case SCE_HPHP_HSTRING_VARIABLE:
1556 if (!IsPhpWordChar(ch)) {
1557 styler.ColourTo(i - 1, StateToPrint);
1558 i--; // strange but it works
1559 state = SCE_HPHP_HSTRING;
1560 }
1561 break;
1562 case SCE_HPHP_COMPLEX_VARIABLE:
1563 if (ch == '}') {
1564 styler.ColourTo(i, StateToPrint);
1565 state = SCE_HPHP_HSTRING;
1566 }
1567 break;
1568 case SCE_HPHP_OPERATOR:
1569 case SCE_HPHP_DEFAULT:
1570 styler.ColourTo(i - 1, StateToPrint);
1571 if (IsADigit(ch) || (ch == '.' && IsADigit(chNext))) {
1572 state = SCE_HPHP_NUMBER;
1573 } else if (iswordstart(ch)) {
1574 state = SCE_HPHP_WORD;
1575 } else if (ch == '/' && chNext == '*') {
1576 i++;
1577 state = SCE_HPHP_COMMENT;
1578 } else if (ch == '/' && chNext == '/') {
1579 i++;
1580 state = SCE_HPHP_COMMENTLINE;
1581 } else if (ch == '#') {
1582 state = SCE_HPHP_COMMENTLINE;
1583 } else if (ch == '\"') {
1584 state = SCE_HPHP_HSTRING;
1585 strcpy(phpStringDelimiter, "\"");
1586 } else if (styler.Match(i, "<<<")) {
1587 state = SCE_HPHP_HSTRING;
1588 i = FindPhpStringDelimiter(phpStringDelimiter, sizeof(phpStringDelimiter), i + 3, lengthDoc, styler);
1589 } else if (ch == '\'') {
1590 state = SCE_HPHP_SIMPLESTRING;
1591 } else if (ch == '$' && IsPhpWordStart(chNext)) {
1592 state = SCE_HPHP_VARIABLE;
1593 } else if (isoperator(ch)) {
1594 state = SCE_HPHP_OPERATOR;
1595 } else if ((state == SCE_HPHP_OPERATOR) && (isspacechar(ch))) {
1596 state = SCE_HPHP_DEFAULT;
1597 }
1598 break;
1599 ///////////// end - PHP state handling
1600 }
1601
1602 // Some of the above terminated their lexeme but since the same character starts
1603 // the same class again, only reenter if non empty segment.
1604
1605 bool nonEmptySegment = i >= static_cast<int>(styler.GetStartSegment());
1606 if (state == SCE_HB_DEFAULT) { // One of the above succeeded
1607 if ((ch == '\"') && (nonEmptySegment)) {
1608 state = SCE_HB_STRING;
1609 } else if (ch == '\'') {
1610 state = SCE_HB_COMMENTLINE;
1611 } else if (iswordstart(ch)) {
1612 state = SCE_HB_WORD;
1613 } else if (isoperator(ch)) {
1614 styler.ColourTo(i, SCE_HB_DEFAULT);
1615 }
1616 } else if (state == SCE_HBA_DEFAULT) { // One of the above succeeded
1617 if ((ch == '\"') && (nonEmptySegment)) {
1618 state = SCE_HBA_STRING;
1619 } else if (ch == '\'') {
1620 state = SCE_HBA_COMMENTLINE;
1621 } else if (iswordstart(ch)) {
1622 state = SCE_HBA_WORD;
1623 } else if (isoperator(ch)) {
1624 styler.ColourTo(i, SCE_HBA_DEFAULT);
1625 }
1626 } else if (state == SCE_HJ_DEFAULT) { // One of the above succeeded
1627 if (ch == '/' && chNext == '*') {
1628 if (styler.SafeGetCharAt(i + 2) == '*')
1629 state = SCE_HJ_COMMENTDOC;
1630 else
1631 state = SCE_HJ_COMMENT;
1632 } else if (ch == '/' && chNext == '/') {
1633 state = SCE_HJ_COMMENTLINE;
1634 } else if ((ch == '\"') && (nonEmptySegment)) {
1635 state = SCE_HJ_DOUBLESTRING;
1636 } else if ((ch == '\'') && (nonEmptySegment)) {
1637 state = SCE_HJ_SINGLESTRING;
1638 } else if (iswordstart(ch)) {
1639 state = SCE_HJ_WORD;
1640 } else if (isoperator(ch)) {
1641 styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1642 }
1643 }
1644 }
1645
1646 StateToPrint = statePrintForState(state, inScriptType);
1647 styler.ColourTo(lengthDoc - 1, StateToPrint);
1648
1649 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
1650 if (fold) {
1651 int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
1652 styler.SetLevel(lineCurrent, levelPrev | flagsNext);
1653 }
1654 }
1655
1656 static bool isASPScript(int state) {
1657 return
1658 (state >= SCE_HJA_START && state <= SCE_HJA_REGEX) ||
1659 (state >= SCE_HBA_START && state <= SCE_HBA_STRINGEOL) ||
1660 (state >= SCE_HPA_DEFAULT && state <= SCE_HPA_IDENTIFIER);
1661 }
1662
1663 static void ColouriseHBAPiece(StyleContext &sc, WordList *keywordlists[]) {
1664 WordList &keywordsVBS = *keywordlists[2];
1665 if (sc.state == SCE_HBA_WORD) {
1666 if (!IsAWordChar(sc.ch)) {
1667 char s[100];
1668 sc.GetCurrentLowered(s, sizeof(s));
1669 if (keywordsVBS.InList(s)) {
1670 if (strcmp(s, "rem") == 0) {
1671 sc.ChangeState(SCE_HBA_COMMENTLINE);
1672 if (sc.atLineEnd) {
1673 sc.SetState(SCE_HBA_DEFAULT);
1674 }
1675 } else {
1676 sc.SetState(SCE_HBA_DEFAULT);
1677 }
1678 } else {
1679 sc.ChangeState(SCE_HBA_IDENTIFIER);
1680 sc.SetState(SCE_HBA_DEFAULT);
1681 }
1682 }
1683 } else if (sc.state == SCE_HBA_NUMBER) {
1684 if (!IsAWordChar(sc.ch)) {
1685 sc.SetState(SCE_HBA_DEFAULT);
1686 }
1687 } else if (sc.state == SCE_HBA_STRING) {
1688 if (sc.ch == '\"') {
1689 sc.ForwardSetState(SCE_HBA_DEFAULT);
1690 } else if (sc.ch == '\r' || sc.ch == '\n') {
1691 sc.ChangeState(SCE_HBA_STRINGEOL);
1692 sc.ForwardSetState(SCE_HBA_DEFAULT);
1693 }
1694 } else if (sc.state == SCE_HBA_COMMENTLINE) {
1695 if (sc.ch == '\r' || sc.ch == '\n') {
1696 sc.SetState(SCE_HBA_DEFAULT);
1697 }
1698 }
1699
1700 if (sc.state == SCE_HBA_DEFAULT) {
1701 if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
1702 sc.SetState(SCE_HBA_NUMBER);
1703 } else if (IsAWordStart(sc.ch)) {
1704 sc.SetState(SCE_HBA_WORD);
1705 } else if (sc.ch == '\'') {
1706 sc.SetState(SCE_HBA_COMMENTLINE);
1707 } else if (sc.ch == '\"') {
1708 sc.SetState(SCE_HBA_STRING);
1709 }
1710 }
1711 }
1712
1713 static void ColouriseHTMLPiece(StyleContext &sc, WordList *keywordlists[]) {
1714 WordList &keywordsTags = *keywordlists[0];
1715 if (sc.state == SCE_H_COMMENT) {
1716 if (sc.Match("-->")) {
1717 sc.Forward();
1718 sc.Forward();
1719 sc.ForwardSetState(SCE_H_DEFAULT);
1720 }
1721 } else if (sc.state == SCE_H_ENTITY) {
1722 if (sc.ch == ';') {
1723 sc.ForwardSetState(SCE_H_DEFAULT);
1724 } else if (sc.ch != '#' && (sc.ch < 0x80) && !isalnum(sc.ch) // Should check that '#' follows '&', but it is unlikely anyway...
1725 && sc.ch != '.' && sc.ch != '-' && sc.ch != '_' && sc.ch != ':') { // valid in XML
1726 sc.ChangeState(SCE_H_TAGUNKNOWN);
1727 sc.SetState(SCE_H_DEFAULT);
1728 }
1729 } else if (sc.state == SCE_H_TAGUNKNOWN) {
1730 if (!ishtmlwordchar(static_cast<char>(sc.ch)) && !((sc.ch == '/') && (sc.chPrev == '<')) && sc.ch != '[') {
1731 char s[100];
1732 sc.GetCurrentLowered(s, sizeof(s));
1733 if (s[1] == '/') {
1734 if (keywordsTags.InList(s + 2)) {
1735 sc.ChangeState(SCE_H_TAG);
1736 }
1737 } else {
1738 if (keywordsTags.InList(s + 1)) {
1739 sc.ChangeState(SCE_H_TAG);
1740 }
1741 }
1742 if (sc.ch == '>') {
1743 sc.ForwardSetState(SCE_H_DEFAULT);
1744 } else if (sc.Match('/', '>')) {
1745 sc.SetState(SCE_H_TAGEND);
1746 sc.Forward();
1747 sc.ForwardSetState(SCE_H_DEFAULT);
1748 } else {
1749 sc.SetState(SCE_H_OTHER);
1750 }
1751 }
1752 } else if (sc.state == SCE_H_ATTRIBUTE) {
1753 if (!ishtmlwordchar(static_cast<char>(sc.ch))) {
1754 char s[100];
1755 sc.GetCurrentLowered(s, sizeof(s));
1756 if (!keywordsTags.InList(s)) {
1757 sc.ChangeState(SCE_H_ATTRIBUTEUNKNOWN);
1758 }
1759 sc.SetState(SCE_H_OTHER);
1760 }
1761 } else if (sc.state == SCE_H_OTHER) {
1762 if (sc.ch == '>') {
1763 sc.SetState(SCE_H_TAG);
1764 sc.ForwardSetState(SCE_H_DEFAULT);
1765 } else if (sc.Match('/', '>')) {
1766 sc.SetState(SCE_H_TAG);
1767 sc.Forward();
1768 sc.ForwardSetState(SCE_H_DEFAULT);
1769 } else if (sc.chPrev == '=') {
1770 sc.SetState(SCE_H_VALUE);
1771 }
1772 } else if (sc.state == SCE_H_DOUBLESTRING) {
1773 if (sc.ch == '\"') {
1774 sc.ForwardSetState(SCE_H_OTHER);
1775 }
1776 } else if (sc.state == SCE_H_SINGLESTRING) {
1777 if (sc.ch == '\'') {
1778 sc.ForwardSetState(SCE_H_OTHER);
1779 }
1780 } else if (sc.state == SCE_H_NUMBER) {
1781 if (!IsADigit(sc.ch)) {
1782 sc.SetState(SCE_H_OTHER);
1783 }
1784 }
1785
1786 if (sc.state == SCE_H_DEFAULT) {
1787 if (sc.ch == '<') {
1788 if (sc.Match("<!--"))
1789 sc.SetState(SCE_H_COMMENT);
1790 else
1791 sc.SetState(SCE_H_TAGUNKNOWN);
1792 } else if (sc.ch == '&') {
1793 sc.SetState(SCE_H_ENTITY);
1794 }
1795 } else if ((sc.state == SCE_H_OTHER) || (sc.state == SCE_H_VALUE)) {
1796 if (sc.ch == '\"' && sc.chPrev == '=') {
1797 sc.SetState(SCE_H_DOUBLESTRING);
1798 } else if (sc.ch == '\'' && sc.chPrev == '=') {
1799 sc.SetState(SCE_H_SINGLESTRING);
1800 } else if (IsADigit(sc.ch)) {
1801 sc.SetState(SCE_H_NUMBER);
1802 } else if (sc.ch == '>') {
1803 sc.SetState(SCE_H_TAG);
1804 sc.ForwardSetState(SCE_H_DEFAULT);
1805 } else if (ishtmlwordchar(static_cast<char>(sc.ch))) {
1806 sc.SetState(SCE_H_ATTRIBUTE);
1807 }
1808 }
1809 }
1810
1811 static void ColouriseASPPiece(StyleContext &sc, WordList *keywordlists[]) {
1812 // Possibly exit current state to either SCE_H_DEFAULT or SCE_HBA_DEFAULT
1813 if ((sc.state == SCE_H_ASPAT || isASPScript(sc.state)) && sc.Match('%', '>')) {
1814 sc.SetState(SCE_H_ASP);
1815 sc.Forward();
1816 sc.ForwardSetState(SCE_H_DEFAULT);
1817 }
1818
1819 // Handle some ASP script
1820 if (sc.state >= SCE_HBA_START && sc.state <= SCE_HBA_STRINGEOL) {
1821 ColouriseHBAPiece(sc, keywordlists);
1822 } else if (sc.state >= SCE_H_DEFAULT && sc.state <= SCE_H_SGML_BLOCK_DEFAULT) {
1823 ColouriseHTMLPiece(sc, keywordlists);
1824 }
1825
1826 // Enter new sc.state
1827 if ((sc.state == SCE_H_DEFAULT) || (sc.state == SCE_H_TAGUNKNOWN)) {
1828 if (sc.Match('<', '%')) {
1829 if (sc.state == SCE_H_TAGUNKNOWN)
1830 sc.ChangeState(SCE_H_ASP);
1831 else
1832 sc.SetState(SCE_H_ASP);
1833 sc.Forward();
1834 sc.Forward();
1835 if (sc.ch == '@') {
1836 sc.ForwardSetState(SCE_H_ASPAT);
1837 } else {
1838 if (sc.ch == '=') {
1839 sc.Forward();
1840 }
1841 sc.SetState(SCE_HBA_DEFAULT);
1842 }
1843 }
1844 }
1845 }
1846
1847 static void ColouriseASPDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
1848 Accessor &styler) {
1849 // Lexer for HTML requires more lexical states (7 bits worth) than most lexers
1850 StyleContext sc(startPos, length, initStyle, styler, 0x7f);
1851 for (; sc.More(); sc.Forward()) {
1852 ColouriseASPPiece(sc, keywordlists);
1853 }
1854 sc.Complete();
1855 }
1856
1857 static void ColourisePHPPiece(StyleContext &sc, WordList *keywordlists[]) {
1858 // Possibly exit current state to either SCE_H_DEFAULT or SCE_HBA_DEFAULT
1859 if (sc.state >= SCE_HPHP_DEFAULT && sc.state <= SCE_HPHP_OPERATOR) {
1860 if (!isPHPStringState(sc.state) &&
1861 (sc.state != SCE_HPHP_COMMENT) &&
1862 (sc.Match('?', '>'))) {
1863 sc.SetState(SCE_H_QUESTION);
1864 sc.Forward();
1865 sc.ForwardSetState(SCE_H_DEFAULT);
1866 }
1867 }
1868
1869 if (sc.state >= SCE_H_DEFAULT && sc.state <= SCE_H_SGML_BLOCK_DEFAULT) {
1870 ColouriseHTMLPiece(sc, keywordlists);
1871 }
1872
1873 // Handle some PHP script
1874 if (sc.state == SCE_HPHP_WORD) {
1875 if (!IsPhpWordChar(static_cast<char>(sc.ch))) {
1876 sc.SetState(SCE_HPHP_DEFAULT);
1877 }
1878 } else if (sc.state == SCE_HPHP_COMMENTLINE) {
1879 if (sc.ch == '\r' || sc.ch == '\n') {
1880 sc.SetState(SCE_HPHP_DEFAULT);
1881 }
1882 } else if (sc.state == SCE_HPHP_COMMENT) {
1883 if (sc.Match('*', '/')) {
1884 sc.Forward();
1885 sc.Forward();
1886 sc.SetState(SCE_HPHP_DEFAULT);
1887 }
1888 } else if (sc.state == SCE_HPHP_HSTRING) {
1889 if (sc.ch == '\"') {
1890 sc.ForwardSetState(SCE_HPHP_DEFAULT);
1891 }
1892 } else if (sc.state == SCE_HPHP_SIMPLESTRING) {
1893 if (sc.ch == '\'') {
1894 sc.ForwardSetState(SCE_HPHP_DEFAULT);
1895 }
1896 } else if (sc.state == SCE_HPHP_VARIABLE) {
1897 if (!IsPhpWordChar(static_cast<char>(sc.ch))) {
1898 sc.SetState(SCE_HPHP_DEFAULT);
1899 }
1900 } else if (sc.state == SCE_HPHP_OPERATOR) {
1901 sc.SetState(SCE_HPHP_DEFAULT);
1902 }
1903
1904 // Enter new sc.state
1905 if ((sc.state == SCE_H_DEFAULT) || (sc.state == SCE_H_TAGUNKNOWN)) {
1906 if (sc.Match("<?php")) {
1907 sc.SetState(SCE_H_QUESTION);
1908 sc.Forward();
1909 sc.Forward();
1910 sc.Forward();
1911 sc.Forward();
1912 sc.Forward();
1913 sc.SetState(SCE_HPHP_DEFAULT);
1914 }
1915 }
1916 if (sc.state == SCE_HPHP_DEFAULT) {
1917 if (IsPhpWordStart(static_cast<char>(sc.ch))) {
1918 sc.SetState(SCE_HPHP_WORD);
1919 } else if (sc.ch == '#') {
1920 sc.SetState(SCE_HPHP_COMMENTLINE);
1921 } else if (sc.Match("<!--")) {
1922 sc.SetState(SCE_HPHP_COMMENTLINE);
1923 } else if (sc.Match('/', '/')) {
1924 sc.SetState(SCE_HPHP_COMMENTLINE);
1925 } else if (sc.Match('/', '*')) {
1926 sc.SetState(SCE_HPHP_COMMENT);
1927 } else if (sc.ch == '\"') {
1928 sc.SetState(SCE_HPHP_HSTRING);
1929 } else if (sc.ch == '\'') {
1930 sc.SetState(SCE_HPHP_SIMPLESTRING);
1931 } else if (sc.ch == '$' && IsPhpWordStart(static_cast<char>(sc.chNext))) {
1932 sc.SetState(SCE_HPHP_VARIABLE);
1933 } else if (isoperator(static_cast<char>(sc.ch))) {
1934 sc.SetState(SCE_HPHP_OPERATOR);
1935 }
1936 }
1937 }
1938
1939 static void ColourisePHPDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
1940 Accessor &styler) {
1941 // Lexer for HTML requires more lexical states (7 bits worth) than most lexers
1942 StyleContext sc(startPos, length, initStyle, styler, 0x7f);
1943 for (; sc.More(); sc.Forward()) {
1944 ColourisePHPPiece(sc, keywordlists);
1945 }
1946 sc.Complete();
1947 }
1948
// Descriptions of the word lists, handed to every LexerModule declared in
// this file. The array ends with a null entry.
// NOTE(review): presumably entry N describes keywordlists[N] - confirm
// against the LexerModule declaration.
static const char * const htmlWordListDesc[] = {
	"HTML elements and attributes",	// entry 0
	"JavaScript keywords",			// entry 1
	"VBScript keywords",			// entry 2
	"Python keywords",				// entry 3
	"PHP keywords",					// entry 4
	"SGML and DTD keywords",		// entry 5
	0,								// terminator
};
1958
1959 LexerModule lmHTML(SCLEX_HTML, ColouriseHyperTextDoc, "hypertext", 0, htmlWordListDesc);
1960 LexerModule lmXML(SCLEX_XML, ColouriseHyperTextDoc, "xml", 0, htmlWordListDesc);
1961 LexerModule lmASP(SCLEX_ASP, ColouriseASPDoc, "asp", 0, htmlWordListDesc);
1962 LexerModule lmPHP(SCLEX_PHP, ColourisePHPDoc, "php", 0, htmlWordListDesc);