]> git.saurik.com Git - wxWidgets.git/blob - src/stc/scintilla/src/LexHTML.cxx
9b8d5dc92339065959e78d6b0caf076e136b72de
[wxWidgets.git] / src / stc / scintilla / src / LexHTML.cxx
1 // Scintilla source code edit control
2 /** @file LexHTML.cxx
3 ** Lexer for HTML.
4 **/
5 // Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
7
8 #include <stdlib.h>
9 #include <string.h>
10 #include <ctype.h>
11 #include <stdio.h>
12 #include <stdarg.h>
13
14 #include "Platform.h"
15
16 #include "PropSet.h"
17 #include "Accessor.h"
18 #include "StyleContext.h"
19 #include "KeyWords.h"
20 #include "Scintilla.h"
21 #include "SciLexer.h"
22 #include "CharacterSet.h"
23
24 #ifdef SCI_NAMESPACE
25 using namespace Scintilla;
26 #endif
27
// Offsets between a client-side script style range and its counterpart range
// used when the same language appears outside a plain client script
// (statePrintForState adds them, stateForPrintState subtracts them).
#define SCE_HA_JS (SCE_HJA_START - SCE_HJ_START)
#define SCE_HA_VBS (SCE_HBA_START - SCE_HB_START)
#define SCE_HA_PYTHON (SCE_HPA_START - SCE_HP_START)

// Language currently being lexed. Values are stored in 4-bit fields of the
// per-line state, so the numeric values must stay stable and below 16.
enum script_type {
	eScriptNone = 0,
	eScriptJS,
	eScriptVBS,
	eScriptPython,
	eScriptPHP,
	eScriptXML,
	eScriptSGML,
	eScriptSGMLblock
};

// Nesting context of the lexer. Stored in a 2-bit field of the per-line state.
enum script_mode {
	eHtml = 0,              // plain HTML
	eNonHtmlScript,         // inside a client-side script element
	eNonHtmlPreProc,        // inside a <? ... ?> or <% ... %> block opened from HTML
	eNonHtmlScriptPreProc   // inside such a block opened from within a client script
};
34
// Return true when ch is an ASCII character that may appear inside a word:
// a letter, a digit, '.' or '_'.
// Values outside 0..0x7F (including negative values, for which isalnum() is
// undefined) are never word characters.
static inline bool IsAWordChar(const int ch) {
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || ch == '.' || ch == '_';
}
38
// Return true when ch is an ASCII character that may begin a word:
// a letter, a digit or '_'.
// Values outside 0..0x7F (including negative values, for which isalnum() is
// undefined) never start a word.
static inline bool IsAWordStart(const int ch) {
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || ch == '_';
}
42
// Return true when ch is one of the punctuation characters treated as an
// operator. Letters, digits and every other character yield false.
// Note that '.' IS part of the operator set.
// Declared static like the other helpers in this file so that it cannot
// clash with a same-named inline function in another translation unit.
static inline bool IsOperator(int ch) {
	switch (ch) {
	case '%': case '^': case '&': case '*':
	case '(': case ')': case '-': case '+':
	case '=': case '|': case '{': case '}':
	case '[': case ']': case ':': case ';':
	case '<': case '>': case ',': case '/':
	case '?': case '!': case '.': case '~':
		return true;
	default:
		return false;
	}
}
56
// Convert an ASCII upper case letter to lower case; any other value is
// returned unchanged. Independent of the current locale.
static inline int MakeLowerCase(int ch) {
	const bool isUpperAscii = (ch >= 'A') && (ch <= 'Z');
	return isUpperAscii ? ch + ('a' - 'A') : ch;
}
63
64 static void GetTextSegment(Accessor &styler, unsigned int start, unsigned int end, char *s, size_t len) {
65 size_t i = 0;
66 for (; (i < end - start + 1) && (i < len-1); i++) {
67 s[i] = static_cast<char>(MakeLowerCase(styler[start + i]));
68 }
69 s[i] = '\0';
70 }
71
72 static script_type segIsScriptingIndicator(Accessor &styler, unsigned int start, unsigned int end, script_type prevValue) {
73 char s[100];
74 GetTextSegment(styler, start, end, s, sizeof(s));
75 //Platform::DebugPrintf("Scripting indicator [%s]\n", s);
76 if (strstr(s, "src")) // External script
77 return eScriptNone;
78 if (strstr(s, "vbs"))
79 return eScriptVBS;
80 if (strstr(s, "pyth"))
81 return eScriptPython;
82 if (strstr(s, "javas"))
83 return eScriptJS;
84 if (strstr(s, "jscr"))
85 return eScriptJS;
86 if (strstr(s, "php"))
87 return eScriptPHP;
88 if (strstr(s, "xml")) {
89 const char *xml = strstr(s, "xml");
90 for (const char *t=s; t<xml; t++) {
91 if (!IsASpace(*t)) {
92 return prevValue;
93 }
94 }
95 return eScriptXML;
96 }
97
98 return prevValue;
99 }
100
101 static int PrintScriptingIndicatorOffset(Accessor &styler, unsigned int start, unsigned int end) {
102 int iResult = 0;
103 char s[100];
104 GetTextSegment(styler, start, end, s, sizeof(s));
105 if (0 == strncmp(s, "php", 3)) {
106 iResult = 3;
107 }
108
109 return iResult;
110 }
111
112 static script_type ScriptOfState(int state) {
113 if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
114 return eScriptPython;
115 } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
116 return eScriptVBS;
117 } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
118 return eScriptJS;
119 } else if ((state >= SCE_HPHP_DEFAULT) && (state <= SCE_HPHP_COMMENTLINE)) {
120 return eScriptPHP;
121 } else if ((state >= SCE_H_SGML_DEFAULT) && (state < SCE_H_SGML_BLOCK_DEFAULT)) {
122 return eScriptSGML;
123 } else if (state == SCE_H_SGML_BLOCK_DEFAULT) {
124 return eScriptSGMLblock;
125 } else {
126 return eScriptNone;
127 }
128 }
129
130 static int statePrintForState(int state, script_mode inScriptType) {
131 int StateToPrint = state;
132
133 if (state >= SCE_HJ_START) {
134 if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
135 StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_PYTHON);
136 } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
137 StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_VBS);
138 } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
139 StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_JS);
140 }
141 }
142
143 return StateToPrint;
144 }
145
146 static int stateForPrintState(int StateToPrint) {
147 int state;
148
149 if ((StateToPrint >= SCE_HPA_START) && (StateToPrint <= SCE_HPA_IDENTIFIER)) {
150 state = StateToPrint - SCE_HA_PYTHON;
151 } else if ((StateToPrint >= SCE_HBA_START) && (StateToPrint <= SCE_HBA_STRINGEOL)) {
152 state = StateToPrint - SCE_HA_VBS;
153 } else if ((StateToPrint >= SCE_HJA_START) && (StateToPrint <= SCE_HJA_REGEX)) {
154 state = StateToPrint - SCE_HA_JS;
155 } else {
156 state = StateToPrint;
157 }
158
159 return state;
160 }
161
162 static inline bool IsNumber(unsigned int start, Accessor &styler) {
163 return IsADigit(styler[start]) || (styler[start] == '.') ||
164 (styler[start] == '-') || (styler[start] == '#');
165 }
166
167 static inline bool isStringState(int state) {
168 bool bResult;
169
170 switch (state) {
171 case SCE_HJ_DOUBLESTRING:
172 case SCE_HJ_SINGLESTRING:
173 case SCE_HJA_DOUBLESTRING:
174 case SCE_HJA_SINGLESTRING:
175 case SCE_HB_STRING:
176 case SCE_HBA_STRING:
177 case SCE_HP_STRING:
178 case SCE_HP_CHARACTER:
179 case SCE_HP_TRIPLE:
180 case SCE_HP_TRIPLEDOUBLE:
181 case SCE_HPA_STRING:
182 case SCE_HPA_CHARACTER:
183 case SCE_HPA_TRIPLE:
184 case SCE_HPA_TRIPLEDOUBLE:
185 case SCE_HPHP_HSTRING:
186 case SCE_HPHP_SIMPLESTRING:
187 case SCE_HPHP_HSTRING_VARIABLE:
188 case SCE_HPHP_COMPLEX_VARIABLE:
189 bResult = true;
190 break;
191 default :
192 bResult = false;
193 break;
194 }
195 return bResult;
196 }
197
198 static inline bool stateAllowsTermination(int state) {
199 bool allowTermination = !isStringState(state);
200 if (allowTermination) {
201 switch (state) {
202 case SCE_HB_COMMENTLINE:
203 case SCE_HPHP_COMMENT:
204 case SCE_HP_COMMENTLINE:
205 case SCE_HPA_COMMENTLINE:
206 allowTermination = false;
207 }
208 }
209 return allowTermination;
210 }
211
212 // not really well done, since it's only comments that should lex the %> and <%
213 static inline bool isCommentASPState(int state) {
214 bool bResult;
215
216 switch (state) {
217 case SCE_HJ_COMMENT:
218 case SCE_HJ_COMMENTLINE:
219 case SCE_HJ_COMMENTDOC:
220 case SCE_HB_COMMENTLINE:
221 case SCE_HP_COMMENTLINE:
222 case SCE_HPHP_COMMENT:
223 case SCE_HPHP_COMMENTLINE:
224 bResult = true;
225 break;
226 default :
227 bResult = false;
228 break;
229 }
230 return bResult;
231 }
232
233 static void classifyAttribHTML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
234 bool wordIsNumber = IsNumber(start, styler);
235 char chAttr = SCE_H_ATTRIBUTEUNKNOWN;
236 if (wordIsNumber) {
237 chAttr = SCE_H_NUMBER;
238 } else {
239 char s[100];
240 GetTextSegment(styler, start, end, s, sizeof(s));
241 if (keywords.InList(s))
242 chAttr = SCE_H_ATTRIBUTE;
243 }
244 if ((chAttr == SCE_H_ATTRIBUTEUNKNOWN) && !keywords)
245 // No keywords -> all are known
246 chAttr = SCE_H_ATTRIBUTE;
247 styler.ColourTo(end, chAttr);
248 }
249
250 static int classifyTagHTML(unsigned int start, unsigned int end,
251 WordList &keywords, Accessor &styler, bool &tagDontFold,
252 bool caseSensitive, bool isXml) {
253 char s[30 + 2];
254 // Copy after the '<'
255 unsigned int i = 0;
256 for (unsigned int cPos = start; cPos <= end && i < 30; cPos++) {
257 char ch = styler[cPos];
258 if ((ch != '<') && (ch != '/')) {
259 s[i++] = caseSensitive ? ch : static_cast<char>(MakeLowerCase(ch));
260 }
261 }
262
263 //The following is only a quick hack, to see if this whole thing would work
264 //we first need the tagname with a trailing space...
265 s[i] = ' ';
266 s[i+1] = '\0';
267
268 // if the current language is XML, I can fold any tag
269 // if the current language is HTML, I don't want to fold certain tags (input, meta, etc.)
270 //...to find it in the list of no-container-tags
271 tagDontFold = (!isXml) && (NULL != strstr("meta link img area br hr input ",s));
272
273 //now we can remove the trailing space
274 s[i] = '\0';
275
276 bool isScript = false;
277 char chAttr = SCE_H_TAGUNKNOWN;
278 if (s[0] == '!') {
279 chAttr = SCE_H_SGML_DEFAULT;
280 } else if (s[0] == '/') { // Closing tag
281 if (keywords.InList(s + 1))
282 chAttr = SCE_H_TAG;
283 } else {
284 if (keywords.InList(s)) {
285 chAttr = SCE_H_TAG;
286 isScript = 0 == strcmp(s, "script");
287 }
288 }
289 if ((chAttr == SCE_H_TAGUNKNOWN) && !keywords) {
290 // No keywords -> all are known
291 chAttr = SCE_H_TAG;
292 isScript = 0 == strcmp(s, "script");
293 }
294 styler.ColourTo(end, chAttr);
295 return isScript ? SCE_H_SCRIPT : chAttr;
296 }
297
298 static void classifyWordHTJS(unsigned int start, unsigned int end,
299 WordList &keywords, Accessor &styler, script_mode inScriptType) {
300 char chAttr = SCE_HJ_WORD;
301 bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.');
302 if (wordIsNumber)
303 chAttr = SCE_HJ_NUMBER;
304 else {
305 char s[30 + 1];
306 unsigned int i = 0;
307 for (; i < end - start + 1 && i < 30; i++) {
308 s[i] = styler[start + i];
309 }
310 s[i] = '\0';
311 if (keywords.InList(s))
312 chAttr = SCE_HJ_KEYWORD;
313 }
314 styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
315 }
316
317 static int classifyWordHTVB(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, script_mode inScriptType) {
318 char chAttr = SCE_HB_IDENTIFIER;
319 bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.');
320 if (wordIsNumber)
321 chAttr = SCE_HB_NUMBER;
322 else {
323 char s[100];
324 GetTextSegment(styler, start, end, s, sizeof(s));
325 if (keywords.InList(s)) {
326 chAttr = SCE_HB_WORD;
327 if (strcmp(s, "rem") == 0)
328 chAttr = SCE_HB_COMMENTLINE;
329 }
330 }
331 styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
332 if (chAttr == SCE_HB_COMMENTLINE)
333 return SCE_HB_COMMENTLINE;
334 else
335 return SCE_HB_DEFAULT;
336 }
337
338 static void classifyWordHTPy(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord, script_mode inScriptType) {
339 bool wordIsNumber = IsADigit(styler[start]);
340 char s[30 + 1];
341 unsigned int i = 0;
342 for (; i < end - start + 1 && i < 30; i++) {
343 s[i] = styler[start + i];
344 }
345 s[i] = '\0';
346 char chAttr = SCE_HP_IDENTIFIER;
347 if (0 == strcmp(prevWord, "class"))
348 chAttr = SCE_HP_CLASSNAME;
349 else if (0 == strcmp(prevWord, "def"))
350 chAttr = SCE_HP_DEFNAME;
351 else if (wordIsNumber)
352 chAttr = SCE_HP_NUMBER;
353 else if (keywords.InList(s))
354 chAttr = SCE_HP_WORD;
355 styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
356 strcpy(prevWord, s);
357 }
358
359 // Update the word colour to default or keyword
360 // Called when in a PHP word
361 static void classifyWordHTPHP(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
362 char chAttr = SCE_HPHP_DEFAULT;
363 bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.' && start+1 <= end && IsADigit(styler[start+1]));
364 if (wordIsNumber)
365 chAttr = SCE_HPHP_NUMBER;
366 else {
367 char s[100];
368 GetTextSegment(styler, start, end, s, sizeof(s));
369 if (keywords.InList(s))
370 chAttr = SCE_HPHP_WORD;
371 }
372 styler.ColourTo(end, chAttr);
373 }
374
375 static bool isWordHSGML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
376 char s[30 + 1];
377 unsigned int i = 0;
378 for (; i < end - start + 1 && i < 30; i++) {
379 s[i] = styler[start + i];
380 }
381 s[i] = '\0';
382 return keywords.InList(s);
383 }
384
385 static bool isWordCdata(unsigned int start, unsigned int end, Accessor &styler) {
386 char s[30 + 1];
387 unsigned int i = 0;
388 for (; i < end - start + 1 && i < 30; i++) {
389 s[i] = styler[start + i];
390 }
391 s[i] = '\0';
392 return (0 == strcmp(s, "[CDATA["));
393 }
394
395 // Return the first state to reach when entering a scripting language
396 static int StateForScript(script_type scriptLanguage) {
397 int Result;
398 switch (scriptLanguage) {
399 case eScriptVBS:
400 Result = SCE_HB_START;
401 break;
402 case eScriptPython:
403 Result = SCE_HP_START;
404 break;
405 case eScriptPHP:
406 Result = SCE_HPHP_DEFAULT;
407 break;
408 case eScriptXML:
409 Result = SCE_H_TAGUNKNOWN;
410 break;
411 case eScriptSGML:
412 Result = SCE_H_SGML_DEFAULT;
413 break;
414 default :
415 Result = SCE_HJ_START;
416 break;
417 }
418 return Result;
419 }
420
// Return true when ch may be part of an HTML word: any non-ASCII value,
// an ASCII letter or digit, or one of . - _ : ! #
static inline bool ishtmlwordchar(int ch) {
	if (!isascii(ch))
		return true;
	if (isalnum(ch))
		return true;
	switch (ch) {
	case '.': case '-': case '_': case ':': case '!': case '#':
		return true;
	default:
		return false;
	}
}
425
// Return true when ch may be part of an SGML word: any non-ASCII value,
// an ASCII letter or digit, or one of . _ : ! # [
static inline bool issgmlwordchar(int ch) {
	if (!isascii(ch))
		return true;
	if (isalnum(ch))
		return true;
	switch (ch) {
	case '.': case '_': case ':': case '!': case '#': case '[':
		return true;
	default:
		return false;
	}
}
430
// Return true when ch may begin a PHP identifier: an ASCII letter, '_',
// or any value from 0x7f upwards.
static inline bool IsPhpWordStart(int ch) {
	if (ch >= 0x7f)
		return true;
	return isascii(ch) && ((ch == '_') || isalpha(ch));
}
434
435 static inline bool IsPhpWordChar(int ch) {
436 return IsADigit(ch) || IsPhpWordStart(ch);
437 }
438
439 static bool InTagState(int state) {
440 return state == SCE_H_TAG || state == SCE_H_TAGUNKNOWN ||
441 state == SCE_H_SCRIPT ||
442 state == SCE_H_ATTRIBUTE || state == SCE_H_ATTRIBUTEUNKNOWN ||
443 state == SCE_H_NUMBER || state == SCE_H_OTHER ||
444 state == SCE_H_DOUBLESTRING || state == SCE_H_SINGLESTRING;
445 }
446
447 static bool IsCommentState(const int state) {
448 return state == SCE_H_COMMENT || state == SCE_H_SGML_COMMENT;
449 }
450
451 static bool IsScriptCommentState(const int state) {
452 return state == SCE_HJ_COMMENT || state == SCE_HJ_COMMENTLINE || state == SCE_HJA_COMMENT ||
453 state == SCE_HJA_COMMENTLINE || state == SCE_HB_COMMENTLINE || state == SCE_HBA_COMMENTLINE;
454 }
455
// Return true when ch is a carriage return or a line feed.
static bool isLineEnd(int ch) {
	switch (ch) {
	case '\n':
	case '\r':
		return true;
	default:
		return false;
	}
}
459
// Return true when ch is a character after which a '/' may start a regular
// expression: '(', '=' or ','.
static bool isOKBeforeRE(int ch) {
	switch (ch) {
	case '(':
	case '=':
	case ',':
		return true;
	default:
		return false;
	}
}
463
464 static bool isPHPStringState(int state) {
465 return
466 (state == SCE_HPHP_HSTRING) ||
467 (state == SCE_HPHP_SIMPLESTRING) ||
468 (state == SCE_HPHP_HSTRING_VARIABLE) ||
469 (state == SCE_HPHP_COMPLEX_VARIABLE);
470 }
471
472 static int FindPhpStringDelimiter(char *phpStringDelimiter, const int phpStringDelimiterSize, int i, const int lengthDoc, Accessor &styler) {
473 int j;
474 while (i < lengthDoc && (styler[i] == ' ' || styler[i] == '\t'))
475 i++;
476 phpStringDelimiter[0] = '\n';
477 for (j = i; j < lengthDoc && styler[j] != '\n' && styler[j] != '\r'; j++) {
478 if (j - i < phpStringDelimiterSize - 2)
479 phpStringDelimiter[j-i+1] = styler[j];
480 else
481 i++;
482 }
483 phpStringDelimiter[j-i+1] = '\0';
484 return j;
485 }
486
487 static void ColouriseHyperTextDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
488 Accessor &styler, bool isXml) {
489 WordList &keywords = *keywordlists[0];
490 WordList &keywords2 = *keywordlists[1];
491 WordList &keywords3 = *keywordlists[2];
492 WordList &keywords4 = *keywordlists[3];
493 WordList &keywords5 = *keywordlists[4];
494 WordList &keywords6 = *keywordlists[5]; // SGML (DTD) keywords
495
496 // Lexer for HTML requires more lexical states (8 bits worth) than most lexers
497 styler.StartAt(startPos, static_cast<char>(STYLE_MAX));
498 char prevWord[200];
499 prevWord[0] = '\0';
500 char phpStringDelimiter[200]; // PHP is not limited in length, we are
501 phpStringDelimiter[0] = '\0';
502 int StateToPrint = initStyle;
503 int state = stateForPrintState(StateToPrint);
504
505 // If inside a tag, it may be a script tag, so reread from the start to ensure any language tags are seen
506 if (InTagState(state)) {
507 while ((startPos > 0) && (InTagState(styler.StyleAt(startPos - 1)))) {
508 startPos--;
509 length++;
510 }
511 state = SCE_H_DEFAULT;
512 }
513 // String can be heredoc, must find a delimiter first
514 while (startPos > 0 && isPHPStringState(state) && state != SCE_HPHP_SIMPLESTRING) {
515 startPos--;
516 length++;
517 state = styler.StyleAt(startPos);
518 }
519 styler.StartAt(startPos, static_cast<char>(STYLE_MAX));
520
521 int lineCurrent = styler.GetLine(startPos);
522 int lineState;
523 if (lineCurrent > 0) {
524 lineState = styler.GetLineState(lineCurrent);
525 } else {
526 // Default client and ASP scripting language is JavaScript
527 lineState = eScriptJS << 8;
528 lineState |= styler.GetPropertyInt("asp.default.language", eScriptJS) << 4;
529 }
530 script_mode inScriptType = script_mode((lineState >> 0) & 0x03); // 2 bits of scripting mode
531 bool tagOpened = (lineState >> 2) & 0x01; // 1 bit to know if we are in an opened tag
532 bool tagClosing = (lineState >> 3) & 0x01; // 1 bit to know if we are in a closing tag
533 bool tagDontFold = false; //some HTML tags should not be folded
534 script_type aspScript = script_type((lineState >> 4) & 0x0F); // 4 bits of script name
535 script_type clientScript = script_type((lineState >> 8) & 0x0F); // 4 bits of script name
536 int beforePreProc = (lineState >> 12) & 0xFF; // 8 bits of state
537
538 script_type scriptLanguage = ScriptOfState(state);
539
540 const bool foldHTML = styler.GetPropertyInt("fold.html", 0) != 0;
541 const bool fold = foldHTML && styler.GetPropertyInt("fold", 0);
542 const bool foldHTMLPreprocessor = foldHTML && styler.GetPropertyInt("fold.html.preprocessor", 1);
543 const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
544 const bool caseSensitive = styler.GetPropertyInt("html.tags.case.sensitive", 0) != 0;
545
546 const CharacterSet setHTMLWord(CharacterSet::setAlphaNum, ".-_:!#", 0x80, true);
547 const CharacterSet setTagContinue(CharacterSet::setAlphaNum, ".-_:!#[", 0x80, true);
548 const CharacterSet setAttributeContinue(CharacterSet::setAlphaNum, ".-_:!#/", 0x80, true);
549
550 int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
551 int levelCurrent = levelPrev;
552 int visibleChars = 0;
553
554 int chPrev = ' ';
555 int ch = ' ';
556 int chPrevNonWhite = ' ';
557 // look back to set chPrevNonWhite properly for better regex colouring
558 if (scriptLanguage == eScriptJS && startPos > 0) {
559 int back = startPos;
560 int style = 0;
561 while (--back) {
562 style = styler.StyleAt(back);
563 if (style < SCE_HJ_DEFAULT || style > SCE_HJ_COMMENTDOC)
564 // includes SCE_HJ_COMMENT & SCE_HJ_COMMENTLINE
565 break;
566 }
567 if (style == SCE_HJ_SYMBOLS) {
568 chPrevNonWhite = static_cast<unsigned char>(styler.SafeGetCharAt(back));
569 }
570 }
571
572 styler.StartSegment(startPos);
573 const int lengthDoc = startPos + length;
574 for (int i = startPos; i < lengthDoc; i++) {
575 const int chPrev2 = chPrev;
576 chPrev = ch;
577 if (!IsASpace(ch) && state != SCE_HJ_COMMENT &&
578 state != SCE_HJ_COMMENTLINE && state != SCE_HJ_COMMENTDOC)
579 chPrevNonWhite = ch;
580 ch = static_cast<unsigned char>(styler[i]);
581 int chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
582 const int chNext2 = static_cast<unsigned char>(styler.SafeGetCharAt(i + 2));
583
584 // Handle DBCS codepages
585 if (styler.IsLeadByte(static_cast<char>(ch))) {
586 chPrev = ' ';
587 i += 1;
588 continue;
589 }
590
591 if ((!IsASpace(ch) || !foldCompact) && fold)
592 visibleChars++;
593
594 // decide what is the current state to print (depending of the script tag)
595 StateToPrint = statePrintForState(state, inScriptType);
596
597 // handle script folding
598 if (fold) {
599 switch (scriptLanguage) {
600 case eScriptJS:
601 case eScriptPHP:
602 //not currently supported case eScriptVBS:
603
604 if ((state != SCE_HPHP_COMMENT) && (state != SCE_HPHP_COMMENTLINE) && (state != SCE_HJ_COMMENT) && (state != SCE_HJ_COMMENTLINE) && (state != SCE_HJ_COMMENTDOC) && (!isStringState(state))) {
605 //Platform::DebugPrintf("state=%d, StateToPrint=%d, initStyle=%d\n", state, StateToPrint, initStyle);
606 //if ((state == SCE_HPHP_OPERATOR) || (state == SCE_HPHP_DEFAULT) || (state == SCE_HJ_SYMBOLS) || (state == SCE_HJ_START) || (state == SCE_HJ_DEFAULT)) {
607 if ((ch == '{') || (ch == '}')) {
608 levelCurrent += (ch == '{') ? 1 : -1;
609 }
610 }
611 break;
612 case eScriptPython:
613 if (state != SCE_HP_COMMENTLINE) {
614 if ((ch == ':') && ((chNext == '\n') || (chNext == '\r' && chNext2 == '\n'))) {
615 levelCurrent++;
616 } else if ((ch == '\n') && !((chNext == '\r') && (chNext2 == '\n')) && (chNext != '\n')) {
617 // check if the number of tabs is lower than the level
618 int Findlevel = (levelCurrent & ~SC_FOLDLEVELBASE) * 8;
619 for (int j = 0; Findlevel > 0; j++) {
620 char chTmp = styler.SafeGetCharAt(i + j + 1);
621 if (chTmp == '\t') {
622 Findlevel -= 8;
623 } else if (chTmp == ' ') {
624 Findlevel--;
625 } else {
626 break;
627 }
628 }
629
630 if (Findlevel > 0) {
631 levelCurrent -= Findlevel / 8;
632 if (Findlevel % 8)
633 levelCurrent--;
634 }
635 }
636 }
637 break;
638 default:
639 break;
640 }
641 }
642
643 if ((ch == '\r' && chNext != '\n') || (ch == '\n')) {
644 // Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix)
645 // Avoid triggering two times on Dos/Win
646 // New line -> record any line state onto /next/ line
647 if (fold) {
648 int lev = levelPrev;
649 if (visibleChars == 0)
650 lev |= SC_FOLDLEVELWHITEFLAG;
651 if ((levelCurrent > levelPrev) && (visibleChars > 0))
652 lev |= SC_FOLDLEVELHEADERFLAG;
653
654 styler.SetLevel(lineCurrent, lev);
655 visibleChars = 0;
656 levelPrev = levelCurrent;
657 }
658 lineCurrent++;
659 styler.SetLineState(lineCurrent,
660 ((inScriptType & 0x03) << 0) |
661 ((tagOpened & 0x01) << 2) |
662 ((tagClosing & 0x01) << 3) |
663 ((aspScript & 0x0F) << 4) |
664 ((clientScript & 0x0F) << 8) |
665 ((beforePreProc & 0xFF) << 12));
666 }
667
668 // generic end of script processing
669 else if ((inScriptType == eNonHtmlScript) && (ch == '<') && (chNext == '/')) {
670 // Check if it's the end of the script tag (or any other HTML tag)
671 switch (state) {
672 // in these cases, you can embed HTML tags (to confirm !!!!!!!!!!!!!!!!!!!!!!)
673 case SCE_H_DOUBLESTRING:
674 case SCE_H_SINGLESTRING:
675 case SCE_HJ_COMMENT:
676 case SCE_HJ_COMMENTDOC:
677 //case SCE_HJ_COMMENTLINE: // removed as this is a common thing done to hide
678 // the end of script marker from some JS interpreters.
679 case SCE_HB_COMMENTLINE:
680 case SCE_HBA_COMMENTLINE:
681 case SCE_HJ_DOUBLESTRING:
682 case SCE_HJ_SINGLESTRING:
683 case SCE_HJ_REGEX:
684 case SCE_HB_STRING:
685 case SCE_HBA_STRING:
686 case SCE_HP_STRING:
687 case SCE_HP_TRIPLE:
688 case SCE_HP_TRIPLEDOUBLE:
689 break;
690 default :
691 // check if the closing tag is a script tag
692 if (state == SCE_HJ_COMMENTLINE || isXml) {
693 char tag[7]; // room for the <script> tag
694 int j = 0;
695 char chr = styler.SafeGetCharAt(i+2);
696 while (j < 6 && !IsASpace(chr)) {
697 tag[j++] = static_cast<char>(MakeLowerCase(chr));
698 chr = styler.SafeGetCharAt(i+2+j);
699 }
700 tag[j] = '\0';
701 if (strcmp(tag, "script") != 0) break;
702 }
703 // closing tag of the script (it's a closing HTML tag anyway)
704 styler.ColourTo(i - 1, StateToPrint);
705 state = SCE_H_TAGUNKNOWN;
706 inScriptType = eHtml;
707 scriptLanguage = eScriptNone;
708 clientScript = eScriptJS;
709 i += 2;
710 visibleChars += 2;
711 tagClosing = true;
712 continue;
713 }
714 }
715
716 /////////////////////////////////////
717 // handle the start of PHP pre-processor = Non-HTML
718 else if ((state != SCE_H_ASPAT) &&
719 !isPHPStringState(state) &&
720 (state != SCE_HPHP_COMMENT) &&
721 (ch == '<') &&
722 (chNext == '?') &&
723 !IsScriptCommentState(state) ) {
724 scriptLanguage = segIsScriptingIndicator(styler, i + 2, i + 10, eScriptPHP);
725 if (scriptLanguage != eScriptPHP && isStringState(state)) continue;
726 styler.ColourTo(i - 1, StateToPrint);
727 beforePreProc = state;
728 i++;
729 visibleChars++;
730 i += PrintScriptingIndicatorOffset(styler, styler.GetStartSegment() + 2, i + 10);
731 if (scriptLanguage == eScriptXML)
732 styler.ColourTo(i, SCE_H_XMLSTART);
733 else
734 styler.ColourTo(i, SCE_H_QUESTION);
735 state = StateForScript(scriptLanguage);
736 if (inScriptType == eNonHtmlScript)
737 inScriptType = eNonHtmlScriptPreProc;
738 else
739 inScriptType = eNonHtmlPreProc;
740 // Fold whole script, but not if the XML first tag (all XML-like tags in this case)
741 if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
742 levelCurrent++;
743 }
744 // should be better
745 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
746 continue;
747 }
748
749 // handle the start of ASP pre-processor = Non-HTML
750 else if (!isCommentASPState(state) && (ch == '<') && (chNext == '%') && !isPHPStringState(state)) {
751 styler.ColourTo(i - 1, StateToPrint);
752 beforePreProc = state;
753 if (inScriptType == eNonHtmlScript)
754 inScriptType = eNonHtmlScriptPreProc;
755 else
756 inScriptType = eNonHtmlPreProc;
757
758 if (chNext2 == '@') {
759 i += 2; // place as if it was the second next char treated
760 visibleChars += 2;
761 state = SCE_H_ASPAT;
762 } else if ((chNext2 == '-') && (styler.SafeGetCharAt(i + 3) == '-')) {
763 styler.ColourTo(i + 3, SCE_H_ASP);
764 state = SCE_H_XCCOMMENT;
765 scriptLanguage = eScriptVBS;
766 continue;
767 } else {
768 if (chNext2 == '=') {
769 i += 2; // place as if it was the second next char treated
770 visibleChars += 2;
771 } else {
772 i++; // place as if it was the next char treated
773 visibleChars++;
774 }
775
776 state = StateForScript(aspScript);
777 }
778 scriptLanguage = eScriptVBS;
779 styler.ColourTo(i, SCE_H_ASP);
780 // fold whole script
781 if (foldHTMLPreprocessor)
782 levelCurrent++;
783 // should be better
784 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
785 continue;
786 }
787
788 /////////////////////////////////////
789 // handle the start of SGML language (DTD)
790 else if (((scriptLanguage == eScriptNone) || (scriptLanguage == eScriptXML)) &&
791 (chPrev == '<') &&
792 (ch == '!') &&
793 (StateToPrint != SCE_H_CDATA) &&
794 (!IsCommentState(StateToPrint)) &&
795 (!IsScriptCommentState(StateToPrint)) ) {
796 beforePreProc = state;
797 styler.ColourTo(i - 2, StateToPrint);
798 if ((chNext == '-') && (chNext2 == '-')) {
799 state = SCE_H_COMMENT; // wait for a pending command
800 styler.ColourTo(i + 2, SCE_H_COMMENT);
801 i += 2; // follow styling after the --
802 } else if (isWordCdata(i + 1, i + 7, styler)) {
803 state = SCE_H_CDATA;
804 } else {
805 styler.ColourTo(i, SCE_H_SGML_DEFAULT); // <! is default
806 scriptLanguage = eScriptSGML;
807 state = SCE_H_SGML_COMMAND; // wait for a pending command
808 }
809 // fold whole tag (-- when closing the tag)
810 if (foldHTMLPreprocessor)
811 levelCurrent++;
812 continue;
813 }
814
815 // handle the end of a pre-processor = Non-HTML
816 else if ((
817 ((inScriptType == eNonHtmlPreProc)
818 || (inScriptType == eNonHtmlScriptPreProc)) && (
819 ((scriptLanguage != eScriptNone) && stateAllowsTermination(state) && ((ch == '%') || (ch == '?')))
820 ) && (chNext == '>')) ||
821 ((scriptLanguage == eScriptSGML) && (ch == '>') && (state != SCE_H_SGML_COMMENT))) {
822 if (state == SCE_H_ASPAT) {
823 aspScript = segIsScriptingIndicator(styler,
824 styler.GetStartSegment(), i - 1, aspScript);
825 }
826 // Bounce out of any ASP mode
827 switch (state) {
828 case SCE_HJ_WORD:
829 classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
830 break;
831 case SCE_HB_WORD:
832 classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
833 break;
834 case SCE_HP_WORD:
835 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
836 break;
837 case SCE_HPHP_WORD:
838 classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
839 break;
840 case SCE_H_XCCOMMENT:
841 styler.ColourTo(i - 1, state);
842 break;
843 default :
844 styler.ColourTo(i - 1, StateToPrint);
845 break;
846 }
847 if (scriptLanguage != eScriptSGML) {
848 i++;
849 visibleChars++;
850 }
851 if (ch == '%')
852 styler.ColourTo(i, SCE_H_ASP);
853 else if (scriptLanguage == eScriptXML)
854 styler.ColourTo(i, SCE_H_XMLEND);
855 else if (scriptLanguage == eScriptSGML)
856 styler.ColourTo(i, SCE_H_SGML_DEFAULT);
857 else
858 styler.ColourTo(i, SCE_H_QUESTION);
859 state = beforePreProc;
860 if (inScriptType == eNonHtmlScriptPreProc)
861 inScriptType = eNonHtmlScript;
862 else
863 inScriptType = eHtml;
864 // Unfold all scripting languages, except for XML tag
865 if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
866 levelCurrent--;
867 }
868 scriptLanguage = eScriptNone;
869 continue;
870 }
871 /////////////////////////////////////
872
873 switch (state) {
874 case SCE_H_DEFAULT:
875 if (ch == '<') {
876 // in HTML, fold on tag open and unfold on tag close
877 tagOpened = true;
878 tagClosing = (chNext == '/');
879 styler.ColourTo(i - 1, StateToPrint);
880 if (chNext != '!')
881 state = SCE_H_TAGUNKNOWN;
882 } else if (ch == '&') {
883 styler.ColourTo(i - 1, SCE_H_DEFAULT);
884 state = SCE_H_ENTITY;
885 }
886 break;
887 case SCE_H_SGML_DEFAULT:
888 case SCE_H_SGML_BLOCK_DEFAULT:
889 // if (scriptLanguage == eScriptSGMLblock)
890 // StateToPrint = SCE_H_SGML_BLOCK_DEFAULT;
891
892 if (ch == '\"') {
893 styler.ColourTo(i - 1, StateToPrint);
894 state = SCE_H_SGML_DOUBLESTRING;
895 } else if (ch == '\'') {
896 styler.ColourTo(i - 1, StateToPrint);
897 state = SCE_H_SGML_SIMPLESTRING;
898 } else if ((ch == '-') && (chPrev == '-')) {
899 if (static_cast<int>(styler.GetStartSegment()) <= (i - 2)) {
900 styler.ColourTo(i - 2, StateToPrint);
901 }
902 state = SCE_H_SGML_COMMENT;
903 } else if (isascii(ch) && isalpha(ch) && (chPrev == '%')) {
904 styler.ColourTo(i - 2, StateToPrint);
905 state = SCE_H_SGML_ENTITY;
906 } else if (ch == '#') {
907 styler.ColourTo(i - 1, StateToPrint);
908 state = SCE_H_SGML_SPECIAL;
909 } else if (ch == '[') {
910 styler.ColourTo(i - 1, StateToPrint);
911 scriptLanguage = eScriptSGMLblock;
912 state = SCE_H_SGML_BLOCK_DEFAULT;
913 } else if (ch == ']') {
914 if (scriptLanguage == eScriptSGMLblock) {
915 styler.ColourTo(i, StateToPrint);
916 scriptLanguage = eScriptSGML;
917 } else {
918 styler.ColourTo(i - 1, StateToPrint);
919 styler.ColourTo(i, SCE_H_SGML_ERROR);
920 }
921 state = SCE_H_SGML_DEFAULT;
922 } else if (scriptLanguage == eScriptSGMLblock) {
923 if ((ch == '!') && (chPrev == '<')) {
924 styler.ColourTo(i - 2, StateToPrint);
925 styler.ColourTo(i, SCE_H_SGML_DEFAULT);
926 state = SCE_H_SGML_COMMAND;
927 } else if (ch == '>') {
928 styler.ColourTo(i - 1, StateToPrint);
929 styler.ColourTo(i, SCE_H_SGML_DEFAULT);
930 }
931 }
932 break;
933 case SCE_H_SGML_COMMAND:
934 if ((ch == '-') && (chPrev == '-')) {
935 styler.ColourTo(i - 2, StateToPrint);
936 state = SCE_H_SGML_COMMENT;
937 } else if (!issgmlwordchar(ch)) {
938 if (isWordHSGML(styler.GetStartSegment(), i - 1, keywords6, styler)) {
939 styler.ColourTo(i - 1, StateToPrint);
940 state = SCE_H_SGML_1ST_PARAM;
941 } else {
942 state = SCE_H_SGML_ERROR;
943 }
944 }
945 break;
946 case SCE_H_SGML_1ST_PARAM:
947 // wait for the beginning of the word
948 if ((ch == '-') && (chPrev == '-')) {
949 if (scriptLanguage == eScriptSGMLblock) {
950 styler.ColourTo(i - 2, SCE_H_SGML_BLOCK_DEFAULT);
951 } else {
952 styler.ColourTo(i - 2, SCE_H_SGML_DEFAULT);
953 }
954 state = SCE_H_SGML_1ST_PARAM_COMMENT;
955 } else if (issgmlwordchar(ch)) {
956 if (scriptLanguage == eScriptSGMLblock) {
957 styler.ColourTo(i - 1, SCE_H_SGML_BLOCK_DEFAULT);
958 } else {
959 styler.ColourTo(i - 1, SCE_H_SGML_DEFAULT);
960 }
961 // find the length of the word
962 int size = 1;
963 while (setHTMLWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(i + size))))
964 size++;
965 styler.ColourTo(i + size - 1, StateToPrint);
966 i += size - 1;
967 visibleChars += size - 1;
968 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
969 if (scriptLanguage == eScriptSGMLblock) {
970 state = SCE_H_SGML_BLOCK_DEFAULT;
971 } else {
972 state = SCE_H_SGML_DEFAULT;
973 }
974 continue;
975 }
976 break;
977 case SCE_H_SGML_ERROR:
978 if ((ch == '-') && (chPrev == '-')) {
979 styler.ColourTo(i - 2, StateToPrint);
980 state = SCE_H_SGML_COMMENT;
981 }
982 case SCE_H_SGML_DOUBLESTRING:
983 if (ch == '\"') {
984 styler.ColourTo(i, StateToPrint);
985 state = SCE_H_SGML_DEFAULT;
986 }
987 break;
988 case SCE_H_SGML_SIMPLESTRING:
989 if (ch == '\'') {
990 styler.ColourTo(i, StateToPrint);
991 state = SCE_H_SGML_DEFAULT;
992 }
993 break;
994 case SCE_H_SGML_COMMENT:
995 if ((ch == '-') && (chPrev == '-')) {
996 styler.ColourTo(i, StateToPrint);
997 state = SCE_H_SGML_DEFAULT;
998 }
999 break;
1000 case SCE_H_CDATA:
1001 if ((chPrev2 == ']') && (chPrev == ']') && (ch == '>')) {
1002 styler.ColourTo(i, StateToPrint);
1003 state = SCE_H_DEFAULT;
1004 levelCurrent--;
1005 }
1006 break;
1007 case SCE_H_COMMENT:
1008 if ((chPrev2 == '-') && (chPrev == '-') && (ch == '>')) {
1009 styler.ColourTo(i, StateToPrint);
1010 state = SCE_H_DEFAULT;
1011 levelCurrent--;
1012 }
1013 break;
1014 case SCE_H_SGML_1ST_PARAM_COMMENT:
1015 if ((ch == '-') && (chPrev == '-')) {
1016 styler.ColourTo(i, SCE_H_SGML_COMMENT);
1017 state = SCE_H_SGML_1ST_PARAM;
1018 }
1019 break;
1020 case SCE_H_SGML_SPECIAL:
1021 if (!(isascii(ch) && isupper(ch))) {
1022 styler.ColourTo(i - 1, StateToPrint);
1023 if (isalnum(ch)) {
1024 state = SCE_H_SGML_ERROR;
1025 } else {
1026 state = SCE_H_SGML_DEFAULT;
1027 }
1028 }
1029 break;
1030 case SCE_H_SGML_ENTITY:
1031 if (ch == ';') {
1032 styler.ColourTo(i, StateToPrint);
1033 state = SCE_H_SGML_DEFAULT;
1034 } else if (!(isascii(ch) && isalnum(ch)) && ch != '-' && ch != '.') {
1035 styler.ColourTo(i, SCE_H_SGML_ERROR);
1036 state = SCE_H_SGML_DEFAULT;
1037 }
1038 break;
1039 case SCE_H_ENTITY:
1040 if (ch == ';') {
1041 styler.ColourTo(i, StateToPrint);
1042 state = SCE_H_DEFAULT;
1043 }
1044 if (ch != '#' && !(isascii(ch) && isalnum(ch)) // Should check that '#' follows '&', but it is unlikely anyway...
1045 && ch != '.' && ch != '-' && ch != '_' && ch != ':') { // valid in XML
1046 styler.ColourTo(i, SCE_H_TAGUNKNOWN);
1047 state = SCE_H_DEFAULT;
1048 }
1049 break;
1050 case SCE_H_TAGUNKNOWN:
1051 if (!setTagContinue.Contains(ch) && !((ch == '/') && (chPrev == '<'))) {
1052 int eClass = classifyTagHTML(styler.GetStartSegment(),
1053 i - 1, keywords, styler, tagDontFold, caseSensitive, isXml);
1054 if (eClass == SCE_H_SCRIPT) {
1055 if (!tagClosing) {
1056 inScriptType = eNonHtmlScript;
1057 scriptLanguage = clientScript;
1058 eClass = SCE_H_TAG;
1059 } else {
1060 scriptLanguage = eScriptNone;
1061 eClass = SCE_H_TAG;
1062 }
1063 }
1064 if (ch == '>') {
1065 styler.ColourTo(i, eClass);
1066 if (inScriptType == eNonHtmlScript) {
1067 state = StateForScript(scriptLanguage);
1068 } else {
1069 state = SCE_H_DEFAULT;
1070 }
1071 tagOpened = false;
1072 if (!tagDontFold){
1073 if (tagClosing) {
1074 levelCurrent--;
1075 } else {
1076 levelCurrent++;
1077 }
1078 }
1079 tagClosing = false;
1080 } else if (ch == '/' && chNext == '>') {
1081 if (eClass == SCE_H_TAGUNKNOWN) {
1082 styler.ColourTo(i + 1, SCE_H_TAGUNKNOWN);
1083 } else {
1084 styler.ColourTo(i - 1, StateToPrint);
1085 styler.ColourTo(i + 1, SCE_H_TAGEND);
1086 }
1087 i++;
1088 ch = chNext;
1089 state = SCE_H_DEFAULT;
1090 tagOpened = false;
1091 } else {
1092 if (eClass != SCE_H_TAGUNKNOWN) {
1093 if (eClass == SCE_H_SGML_DEFAULT) {
1094 state = SCE_H_SGML_DEFAULT;
1095 } else {
1096 state = SCE_H_OTHER;
1097 }
1098 }
1099 }
1100 }
1101 break;
1102 case SCE_H_ATTRIBUTE:
1103 if (!setAttributeContinue.Contains(ch)) {
1104 if (inScriptType == eNonHtmlScript) {
1105 int scriptLanguagePrev = scriptLanguage;
1106 clientScript = segIsScriptingIndicator(styler, styler.GetStartSegment(), i - 1, scriptLanguage);
1107 scriptLanguage = clientScript;
1108 if ((scriptLanguagePrev != scriptLanguage) && (scriptLanguage == eScriptNone))
1109 inScriptType = eHtml;
1110 }
1111 classifyAttribHTML(styler.GetStartSegment(), i - 1, keywords, styler);
1112 if (ch == '>') {
1113 styler.ColourTo(i, SCE_H_TAG);
1114 if (inScriptType == eNonHtmlScript) {
1115 state = StateForScript(scriptLanguage);
1116 } else {
1117 state = SCE_H_DEFAULT;
1118 }
1119 tagOpened = false;
1120 if (!tagDontFold){
1121 if (tagClosing){
1122 levelCurrent--;
1123 } else {
1124 levelCurrent++;
1125 }
1126 }
1127 tagClosing = false;
1128 } else if (ch == '=') {
1129 styler.ColourTo(i, SCE_H_OTHER);
1130 state = SCE_H_VALUE;
1131 } else {
1132 state = SCE_H_OTHER;
1133 }
1134 }
1135 break;
1136 case SCE_H_OTHER:
1137 if (ch == '>') {
1138 styler.ColourTo(i - 1, StateToPrint);
1139 styler.ColourTo(i, SCE_H_TAG);
1140 if (inScriptType == eNonHtmlScript) {
1141 state = StateForScript(scriptLanguage);
1142 } else {
1143 state = SCE_H_DEFAULT;
1144 }
1145 tagOpened = false;
1146 if (!tagDontFold){
1147 if (tagClosing){
1148 levelCurrent--;
1149 } else {
1150 levelCurrent++;
1151 }
1152 }
1153 tagClosing = false;
1154 } else if (ch == '\"') {
1155 styler.ColourTo(i - 1, StateToPrint);
1156 state = SCE_H_DOUBLESTRING;
1157 } else if (ch == '\'') {
1158 styler.ColourTo(i - 1, StateToPrint);
1159 state = SCE_H_SINGLESTRING;
1160 } else if (ch == '=') {
1161 styler.ColourTo(i, StateToPrint);
1162 state = SCE_H_VALUE;
1163 } else if (ch == '/' && chNext == '>') {
1164 styler.ColourTo(i - 1, StateToPrint);
1165 styler.ColourTo(i + 1, SCE_H_TAGEND);
1166 i++;
1167 ch = chNext;
1168 state = SCE_H_DEFAULT;
1169 tagOpened = false;
1170 } else if (ch == '?' && chNext == '>') {
1171 styler.ColourTo(i - 1, StateToPrint);
1172 styler.ColourTo(i + 1, SCE_H_XMLEND);
1173 i++;
1174 ch = chNext;
1175 state = SCE_H_DEFAULT;
1176 } else if (setHTMLWord.Contains(ch)) {
1177 styler.ColourTo(i - 1, StateToPrint);
1178 state = SCE_H_ATTRIBUTE;
1179 }
1180 break;
1181 case SCE_H_DOUBLESTRING:
1182 if (ch == '\"') {
1183 if (inScriptType == eNonHtmlScript) {
1184 scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
1185 }
1186 styler.ColourTo(i, SCE_H_DOUBLESTRING);
1187 state = SCE_H_OTHER;
1188 }
1189 break;
1190 case SCE_H_SINGLESTRING:
1191 if (ch == '\'') {
1192 if (inScriptType == eNonHtmlScript) {
1193 scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
1194 }
1195 styler.ColourTo(i, SCE_H_SINGLESTRING);
1196 state = SCE_H_OTHER;
1197 }
1198 break;
1199 case SCE_H_VALUE:
1200 if (!setHTMLWord.Contains(ch)) {
1201 if (ch == '\"' && chPrev == '=') {
1202 // Should really test for being first character
1203 state = SCE_H_DOUBLESTRING;
1204 } else if (ch == '\'' && chPrev == '=') {
1205 state = SCE_H_SINGLESTRING;
1206 } else {
1207 if (IsNumber(styler.GetStartSegment(), styler)) {
1208 styler.ColourTo(i - 1, SCE_H_NUMBER);
1209 } else {
1210 styler.ColourTo(i - 1, StateToPrint);
1211 }
1212 if (ch == '>') {
1213 styler.ColourTo(i, SCE_H_TAG);
1214 if (inScriptType == eNonHtmlScript) {
1215 state = StateForScript(scriptLanguage);
1216 } else {
1217 state = SCE_H_DEFAULT;
1218 }
1219 tagOpened = false;
1220 if (!tagDontFold){
1221 if (tagClosing){
1222 levelCurrent--;
1223 } else {
1224 levelCurrent++;
1225 }
1226 }
1227 tagClosing = false;
1228 } else {
1229 state = SCE_H_OTHER;
1230 }
1231 }
1232 }
1233 break;
1234 case SCE_HJ_DEFAULT:
1235 case SCE_HJ_START:
1236 case SCE_HJ_SYMBOLS:
1237 if (IsAWordStart(ch)) {
1238 styler.ColourTo(i - 1, StateToPrint);
1239 state = SCE_HJ_WORD;
1240 } else if (ch == '/' && chNext == '*') {
1241 styler.ColourTo(i - 1, StateToPrint);
1242 if (chNext2 == '*')
1243 state = SCE_HJ_COMMENTDOC;
1244 else
1245 state = SCE_HJ_COMMENT;
1246 } else if (ch == '/' && chNext == '/') {
1247 styler.ColourTo(i - 1, StateToPrint);
1248 state = SCE_HJ_COMMENTLINE;
1249 } else if (ch == '/' && isOKBeforeRE(chPrevNonWhite)) {
1250 styler.ColourTo(i - 1, StateToPrint);
1251 state = SCE_HJ_REGEX;
1252 } else if (ch == '\"') {
1253 styler.ColourTo(i - 1, StateToPrint);
1254 state = SCE_HJ_DOUBLESTRING;
1255 } else if (ch == '\'') {
1256 styler.ColourTo(i - 1, StateToPrint);
1257 state = SCE_HJ_SINGLESTRING;
1258 } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1259 styler.SafeGetCharAt(i + 3) == '-') {
1260 styler.ColourTo(i - 1, StateToPrint);
1261 state = SCE_HJ_COMMENTLINE;
1262 } else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1263 styler.ColourTo(i - 1, StateToPrint);
1264 state = SCE_HJ_COMMENTLINE;
1265 i += 2;
1266 } else if (IsOperator(ch)) {
1267 styler.ColourTo(i - 1, StateToPrint);
1268 styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1269 state = SCE_HJ_DEFAULT;
1270 } else if ((ch == ' ') || (ch == '\t')) {
1271 if (state == SCE_HJ_START) {
1272 styler.ColourTo(i - 1, StateToPrint);
1273 state = SCE_HJ_DEFAULT;
1274 }
1275 }
1276 break;
1277 case SCE_HJ_WORD:
1278 if (!IsAWordChar(ch)) {
1279 classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
1280 //styler.ColourTo(i - 1, eHTJSKeyword);
1281 state = SCE_HJ_DEFAULT;
1282 if (ch == '/' && chNext == '*') {
1283 if (chNext2 == '*')
1284 state = SCE_HJ_COMMENTDOC;
1285 else
1286 state = SCE_HJ_COMMENT;
1287 } else if (ch == '/' && chNext == '/') {
1288 state = SCE_HJ_COMMENTLINE;
1289 } else if (ch == '\"') {
1290 state = SCE_HJ_DOUBLESTRING;
1291 } else if (ch == '\'') {
1292 state = SCE_HJ_SINGLESTRING;
1293 } else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1294 styler.ColourTo(i - 1, StateToPrint);
1295 state = SCE_HJ_COMMENTLINE;
1296 i += 2;
1297 } else if (IsOperator(ch)) {
1298 styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1299 state = SCE_HJ_DEFAULT;
1300 }
1301 }
1302 break;
1303 case SCE_HJ_COMMENT:
1304 case SCE_HJ_COMMENTDOC:
1305 if (ch == '/' && chPrev == '*') {
1306 styler.ColourTo(i, StateToPrint);
1307 state = SCE_HJ_DEFAULT;
1308 ch = ' ';
1309 }
1310 break;
1311 case SCE_HJ_COMMENTLINE:
1312 if (ch == '\r' || ch == '\n') {
1313 styler.ColourTo(i - 1, statePrintForState(SCE_HJ_COMMENTLINE, inScriptType));
1314 state = SCE_HJ_DEFAULT;
1315 ch = ' ';
1316 }
1317 break;
1318 case SCE_HJ_DOUBLESTRING:
1319 if (ch == '\\') {
1320 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1321 i++;
1322 }
1323 } else if (ch == '\"') {
1324 styler.ColourTo(i, statePrintForState(SCE_HJ_DOUBLESTRING, inScriptType));
1325 state = SCE_HJ_DEFAULT;
1326 } else if ((inScriptType == eNonHtmlScript) && (ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1327 styler.ColourTo(i - 1, StateToPrint);
1328 state = SCE_HJ_COMMENTLINE;
1329 i += 2;
1330 } else if (isLineEnd(ch)) {
1331 styler.ColourTo(i - 1, StateToPrint);
1332 state = SCE_HJ_STRINGEOL;
1333 }
1334 break;
1335 case SCE_HJ_SINGLESTRING:
1336 if (ch == '\\') {
1337 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1338 i++;
1339 }
1340 } else if (ch == '\'') {
1341 styler.ColourTo(i, statePrintForState(SCE_HJ_SINGLESTRING, inScriptType));
1342 state = SCE_HJ_DEFAULT;
1343 } else if ((inScriptType == eNonHtmlScript) && (ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1344 styler.ColourTo(i - 1, StateToPrint);
1345 state = SCE_HJ_COMMENTLINE;
1346 i += 2;
1347 } else if (isLineEnd(ch)) {
1348 styler.ColourTo(i - 1, StateToPrint);
1349 state = SCE_HJ_STRINGEOL;
1350 }
1351 break;
1352 case SCE_HJ_STRINGEOL:
1353 if (!isLineEnd(ch)) {
1354 styler.ColourTo(i - 1, StateToPrint);
1355 state = SCE_HJ_DEFAULT;
1356 } else if (!isLineEnd(chNext)) {
1357 styler.ColourTo(i, StateToPrint);
1358 state = SCE_HJ_DEFAULT;
1359 }
1360 break;
1361 case SCE_HJ_REGEX:
1362 if (ch == '\r' || ch == '\n' || ch == '/') {
1363 if (ch == '/') {
1364 while (isascii(chNext) && islower(chNext)) { // gobble regex flags
1365 i++;
1366 ch = chNext;
1367 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1368 }
1369 }
1370 styler.ColourTo(i, StateToPrint);
1371 state = SCE_HJ_DEFAULT;
1372 } else if (ch == '\\') {
1373 // Gobble up the quoted character
1374 if (chNext == '\\' || chNext == '/') {
1375 i++;
1376 ch = chNext;
1377 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1378 }
1379 }
1380 break;
1381 case SCE_HB_DEFAULT:
1382 case SCE_HB_START:
1383 if (IsAWordStart(ch)) {
1384 styler.ColourTo(i - 1, StateToPrint);
1385 state = SCE_HB_WORD;
1386 } else if (ch == '\'') {
1387 styler.ColourTo(i - 1, StateToPrint);
1388 state = SCE_HB_COMMENTLINE;
1389 } else if (ch == '\"') {
1390 styler.ColourTo(i - 1, StateToPrint);
1391 state = SCE_HB_STRING;
1392 } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1393 styler.SafeGetCharAt(i + 3) == '-') {
1394 styler.ColourTo(i - 1, StateToPrint);
1395 state = SCE_HB_COMMENTLINE;
1396 } else if (IsOperator(ch)) {
1397 styler.ColourTo(i - 1, StateToPrint);
1398 styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
1399 state = SCE_HB_DEFAULT;
1400 } else if ((ch == ' ') || (ch == '\t')) {
1401 if (state == SCE_HB_START) {
1402 styler.ColourTo(i - 1, StateToPrint);
1403 state = SCE_HB_DEFAULT;
1404 }
1405 }
1406 break;
1407 case SCE_HB_WORD:
1408 if (!IsAWordChar(ch)) {
1409 state = classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
1410 if (state == SCE_HB_DEFAULT) {
1411 if (ch == '\"') {
1412 state = SCE_HB_STRING;
1413 } else if (ch == '\'') {
1414 state = SCE_HB_COMMENTLINE;
1415 } else if (IsOperator(ch)) {
1416 styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
1417 state = SCE_HB_DEFAULT;
1418 }
1419 }
1420 }
1421 break;
1422 case SCE_HB_STRING:
1423 if (ch == '\"') {
1424 styler.ColourTo(i, StateToPrint);
1425 state = SCE_HB_DEFAULT;
1426 } else if (ch == '\r' || ch == '\n') {
1427 styler.ColourTo(i - 1, StateToPrint);
1428 state = SCE_HB_STRINGEOL;
1429 }
1430 break;
1431 case SCE_HB_COMMENTLINE:
1432 if (ch == '\r' || ch == '\n') {
1433 styler.ColourTo(i - 1, StateToPrint);
1434 state = SCE_HB_DEFAULT;
1435 }
1436 break;
1437 case SCE_HB_STRINGEOL:
1438 if (!isLineEnd(ch)) {
1439 styler.ColourTo(i - 1, StateToPrint);
1440 state = SCE_HB_DEFAULT;
1441 } else if (!isLineEnd(chNext)) {
1442 styler.ColourTo(i, StateToPrint);
1443 state = SCE_HB_DEFAULT;
1444 }
1445 break;
1446 case SCE_HP_DEFAULT:
1447 case SCE_HP_START:
1448 if (IsAWordStart(ch)) {
1449 styler.ColourTo(i - 1, StateToPrint);
1450 state = SCE_HP_WORD;
1451 } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1452 styler.SafeGetCharAt(i + 3) == '-') {
1453 styler.ColourTo(i - 1, StateToPrint);
1454 state = SCE_HP_COMMENTLINE;
1455 } else if (ch == '#') {
1456 styler.ColourTo(i - 1, StateToPrint);
1457 state = SCE_HP_COMMENTLINE;
1458 } else if (ch == '\"') {
1459 styler.ColourTo(i - 1, StateToPrint);
1460 if (chNext == '\"' && chNext2 == '\"') {
1461 i += 2;
1462 state = SCE_HP_TRIPLEDOUBLE;
1463 ch = ' ';
1464 chPrev = ' ';
1465 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1466 } else {
1467 // state = statePrintForState(SCE_HP_STRING,inScriptType);
1468 state = SCE_HP_STRING;
1469 }
1470 } else if (ch == '\'') {
1471 styler.ColourTo(i - 1, StateToPrint);
1472 if (chNext == '\'' && chNext2 == '\'') {
1473 i += 2;
1474 state = SCE_HP_TRIPLE;
1475 ch = ' ';
1476 chPrev = ' ';
1477 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1478 } else {
1479 state = SCE_HP_CHARACTER;
1480 }
1481 } else if (IsOperator(ch)) {
1482 styler.ColourTo(i - 1, StateToPrint);
1483 styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
1484 } else if ((ch == ' ') || (ch == '\t')) {
1485 if (state == SCE_HP_START) {
1486 styler.ColourTo(i - 1, StateToPrint);
1487 state = SCE_HP_DEFAULT;
1488 }
1489 }
1490 break;
1491 case SCE_HP_WORD:
1492 if (!IsAWordChar(ch)) {
1493 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
1494 state = SCE_HP_DEFAULT;
1495 if (ch == '#') {
1496 state = SCE_HP_COMMENTLINE;
1497 } else if (ch == '\"') {
1498 if (chNext == '\"' && chNext2 == '\"') {
1499 i += 2;
1500 state = SCE_HP_TRIPLEDOUBLE;
1501 ch = ' ';
1502 chPrev = ' ';
1503 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1504 } else {
1505 state = SCE_HP_STRING;
1506 }
1507 } else if (ch == '\'') {
1508 if (chNext == '\'' && chNext2 == '\'') {
1509 i += 2;
1510 state = SCE_HP_TRIPLE;
1511 ch = ' ';
1512 chPrev = ' ';
1513 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1514 } else {
1515 state = SCE_HP_CHARACTER;
1516 }
1517 } else if (IsOperator(ch)) {
1518 styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
1519 }
1520 }
1521 break;
1522 case SCE_HP_COMMENTLINE:
1523 if (ch == '\r' || ch == '\n') {
1524 styler.ColourTo(i - 1, StateToPrint);
1525 state = SCE_HP_DEFAULT;
1526 }
1527 break;
1528 case SCE_HP_STRING:
1529 if (ch == '\\') {
1530 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1531 i++;
1532 ch = chNext;
1533 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1534 }
1535 } else if (ch == '\"') {
1536 styler.ColourTo(i, StateToPrint);
1537 state = SCE_HP_DEFAULT;
1538 }
1539 break;
1540 case SCE_HP_CHARACTER:
1541 if (ch == '\\') {
1542 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1543 i++;
1544 ch = chNext;
1545 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1546 }
1547 } else if (ch == '\'') {
1548 styler.ColourTo(i, StateToPrint);
1549 state = SCE_HP_DEFAULT;
1550 }
1551 break;
1552 case SCE_HP_TRIPLE:
1553 if (ch == '\'' && chPrev == '\'' && chPrev2 == '\'') {
1554 styler.ColourTo(i, StateToPrint);
1555 state = SCE_HP_DEFAULT;
1556 }
1557 break;
1558 case SCE_HP_TRIPLEDOUBLE:
1559 if (ch == '\"' && chPrev == '\"' && chPrev2 == '\"') {
1560 styler.ColourTo(i, StateToPrint);
1561 state = SCE_HP_DEFAULT;
1562 }
1563 break;
1564 ///////////// start - PHP state handling
1565 case SCE_HPHP_WORD:
1566 if (!IsAWordChar(ch)) {
1567 classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
1568 if (ch == '/' && chNext == '*') {
1569 i++;
1570 state = SCE_HPHP_COMMENT;
1571 } else if (ch == '/' && chNext == '/') {
1572 i++;
1573 state = SCE_HPHP_COMMENTLINE;
1574 } else if (ch == '#') {
1575 state = SCE_HPHP_COMMENTLINE;
1576 } else if (ch == '\"') {
1577 state = SCE_HPHP_HSTRING;
1578 strcpy(phpStringDelimiter, "\"");
1579 } else if (styler.Match(i, "<<<")) {
1580 state = SCE_HPHP_HSTRING;
1581 i = FindPhpStringDelimiter(phpStringDelimiter, sizeof(phpStringDelimiter), i + 3, lengthDoc, styler);
1582 } else if (ch == '\'') {
1583 state = SCE_HPHP_SIMPLESTRING;
1584 } else if (ch == '$' && IsPhpWordStart(chNext)) {
1585 state = SCE_HPHP_VARIABLE;
1586 } else if (IsOperator(ch)) {
1587 state = SCE_HPHP_OPERATOR;
1588 } else {
1589 state = SCE_HPHP_DEFAULT;
1590 }
1591 }
1592 break;
1593 case SCE_HPHP_NUMBER:
1594 // recognize bases 8,10 or 16 integers OR floating-point numbers
1595 if (!IsADigit(ch)
1596 && strchr(".xXabcdefABCDEF", ch) == NULL
1597 && ((ch != '-' && ch != '+') || (chPrev != 'e' && chPrev != 'E'))) {
1598 styler.ColourTo(i - 1, SCE_HPHP_NUMBER);
1599 if (IsOperator(ch))
1600 state = SCE_HPHP_OPERATOR;
1601 else
1602 state = SCE_HPHP_DEFAULT;
1603 }
1604 break;
1605 case SCE_HPHP_VARIABLE:
1606 if (!IsPhpWordChar(ch)) {
1607 styler.ColourTo(i - 1, SCE_HPHP_VARIABLE);
1608 if (IsOperator(ch))
1609 state = SCE_HPHP_OPERATOR;
1610 else
1611 state = SCE_HPHP_DEFAULT;
1612 }
1613 break;
1614 case SCE_HPHP_COMMENT:
1615 if (ch == '/' && chPrev == '*') {
1616 styler.ColourTo(i, StateToPrint);
1617 state = SCE_HPHP_DEFAULT;
1618 }
1619 break;
1620 case SCE_HPHP_COMMENTLINE:
1621 if (ch == '\r' || ch == '\n') {
1622 styler.ColourTo(i - 1, StateToPrint);
1623 state = SCE_HPHP_DEFAULT;
1624 }
1625 break;
1626 case SCE_HPHP_HSTRING:
1627 if (ch == '\\' && (phpStringDelimiter[0] == '\"' || chNext == '$' || chNext == '{')) {
1628 // skip the next char
1629 i++;
1630 } else if (((ch == '{' && chNext == '$') || (ch == '$' && chNext == '{'))
1631 && IsPhpWordStart(chNext2)) {
1632 styler.ColourTo(i - 1, StateToPrint);
1633 state = SCE_HPHP_COMPLEX_VARIABLE;
1634 } else if (ch == '$' && IsPhpWordStart(chNext)) {
1635 styler.ColourTo(i - 1, StateToPrint);
1636 state = SCE_HPHP_HSTRING_VARIABLE;
1637 } else if (styler.Match(i, phpStringDelimiter)) {
1638 const int psdLength = strlen(phpStringDelimiter);
1639 if ((psdLength > 1) && ((i + psdLength) < lengthDoc))
1640 i += psdLength - 1;
1641 styler.ColourTo(i, StateToPrint);
1642 state = SCE_HPHP_DEFAULT;
1643 }
1644 break;
1645 case SCE_HPHP_SIMPLESTRING:
1646 if (ch == '\\') {
1647 // skip the next char
1648 i++;
1649 } else if (ch == '\'') {
1650 styler.ColourTo(i, StateToPrint);
1651 state = SCE_HPHP_DEFAULT;
1652 }
1653 break;
1654 case SCE_HPHP_HSTRING_VARIABLE:
1655 if (!IsPhpWordChar(ch)) {
1656 styler.ColourTo(i - 1, StateToPrint);
1657 i--; // strange but it works
1658 state = SCE_HPHP_HSTRING;
1659 }
1660 break;
1661 case SCE_HPHP_COMPLEX_VARIABLE:
1662 if (ch == '}') {
1663 styler.ColourTo(i, StateToPrint);
1664 state = SCE_HPHP_HSTRING;
1665 }
1666 break;
1667 case SCE_HPHP_OPERATOR:
1668 case SCE_HPHP_DEFAULT:
1669 styler.ColourTo(i - 1, StateToPrint);
1670 if (IsADigit(ch) || (ch == '.' && IsADigit(chNext))) {
1671 state = SCE_HPHP_NUMBER;
1672 } else if (IsAWordStart(ch)) {
1673 state = SCE_HPHP_WORD;
1674 } else if (ch == '/' && chNext == '*') {
1675 i++;
1676 state = SCE_HPHP_COMMENT;
1677 } else if (ch == '/' && chNext == '/') {
1678 i++;
1679 state = SCE_HPHP_COMMENTLINE;
1680 } else if (ch == '#') {
1681 state = SCE_HPHP_COMMENTLINE;
1682 } else if (ch == '\"') {
1683 state = SCE_HPHP_HSTRING;
1684 strcpy(phpStringDelimiter, "\"");
1685 } else if (styler.Match(i, "<<<")) {
1686 state = SCE_HPHP_HSTRING;
1687 i = FindPhpStringDelimiter(phpStringDelimiter, sizeof(phpStringDelimiter), i + 3, lengthDoc, styler);
1688 } else if (ch == '\'') {
1689 state = SCE_HPHP_SIMPLESTRING;
1690 } else if (ch == '$' && IsPhpWordStart(chNext)) {
1691 state = SCE_HPHP_VARIABLE;
1692 } else if (IsOperator(ch)) {
1693 state = SCE_HPHP_OPERATOR;
1694 } else if ((state == SCE_HPHP_OPERATOR) && (IsASpace(ch))) {
1695 state = SCE_HPHP_DEFAULT;
1696 }
1697 break;
1698 ///////////// end - PHP state handling
1699 }
1700
1701 // Some of the above terminated their lexeme but since the same character starts
1702 // the same class again, only reenter if non empty segment.
1703
1704 bool nonEmptySegment = i >= static_cast<int>(styler.GetStartSegment());
1705 if (state == SCE_HB_DEFAULT) { // One of the above succeeded
1706 if ((ch == '\"') && (nonEmptySegment)) {
1707 state = SCE_HB_STRING;
1708 } else if (ch == '\'') {
1709 state = SCE_HB_COMMENTLINE;
1710 } else if (IsAWordStart(ch)) {
1711 state = SCE_HB_WORD;
1712 } else if (IsOperator(ch)) {
1713 styler.ColourTo(i, SCE_HB_DEFAULT);
1714 }
1715 } else if (state == SCE_HBA_DEFAULT) { // One of the above succeeded
1716 if ((ch == '\"') && (nonEmptySegment)) {
1717 state = SCE_HBA_STRING;
1718 } else if (ch == '\'') {
1719 state = SCE_HBA_COMMENTLINE;
1720 } else if (IsAWordStart(ch)) {
1721 state = SCE_HBA_WORD;
1722 } else if (IsOperator(ch)) {
1723 styler.ColourTo(i, SCE_HBA_DEFAULT);
1724 }
1725 } else if (state == SCE_HJ_DEFAULT) { // One of the above succeeded
1726 if (ch == '/' && chNext == '*') {
1727 if (styler.SafeGetCharAt(i + 2) == '*')
1728 state = SCE_HJ_COMMENTDOC;
1729 else
1730 state = SCE_HJ_COMMENT;
1731 } else if (ch == '/' && chNext == '/') {
1732 state = SCE_HJ_COMMENTLINE;
1733 } else if ((ch == '\"') && (nonEmptySegment)) {
1734 state = SCE_HJ_DOUBLESTRING;
1735 } else if ((ch == '\'') && (nonEmptySegment)) {
1736 state = SCE_HJ_SINGLESTRING;
1737 } else if (IsAWordStart(ch)) {
1738 state = SCE_HJ_WORD;
1739 } else if (IsOperator(ch)) {
1740 styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1741 }
1742 }
1743 }
1744
1745 StateToPrint = statePrintForState(state, inScriptType);
1746 styler.ColourTo(lengthDoc - 1, StateToPrint);
1747
1748 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
1749 if (fold) {
1750 int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
1751 styler.SetLevel(lineCurrent, levelPrev | flagsNext);
1752 }
1753 }
1754
1755 static void ColouriseXMLDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
1756 Accessor &styler) {
1757 // Passing in true because we're lexing XML
1758 ColouriseHyperTextDoc(startPos, length, initStyle, keywordlists,styler, true);
1759 }
1760
1761 static void ColouriseHTMLDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
1762 Accessor &styler) {
1763 // Passing in false because we're notlexing XML
1764 ColouriseHyperTextDoc(startPos, length, initStyle, keywordlists,styler, false);
1765 }
1766
1767 static bool isASPScript(int state) {
1768 return
1769 (state >= SCE_HJA_START && state <= SCE_HJA_REGEX) ||
1770 (state >= SCE_HBA_START && state <= SCE_HBA_STRINGEOL) ||
1771 (state >= SCE_HPA_DEFAULT && state <= SCE_HPA_IDENTIFIER);
1772 }
1773
1774 static void ColouriseHBAPiece(StyleContext &sc, WordList *keywordlists[]) {
1775 WordList &keywordsVBS = *keywordlists[2];
1776 if (sc.state == SCE_HBA_WORD) {
1777 if (!IsAWordChar(sc.ch)) {
1778 char s[100];
1779 sc.GetCurrentLowered(s, sizeof(s));
1780 if (keywordsVBS.InList(s)) {
1781 if (strcmp(s, "rem") == 0) {
1782 sc.ChangeState(SCE_HBA_COMMENTLINE);
1783 if (sc.atLineEnd) {
1784 sc.SetState(SCE_HBA_DEFAULT);
1785 }
1786 } else {
1787 sc.SetState(SCE_HBA_DEFAULT);
1788 }
1789 } else {
1790 sc.ChangeState(SCE_HBA_IDENTIFIER);
1791 sc.SetState(SCE_HBA_DEFAULT);
1792 }
1793 }
1794 } else if (sc.state == SCE_HBA_NUMBER) {
1795 if (!IsAWordChar(sc.ch)) {
1796 sc.SetState(SCE_HBA_DEFAULT);
1797 }
1798 } else if (sc.state == SCE_HBA_STRING) {
1799 if (sc.ch == '\"') {
1800 sc.ForwardSetState(SCE_HBA_DEFAULT);
1801 } else if (sc.ch == '\r' || sc.ch == '\n') {
1802 sc.ChangeState(SCE_HBA_STRINGEOL);
1803 sc.ForwardSetState(SCE_HBA_DEFAULT);
1804 }
1805 } else if (sc.state == SCE_HBA_COMMENTLINE) {
1806 if (sc.ch == '\r' || sc.ch == '\n') {
1807 sc.SetState(SCE_HBA_DEFAULT);
1808 }
1809 }
1810
1811 if (sc.state == SCE_HBA_DEFAULT) {
1812 if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
1813 sc.SetState(SCE_HBA_NUMBER);
1814 } else if (IsAWordStart(sc.ch)) {
1815 sc.SetState(SCE_HBA_WORD);
1816 } else if (sc.ch == '\'') {
1817 sc.SetState(SCE_HBA_COMMENTLINE);
1818 } else if (sc.ch == '\"') {
1819 sc.SetState(SCE_HBA_STRING);
1820 }
1821 }
1822 }
1823
1824 static void ColouriseHTMLPiece(StyleContext &sc, WordList *keywordlists[]) {
1825 WordList &keywordsTags = *keywordlists[0];
1826 if (sc.state == SCE_H_COMMENT) {
1827 if (sc.Match("-->")) {
1828 sc.Forward();
1829 sc.Forward();
1830 sc.ForwardSetState(SCE_H_DEFAULT);
1831 }
1832 } else if (sc.state == SCE_H_ENTITY) {
1833 if (sc.ch == ';') {
1834 sc.ForwardSetState(SCE_H_DEFAULT);
1835 } else if (sc.ch != '#' && (sc.ch < 0x80) && !isalnum(sc.ch) // Should check that '#' follows '&', but it is unlikely anyway...
1836 && sc.ch != '.' && sc.ch != '-' && sc.ch != '_' && sc.ch != ':') { // valid in XML
1837 sc.ChangeState(SCE_H_TAGUNKNOWN);
1838 sc.SetState(SCE_H_DEFAULT);
1839 }
1840 } else if (sc.state == SCE_H_TAGUNKNOWN) {
1841 if (!ishtmlwordchar(sc.ch) && !((sc.ch == '/') && (sc.chPrev == '<')) && sc.ch != '[') {
1842 char s[100];
1843 sc.GetCurrentLowered(s, sizeof(s));
1844 if (s[1] == '/') {
1845 if (keywordsTags.InList(s + 2)) {
1846 sc.ChangeState(SCE_H_TAG);
1847 }
1848 } else {
1849 if (keywordsTags.InList(s + 1)) {
1850 sc.ChangeState(SCE_H_TAG);
1851 }
1852 }
1853 if (sc.ch == '>') {
1854 sc.ForwardSetState(SCE_H_DEFAULT);
1855 } else if (sc.Match('/', '>')) {
1856 sc.SetState(SCE_H_TAGEND);
1857 sc.Forward();
1858 sc.ForwardSetState(SCE_H_DEFAULT);
1859 } else {
1860 sc.SetState(SCE_H_OTHER);
1861 }
1862 }
1863 } else if (sc.state == SCE_H_ATTRIBUTE) {
1864 if (!ishtmlwordchar(sc.ch)) {
1865 char s[100];
1866 sc.GetCurrentLowered(s, sizeof(s));
1867 if (!keywordsTags.InList(s)) {
1868 sc.ChangeState(SCE_H_ATTRIBUTEUNKNOWN);
1869 }
1870 sc.SetState(SCE_H_OTHER);
1871 }
1872 } else if (sc.state == SCE_H_OTHER) {
1873 if (sc.ch == '>') {
1874 sc.SetState(SCE_H_TAG);
1875 sc.ForwardSetState(SCE_H_DEFAULT);
1876 } else if (sc.Match('/', '>')) {
1877 sc.SetState(SCE_H_TAG);
1878 sc.Forward();
1879 sc.ForwardSetState(SCE_H_DEFAULT);
1880 } else if (sc.chPrev == '=') {
1881 sc.SetState(SCE_H_VALUE);
1882 }
1883 } else if (sc.state == SCE_H_DOUBLESTRING) {
1884 if (sc.ch == '\"') {
1885 sc.ForwardSetState(SCE_H_OTHER);
1886 }
1887 } else if (sc.state == SCE_H_SINGLESTRING) {
1888 if (sc.ch == '\'') {
1889 sc.ForwardSetState(SCE_H_OTHER);
1890 }
1891 } else if (sc.state == SCE_H_NUMBER) {
1892 if (!IsADigit(sc.ch)) {
1893 sc.SetState(SCE_H_OTHER);
1894 }
1895 }
1896
1897 if (sc.state == SCE_H_DEFAULT) {
1898 if (sc.ch == '<') {
1899 if (sc.Match("<!--"))
1900 sc.SetState(SCE_H_COMMENT);
1901 else
1902 sc.SetState(SCE_H_TAGUNKNOWN);
1903 } else if (sc.ch == '&') {
1904 sc.SetState(SCE_H_ENTITY);
1905 }
1906 } else if ((sc.state == SCE_H_OTHER) || (sc.state == SCE_H_VALUE)) {
1907 if (sc.ch == '\"' && sc.chPrev == '=') {
1908 sc.SetState(SCE_H_DOUBLESTRING);
1909 } else if (sc.ch == '\'' && sc.chPrev == '=') {
1910 sc.SetState(SCE_H_SINGLESTRING);
1911 } else if (IsADigit(sc.ch)) {
1912 sc.SetState(SCE_H_NUMBER);
1913 } else if (sc.ch == '>') {
1914 sc.SetState(SCE_H_TAG);
1915 sc.ForwardSetState(SCE_H_DEFAULT);
1916 } else if (ishtmlwordchar(sc.ch)) {
1917 sc.SetState(SCE_H_ATTRIBUTE);
1918 }
1919 }
1920 }
1921
1922 static void ColouriseASPPiece(StyleContext &sc, WordList *keywordlists[]) {
1923 // Possibly exit current state to either SCE_H_DEFAULT or SCE_HBA_DEFAULT
1924 if ((sc.state == SCE_H_ASPAT || isASPScript(sc.state)) && sc.Match('%', '>')) {
1925 sc.SetState(SCE_H_ASP);
1926 sc.Forward();
1927 sc.ForwardSetState(SCE_H_DEFAULT);
1928 }
1929
1930 // Handle some ASP script
1931 if (sc.state >= SCE_HBA_START && sc.state <= SCE_HBA_STRINGEOL) {
1932 ColouriseHBAPiece(sc, keywordlists);
1933 } else if (sc.state >= SCE_H_DEFAULT && sc.state <= SCE_H_SGML_BLOCK_DEFAULT) {
1934 ColouriseHTMLPiece(sc, keywordlists);
1935 }
1936
1937 // Enter new sc.state
1938 if ((sc.state == SCE_H_DEFAULT) || (sc.state == SCE_H_TAGUNKNOWN)) {
1939 if (sc.Match('<', '%')) {
1940 if (sc.state == SCE_H_TAGUNKNOWN)
1941 sc.ChangeState(SCE_H_ASP);
1942 else
1943 sc.SetState(SCE_H_ASP);
1944 sc.Forward();
1945 sc.Forward();
1946 if (sc.ch == '@') {
1947 sc.ForwardSetState(SCE_H_ASPAT);
1948 } else {
1949 if (sc.ch == '=') {
1950 sc.Forward();
1951 }
1952 sc.SetState(SCE_HBA_DEFAULT);
1953 }
1954 }
1955 }
1956 }
1957
1958 static void ColouriseASPDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
1959 Accessor &styler) {
1960 // Lexer for HTML requires more lexical states (8 bits worth) than most lexers
1961 StyleContext sc(startPos, length, initStyle, styler, static_cast<char>(STYLE_MAX));
1962 for (; sc.More(); sc.Forward()) {
1963 ColouriseASPPiece(sc, keywordlists);
1964 }
1965 sc.Complete();
1966 }
1967
1968 static void ColourisePHPPiece(StyleContext &sc, WordList *keywordlists[]) {
1969 // Possibly exit current state to either SCE_H_DEFAULT or SCE_HBA_DEFAULT
1970 if (sc.state >= SCE_HPHP_DEFAULT && sc.state <= SCE_HPHP_OPERATOR) {
1971 if (!isPHPStringState(sc.state) &&
1972 (sc.state != SCE_HPHP_COMMENT) &&
1973 (sc.Match('?', '>'))) {
1974 sc.SetState(SCE_H_QUESTION);
1975 sc.Forward();
1976 sc.ForwardSetState(SCE_H_DEFAULT);
1977 }
1978 }
1979
1980 if (sc.state >= SCE_H_DEFAULT && sc.state <= SCE_H_SGML_BLOCK_DEFAULT) {
1981 ColouriseHTMLPiece(sc, keywordlists);
1982 }
1983
1984 // Handle some PHP script
1985 if (sc.state == SCE_HPHP_WORD) {
1986 if (!IsPhpWordChar(static_cast<char>(sc.ch))) {
1987 sc.SetState(SCE_HPHP_DEFAULT);
1988 }
1989 } else if (sc.state == SCE_HPHP_COMMENTLINE) {
1990 if (sc.ch == '\r' || sc.ch == '\n') {
1991 sc.SetState(SCE_HPHP_DEFAULT);
1992 }
1993 } else if (sc.state == SCE_HPHP_COMMENT) {
1994 if (sc.Match('*', '/')) {
1995 sc.Forward();
1996 sc.Forward();
1997 sc.SetState(SCE_HPHP_DEFAULT);
1998 }
1999 } else if (sc.state == SCE_HPHP_HSTRING) {
2000 if (sc.ch == '\"') {
2001 sc.ForwardSetState(SCE_HPHP_DEFAULT);
2002 }
2003 } else if (sc.state == SCE_HPHP_SIMPLESTRING) {
2004 if (sc.ch == '\'') {
2005 sc.ForwardSetState(SCE_HPHP_DEFAULT);
2006 }
2007 } else if (sc.state == SCE_HPHP_VARIABLE) {
2008 if (!IsPhpWordChar(static_cast<char>(sc.ch))) {
2009 sc.SetState(SCE_HPHP_DEFAULT);
2010 }
2011 } else if (sc.state == SCE_HPHP_OPERATOR) {
2012 sc.SetState(SCE_HPHP_DEFAULT);
2013 }
2014
2015 // Enter new sc.state
2016 if ((sc.state == SCE_H_DEFAULT) || (sc.state == SCE_H_TAGUNKNOWN)) {
2017 if (sc.Match("<?php")) {
2018 sc.SetState(SCE_H_QUESTION);
2019 sc.Forward();
2020 sc.Forward();
2021 sc.Forward();
2022 sc.Forward();
2023 sc.Forward();
2024 sc.SetState(SCE_HPHP_DEFAULT);
2025 }
2026 }
2027 if (sc.state == SCE_HPHP_DEFAULT) {
2028 if (IsPhpWordStart(static_cast<char>(sc.ch))) {
2029 sc.SetState(SCE_HPHP_WORD);
2030 } else if (sc.ch == '#') {
2031 sc.SetState(SCE_HPHP_COMMENTLINE);
2032 } else if (sc.Match("<!--")) {
2033 sc.SetState(SCE_HPHP_COMMENTLINE);
2034 } else if (sc.Match('/', '/')) {
2035 sc.SetState(SCE_HPHP_COMMENTLINE);
2036 } else if (sc.Match('/', '*')) {
2037 sc.SetState(SCE_HPHP_COMMENT);
2038 } else if (sc.ch == '\"') {
2039 sc.SetState(SCE_HPHP_HSTRING);
2040 } else if (sc.ch == '\'') {
2041 sc.SetState(SCE_HPHP_SIMPLESTRING);
2042 } else if (sc.ch == '$' && IsPhpWordStart(static_cast<char>(sc.chNext))) {
2043 sc.SetState(SCE_HPHP_VARIABLE);
2044 } else if (IsOperator(static_cast<char>(sc.ch))) {
2045 sc.SetState(SCE_HPHP_OPERATOR);
2046 }
2047 }
2048 }
2049
2050 static void ColourisePHPDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
2051 Accessor &styler) {
2052 // Lexer for HTML requires more lexical states (8 bits worth) than most lexers
2053 StyleContext sc(startPos, length, initStyle, styler, static_cast<char>(STYLE_MAX));
2054 for (; sc.More(); sc.Forward()) {
2055 ColourisePHPPiece(sc, keywordlists);
2056 }
2057 sc.Complete();
2058 }
2059
2060 static void ColourisePHPScriptDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
2061 Accessor &styler) {
2062 if(startPos == 0) initStyle = SCE_HPHP_DEFAULT;
2063 ColouriseHTMLDoc(startPos,length,initStyle,keywordlists,styler);
2064 }
2065
// Human-readable descriptions of the keyword lists, one entry per list slot,
// terminated by a null pointer.
static const char * const htmlWordListDesc[] = {
	"HTML elements and attributes",	// slot 0
	"JavaScript keywords",			// slot 1
	"VBScript keywords",			// slot 2
	"Python keywords",				// slot 3
	"PHP keywords",					// slot 4
	"SGML and DTD keywords",		// slot 5
	0								// end of list
};
2075
// Keyword list descriptions for the "phpscript" lexer. Only slot 4 carries a
// description; the other slots are left as empty strings. The array is
// terminated by a null pointer.
static const char * const phpscriptWordListDesc[] = {
	"",				// slot 0: unused
	"",				// slot 1: unused
	"",				// slot 2: unused
	"",				// slot 3: unused
	"PHP keywords",	// slot 4
	"",				// slot 5: unused
	0				// end of list
};
2085
2086 LexerModule lmHTML(SCLEX_HTML, ColouriseHTMLDoc, "hypertext", 0, htmlWordListDesc, 8);
2087 LexerModule lmXML(SCLEX_XML, ColouriseXMLDoc, "xml", 0, htmlWordListDesc, 8);
2088 // SCLEX_ASP and SCLEX_PHP should not be used in new code: use SCLEX_HTML instead.
2089 LexerModule lmASP(SCLEX_ASP, ColouriseASPDoc, "asp", 0, htmlWordListDesc, 8);
2090 LexerModule lmPHP(SCLEX_PHP, ColourisePHPDoc, "php", 0, htmlWordListDesc, 8);
2091 LexerModule lmPHPSCRIPT(SCLEX_PHPSCRIPT, ColourisePHPScriptDoc, "phpscript", 0, phpscriptWordListDesc, 8);