]> git.saurik.com Git - wxWidgets.git/blob - src/stc/scintilla/src/LexHTML.cxx
7868b47fc8c11d283b3da0d70b22a7ac5419b468
[wxWidgets.git] / src / stc / scintilla / src / LexHTML.cxx
1 // SciTE - Scintilla based Text Editor
2 // LexHTML.cxx - lexer for HTML
3 // Copyright 1998-2000 by Neil Hodgson <neilh@scintilla.org>
4 // The License.txt file describes the conditions under which this software may be distributed.
5
6 #include <stdlib.h>
7 #include <string.h>
8 #include <ctype.h>
9 #include <stdio.h>
10 #include <stdarg.h>
11
12 #include "Platform.h"
13
14 #include "PropSet.h"
15 #include "Accessor.h"
16 #include "KeyWords.h"
17 #include "Scintilla.h"
18 #include "SciLexer.h"
19
// Distance between each embedded-script style range and its alternate ("A")
// style range. statePrintForState() adds these offsets and stateForPrintState()
// removes them again.
#define SCE_HA_JS (SCE_HJA_START - SCE_HJ_START)
#define SCE_HA_VBS (SCE_HBA_START - SCE_HB_START)
#define SCE_HA_PYTHON (SCE_HPA_START - SCE_HP_START)

// Scripting language currently being lexed.
enum {
	eScriptNone = 0,
	eScriptJS,
	eScriptVBS,
	eScriptPython,
	eScriptPHP,
	eScriptXML
};

// Kind of region the lexer is in: plain HTML, a script element, a
// pre-processor section, or a pre-processor section nested in a script element.
enum {
	eHtml = 0,
	eNonHtmlScript,
	eNonHtmlPreProc,
	eNonHtmlScriptPreProc
};
26
27 static int segIsScriptingIndicator(Accessor &styler, unsigned int start, unsigned int end, int prevValue) {
28 char s[30 + 1];
29 s[0] = '\0';
30 for (unsigned int i = 0; i < end - start + 1 && i < 30; i++) {
31 s[i] = static_cast<char>(tolower(styler[start + i]));
32 s[i + 1] = '\0';
33 }
34 //Platform::DebugPrintf("Scripting indicator [%s]\n", s);
35 if (strstr(s, "vbs"))
36 return eScriptVBS;
37 if (strstr(s, "pyth"))
38 return eScriptPython;
39 if (strstr(s, "javas"))
40 return eScriptJS;
41 if (strstr(s, "jscr"))
42 return eScriptJS;
43 if (strstr(s, "php"))
44 return eScriptPHP;
45 if (strstr(s, "xml"))
46 return eScriptXML;
47
48 return prevValue;
49 }
50
51 static int PrintScriptingIndicatorOffset(Accessor &styler, unsigned int start, unsigned int end) {
52 int iResult = 0;
53 char s[30 + 1];
54 s[0] = '\0';
55 for (unsigned int i = 0; i < end - start + 1 && i < 30; i++) {
56 s[i] = static_cast<char>(tolower(styler[start + i]));
57 s[i + 1] = '\0';
58 }
59 if (0 == strncmp(s, "php", 3)) {
60 iResult = 3;
61 }
62
63 return iResult;
64 }
65
66 //static int ScriptOfState(int state,int defaultScript)
67 static int ScriptOfState(int state) {
68 int scriptLanguage;
69
70 if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
71 scriptLanguage = eScriptPython;
72 } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
73 scriptLanguage = eScriptVBS;
74 } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_STRINGEOL)) {
75 scriptLanguage = eScriptJS;
76 } else if ((state >= SCE_HPHP_DEFAULT) && (state <= SCE_HPHP_COMMENTLINE)) {
77 scriptLanguage = eScriptPHP;
78 } else {
79 // scriptLanguage = defaultScript;
80 scriptLanguage = eScriptNone;
81 }
82
83 return scriptLanguage;
84 }
85
86 static int statePrintForState(int state, int inScriptType) {
87 int StateToPrint;
88
89 if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
90 StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_PYTHON);
91 } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
92 StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_VBS);
93 } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_STRINGEOL)) {
94 StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_JS);
95 } else {
96 StateToPrint = state;
97 }
98
99 return StateToPrint;
100 }
101
102 static int stateForPrintState(int StateToPrint) {
103 int state;
104
105 if ((StateToPrint >= SCE_HPA_START) && (StateToPrint <= SCE_HPA_IDENTIFIER)) {
106 state = StateToPrint - SCE_HA_PYTHON;
107 } else if ((StateToPrint >= SCE_HBA_START) && (StateToPrint <= SCE_HBA_STRINGEOL)) {
108 state = StateToPrint - SCE_HA_VBS;
109 } else if ((StateToPrint >= SCE_HJA_START) && (StateToPrint <= SCE_HJA_STRINGEOL)) {
110 state = StateToPrint - SCE_HA_JS;
111 } else {
112 state = StateToPrint;
113 }
114
115 return state;
116 }
117
118 static inline bool IsNumber(unsigned int start, Accessor &styler) {
119 return isdigit(styler[start]) || (styler[start] == '.') ||
120 (styler[start] == '-') || (styler[start] == '#');
121 }
122
123 static void classifyAttribHTML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
124 bool wordIsNumber = IsNumber(start, styler);
125 char chAttr = SCE_H_ATTRIBUTEUNKNOWN;
126 if (wordIsNumber) {
127 chAttr = SCE_H_NUMBER;
128 } else {
129 char s[30 + 1];
130 s[0] = '\0';
131 for (unsigned int i = 0; i < end - start + 1 && i < 30; i++) {
132 s[i] = static_cast<char>(tolower(styler[start + i]));
133 s[i + 1] = '\0';
134 }
135 if (keywords.InList(s))
136 chAttr = SCE_H_ATTRIBUTE;
137 }
138 if ((chAttr == SCE_H_ATTRIBUTEUNKNOWN) && !keywords)
139 // No keywords -> all are known
140 chAttr = SCE_H_ATTRIBUTE;
141 styler.ColourTo(end, chAttr);
142 }
143
144 static int classifyTagHTML(unsigned int start, unsigned int end,
145 WordList &keywords, Accessor &styler) {
146 char s[30 + 1];
147 // Copy after the '<'
148 unsigned int i = 0;
149 for (unsigned int cPos = start; cPos <= end && i < 30; cPos++) {
150 char ch = styler[cPos];
151 if (ch != '<')
152 s[i++] = static_cast<char>(tolower(ch));
153 }
154 s[i] = '\0';
155 char chAttr = SCE_H_TAGUNKNOWN;
156 if (s[0] == '!' && s[1] == '-' && s[2] == '-') { //Comment
157 chAttr = SCE_H_COMMENT;
158 } else if (strcmp(s, "![cdata[") == 0) { // In lower case because already converted
159 chAttr = SCE_H_CDATA;
160 } else if (s[0] == '/') { // Closing tag
161 if (keywords.InList(s + 1))
162 chAttr = SCE_H_TAG;
163 } else {
164 if (keywords.InList(s)) {
165 chAttr = SCE_H_TAG;
166 }
167 if (0 == strcmp(s, "script")) {
168 chAttr = SCE_H_SCRIPT;
169 }
170 }
171 if ((chAttr == SCE_H_TAGUNKNOWN) && !keywords)
172 // No keywords -> all are known
173 chAttr = SCE_H_TAG;
174 styler.ColourTo(end, chAttr);
175 return chAttr;
176 }
177
178 static void classifyWordHTJS(unsigned int start, unsigned int end,
179 WordList &keywords, Accessor &styler, int inScriptType) {
180 char chAttr = SCE_HJ_WORD;
181 bool wordIsNumber = isdigit(styler[start]) || (styler[start] == '.');
182 if (wordIsNumber)
183 chAttr = SCE_HJ_NUMBER;
184 else {
185 char s[30 + 1];
186 for (unsigned int i = 0; i < end - start + 1 && i < 30; i++) {
187 s[i] = styler[start + i];
188 s[i + 1] = '\0';
189 }
190 if (keywords.InList(s))
191 chAttr = SCE_HJ_KEYWORD;
192 }
193 styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
194 }
195
196 static int classifyWordHTVB(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, int inScriptType) {
197 char chAttr = SCE_HB_IDENTIFIER;
198 bool wordIsNumber = isdigit(styler[start]) || (styler[start] == '.');
199 if (wordIsNumber)
200 chAttr = SCE_HB_NUMBER;
201 else {
202 char s[30 + 1];
203 for (unsigned int i = 0; i < end - start + 1 && i < 30; i++) {
204 s[i] = static_cast<char>(tolower(styler[start + i]));
205 s[i + 1] = '\0';
206 }
207 if (keywords.InList(s)) {
208 chAttr = SCE_HB_WORD;
209 if (strcmp(s, "rem") == 0)
210 chAttr = SCE_HB_COMMENTLINE;
211 }
212 }
213 styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
214 if (chAttr == SCE_HB_COMMENTLINE)
215 return SCE_HB_COMMENTLINE;
216 else
217 return SCE_HB_DEFAULT;
218 }
219
220 static void classifyWordHTPy(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord, int inScriptType) {
221 bool wordIsNumber = isdigit(styler[start]);
222 char s[30 + 1];
223 for (unsigned int i = 0; i < end - start + 1 && i < 30; i++) {
224 s[i] = styler[start + i];
225 s[i + 1] = '\0';
226 }
227 char chAttr = SCE_HP_IDENTIFIER;
228 if (0 == strcmp(prevWord, "class"))
229 chAttr = SCE_HP_CLASSNAME;
230 else if (0 == strcmp(prevWord, "def"))
231 chAttr = SCE_HP_DEFNAME;
232 else if (wordIsNumber)
233 chAttr = SCE_HP_NUMBER;
234 else if (keywords.InList(s))
235 chAttr = SCE_HP_WORD;
236 styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
237 strcpy(prevWord, s);
238 }
239
240 // Update the word colour to default or keyword
241 // Called when in a PHP word
242 static void classifyWordHTPHP(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
243 char chAttr = SCE_HPHP_DEFAULT;
244 bool wordIsNumber = isdigit(styler[start]);
245 if (wordIsNumber)
246 chAttr = SCE_HPHP_NUMBER;
247 else {
248 char s[30 + 1];
249 for (unsigned int i = 0; i < end - start + 1 && i < 30; i++) {
250 s[i] = styler[start + i];
251 s[i + 1] = '\0';
252 }
253 if (keywords.InList(s))
254 chAttr = SCE_HPHP_WORD;
255 }
256 styler.ColourTo(end, chAttr);
257 }
258
259 // Return the first state to reach when entering a scripting language
260 static int StateForScript(int scriptLanguage) {
261 int Result;
262 switch (scriptLanguage) {
263 case eScriptVBS:
264 Result = SCE_HB_START;
265 break;
266 case eScriptPython:
267 Result = SCE_HP_START;
268 break;
269 case eScriptPHP:
270 Result = SCE_HPHP_DEFAULT;
271 break;
272 case eScriptXML:
273 Result = SCE_H_TAGUNKNOWN;
274 break;
275 default :
276 Result = SCE_HJ_START;
277 break;
278 }
279 return Result;
280 }
281
// Returns true when ch may appear inside an HTML tag or attribute word:
// an alphanumeric character or one of . - _ : ! #
inline bool ishtmlwordchar(char ch) {
	// isalnum() requires a value representable as unsigned char; passing a
	// negative char (bytes >= 0x80 where char is signed) is undefined behaviour.
	return isalnum(static_cast<unsigned char>(ch)) != 0 ||
	       ch == '.' || ch == '-' || ch == '_' || ch == ':' || ch == '!' || ch == '#';
}
285
286 static bool InTagState(int state) {
287 return state == SCE_H_TAG || state == SCE_H_TAGUNKNOWN ||
288 state == SCE_H_SCRIPT ||
289 state == SCE_H_ATTRIBUTE || state == SCE_H_ATTRIBUTEUNKNOWN ||
290 state == SCE_H_NUMBER || state == SCE_H_OTHER ||
291 state == SCE_H_DOUBLESTRING || state == SCE_H_SINGLESTRING;
292 }
293
// True for either end-of-line character: carriage return or line feed.
static bool isLineEnd(char ch) {
	switch (ch) {
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
297
298 static void ColouriseHyperTextDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
299 Accessor &styler) {
300
301 WordList &keywords = *keywordlists[0];
302 WordList &keywords2 = *keywordlists[1];
303 WordList &keywords3 = *keywordlists[2];
304 WordList &keywords4 = *keywordlists[3];
305 WordList &keywords5 = *keywordlists[4];
306
307 // Lexer for HTML requires more lexical states (7 bits worth) than most lexers
308 styler.StartAt(startPos, 127);
309 char prevWord[200];
310 prevWord[0] = '\0';
311 int StateToPrint = initStyle;
312 int state = stateForPrintState(StateToPrint);
313
314 // If inside a tag, it may be a script tag, so reread from the start to ensure any language tags are seen
315 if (InTagState(state)) {
316 while ((startPos > 1) && (InTagState(styler.StyleAt(startPos - 1)))) {
317 startPos--;
318 }
319 state = SCE_H_DEFAULT;
320 }
321 styler.StartAt(startPos, 127);
322
323 int lineState = eScriptVBS;
324 int lineCurrent = styler.GetLine(startPos);
325 if (lineCurrent > 0)
326 lineState = styler.GetLineState(lineCurrent);
327 int inScriptType = (lineState >> 0) & 0x03; // 2 bits
328 int defaultScript = (lineState >> 4) & 0x0F; // 4 bits
329 int beforePreProc = (lineState >> 8) & 0xFF; // 8 bits
330
331 // int scriptLanguage = ScriptOfState(state,defaultScript);
332 int scriptLanguage = ScriptOfState(state);
333
334 bool fold = styler.GetPropertyInt("fold");
335 int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
336 int levelCurrent = levelPrev;
337 int visibleChars = 0;
338
339 char chPrev = ' ';
340 char ch = ' ';
341 styler.StartSegment(startPos);
342 int lengthDoc = startPos + length;
343 for (int i = startPos; i < lengthDoc; i++) {
344 char chPrev2 = chPrev;
345 chPrev = ch;
346 ch = styler[i];
347 char chNext = styler.SafeGetCharAt(i + 1);
348 char chNext2 = styler.SafeGetCharAt(i + 2);
349
350 // Handle DBCS codepages
351 if (styler.IsLeadByte(ch)) {
352 chPrev = ' ';
353 i += 1;
354 continue;
355 }
356
357 if (fold && !isspace(ch))
358 visibleChars++;
359
360 // handle script folding
361 if (fold) {
362 switch (scriptLanguage) {
363 case eScriptJS:
364 case eScriptPHP:
365 //not currently supported case eScriptVBS:
366 if ((state != SCE_HPHP_COMMENT) && (state != SCE_HPHP_COMMENTLINE) && (state != SCE_HJ_COMMENT) && (state != SCE_HJ_COMMENTLINE) && (state != SCE_HJ_COMMENTDOC)) {
367 if ((ch == '{') || (ch == '}')) {
368 levelCurrent += (ch == '{') ? 1 : -1;
369 }
370 }
371 break;
372 case eScriptPython:
373 if (state != SCE_HP_COMMENTLINE) {
374 if ((ch == ':') && ((chNext == '\n') || (chNext == '\r' && chNext2 == '\n'))) {
375 levelCurrent++;
376 } else if ((ch == '\n') && !((chNext == '\r') && (chNext2 == '\n')) && (chNext != '\n')) {
377 // check if the number of tabs is lower than the level
378 int Findlevel = (levelCurrent & ~SC_FOLDLEVELBASE) * 8;
379 for (int j = 0;Findlevel > 0;j++) {
380 char chTmp = styler.SafeGetCharAt(i + j + 1);
381 if (chTmp == '\t') {
382 Findlevel -= 8;
383 } else if (chTmp == ' ') {
384 Findlevel--;
385 } else break;
386 }
387
388 if (Findlevel > 0) {
389 levelCurrent -= Findlevel / 8;
390 if (Findlevel % 8) levelCurrent--;
391 }
392 }
393 }
394 break;
395 }
396 }
397
398 // decide what is the current state to print (depending of the script tag)
399 StateToPrint = statePrintForState(state, inScriptType);
400
401 if ((ch == '\r' && chNext != '\n') || (ch == '\n')) {
402 // Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix)
403 // Avoid triggering two times on Dos/Win
404 // New line -> record any line state onto /next/ line
405 if (fold) {
406 int lev = levelPrev;
407 if (visibleChars == 0)
408 lev |= SC_FOLDLEVELWHITEFLAG;
409 if ((levelCurrent > levelPrev) && (visibleChars > 0))
410 lev |= SC_FOLDLEVELHEADERFLAG;
411 styler.SetLevel(lineCurrent, lev);
412 visibleChars = 0;
413 levelPrev = levelCurrent;
414 }
415 lineCurrent++;
416 styler.SetLineState(lineCurrent,
417 ((inScriptType & 0x03) << 0) |
418 ((defaultScript & 0x0F) << 4) |
419 ((beforePreProc & 0xFF) << 8));
420 }
421
422 // generic end of script processing
423 else if ((inScriptType == eNonHtmlScript) && (ch == '<') && (chNext == '/')) {
424 // Check if it's the end of the script tag (or any other HTML tag)
425 switch (state) {
426 // in these cases, you can embed HTML tags (to confirm !!!!!!!!!!!!!!!!!!!!!!)
427 case SCE_H_DOUBLESTRING:
428 case SCE_H_SINGLESTRING:
429 case SCE_HJ_COMMENT:
430 case SCE_HJ_COMMENTDOC:
431 case SCE_HJ_COMMENTLINE:
432 case SCE_HJ_DOUBLESTRING:
433 case SCE_HJ_SINGLESTRING:
434 case SCE_HB_STRING:
435 case SCE_HP_STRING:
436 case SCE_HP_TRIPLE:
437 case SCE_HP_TRIPLEDOUBLE:
438 break;
439 default :
440 // maybe we should check here if it's a tag and if it's SCRIPT
441
442 styler.ColourTo(i - 1, StateToPrint);
443 state = SCE_H_TAGUNKNOWN;
444 inScriptType = eHtml;
445 scriptLanguage = eScriptNone;
446 i += 2;
447 continue;
448 }
449 }
450
451 /////////////////////////////////////
452 // handle the start of PHP pre-processor = Non-HTML
453 else if ((ch == '<') && (chNext == '?')) {
454 styler.ColourTo(i - 1, StateToPrint);
455 beforePreProc = state;
456 scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment() + 2, i + 10, eScriptPHP);
457 i++;
458 i += PrintScriptingIndicatorOffset(styler, styler.GetStartSegment() + 2, i + 10);
459 if (scriptLanguage == eScriptXML)
460 styler.ColourTo(i, SCE_H_XMLSTART);
461 else
462 styler.ColourTo(i, SCE_H_QUESTION);
463 state = StateForScript(scriptLanguage);
464 if (inScriptType == eNonHtmlScript)
465 inScriptType = eNonHtmlScriptPreProc;
466 else
467 inScriptType = eNonHtmlPreProc;
468 continue;
469 }
470
471 // handle the start of ASP pre-processor = Non-HTML
472 else if ((ch == '<') && (chNext == '%')) {
473 styler.ColourTo(i - 1, StateToPrint);
474 beforePreProc = state;
475 if (inScriptType == eNonHtmlScript)
476 inScriptType = eNonHtmlScriptPreProc;
477 else
478 inScriptType = eNonHtmlPreProc;
479
480 if (chNext2 == '@') {
481 i += 2; // place as if it was the second next char treated
482 state = SCE_H_ASPAT;
483 } else {
484 if (chNext2 == '=') {
485 i += 2; // place as if it was the second next char treated
486 }
487 else {
488 i++; // place as if it was the next char treated
489 }
490
491
492 state = StateForScript(defaultScript);
493 }
494 styler.ColourTo(i, SCE_H_ASP);
495 continue;
496 }
497
498 // handle the end of a pre-processor = Non-HTML
499 else if (((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) && ((ch == '?') || (ch == '%')) && (chNext == '>')) {
500 if (state == SCE_H_ASPAT) {
501 defaultScript = segIsScriptingIndicator(styler, styler.GetStartSegment(), i - 1, defaultScript);
502 }
503 // Bounce out of any ASP mode
504 switch (state) {
505 case SCE_HJ_WORD:
506 classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
507 break;
508 case SCE_HB_WORD:
509 classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
510 break;
511 case SCE_HP_WORD:
512 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
513 break;
514 case SCE_HPHP_WORD:
515 classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
516 break;
517 default :
518 styler.ColourTo(i - 1, StateToPrint);
519 break;
520 }
521 i++;
522 if (ch == '%')
523 styler.ColourTo(i, SCE_H_ASP);
524 else if (scriptLanguage == eScriptXML)
525 styler.ColourTo(i, SCE_H_XMLEND);
526 else
527 styler.ColourTo(i, SCE_H_QUESTION);
528 state = beforePreProc;
529 if (inScriptType == eNonHtmlScriptPreProc)
530 inScriptType = eNonHtmlScript;
531 else
532 inScriptType = eHtml;
533 scriptLanguage = eScriptNone;
534 continue;
535 }
536 /////////////////////////////////////
537
538 switch (state) {
539 case SCE_H_DEFAULT:
540 if (ch == '<') {
541 styler.ColourTo(i - 1, StateToPrint);
542 if (chNext == '!' && chNext2 == '-' && styler.SafeGetCharAt(i + 3) == '-')
543 state = SCE_H_COMMENT;
544 else
545 state = SCE_H_TAGUNKNOWN;
546 } else if (ch == '&') {
547 styler.ColourTo(i - 1, SCE_H_DEFAULT);
548 state = SCE_H_ENTITY;
549 }
550 break;
551 case SCE_H_COMMENT:
552 if ((ch == '>') && (chPrev == '-') && (chPrev2 == '-')) {
553 styler.ColourTo(i, StateToPrint);
554 state = SCE_H_DEFAULT;
555 }
556 break;
557 case SCE_H_CDATA:
558 if ((ch == '>') && (chPrev == ']') && (chPrev2 == ']')) {
559 styler.ColourTo(i, StateToPrint);
560 state = SCE_H_DEFAULT;
561 }
562 break;
563 case SCE_H_ENTITY:
564 if (ch == ';') {
565 styler.ColourTo(i, StateToPrint);
566 state = SCE_H_DEFAULT;
567 }
568 if (ch != '#' && !isalnum(ch)) { // Should check that '#' follows '&', but it is unlikely anyway...
569 styler.ColourTo(i, SCE_H_TAGUNKNOWN);
570 state = SCE_H_DEFAULT;
571 }
572 break;
573 case SCE_H_TAGUNKNOWN:
574 if (!ishtmlwordchar(ch) && ch != '/' && ch != '-' && ch != '[') {
575 int eClass = classifyTagHTML(styler.GetStartSegment(), i - 1, keywords, styler);
576 if (eClass == SCE_H_SCRIPT) {
577 inScriptType = eNonHtmlScript;
578 scriptLanguage = defaultScript;
579 eClass = SCE_H_TAG;
580 }
581 if (ch == '>') {
582 styler.ColourTo(i, SCE_H_TAG);
583 if (inScriptType == eNonHtmlScript) {
584 state = StateForScript(scriptLanguage);
585 } else {
586 state = SCE_H_DEFAULT;
587 }
588 } else {
589 if (eClass == SCE_H_CDATA) {
590 state = SCE_H_CDATA;
591 } else {
592 state = SCE_H_OTHER;
593 }
594 }
595 }
596 break;
597 case SCE_H_ATTRIBUTE:
598 if (!ishtmlwordchar(ch) && ch != '/' && ch != '-') {
599 if (inScriptType == eNonHtmlScript) {
600 scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i - 1, scriptLanguage);
601 }
602 classifyAttribHTML(styler.GetStartSegment(), i - 1, keywords, styler);
603 if (ch == '>') {
604 styler.ColourTo(i, SCE_H_TAG);
605 if (inScriptType == eNonHtmlScript) {
606 state = StateForScript(scriptLanguage);
607 } else {
608 state = SCE_H_DEFAULT;
609 }
610 } else if (ch == '=') {
611 styler.ColourTo(i, SCE_H_OTHER);
612 state = SCE_H_VALUE;
613 } else {
614 state = SCE_H_OTHER;
615 }
616 }
617 break;
618 case SCE_H_OTHER:
619 if (ch == '>') {
620 styler.ColourTo(i - 1, StateToPrint);
621 styler.ColourTo(i, SCE_H_TAG);
622 if (inScriptType == eNonHtmlScript) {
623 state = StateForScript(scriptLanguage);
624 } else {
625 state = SCE_H_DEFAULT;
626 }
627 } else if (ch == '\"') {
628 styler.ColourTo(i - 1, StateToPrint);
629 state = SCE_H_DOUBLESTRING;
630 } else if (ch == '\'') {
631 styler.ColourTo(i - 1, StateToPrint);
632 state = SCE_H_SINGLESTRING;
633 } else if (ch == '=') {
634 styler.ColourTo(i, StateToPrint);
635 state = SCE_H_VALUE;
636 } else if (ch == '/' && chNext == '>') {
637 styler.ColourTo(i - 1, StateToPrint);
638 styler.ColourTo(i + 1, SCE_H_TAGEND);
639 i++;
640 ch = chNext;
641 state = SCE_H_DEFAULT;
642 } else if (ch == '?' && chNext == '>') {
643 styler.ColourTo(i - 1, StateToPrint);
644 styler.ColourTo(i + 1, SCE_H_XMLEND);
645 i++;
646 ch = chNext;
647 state = SCE_H_DEFAULT;
648 } else if (ishtmlwordchar(ch)) {
649 styler.ColourTo(i - 1, StateToPrint);
650 state = SCE_H_ATTRIBUTE;
651 }
652 break;
653 case SCE_H_DOUBLESTRING:
654 if (ch == '\"') {
655 if (inScriptType == eNonHtmlScript) {
656 scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
657 }
658 styler.ColourTo(i, SCE_H_DOUBLESTRING);
659 state = SCE_H_OTHER;
660 }
661 break;
662 case SCE_H_SINGLESTRING:
663 if (ch == '\'') {
664 if (inScriptType == eNonHtmlScript) {
665 scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
666 }
667 styler.ColourTo(i, SCE_H_SINGLESTRING);
668 state = SCE_H_OTHER;
669 }
670 break;
671 case SCE_H_VALUE:
672 if (!ishtmlwordchar(ch)) {
673 if (ch == '\"') {
674 // Should really test for being first character
675 state = SCE_H_DOUBLESTRING;
676 } else if (ch == '\'') {
677 state = SCE_H_SINGLESTRING;
678 } else {
679 if (IsNumber(styler.GetStartSegment(), styler)) {
680 styler.ColourTo(i - 1, SCE_H_NUMBER);
681 } else {
682 styler.ColourTo(i - 1, StateToPrint);
683 }
684 if (ch == '>') {
685 styler.ColourTo(i, SCE_H_TAG);
686 if (inScriptType == eNonHtmlScript) {
687 state = StateForScript(scriptLanguage);
688 } else {
689 state = SCE_H_DEFAULT;
690 }
691 } else {
692 state = SCE_H_OTHER;
693 }
694 }
695 }
696 break;
697 case SCE_HJ_DEFAULT:
698 case SCE_HJ_START:
699 case SCE_HJ_SYMBOLS:
700 if (iswordstart(ch)) {
701 styler.ColourTo(i - 1, StateToPrint);
702 state = SCE_HJ_WORD;
703 } else if (ch == '/' && chNext == '*') {
704 styler.ColourTo(i - 1, StateToPrint);
705 if (chNext2 == '*')
706 state = SCE_HJ_COMMENTDOC;
707 else
708 state = SCE_HJ_COMMENT;
709 } else if (ch == '/' && chNext == '/') {
710 styler.ColourTo(i - 1, StateToPrint);
711 state = SCE_HJ_COMMENTLINE;
712 } else if (ch == '\"') {
713 styler.ColourTo(i - 1, StateToPrint);
714 state = SCE_HJ_DOUBLESTRING;
715 } else if (ch == '\'') {
716 styler.ColourTo(i - 1, StateToPrint);
717 state = SCE_HJ_SINGLESTRING;
718 } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
719 styler.SafeGetCharAt(i + 3) == '-') {
720 styler.ColourTo(i - 1, StateToPrint);
721 state = SCE_HJ_COMMENTLINE;
722 } else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
723 styler.ColourTo(i - 1, StateToPrint);
724 state = SCE_HJ_COMMENTLINE;
725 i += 2;
726 } else if (isoperator(ch)) {
727 styler.ColourTo(i - 1, StateToPrint);
728 styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
729 state = SCE_HJ_DEFAULT;
730 } else if ((ch == ' ') || (ch == '\t')) {
731 if (state == SCE_HJ_START) {
732 styler.ColourTo(i - 1, StateToPrint);
733 state = SCE_HJ_DEFAULT;
734 }
735 }
736 break;
737 case SCE_HJ_WORD:
738 if (!iswordchar(ch)) {
739 classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
740 //styler.ColourTo(i - 1, eHTJSKeyword);
741 state = SCE_HJ_DEFAULT;
742 if (ch == '/' && chNext == '*') {
743 if (chNext2 == '*')
744 state = SCE_HJ_COMMENTDOC;
745 else
746 state = SCE_HJ_COMMENT;
747 } else if (ch == '/' && chNext == '/') {
748 state = SCE_HJ_COMMENTLINE;
749 } else if (ch == '\"') {
750 state = SCE_HJ_DOUBLESTRING;
751 } else if (ch == '\'') {
752 state = SCE_HJ_SINGLESTRING;
753 } else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
754 styler.ColourTo(i - 1, StateToPrint);
755 state = SCE_HJ_COMMENTLINE;
756 i += 2;
757 } else if (isoperator(ch)) {
758 styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
759 state = SCE_HJ_DEFAULT;
760 }
761 }
762 break;
763 case SCE_HJ_COMMENT:
764 case SCE_HJ_COMMENTDOC:
765 if (ch == '/' && chPrev == '*') {
766 styler.ColourTo(i, StateToPrint);
767 state = SCE_HJ_DEFAULT;
768 }
769 break;
770 case SCE_HJ_COMMENTLINE:
771 if (ch == '\r' || ch == '\n') {
772 styler.ColourTo(i - 1, statePrintForState(SCE_HJ_COMMENTLINE, inScriptType));
773 state = SCE_HJ_DEFAULT;
774 }
775 break;
776 case SCE_HJ_DOUBLESTRING:
777 if (ch == '\\') {
778 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
779 i++;
780 }
781 } else if (ch == '\"') {
782 styler.ColourTo(i, statePrintForState(SCE_HJ_DOUBLESTRING, inScriptType));
783 state = SCE_HJ_DEFAULT;
784 i++;
785 ch = chNext;
786 } else if ((inScriptType == eNonHtmlScript) && (ch == '-') && (chNext == '-') && (chNext2 == '>')) {
787 styler.ColourTo(i - 1, StateToPrint);
788 state = SCE_HJ_COMMENTLINE;
789 i += 2;
790 } else if (isLineEnd(ch)) {
791 styler.ColourTo(i - 1, StateToPrint);
792 state = SCE_HJ_STRINGEOL;
793 }
794 break;
795 case SCE_HJ_SINGLESTRING:
796 if (ch == '\\') {
797 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
798 i++;
799 }
800 } else if (ch == '\'') {
801 styler.ColourTo(i, statePrintForState(SCE_HJ_SINGLESTRING, inScriptType));
802 state = SCE_HJ_DEFAULT;
803 i++;
804 ch = chNext;
805 } else if ((inScriptType == eNonHtmlScript) && (ch == '-') && (chNext == '-') && (chNext2 == '>')) {
806 styler.ColourTo(i - 1, StateToPrint);
807 state = SCE_HJ_COMMENTLINE;
808 i += 2;
809 } else if (isLineEnd(ch)) {
810 styler.ColourTo(i - 1, StateToPrint);
811 state = SCE_HJ_STRINGEOL;
812 }
813 break;
814 case SCE_HJ_STRINGEOL:
815 if (!isLineEnd(ch)) {
816 styler.ColourTo(i - 1, StateToPrint);
817 state = SCE_HJ_DEFAULT;
818 } else if (!isLineEnd(chNext)) {
819 styler.ColourTo(i, StateToPrint);
820 state = SCE_HJ_DEFAULT;
821 }
822 break;
823 case SCE_HB_DEFAULT:
824 case SCE_HB_START:
825 if (iswordstart(ch)) {
826 styler.ColourTo(i - 1, StateToPrint);
827 state = SCE_HB_WORD;
828 } else if (ch == '\'') {
829 styler.ColourTo(i - 1, StateToPrint);
830 state = SCE_HB_COMMENTLINE;
831 } else if (ch == '\"') {
832 styler.ColourTo(i - 1, StateToPrint);
833 state = SCE_HB_STRING;
834 } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
835 styler.SafeGetCharAt(i + 3) == '-') {
836 styler.ColourTo(i - 1, StateToPrint);
837 state = SCE_HB_COMMENTLINE;
838 } else if (isoperator(ch)) {
839 styler.ColourTo(i - 1, StateToPrint);
840 styler.ColourTo(i, SCE_HB_DEFAULT);
841 state = SCE_HB_DEFAULT;
842 } else if ((ch == ' ') || (ch == '\t')) {
843 if (state == SCE_HB_START) {
844 styler.ColourTo(i - 1, StateToPrint);
845 state = SCE_HB_DEFAULT;
846 }
847 }
848 break;
849 case SCE_HB_WORD:
850 if (!iswordchar(ch)) {
851 state = classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
852 if (state == SCE_HB_DEFAULT) {
853 if (ch == '\"') {
854 state = SCE_HB_STRING;
855 } else if (ch == '\'') {
856 state = SCE_HB_COMMENTLINE;
857 } else if (isoperator(ch)) {
858 styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
859 state = SCE_HB_DEFAULT;
860 }
861 }
862 }
863 break;
864 case SCE_HB_STRING:
865 if (ch == '\"') {
866 styler.ColourTo(i, StateToPrint);
867 state = SCE_HB_DEFAULT;
868 i++;
869 ch = chNext;
870 } else if (ch == '\r' || ch == '\n') {
871 styler.ColourTo(i - 1, StateToPrint);
872 state = SCE_HB_STRINGEOL;
873 }
874 break;
875 case SCE_HB_COMMENTLINE:
876 if (ch == '\r' || ch == '\n') {
877 styler.ColourTo(i - 1, StateToPrint);
878 state = SCE_HB_DEFAULT;
879 }
880 break;
881 case SCE_HB_STRINGEOL:
882 if (!isLineEnd(ch)) {
883 styler.ColourTo(i - 1, StateToPrint);
884 state = SCE_HB_DEFAULT;
885 } else if (!isLineEnd(chNext)) {
886 styler.ColourTo(i, StateToPrint);
887 state = SCE_HB_DEFAULT;
888 }
889 break;
890 case SCE_HP_DEFAULT:
891 case SCE_HP_START:
892 if (iswordstart(ch)) {
893 styler.ColourTo(i - 1, StateToPrint);
894 state = SCE_HP_WORD;
895 } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
896 styler.SafeGetCharAt(i + 3) == '-') {
897 styler.ColourTo(i - 1, StateToPrint);
898 state = SCE_HP_COMMENTLINE;
899 } else if (ch == '#') {
900 styler.ColourTo(i - 1, StateToPrint);
901 state = SCE_HP_COMMENTLINE;
902 } else if (ch == '\"') {
903 styler.ColourTo(i - 1, StateToPrint);
904 if (chNext == '\"' && chNext2 == '\"') {
905 i += 2;
906 state = SCE_HP_TRIPLEDOUBLE;
907 ch = ' ';
908 chPrev = ' ';
909 chNext = styler.SafeGetCharAt(i + 1);
910 } else {
911 // state = statePrintForState(SCE_HP_STRING,inScriptType);
912 state = SCE_HP_STRING;
913 }
914 } else if (ch == '\'') {
915 styler.ColourTo(i - 1, StateToPrint);
916 if (chNext == '\'' && chNext2 == '\'') {
917 i += 2;
918 state = SCE_HP_TRIPLE;
919 ch = ' ';
920 chPrev = ' ';
921 chNext = styler.SafeGetCharAt(i + 1);
922 } else {
923 state = SCE_HP_CHARACTER;
924 }
925 } else if (isoperator(ch)) {
926 styler.ColourTo(i - 1, StateToPrint);
927 styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
928 } else if ((ch == ' ') || (ch == '\t')) {
929 if (state == SCE_HP_START) {
930 styler.ColourTo(i - 1, StateToPrint);
931 state = SCE_HP_DEFAULT;
932 }
933 }
934 break;
935 case SCE_HP_WORD:
936 if (!iswordchar(ch)) {
937 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
938 state = SCE_HP_DEFAULT;
939 if (ch == '#') {
940 state = SCE_HP_COMMENTLINE;
941 } else if (ch == '\"') {
942 if (chNext == '\"' && chNext2 == '\"') {
943 i += 2;
944 state = SCE_HP_TRIPLEDOUBLE;
945 ch = ' ';
946 chPrev = ' ';
947 chNext = styler.SafeGetCharAt(i + 1);
948 } else {
949 state = SCE_HP_STRING;
950 }
951 } else if (ch == '\'') {
952 if (chNext == '\'' && chNext2 == '\'') {
953 i += 2;
954 state = SCE_HP_TRIPLE;
955 ch = ' ';
956 chPrev = ' ';
957 chNext = styler.SafeGetCharAt(i + 1);
958 } else {
959 state = SCE_HP_CHARACTER;
960 }
961 } else if (isoperator(ch)) {
962 styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
963 }
964 }
965 break;
966 case SCE_HP_COMMENTLINE:
967 if (ch == '\r' || ch == '\n') {
968 styler.ColourTo(i - 1, StateToPrint);
969 state = SCE_HP_DEFAULT;
970 }
971 break;
972 case SCE_HP_STRING:
973 if (ch == '\\') {
974 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
975 i++;
976 ch = chNext;
977 chNext = styler.SafeGetCharAt(i + 1);
978 }
979 } else if (ch == '\"') {
980 styler.ColourTo(i, StateToPrint);
981 state = SCE_HP_DEFAULT;
982 }
983 break;
984 case SCE_HP_CHARACTER:
985 if (ch == '\\') {
986 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
987 i++;
988 ch = chNext;
989 chNext = styler.SafeGetCharAt(i + 1);
990 }
991 } else if (ch == '\'') {
992 styler.ColourTo(i, StateToPrint);
993 state = SCE_HP_DEFAULT;
994 }
995 break;
996 case SCE_HP_TRIPLE:
997 if (ch == '\'' && chPrev == '\'' && chPrev2 == '\'') {
998 styler.ColourTo(i, StateToPrint);
999 state = SCE_HP_DEFAULT;
1000 }
1001 break;
1002 case SCE_HP_TRIPLEDOUBLE:
1003 if (ch == '\"' && chPrev == '\"' && chPrev2 == '\"') {
1004 styler.ColourTo(i, StateToPrint);
1005 state = SCE_HP_DEFAULT;
1006 }
1007 break;
1008 ///////////// start - PHP state handling
1009 case SCE_HPHP_WORD:
1010 if (!iswordstart(ch)) {
1011 classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
1012 if (ch == '/' && chNext == '*') {
1013 i++;
1014 state = SCE_HPHP_COMMENT;
1015 } else if (ch == '/' && chNext == '/') {
1016 i++;
1017 state = SCE_HPHP_COMMENTLINE;
1018 } else if (ch == '#') {
1019 state = SCE_HPHP_COMMENTLINE;
1020 } else if (ch == '\"') {
1021 state = SCE_HPHP_HSTRING;
1022 } else if (ch == '\'') {
1023 state = SCE_HPHP_SIMPLESTRING;
1024 } else if (ch == '$') {
1025 state = SCE_HPHP_VARIABLE;
1026 } else {
1027 state = SCE_HPHP_DEFAULT;
1028 }
1029 }
1030 break;
1031 case SCE_HPHP_NUMBER:
1032 if (!isdigit(ch)) {
1033 styler.ColourTo(i - 1, SCE_HPHP_NUMBER);
1034 state = SCE_HPHP_DEFAULT;
1035 }
1036 break;
1037 case SCE_HPHP_VARIABLE:
1038 if (!iswordstart(ch)) {
1039 styler.ColourTo(i - 1, SCE_HPHP_VARIABLE);
1040 state = SCE_HPHP_DEFAULT;
1041 }
1042 break;
1043 case SCE_HPHP_COMMENT:
1044 if (ch == '/' && chPrev == '*') {
1045 styler.ColourTo(i, StateToPrint);
1046 state = SCE_HPHP_DEFAULT;
1047 }
1048 break;
1049 case SCE_HPHP_COMMENTLINE:
1050 if (ch == '\r' || ch == '\n') {
1051 styler.ColourTo(i - 1, StateToPrint);
1052 state = SCE_HPHP_DEFAULT;
1053 }
1054 break;
1055 case SCE_HPHP_HSTRING:
1056 if (ch == '\\') {
1057 // skip the next char
1058 i++;
1059 } else if (ch == '\"') {
1060 styler.ColourTo(i, StateToPrint);
1061 state = SCE_HPHP_DEFAULT;
1062 }
1063 break;
1064 case SCE_HPHP_SIMPLESTRING:
1065 if (ch == '\'') {
1066 styler.ColourTo(i, StateToPrint);
1067 state = SCE_HPHP_DEFAULT;
1068 }
1069 break;
1070 case SCE_HPHP_DEFAULT:
1071 styler.ColourTo(i - 1, StateToPrint);
1072 if (isdigit(ch)) {
1073 state = SCE_HPHP_NUMBER;
1074 } else if (iswordstart(ch)) {
1075 state = SCE_HPHP_WORD;
1076 } else if (ch == '/' && chNext == '*') {
1077 i++;
1078 state = SCE_HPHP_COMMENT;
1079 } else if (ch == '/' && chNext == '/') {
1080 i++;
1081 state = SCE_HPHP_COMMENTLINE;
1082 } else if (ch == '#') {
1083 state = SCE_HPHP_COMMENTLINE;
1084 } else if (ch == '\"') {
1085 state = SCE_HPHP_HSTRING;
1086 } else if (ch == '\'') {
1087 state = SCE_HPHP_SIMPLESTRING;
1088 } else if (ch == '$') {
1089 state = SCE_HPHP_VARIABLE;
1090 }
1091 break;
1092 ///////////// end - PHP state handling
1093 }
1094
1095
1096
1097 if (state == SCE_HB_DEFAULT) { // One of the above succeeded
1098 if (ch == '\"') {
1099 state = SCE_HB_STRING;
1100 } else if (ch == '\'') {
1101 state = SCE_HB_COMMENTLINE;
1102 } else if (iswordstart(ch)) {
1103 state = SCE_HB_WORD;
1104 } else if (isoperator(ch)) {
1105 styler.ColourTo(i, SCE_HB_DEFAULT);
1106 }
1107 } else if (state == SCE_HBA_DEFAULT) { // One of the above succeeded
1108 if (ch == '\"') {
1109 state = SCE_HBA_STRING;
1110 } else if (ch == '\'') {
1111 state = SCE_HBA_COMMENTLINE;
1112 } else if (iswordstart(ch)) {
1113 state = SCE_HBA_WORD;
1114 } else if (isoperator(ch)) {
1115 styler.ColourTo(i, SCE_HBA_DEFAULT);
1116 }
1117 } else if (state == SCE_HJ_DEFAULT) { // One of the above succeeded
1118 if (ch == '/' && chNext == '*') {
1119 if (styler.SafeGetCharAt(i + 2) == '*')
1120 state = SCE_HJ_COMMENTDOC;
1121 else
1122 state = SCE_HJ_COMMENT;
1123 } else if (ch == '/' && chNext == '/') {
1124 state = SCE_HJ_COMMENTLINE;
1125 } else if (ch == '\"') {
1126 state = SCE_HJ_DOUBLESTRING;
1127 } else if (ch == '\'') {
1128 state = SCE_HJ_SINGLESTRING;
1129 } else if (iswordstart(ch)) {
1130 state = SCE_HJ_WORD;
1131 } else if (isoperator(ch)) {
1132 styler.ColourTo(i, SCE_HJ_SYMBOLS);
1133 }
1134 }
1135 }
1136
1137 StateToPrint = statePrintForState(state, inScriptType);
1138 styler.ColourTo(lengthDoc - 1, StateToPrint);
1139
1140 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
1141 if (fold) {
1142 int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
1143 //styler.SetLevel(lineCurrent, levelCurrent | flagsNext);
1144 styler.SetLevel(lineCurrent, levelPrev | flagsNext);
1145
1146 }
1147 }
1148
1149 LexerModule lmHTML(SCLEX_HTML, ColouriseHyperTextDoc);
1150 LexerModule lmXML(SCLEX_XML, ColouriseHyperTextDoc);
1151