]>
git.saurik.com Git - wxWidgets.git/blob - contrib/src/stc/scintilla/src/LexPerl.cxx
a7186fe5525641672f16b746da4cbdc68830aec1
1 // SciTE - Scintilla based Text Editor
2 // LexPerl.cxx - lexer for subset of Perl
3 // Copyright 1998-2000 by Neil Hodgson <neilh@scintilla.org>
4 // The License.txt file describes the conditions under which this software may be distributed.
17 #include "Scintilla.h"
// Reports whether ch belongs to the fixed set of punctuation characters
// that the Perl lexer colours as an operator.
//
// Note: '.' IS part of the set, even though it can also appear inside
// numbers. Characters such as '$', '@', '#', the quote characters and
// the backtick are deliberately absent from the set.
inline bool isPerlOperator(char ch) {
	switch (ch) {
	case '%': case '^': case '&': case '*': case '\\':
	case '(': case ')': case '-': case '+':
	case '=': case '|': case '{': case '}':
	case '[': case ']': case ':': case ';':
	case '<': case '>': case ',': case '/':
	case '?': case '!': case '.': case '~':
		return true;
	default:
		return false;
	}
}
34 static int classifyWordPerl(unsigned int start
, unsigned int end
, WordList
&keywords
, Accessor
&styler
) {
36 bool wordIsNumber
= isdigit(styler
[start
]) || (styler
[start
] == '.');
37 for (unsigned int i
= 0; i
< end
- start
+ 1 && i
< 30; i
++) {
38 s
[i
] = styler
[start
+ i
];
41 char chAttr
= SCE_PL_IDENTIFIER
;
43 chAttr
= SCE_PL_NUMBER
;
45 if (keywords
.InList(s
))
48 styler
.ColourTo(end
, chAttr
);
// Returns true when ch is neither alphanumeric nor one of '#', '$', '_'
// or '\''. Judging by the name, callers use this to detect the character
// that ends a variable name.
static bool isEndVar(char ch) {
	// isalnum() is undefined for negative values other than EOF, so route
	// the (possibly signed) char through unsigned char first.
	const unsigned char uch = static_cast<unsigned char>(ch);
	if (isalnum(uch))
		return false;
	return ch != '#' && ch != '$' && ch != '_' && ch != '\'';
}
57 static bool isMatch(Accessor
&styler
, int lengthDoc
, int pos
, const char *val
) {
58 if ((pos
+ static_cast<int>(strlen(val
))) >= lengthDoc
) {
62 if (*val
!= styler
[pos
++]) {
70 static char opposite(char ch
) {
82 static void ColourisePerlDoc(unsigned int startPos
, int length
, int initStyle
,
83 WordList
*keywordlists
[], Accessor
&styler
) {
85 // Lexer for perl often has to backtrack to start of current style to determine
86 // which characters are being used as quotes, how deeply nested is the
87 // start position and what the termination string is for here documents
89 WordList
&keywords
= *keywordlists
[0];
98 sooked
[sookedpos
] = '\0';
99 int state
= initStyle
;
100 int lengthDoc
= startPos
+ length
;
101 // If in a long distance lexical state, seek to the beginning to find quote characters
102 if (state
== SCE_PL_HERE
|| state
== SCE_PL_REGEX
||
103 state
== SCE_PL_REGSUBST
|| state
== SCE_PL_LONGQUOTE
) {
104 while ((startPos
> 1) && (styler
.StyleAt(startPos
- 1) == state
)) {
107 state
= SCE_PL_DEFAULT
;
109 styler
.StartAt(startPos
);
110 char chPrev
= styler
.SafeGetCharAt(startPos
- 1);
111 char chNext
= styler
[startPos
];
112 styler
.StartSegment(startPos
);
113 for (int i
= startPos
; i
< lengthDoc
; i
++) {
115 chNext
= styler
.SafeGetCharAt(i
+ 1);
116 char chNext2
= styler
.SafeGetCharAt(i
+ 2);
118 if (styler
.IsLeadByte(ch
)) {
119 chNext
= styler
.SafeGetCharAt(i
+ 2);
125 if (state
== SCE_PL_DEFAULT
) {
126 if (iswordstart(ch
)) {
127 styler
.ColourTo(i
- 1, state
);
128 if (ch
== 's' && !isalnum(chNext
)) {
129 state
= SCE_PL_REGSUBST
;
134 } else if (ch
== 'm' && !isalnum(chNext
)) {
135 state
= SCE_PL_REGEX
;
140 } else if (ch
== 't' && chNext
== 'r' && !isalnum(chNext2
)) {
141 state
= SCE_PL_REGSUBST
;
148 } else if (ch
== 'q' && (chNext
== 'q' || chNext
== 'r' || chNext
== 'w' || chNext
== 'x') && !isalnum(chNext2
)) {
149 state
= SCE_PL_LONGQUOTE
;
160 } else if (ch
== '#') {
161 styler
.ColourTo(i
- 1, state
);
162 state
= SCE_PL_COMMENTLINE
;
163 } else if (ch
== '\"') {
164 styler
.ColourTo(i
- 1, state
);
165 state
= SCE_PL_STRING
;
166 } else if (ch
== '\'') {
169 styler
.ColourTo(i
, state
);
171 styler
.ColourTo(i
- 1, state
);
172 state
= SCE_PL_CHARACTER
;
174 } else if (ch
== '`') {
175 styler
.ColourTo(i
- 1, state
);
176 state
= SCE_PL_BACKTICKS
;
177 } else if (ch
== '$') {
179 styler
.ColourTo(i
- 1, state
);
180 if (isalnum(chNext
) || chNext
== '#' || chNext
== '$' || chNext
== '_') {
181 state
= SCE_PL_SCALAR
;
182 } else if (chNext
!= '{' && chNext
!= '[') {
183 styler
.ColourTo(i
, SCE_PL_SCALAR
);
188 styler
.ColourTo(i
, SCE_PL_SCALAR
);
190 } else if (ch
== '@') {
192 styler
.ColourTo(i
- 1, state
);
193 if (isalpha(chNext
) || chNext
== '#' || chNext
== '$' || chNext
== '_') {
194 state
= SCE_PL_ARRAY
;
195 } else if (chNext
!= '{' && chNext
!= '[') {
196 styler
.ColourTo(i
, SCE_PL_ARRAY
);
200 styler
.ColourTo(i
, SCE_PL_ARRAY
);
202 } else if (ch
== '%') {
204 styler
.ColourTo(i
- 1, state
);
205 if (isalpha(chNext
) || chNext
== '#' || chNext
== '$' || chNext
== '_') {
207 } else if (chNext
!= '{' && chNext
!= '[') {
208 styler
.ColourTo(i
, SCE_PL_HASH
);
212 styler
.ColourTo(i
, SCE_PL_HASH
);
214 } else if (ch
== '*') {
215 styler
.ColourTo(i
- 1, state
);
216 state
= SCE_PL_SYMBOLTABLE
;
217 } else if (ch
== '/' && preferRE
) {
218 styler
.ColourTo(i
- 1, state
);
219 state
= SCE_PL_REGEX
;
224 } else if (ch
== '<' && chNext
== '<') {
225 styler
.ColourTo(i
- 1, state
);
232 sooked
[sookedpos
] = '\0';
233 } else if (ch
== '=' && (chPrev
== '\r' || chPrev
== '\n') && isalpha(chNext
)) {
234 styler
.ColourTo(i
- 1, state
);
238 sooked
[sookedpos
] = '\0';
239 } else if (isPerlOperator(ch
)) {
240 if (ch
== ')' || ch
== ']')
244 styler
.ColourTo(i
- 1, state
);
245 styler
.ColourTo(i
, SCE_PL_OPERATOR
);
247 } else if (state
== SCE_PL_WORD
) {
248 if (!iswordchar(ch
) && ch
!= '\'') { // Archaic Perl has quotes inside names
249 if (isMatch(styler
, lengthDoc
, styler
.GetStartSegment(), "__DATA__")) {
250 styler
.ColourTo(i
, SCE_PL_DATASECTION
);
251 state
= SCE_PL_DATASECTION
;
252 } else if (isMatch(styler
, lengthDoc
, styler
.GetStartSegment(), "__END__")) {
253 styler
.ColourTo(i
, SCE_PL_DATASECTION
);
254 state
= SCE_PL_DATASECTION
;
256 if (classifyWordPerl(styler
.GetStartSegment(), i
- 1, keywords
, styler
) == SCE_PL_WORD
)
258 state
= SCE_PL_DEFAULT
;
260 state
= SCE_PL_COMMENTLINE
;
261 } else if (ch
== '\"') {
262 state
= SCE_PL_STRING
;
263 } else if (ch
== '\'') {
264 state
= SCE_PL_CHARACTER
;
265 } else if (ch
== '<' && chNext
== '<') {
269 sooked
[sookedpos
] = '\0';
270 } else if (isPerlOperator(ch
)) {
271 if (ch
== ')' || ch
== ']')
275 styler
.ColourTo(i
, SCE_PL_OPERATOR
);
276 state
= SCE_PL_DEFAULT
;
281 if (state
== SCE_PL_COMMENTLINE
) {
282 if (ch
== '\r' || ch
== '\n') {
283 styler
.ColourTo(i
- 1, state
);
284 state
= SCE_PL_DEFAULT
;
286 } else if (state
== SCE_PL_HERE
) {
287 if ((isalnum(ch
) || ch
== '_') && quotes
< 2) {
288 sooked
[sookedpos
++] = ch
;
289 sooked
[sookedpos
] = '\0';
296 (chPrev
== '\n' || chPrev
== '\r') &&
297 isMatch(styler
, lengthDoc
, i
, sooked
)) {
299 chNext
= styler
.SafeGetCharAt(i
);
300 if (chNext
== '\n' || chNext
== '\r') {
301 styler
.ColourTo(i
- 1, SCE_PL_HERE
);
302 state
= SCE_PL_DEFAULT
;
305 chNext
= styler
.SafeGetCharAt(i
+ 1);
307 } else if (state
== SCE_PL_STRING
) {
309 if (chNext
== '\"' || chNext
== '\'' || chNext
== '\\') {
312 chNext
= styler
.SafeGetCharAt(i
+ 1);
314 } else if (ch
== '\"') {
315 styler
.ColourTo(i
, state
);
316 state
= SCE_PL_DEFAULT
;
319 chNext
= styler
.SafeGetCharAt(i
+ 1);
321 } else if (state
== SCE_PL_CHARACTER
) {
323 if (chNext
== '\"' || chNext
== '\'' || chNext
== '\\') {
326 chNext
= styler
.SafeGetCharAt(i
+ 1);
328 } else if (ch
== '\'') {
329 styler
.ColourTo(i
, state
);
330 state
= SCE_PL_DEFAULT
;
333 chNext
= styler
.SafeGetCharAt(i
+ 1);
335 } else if (state
== SCE_PL_BACKTICKS
) {
337 styler
.ColourTo(i
, state
);
338 state
= SCE_PL_DEFAULT
;
341 chNext
= styler
.SafeGetCharAt(i
+ 1);
343 } else if (state
== SCE_PL_POD
) {
344 if (ch
== '=' && (chPrev
== '\r' || chPrev
== '\n')) {
345 if (isMatch(styler
, lengthDoc
, i
, "=cut")) {
346 styler
.ColourTo(i
- 1 + 4, state
);
348 state
= SCE_PL_DEFAULT
;
349 ch
= styler
.SafeGetCharAt(i
);
350 chNext
= styler
.SafeGetCharAt(i
+ 1);
353 } else if (state
== SCE_PL_SCALAR
) {
355 styler
.ColourTo(i
- 1, state
);
356 state
= SCE_PL_DEFAULT
;
358 } else if (state
== SCE_PL_ARRAY
) {
360 styler
.ColourTo(i
- 1, state
);
361 state
= SCE_PL_DEFAULT
;
363 } else if (state
== SCE_PL_HASH
) {
365 styler
.ColourTo(i
- 1, state
);
366 state
= SCE_PL_DEFAULT
;
368 } else if (state
== SCE_PL_SYMBOLTABLE
) {
370 styler
.ColourTo(i
- 1, state
);
371 state
= SCE_PL_DEFAULT
;
373 } else if (state
== SCE_PL_REF
) {
375 styler
.ColourTo(i
- 1, state
);
376 state
= SCE_PL_DEFAULT
;
378 } else if (state
== SCE_PL_REGEX
) {
379 if (!quoteUp
&& !isspace(ch
)) {
381 quoteDown
= opposite(ch
);
384 if (ch
== quoteDown
&& chPrev
!= '\\') {
388 if (quoteUp
== quoteDown
) {
392 if (!isalpha(chNext
)) {
394 styler
.ColourTo(i
, state
);
395 state
= SCE_PL_DEFAULT
;
399 } else if (ch
== quoteUp
&& chPrev
!= '\\') {
401 } else if (!isalpha(chNext
)) {
403 styler
.ColourTo(i
, state
);
404 state
= SCE_PL_DEFAULT
;
409 } else if (state
== SCE_PL_REGSUBST
) {
410 if (!quoteUp
&& !isspace(ch
)) {
412 quoteDown
= opposite(ch
);
415 if (quotes
== 0 && quoteRep
== 1) {
416 /* We matched something like s(...) or tr{...}
417 * and are looking for the next matcher characters,
418 * which could be either bracketed ({...}) or non-bracketed
421 * Number-signs are problematic. If they occur after
422 * the close of the first part, treat them like
423 * a quoteUp char, even if they actually start comments.
425 * If we find an alnum, we end the regsubst, and punt.
427 * Eric Promislow ericp@activestate.com Aug 9,2000
431 } else if (isalnum(ch
)) {
432 styler
.ColourTo(i
, state
);
433 state
= SCE_PL_DEFAULT
;
437 quoteDown
= opposite(ch
);
440 } else if (ch
== quoteDown
&& chPrev
!= '\\') {
445 if (!isalpha(chNext
)) {
447 styler
.ColourTo(i
, state
);
448 state
= SCE_PL_DEFAULT
;
452 if (quoteUp
== quoteDown
) {
455 } else if (ch
== quoteUp
&& chPrev
!= '\\') {
457 } else if (!isalpha(chNext
)) {
459 styler
.ColourTo(i
, state
);
460 state
= SCE_PL_DEFAULT
;
465 } else if (state
== SCE_PL_LONGQUOTE
) {
466 if (!quoteDown
&& !isspace(ch
)) {
468 quoteDown
= opposite(quoteUp
);
470 } else if (ch
== quoteDown
) {
475 styler
.ColourTo(i
, state
);
476 state
= SCE_PL_DEFAULT
;
479 if (quoteUp
== quoteDown
) {
483 } else if (ch
== quoteUp
) {
488 if (state
== SCE_PL_DEFAULT
) { // One of the above succeeded
490 state
= SCE_PL_COMMENTLINE
;
491 } else if (ch
== '\"') {
492 state
= SCE_PL_STRING
;
493 } else if (ch
== '\'') {
494 state
= SCE_PL_CHARACTER
;
495 } else if (iswordstart(ch
)) {
498 } else if (isoperator(ch
)) {
499 styler
.ColourTo(i
, SCE_PL_OPERATOR
);
505 styler
.ColourTo(lengthDoc
, state
);
508 LexerModule
lmPerl(SCLEX_PERL
, ColourisePerlDoc
);