]> git.saurik.com Git - wxWidgets.git/blob - src/stc/scintilla/src/LexMarkdown.cxx
f7fc48f40fd74ae091053091fd53c216200eaff8
[wxWidgets.git] / src / stc / scintilla / src / LexMarkdown.cxx
1 /******************************************************************
2 * LexMarkdown.cxx
3 *
4 * A simple Markdown lexer for scintilla.
5 *
6 * Includes highlighting for some extra features from the
7 * Pandoc implementation; strikeout, using '#.' as a default
8 * ordered list item marker, and delimited code blocks.
9 *
10 * Limitations:
11 *
12 * Standard indented code blocks are not highlighted at all,
13 * as it would conflict with other indentation schemes. Use
14 * delimited code blocks for blanket highlighting of an
15 * entire code block. Embedded HTML is not highlighted either.
16 * Blanket HTML highlighting has issues, because some Markdown
17 * implementations allow Markdown markup inside of the HTML. Also,
18 * there is a following blank line issue that can't be ignored,
19 * explained in the next paragraph. Embedded HTML and code
20 * blocks would be better supported with language specific
21 * highlighting.
22 *
23 * The highlighting aims to accurately reflect correct syntax,
24 * but a few restrictions are relaxed. Delimited code blocks are
25 * highlighted, even if the line following the code block is not blank.
26 * Requiring a blank line after a block breaks the highlighting
27 * in certain cases, because of the way Scintilla ends up calling
28 * the lexer.
29 *
30 * Written by Jon Strait - jstrait@moonloop.net
31 *
32 * The License.txt file describes the conditions under which this
33 * software may be distributed.
34 *
35 *****************************************************************/
36
37 #include <stdlib.h>
38 #include <string.h>
39 #include <ctype.h>
40 #include <stdio.h>
41 #include <stdarg.h>
42
43 #include "Platform.h"
44
45 #include "PropSet.h"
46 #include "Accessor.h"
47 #include "StyleContext.h"
48 #include "KeyWords.h"
49 #include "Scintilla.h"
50 #include "SciLexer.h"
51
52 #ifdef SCI_NAMESPACE
53 using namespace Scintilla;
54 #endif
55
// Reports whether ch is one of the two line terminator characters,
// line feed or carriage return.
static inline bool IsNewline(const int ch) {
    switch (ch) {
    case '\n':
    case '\r':
        return true;
    default:
        return false;
    }
}
59
60 // True if can follow ch down to the end with possibly trailing whitespace
61 static bool FollowToLineEnd(const int ch, const int state, const unsigned int endPos, StyleContext &sc) {
62 unsigned int i = 0;
63 while (sc.GetRelative(++i) == ch)
64 ;
65 // Skip over whitespace
66 while (IsASpaceOrTab(sc.GetRelative(i)) && sc.currentPos + i < endPos)
67 ++i;
68 if (IsNewline(sc.GetRelative(i)) || sc.currentPos + i == endPos) {
69 sc.Forward(i);
70 sc.ChangeState(state);
71 sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
72 return true;
73 }
74 else return false;
75 }
76
77 // Set the state on text section from current to length characters,
78 // then set the rest until the newline to default, except for any characters matching token
79 static void SetStateAndZoom(const int state, const int length, const int token, StyleContext &sc) {
80 sc.SetState(state);
81 sc.Forward(length);
82 sc.SetState(SCE_MARKDOWN_DEFAULT);
83 sc.Forward();
84 bool started = false;
85 while (sc.More() && !IsNewline(sc.ch)) {
86 if (sc.ch == token && !started) {
87 sc.SetState(state);
88 started = true;
89 }
90 else if (sc.ch != token) {
91 sc.SetState(SCE_MARKDOWN_DEFAULT);
92 started = false;
93 }
94 sc.Forward();
95 }
96 sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
97 }
98
99 // Does the previous line have more than spaces and tabs?
100 static bool HasPrevLineContent(StyleContext &sc) {
101 int i = 0;
102 // Go back to the previous newline
103 while ((--i + sc.currentPos) && !IsNewline(sc.GetRelative(i)))
104 ;
105 while (--i + sc.currentPos) {
106 if (IsNewline(sc.GetRelative(i)))
107 break;
108 if (!IsASpaceOrTab(sc.GetRelative(i)))
109 return true;
110 }
111 return false;
112 }
113
114 static bool IsValidHrule(const unsigned int endPos, StyleContext &sc) {
115 int c, count = 1;
116 unsigned int i = 0;
117 while (++i) {
118 c = sc.GetRelative(i);
119 if (c == sc.ch)
120 ++count;
121 // hit a terminating character
122 else if (!IsASpaceOrTab(c) || sc.currentPos + i == endPos) {
123 // Are we a valid HRULE
124 if ((IsNewline(c) || sc.currentPos + i == endPos) &&
125 count >= 3 && !HasPrevLineContent(sc)) {
126 sc.SetState(SCE_MARKDOWN_HRULE);
127 sc.Forward(i);
128 sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
129 return true;
130 }
131 else {
132 sc.SetState(SCE_MARKDOWN_DEFAULT);
133 return false;
134 }
135 }
136 }
137 return false;
138 }
139
// Styles the range [startPos, startPos + length) as Markdown.
//
// initStyle is the state the StyleContext starts in and styler is the
// accessor the StyleContext is built on. The unnamed WordList ** parameter is
// not used.
//
// Every pass through the main loop runs three stages in this order:
//   1. an if / else-if chain on the current state that closes inline spans
//      (code, strong, emphasis, strikeout), ends a delimited code block,
//      classifies the start of a line (LINE_BEGIN) or ends a header;
//   2. PRECHAR handling, which recognises block quotes, horizontal rules and
//      list markers within the leading spaces of a line;
//   3. DEFAULT handling, which opens links, images and inline spans.
// A state set by an earlier stage is seen by the later stages of the same
// pass, so the order of the stages matters.
140 static void ColorizeMarkdownDoc(unsigned int startPos, int length, int initStyle,
141 WordList **, Accessor &styler) {
142 unsigned int endPos = startPos + length;
// Number of leading spaces counted so far while in SCE_MARKDOWN_PRECHAR;
// reset to 0 each time that state is entered.
143 int precharCount = 0;
144 // Don't advance on a new loop iteration and retry at the same position.
145 // Useful in the corner case of having to start at the beginning file position
146 // in the default state.
147 bool freezeCursor = false;
148
149 StyleContext sc(startPos, length, initStyle, styler);
150
151 while (sc.More()) {
152 // Skip past escaped characters
// NOTE(review): only the backslash itself is stepped over here; the loop
// restarts on the character that follows it, which is then examined like
// any other character.
153 if (sc.ch == '\\') {
154 sc.Forward();
155 continue;
156 }
157
158 // A blockquote resets the line semantics
159 if (sc.state == SCE_MARKDOWN_BLOCKQUOTE)
160 sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
161
162 // Conditional state-based actions
// Double-backtick code span: closed by "``" unless the character two
// positions back is a space.
163 if (sc.state == SCE_MARKDOWN_CODE2) {
164 if (sc.Match("``") && sc.GetRelative(-2) != ' ') {
165 sc.Forward(2);
166 sc.SetState(SCE_MARKDOWN_DEFAULT);
167 }
168 }
// Single-backtick code span: closed by a backtick not preceded by a space.
169 else if (sc.state == SCE_MARKDOWN_CODE) {
170 if (sc.ch == '`' && sc.chPrev != ' ')
171 sc.ForwardSetState(SCE_MARKDOWN_DEFAULT);
172 }
173 /* De-activated because it gets in the way of other valid indentation
174 * schemes, for example multiple paragraphs inside a list item.
175 // Code block
176 else if (sc.state == SCE_MARKDOWN_CODEBK) {
177 bool d = true;
178 if (IsNewline(sc.ch)) {
179 if (sc.chNext != '\t') {
180 for (int c = 1; c < 5; ++c) {
181 if (sc.GetRelative(c) != ' ')
182 d = false;
183 }
184 }
185 }
186 else if (sc.atLineStart) {
187 if (sc.ch != '\t' ) {
188 for (int i = 0; i < 4; ++i) {
189 if (sc.GetRelative(i) != ' ')
190 d = false;
191 }
192 }
193 }
194 if (!d)
195 sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
196 }
197 */
198 // Strong
199 else if (sc.state == SCE_MARKDOWN_STRONG1) {
200 if (sc.Match("**") && sc.chPrev != ' ') {
201 sc.Forward(2);
202 sc.SetState(SCE_MARKDOWN_DEFAULT);
203 }
204 }
205 else if (sc.state == SCE_MARKDOWN_STRONG2) {
206 if (sc.Match("__") && sc.chPrev != ' ') {
207 sc.Forward(2);
208 sc.SetState(SCE_MARKDOWN_DEFAULT);
209 }
210 }
211 // Emphasis
212 else if (sc.state == SCE_MARKDOWN_EM1) {
213 if (sc.ch == '*' && sc.chPrev != ' ')
214 sc.ForwardSetState(SCE_MARKDOWN_DEFAULT);
215 }
216 else if (sc.state == SCE_MARKDOWN_EM2) {
217 if (sc.ch == '_' && sc.chPrev != ' ')
218 sc.ForwardSetState(SCE_MARKDOWN_DEFAULT);
219 }
// Delimited code block: ended by a line that starts with "~~~"; the whole
// closing line, up to its newline or the end of the range, stays in the
// code block style.
220 else if (sc.state == SCE_MARKDOWN_CODEBK) {
221 if (sc.atLineStart && sc.Match("~~~")) {
222 int i = 1;
223 while (!IsNewline(sc.GetRelative(i)) && sc.currentPos + i < endPos)
224 i++;
225 sc.Forward(i);
226 sc.SetState(SCE_MARKDOWN_DEFAULT);
227 }
228 }
229 else if (sc.state == SCE_MARKDOWN_STRIKEOUT) {
230 if (sc.Match("~~") && sc.chPrev != ' ') {
231 sc.Forward(2);
232 sc.SetState(SCE_MARKDOWN_DEFAULT);
233 }
234 }
235 else if (sc.state == SCE_MARKDOWN_LINE_BEGIN) {
236 // Header
// Longest run of '#' is tested first so that each level is matched exactly.
237 if (sc.Match("######"))
238 SetStateAndZoom(SCE_MARKDOWN_HEADER6, 6, '#', sc);
239 else if (sc.Match("#####"))
240 SetStateAndZoom(SCE_MARKDOWN_HEADER5, 5, '#', sc);
241 else if (sc.Match("####"))
242 SetStateAndZoom(SCE_MARKDOWN_HEADER4, 4, '#', sc);
243 else if (sc.Match("###"))
244 SetStateAndZoom(SCE_MARKDOWN_HEADER3, 3, '#', sc);
245 else if (sc.Match("##"))
246 SetStateAndZoom(SCE_MARKDOWN_HEADER2, 2, '#', sc);
247 else if (sc.Match("#")) {
248 // Catch the special case of the '#.' ordered list marker
249 if (sc.chNext == '.' && IsASpaceOrTab(sc.GetRelative(2))) {
250 precharCount = 0;
251 sc.SetState(SCE_MARKDOWN_PRECHAR);
252 }
253 else
254 SetStateAndZoom(SCE_MARKDOWN_HEADER1, 1, '#', sc);
255 }
256 // Code block
// Opens a delimited code block only when the previous line is blank.
257 else if (sc.Match("~~~")) {
258 if (!HasPrevLineContent(sc))
259 sc.SetState(SCE_MARKDOWN_CODEBK);
260 else
261 sc.SetState(SCE_MARKDOWN_DEFAULT);
262 }
// A line of '=' under a non-blank line is styled as a level 1 header.
263 else if (sc.ch == '=') {
264 if (HasPrevLineContent(sc) && FollowToLineEnd('=', SCE_MARKDOWN_HEADER1, endPos, sc))
265 ;
266 else
267 sc.SetState(SCE_MARKDOWN_DEFAULT);
268 }
// A line of '-' under a non-blank line is styled as a level 2 header;
// otherwise the '-' is left to the PRECHAR stage (rule or list item).
269 else if (sc.ch == '-') {
270 if (HasPrevLineContent(sc) && FollowToLineEnd('-', SCE_MARKDOWN_HEADER2, endPos, sc))
271 ;
272 else {
273 precharCount = 0;
274 sc.SetState(SCE_MARKDOWN_PRECHAR);
275 }
276 }
277 else if (IsNewline(sc.ch))
278 sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
279 else {
280 precharCount = 0;
281 sc.SetState(SCE_MARKDOWN_PRECHAR);
282 }
283 }
284
285 // The header lasts until the newline
286 else if (sc.state == SCE_MARKDOWN_HEADER1 || sc.state == SCE_MARKDOWN_HEADER2 ||
287 sc.state == SCE_MARKDOWN_HEADER3 || sc.state == SCE_MARKDOWN_HEADER4 ||
288 sc.state == SCE_MARKDOWN_HEADER5 || sc.state == SCE_MARKDOWN_HEADER6) {
289 if (IsNewline(sc.ch))
290 sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
291 }
292
293 // New state only within the initial whitespace
294 if (sc.state == SCE_MARKDOWN_PRECHAR) {
295 // Blockquote
296 if (sc.ch == '>' && precharCount < 5)
297 sc.SetState(SCE_MARKDOWN_BLOCKQUOTE);
298 /*
299 // Begin of code block
300 else if (!HasPrevLineContent(sc) && (sc.chPrev == '\t' || precharCount >= 4))
301 sc.SetState(SCE_MARKDOWN_CODEBK);
302 */
303 // HRule - Total of three or more hyphens, asterisks, or underscores
304 // on a line by themselves
// IsValidHrule does the styling itself when it returns true, hence the
// empty statement.
305 else if ((sc.ch == '-' || sc.ch == '*' || sc.ch == '_') && IsValidHrule(endPos, sc))
306 ;
307 // Unordered list
308 else if ((sc.ch == '-' || sc.ch == '*' || sc.ch == '+') && IsASpaceOrTab(sc.chNext)) {
309 sc.SetState(SCE_MARKDOWN_ULIST_ITEM);
310 sc.ForwardSetState(SCE_MARKDOWN_DEFAULT);
311 }
312 // Ordered list
// One or more digits followed by '.' and a space or tab.
313 else if (IsADigit(sc.ch)) {
314 int digitCount = 0;
315 while (IsADigit(sc.GetRelative(++digitCount)))
316 ;
317 if (sc.GetRelative(digitCount) == '.' &&
318 IsASpaceOrTab(sc.GetRelative(digitCount + 1))) {
319 sc.SetState(SCE_MARKDOWN_OLIST_ITEM);
320 sc.Forward(digitCount + 1);
321 sc.SetState(SCE_MARKDOWN_DEFAULT);
322 }
323 }
324 // Alternate Ordered list
325 else if (sc.ch == '#' && sc.chNext == '.' && IsASpaceOrTab(sc.GetRelative(2))) {
326 sc.SetState(SCE_MARKDOWN_OLIST_ITEM);
327 sc.Forward(2);
328 sc.SetState(SCE_MARKDOWN_DEFAULT);
329 }
// Anything other than a space, or a space once three have been counted,
// ends the PRECHAR state.
330 else if (sc.ch != ' ' || precharCount > 2)
331 sc.SetState(SCE_MARKDOWN_DEFAULT);
332 else
333 ++precharCount;
334 }
335
336 // New state anywhere in doc
337 if (sc.state == SCE_MARKDOWN_DEFAULT) {
// A '#' at the start of a line while in the default state: switch to
// LINE_BEGIN and hold the cursor so that the next pass classifies the
// same character as a line start.
338 if (sc.atLineStart && sc.ch == '#') {
339 sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
340 freezeCursor = true;
341 }
342 // Links and Images
// i scans ahead; j records the offset of the ']' that closes the link
// text; k records the offset of the ')' or ']' that closes the URL or
// reference part, and stays 0 when there is none.
343 if (sc.Match("![") || sc.ch == '[') {
344 int i = 0, j = 0, k = 0;
345 int len = endPos - sc.currentPos;
346 while (i < len && (sc.GetRelative(++i) != ']' || sc.GetRelative(i - 1) == '\\'))
347 ;
348 if (sc.GetRelative(i) == ']') {
349 j = i;
350 if (sc.GetRelative(++i) == '(') {
351 while (i < len && (sc.GetRelative(++i) != ')' || sc.GetRelative(i - 1) == '\\'))
352 ;
353 if (sc.GetRelative(i) == ')')
354 k = i;
355 }
// Reference part: '[' directly after the link text, or one character
// further on.
356 else if (sc.GetRelative(i) == '[' || sc.GetRelative(++i) == '[') {
357 while (i < len && (sc.GetRelative(++i) != ']' || sc.GetRelative(i - 1) == '\\'))
358 ;
359 if (sc.GetRelative(i) == ']')
360 k = i;
361 }
362 }
363 // At least a link text
364 if (j) {
365 sc.SetState(SCE_MARKDOWN_LINK);
366 sc.Forward(j);
367 // Also has a URL or reference portion
368 if (k)
369 sc.Forward(k - j);
370 sc.ForwardSetState(SCE_MARKDOWN_DEFAULT);
371 }
372 }
373 // Code - also a special case for alternate inside spacing
374 if (sc.Match("``") && sc.GetRelative(3) != ' ') {
375 sc.SetState(SCE_MARKDOWN_CODE2);
376 sc.Forward();
377 }
378 else if (sc.ch == '`' && sc.chNext != ' ') {
379 sc.SetState(SCE_MARKDOWN_CODE);
380 }
381 // Strong
382 else if (sc.Match("**") && sc.GetRelative(2) != ' ') {
383 sc.SetState(SCE_MARKDOWN_STRONG1);
384 sc.Forward();
385 }
386 else if (sc.Match("__") && sc.GetRelative(2) != ' ') {
387 sc.SetState(SCE_MARKDOWN_STRONG2);
388 sc.Forward();
389 }
390 // Emphasis
391 else if (sc.ch == '*' && sc.chNext != ' ')
392 sc.SetState(SCE_MARKDOWN_EM1);
393 else if (sc.ch == '_' && sc.chNext != ' ')
394 sc.SetState(SCE_MARKDOWN_EM2);
395 // Strikeout
396 else if (sc.Match("~~") && sc.GetRelative(2) != ' ') {
397 sc.SetState(SCE_MARKDOWN_STRIKEOUT);
398 sc.Forward();
399 }
400 // Beginning of line
401 else if (IsNewline(sc.ch))
402 sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
403 }
404 // Advance if not holding back the cursor for this iteration.
405 if (!freezeCursor)
406 sc.Forward();
407 freezeCursor = false;
408 }
409 sc.Complete();
410 }
411
// Lexer module object for Markdown: ties the SCLEX_MARKDOWN identifier and
// the name "markdown" to ColorizeMarkdownDoc. Only a colourising function is
// passed to the constructor here.
412 LexerModule lmMarkdown(SCLEX_MARKDOWN, ColorizeMarkdownDoc, "markdown");