]>
Commit | Line | Data |
---|---|---|
9e96e16f RD |
1 | /****************************************************************** |
2 | * LexMarkdown.cxx | |
3 | * | |
4 | * A simple Markdown lexer for scintilla. | |
1dcf666d | 5 | * |
9e96e16f | 6 | * Includes highlighting for some extra features from the |
1dcf666d | 7 | * Pandoc implementation; strikeout, using '#.' as a default |
9e96e16f | 8 | * ordered list item marker, and delimited code blocks. |
1dcf666d | 9 | * |
9e96e16f | 10 | * Limitations: |
1dcf666d | 11 | * |
9e96e16f | 12 | * Standard indented code blocks are not highlighted at all, |
1dcf666d | 13 | * as it would conflict with other indentation schemes. Use |
9e96e16f RD |
14 | * delimited code blocks for blanket highlighting of an |
15 | * entire code block. Embedded HTML is not highlighted either. | |
16 | * Blanket HTML highlighting has issues, because some Markdown | |
17 | * implementations allow Markdown markup inside of the HTML. Also, | |
1dcf666d RD |
18 | * there is a following blank line issue that can't be ignored, |
19 | * explained in the next paragraph. Embedded HTML and code | |
20 | * blocks would be better supported with language specific | |
9e96e16f | 21 | * highlighting. |
1dcf666d | 22 | * |
9e96e16f RD |
23 | * The highlighting aims to accurately reflect correct syntax, |
24 | * but a few restrictions are relaxed. Delimited code blocks are | |
1dcf666d | 25 | * highlighted, even if the line following the code block is not blank. |
9e96e16f RD |
26 | * Requiring a blank line after a block, breaks the highlighting |
27 | * in certain cases, because of the way Scintilla ends up calling | |
28 | * the lexer. | |
1dcf666d | 29 | * |
9e96e16f RD |
30 | * Written by Jon Strait - jstrait@moonloop.net |
31 | * | |
32 | * The License.txt file describes the conditions under which this | |
33 | * software may be distributed. | |
34 | * | |
35 | *****************************************************************/ | |
36 | ||
1dcf666d | 37 | #include <stdlib.h> |
9e96e16f | 38 | #include <string.h> |
9e96e16f RD |
39 | #include <stdio.h> |
40 | #include <stdarg.h> | |
1dcf666d | 41 | #include <assert.h> |
9e96e16f | 42 | |
1dcf666d RD |
43 | #include "ILexer.h" |
44 | #include "Scintilla.h" | |
45 | #include "SciLexer.h" | |
9e96e16f | 46 | |
1dcf666d RD |
47 | #include "WordList.h" |
48 | #include "LexAccessor.h" | |
9e96e16f RD |
49 | #include "Accessor.h" |
50 | #include "StyleContext.h" | |
1dcf666d RD |
51 | #include "CharacterSet.h" |
52 | #include "LexerModule.h" | |
9e96e16f RD |
53 | |
54 | #ifdef SCI_NAMESPACE | |
55 | using namespace Scintilla; | |
56 | #endif | |
57 | ||
// Reports whether ch is a line-terminator character (line feed or carriage return).
static inline bool IsNewline(const int ch) {
	switch (ch) {
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
61 | ||
62 | // True if can follow ch down to the end with possibly trailing whitespace | |
63 | static bool FollowToLineEnd(const int ch, const int state, const unsigned int endPos, StyleContext &sc) { | |
64 | unsigned int i = 0; | |
65 | while (sc.GetRelative(++i) == ch) | |
66 | ; | |
67 | // Skip over whitespace | |
68 | while (IsASpaceOrTab(sc.GetRelative(i)) && sc.currentPos + i < endPos) | |
69 | ++i; | |
70 | if (IsNewline(sc.GetRelative(i)) || sc.currentPos + i == endPos) { | |
71 | sc.Forward(i); | |
72 | sc.ChangeState(state); | |
73 | sc.SetState(SCE_MARKDOWN_LINE_BEGIN); | |
74 | return true; | |
75 | } | |
76 | else return false; | |
77 | } | |
78 | ||
1dcf666d | 79 | // Set the state on text section from current to length characters, |
9e96e16f RD |
80 | // then set the rest until the newline to default, except for any characters matching token |
81 | static void SetStateAndZoom(const int state, const int length, const int token, StyleContext &sc) { | |
82 | sc.SetState(state); | |
83 | sc.Forward(length); | |
84 | sc.SetState(SCE_MARKDOWN_DEFAULT); | |
85 | sc.Forward(); | |
86 | bool started = false; | |
87 | while (sc.More() && !IsNewline(sc.ch)) { | |
88 | if (sc.ch == token && !started) { | |
89 | sc.SetState(state); | |
90 | started = true; | |
91 | } | |
92 | else if (sc.ch != token) { | |
93 | sc.SetState(SCE_MARKDOWN_DEFAULT); | |
94 | started = false; | |
95 | } | |
96 | sc.Forward(); | |
97 | } | |
98 | sc.SetState(SCE_MARKDOWN_LINE_BEGIN); | |
99 | } | |
100 | ||
101 | // Does the previous line have more than spaces and tabs? | |
102 | static bool HasPrevLineContent(StyleContext &sc) { | |
103 | int i = 0; | |
104 | // Go back to the previous newline | |
1dcf666d | 105 | while ((--i + (int)sc.currentPos) >= 0 && !IsNewline(sc.GetRelative(i))) |
9e96e16f | 106 | ; |
1dcf666d | 107 | while ((--i + (int)sc.currentPos) >= 0) { |
9e96e16f RD |
108 | if (IsNewline(sc.GetRelative(i))) |
109 | break; | |
110 | if (!IsASpaceOrTab(sc.GetRelative(i))) | |
111 | return true; | |
112 | } | |
113 | return false; | |
114 | } | |
115 | ||
1dcf666d RD |
116 | static bool AtTermStart(StyleContext &sc) { |
117 | return sc.currentPos == 0 || isspacechar(sc.chPrev); | |
118 | } | |
119 | ||
9e96e16f RD |
120 | static bool IsValidHrule(const unsigned int endPos, StyleContext &sc) { |
121 | int c, count = 1; | |
122 | unsigned int i = 0; | |
123 | while (++i) { | |
124 | c = sc.GetRelative(i); | |
1dcf666d | 125 | if (c == sc.ch) |
9e96e16f RD |
126 | ++count; |
127 | // hit a terminating character | |
128 | else if (!IsASpaceOrTab(c) || sc.currentPos + i == endPos) { | |
129 | // Are we a valid HRULE | |
1dcf666d | 130 | if ((IsNewline(c) || sc.currentPos + i == endPos) && |
9e96e16f RD |
131 | count >= 3 && !HasPrevLineContent(sc)) { |
132 | sc.SetState(SCE_MARKDOWN_HRULE); | |
133 | sc.Forward(i); | |
134 | sc.SetState(SCE_MARKDOWN_LINE_BEGIN); | |
135 | return true; | |
136 | } | |
137 | else { | |
138 | sc.SetState(SCE_MARKDOWN_DEFAULT); | |
139 | return false; | |
140 | } | |
141 | } | |
142 | } | |
143 | return false; | |
144 | } | |
145 | ||
// Main lexing routine for Markdown.
//
// Styles the `length` characters beginning at `startPos`, using `initStyle`
// as the state the StyleContext starts in. The WordList argument is unused.
// `styler` is the accessor the StyleContext reads from and writes styles to.
//
// Each pass of the main loop handles the current character in up to three
// stages, which may chain within the same pass because each stage can change
// sc.state:
//   1. actions that depend on the current state (closing inline spans,
//      line-begin constructs, headers),
//   2. constructs recognised while in the leading whitespace of a line
//      (SCE_MARKDOWN_PRECHAR),
//   3. constructs that may start anywhere (SCE_MARKDOWN_DEFAULT).
// The cursor is then advanced by one character unless freezeCursor was set.
static void ColorizeMarkdownDoc(unsigned int startPos, int length, int initStyle,
                                WordList **, Accessor &styler) {
	unsigned int endPos = startPos + length;
	// Number of leading spaces consumed so far while in SCE_MARKDOWN_PRECHAR;
	// reset to 0 each time that state is entered.
	int precharCount = 0;
	// Don't advance on a new loop iteration and retry at the same position.
	// Useful in the corner case of having to start at the beginning file position
	// in the default state.
	bool freezeCursor = false;

	StyleContext sc(startPos, length, initStyle, styler);

	while (sc.More()) {
		// Skip past escaped characters
		// (only the backslash itself is stepped over; the loop then resumes
		// at the character that follows it, whose chPrev is the backslash)
		if (sc.ch == '\\') {
			sc.Forward();
			continue;
		}

		// A blockquotes resets the line semantics
		if (sc.state == SCE_MARKDOWN_BLOCKQUOTE)
			sc.SetState(SCE_MARKDOWN_LINE_BEGIN);

		// Conditional state-based actions
		if (sc.state == SCE_MARKDOWN_CODE2) {
			// Double-backtick code span: closed by "``"
			if (sc.Match("``") && sc.GetRelative(-2) != ' ') {
				sc.Forward(2);
				sc.SetState(SCE_MARKDOWN_DEFAULT);
			}
		}
		else if (sc.state == SCE_MARKDOWN_CODE) {
			// Single-backtick code span: closed by a backtick not preceded by a space
			if (sc.ch == '`' && sc.chPrev != ' ')
				sc.ForwardSetState(SCE_MARKDOWN_DEFAULT);
		}
		/* De-activated because it gets in the way of other valid indentation
		 * schemes, for example multiple paragraphs inside a list item.
		// Code block
		else if (sc.state == SCE_MARKDOWN_CODEBK) {
			bool d = true;
			if (IsNewline(sc.ch)) {
				if (sc.chNext != '\t') {
					for (int c = 1; c < 5; ++c) {
						if (sc.GetRelative(c) != ' ')
							d = false;
					}
				}
			}
			else if (sc.atLineStart) {
				if (sc.ch != '\t' ) {
					for (int i = 0; i < 4; ++i) {
						if (sc.GetRelative(i) != ' ')
							d = false;
					}
				}
			}
			if (!d)
				sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
		}
		*/
		// Strong
		else if (sc.state == SCE_MARKDOWN_STRONG1) {
			if (sc.Match("**") && sc.chPrev != ' ') {
				sc.Forward(2);
				sc.SetState(SCE_MARKDOWN_DEFAULT);
			}
		}
		else if (sc.state == SCE_MARKDOWN_STRONG2) {
			if (sc.Match("__") && sc.chPrev != ' ') {
				sc.Forward(2);
				sc.SetState(SCE_MARKDOWN_DEFAULT);
			}
		}
		// Emphasis
		else if (sc.state == SCE_MARKDOWN_EM1) {
			if (sc.ch == '*' && sc.chPrev != ' ')
				sc.ForwardSetState(SCE_MARKDOWN_DEFAULT);
		}
		else if (sc.state == SCE_MARKDOWN_EM2) {
			if (sc.ch == '_' && sc.chPrev != ' ')
				sc.ForwardSetState(SCE_MARKDOWN_DEFAULT);
		}
		// Delimited code block: closed by a line that starts with "~~~"
		else if (sc.state == SCE_MARKDOWN_CODEBK) {
			if (sc.atLineStart && sc.Match("~~~")) {
				// Include the rest of the closing delimiter line in the block
				int i = 1;
				while (!IsNewline(sc.GetRelative(i)) && sc.currentPos + i < endPos)
					i++;
				sc.Forward(i);
				sc.SetState(SCE_MARKDOWN_DEFAULT);
			}
		}
		else if (sc.state == SCE_MARKDOWN_STRIKEOUT) {
			if (sc.Match("~~") && sc.chPrev != ' ') {
				sc.Forward(2);
				sc.SetState(SCE_MARKDOWN_DEFAULT);
			}
		}
		else if (sc.state == SCE_MARKDOWN_LINE_BEGIN) {
			// Header
			// (longest run of '#' is tested first so the deepest level wins)
			if (sc.Match("######"))
				SetStateAndZoom(SCE_MARKDOWN_HEADER6, 6, '#', sc);
			else if (sc.Match("#####"))
				SetStateAndZoom(SCE_MARKDOWN_HEADER5, 5, '#', sc);
			else if (sc.Match("####"))
				SetStateAndZoom(SCE_MARKDOWN_HEADER4, 4, '#', sc);
			else if (sc.Match("###"))
				SetStateAndZoom(SCE_MARKDOWN_HEADER3, 3, '#', sc);
			else if (sc.Match("##"))
				SetStateAndZoom(SCE_MARKDOWN_HEADER2, 2, '#', sc);
			else if (sc.Match("#")) {
				// Catch the special case of an ordered list using the "#." marker;
				// it is styled by the PRECHAR stage below
				if (sc.chNext == '.' && IsASpaceOrTab(sc.GetRelative(2))) {
					precharCount = 0;
					sc.SetState(SCE_MARKDOWN_PRECHAR);
				}
				else
					SetStateAndZoom(SCE_MARKDOWN_HEADER1, 1, '#', sc);
			}
			// Code block
			else if (sc.Match("~~~")) {
				if (!HasPrevLineContent(sc))
					sc.SetState(SCE_MARKDOWN_CODEBK);
				else
					sc.SetState(SCE_MARKDOWN_DEFAULT);
			}
			// Underline-style level 1 header: a line of '=' below a non-blank line
			else if (sc.ch == '=') {
				if (HasPrevLineContent(sc) && FollowToLineEnd('=', SCE_MARKDOWN_HEADER1, endPos, sc))
					;
				else
					sc.SetState(SCE_MARKDOWN_DEFAULT);
			}
			// Underline-style level 2 header: a line of '-' below a non-blank line;
			// otherwise '-' is left to the PRECHAR stage (hrule / list item)
			else if (sc.ch == '-') {
				if (HasPrevLineContent(sc) && FollowToLineEnd('-', SCE_MARKDOWN_HEADER2, endPos, sc))
					;
				else {
					precharCount = 0;
					sc.SetState(SCE_MARKDOWN_PRECHAR);
				}
			}
			else if (IsNewline(sc.ch))
				sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
			else {
				precharCount = 0;
				sc.SetState(SCE_MARKDOWN_PRECHAR);
			}
		}

		// The header lasts until the newline
		else if (sc.state == SCE_MARKDOWN_HEADER1 || sc.state == SCE_MARKDOWN_HEADER2 ||
		         sc.state == SCE_MARKDOWN_HEADER3 || sc.state == SCE_MARKDOWN_HEADER4 ||
		         sc.state == SCE_MARKDOWN_HEADER5 || sc.state == SCE_MARKDOWN_HEADER6) {
			if (IsNewline(sc.ch))
				sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
		}

		// New state only within the initial whitespace
		if (sc.state == SCE_MARKDOWN_PRECHAR) {
			// Blockquote
			// (precharCount never exceeds 3 here - see the final else of this
			// chain - so the "< 5" test is always satisfied)
			if (sc.ch == '>' && precharCount < 5)
				sc.SetState(SCE_MARKDOWN_BLOCKQUOTE);
			/*
			// Begin of code block
			else if (!HasPrevLineContent(sc) && (sc.chPrev == '\t' || precharCount >= 4))
				sc.SetState(SCE_MARKDOWN_CODEBK);
			*/
			// HRule - Total of three or more hyphens, asterisks, or underscores
			// on a line by themselves
			else if ((sc.ch == '-' || sc.ch == '*' || sc.ch == '_') && IsValidHrule(endPos, sc))
				;
			// Unordered list
			else if ((sc.ch == '-' || sc.ch == '*' || sc.ch == '+') && IsASpaceOrTab(sc.chNext)) {
				sc.SetState(SCE_MARKDOWN_ULIST_ITEM);
				sc.ForwardSetState(SCE_MARKDOWN_DEFAULT);
			}
			// Ordered list
			else if (IsADigit(sc.ch)) {
				// Count the run of digits, then require ". " (or ".\t") after it
				int digitCount = 0;
				while (IsADigit(sc.GetRelative(++digitCount)))
					;
				if (sc.GetRelative(digitCount) == '.' &&
				        IsASpaceOrTab(sc.GetRelative(digitCount + 1))) {
					sc.SetState(SCE_MARKDOWN_OLIST_ITEM);
					sc.Forward(digitCount + 1);
					sc.SetState(SCE_MARKDOWN_DEFAULT);
				}
			}
			// Alternate Ordered list
			else if (sc.ch == '#' && sc.chNext == '.' && IsASpaceOrTab(sc.GetRelative(2))) {
				sc.SetState(SCE_MARKDOWN_OLIST_ITEM);
				sc.Forward(2);
				sc.SetState(SCE_MARKDOWN_DEFAULT);
			}
			// Any non-space character, or a fourth leading space, ends the
			// leading-whitespace state
			else if (sc.ch != ' ' || precharCount > 2)
				sc.SetState(SCE_MARKDOWN_DEFAULT);
			else
				++precharCount;
		}

		// New state anywhere in doc
		if (sc.state == SCE_MARKDOWN_DEFAULT) {
			// A '#' at the start of a line while in the default state is
			// re-processed as a line beginning on the next pass, without
			// advancing
			if (sc.atLineStart && sc.ch == '#') {
				sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
				freezeCursor = true;
			}
			// Links and Images
			if (sc.Match("![") || sc.ch == '[') {
				// j: offset of the ']' closing the link text (0 if none found)
				// k: offset of the ')' or ']' closing the URL / reference part
				//    (0 if there is none)
				int i = 0, j = 0, k = 0;
				int len = endPos - sc.currentPos;
				// Find the first ']' that is not preceded by a backslash
				while (i < len && (sc.GetRelative(++i) != ']' || sc.GetRelative(i - 1) == '\\'))
					;
				if (sc.GetRelative(i) == ']') {
					j = i;
					// Inline form: "(...)" directly after the link text
					if (sc.GetRelative(++i) == '(') {
						while (i < len && (sc.GetRelative(++i) != ')' || sc.GetRelative(i - 1) == '\\'))
							;
						if (sc.GetRelative(i) == ')')
							k = i;
					}
					// Reference form: "[...]" directly after the link text, or
					// after one intervening character
					else if (sc.GetRelative(i) == '[' || sc.GetRelative(++i) == '[') {
						while (i < len && (sc.GetRelative(++i) != ']' || sc.GetRelative(i - 1) == '\\'))
							;
						if (sc.GetRelative(i) == ']')
							k = i;
					}
				}
				// At least a link text
				if (j) {
					sc.SetState(SCE_MARKDOWN_LINK);
					sc.Forward(j);
					// Also has a URL or reference portion
					if (k)
						sc.Forward(k - j);
					sc.ForwardSetState(SCE_MARKDOWN_DEFAULT);
				}
			}
			// Code - also a special case for alternate inside spacing
			if (sc.Match("``") && sc.GetRelative(3) != ' ' && AtTermStart(sc)) {
				sc.SetState(SCE_MARKDOWN_CODE2);
				sc.Forward();
			}
			else if (sc.ch == '`' && sc.chNext != ' ' && AtTermStart(sc)) {
				sc.SetState(SCE_MARKDOWN_CODE);
			}
			// Strong
			else if (sc.Match("**") && sc.GetRelative(2) != ' ' && AtTermStart(sc)) {
				sc.SetState(SCE_MARKDOWN_STRONG1);
				sc.Forward();
			}
			else if (sc.Match("__") && sc.GetRelative(2) != ' ' && AtTermStart(sc)) {
				sc.SetState(SCE_MARKDOWN_STRONG2);
				sc.Forward();
			}
			// Emphasis
			else if (sc.ch == '*' && sc.chNext != ' ' && AtTermStart(sc)) {
				sc.SetState(SCE_MARKDOWN_EM1);
			}
			else if (sc.ch == '_' && sc.chNext != ' ' && AtTermStart(sc)) {
				sc.SetState(SCE_MARKDOWN_EM2);
			}
			// Strikeout
			else if (sc.Match("~~") && sc.GetRelative(2) != ' ' && AtTermStart(sc)) {
				sc.SetState(SCE_MARKDOWN_STRIKEOUT);
				sc.Forward();
			}
			// Beginning of line
			else if (IsNewline(sc.ch)) {
				sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
			}
		}
		// Advance if not holding back the cursor for this iteration.
		if (!freezeCursor)
			sc.Forward();
		freezeCursor = false;
	}
	sc.Complete();
}
420 | ||
// Lexer module object for Markdown: constructed with the SCLEX_MARKDOWN
// identifier, ColorizeMarkdownDoc as its lexing function, and the name "markdown".
LexerModule lmMarkdown(SCLEX_MARKDOWN, ColorizeMarkdownDoc, "markdown");