]> git.saurik.com Git - wxWidgets.git/blob - wxPython/samples/stxview/StructuredText/ClassicStructuredText.py
fixed the last of the off-by-one errors (some are refixed, again...)
[wxWidgets.git] / wxPython / samples / stxview / StructuredText / ClassicStructuredText.py
1 #! /usr/bin/env python -- # -*- python -*-
2 ##############################################################################
3 #
4 # Zope Public License (ZPL) Version 1.0
5 # -------------------------------------
6 #
7 # Copyright (c) Digital Creations. All rights reserved.
8 #
9 # This license has been certified as Open Source(tm).
10 #
11 # Redistribution and use in source and binary forms, with or without
12 # modification, are permitted provided that the following conditions are
13 # met:
14 #
15 # 1. Redistributions in source code must retain the above copyright
16 # notice, this list of conditions, and the following disclaimer.
17 #
18 # 2. Redistributions in binary form must reproduce the above copyright
19 # notice, this list of conditions, and the following disclaimer in
20 # the documentation and/or other materials provided with the
21 # distribution.
22 #
23 # 3. Digital Creations requests that attribution be given to Zope
24 # in any manner possible. Zope includes a "Powered by Zope"
25 # button that is installed by default. While it is not a license
26 # violation to remove this button, it is requested that the
27 # attribution remain. A significant investment has been put
28 # into Zope, and this effort will continue if the Zope community
29 # continues to grow. This is one way to assure that growth.
30 #
31 # 4. All advertising materials and documentation mentioning
32 # features derived from or use of this software must display
33 # the following acknowledgement:
34 #
35 # "This product includes software developed by Digital Creations
36 # for use in the Z Object Publishing Environment
37 # (http://www.zope.org/)."
38 #
39 # In the event that the product being advertised includes an
40 # intact Zope distribution (with copyright and license included)
41 # then this clause is waived.
42 #
43 # 5. Names associated with Zope or Digital Creations must not be used to
44 # endorse or promote products derived from this software without
45 # prior written permission from Digital Creations.
46 #
47 # 6. Modified redistributions of any form whatsoever must retain
48 # the following acknowledgment:
49 #
50 # "This product includes software developed by Digital Creations
51 # for use in the Z Object Publishing Environment
52 # (http://www.zope.org/)."
53 #
54 # Intact (re-)distributions of any official Zope release do not
55 # require an external acknowledgement.
56 #
57 # 7. Modifications are encouraged but must be packaged separately as
58 # patches to official Zope releases. Distributions that do not
59 # clearly separate the patches from the original work must be clearly
60 # labeled as unofficial distributions. Modifications which do not
61 # carry the name Zope may be packaged in any form, as long as they
62 # conform to all of the clauses above.
63 #
64 #
65 # Disclaimer
66 #
67 # THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
68 # EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
69 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
70 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
71 # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
72 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
73 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
74 # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
75 # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
76 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
77 # OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
78 # SUCH DAMAGE.
79 #
80 #
81 # This software consists of contributions made by Digital Creations and
82 # many individuals on behalf of Digital Creations. Specific
83 # attributions are listed in the accompanying credits file.
84 #
85 ##############################################################################
86 '''Structured Text Manipulation
87
88 Parse a structured text string into a form that can be used with
89 structured formats, like html.
90
91 Structured text is text that uses indentation and simple
92 symbology to indicate the structure of a document.
93
94 A structured string consists of a sequence of paragraphs separated by
95 one or more blank lines. Each paragraph has a level which is defined
96 as the minimum indentation of the paragraph. A paragraph is a
97 sub-paragraph of another paragraph if the other paragraph is the last
98 preceding paragraph that has a lower level.
99
100 Special symbology is used to indicate special constructs:
101
102 - A single-line paragraph whose immediately succeeding paragraphs are lower
103 level is treated as a header.
104
105 - A paragraph that begins with a '-', '*', or 'o' is treated as an
106 unordered list (bullet) element.
107
108 - A paragraph that begins with a sequence of digits followed by a
109 white-space character is treated as an ordered list element.
110
111 - A paragraph that begins with a sequence of sequences, where each
112 sequence is a sequence of digits or a sequence of letters followed
113 by a period, is treated as an ordered list element.
114
115 - A paragraph with a first line that contains some text, followed by
116 some white-space and '--' is treated as
117 a descriptive list element. The leading text is treated as the
118 element title.
119
120 - Sub-paragraphs of a paragraph that ends in the word 'example' or the
121 word 'examples', or '::' are treated as example code and are output as is.
122
123 - Text enclosed in single quotes (with white-space to the left of the
124 first quote and whitespace or punctuation to the right of the second quote)
125 is treated as example code.
126
127 - Text surrounded by '*' characters (with white-space to the left of the
128 first '*' and whitespace or punctuation to the right of the second '*')
129 is emphasized.
130
131 - Text surrounded by '**' characters (with white-space to the left of the
132 first '**' and whitespace or punctuation to the right of the second '**')
133 is made strong.
134
135 - Text surrounded by '_' underscore characters (with whitespace to the left
136 and whitespace or punctuation to the right) is made underlined.
137
138 - Text enclosed by double quotes followed by a colon, a URL, and concluded
139 by punctuation plus white space, *or* just white space, is treated as a
140 hyper link. For example:
141
142 "Zope":http://www.zope.org/ is ...
143
144 Is interpreted as '<a href="http://www.zope.org/">Zope</a> is ....'
145 Note: This works for relative as well as absolute URLs.
146
147 - Text enclosed by double quotes followed by a comma, one or more spaces,
148 an absolute URL and concluded by punctuation plus white space, or just
149 white space, is treated as a hyper link. For example:
150
151 "mail me", mailto:amos@digicool.com.
152
153 Is interpreted as '<a href="mailto:amos@digicool.com">mail me</a>.'
154
155 - Text enclosed in brackets which consists only of letters, digits,
156 underscores and dashes is treated as hyper links within the document.
157 For example:
158
159 As demonstrated by Smith [12] this technique is quite effective.
160
161 Is interpreted as '... by Smith <a href="#12">[12]</a> this ...'. Together
162 with the next rule this allows easy coding of references or end notes.
163
164 - Text enclosed in brackets which is preceded by the start of a line, two
165 periods and a space is treated as a named link. For example:
166
167 .. [12] "Effective Techniques" Smith, Joe ...
168
169 Is interpreted as '<a name="12">[12]</a> "Effective Techniques" ...'.
170 Together with the previous rule this allows easy coding of references or
171 end notes.
172
173
174 - A paragraph that has blocks of text enclosed in '||' is treated as a
175 table. The text blocks correspond to table cells and table rows are
176 denoted by newlines. By default the cells are center aligned. A cell
177 can span more than one column by preceding a block of text with an
178 equivalent number of cell separators '||'. Newlines and '|' cannot
179 be a part of the cell text. For example:
180
181 |||| **Ingredients** ||
182 || *Name* || *Amount* ||
183 ||Spam||10||
184 ||Eggs||3||
185
186 is interpreted as::
187
188 <TABLE BORDER=1 CELLPADDING=2>
189 <TR>
190 <TD ALIGN=CENTER COLSPAN=2> <strong>Ingredients</strong> </TD>
191 </TR>
192 <TR>
193 <TD ALIGN=CENTER COLSPAN=1> <em>Name</em> </TD>
194 <TD ALIGN=CENTER COLSPAN=1> <em>Amount</em> </TD>
195 </TR>
196 <TR>
197 <TD ALIGN=CENTER COLSPAN=1>Spam</TD>
198 <TD ALIGN=CENTER COLSPAN=1>10</TD>
199 </TR>
200 <TR>
201 <TD ALIGN=CENTER COLSPAN=1>Eggs</TD>
202 <TD ALIGN=CENTER COLSPAN=1>3</TD>
203 </TR>
204 </TABLE>
205
206 '''
207
208 import ts_regex
209 import regex
210 from ts_regex import gsub
211 from string import split, join, strip, find
212 import string,re
213
214
def untabify(aString,
             indent_tab=ts_regex.compile('\(\n\|^\)\( *\)\t').search_group,
             ):
    '''\
    Expand tabs that occur in the leading indentation of a line.

    A tab that follows zero or more leading spaces is replaced, together
    with those spaces, by enough spaces to reach the next multiple of
    eight.  The text is rescanned after every replacement, so several
    indentation tabs on one line are expanded one after another.  Tabs
    that are not part of the leading indentation are left alone.
    '''
    pieces=[]
    remaining=aString
    found=indent_tab(remaining, (1,2))
    while found:
        offset, groups = found
        newline_len=len(groups[0])
        spaces=len(groups[1])
        # Everything before the match is final and never rescanned.
        pieces.append(remaining[:offset])
        width=(spaces/8+1)*8
        # The rebuilt text always starts with a newline so that the
        # pattern can match again at the same line.
        # NOTE(review): when the match was anchored by '^' instead of a
        # real newline this looks like it inserts a newline that was not
        # in the input -- confirm against ts_regex before relying on it.
        remaining="\n%s%s" % (' ' * width,
                              remaining[offset+spaces+1+newline_len:])
        found=indent_tab(remaining, (1,2))
    pieces.append(remaining)
    return join(pieces,'')
234
def indent(aString, indent=2):
    """Indent every line of a string by the given number of spaces.

    Arguments:

      aString -- The text to indent; indentation tabs are expanded
                 with untabify() first.
      indent -- The number of spaces to put in front of each line.

    A single trailing empty line (text ending in a newline) is dropped
    before indenting, and the result always ends with one newline.
    """
    r=split(untabify(aString),'\n')
    if not r: return ''
    if not r[-1]: del r[-1]
    # The width comes from the 'indent' parameter.  The previous code
    # read an undefined name ('level') here and raised NameError on
    # every call.
    tab=' '*indent
    return "%s%s\n" % (tab,join(r,'\n'+tab))
242
def reindent(aString, indent=2, already_untabified=0):
    """Shift a block of text so that its minimum indentation is 'indent'.

    Arguments:

      aString -- The text to shift.
      indent -- The wanted minimum indentation, in spaces.
      already_untabified -- True if the caller has already expanded
                            indentation tabs; otherwise untabify() is
                            applied first.

    The text is returned unchanged when its minimum indentation already
    equals 'indent'.
    """
    if not already_untabified:
        aString=untabify(aString)

    current=indent_level(aString)[0]
    if current==indent:
        return aString

    lines=split(aString,'\n')
    if indent > current:
        # Too shallow: put the missing spaces in front of every line.
        pad=' ' * (indent-current)
        shifted=map(lambda line, pad=pad: pad+line, lines)
    else:
        # Too deep: cut the surplus columns off the front of every line.
        surplus=current-indent
        shifted=map(lambda line, surplus=surplus: line[surplus:], lines)

    return join(shifted,'\n')
263
def indent_level(aString,
                 indent_space=ts_regex.compile('\n\( *\)').search_group,
                 ):
    '''\
    Return (minimum indentation, aString), ignoring blank lines.

    The indentation of a line is the number of spaces that follow the
    newline in front of it.  If no non-blank line is found, the first
    element is the length of the text plus one.
    '''
    # A newline is put in front so the first line is measured like the rest.
    text='\n'+aString
    size=len(text)
    smallest=size
    pos=0
    while 1:
        found=indent_space(text, (1,2), pos)
        if not found:
            return (smallest,aString)
        pos, groups = found
        width=len(groups[0])
        # Step over the newline and the spaces that were just measured.
        pos=pos+width+1
        if pos >= size or text[pos] == '\n':
            # Blank line (or end of text): it does not count.
            continue
        if not width:
            # Nothing can be lower than zero, so stop early.
            return (0,aString)
        if width < smallest:
            smallest=width
285
def paragraphs(list,start):
    """Count the entries after list[start] that are nested below it.

    'list' is a sequence whose items carry their indentation level in
    item[0].  The count stops at the first following item whose level is
    not greater than the level of list[start].
    """
    base=list[start][0]
    count=0
    for pos in range(start+1,len(list)):
        if not list[pos][0] > base:
            break
        count=count+1
    return count
292
def structure(list):
    """Turn a flat list of (level, text) pairs into a nested list.

    Each item of the result is a (text, children) pair, where children
    is the same kind of list built from the entries that paragraphs()
    reports as nested below that item.
    """
    if not list: return []
    tree=[]
    total=len(list)
    pos=0
    while pos < total:
        nested=paragraphs(list,pos)
        first_child=pos+1
        next_sibling=first_child+nested
        tree.append((list[pos][1],
                     structure(list[first_child:next_sibling])))
        pos=next_sibling
    return tree
304
305
class Table:
    """Parse a '||'-delimited table paragraph and render it as HTML.

    create() stores the parsed rows in self.table; html() renders the
    rows stored by the most recent create() call.
    """
    CELL=' <TD ALIGN=CENTER COLSPAN=%i>%s</TD>\n'
    ROW=' <TR>\n%s </TR>\n'
    TABLE='\n<TABLE BORDER=1 CELLPADDING=2>\n%s</TABLE>'

    def create(self,aPar,
               td_reg=re.compile(r'[ \t\n]*\|\|([^\x00|]*)')
               ):
        '''Parse a table paragraph into a nested list of cell strings.

        Arguments:

          aPar -- The paragraph text, one table row per line.
          td_reg -- Compiled pattern matching one '||' separator and the
                    cell text after it (group 1).

        Returns 1 when every non-empty line consists only of
        '||'-introduced cells, otherwise 0.  self.table is reset on
        every call; after a 0 return it holds only the rows parsed
        before the failure.
        '''
        # The cell class must be written [^\x00|].  The earlier spelling
        # [^\0x00|] was read as NUL, 'x', '0', '0' and '|', so any cell
        # containing an 'x' or a '0' (for example '||10||') was rejected.
        self.table=[]
        for line in filter(None,aPar.split('\n')):
            row=[]
            while 1:
                mo = td_reg.match(line)
                if not mo: return 0
                pos = mo.end(1)
                row.append(mo.group(1))
                if pos==len(line): break
                line=line[pos:]
            self.table.append(row)
        return 1

    def html(self):
        '''Return the rows stored by create() as an HTML table string.

        An empty cell string is not rendered; it widens the COLSPAN of
        the next non-empty cell in the same row by one.  Empty cells at
        the end of a row are dropped.
        '''
        htmltable=[]
        for row in self.table:
            htmlrow=[]
            colspan=1
            for cell in row:
                if cell=='':
                    colspan=colspan+1
                    continue
                htmlrow.append(self.CELL%(colspan,cell))
                colspan=1
            htmltable.append(self.ROW%''.join(htmlrow))
        return self.TABLE%''.join(htmltable)
345
# Module-level Table instance; HTML._str calls table.create() and
# table.html() on it for every paragraph it tests for table markup.
table=Table()
347
class StructuredText:

    """Model text as a structured collection of paragraphs.

    Structure is implied by the indentation level.

    This class is intended as a base class for classes that do the
    actual text output formatting.
    """

    def __init__(self, aStructuredString, level=0,
                 paragraph_divider=regex.compile('\(\r?\n *\)+\r?\n'),
                 ):
        '''Convert a structured text string into a structured text object.

        Arguments:

          aStructuredString -- The string to be parsed.
          level -- The level of top level headings to be created.
          paragraph_divider -- Compiled pattern used to split the text
                               into paragraphs.

        The parsed result is stored in self.structure as the nested
        list built by structure().
        '''
        # The two hyper link spellings ("text":url and "text", url) only
        # differ in what follows the closing quote; the URL part, the
        # terminator and the replacement are shared.
        # NOTE(review): the replacement does not put back the space the
        # pattern consumes in front of the opening quote -- confirm that
        # this is intended.
        url_and_end=('([-:%s0-9_,./?=@#~&]*?)' % string.letters) + '([.:?;] )'
        anchor=r'<a href="\2">\1</a>\3 '
        for quoted_text in (' \"([%s0-9-_,./?=@~&]*)\":' % string.letters,
                            ' \"([%s0-9-_,./?=@~&]*)\", ' % string.letters):
            link=re.compile(quoted_text+url_and_end,re.M)
            aStructuredString=link.sub(anchor,aStructuredString)

        # Drop the stray colon left in front of a URL captured with a
        # leading ':'.
        if find(aStructuredString, '<a href=":') != -1:
            aStructuredString=re.sub('<a href=":', '<a href="',
                                     aStructuredString)

        self.level=level
        blocks=ts_regex.split(untabify(aStructuredString),
                              paragraph_divider)
        self.structure=structure(map(indent_level,blocks))


    def __str__(self):
        """Return the string form of the parsed nested structure."""
        return str(self.structure)
402
403
# Building blocks for the inline markup patterns used by ctag().  The
# middle parts are templates that are filled with the marker character.
ctag_prefix=r'([\x00- \\(]|^)'
ctag_suffix=r'([\x00- ,.:;!?\\)]|$)'
ctag_middle=r'[%s]([^\x00- %s][^%s]*[^\x00- %s]|[^%s])[%s]'
ctag_middl2=r'[%s][%s]([^\x00- %s][^%s]*[^\x00- %s]|[^%s])[%s][%s]'

def ctag(s,
         em=re.compile(
             ctag_prefix+(ctag_middle % (("*",)*6) )+ctag_suffix),
         strong=re.compile(
             ctag_prefix+(ctag_middl2 % (("*",)*8))+ctag_suffix),
         under=re.compile(
             ctag_prefix+(ctag_middle % (("_",)*6) )+ctag_suffix),
         code=re.compile(
             ctag_prefix+(ctag_middle % (("\'",)*6))+ctag_suffix),
         ):
    """Replace inline structured text markup in 's' with HTML tags.

    '**text**' becomes <strong>, '_text_' becomes <u>, "'text'" becomes
    <code> and '*text*' becomes <em>.  None is treated as an empty
    string.  The keyword arguments are the precompiled patterns.
    """
    if s is None:
        return ''
    # '**' has to be handled before '*', so strong comes first and em last.
    for pattern, markup in ((strong, r'\1<strong>\2</strong>\3'),
                            (under, r'\1<u>\2</u>\3'),
                            (code, r'\1<code>\2</code>\3'),
                            (em, r'\1<em>\2</em>\3')):
        s=pattern.sub(markup,s)
    return s
425
class HTML(StructuredText):

    '''\
    An HTML structured text formatter.
    '''\

    def __str__(self,
                extra_dl=re.compile("</dl>\n<dl>"),
                extra_ul=re.compile("</ul>\n<ul>"),
                extra_ol=re.compile("</ol>\n<ol>"),
                ):
        '''\
        Return an HTML string representation of the structured text data.

        Every list item is first rendered inside its own list element;
        a closing list tag directly followed by an opening tag of the
        same kind is then replaced by a newline, so neighbouring items
        end up in one list.
        '''
        s=self._str(self.structure,self.level)
        s=extra_dl.sub('\n',s)
        s=extra_ul.sub('\n',s)
        s=extra_ol.sub('\n',s)
        return s

    def ul(self, before, p, after):
        '''Append a one-item unordered list to 'before'.

        p is the item text (run through ctag and stripped), after is
        the already rendered content nested below the item.
        '''
        if p: p="<p>%s</p>" % strip(ctag(p))
        return ('%s<ul><li>%s\n%s\n</li></ul>\n'
                % (before,p,after))

    def ol(self, before, p, after):
        '''Append a one-item ordered list to 'before'.

        p is the item text (run through ctag and stripped), after is
        the already rendered content nested below the item.
        '''
        if p: p="<p>%s</p>" % strip(ctag(p))
        return ('%s<ol><li>%s\n%s\n</li></ol>\n'
                % (before,p,after))

    def dl(self, before, t, d, after):
        '''Append a one-item definition list to 'before'.

        t is the term, d the description (both run through ctag) and
        after the already rendered content nested below the item.
        '''
        return ('%s<dl><dt>%s</dt><dd><p>%s</p>\n%s\n</dd></dl>\n'
                % (before,ctag(t),ctag(d),after))

    def head(self, before, t, level, d):
        '''Append a heading t followed by its rendered content d.

        Levels 1 to 5 produce an <hN> element.  Any other level
        (including 0) produces a definition list with the heading in
        bold as the term.
        '''
        if level > 0 and level < 6:
            return ('%s<h%d>%s</h%d>\n%s\n'
                    % (before,level,strip(ctag(t)),level,d))

        t="<p><strong>%s</strong></p>" % strip(ctag(t))
        return ('%s<dl><dt>%s\n</dt><dd>%s\n</dd></dl>\n'
                % (before,t,d))

    def normal(self,before,p,after):
        '''Append an ordinary paragraph p (run through ctag) to 'before'.'''
        return '%s<p>%s</p>\n%s\n' % (before,ctag(p),after)

    def pre(self,structure,tagged=0):
        '''Render a nested structure as literal, HTML-quoted text.

        The outermost call wraps the text in <PRE>; the recursive calls
        for nested paragraphs pass tagged=1 so the wrapper is written
        only once.  An empty structure gives an empty string.
        '''
        if not structure: return ''
        if tagged:
            r=''
        else:
            r='<PRE>\n'
        for s in structure:
            r="%s%s\n\n%s" % (r,html_quote(s[0]),self.pre(s[1],1))
        if not tagged: r=r+'</PRE>\n'
        return r

    def table(self,before,table,after):
        '''Append already rendered table HTML, wrapped in <p>, to 'before'.

        The table HTML is run through ctag, so inline markup inside the
        cells is converted here.
        '''
        return '%s<p>%s</p>\n%s\n' % (before,ctag(table),after)

    def _str(self,structure,level,
             # Static
             bullet=ts_regex.compile('[ \t\n]*[o*-][ \t\n]+\([^\0]*\)'
                                     ).match_group,
             example=ts_regex.compile('[\0- ]examples?:[\0- ]*$'
                                      ).search,
             dl=ts_regex.compile('\([^\n]+\)[ \t]+--[ \t\n]+\([^\0]*\)'
                                 ).match_group,
             nl=ts_regex.compile('\n').search,
             ol=ts_regex.compile(
                 '[ \t]*\(\([0-9]+\|[%s]+\)[.)]\)+[ \t\n]+\([^\0]*\|$\)' % string.letters
                 ).match_group,
             olp=ts_regex.compile('[ \t]*([0-9]+)[ \t\n]+\([^\0]*\|$\)'
                                  ).match_group,
             ):
        '''Render a list of (text, children) pairs as HTML.

        Each paragraph is classified by the first test that applies, in
        this order: bullet item, ordered item, ordered item matched by
        olp, definition item, example introduced by "example(s):",
        example introduced by "::", table, heading, and finally an
        ordinary paragraph.  The keyword arguments are precompiled
        matchers and are not meant to be passed by callers.
        '''
        # NOTE(review): the code relies on ts_regex conventions that are
        # not visible in this file: match_group appears to return a false
        # value or a pair whose second element is the requested group(s),
        # and search appears to return a negative number when nothing is
        # found -- confirm against ts_regex.
        r=''
        for s in structure:

            ts_results = bullet(s[0], (1,))
            if ts_results:
                p = ts_results[1]
                # An item ending in '::' shows its children literally.
                if s[0][-2:]=='::' and s[1]: ps=self.pre(s[1])
                else: ps=self._str(s[1],level)
                r=self.ul(r,p,ps)
                continue
            ts_results = ol(s[0], (3,))
            if ts_results:
                p = ts_results[1]
                if s[0][-2:]=='::' and s[1]: ps=self.pre(s[1])
                else: ps=self._str(s[1],level)
                r=self.ol(r,p,ps)
                continue
            # NOTE(review): the parentheses in the olp pattern are not
            # backslash-escaped, unlike the groups of the other patterns;
            # presumably they match literal '(' and ')' -- confirm.
            ts_results = olp(s[0], (1,))
            if ts_results:
                p = ts_results[1]
                if s[0][-2:]=='::' and s[1]: ps=self.pre(s[1])
                else: ps=self._str(s[1],level)
                r=self.ol(r,p,ps)
                continue
            ts_results = dl(s[0], (1,2))
            if ts_results:
                t,d = ts_results[1]
                r=self.dl(r,t,d,self._str(s[1],level))
                continue
            if example(s[0]) >= 0 and s[1]:
                # Introduce an example, using pre tags:
                r=self.normal(r,s[0],self.pre(s[1]))
                continue
            if s[0][-2:]=='::' and s[1]:
                # Introduce an example, using pre tags:
                # (the paragraph is shown with one of its two colons removed)
                r=self.normal(r,s[0][:-1],self.pre(s[1]))
                continue
            if table.create(s[0]):
                ## table support.
                r=self.table(r,table.html(),self._str(s[1],level))
                continue
            else:

                if nl(s[0]) < 0 and s[1] and s[0][-1:] != ':':
                    # Treat as a heading
                    t=s[0]
                    # 'level and level+1' keeps a level of 0 at 0 and
                    # increases any other level by one for the children.
                    r=self.head(r,t,level,
                                self._str(s[1],level and level+1))
                else:
                    r=self.normal(r,s[0],self._str(s[1],level))
        return r
553
554
def html_quote(v,
               character_entities=(
                       (re.compile('&'), '&amp;'),
                       (re.compile("<"), '&lt;' ),
                       (re.compile(">"), '&gt;' ),
                       (re.compile('"'), '&quot;')
                       )): #"
    """Return str(v) with '&', '<', '>' and '"' replaced by HTML entities.

    character_entities is a sequence of (compiled pattern, replacement)
    pairs that are applied in order; '&' comes first so the ampersands
    of the entities added afterwards are not quoted again.
    """
    quoted=str(v)
    for pattern, entity in character_entities:
        quoted=pattern.sub(entity,quoted)
    return quoted
566
def html_with_references(text, level=1):
    """Return an HTML object for 'text' with reference markup converted.

    Before the text is handed to HTML, three rewrites are applied in
    order:

      - '.. [name]' after a newline or NUL character becomes a named
        anchor,
      - '[name]' becomes a link to the anchor '#name',
      - '[name.html]' becomes a link to the file 'name.html'.

    level is passed on to HTML unchanged.
    """
    letters=string.letters
    rewrites=(
        (r'[\0\n]\.\. \[([0-9_%s-]+)\]' % letters,
         r'\n <a name="\1">[\1]</a>'),
        (r'([\x00- ,])\[(?P<ref>[0-9_%s-]+)\]([\x00- ,.:])' % letters,
         r'\1<a href="#\2">[\2]</a>\3'),
        (r'([\0- ,])\[([^]]+)\.html\]([\0- ,.:])',
         r'\1<a href="\2.html">[\2]</a>\3'),
        )
    for pattern, markup in rewrites:
        text=re.sub(pattern,markup,text)

    return HTML(text,level=level)
584
585
586 def main():
587 import sys, getopt
588
589 opts,args=getopt.getopt(sys.argv[1:],'twl')
590
591 if args:
592 [infile]=args
593 s=open(infile,'r').read()
594 else:
595 s=sys.stdin.read()
596
597 if opts:
598
599 if filter(lambda o: o[0]=='-w', opts):
600 print 'Content-Type: text/html\n'
601
602 if filter(lambda o: o[0]=='-l', opts):
603 import locale
604 locale.setlocale(locale.LC_ALL,"")
605
606 if s[:2]=='#!':
607 s=re.sub('^#![^\n]+','',s)
608
609 mo = re.compile('([\0-\n]*\n)').match(s)
610 if mo is not None:
611 s = s[len(mo.group(0)) :]
612
613 s=str(html_with_references(s))
614 if s[:4]=='<h1>':
615 t=s[4:find(s,'</h1>')]
616 s='''<html><head><title>%s</title>
617 </head><body>
618 %s
619 </body></html>
620 ''' % (t,s)
621 print s
622 else:
623 print html_with_references(s)
624
# Run the command line converter only when executed as a script.
if __name__=="__main__": main()