]>
git.saurik.com Git - wxWidgets.git/blob - wxPython/samples/stxview/StructuredText/ClassicStructuredText.py
1 #! /usr/bin/env python -- # -*- python -*-
2 ##############################################################################
4 # Zope Public License (ZPL) Version 1.0
5 # -------------------------------------
7 # Copyright (c) Digital Creations. All rights reserved.
9 # This license has been certified as Open Source(tm).
11 # Redistribution and use in source and binary forms, with or without
12 # modification, are permitted provided that the following conditions are
15 # 1. Redistributions in source code must retain the above copyright
16 # notice, this list of conditions, and the following disclaimer.
18 # 2. Redistributions in binary form must reproduce the above copyright
19 # notice, this list of conditions, and the following disclaimer in
20 # the documentation and/or other materials provided with the
23 # 3. Digital Creations requests that attribution be given to Zope
24 # in any manner possible. Zope includes a "Powered by Zope"
25 # button that is installed by default. While it is not a license
26 # violation to remove this button, it is requested that the
27 # attribution remain. A significant investment has been put
28 # into Zope, and this effort will continue if the Zope community
29 # continues to grow. This is one way to assure that growth.
31 # 4. All advertising materials and documentation mentioning
32 # features derived from or use of this software must display
33 # the following acknowledgement:
35 # "This product includes software developed by Digital Creations
36 # for use in the Z Object Publishing Environment
37 # (http://www.zope.org/)."
39 # In the event that the product being advertised includes an
40 # intact Zope distribution (with copyright and license included)
41 # then this clause is waived.
43 # 5. Names associated with Zope or Digital Creations must not be used to
44 # endorse or promote products derived from this software without
45 # prior written permission from Digital Creations.
47 # 6. Modified redistributions of any form whatsoever must retain
48 # the following acknowledgment:
50 # "This product includes software developed by Digital Creations
51 # for use in the Z Object Publishing Environment
52 # (http://www.zope.org/)."
54 # Intact (re-)distributions of any official Zope release do not
55 # require an external acknowledgement.
57 # 7. Modifications are encouraged but must be packaged separately as
58 # patches to official Zope releases. Distributions that do not
59 # clearly separate the patches from the original work must be clearly
60 # labeled as unofficial distributions. Modifications which do not
61 # carry the name Zope may be packaged in any form, as long as they
62 # conform to all of the clauses above.
67 # THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
68 # EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
69 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
70 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
71 # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
72 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
73 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
74 # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
75 # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
76 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
77 # OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
81 # This software consists of contributions made by Digital Creations and
82 # many individuals on behalf of Digital Creations. Specific
83 # attributions are listed in the accompanying credits file.
85 ##############################################################################
86 '''Structured Text Manipulation
88 Parse a structured text string into a form that can be used with
89 structured formats, like html.
91 Structured text is text that uses indentation and simple
92 symbology to indicate the structure of a document.
94 A structured string consists of a sequence of paragraphs separated by
95 one or more blank lines. Each paragraph has a level which is defined
96 as the minimum indentation of the paragraph. A paragraph is a
97 sub-paragraph of another paragraph if the other paragraph is the last
98 preceding paragraph that has a lower level.
100 Special symbology is used to indicate special constructs:
102 - A single-line paragraph whose immediately succeeding paragraphs are lower
103 level is treated as a header.
105 - A paragraph that begins with a '-', '*', or 'o' is treated as an
106 unordered list (bullet) element.
108 - A paragraph that begins with a sequence of digits followed by a
109 white-space character is treated as an ordered list element.
111 - A paragraph that begins with a sequence of sequences, where each
112 sequence is a sequence of digits or a sequence of letters followed
113 by a period, is treated as an ordered list element.
115 - A paragraph with a first line that contains some text, followed by
116 some white-space and '--' is treated as
117 a descriptive list element. The leading text is treated as the
120 - Sub-paragraphs of a paragraph that ends in the word 'example' or the
121 word 'examples', or '::' are treated as example code and are output as is.
123 - Text enclosed in single quotes (with white-space to the left of the
124 first quote and whitespace or punctuation to the right of the second quote)
125 is treated as example code.
127 - Text surrounded by '*' characters (with white-space to the left of the
128 first '*' and whitespace or punctuation to the right of the second '*')
131 - Text surrounded by '**' characters (with white-space to the left of the
132 first '**' and whitespace or punctuation to the right of the second '**')
135 - Text surrounded by '_' underscore characters (with whitespace to the left
136 and whitespace or punctuation to the right) is made underlined.
138 - Text enclosed by double quotes followed by a colon, a URL, and concluded
139 by punctuation plus white space, *or* just white space, is treated as a
140 hyper link. For example:
142 "Zope":http://www.zope.org/ is ...
144 Is interpreted as '<a href="http://www.zope.org/">Zope</a> is ....'
145 Note: This works for relative as well as absolute URLs.
147 - Text enclosed by double quotes followed by a comma, one or more spaces,
148 an absolute URL and concluded by punctuation plus white space, or just
149 white space, is treated as a hyper link. For example:
151 "mail me", mailto:amos@digicool.com.
153 Is interpreted as '<a href="mailto:amos@digicool.com">mail me</a>.'
155 - Text enclosed in brackets which consists only of letters, digits,
156 underscores and dashes is treated as hyper links within the document.
159 As demonstrated by Smith [12] this technique is quite effective.
161 Is interpreted as '... by Smith <a href="#12">[12]</a> this ...'. Together
162 with the next rule this allows easy coding of references or end notes.
164 - Text enclosed in brackets which is preceded by the start of a line, two
165 periods and a space is treated as a named link. For example:
167 .. [12] "Effective Techniques" Smith, Joe ...
169 Is interpreted as '<a name="12">[12]</a> "Effective Techniques" ...'.
170 Together with the previous rule this allows easy coding of references or
174 - A paragraph that has blocks of text enclosed in '||' is treated as a
175 table. The text blocks correspond to table cells and table rows are
176 denoted by newlines. By default the cells are center aligned. A cell
177 can span more than one column by preceding a block of text with an
178 equivalent number of cell separators '||'. Newlines and '|' cannot
179 be a part of the cell text. For example:
181 |||| **Ingredients** ||
182 || *Name* || *Amount* ||
188 <TABLE BORDER=1 CELLPADDING=2>
190 <TD ALIGN=CENTER COLSPAN=2> <strong>Ingredients</strong> </TD>
193 <TD ALIGN=CENTER COLSPAN=1> <em>Name</em> </TD>
194 <TD ALIGN=CENTER COLSPAN=1> <em>Amount</em> </TD>
197 <TD ALIGN=CENTER COLSPAN=1>Spam</TD>
198 <TD ALIGN=CENTER COLSPAN=1>10</TD>
201 <TD ALIGN=CENTER COLSPAN=1>Eggs</TD>
202 <TD ALIGN=CENTER COLSPAN=1>3</TD>
210 from ts_regex
import gsub
211 from string
import split
, join
, strip
, find
215 def untabify(aString
,
216 indent_tab
=ts_regex
.compile('\(\n\|^\)\( *\)\t').search_group
,
219 Convert indentation tabs to spaces.
224 ts_results
= indent_tab(rest
, (1,2))
226 start
, grps
= ts_results
229 result
=result
+rest
[:start
]
230 rest
="\n%s%s" % (' ' * ((indent
/8+1)*8),
231 rest
[start
+indent
+1+lnl
:])
235 def indent(aString
, indent
=2):
236 """Indent a string the given number of spaces"""
237 r
=split(untabify(aString
),'\n')
239 if not r
[-1]: del r
[-1]
241 return "%s%s\n" % (tab
,join(r
,'\n'+tab
))
243 def reindent(aString
, indent
=2, already_untabified
=0):
244 "reindent a block of text, so that the minimum indent is as given"
246 if not already_untabified
: aString
=untabify(aString
)
248 l
=indent_level(aString
)[0]
249 if indent
==l
: return aString
257 for s
in split(aString
,'\n'): append(tab
+s
)
260 for s
in split(aString
,'\n'): append(s
[l
:])
264 def indent_level(aString
,
265 indent_space
=ts_regex
.compile('\n\( *\)').search_group
,
268 Find the minimum indentation for a string, not counting blank lines.
275 ts_results
= indent_space(text
, (1,2), start
)
277 start
, grps
= ts_results
280 if start
< l
and text
[start
] != '\n': # Skip blank lines
281 if not i
: return (0,aString
)
282 if i
< indent
: indent
= i
284 return (indent
,aString
)
286 def paragraphs(list,start
):
290 while i
< l
and list[i
][0] > level
: i
=i
+1
294 if not list: return []
299 sublen
=paragraphs(list,i
)
301 r
.append((list[i
-1][1],structure(list[i
:i
+sublen
])))
307 CELL
=' <TD ALIGN=CENTER COLSPAN=%i>%s</TD>\n'
308 ROW
=' <TR>\n%s </TR>\n'
309 TABLE
='\n<TABLE BORDER=1 CELLPADDING=2>\n%s</TABLE>'
311 def create(self
,aPar
,
312 td_reg
=re
.compile(r
'[ \t\n]*\|\|([^\0x00|]*)')
314 '''parses a table and returns nested list representing the
317 text
=filter(None,split(aPar
,'\n'))
321 mo
= td_reg
.match(line
)
324 row
.append(mo
.group(1))
325 if pos
==len(line
):break
327 self
.table
.append(row
)
331 '''Creates an HTML representation of table'''
333 for row
in self
.table
:
341 htmlrow
.append(self
.CELL
%(colspan
,cell
))
343 htmltable
.append(self
.ROW
%join(htmlrow
,''))
344 return self
.TABLE
%join(htmltable
,'')
348 class StructuredText
:
350 """Model text as structured collection of paragraphs.
352 Structure is implied by the indentation level.
354 This class is intended as a base class for classes that do actual text
358 def __init__(self
, aStructuredString
, level
=0,
359 paragraph_divider
=regex
.compile('\(\r?\n *\)+\r?\n'),
361 '''Convert a structured text string into a structured text object.
365 aStructuredString -- The string to be parsed.
366 level -- The level of top level headings to be created.
370 pat
= ' \"([%s0-9-_,./?=@~&]*)\":' % string
.letters
+ \
371 '([-:%s0-9_,./?=@#~&]*?)' % string
.letters
+ \
374 p_reg
= re
.compile(pat
,re
.M
)
376 aStructuredString
= p_reg
.sub(r
'<a href="\2">\1</a>\3 ' , aStructuredString
)
378 pat
= ' \"([%s0-9-_,./?=@~&]*)\", ' % string
.letters
+ \
379 '([-:%s0-9_,./?=@#~&]*?)' % string
.letters
+ \
382 p_reg
= re
.compile(pat
,re
.M
)
384 aStructuredString
= p_reg
.sub(r
'<a href="\2">\1</a>\3 ' , aStructuredString
)
387 protoless
= find(aStructuredString
, '<a href=":')
389 aStructuredString
= re
.sub('<a href=":', '<a href="',
393 paragraphs
=ts_regex
.split(untabify(aStructuredString
),
395 paragraphs
=map(indent_level
,paragraphs
)
397 self
.structure
=structure(paragraphs
)
401 return str(self
.structure
)
404 ctag_prefix
=r
'([\x00- \\(]|^)'
405 ctag_suffix
=r
'([\x00- ,.:;!?\\)]|$)'
406 ctag_middle
=r
'[%s]([^\x00- %s][^%s]*[^\x00- %s]|[^%s])[%s]'
407 ctag_middl2
=r
'[%s][%s]([^\x00- %s][^%s]*[^\x00- %s]|[^%s])[%s][%s]'
411 ctag_prefix
+(ctag_middle
% (("*",)*6) )+ctag_suffix
),
413 ctag_prefix
+(ctag_middl2
% (("*",)*8))+ctag_suffix
),
415 ctag_prefix
+(ctag_middle
% (("_",)*6) )+ctag_suffix
),
417 ctag_prefix
+(ctag_middle
% (("\'",)*6))+ctag_suffix
),
420 s
=strong
.sub(r
'\1<strong>\2</strong>\3',s
)
421 s
=under
.sub( r
'\1<u>\2</u>\3',s
)
422 s
=code
.sub( r
'\1<code>\2</code>\3',s
)
423 s
=em
.sub( r
'\1<em>\2</em>\3',s
)
426 class HTML(StructuredText
):
429 An HTML structured text formatter.
433 extra_dl
=re
.compile("</dl>\n<dl>"),
434 extra_ul
=re
.compile("</ul>\n<ul>"),
435 extra_ol
=re
.compile("</ol>\n<ol>"),
438 Return an HTML string representation of the structured text data.
441 s
=self
._str
(self
.structure
,self
.level
)
442 s
=extra_dl
.sub('\n',s
)
443 s
=extra_ul
.sub('\n',s
)
444 s
=extra_ol
.sub('\n',s
)
447 def ul(self
, before
, p
, after
):
448 if p
: p
="<p>%s</p>" % strip(ctag(p
))
449 return ('%s<ul><li>%s\n%s\n</li></ul>\n'
452 def ol(self
, before
, p
, after
):
453 if p
: p
="<p>%s</p>" % strip(ctag(p
))
454 return ('%s<ol><li>%s\n%s\n</li></ol>\n'
def dl(self, before, t, d, after):
    """Return the HTML for one descriptive-list entry.

    before -- output accumulated so far; emitted first, unchanged.
    t      -- the term text; passed through ctag() and placed in <dt>.
    d      -- the description text; passed through ctag() and placed in
              a <p> inside <dd>.
    after  -- already-rendered nested content, emitted inside <dd> after
              the description paragraph.
    """
    # ctag() is this module's inline-markup pass; the term is converted
    # before the description, in the same order as the arguments.
    term_markup = ctag(t)
    description_markup = ctag(d)
    template = '%s<dl><dt>%s</dt><dd><p>%s</p>\n%s\n</dd></dl>\n'
    return template % (before, term_markup, description_markup, after)
461 def head(self
, before
, t
, level
, d
):
462 if level
> 0 and level
< 6:
463 return ('%s<h%d>%s</h%d>\n%s\n'
464 % (before
,level
,strip(ctag(t
)),level
,d
))
466 t
="<p><strong>%s</strong></p>" % strip(ctag(t
))
467 return ('%s<dl><dt>%s\n</dt><dd>%s\n</dd></dl>\n'
def normal(self, before, p, after):
    """Return the HTML for an ordinary paragraph.

    before -- output accumulated so far; emitted first, unchanged.
    p      -- the paragraph text; passed through ctag() and wrapped in <p>.
    after  -- already-rendered content emitted on the lines following
              the paragraph.
    """
    # ctag() is this module's inline-markup pass.
    paragraph_markup = ctag(p)
    return '%s<p>%s</p>\n%s\n' % (before, paragraph_markup, after)
473 def pre(self
,structure
,tagged
=0):
474 if not structure
: return ''
480 r
="%s%s\n\n%s" % (r
,html_quote(s
[0]),self
.pre(s
[1],1))
481 if not tagged
: r
=r
+'</PRE>\n'
def table(self, before, table, after):
    """Return the HTML for a table paragraph.

    before -- output accumulated so far; emitted first, unchanged.
    table  -- the table text; passed through ctag() and wrapped in <p>.
    after  -- already-rendered content emitted on the lines following
              the table.
    """
    # Same wrapper as an ordinary paragraph: the table markup is still run
    # through ctag() so inline markup inside its cells is converted.
    table_markup = ctag(table)
    return '%s<p>%s</p>\n%s\n' % (before, table_markup, after)
487 def _str(self
,structure
,level
,
489 bullet
=ts_regex
.compile('[ \t\n]*[o*-][ \t\n]+\([^\0]*\)'
491 example
=ts_regex
.compile('[\0- ]examples?:[\0- ]*$'
493 dl
=ts_regex
.compile('\([^\n]+\)[ \t]+--[ \t\n]+\([^\0]*\)'
495 nl
=ts_regex
.compile('\n').search
,
497 '[ \t]*\(\([0-9]+\|[%s]+\)[.)]\)+[ \t\n]+\([^\0]*\|$\)' % string
.letters
499 olp
=ts_regex
.compile('[ \t]*([0-9]+)[ \t\n]+\([^\0]*\|$\)'
505 ts_results
= bullet(s
[0], (1,))
508 if s
[0][-2:]=='::' and s
[1]: ps
=self
.pre(s
[1])
509 else: ps
=self
._str
(s
[1],level
)
512 ts_results
= ol(s
[0], (3,))
515 if s
[0][-2:]=='::' and s
[1]: ps
=self
.pre(s
[1])
516 else: ps
=self
._str
(s
[1],level
)
519 ts_results
= olp(s
[0], (1,))
522 if s
[0][-2:]=='::' and s
[1]: ps
=self
.pre(s
[1])
523 else: ps
=self
._str
(s
[1],level
)
526 ts_results
= dl(s
[0], (1,2))
529 r
=self
.dl(r
,t
,d
,self
._str
(s
[1],level
))
531 if example(s
[0]) >= 0 and s
[1]:
532 # Introduce an example, using pre tags:
533 r
=self
.normal(r
,s
[0],self
.pre(s
[1]))
535 if s
[0][-2:]=='::' and s
[1]:
536 # Introduce an example, using pre tags:
537 r
=self
.normal(r
,s
[0][:-1],self
.pre(s
[1]))
539 if table
.create(s
[0]):
541 r
=self
.table(r
,table
.html(),self
._str
(s
[1],level
))
545 if nl(s
[0]) < 0 and s
[1] and s
[0][-1:] != ':':
548 r
=self
.head(r
,t
,level
,
549 self
._str
(s
[1],level
and level
+1))
551 r
=self
.normal(r
,s
[0],self
._str
(s
[1],level
))
557 (re
.compile('&'), '&'),
558 (re
.compile("<"), '<' ),
559 (re
.compile(">"), '>' ),
560 (re
.compile('"'), '"')
563 for re
,name
in character_entities
:
564 text
=re
.sub(name
,text
)
567 def html_with_references(text
, level
=1):
569 r
'[\0\n]\.\. \[([0-9_%s-]+)\]' % string
.letters
,
570 r
'\n <a name="\1">[\1]</a>',
574 r
'([\x00- ,])\[(?P<ref>[0-9_%s-]+)\]([\x00- ,.:])' % string
.letters
,
575 r
'\1<a href="#\2">[\2]</a>\3',
579 r
'([\0- ,])\[([^]]+)\.html\]([\0- ,.:])',
580 r
'\1<a href="\2.html">[\2]</a>\3',
583 return HTML(text
,level
=level
)
589 opts
,args
=getopt
.getopt(sys
.argv
[1:],'twl')
593 s
=open(infile
,'r').read()
599 if filter(lambda o
: o
[0]=='-w', opts
):
600 print 'Content-Type: text/html\n'
602 if filter(lambda o
: o
[0]=='-l', opts
):
604 locale
.setlocale(locale
.LC_ALL
,"")
607 s
=re
.sub('^#![^\n]+','',s
)
609 mo
= re
.compile('([\0-\n]*\n)').match(s
)
611 s
= s
[len(mo
.group(0)) :]
613 s
=str(html_with_references(s
))
615 t
=s
[4:find(s
,'</h1>')]
616 s
='''<html><head><title>%s</title>
623 print html_with_references(s
)
# Run the command-line driver only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()