]> git.saurik.com Git - wxWidgets.git/blame - wxPython/samples/stxview/StructuredText/StructuredText.py
*** empty log message ***
[wxWidgets.git] / wxPython / samples / stxview / StructuredText / StructuredText.py
CommitLineData
c12bc4de
RD
1#! /usr/bin/env python -- # -*- python -*-
2##############################################################################
3#
4# Zope Public License (ZPL) Version 1.0
5# -------------------------------------
6#
7# Copyright (c) Digital Creations. All rights reserved.
8#
9# This license has been certified as Open Source(tm).
10#
11# Redistribution and use in source and binary forms, with or without
12# modification, are permitted provided that the following conditions are
13# met:
14#
15# 1. Redistributions in source code must retain the above copyright
16# notice, this list of conditions, and the following disclaimer.
17#
18# 2. Redistributions in binary form must reproduce the above copyright
19# notice, this list of conditions, and the following disclaimer in
20# the documentation and/or other materials provided with the
21# distribution.
22#
23# 3. Digital Creations requests that attribution be given to Zope
24# in any manner possible. Zope includes a "Powered by Zope"
25# button that is installed by default. While it is not a license
26# violation to remove this button, it is requested that the
27# attribution remain. A significant investment has been put
28# into Zope, and this effort will continue if the Zope community
29# continues to grow. This is one way to assure that growth.
30#
31# 4. All advertising materials and documentation mentioning
32# features derived from or use of this software must display
33# the following acknowledgement:
34#
35# "This product includes software developed by Digital Creations
36# for use in the Z Object Publishing Environment
37# (http://www.zope.org/)."
38#
39# In the event that the product being advertised includes an
40# intact Zope distribution (with copyright and license included)
41# then this clause is waived.
42#
43# 5. Names associated with Zope or Digital Creations must not be used to
44# endorse or promote products derived from this software without
45# prior written permission from Digital Creations.
46#
47# 6. Modified redistributions of any form whatsoever must retain
48# the following acknowledgment:
49#
50# "This product includes software developed by Digital Creations
51# for use in the Z Object Publishing Environment
52# (http://www.zope.org/)."
53#
54# Intact (re-)distributions of any official Zope release do not
55# require an external acknowledgement.
56#
57# 7. Modifications are encouraged but must be packaged separately as
58# patches to official Zope releases. Distributions that do not
59# clearly separate the patches from the original work must be clearly
60# labeled as unofficial distributions. Modifications which do not
61# carry the name Zope may be packaged in any form, as long as they
62# conform to all of the clauses above.
63#
64#
65# Disclaimer
66#
67# THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
68# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
69# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
70# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
71# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
72# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
73# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
74# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
75# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
76# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
77# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
78# SUCH DAMAGE.
79#
80#
81# This software consists of contributions made by Digital Creations and
82# many individuals on behalf of Digital Creations. Specific
83# attributions are listed in the accompanying credits file.
84#
85##############################################################################
86'''Structured Text Manipulation
87
88Parse a structured text string into a form that can be used with
89structured formats, like html.
90
91Structured text is text that uses indentation and simple
92symbology to indicate the structure of a document.
93
94A structured string consists of a sequence of paragraphs separated by
95one or more blank lines. Each paragraph has a level which is defined
96as the minimum indentation of the paragraph. A paragraph is a
97sub-paragraph of another paragraph if the other paragraph is the last
98preceding paragraph that has a lower level.
99
100Special symbology is used to indicate special constructs:
101
102- A single-line paragraph whose immediately succeeding paragraphs are lower
103 level is treated as a header.
104
105- A paragraph that begins with a '-', '*', or 'o' is treated as an
106 unordered list (bullet) element.
107
108- A paragraph that begins with a sequence of digits followed by a
109 white-space character is treated as an ordered list element.
110
111- A paragraph that begins with a sequence of sequences, where each
112 sequence is a sequence of digits or a sequence of letters followed
113 by a period, is treated as an ordered list element.
114
115- A paragraph with a first line that contains some text, followed by
116 some white-space and '--' is treated as
117 a descriptive list element. The leading text is treated as the
118 element title.
119
120- Sub-paragraphs of a paragraph that ends in 'example:', 'examples:',
121 or '::' are treated as example code and are output as is.
122
123- Text enclosed in single quotes (with white-space to the left of the
124 first quote and whitespace or punctuation to the right of the second quote)
125 is treated as example code.
126
127- Text surrounded by '*' characters (with white-space to the left of the
128 first '*' and whitespace or punctuation to the right of the second '*')
129 is emphasized.
130
131- Text surrounded by '**' characters (with white-space to the left of the
132 first '**' and whitespace or punctuation to the right of the second '**')
133 is made strong.
134
135- Text surrounded by '_' underscore characters (with whitespace to the left
136 and whitespace or punctuation to the right) is made underlined.
137
138- Text enclosed by double quotes followed by a colon, a URL, and concluded
139 by punctuation plus white space, *or* just white space, is treated as a
140 hyper link. For example:
141
142 "Zope":http://www.zope.org/ is ...
143
144 Is interpreted as '<a href="http://www.zope.org/">Zope</a> is ....'
145 Note: This works for relative as well as absolute URLs.
146
147- Text enclosed by double quotes followed by a comma, one or more spaces,
148 an absolute URL and concluded by punctuation plus white space, or just
149 white space, is treated as a hyper link. For example:
150
151 "mail me", mailto:amos@digicool.com.
152
153 Is interpreted as '<a href="mailto:amos@digicool.com">mail me</a>.'
154
155- Text enclosed in brackets which consists only of letters, digits,
156 underscores and dashes is treated as hyper links within the document.
157 For example:
158
159 As demonstrated by Smith [12] this technique is quite effective.
160
161 Is interpreted as '... by Smith <a href="#12">[12]</a> this ...'. Together
162 with the next rule this allows easy coding of references or end notes.
163
164- Text enclosed in brackets which is preceded by the start of a line, two
165 periods and a space is treated as a named link. For example:
166
167 .. [12] "Effective Techniques" Smith, Joe ...
168
169 Is interpreted as '<a name="12">[12]</a> "Effective Techniques" ...'.
170 Together with the previous rule this allows easy coding of references or
171 end notes.
172
173
174- A paragraph that has blocks of text enclosed in '||' is treated as a
175 table. The text blocks correspond to table cells and table rows are
176 denoted by newlines. By default the cells are center aligned. A cell
177 can span more than one column by preceding a block of text with an
178 equivalent number of cell separators '||'. Newlines and '|' cannot
179 be a part of the cell text. For example:
180
181 |||| **Ingredients** ||
182 || *Name* || *Amount* ||
183 ||Spam||10||
184 ||Eggs||3||
185
186 is interpreted as::
187
188 <TABLE BORDER=1 CELLPADDING=2>
189 <TR>
190 <TD ALIGN=CENTER COLSPAN=2> <strong>Ingredients</strong> </TD>
191 </TR>
192 <TR>
193 <TD ALIGN=CENTER COLSPAN=1> <em>Name</em> </TD>
194 <TD ALIGN=CENTER COLSPAN=1> <em>Amount</em> </TD>
195 </TR>
196 <TR>
197 <TD ALIGN=CENTER COLSPAN=1>Spam</TD>
198 <TD ALIGN=CENTER COLSPAN=1>10</TD>
199 </TR>
200 <TR>
201 <TD ALIGN=CENTER COLSPAN=1>Eggs</TD>
202 <TD ALIGN=CENTER COLSPAN=1>3</TD>
203 </TR>
204 </TABLE>
205
206
207$Id$'''
208# Copyright
209#
210# Copyright 1996 Digital Creations, L.C., 910 Princess Anne
211# Street, Suite 300, Fredericksburg, Virginia 22401 U.S.A. All
212# rights reserved. Copyright in this software is owned by DCLC,
213# unless otherwise indicated. Permission to use, copy and
214# distribute this software is hereby granted, provided that the
215# above copyright notice appear in all copies and that both that
216# copyright notice and this permission notice appear. Note that
217# any product, process or technology described in this software
218# may be the subject of other Intellectual Property rights
219# reserved by Digital Creations, L.C. and are not licensed
220# hereunder.
221#
222# Trademarks
223#
224# Digital Creations & DCLC, are trademarks of Digital Creations, L.C..
225# All other trademarks are owned by their respective companies.
226#
227# No Warranty
228#
229# The software is provided "as is" without warranty of any kind,
230# either express or implied, including, but not limited to, the
231# implied warranties of merchantability, fitness for a particular
232# purpose, or non-infringement. This software could include
233# technical inaccuracies or typographical errors. Changes are
234# periodically made to the software; these changes will be
235# incorporated in new editions of the software. DCLC may make
236# improvements and/or changes in this software at any time
237# without notice.
238#
239# Limitation Of Liability
240#
241# In no event will DCLC be liable for direct, indirect, special,
242# incidental, economic, cover, or consequential damages arising
243# out of the use of or inability to use this software even if
244# advised of the possibility of such damages. Some states do not
245# allow the exclusion or limitation of implied warranties or
246# limitation of liability for incidental or consequential
247# damages, so the above limitation or exclusion may not apply to
248# you.
249#
250#
251# If you have questions regarding this software,
252# contact:
253#
254# Jim Fulton, jim@digicool.com
255#
256# (540) 371-6909
257#
258# $Log$
259# Revision 1.1 2001/03/10 05:07:20 RD
260# Added some simple sample apps
261#
262# Revision 1.27 2000/04/21 13:38:10 jim
263# Added closing list tags. Woo hoo!
264#
265# Revision 1.26 2000/03/14 17:22:04 brian
266# Allow ~ in hrefs.
267#
268# Revision 1.25 2000/02/17 00:53:24 klm
269# HTML._str(): We were getting preformatted examples rendered twice,
270# second time without preformatting. Problem was a missing 'continue'
271# in one of the cases.
272#
273# Revision 1.24 1999/12/13 16:32:48 klm
274# Incorporated pavlos christoforou's mods to handle simple tables. From
275# his web page at http://www.zope.org/Members/gaaros/StructuredText:
276#
277# Structured Text module with table support
278#
279# A paragraph that has blocks of text enclosed in '||' is treated as a
280# table. The text blocks correspond to table cells and table rows are
281# denoted by newlines. By default the cells are center aligned. You can
282# change the defaults by modifying the CELL,ROW and TABLE class
283# attributes in class Table. A cell can span more than one column by
284# preceding a block of text with an equivalent number of cell separators
285# '||'. Newlines and '|' cannot be a part of the cell text. If you need
286# newlines use <BR>. For example:
287#
288# |||| **Ingredients** ||
289# || *Name* || *Amount* ||
290# ||Spam||10||
291# ||Eggs||3||
292#
293# Revision 1.23 1999/08/03 20:49:05 jim
294# Fixed to allow list elements to introduce examples.
295#
296# Restructured _str using continue to avoid excessive nesting.
297#
298# Revision 1.22 1999/08/02 22:01:28 jim
299# Fixed a bunch of bugs introduced by making ts_regex actually thread
300# safe.
301#
302# Also localized a bunch of regular expressions
303# using "static" variables (aka always default arguments).
304#
305# Revision 1.21 1999/08/02 13:26:52 jim
306# paragraph_divider needs to be a regular (thread-unsafe) regex
307# since it gets passed to ts_regex.split, which is thread-safe
308# and wants to use regs.
309#
310# Revision 1.20 1999/07/21 13:33:59 jim
311# untabified.
312#
313# Revision 1.19 1999/07/15 16:43:15 jim
314# Checked in Scott Robertson's thread-safety fixes.
315#
316# Revision 1.18 1999/03/24 00:03:18 klm
317# Provide for relative links, eg <a href="file_in_same_dir">whatever</a>,
318# as:
319#
320# "whatever", :file_in_same_dir
321#
322# or
323#
324# "whatever"::file_in_same_dir
325#
326# .__init__(): relax the second gsub, using a '*' instead of a '+', so
327# the stuff before the ':' can be missing, and also do postprocessing so
328# any resulting '<a href=":file_in_same_dir">'s have the superfluous ':'
329# removed. *Seems* good!
330#
331# Revision 1.17 1999/03/12 23:21:39 klm
332# Gratuituous checkin to test my cvs *update* logging hook.
333#
334# Revision 1.16 1999/03/12 17:12:12 klm
335# Added support for underlined elements, in the obvious way (and
336# included an entry in the module docstring for it).
337#
338# Added an entry in the module docstring describing what i *guess* is
339# the criterion for identifying header elements. (I'm going to have to
340# delve into and understand the framework a bit better before *knowing*
341# this is the case.)
342#
343# Revision 1.15 1999/03/11 22:40:18 klm
344# Handle links that include '#' named links.
345#
346# Revision 1.14 1999/03/11 01:35:19 klm
347# Fixed a small typo, and refined the module docstring link example, in
348# order to do a checkin to exercise the CVS repository mirroring. Might
349# as well include my last checkin message, with some substantial stuff:
350#
351# Links are now recognized whether or not the candidate strings are
352# terminated with punctuation before the trailing whitespace. The old
353# form - trailing punctuation then whitespace - is preserved, but the
354# punctuation is now unnecessary.
355#
356# The regular expressions are a bit more complicated, but i've factored
357# out the common parts and but them in variables with suggestive names,
358# which may make them easier to understand.
359#
360# Revision 1.13 1999/03/11 00:49:57 klm
361# Links are now recognized whether or not the candidate strings are
362# terminated with punctuation before the trailing whitespace. The old
363# form - trailing punctuation then whitespace - is preserved, but the
364# punctuation is now unnecessary.
365#
366# The regular expressions are a bit more complicated, but i've factored
367# out the common parts and but them in variables with suggestive names,
368# which may make them easier to understand.
369#
370# Revision 1.12 1999/03/10 00:15:46 klm
371# Committing with version 1.0 of the license.
372#
373# Revision 1.11 1999/02/08 18:13:12 klm
374# Trival checkin (spelling fix "preceedeing" -> "preceding" and similar)
375# to see what pitfalls my environment presents to accomplishing a
376# successful checkin. (It turns out that i can't do it from aldous because
377# the new version of cvs doesn't support the '-t' and '-f' options in the
378# cvswrappers file...)
379#
380# Revision 1.10 1998/12/29 22:30:43 amos
381# Improved doc string to describe hyper link and references capabilities.
382#
383# Revision 1.9 1998/12/04 20:15:31 jim
384# Detabification and new copyright.
385#
386# Revision 1.8 1998/02/27 18:45:22 jim
387# Various updates, including new indentation utilities.
388#
389# Revision 1.7 1997/12/12 15:39:54 jim
390# Added level as argument for html_with_references.
391#
392# Revision 1.6 1997/12/12 15:27:25 jim
393# Added additional pattern matching for HTML references.
394#
395# Revision 1.5 1997/03/08 16:01:03 jim
396# Moved code to recognize: "foo bar", url.
397# into object initializer, so it gets applied in all cases.
398#
399# Revision 1.4 1997/02/17 23:36:35 jim
400# Added support for "foo title", http:/foohost/foo
401#
402# Revision 1.3 1996/12/06 15:57:37 jim
403# Fixed bugs in character tags.
404#
405# Added -t command-line option to generate title if:
406#
407# - The first paragraph is one line (i.e. a heading) and
408#
409# - All other paragraphs are indented.
410#
411# Revision 1.2 1996/10/28 13:56:02 jim
412# Fixed bug in ordered lists.
413# Added option for either HTML-style headings or descriptive-list style
414# headings.
415#
416# Revision 1.1 1996/10/23 14:00:45 jim
417# *** empty log message ***
418#
419#
420#
421
422import ts_regex, regex
423from ts_regex import gsub
424from string import split, join, strip, find
425
def untabify(aString,
             indent_tab=ts_regex.compile('\(\n\|^\)\( *\)\t').search_group,
             ):
    '''\
    Convert indentation tabs to spaces.

    Only a tab that follows nothing but spaces at the beginning of a
    line is expanded; it is replaced, together with the spaces before
    it, by enough spaces to reach the next multiple of 8 columns.
    Tabs elsewhere in a line are left alone.

    aString    -- the text to convert.
    indent_tab -- "static" pre-compiled searcher; not meant to be
                  passed by callers.

    Returns the converted string.
    '''
    result = ''
    rest = aString
    while 1:
        ts_results = indent_tab(rest, (1,2))
        if not ts_results:
            return result + rest

        start, grps = ts_results
        # Group 1 is the line start that was matched: a newline, or the
        # empty string when the match is at the very start of the text.
        # Re-emit exactly what was matched; unconditionally writing
        # "\n" here used to add a newline to texts starting with a tab.
        line_start = grps[0]
        indent = len(grps[1])
        result = result + rest[:start]
        # Next tab stop after `indent` columns.  Written without `/` so
        # the result does not depend on integer-division semantics.
        width = indent - indent % 8 + 8
        # The rebuilt line start stays in `rest` so that a second tab
        # on the same line is found (and expanded) by the next pass.
        rest = "%s%s%s" % (line_start, ' ' * width,
                           rest[start + len(line_start) + indent + 1:])
445
def indent(aString, indent=2):
    """Indent a string the given number of spaces.

    Leading tabs are expanded with untabify() first, then every line is
    prefixed with `indent` spaces.  A single trailing empty line (i.e.
    a final newline in the input) is dropped before indenting, and the
    result always ends with exactly one newline.

    aString -- the text to indent.
    indent  -- number of spaces to put in front of each line.

    Returns the indented text, or '' when there is nothing to indent.
    """
    lines = split(untabify(aString), '\n')
    # split() yields an empty last element for text ending in a
    # newline; drop it so no whitespace-only line is produced.
    if lines and not lines[-1]:
        del lines[-1]
    if not lines:
        return ''
    # The width comes from the `indent` argument (the code used to
    # reference an undefined name `level` here and always failed).
    tab = ' ' * indent
    return "%s%s\n" % (tab, join(lines, '\n' + tab))
453
def reindent(aString, indent=2, already_untabified=0):
    """Shift a block of text so that its minimum indentation is `indent`.

    aString            -- the text to shift.
    indent             -- the wanted minimum indentation, in spaces.
    already_untabified -- true if leading tabs were already expanded,
                          in which case untabify() is skipped.

    Returns the shifted text; the input is returned unchanged when it
    already has the requested minimum indentation.
    """
    if not already_untabified:
        aString = untabify(aString)

    current = indent_level(aString)[0]
    if indent == current:
        return aString

    shifted = []
    lines = split(aString, '\n')
    if indent > current:
        # Too shallow: push every line right by the difference.
        pad = ' ' * (indent - current)
        for line in lines:
            shifted.append(pad + line)
    else:
        # Too deep: chop the surplus columns off the front of each line.
        surplus = current - indent
        for line in lines:
            shifted.append(line[surplus:])

    return join(shifted, '\n')
474
def indent_level(aString,
                 indent_space=ts_regex.compile('\n\( *\)').search_group,
                 ):
    '''\
    Find the minimum indentation for a string, not counting blank lines.

    aString      -- the text to measure (tabs are not expanded here).
    indent_space -- "static" pre-compiled searcher; not meant to be
                    passed by callers.

    Returns the tuple (indentation, aString).  If every line is blank
    the indentation reported is the length of the text plus one.
    '''
    # A newline is put in front so the first line is found by the same
    # "newline followed by spaces" pattern as all the others.
    text = '\n' + aString
    text_len = len(text)
    smallest = text_len
    pos = 0
    while 1:
        # NOTE(review): two groups are requested although the pattern
        # defines one -- presumably so that a tuple comes back; confirm
        # against ts_regex before changing.
        found = indent_space(text, (1,2), pos)
        if not found:
            return (smallest, aString)

        pos, groups = found
        width = len(groups[0])
        # Step over the newline and the run of spaces just matched.
        pos = pos + width + 1
        if pos >= text_len or text[pos] == '\n':
            # Blank line (or end of text): does not count.
            continue
        if not width:
            # Nothing can be smaller than zero, stop early.
            return (0, aString)
        if width < smallest:
            smallest = width
496
def paragraphs(list, start):
    """Count the entries directly following list[start] that are nested in it.

    list  -- sequence whose items carry their level as first element.
    start -- index of the paragraph whose sub-paragraphs are counted.

    Returns the number of consecutive items after `start` whose level
    is strictly greater than the level of list[start].
    """
    base_level = list[start][0]
    count = 0
    for entry in list[start + 1:]:
        if entry[0] <= base_level:
            # First item that is not deeper ends the nested run.
            break
        count = count + 1
    return count
503
def structure(list):
    """Turn a flat list of (level, text) items into a nested tree.

    list -- sequence of (level, text) items in document order.

    Returns a list of (text, children) tuples, where children is the
    same kind of list built from the items nested under that entry.
    """
    if not list:
        return []

    tree = []
    total = len(list)
    pos = 0
    while pos < total:
        # Everything up to (not including) children_end is nested
        # under the item at pos.
        children_end = pos + 1 + paragraphs(list, pos)
        text = list[pos][1]
        tree.append((text, structure(list[pos + 1:children_end])))
        pos = children_end
    return tree
515
516
class Table:
    """Parser and HTML renderer for '||'-delimited table paragraphs.

    The output markup is controlled by the CELL, ROW and TABLE format
    strings below.
    """

    # CELL takes (colspan, cell text); ROW takes the joined cells;
    # TABLE takes the joined rows.
    CELL=' <TD ALIGN=CENTER COLSPAN=%i>%s</TD>\n'
    ROW=' <TR>\n%s </TR>\n'
    TABLE='\n<TABLE BORDER=1 CELLPADDING=2>\n%s</TABLE>'

    def create(self,aPar,td=ts_regex.compile(
        '[ \t\n]*||\([^\0|]*\)').match_group):
        '''Parse a paragraph into self.table, a list of rows of cell texts.

        aPar -- the paragraph text, one table row per non-empty line.
        td   -- "static" pre-compiled cell matcher; not meant to be
                passed by callers.

        Returns 1 if every non-empty line consists of '||' cells,
        0 as soon as a line does not (self.table then holds only the
        rows parsed so far).
        '''
        rows = []
        self.table = rows
        for line in filter(None, split(aPar, '\n')):
            cells = []
            remaining = line
            while 1:
                found = td(remaining, (1,))
                if not found:
                    return 0
                # found[0] is used as the offset just past the matched
                # cell, found[1] is the cell text.
                consumed = found[0]
                cells.append(found[1])
                if consumed == len(remaining):
                    break
                remaining = remaining[consumed:]
            rows.append(cells)
        return 1

    def html(self):
        '''Return the HTML for the table last parsed by create().

        An empty cell is not rendered; each one widens the COLSPAN of
        the next non-empty cell in the row by one.
        '''
        rendered_rows = []
        for row in self.table:
            rendered_cells = []
            span = 1
            for cell in row:
                if cell == '':
                    span = span + 1
                else:
                    rendered_cells.append(self.CELL % (span, cell))
                    span = 1
            rendered_rows.append(self.ROW % join(rendered_cells, ''))
        return self.TABLE % join(rendered_rows, '')
554
# Regular expression fragments (old ``regex`` module syntax) that
# StructuredText.__init__ assembles into its hyperlink patterns.
optional_trailing_punctuation = '\(,\|\([.:?;]\)\)?'
trailing_space = '\([\0- ]\)'
not_punctuation_or_whitespace = "[^-,.?:\0- ]"

# Module-level Table instance; HTML._str() parses and renders table
# paragraphs through it.
table = Table()
559
class StructuredText:

    """Model text as structured collection of paragraphs.

    Structure is implied by the indentation level.

    This class is intended as a base class for classes that do the
    actual text output formatting.
    """

    def __init__(self, aStructuredString, level=0,
                 paragraph_divider=regex.compile('\(\n *\)+\n'),
                 ):
        '''Convert a structured text string into a structured text object.

        Arguments:

          aStructuredString -- The string to be parsed.
          level -- The level of top level headings to be created.
          paragraph_divider -- "static" pre-compiled pattern used to
              split the text into paragraphs; not meant to be passed
              by callers.

        Hyperlink markup is turned into <a href=...> tags first; the
        text is then untabified, split into paragraphs, and stored as
        a nested tree in self.structure.
        '''
        anchor = '<a href="\\2">\\1</a>\\4\\5\\6'
        link_tail = optional_trailing_punctuation + trailing_space

        # Links written as  "text":url
        colon_link = ('\"\([^\"\0]+\)\":'
                      + ('\([-:a-zA-Z0-9_,./?=@#~]+%s\)'
                         % not_punctuation_or_whitespace)
                      + link_tail)
        aStructuredString = gsub(colon_link, anchor, aStructuredString)

        # Links written as  "text", url
        comma_link = ('\"\([^\"\0]+\)\",[\0- ]+'
                      + ('\([a-zA-Z]*:[-:a-zA-Z0-9_,./?=@#~]*%s\)'
                         % not_punctuation_or_whitespace)
                      + link_tail)
        aStructuredString = gsub(comma_link, anchor, aStructuredString)

        # A link without a scheme leaves a leading ':' in the href;
        # take it out again.
        if find(aStructuredString, '<a href=":') != -1:
            aStructuredString = gsub('<a href=":', '<a href="',
                                     aStructuredString)

        self.level = level
        pieces = ts_regex.split(untabify(aStructuredString),
                                paragraph_divider)
        self.structure = structure(map(indent_level, pieces))


    def __str__(self):
        '''Return the string form of the nested paragraph structure.'''
        return str(self.structure)
614
615
# Building blocks (old ``regex`` module syntax) for the character-tag
# patterns compiled as default arguments of ctag().
#
# ctag_prefix / ctag_suffix are the groups matched before and after
# the marked-up text.
ctag_prefix = "\([\0- (]\|^\)"
ctag_suffix = "\([\0- ,.:;!?)]\|$\)"
# Templates for text wrapped in a single marker character (6 slots)
# and in a doubled marker character (8 slots); every %s slot is filled
# with the same marker.
ctag_middle = "[%s]\([^\0- %s][^%s]*[^\0- %s]\|[^%s]\)[%s]"
ctag_middl2 = "[%s][%s]\([^\0- %s][^%s]*[^\0- %s]\|[^%s]\)[%s][%s]"
620
def ctag(s,
         em=regex.compile(
             ctag_prefix+(ctag_middle % (("*",)*6) )+ctag_suffix),
         strong=regex.compile(
             ctag_prefix+(ctag_middl2 % (("*",)*8))+ctag_suffix),
         under=regex.compile(
             ctag_prefix+(ctag_middle % (("_",)*6) )+ctag_suffix),
         code=regex.compile(
             ctag_prefix+(ctag_middle % (("\'",)*6))+ctag_suffix),
         ):
    """Replace character-level markup in `s` with HTML tags.

    '**text**' becomes <strong>, '_text_' becomes <u>, "'text'"
    becomes <code> and '*text*' becomes <em>.

    s -- the text to convert; None is treated as the empty string.
    em, strong, under, code -- "static" pre-compiled patterns; not
        meant to be passed by callers.

    Returns the converted text.
    """
    if s is None:
        s = ''
    # Applied in this order; the doubled '**' form is handled before
    # the single '*' form.
    substitutions = (
        (strong, '\\1<strong>\\2</strong>\\3'),
        (under, '\\1<u>\\2</u>\\3'),
        (code, '\\1<code>\\2</code>\\3'),
        (em, '\\1<em>\\2</em>\\3'),
        )
    for pattern, replacement in substitutions:
        s = gsub(pattern, replacement, s)
    return s
637
class HTML(StructuredText):

    '''\
    An HTML structured text formatter.

    str() of an instance renders the parsed paragraph tree as HTML.
    The ul/ol/dl/head/normal/pre/table methods each render one kind of
    element; `before` is the HTML produced so far and `after` (or `d`)
    is the already rendered HTML of the element's sub-paragraphs.
    '''

    def __str__(self,
                extra_dl=regex.compile("</dl>\n<dl>"),
                extra_ul=regex.compile("</ul>\n<ul>"),
                extra_ol=regex.compile("</ol>\n<ol>"),
                ):
        '''\
        Return an HTML string representation of the structured text data.

        Every list item is rendered as a list of its own, so the
        close/open tag pairs between adjacent lists of the same kind
        are removed here to merge them.
        '''
        s=self._str(self.structure,self.level)
        s=gsub(extra_dl,'\n',s)
        s=gsub(extra_ul,'\n',s)
        s=gsub(extra_ol,'\n',s)
        return s

    def ul(self, before, p, after):
        '''Render `p` as a one-item unordered list appended to `before`.'''
        if p: p="<p>%s</p>" % strip(ctag(p))
        return ('%s<ul><li>%s\n%s\n</li></ul>\n'
                % (before,p,after))

    def ol(self, before, p, after):
        '''Render `p` as a one-item ordered list appended to `before`.'''
        if p: p="<p>%s</p>" % strip(ctag(p))
        return ('%s<ol><li>%s\n%s\n</li></ol>\n'
                % (before,p,after))

    def dl(self, before, t, d, after):
        '''Render title `t` and description `d` as a one-item
        descriptive list appended to `before`.'''
        return ('%s<dl><dt>%s</dt><dd><p>%s</p>\n%s\n</dd></dl>\n'
                % (before,ctag(t),ctag(d),after))

    def head(self, before, t, level, d):
        '''Render heading `t` followed by its content `d`.

        For levels 1 to 5 an <hN> tag is used; for any other level the
        heading becomes the bold title of a descriptive list whose
        body is `d`.
        '''
        if level > 0 and level < 6:
            return ('%s<h%d>%s</h%d>\n%s\n'
                    % (before,level,strip(ctag(t)),level,d))

        # The paragraph is closed with </p>; this used to emit a second
        # opening <p>, leaving the title paragraph unterminated.
        t="<p><strong>%s</strong></p>" % strip(ctag(t))
        return ('%s<dl><dt>%s\n</dt><dd>%s\n</dd></dl>\n'
                % (before,t,d))

    def normal(self,before,p,after):
        '''Render `p` as a plain paragraph appended to `before`.'''
        return '%s<p>%s</p>\n%s\n' % (before,ctag(p),after)

    def pre(self,structure,tagged=0):
        '''Render a paragraph tree as HTML-quoted example text.

        structure -- list of (text, children) tuples.
        tagged    -- true on recursive calls, which must not emit the
                     surrounding <PRE> tags again.
        '''
        if not structure: return ''
        if tagged:
            r=''
        else:
            r='<PRE>\n'
        for s in structure:
            r="%s%s\n\n%s" % (r,html_quote(s[0]),self.pre(s[1],1))
        if not tagged: r=r+'</PRE>\n'
        return r

    def table(self,before,table,after):
        '''Render already generated table HTML appended to `before`.'''
        return '%s<p>%s</p>\n%s\n' % (before,ctag(table),after)

    def _list_body(self, s, level):
        '''Render the sub-paragraphs of list item `s`: as example text
        when the item ends in '::' and has sub-paragraphs, otherwise
        as ordinary structured text.'''
        if s[0][-2:]=='::' and s[1]:
            return self.pre(s[1])
        return self._str(s[1],level)

    def _str(self,structure,level,
             # Static
             bullet=ts_regex.compile('[ \t\n]*[o*-][ \t\n]+\([^\0]*\)'
                                     ).match_group,
             example=ts_regex.compile('[\0- ]examples?:[\0- ]*$'
                                      ).search,
             dl=ts_regex.compile('\([^\n]+\)[ \t]+--[ \t\n]+\([^\0]*\)'
                                 ).match_group,
             nl=ts_regex.compile('\n').search,
             ol=ts_regex.compile(
                 '[ \t]*\(\([0-9]+\|[a-zA-Z]+\)[.)]\)+[ \t\n]+\([^\0]*\|$\)'
                 ).match_group,
             olp=ts_regex.compile('[ \t]*([0-9]+)[ \t\n]+\([^\0]*\|$\)'
                                  ).match_group,
             ):
        '''Render a paragraph tree as HTML.

        structure -- list of (text, children) tuples.
        level     -- heading level for headings found at this depth;
                     0 selects the descriptive-list heading style.
        The remaining arguments are "static" pre-compiled patterns and
        are not meant to be passed by callers.

        Each paragraph is classified by the first rule that applies,
        in this order: bullet item, ordered item ("1." / "a)" style),
        ordered item ("(1)" style), descriptive list item, example
        introduction, '::' example introduction, table, heading,
        normal paragraph.
        '''
        r=''
        for s in structure:

            ts_results = bullet(s[0], (1,))
            if ts_results:
                r=self.ul(r,ts_results[1],self._list_body(s,level))
                continue

            ts_results = ol(s[0], (3,))
            if ts_results:
                r=self.ol(r,ts_results[1],self._list_body(s,level))
                continue

            ts_results = olp(s[0], (1,))
            if ts_results:
                r=self.ol(r,ts_results[1],self._list_body(s,level))
                continue

            ts_results = dl(s[0], (1,2))
            if ts_results:
                t,d = ts_results[1]
                r=self.dl(r,t,d,self._str(s[1],level))
                continue

            if example(s[0]) >= 0 and s[1]:
                # Introduce an example, using pre tags:
                r=self.normal(r,s[0],self.pre(s[1]))
                continue

            if s[0][-2:]=='::' and s[1]:
                # Introduce an example, using pre tags; one of the two
                # colons is kept in the introducing paragraph.
                r=self.normal(r,s[0][:-1],self.pre(s[1]))
                continue

            # `table` here is the module-level Table instance, not the
            # table() method above.
            if table.create(s[0]):
                ## table support.
                r=self.table(r,table.html(),self._str(s[1],level))
                continue

            if nl(s[0]) < 0 and s[1] and s[0][-1:] != ':':
                # A one-line paragraph with sub-paragraphs that does
                # not end in ':' is treated as a heading.  Nested
                # headings go one level deeper, except that level 0
                # stays 0.
                r=self.head(r,s[0],level,
                            self._str(s[1],level and level+1))
            else:
                r=self.normal(r,s[0],self._str(s[1],level))
        return r
765
766
def html_quote(v,
               character_entities=(
                   (regex.compile('&'), '&amp;'),
                   (regex.compile("<"), '&lt;' ),
                   (regex.compile(">"), '&gt;' ),
                   (regex.compile('"'), '&quot;')
                   )): #"
    """Return str(v) with the characters & < > " replaced by HTML entities.

    v                  -- any object; it is converted with str() first.
    character_entities -- "static" sequence of (pattern, entity)
                          pairs, applied in order; '&' comes first so
                          the entities inserted afterwards are not
                          quoted a second time.
    """
    quoted = str(v)
    for pattern, entity in character_entities:
        quoted = gsub(pattern, entity, quoted)
    return quoted
778
def html_with_references(text, level=1):
    """Build an HTML formatter for `text` with bracketed references linked.

    Three substitutions are made before the text is parsed:

    - '.. [name]' after a newline (or NUL) becomes a named anchor;
    - '[name]' surrounded by whitespace or punctuation becomes a link
      to the anchor '#name';
    - '[name.html]' becomes a link to the file 'name.html'.

    text  -- the structured text source.
    level -- heading level for top level headings.

    Returns an HTML instance; use str() on it to get the markup.
    """
    # The two periods are escaped so that only a literal '.. ' prefix
    # defines an anchor.  Unescaped, they matched any two characters,
    # which turned ordinary line starts such as 'as [12] ...' into
    # anchors as well.
    text = gsub(
        '[\0\n]\.\. \[\([-_0-9_a-zA-Z-]+\)\]',
        '\n <a name="\\1">[\\1]</a>',
        text)

    text = gsub(
        '\([\0- ,]\)\[\([0-9_a-zA-Z-]+\)\]\([\0- ,.:]\)',
        '\\1<a href="#\\2">[\\2]</a>\\3',
        text)

    text = gsub(
        '\([\0- ,]\)\[\([^]]+\)\.html\]\([\0- ,.:]\)',
        '\\1<a href="\\2.html">[\\2]</a>\\3',
        text)

    return HTML(text,level=level)
796
797
def main():
    """Command line entry point: convert structured text to HTML.

    The text is read from the file named by the single positional
    argument, or from standard input when no file is given, and the
    HTML is printed to standard output.

    Options:

      -w -- print a 'Content-Type: text/html' header first.
      -t -- accepted; see below.

    When any option is given, a leading '#!' line and leading blank
    lines are removed from the input, and if the resulting HTML starts
    with an <h1> heading it is wrapped in a complete HTML document
    that uses the heading as its title.
    """
    import sys, getopt

    opts,args=getopt.getopt(sys.argv[1:],'tw')

    if args:
        [infile]=args
        f=open(infile,'r')
        # Close the file explicitly instead of leaving it to garbage
        # collection.
        try:
            s=f.read()
        finally:
            f.close()
    else:
        s=sys.stdin.read()

    if not opts:
        print(html_with_references(s))
        return

    wants_header=0
    for opt in opts:
        if opt[0]=='-w':
            wants_header=1
    if wants_header:
        print('Content-Type: text/html\n')

    if s[:2]=='#!':
        s=ts_regex.sub('^#![^\n]+','',s)

    # Drop a leading run of characters in the range NUL..newline that
    # ends with a newline (i.e. leading blank lines).
    r=ts_regex.compile('\([\0-\n]*\n\)')
    ts_results = r.match_group(s, (1,))
    if ts_results:
        s=s[len(ts_results[1]):]

    s=str(html_with_references(s))
    if s[:4]=='<h1>':
        # Use the text of the first heading as the document title.
        t=s[4:find(s,'</h1>')]
        s='''<html><head><title>%s</title>
 </head><body>
 %s
 </body></html>
 ''' % (t,s)
    print(s)

if __name__=="__main__": main()