]>
git.saurik.com Git - wxWidgets.git/blob - wxPython/samples/stxview/StructuredText/ClassicStructuredText.py
   1 #! /usr/bin/env python -- # -*- python -*- 
   2 ############################################################################## 
   4 # Zope Public License (ZPL) Version 1.0 
   5 # ------------------------------------- 
   7 # Copyright (c) Digital Creations.  All rights reserved. 
   9 # This license has been certified as Open Source(tm). 
  11 # Redistribution and use in source and binary forms, with or without 
  12 # modification, are permitted provided that the following conditions are 
  15 # 1. Redistributions in source code must retain the above copyright 
  16 #    notice, this list of conditions, and the following disclaimer. 
  18 # 2. Redistributions in binary form must reproduce the above copyright 
  19 #    notice, this list of conditions, and the following disclaimer in 
  20 #    the documentation and/or other materials provided with the 
  23 # 3. Digital Creations requests that attribution be given to Zope 
  24 #    in any manner possible. Zope includes a "Powered by Zope" 
  25 #    button that is installed by default. While it is not a license 
  26 #    violation to remove this button, it is requested that the 
  27 #    attribution remain. A significant investment has been put 
  28 #    into Zope, and this effort will continue if the Zope community 
  29 #    continues to grow. This is one way to assure that growth. 
  31 # 4. All advertising materials and documentation mentioning 
  32 #    features derived from or use of this software must display 
  33 #    the following acknowledgement: 
  35 #      "This product includes software developed by Digital Creations 
  36 #      for use in the Z Object Publishing Environment 
  37 #      (http://www.zope.org/)." 
  39 #    In the event that the product being advertised includes an 
  40 #    intact Zope distribution (with copyright and license included) 
  41 #    then this clause is waived. 
  43 # 5. Names associated with Zope or Digital Creations must not be used to 
  44 #    endorse or promote products derived from this software without 
  45 #    prior written permission from Digital Creations. 
  47 # 6. Modified redistributions of any form whatsoever must retain 
  48 #    the following acknowledgment: 
  50 #      "This product includes software developed by Digital Creations 
  51 #      for use in the Z Object Publishing Environment 
  52 #      (http://www.zope.org/)." 
  54 #    Intact (re-)distributions of any official Zope release do not 
  55 #    require an external acknowledgement. 
  57 # 7. Modifications are encouraged but must be packaged separately as 
  58 #    patches to official Zope releases.  Distributions that do not 
  59 #    clearly separate the patches from the original work must be clearly 
  60 #    labeled as unofficial distributions.  Modifications which do not 
  61 #    carry the name Zope may be packaged in any form, as long as they 
  62 #    conform to all of the clauses above. 
  67 #   THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY 
  68 #   EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
  69 #   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 
  70 #   PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL DIGITAL CREATIONS OR ITS 
  71 #   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
  72 #   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
  73 #   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 
  74 #   USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 
  75 #   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 
  76 #   OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 
  77 #   OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
  81 # This software consists of contributions made by Digital Creations and 
  82 # many individuals on behalf of Digital Creations.  Specific 
  83 # attributions are listed in the accompanying credits file. 
  85 ############################################################################## 
  86 '''Structured Text Manipulation 
  88 Parse a structured text string into a form that can be used with  
  89 structured formats, like html. 
  91 Structured text is text that uses indentation and simple 
  92 symbology to indicate the structure of a document.   
  94 A structured string consists of a sequence of paragraphs separated by 
  95 one or more blank lines.  Each paragraph has a level which is defined 
  96 as the minimum indentation of the paragraph.  A paragraph is a 
  97 sub-paragraph of another paragraph if the other paragraph is the last 
  98 preceding paragraph that has a lower level. 
 100 Special symbology is used to indicate special constructs: 
 102 - A single-line paragraph whose immediately succeeding paragraphs are lower 
 103   level is treated as a header. 
 105 - A paragraph that begins with a '-', '*', or 'o' is treated as an 
 106   unordered list (bullet) element. 
 108 - A paragraph that begins with a sequence of digits followed by a 
 109   white-space character is treated as an ordered list element. 
 111 - A paragraph that begins with a sequence of sequences, where each 
 112   sequence is a sequence of digits or a sequence of letters followed 
 113   by a period, is treated as an ordered list element. 
 115 - A paragraph with a first line that contains some text, followed by 
 116   some white-space and '--' is treated as 
 117   a descriptive list element. The leading text is treated as the 
 120 - Sub-paragraphs of a paragraph that ends in the word 'example' or the 
 121   word 'examples', or '::' are treated as example code and are output as is. 
 123 - Text enclosed in single quotes (with white-space to the left of the 
 124   first quote and whitespace or punctuation to the right of the second quote) 
 125   is treated as example code. 
 127 - Text surrounded by '*' characters (with white-space to the left of the 
 128   first '*' and whitespace or punctuation to the right of the second '*') 
 131 - Text surrounded by '**' characters (with white-space to the left of the 
 132   first '**' and whitespace or punctuation to the right of the second '**') 
 135 - Text surrounded by '_' underscore characters (with whitespace to the left  
 136   and whitespace or punctuation to the right) is made underlined. 
 138 - Text enclosed by double quotes followed by a colon, a URL, and concluded 
 139   by punctuation plus white space, *or* just white space, is treated as a 
 140   hyper link. For example: 
 142     "Zope":http://www.zope.org/ is ... 
 144   Is interpreted as '<a href="http://www.zope.org/">Zope</a> is ....' 
 145   Note: This works for relative as well as absolute URLs. 
 147 - Text enclosed by double quotes followed by a comma, one or more spaces, 
 148   an absolute URL and concluded by punctuation plus white space, or just 
 149   white space, is treated as a hyper link. For example:  
 151     "mail me", mailto:amos@digicool.com. 
 153   Is interpreted as '<a href="mailto:amos@digicool.com">mail me</a>.'  
 155 - Text enclosed in brackets which consists only of letters, digits, 
 156   underscores and dashes is treated as hyper links within the document. 
 159     As demonstrated by Smith [12] this technique is quite effective. 
 161   Is interpreted as '... by Smith <a href="#12">[12]</a> this ...'. Together 
 162   with the next rule this allows easy coding of references or end notes. 
 164 - Text enclosed in brackets which is preceded by the start of a line, two 
 165   periods and a space is treated as a named link. For example: 
 167     .. [12] "Effective Techniques" Smith, Joe ...  
 169   Is interpreted as '<a name="12">[12]</a> "Effective Techniques" ...'. 
 170   Together with the previous rule this allows easy coding of references or 
 174 - A paragraph that has blocks of text enclosed in '||' is treated as a 
 175   table. The text blocks correspond to table cells and table rows are 
 176   denoted by newlines. By default the cells are center aligned. A cell 
 177   can span more than one column by preceding a block of text with an 
 178   equivalent number of cell separators '||'. Newlines and '|' cannot 
 179   be a part of the cell text. For example: 
 181       |||| **Ingredients** || 
 182       || *Name* || *Amount* || 
 188     <TABLE BORDER=1 CELLPADDING=2> 
 190       <TD ALIGN=CENTER COLSPAN=2> <strong>Ingredients</strong> </TD> 
 193       <TD ALIGN=CENTER COLSPAN=1> <em>Name</em> </TD> 
 194       <TD ALIGN=CENTER COLSPAN=1> <em>Amount</em> </TD> 
 197       <TD ALIGN=CENTER COLSPAN=1>Spam</TD> 
 198       <TD ALIGN=CENTER COLSPAN=1>10</TD> 
 201       <TD ALIGN=CENTER COLSPAN=1>Eggs</TD> 
 202       <TD ALIGN=CENTER COLSPAN=1>3</TD> 
 210 from ts_regex 
import gsub
 
 211 from string 
import split
, join
, strip
, find
 
 215 def untabify(aString
, 
 216              indent_tab
=ts_regex
.compile('\(\n\|^\)\( *\)\t').search_group
, 
 219     Convert indentation tabs to spaces. 
 224         ts_results 
= indent_tab(rest
, (1,2)) 
 226             start
, grps 
= ts_results
 
 229             result
=result
+rest
[:start
] 
 230             rest
="\n%s%s" % (' ' * ((indent
/8+1)*8), 
 231                              rest
[start
+indent
+1+lnl
:]) 
 235 def indent(aString
, indent
=2): 
 236     """Indent a string the given number of spaces""" 
 237     r
=split(untabify(aString
),'\n') 
 239     if not r
[-1]: del r
[-1] 
 241     return "%s%s\n" % (tab
,join(r
,'\n'+tab
)) 
 243 def reindent(aString
, indent
=2, already_untabified
=0): 
 244     "reindent a block of text, so that the minimum indent is as given" 
 246     if not already_untabified
: aString
=untabify(aString
) 
 248     l
=indent_level(aString
)[0] 
 249     if indent
==l
: return aString
 
 257         for s 
in split(aString
,'\n'): append(tab
+s
) 
 260         for s 
in split(aString
,'\n'): append(s
[l
:]) 
 264 def indent_level(aString
, 
 265                  indent_space
=ts_regex
.compile('\n\( *\)').search_group
, 
 268     Find the minimum indentation for a string, not counting blank lines. 
 275         ts_results 
= indent_space(text
, (1,2), start
) 
 277             start
, grps 
= ts_results
 
 280             if start 
< l 
and text
[start
] != '\n':       # Skip blank lines 
 281                 if not i
: return (0,aString
) 
 282                 if i 
< indent
: indent 
= i
 
 284             return (indent
,aString
) 
 286 def paragraphs(list,start
): 
 290     while i 
< l 
and list[i
][0] > level
: i
=i
+1 
 294     if not list: return [] 
 299         sublen
=paragraphs(list,i
) 
 301         r
.append((list[i
-1][1],structure(list[i
:i
+sublen
]))) 
 307     CELL
='  <TD ALIGN=CENTER COLSPAN=%i>%s</TD>\n' 
 308     ROW
=' <TR>\n%s </TR>\n' 
 309     TABLE
='\n<TABLE BORDER=1 CELLPADDING=2>\n%s</TABLE>' 
 311     def create(self
,aPar
, 
 312         td_reg
=re
.compile(r
'[ \t\n]*\|\|([^\0x00|]*)') 
 314         '''parses a table and returns nested list representing the 
 317         text
=filter(None,split(aPar
,'\n')) 
 321                 mo 
=  td_reg
.match(line
) 
 324                 row
.append(mo
.group(1)) 
 325                 if pos
==len(line
):break 
 327             self
.table
.append(row
) 
 331         '''Creates an HTML representation of table''' 
 333         for row 
in self
.table
: 
 341                     htmlrow
.append(self
.CELL
%(colspan
,cell
)) 
 343             htmltable
.append(self
.ROW
%join(htmlrow
,'')) 
 344         return self
.TABLE
%join(htmltable
,'') 
 348 class StructuredText
: 
 350     """Model text as structured collection of paragraphs. 
 352     Structure is implied by the indentation level. 
 354     This class is intended as a base for classes that do actual text 
 358     def __init__(self
, aStructuredString
, level
=0, 
 359                  paragraph_divider
=regex
.compile('\(\r?\n *\)+\r?\n'), 
 361         '''Convert a structured text string into a structured text object. 
 365           aStructuredString -- The string to be parsed. 
 366           level -- The level of top level headings to be created. 
 370         pat 
= ' \"([%s0-9-_,./?=@~&]*)\":' % string
.letters
+ \
 
 371               '([-:%s0-9_,./?=@#~&]*?)' % string
.letters 
+ \
 
 374         p_reg 
= re
.compile(pat
,re
.M
) 
 376         aStructuredString 
= p_reg
.sub(r
'<a href="\2">\1</a>\3 ' , aStructuredString
) 
 378         pat 
= ' \"([%s0-9-_,./?=@~&]*)\", ' % string
.letters
+ \
 
 379               '([-:%s0-9_,./?=@#~&]*?)' % string
.letters 
+ \
 
 382         p_reg 
= re
.compile(pat
,re
.M
) 
 384         aStructuredString 
= p_reg
.sub(r
'<a href="\2">\1</a>\3 ' , aStructuredString
) 
 387         protoless 
= find(aStructuredString
, '<a href=":') 
 389             aStructuredString 
= re
.sub('<a href=":', '<a href="', 
 393         paragraphs
=ts_regex
.split(untabify(aStructuredString
), 
 395         paragraphs
=map(indent_level
,paragraphs
) 
 397         self
.structure
=structure(paragraphs
) 
 401         return str(self
.structure
) 
 404 ctag_prefix
=r
'([\x00- \\(]|^)'  
 405 ctag_suffix
=r
'([\x00- ,.:;!?\\)]|$)'          
 406 ctag_middle
=r
'[%s]([^\x00- %s][^%s]*[^\x00- %s]|[^%s])[%s]'  
 407 ctag_middl2
=r
'[%s][%s]([^\x00- %s][^%s]*[^\x00- %s]|[^%s])[%s][%s]'     
 411              ctag_prefix
+(ctag_middle 
% (("*",)*6) )+ctag_suffix
), 
 413              ctag_prefix
+(ctag_middl2 
% (("*",)*8))+ctag_suffix
), 
 415              ctag_prefix
+(ctag_middle 
% (("_",)*6) )+ctag_suffix
), 
 417              ctag_prefix
+(ctag_middle 
% (("\'",)*6))+ctag_suffix
), 
 420     s
=strong
.sub(r
'\1<strong>\2</strong>\3',s
) 
 421     s
=under
.sub( r
'\1<u>\2</u>\3',s
) 
 422     s
=code
.sub(  r
'\1<code>\2</code>\3',s
) 
 423     s
=em
.sub(    r
'\1<em>\2</em>\3',s
) 
 426 class HTML(StructuredText
): 
 429     An HTML structured text formatter. 
 433                 extra_dl
=re
.compile("</dl>\n<dl>"), 
 434                 extra_ul
=re
.compile("</ul>\n<ul>"), 
 435                 extra_ol
=re
.compile("</ol>\n<ol>"), 
 438         Return an HTML string representation of the structured text data. 
 441         s
=self
._str
(self
.structure
,self
.level
) 
 442         s
=extra_dl
.sub('\n',s
) 
 443         s
=extra_ul
.sub('\n',s
) 
 444         s
=extra_ol
.sub('\n',s
) 
 447     def ul(self
, before
, p
, after
): 
 448         if p
: p
="<p>%s</p>" % strip(ctag(p
)) 
 449         return ('%s<ul><li>%s\n%s\n</li></ul>\n' 
 452     def ol(self
, before
, p
, after
): 
 453         if p
: p
="<p>%s</p>" % strip(ctag(p
)) 
 454         return ('%s<ol><li>%s\n%s\n</li></ol>\n' 
 457     def dl(self
, before
, t
, d
, after
): 
 458         return ('%s<dl><dt>%s</dt><dd><p>%s</p>\n%s\n</dd></dl>\n' 
 459                 % (before
,ctag(t
),ctag(d
),after
)) 
 461     def head(self
, before
, t
, level
, d
): 
 462         if level 
> 0 and level 
< 6: 
 463             return ('%s<h%d>%s</h%d>\n%s\n' 
 464                     % (before
,level
,strip(ctag(t
)),level
,d
)) 
 466         t
="<p><strong>%s</strong></p>" % strip(ctag(t
)) 
 467         return ('%s<dl><dt>%s\n</dt><dd>%s\n</dd></dl>\n' 
 470     def normal(self
,before
,p
,after
): 
 471         return '%s<p>%s</p>\n%s\n' % (before
,ctag(p
),after
) 
 473     def pre(self
,structure
,tagged
=0): 
 474         if not structure
: return '' 
 480             r
="%s%s\n\n%s" % (r
,html_quote(s
[0]),self
.pre(s
[1],1)) 
 481         if not tagged
: r
=r
+'</PRE>\n' 
 484     def table(self
,before
,table
,after
): 
 485         return '%s<p>%s</p>\n%s\n' % (before
,ctag(table
),after
) 
 487     def _str(self
,structure
,level
, 
 489              bullet
=ts_regex
.compile('[ \t\n]*[o*-][ \t\n]+\([^\0]*\)' 
 491              example
=ts_regex
.compile('[\0- ]examples?:[\0- ]*$' 
 493              dl
=ts_regex
.compile('\([^\n]+\)[ \t]+--[ \t\n]+\([^\0]*\)' 
 495              nl
=ts_regex
.compile('\n').search
, 
 497                  '[ \t]*\(\([0-9]+\|[%s]+\)[.)]\)+[ \t\n]+\([^\0]*\|$\)' % string
.letters
 
 499              olp
=ts_regex
.compile('[ \t]*([0-9]+)[ \t\n]+\([^\0]*\|$\)' 
 505             ts_results 
= bullet(s
[0], (1,)) 
 508                 if s
[0][-2:]=='::' and s
[1]: ps
=self
.pre(s
[1]) 
 509                 else: ps
=self
._str
(s
[1],level
) 
 512             ts_results 
= ol(s
[0], (3,)) 
 515                 if s
[0][-2:]=='::' and s
[1]: ps
=self
.pre(s
[1]) 
 516                 else: ps
=self
._str
(s
[1],level
) 
 519             ts_results 
= olp(s
[0], (1,)) 
 522                 if s
[0][-2:]=='::' and s
[1]: ps
=self
.pre(s
[1]) 
 523                 else: ps
=self
._str
(s
[1],level
) 
 526             ts_results 
= dl(s
[0], (1,2)) 
 529                 r
=self
.dl(r
,t
,d
,self
._str
(s
[1],level
)) 
 531             if example(s
[0]) >= 0 and s
[1]: 
 532                 # Introduce an example, using pre tags: 
 533                 r
=self
.normal(r
,s
[0],self
.pre(s
[1])) 
 535             if s
[0][-2:]=='::' and s
[1]: 
 536                 # Introduce an example, using pre tags: 
 537                 r
=self
.normal(r
,s
[0][:-1],self
.pre(s
[1])) 
 539             if table
.create(s
[0]): 
 541                 r
=self
.table(r
,table
.html(),self
._str
(s
[1],level
)) 
 545                 if nl(s
[0]) < 0 and s
[1] and s
[0][-1:] != ':': 
 548                     r
=self
.head(r
,t
,level
, 
 549                                 self
._str
(s
[1],level 
and level
+1)) 
 551                     r
=self
.normal(r
,s
[0],self
._str
(s
[1],level
)) 
 557                        (re
.compile('&'), '&'), 
 558                        (re
.compile("<"), '<' ), 
 559                        (re
.compile(">"), '>' ), 
 560                        (re
.compile('"'), '"') 
 563         for re
,name 
in character_entities
: 
 564             text
=re
.sub(name
,text
) 
 567 def html_with_references(text
, level
=1): 
 569         r
'[\0\n]\.\. \[([0-9_%s-]+)\]' % string
.letters
, 
 570         r
'\n  <a name="\1">[\1]</a>', 
 574         r
'([\x00- ,])\[(?P<ref>[0-9_%s-]+)\]([\x00- ,.:])'   % string
.letters
, 
 575         r
'\1<a href="#\2">[\2]</a>\3', 
 579         r
'([\0- ,])\[([^]]+)\.html\]([\0- ,.:])', 
 580         r
'\1<a href="\2.html">[\2]</a>\3', 
 583     return HTML(text
,level
=level
) 
 589     opts
,args
=getopt
.getopt(sys
.argv
[1:],'twl') 
 593         s
=open(infile
,'r').read() 
 599         if filter(lambda o
: o
[0]=='-w', opts
): 
 600             print 'Content-Type: text/html\n' 
 602         if filter(lambda o
: o
[0]=='-l', opts
): 
 604             locale
.setlocale(locale
.LC_ALL
,"") 
 607             s
=re
.sub('^#![^\n]+','',s
) 
 609         mo 
= re
.compile('([\0-\n]*\n)').match(s
) 
 611             s 
= s
[len(mo
.group(0)) :] 
 613         s
=str(html_with_references(s
)) 
 615             t
=s
[4:find(s
,'</h1>')] 
 616             s
='''<html><head><title>%s</title> 
 623         print html_with_references(s
) 
 625 if __name__
=="__main__": main()