From ddfc587a2ea899f654d940c21f102316a39985bd Mon Sep 17 00:00:00 2001 From: Robin Dunn <robin@alldunn.com> Date: Fri, 4 May 2001 18:28:27 +0000 Subject: [PATCH] Got a new version of StructuredText from Zope's CVS. git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@9995 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775 --- .../StructuredText/ClassicDocumentClass.py | 213 +++-- .../StructuredText/ClassicStructuredText.py | 625 ++++++++++++++ .../stxview/StructuredText/DocBookClass.py | 415 +++++----- .../stxview/StructuredText/DocumentClass.py | 417 +++++++--- .../stxview/StructuredText/HTMLClass.py | 51 +- .../stxview/StructuredText/HTMLWithImages.py | 15 +- .../samples/stxview/StructuredText/MML.py | 170 ---- wxPython/samples/stxview/StructuredText/ST.py | 11 +- .../samples/stxview/StructuredText/STDOM.py | 4 +- .../samples/stxview/StructuredText/STNG.txt | 2 +- .../stxview/StructuredText/STletters.py | 15 + .../stxview/StructuredText/StructuredText.py | 765 +----------------- .../stxview/StructuredText/__init__.py | 2 +- .../stxview/StructuredText/ts_regex.py | 215 ----- wxPython/samples/stxview/stxview.py | 8 +- 15 files changed, 1360 insertions(+), 1568 deletions(-) create mode 100644 wxPython/samples/stxview/StructuredText/ClassicStructuredText.py delete mode 100644 wxPython/samples/stxview/StructuredText/MML.py create mode 100644 wxPython/samples/stxview/StructuredText/STletters.py delete mode 100644 wxPython/samples/stxview/StructuredText/ts_regex.py diff --git a/wxPython/samples/stxview/StructuredText/ClassicDocumentClass.py b/wxPython/samples/stxview/StructuredText/ClassicDocumentClass.py index 23b73d6294..69fc9c81bb 100644 --- a/wxPython/samples/stxview/StructuredText/ClassicDocumentClass.py +++ b/wxPython/samples/stxview/StructuredText/ClassicDocumentClass.py @@ -1,24 +1,24 @@ ############################################################################## -# +# # Zope Public License (ZPL) Version 1.0 # ------------------------------------- -# +# # Copyright 
(c) Digital Creations. All rights reserved. -# +# # This license has been certified as Open Source(tm). -# +# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: -# +# # 1. Redistributions in source code must retain the above copyright # notice, this list of conditions, and the following disclaimer. -# +# # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in # the documentation and/or other materials provided with the # distribution. -# +# # 3. Digital Creations requests that attribution be given to Zope # in any manner possible. Zope includes a "Powered by Zope" # button that is installed by default. While it is not a license @@ -26,43 +26,43 @@ # attribution remain. A significant investment has been put # into Zope, and this effort will continue if the Zope community # continues to grow. This is one way to assure that growth. -# +# # 4. All advertising materials and documentation mentioning # features derived from or use of this software must display # the following acknowledgement: -# +# # "This product includes software developed by Digital Creations # for use in the Z Object Publishing Environment # (http://www.zope.org/)." -# +# # In the event that the product being advertised includes an # intact Zope distribution (with copyright and license included) # then this clause is waived. -# +# # 5. Names associated with Zope or Digital Creations must not be used to # endorse or promote products derived from this software without # prior written permission from Digital Creations. -# +# # 6. Modified redistributions of any form whatsoever must retain # the following acknowledgment: -# +# # "This product includes software developed by Digital Creations # for use in the Z Object Publishing Environment # (http://www.zope.org/)." 
-# +# # Intact (re-)distributions of any official Zope release do not # require an external acknowledgement. -# +# # 7. Modifications are encouraged but must be packaged separately as # patches to official Zope releases. Distributions that do not # clearly separate the patches from the original work must be clearly # labeled as unofficial distributions. Modifications which do not # carry the name Zope may be packaged in any form, as long as they # conform to all of the clauses above. -# -# +# +# # Disclaimer -# +# # THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY # EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -75,16 +75,17 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT # OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. -# -# +# +# # This software consists of contributions made by Digital Creations and # many individuals on behalf of Digital Creations. Specific # attributions are listed in the accompanying credits file. 
-# +# ############################################################################## import re, ST, STDOM from string import split, join, replace, expandtabs, strip, find +from STletters import letters,lettpunc,punctuations StringType=type('') ListType=type([]) @@ -116,7 +117,7 @@ class StructuredTextDescriptionBody(ST.StructuredTextParagraph): class StructuredTextDescription(ST.StructuredTextParagraph): """Represents a section of a document with a title and a body""" - + def __init__(self, title, src, subs, **kw): apply(ST.StructuredTextParagraph.__init__, (self, src, subs), kw) self._title=title @@ -138,6 +139,12 @@ class StructuredTextSection(ST.StructuredTextParagraph): (self, StructuredTextSectionTitle(src), subs), kw) + def getColorizableTexts(self): + return self._src.getColorizableTexts() + + def setColorizableTexts(self,src): + self._src.setColorizableTexts(src) + # a StructuredTextTable holds StructuredTextRows class StructuredTextTable(ST.StructuredTextDocument): """ @@ -146,27 +153,27 @@ class StructuredTextTable(ST.StructuredTextDocument): EX rows = [[('row 1:column1',1)],[('row2:column1',1)]] """ - + def __init__(self, rows, src, subs, **kw): apply(ST.StructuredTextDocument.__init__,(self,subs),kw) self._rows = [] for row in rows: if row: self._rows.append(StructuredTextRow(row,kw)) - + def getRows(self): return [self._rows] - + def _getRows(self): return self.getRows() - + def getColorizableTexts(self): """ return a tuple where each item is a column/cell's contents. The tuple, result, will be of this format. 
("r1 col1", "r1=col2", "r2 col1", "r2 col2") """ - + #result = () result = [] for row in self._rows: @@ -174,7 +181,7 @@ class StructuredTextTable(ST.StructuredTextDocument): #result = result[:] + (column.getColorizableTexts(),) result.append(column.getColorizableTexts()[0]) return result - + def setColorizableTexts(self,texts): """ texts is going to a tuple where each item is the @@ -186,35 +193,35 @@ class StructuredTextTable(ST.StructuredTextDocument): for column_index in range(len(self._rows[row_index]._columns)): self._rows[row_index]._columns[column_index].setColorizableTexts((texts[0],)) texts = texts[1:] - + def _getColorizableTexts(self): return self.getColorizableTexts() - + def _setColorizableTexts(self): return self.setColorizableTexts() - + # StructuredTextRow holds StructuredTextColumns class StructuredTextRow(ST.StructuredTextDocument): - + def __init__(self,row,kw): """ row is a list of tuples, where each tuple is the raw text for a cell/column and the span - of that cell/column". - EX + of that cell/column". + EX [('this is column one',1), ('this is column two',1)] """ - + apply(ST.StructuredTextDocument.__init__,(self,[]),kw) self._columns = [] - for column in row: + for column in row: self._columns.append(StructuredTextColumn(column[0],column[1],kw)) def getColumns(self): return [self._columns] def _getColumns(self): return [self._columns] - + # this holds the raw text of a table cell class StructuredTextColumn(ST.StructuredTextParagraph): """ @@ -223,19 +230,19 @@ class StructuredTextColumn(ST.StructuredTextParagraph): thus a StructuredTextParagraph. 
A StructuredTextColumn also holds the span of its column """ - + def __init__(self,text,span,kw): apply(ST.StructuredTextParagraph.__init__,(self,text,[]),kw) self._span = span - + def getSpan(self): return self._span - + def _getSpan(self): return self._span - + class StructuredTextMarkup(STDOM.Element): - + def __init__(self, v, **kw): self._value=v self._attributes=kw.keys() @@ -269,7 +276,7 @@ class StructuredTextUnderline(StructuredTextMarkup): pass class StructuredTextLink(StructuredTextMarkup): "A simple hyperlink" -class DocumentClass: +class DocumentClass: """ Class instance calls [ex.=> x()] require a structured text structure. Doc will then parse each paragraph in the structure @@ -316,10 +323,10 @@ class DocumentClass: """ Parse accepts a raw_string, an expr to test the raw_string, and the raw_string's subparagraphs. - - Parse will continue to search through raw_string until - all instances of expr in raw_string are found. - + + Parse will continue to search through raw_string until + all instances of expr in raw_string are found. + If no instances of expr are found, raw_string is returned. 
Otherwise a list of substrings and instances is returned """ @@ -351,10 +358,10 @@ class DocumentClass: raw_string = raw_string[end:len(raw_string)] if not tmp: return raw_string # nothing found - + if raw_string: append(raw_string) elif len(tmp)==1: return tmp[0] - + return tmp @@ -386,7 +393,7 @@ class DocumentClass: for s in str.getColorizableTexts(): color(s, (text_type,)) a(s) - + str.setColorizableTexts(r) return str @@ -396,11 +403,11 @@ class DocumentClass: st=type('')): result=[] for paragraph in raw_paragraphs: - + if paragraph.getNodeName() != 'StructuredTextParagraph': result.append(paragraph) continue - + for pt in self.paragraph_types: if type(pt) is st: # grab the corresponding function @@ -428,19 +435,20 @@ class DocumentClass: result.append(paragraph) return result - + def doc_table(self,paragraph, expr = re.compile('(\s*)([||]+)').match): + #print "paragraph=>", type(paragraph), paragraph, paragraph._src text = paragraph.getColorizableTexts()[0] m = expr(text) - + if not (m): return None rows = [] - + # initial split for row in split(text,"\n"): - rows.append(row) - + rows.append(row) + # clean up the rows for index in range(len(rows)): tmp = [] @@ -458,30 +466,30 @@ class DocumentClass: for index in range(len(rows)): l = len(rows[index])-1 rows[index] = rows[index][:l] - + result = [] for row in rows: cspan = 0 tmp = [] for item in row: if item: - tmp.append(item,cspan) + tmp.append((item,cspan)) cspan = 0 else: cspan = cspan + 1 result.append(tmp) - + subs = paragraph.getSubparagraphs() indent=paragraph.indent return StructuredTextTable(result,text,subs,indent=paragraph.indent) - + def doc_bullet(self, paragraph, expr = re.compile('\s*[-*o]\s+').match): top=paragraph.getColorizableTexts()[0] m=expr(top) if not m: return None - + subs=paragraph.getSubparagraphs() if top[-2:]=='::': subs=[StructuredTextExample(subs)] @@ -493,17 +501,17 @@ class DocumentClass: def doc_numbered( self, paragraph, - expr = 
re.compile('(\s*[a-zA-Z]+\.)|(\s*[0-9]+\.)|(\s*[0-9]+\s+)').match): - + expr = re.compile('(\s*[%s]+\.)|(\s*[0-9]+\.)|(\s*[0-9]+\s+)' % letters).match): + # This is the old expression. It had a nasty habit # of grabbing paragraphs that began with a single # letter word even if there was no following period. - + #expr = re.compile('\s*' # '(([a-zA-Z]|[0-9]+|[ivxlcdmIVXLCDM]+)\.)*' # '([a-zA-Z]|[0-9]+|[ivxlcdmIVXLCDM]+)\.?' # '\s+').match): - + top=paragraph.getColorizableTexts()[0] m=expr(top) if not m: return None @@ -518,7 +526,7 @@ class DocumentClass: def doc_description( self, paragraph, delim = re.compile('\s+--\s+').search, - nb=re.compile(r'[^\0- ]').search, + nb=re.compile(r'[^\000- ]').search, ): top=paragraph.getColorizableTexts()[0] @@ -542,7 +550,7 @@ class DocumentClass: delim=d) def doc_header(self, paragraph, - expr = re.compile('[ a-zA-Z0-9.:/,-_*<>\?\'\"]+').match + expr = re.compile('[ %s0-9.:/,-_*<>\?\'\"]+' % letters).match ): subs=paragraph.getSubparagraphs() if not subs: return None @@ -562,11 +570,11 @@ class DocumentClass: def doc_literal( self, s, expr=re.compile( - "(?:\s|^)'" # open + "(?:\s|^)'" # open "([^ \t\n\r\f\v']|[^ \t\n\r\f\v'][^\n']*[^ \t\n\r\f\v'])" # contents - "'(?:\s|[,.;:!?]|$)" # close + "'(?:\s|[,.;:!?]|$)" # close ).search): - + r=expr(s) if r: start, end = r.span(1) @@ -576,7 +584,7 @@ class DocumentClass: def doc_emphasize( self, s, - expr = re.compile('\s*\*([ \na-zA-Z0-9.:/;,\'\"\?]+)\*(?!\*|-)').search + expr = re.compile('\s*\*([ \n%s0-9]+)\*(?!\*|-)' % lettpunc).search ): r=expr(s) @@ -585,12 +593,12 @@ class DocumentClass: return (StructuredTextEmphasis(s[start:end]), start-1, end+1) else: return None - + def doc_inner_link(self, s, expr1 = re.compile("\.\.\s*").search, - expr2 = re.compile("\[[a-zA-Z0-9]+\]").search): - + expr2 = re.compile("\[[%s0-9]+\]" % letters).search): + # make sure we dont grab a named link if expr2(s) and expr1(s): start1,end1 = expr1(s).span() @@ -600,17 +608,17 @@ class DocumentClass: 
return None else: # the .. is somewhere else, ignore it - return (StructuredTextInnerLink(s[start2+1,end2-1],start2,end2)) + return (StructuredTextInnerLink(s[start2+1:end2-1]),start2,end2) return None elif expr2(s) and not expr1(s): start,end = expr2(s).span() return (StructuredTextInnerLink(s[start+1:end-1]),start,end) return None - + def doc_named_link(self, s, - expr=re.compile("(\.\.\s)(\[[a-zA-Z0-9]+\])").search): - + expr=re.compile("(\.\.\s)(\[[%s0-9]+\])" % letters).search): + result = expr(s) if result: start,end = result.span(2) @@ -621,11 +629,11 @@ class DocumentClass: return (StructuredTextNamedLink(str),st,en) #return (StructuredTextNamedLink(s[st:en]),st,en) return None - + def doc_underline(self, s, - expr=re.compile("\_([a-zA-Z0-9\s\.,\?\/]+)\_").search): - + expr=re.compile("\s+\_([0-9%s ]+)\_" % lettpunc).search): + result = expr(s) if result: start,end = result.span(1) @@ -633,10 +641,10 @@ class DocumentClass: return (StructuredTextUnderline(s[start:end]),st,e) else: return None - - def doc_strong(self, + + def doc_strong(self, s, - expr = re.compile('\s*\*\*([ \na-zA-Z0-9.:/;\-,!\?\'\"]+)\*\*').search + expr = re.compile('\s*\*\*([ \n%s0-9]+)\*\*' % lettpunc).search ): r=expr(s) @@ -645,45 +653,32 @@ class DocumentClass: return (StructuredTextStrong(s[start:end]), start-2, end+2) else: return None - + def doc_href( - + self, s, - expr1 = re.compile("(\"[ a-zA-Z0-9\n\-\.\,\;\(\)\/\:\/]+\")(:)([a-zA-Z0-9\:\/\.\~\-]+)([,]*\s*)").search, - expr2 = re.compile('(\"[ a-zA-Z0-9\n\-\.\:\;\(\)\/]+\")([,]+\s+)([a-zA-Z0-9\@\.\,\?\!\/\:\;\-\#]+)(\s*)').search): - - #expr1=re.compile('\"([ a-zA-Z0-9.:/;,\n\~\(\)\-]+)\"' - # ':' - # '([a-zA-Z0-9.:/;,\n\~]+)(?=(\s+|\.|\!|\?))' - # ).search, - #expr2=re.compile('\"([ a-zA-Z0-9./:]+)\"' - # ',\s+' - # '([ a-zA-Z0-9@.:/;]+)(?=(\s+|\.|\!|\?))' - # ).search, - - punctuation = re.compile("[\,\.\?\!\;]+").match + expr1 = re.compile("(\"[ 
%s0-9\n\-\.\,\;\(\)\/\:\/\*\']+\")(:)([a-zA-Z0-9\@\.\,\?\!\/\:\;\-\#\~]+)([,]*\s*)" % letters).search, + expr2 = re.compile('(\"[ %s0-9\n\-\.\:\;\(\)\/\*\']+\")([,]+\s+)([a-zA-Z0-9\@\.\,\?\!\/\:\;\-\#\~]+)(\s*)' % letters).search): + r=expr1(s) or expr2(s) if r: # need to grab the href part and the # beginning part - + start,e = r.span(1) name = s[start:e] name = replace(name,'"','',2) - #start = start + 1 st,end = r.span(3) - if punctuation(s[end-1:end]): - end = end -1 + + if s[end-1:end] in punctuations: end-=1 link = s[st:end] - #end = end - 1 - + # name is the href title, link is the target # of the href return (StructuredTextLink(name, href=link), start, end) - - #return (StructuredTextLink(s[start:end], href=s[start:end]), - # start, end) + + else: return None diff --git a/wxPython/samples/stxview/StructuredText/ClassicStructuredText.py b/wxPython/samples/stxview/StructuredText/ClassicStructuredText.py new file mode 100644 index 0000000000..b591558f73 --- /dev/null +++ b/wxPython/samples/stxview/StructuredText/ClassicStructuredText.py @@ -0,0 +1,625 @@ +#! /usr/bin/env python -- # -*- python -*- +############################################################################## +# +# Zope Public License (ZPL) Version 1.0 +# ------------------------------------- +# +# Copyright (c) Digital Creations. All rights reserved. +# +# This license has been certified as Open Source(tm). +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# 1. Redistributions in source code must retain the above copyright +# notice, this list of conditions, and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# +# 3. 
Digital Creations requests that attribution be given to Zope +# in any manner possible. Zope includes a "Powered by Zope" +# button that is installed by default. While it is not a license +# violation to remove this button, it is requested that the +# attribution remain. A significant investment has been put +# into Zope, and this effort will continue if the Zope community +# continues to grow. This is one way to assure that growth. +# +# 4. All advertising materials and documentation mentioning +# features derived from or use of this software must display +# the following acknowledgement: +# +# "This product includes software developed by Digital Creations +# for use in the Z Object Publishing Environment +# (http://www.zope.org/)." +# +# In the event that the product being advertised includes an +# intact Zope distribution (with copyright and license included) +# then this clause is waived. +# +# 5. Names associated with Zope or Digital Creations must not be used to +# endorse or promote products derived from this software without +# prior written permission from Digital Creations. +# +# 6. Modified redistributions of any form whatsoever must retain +# the following acknowledgment: +# +# "This product includes software developed by Digital Creations +# for use in the Z Object Publishing Environment +# (http://www.zope.org/)." +# +# Intact (re-)distributions of any official Zope release do not +# require an external acknowledgement. +# +# 7. Modifications are encouraged but must be packaged separately as +# patches to official Zope releases. Distributions that do not +# clearly separate the patches from the original work must be clearly +# labeled as unofficial distributions. Modifications which do not +# carry the name Zope may be packaged in any form, as long as they +# conform to all of the clauses above. 
+# +# +# Disclaimer +# +# THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY +# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL CREATIONS OR ITS +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# +# +# This software consists of contributions made by Digital Creations and +# many individuals on behalf of Digital Creations. Specific +# attributions are listed in the accompanying credits file. +# +############################################################################## +'''Structured Text Manipulation + +Parse a structured text string into a form that can be used with +structured formats, like html. + +Structured text is text that uses indentation and simple +symbology to indicate the structure of a document. + +A structured string consists of a sequence of paragraphs separated by +one or more blank lines. Each paragraph has a level which is defined +as the minimum indentation of the paragraph. A paragraph is a +sub-paragraph of another paragraph if the other paragraph is the last +preceding paragraph that has a lower level. + +Special symbology is used to indicate special constructs: + +- A single-line paragraph whose immediately succeeding paragraphs are lower + level is treated as a header. + +- A paragraph that begins with a '-', '*', or 'o' is treated as an + unordered list (bullet) element. 
+ +- A paragraph that begins with a sequence of digits followed by a + white-space character is treated as an ordered list element. + +- A paragraph that begins with a sequence of sequences, where each + sequence is a sequence of digits or a sequence of letters followed + by a period, is treated as an ordered list element. + +- A paragraph with a first line that contains some text, followed by + some white-space and '--' is treated as + a descriptive list element. The leading text is treated as the + element title. + +- Sub-paragraphs of a paragraph that ends in the word 'example' or the + word 'examples', or '::' is treated as example code and is output as is. + +- Text enclosed single quotes (with white-space to the left of the + first quote and whitespace or punctuation to the right of the second quote) + is treated as example code. + +- Text surrounded by '*' characters (with white-space to the left of the + first '*' and whitespace or punctuation to the right of the second '*') + is emphasized. + +- Text surrounded by '**' characters (with white-space to the left of the + first '**' and whitespace or punctuation to the right of the second '**') + is made strong. + +- Text surrounded by '_' underscore characters (with whitespace to the left + and whitespace or punctuation to the right) is made underlined. + +- Text encloded by double quotes followed by a colon, a URL, and concluded + by punctuation plus white space, *or* just white space, is treated as a + hyper link. For example: + + "Zope":http://www.zope.org/ is ... + + Is interpreted as '<a href="http://www.zope.org/">Zope</a> is ....' + Note: This works for relative as well as absolute URLs. + +- Text enclosed by double quotes followed by a comma, one or more spaces, + an absolute URL and concluded by punctuation plus white space, or just + white space, is treated as a hyper link. For example: + + "mail me", mailto:amos@digicool.com. + + Is interpreted as '<a href="mailto:amos@digicool.com">mail me</a>.' 
+ +- Text enclosed in brackets which consists only of letters, digits, + underscores and dashes is treated as hyper links within the document. + For example: + + As demonstrated by Smith [12] this technique is quite effective. + + Is interpreted as '... by Smith <a href="#12">[12]</a> this ...'. Together + with the next rule this allows easy coding of references or end notes. + +- Text enclosed in brackets which is preceded by the start of a line, two + periods and a space is treated as a named link. For example: + + .. [12] "Effective Techniques" Smith, Joe ... + + Is interpreted as '<a name="12">[12]</a> "Effective Techniques" ...'. + Together with the previous rule this allows easy coding of references or + end notes. + + +- A paragraph that has blocks of text enclosed in '||' is treated as a + table. The text blocks correspond to table cells and table rows are + denoted by newlines. By default the cells are center aligned. A cell + can span more than one column by preceding a block of text with an + equivalent number of cell separators '||'. Newlines and '|' cannot + be a part of the cell text. For example: + + |||| **Ingredients** || + || *Name* || *Amount* || + ||Spam||10|| + ||Eggs||3|| + + is interpreted as:: + + <TABLE BORDER=1 CELLPADDING=2> + <TR> + <TD ALIGN=CENTER COLSPAN=2> <strong>Ingredients</strong> </TD> + </TR> + <TR> + <TD ALIGN=CENTER COLSPAN=1> <em>Name</em> </TD> + <TD ALIGN=CENTER COLSPAN=1> <em>Amount</em> </TD> + </TR> + <TR> + <TD ALIGN=CENTER COLSPAN=1>Spam</TD> + <TD ALIGN=CENTER COLSPAN=1>10</TD> + </TR> + <TR> + <TD ALIGN=CENTER COLSPAN=1>Eggs</TD> + <TD ALIGN=CENTER COLSPAN=1>3</TD> + </TR> + </TABLE> + +''' + +import ts_regex +import regex +from ts_regex import gsub +from string import split, join, strip, find +import string,re + + +def untabify(aString, + indent_tab=ts_regex.compile('\(\n\|^\)\( *\)\t').search_group, + ): + '''\ + Convert indentation tabs to spaces. 
+ ''' + result='' + rest=aString + while 1: + ts_results = indent_tab(rest, (1,2)) + if ts_results: + start, grps = ts_results + lnl=len(grps[0]) + indent=len(grps[1]) + result=result+rest[:start] + rest="\n%s%s" % (' ' * ((indent/8+1)*8), + rest[start+indent+1+lnl:]) + else: + return result+rest + +def indent(aString, indent=2): + """Indent a string the given number of spaces""" + r=split(untabify(aString),'\n') + if not r: return '' + if not r[-1]: del r[-1] + tab=' '*level + return "%s%s\n" % (tab,join(r,'\n'+tab)) + +def reindent(aString, indent=2, already_untabified=0): + "reindent a block of text, so that the minimum indent is as given" + + if not already_untabified: aString=untabify(aString) + + l=indent_level(aString)[0] + if indent==l: return aString + + r=[] + + append=r.append + + if indent > l: + tab=' ' * (indent-l) + for s in split(aString,'\n'): append(tab+s) + else: + l=l-indent + for s in split(aString,'\n'): append(s[l:]) + + return join(r,'\n') + +def indent_level(aString, + indent_space=ts_regex.compile('\n\( *\)').search_group, + ): + '''\ + Find the minimum indentation for a string, not counting blank lines. 
+ ''' + start=0 + text='\n'+aString + indent=l=len(text) + while 1: + + ts_results = indent_space(text, (1,2), start) + if ts_results: + start, grps = ts_results + i=len(grps[0]) + start=start+i+1 + if start < l and text[start] != '\n': # Skip blank lines + if not i: return (0,aString) + if i < indent: indent = i + else: + return (indent,aString) + +def paragraphs(list,start): + l=len(list) + level=list[start][0] + i=start+1 + while i < l and list[i][0] > level: i=i+1 + return i-1-start + +def structure(list): + if not list: return [] + i=0 + l=len(list) + r=[] + while i < l: + sublen=paragraphs(list,i) + i=i+1 + r.append((list[i-1][1],structure(list[i:i+sublen]))) + i=i+sublen + return r + + +class Table: + CELL=' <TD ALIGN=CENTER COLSPAN=%i>%s</TD>\n' + ROW=' <TR>\n%s </TR>\n' + TABLE='\n<TABLE BORDER=1 CELLPADDING=2>\n%s</TABLE>' + + def create(self,aPar, + td_reg=re.compile(r'[ \t\n]*\|\|([^\0x00|]*)') + ): + '''parses a table and returns nested list representing the + table''' + self.table=[] + text=filter(None,split(aPar,'\n')) + for line in text: + row=[] + while 1: + mo = td_reg.match(line) + if not mo: return 0 + pos = mo.end(1) + row.append(mo.group(1)) + if pos==len(line):break + line=line[pos:] + self.table.append(row) + return 1 + + def html(self): + '''Creates an HTML representation of table''' + htmltable=[] + for row in self.table: + htmlrow=[] + colspan=1 + for cell in row: + if cell=='': + colspan=colspan+1 + continue + else: + htmlrow.append(self.CELL%(colspan,cell)) + colspan=1 + htmltable.append(self.ROW%join(htmlrow,'')) + return self.TABLE%join(htmltable,'') + +table=Table() + +class StructuredText: + + """Model text as structured collection of paragraphs. + + Structure is implied by the indentation level. + + This class is intended as a base classes that do actual text + output formatting. 
+ """ + + def __init__(self, aStructuredString, level=0, + paragraph_divider=regex.compile('\(\r?\n *\)+\r?\n'), + ): + '''Convert a structured text string into a structured text object. + + Aguments: + + aStructuredString -- The string to be parsed. + level -- The level of top level headings to be created. + ''' + + + pat = ' \"([%s0-9-_,./?=@~&]*)\":' % string.letters+ \ + '([-:%s0-9_,./?=@#~&]*?)' % string.letters + \ + '([.:?;] )' + + p_reg = re.compile(pat,re.M) + + aStructuredString = p_reg.sub(r'<a href="\2">\1</a>\3 ' , aStructuredString) + + pat = ' \"([%s0-9-_,./?=@~&]*)\", ' % string.letters+ \ + '([-:%s0-9_,./?=@#~&]*?)' % string.letters + \ + '([.:?;] )' + + p_reg = re.compile(pat,re.M) + + aStructuredString = p_reg.sub(r'<a href="\2">\1</a>\3 ' , aStructuredString) + + + protoless = find(aStructuredString, '<a href=":') + if protoless != -1: + aStructuredString = re.sub('<a href=":', '<a href="', + aStructuredString) + + self.level=level + paragraphs=ts_regex.split(untabify(aStructuredString), + paragraph_divider) + paragraphs=map(indent_level,paragraphs) + + self.structure=structure(paragraphs) + + + def __str__(self): + return str(self.structure) + + +ctag_prefix=r'([\x00- \\(]|^)' +ctag_suffix=r'([\x00- ,.:;!?\\)]|$)' +ctag_middle=r'[%s]([^\x00- %s][^%s]*[^\x00- %s]|[^%s])[%s]' +ctag_middl2=r'[%s][%s]([^\x00- %s][^%s]*[^\x00- %s]|[^%s])[%s][%s]' + +def ctag(s, + em=re.compile( + ctag_prefix+(ctag_middle % (("*",)*6) )+ctag_suffix), + strong=re.compile( + ctag_prefix+(ctag_middl2 % (("*",)*8))+ctag_suffix), + under=re.compile( + ctag_prefix+(ctag_middle % (("_",)*6) )+ctag_suffix), + code=re.compile( + ctag_prefix+(ctag_middle % (("\'",)*6))+ctag_suffix), + ): + if s is None: s='' + s=strong.sub(r'\1<strong>\2</strong>\3',s) + s=under.sub( r'\1<u>\2</u>\3',s) + s=code.sub( r'\1<code>\2</code>\3',s) + s=em.sub( r'\1<em>\2</em>\3',s) + return s + +class HTML(StructuredText): + + '''\ + An HTML structured text formatter. 
+ '''\ + + def __str__(self, + extra_dl=re.compile("</dl>\n<dl>"), + extra_ul=re.compile("</ul>\n<ul>"), + extra_ol=re.compile("</ol>\n<ol>"), + ): + '''\ + Return an HTML string representation of the structured text data. + + ''' + s=self._str(self.structure,self.level) + s=extra_dl.sub('\n',s) + s=extra_ul.sub('\n',s) + s=extra_ol.sub('\n',s) + return s + + def ul(self, before, p, after): + if p: p="<p>%s</p>" % strip(ctag(p)) + return ('%s<ul><li>%s\n%s\n</li></ul>\n' + % (before,p,after)) + + def ol(self, before, p, after): + if p: p="<p>%s</p>" % strip(ctag(p)) + return ('%s<ol><li>%s\n%s\n</li></ol>\n' + % (before,p,after)) + + def dl(self, before, t, d, after): + return ('%s<dl><dt>%s</dt><dd><p>%s</p>\n%s\n</dd></dl>\n' + % (before,ctag(t),ctag(d),after)) + + def head(self, before, t, level, d): + if level > 0 and level < 6: + return ('%s<h%d>%s</h%d>\n%s\n' + % (before,level,strip(ctag(t)),level,d)) + + t="<p><strong>%s</strong></p>" % strip(ctag(t)) + return ('%s<dl><dt>%s\n</dt><dd>%s\n</dd></dl>\n' + % (before,t,d)) + + def normal(self,before,p,after): + return '%s<p>%s</p>\n%s\n' % (before,ctag(p),after) + + def pre(self,structure,tagged=0): + if not structure: return '' + if tagged: + r='' + else: + r='<PRE>\n' + for s in structure: + r="%s%s\n\n%s" % (r,html_quote(s[0]),self.pre(s[1],1)) + if not tagged: r=r+'</PRE>\n' + return r + + def table(self,before,table,after): + return '%s<p>%s</p>\n%s\n' % (before,ctag(table),after) + + def _str(self,structure,level, + # Static + bullet=ts_regex.compile('[ \t\n]*[o*-][ \t\n]+\([^\0]*\)' + ).match_group, + example=ts_regex.compile('[\0- ]examples?:[\0- ]*$' + ).search, + dl=ts_regex.compile('\([^\n]+\)[ \t]+--[ \t\n]+\([^\0]*\)' + ).match_group, + nl=ts_regex.compile('\n').search, + ol=ts_regex.compile( + '[ \t]*\(\([0-9]+\|[%s]+\)[.)]\)+[ \t\n]+\([^\0]*\|$\)' % string.letters + ).match_group, + olp=ts_regex.compile('[ \t]*([0-9]+)[ \t\n]+\([^\0]*\|$\)' + ).match_group, + ): + r='' + for s in structure: + + 
ts_results = bullet(s[0], (1,)) + if ts_results: + p = ts_results[1] + if s[0][-2:]=='::' and s[1]: ps=self.pre(s[1]) + else: ps=self._str(s[1],level) + r=self.ul(r,p,ps) + continue + ts_results = ol(s[0], (3,)) + if ts_results: + p = ts_results[1] + if s[0][-2:]=='::' and s[1]: ps=self.pre(s[1]) + else: ps=self._str(s[1],level) + r=self.ol(r,p,ps) + continue + ts_results = olp(s[0], (1,)) + if ts_results: + p = ts_results[1] + if s[0][-2:]=='::' and s[1]: ps=self.pre(s[1]) + else: ps=self._str(s[1],level) + r=self.ol(r,p,ps) + continue + ts_results = dl(s[0], (1,2)) + if ts_results: + t,d = ts_results[1] + r=self.dl(r,t,d,self._str(s[1],level)) + continue + if example(s[0]) >= 0 and s[1]: + # Introduce an example, using pre tags: + r=self.normal(r,s[0],self.pre(s[1])) + continue + if s[0][-2:]=='::' and s[1]: + # Introduce an example, using pre tags: + r=self.normal(r,s[0][:-1],self.pre(s[1])) + continue + if table.create(s[0]): + ## table support. + r=self.table(r,table.html(),self._str(s[1],level)) + continue + else: + + if nl(s[0]) < 0 and s[1] and s[0][-1:] != ':': + # Treat as a heading + t=s[0] + r=self.head(r,t,level, + self._str(s[1],level and level+1)) + else: + r=self.normal(r,s[0],self._str(s[1],level)) + return r + + +def html_quote(v, + character_entities=( + (re.compile('&'), '&'), + (re.compile("<"), '<' ), + (re.compile(">"), '>' ), + (re.compile('"'), '"') + )): #" + text=str(v) + for re,name in character_entities: + text=re.sub(name,text) + return text + +def html_with_references(text, level=1): + text = re.sub( + r'[\0\n]\.\. 
\[([0-9_%s-]+)\]' % string.letters, + r'\n <a name="\1">[\1]</a>', + text) + + text = re.sub( + r'([\x00- ,])\[(?P<ref>[0-9_%s-]+)\]([\x00- ,.:])' % string.letters, + r'\1<a href="#\2">[\2]</a>\3', + text) + + text = re.sub( + r'([\0- ,])\[([^]]+)\.html\]([\0- ,.:])', + r'\1<a href="\2.html">[\2]</a>\3', + text) + + return HTML(text,level=level) + + +def main(): + import sys, getopt + + opts,args=getopt.getopt(sys.argv[1:],'twl') + + if args: + [infile]=args + s=open(infile,'r').read() + else: + s=sys.stdin.read() + + if opts: + + if filter(lambda o: o[0]=='-w', opts): + print 'Content-Type: text/html\n' + + if filter(lambda o: o[0]=='-l', opts): + import locale + locale.setlocale(locale.LC_ALL,"") + + if s[:2]=='#!': + s=re.sub('^#![^\n]+','',s) + + mo = re.compile('([\0-\n]*\n)').match(s) + if mo is not None: + s = s[len(mo.group(0)) :] + + s=str(html_with_references(s)) + if s[:4]=='<h1>': + t=s[4:find(s,'</h1>')] + s='''<html><head><title>%s</title> + </head><body> + %s + </body></html> + ''' % (t,s) + print s + else: + print html_with_references(s) + +if __name__=="__main__": main() diff --git a/wxPython/samples/stxview/StructuredText/DocBookClass.py b/wxPython/samples/stxview/StructuredText/DocBookClass.py index b126878bde..5a14f33d78 100644 --- a/wxPython/samples/stxview/StructuredText/DocBookClass.py +++ b/wxPython/samples/stxview/StructuredText/DocBookClass.py @@ -88,238 +88,245 @@ from string import join, split, find, lstrip class DocBookClass: - element_types={ - '#text': '_text', - 'StructuredTextDocument': 'document', - 'StructuredTextParagraph': 'paragraph', - 'StructuredTextExample': 'example', - 'StructuredTextBullet': 'bullet', - 'StructuredTextNumbered': 'numbered', - 'StructuredTextDescription': 'description', - 'StructuredTextDescriptionTitle': 'descriptionTitle', - 'StructuredTextDescriptionBody': 'descriptionBody', - 'StructuredTextSection': 'section', - 'StructuredTextSectionTitle': 'sectionTitle', - 'StructuredTextLiteral': 'literal', - 
'StructuredTextEmphasis': 'emphasis', - 'StructuredTextStrong': 'strong', - 'StructuredTextLink': 'link', - 'StructuredTextXref': 'xref', - } - - def dispatch(self, doc, level, output): - getattr(self, self.element_types[doc.getNodeName()])(doc, level, output) - - def __call__(self, doc, level=1): - r=[] - self.dispatch(doc, level-1, r.append) - return join(r,'') - - def _text(self, doc, level, output): - if doc.getNodeName() == 'StructuredTextLiteral': - output(doc.getNodeValue()) - else: - output(lstrip(doc.getNodeValue())) - - def document(self, doc, level, output): - output('<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook V3.1//EN">\n') - output('<book>\n') - children=doc.getChildNodes() - if (children and - children[0].getNodeName() == 'StructuredTextSection'): - output('<title>%s</title>' % children[0].getChildNodes()[0].getNodeValue()) - for c in children: + element_types={ + '#text': '_text', + 'StructuredTextDocument': 'document', + 'StructuredTextParagraph': 'paragraph', + 'StructuredTextExample': 'example', + 'StructuredTextBullet': 'bullet', + 'StructuredTextNumbered': 'numbered', + 'StructuredTextDescription': 'description', + 'StructuredTextDescriptionTitle': 'descriptionTitle', + 'StructuredTextDescriptionBody': 'descriptionBody', + 'StructuredTextSection': 'section', + 'StructuredTextSectionTitle': 'sectionTitle', + 'StructuredTextLiteral': 'literal', + 'StructuredTextEmphasis': 'emphasis', + 'StructuredTextStrong': 'strong', + 'StructuredTextLink': 'link', + 'StructuredTextXref': 'xref', + 'StructuredTextSGML': 'sgml', + } + + def dispatch(self, doc, level, output): + getattr(self, self.element_types[doc.getNodeName()])(doc, level, output) + + def __call__(self, doc, level=1): + r=[] + self.dispatch(doc, level-1, r.append) + return join(r,'') + + def _text(self, doc, level, output): + if doc.getNodeName() == 'StructuredTextLiteral': + output(doc.getNodeValue()) + else: + output(lstrip(doc.getNodeValue())) + + def document(self, doc, level, output): + 
output('<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook V4.1//EN">\n') + output('<book>\n') + children=doc.getChildNodes() + if (children and + children[0].getNodeName() == 'StructuredTextSection'): + output('<title>%s</title>' % children[0].getChildNodes()[0].getNodeValue()) + for c in children: + getattr(self, self.element_types[c.getNodeName()])(c, level, output) + output('</book>\n') + + def section(self, doc, level, output): + output('\n<section>\n') + children=doc.getChildNodes() + for c in children: + getattr(self, self.element_types[c.getNodeName()])(c, level+1, output) + output('\n</section>\n') + + def sectionTitle(self, doc, level, output): + output('<title>') + for c in doc.getChildNodes(): + try: getattr(self, self.element_types[c.getNodeName()])(c, level, output) - output('</book>\n') - - def section(self, doc, level, output): - output('\n<sect%s>\n' % (level + 1)) - children=doc.getChildNodes() - for c in children: - getattr(self, self.element_types[c.getNodeName()])(c, level+1, output) - output('\n</sect%s>\n' % (level + 1)) - - def sectionTitle(self, doc, level, output): - output('<title>') - for c in doc.getChildNodes(): - getattr(self, self.element_types[c.getNodeName()])(c, level, output) - output('</title>\n') - - def description(self, doc, level, output): - p=doc.getPreviousSibling() - if p is None or p.getNodeName() is not doc.getNodeName(): - output('<variablelist>\n') - for c in doc.getChildNodes(): - getattr(self, self.element_types[c.getNodeName()])(c, level, output) - n=doc.getNextSibling() - if n is None or n.getNodeName() is not doc.getNodeName(): - output('</variablelist>\n') - - def descriptionTitle(self, doc, level, output): - output('<varlistentry><term>\n') - for c in doc.getChildNodes(): - getattr(self, self.element_types[c.getNodeName()])(c, level, output) - output('</term>\n') - - def descriptionBody(self, doc, level, output): - output('<listitem><para>\n') - for c in doc.getChildNodes(): - getattr(self, 
self.element_types[c.getNodeName()])(c, level, output) - output('</para></listitem>\n') - output('</varlistentry>\n') + except: + print "failed", c.getNodeName(), c + output('</title>\n') + + def description(self, doc, level, output): + p=doc.getPreviousSibling() + if p is None or p.getNodeName() is not doc.getNodeName(): + output('<variablelist>\n') + for c in doc.getChildNodes(): + getattr(self, self.element_types[c.getNodeName()])(c, level, output) + n=doc.getNextSibling() + if n is None or n.getNodeName() is not doc.getNodeName(): + output('</variablelist>\n') + + def descriptionTitle(self, doc, level, output): + output('<varlistentry><term>\n') + for c in doc.getChildNodes(): + getattr(self, self.element_types[c.getNodeName()])(c, level, output) + output('</term>\n') + + def descriptionBody(self, doc, level, output): + output('<listitem><para>\n') + for c in doc.getChildNodes(): + getattr(self, self.element_types[c.getNodeName()])(c, level, output) + output('</para></listitem>\n') + output('</varlistentry>\n') + + def bullet(self, doc, level, output): + p=doc.getPreviousSibling() + if p is None or p.getNodeName() is not doc.getNodeName(): + output('<itemizedlist>\n') + output('<listitem><para>\n') + + for c in doc.getChildNodes(): + getattr(self, self.element_types[c.getNodeName()])(c, level, output) + n=doc.getNextSibling() + output('</para></listitem>\n') + if n is None or n.getNodeName() is not doc.getNodeName(): + output('</itemizedlist>\n') + + def numbered(self, doc, level, output): + p=doc.getPreviousSibling() + if p is None or p.getNodeName() is not doc.getNodeName(): + output('<orderedlist>\n') + output('<listitem><para>\n') + for c in doc.getChildNodes(): + getattr(self, self.element_types[c.getNodeName()])(c, level, output) + n=doc.getNextSibling() + output('</para></listitem>\n') + if n is None or n.getNodeName() is not doc.getNodeName(): + output('</orderedlist>\n') + + def example(self, doc, level, output): + i=0 + for c in doc.getChildNodes(): + 
if i==0: + output('<programlisting>\n<![CDATA[\n') + ## + ## eek. A ']]>' in your body will break this... + ## + output(prestrip(c.getNodeValue())) + output('\n]]></programlisting>\n') + else: + getattr(self, self.element_types[c.getNodeName()])( + c, level, output) + + def paragraph(self, doc, level, output): + output('<para>\n\n') + for c in doc.getChildNodes(): + getattr(self, self.element_types[c.getNodeName()])( + c, level, output) + output('</para>\n\n') + + def link(self, doc, level, output): + output('<ulink url="%s">' % doc.href) + for c in doc.getChildNodes(): + getattr(self, self.element_types[c.getNodeName()])(c, level, output) + output('</ulink>') - def bullet(self, doc, level, output): - p=doc.getPreviousSibling() - if p is None or p.getNodeName() is not doc.getNodeName(): - output('<itemizedlist>\n') - output('<listitem><para>\n') + def emphasis(self, doc, level, output): + output('<emphasis>') + for c in doc.getChildNodes(): + getattr(self, self.element_types[c.getNodeName()])(c, level, output) + output('</emphasis> ') - for c in doc.getChildNodes(): - getattr(self, self.element_types[c.getNodeName()])(c, level, output) - n=doc.getNextSibling() - output('</para></listitem>\n') - if n is None or n.getNodeName() is not doc.getNodeName(): - output('</itemizedlist>\n') - - def numbered(self, doc, level, output): - p=doc.getPreviousSibling() - if p is None or p.getNodeName() is not doc.getNodeName(): - output('<orderedlist>\n') - output('<listitem><para>\n') - for c in doc.getChildNodes(): - getattr(self, self.element_types[c.getNodeName()])(c, level, output) - n=doc.getNextSibling() - output('</para></listitem>\n') - if n is None or n.getNodeName() is not doc.getNodeName(): - output('</orderedlist>\n') - - def example(self, doc, level, output): - i=0 - for c in doc.getChildNodes(): - if i==0: - output('<programlisting>\n<![CDATA[\n') - ## - ## eek. A ']]>' in your body will break this... 
- ## - output(prestrip(c.getNodeValue())) - output('\n]]></programlisting>\n') - else: - getattr(self, self.element_types[c.getNodeName()])( - c, level, output) - - def paragraph(self, doc, level, output): - - output('<para>\n\n') - for c in doc.getChildNodes(): - getattr(self, self.element_types[c.getNodeName()])( - c, level, output) - output('</para>\n\n') - - def link(self, doc, level, output): -# output('<link linkend="%s">' % doc.href) - for c in doc.getChildNodes(): - getattr(self, self.element_types[c.getNodeName()])(c, level, output) -# output('</link>') + def literal(self, doc, level, output): + output('<literal>') + for c in doc.getChildNodes(): + output(c.getNodeValue()) + output('</literal>') - def emphasis(self, doc, level, output): - output('<emphasis>') - for c in doc.getChildNodes(): - getattr(self, self.element_types[c.getNodeName()])(c, level, output) - output('</emphasis> ') + def strong(self, doc, level, output): + output('<emphasis>') + for c in doc.getChildNodes(): + getattr(self, self.element_types[c.getNodeName()])(c, level, output) + output('</emphasis>') - def literal(self, doc, level, output): - output('<literal>') - for c in doc.getChildNodes(): - output(c.getNodeValue()) - output('</literal>') + def xref(self, doc, level, output): + output('<xref linkend="%s"/>' % doc.getNodeValue()) - def strong(self, doc, level, output): - output('<emphasis>') - for c in doc.getChildNodes(): - getattr(self, self.element_types[c.getNodeName()])(c, level, output) - output('</emphasis>') + def sgml(self, doc, level, output): + output(doc.getNodeValue()) - def xref(self, doc, level, output): - output('<xref linkend="%s">' % doc.getNodeValue()) def prestrip(v): - v=string.replace(v, '\r\n', '\n') - v=string.replace(v, '\r', '\n') - v=string.replace(v, '\t', ' ') - lines=string.split(v, '\n') - indent=len(lines[0]) - for line in lines: - if not len(line): continue - i=len(line)-len(string.lstrip(line)) - if i < indent: - indent=i - nlines=[] - for line in 
lines: - nlines.append(line[indent:]) - return string.join(nlines, '\r\n') + v=string.replace(v, '\r\n', '\n') + v=string.replace(v, '\r', '\n') + v=string.replace(v, '\t', ' ') + lines=string.split(v, '\n') + indent=len(lines[0]) + for line in lines: + if not len(line): continue + i=len(line)-len(string.lstrip(line)) + if i < indent: + indent=i + nlines=[] + for line in lines: + nlines.append(line[indent:]) + return string.join(nlines, '\n') class DocBookChapter(DocBookClass): - def document(self, doc, level, output): - output('<chapter>\n') - children=doc.getChildNodes() - if (children and - children[0].getNodeName() == 'StructuredTextSection'): - output('<title>%s</title>' % children[0].getChildNodes()[0].getNodeValue()) - for c in children[0].getChildNodes()[1:]: - getattr(self, self.element_types[c.getNodeName()])(c, level, output) - output('</chapter>\n') + def document(self, doc, level, output): + output('<chapter>\n') + children=doc.getChildNodes() + if (children and + children[0].getNodeName() == 'StructuredTextSection'): + output('<title>%s</title>' % children[0].getChildNodes()[0].getNodeValue()) + for c in children[0].getChildNodes()[1:]: + getattr(self, self.element_types[c.getNodeName()])(c, level, output) + output('</chapter>\n') ets = DocBookClass.element_types -ets.update({'StructuredTextImage': 'image'}) +ets.update({'StructuredTextImage': 'image'}) class DocBookChapterWithFigures(DocBookChapter): - element_types = ets + element_types = ets - def image(self, doc, level, output): - if hasattr(doc, 'key'): - output('<figure id="%s"><title>%s</title>\n' % (doc.key, doc.getNodeValue()) ) - else: - output('<figure><title>%s</title>\n' % doc.getNodeValue()) -## for c in doc.getChildNodes(): -## getattr(self, self.element_types[c.getNodeName()])(c, level, output) - output('<graphic fileref="%s"></graphic>\n</figure>\n' % doc.href) + def image(self, doc, level, output): + if hasattr(doc, 'key'): + output('<figure id="%s"><title>%s</title>\n' % (doc.key, 
doc.getNodeValue()) ) + else: + output('<figure><title>%s</title>\n' % doc.getNodeValue()) +## for c in doc.getChildNodes(): +## getattr(self, self.element_types[c.getNodeName()])(c, level, output) + output('<graphic fileref="%s"></graphic>\n</figure>\n' % doc.href) class DocBookArticle(DocBookClass): - def document(self, doc, level, output): - output('<!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook V3.1//EN">\n') - output('<article>\n') - children=doc.getChildNodes() - if (children and - children[0].getNodeName() == 'StructuredTextSection'): - output('<artheader>\n<title>%s</title>\n</artheader>\n' % - children[0].getChildNodes()[0].getNodeValue()) - for c in children: - getattr(self, self.element_types[c.getNodeName()])(c, level, output) - output('</article>\n') + def document(self, doc, level, output): + output('<!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook V4.1//EN">\n') + output('<article>\n') + children=doc.getChildNodes() + if (children and + children[0].getNodeName() == 'StructuredTextSection'): + output('<articleinfo>\n<title>%s</title>\n</articleinfo>\n' % + children[0].getChildNodes()[0].getNodeValue()) + for c in children: + getattr(self, self.element_types[c.getNodeName()])(c, level, output) + output('</article>\n') class DocBookBook: - def __init__(self, title=''): - self.title = title - self.chapters = [] + def __init__(self, title=''): + self.title = title + self.chapters = [] - def addChapter(self, chapter): - self.chapters.append(chapter) + def addChapter(self, chapter): + self.chapters.append(chapter) - def read(self): - out = '<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook V3.1//EN">\n<book>\n' - out = out + '<title>%s</title>\n' % self.title - for chapter in self.chapters: - out = out + chapter + '\n</book>\n' + def read(self): + out = '<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook V4.1//EN">\n<book>\n' + out = out + '<title>%s</title>\n' % self.title + for chapter in self.chapters: + out = out + chapter + '\n</book>\n' - return out + return out 
- def __str__(self): - return self.read() - + def __str__(self): + return self.read() + diff --git a/wxPython/samples/stxview/StructuredText/DocumentClass.py b/wxPython/samples/stxview/StructuredText/DocumentClass.py index ec6dc402a4..405f35e25d 100644 --- a/wxPython/samples/stxview/StructuredText/DocumentClass.py +++ b/wxPython/samples/stxview/StructuredText/DocumentClass.py @@ -85,19 +85,31 @@ import re, ST, STDOM from string import split, join, replace, expandtabs, strip, find, rstrip +from STletters import * + StringType=type('') ListType=type([]) +def flatten(obj, append): + if obj.getNodeType()==STDOM.TEXT_NODE: + append(obj.getNodeValue()) + else: + for child in obj.getChildNodes(): + flatten(child, append) + + class StructuredTextExample(ST.StructuredTextParagraph): """Represents a section of document with literal text, as for examples""" def __init__(self, subs, **kw): - t=[]; a=t.append - for s in subs: a(s.getNodeValue()) - apply(ST.StructuredTextParagraph.__init__, - (self, join(t,'\n\n'), ()), - kw) + t=[] + a=t.append + for s in subs: + flatten(s, a) + apply(ST.StructuredTextParagraph.__init__, + (self, join(t,'\n\n'), ()), + kw) def getColorizableTexts(self): return () def setColorizableTexts(self, src): pass # never color examples @@ -137,9 +149,15 @@ class StructuredTextSection(ST.StructuredTextParagraph): apply(ST.StructuredTextParagraph.__init__, (self, StructuredTextSectionTitle(src), subs), kw) - + + def getColorizableTexts(self): + return self._src.getColorizableTexts() + + def setColorizableTexts(self,src): + self._src.setColorizableTexts(src) + # a StructuredTextTable holds StructuredTextRows -class StructuredTextTable(ST.StructuredTextDocument): +class StructuredTextTable(ST.StructuredTextParagraph): """ rows is a list of lists containing tuples, which represent the columns/cells in each rows. 
@@ -148,7 +166,7 @@ class StructuredTextTable(ST.StructuredTextDocument): """ def __init__(self, rows, src, subs, **kw): - apply(ST.StructuredTextDocument.__init__,(self,subs),kw) + apply(ST.StructuredTextParagraph.__init__,(self,subs),kw) self._rows = [] for row in rows: if row: @@ -208,34 +226,40 @@ class StructuredTextTable(ST.StructuredTextDocument): return self.setColorizableTexts() # StructuredTextRow holds StructuredTextColumns -class StructuredTextRow(ST.StructuredTextDocument): +class StructuredTextRow(ST.StructuredTextParagraph): def __init__(self,row,kw): """ row is a list of tuples, where each tuple is the raw text for a cell/column and the span - of that cell/column". + of that cell/column. EX [('this is column one',1), ('this is column two',1)] """ - apply(ST.StructuredTextDocument.__init__,(self,[]),kw) + apply(ST.StructuredTextParagraph.__init__,(self,[]),kw) + self._columns = [] - for column in row: - self._columns.append(StructuredTextColumn(column[0],column[1],kw)) - + for column in row: + self._columns.append(StructuredTextColumn(column[0], + column[1], + column[2], + column[3], + column[4], + kw)) + def getColumns(self): return [self._columns] - + def _getColumns(self): return [self._columns] def setColumns(self,columns): self._columns = columns - + def _setColumns(self,columns): return self.setColumns(columns) - + # this holds the text of a table cell class StructuredTextColumn(ST.StructuredTextParagraph): """ @@ -245,20 +269,40 @@ class StructuredTextColumn(ST.StructuredTextParagraph): or StructuredTextTableData. 
""" - def __init__(self,text,span,kw): - # print "StructuredTextColumn", text, span + def __init__(self,text,span,align,valign,typ,kw): apply(ST.StructuredTextParagraph.__init__,(self,text,[]),kw) self._span = span + self._align = align + self._valign = valign + self._type = typ def getSpan(self): return self._span def _getSpan(self): return self._span + + def getAlign(self): + return self._align + + def _getAlign(self): + return self.getAlign() + + def getValign(self): + return self._valign + + def _getValign(self): + return self.getValign() + + def getType(self): + return self._type + + def _getType(self): + return self.getType() + +class StructuredTextTableHeader(ST.StructuredTextParagraph): pass -class StructuredTextTableHeader(ST.StructuredTextDocument): pass - -class StructuredTextTableData(ST.StructuredTextDocument): pass +class StructuredTextTableData(ST.StructuredTextParagraph): pass class StructuredTextMarkup(STDOM.Element): @@ -266,22 +310,22 @@ class StructuredTextMarkup(STDOM.Element): self._value=v self._attributes=kw.keys() for k, v in kw.items(): setattr(self, k, v) - + def getChildren(self, type=type, lt=type([])): v=self._value if type(v) is not lt: v=[v] return v - + def getColorizableTexts(self): return self._value, def setColorizableTexts(self, v): self._value=v[0] - + def __repr__(self): return '%s(%s)' % (self.__class__.__name__, `self._value`) - + class StructuredTextLiteral(StructuredTextMarkup): def getColorizableTexts(self): return () def setColorizableTexts(self, v): pass - + class StructuredTextEmphasis(StructuredTextMarkup): pass class StructuredTextStrong(StructuredTextMarkup): pass @@ -294,9 +338,11 @@ class StructuredTextUnderline(StructuredTextMarkup): pass class StructuredTextSGML(StructuredTextMarkup): pass -class StructuredTextLink(StructuredTextMarkup): pass +class StructuredTextLink(StructuredTextMarkup): pass + +class StructuredTextXref(StructuredTextMarkup): pass -class DocumentClass: +class DocumentClass: """ Class instance 
calls [ex.=> x()] require a structured text structure. Doc will then parse each paragraph in the structure @@ -309,7 +355,6 @@ class DocumentClass: instance with a strong instance stored in its string """ - #'doc_table', paragraph_types = [ 'doc_bullet', 'doc_numbered', @@ -322,13 +367,15 @@ class DocumentClass: #'doc_named_link', #'doc_underline', text_types = [ + 'doc_sgml', 'doc_href', 'doc_strong', 'doc_emphasize', 'doc_literal', - 'doc_sgml' + 'doc_sgml', + 'doc_xref', ] - + def __call__(self, doc): if type(doc) is type(''): doc=ST.StructuredText(doc) @@ -338,10 +385,10 @@ class DocumentClass: doc=ST.StructuredTextDocument(self.color_paragraphs( doc.getSubparagraphs())) return doc - + def parse(self, raw_string, text_type, type=type, st=type(''), lt=type([])): - + """ Parse accepts a raw_string, an expr to test the raw_string, and the raw_string's subparagraphs. @@ -425,7 +472,6 @@ class DocumentClass: st=type('')): result=[] for paragraph in raw_paragraphs: - #print type(paragraph) if paragraph.getNodeName() != 'StructuredTextParagraph': result.append(paragraph) continue @@ -445,23 +491,22 @@ class DocumentClass: break else: new_paragraphs=ST.StructuredTextParagraph(paragraph.getColorizableTexts()[0], - self.color_paragraphs(paragraph.getSubparagraphs()), - indent=paragraph.indent), + self.color_paragraphs(paragraph.getSubparagraphs()), + indent=paragraph.indent), # color the inline StructuredText types # for each StructuredTextParagraph for paragraph in new_paragraphs: if paragraph.getNodeName() is "StructuredTextTable": - #print "we have a table" cells = paragraph.getColumns() text = paragraph.getColorizableTexts() text = map(ST.StructuredText,text) text = map(self.__call__,text) - #for index in range(len(text)): - # text[index].setColorizableTexts(map(self.color_text,text[index].getColorizableTexts())) + for t in range(len(text)): + text[t] = text[t].getSubparagraphs() paragraph.setColorizableTexts(text) - + paragraph.setColorizableTexts( 
map(self.color_text, paragraph.getColorizableTexts() @@ -470,7 +515,7 @@ class DocumentClass: return result - def doc_table(self, paragraph, expr = re.compile('\s*\|[-]+\|').match): + def doc_table(self, paragraph, expr = re.compile(r'\s*\|[-]+\|').match): text = paragraph.getColorizableTexts()[0] m = expr(text) @@ -479,58 +524,102 @@ class DocumentClass: if not (m): return None rows = [] - - rows = split(text,'\n') - + spans = [] ROWS = [] COLS = [] - - TDdivider = re.compile("[\-]+").match - THdivider = re.compile("[\=]+").match - - # find where the column markers are located - col = re.compile('\|').search + indexes = [] + ignore = [] + + TDdivider = re.compile("[\-]+").match + THdivider = re.compile("[\=]+").match + col = re.compile('\|').search + innertable = re.compile('\|([-]+|[=]+)\|').search + text = strip(text) rows = split(text,'\n') + foo = "" + for row in range(len(rows)): rows[row] = strip(rows[row]) - - for row in rows: - tmp = strip(row) - tmp = row[1:len(tmp)-1] # remove leading and trailing | - offset = 0 + + # have indexes store if a row is a divider + # or a cell part + for index in range(len(rows)): + tmpstr = rows[index][1:len(rows[index])-1] + if TDdivider(tmpstr): + indexes.append("TDdivider") + elif THdivider(tmpstr): + indexes.append("THdivider") + else: + indexes.append("cell") + + for index in range(len(indexes)): + if indexes[index] is "TDdivider" or indexes[index] is THdivider: + ignore = [] # reset ignore + #continue # skip dividers + + tmp = strip(rows[index]) # clean the row up + tmp = tmp[1:len(tmp)-1] # remove leading + trailing | + offset = 0 + + # find the start and end of inner + # tables. 
ignore everything between + if innertable(tmp): + tmpstr = strip(tmp) + while innertable(tmpstr): + start,end = innertable(tmpstr).span() + if not (start,end-1) in ignore: + ignore.append(start,end-1) + tmpstr = " " + tmpstr[end:] + + # find the location of column dividers + # NOTE: |'s in inner tables do not count + # as column dividers if col(tmp): while col(tmp): - start,end = col(tmp).span() + bar = 1 # true if start is not in ignore + start,end = col(tmp).span() + if not start+offset in spans: - spans.append(start + offset) - COLS.append((tmp[0:start],start+offset)) - tmp = " " + tmp[end:] - offset = offset + (start) + for s,e in ignore: + if start+offset >= s or start+offset <= e: + bar = None + break + if bar: # start is clean + spans.append(start+offset) + if not bar: + foo = foo + tmp[:end] + tmp = tmp[end:] + offset = offset + end + else: + COLS.append((foo + tmp[0:start],start+offset)) + foo = "" + tmp = " " + tmp[end:] + offset = offset + start if not offset+len(tmp) in spans: spans.append(offset+len(tmp)) - COLS.append((tmp,offset+len(tmp))) + COLS.append((foo + tmp,offset+len(tmp))) + foo = "" ROWS.append(COLS) COLS = [] - - spans.sort() - - ROWS = ROWS[1:len(ROWS)] + spans.sort() + ROWS = ROWS[1:len(ROWS)] + # find each column span cols = [] tmp = [] - + for row in ROWS: for c in row: tmp.append(c[1]) cols.append(tmp) tmp = [] - - cur = 1 # the current column span - tmp = [] - C = [] # holds the span of each cell + + cur = 1 + tmp = [] + C = [] for col in cols: for span in spans: if not span in col: @@ -541,14 +630,47 @@ class DocumentClass: C.append(tmp) tmp = [] - # make rows contain the cell's text and the span - # of that cell for index in range(len(C)): for i in range(len(C[index])): ROWS[index][i] = (ROWS[index][i][0],C[index][i]) rows = ROWS - # now munge the table cells together + # label things as either TableData or + # Table header + TD = [] + TH = [] + all = [] + for index in range(len(indexes)): + if indexes[index] is "TDdivider": + 
TD.append(index) + all.append(index) + if indexes[index] is "THdivider": + TH.append(index) + all.append(index) + TD = TD[1:] + dividers = all[1:] + #print "TD => ", TD + #print "TH => ", TH + #print "all => ", all, "\n" + + for div in dividers: + if div in TD: + index = all.index(div) + for rowindex in range(all[index-1],all[index]): + for i in range(len(rows[rowindex])): + rows[rowindex][i] = (rows[rowindex][i][0], + rows[rowindex][i][1], + "td") + else: + index = all.index(div) + for rowindex in range(all[index-1],all[index]): + for i in range(len(rows[rowindex])): + rows[rowindex][i] = (rows[rowindex][i][0], + rows[rowindex][i][1], + "th") + + # now munge the multi-line cells together + # as paragraphs ROWS = [] COLS = [] for row in rows: @@ -556,16 +678,97 @@ class DocumentClass: if not COLS: COLS = range(len(row)) for i in range(len(COLS)): - COLS[i] = ["",1] + COLS[i] = ["",1,""] if TDdivider(row[index][0]) or THdivider(row[index][0]): ROWS.append(COLS) COLS = [] else: - COLS[index][0] = COLS[index][0] + rstrip(row[index][0]) + "\n" + COLS[index][0] = COLS[index][0] + (row[index][0]) + "\n" COLS[index][1] = row[index][1] - return StructuredTextTable(ROWS,text,subs,indent=paragraph.indent) + COLS[index][2] = row[index][2] + + # now that each cell has been munged together, + # determine the cell's alignment. + # Default is to center. Also determine the cell's + # vertical alignment, top, middle, bottom. 
Default is + # to middle + rows = [] + cols = [] + for row in ROWS: + for index in range(len(row)): + topindent = 0 + bottomindent = 0 + leftindent = 0 + rightindent = 0 + left = [] + right = [] + text = row[index][0] + text = split(text,'\n') + text = text[:len(text)-1] + align = "" + valign = "" + for t in text: + t = strip(t) + if not t: + topindent = topindent + 1 + else: + break + text.reverse() + for t in text: + t = strip(t) + if not t: + bottomindent = bottomindent + 1 + else: + break + text.reverse() + tmp = join(text[topindent:len(text)-bottomindent],"\n") + pars = re.compile("\n\s*\n").split(tmp) + for par in pars: + if index > 0: + par = par[1:] + par = split(par, ' ') + for p in par: + if not p: + leftindent = leftindent+1 + else: + break + left.append(leftindent) + leftindent = 0 + par.reverse() + for p in par: + if not p: + rightindent = rightindent + 1 + else: + break + right.append(rightindent) + rightindent = 0 + left.sort() + right.sort() + + if topindent == bottomindent: + valign="middle" + elif topindent < 1: + valign="top" + elif bottomindent < 1: + valign="bottom" + else: + valign="middle" + + if left[0] < 1: + align = "left" + elif right[0] < 1: + align = "right" + elif left[0] > 1 and right[0] > 1: + align="center" + else: + align="left" + + cols.append(row[index][0],row[index][1],align,valign,row[index][2]) + rows.append(cols) + cols = [] + return StructuredTextTable(rows,text,subs,indent=paragraph.indent) - def doc_bullet(self, paragraph, expr = re.compile('\s*[-*o]\s+').match): + def doc_bullet(self, paragraph, expr = re.compile(r'\s*[-*o]\s+').match): top=paragraph.getColorizableTexts()[0] m=expr(top) @@ -583,7 +786,7 @@ class DocumentClass: def doc_numbered( self, paragraph, - expr = re.compile('(\s*[a-zA-Z]+\.)|(\s*[0-9]+\.)|(\s*[0-9]+\s+)').match): + expr = re.compile(r'(\s*[%s]+\.)|(\s*[0-9]+\.)|(\s*[0-9]+\s+)' % letters).match): # This is the old expression. 
It had a nasty habit # of grabbing paragraphs that began with a single @@ -607,8 +810,8 @@ class DocumentClass: def doc_description( self, paragraph, - delim = re.compile('\s+--\s+').search, - nb=re.compile(r'[^\0- ]').search, + delim = re.compile(r'\s+--\s+').search, + nb=re.compile(r'[^\000- ]').search, ): top=paragraph.getColorizableTexts()[0] @@ -632,7 +835,7 @@ class DocumentClass: delim=d) def doc_header(self, paragraph, - expr = re.compile('[ a-zA-Z0-9.:/,-_*<>\?\'\"]+').match + expr = re.compile(r'[ %s0-9.:/,-_*<>\?\'\"]+' % letters).match ): subs=paragraph.getSubparagraphs() if not subs: return None @@ -650,9 +853,9 @@ class DocumentClass: def doc_literal( self, s, expr=re.compile( - "(?:\s|^)'" # open - "([^ \t\n\r\f\v']|[^ \t\n\r\f\v'][^\n']*[^ \t\n\r\f\v'])" # contents - "'(?:\s|[,.;:!?]|$)" # close + r"(?:\s|^)'" # open + r"([^ \t\n\r\f\v']|[^ \t\n\r\f\v'][^\n']*[^ \t\n\r\f\v'])" # contents + r"'(?:\s|[,.;:!?]|$)" # close ).search): r=expr(s) @@ -664,7 +867,7 @@ class DocumentClass: def doc_emphasize( self, s, - expr = re.compile('\s*\*([ \na-zA-Z0-9.:/;,\'\"\?]+)\*(?!\*|-)').search + expr = re.compile(r'\s*\*([ \n%s0-9]+)\*(?!\*|-)' % lettpunc).search ): r=expr(s) @@ -676,8 +879,8 @@ class DocumentClass: def doc_inner_link(self, s, - expr1 = re.compile("\.\.\s*").search, - expr2 = re.compile("\[[a-zA-Z0-9]+\]").search): + expr1 = re.compile(r"\.\.\s*").search, + expr2 = re.compile(r"\[[%s0-9]+\]" % letters ).search): # make sure we dont grab a named link if expr2(s) and expr1(s): @@ -697,7 +900,7 @@ class DocumentClass: def doc_named_link(self, s, - expr=re.compile("(\.\.\s)(\[[a-zA-Z0-9]+\])").search): + expr=re.compile(r"(\.\.\s)(\[[%s0-9]+\])" % letters).search): result = expr(s) if result: @@ -711,7 +914,7 @@ class DocumentClass: def doc_underline(self, s, - expr=re.compile("\_([a-zA-Z0-9\s\.,\?]+)\_").search): + expr=re.compile(r"\s+\_([%s0-9\s]+)\_" % lettpunc).search): result = expr(s) if result: @@ -723,7 +926,7 @@ class DocumentClass: def 
doc_strong(self, s, - expr = re.compile('\s*\*\*([ \na-zA-Z0-9.:/;\-,!\?\'\"]+)\*\*').search + expr = re.compile(r'\s*\*\*([ \n%s0-9]+)\*\*' % lettpunc).search ): r=expr(s) @@ -732,14 +935,17 @@ class DocumentClass: return (StructuredTextStrong(s[start:end]), start-2, end+2) else: return None + + ## Some constants to make the doc_href() regex easier to read. + _DQUOTEDTEXT = r'("[%s0-9\n\-\.\,\;\(\)\/\:\/\*\']+")' % letters ## double quoted text + _URL_AND_PUNC = r'([%s0-9\@\.\,\?\!\/\:\;\-\#\~]+)' % letters + _SPACES = r'(\s*)' - def doc_href( - - self, s, - expr1 = re.compile("(\"[ a-zA-Z0-9\n\-\.\,\;\(\)\/\:\/]+\")(:)([a-zA-Z0-9\:\/\.\~\-]+)([,]*\s*)").search, - expr2 = re.compile('(\"[ a-zA-Z0-9\n\-\.\:\;\(\)\/]+\")([,]+\s+)([a-zA-Z0-9\@\.\,\?\!\/\:\;\-\#]+)(\s*)').search): + def doc_href(self, s, + expr1 = re.compile(_DQUOTEDTEXT + "(:)" + _URL_AND_PUNC + _SPACES).search, + expr2 = re.compile(_DQUOTEDTEXT + r'(\,\s+)' + _URL_AND_PUNC + _SPACES).search): - punctuation = re.compile("[\,\.\?\!\;]+").match + punctuation = re.compile(r"[\,\.\?\!\;]+").match r=expr1(s) or expr2(s) if r: @@ -766,7 +972,7 @@ class DocumentClass: else: return None - def doc_sgml(self,s,expr=re.compile("\<[a-zA-Z0-9\.\=\'\"\:\/\-\#\+\s]+\>").search): + def doc_sgml(self,s,expr=re.compile(r"\<[%s0-9\.\=\'\"\:\/\-\#\+\s\*]+\>" % letters).search): """ SGML text is ignored and outputed as-is """ @@ -775,3 +981,18 @@ class DocumentClass: start,end = r.span() text = s[start:end] return (StructuredTextSGML(text),start,end) + + + def doc_xref(self, s, + expr = re.compile('\[([%s0-9\-.:/;,\n\~]+)\]' % letters).search + ): + r = expr(s) + if r: + start, end = r.span(1) + return (StructuredTextXref(s[start:end]), start-1, end+1) + else: + return None + + + + diff --git a/wxPython/samples/stxview/StructuredText/HTMLClass.py b/wxPython/samples/stxview/StructuredText/HTMLClass.py index d5c03d8357..951aec4c97 100644 --- a/wxPython/samples/stxview/StructuredText/HTMLClass.py +++ 
b/wxPython/samples/stxview/StructuredText/HTMLClass.py @@ -84,6 +84,7 @@ ############################################################################## from string import join, split, find +from cgi import escape import re, sys, ST class HTMLClass: @@ -172,34 +173,34 @@ class HTMLClass: def bullet(self, doc, level, output): p=doc.getPreviousSibling() if p is None or p.getNodeName() is not doc.getNodeName(): - output('<ul>\n') + output('\n<ul>\n') output('<li>') for c in doc.getChildNodes(): getattr(self, self.element_types[c.getNodeName()])(c, level, output) n=doc.getNextSibling() output('</li>\n') if n is None or n.getNodeName() is not doc.getNodeName(): - output('</ul>\n') + output('\n</ul>\n') def numbered(self, doc, level, output): p=doc.getPreviousSibling() if p is None or p.getNodeName() is not doc.getNodeName(): - output('<ol>\n') + output('\n<ol>\n') output('<li>') for c in doc.getChildNodes(): getattr(self, self.element_types[c.getNodeName()])(c, level, output) n=doc.getNextSibling() output('</li>\n') if n is None or n.getNodeName() is not doc.getNodeName(): - output('</ol>\n') + output('\n</ol>\n') def example(self, doc, level, output): i=0 for c in doc.getChildNodes(): if i==0: - output('<pre>') - output(html_quote(c.getNodeValue())) - output('</pre>\n') + output('\n<pre>\n') + output(escape(c.getNodeValue())) + output('\n</pre>\n') else: getattr(self, self.element_types[c.getNodeName()])( c, level, output) @@ -214,7 +215,7 @@ class HTMLClass: else: getattr(self, self.element_types[c.getNodeName()])( c, level, output) - output('</p>') + output('</p>\n') def link(self, doc, level, output): output('<a href="%s">' % doc.href) @@ -231,7 +232,7 @@ class HTMLClass: def literal(self, doc, level, output): output('<code>') for c in doc.getChildNodes(): - output(html_quote(c.getNodeValue())) + output(escape(c.getNodeValue())) output('</code>') def strong(self, doc, level, output): @@ -267,6 +268,10 @@ class HTMLClass: def sgml(self,doc,level,output): for c in 
doc.getChildNodes(): getattr(self, self.element_types[c.getNodeName()])(c, level, output) + + def xref(self, doc, level, output): + val = doc.getNodeValue() + output('<a href="#%s">[%s]</a>' % (val, val) ) def table(self,doc,level,output): """ @@ -279,29 +284,23 @@ class HTMLClass: for row in doc.getRows()[0]: output("<tr>\n") for column in row.getColumns()[0]: - str = "<td colspan=%s>" % column.getSpan() + if hasattr(column,"getAlign"): + str = "<%s colspan=%s align=%s valign=%s>" % (column.getType(), + column.getSpan(), + column.getAlign(), + column.getValign()) + else: + str = "<td colspan=%s>" % column.getSpan() output(str) - #for c in doc.getChildNodes(): - # getattr(self, self.element_types[c.getNodeName()])(c, level, output) for c in column.getChildNodes(): getattr(self, self.element_types[c.getNodeName()])(c, level, output) - output("</td>\n") + if hasattr(column,"getType"): + output("</"+column.getType()+">\n") + else: + output("</td>\n") output("</tr>\n") output("</table>\n") -def html_quote(v, name='(Unknown name)', md={}, - character_entities=( - (('&'), '&'), - (('<'), '<' ), - (('>'), '>' ), - (('\213'), '<' ), - (('\233'), '>' ), - (('"'), '"'))): #" - text=str(v) - for re,name in character_entities: - if find(text, re) >= 0: text=join(split(text,re),name) - return text - diff --git a/wxPython/samples/stxview/StructuredText/HTMLWithImages.py b/wxPython/samples/stxview/StructuredText/HTMLWithImages.py index 4d1e2f2b05..2b25a8891c 100644 --- a/wxPython/samples/stxview/StructuredText/HTMLWithImages.py +++ b/wxPython/samples/stxview/StructuredText/HTMLWithImages.py @@ -109,21 +109,16 @@ class HTMLWithImages(HTMLClass): output('</body>\n') output('</html>\n') - - def image(self, doc, level, output): - output('<img src="%s" alt="%s">' % (doc.href, doc.getNodeValue())) - - def image(self, doc, level, output): if hasattr(doc, 'key'): - output('<a name="%s"></a>\n<img src="%s" alt="%s">' % (doc.key, doc.href, doc.getNodeValue())) - else: - output('<img 
src="%s" alt="%s">' % (doc.href, doc.getNodeValue())) - + output('<a name="%s"></a>\n' % doc.key) + output('<img src="%s" alt="%s">\n' % (doc.href, doc.getNodeValue())) + if doc.getNodeValue() and hasattr(doc, 'key'): + output('<p><b>Figure %s</b> %s</p>\n' % (doc.key, doc.getNodeValue())) def xref(self, doc, level, output): val = doc.getNodeValue() - output('<a href="#%s">%s</a>' % (val, val) ) + output('<a href="#%s">Figure %s</a>' % (val, val) ) diff --git a/wxPython/samples/stxview/StructuredText/MML.py b/wxPython/samples/stxview/StructuredText/MML.py deleted file mode 100644 index 515bd3272c..0000000000 --- a/wxPython/samples/stxview/StructuredText/MML.py +++ /dev/null @@ -1,170 +0,0 @@ -############################################################################## -# -# Zope Public License (ZPL) Version 1.0 -# ------------------------------------- -# -# Copyright (c) Digital Creations. All rights reserved. -# -# This license has been certified as Open Source(tm). -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions in source code must retain the above copyright -# notice, this list of conditions, and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions, and the following disclaimer in -# the documentation and/or other materials provided with the -# distribution. -# -# 3. Digital Creations requests that attribution be given to Zope -# in any manner possible. Zope includes a "Powered by Zope" -# button that is installed by default. While it is not a license -# violation to remove this button, it is requested that the -# attribution remain. A significant investment has been put -# into Zope, and this effort will continue if the Zope community -# continues to grow. This is one way to assure that growth. -# -# 4. 
All advertising materials and documentation mentioning -# features derived from or use of this software must display -# the following acknowledgement: -# -# "This product includes software developed by Digital Creations -# for use in the Z Object Publishing Environment -# (http://www.zope.org/)." -# -# In the event that the product being advertised includes an -# intact Zope distribution (with copyright and license included) -# then this clause is waived. -# -# 5. Names associated with Zope or Digital Creations must not be used to -# endorse or promote products derived from this software without -# prior written permission from Digital Creations. -# -# 6. Modified redistributions of any form whatsoever must retain -# the following acknowledgment: -# -# "This product includes software developed by Digital Creations -# for use in the Z Object Publishing Environment -# (http://www.zope.org/)." -# -# Intact (re-)distributions of any official Zope release do not -# require an external acknowledgement. -# -# 7. Modifications are encouraged but must be packaged separately as -# patches to official Zope releases. Distributions that do not -# clearly separate the patches from the original work must be clearly -# labeled as unofficial distributions. Modifications which do not -# carry the name Zope may be packaged in any form, as long as they -# conform to all of the clauses above. -# -# -# Disclaimer -# -# THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY -# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL DIGITAL CREATIONS OR ITS -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF -# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT -# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -# SUCH DAMAGE. -# -# -# This software consists of contributions made by Digital Creations and -# many individuals on behalf of Digital Creations. Specific -# attributions are listed in the accompanying credits file. -# -############################################################################## -''' -$Id$''' - -from StructuredText import * # :-) - -def ctag(s): - # Blech, wish we could use character tags - if s is None: s='' - s=gsub(strong,'\\1<bold>\\2<plain>\\3',s) - s=gsub(code, '\\1<family Courier>\\2<family Times>\\3',s) - s=gsub(em, '\\1<italic>\\2<plain>\\3',s) - return join(map(strip,split(s,'\n')),'\n') - -class MML(StructuredText): - - '''\ - An MML structured text formatter. - '''\ - - def __str__(self, - ): - '''\ - Return an HTML string representation of the structured text data. 
- - ''' - s=self._str(self.structure,self.level) - return s - - def ul(self, before, p, after): - return ("%s\n\n<Bulleted>\n%s%s" - % (before, ctag(p), after)) - - def ol(self, before, p, after): - return ("%s\n\n<Numbered>\n%s%s" - % (before, ctag(p), after)) - - def dl(self, before, t, d, after): - return ("%s\n\n<Term>\n%s\n\n<Definition>\n%s%s" - % (before,ctag(t),ctag(d),after)) - - def head(self, before, t, level, d): - return ("%s\n\n<Heading%d>\n%s%s" - % (before,level,ctag(t),d)) - - def normal(self,before,p,after): - return "%s\n\n<Body>\n%s%s" % (before, ctag(p), after) - - def pre(self,structure,r=None): - if r is None: r=[''] - for s in structure: - for line in split(s[0],'\n'): - r.append('\n<PRE>') - r.append(line) - self.pre(s[1],r) - return join(r,'\n') - - def _str(self,structure,level): - r='' - for s in structure: - # print s[0],'\n', len(s[1]), '\n\n' - if bullet.match(s[0]) >= 0: - p=bullet.group(1) - r=self.ul(r,p,self._str(s[1],level)) - elif ol.match(s[0]) >= 0: - p=ol.group(3) - r=self.ol(r,p,self._str(s[1],level)) - elif olp.match(s[0]) >= 0: - p=olp.group(1) - r=self.ol(r,p,self._str(s[1],level)) - elif dl.match(s[0]) >= 0: - t,d=dl.group(1,2) - r=self.dl(r,t,d,self._str(s[1],level)) - elif example.search(s[0]) >= 0 and s[1]: - # Introduce an example, using pre tags: - r=self.normal(r,s[0],self.pre(s[1])) - elif s[0][-2:]=='::' and s[1]: - # Introduce an example, using pre tags: - r=self.normal(r,s[0][:-1],self.pre(s[1])) - elif nl.search(s[0]) < 0 and s[1] and s[0][-1:] != ':': - # Treat as a heading - t=s[0] - r=self.head(r,t,level, - self._str(s[1],level and level+1)) - else: - r=self.normal(r,s[0],self._str(s[1],level)) - return r diff --git a/wxPython/samples/stxview/StructuredText/ST.py b/wxPython/samples/stxview/StructuredText/ST.py index 2e6d0aba1f..3917adcaa8 100644 --- a/wxPython/samples/stxview/StructuredText/ST.py +++ b/wxPython/samples/stxview/StructuredText/ST.py @@ -26,7 +26,9 @@ def insert(struct, top, level): """ #print 
"struct", struct, top-1 if not top-1 in range(len(struct)): - return None + if struct: + return struct[len(struct)-1].getSubparagraphs() + return struct run = struct[top-1] i = 0 while i+1 < level: @@ -142,8 +144,11 @@ def StructuredText(paragraphs, paragraph_delimiter=re.compile('\n\s*\n')): if result > 0: currentlevel = result currentindent = indent - run = insert(struct,level,currentlevel) - run.append(StructuredTextParagraph(paragraph, indent=indent, level=currentlevel)) + if not level: + struct.append(StructuredTextParagraph(paragraph, indent=indent, level=currentlevel)) + else: + run = insert(struct,level,currentlevel) + run.append(StructuredTextParagraph(paragraph, indent=indent, level=currentlevel)) else: if insert(struct,level,currentlevel): run = insert(struct,level,currentlevel) diff --git a/wxPython/samples/stxview/StructuredText/STDOM.py b/wxPython/samples/stxview/StructuredText/STDOM.py index 1eb2d42731..c38f2fa6f4 100644 --- a/wxPython/samples/stxview/StructuredText/STDOM.py +++ b/wxPython/samples/stxview/StructuredText/STDOM.py @@ -178,7 +178,7 @@ class ParentNode: if not children: return None - n=chidren[0] + n=children[0] if type(n) is st: n=TextNode(n) @@ -554,7 +554,7 @@ class Element(Node): return self.getNodeType() def _get_NodeValue(self, type=type, st=type('')): - return self.GetNodeValue(type,st) + return self.getNodeValue(type,st) def _get_ParentNode(self): return self.getParentNode() diff --git a/wxPython/samples/stxview/StructuredText/STNG.txt b/wxPython/samples/stxview/StructuredText/STNG.txt index 40af179bd1..20c7e6fc82 100644 --- a/wxPython/samples/stxview/StructuredText/STNG.txt +++ b/wxPython/samples/stxview/StructuredText/STNG.txt @@ -18,7 +18,7 @@ Using Structured Text st=StructuredText.Basic(raw) The output of 'StructuredText.Basic' is simply a - StructuredTextDocumemt object containing StructuredTextParagraph + StructuredTextDocument object containing StructuredTextParagraph objects arranged in a hierarchy. 
Paragraphs are delimited by strings of two or more whitespace characters beginning and ending with newline characters. Hierarchy is indicated by indentation. The diff --git a/wxPython/samples/stxview/StructuredText/STletters.py b/wxPython/samples/stxview/StructuredText/STletters.py new file mode 100644 index 0000000000..5168b01e47 --- /dev/null +++ b/wxPython/samples/stxview/StructuredText/STletters.py @@ -0,0 +1,15 @@ +import string + +try: + del string + import locale + locale.setlocale(locale.LC_ALL,"") +except: + pass + +import string + +letters = string.letters +punctuations = string.punctuation + +lettpunc = letters + punctuations diff --git a/wxPython/samples/stxview/StructuredText/StructuredText.py b/wxPython/samples/stxview/StructuredText/StructuredText.py index a1b3fd03ad..2408f2331c 100644 --- a/wxPython/samples/stxview/StructuredText/StructuredText.py +++ b/wxPython/samples/stxview/StructuredText/StructuredText.py @@ -1,4 +1,3 @@ -#! /usr/bin/env python -- # -*- python -*- ############################################################################## # # Zope Public License (ZPL) Version 1.0 @@ -83,751 +82,67 @@ # attributions are listed in the accompanying credits file. # ############################################################################## -'''Structured Text Manipulation -Parse a structured text string into a form that can be used with -structured formats, like html. +""" Alias module for StructuredTextClassic compatibility which makes +use of StructuredTextNG """ -Structured text is text that uses indentation and simple -symbology to indicate the structure of a document. -A structured string consists of a sequence of paragraphs separated by -one or more blank lines. Each paragraph has a level which is defined -as the minimum indentation of the paragraph. A paragraph is a -sub-paragraph of another paragraph if the other paragraph is the last -preceding paragraph that has a lower level. 
+import HTMLClass, DocumentClass, ClassicDocumentClass +from ST import Basic -Special symbology is used to indicate special constructs: +import re, string,sys +from STletters import letters -- A single-line paragraph whose immediately succeeding paragraphs are lower - level is treated as a header. +Document = ClassicDocumentClass.DocumentClass() +HTMLNG = HTMLClass.HTMLClass() -- A paragraph that begins with a '-', '*', or 'o' is treated as an - unordered list (bullet) element. +def HTML(aStructuredString, level=0): + st = Basic(aStructuredString) + doc = Document(st) + return HTMLNG(doc) -- A paragraph that begins with a sequence of digits followed by a - white-space character is treated as an ordered list element. +def StructuredText(aStructuredString, level=0): + return HTML(aStructuredString,level) -- A paragraph that begins with a sequence of sequences, where each - sequence is a sequence of digits or a sequence of letters followed - by a period, is treated as an ordered list element. - -- A paragraph with a first line that contains some text, followed by - some white-space and '--' is treated as - a descriptive list element. The leading text is treated as the - element title. - -- Sub-paragraphs of a paragraph that ends in the word 'example' or the - word 'examples', or '::' is treated as example code and is output as is. - -- Text enclosed single quotes (with white-space to the left of the - first quote and whitespace or puctuation to the right of the second quote) - is treated as example code. - -- Text surrounded by '*' characters (with white-space to the left of the - first '*' and whitespace or puctuation to the right of the second '*') - is emphasized. - -- Text surrounded by '**' characters (with white-space to the left of the - first '**' and whitespace or puctuation to the right of the second '**') - is made strong. 
- -- Text surrounded by '_' underscore characters (with whitespace to the left - and whitespace or punctuation to the right) is made underlined. - -- Text encloded by double quotes followed by a colon, a URL, and concluded - by punctuation plus white space, *or* just white space, is treated as a - hyper link. For example: - - "Zope":http://www.zope.org/ is ... - - Is interpreted as '<a href="http://www.zope.org/">Zope</a> is ....' - Note: This works for relative as well as absolute URLs. - -- Text enclosed by double quotes followed by a comma, one or more spaces, - an absolute URL and concluded by punctuation plus white space, or just - white space, is treated as a hyper link. For example: - - "mail me", mailto:amos@digicool.com. - - Is interpreted as '<a href="mailto:amos@digicool.com">mail me</a>.' - -- Text enclosed in brackets which consists only of letters, digits, - underscores and dashes is treated as hyper links within the document. - For example: - - As demonstrated by Smith [12] this technique is quite effective. - - Is interpreted as '... by Smith <a href="#12">[12]</a> this ...'. Together - with the next rule this allows easy coding of references or end notes. - -- Text enclosed in brackets which is preceded by the start of a line, two - periods and a space is treated as a named link. For example: - - .. [12] "Effective Techniques" Smith, Joe ... - - Is interpreted as '<a name="12">[12]</a> "Effective Techniques" ...'. - Together with the previous rule this allows easy coding of references or - end notes. - - -- A paragraph that has blocks of text enclosed in '||' is treated as a - table. The text blocks correspond to table cells and table rows are - denoted by newlines. By default the cells are center aligned. A cell - can span more than one column by preceding a block of text with an - equivalent number of cell separators '||'. Newlines and '|' cannot - be a part of the cell text. 
For example: - - |||| **Ingredients** || - || *Name* || *Amount* || - ||Spam||10|| - ||Eggs||3|| - - is interpreted as:: - - <TABLE BORDER=1 CELLPADDING=2> - <TR> - <TD ALIGN=CENTER COLSPAN=2> <strong>Ingredients</strong> </TD> - </TR> - <TR> - <TD ALIGN=CENTER COLSPAN=1> <em>Name</em> </TD> - <TD ALIGN=CENTER COLSPAN=1> <em>Amount</em> </TD> - </TR> - <TR> - <TD ALIGN=CENTER COLSPAN=1>Spam</TD> - <TD ALIGN=CENTER COLSPAN=1>10</TD> - </TR> - <TR> - <TD ALIGN=CENTER COLSPAN=1>Eggs</TD> - <TD ALIGN=CENTER COLSPAN=1>3</TD> - </TR> - </TABLE> - - -$Id$''' -# Copyright -# -# Copyright 1996 Digital Creations, L.C., 910 Princess Anne -# Street, Suite 300, Fredericksburg, Virginia 22401 U.S.A. All -# rights reserved. Copyright in this software is owned by DCLC, -# unless otherwise indicated. Permission to use, copy and -# distribute this software is hereby granted, provided that the -# above copyright notice appear in all copies and that both that -# copyright notice and this permission notice appear. Note that -# any product, process or technology described in this software -# may be the subject of other Intellectual Property rights -# reserved by Digital Creations, L.C. and are not licensed -# hereunder. -# -# Trademarks -# -# Digital Creations & DCLC, are trademarks of Digital Creations, L.C.. -# All other trademarks are owned by their respective companies. -# -# No Warranty -# -# The software is provided "as is" without warranty of any kind, -# either express or implied, including, but not limited to, the -# implied warranties of merchantability, fitness for a particular -# purpose, or non-infringement. This software could include -# technical inaccuracies or typographical errors. Changes are -# periodically made to the software; these changes will be -# incorporated in new editions of the software. DCLC may make -# improvements and/or changes in this software at any time -# without notice. 
-# -# Limitation Of Liability -# -# In no event will DCLC be liable for direct, indirect, special, -# incidental, economic, cover, or consequential damages arising -# out of the use of or inability to use this software even if -# advised of the possibility of such damages. Some states do not -# allow the exclusion or limitation of implied warranties or -# limitation of liability for incidental or consequential -# damages, so the above limitation or exclusion may not apply to -# you. -# -# -# If you have questions regarding this software, -# contact: -# -# Jim Fulton, jim@digicool.com -# -# (540) 371-6909 -# -# $Log$ -# Revision 1.1 2001/03/10 05:07:20 RD -# Added some simple sample apps -# -# Revision 1.27 2000/04/21 13:38:10 jim -# Added closing list tags. Woo hoo! -# -# Revision 1.26 2000/03/14 17:22:04 brian -# Allow ~ in hrefs. -# -# Revision 1.25 2000/02/17 00:53:24 klm -# HTML._str(): We were getting preformatted examples rendered twice, -# second time without preformatting. Problem was a missing 'continue' -# in one of the cases. -# -# Revision 1.24 1999/12/13 16:32:48 klm -# Incorporated pavlos christoforou's mods to handle simple tables. From -# his web page at http://www.zope.org/Members/gaaros/StructuredText: -# -# Structured Text module with table support -# -# A paragraph that has blocks of text enclosed in '||' is treated as a -# table. The text blocks correspond to table cells and table rows are -# denoted by newlines. By default the cells are center aligned. You can -# change the defaults by modifying the CELL,ROW and TABLE class -# attributes in class Table. A cell can span more than one column by -# preceding a block of text with an equivalent number of cell separators -# '||'. Newlines and '|' cannot be a part of the cell text. If you need -# newlines use <BR>. 
For example: -# -# |||| **Ingredients** || -# || *Name* || *Amount* || -# ||Spam||10|| -# ||Eggs||3|| -# -# Revision 1.23 1999/08/03 20:49:05 jim -# Fixed to allow list elements to introduce examples. -# -# Restructured _str using continue to avoid excessive nesting. -# -# Revision 1.22 1999/08/02 22:01:28 jim -# Fixed a bunch of bugs introduced by making ts_regex actually thread -# safe. -# -# Also localized a bunch of regular expressions -# using "static" variables (aka always default arguments). -# -# Revision 1.21 1999/08/02 13:26:52 jim -# paragraph_divider needs to be a regular (thread-unsafe) regex -# since it gets passed to ts_regex.split, which is thread-safe -# and wants to use regs. -# -# Revision 1.20 1999/07/21 13:33:59 jim -# untabified. -# -# Revision 1.19 1999/07/15 16:43:15 jim -# Checked in Scott Robertson's thread-safety fixes. -# -# Revision 1.18 1999/03/24 00:03:18 klm -# Provide for relative links, eg <a href="file_in_same_dir">whatever</a>, -# as: -# -# "whatever", :file_in_same_dir -# -# or -# -# "whatever"::file_in_same_dir -# -# .__init__(): relax the second gsub, using a '*' instead of a '+', so -# the stuff before the ':' can be missing, and also do postprocessing so -# any resulting '<a href=":file_in_same_dir">'s have the superfluous ':' -# removed. *Seems* good! -# -# Revision 1.17 1999/03/12 23:21:39 klm -# Gratuituous checkin to test my cvs *update* logging hook. -# -# Revision 1.16 1999/03/12 17:12:12 klm -# Added support for underlined elements, in the obvious way (and -# included an entry in the module docstring for it). -# -# Added an entry in the module docstring describing what i *guess* is -# the criterion for identifying header elements. (I'm going to have to -# delve into and understand the framework a bit better before *knowing* -# this is the case.) -# -# Revision 1.15 1999/03/11 22:40:18 klm -# Handle links that include '#' named links. 
-# -# Revision 1.14 1999/03/11 01:35:19 klm -# Fixed a small typo, and refined the module docstring link example, in -# order to do a checkin to exercise the CVS repository mirroring. Might -# as well include my last checkin message, with some substantial stuff: -# -# Links are now recognized whether or not the candidate strings are -# terminated with punctuation before the trailing whitespace. The old -# form - trailing punctuation then whitespace - is preserved, but the -# punctuation is now unnecessary. -# -# The regular expressions are a bit more complicated, but i've factored -# out the common parts and but them in variables with suggestive names, -# which may make them easier to understand. -# -# Revision 1.13 1999/03/11 00:49:57 klm -# Links are now recognized whether or not the candidate strings are -# terminated with punctuation before the trailing whitespace. The old -# form - trailing punctuation then whitespace - is preserved, but the -# punctuation is now unnecessary. -# -# The regular expressions are a bit more complicated, but i've factored -# out the common parts and but them in variables with suggestive names, -# which may make them easier to understand. -# -# Revision 1.12 1999/03/10 00:15:46 klm -# Committing with version 1.0 of the license. -# -# Revision 1.11 1999/02/08 18:13:12 klm -# Trival checkin (spelling fix "preceedeing" -> "preceding" and similar) -# to see what pitfalls my environment presents to accomplishing a -# successful checkin. (It turns out that i can't do it from aldous because -# the new version of cvs doesn't support the '-t' and '-f' options in the -# cvswrappers file...) -# -# Revision 1.10 1998/12/29 22:30:43 amos -# Improved doc string to describe hyper link and references capabilities. -# -# Revision 1.9 1998/12/04 20:15:31 jim -# Detabification and new copyright. -# -# Revision 1.8 1998/02/27 18:45:22 jim -# Various updates, including new indentation utilities. 
-# -# Revision 1.7 1997/12/12 15:39:54 jim -# Added level as argument for html_with_references. -# -# Revision 1.6 1997/12/12 15:27:25 jim -# Added additional pattern matching for HTML references. -# -# Revision 1.5 1997/03/08 16:01:03 jim -# Moved code to recognize: "foo bar", url. -# into object initializer, so it gets applied in all cases. -# -# Revision 1.4 1997/02/17 23:36:35 jim -# Added support for "foo title", http:/foohost/foo -# -# Revision 1.3 1996/12/06 15:57:37 jim -# Fixed bugs in character tags. -# -# Added -t command-line option to generate title if: -# -# - The first paragraph is one line (i.e. a heading) and -# -# - All other paragraphs are indented. -# -# Revision 1.2 1996/10/28 13:56:02 jim -# Fixed bug in ordered lists. -# Added option for either HTML-style headings or descriptive-list style -# headings. -# -# Revision 1.1 1996/10/23 14:00:45 jim -# *** empty log message *** -# -# -# - -import ts_regex, regex -from ts_regex import gsub -from string import split, join, strip, find - -def untabify(aString, - indent_tab=ts_regex.compile('\(\n\|^\)\( *\)\t').search_group, - ): - '''\ - Convert indentation tabs to spaces. 
- ''' - result='' - rest=aString - while 1: - ts_results = indent_tab(rest, (1,2)) - if ts_results: - start, grps = ts_results - lnl=len(grps[0]) - indent=len(grps[1]) - result=result+rest[:start] - rest="\n%s%s" % (' ' * ((indent/8+1)*8), - rest[start+indent+1+lnl:]) - else: - return result+rest - -def indent(aString, indent=2): - """Indent a string the given number of spaces""" - r=split(untabify(aString),'\n') - if not r: return '' - if not r[-1]: del r[-1] - tab=' '*level - return "%s%s\n" % (tab,join(r,'\n'+tab)) - -def reindent(aString, indent=2, already_untabified=0): - "reindent a block of text, so that the minimum indent is as given" - - if not already_untabified: aString=untabify(aString) - - l=indent_level(aString)[0] - if indent==l: return aString - - r=[] - - append=r.append - - if indent > l: - tab=' ' * (indent-l) - for s in split(aString,'\n'): append(tab+s) - else: - l=l-indent - for s in split(aString,'\n'): append(s[l:]) - - return join(r,'\n') - -def indent_level(aString, - indent_space=ts_regex.compile('\n\( *\)').search_group, - ): - '''\ - Find the minimum indentation for a string, not counting blank lines. 
- ''' - start=0 - text='\n'+aString - indent=l=len(text) - while 1: - - ts_results = indent_space(text, (1,2), start) - if ts_results: - start, grps = ts_results - i=len(grps[0]) - start=start+i+1 - if start < l and text[start] != '\n': # Skip blank lines - if not i: return (0,aString) - if i < indent: indent = i - else: - return (indent,aString) - -def paragraphs(list,start): - l=len(list) - level=list[start][0] - i=start+1 - while i < l and list[i][0] > level: i=i+1 - return i-1-start - -def structure(list): - if not list: return [] - i=0 - l=len(list) - r=[] - while i < l: - sublen=paragraphs(list,i) - i=i+1 - r.append((list[i-1][1],structure(list[i:i+sublen]))) - i=i+sublen - return r - - -class Table: - CELL=' <TD ALIGN=CENTER COLSPAN=%i>%s</TD>\n' - ROW=' <TR>\n%s </TR>\n' - TABLE='\n<TABLE BORDER=1 CELLPADDING=2>\n%s</TABLE>' - - def create(self,aPar,td=ts_regex.compile( - '[ \t\n]*||\([^\0|]*\)').match_group): - '''parses a table and returns nested list representing the - table''' - self.table=[] - text=filter(None,split(aPar,'\n')) - for line in text: - row=[] - while 1: - pos=td(line,(1,)) - if not pos:return 0 - row.append(pos[1]) - if pos[0]==len(line):break - line=line[pos[0]:] - self.table.append(row) - return 1 - - def html(self): - '''Creates an HTML representation of table''' - htmltable=[] - for row in self.table: - htmlrow=[] - colspan=1 - for cell in row: - if cell=='': - colspan=colspan+1 - continue - else: - htmlrow.append(self.CELL%(colspan,cell)) - colspan=1 - htmltable.append(self.ROW%join(htmlrow,'')) - return self.TABLE%join(htmltable,'') - -optional_trailing_punctuation = '\(,\|\([.:?;]\)\)?' -trailing_space = '\([\0- ]\)' -not_punctuation_or_whitespace = "[^-,.?:\0- ]" -table=Table() - -class StructuredText: - - """Model text as structured collection of paragraphs. - - Structure is implied by the indentation level. - - This class is intended as a base classes that do actual text - output formatting. 
- """ - - def __init__(self, aStructuredString, level=0, - paragraph_divider=regex.compile('\(\n *\)+\n'), - ): - '''Convert a structured text string into a structured text object. - - Aguments: - - aStructuredString -- The string to be parsed. - level -- The level of top level headings to be created. - ''' - - aStructuredString = gsub( - '\"\([^\"\0]+\)\":' # title: <"text":> - + ('\([-:a-zA-Z0-9_,./?=@#~]+%s\)' - % not_punctuation_or_whitespace) - + optional_trailing_punctuation - + trailing_space, - '<a href="\\2">\\1</a>\\4\\5\\6', - aStructuredString) - - aStructuredString = gsub( - '\"\([^\"\0]+\)\",[\0- ]+' # title: <"text", > - + ('\([a-zA-Z]*:[-:a-zA-Z0-9_,./?=@#~]*%s\)' - % not_punctuation_or_whitespace) - + optional_trailing_punctuation - + trailing_space, - '<a href="\\2">\\1</a>\\4\\5\\6', - aStructuredString) - - protoless = find(aStructuredString, '<a href=":') - if protoless != -1: - aStructuredString = gsub('<a href=":', '<a href="', - aStructuredString) - - self.level=level - paragraphs=ts_regex.split(untabify(aStructuredString), - paragraph_divider) - paragraphs=map(indent_level,paragraphs) - - self.structure=structure(paragraphs) - - - def __str__(self): - return str(self.structure) - - -ctag_prefix="\([\0- (]\|^\)" -ctag_suffix="\([\0- ,.:;!?)]\|$\)" -ctag_middle="[%s]\([^\0- %s][^%s]*[^\0- %s]\|[^%s]\)[%s]" -ctag_middl2="[%s][%s]\([^\0- %s][^%s]*[^\0- %s]\|[^%s]\)[%s][%s]" - -def ctag(s, - em=regex.compile( - ctag_prefix+(ctag_middle % (("*",)*6) )+ctag_suffix), - strong=regex.compile( - ctag_prefix+(ctag_middl2 % (("*",)*8))+ctag_suffix), - under=regex.compile( - ctag_prefix+(ctag_middle % (("_",)*6) )+ctag_suffix), - code=regex.compile( - ctag_prefix+(ctag_middle % (("\'",)*6))+ctag_suffix), - ): - if s is None: s='' - s=gsub(strong,'\\1<strong>\\2</strong>\\3',s) - s=gsub(under, '\\1<u>\\2</u>\\3',s) - s=gsub(code, '\\1<code>\\2</code>\\3',s) - s=gsub(em, '\\1<em>\\2</em>\\3',s) - return s - -class HTML(StructuredText): - - '''\ - An HTML 
structured text formatter. - '''\ - - def __str__(self, - extra_dl=regex.compile("</dl>\n<dl>"), - extra_ul=regex.compile("</ul>\n<ul>"), - extra_ol=regex.compile("</ol>\n<ol>"), - ): - '''\ - Return an HTML string representation of the structured text data. - - ''' - s=self._str(self.structure,self.level) - s=gsub(extra_dl,'\n',s) - s=gsub(extra_ul,'\n',s) - s=gsub(extra_ol,'\n',s) - return s - - def ul(self, before, p, after): - if p: p="<p>%s</p>" % strip(ctag(p)) - return ('%s<ul><li>%s\n%s\n</li></ul>\n' - % (before,p,after)) - - def ol(self, before, p, after): - if p: p="<p>%s</p>" % strip(ctag(p)) - return ('%s<ol><li>%s\n%s\n</li></ol>\n' - % (before,p,after)) - - def dl(self, before, t, d, after): - return ('%s<dl><dt>%s</dt><dd><p>%s</p>\n%s\n</dd></dl>\n' - % (before,ctag(t),ctag(d),after)) - - def head(self, before, t, level, d): - if level > 0 and level < 6: - return ('%s<h%d>%s</h%d>\n%s\n' - % (before,level,strip(ctag(t)),level,d)) - - t="<p><strong>%s</strong><p>" % strip(ctag(t)) - return ('%s<dl><dt>%s\n</dt><dd>%s\n</dd></dl>\n' - % (before,t,d)) - - def normal(self,before,p,after): - return '%s<p>%s</p>\n%s\n' % (before,ctag(p),after) +def html_with_references(text, level=1): + text = re.sub( + r'[\000\n]\.\. 
\[([0-9_%s-]+)\]' % letters, + r'\n <a name="\1">[\1]</a>', + text) - def pre(self,structure,tagged=0): - if not structure: return '' - if tagged: - r='' - else: - r='<PRE>\n' - for s in structure: - r="%s%s\n\n%s" % (r,html_quote(s[0]),self.pre(s[1],1)) - if not tagged: r=r+'</PRE>\n' - return r - - def table(self,before,table,after): - return '%s<p>%s</p>\n%s\n' % (before,ctag(table),after) + text = re.sub( + r'([\000- ,])\[(?P<ref>[0-9_%s-]+)\]([\000- ,.:])' % letters, + r'\1<a href="#\2">[\2]</a>\3', + text) - def _str(self,structure,level, - # Static - bullet=ts_regex.compile('[ \t\n]*[o*-][ \t\n]+\([^\0]*\)' - ).match_group, - example=ts_regex.compile('[\0- ]examples?:[\0- ]*$' - ).search, - dl=ts_regex.compile('\([^\n]+\)[ \t]+--[ \t\n]+\([^\0]*\)' - ).match_group, - nl=ts_regex.compile('\n').search, - ol=ts_regex.compile( - '[ \t]*\(\([0-9]+\|[a-zA-Z]+\)[.)]\)+[ \t\n]+\([^\0]*\|$\)' - ).match_group, - olp=ts_regex.compile('[ \t]*([0-9]+)[ \t\n]+\([^\0]*\|$\)' - ).match_group, - ): - r='' - for s in structure: - - ts_results = bullet(s[0], (1,)) - if ts_results: - p = ts_results[1] - if s[0][-2:]=='::' and s[1]: ps=self.pre(s[1]) - else: ps=self._str(s[1],level) - r=self.ul(r,p,ps) - continue - ts_results = ol(s[0], (3,)) - if ts_results: - p = ts_results[1] - if s[0][-2:]=='::' and s[1]: ps=self.pre(s[1]) - else: ps=self._str(s[1],level) - r=self.ol(r,p,ps) - continue - ts_results = olp(s[0], (1,)) - if ts_results: - p = ts_results[1] - if s[0][-2:]=='::' and s[1]: ps=self.pre(s[1]) - else: ps=self._str(s[1],level) - r=self.ol(r,p,ps) - continue - ts_results = dl(s[0], (1,2)) - if ts_results: - t,d = ts_results[1] - r=self.dl(r,t,d,self._str(s[1],level)) - continue - if example(s[0]) >= 0 and s[1]: - # Introduce an example, using pre tags: - r=self.normal(r,s[0],self.pre(s[1])) - continue - if s[0][-2:]=='::' and s[1]: - # Introduce an example, using pre tags: - r=self.normal(r,s[0][:-1],self.pre(s[1])) - continue - if table.create(s[0]): - ## table 
support. - r=self.table(r,table.html(),self._str(s[1],level)) - continue - else: + text = re.sub( + r'([\000- ,])\[([^]]+)\.html\]([\000- ,.:])', + r'\1<a href="\2.html">[\2]</a>\3', + text) - if nl(s[0]) < 0 and s[1] and s[0][-1:] != ':': - # Treat as a heading - t=s[0] - r=self.head(r,t,level, - self._str(s[1],level and level+1)) - else: - r=self.normal(r,s[0],self._str(s[1],level)) - return r - + return HTML(text,level=level) def html_quote(v, character_entities=( - (regex.compile('&'), '&amp;'), - (regex.compile("<"), '&lt;' ), - (regex.compile(">"), '&gt;' ), - (regex.compile('"'), '&quot;') + (re.compile('&'), '&amp;'), + (re.compile("<"), '&lt;' ), + (re.compile(">"), '&gt;' ), + (re.compile('"'), '&quot;') )): #" text=str(v) for re,name in character_entities: - text=gsub(re,name,text) + text=re.sub(name,text) return text -def html_with_references(text, level=1): - text = gsub( - '[\0\n].. \[\([-_0-9_a-zA-Z-]+\)\]', - '\n <a name="\\1">[\\1]</a>', - text) - - text = gsub( - '\([\0- ,]\)\[\([0-9_a-zA-Z-]+\)\]\([\0- ,.:]\)', - '\\1<a href="#\\2">[\\2]</a>\\3', - text) - - text = gsub( - '\([\0- ,]\)\[\([^]]+\)\.html\]\([\0- ,.:]\)', - '\\1<a href="\\2.html">[\\2]</a>\\3', - text) - - return HTML(text,level=level) - - -def main(): - import sys, getopt - - opts,args=getopt.getopt(sys.argv[1:],'tw') - - if args: - [infile]=args - s=open(infile,'r').read() - else: - s=sys.stdin.read() - if opts: +if __name__=='__main__': + import getopt - if filter(lambda o: o[0]=='-w', opts): - print 'Content-Type: text/html\n' + opts,args = getopt.getopt(sys.argv[1:],'',[]) - if s[:2]=='#!': - s=ts_regex.sub('^#![^\n]+','',s) + for k,v in opts: + pass - r=ts_regex.compile('\([\0-\n]*\n\)') - ts_results = r.match_group(s, (1,)) - if ts_results: - s=s[len(ts_results[1]):] - s=str(html_with_references(s)) - if s[:4]=='<h1>': - t=s[4:find(s,'</h1>')] - s='''<html><head><title>%s</title> - </head><body> - %s - </body></html> - ''' % (t,s) - print s - else: - print html_with_references(s) -if __name__=="__main__":
main() + for f in args: + print HTML(open(f).read()) diff --git a/wxPython/samples/stxview/StructuredText/__init__.py b/wxPython/samples/stxview/StructuredText/__init__.py index 65e6f75b0f..a5c1e5b047 100644 --- a/wxPython/samples/stxview/StructuredText/__init__.py +++ b/wxPython/samples/stxview/StructuredText/__init__.py @@ -104,7 +104,7 @@ Document=DocumentClass.DocumentClass() DocumentWithImages=DocumentWithImages.DocumentWithImages() HTMLWithImages=HTMLWithImages.HTMLWithImages() -DocBookBook=DocBookClass.DocBookBook +DocBookBook=DocBookClass.DocBookBook() DocBookChapter=DocBookClass.DocBookChapter() DocBookChapterWithFigures=DocBookClass.DocBookChapterWithFigures() DocBookArticle=DocBookClass.DocBookArticle() diff --git a/wxPython/samples/stxview/StructuredText/ts_regex.py b/wxPython/samples/stxview/StructuredText/ts_regex.py deleted file mode 100644 index 1471eb2449..0000000000 --- a/wxPython/samples/stxview/StructuredText/ts_regex.py +++ /dev/null @@ -1,215 +0,0 @@ -############################################################################## -# -# Zope Public License (ZPL) Version 1.0 -# ------------------------------------- -# -# Copyright (c) Digital Creations. All rights reserved. -# -# This license has been certified as Open Source(tm). -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions in source code must retain the above copyright -# notice, this list of conditions, and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions, and the following disclaimer in -# the documentation and/or other materials provided with the -# distribution. -# -# 3. Digital Creations requests that attribution be given to Zope -# in any manner possible. Zope includes a "Powered by Zope" -# button that is installed by default. 
While it is not a license -# violation to remove this button, it is requested that the -# attribution remain. A significant investment has been put -# into Zope, and this effort will continue if the Zope community -# continues to grow. This is one way to assure that growth. -# -# 4. All advertising materials and documentation mentioning -# features derived from or use of this software must display -# the following acknowledgement: -# -# "This product includes software developed by Digital Creations -# for use in the Z Object Publishing Environment -# (http://www.zope.org/)." -# -# In the event that the product being advertised includes an -# intact Zope distribution (with copyright and license included) -# then this clause is waived. -# -# 5. Names associated with Zope or Digital Creations must not be used to -# endorse or promote products derived from this software without -# prior written permission from Digital Creations. -# -# 6. Modified redistributions of any form whatsoever must retain -# the following acknowledgment: -# -# "This product includes software developed by Digital Creations -# for use in the Z Object Publishing Environment -# (http://www.zope.org/)." -# -# Intact (re-)distributions of any official Zope release do not -# require an external acknowledgement. -# -# 7. Modifications are encouraged but must be packaged separately as -# patches to official Zope releases. Distributions that do not -# clearly separate the patches from the original work must be clearly -# labeled as unofficial distributions. Modifications which do not -# carry the name Zope may be packaged in any form, as long as they -# conform to all of the clauses above. -# -# -# Disclaimer -# -# THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY -# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL DIGITAL CREATIONS OR ITS -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF -# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT -# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -# SUCH DAMAGE. -# -# -# This software consists of contributions made by Digital Creations and -# many individuals on behalf of Digital Creations. Specific -# attributions are listed in the accompanying credits file. -# -############################################################################## -"""Provide a thread-safe interface to regex -""" -import regex, regsub #, Sync -from regex import * -from regsub import split, sub, gsub, splitx, capwords - -try: - import thread -except: - class allocate_lock: - def acquire(*args): pass - def release(*args): pass - -else: - class SafeFunction: - _l=thread.allocate_lock() - _a=_l.acquire - _r=_l.release - - def __init__(self, f): - self._f=f - - def __call__(self, *args, **kw): - self._a() - try: return apply(self._f, args, kw) - finally: self._r() - - split=SafeFunction(split) - sub=SafeFunction(sub) - gsub=SafeFunction(gsub) - splitx=SafeFunction(splitx) - capwords=SafeFunction(capwords) - - allocate_lock=thread.allocate_lock - -class compile: - - _r=None - groupindex=None - - def __init__(self, *args): - self._r=r=apply(regex.compile,args) - self._init(r) - - def _init(self, r): - lock=allocate_lock() - self.__a=lock.acquire - self.__r=lock.release - self.translate=r.translate - self.givenpat=r.givenpat - self.realpat=r.realpat - - def match(self, string, pos=0): - self.__a() - try: return self._r.match(string, pos) - finally: self.__r() - - def search(self, string, pos=0): - self.__a() - try: return 
self._r.search(string, pos) - finally: self.__r() - - def search_group(self, str, group, pos=0): - """Search a string for a pattern. - - If the pattern was not found, then None is returned, - otherwise, the location where the pattern was found, - as well as any specified group are returned. - """ - self.__a() - try: - r=self._r - l=r.search(str, pos) - if l < 0: return None - return l, apply(r.group, group) - finally: self.__r() - - def match_group(self, str, group, pos=0): - """Match a pattern against a string - - If the string does not match the pattern, then None is - returned, otherwise, the length of the match, as well - as any specified group are returned. - """ - self.__a() - try: - r=self._r - l=r.match(str, pos) - if l < 0: return None - return l, apply(r.group, group) - finally: self.__r() - - def search_regs(self, str, pos=0): - """Search a string for a pattern. - - If the pattern was not found, then None is returned, - otherwise, the 'regs' attribute of the expression is - returned. - """ - self.__a() - try: - r=self._r - r.search(str, pos) - return r.regs - finally: self.__r() - - def match_regs(self, str, pos=0): - """Match a pattern against a string - - If the string does not match the pattern, then None is - returned, otherwise, the 'regs' attribute of the expression is - returned. 
- """ - self.__a() - try: - r=self._r - r.match(str, pos) - return r.regs - finally: self.__r() - -class symcomp(compile): - - def __init__(self, *args): - self._r=r=apply(regex.symcomp,args) - self._init(r) - self.groupindex=r.groupindex - - - - - diff --git a/wxPython/samples/stxview/stxview.py b/wxPython/samples/stxview/stxview.py index 4599f9749e..e452855f83 100644 --- a/wxPython/samples/stxview/stxview.py +++ b/wxPython/samples/stxview/stxview.py @@ -102,12 +102,12 @@ class StxFrame(wxFrame): def LoadStxText(self, text): # Old ST - html = str(StructuredText.html_with_references(text)) + #html = str(StructuredText.html_with_references(text)) # NG Version - #st = StructuredText.Basic(text) - #doc = StructuredText.Document(st) - #html = StructuredText.HTML(doc) + st = StructuredText.Basic(text) + doc = StructuredText.Document(st) + html = StructuredText.HTMLNG(doc) self.htmlWin.SetPage(html) self.editWin.SetValue(text) -- 2.49.0