]> git.saurik.com Git - wxWidgets.git/commitdiff
Got a new version of StructuredText from Zope's CVS.
authorRobin Dunn <robin@alldunn.com>
Fri, 4 May 2001 18:28:27 +0000 (18:28 +0000)
committerRobin Dunn <robin@alldunn.com>
Fri, 4 May 2001 18:28:27 +0000 (18:28 +0000)
git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@9995 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775

15 files changed:
wxPython/samples/stxview/StructuredText/ClassicDocumentClass.py
wxPython/samples/stxview/StructuredText/ClassicStructuredText.py [new file with mode: 0644]
wxPython/samples/stxview/StructuredText/DocBookClass.py
wxPython/samples/stxview/StructuredText/DocumentClass.py
wxPython/samples/stxview/StructuredText/HTMLClass.py
wxPython/samples/stxview/StructuredText/HTMLWithImages.py
wxPython/samples/stxview/StructuredText/MML.py [deleted file]
wxPython/samples/stxview/StructuredText/ST.py
wxPython/samples/stxview/StructuredText/STDOM.py
wxPython/samples/stxview/StructuredText/STNG.txt
wxPython/samples/stxview/StructuredText/STletters.py [new file with mode: 0644]
wxPython/samples/stxview/StructuredText/StructuredText.py
wxPython/samples/stxview/StructuredText/__init__.py
wxPython/samples/stxview/StructuredText/ts_regex.py [deleted file]
wxPython/samples/stxview/stxview.py

index 23b73d6294a5eae5802c8a2cb880ac85020d9bfd..69fc9c81bbb744f51ef91aa93588dd8bc9354c48 100644 (file)
@@ -1,24 +1,24 @@
 ##############################################################################
-# 
+#
 # Zope Public License (ZPL) Version 1.0
 # -------------------------------------
-# 
+#
 # Copyright (c) Digital Creations.  All rights reserved.
-# 
+#
 # This license has been certified as Open Source(tm).
-# 
+#
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are
 # met:
-# 
+#
 # 1. Redistributions in source code must retain the above copyright
 #    notice, this list of conditions, and the following disclaimer.
-# 
+#
 # 2. Redistributions in binary form must reproduce the above copyright
 #    notice, this list of conditions, and the following disclaimer in
 #    the documentation and/or other materials provided with the
 #    distribution.
-# 
+#
 # 3. Digital Creations requests that attribution be given to Zope
 #    in any manner possible. Zope includes a "Powered by Zope"
 #    button that is installed by default. While it is not a license
 #    attribution remain. A significant investment has been put
 #    into Zope, and this effort will continue if the Zope community
 #    continues to grow. This is one way to assure that growth.
-# 
+#
 # 4. All advertising materials and documentation mentioning
 #    features derived from or use of this software must display
 #    the following acknowledgement:
-# 
+#
 #       "This product includes software developed by Digital Creations
 #       for use in the Z Object Publishing Environment
 #       (http://www.zope.org/)."
-# 
+#
 #    In the event that the product being advertised includes an
 #    intact Zope distribution (with copyright and license included)
 #    then this clause is waived.
-# 
+#
 # 5. Names associated with Zope or Digital Creations must not be used to
 #    endorse or promote products derived from this software without
 #    prior written permission from Digital Creations.
-# 
+#
 # 6. Modified redistributions of any form whatsoever must retain
 #    the following acknowledgment:
-# 
+#
 #       "This product includes software developed by Digital Creations
 #       for use in the Z Object Publishing Environment
 #       (http://www.zope.org/)."
-# 
+#
 #    Intact (re-)distributions of any official Zope release do not
 #    require an external acknowledgement.
-# 
+#
 # 7. Modifications are encouraged but must be packaged separately as
 #    patches to official Zope releases.  Distributions that do not
 #    clearly separate the patches from the original work must be clearly
 #    labeled as unofficial distributions.  Modifications which do not
 #    carry the name Zope may be packaged in any form, as long as they
 #    conform to all of the clauses above.
-# 
-# 
+#
+#
 # Disclaimer
-# 
+#
 #    THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
 #    EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 #    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 #    OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 #    OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 #    SUCH DAMAGE.
-# 
-# 
+#
+#
 # This software consists of contributions made by Digital Creations and
 # many individuals on behalf of Digital Creations.  Specific
 # attributions are listed in the accompanying credits file.
-# 
+#
 ##############################################################################
 
 import re, ST, STDOM
 from string import split, join, replace, expandtabs, strip, find
+from STletters import letters,lettpunc,punctuations
 
 StringType=type('')
 ListType=type([])
@@ -116,7 +117,7 @@ class StructuredTextDescriptionBody(ST.StructuredTextParagraph):
 
 class StructuredTextDescription(ST.StructuredTextParagraph):
     """Represents a section of a document with a title and a body"""
-    
+
     def __init__(self, title, src, subs, **kw):
        apply(ST.StructuredTextParagraph.__init__, (self, src, subs), kw)
        self._title=title
@@ -138,6 +139,12 @@ class StructuredTextSection(ST.StructuredTextParagraph):
              (self, StructuredTextSectionTitle(src), subs),
              kw)
 
+    def getColorizableTexts(self):
+        return self._src.getColorizableTexts()
+
+    def setColorizableTexts(self,src):
+        self._src.setColorizableTexts(src)
+
 # a StructuredTextTable holds StructuredTextRows
 class StructuredTextTable(ST.StructuredTextDocument):
     """
@@ -146,27 +153,27 @@ class StructuredTextTable(ST.StructuredTextDocument):
     EX
     rows = [[('row 1:column1',1)],[('row2:column1',1)]]
     """
-    
+
     def __init__(self, rows, src, subs, **kw):
         apply(ST.StructuredTextDocument.__init__,(self,subs),kw)
         self._rows = []
         for row in rows:
             if row:
                 self._rows.append(StructuredTextRow(row,kw))
-    
+
     def getRows(self):
         return [self._rows]
-    
+
     def _getRows(self):
         return self.getRows()
-    
+
     def getColorizableTexts(self):
         """
         return a tuple where each item is a column/cell's
         contents. The tuple, result, will be of this format.
         ("r1 col1", "r1=col2", "r2 col1", "r2 col2")
         """
-        
+
         #result = ()
         result = []
         for row in self._rows:
@@ -174,7 +181,7 @@ class StructuredTextTable(ST.StructuredTextDocument):
                 #result = result[:] + (column.getColorizableTexts(),)
                 result.append(column.getColorizableTexts()[0])
         return result
-    
+
     def setColorizableTexts(self,texts):
         """
         texts is going to a tuple where each item is the
@@ -186,35 +193,35 @@ class StructuredTextTable(ST.StructuredTextDocument):
             for column_index in range(len(self._rows[row_index]._columns)):
                 self._rows[row_index]._columns[column_index].setColorizableTexts((texts[0],))
                 texts = texts[1:]
-        
+
     def _getColorizableTexts(self):
         return self.getColorizableTexts()
-    
+
     def _setColorizableTexts(self):
         return self.setColorizableTexts()
-    
+
 # StructuredTextRow holds StructuredTextColumns
 class StructuredTextRow(ST.StructuredTextDocument):
-    
+
     def __init__(self,row,kw):
         """
         row is a list of tuples, where each tuple is
         the raw text for a cell/column and the span
-        of that cell/column". 
-        EX 
+        of that cell/column".
+        EX
         [('this is column one',1), ('this is column two',1)]
         """
-        
+
         apply(ST.StructuredTextDocument.__init__,(self,[]),kw)
         self._columns = []
-        for column in row:            
+        for column in row:
             self._columns.append(StructuredTextColumn(column[0],column[1],kw))
     def getColumns(self):
         return [self._columns]
 
     def _getColumns(self):
         return [self._columns]
-    
+
 # this holds the raw text of a table cell
 class StructuredTextColumn(ST.StructuredTextParagraph):
     """
@@ -223,19 +230,19 @@ class StructuredTextColumn(ST.StructuredTextParagraph):
     thus a StructuredTextParagraph. A StructuredTextColumn
     also holds the span of its column
     """
-    
+
     def __init__(self,text,span,kw):
         apply(ST.StructuredTextParagraph.__init__,(self,text,[]),kw)
         self._span = span
-    
+
     def getSpan(self):
         return self._span
-    
+
     def _getSpan(self):
         return self._span
-    
+
 class StructuredTextMarkup(STDOM.Element):
-    
+
     def __init__(self, v, **kw):
        self._value=v
        self._attributes=kw.keys()
@@ -269,7 +276,7 @@ class StructuredTextUnderline(StructuredTextMarkup): pass
 class StructuredTextLink(StructuredTextMarkup):
     "A simple hyperlink"
 
-class DocumentClass:    
+class DocumentClass:
     """
     Class instance calls [ex.=> x()] require a structured text
     structure. Doc will then parse each paragraph in the structure
@@ -316,10 +323,10 @@ class DocumentClass:
        """
        Parse accepts a raw_string, an expr to test the raw_string,
        and the raw_string's subparagraphs.
-       
-       Parse will continue to search through raw_string until 
-       all instances of expr in raw_string are found. 
-       
+
+       Parse will continue to search through raw_string until
+       all instances of expr in raw_string are found.
+
        If no instances of expr are found, raw_string is returned.
        Otherwise a list of substrings and instances is returned
        """
@@ -351,10 +358,10 @@ class DocumentClass:
              raw_string = raw_string[end:len(raw_string)]
 
        if not tmp: return raw_string # nothing found
-       
+
        if raw_string: append(raw_string)
        elif len(tmp)==1: return tmp[0]
-       
+
        return tmp
 
 
@@ -386,7 +393,7 @@ class DocumentClass:
              for s in str.getColorizableTexts():
                 color(s, (text_type,))
                 a(s)
-                
+
              str.setColorizableTexts(r)
 
        return str
@@ -396,11 +403,11 @@ class DocumentClass:
                            st=type('')):
        result=[]
        for paragraph in raw_paragraphs:
-          
+
           if paragraph.getNodeName() != 'StructuredTextParagraph':
              result.append(paragraph)
              continue
-          
+
           for pt in self.paragraph_types:
              if type(pt) is st:
                 # grab the corresponding function
@@ -428,19 +435,20 @@ class DocumentClass:
              result.append(paragraph)
 
        return result
-    
+
     def doc_table(self,paragraph, expr = re.compile('(\s*)([||]+)').match):
+        #print "paragraph=>", type(paragraph), paragraph, paragraph._src
         text    = paragraph.getColorizableTexts()[0]
         m       = expr(text)
-        
+
         if not (m):
             return None
         rows = []
-    
+
         # initial split
         for row in split(text,"\n"):
-            rows.append(row)    
-    
+            rows.append(row)
+
         # clean up the rows
         for index in range(len(rows)):
             tmp = []
@@ -458,30 +466,30 @@ class DocumentClass:
         for index in range(len(rows)):
             l = len(rows[index])-1
             rows[index] = rows[index][:l]
-        
+
         result = []
         for row in rows:
             cspan   = 0
             tmp     = []
             for item in row:
                 if item:
-                    tmp.append(item,cspan)
+                    tmp.append((item,cspan))
                     cspan = 0
                 else:
                     cspan = cspan + 1
             result.append(tmp)
-        
+
         subs = paragraph.getSubparagraphs()
         indent=paragraph.indent
         return StructuredTextTable(result,text,subs,indent=paragraph.indent)
-            
+
     def doc_bullet(self, paragraph, expr = re.compile('\s*[-*o]\s+').match):
         top=paragraph.getColorizableTexts()[0]
         m=expr(top)
 
         if not m:
             return None
-            
+
         subs=paragraph.getSubparagraphs()
         if top[-2:]=='::':
            subs=[StructuredTextExample(subs)]
@@ -493,17 +501,17 @@ class DocumentClass:
 
     def doc_numbered(
         self, paragraph,
-        expr = re.compile('(\s*[a-zA-Z]+\.)|(\s*[0-9]+\.)|(\s*[0-9]+\s+)').match):
-        
+        expr = re.compile('(\s*[%s]+\.)|(\s*[0-9]+\.)|(\s*[0-9]+\s+)' % letters).match):
+
         # This is the old expression. It had a nasty habit
         # of grabbing paragraphs that began with a single
         # letter word even if there was no following period.
-        
+
         #expr = re.compile('\s*'
         #                   '(([a-zA-Z]|[0-9]+|[ivxlcdmIVXLCDM]+)\.)*'
         #                   '([a-zA-Z]|[0-9]+|[ivxlcdmIVXLCDM]+)\.?'
         #                   '\s+').match):
-        
+
         top=paragraph.getColorizableTexts()[0]
         m=expr(top)
         if not m: return None
@@ -518,7 +526,7 @@ class DocumentClass:
     def doc_description(
         self, paragraph,
         delim = re.compile('\s+--\s+').search,
-        nb=re.compile(r'[^\0- ]').search,
+        nb=re.compile(r'[^\000- ]').search,
         ):
 
         top=paragraph.getColorizableTexts()[0]
@@ -542,7 +550,7 @@ class DocumentClass:
            delim=d)
 
     def doc_header(self, paragraph,
-                    expr    = re.compile('[ a-zA-Z0-9.:/,-_*<>\?\'\"]+').match
+                    expr    = re.compile('[ %s0-9.:/,-_*<>\?\'\"]+' % letters).match
                     ):
         subs=paragraph.getSubparagraphs()
         if not subs: return None
@@ -562,11 +570,11 @@ class DocumentClass:
     def doc_literal(
         self, s,
         expr=re.compile(
-          "(?:\s|^)'"                                                  # open
+          "(?:\s|^)'"                                               # open
           "([^ \t\n\r\f\v']|[^ \t\n\r\f\v'][^\n']*[^ \t\n\r\f\v'])" # contents
-          "'(?:\s|[,.;:!?]|$)"                                        # close
+          "'(?:\s|[,.;:!?]|$)"                                      # close
           ).search):
-        
+
         r=expr(s)
         if r:
            start, end = r.span(1)
@@ -576,7 +584,7 @@ class DocumentClass:
 
     def doc_emphasize(
         self, s,
-        expr = re.compile('\s*\*([ \na-zA-Z0-9.:/;,\'\"\?]+)\*(?!\*|-)').search
+        expr = re.compile('\s*\*([ \n%s0-9]+)\*(?!\*|-)' % lettpunc).search
         ):
 
         r=expr(s)
@@ -585,12 +593,12 @@ class DocumentClass:
            return (StructuredTextEmphasis(s[start:end]), start-1, end+1)
         else:
            return None
-    
+
     def doc_inner_link(self,
                        s,
                        expr1 = re.compile("\.\.\s*").search,
-                       expr2 = re.compile("\[[a-zA-Z0-9]+\]").search):
-        
+                       expr2 = re.compile("\[[%s0-9]+\]" % letters).search):
+
         # make sure we dont grab a named link
         if expr2(s) and expr1(s):
             start1,end1 = expr1(s).span()
@@ -600,17 +608,17 @@ class DocumentClass:
                 return None
             else:
                 # the .. is somewhere else, ignore it
-                return (StructuredTextInnerLink(s[start2+1,end2-1],start2,end2))
+                return (StructuredTextInnerLink(s[start2+1:end2-1]),start2,end2)
             return None
         elif expr2(s) and not expr1(s):
             start,end = expr2(s).span()
             return (StructuredTextInnerLink(s[start+1:end-1]),start,end)
         return None
-    
+
     def doc_named_link(self,
                        s,
-                       expr=re.compile("(\.\.\s)(\[[a-zA-Z0-9]+\])").search):
-        
+                       expr=re.compile("(\.\.\s)(\[[%s0-9]+\])" % letters).search):
+
         result = expr(s)
         if result:
             start,end   = result.span(2)
@@ -621,11 +629,11 @@ class DocumentClass:
             return (StructuredTextNamedLink(str),st,en)
             #return (StructuredTextNamedLink(s[st:en]),st,en)
         return None
-    
+
     def doc_underline(self,
                       s,
-                      expr=re.compile("\_([a-zA-Z0-9\s\.,\?\/]+)\_").search):
-        
+                      expr=re.compile("\s+\_([0-9%s ]+)\_" % lettpunc).search):
+
         result = expr(s)
         if result:
             start,end = result.span(1)
@@ -633,10 +641,10 @@ class DocumentClass:
             return (StructuredTextUnderline(s[start:end]),st,e)
         else:
             return None
-    
-    def doc_strong(self, 
+
+    def doc_strong(self,
                    s,
-        expr = re.compile('\s*\*\*([ \na-zA-Z0-9.:/;\-,!\?\'\"]+)\*\*').search
+        expr = re.compile('\s*\*\*([ \n%s0-9]+)\*\*' % lettpunc).search
         ):
 
         r=expr(s)
@@ -645,45 +653,32 @@ class DocumentClass:
            return (StructuredTextStrong(s[start:end]), start-2, end+2)
         else:
            return None
-    
+
     def doc_href(
-        
+
         self, s,
-        expr1 = re.compile("(\"[ a-zA-Z0-9\n\-\.\,\;\(\)\/\:\/]+\")(:)([a-zA-Z0-9\:\/\.\~\-]+)([,]*\s*)").search,
-        expr2 = re.compile('(\"[ a-zA-Z0-9\n\-\.\:\;\(\)\/]+\")([,]+\s+)([a-zA-Z0-9\@\.\,\?\!\/\:\;\-\#]+)(\s*)').search):
-        
-        #expr1=re.compile('\"([ a-zA-Z0-9.:/;,\n\~\(\)\-]+)\"'
-        #                  ':'
-        #                  '([a-zA-Z0-9.:/;,\n\~]+)(?=(\s+|\.|\!|\?))'
-        #                  ).search,
-        #expr2=re.compile('\"([ a-zA-Z0-9./:]+)\"'
-        #                  ',\s+'
-        #                  '([ a-zA-Z0-9@.:/;]+)(?=(\s+|\.|\!|\?))'
-        #                  ).search,
-        
-        punctuation = re.compile("[\,\.\?\!\;]+").match
+        expr1 = re.compile("(\"[ %s0-9\n\-\.\,\;\(\)\/\:\/\*\']+\")(:)([a-zA-Z0-9\@\.\,\?\!\/\:\;\-\#\~]+)([,]*\s*)" % letters).search,
+        expr2 = re.compile('(\"[ %s0-9\n\-\.\:\;\(\)\/\*\']+\")([,]+\s+)([a-zA-Z0-9\@\.\,\?\!\/\:\;\-\#\~]+)(\s*)' % letters).search):
+
         r=expr1(s) or expr2(s)
 
         if r:
             # need to grab the href part and the
             # beginning part
-                        
+
             start,e = r.span(1)
             name    = s[start:e]
             name    = replace(name,'"','',2)
-            #start   = start + 1
             st,end   = r.span(3)
-            if punctuation(s[end-1:end]):
-                end = end -1
+
+            if s[end-1:end] in punctuations: end-=1
             link    = s[st:end]
-            #end     = end - 1                        
-            
+
             # name is the href title, link is the target
             # of the href
             return (StructuredTextLink(name, href=link),
                     start, end)
-            
-            #return (StructuredTextLink(s[start:end], href=s[start:end]),
-            #        start, end)
+
+
         else:
             return None
diff --git a/wxPython/samples/stxview/StructuredText/ClassicStructuredText.py b/wxPython/samples/stxview/StructuredText/ClassicStructuredText.py
new file mode 100644 (file)
index 0000000..b591558
--- /dev/null
@@ -0,0 +1,625 @@
+#! /usr/bin/env python -- # -*- python -*-
+##############################################################################
+# 
+# Zope Public License (ZPL) Version 1.0
+# -------------------------------------
+# 
+# Copyright (c) Digital Creations.  All rights reserved.
+# 
+# This license has been certified as Open Source(tm).
+# 
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+# 
+# 1. Redistributions in source code must retain the above copyright
+#    notice, this list of conditions, and the following disclaimer.
+# 
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions, and the following disclaimer in
+#    the documentation and/or other materials provided with the
+#    distribution.
+# 
+# 3. Digital Creations requests that attribution be given to Zope
+#    in any manner possible. Zope includes a "Powered by Zope"
+#    button that is installed by default. While it is not a license
+#    violation to remove this button, it is requested that the
+#    attribution remain. A significant investment has been put
+#    into Zope, and this effort will continue if the Zope community
+#    continues to grow. This is one way to assure that growth.
+# 
+# 4. All advertising materials and documentation mentioning
+#    features derived from or use of this software must display
+#    the following acknowledgement:
+# 
+#      "This product includes software developed by Digital Creations
+#      for use in the Z Object Publishing Environment
+#      (http://www.zope.org/)."
+# 
+#    In the event that the product being advertised includes an
+#    intact Zope distribution (with copyright and license included)
+#    then this clause is waived.
+# 
+# 5. Names associated with Zope or Digital Creations must not be used to
+#    endorse or promote products derived from this software without
+#    prior written permission from Digital Creations.
+# 
+# 6. Modified redistributions of any form whatsoever must retain
+#    the following acknowledgment:
+# 
+#      "This product includes software developed by Digital Creations
+#      for use in the Z Object Publishing Environment
+#      (http://www.zope.org/)."
+# 
+#    Intact (re-)distributions of any official Zope release do not
+#    require an external acknowledgement.
+# 
+# 7. Modifications are encouraged but must be packaged separately as
+#    patches to official Zope releases.  Distributions that do not
+#    clearly separate the patches from the original work must be clearly
+#    labeled as unofficial distributions.  Modifications which do not
+#    carry the name Zope may be packaged in any form, as long as they
+#    conform to all of the clauses above.
+# 
+# 
+# Disclaimer
+# 
+#   THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
+#   EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+#   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+#   PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
+#   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+#   USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+#   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+#   OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+#   OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+#   SUCH DAMAGE.
+# 
+# 
+# This software consists of contributions made by Digital Creations and
+# many individuals on behalf of Digital Creations.  Specific
+# attributions are listed in the accompanying credits file.
+# 
+##############################################################################
+'''Structured Text Manipulation
+
+Parse a structured text string into a form that can be used with 
+structured formats, like html.
+
+Structured text is text that uses indentation and simple
+symbology to indicate the structure of a document.  
+
+A structured string consists of a sequence of paragraphs separated by
+one or more blank lines.  Each paragraph has a level which is defined
+as the minimum indentation of the paragraph.  A paragraph is a
+sub-paragraph of another paragraph if the other paragraph is the last
+preceding paragraph that has a lower level.
+
+Special symbology is used to indicate special constructs:
+
+- A single-line paragraph whose immediately succeeding paragraphs are lower
+  level is treated as a header.
+
+- A paragraph that begins with a '-', '*', or 'o' is treated as an
+  unordered list (bullet) element.
+
+- A paragraph that begins with a sequence of digits followed by a
+  white-space character is treated as an ordered list element.
+
+- A paragraph that begins with a sequence of sequences, where each
+  sequence is a sequence of digits or a sequence of letters followed
+  by a period, is treated as an ordered list element.
+
+- A paragraph with a first line that contains some text, followed by
+  some white-space and '--' is treated as
+  a descriptive list element. The leading text is treated as the
+  element title.
+
+- Sub-paragraphs of a paragraph that ends in the word 'example' or the
+  word 'examples', or '::', are treated as example code and are output as is.
+
+- Text enclosed in single quotes (with white-space to the left of the
+  first quote and whitespace or punctuation to the right of the second quote)
+  is treated as example code.
+
+- Text surrounded by '*' characters (with white-space to the left of the
+  first '*' and whitespace or punctuation to the right of the second '*')
+  is emphasized.
+
+- Text surrounded by '**' characters (with white-space to the left of the
+  first '**' and whitespace or punctuation to the right of the second '**')
+  is made strong.
+
+- Text surrounded by '_' underscore characters (with whitespace to the left 
+  and whitespace or punctuation to the right) is made underlined.
+
+- Text enclosed by double quotes followed by a colon, a URL, and concluded
+  by punctuation plus white space, *or* just white space, is treated as a
+  hyper link. For example:
+
+    "Zope":http://www.zope.org/ is ...
+
+  Is interpreted as '<a href="http://www.zope.org/">Zope</a> is ....'
+  Note: This works for relative as well as absolute URLs.
+
+- Text enclosed by double quotes followed by a comma, one or more spaces,
+  an absolute URL and concluded by punctuation plus white space, or just
+  white space, is treated as a hyper link. For example: 
+
+    "mail me", mailto:amos@digicool.com.
+
+  Is interpreted as '<a href="mailto:amos@digicool.com">mail me</a>.' 
+
+- Text enclosed in brackets which consists only of letters, digits,
+  underscores and dashes is treated as hyper links within the document.
+  For example:
+    
+    As demonstrated by Smith [12] this technique is quite effective.
+
+  Is interpreted as '... by Smith <a href="#12">[12]</a> this ...'. Together
+  with the next rule this allows easy coding of references or end notes.
+
+- Text enclosed in brackets which is preceded by the start of a line, two
+  periods and a space is treated as a named link. For example:
+
+    .. [12] "Effective Techniques" Smith, Joe ... 
+
+  Is interpreted as '<a name="12">[12]</a> "Effective Techniques" ...'.
+  Together with the previous rule this allows easy coding of references or
+  end notes. 
+
+
+- A paragraph that has blocks of text enclosed in '||' is treated as a
+  table. The text blocks correspond to table cells and table rows are
+  denoted by newlines. By default the cells are center aligned. A cell
+  can span more than one column by preceding a block of text with an
+  equivalent number of cell separators '||'. Newlines and '|' cannot
+  be a part of the cell text. For example:
+
+      |||| **Ingredients** ||
+      || *Name* || *Amount* ||
+      ||Spam||10||
+      ||Eggs||3||
+
+  is interpreted as::
+
+    <TABLE BORDER=1 CELLPADDING=2>
+     <TR>
+      <TD ALIGN=CENTER COLSPAN=2> <strong>Ingredients</strong> </TD>
+     </TR>
+     <TR>
+      <TD ALIGN=CENTER COLSPAN=1> <em>Name</em> </TD>
+      <TD ALIGN=CENTER COLSPAN=1> <em>Amount</em> </TD>
+     </TR>
+     <TR>
+      <TD ALIGN=CENTER COLSPAN=1>Spam</TD>
+      <TD ALIGN=CENTER COLSPAN=1>10</TD>
+     </TR>
+     <TR>
+      <TD ALIGN=CENTER COLSPAN=1>Eggs</TD>
+      <TD ALIGN=CENTER COLSPAN=1>3</TD>
+     </TR>
+    </TABLE>
+
+'''
+
+import ts_regex
+import regex
+from ts_regex import gsub
+from string import split, join, strip, find
+import string,re
+
+
+def untabify(aString,
+             indent_tab=ts_regex.compile('\(\n\|^\)\( *\)\t').search_group,
+             ):
+    '''\
+    Convert indentation tabs to spaces.
+    '''
+    result=''
+    rest=aString
+    while 1:
+        ts_results = indent_tab(rest, (1,2))
+        if ts_results:
+            start, grps = ts_results
+            lnl=len(grps[0])
+            indent=len(grps[1])
+            result=result+rest[:start]
+            rest="\n%s%s" % (' ' * ((indent/8+1)*8),
+                             rest[start+indent+1+lnl:])
+        else:
+            return result+rest
+
+def indent(aString, indent=2):
+    """Indent a string the given number of spaces"""
+    r=split(untabify(aString),'\n')
+    if not r: return ''
+    if not r[-1]: del r[-1]
+    tab=' '*level
+    return "%s%s\n" % (tab,join(r,'\n'+tab))
+
+def reindent(aString, indent=2, already_untabified=0):
+    "reindent a block of text, so that the minimum indent is as given"
+
+    if not already_untabified: aString=untabify(aString)
+
+    l=indent_level(aString)[0]
+    if indent==l: return aString
+
+    r=[]
+
+    append=r.append
+
+    if indent > l:
+        tab=' ' * (indent-l)
+        for s in split(aString,'\n'): append(tab+s)
+    else:
+        l=l-indent
+        for s in split(aString,'\n'): append(s[l:])
+
+    return join(r,'\n')
+
+def indent_level(aString,
+                 indent_space=ts_regex.compile('\n\( *\)').search_group,
+                 ):
+    '''\
+    Find the minimum indentation for a string, not counting blank lines.
+    '''
+    start=0
+    text='\n'+aString
+    indent=l=len(text)
+    while 1:
+
+        ts_results = indent_space(text, (1,2), start)
+        if ts_results:
+            start, grps = ts_results
+            i=len(grps[0])
+            start=start+i+1
+            if start < l and text[start] != '\n':       # Skip blank lines
+                if not i: return (0,aString)
+                if i < indent: indent = i
+        else:
+            return (indent,aString)
+
+def paragraphs(list,start):
+    l=len(list)
+    level=list[start][0]
+    i=start+1
+    while i < l and list[i][0] > level: i=i+1
+    return i-1-start
+
+def structure(list):
+    if not list: return []
+    i=0
+    l=len(list)
+    r=[]
+    while i < l:
+        sublen=paragraphs(list,i)
+        i=i+1
+        r.append((list[i-1][1],structure(list[i:i+sublen])))
+        i=i+sublen
+    return r
+
+
+class Table:
+    '''Parse "||"-delimited table text and render it as an HTML table.
+
+    create() fills self.table with one list of cell strings per line;
+    html() renders that list with the CELL, ROW and TABLE templates.
+    '''
+    CELL='  <TD ALIGN=CENTER COLSPAN=%i>%s</TD>\n'
+    ROW=' <TR>\n%s </TR>\n'
+    TABLE='\n<TABLE BORDER=1 CELLPADDING=2>\n%s</TABLE>'
+
+    def create(self,aPar,
+        # Optional whitespace, the "||" delimiter, then the cell text: a
+        # run of characters other than NUL and "|".  The class was written
+        # [^\0x00|] before, which the regex engine reads as NUL, "x", "0"
+        # and "|", so a cell containing "x" or "0" made the parse fail.
+        td_reg=re.compile(r'[ \t\n]*\|\|([^\x00|]*)')
+        ):
+        '''Parse aPar as a table and store the rows in self.table.
+
+        Returns 1 when every non-empty line of aPar consists entirely of
+        "||"-introduced cells, otherwise 0 (self.table then holds only
+        the rows parsed before the failing line).
+        '''
+        self.table=[]
+        text=filter(None,split(aPar,'\n'))
+        for line in text:
+            row=[]
+            while 1:
+                mo =  td_reg.match(line)
+                if not mo: return 0
+                pos = mo.end(1)
+                row.append(mo.group(1))
+                # Stop once the cell text reaches the end of the line;
+                # otherwise continue matching at the next "||".
+                if pos==len(line):break
+                line=line[pos:]
+            self.table.append(row)
+        return 1
+
+    def html(self):
+        '''Return the rows collected by create() as an HTML table string.
+
+        An empty cell is not emitted; it widens the COLSPAN of the next
+        non-empty cell in the same row by one.
+        '''
+        htmltable=[]
+        for row in self.table:
+            htmlrow=[]
+            colspan=1
+            for cell in row:
+                if cell=='':
+                    colspan=colspan+1
+                    continue
+                else:
+                    htmlrow.append(self.CELL%(colspan,cell))
+                    colspan=1
+            htmltable.append(self.ROW%join(htmlrow,''))
+        return self.TABLE%join(htmltable,'')
+
+# Shared module-level Table instance; HTML._str calls table.create() and
+# table.html() on it for every paragraph that reaches the table check.
+table=Table()
+
+class StructuredText:
+
+    """Model text as structured collection of paragraphs.
+
+    Structure is implied by the indentation level.
+
+    This class is intended as a base class for classes that do the
+    actual text output formatting.
+    """
+
+    def __init__(self, aStructuredString, level=0,
+                 paragraph_divider=regex.compile('\(\r?\n *\)+\r?\n'),
+                 ):
+        '''Convert a structured text string into a structured text object.
+
+        Arguments:
+
+          aStructuredString -- The string to be parsed.
+          level -- The level of top level headings to be created.
+          paragraph_divider -- Compiled pattern on which the text is
+            split into paragraphs.
+
+        The result is stored in self.structure as the nested list
+        returned by structure(); level is kept in self.level.
+        '''
+
+
+        # Links written as ' "label":url' and followed by one of . : ? ;
+        # plus a space become HTML anchors.  % binds tighter than +, so
+        # each of the three fragments is formatted on its own.
+        pat = ' \"([%s0-9-_,./?=@~&]*)\":' % string.letters+ \
+              '([-:%s0-9_,./?=@#~&]*?)' % string.letters + \
+              '([.:?;] )' 
+
+        p_reg = re.compile(pat,re.M)
+                
+        aStructuredString = p_reg.sub(r'<a href="\2">\1</a>\3 ' , aStructuredString)
+
+        # Same rewrite for the ' "label", url' spelling.
+        pat = ' \"([%s0-9-_,./?=@~&]*)\", ' % string.letters+ \
+              '([-:%s0-9_,./?=@#~&]*?)' % string.letters + \
+              '([.:?;] )' 
+
+        p_reg = re.compile(pat,re.M)
+
+        aStructuredString = p_reg.sub(r'<a href="\2">\1</a>\3 ' , aStructuredString)
+
+
+        # If a generated anchor has an href that starts with ':', drop
+        # that leading colon.
+        protoless = find(aStructuredString, '<a href=":')
+        if protoless != -1:
+            aStructuredString = re.sub('<a href=":', '<a href="',
+                                     aStructuredString)
+
+        self.level=level
+        # Split into paragraphs, pair each one with its indentation via
+        # indent_level(), then nest them by indentation.
+        paragraphs=ts_regex.split(untabify(aStructuredString),
+                                  paragraph_divider)
+        paragraphs=map(indent_level,paragraphs)
+
+        self.structure=structure(paragraphs)
+
+
+    def __str__(self):
+        '''Return the string form of the nested structure list.'''
+        return str(self.structure)
+
+
+# Building blocks for the character-markup patterns compiled in ctag().
+# prefix: a character from NUL through space, a backslash or "(", or the
+#         start of the text.
+# suffix: a character from NUL through space, one of , . : ; ! ? a
+#         backslash or ")", or the end of the text.
+# middle: the marked-up text between single delimiters (six %s slots, all
+#         filled with the delimiter character); middl2 is the same with
+#         doubled delimiters (eight %s slots).
+ctag_prefix=r'([\x00- \\(]|^)' 
+ctag_suffix=r'([\x00- ,.:;!?\\)]|$)'         
+ctag_middle=r'[%s]([^\x00- %s][^%s]*[^\x00- %s]|[^%s])[%s]' 
+ctag_middl2=r'[%s][%s]([^\x00- %s][^%s]*[^\x00- %s]|[^%s])[%s][%s]'    
+
+def ctag(s,
+         em=re.compile(
+             ctag_prefix+(ctag_middle % (("*",)*6) )+ctag_suffix),
+         strong=re.compile(
+             ctag_prefix+(ctag_middl2 % (("*",)*8))+ctag_suffix),
+         under=re.compile(
+             ctag_prefix+(ctag_middle % (("_",)*6) )+ctag_suffix),
+         code=re.compile(
+             ctag_prefix+(ctag_middle % (("\'",)*6))+ctag_suffix),
+         ):
+    '''Replace character markup in s with HTML tags and return the result.
+
+    **text** becomes <strong>, _text_ becomes <u>, 'text' becomes <code>
+    and *text* becomes <em>, applied in that order.  None is treated as
+    the empty string.
+    '''
+    text=s
+    if text is None: text=''
+    rules=(
+        (strong,r'\1<strong>\2</strong>\3'),
+        (under, r'\1<u>\2</u>\3'),
+        (code,  r'\1<code>\2</code>\3'),
+        (em,    r'\1<em>\2</em>\3'),
+        )
+    for pattern,replacement in rules:
+        text=pattern.sub(replacement,text)
+    return text
+
+class HTML(StructuredText):
+
+    '''\
+    An HTML structured text formatter.
+
+    Renders the nested (text, children) list held in self.structure.
+    '''\
+
+    def __str__(self,
+                extra_dl=re.compile("</dl>\n<dl>"),
+                extra_ul=re.compile("</ul>\n<ul>"),
+                extra_ol=re.compile("</ol>\n<ol>"),
+                ):
+        '''\
+        Return an HTML string representation of the structured text data.
+
+        Each list item is rendered with its own list element, so a
+        closing list tag directly followed by the matching opening tag is
+        replaced with a newline to merge adjacent items into one list.
+        '''
+        s=self._str(self.structure,self.level)
+        s=extra_dl.sub('\n',s)
+        s=extra_ul.sub('\n',s)
+        s=extra_ol.sub('\n',s)
+        return s
+
+    def ul(self, before, p, after):
+        '''Append a one-item bullet list (text p, nested html after) to before.'''
+        if p: p="<p>%s</p>" % strip(ctag(p))
+        return ('%s<ul><li>%s\n%s\n</li></ul>\n'
+                % (before,p,after))
+
+    def ol(self, before, p, after):
+        '''Append a one-item numbered list (text p, nested html after) to before.'''
+        if p: p="<p>%s</p>" % strip(ctag(p))
+        return ('%s<ol><li>%s\n%s\n</li></ol>\n'
+                % (before,p,after))
+
+    def dl(self, before, t, d, after):
+        '''Append a one-entry definition list (term t, definition d) to before.'''
+        return ('%s<dl><dt>%s</dt><dd><p>%s</p>\n%s\n</dd></dl>\n'
+                % (before,ctag(t),ctag(d),after))
+
+    def head(self, before, t, level, d):
+        '''Append heading t with body d to before.
+
+        Levels 1 through 5 use an <hN> element; any other level is
+        rendered as a definition list with the title in bold.
+        '''
+        if level > 0 and level < 6:
+            return ('%s<h%d>%s</h%d>\n%s\n'
+                    % (before,level,strip(ctag(t)),level,d))
+            
+        t="<p><strong>%s</strong></p>" % strip(ctag(t))
+        return ('%s<dl><dt>%s\n</dt><dd>%s\n</dd></dl>\n'
+                % (before,t,d))
+
+    def normal(self,before,p,after):
+        '''Append paragraph p, followed by the nested html after, to before.'''
+        return '%s<p>%s</p>\n%s\n' % (before,ctag(p),after)
+
+    def pre(self,structure,tagged=0):
+        '''Render structure as quoted literal text.
+
+        The outermost call wraps the text in <PRE>; recursive calls pass
+        tagged=1 so nested levels are emitted without another wrapper.
+        '''
+        if not structure: return ''
+        if tagged:
+            r=''
+        else:
+            r='<PRE>\n'
+        for s in structure:
+            r="%s%s\n\n%s" % (r,html_quote(s[0]),self.pre(s[1],1))
+        if not tagged: r=r+'</PRE>\n'
+        return r
+    
+    def table(self,before,table,after):
+        '''Append already rendered table html, wrapped in <p>, to before.
+
+        Note that the table html is passed through ctag() as well.
+        '''
+        return '%s<p>%s</p>\n%s\n' % (before,ctag(table),after)
+    
+    def _str(self,structure,level,
+             # Static
+             bullet=ts_regex.compile('[ \t\n]*[o*-][ \t\n]+\([^\0]*\)'
+                                     ).match_group,
+             example=ts_regex.compile('[\0- ]examples?:[\0- ]*$'
+                                      ).search,
+             dl=ts_regex.compile('\([^\n]+\)[ \t]+--[ \t\n]+\([^\0]*\)'
+                                 ).match_group,
+             nl=ts_regex.compile('\n').search,
+             ol=ts_regex.compile(
+                 '[ \t]*\(\([0-9]+\|[%s]+\)[.)]\)+[ \t\n]+\([^\0]*\|$\)' % string.letters
+                 ).match_group,
+             olp=ts_regex.compile('[ \t]*([0-9]+)[ \t\n]+\([^\0]*\|$\)'
+                                  ).match_group,
+             ):
+        '''Return the html for a list of (text, children) items.
+
+        The keyword arguments are matchers compiled once when the method
+        is defined.  For every item the first matching rule below decides
+        the rendering; the order of the checks is significant.
+        '''
+        r=''
+        for s in structure:
+
+            # Bullet item.  The matchers are called with the text and the
+            # wanted group numbers; element [1] of a true result is used
+            # as the matched group text.
+            ts_results = bullet(s[0], (1,))
+            if ts_results:
+                p = ts_results[1]
+                # A trailing '::' turns the children into literal text.
+                if s[0][-2:]=='::' and s[1]: ps=self.pre(s[1])
+                else: ps=self._str(s[1],level)
+                r=self.ul(r,p,ps)
+                continue
+            # Numbered item written as "1." / "a)" etc.
+            ts_results = ol(s[0], (3,))
+            if ts_results:
+                p = ts_results[1]
+                if s[0][-2:]=='::' and s[1]: ps=self.pre(s[1])
+                else: ps=self._str(s[1],level)
+                r=self.ol(r,p,ps)
+                continue
+            # Second numbered-item pattern (olp).
+            ts_results = olp(s[0], (1,))
+            if ts_results:
+                p = ts_results[1]
+                if s[0][-2:]=='::' and s[1]: ps=self.pre(s[1])
+                else: ps=self._str(s[1],level)
+                r=self.ol(r,p,ps)
+                continue
+            # "term -- definition" entry.
+            ts_results = dl(s[0], (1,2))
+            if ts_results:
+                t,d = ts_results[1]
+                r=self.dl(r,t,d,self._str(s[1],level))
+                continue
+            # The search matchers are compared against 0: a negative
+            # result is treated as "not found".
+            if example(s[0]) >= 0 and s[1]:
+                # Introduce an example, using pre tags:
+                r=self.normal(r,s[0],self.pre(s[1]))
+                continue
+            if s[0][-2:]=='::' and s[1]:
+                # Introduce an example, using pre tags:
+                # (one of the two trailing colons is dropped from the text)
+                r=self.normal(r,s[0][:-1],self.pre(s[1]))
+                continue
+            # Here the bare name table is the module-level Table instance,
+            # not the table() method above.
+            if table.create(s[0]):
+                ## table support.
+                r=self.table(r,table.html(),self._str(s[1],level))
+                continue
+            else:
+
+                if nl(s[0]) < 0 and s[1] and s[0][-1:] != ':':
+                    # Treat as a heading
+                    # (single line, has children, no trailing colon).
+                    # Children are rendered one level deeper, except that
+                    # level 0 stays 0.
+                    t=s[0]
+                    r=self.head(r,t,level,
+                                self._str(s[1],level and level+1))
+                else:
+                    r=self.normal(r,s[0],self._str(s[1],level))
+        return r
+        
+
+def html_quote(v,
+               character_entities=(
+                       (re.compile('&'), '&amp;'),
+                       (re.compile("<"), '&lt;' ),
+                       (re.compile(">"), '&gt;' ),
+                       (re.compile('"'), '&quot;')
+                       )): #"
+        '''Return str(v) with & < > and " replaced by HTML entities.
+
+        character_entities is a sequence of (compiled pattern, entity)
+        pairs applied in order; "&" comes first so the ampersands of the
+        other entities are not quoted again.
+        '''
+        quoted=str(v)
+        for pattern,entity in character_entities:
+            quoted=pattern.sub(entity,quoted)
+        return quoted
+
+def html_with_references(text, level=1):
+    '''Return an HTML object for text with reference markup expanded.
+
+    Three rewrites are applied in order before the text is handed to
+    HTML: ".. [name]" at the start of a line becomes a named anchor,
+    "[name]" becomes a link to that anchor, and "[page.html]" becomes a
+    link to page.html.
+    '''
+    rewrites=(
+        (r'[\0\n]\.\. \[([0-9_%s-]+)\]' % string.letters,
+         r'\n  <a name="\1">[\1]</a>'),
+        (r'([\x00- ,])\[(?P<ref>[0-9_%s-]+)\]([\x00- ,.:])'   % string.letters,
+         r'\1<a href="#\2">[\2]</a>\3'),
+        (r'([\0- ,])\[([^]]+)\.html\]([\0- ,.:])',
+         r'\1<a href="\2.html">[\2]</a>\3'),
+        )
+    for pattern,replacement in rewrites:
+        text=re.sub(pattern,replacement,text)
+
+    return HTML(text,level=level)
+    
+
+def main():
+    '''Command line entry point: convert structured text to HTML.
+
+    The text is read from the single file named on the command line, or
+    from standard input when no file is given, and the HTML is printed.
+
+    Options: -w prints a Content-Type header first, -l sets the locale
+    from the environment, -t is accepted but not used here.  Only when
+    at least one option is given are a leading "#!" line and leading
+    blank lines removed and, if the output starts with an <h1>, the
+    result wrapped in a complete HTML page titled with that heading.
+    '''
+    import sys, getopt
+
+    opts,args=getopt.getopt(sys.argv[1:],'twl')
+
+    if args:
+        [infile]=args
+        # Close the file explicitly instead of leaving the open handle
+        # to be reclaimed whenever the file object goes away.
+        f=open(infile,'r')
+        try:
+            s=f.read()
+        finally:
+            f.close()
+    else:
+        s=sys.stdin.read()
+
+    if opts:
+
+        if filter(lambda o: o[0]=='-w', opts):
+            print 'Content-Type: text/html\n'
+
+        if filter(lambda o: o[0]=='-l', opts):
+            import locale
+            locale.setlocale(locale.LC_ALL,"")
+
+        if s[:2]=='#!':
+            s=re.sub('^#![^\n]+','',s)
+
+        # Drop a leading run of characters in the range NUL..newline that
+        # ends with a newline (i.e. leading blank lines).
+        mo = re.compile('([\0-\n]*\n)').match(s)
+        if mo is not None:
+            s = s[len(mo.group(0)) :]
+            
+        s=str(html_with_references(s))
+        if s[:4]=='<h1>':
+            t=s[4:find(s,'</h1>')]
+            s='''<html><head><title>%s</title>
+            </head><body>
+            %s
+            </body></html>
+            ''' % (t,s)
+        print s
+    else:
+        print html_with_references(s)
+
+if __name__=="__main__": main()
index b126878bdeea65a3c0e485e16d4bfd65d7a95005..5a14f33d78425caf8665c0b91f71a69d809d86b9 100644 (file)
@@ -88,238 +88,245 @@ from string import join, split, find, lstrip
 
 class DocBookClass:
 
-    element_types={
-        '#text': '_text',
-        'StructuredTextDocument': 'document',
-        'StructuredTextParagraph': 'paragraph',
-        'StructuredTextExample': 'example',
-        'StructuredTextBullet': 'bullet',
-        'StructuredTextNumbered': 'numbered',
-        'StructuredTextDescription': 'description',
-        'StructuredTextDescriptionTitle': 'descriptionTitle',
-        'StructuredTextDescriptionBody': 'descriptionBody',
-        'StructuredTextSection': 'section',
-        'StructuredTextSectionTitle': 'sectionTitle',
-        'StructuredTextLiteral': 'literal',
-        'StructuredTextEmphasis': 'emphasis',
-        'StructuredTextStrong': 'strong',
-        'StructuredTextLink': 'link',
-        'StructuredTextXref': 'xref',
-        }        
-
-    def dispatch(self, doc, level, output):
-        getattr(self, self.element_types[doc.getNodeName()])(doc, level, output)
-        
-    def __call__(self, doc, level=1):
-        r=[]
-        self.dispatch(doc, level-1, r.append)
-        return join(r,'')
-
-    def _text(self, doc, level, output):
-        if doc.getNodeName() == 'StructuredTextLiteral':
-            output(doc.getNodeValue())
-        else:
-            output(lstrip(doc.getNodeValue()))            
-
-    def document(self, doc, level, output):
-        output('<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook V3.1//EN">\n')
-        output('<book>\n')
-        children=doc.getChildNodes()
-        if (children and
-             children[0].getNodeName() == 'StructuredTextSection'):
-            output('<title>%s</title>' % children[0].getChildNodes()[0].getNodeValue())
-        for c in children:
+   # Maps the name returned by a node's getNodeName() to the name of the
+   # method on this class that renders that node.
+   # NOTE: "ets.update(...)" further down in this module updates this very
+   # dict in place, so the 'StructuredTextImage' entry ends up here too.
+   element_types={
+      '#text': '_text',
+      'StructuredTextDocument': 'document',
+      'StructuredTextParagraph': 'paragraph',
+      'StructuredTextExample': 'example',
+      'StructuredTextBullet': 'bullet',
+      'StructuredTextNumbered': 'numbered',
+      'StructuredTextDescription': 'description',
+      'StructuredTextDescriptionTitle': 'descriptionTitle',
+      'StructuredTextDescriptionBody': 'descriptionBody',
+      'StructuredTextSection': 'section',
+      'StructuredTextSectionTitle': 'sectionTitle',
+      'StructuredTextLiteral': 'literal',
+      'StructuredTextEmphasis': 'emphasis',
+      'StructuredTextStrong': 'strong',
+      'StructuredTextLink': 'link',
+      'StructuredTextXref': 'xref',
+      'StructuredTextSGML': 'sgml',
+      }      
+
+   def dispatch(self, doc, level, output):
+      '''Render doc with the method registered for its node name.'''
+      handler_name=self.element_types[doc.getNodeName()]
+      handler=getattr(self, handler_name)
+      handler(doc, level, output)
+      
+   def __call__(self, doc, level=1):
+      '''Render doc (one level below level) and return it as one string.'''
+      pieces=[]
+      emit=pieces.append
+      self.dispatch(doc, level-1, emit)
+      return join(pieces,'')
+
+   def _text(self, doc, level, output):
+      '''Output the node value; left-stripped unless the node is a literal.'''
+      is_literal = doc.getNodeName() == 'StructuredTextLiteral'
+      value=doc.getNodeValue()
+      if not is_literal:
+         value=lstrip(value)
+      output(value)
+
+   def document(self, doc, level, output):
+      '''Render doc as a complete DocBook <book>, DOCTYPE included.
+
+      When the first child is a section, the value of that section's
+      first child is also emitted as the book <title>.
+      '''
+      output('<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook V4.1//EN">\n')
+      output('<book>\n')
+      nodes=doc.getChildNodes()
+      if nodes:
+         first=nodes[0]
+         if first.getNodeName() == 'StructuredTextSection':
+            heading=first.getChildNodes()[0]
+            output('<title>%s</title>' % heading.getNodeValue())
+      for node in nodes:
+         render=getattr(self, self.element_types[node.getNodeName()])
+         render(node, level, output)
+      output('</book>\n')
+
+   def section(self, doc, level, output):
+      '''Wrap the children of doc, rendered one level deeper, in <section>.'''
+      deeper=level+1
+      output('\n<section>\n')
+      for node in doc.getChildNodes():
+         render=getattr(self, self.element_types[node.getNodeName()])
+         render(node, deeper, output)
+      output('\n</section>\n')
+      
+   def sectionTitle(self, doc, level, output):
+      output('<title>')
+      for c in doc.getChildNodes():
+         try:
             getattr(self, self.element_types[c.getNodeName()])(c, level, output)
-        output('</book>\n')
-
-    def section(self, doc, level, output):
-        output('\n<sect%s>\n' % (level + 1))
-        children=doc.getChildNodes()
-        for c in children:
-            getattr(self, self.element_types[c.getNodeName()])(c, level+1, output)
-        output('\n</sect%s>\n' % (level + 1))
-        
-    def sectionTitle(self, doc, level, output):
-        output('<title>')
-        for c in doc.getChildNodes():
-            getattr(self, self.element_types[c.getNodeName()])(c, level, output)
-        output('</title>\n')
-
-    def description(self, doc, level, output):
-        p=doc.getPreviousSibling()
-        if p is None or  p.getNodeName() is not doc.getNodeName():            
-            output('<variablelist>\n')
-        for c in doc.getChildNodes():
-            getattr(self, self.element_types[c.getNodeName()])(c, level, output)
-        n=doc.getNextSibling()
-        if n is None or n.getNodeName() is not doc.getNodeName():            
-            output('</variablelist>\n')
-        
-    def descriptionTitle(self, doc, level, output):
-        output('<varlistentry><term>\n')
-        for c in doc.getChildNodes():
-            getattr(self, self.element_types[c.getNodeName()])(c, level, output)
-        output('</term>\n')
-        
-    def descriptionBody(self, doc, level, output):
-        output('<listitem><para>\n')
-        for c in doc.getChildNodes():
-            getattr(self, self.element_types[c.getNodeName()])(c, level, output)
-        output('</para></listitem>\n')
-        output('</varlistentry>\n')
+         except:
+            print "failed", c.getNodeName(), c
+      output('</title>\n')
+
+   def description(self, doc, level, output):
+      '''Render one description item.
+
+      <variablelist> is opened when the previous sibling is missing or
+      of another node type, and closed when the next sibling is missing
+      or of another node type, so a run of items shares one list.
+      '''
+      # Node names are compared by value; the former identity test
+      # ("is not") only worked while equal names were the same object.
+      p=doc.getPreviousSibling()
+      if p is None or p.getNodeName() != doc.getNodeName():
+         output('<variablelist>\n')
+      for c in doc.getChildNodes():
+         getattr(self, self.element_types[c.getNodeName()])(c, level, output)
+      n=doc.getNextSibling()
+      if n is None or n.getNodeName() != doc.getNodeName():
+         output('</variablelist>\n')
+      
+   def descriptionTitle(self, doc, level, output):
+      '''Open a <varlistentry> and render the children of doc as its <term>.'''
+      output('<varlistentry><term>\n')
+      for node in doc.getChildNodes():
+         render=getattr(self, self.element_types[node.getNodeName()])
+         render(node, level, output)
+      output('</term>\n')
+      
+   def descriptionBody(self, doc, level, output):
+      '''Render the children of doc as a <listitem> and close the <varlistentry>.'''
+      output('<listitem><para>\n')
+      for node in doc.getChildNodes():
+         render=getattr(self, self.element_types[node.getNodeName()])
+         render(node, level, output)
+      for closing in ('</para></listitem>\n', '</varlistentry>\n'):
+         output(closing)
+
+   def bullet(self, doc, level, output):
+      '''Render one bullet item as a <listitem>.
+
+      <itemizedlist> is opened when the previous sibling is missing or
+      of another node type, and closed when the next sibling is missing
+      or of another node type, so a run of items shares one list.
+      '''
+      # Node names are compared by value; the former identity test
+      # ("is not") only worked while equal names were the same object.
+      p=doc.getPreviousSibling()
+      if p is None or p.getNodeName() != doc.getNodeName():
+         output('<itemizedlist>\n')
+      output('<listitem><para>\n')
+
+      for c in doc.getChildNodes():
+         getattr(self, self.element_types[c.getNodeName()])(c, level, output)
+      n=doc.getNextSibling()
+      output('</para></listitem>\n')
+      if n is None or n.getNodeName() != doc.getNodeName():
+         output('</itemizedlist>\n')
+
+   def numbered(self, doc, level, output):
+      '''Render one numbered item as a <listitem>.
+
+      <orderedlist> is opened when the previous sibling is missing or of
+      another node type, and closed when the next sibling is missing or
+      of another node type, so a run of items shares one list.
+      '''
+      # Node names are compared by value; the former identity test
+      # ("is not") only worked while equal names were the same object.
+      p=doc.getPreviousSibling()
+      if p is None or p.getNodeName() != doc.getNodeName():
+         output('<orderedlist>\n')
+      output('<listitem><para>\n')
+      for c in doc.getChildNodes():
+         getattr(self, self.element_types[c.getNodeName()])(c, level, output)
+      n=doc.getNextSibling()
+      output('</para></listitem>\n')
+      if n is None or n.getNodeName() != doc.getNodeName():
+         output('</orderedlist>\n')
+
+   def example(self, doc, level, output):
+      '''Render the children of doc as <programlisting> CDATA blocks.
+
+      The value of each child is passed through prestrip() first.
+      '''
+      # NOTE(review): i is never changed inside the loop, so the else
+      # branch below is not reached and every child is emitted as its own
+      # programlisting -- confirm whether only the first child was meant.
+      i=0
+      for c in doc.getChildNodes():
+         if i==0:
+            output('<programlisting>\n<![CDATA[\n')
+            ##
+            ## eek.  A ']]>' in your body will break this...
+            ##
+            output(prestrip(c.getNodeValue()))
+            output('\n]]></programlisting>\n')
+         else:
+            getattr(self, self.element_types[c.getNodeName()])(
+               c, level, output)
+
+   def paragraph(self, doc, level, output):
+      '''Render the children of doc inside a <para> element.'''
+      output('<para>\n\n')
+      for node in doc.getChildNodes():
+         render=getattr(self, self.element_types[node.getNodeName()])
+         render(node, level, output)
+      output('</para>\n\n')
+            
+   def link(self, doc, level, output):
+      '''Render the children of doc inside a <ulink> that points at doc.href.'''
+      target=doc.href
+      output('<ulink url="%s">' % target)
+      for node in doc.getChildNodes():
+         render=getattr(self, self.element_types[node.getNodeName()])
+         render(node, level, output)
+      output('</ulink>')
 
-    def bullet(self, doc, level, output):
-        p=doc.getPreviousSibling()
-        if p is None or p.getNodeName() is not doc.getNodeName():            
-            output('<itemizedlist>\n')
-        output('<listitem><para>\n')
+   def emphasis(self, doc, level, output):
+      '''Render the children of doc inside <emphasis>, then a trailing space.'''
+      output('<emphasis>')
+      for node in doc.getChildNodes():
+         render=getattr(self, self.element_types[node.getNodeName()])
+         render(node, level, output)
+      output('</emphasis> ')
 
-        for c in doc.getChildNodes():
-            getattr(self, self.element_types[c.getNodeName()])(c, level, output)
-        n=doc.getNextSibling()
-        output('</para></listitem>\n')
-        if n is None or n.getNodeName() is not doc.getNodeName():            
-            output('</itemizedlist>\n')
-
-    def numbered(self, doc, level, output):
-        p=doc.getPreviousSibling()
-        if p is None or p.getNodeName() is not doc.getNodeName():            
-            output('<orderedlist>\n')
-        output('<listitem><para>\n')
-        for c in doc.getChildNodes():
-            getattr(self, self.element_types[c.getNodeName()])(c, level, output)
-        n=doc.getNextSibling()
-        output('</para></listitem>\n')
-        if n is None or n.getNodeName() is not doc.getNodeName():            
-            output('</orderedlist>\n')
-
-    def example(self, doc, level, output):
-        i=0
-        for c in doc.getChildNodes():
-            if i==0:
-                output('<programlisting>\n<![CDATA[\n')
-                ##
-                ## eek.  A ']]>' in your body will break this...
-                ##
-                output(prestrip(c.getNodeValue()))
-                output('\n]]></programlisting>\n')
-            else:
-                getattr(self, self.element_types[c.getNodeName()])(
-                    c, level, output)
-
-    def paragraph(self, doc, level, output):
-        
-        output('<para>\n\n')        
-        for c in doc.getChildNodes():
-            getattr(self, self.element_types[c.getNodeName()])(
-                c, level, output)
-        output('</para>\n\n')
-                
-    def link(self, doc, level, output):
-#        output('<link linkend="%s">' % doc.href)
-        for c in doc.getChildNodes():
-            getattr(self, self.element_types[c.getNodeName()])(c, level, output)
-#        output('</link>')
+   def literal(self, doc, level, output):
+      '''Output the raw values of the children of doc inside <literal>.'''
+      output('<literal>')
+      for node in doc.getChildNodes():
+         raw=node.getNodeValue()
+         output(raw)
+      output('</literal>')
 
-    def emphasis(self, doc, level, output):
-        output('<emphasis>')
-        for c in doc.getChildNodes():
-            getattr(self, self.element_types[c.getNodeName()])(c, level, output)
-        output('</emphasis> ')
+   def strong(self, doc, level, output):
+      '''Render the children of doc inside an <emphasis> element.'''
+      output('<emphasis>')
+      for node in doc.getChildNodes():
+         render=getattr(self, self.element_types[node.getNodeName()])
+         render(node, level, output)
+      output('</emphasis>')
 
-    def literal(self, doc, level, output):
-        output('<literal>')
-        for c in doc.getChildNodes():
-            output(c.getNodeValue())
-        output('</literal>')
+   def xref(self, doc, level, output):
+      '''Output an empty <xref> element whose linkend is the node value.'''
+      target=doc.getNodeValue()
+      output('<xref linkend="%s"/>' % target)
 
-    def strong(self, doc, level, output):
-        output('<emphasis>')
-        for c in doc.getChildNodes():
-            getattr(self, self.element_types[c.getNodeName()])(c, level, output)
-        output('</emphasis>')
+   def sgml(self, doc, level, output):
+      '''Output the node value unchanged.'''
+      markup=doc.getNodeValue()
+      output(markup)
 
-    def xref(self, doc, level, output):
-        output('<xref linkend="%s">' % doc.getNodeValue())
 
 def prestrip(v):
-    v=string.replace(v, '\r\n', '\n')
-    v=string.replace(v, '\r', '\n')
-    v=string.replace(v, '\t', '          ')
-    lines=string.split(v, '\n')
-    indent=len(lines[0])
-    for line in lines:
-        if not len(line): continue
-        i=len(line)-len(string.lstrip(line))
-        if i < indent:
-            indent=i
-    nlines=[]
-    for line in lines:
-        nlines.append(line[indent:])
-    return string.join(nlines, '\r\n')
+   v=string.replace(v, '\r\n', '\n')
+   v=string.replace(v, '\r', '\n')
+   v=string.replace(v, '\t', '        ')
+   lines=string.split(v, '\n')
+   indent=len(lines[0])
+   for line in lines:
+      if not len(line): continue
+      i=len(line)-len(string.lstrip(line))
+      if i < indent:
+         indent=i
+   nlines=[]
+   for line in lines:
+      nlines.append(line[indent:])
+   return string.join(nlines, '\n')
 
 
 class DocBookChapter(DocBookClass):
 
-    def document(self, doc, level, output):
-        output('<chapter>\n')
-        children=doc.getChildNodes()
-        if (children and
-             children[0].getNodeName() == 'StructuredTextSection'):
-            output('<title>%s</title>' % children[0].getChildNodes()[0].getNodeValue())
-        for c in children[0].getChildNodes()[1:]:
-            getattr(self, self.element_types[c.getNodeName()])(c, level, output)
-        output('</chapter>\n')
+   def document(self, doc, level, output):
+      '''Render doc as a DocBook <chapter>.
+
+      When the first child is a section, the value of that section's
+      first child becomes the chapter <title>.  The remaining children
+      of the first child are then rendered as the chapter body.
+      '''
+      output('<chapter>\n')
+      children=doc.getChildNodes()
+      if (children and
+          children[0].getNodeName() == 'StructuredTextSection'):
+         output('<title>%s</title>' % children[0].getChildNodes()[0].getNodeValue())
+      # A document without children has no body to render; indexing
+      # children[0] unconditionally raised IndexError in that case.
+      if children:
+         for c in children[0].getChildNodes()[1:]:
+            getattr(self, self.element_types[c.getNodeName()])(c, level, output)
+      output('</chapter>\n')
 
 ets = DocBookClass.element_types
-ets.update({'StructuredTextImage': 'image'})        
+ets.update({'StructuredTextImage': 'image'})      
 
 class DocBookChapterWithFigures(DocBookChapter):
 
-     element_types = ets
+    element_types = ets
 
-     def image(self, doc, level, output):
-         if hasattr(doc, 'key'):
-             output('<figure id="%s"><title>%s</title>\n' % (doc.key, doc.getNodeValue()) )
-         else:
-             output('<figure><title>%s</title>\n' % doc.getNodeValue())
-##          for c in doc.getChildNodes():
-##                getattr(self, self.element_types[c.getNodeName()])(c, level, output)
-         output('<graphic fileref="%s"></graphic>\n</figure>\n' % doc.href)     
+    def image(self, doc, level, output):
+       '''Render doc as a <figure> titled with the node value.
+
+       The figure gets an id attribute when doc has a key attribute;
+       the graphic refers to doc.href.
+       '''
+       if not hasattr(doc, 'key'):
+          opening='<figure><title>%s</title>\n' % doc.getNodeValue()
+       else:
+          opening='<figure id="%s"><title>%s</title>\n' % (doc.key, doc.getNodeValue())
+       output(opening)
+       output('<graphic fileref="%s"></graphic>\n</figure>\n' % doc.href)
 
 class DocBookArticle(DocBookClass):
 
-    def document(self, doc, level, output):
-        output('<!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook V3.1//EN">\n')
-        output('<article>\n')
-        children=doc.getChildNodes()
-        if (children and
-             children[0].getNodeName() == 'StructuredTextSection'):
-            output('<artheader>\n<title>%s</title>\n</artheader>\n' %
-                     children[0].getChildNodes()[0].getNodeValue())
-        for c in children:
-            getattr(self, self.element_types[c.getNodeName()])(c, level, output)
-        output('</article>\n')
+   def document(self, doc, level, output):
+      '''Render doc as a complete DocBook <article>, DOCTYPE included.
+
+      When the first child is a section, the value of that section's
+      first child is also emitted as the title inside <articleinfo>.
+      '''
+      output('<!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook V4.1//EN">\n')
+      output('<article>\n')
+      nodes=doc.getChildNodes()
+      if nodes:
+         first=nodes[0]
+         if first.getNodeName() == 'StructuredTextSection':
+            heading=first.getChildNodes()[0]
+            output('<articleinfo>\n<title>%s</title>\n</articleinfo>\n' %
+                   heading.getNodeValue())
+      for node in nodes:
+         render=getattr(self, self.element_types[node.getNodeName()])
+         render(node, level, output)
+      output('</article>\n')
 
 
 class DocBookBook:
 
-    def __init__(self, title=''):
-        self.title = title
-        self.chapters = []
+   def __init__(self, title=''):
+      '''Start an empty book (no chapters yet) with the given title.'''
+      self.chapters = []
+      self.title = title
 
-    def addChapter(self, chapter):
-        self.chapters.append(chapter)
+   def addChapter(self, chapter):
+      '''Add chapter (a string, as used by read()) after the existing chapters.'''
+      chapters=self.chapters
+      chapters.append(chapter)
 
-    def read(self):
-        out = '<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook V3.1//EN">\n<book>\n'
-        out = out + '<title>%s</title>\n' % self.title
-        for chapter in self.chapters:
-            out = out + chapter + '\n</book>\n'
+   def read(self):
+      '''Return the whole book as one DocBook string.
+
+      The chapters are emitted in the order they were added, each one
+      followed by a newline.  The <book> element is closed exactly once
+      at the end; it used to be closed after every chapter and not at
+      all when the book had no chapters.
+      '''
+      out = '<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook V4.1//EN">\n<book>\n'
+      out = out + '<title>%s</title>\n' % self.title
+      for chapter in self.chapters:
+         out = out + chapter + '\n'
 
-        return out
+      return out + '</book>\n'
 
-    def __str__(self):
-        return self.read()
-            
+   def __str__(self):
+      '''Return the same string as read().'''
+      return self.read()
+         
 
index ec6dc402a4febacf0d6e72f94091e80559e92910..405f35e25d27b0efeaae699cd8e7a582152d964f 100644 (file)
 
 import re, ST, STDOM
 from string import split, join, replace, expandtabs, strip, find, rstrip
+from STletters import *
+
 
 StringType=type('')
 ListType=type([])
 
+def flatten(obj, append):
+   '''Pass the value of every text node at or below obj to append.
+
+   Non-text nodes are walked recursively, children in order.
+   '''
+   if obj.getNodeType()!=STDOM.TEXT_NODE:
+      for child in obj.getChildNodes():
+         flatten(child, append)
+      return
+   append(obj.getNodeValue())
+
+
 class StructuredTextExample(ST.StructuredTextParagraph):
     """Represents a section of document with literal text, as for examples"""
 
     def __init__(self, subs, **kw):
-       t=[]; a=t.append
-       for s in subs: a(s.getNodeValue())
-       apply(ST.StructuredTextParagraph.__init__,
-             (self, join(t,'\n\n'), ()),
-             kw)
+        t=[]
+        a=t.append
+        for s in subs:
+            flatten(s, a)
+        apply(ST.StructuredTextParagraph.__init__,
+              (self, join(t,'\n\n'), ()),
+              kw)
 
     def getColorizableTexts(self): return ()
     def setColorizableTexts(self, src): pass # never color examples
@@ -137,9 +149,15 @@ class StructuredTextSection(ST.StructuredTextParagraph):
        apply(ST.StructuredTextParagraph.__init__,
              (self, StructuredTextSectionTitle(src), subs),
              kw)
-
+    
+    def getColorizableTexts(self):
+        '''Return the colorizable texts of the object held in self._src.'''
+        source=self._src
+        return source.getColorizableTexts()
+    
+    def setColorizableTexts(self,src):
+        '''Hand src to setColorizableTexts() of the object held in self._src.'''
+        target=self._src
+        target.setColorizableTexts(src)
+        
 # a StructuredTextTable holds StructuredTextRows
-class StructuredTextTable(ST.StructuredTextDocument):
+class StructuredTextTable(ST.StructuredTextParagraph):
     """
     rows is a list of lists containing tuples, which
     represent the columns/cells in each rows.
@@ -148,7 +166,7 @@ class StructuredTextTable(ST.StructuredTextDocument):
     """
     
     def __init__(self, rows, src, subs, **kw):
-        apply(ST.StructuredTextDocument.__init__,(self,subs),kw)
+        apply(ST.StructuredTextParagraph.__init__,(self,subs),kw)
         self._rows = []
         for row in rows:
             if row:
@@ -208,34 +226,40 @@ class StructuredTextTable(ST.StructuredTextDocument):
         return self.setColorizableTexts()
     
 # StructuredTextRow holds StructuredTextColumns
-class StructuredTextRow(ST.StructuredTextDocument):
+class StructuredTextRow(ST.StructuredTextParagraph):
     
     def __init__(self,row,kw):
         """
         row is a list of tuples, where each tuple is
         the raw text for a cell/column and the span
-        of that cell/column"
+        of that cell/column. 
         EX 
         [('this is column one',1), ('this is column two',1)]
         """
         
-        apply(ST.StructuredTextDocument.__init__,(self,[]),kw)
+        apply(ST.StructuredTextParagraph.__init__,(self,[]),kw)
+        
         self._columns = []
-        for column in row:            
-            self._columns.append(StructuredTextColumn(column[0],column[1],kw))
-
+        for column in row:
+            self._columns.append(StructuredTextColumn(column[0],
+                                                      column[1],
+                                                      column[2],
+                                                      column[3],
+                                                      column[4],
+                                                      kw))
+    
     def getColumns(self):
         return [self._columns]
-
+    
     def _getColumns(self):
         return [self._columns]
     
     def setColumns(self,columns):
         self._columns = columns
-        
+    
     def _setColumns(self,columns):
         return self.setColumns(columns)
-
+    
 # this holds the text of a table cell
 class StructuredTextColumn(ST.StructuredTextParagraph):
     """
@@ -245,20 +269,40 @@ class StructuredTextColumn(ST.StructuredTextParagraph):
     or StructuredTextTableData.
     """
     
-    def __init__(self,text,span,kw):
-        # print "StructuredTextColumn", text, span
+    def __init__(self,text,span,align,valign,typ,kw):
+        # text is handed to StructuredTextParagraph.__init__ together with
+        # an empty list; kw is a dict that is passed on as the keyword
+        # arguments of that call.  span, align, valign and typ are only
+        # stored, for the accessor methods of this class.
         apply(ST.StructuredTextParagraph.__init__,(self,text,[]),kw)
         self._span = span
+        self._align = align
+        self._valign = valign
+        self._type = typ
     
     def getSpan(self):
         return self._span
     
     def _getSpan(self):
         return self._span
+    
+    def getAlign(self):
+        # Returns the stored _align attribute.
+        return self._align
+    
+    def _getAlign(self):
+        # Underscore-prefixed alias of getAlign().
+        return self.getAlign()
+    
+    def getValign(self):
+        # Returns the stored _valign attribute.
+        return self._valign
+    
+    def _getValign(self):
+        # Underscore-prefixed alias of getValign().
+        return self.getValign()
+    
+    def getType(self):
+        # Returns the stored _type attribute.
+        return self._type
+    
+    def _getType(self):
+        # Underscore-prefixed alias of getType().
+        return self.getType()
+    
+class StructuredTextTableHeader(ST.StructuredTextParagraph): pass
 
-class StructuredTextTableHeader(ST.StructuredTextDocument): pass
-
-class StructuredTextTableData(ST.StructuredTextDocument): pass
+class StructuredTextTableData(ST.StructuredTextParagraph): pass
 
 class StructuredTextMarkup(STDOM.Element):
     
@@ -266,22 +310,22 @@ class StructuredTextMarkup(STDOM.Element):
        self._value=v
        self._attributes=kw.keys()
        for k, v in kw.items(): setattr(self, k, v)
-
+    
     def getChildren(self, type=type, lt=type([])):
        v=self._value
        if type(v) is not lt: v=[v]
        return v
-
+    
     def getColorizableTexts(self): return self._value,
     def setColorizableTexts(self, v): self._value=v[0]
-
+    
     def __repr__(self):
        return '%s(%s)' % (self.__class__.__name__, `self._value`)
-
+    
 class StructuredTextLiteral(StructuredTextMarkup):
     def getColorizableTexts(self): return ()
     def setColorizableTexts(self, v): pass
-
+    
 class StructuredTextEmphasis(StructuredTextMarkup): pass
 
 class StructuredTextStrong(StructuredTextMarkup): pass
@@ -294,9 +338,11 @@ class StructuredTextUnderline(StructuredTextMarkup): pass
 
 class StructuredTextSGML(StructuredTextMarkup): pass
 
-class StructuredTextLink(StructuredTextMarkup): pass    
+class StructuredTextLink(StructuredTextMarkup): pass
+
+class StructuredTextXref(StructuredTextMarkup): pass
 
-class DocumentClass:    
+class DocumentClass:
     """
     Class instance calls [ex.=> x()] require a structured text
     structure. Doc will then parse each paragraph in the structure
@@ -309,7 +355,6 @@ class DocumentClass:
     instance with a strong instance stored in its string
     """
     
-    #'doc_table',
     paragraph_types  = [
         'doc_bullet',
         'doc_numbered',
@@ -322,13 +367,15 @@ class DocumentClass:
     #'doc_named_link',
     #'doc_underline',
     text_types = [
+        'doc_sgml',
         'doc_href',
         'doc_strong',
         'doc_emphasize',
         'doc_literal',
-        'doc_sgml'
+        'doc_sgml',
+        'doc_xref',
         ]
-
+    
     def __call__(self, doc):
         if type(doc) is type(''):
            doc=ST.StructuredText(doc)
@@ -338,10 +385,10 @@ class DocumentClass:
            doc=ST.StructuredTextDocument(self.color_paragraphs(
               doc.getSubparagraphs()))
         return doc
-
+    
     def parse(self, raw_string, text_type,
               type=type, st=type(''), lt=type([])):
-
+        
        """
        Parse accepts a raw_string, an expr to test the raw_string,
        and the raw_string's subparagraphs.
@@ -425,7 +472,6 @@ class DocumentClass:
                            st=type('')):
        result=[]
        for paragraph in raw_paragraphs:
-          #print type(paragraph)
           if paragraph.getNodeName() != 'StructuredTextParagraph':
              result.append(paragraph)
              continue
@@ -445,23 +491,22 @@ class DocumentClass:
                 break
           else:
              new_paragraphs=ST.StructuredTextParagraph(paragraph.getColorizableTexts()[0],
-                                                          self.color_paragraphs(paragraph.getSubparagraphs()),
-                                                          indent=paragraph.indent),
+                                                       self.color_paragraphs(paragraph.getSubparagraphs()),
+                                                       indent=paragraph.indent),
         
           # color the inline StructuredText types
           # for each StructuredTextParagraph
           for paragraph in new_paragraphs:
              
              if paragraph.getNodeName() is "StructuredTextTable":
-                #print "we have a table"
                 cells = paragraph.getColumns()
                 text = paragraph.getColorizableTexts()
                 text = map(ST.StructuredText,text)
                 text = map(self.__call__,text)
-                #for index in range(len(text)):
-                #    text[index].setColorizableTexts(map(self.color_text,text[index].getColorizableTexts()))
+                for t in range(len(text)):
+                    text[t] = text[t].getSubparagraphs()
                 paragraph.setColorizableTexts(text)
-                        
+                
              paragraph.setColorizableTexts(
                 map(self.color_text,
                     paragraph.getColorizableTexts()
@@ -470,7 +515,7 @@ class DocumentClass:
 
        return result
     
-    def doc_table(self, paragraph, expr = re.compile('\s*\|[-]+\|').match):
+    def doc_table(self, paragraph, expr = re.compile(r'\s*\|[-]+\|').match):
         text    = paragraph.getColorizableTexts()[0]
         m       = expr(text)
         
@@ -479,58 +524,102 @@ class DocumentClass:
         if not (m):
             return None
         rows = []
-                                                                
-        rows = split(text,'\n')        
-        
+                
         spans   = []
         ROWS    = []
         COLS    = []
-    
-        TDdivider = re.compile("[\-]+").match
-        THdivider = re.compile("[\=]+").match
-    
-        # find where the column markers are located
-        col = re.compile('\|').search
+        indexes = []
+        ignore  = []
+        
+        TDdivider   = re.compile("[\-]+").match
+        THdivider   = re.compile("[\=]+").match
+        col         = re.compile('\|').search
+        innertable  = re.compile('\|([-]+|[=]+)\|').search
+        
         text = strip(text)
         rows = split(text,'\n')
+        foo  = ""
+        
         for row in range(len(rows)):
             rows[row] = strip(rows[row])
-    
-        for row in rows:
-            tmp = strip(row)
-            tmp = row[1:len(tmp)-1] # remove leading and trailing |
-            offset = 0
+        
+        # have indexes store if a row is a divider
+        # or a cell part
+        for index in range(len(rows)):
+            tmpstr = rows[index][1:len(rows[index])-1]
+            if TDdivider(tmpstr):
+                indexes.append("TDdivider")
+            elif THdivider(tmpstr):
+                indexes.append("THdivider")
+            else:
+                indexes.append("cell")
+
+        for index in range(len(indexes)):
+            if indexes[index] is "TDdivider" or indexes[index] is THdivider:
+                ignore = [] # reset ignore
+                #continue    # skip dividers
+
+            tmp     = strip(rows[index])    # clean the row up
+            tmp     = tmp[1:len(tmp)-1]     # remove leading + trailing |
+            offset  = 0
+
+            # find the start and end of inner
+            # tables. ignore everything between
+            if innertable(tmp):
+                tmpstr = strip(tmp)
+                while innertable(tmpstr):
+                    start,end   = innertable(tmpstr).span()
+                    if not (start,end-1) in ignore:
+                        ignore.append(start,end-1)
+                    tmpstr = " " + tmpstr[end:]
+
+            # find the location of column dividers
+            # NOTE: |'s in inner tables do not count
+            #   as column dividers
             if col(tmp):
                 while col(tmp):
-                    start,end = col(tmp).span()
+                    bar         = 1   # true if start is not in ignore
+                    start,end   = col(tmp).span()
+
                     if not start+offset in spans:
-                        spans.append(start + offset)
-                    COLS.append((tmp[0:start],start+offset))
-                    tmp = " " + tmp[end:]
-                    offset = offset + (start)
+                        for s,e in ignore:
+                            if start+offset >= s or start+offset <= e:
+                                bar = None
+                                break
+                        if bar:   # start is clean
+                            spans.append(start+offset)
+                    if not bar:
+                        foo = foo + tmp[:end]
+                        tmp = tmp[end:]
+                        offset = offset + end
+                    else:
+                        COLS.append((foo + tmp[0:start],start+offset))
+                        foo = ""
+                        tmp = " " + tmp[end:]
+                        offset = offset + start
             if not offset+len(tmp) in spans:
                 spans.append(offset+len(tmp))
-            COLS.append((tmp,offset+len(tmp)))
+            COLS.append((foo + tmp,offset+len(tmp)))
+            foo = ""
             ROWS.append(COLS)
             COLS = []
-    
-        spans.sort()
-    
-        ROWS = ROWS[1:len(ROWS)]        
         
+        spans.sort()
+        ROWS = ROWS[1:len(ROWS)]
+
         # find each column span
         cols    = []
         tmp     = []
-    
+        
         for row in ROWS:
             for c in row:
                 tmp.append(c[1])
             cols.append(tmp)
             tmp = []
-    
-        cur = 1     # the current column span
-        tmp = []    
-        C   = []    # holds the span of each cell
+        
+        cur = 1
+        tmp = []
+        C   = []
         for col in cols:
             for span in spans:
                 if not span in col:
@@ -541,14 +630,47 @@ class DocumentClass:
             C.append(tmp)
             tmp = []
         
-        # make rows contain the cell's text and the span
-        # of that cell
         for index in range(len(C)):
             for i in range(len(C[index])):
                 ROWS[index][i] = (ROWS[index][i][0],C[index][i])
         rows = ROWS
         
-        # now munge the table cells together
+        # label things as either TableData or
+        # Table header
+        TD  = []
+        TH  = []
+        all = []
+        for index in range(len(indexes)):
+            if indexes[index] is "TDdivider":
+                TD.append(index)
+                all.append(index)
+            if indexes[index] is "THdivider":
+                TH.append(index)
+                all.append(index)
+        TD = TD[1:]
+        dividers = all[1:]
+        #print "TD  => ", TD
+        #print "TH  => ", TH
+        #print "all => ", all, "\n"
+        
+        for div in dividers:
+            if div in TD:
+                index = all.index(div)
+                for rowindex in range(all[index-1],all[index]):                    
+                    for i in range(len(rows[rowindex])):
+                        rows[rowindex][i] = (rows[rowindex][i][0],
+                                             rows[rowindex][i][1],
+                                             "td")
+            else:
+                index = all.index(div)
+                for rowindex in range(all[index-1],all[index]):
+                    for i in range(len(rows[rowindex])):
+                        rows[rowindex][i] = (rows[rowindex][i][0],
+                                             rows[rowindex][i][1],
+                                             "th")
+        
+        # now munge the multi-line cells together
+        # as paragraphs
         ROWS    = []
         COLS    = []
         for row in rows:
@@ -556,16 +678,97 @@ class DocumentClass:
                 if not COLS:
                     COLS = range(len(row))
                     for i in range(len(COLS)):
-                        COLS[i] = ["",1]
+                        COLS[i] = ["",1,""]
                 if TDdivider(row[index][0]) or THdivider(row[index][0]):
                     ROWS.append(COLS)
                     COLS = []
                 else:
-                    COLS[index][0] = COLS[index][0] + rstrip(row[index][0]) + "\n"
+                    COLS[index][0] = COLS[index][0] + (row[index][0]) + "\n"
                     COLS[index][1] = row[index][1]
-        return StructuredTextTable(ROWS,text,subs,indent=paragraph.indent)
+                    COLS[index][2] = row[index][2]
+        
+        # now that each cell has been munged together,
+        # determine the cell's alignment.
+        # Default is to center. Also determine the cell's
+        # vertical alignment, top, middle, bottom. Default is
+        # to middle
+        rows = []
+        cols = []
+        for row in ROWS:
+            for index in range(len(row)):
+                topindent       = 0
+                bottomindent    = 0
+                leftindent      = 0
+                rightindent     = 0
+                left            = []
+                right           = []                                    
+                text            = row[index][0]
+                text            = split(text,'\n')
+                text            = text[:len(text)-1]
+                align           = ""
+                valign          = ""
+                for t in text:
+                    t = strip(t)
+                    if not t:
+                        topindent = topindent + 1
+                    else:
+                        break
+                text.reverse()
+                for t in text:
+                    t = strip(t)
+                    if not t:
+                        bottomindent = bottomindent + 1
+                    else:
+                        break
+                text.reverse()
+                tmp   = join(text[topindent:len(text)-bottomindent],"\n")
+                pars  = re.compile("\n\s*\n").split(tmp)
+                for par in pars:
+                    if index > 0:
+                        par = par[1:]
+                    par = split(par, ' ')
+                    for p in par:
+                        if not p:
+                            leftindent = leftindent+1
+                        else:
+                            break
+                    left.append(leftindent)
+                    leftindent = 0
+                    par.reverse()
+                    for p in par:
+                        if not p:
+                            rightindent = rightindent + 1
+                        else:
+                            break
+                    right.append(rightindent)
+                    rightindent = 0
+                left.sort()
+                right.sort()
+
+                if topindent == bottomindent:
+                    valign="middle"
+                elif topindent < 1:
+                    valign="top"
+                elif bottomindent < 1:
+                    valign="bottom"
+                else:
+                    valign="middle"
+
+                if left[0] < 1:
+                    align = "left"
+                elif right[0] < 1:
+                    align = "right"
+                elif left[0] > 1 and right[0] > 1:
+                    align="center"
+                else:
+                    align="left"
+                
+                cols.append(row[index][0],row[index][1],align,valign,row[index][2])
+            rows.append(cols)
+            cols = []
+        return StructuredTextTable(rows,text,subs,indent=paragraph.indent)
             
-    def doc_bullet(self, paragraph, expr = re.compile('\s*[-*o]\s+').match):
+    def doc_bullet(self, paragraph, expr = re.compile(r'\s*[-*o]\s+').match):
         top=paragraph.getColorizableTexts()[0]
         m=expr(top)
 
@@ -583,7 +786,7 @@ class DocumentClass:
 
     def doc_numbered(
         self, paragraph,
-        expr = re.compile('(\s*[a-zA-Z]+\.)|(\s*[0-9]+\.)|(\s*[0-9]+\s+)').match):
+        expr = re.compile(r'(\s*[%s]+\.)|(\s*[0-9]+\.)|(\s*[0-9]+\s+)' % letters).match):
         
         # This is the old expression. It had a nasty habit
         # of grabbing paragraphs that began with a single
@@ -607,8 +810,8 @@ class DocumentClass:
 
     def doc_description(
         self, paragraph,
-        delim = re.compile('\s+--\s+').search,
-        nb=re.compile(r'[^\0- ]').search,
+        delim = re.compile(r'\s+--\s+').search,
+        nb=re.compile(r'[^\000- ]').search,
         ):
 
         top=paragraph.getColorizableTexts()[0]
@@ -632,7 +835,7 @@ class DocumentClass:
            delim=d)
 
     def doc_header(self, paragraph,
-                    expr    = re.compile('[ a-zA-Z0-9.:/,-_*<>\?\'\"]+').match
+                    expr    = re.compile(r'[ %s0-9.:/,-_*<>\?\'\"]+' % letters).match
                     ):
         subs=paragraph.getSubparagraphs()
         if not subs: return None
@@ -650,9 +853,9 @@ class DocumentClass:
     def doc_literal(
         self, s,
         expr=re.compile(
-          "(?:\s|^)'"                                                  # open
-          "([^ \t\n\r\f\v']|[^ \t\n\r\f\v'][^\n']*[^ \t\n\r\f\v'])" # contents
-          "'(?:\s|[,.;:!?]|$)"                                        # close
+          r"(?:\s|^)'"                                                  # open
+          r"([^ \t\n\r\f\v']|[^ \t\n\r\f\v'][^\n']*[^ \t\n\r\f\v'])" # contents
+          r"'(?:\s|[,.;:!?]|$)"                                        # close
           ).search):
         
         r=expr(s)
@@ -664,7 +867,7 @@ class DocumentClass:
 
     def doc_emphasize(
         self, s,
-        expr = re.compile('\s*\*([ \na-zA-Z0-9.:/;,\'\"\?]+)\*(?!\*|-)').search
+        expr = re.compile(r'\s*\*([ \n%s0-9]+)\*(?!\*|-)' % lettpunc).search
         ):
 
         r=expr(s)
@@ -676,8 +879,8 @@ class DocumentClass:
     
     def doc_inner_link(self,
                        s,
-                       expr1 = re.compile("\.\.\s*").search,
-                       expr2 = re.compile("\[[a-zA-Z0-9]+\]").search):
+                       expr1 = re.compile(r"\.\.\s*").search,
+                       expr2 = re.compile(r"\[[%s0-9]+\]" % letters ).search):
         
         # make sure we dont grab a named link
         if expr2(s) and expr1(s):
@@ -697,7 +900,7 @@ class DocumentClass:
     
     def doc_named_link(self,
                        s,
-                       expr=re.compile("(\.\.\s)(\[[a-zA-Z0-9]+\])").search):
+                       expr=re.compile(r"(\.\.\s)(\[[%s0-9]+\])" % letters).search):
         
         result = expr(s)
         if result:
@@ -711,7 +914,7 @@ class DocumentClass:
     
     def doc_underline(self,
                       s,
-                      expr=re.compile("\_([a-zA-Z0-9\s\.,\?]+)\_").search):
+                      expr=re.compile(r"\s+\_([%s0-9\s]+)\_" % lettpunc).search):
         
         result = expr(s)
         if result:
@@ -723,7 +926,7 @@ class DocumentClass:
     
     def doc_strong(self, 
                    s,
-        expr = re.compile('\s*\*\*([ \na-zA-Z0-9.:/;\-,!\?\'\"]+)\*\*').search
+        expr = re.compile(r'\s*\*\*([ \n%s0-9]+)\*\*' % lettpunc).search
         ):
 
         r=expr(s)
@@ -732,14 +935,17 @@ class DocumentClass:
            return (StructuredTextStrong(s[start:end]), start-2, end+2)
         else:
            return None
+
+    ## Some constants to make the doc_href() regex easier to read.
+    _DQUOTEDTEXT = r'("[%s0-9\n\-\.\,\;\(\)\/\:\/\*\']+")'  % letters ## double quoted text
+    _URL_AND_PUNC = r'([%s0-9\@\.\,\?\!\/\:\;\-\#\~]+)' % letters 
+    _SPACES = r'(\s*)'
     
-    def doc_href(
-        
-        self, s,
-        expr1 = re.compile("(\"[ a-zA-Z0-9\n\-\.\,\;\(\)\/\:\/]+\")(:)([a-zA-Z0-9\:\/\.\~\-]+)([,]*\s*)").search,
-        expr2 = re.compile('(\"[ a-zA-Z0-9\n\-\.\:\;\(\)\/]+\")([,]+\s+)([a-zA-Z0-9\@\.\,\?\!\/\:\;\-\#]+)(\s*)').search):
+    def doc_href(self, s,
+                 expr1 = re.compile(_DQUOTEDTEXT + "(:)" + _URL_AND_PUNC + _SPACES).search,
+                 expr2 = re.compile(_DQUOTEDTEXT + r'(\,\s+)' + _URL_AND_PUNC + _SPACES).search):
         
-        punctuation = re.compile("[\,\.\?\!\;]+").match
+        punctuation = re.compile(r"[\,\.\?\!\;]+").match
         r=expr1(s) or expr2(s)
 
         if r:
@@ -766,7 +972,7 @@ class DocumentClass:
         else:
             return None
     
-    def doc_sgml(self,s,expr=re.compile("\<[a-zA-Z0-9\.\=\'\"\:\/\-\#\+\s]+\>").search):
+    def doc_sgml(self,s,expr=re.compile(r"\<[%s0-9\.\=\'\"\:\/\-\#\+\s\*]+\>" % letters).search):
         """
         SGML text is ignored and outputed as-is
         """
@@ -775,3 +981,18 @@ class DocumentClass:
             start,end = r.span()
             text = s[start:end]
             return (StructuredTextSGML(text),start,end)
+
+
+    def doc_xref(self, s,
+        expr = re.compile('\[([%s0-9\-.:/;,\n\~]+)\]' % letters).search
+        ):
+        r = expr(s)
+        if r:
+            start, end = r.span(1)
+            return (StructuredTextXref(s[start:end]), start-1, end+1)
+        else:
+            return None
+
+
+
+
index d5c03d8357a3965d83899eec67a4250453765faf..951aec4c97bc299d74dbdee53a0892fde3aaf84b 100644 (file)
@@ -84,6 +84,7 @@
 ##############################################################################
 
 from string import join, split, find
+from cgi import escape
 import re, sys, ST
 
 class HTMLClass:
@@ -172,34 +173,34 @@ class HTMLClass:
     def bullet(self, doc, level, output):
         p=doc.getPreviousSibling()
         if p is None or p.getNodeName() is not doc.getNodeName():
-            output('<ul>\n')
+            output('\n<ul>\n')
         output('<li>')
         for c in doc.getChildNodes():
             getattr(self, self.element_types[c.getNodeName()])(c, level, output)
         n=doc.getNextSibling()
         output('</li>\n')
         if n is None or n.getNodeName() is not doc.getNodeName():            
-            output('</ul>\n')
+            output('\n</ul>\n')
 
     def numbered(self, doc, level, output):
         p=doc.getPreviousSibling()
         if p is None or p.getNodeName() is not doc.getNodeName():            
-            output('<ol>\n')
+            output('\n<ol>\n')
         output('<li>')
         for c in doc.getChildNodes():
             getattr(self, self.element_types[c.getNodeName()])(c, level, output)
         n=doc.getNextSibling()
         output('</li>\n')
         if n is None or n.getNodeName() is not doc.getNodeName():
-            output('</ol>\n')
+            output('\n</ol>\n')
 
     def example(self, doc, level, output):
         i=0
         for c in doc.getChildNodes():
             if i==0:
-                output('<pre>')
-                output(html_quote(c.getNodeValue()))
-                output('</pre>\n')
+                output('\n<pre>\n')
+                output(escape(c.getNodeValue()))
+                output('\n</pre>\n')
             else:
                 getattr(self, self.element_types[c.getNodeName()])(
                     c, level, output)
@@ -214,7 +215,7 @@ class HTMLClass:
             else:
                 getattr(self, self.element_types[c.getNodeName()])(
                     c, level, output)
-        output('</p>')
+        output('</p>\n')
 
     def link(self, doc, level, output):
         output('<a href="%s">' % doc.href)
@@ -231,7 +232,7 @@ class HTMLClass:
     def literal(self, doc, level, output):
         output('<code>')
         for c in doc.getChildNodes():
-            output(html_quote(c.getNodeValue()))
+            output(escape(c.getNodeValue()))
         output('</code>')
 
     def strong(self, doc, level, output):
@@ -267,6 +268,10 @@ class HTMLClass:
     def sgml(self,doc,level,output):
         for c in doc.getChildNodes():
             getattr(self, self.element_types[c.getNodeName()])(c, level, output)
+
+    def xref(self, doc, level, output):
+        val = doc.getNodeValue()
+        output('<a href="#%s">[%s]</a>' % (val, val) )
     
     def table(self,doc,level,output):
         """
@@ -279,29 +284,23 @@ class HTMLClass:
         for row in doc.getRows()[0]:
             output("<tr>\n")
             for column in row.getColumns()[0]:
-                str = "<td colspan=%s>" % column.getSpan()
+                if hasattr(column,"getAlign"):
+                    str = "<%s colspan=%s align=%s valign=%s>" % (column.getType(),
+                                                                  column.getSpan(),
+                                                                  column.getAlign(),
+                                                                  column.getValign())
+                else:
+                    str = "<td colspan=%s>" % column.getSpan()
                 output(str)
-                #for c in doc.getChildNodes():
-                #    getattr(self, self.element_types[c.getNodeName()])(c, level, output)
                 for c in column.getChildNodes():
                     getattr(self, self.element_types[c.getNodeName()])(c, level, output)
-                output("</td>\n")
+                if hasattr(column,"getType"):
+                    output("</"+column.getType()+">\n")
+                else:
+                    output("</td>\n")
             output("</tr>\n")
         output("</table>\n")
           
-def html_quote(v, name='(Unknown name)', md={},
-                    character_entities=(
-                              (('&'),     '&amp;'),
-                              (('<'),     '&lt;' ),
-                              (('>'),     '&gt;' ),
-                              (('\213'), '&lt;' ),
-                              (('\233'), '&gt;' ),
-                              (('"'),     '&quot;'))): #"
-          text=str(v)
-          for re,name in character_entities:
-                if find(text, re) >= 0: text=join(split(text,re),name)
-          return text
-
 
 
 
index 4d1e2f2b052bf5a04d08e716986e6953462bf2fb..2b25a8891cd2108870f426fd8eb51700b93093b1 100644 (file)
@@ -109,21 +109,16 @@ class HTMLWithImages(HTMLClass):
         output('</body>\n')
         output('</html>\n')
 
-
-    def image(self, doc, level, output):
-        output('<img src="%s" alt="%s">' % (doc.href, doc.getNodeValue()))
-
-
     def image(self, doc, level, output):
        if hasattr(doc, 'key'):
-          output('<a name="%s"></a>\n<img src="%s" alt="%s">' % (doc.key, doc.href, doc.getNodeValue()))
-       else:
-          output('<img src="%s" alt="%s">' % (doc.href, doc.getNodeValue()))
-
+          output('<a name="%s"></a>\n' % doc.key)
+       output('<img src="%s" alt="%s">\n' % (doc.href, doc.getNodeValue()))
+       if doc.getNodeValue() and hasattr(doc, 'key'):
+           output('<p><b>Figure %s</b> %s</p>\n' % (doc.key, doc.getNodeValue()))
 
     def xref(self, doc, level, output):
         val = doc.getNodeValue()
-        output('<a href="#%s">%s</a>' % (val, val) )
+        output('<a href="#%s">Figure %s</a>' % (val, val) )
 
 
 
diff --git a/wxPython/samples/stxview/StructuredText/MML.py b/wxPython/samples/stxview/StructuredText/MML.py
deleted file mode 100644 (file)
index 515bd32..0000000
+++ /dev/null
@@ -1,170 +0,0 @@
-##############################################################################
-# 
-# Zope Public License (ZPL) Version 1.0
-# -------------------------------------
-# 
-# Copyright (c) Digital Creations.  All rights reserved.
-# 
-# This license has been certified as Open Source(tm).
-# 
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-# 
-# 1. Redistributions in source code must retain the above copyright
-#    notice, this list of conditions, and the following disclaimer.
-# 
-# 2. Redistributions in binary form must reproduce the above copyright
-#    notice, this list of conditions, and the following disclaimer in
-#    the documentation and/or other materials provided with the
-#    distribution.
-# 
-# 3. Digital Creations requests that attribution be given to Zope
-#    in any manner possible. Zope includes a "Powered by Zope"
-#    button that is installed by default. While it is not a license
-#    violation to remove this button, it is requested that the
-#    attribution remain. A significant investment has been put
-#    into Zope, and this effort will continue if the Zope community
-#    continues to grow. This is one way to assure that growth.
-# 
-# 4. All advertising materials and documentation mentioning
-#    features derived from or use of this software must display
-#    the following acknowledgement:
-# 
-#      "This product includes software developed by Digital Creations
-#      for use in the Z Object Publishing Environment
-#      (http://www.zope.org/)."
-# 
-#    In the event that the product being advertised includes an
-#    intact Zope distribution (with copyright and license included)
-#    then this clause is waived.
-# 
-# 5. Names associated with Zope or Digital Creations must not be used to
-#    endorse or promote products derived from this software without
-#    prior written permission from Digital Creations.
-# 
-# 6. Modified redistributions of any form whatsoever must retain
-#    the following acknowledgment:
-# 
-#      "This product includes software developed by Digital Creations
-#      for use in the Z Object Publishing Environment
-#      (http://www.zope.org/)."
-# 
-#    Intact (re-)distributions of any official Zope release do not
-#    require an external acknowledgement.
-# 
-# 7. Modifications are encouraged but must be packaged separately as
-#    patches to official Zope releases.  Distributions that do not
-#    clearly separate the patches from the original work must be clearly
-#    labeled as unofficial distributions.  Modifications which do not
-#    carry the name Zope may be packaged in any form, as long as they
-#    conform to all of the clauses above.
-# 
-# 
-# Disclaimer
-# 
-#   THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
-#   EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-#   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-#   PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
-#   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-#   USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-#   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-#   OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-#   OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-#   SUCH DAMAGE.
-# 
-# 
-# This software consists of contributions made by Digital Creations and
-# many individuals on behalf of Digital Creations.  Specific
-# attributions are listed in the accompanying credits file.
-# 
-##############################################################################
-'''
-$Id$'''
-
-from StructuredText import * # :-)
-        
-def ctag(s):
-    # Blech, wish we could use character tags
-    if s is None: s=''
-    s=gsub(strong,'\\1<bold>\\2<plain>\\3',s)
-    s=gsub(code,  '\\1<family Courier>\\2<family Times>\\3',s)
-    s=gsub(em,    '\\1<italic>\\2<plain>\\3',s)
-    return join(map(strip,split(s,'\n')),'\n')
-
-class MML(StructuredText):
-
-    '''\
-    An MML structured text formatter.
-    '''\
-
-    def __str__(self,
-                ):
-        '''\
-        Return an HTML string representation of the structured text data.
-
-        '''
-        s=self._str(self.structure,self.level)
-        return s
-
-    def ul(self, before, p, after):
-        return ("%s\n\n<Bulleted>\n%s%s"
-                % (before, ctag(p), after))
-
-    def ol(self, before, p, after):
-        return ("%s\n\n<Numbered>\n%s%s"
-                % (before, ctag(p), after))
-
-    def dl(self, before, t, d, after):
-        return ("%s\n\n<Term>\n%s\n\n<Definition>\n%s%s" 
-                % (before,ctag(t),ctag(d),after))
-
-    def head(self, before, t, level, d):
-        return ("%s\n\n<Heading%d>\n%s%s"
-                % (before,level,ctag(t),d))
-
-    def normal(self,before,p,after):
-        return "%s\n\n<Body>\n%s%s" % (before, ctag(p), after)
-
-    def pre(self,structure,r=None):
-        if r is None: r=['']
-        for s in structure:
-            for line in split(s[0],'\n'):
-                r.append('\n<PRE>')
-                r.append(line)
-            self.pre(s[1],r)
-        return join(r,'\n')
-
-    def _str(self,structure,level):
-        r=''
-        for s in structure:
-            # print s[0],'\n', len(s[1]), '\n\n'
-            if bullet.match(s[0]) >= 0:
-                p=bullet.group(1)
-                r=self.ul(r,p,self._str(s[1],level))
-            elif ol.match(s[0]) >= 0:
-                p=ol.group(3)
-                r=self.ol(r,p,self._str(s[1],level))
-            elif olp.match(s[0]) >= 0:
-                p=olp.group(1)
-                r=self.ol(r,p,self._str(s[1],level))
-            elif dl.match(s[0]) >= 0:
-                t,d=dl.group(1,2)
-                r=self.dl(r,t,d,self._str(s[1],level))
-            elif example.search(s[0]) >= 0 and s[1]:
-                # Introduce an example, using pre tags:
-                r=self.normal(r,s[0],self.pre(s[1]))
-            elif s[0][-2:]=='::' and s[1]:
-                # Introduce an example, using pre tags:
-                r=self.normal(r,s[0][:-1],self.pre(s[1]))
-            elif nl.search(s[0]) < 0 and s[1] and s[0][-1:] != ':':
-                # Treat as a heading
-                t=s[0]
-                r=self.head(r,t,level,
-                            self._str(s[1],level and level+1))
-            else:
-                r=self.normal(r,s[0],self._str(s[1],level))
-        return r        
index 2e6d0aba1f114b51a4b7fcdbac17ec0bb243de71..3917adcaa82f9d4a2b560b254c63e735349b5fec 100644 (file)
@@ -26,7 +26,9 @@ def insert(struct, top, level):
     """
     #print "struct", struct, top-1
     if not top-1 in range(len(struct)):
-        return None
+        if struct:
+            return struct[len(struct)-1].getSubparagraphs()
+        return struct
     run = struct[top-1]
     i    = 0
     while i+1 < level:
@@ -142,8 +144,11 @@ def StructuredText(paragraphs, paragraph_delimiter=re.compile('\n\s*\n')):
             if result > 0:
                 currentlevel = result
             currentindent  = indent
-            run = insert(struct,level,currentlevel)
-            run.append(StructuredTextParagraph(paragraph, indent=indent, level=currentlevel))
+            if not level:
+                struct.append(StructuredTextParagraph(paragraph, indent=indent, level=currentlevel))
+            else:
+                run = insert(struct,level,currentlevel)
+                run.append(StructuredTextParagraph(paragraph, indent=indent, level=currentlevel))
         else:
             if insert(struct,level,currentlevel):
                 run = insert(struct,level,currentlevel)
index 1eb2d427314b0c968bd6bd5a02a91d92024f6837..c38f2fa6f499b22e8d81b8c857977cd8a2a925af 100644 (file)
@@ -178,7 +178,7 @@ class ParentNode:
       if not children:
          return None
          
-      n=chidren[0]
+      n=children[0]
 
       if type(n) is st:
          n=TextNode(n)
@@ -554,7 +554,7 @@ class Element(Node):
       return self.getNodeType()
       
    def _get_NodeValue(self, type=type, st=type('')):
-      return self.GetNodeValue(type,st)
+      return self.getNodeValue(type,st)
       
    def _get_ParentNode(self):
       return self.getParentNode()
index 40af179bd1c18c890147f22c7167df2b5bd4b800..20c7e6fc823fdf42f44481dc96862d279351fc1e 100644 (file)
@@ -18,7 +18,7 @@ Using Structured Text
     st=StructuredText.Basic(raw)
 
   The output of 'StructuredText.Basic' is simply a
-  StructuredTextDocumemt object containing StructuredTextParagraph
+  StructuredTextDocument object containing StructuredTextParagraph
   objects arranged in a hierarchy. Paragraphs are delimited by strings
   of two or more whitespace characters beginning and ending with
   newline characters. Hierarchy is indicated by indentation. The
diff --git a/wxPython/samples/stxview/StructuredText/STletters.py b/wxPython/samples/stxview/StructuredText/STletters.py
new file mode 100644 (file)
index 0000000..5168b01
--- /dev/null
@@ -0,0 +1,15 @@
+import string
+
+try:
+    del string
+    import locale
+    locale.setlocale(locale.LC_ALL,"")
+except:
+    pass    
+
+import string
+
+letters     = string.letters
+punctuations = string.punctuation 
+
+lettpunc    = letters + punctuations
index a1b3fd03ad20dc0af7482b697236a1f28f048b2a..2408f2331c458ce5b3820ed3582cad8a2a6b259c 100644 (file)
@@ -1,4 +1,3 @@
-#! /usr/bin/env python -- # -*- python -*-
 ##############################################################################
 # 
 # Zope Public License (ZPL) Version 1.0
 # attributions are listed in the accompanying credits file.
 # 
 ##############################################################################
-'''Structured Text Manipulation
 
-Parse a structured text string into a form that can be used with 
-structured formats, like html.
+""" Alias module for StructuredTextClassic compatibility which makes
+use of StructuredTextNG """
 
-Structured text is text that uses indentation and simple
-symbology to indicate the structure of a document.  
 
-A structured string consists of a sequence of paragraphs separated by
-one or more blank lines.  Each paragraph has a level which is defined
-as the minimum indentation of the paragraph.  A paragraph is a
-sub-paragraph of another paragraph if the other paragraph is the last
-preceding paragraph that has a lower level.
+import HTMLClass, DocumentClass, ClassicDocumentClass
+from ST import Basic
 
-Special symbology is used to indicate special constructs:
+import re, string,sys
+from STletters import letters
 
-- A single-line paragraph whose immediately succeeding paragraphs are lower
-  level is treated as a header.
+Document = ClassicDocumentClass.DocumentClass()
+HTMLNG = HTMLClass.HTMLClass()
 
-- A paragraph that begins with a '-', '*', or 'o' is treated as an
-  unordered list (bullet) element.
+def HTML(aStructuredString, level=0):
+    st = Basic(aStructuredString)
+    doc = Document(st)
+    return HTMLNG(doc)
 
-- A paragraph that begins with a sequence of digits followed by a
-  white-space character is treated as an ordered list element.
+def StructuredText(aStructuredString, level=0):
+    return HTML(aStructuredString,level)
 
-- A paragraph that begins with a sequence of sequences, where each
-  sequence is a sequence of digits or a sequence of letters followed
-  by a period, is treated as an ordered list element.
-
-- A paragraph with a first line that contains some text, followed by
-  some white-space and '--' is treated as
-  a descriptive list element. The leading text is treated as the
-  element title.
-
-- Sub-paragraphs of a paragraph that ends in the word 'example' or the
-  word 'examples', or '::' is treated as example code and is output as is.
-
-- Text enclosed single quotes (with white-space to the left of the
-  first quote and whitespace or puctuation to the right of the second quote)
-  is treated as example code.
-
-- Text surrounded by '*' characters (with white-space to the left of the
-  first '*' and whitespace or puctuation to the right of the second '*')
-  is emphasized.
-
-- Text surrounded by '**' characters (with white-space to the left of the
-  first '**' and whitespace or puctuation to the right of the second '**')
-  is made strong.
-
-- Text surrounded by '_' underscore characters (with whitespace to the left 
-  and whitespace or punctuation to the right) is made underlined.
-
-- Text encloded by double quotes followed by a colon, a URL, and concluded
-  by punctuation plus white space, *or* just white space, is treated as a
-  hyper link. For example:
-
-    "Zope":http://www.zope.org/ is ...
-
-  Is interpreted as '<a href="http://www.zope.org/">Zope</a> is ....'
-  Note: This works for relative as well as absolute URLs.
-
-- Text enclosed by double quotes followed by a comma, one or more spaces,
-  an absolute URL and concluded by punctuation plus white space, or just
-  white space, is treated as a hyper link. For example: 
-
-    "mail me", mailto:amos@digicool.com.
-
-  Is interpreted as '<a href="mailto:amos@digicool.com">mail me</a>.' 
-
-- Text enclosed in brackets which consists only of letters, digits,
-  underscores and dashes is treated as hyper links within the document.
-  For example:
-    
-    As demonstrated by Smith [12] this technique is quite effective.
-
-  Is interpreted as '... by Smith <a href="#12">[12]</a> this ...'. Together
-  with the next rule this allows easy coding of references or end notes.
-
-- Text enclosed in brackets which is preceded by the start of a line, two
-  periods and a space is treated as a named link. For example:
-
-    .. [12] "Effective Techniques" Smith, Joe ... 
-
-  Is interpreted as '<a name="12">[12]</a> "Effective Techniques" ...'.
-  Together with the previous rule this allows easy coding of references or
-  end notes. 
-
-
-- A paragraph that has blocks of text enclosed in '||' is treated as a
-  table. The text blocks correspond to table cells and table rows are
-  denoted by newlines. By default the cells are center aligned. A cell
-  can span more than one column by preceding a block of text with an
-  equivalent number of cell separators '||'. Newlines and '|' cannot
-  be a part of the cell text. For example:
-
-      |||| **Ingredients** ||
-      || *Name* || *Amount* ||
-      ||Spam||10||
-      ||Eggs||3||
-
-  is interpreted as::
-
-    <TABLE BORDER=1 CELLPADDING=2>
-     <TR>
-      <TD ALIGN=CENTER COLSPAN=2> <strong>Ingredients</strong> </TD>
-     </TR>
-     <TR>
-      <TD ALIGN=CENTER COLSPAN=1> <em>Name</em> </TD>
-      <TD ALIGN=CENTER COLSPAN=1> <em>Amount</em> </TD>
-     </TR>
-     <TR>
-      <TD ALIGN=CENTER COLSPAN=1>Spam</TD>
-      <TD ALIGN=CENTER COLSPAN=1>10</TD>
-     </TR>
-     <TR>
-      <TD ALIGN=CENTER COLSPAN=1>Eggs</TD>
-      <TD ALIGN=CENTER COLSPAN=1>3</TD>
-     </TR>
-    </TABLE>
-
-    
-$Id$'''
-#     Copyright 
-#
-#       Copyright 1996 Digital Creations, L.C., 910 Princess Anne
-#       Street, Suite 300, Fredericksburg, Virginia 22401 U.S.A. All
-#       rights reserved.  Copyright in this software is owned by DCLC,
-#       unless otherwise indicated. Permission to use, copy and
-#       distribute this software is hereby granted, provided that the
-#       above copyright notice appear in all copies and that both that
-#       copyright notice and this permission notice appear. Note that
-#       any product, process or technology described in this software
-#       may be the subject of other Intellectual Property rights
-#       reserved by Digital Creations, L.C. and are not licensed
-#       hereunder.
-#
-#     Trademarks 
-#
-#       Digital Creations & DCLC, are trademarks of Digital Creations, L.C..
-#       All other trademarks are owned by their respective companies. 
-#
-#     No Warranty 
-#
-#       The software is provided "as is" without warranty of any kind,
-#       either express or implied, including, but not limited to, the
-#       implied warranties of merchantability, fitness for a particular
-#       purpose, or non-infringement. This software could include
-#       technical inaccuracies or typographical errors. Changes are
-#       periodically made to the software; these changes will be
-#       incorporated in new editions of the software. DCLC may make
-#       improvements and/or changes in this software at any time
-#       without notice.
-#
-#     Limitation Of Liability 
-#
-#       In no event will DCLC be liable for direct, indirect, special,
-#       incidental, economic, cover, or consequential damages arising
-#       out of the use of or inability to use this software even if
-#       advised of the possibility of such damages. Some states do not
-#       allow the exclusion or limitation of implied warranties or
-#       limitation of liability for incidental or consequential
-#       damages, so the above limitation or exclusion may not apply to
-#       you.
-#  
-#
-# If you have questions regarding this software,
-# contact:
-#
-#   Jim Fulton, jim@digicool.com
-#
-#   (540) 371-6909
-#
-# $Log$
-# Revision 1.1  2001/03/10 05:07:20  RD
-# Added some simple sample apps
-#
-# Revision 1.27  2000/04/21 13:38:10  jim
-# Added closing list tags. Woo hoo!
-#
-# Revision 1.26  2000/03/14 17:22:04  brian
-# Allow ~ in hrefs.
-#
-# Revision 1.25  2000/02/17 00:53:24  klm
-# HTML._str(): We were getting preformatted examples rendered twice,
-# second time without preformatting.  Problem was a missing 'continue'
-# in one of the cases.
-#
-# Revision 1.24  1999/12/13 16:32:48  klm
-# Incorporated pavlos christoforou's mods to handle simple tables.  From
-# his web page at http://www.zope.org/Members/gaaros/StructuredText:
-#
-#   Structured Text module with table support
-#
-#   A paragraph that has blocks of text enclosed in '||' is treated as a
-#   table. The text blocks correspond to table cells and table rows are
-#   denoted by newlines. By default the cells are center aligned. You can
-#   change the defaults by modifying the CELL,ROW and TABLE class
-#   attributes in class Table. A cell can span more than one column by
-#   preceding a block of text with an equivalent number of cell separators
-#   '||'. Newlines and '|' cannot be a part of the cell text. If you need
-#   newlines use <BR>. For example:
-#
-#        |||| **Ingredients** ||
-#        || *Name* || *Amount* ||
-#        ||Spam||10||
-#        ||Eggs||3||
-#
-# Revision 1.23  1999/08/03 20:49:05  jim
-# Fixed to allow list elements to introduce examples.
-#
-# Restructured _str using continue to avoid excessive nesting.
-#
-# Revision 1.22  1999/08/02 22:01:28  jim
-# Fixed a bunch of bugs introduced by making ts_regex actually thread
-# safe.
-#
-# Also localized a bunch of regular expressions
-# using "static" variables (aka always default arguments).
-#
-# Revision 1.21  1999/08/02 13:26:52  jim
-# paragraph_divider needs to be a regular (thread-unsafe) regex
-# since it gets passed to ts_regex.split, which is thread-safe
-# and wants to use regs.
-#
-# Revision 1.20  1999/07/21 13:33:59  jim
-# untabified.
-#
-# Revision 1.19  1999/07/15 16:43:15  jim
-# Checked in Scott Robertson's thread-safety fixes.
-#
-# Revision 1.18  1999/03/24 00:03:18  klm
-# Provide for relative links, eg <a href="file_in_same_dir">whatever</a>,
-# as:
-#
-#   "whatever", :file_in_same_dir
-#
-# or
-#
-#   "whatever"::file_in_same_dir
-#
-# .__init__(): relax the second gsub, using a '*' instead of a '+', so
-# the stuff before the ':' can be missing, and also do postprocessing so
-# any resulting '<a href=":file_in_same_dir">'s have the superfluous ':'
-# removed.  *Seems* good!
-#
-# Revision 1.17  1999/03/12 23:21:39  klm
-# Gratuituous checkin to test my cvs *update* logging hook.
-#
-# Revision 1.16  1999/03/12 17:12:12  klm
-# Added support for underlined elements, in the obvious way (and
-# included an entry in the module docstring for it).
-#
-# Added an entry in the module docstring describing what i *guess* is
-# the criterion for identifying header elements.  (I'm going to have to
-# delve into and understand the framework a bit better before *knowing*
-# this is the case.)
-#
-# Revision 1.15  1999/03/11 22:40:18  klm
-# Handle links that include '#' named links.
-#
-# Revision 1.14  1999/03/11 01:35:19  klm
-# Fixed a small typo, and refined the module docstring link example, in
-# order to do a checkin to exercise the CVS repository mirroring.  Might
-# as well include my last checkin message, with some substantial stuff:
-#
-# Links are now recognized whether or not the candidate strings are
-# terminated with punctuation before the trailing whitespace.  The old
-# form - trailing punctuation then whitespace - is preserved, but the
-# punctuation is now unnecessary.
-#
-# The regular expressions are a bit more complicated, but i've factored
-# out the common parts and but them in variables with suggestive names,
-# which may make them easier to understand.
-#
-# Revision 1.13  1999/03/11 00:49:57  klm
-# Links are now recognized whether or not the candidate strings are
-# terminated with punctuation before the trailing whitespace.  The old
-# form - trailing punctuation then whitespace - is preserved, but the
-# punctuation is now unnecessary.
-#
-# The regular expressions are a bit more complicated, but i've factored
-# out the common parts and but them in variables with suggestive names,
-# which may make them easier to understand.
-#
-# Revision 1.12  1999/03/10 00:15:46  klm
-# Committing with version 1.0 of the license.
-#
-# Revision 1.11  1999/02/08 18:13:12  klm
-# Trival checkin (spelling fix "preceedeing" -> "preceding" and similar)
-# to see what pitfalls my environment presents to accomplishing a
-# successful checkin.  (It turns out that i can't do it from aldous because
-# the new version of cvs doesn't support the '-t' and '-f' options in the
-# cvswrappers file...)
-#
-# Revision 1.10  1998/12/29 22:30:43  amos
-# Improved doc string to describe hyper link and references capabilities.
-#
-# Revision 1.9  1998/12/04 20:15:31  jim
-# Detabification and new copyright.
-#
-# Revision 1.8  1998/02/27 18:45:22  jim
-# Various updates, including new indentation utilities.
-#
-# Revision 1.7  1997/12/12 15:39:54  jim
-# Added level as argument for html_with_references.
-#
-# Revision 1.6  1997/12/12 15:27:25  jim
-# Added additional pattern matching for HTML references.
-#
-# Revision 1.5  1997/03/08 16:01:03  jim
-# Moved code to recognize: "foo bar", url.
-# into object initializer, so it gets applied in all cases.
-#
-# Revision 1.4  1997/02/17 23:36:35  jim
-# Added support for "foo title", http:/foohost/foo
-#
-# Revision 1.3  1996/12/06 15:57:37  jim
-# Fixed bugs in character tags.
-#
-# Added -t command-line option to generate title if:
-#
-#    - The first paragraph is one line (i.e. a heading) and
-#
-#    - All other paragraphs are indented.
-#
-# Revision 1.2  1996/10/28 13:56:02  jim
-# Fixed bug in ordered lists.
-# Added option for either HTML-style headings or descriptive-list style
-# headings.
-#
-# Revision 1.1  1996/10/23 14:00:45  jim
-# *** empty log message ***
-#
-#
-#
-
-import ts_regex, regex
-from ts_regex import gsub
-from string import split, join, strip, find
-
-def untabify(aString,
-             indent_tab=ts_regex.compile('\(\n\|^\)\( *\)\t').search_group,
-             ):
-    '''\
-    Convert indentation tabs to spaces.
-    '''
-    result=''
-    rest=aString
-    while 1:
-        ts_results = indent_tab(rest, (1,2))
-        if ts_results:
-            start, grps = ts_results
-            lnl=len(grps[0])
-            indent=len(grps[1])
-            result=result+rest[:start]
-            rest="\n%s%s" % (' ' * ((indent/8+1)*8),
-                             rest[start+indent+1+lnl:])
-        else:
-            return result+rest
-
-def indent(aString, indent=2):
-    """Indent a string the given number of spaces"""
-    r=split(untabify(aString),'\n')
-    if not r: return ''
-    if not r[-1]: del r[-1]
-    tab=' '*level
-    return "%s%s\n" % (tab,join(r,'\n'+tab))
-
-def reindent(aString, indent=2, already_untabified=0):
-    "reindent a block of text, so that the minimum indent is as given"
-
-    if not already_untabified: aString=untabify(aString)
-
-    l=indent_level(aString)[0]
-    if indent==l: return aString
-
-    r=[]
-
-    append=r.append
-
-    if indent > l:
-        tab=' ' * (indent-l)
-        for s in split(aString,'\n'): append(tab+s)
-    else:
-        l=l-indent
-        for s in split(aString,'\n'): append(s[l:])
-
-    return join(r,'\n')
-
-def indent_level(aString,
-                 indent_space=ts_regex.compile('\n\( *\)').search_group,
-                 ):
-    '''\
-    Find the minimum indentation for a string, not counting blank lines.
-    '''
-    start=0
-    text='\n'+aString
-    indent=l=len(text)
-    while 1:
-
-        ts_results = indent_space(text, (1,2), start)
-        if ts_results:
-            start, grps = ts_results
-            i=len(grps[0])
-            start=start+i+1
-            if start < l and text[start] != '\n':       # Skip blank lines
-                if not i: return (0,aString)
-                if i < indent: indent = i
-        else:
-            return (indent,aString)
-
-def paragraphs(list,start):
-    l=len(list)
-    level=list[start][0]
-    i=start+1
-    while i < l and list[i][0] > level: i=i+1
-    return i-1-start
-
-def structure(list):
-    if not list: return []
-    i=0
-    l=len(list)
-    r=[]
-    while i < l:
-        sublen=paragraphs(list,i)
-        i=i+1
-        r.append((list[i-1][1],structure(list[i:i+sublen])))
-        i=i+sublen
-    return r
-
-
-class Table:
-    CELL='  <TD ALIGN=CENTER COLSPAN=%i>%s</TD>\n'
-    ROW=' <TR>\n%s </TR>\n'
-    TABLE='\n<TABLE BORDER=1 CELLPADDING=2>\n%s</TABLE>'
-    
-    def create(self,aPar,td=ts_regex.compile(
-        '[ \t\n]*||\([^\0|]*\)').match_group):
-        '''parses a table and returns nested list representing the
-        table'''
-        self.table=[]
-        text=filter(None,split(aPar,'\n'))
-        for line in text:
-            row=[]
-            while 1:
-                pos=td(line,(1,))
-                if not pos:return 0
-                row.append(pos[1])
-                if pos[0]==len(line):break
-                line=line[pos[0]:]
-            self.table.append(row)
-        return 1
-
-    def html(self):
-        '''Creates an HTML representation of table'''
-        htmltable=[]
-        for row in self.table:
-            htmlrow=[]
-            colspan=1
-            for cell in row:
-                if cell=='':
-                    colspan=colspan+1
-                    continue
-                else:
-                    htmlrow.append(self.CELL%(colspan,cell))
-                    colspan=1
-            htmltable.append(self.ROW%join(htmlrow,''))
-        return self.TABLE%join(htmltable,'')
-
-optional_trailing_punctuation = '\(,\|\([.:?;]\)\)?'
-trailing_space = '\([\0- ]\)'
-not_punctuation_or_whitespace = "[^-,.?:\0- ]"
-table=Table()
-
-class StructuredText:
-
-    """Model text as structured collection of paragraphs.
-
-    Structure is implied by the indentation level.
-
-    This class is intended as a base classes that do actual text
-    output formatting.
-    """
-
-    def __init__(self, aStructuredString, level=0,
-                 paragraph_divider=regex.compile('\(\n *\)+\n'),
-                 ):
-        '''Convert a structured text string into a structured text object.
-
-        Aguments:
-
-          aStructuredString -- The string to be parsed.
-          level -- The level of top level headings to be created.
-        '''
-
-        aStructuredString = gsub(
-            '\"\([^\"\0]+\)\":'         # title: <"text":>
-            + ('\([-:a-zA-Z0-9_,./?=@#~]+%s\)'
-               % not_punctuation_or_whitespace)
-            + optional_trailing_punctuation
-            + trailing_space,
-            '<a href="\\2">\\1</a>\\4\\5\\6',
-            aStructuredString)
-
-        aStructuredString = gsub(
-            '\"\([^\"\0]+\)\",[\0- ]+'            # title: <"text", >
-            + ('\([a-zA-Z]*:[-:a-zA-Z0-9_,./?=@#~]*%s\)'
-               % not_punctuation_or_whitespace)
-            + optional_trailing_punctuation
-            + trailing_space,
-            '<a href="\\2">\\1</a>\\4\\5\\6',
-            aStructuredString)
-
-        protoless = find(aStructuredString, '<a href=":')
-        if protoless != -1:
-            aStructuredString = gsub('<a href=":', '<a href="',
-                                     aStructuredString)
-
-        self.level=level
-        paragraphs=ts_regex.split(untabify(aStructuredString),
-                                  paragraph_divider)
-        paragraphs=map(indent_level,paragraphs)
-
-        self.structure=structure(paragraphs)
-
-
-    def __str__(self):
-        return str(self.structure)
-
-
-ctag_prefix="\([\0- (]\|^\)"
-ctag_suffix="\([\0- ,.:;!?)]\|$\)"
-ctag_middle="[%s]\([^\0- %s][^%s]*[^\0- %s]\|[^%s]\)[%s]"
-ctag_middl2="[%s][%s]\([^\0- %s][^%s]*[^\0- %s]\|[^%s]\)[%s][%s]"
-        
-def ctag(s,
-         em=regex.compile(
-             ctag_prefix+(ctag_middle % (("*",)*6) )+ctag_suffix),
-         strong=regex.compile(
-             ctag_prefix+(ctag_middl2 % (("*",)*8))+ctag_suffix),
-         under=regex.compile(
-             ctag_prefix+(ctag_middle % (("_",)*6) )+ctag_suffix),
-         code=regex.compile(
-             ctag_prefix+(ctag_middle % (("\'",)*6))+ctag_suffix),
-         ):
-    if s is None: s=''
-    s=gsub(strong,'\\1<strong>\\2</strong>\\3',s)
-    s=gsub(under, '\\1<u>\\2</u>\\3',s)
-    s=gsub(code,  '\\1<code>\\2</code>\\3',s)
-    s=gsub(em,    '\\1<em>\\2</em>\\3',s)
-    return s    
-
-class HTML(StructuredText):
-
-    '''\
-    An HTML structured text formatter.
-    '''\
-
-    def __str__(self,
-                extra_dl=regex.compile("</dl>\n<dl>"),
-                extra_ul=regex.compile("</ul>\n<ul>"),
-                extra_ol=regex.compile("</ol>\n<ol>"),
-                ):
-        '''\
-        Return an HTML string representation of the structured text data.
-
-        '''
-        s=self._str(self.structure,self.level)
-        s=gsub(extra_dl,'\n',s)
-        s=gsub(extra_ul,'\n',s)
-        s=gsub(extra_ol,'\n',s)
-        return s
-
-    def ul(self, before, p, after):
-        if p: p="<p>%s</p>" % strip(ctag(p))
-        return ('%s<ul><li>%s\n%s\n</li></ul>\n'
-                % (before,p,after))
-
-    def ol(self, before, p, after):
-        if p: p="<p>%s</p>" % strip(ctag(p))
-        return ('%s<ol><li>%s\n%s\n</li></ol>\n'
-                % (before,p,after))
-
-    def dl(self, before, t, d, after):
-        return ('%s<dl><dt>%s</dt><dd><p>%s</p>\n%s\n</dd></dl>\n'
-                % (before,ctag(t),ctag(d),after))
-
-    def head(self, before, t, level, d):
-        if level > 0 and level < 6:
-            return ('%s<h%d>%s</h%d>\n%s\n'
-                    % (before,level,strip(ctag(t)),level,d))
-            
-        t="<p><strong>%s</strong><p>" % strip(ctag(t))
-        return ('%s<dl><dt>%s\n</dt><dd>%s\n</dd></dl>\n'
-                % (before,t,d))
-
-    def normal(self,before,p,after):
-        return '%s<p>%s</p>\n%s\n' % (before,ctag(p),after)
+def html_with_references(text, level=1):
+    text = re.sub(
+        r'[\000\n]\.\. \[([0-9_%s-]+)\]' % letters,
+        r'\n  <a name="\1">[\1]</a>',
+        text)
 
-    def pre(self,structure,tagged=0):
-        if not structure: return ''
-        if tagged:
-            r=''
-        else:
-            r='<PRE>\n'
-        for s in structure:
-            r="%s%s\n\n%s" % (r,html_quote(s[0]),self.pre(s[1],1))
-        if not tagged: r=r+'</PRE>\n'
-        return r
-    
-    def table(self,before,table,after):
-        return '%s<p>%s</p>\n%s\n' % (before,ctag(table),after)
+    text = re.sub(
+        r'([\000- ,])\[(?P<ref>[0-9_%s-]+)\]([\000- ,.:])'   % letters,
+        r'\1<a href="#\2">[\2]</a>\3',
+        text)
     
-    def _str(self,structure,level,
-             # Static
-             bullet=ts_regex.compile('[ \t\n]*[o*-][ \t\n]+\([^\0]*\)'
-                                     ).match_group,
-             example=ts_regex.compile('[\0- ]examples?:[\0- ]*$'
-                                      ).search,
-             dl=ts_regex.compile('\([^\n]+\)[ \t]+--[ \t\n]+\([^\0]*\)'
-                                 ).match_group,
-             nl=ts_regex.compile('\n').search,
-             ol=ts_regex.compile(
-                 '[ \t]*\(\([0-9]+\|[a-zA-Z]+\)[.)]\)+[ \t\n]+\([^\0]*\|$\)'
-                 ).match_group,
-             olp=ts_regex.compile('[ \t]*([0-9]+)[ \t\n]+\([^\0]*\|$\)'
-                                  ).match_group,
-             ):
-        r=''
-        for s in structure:
-
-            ts_results = bullet(s[0], (1,))
-            if ts_results:
-                p = ts_results[1]
-                if s[0][-2:]=='::' and s[1]: ps=self.pre(s[1])
-                else: ps=self._str(s[1],level)
-                r=self.ul(r,p,ps)
-                continue
-            ts_results = ol(s[0], (3,))
-            if ts_results:
-                p = ts_results[1]
-                if s[0][-2:]=='::' and s[1]: ps=self.pre(s[1])
-                else: ps=self._str(s[1],level)
-                r=self.ol(r,p,ps)
-                continue
-            ts_results = olp(s[0], (1,))
-            if ts_results:
-                p = ts_results[1]
-                if s[0][-2:]=='::' and s[1]: ps=self.pre(s[1])
-                else: ps=self._str(s[1],level)
-                r=self.ol(r,p,ps)
-                continue
-            ts_results = dl(s[0], (1,2))
-            if ts_results:
-                t,d = ts_results[1]
-                r=self.dl(r,t,d,self._str(s[1],level))
-                continue
-            if example(s[0]) >= 0 and s[1]:
-                # Introduce an example, using pre tags:
-                r=self.normal(r,s[0],self.pre(s[1]))
-                continue
-            if s[0][-2:]=='::' and s[1]:
-                # Introduce an example, using pre tags:
-                r=self.normal(r,s[0][:-1],self.pre(s[1]))
-                continue
-            if table.create(s[0]):
-                ## table support.
-                r=self.table(r,table.html(),self._str(s[1],level))
-                continue
-            else:
+    text = re.sub(
+        r'([\000- ,])\[([^]]+)\.html\]([\000- ,.:])',
+        r'\1<a href="\2.html">[\2]</a>\3',
+        text)
 
-                if nl(s[0]) < 0 and s[1] and s[0][-1:] != ':':
-                    # Treat as a heading
-                    t=s[0]
-                    r=self.head(r,t,level,
-                                self._str(s[1],level and level+1))
-                else:
-                    r=self.normal(r,s[0],self._str(s[1],level))
-        return r
-        
+    return HTML(text,level=level)
 
 def html_quote(v,
                character_entities=(
-                       (regex.compile('&'), '&amp;'),
-                       (regex.compile("<"), '&lt;' ),
-                       (regex.compile(">"), '&gt;' ),
-                       (regex.compile('"'), '&quot;')
+                       (re.compile('&'), '&amp;'),
+                       (re.compile("<"), '&lt;' ),
+                       (re.compile(">"), '&gt;' ),
+                       (re.compile('"'), '&quot;')
                        )): #"
         text=str(v)
         for re,name in character_entities:
-            text=gsub(re,name,text)
+            text=re.sub(name,text)
         return text
 
-def html_with_references(text, level=1):
-    text = gsub(
-        '[\0\n].. \[\([-_0-9_a-zA-Z-]+\)\]',
-        '\n  <a name="\\1">[\\1]</a>',
-        text)
-    
-    text = gsub(
-        '\([\0- ,]\)\[\([0-9_a-zA-Z-]+\)\]\([\0- ,.:]\)',
-        '\\1<a href="#\\2">[\\2]</a>\\3',
-        text)
-    
-    text = gsub(
-        '\([\0- ,]\)\[\([^]]+\)\.html\]\([\0- ,.:]\)',
-        '\\1<a href="\\2.html">[\\2]</a>\\3',
-        text)
-
-    return HTML(text,level=level)
-    
-
-def main():
-    import sys, getopt
-
-    opts,args=getopt.getopt(sys.argv[1:],'tw')
-
-    if args:
-        [infile]=args
-        s=open(infile,'r').read()
-    else:
-        s=sys.stdin.read()
 
-    if opts:
+if __name__=='__main__':
+    import getopt
 
-        if filter(lambda o: o[0]=='-w', opts):
-            print 'Content-Type: text/html\n'
+    opts,args = getopt.getopt(sys.argv[1:],'',[])
 
-        if s[:2]=='#!':
-            s=ts_regex.sub('^#![^\n]+','',s)
+    for k,v in opts:
+       pass
 
-        r=ts_regex.compile('\([\0-\n]*\n\)')
-        ts_results = r.match_group(s, (1,))
-        if ts_results:
-            s=s[len(ts_results[1]):]
-        s=str(html_with_references(s))
-        if s[:4]=='<h1>':
-            t=s[4:find(s,'</h1>')]
-            s='''<html><head><title>%s</title>
-            </head><body>
-            %s
-            </body></html>
-            ''' % (t,s)
-        print s
-    else:
-        print html_with_references(s)
 
-if __name__=="__main__": main()
+    for f in args:
+        print HTML(open(f).read())
index 65e6f75b0f137cd5c6d23c989e76e7ea1726fd1c..a5c1e5b0473636e2b67cc0d47869ee2430ad64c1 100644 (file)
@@ -104,7 +104,7 @@ Document=DocumentClass.DocumentClass()
 DocumentWithImages=DocumentWithImages.DocumentWithImages()
 HTMLWithImages=HTMLWithImages.HTMLWithImages()
 
-DocBookBook=DocBookClass.DocBookBook
+DocBookBook=DocBookClass.DocBookBook()
 DocBookChapter=DocBookClass.DocBookChapter()
 DocBookChapterWithFigures=DocBookClass.DocBookChapterWithFigures()
 DocBookArticle=DocBookClass.DocBookArticle()
diff --git a/wxPython/samples/stxview/StructuredText/ts_regex.py b/wxPython/samples/stxview/StructuredText/ts_regex.py
deleted file mode 100644 (file)
index 1471eb2..0000000
+++ /dev/null
@@ -1,215 +0,0 @@
-##############################################################################
-# 
-# Zope Public License (ZPL) Version 1.0
-# -------------------------------------
-# 
-# Copyright (c) Digital Creations.  All rights reserved.
-# 
-# This license has been certified as Open Source(tm).
-# 
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-# 
-# 1. Redistributions in source code must retain the above copyright
-#    notice, this list of conditions, and the following disclaimer.
-# 
-# 2. Redistributions in binary form must reproduce the above copyright
-#    notice, this list of conditions, and the following disclaimer in
-#    the documentation and/or other materials provided with the
-#    distribution.
-# 
-# 3. Digital Creations requests that attribution be given to Zope
-#    in any manner possible. Zope includes a "Powered by Zope"
-#    button that is installed by default. While it is not a license
-#    violation to remove this button, it is requested that the
-#    attribution remain. A significant investment has been put
-#    into Zope, and this effort will continue if the Zope community
-#    continues to grow. This is one way to assure that growth.
-# 
-# 4. All advertising materials and documentation mentioning
-#    features derived from or use of this software must display
-#    the following acknowledgement:
-# 
-#      "This product includes software developed by Digital Creations
-#      for use in the Z Object Publishing Environment
-#      (http://www.zope.org/)."
-# 
-#    In the event that the product being advertised includes an
-#    intact Zope distribution (with copyright and license included)
-#    then this clause is waived.
-# 
-# 5. Names associated with Zope or Digital Creations must not be used to
-#    endorse or promote products derived from this software without
-#    prior written permission from Digital Creations.
-# 
-# 6. Modified redistributions of any form whatsoever must retain
-#    the following acknowledgment:
-# 
-#      "This product includes software developed by Digital Creations
-#      for use in the Z Object Publishing Environment
-#      (http://www.zope.org/)."
-# 
-#    Intact (re-)distributions of any official Zope release do not
-#    require an external acknowledgement.
-# 
-# 7. Modifications are encouraged but must be packaged separately as
-#    patches to official Zope releases.  Distributions that do not
-#    clearly separate the patches from the original work must be clearly
-#    labeled as unofficial distributions.  Modifications which do not
-#    carry the name Zope may be packaged in any form, as long as they
-#    conform to all of the clauses above.
-# 
-# 
-# Disclaimer
-# 
-#   THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
-#   EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-#   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-#   PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
-#   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-#   USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-#   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-#   OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-#   OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-#   SUCH DAMAGE.
-# 
-# 
-# This software consists of contributions made by Digital Creations and
-# many individuals on behalf of Digital Creations.  Specific
-# attributions are listed in the accompanying credits file.
-# 
-##############################################################################
-"""Provide a thread-safe interface to regex
-"""
-import regex, regsub #, Sync
-from regex import *
-from regsub import split, sub, gsub, splitx, capwords
-
-try: 
-    import thread
-except:
-    class allocate_lock:
-        def acquire(*args): pass
-        def release(*args): pass
-
-else:
-    class SafeFunction:
-        _l=thread.allocate_lock()
-        _a=_l.acquire
-        _r=_l.release
-
-        def __init__(self, f):
-            self._f=f
-
-        def __call__(self, *args, **kw):
-            self._a()
-            try: return apply(self._f, args, kw)
-            finally: self._r()
-
-    split=SafeFunction(split)
-    sub=SafeFunction(sub)
-    gsub=SafeFunction(gsub)
-    splitx=SafeFunction(splitx)
-    capwords=SafeFunction(capwords)
-
-    allocate_lock=thread.allocate_lock
-
-class compile:
-
-    _r=None
-    groupindex=None
-
-    def __init__(self, *args):
-        self._r=r=apply(regex.compile,args)
-        self._init(r)
-
-    def _init(self, r):
-        lock=allocate_lock()
-        self.__a=lock.acquire
-        self.__r=lock.release
-        self.translate=r.translate
-        self.givenpat=r.givenpat
-        self.realpat=r.realpat
-
-    def match(self, string, pos=0):
-        self.__a()
-        try: return self._r.match(string, pos)
-        finally: self.__r()
-
-    def search(self, string, pos=0):
-        self.__a()
-        try: return self._r.search(string, pos)
-        finally: self.__r()
-        
-    def search_group(self, str, group, pos=0):
-        """Search a string for a pattern.
-
-        If the pattern was not found, then None is returned,
-        otherwise, the location where the pattern was found,
-        as well as any specified group are returned.
-        """
-        self.__a()
-        try:
-            r=self._r
-            l=r.search(str, pos)
-            if l < 0: return None
-            return l, apply(r.group, group)
-        finally: self.__r()
-
-    def match_group(self, str, group, pos=0):
-        """Match a pattern against a string
-
-        If the string does not match the pattern, then None is
-        returned, otherwise, the length of the match, as well
-        as any specified group are returned.
-        """
-        self.__a()
-        try:
-            r=self._r
-            l=r.match(str, pos)
-            if l < 0: return None
-            return l, apply(r.group, group)
-        finally: self.__r()
-
-    def search_regs(self, str, pos=0):
-        """Search a string for a pattern.
-
-        If the pattern was not found, then None is returned,
-        otherwise, the 'regs' attribute of the expression is
-        returned.
-        """
-        self.__a()
-        try:
-            r=self._r
-            r.search(str, pos)
-            return r.regs
-        finally: self.__r()
-
-    def match_regs(self, str, pos=0):
-        """Match a pattern against a string
-
-        If the string does not match the pattern, then None is
-        returned, otherwise, the 'regs' attribute of the expression is
-        returned.
-        """
-        self.__a()
-        try:
-            r=self._r
-            r.match(str, pos)
-            return r.regs
-        finally: self.__r()
-
-class symcomp(compile):
-
-    def __init__(self, *args):
-        self._r=r=apply(regex.symcomp,args)
-        self._init(r)
-        self.groupindex=r.groupindex
-
-
-
-
-        
index 4599f9749e7eb4948bd418a3a3db18f44b36a804..e452855f83cbd9366e2eefc75570dd1932c3c4d2 100644 (file)
@@ -102,12 +102,12 @@ class StxFrame(wxFrame):
 
     def LoadStxText(self, text):
         # Old ST
-        html = str(StructuredText.html_with_references(text))
+        #html = str(StructuredText.html_with_references(text))
 
         # NG Version
-        #st = StructuredText.Basic(text)
-        #doc = StructuredText.Document(st)
-        #html = StructuredText.HTML(doc)
+        st = StructuredText.Basic(text)
+        doc = StructuredText.Document(st)
+        html = StructuredText.HTMLNG(doc)
 
         self.htmlWin.SetPage(html)
         self.editWin.SetValue(text)