##############################################################################
-#
+#
# Zope Public License (ZPL) Version 1.0
# -------------------------------------
-#
+#
# Copyright (c) Digital Creations. All rights reserved.
-#
+#
# This license has been certified as Open Source(tm).
-#
+#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
-#
+#
# 1. Redistributions in source code must retain the above copyright
# notice, this list of conditions, and the following disclaimer.
-#
+#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions, and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
-#
+#
# 3. Digital Creations requests that attribution be given to Zope
# in any manner possible. Zope includes a "Powered by Zope"
# button that is installed by default. While it is not a license
# violation to remove this button, it is requested that the
# attribution remain. A significant investment has been put
# into Zope, and this effort will continue if the Zope community
# continues to grow. This is one way to assure that growth.
-#
+#
# 4. All advertising materials and documentation mentioning
# features derived from or use of this software must display
# the following acknowledgement:
-#
+#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
-#
+#
# In the event that the product being advertised includes an
# intact Zope distribution (with copyright and license included)
# then this clause is waived.
-#
+#
# 5. Names associated with Zope or Digital Creations must not be used to
# endorse or promote products derived from this software without
# prior written permission from Digital Creations.
-#
+#
# 6. Modified redistributions of any form whatsoever must retain
# the following acknowledgment:
-#
+#
# "This product includes software developed by Digital Creations
# for use in the Z Object Publishing Environment
# (http://www.zope.org/)."
-#
+#
# Intact (re-)distributions of any official Zope release do not
# require an external acknowledgement.
-#
+#
# 7. Modifications are encouraged but must be packaged separately as
# patches to official Zope releases. Distributions that do not
# clearly separate the patches from the original work must be clearly
# labeled as unofficial distributions. Modifications which do not
# carry the name Zope may be packaged in any form, as long as they
# conform to all of the clauses above.
-#
-#
+#
+#
# Disclaimer
-#
+#
# THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
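# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT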
# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
-#
-#
+#
+#
# This software consists of contributions made by Digital Creations and
# many individuals on behalf of Digital Creations. Specific
# attributions are listed in the accompanying credits file.
-#
+#
##############################################################################
import re, ST, STDOM
from string import split, join, replace, expandtabs, strip, find
+from STletters import letters,lettpunc,punctuations
StringType=type('')
ListType=type([])
class StructuredTextDescription(ST.StructuredTextParagraph):
"""Represents a section of a document with a title and a body"""
-
+
def __init__(self, title, src, subs, **kw):
apply(ST.StructuredTextParagraph.__init__, (self, src, subs), kw)
self._title=title
(self, StructuredTextSectionTitle(src), subs),
kw)
+ def getColorizableTexts(self):
+ return self._src.getColorizableTexts()
+
+ def setColorizableTexts(self,src):
+ self._src.setColorizableTexts(src)
+
# a StructuredTextTable holds StructuredTextRows
class StructuredTextTable(ST.StructuredTextDocument):
"""
EX
rows = [[('row 1:column1',1)],[('row2:column1',1)]]
"""
-
+
def __init__(self, rows, src, subs, **kw):
apply(ST.StructuredTextDocument.__init__,(self,subs),kw)
self._rows = []
for row in rows:
if row:
self._rows.append(StructuredTextRow(row,kw))
-
+
def getRows(self):
return [self._rows]
-
+
def _getRows(self):
return self.getRows()
-
+
def getColorizableTexts(self):
"""
return a tuple where each item is a column/cell's
contents. The tuple, result, will be of this format.
("r1 col1", "r1=col2", "r2 col1", "r2 col2")
"""
-
+
#result = ()
result = []
for row in self._rows:
#result = result[:] + (column.getColorizableTexts(),)
result.append(column.getColorizableTexts()[0])
return result
-
+
def setColorizableTexts(self,texts):
"""
texts is going to a tuple where each item is the
for column_index in range(len(self._rows[row_index]._columns)):
self._rows[row_index]._columns[column_index].setColorizableTexts((texts[0],))
texts = texts[1:]
-
+
def _getColorizableTexts(self):
return self.getColorizableTexts()
-
+
def _setColorizableTexts(self):
return self.setColorizableTexts()
-
+
# StructuredTextRow holds StructuredTextColumns
class StructuredTextRow(ST.StructuredTextDocument):
-
+
def __init__(self,row,kw):
"""
row is a list of tuples, where each tuple is
the raw text for a cell/column and the span
- of that cell/column".
- EX
+ of that cell/column".
+ EX
[('this is column one',1), ('this is column two',1)]
"""
-
+
apply(ST.StructuredTextDocument.__init__,(self,[]),kw)
self._columns = []
- for column in row:
+ for column in row:
self._columns.append(StructuredTextColumn(column[0],column[1],kw))
def getColumns(self):
return [self._columns]
def _getColumns(self):
return [self._columns]
-
+
# this holds the raw text of a table cell
class StructuredTextColumn(ST.StructuredTextParagraph):
"""
thus a StructuredTextParagraph. A StructuredTextColumn
also holds the span of its column
"""
-
+
def __init__(self,text,span,kw):
apply(ST.StructuredTextParagraph.__init__,(self,text,[]),kw)
self._span = span
-
+
def getSpan(self):
return self._span
-
+
def _getSpan(self):
return self._span
-
+
class StructuredTextMarkup(STDOM.Element):
-
+
def __init__(self, v, **kw):
self._value=v
self._attributes=kw.keys()
class StructuredTextLink(StructuredTextMarkup):
"A simple hyperlink"
-class DocumentClass:
+class DocumentClass:
"""
Class instance calls [ex.=> x()] require a structured text
structure. Doc will then parse each paragraph in the structure
"""
Parse accepts a raw_string, an expr to test the raw_string,
and the raw_string's subparagraphs.
-
- Parse will continue to search through raw_string until
- all instances of expr in raw_string are found.
-
+
+ Parse will continue to search through raw_string until
+ all instances of expr in raw_string are found.
+
If no instances of expr are found, raw_string is returned.
Otherwise a list of substrings and instances is returned
"""
raw_string = raw_string[end:len(raw_string)]
if not tmp: return raw_string # nothing found
-
+
if raw_string: append(raw_string)
elif len(tmp)==1: return tmp[0]
-
+
return tmp
for s in str.getColorizableTexts():
color(s, (text_type,))
a(s)
-
+
str.setColorizableTexts(r)
return str
st=type('')):
result=[]
for paragraph in raw_paragraphs:
-
+
if paragraph.getNodeName() != 'StructuredTextParagraph':
result.append(paragraph)
continue
-
+
for pt in self.paragraph_types:
if type(pt) is st:
# grab the corresponding function
result.append(paragraph)
return result
-
+
def doc_table(self,paragraph, expr = re.compile('(\s*)([||]+)').match):
+ #print "paragraph=>", type(paragraph), paragraph, paragraph._src
text = paragraph.getColorizableTexts()[0]
m = expr(text)
-
+
if not (m):
return None
rows = []
-
+
# initial split
for row in split(text,"\n"):
- rows.append(row)
-
+ rows.append(row)
+
# clean up the rows
for index in range(len(rows)):
tmp = []
for index in range(len(rows)):
l = len(rows[index])-1
rows[index] = rows[index][:l]
-
+
result = []
for row in rows:
cspan = 0
tmp = []
for item in row:
if item:
- tmp.append(item,cspan)
+ tmp.append((item,cspan))
cspan = 0
else:
cspan = cspan + 1
result.append(tmp)
-
+
subs = paragraph.getSubparagraphs()
indent=paragraph.indent
return StructuredTextTable(result,text,subs,indent=paragraph.indent)
-
+
def doc_bullet(self, paragraph, expr = re.compile('\s*[-*o]\s+').match):
top=paragraph.getColorizableTexts()[0]
m=expr(top)
if not m:
return None
-
+
subs=paragraph.getSubparagraphs()
if top[-2:]=='::':
subs=[StructuredTextExample(subs)]
def doc_numbered(
self, paragraph,
- expr = re.compile('(\s*[a-zA-Z]+\.)|(\s*[0-9]+\.)|(\s*[0-9]+\s+)').match):
-
+ expr = re.compile('(\s*[%s]+\.)|(\s*[0-9]+\.)|(\s*[0-9]+\s+)' % letters).match):
+
# This is the old expression. It had a nasty habit
# of grabbing paragraphs that began with a single
# letter word even if there was no following period.
-
+
#expr = re.compile('\s*'
# '(([a-zA-Z]|[0-9]+|[ivxlcdmIVXLCDM]+)\.)*'
# '([a-zA-Z]|[0-9]+|[ivxlcdmIVXLCDM]+)\.?'
# '\s+').match):
-
+
top=paragraph.getColorizableTexts()[0]
m=expr(top)
if not m: return None
def doc_description(
self, paragraph,
delim = re.compile('\s+--\s+').search,
- nb=re.compile(r'[^\0- ]').search,
+ nb=re.compile(r'[^\000- ]').search,
):
top=paragraph.getColorizableTexts()[0]
delim=d)
def doc_header(self, paragraph,
- expr = re.compile('[ a-zA-Z0-9.:/,-_*<>\?\'\"]+').match
+ expr = re.compile('[ %s0-9.:/,-_*<>\?\'\"]+' % letters).match
):
subs=paragraph.getSubparagraphs()
if not subs: return None
def doc_literal(
self, s,
expr=re.compile(
- "(?:\s|^)'" # open
+ "(?:\s|^)'" # open
"([^ \t\n\r\f\v']|[^ \t\n\r\f\v'][^\n']*[^ \t\n\r\f\v'])" # contents
- "'(?:\s|[,.;:!?]|$)" # close
+ "'(?:\s|[,.;:!?]|$)" # close
).search):
-
+
r=expr(s)
if r:
start, end = r.span(1)
def doc_emphasize(
self, s,
- expr = re.compile('\s*\*([ \na-zA-Z0-9.:/;,\'\"\?]+)\*(?!\*|-)').search
+ expr = re.compile('\s*\*([ \n%s0-9]+)\*(?!\*|-)' % lettpunc).search
):
r=expr(s)
return (StructuredTextEmphasis(s[start:end]), start-1, end+1)
else:
return None
-
+
def doc_inner_link(self,
s,
expr1 = re.compile("\.\.\s*").search,
- expr2 = re.compile("\[[a-zA-Z0-9]+\]").search):
-
+ expr2 = re.compile("\[[%s0-9]+\]" % letters).search):
+
# make sure we dont grab a named link
if expr2(s) and expr1(s):
start1,end1 = expr1(s).span()
return None
else:
# the .. is somewhere else, ignore it
- return (StructuredTextInnerLink(s[start2+1,end2-1],start2,end2))
+ return (StructuredTextInnerLink(s[start2+1:end2-1]),start2,end2)
return None
elif expr2(s) and not expr1(s):
start,end = expr2(s).span()
return (StructuredTextInnerLink(s[start+1:end-1]),start,end)
return None
-
+
def doc_named_link(self,
s,
- expr=re.compile("(\.\.\s)(\[[a-zA-Z0-9]+\])").search):
-
+ expr=re.compile("(\.\.\s)(\[[%s0-9]+\])" % letters).search):
+
result = expr(s)
if result:
start,end = result.span(2)
return (StructuredTextNamedLink(str),st,en)
#return (StructuredTextNamedLink(s[st:en]),st,en)
return None
-
+
def doc_underline(self,
s,
- expr=re.compile("\_([a-zA-Z0-9\s\.,\?\/]+)\_").search):
-
+ expr=re.compile("\s+\_([0-9%s ]+)\_" % lettpunc).search):
+
result = expr(s)
if result:
start,end = result.span(1)
return (StructuredTextUnderline(s[start:end]),st,e)
else:
return None
-
- def doc_strong(self,
+
+ def doc_strong(self,
s,
- expr = re.compile('\s*\*\*([ \na-zA-Z0-9.:/;\-,!\?\'\"]+)\*\*').search
+ expr = re.compile('\s*\*\*([ \n%s0-9]+)\*\*' % lettpunc).search
):
r=expr(s)
return (StructuredTextStrong(s[start:end]), start-2, end+2)
else:
return None
-
+
def doc_href(
-
+
self, s,
- expr1 = re.compile("(\"[ a-zA-Z0-9\n\-\.\,\;\(\)\/\:\/]+\")(:)([a-zA-Z0-9\:\/\.\~\-]+)([,]*\s*)").search,
- expr2 = re.compile('(\"[ a-zA-Z0-9\n\-\.\:\;\(\)\/]+\")([,]+\s+)([a-zA-Z0-9\@\.\,\?\!\/\:\;\-\#]+)(\s*)').search):
-
- #expr1=re.compile('\"([ a-zA-Z0-9.:/;,\n\~\(\)\-]+)\"'
- # ':'
- # '([a-zA-Z0-9.:/;,\n\~]+)(?=(\s+|\.|\!|\?))'
- # ).search,
- #expr2=re.compile('\"([ a-zA-Z0-9./:]+)\"'
- # ',\s+'
- # '([ a-zA-Z0-9@.:/;]+)(?=(\s+|\.|\!|\?))'
- # ).search,
-
- punctuation = re.compile("[\,\.\?\!\;]+").match
+ expr1 = re.compile("(\"[ %s0-9\n\-\.\,\;\(\)\/\:\/\*\']+\")(:)([a-zA-Z0-9\@\.\,\?\!\/\:\;\-\#\~]+)([,]*\s*)" % letters).search,
+ expr2 = re.compile('(\"[ %s0-9\n\-\.\:\;\(\)\/\*\']+\")([,]+\s+)([a-zA-Z0-9\@\.\,\?\!\/\:\;\-\#\~]+)(\s*)' % letters).search):
+
r=expr1(s) or expr2(s)
if r:
# need to grab the href part and the
# beginning part
-
+
start,e = r.span(1)
name = s[start:e]
name = replace(name,'"','',2)
- #start = start + 1
st,end = r.span(3)
- if punctuation(s[end-1:end]):
- end = end -1
+
+ if s[end-1:end] in punctuations: end-=1
link = s[st:end]
- #end = end - 1
-
+
# name is the href title, link is the target
# of the href
return (StructuredTextLink(name, href=link),
start, end)
-
- #return (StructuredTextLink(s[start:end], href=s[start:end]),
- # start, end)
+
+
else:
return None
--- /dev/null
+#! /usr/bin/env python -- # -*- python -*-
+##############################################################################
+#
+# Zope Public License (ZPL) Version 1.0
+# -------------------------------------
+#
+# Copyright (c) Digital Creations. All rights reserved.
+#
+# This license has been certified as Open Source(tm).
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# 1. Redistributions in source code must retain the above copyright
+# notice, this list of conditions, and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions, and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+#
+# 3. Digital Creations requests that attribution be given to Zope
+# in any manner possible. Zope includes a "Powered by Zope"
+# button that is installed by default. While it is not a license
+# violation to remove this button, it is requested that the
+# attribution remain. A significant investment has been put
+# into Zope, and this effort will continue if the Zope community
+# continues to grow. This is one way to assure that growth.
+#
+# 4. All advertising materials and documentation mentioning
+# features derived from or use of this software must display
+# the following acknowledgement:
+#
+# "This product includes software developed by Digital Creations
+# for use in the Z Object Publishing Environment
+# (http://www.zope.org/)."
+#
+# In the event that the product being advertised includes an
+# intact Zope distribution (with copyright and license included)
+# then this clause is waived.
+#
+# 5. Names associated with Zope or Digital Creations must not be used to
+# endorse or promote products derived from this software without
+# prior written permission from Digital Creations.
+#
+# 6. Modified redistributions of any form whatsoever must retain
+# the following acknowledgment:
+#
+# "This product includes software developed by Digital Creations
+# for use in the Z Object Publishing Environment
+# (http://www.zope.org/)."
+#
+# Intact (re-)distributions of any official Zope release do not
+# require an external acknowledgement.
+#
+# 7. Modifications are encouraged but must be packaged separately as
+# patches to official Zope releases. Distributions that do not
+# clearly separate the patches from the original work must be clearly
+# labeled as unofficial distributions. Modifications which do not
+# carry the name Zope may be packaged in any form, as long as they
+# conform to all of the clauses above.
+#
+#
+# Disclaimer
+#
+# THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
+# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+#
+# This software consists of contributions made by Digital Creations and
+# many individuals on behalf of Digital Creations. Specific
+# attributions are listed in the accompanying credits file.
+#
+##############################################################################
+'''Structured Text Manipulation
+
+Parse a structured text string into a form that can be used with
+structured formats, like html.
+
+Structured text is text that uses indentation and simple
+symbology to indicate the structure of a document.
+
+A structured string consists of a sequence of paragraphs separated by
+one or more blank lines. Each paragraph has a level which is defined
+as the minimum indentation of the paragraph. A paragraph is a
+sub-paragraph of another paragraph if the other paragraph is the last
+preceding paragraph that has a lower level.
+
+Special symbology is used to indicate special constructs:
+
+- A single-line paragraph whose immediately succeeding paragraphs are lower
+ level is treated as a header.
+
+- A paragraph that begins with a '-', '*', or 'o' is treated as an
+ unordered list (bullet) element.
+
+- A paragraph that begins with a sequence of digits followed by a
+ white-space character is treated as an ordered list element.
+
+- A paragraph that begins with a sequence of sequences, where each
+ sequence is a sequence of digits or a sequence of letters followed
+ by a period, is treated as an ordered list element.
+
+- A paragraph with a first line that contains some text, followed by
+ some white-space and '--' is treated as
+ a descriptive list element. The leading text is treated as the
+ element title.
+
+- Sub-paragraphs of a paragraph that ends in the word 'example', the
+ word 'examples', or '::' are treated as example code and are output as is.
+
+- Text enclosed in single quotes (with white-space to the left of the
+ first quote and whitespace or punctuation to the right of the second quote)
+ is treated as example code.
+
+- Text surrounded by '*' characters (with white-space to the left of the
+ first '*' and whitespace or punctuation to the right of the second '*')
+ is emphasized.
+
+- Text surrounded by '**' characters (with white-space to the left of the
+ first '**' and whitespace or punctuation to the right of the second '**')
+ is made strong.
+
+- Text surrounded by '_' underscore characters (with whitespace to the left
+ and whitespace or punctuation to the right) is made underlined.
+
+- Text enclosed by double quotes followed by a colon, a URL, and concluded
+ by punctuation plus white space, *or* just white space, is treated as a
+ hyper link. For example:
+
+ "Zope":http://www.zope.org/ is ...
+
+ Is interpreted as '<a href="http://www.zope.org/">Zope</a> is ....'
+ Note: This works for relative as well as absolute URLs.
+
+- Text enclosed by double quotes followed by a comma, one or more spaces,
+ an absolute URL and concluded by punctuation plus white space, or just
+ white space, is treated as a hyper link. For example:
+
+ "mail me", mailto:amos@digicool.com.
+
+ Is interpreted as '<a href="mailto:amos@digicool.com">mail me</a>.'
+
+- Text enclosed in brackets which consists only of letters, digits,
+ underscores and dashes is treated as hyper links within the document.
+ For example:
+
+ As demonstrated by Smith [12] this technique is quite effective.
+
+ Is interpreted as '... by Smith <a href="#12">[12]</a> this ...'. Together
+ with the next rule this allows easy coding of references or end notes.
+
+- Text enclosed in brackets which is preceded by the start of a line, two
+ periods and a space is treated as a named link. For example:
+
+ .. [12] "Effective Techniques" Smith, Joe ...
+
+ Is interpreted as '<a name="12">[12]</a> "Effective Techniques" ...'.
+ Together with the previous rule this allows easy coding of references or
+ end notes.
+
+
+- A paragraph that has blocks of text enclosed in '||' is treated as a
+ table. The text blocks correspond to table cells and table rows are
+ denoted by newlines. By default the cells are center aligned. A cell
+ can span more than one column by preceding a block of text with an
+ equivalent number of cell separators '||'. Newlines and '|' cannot
+ be a part of the cell text. For example:
+
+ |||| **Ingredients** ||
+ || *Name* || *Amount* ||
+ ||Spam||10||
+ ||Eggs||3||
+
+ is interpreted as::
+
+ <TABLE BORDER=1 CELLPADDING=2>
+ <TR>
+ <TD ALIGN=CENTER COLSPAN=2> <strong>Ingredients</strong> </TD>
+ </TR>
+ <TR>
+ <TD ALIGN=CENTER COLSPAN=1> <em>Name</em> </TD>
+ <TD ALIGN=CENTER COLSPAN=1> <em>Amount</em> </TD>
+ </TR>
+ <TR>
+ <TD ALIGN=CENTER COLSPAN=1>Spam</TD>
+ <TD ALIGN=CENTER COLSPAN=1>10</TD>
+ </TR>
+ <TR>
+ <TD ALIGN=CENTER COLSPAN=1>Eggs</TD>
+ <TD ALIGN=CENTER COLSPAN=1>3</TD>
+ </TR>
+ </TABLE>
+
+'''
+
+import ts_regex
+import regex
+from ts_regex import gsub
+from string import split, join, strip, find
+import string,re
+
+
+def untabify(aString,
+             indent_tab=ts_regex.compile('\(\n\|^\)\( *\)\t').search_group,
+             ):
+    '''\
+    Expand tabs that follow line-leading spaces into spaces.
+
+    Each hit of indent_tab (a newline or the start of the string, a
+    run of spaces, then a tab) is replaced by a newline followed by
+    (spaces/8+1)*8 blanks.  Tabs anywhere else are left alone.
+    '''
+    done=[]
+    remaining=aString
+    hit=indent_tab(remaining, (1,2))
+    while hit:
+        start, grps = hit
+        newline_len=len(grps[0])
+        spaces=len(grps[1])
+        done.append(remaining[:start])
+        # skip the matched newline, the spaces and the tab itself
+        tail=remaining[start+spaces+1+newline_len:]
+        remaining="\n%s%s" % (' ' * ((spaces/8+1)*8), tail)
+        hit=indent_tab(remaining, (1,2))
+    return join(done,'')+remaining
+
+def indent(aString, indent=2):
+    """Indent a string the given number of spaces.
+
+    aString -- text to indent; it is passed through untabify() first.
+    indent  -- number of spaces placed in front of every line.
+
+    Returns the indented text terminated by a single newline.  A
+    trailing empty line (text that ends in a newline) is dropped
+    before indenting so it does not turn into a line of bare spaces.
+    """
+    r=split(untabify(aString),'\n')
+    if not r: return ''
+    if not r[-1]: del r[-1]
+    # The width comes from the 'indent' argument.  This line used to
+    # read the undefined name 'level' and raised NameError on each call.
+    tab=' '*indent
+    return "%s%s\n" % (tab,join(r,'\n'+tab))
+
+def reindent(aString, indent=2, already_untabified=0):
+    "Shift a block of text so that its minimum indent equals indent"
+
+    if not already_untabified:
+        aString=untabify(aString)
+
+    current=indent_level(aString)[0]
+    if current==indent:
+        return aString
+
+    lines=split(aString,'\n')
+    if indent > current:
+        # too shallow: prefix every line with the missing spaces
+        pad=' ' * (indent-current)
+        shifted=map(lambda s, pad=pad: pad+s, lines)
+    else:
+        # too deep: cut the surplus leading characters off every line
+        surplus=current-indent
+        shifted=map(lambda s, surplus=surplus: s[surplus:], lines)
+
+    return join(shifted,'\n')
+
+def indent_level(aString,
+ indent_space=ts_regex.compile('\n\( *\)').search_group,
+ ):
+ '''\
+ Find the minimum indentation for a string, not counting blank lines.
+
+ Returns the tuple (indent, aString). The scan stops early with
+ (0, aString) as soon as a non-blank line without leading spaces is
+ seen. When no non-blank line is found, indent keeps its starting
+ value, len(aString)+1.
+ '''
+ start=0
+ # a newline is put in front so the pattern also matches the first line
+ text='\n'+aString
+ indent=l=len(text)
+ while 1:
+
+ # NOTE(review): search_group presumably returns (match position,
+ # groups) or a false value when nothing matches -- confirm in ts_regex
+ ts_results = indent_space(text, (1,2), start)
+ if ts_results:
+ start, grps = ts_results
+ # i is the number of spaces that follow this newline
+ i=len(grps[0])
+ # move past the newline and its spaces to the first other character
+ start=start+i+1
+ if start < l and text[start] != '\n': # Skip blank lines
+ if not i: return (0,aString)
+ if i < indent: indent = i
+ else:
+ return (indent,aString)
+
+def paragraphs(list,start):
+    """Count the entries after list[start] that are nested below it.
+
+    Every item of list carries its indentation level at index 0.
+    Counting stops at the first later item whose level is not greater
+    than the level of list[start], or at the end of list.
+    """
+    base=list[start][0]
+    count=0
+    for i in range(start+1,len(list)):
+        if not list[i][0] > base: break
+        count=count+1
+    return count
+
+def structure(list):
+    """Turn a flat list of (level, text) pairs into a nested structure.
+
+    The result is a list of (text, children) tuples, where children
+    is built the same way from the entries that paragraphs() reports
+    as nested below the item.
+    """
+    if not list: return []
+    tree=[]
+    pos=0
+    total=len(list)
+    while pos < total:
+        nsubs=paragraphs(list,pos)
+        children=list[pos+1:pos+1+nsubs]
+        tree.append((list[pos][1],structure(children)))
+        # continue with the next entry on the same level
+        pos=pos+1+nsubs
+    return tree
+
+
+class Table:
+    """Parse '||'-delimited table paragraphs and render them as HTML.
+
+    create() fills self.table with one list of cell strings per line
+    and html() turns that list into a <TABLE> element.  An empty cell
+    string stands for an extra '||' separator; html() uses it to widen
+    the COLSPAN of the next non-empty cell.
+    """
+    CELL=' <TD ALIGN=CENTER COLSPAN=%i>%s</TD>\n'
+    ROW=' <TR>\n%s </TR>\n'
+    TABLE='\n<TABLE BORDER=1 CELLPADDING=2>\n%s</TABLE>'
+
+    def create(self,aPar,
+               # The cell class used to be spelled [^\0x00|], which the re
+               # module reads as NUL, 'x', '0', '0' and '|': every cell that
+               # contained an 'x' or a '0' made the paragraph fail to parse.
+               td_reg=re.compile(r'[ \t\n]*\|\|([^\x00|]*)')
+               ):
+        '''Parse paragraph aPar into self.table.
+
+        Returns 1 when every non-empty line consists solely of
+        '||'-introduced cells and 0 as soon as a line does not match.
+        self.table is reset on every call and may hold only the rows
+        parsed so far when 0 is returned.
+        '''
+        self.table=[]
+        text=filter(None,split(aPar,'\n'))
+        for line in text:
+            row=[]
+            while 1:
+                mo = td_reg.match(line)
+                if not mo: return 0
+                pos = mo.end(1)
+                row.append(mo.group(1))
+                if pos==len(line):break
+                # carry on with what follows the cell just read
+                line=line[pos:]
+            self.table.append(row)
+        return 1
+
+    def html(self):
+        '''Return the HTML for the table stored by the last create().'''
+        htmltable=[]
+        for row in self.table:
+            htmlrow=[]
+            colspan=1
+            for cell in row:
+                if cell=='':
+                    # empty cell: the next real cell spans one more column
+                    colspan=colspan+1
+                    continue
+                else:
+                    htmlrow.append(self.CELL%(colspan,cell))
+                    colspan=1
+            htmltable.append(self.ROW%join(htmlrow,''))
+        return self.TABLE%join(htmltable,'')
+
+# module-level parser instance shared by the formatter code below
+table=Table()
+
+class StructuredText:
+
+    """Represent text as a nested collection of paragraphs.
+
+    The nesting is derived from the indentation of each paragraph.
+
+    This class is meant to serve as a base class for classes that
+    perform the actual output formatting.
+    """
+
+    def __init__(self, aStructuredString, level=0,
+                 paragraph_divider=regex.compile('\(\r?\n *\)+\r?\n'),
+                 ):
+        '''Build the paragraph structure for a structured text string.
+
+        Arguments:
+
+          aStructuredString -- The string to be parsed.
+          level -- The level of top level headings to be created.
+          paragraph_divider -- Compiled pattern on which the text is
+            split into paragraphs.
+        '''
+
+        letters=string.letters
+        href_part='([-:%s0-9_,./?=@#~&]*?)' % letters
+        closing='([.:?;] )'
+
+        # rewrite '"title":url' first, then '"title", url'
+        for opening in (' \"([%s0-9-_,./?=@~&]*)\":' % letters,
+                        ' \"([%s0-9-_,./?=@~&]*)\", ' % letters):
+            link=re.compile(opening+href_part+closing,re.M)
+            aStructuredString = link.sub(r'<a href="\2">\1</a>\3 ',
+                                         aStructuredString)
+
+        # drop the stray colon that starts the href of protocol-less links
+        if find(aStructuredString, '<a href=":') != -1:
+            aStructuredString = re.sub('<a href=":', '<a href="',
+                                       aStructuredString)
+
+        self.level=level
+        chunks=ts_regex.split(untabify(aStructuredString),
+                              paragraph_divider)
+        self.structure=structure(map(indent_level,chunks))
+
+
+    def __str__(self):
+        return str(self.structure)
+
+
+# Building blocks for the inline markup patterns used by ctag():
+# what may precede the opening delimiter, what may follow the closing
+# one, and the delimited text for one- and two-character delimiters.
+ctag_prefix=r'([\x00- \\(]|^)'
+ctag_suffix=r'([\x00- ,.:;!?\\)]|$)'
+ctag_middle=r'[%s]([^\x00- %s][^%s]*[^\x00- %s]|[^%s])[%s]'
+ctag_middl2=r'[%s][%s]([^\x00- %s][^%s]*[^\x00- %s]|[^%s])[%s][%s]'
+
+def ctag(s,
+         em=re.compile(
+             ctag_prefix+(ctag_middle % (("*",)*6) )+ctag_suffix),
+         strong=re.compile(
+             ctag_prefix+(ctag_middl2 % (("*",)*8))+ctag_suffix),
+         under=re.compile(
+             ctag_prefix+(ctag_middle % (("_",)*6) )+ctag_suffix),
+         code=re.compile(
+             ctag_prefix+(ctag_middle % (("\'",)*6))+ctag_suffix),
+         ):
+    """Replace inline structured text markup in s with HTML tags.
+
+    '**text**' becomes <strong>, '_text_' becomes <u>, "'text'"
+    becomes <code> and '*text*' becomes <em>, applied in that order.
+    None is treated as the empty string.
+    """
+    if s is None: s=''
+    for pattern,tag in ((strong,'strong'), (under,'u'),
+                        (code,'code'), (em,'em')):
+        s=pattern.sub(r'\1<%s>\2</%s>\3' % (tag,tag),s)
+    return s
+
+class HTML(StructuredText):
+
+ '''\
+ An HTML structured text formatter.
+ '''\
+
+ # Render the whole structure, then merge neighbouring lists: every
+ # "</dl>\n<dl>" (and the ul/ol equivalents) is collapsed to a newline.
+ def __str__(self,
+ extra_dl=re.compile("</dl>\n<dl>"),
+ extra_ul=re.compile("</ul>\n<ul>"),
+ extra_ol=re.compile("</ol>\n<ol>"),
+ ):
+ '''\
+ Return an HTML string representation of the structured text data.
+
+ '''
+ s=self._str(self.structure,self.level)
+ s=extra_dl.sub('\n',s)
+ s=extra_ul.sub('\n',s)
+ s=extra_ol.sub('\n',s)
+ return s
+
+ # Append a one-item unordered list to 'before': paragraph p (inline
+ # markup applied by ctag) followed by the already rendered 'after'.
+ def ul(self, before, p, after):
+ if p: p="<p>%s</p>" % strip(ctag(p))
+ return ('%s<ul><li>%s\n%s\n</li></ul>\n'
+ % (before,p,after))
+
+ # Same as ul(), but for an ordered list.
+ def ol(self, before, p, after):
+ if p: p="<p>%s</p>" % strip(ctag(p))
+ return ('%s<ol><li>%s\n%s\n</li></ol>\n'
+ % (before,p,after))
+
+ # Append a definition list entry with title t and description d.
+ def dl(self, before, t, d, after):
+ return ('%s<dl><dt>%s</dt><dd><p>%s</p>\n%s\n</dd></dl>\n'
+ % (before,ctag(t),ctag(d),after))
+
+ # Levels 1 to 5 become <hN> headings; any other level is rendered as a
+ # bold title inside a definition list with d as its body.
+ def head(self, before, t, level, d):
+ if level > 0 and level < 6:
+ return ('%s<h%d>%s</h%d>\n%s\n'
+ % (before,level,strip(ctag(t)),level,d))
+
+ t="<p><strong>%s</strong></p>" % strip(ctag(t))
+ return ('%s<dl><dt>%s\n</dt><dd>%s\n</dd></dl>\n'
+ % (before,t,d))
+
+ # Append an ordinary paragraph p followed by the rendered 'after'.
+ def normal(self,before,p,after):
+ return '%s<p>%s</p>\n%s\n' % (before,ctag(p),after)
+
+ # Emit the (text, sub-structure) pairs HTML-quoted as preformatted
+ # text. 'tagged' is set on the recursive calls, so only the outermost
+ # call writes the <PRE> and </PRE> tags.
+ def pre(self,structure,tagged=0):
+ if not structure: return ''
+ if tagged:
+ r=''
+ else:
+ r='<PRE>\n'
+ for s in structure:
+ r="%s%s\n\n%s" % (r,html_quote(s[0]),self.pre(s[1],1))
+ if not tagged: r=r+'</PRE>\n'
+ return r
+
+ # Append already generated table markup, run through ctag so that
+ # inline markup inside the cells is converted as well.
+ def table(self,before,table,after):
+ return '%s<p>%s</p>\n%s\n' % (before,ctag(table),after)
+
+ # Convert a list of (text, sub-structure) pairs to HTML. The matcher
+ # arguments are compiled once, when the method is defined.
+ # NOTE(review): match_group presumably returns a false value or a pair
+ # whose second item holds the requested groups -- confirm in ts_regex.
+ def _str(self,structure,level,
+ # Static
+ bullet=ts_regex.compile('[ \t\n]*[o*-][ \t\n]+\([^\0]*\)'
+ ).match_group,
+ example=ts_regex.compile('[\0- ]examples?:[\0- ]*$'
+ ).search,
+ dl=ts_regex.compile('\([^\n]+\)[ \t]+--[ \t\n]+\([^\0]*\)'
+ ).match_group,
+ nl=ts_regex.compile('\n').search,
+ ol=ts_regex.compile(
+ '[ \t]*\(\([0-9]+\|[%s]+\)[.)]\)+[ \t\n]+\([^\0]*\|$\)' % string.letters
+ ).match_group,
+ olp=ts_regex.compile('[ \t]*([0-9]+)[ \t\n]+\([^\0]*\|$\)'
+ ).match_group,
+ ):
+ r=''
+ for s in structure:
+
+ # The tests below run in a fixed order and the first one that
+ # succeeds decides how the paragraph is rendered.
+ # bullet item; a paragraph ending in '::' shows its children as <PRE>
+ ts_results = bullet(s[0], (1,))
+ if ts_results:
+ p = ts_results[1]
+ if s[0][-2:]=='::' and s[1]: ps=self.pre(s[1])
+ else: ps=self._str(s[1],level)
+ r=self.ul(r,p,ps)
+ continue
+ # numbered item matched by the 'ol' pattern
+ ts_results = ol(s[0], (3,))
+ if ts_results:
+ p = ts_results[1]
+ if s[0][-2:]=='::' and s[1]: ps=self.pre(s[1])
+ else: ps=self._str(s[1],level)
+ r=self.ol(r,p,ps)
+ continue
+ # numbered item matched by the 'olp' pattern
+ ts_results = olp(s[0], (1,))
+ if ts_results:
+ p = ts_results[1]
+ if s[0][-2:]=='::' and s[1]: ps=self.pre(s[1])
+ else: ps=self._str(s[1],level)
+ r=self.ol(r,p,ps)
+ continue
+ # 'title -- description' entry
+ ts_results = dl(s[0], (1,2))
+ if ts_results:
+ t,d = ts_results[1]
+ r=self.dl(r,t,d,self._str(s[1],level))
+ continue
+ if example(s[0]) >= 0 and s[1]:
+ # Introduce an example, using pre tags:
+ r=self.normal(r,s[0],self.pre(s[1]))
+ continue
+ if s[0][-2:]=='::' and s[1]:
+ # Introduce an example, using pre tags:
+ r=self.normal(r,s[0][:-1],self.pre(s[1]))
+ continue
+ # 'table' here is the module-level Table instance, not the method
+ if table.create(s[0]):
+ ## table support.
+ r=self.table(r,table.html(),self._str(s[1],level))
+ continue
+ else:
+
+ if nl(s[0]) < 0 and s[1] and s[0][-1:] != ':':
+ # Treat as a heading
+ t=s[0]
+ # level 0 stays 0; any other level goes one deeper for the children
+ r=self.head(r,t,level,
+ self._str(s[1],level and level+1))
+ else:
+ r=self.normal(r,s[0],self._str(s[1],level))
+ return r
+
+
+def html_quote(v,
+               character_entities=(
+                   (re.compile('&'), '&amp;'),
+                   (re.compile("<"), '&lt;' ),
+                   (re.compile(">"), '&gt;' ),
+                   (re.compile('"'), '&quot;')
+                   )): #"
+    """Return str(v) with the HTML special characters escaped.
+
+    v -- any object; it is converted with str() first.
+    character_entities -- sequence of (compiled pattern, replacement)
+        pairs that are applied in order.
+
+    The replacements had degraded to the bare characters, which made
+    the function return its input unchanged; they are entity
+    references again.
+    """
+    text=str(v)
+    # '&' comes first so that the ampersands introduced by the later
+    # substitutions are not escaped a second time.
+    for pattern,name in character_entities:
+        text=pattern.sub(name,text)
+    return text
+
+def html_with_references(text, level=1):
+    """Return an HTML formatter for text with bracket references linked.
+
+    Three rewrites are applied in order before the text is handed to
+    HTML: '.. [name]' after a newline or NUL becomes a named anchor,
+    '[name]' becomes a link to that anchor, and '[something.html]'
+    becomes a link to that file.
+    """
+    letters=string.letters
+    rewrites=(
+        (r'[\0\n]\.\. \[([0-9_%s-]+)\]' % letters,
+         r'\n <a name="\1">[\1]</a>'),
+        (r'([\x00- ,])\[(?P<ref>[0-9_%s-]+)\]([\x00- ,.:])' % letters,
+         r'\1<a href="#\2">[\2]</a>\3'),
+        (r'([\0- ,])\[([^]]+)\.html\]([\0- ,.:])',
+         r'\1<a href="\2.html">[\2]</a>\3'),
+        )
+    for pattern,replacement in rewrites:
+        text=re.sub(pattern,replacement,text)
+
+    return HTML(text,level=level)
+
+
+# Command line entry point: read structured text from the file named on
+# the command line (or from standard input) and print it as HTML.
+# NOTE(review): indentation was lost in this copy of the file; the final
+# 'else' presumably pairs with 'if opts:' -- confirm against the original.
+def main():
+ import sys, getopt
+
+ # accepted flags: -t, -w and -l (only -w and -l are examined below)
+ opts,args=getopt.getopt(sys.argv[1:],'twl')
+
+ if args:
+ # exactly one file name is expected; unpacking fails otherwise
+ [infile]=args
+ s=open(infile,'r').read()
+ else:
+ s=sys.stdin.read()
+
+ if opts:
+
+ # -w: emit a CGI style content type header first
+ if filter(lambda o: o[0]=='-w', opts):
+ print 'Content-Type: text/html\n'
+
+ # -l: switch to the locale configured in the environment
+ if filter(lambda o: o[0]=='-l', opts):
+ import locale
+ locale.setlocale(locale.LC_ALL,"")
+
+ # drop a leading '#!' interpreter line
+ if s[:2]=='#!':
+ s=re.sub('^#![^\n]+','',s)
+
+ # strip a leading run of characters in the range NUL..newline that
+ # ends in a newline
+ mo = re.compile('([\0-\n]*\n)').match(s)
+ if mo is not None:
+ s = s[len(mo.group(0)) :]
+
+ s=str(html_with_references(s))
+ # when the output starts with an <h1>, wrap it in a complete HTML
+ # page and reuse the heading text as the page title
+ if s[:4]=='<h1>':
+ t=s[4:find(s,'</h1>')]
+ s='''<html><head><title>%s</title>
+ </head><body>
+ %s
+ </body></html>
+ ''' % (t,s)
+ print s
+ else:
+ print html_with_references(s)
+
+# run main() only when the file is executed as a script
+if __name__=="__main__": main()
class DocBookClass:
- element_types={
- '#text': '_text',
- 'StructuredTextDocument': 'document',
- 'StructuredTextParagraph': 'paragraph',
- 'StructuredTextExample': 'example',
- 'StructuredTextBullet': 'bullet',
- 'StructuredTextNumbered': 'numbered',
- 'StructuredTextDescription': 'description',
- 'StructuredTextDescriptionTitle': 'descriptionTitle',
- 'StructuredTextDescriptionBody': 'descriptionBody',
- 'StructuredTextSection': 'section',
- 'StructuredTextSectionTitle': 'sectionTitle',
- 'StructuredTextLiteral': 'literal',
- 'StructuredTextEmphasis': 'emphasis',
- 'StructuredTextStrong': 'strong',
- 'StructuredTextLink': 'link',
- 'StructuredTextXref': 'xref',
- }
-
- def dispatch(self, doc, level, output):
- getattr(self, self.element_types[doc.getNodeName()])(doc, level, output)
-
- def __call__(self, doc, level=1):
- r=[]
- self.dispatch(doc, level-1, r.append)
- return join(r,'')
-
- def _text(self, doc, level, output):
- if doc.getNodeName() == 'StructuredTextLiteral':
- output(doc.getNodeValue())
- else:
- output(lstrip(doc.getNodeValue()))
-
- def document(self, doc, level, output):
- output('<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook V3.1//EN">\n')
- output('<book>\n')
- children=doc.getChildNodes()
- if (children and
- children[0].getNodeName() == 'StructuredTextSection'):
- output('<title>%s</title>' % children[0].getChildNodes()[0].getNodeValue())
- for c in children:
+ element_types={
+ '#text': '_text',
+ 'StructuredTextDocument': 'document',
+ 'StructuredTextParagraph': 'paragraph',
+ 'StructuredTextExample': 'example',
+ 'StructuredTextBullet': 'bullet',
+ 'StructuredTextNumbered': 'numbered',
+ 'StructuredTextDescription': 'description',
+ 'StructuredTextDescriptionTitle': 'descriptionTitle',
+ 'StructuredTextDescriptionBody': 'descriptionBody',
+ 'StructuredTextSection': 'section',
+ 'StructuredTextSectionTitle': 'sectionTitle',
+ 'StructuredTextLiteral': 'literal',
+ 'StructuredTextEmphasis': 'emphasis',
+ 'StructuredTextStrong': 'strong',
+ 'StructuredTextLink': 'link',
+ 'StructuredTextXref': 'xref',
+ 'StructuredTextSGML': 'sgml',
+ }
+
+ def dispatch(self, doc, level, output):
+ getattr(self, self.element_types[doc.getNodeName()])(doc, level, output)
+
+ def __call__(self, doc, level=1):
+ r=[]
+ self.dispatch(doc, level-1, r.append)
+ return join(r,'')
+
+ def _text(self, doc, level, output):
+ if doc.getNodeName() == 'StructuredTextLiteral':
+ output(doc.getNodeValue())
+ else:
+ output(lstrip(doc.getNodeValue()))
+
+ def document(self, doc, level, output):
+ output('<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook V4.1//EN">\n')
+ output('<book>\n')
+ children=doc.getChildNodes()
+ if (children and
+ children[0].getNodeName() == 'StructuredTextSection'):
+ output('<title>%s</title>' % children[0].getChildNodes()[0].getNodeValue())
+ for c in children:
+ getattr(self, self.element_types[c.getNodeName()])(c, level, output)
+ output('</book>\n')
+
+ def section(self, doc, level, output):
+ output('\n<section>\n')
+ children=doc.getChildNodes()
+ for c in children:
+ getattr(self, self.element_types[c.getNodeName()])(c, level+1, output)
+ output('\n</section>\n')
+
+ def sectionTitle(self, doc, level, output):
+ output('<title>')
+ for c in doc.getChildNodes():
+ try:
getattr(self, self.element_types[c.getNodeName()])(c, level, output)
- output('</book>\n')
-
- def section(self, doc, level, output):
- output('\n<sect%s>\n' % (level + 1))
- children=doc.getChildNodes()
- for c in children:
- getattr(self, self.element_types[c.getNodeName()])(c, level+1, output)
- output('\n</sect%s>\n' % (level + 1))
-
- def sectionTitle(self, doc, level, output):
- output('<title>')
- for c in doc.getChildNodes():
- getattr(self, self.element_types[c.getNodeName()])(c, level, output)
- output('</title>\n')
-
- def description(self, doc, level, output):
- p=doc.getPreviousSibling()
- if p is None or p.getNodeName() is not doc.getNodeName():
- output('<variablelist>\n')
- for c in doc.getChildNodes():
- getattr(self, self.element_types[c.getNodeName()])(c, level, output)
- n=doc.getNextSibling()
- if n is None or n.getNodeName() is not doc.getNodeName():
- output('</variablelist>\n')
-
- def descriptionTitle(self, doc, level, output):
- output('<varlistentry><term>\n')
- for c in doc.getChildNodes():
- getattr(self, self.element_types[c.getNodeName()])(c, level, output)
- output('</term>\n')
-
- def descriptionBody(self, doc, level, output):
- output('<listitem><para>\n')
- for c in doc.getChildNodes():
- getattr(self, self.element_types[c.getNodeName()])(c, level, output)
- output('</para></listitem>\n')
- output('</varlistentry>\n')
+ except:
+ print "failed", c.getNodeName(), c
+ output('</title>\n')
+
+    def description(self, doc, level, output):
+        """Emit one description entry, opening/closing <variablelist> as needed.
+
+        The list is opened when the previous sibling is missing or has a
+        different node name, and closed when the next sibling is missing
+        or has a different node name, so adjacent descriptions share one
+        <variablelist>.
+        """
+        p=doc.getPreviousSibling()
+        # Compare node names by value; identity ('is not') only works when
+        # both names happen to be the same string object.
+        if p is None or p.getNodeName() != doc.getNodeName():
+            output('<variablelist>\n')
+        for c in doc.getChildNodes():
+            getattr(self, self.element_types[c.getNodeName()])(c, level, output)
+        n=doc.getNextSibling()
+        if n is None or n.getNodeName() != doc.getNodeName():
+            output('</variablelist>\n')
+
+ def descriptionTitle(self, doc, level, output):
+ output('<varlistentry><term>\n')
+ for c in doc.getChildNodes():
+ getattr(self, self.element_types[c.getNodeName()])(c, level, output)
+ output('</term>\n')
+
+ def descriptionBody(self, doc, level, output):
+ output('<listitem><para>\n')
+ for c in doc.getChildNodes():
+ getattr(self, self.element_types[c.getNodeName()])(c, level, output)
+ output('</para></listitem>\n')
+ output('</varlistentry>\n')
+
+    def bullet(self, doc, level, output):
+        """Emit one bullet as a <listitem>, wrapping runs in <itemizedlist>.
+
+        The list is opened when the previous sibling is missing or has a
+        different node name, and closed when the next sibling is missing
+        or has a different node name.
+        """
+        p=doc.getPreviousSibling()
+        # Compare node names by value; identity ('is not') only works when
+        # both names happen to be the same string object.
+        if p is None or p.getNodeName() != doc.getNodeName():
+            output('<itemizedlist>\n')
+        output('<listitem><para>\n')
+
+        for c in doc.getChildNodes():
+            getattr(self, self.element_types[c.getNodeName()])(c, level, output)
+        n=doc.getNextSibling()
+        output('</para></listitem>\n')
+        if n is None or n.getNodeName() != doc.getNodeName():
+            output('</itemizedlist>\n')
+
+    def numbered(self, doc, level, output):
+        """Emit one numbered item as a <listitem>, wrapping runs in <orderedlist>.
+
+        The list is opened when the previous sibling is missing or has a
+        different node name, and closed when the next sibling is missing
+        or has a different node name.
+        """
+        p=doc.getPreviousSibling()
+        # Compare node names by value; identity ('is not') only works when
+        # both names happen to be the same string object.
+        if p is None or p.getNodeName() != doc.getNodeName():
+            output('<orderedlist>\n')
+        output('<listitem><para>\n')
+        for c in doc.getChildNodes():
+            getattr(self, self.element_types[c.getNodeName()])(c, level, output)
+        n=doc.getNextSibling()
+        output('</para></listitem>\n')
+        if n is None or n.getNodeName() != doc.getNodeName():
+            output('</orderedlist>\n')
+
+ def example(self, doc, level, output): # Emit the example's child values inside CDATA-wrapped <programlisting> elements.
+ i=0 # NOTE(review): never changed in this method, so the else branch below appears unreachable -- confirm intent
+ for c in doc.getChildNodes():
+ if i==0:
+ output('<programlisting>\n<![CDATA[\n')
+ ##
+ ## eek. A ']]>' in your body will break this...
+ ##
+ output(prestrip(c.getNodeValue())) # prestrip() is a module-level helper, not a method
+ output('\n]]></programlisting>\n')
+ else:
+ getattr(self, self.element_types[c.getNodeName()])(
+ c, level, output)
+
+ def paragraph(self, doc, level, output):
+ output('<para>\n\n')
+ for c in doc.getChildNodes():
+ getattr(self, self.element_types[c.getNodeName()])(
+ c, level, output)
+ output('</para>\n\n')
+
+ def link(self, doc, level, output):
+ output('<ulink url="%s">' % doc.href)
+ for c in doc.getChildNodes():
+ getattr(self, self.element_types[c.getNodeName()])(c, level, output)
+ output('</ulink>')
- def bullet(self, doc, level, output):
- p=doc.getPreviousSibling()
- if p is None or p.getNodeName() is not doc.getNodeName():
- output('<itemizedlist>\n')
- output('<listitem><para>\n')
+ def emphasis(self, doc, level, output):
+ output('<emphasis>')
+ for c in doc.getChildNodes():
+ getattr(self, self.element_types[c.getNodeName()])(c, level, output)
+ output('</emphasis> ')
- for c in doc.getChildNodes():
- getattr(self, self.element_types[c.getNodeName()])(c, level, output)
- n=doc.getNextSibling()
- output('</para></listitem>\n')
- if n is None or n.getNodeName() is not doc.getNodeName():
- output('</itemizedlist>\n')
-
- def numbered(self, doc, level, output):
- p=doc.getPreviousSibling()
- if p is None or p.getNodeName() is not doc.getNodeName():
- output('<orderedlist>\n')
- output('<listitem><para>\n')
- for c in doc.getChildNodes():
- getattr(self, self.element_types[c.getNodeName()])(c, level, output)
- n=doc.getNextSibling()
- output('</para></listitem>\n')
- if n is None or n.getNodeName() is not doc.getNodeName():
- output('</orderedlist>\n')
-
- def example(self, doc, level, output):
- i=0
- for c in doc.getChildNodes():
- if i==0:
- output('<programlisting>\n<![CDATA[\n')
- ##
- ## eek. A ']]>' in your body will break this...
- ##
- output(prestrip(c.getNodeValue()))
- output('\n]]></programlisting>\n')
- else:
- getattr(self, self.element_types[c.getNodeName()])(
- c, level, output)
-
- def paragraph(self, doc, level, output):
-
- output('<para>\n\n')
- for c in doc.getChildNodes():
- getattr(self, self.element_types[c.getNodeName()])(
- c, level, output)
- output('</para>\n\n')
-
- def link(self, doc, level, output):
-# output('<link linkend="%s">' % doc.href)
- for c in doc.getChildNodes():
- getattr(self, self.element_types[c.getNodeName()])(c, level, output)
-# output('</link>')
+ def literal(self, doc, level, output):
+ output('<literal>')
+ for c in doc.getChildNodes():
+ output(c.getNodeValue())
+ output('</literal>')
- def emphasis(self, doc, level, output):
- output('<emphasis>')
- for c in doc.getChildNodes():
- getattr(self, self.element_types[c.getNodeName()])(c, level, output)
- output('</emphasis> ')
+ def strong(self, doc, level, output):
+ output('<emphasis>')
+ for c in doc.getChildNodes():
+ getattr(self, self.element_types[c.getNodeName()])(c, level, output)
+ output('</emphasis>')
- def literal(self, doc, level, output):
- output('<literal>')
- for c in doc.getChildNodes():
- output(c.getNodeValue())
- output('</literal>')
+ def xref(self, doc, level, output):
+ output('<xref linkend="%s"/>' % doc.getNodeValue())
- def strong(self, doc, level, output):
- output('<emphasis>')
- for c in doc.getChildNodes():
- getattr(self, self.element_types[c.getNodeName()])(c, level, output)
- output('</emphasis>')
+ def sgml(self, doc, level, output):
+ output(doc.getNodeValue())
- def xref(self, doc, level, output):
- output('<xref linkend="%s">' % doc.getNodeValue())
def prestrip(v):
- v=string.replace(v, '\r\n', '\n')
- v=string.replace(v, '\r', '\n')
- v=string.replace(v, '\t', ' ')
- lines=string.split(v, '\n')
- indent=len(lines[0])
- for line in lines:
- if not len(line): continue
- i=len(line)-len(string.lstrip(line))
- if i < indent:
- indent=i
- nlines=[]
- for line in lines:
- nlines.append(line[indent:])
- return string.join(nlines, '\r\n')
+ v=string.replace(v, '\r\n', '\n')
+ v=string.replace(v, '\r', '\n')
+ v=string.replace(v, '\t', ' ')
+ lines=string.split(v, '\n')
+ indent=len(lines[0])
+ for line in lines:
+ if not len(line): continue
+ i=len(line)-len(string.lstrip(line))
+ if i < indent:
+ indent=i
+ nlines=[]
+ for line in lines:
+ nlines.append(line[indent:])
+ return string.join(nlines, '\n')
class DocBookChapter(DocBookClass):
- def document(self, doc, level, output):
- output('<chapter>\n')
- children=doc.getChildNodes()
- if (children and
- children[0].getNodeName() == 'StructuredTextSection'):
- output('<title>%s</title>' % children[0].getChildNodes()[0].getNodeValue())
- for c in children[0].getChildNodes()[1:]:
- getattr(self, self.element_types[c.getNodeName()])(c, level, output)
- output('</chapter>\n')
+ def document(self, doc, level, output):
+ output('<chapter>\n')
+ children=doc.getChildNodes()
+ if (children and
+ children[0].getNodeName() == 'StructuredTextSection'):
+ output('<title>%s</title>' % children[0].getChildNodes()[0].getNodeValue())
+ for c in children[0].getChildNodes()[1:]:
+ getattr(self, self.element_types[c.getNodeName()])(c, level, output)
+ output('</chapter>\n')
ets = DocBookClass.element_types
-ets.update({'StructuredTextImage': 'image'})
+ets.update({'StructuredTextImage': 'image'})
class DocBookChapterWithFigures(DocBookChapter):
- element_types = ets
+ element_types = ets
- def image(self, doc, level, output):
- if hasattr(doc, 'key'):
- output('<figure id="%s"><title>%s</title>\n' % (doc.key, doc.getNodeValue()) )
- else:
- output('<figure><title>%s</title>\n' % doc.getNodeValue())
-## for c in doc.getChildNodes():
-## getattr(self, self.element_types[c.getNodeName()])(c, level, output)
- output('<graphic fileref="%s"></graphic>\n</figure>\n' % doc.href)
+ def image(self, doc, level, output):
+ if hasattr(doc, 'key'):
+ output('<figure id="%s"><title>%s</title>\n' % (doc.key, doc.getNodeValue()) )
+ else:
+ output('<figure><title>%s</title>\n' % doc.getNodeValue())
+## for c in doc.getChildNodes():
+## getattr(self, self.element_types[c.getNodeName()])(c, level, output)
+ output('<graphic fileref="%s"></graphic>\n</figure>\n' % doc.href)
class DocBookArticle(DocBookClass):
- def document(self, doc, level, output):
- output('<!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook V3.1//EN">\n')
- output('<article>\n')
- children=doc.getChildNodes()
- if (children and
- children[0].getNodeName() == 'StructuredTextSection'):
- output('<artheader>\n<title>%s</title>\n</artheader>\n' %
- children[0].getChildNodes()[0].getNodeValue())
- for c in children:
- getattr(self, self.element_types[c.getNodeName()])(c, level, output)
- output('</article>\n')
+ def document(self, doc, level, output):
+ output('<!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook V4.1//EN">\n')
+ output('<article>\n')
+ children=doc.getChildNodes()
+ if (children and
+ children[0].getNodeName() == 'StructuredTextSection'):
+ output('<articleinfo>\n<title>%s</title>\n</articleinfo>\n' %
+ children[0].getChildNodes()[0].getNodeValue())
+ for c in children:
+ getattr(self, self.element_types[c.getNodeName()])(c, level, output)
+ output('</article>\n')
class DocBookBook:
+    """Collect rendered chapter strings and wrap them in one DocBook <book>."""
- def __init__(self, title=''):
- self.title = title
- self.chapters = []
+    def __init__(self, title=''):
+        self.title = title
+        self.chapters = []
- def addChapter(self, chapter):
- self.chapters.append(chapter)
+    def addChapter(self, chapter):
+        """Append a chapter; read() concatenates it to a string."""
+        self.chapters.append(chapter)
- def read(self):
- out = '<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook V3.1//EN">\n<book>\n'
- out = out + '<title>%s</title>\n' % self.title
- for chapter in self.chapters:
- out = out + chapter + '\n</book>\n'
+    def read(self):
+        """Return the whole book: doctype, <book>, title, chapters, </book>."""
+        out = '<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook V4.1//EN">\n<book>\n'
+        out = out + '<title>%s</title>\n' % self.title
+        for chapter in self.chapters:
+            out = out + chapter + '\n'
+        # Close the book exactly once, after the last chapter.  Closing it
+        # inside the loop emitted one </book> per chapter and none at all
+        # for a book without chapters.
+        out = out + '</book>\n'
- return out
+        return out
- def __str__(self):
- return self.read()
-
+    def __str__(self):
+        return self.read()
+
import re, ST, STDOM
from string import split, join, replace, expandtabs, strip, find, rstrip
+from STletters import *
+
StringType=type('')
ListType=type([])
+def flatten(obj, append): # Call append() with the value of every text node found at or below obj.
+ if obj.getNodeType()==STDOM.TEXT_NODE:
+ append(obj.getNodeValue())
+ else:
+ for child in obj.getChildNodes(): # non-text node: recurse into each child in order
+ flatten(child, append)
+
+
class StructuredTextExample(ST.StructuredTextParagraph):
"""Represents a section of document with literal text, as for examples"""
def __init__(self, subs, **kw):
- t=[]; a=t.append
- for s in subs: a(s.getNodeValue())
- apply(ST.StructuredTextParagraph.__init__,
- (self, join(t,'\n\n'), ()),
- kw)
+ t=[]
+ a=t.append
+ for s in subs:
+ flatten(s, a)
+ apply(ST.StructuredTextParagraph.__init__,
+ (self, join(t,'\n\n'), ()),
+ kw)
def getColorizableTexts(self): return ()
def setColorizableTexts(self, src): pass # never color examples
apply(ST.StructuredTextParagraph.__init__,
(self, StructuredTextSectionTitle(src), subs),
kw)
-
+
+ def getColorizableTexts(self):
+ return self._src.getColorizableTexts()
+
+ def setColorizableTexts(self,src):
+ self._src.setColorizableTexts(src)
+
# a StructuredTextTable holds StructuredTextRows
-class StructuredTextTable(ST.StructuredTextDocument):
+class StructuredTextTable(ST.StructuredTextParagraph):
"""
rows is a list of lists containing tuples, which
represent the columns/cells in each rows.
"""
def __init__(self, rows, src, subs, **kw):
- apply(ST.StructuredTextDocument.__init__,(self,subs),kw)
+ apply(ST.StructuredTextParagraph.__init__,(self,subs),kw)
self._rows = []
for row in rows:
if row:
return self.setColorizableTexts()
# StructuredTextRow holds StructuredTextColumns
-class StructuredTextRow(ST.StructuredTextDocument):
+class StructuredTextRow(ST.StructuredTextParagraph):
def __init__(self,row,kw):
"""
row is a list of tuples, where each tuple is
the raw text for a cell/column and the span
- of that cell/column".
+ of that cell/column.
EX
[('this is column one',1), ('this is column two',1)]
"""
- apply(ST.StructuredTextDocument.__init__,(self,[]),kw)
+ apply(ST.StructuredTextParagraph.__init__,(self,[]),kw)
+
self._columns = []
- for column in row:
- self._columns.append(StructuredTextColumn(column[0],column[1],kw))
-
+ for column in row:
+ self._columns.append(StructuredTextColumn(column[0],
+ column[1],
+ column[2],
+ column[3],
+ column[4],
+ kw))
+
def getColumns(self):
return [self._columns]
-
+
def _getColumns(self):
return [self._columns]
def setColumns(self,columns):
self._columns = columns
-
+
def _setColumns(self,columns):
return self.setColumns(columns)
-
+
# this holds the text of a table cell
class StructuredTextColumn(ST.StructuredTextParagraph):
"""
or StructuredTextTableData.
"""
- def __init__(self,text,span,kw):
- # print "StructuredTextColumn", text, span
+ def __init__(self,text,span,align,valign,typ,kw):
apply(ST.StructuredTextParagraph.__init__,(self,text,[]),kw)
self._span = span
+ self._align = align
+ self._valign = valign
+ self._type = typ
def getSpan(self):
return self._span
def _getSpan(self):
return self._span
+
+ def getAlign(self):
+ return self._align
+
+ def _getAlign(self):
+ return self.getAlign()
+
+ def getValign(self):
+ return self._valign
+
+ def _getValign(self):
+ return self.getValign()
+
+ def getType(self):
+ return self._type
+
+ def _getType(self):
+ return self.getType()
+
+class StructuredTextTableHeader(ST.StructuredTextParagraph): pass
-class StructuredTextTableHeader(ST.StructuredTextDocument): pass
-
-class StructuredTextTableData(ST.StructuredTextDocument): pass
+class StructuredTextTableData(ST.StructuredTextParagraph): pass
class StructuredTextMarkup(STDOM.Element):
self._value=v
self._attributes=kw.keys()
for k, v in kw.items(): setattr(self, k, v)
-
+
def getChildren(self, type=type, lt=type([])):
v=self._value
if type(v) is not lt: v=[v]
return v
-
+
def getColorizableTexts(self): return self._value,
def setColorizableTexts(self, v): self._value=v[0]
-
+
def __repr__(self):
return '%s(%s)' % (self.__class__.__name__, `self._value`)
-
+
class StructuredTextLiteral(StructuredTextMarkup):
def getColorizableTexts(self): return ()
def setColorizableTexts(self, v): pass
-
+
class StructuredTextEmphasis(StructuredTextMarkup): pass
class StructuredTextStrong(StructuredTextMarkup): pass
class StructuredTextSGML(StructuredTextMarkup): pass
-class StructuredTextLink(StructuredTextMarkup): pass
+class StructuredTextLink(StructuredTextMarkup): pass
+
+class StructuredTextXref(StructuredTextMarkup): pass
-class DocumentClass:
+class DocumentClass:
"""
Class instance calls [ex.=> x()] require a structured text
structure. Doc will then parse each paragraph in the structure
instance with a strong instance stored in its string
"""
- #'doc_table',
paragraph_types = [
'doc_bullet',
'doc_numbered',
#'doc_named_link',
#'doc_underline',
text_types = [
+ 'doc_sgml',
'doc_href',
'doc_strong',
'doc_emphasize',
'doc_literal',
- 'doc_sgml'
+ 'doc_sgml',
+ 'doc_xref',
]
-
+
def __call__(self, doc):
if type(doc) is type(''):
doc=ST.StructuredText(doc)
doc=ST.StructuredTextDocument(self.color_paragraphs(
doc.getSubparagraphs()))
return doc
-
+
def parse(self, raw_string, text_type,
type=type, st=type(''), lt=type([])):
-
+
"""
Parse accepts a raw_string, an expr to test the raw_string,
and the raw_string's subparagraphs.
st=type('')):
result=[]
for paragraph in raw_paragraphs:
- #print type(paragraph)
if paragraph.getNodeName() != 'StructuredTextParagraph':
result.append(paragraph)
continue
break
else:
new_paragraphs=ST.StructuredTextParagraph(paragraph.getColorizableTexts()[0],
- self.color_paragraphs(paragraph.getSubparagraphs()),
- indent=paragraph.indent),
+ self.color_paragraphs(paragraph.getSubparagraphs()),
+ indent=paragraph.indent),
# color the inline StructuredText types
# for each StructuredTextParagraph
for paragraph in new_paragraphs:
if paragraph.getNodeName() is "StructuredTextTable":
- #print "we have a table"
cells = paragraph.getColumns()
text = paragraph.getColorizableTexts()
text = map(ST.StructuredText,text)
text = map(self.__call__,text)
- #for index in range(len(text)):
- # text[index].setColorizableTexts(map(self.color_text,text[index].getColorizableTexts()))
+ for t in range(len(text)):
+ text[t] = text[t].getSubparagraphs()
paragraph.setColorizableTexts(text)
-
+
paragraph.setColorizableTexts(
map(self.color_text,
paragraph.getColorizableTexts()
return result
- def doc_table(self, paragraph, expr = re.compile('\s*\|[-]+\|').match):
+ def doc_table(self, paragraph, expr = re.compile(r'\s*\|[-]+\|').match): # Parse a "|----|"-style text table into a StructuredTextTable; returns None when the paragraph does not start like a table.
text = paragraph.getColorizableTexts()[0]
m = expr(text)
if not (m):
return None
rows = []
-
- rows = split(text,'\n')
-
+
spans = []
ROWS = []
COLS = []
-
- TDdivider = re.compile("[\-]+").match
- THdivider = re.compile("[\=]+").match
-
- # find where the column markers are located
- col = re.compile('\|').search
+ indexes = []
+ ignore = []
+
+ TDdivider = re.compile("[\-]+").match
+ THdivider = re.compile("[\=]+").match
+ col = re.compile('\|').search
+ innertable = re.compile('\|([-]+|[=]+)\|').search
+
text = strip(text)
rows = split(text,'\n')
+ foo = ""
+
for row in range(len(rows)):
rows[row] = strip(rows[row])
-
- for row in rows:
- tmp = strip(row)
- tmp = row[1:len(tmp)-1] # remove leading and trailing |
- offset = 0
+
+ # have indexes store if a row is a divider
+ # or a cell part
+ for index in range(len(rows)):
+ tmpstr = rows[index][1:len(rows[index])-1]
+ if TDdivider(tmpstr):
+ indexes.append("TDdivider")
+ elif THdivider(tmpstr):
+ indexes.append("THdivider")
+ else:
+ indexes.append("cell")
+
+ for index in range(len(indexes)):
+ if indexes[index] == "TDdivider" or indexes[index] == "THdivider": # compare with the string labels stored above, by value
+ ignore = [] # reset ignore
+ #continue # skip dividers
+
+ tmp = strip(rows[index]) # clean the row up
+ tmp = tmp[1:len(tmp)-1] # remove leading + trailing |
+ offset = 0
+
+ # find the start and end of inner
+ # tables. ignore everything between
+ if innertable(tmp):
+ tmpstr = strip(tmp)
+ while innertable(tmpstr):
+ start,end = innertable(tmpstr).span()
+ if not (start,end-1) in ignore:
+ ignore.append((start,end-1)) # list.append takes one argument: store the span as a tuple
+ tmpstr = " " + tmpstr[end:]
+
+ # find the location of column dividers
+ # NOTE: |'s in inner tables do not count
+ # as column dividers
if col(tmp):
while col(tmp):
- start,end = col(tmp).span()
+ bar = 1 # true if start is not in ignore
+ start,end = col(tmp).span()
+
if not start+offset in spans:
- spans.append(start + offset)
- COLS.append((tmp[0:start],start+offset))
- tmp = " " + tmp[end:]
- offset = offset + (start)
+ for s,e in ignore:
+ if start+offset >= s or start+offset <= e: # NOTE(review): with s <= e this "or" is true for every position; "and" may have been intended -- confirm before changing
+ bar = None
+ break
+ if bar: # start is clean
+ spans.append(start+offset)
+ if not bar:
+ foo = foo + tmp[:end]
+ tmp = tmp[end:]
+ offset = offset + end
+ else:
+ COLS.append((foo + tmp[0:start],start+offset))
+ foo = ""
+ tmp = " " + tmp[end:]
+ offset = offset + start
if not offset+len(tmp) in spans:
spans.append(offset+len(tmp))
- COLS.append((tmp,offset+len(tmp)))
+ COLS.append((foo + tmp,offset+len(tmp)))
+ foo = ""
ROWS.append(COLS)
COLS = []
-
- spans.sort()
-
- ROWS = ROWS[1:len(ROWS)]
+ spans.sort()
+ ROWS = ROWS[1:len(ROWS)]
+
# find each column span
cols = []
tmp = []
-
+
for row in ROWS:
for c in row:
tmp.append(c[1])
cols.append(tmp)
tmp = []
-
- cur = 1 # the current column span
- tmp = []
- C = [] # holds the span of each cell
+
+ cur = 1
+ tmp = []
+ C = []
for col in cols:
for span in spans:
if not span in col:
C.append(tmp)
tmp = []
- # make rows contain the cell's text and the span
- # of that cell
for index in range(len(C)):
for i in range(len(C[index])):
ROWS[index][i] = (ROWS[index][i][0],C[index][i])
rows = ROWS
- # now munge the table cells together
+ # label things as either TableData or
+ # Table header
+ TD = []
+ TH = []
+ all = []
+ for index in range(len(indexes)):
+ if indexes[index] == "TDdivider": # value comparison, not identity
+ TD.append(index)
+ all.append(index)
+ if indexes[index] == "THdivider": # value comparison, not identity
+ TH.append(index)
+ all.append(index)
+ TD = TD[1:]
+ dividers = all[1:]
+ #print "TD => ", TD
+ #print "TH => ", TH
+ #print "all => ", all, "\n"
+
+ for div in dividers:
+ if div in TD:
+ index = all.index(div)
+ for rowindex in range(all[index-1],all[index]):
+ for i in range(len(rows[rowindex])):
+ rows[rowindex][i] = (rows[rowindex][i][0],
+ rows[rowindex][i][1],
+ "td")
+ else:
+ index = all.index(div)
+ for rowindex in range(all[index-1],all[index]):
+ for i in range(len(rows[rowindex])):
+ rows[rowindex][i] = (rows[rowindex][i][0],
+ rows[rowindex][i][1],
+ "th")
+
+ # now munge the multi-line cells together
+ # as paragraphs
ROWS = []
COLS = []
for row in rows:
if not COLS:
COLS = range(len(row))
for i in range(len(COLS)):
- COLS[i] = ["",1]
+ COLS[i] = ["",1,""]
if TDdivider(row[index][0]) or THdivider(row[index][0]):
ROWS.append(COLS)
COLS = []
else:
- COLS[index][0] = COLS[index][0] + rstrip(row[index][0]) + "\n"
+ COLS[index][0] = COLS[index][0] + (row[index][0]) + "\n"
COLS[index][1] = row[index][1]
- return StructuredTextTable(ROWS,text,subs,indent=paragraph.indent)
+ COLS[index][2] = row[index][2]
+
+ # now that each cell has been munged together,
+ # determine the cell's alignment.
+ # Default is to center. Also determine the cell's
+ # vertical alignment, top, middle, bottom. Default is
+ # to middle
+ rows = []
+ cols = []
+ for row in ROWS:
+ for index in range(len(row)):
+ topindent = 0
+ bottomindent = 0
+ leftindent = 0
+ rightindent = 0
+ left = []
+ right = []
+ text = row[index][0]
+ text = split(text,'\n')
+ text = text[:len(text)-1]
+ align = ""
+ valign = ""
+ for t in text:
+ t = strip(t)
+ if not t:
+ topindent = topindent + 1
+ else:
+ break
+ text.reverse()
+ for t in text:
+ t = strip(t)
+ if not t:
+ bottomindent = bottomindent + 1
+ else:
+ break
+ text.reverse()
+ tmp = join(text[topindent:len(text)-bottomindent],"\n")
+ pars = re.compile("\n\s*\n").split(tmp)
+ for par in pars:
+ if index > 0:
+ par = par[1:]
+ par = split(par, ' ')
+ for p in par:
+ if not p:
+ leftindent = leftindent+1
+ else:
+ break
+ left.append(leftindent)
+ leftindent = 0
+ par.reverse()
+ for p in par:
+ if not p:
+ rightindent = rightindent + 1
+ else:
+ break
+ right.append(rightindent)
+ rightindent = 0
+ left.sort()
+ right.sort()
+
+ if topindent == bottomindent:
+ valign="middle"
+ elif topindent < 1:
+ valign="top"
+ elif bottomindent < 1:
+ valign="bottom"
+ else:
+ valign="middle"
+
+ if left[0] < 1:
+ align = "left"
+ elif right[0] < 1:
+ align = "right"
+ elif left[0] > 1 and right[0] > 1:
+ align="center"
+ else:
+ align="left"
+
+ cols.append((row[index][0],row[index][1],align,valign,row[index][2])) # one 5-tuple per cell: (text, span, align, valign, type)
+ rows.append(cols)
+ cols = []
+ return StructuredTextTable(rows,text,subs,indent=paragraph.indent)
- def doc_bullet(self, paragraph, expr = re.compile('\s*[-*o]\s+').match):
+ def doc_bullet(self, paragraph, expr = re.compile(r'\s*[-*o]\s+').match):
top=paragraph.getColorizableTexts()[0]
m=expr(top)
def doc_numbered(
self, paragraph,
- expr = re.compile('(\s*[a-zA-Z]+\.)|(\s*[0-9]+\.)|(\s*[0-9]+\s+)').match):
+ expr = re.compile(r'(\s*[%s]+\.)|(\s*[0-9]+\.)|(\s*[0-9]+\s+)' % letters).match):
# This is the old expression. It had a nasty habit
# of grabbing paragraphs that began with a single
def doc_description(
self, paragraph,
- delim = re.compile('\s+--\s+').search,
- nb=re.compile(r'[^\0- ]').search,
+ delim = re.compile(r'\s+--\s+').search,
+ nb=re.compile(r'[^\000- ]').search,
):
top=paragraph.getColorizableTexts()[0]
delim=d)
def doc_header(self, paragraph,
- expr = re.compile('[ a-zA-Z0-9.:/,-_*<>\?\'\"]+').match
+ expr = re.compile(r'[ %s0-9.:/,-_*<>\?\'\"]+' % letters).match
):
subs=paragraph.getSubparagraphs()
if not subs: return None
def doc_literal(
self, s,
expr=re.compile(
- "(?:\s|^)'" # open
- "([^ \t\n\r\f\v']|[^ \t\n\r\f\v'][^\n']*[^ \t\n\r\f\v'])" # contents
- "'(?:\s|[,.;:!?]|$)" # close
+ r"(?:\s|^)'" # open
+ r"([^ \t\n\r\f\v']|[^ \t\n\r\f\v'][^\n']*[^ \t\n\r\f\v'])" # contents
+ r"'(?:\s|[,.;:!?]|$)" # close
).search):
r=expr(s)
def doc_emphasize(
self, s,
- expr = re.compile('\s*\*([ \na-zA-Z0-9.:/;,\'\"\?]+)\*(?!\*|-)').search
+ expr = re.compile(r'\s*\*([ \n%s0-9]+)\*(?!\*|-)' % lettpunc).search
):
r=expr(s)
def doc_inner_link(self,
s,
- expr1 = re.compile("\.\.\s*").search,
- expr2 = re.compile("\[[a-zA-Z0-9]+\]").search):
+ expr1 = re.compile(r"\.\.\s*").search,
+ expr2 = re.compile(r"\[[%s0-9]+\]" % letters ).search):
# make sure we dont grab a named link
if expr2(s) and expr1(s):
def doc_named_link(self,
s,
- expr=re.compile("(\.\.\s)(\[[a-zA-Z0-9]+\])").search):
+ expr=re.compile(r"(\.\.\s)(\[[%s0-9]+\])" % letters).search):
result = expr(s)
if result:
def doc_underline(self,
s,
- expr=re.compile("\_([a-zA-Z0-9\s\.,\?]+)\_").search):
+ expr=re.compile(r"\s+\_([%s0-9\s]+)\_" % lettpunc).search):
result = expr(s)
if result:
def doc_strong(self,
s,
- expr = re.compile('\s*\*\*([ \na-zA-Z0-9.:/;\-,!\?\'\"]+)\*\*').search
+ expr = re.compile(r'\s*\*\*([ \n%s0-9]+)\*\*' % lettpunc).search
):
r=expr(s)
return (StructuredTextStrong(s[start:end]), start-2, end+2)
else:
return None
+
+ ## Some constants to make the doc_href() regex easier to read.
+ _DQUOTEDTEXT = r'("[%s0-9\n\-\.\,\;\(\)\/\:\/\*\']+")' % letters ## double quoted text
+ _URL_AND_PUNC = r'([%s0-9\@\.\,\?\!\/\:\;\-\#\~]+)' % letters
+ _SPACES = r'(\s*)'
- def doc_href(
-
- self, s,
- expr1 = re.compile("(\"[ a-zA-Z0-9\n\-\.\,\;\(\)\/\:\/]+\")(:)([a-zA-Z0-9\:\/\.\~\-]+)([,]*\s*)").search,
- expr2 = re.compile('(\"[ a-zA-Z0-9\n\-\.\:\;\(\)\/]+\")([,]+\s+)([a-zA-Z0-9\@\.\,\?\!\/\:\;\-\#]+)(\s*)').search):
+ def doc_href(self, s,
+ expr1 = re.compile(_DQUOTEDTEXT + "(:)" + _URL_AND_PUNC + _SPACES).search,
+ expr2 = re.compile(_DQUOTEDTEXT + r'(\,\s+)' + _URL_AND_PUNC + _SPACES).search):
- punctuation = re.compile("[\,\.\?\!\;]+").match
+ punctuation = re.compile(r"[\,\.\?\!\;]+").match
r=expr1(s) or expr2(s)
if r:
else:
return None
- def doc_sgml(self,s,expr=re.compile("\<[a-zA-Z0-9\.\=\'\"\:\/\-\#\+\s]+\>").search):
+ def doc_sgml(self,s,expr=re.compile(r"\<[%s0-9\.\=\'\"\:\/\-\#\+\s\*]+\>" % letters).search):
"""
SGML text is ignored and outputed as-is
"""
start,end = r.span()
text = s[start:end]
return (StructuredTextSGML(text),start,end)
+
+
+    def doc_xref(self, s,
+                 expr = re.compile(r'\[([%s0-9\-.:/;,\n\~]+)\]' % letters).search
+                 ):
+        """Find the first bracketed cross-reference such as "[name]" in s.
+
+        Returns a (StructuredTextXref, start, end) tuple, where the node
+        holds the text between the brackets and start/end delimit the
+        whole "[...]" span in s, or None when s contains no reference.
+        """
+        # The pattern is a raw string, as the other regexes in this class
+        # are; the compiled expression is the same as with the plain literal.
+        r = expr(s)
+        if r:
+            start, end = r.span(1)
+            # Widen by one on each side to cover the enclosing brackets.
+            return (StructuredTextXref(s[start:end]), start-1, end+1)
+        else:
+            return None
+
+
+
+
##############################################################################
from string import join, split, find
+from cgi import escape
import re, sys, ST
class HTMLClass:
def bullet(self, doc, level, output):
p=doc.getPreviousSibling()
if p is None or p.getNodeName() is not doc.getNodeName():
- output('<ul>\n')
+ output('\n<ul>\n')
output('<li>')
for c in doc.getChildNodes():
getattr(self, self.element_types[c.getNodeName()])(c, level, output)
n=doc.getNextSibling()
output('</li>\n')
if n is None or n.getNodeName() is not doc.getNodeName():
- output('</ul>\n')
+ output('\n</ul>\n')
def numbered(self, doc, level, output):
p=doc.getPreviousSibling()
if p is None or p.getNodeName() is not doc.getNodeName():
- output('<ol>\n')
+ output('\n<ol>\n')
output('<li>')
for c in doc.getChildNodes():
getattr(self, self.element_types[c.getNodeName()])(c, level, output)
n=doc.getNextSibling()
output('</li>\n')
if n is None or n.getNodeName() is not doc.getNodeName():
- output('</ol>\n')
+ output('\n</ol>\n')
def example(self, doc, level, output):
i=0
for c in doc.getChildNodes():
if i==0:
- output('<pre>')
- output(html_quote(c.getNodeValue()))
- output('</pre>\n')
+ output('\n<pre>\n')
+ output(escape(c.getNodeValue()))
+ output('\n</pre>\n')
else:
getattr(self, self.element_types[c.getNodeName()])(
c, level, output)
else:
getattr(self, self.element_types[c.getNodeName()])(
c, level, output)
- output('</p>')
+ output('</p>\n')
def link(self, doc, level, output):
output('<a href="%s">' % doc.href)
def literal(self, doc, level, output):
output('<code>')
for c in doc.getChildNodes():
- output(html_quote(c.getNodeValue()))
+ output(escape(c.getNodeValue()))
output('</code>')
def strong(self, doc, level, output):
def sgml(self,doc,level,output):
for c in doc.getChildNodes():
getattr(self, self.element_types[c.getNodeName()])(c, level, output)
+
+ def xref(self, doc, level, output):
+ val = doc.getNodeValue()
+ output('<a href="#%s">[%s]</a>' % (val, val) )
def table(self,doc,level,output):
"""
for row in doc.getRows()[0]:
output("<tr>\n")
for column in row.getColumns()[0]:
- str = "<td colspan=%s>" % column.getSpan()
+ if hasattr(column,"getAlign"):
+ str = "<%s colspan=%s align=%s valign=%s>" % (column.getType(),
+ column.getSpan(),
+ column.getAlign(),
+ column.getValign())
+ else:
+ str = "<td colspan=%s>" % column.getSpan()
output(str)
- #for c in doc.getChildNodes():
- # getattr(self, self.element_types[c.getNodeName()])(c, level, output)
for c in column.getChildNodes():
getattr(self, self.element_types[c.getNodeName()])(c, level, output)
- output("</td>\n")
+ if hasattr(column,"getType"):
+ output("</"+column.getType()+">\n")
+ else:
+ output("</td>\n")
output("</tr>\n")
output("</table>\n")
-def html_quote(v, name='(Unknown name)', md={},
- character_entities=(
- (('&'), '&'),
- (('<'), '<' ),
- (('>'), '>' ),
- (('\213'), '<' ),
- (('\233'), '>' ),
- (('"'), '"'))): #"
- text=str(v)
- for re,name in character_entities:
- if find(text, re) >= 0: text=join(split(text,re),name)
- return text
-
output('</body>\n')
output('</html>\n')
-
- def image(self, doc, level, output):
- output('<img src="%s" alt="%s">' % (doc.href, doc.getNodeValue()))
-
-
def image(self, doc, level, output):
if hasattr(doc, 'key'):
- output('<a name="%s"></a>\n<img src="%s" alt="%s">' % (doc.key, doc.href, doc.getNodeValue()))
- else:
- output('<img src="%s" alt="%s">' % (doc.href, doc.getNodeValue()))
-
+ output('<a name="%s"></a>\n' % doc.key)
+ output('<img src="%s" alt="%s">\n' % (doc.href, doc.getNodeValue()))
+ if doc.getNodeValue() and hasattr(doc, 'key'):
+ output('<p><b>Figure %s</b> %s</p>\n' % (doc.key, doc.getNodeValue()))
def xref(self, doc, level, output):
val = doc.getNodeValue()
- output('<a href="#%s">%s</a>' % (val, val) )
+ output('<a href="#%s">Figure %s</a>' % (val, val) )
+++ /dev/null
-##############################################################################
-#
-# Zope Public License (ZPL) Version 1.0
-# -------------------------------------
-#
-# Copyright (c) Digital Creations. All rights reserved.
-#
-# This license has been certified as Open Source(tm).
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-# 1. Redistributions in source code must retain the above copyright
-# notice, this list of conditions, and the following disclaimer.
-#
-# 2. Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions, and the following disclaimer in
-# the documentation and/or other materials provided with the
-# distribution.
-#
-# 3. Digital Creations requests that attribution be given to Zope
-# in any manner possible. Zope includes a "Powered by Zope"
-# button that is installed by default. While it is not a license
-# violation to remove this button, it is requested that the
-# attribution remain. A significant investment has been put
-# into Zope, and this effort will continue if the Zope community
-# continues to grow. This is one way to assure that growth.
-#
-# 4. All advertising materials and documentation mentioning
-# features derived from or use of this software must display
-# the following acknowledgement:
-#
-# "This product includes software developed by Digital Creations
-# for use in the Z Object Publishing Environment
-# (http://www.zope.org/)."
-#
-# In the event that the product being advertised includes an
-# intact Zope distribution (with copyright and license included)
-# then this clause is waived.
-#
-# 5. Names associated with Zope or Digital Creations must not be used to
-# endorse or promote products derived from this software without
-# prior written permission from Digital Creations.
-#
-# 6. Modified redistributions of any form whatsoever must retain
-# the following acknowledgment:
-#
-# "This product includes software developed by Digital Creations
-# for use in the Z Object Publishing Environment
-# (http://www.zope.org/)."
-#
-# Intact (re-)distributions of any official Zope release do not
-# require an external acknowledgement.
-#
-# 7. Modifications are encouraged but must be packaged separately as
-# patches to official Zope releases. Distributions that do not
-# clearly separate the patches from the original work must be clearly
-# labeled as unofficial distributions. Modifications which do not
-# carry the name Zope may be packaged in any form, as long as they
-# conform to all of the clauses above.
-#
-#
-# Disclaimer
-#
-# THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
-# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-# SUCH DAMAGE.
-#
-#
-# This software consists of contributions made by Digital Creations and
-# many individuals on behalf of Digital Creations. Specific
-# attributions are listed in the accompanying credits file.
-#
-##############################################################################
-'''
-$Id$'''
-
-from StructuredText import * # :-)
-
-def ctag(s):
- # Blech, wish we could use character tags
- if s is None: s=''
- s=gsub(strong,'\\1<bold>\\2<plain>\\3',s)
- s=gsub(code, '\\1<family Courier>\\2<family Times>\\3',s)
- s=gsub(em, '\\1<italic>\\2<plain>\\3',s)
- return join(map(strip,split(s,'\n')),'\n')
-
-class MML(StructuredText):
-
- '''\
- An MML structured text formatter.
- '''\
-
- def __str__(self,
- ):
- '''\
- Return an HTML string representation of the structured text data.
-
- '''
- s=self._str(self.structure,self.level)
- return s
-
- def ul(self, before, p, after):
- return ("%s\n\n<Bulleted>\n%s%s"
- % (before, ctag(p), after))
-
- def ol(self, before, p, after):
- return ("%s\n\n<Numbered>\n%s%s"
- % (before, ctag(p), after))
-
- def dl(self, before, t, d, after):
- return ("%s\n\n<Term>\n%s\n\n<Definition>\n%s%s"
- % (before,ctag(t),ctag(d),after))
-
- def head(self, before, t, level, d):
- return ("%s\n\n<Heading%d>\n%s%s"
- % (before,level,ctag(t),d))
-
- def normal(self,before,p,after):
- return "%s\n\n<Body>\n%s%s" % (before, ctag(p), after)
-
- def pre(self,structure,r=None):
- if r is None: r=['']
- for s in structure:
- for line in split(s[0],'\n'):
- r.append('\n<PRE>')
- r.append(line)
- self.pre(s[1],r)
- return join(r,'\n')
-
- def _str(self,structure,level):
- r=''
- for s in structure:
- # print s[0],'\n', len(s[1]), '\n\n'
- if bullet.match(s[0]) >= 0:
- p=bullet.group(1)
- r=self.ul(r,p,self._str(s[1],level))
- elif ol.match(s[0]) >= 0:
- p=ol.group(3)
- r=self.ol(r,p,self._str(s[1],level))
- elif olp.match(s[0]) >= 0:
- p=olp.group(1)
- r=self.ol(r,p,self._str(s[1],level))
- elif dl.match(s[0]) >= 0:
- t,d=dl.group(1,2)
- r=self.dl(r,t,d,self._str(s[1],level))
- elif example.search(s[0]) >= 0 and s[1]:
- # Introduce an example, using pre tags:
- r=self.normal(r,s[0],self.pre(s[1]))
- elif s[0][-2:]=='::' and s[1]:
- # Introduce an example, using pre tags:
- r=self.normal(r,s[0][:-1],self.pre(s[1]))
- elif nl.search(s[0]) < 0 and s[1] and s[0][-1:] != ':':
- # Treat as a heading
- t=s[0]
- r=self.head(r,t,level,
- self._str(s[1],level and level+1))
- else:
- r=self.normal(r,s[0],self._str(s[1],level))
- return r
"""
#print "struct", struct, top-1
if not top-1 in range(len(struct)):
- return None
+ if struct:
+ return struct[len(struct)-1].getSubparagraphs()
+ return struct
run = struct[top-1]
i = 0
while i+1 < level:
if result > 0:
currentlevel = result
currentindent = indent
- run = insert(struct,level,currentlevel)
- run.append(StructuredTextParagraph(paragraph, indent=indent, level=currentlevel))
+ if not level:
+ struct.append(StructuredTextParagraph(paragraph, indent=indent, level=currentlevel))
+ else:
+ run = insert(struct,level,currentlevel)
+ run.append(StructuredTextParagraph(paragraph, indent=indent, level=currentlevel))
else:
if insert(struct,level,currentlevel):
run = insert(struct,level,currentlevel)
if not children:
return None
- n=chidren[0]
+ n=children[0]
if type(n) is st:
n=TextNode(n)
return self.getNodeType()
def _get_NodeValue(self, type=type, st=type('')):
- return self.GetNodeValue(type,st)
+ return self.getNodeValue(type,st)
def _get_ParentNode(self):
return self.getParentNode()
st=StructuredText.Basic(raw)
The output of 'StructuredText.Basic' is simply a
- StructuredTextDocumemt object containing StructuredTextParagraph
+ StructuredTextDocument object containing StructuredTextParagraph
objects arranged in a hierarchy. Paragraphs are delimited by strings
of two or more whitespace characters beginning and ending with
newline characters. Hierarchy is indicated by indentation. The
--- /dev/null
+import string
+
+try:
+ del string
+ import locale
+ locale.setlocale(locale.LC_ALL,"")
+except:
+ pass
+
+import string
+
+letters = string.letters
+punctuations = string.punctuation
+
+lettpunc = letters + punctuations
-#! /usr/bin/env python -- # -*- python -*-
##############################################################################
#
# Zope Public License (ZPL) Version 1.0
# attributions are listed in the accompanying credits file.
#
##############################################################################
-'''Structured Text Manipulation
-Parse a structured text string into a form that can be used with
-structured formats, like html.
+""" Alias module for StructuredTextClassic compatibility which makes
+use of StructuredTextNG """
-Structured text is text that uses indentation and simple
-symbology to indicate the structure of a document.
-A structured string consists of a sequence of paragraphs separated by
-one or more blank lines. Each paragraph has a level which is defined
-as the minimum indentation of the paragraph. A paragraph is a
-sub-paragraph of another paragraph if the other paragraph is the last
-preceding paragraph that has a lower level.
+import HTMLClass, DocumentClass, ClassicDocumentClass
+from ST import Basic
-Special symbology is used to indicate special constructs:
+import re, string,sys
+from STletters import letters
-- A single-line paragraph whose immediately succeeding paragraphs are lower
- level is treated as a header.
+Document = ClassicDocumentClass.DocumentClass()
+HTMLNG = HTMLClass.HTMLClass()
-- A paragraph that begins with a '-', '*', or 'o' is treated as an
- unordered list (bullet) element.
+def HTML(aStructuredString, level=0):
+ st = Basic(aStructuredString)
+ doc = Document(st)
+ return HTMLNG(doc)
-- A paragraph that begins with a sequence of digits followed by a
- white-space character is treated as an ordered list element.
+def StructuredText(aStructuredString, level=0):
+ return HTML(aStructuredString,level)
-- A paragraph that begins with a sequence of sequences, where each
- sequence is a sequence of digits or a sequence of letters followed
- by a period, is treated as an ordered list element.
-
-- A paragraph with a first line that contains some text, followed by
- some white-space and '--' is treated as
- a descriptive list element. The leading text is treated as the
- element title.
-
-- Sub-paragraphs of a paragraph that ends in the word 'example' or the
- word 'examples', or '::' is treated as example code and is output as is.
-
-- Text enclosed single quotes (with white-space to the left of the
- first quote and whitespace or puctuation to the right of the second quote)
- is treated as example code.
-
-- Text surrounded by '*' characters (with white-space to the left of the
- first '*' and whitespace or puctuation to the right of the second '*')
- is emphasized.
-
-- Text surrounded by '**' characters (with white-space to the left of the
- first '**' and whitespace or puctuation to the right of the second '**')
- is made strong.
-
-- Text surrounded by '_' underscore characters (with whitespace to the left
- and whitespace or punctuation to the right) is made underlined.
-
-- Text encloded by double quotes followed by a colon, a URL, and concluded
- by punctuation plus white space, *or* just white space, is treated as a
- hyper link. For example:
-
- "Zope":http://www.zope.org/ is ...
-
- Is interpreted as '<a href="http://www.zope.org/">Zope</a> is ....'
- Note: This works for relative as well as absolute URLs.
-
-- Text enclosed by double quotes followed by a comma, one or more spaces,
- an absolute URL and concluded by punctuation plus white space, or just
- white space, is treated as a hyper link. For example:
-
- "mail me", mailto:amos@digicool.com.
-
- Is interpreted as '<a href="mailto:amos@digicool.com">mail me</a>.'
-
-- Text enclosed in brackets which consists only of letters, digits,
- underscores and dashes is treated as hyper links within the document.
- For example:
-
- As demonstrated by Smith [12] this technique is quite effective.
-
- Is interpreted as '... by Smith <a href="#12">[12]</a> this ...'. Together
- with the next rule this allows easy coding of references or end notes.
-
-- Text enclosed in brackets which is preceded by the start of a line, two
- periods and a space is treated as a named link. For example:
-
- .. [12] "Effective Techniques" Smith, Joe ...
-
- Is interpreted as '<a name="12">[12]</a> "Effective Techniques" ...'.
- Together with the previous rule this allows easy coding of references or
- end notes.
-
-
-- A paragraph that has blocks of text enclosed in '||' is treated as a
- table. The text blocks correspond to table cells and table rows are
- denoted by newlines. By default the cells are center aligned. A cell
- can span more than one column by preceding a block of text with an
- equivalent number of cell separators '||'. Newlines and '|' cannot
- be a part of the cell text. For example:
-
- |||| **Ingredients** ||
- || *Name* || *Amount* ||
- ||Spam||10||
- ||Eggs||3||
-
- is interpreted as::
-
- <TABLE BORDER=1 CELLPADDING=2>
- <TR>
- <TD ALIGN=CENTER COLSPAN=2> <strong>Ingredients</strong> </TD>
- </TR>
- <TR>
- <TD ALIGN=CENTER COLSPAN=1> <em>Name</em> </TD>
- <TD ALIGN=CENTER COLSPAN=1> <em>Amount</em> </TD>
- </TR>
- <TR>
- <TD ALIGN=CENTER COLSPAN=1>Spam</TD>
- <TD ALIGN=CENTER COLSPAN=1>10</TD>
- </TR>
- <TR>
- <TD ALIGN=CENTER COLSPAN=1>Eggs</TD>
- <TD ALIGN=CENTER COLSPAN=1>3</TD>
- </TR>
- </TABLE>
-
-
-$Id$'''
-# Copyright
-#
-# Copyright 1996 Digital Creations, L.C., 910 Princess Anne
-# Street, Suite 300, Fredericksburg, Virginia 22401 U.S.A. All
-# rights reserved. Copyright in this software is owned by DCLC,
-# unless otherwise indicated. Permission to use, copy and
-# distribute this software is hereby granted, provided that the
-# above copyright notice appear in all copies and that both that
-# copyright notice and this permission notice appear. Note that
-# any product, process or technology described in this software
-# may be the subject of other Intellectual Property rights
-# reserved by Digital Creations, L.C. and are not licensed
-# hereunder.
-#
-# Trademarks
-#
-# Digital Creations & DCLC, are trademarks of Digital Creations, L.C..
-# All other trademarks are owned by their respective companies.
-#
-# No Warranty
-#
-# The software is provided "as is" without warranty of any kind,
-# either express or implied, including, but not limited to, the
-# implied warranties of merchantability, fitness for a particular
-# purpose, or non-infringement. This software could include
-# technical inaccuracies or typographical errors. Changes are
-# periodically made to the software; these changes will be
-# incorporated in new editions of the software. DCLC may make
-# improvements and/or changes in this software at any time
-# without notice.
-#
-# Limitation Of Liability
-#
-# In no event will DCLC be liable for direct, indirect, special,
-# incidental, economic, cover, or consequential damages arising
-# out of the use of or inability to use this software even if
-# advised of the possibility of such damages. Some states do not
-# allow the exclusion or limitation of implied warranties or
-# limitation of liability for incidental or consequential
-# damages, so the above limitation or exclusion may not apply to
-# you.
-#
-#
-# If you have questions regarding this software,
-# contact:
-#
-# Jim Fulton, jim@digicool.com
-#
-# (540) 371-6909
-#
-# $Log$
-# Revision 1.1 2001/03/10 05:07:20 RD
-# Added some simple sample apps
-#
-# Revision 1.27 2000/04/21 13:38:10 jim
-# Added closing list tags. Woo hoo!
-#
-# Revision 1.26 2000/03/14 17:22:04 brian
-# Allow ~ in hrefs.
-#
-# Revision 1.25 2000/02/17 00:53:24 klm
-# HTML._str(): We were getting preformatted examples rendered twice,
-# second time without preformatting. Problem was a missing 'continue'
-# in one of the cases.
-#
-# Revision 1.24 1999/12/13 16:32:48 klm
-# Incorporated pavlos christoforou's mods to handle simple tables. From
-# his web page at http://www.zope.org/Members/gaaros/StructuredText:
-#
-# Structured Text module with table support
-#
-# A paragraph that has blocks of text enclosed in '||' is treated as a
-# table. The text blocks correspond to table cells and table rows are
-# denoted by newlines. By default the cells are center aligned. You can
-# change the defaults by modifying the CELL,ROW and TABLE class
-# attributes in class Table. A cell can span more than one column by
-# preceding a block of text with an equivalent number of cell separators
-# '||'. Newlines and '|' cannot be a part of the cell text. If you need
-# newlines use <BR>. For example:
-#
-# |||| **Ingredients** ||
-# || *Name* || *Amount* ||
-# ||Spam||10||
-# ||Eggs||3||
-#
-# Revision 1.23 1999/08/03 20:49:05 jim
-# Fixed to allow list elements to introduce examples.
-#
-# Restructured _str using continue to avoid excessive nesting.
-#
-# Revision 1.22 1999/08/02 22:01:28 jim
-# Fixed a bunch of bugs introduced by making ts_regex actually thread
-# safe.
-#
-# Also localized a bunch of regular expressions
-# using "static" variables (aka always default arguments).
-#
-# Revision 1.21 1999/08/02 13:26:52 jim
-# paragraph_divider needs to be a regular (thread-unsafe) regex
-# since it gets passed to ts_regex.split, which is thread-safe
-# and wants to use regs.
-#
-# Revision 1.20 1999/07/21 13:33:59 jim
-# untabified.
-#
-# Revision 1.19 1999/07/15 16:43:15 jim
-# Checked in Scott Robertson's thread-safety fixes.
-#
-# Revision 1.18 1999/03/24 00:03:18 klm
-# Provide for relative links, eg <a href="file_in_same_dir">whatever</a>,
-# as:
-#
-# "whatever", :file_in_same_dir
-#
-# or
-#
-# "whatever"::file_in_same_dir
-#
-# .__init__(): relax the second gsub, using a '*' instead of a '+', so
-# the stuff before the ':' can be missing, and also do postprocessing so
-# any resulting '<a href=":file_in_same_dir">'s have the superfluous ':'
-# removed. *Seems* good!
-#
-# Revision 1.17 1999/03/12 23:21:39 klm
-# Gratuituous checkin to test my cvs *update* logging hook.
-#
-# Revision 1.16 1999/03/12 17:12:12 klm
-# Added support for underlined elements, in the obvious way (and
-# included an entry in the module docstring for it).
-#
-# Added an entry in the module docstring describing what i *guess* is
-# the criterion for identifying header elements. (I'm going to have to
-# delve into and understand the framework a bit better before *knowing*
-# this is the case.)
-#
-# Revision 1.15 1999/03/11 22:40:18 klm
-# Handle links that include '#' named links.
-#
-# Revision 1.14 1999/03/11 01:35:19 klm
-# Fixed a small typo, and refined the module docstring link example, in
-# order to do a checkin to exercise the CVS repository mirroring. Might
-# as well include my last checkin message, with some substantial stuff:
-#
-# Links are now recognized whether or not the candidate strings are
-# terminated with punctuation before the trailing whitespace. The old
-# form - trailing punctuation then whitespace - is preserved, but the
-# punctuation is now unnecessary.
-#
-# The regular expressions are a bit more complicated, but i've factored
-# out the common parts and but them in variables with suggestive names,
-# which may make them easier to understand.
-#
-# Revision 1.13 1999/03/11 00:49:57 klm
-# Links are now recognized whether or not the candidate strings are
-# terminated with punctuation before the trailing whitespace. The old
-# form - trailing punctuation then whitespace - is preserved, but the
-# punctuation is now unnecessary.
-#
-# The regular expressions are a bit more complicated, but i've factored
-# out the common parts and but them in variables with suggestive names,
-# which may make them easier to understand.
-#
-# Revision 1.12 1999/03/10 00:15:46 klm
-# Committing with version 1.0 of the license.
-#
-# Revision 1.11 1999/02/08 18:13:12 klm
-# Trival checkin (spelling fix "preceedeing" -> "preceding" and similar)
-# to see what pitfalls my environment presents to accomplishing a
-# successful checkin. (It turns out that i can't do it from aldous because
-# the new version of cvs doesn't support the '-t' and '-f' options in the
-# cvswrappers file...)
-#
-# Revision 1.10 1998/12/29 22:30:43 amos
-# Improved doc string to describe hyper link and references capabilities.
-#
-# Revision 1.9 1998/12/04 20:15:31 jim
-# Detabification and new copyright.
-#
-# Revision 1.8 1998/02/27 18:45:22 jim
-# Various updates, including new indentation utilities.
-#
-# Revision 1.7 1997/12/12 15:39:54 jim
-# Added level as argument for html_with_references.
-#
-# Revision 1.6 1997/12/12 15:27:25 jim
-# Added additional pattern matching for HTML references.
-#
-# Revision 1.5 1997/03/08 16:01:03 jim
-# Moved code to recognize: "foo bar", url.
-# into object initializer, so it gets applied in all cases.
-#
-# Revision 1.4 1997/02/17 23:36:35 jim
-# Added support for "foo title", http:/foohost/foo
-#
-# Revision 1.3 1996/12/06 15:57:37 jim
-# Fixed bugs in character tags.
-#
-# Added -t command-line option to generate title if:
-#
-# - The first paragraph is one line (i.e. a heading) and
-#
-# - All other paragraphs are indented.
-#
-# Revision 1.2 1996/10/28 13:56:02 jim
-# Fixed bug in ordered lists.
-# Added option for either HTML-style headings or descriptive-list style
-# headings.
-#
-# Revision 1.1 1996/10/23 14:00:45 jim
-# *** empty log message ***
-#
-#
-#
-
-import ts_regex, regex
-from ts_regex import gsub
-from string import split, join, strip, find
-
-def untabify(aString,
- indent_tab=ts_regex.compile('\(\n\|^\)\( *\)\t').search_group,
- ):
- '''\
- Convert indentation tabs to spaces.
- '''
- result=''
- rest=aString
- while 1:
- ts_results = indent_tab(rest, (1,2))
- if ts_results:
- start, grps = ts_results
- lnl=len(grps[0])
- indent=len(grps[1])
- result=result+rest[:start]
- rest="\n%s%s" % (' ' * ((indent/8+1)*8),
- rest[start+indent+1+lnl:])
- else:
- return result+rest
-
-def indent(aString, indent=2):
- """Indent a string the given number of spaces"""
- r=split(untabify(aString),'\n')
- if not r: return ''
- if not r[-1]: del r[-1]
- tab=' '*level
- return "%s%s\n" % (tab,join(r,'\n'+tab))
-
-def reindent(aString, indent=2, already_untabified=0):
- "reindent a block of text, so that the minimum indent is as given"
-
- if not already_untabified: aString=untabify(aString)
-
- l=indent_level(aString)[0]
- if indent==l: return aString
-
- r=[]
-
- append=r.append
-
- if indent > l:
- tab=' ' * (indent-l)
- for s in split(aString,'\n'): append(tab+s)
- else:
- l=l-indent
- for s in split(aString,'\n'): append(s[l:])
-
- return join(r,'\n')
-
-def indent_level(aString,
- indent_space=ts_regex.compile('\n\( *\)').search_group,
- ):
- '''\
- Find the minimum indentation for a string, not counting blank lines.
- '''
- start=0
- text='\n'+aString
- indent=l=len(text)
- while 1:
-
- ts_results = indent_space(text, (1,2), start)
- if ts_results:
- start, grps = ts_results
- i=len(grps[0])
- start=start+i+1
- if start < l and text[start] != '\n': # Skip blank lines
- if not i: return (0,aString)
- if i < indent: indent = i
- else:
- return (indent,aString)
-
-def paragraphs(list,start):
- l=len(list)
- level=list[start][0]
- i=start+1
- while i < l and list[i][0] > level: i=i+1
- return i-1-start
-
-def structure(list):
- if not list: return []
- i=0
- l=len(list)
- r=[]
- while i < l:
- sublen=paragraphs(list,i)
- i=i+1
- r.append((list[i-1][1],structure(list[i:i+sublen])))
- i=i+sublen
- return r
-
-
-class Table:
- CELL=' <TD ALIGN=CENTER COLSPAN=%i>%s</TD>\n'
- ROW=' <TR>\n%s </TR>\n'
- TABLE='\n<TABLE BORDER=1 CELLPADDING=2>\n%s</TABLE>'
-
- def create(self,aPar,td=ts_regex.compile(
- '[ \t\n]*||\([^\0|]*\)').match_group):
- '''parses a table and returns nested list representing the
- table'''
- self.table=[]
- text=filter(None,split(aPar,'\n'))
- for line in text:
- row=[]
- while 1:
- pos=td(line,(1,))
- if not pos:return 0
- row.append(pos[1])
- if pos[0]==len(line):break
- line=line[pos[0]:]
- self.table.append(row)
- return 1
-
- def html(self):
- '''Creates an HTML representation of table'''
- htmltable=[]
- for row in self.table:
- htmlrow=[]
- colspan=1
- for cell in row:
- if cell=='':
- colspan=colspan+1
- continue
- else:
- htmlrow.append(self.CELL%(colspan,cell))
- colspan=1
- htmltable.append(self.ROW%join(htmlrow,''))
- return self.TABLE%join(htmltable,'')
-
-optional_trailing_punctuation = '\(,\|\([.:?;]\)\)?'
-trailing_space = '\([\0- ]\)'
-not_punctuation_or_whitespace = "[^-,.?:\0- ]"
-table=Table()
-
-class StructuredText:
-
- """Model text as structured collection of paragraphs.
-
- Structure is implied by the indentation level.
-
- This class is intended as a base classes that do actual text
- output formatting.
- """
-
- def __init__(self, aStructuredString, level=0,
- paragraph_divider=regex.compile('\(\n *\)+\n'),
- ):
- '''Convert a structured text string into a structured text object.
-
- Aguments:
-
- aStructuredString -- The string to be parsed.
- level -- The level of top level headings to be created.
- '''
-
- aStructuredString = gsub(
- '\"\([^\"\0]+\)\":' # title: <"text":>
- + ('\([-:a-zA-Z0-9_,./?=@#~]+%s\)'
- % not_punctuation_or_whitespace)
- + optional_trailing_punctuation
- + trailing_space,
- '<a href="\\2">\\1</a>\\4\\5\\6',
- aStructuredString)
-
- aStructuredString = gsub(
- '\"\([^\"\0]+\)\",[\0- ]+' # title: <"text", >
- + ('\([a-zA-Z]*:[-:a-zA-Z0-9_,./?=@#~]*%s\)'
- % not_punctuation_or_whitespace)
- + optional_trailing_punctuation
- + trailing_space,
- '<a href="\\2">\\1</a>\\4\\5\\6',
- aStructuredString)
-
- protoless = find(aStructuredString, '<a href=":')
- if protoless != -1:
- aStructuredString = gsub('<a href=":', '<a href="',
- aStructuredString)
-
- self.level=level
- paragraphs=ts_regex.split(untabify(aStructuredString),
- paragraph_divider)
- paragraphs=map(indent_level,paragraphs)
-
- self.structure=structure(paragraphs)
-
-
- def __str__(self):
- return str(self.structure)
-
-
-ctag_prefix="\([\0- (]\|^\)"
-ctag_suffix="\([\0- ,.:;!?)]\|$\)"
-ctag_middle="[%s]\([^\0- %s][^%s]*[^\0- %s]\|[^%s]\)[%s]"
-ctag_middl2="[%s][%s]\([^\0- %s][^%s]*[^\0- %s]\|[^%s]\)[%s][%s]"
-
-def ctag(s,
- em=regex.compile(
- ctag_prefix+(ctag_middle % (("*",)*6) )+ctag_suffix),
- strong=regex.compile(
- ctag_prefix+(ctag_middl2 % (("*",)*8))+ctag_suffix),
- under=regex.compile(
- ctag_prefix+(ctag_middle % (("_",)*6) )+ctag_suffix),
- code=regex.compile(
- ctag_prefix+(ctag_middle % (("\'",)*6))+ctag_suffix),
- ):
- if s is None: s=''
- s=gsub(strong,'\\1<strong>\\2</strong>\\3',s)
- s=gsub(under, '\\1<u>\\2</u>\\3',s)
- s=gsub(code, '\\1<code>\\2</code>\\3',s)
- s=gsub(em, '\\1<em>\\2</em>\\3',s)
- return s
-
-class HTML(StructuredText):
-
- '''\
- An HTML structured text formatter.
- '''\
-
- def __str__(self,
- extra_dl=regex.compile("</dl>\n<dl>"),
- extra_ul=regex.compile("</ul>\n<ul>"),
- extra_ol=regex.compile("</ol>\n<ol>"),
- ):
- '''\
- Return an HTML string representation of the structured text data.
-
- '''
- s=self._str(self.structure,self.level)
- s=gsub(extra_dl,'\n',s)
- s=gsub(extra_ul,'\n',s)
- s=gsub(extra_ol,'\n',s)
- return s
-
- def ul(self, before, p, after):
- if p: p="<p>%s</p>" % strip(ctag(p))
- return ('%s<ul><li>%s\n%s\n</li></ul>\n'
- % (before,p,after))
-
- def ol(self, before, p, after):
- if p: p="<p>%s</p>" % strip(ctag(p))
- return ('%s<ol><li>%s\n%s\n</li></ol>\n'
- % (before,p,after))
-
- def dl(self, before, t, d, after):
- return ('%s<dl><dt>%s</dt><dd><p>%s</p>\n%s\n</dd></dl>\n'
- % (before,ctag(t),ctag(d),after))
-
- def head(self, before, t, level, d):
- if level > 0 and level < 6:
- return ('%s<h%d>%s</h%d>\n%s\n'
- % (before,level,strip(ctag(t)),level,d))
-
- t="<p><strong>%s</strong><p>" % strip(ctag(t))
- return ('%s<dl><dt>%s\n</dt><dd>%s\n</dd></dl>\n'
- % (before,t,d))
-
- def normal(self,before,p,after):
- return '%s<p>%s</p>\n%s\n' % (before,ctag(p),after)
+def html_with_references(text, level=1):
+ text = re.sub(
+ r'[\000\n]\.\. \[([0-9_%s-]+)\]' % letters,
+ r'\n <a name="\1">[\1]</a>',
+ text)
- def pre(self,structure,tagged=0):
- if not structure: return ''
- if tagged:
- r=''
- else:
- r='<PRE>\n'
- for s in structure:
- r="%s%s\n\n%s" % (r,html_quote(s[0]),self.pre(s[1],1))
- if not tagged: r=r+'</PRE>\n'
- return r
-
- def table(self,before,table,after):
- return '%s<p>%s</p>\n%s\n' % (before,ctag(table),after)
+ text = re.sub(
+ r'([\000- ,])\[(?P<ref>[0-9_%s-]+)\]([\000- ,.:])' % letters,
+ r'\1<a href="#\2">[\2]</a>\3',
+ text)
- def _str(self,structure,level,
- # Static
- bullet=ts_regex.compile('[ \t\n]*[o*-][ \t\n]+\([^\0]*\)'
- ).match_group,
- example=ts_regex.compile('[\0- ]examples?:[\0- ]*$'
- ).search,
- dl=ts_regex.compile('\([^\n]+\)[ \t]+--[ \t\n]+\([^\0]*\)'
- ).match_group,
- nl=ts_regex.compile('\n').search,
- ol=ts_regex.compile(
- '[ \t]*\(\([0-9]+\|[a-zA-Z]+\)[.)]\)+[ \t\n]+\([^\0]*\|$\)'
- ).match_group,
- olp=ts_regex.compile('[ \t]*([0-9]+)[ \t\n]+\([^\0]*\|$\)'
- ).match_group,
- ):
- r=''
- for s in structure:
-
- ts_results = bullet(s[0], (1,))
- if ts_results:
- p = ts_results[1]
- if s[0][-2:]=='::' and s[1]: ps=self.pre(s[1])
- else: ps=self._str(s[1],level)
- r=self.ul(r,p,ps)
- continue
- ts_results = ol(s[0], (3,))
- if ts_results:
- p = ts_results[1]
- if s[0][-2:]=='::' and s[1]: ps=self.pre(s[1])
- else: ps=self._str(s[1],level)
- r=self.ol(r,p,ps)
- continue
- ts_results = olp(s[0], (1,))
- if ts_results:
- p = ts_results[1]
- if s[0][-2:]=='::' and s[1]: ps=self.pre(s[1])
- else: ps=self._str(s[1],level)
- r=self.ol(r,p,ps)
- continue
- ts_results = dl(s[0], (1,2))
- if ts_results:
- t,d = ts_results[1]
- r=self.dl(r,t,d,self._str(s[1],level))
- continue
- if example(s[0]) >= 0 and s[1]:
- # Introduce an example, using pre tags:
- r=self.normal(r,s[0],self.pre(s[1]))
- continue
- if s[0][-2:]=='::' and s[1]:
- # Introduce an example, using pre tags:
- r=self.normal(r,s[0][:-1],self.pre(s[1]))
- continue
- if table.create(s[0]):
- ## table support.
- r=self.table(r,table.html(),self._str(s[1],level))
- continue
- else:
+ text = re.sub(
+ r'([\000- ,])\[([^]]+)\.html\]([\000- ,.:])',
+ r'\1<a href="\2.html">[\2]</a>\3',
+ text)
- if nl(s[0]) < 0 and s[1] and s[0][-1:] != ':':
- # Treat as a heading
- t=s[0]
- r=self.head(r,t,level,
- self._str(s[1],level and level+1))
- else:
- r=self.normal(r,s[0],self._str(s[1],level))
- return r
-
+ return HTML(text,level=level)
def html_quote(v,
character_entities=(
- (regex.compile('&'), '&amp;'),
- (regex.compile("<"), '&lt;' ),
- (regex.compile(">"), '&gt;' ),
- (regex.compile('"'), '&quot;')
+ (re.compile('&'), '&amp;'),
+ (re.compile("<"), '&lt;' ),
+ (re.compile(">"), '&gt;' ),
+ (re.compile('"'), '&quot;')
)): #"
text=str(v)
for re,name in character_entities:
- text=gsub(re,name,text)
+ text=re.sub(name,text)
return text
-def html_with_references(text, level=1):
- text = gsub(
- '[\0\n].. \[\([-_0-9_a-zA-Z-]+\)\]',
- '\n <a name="\\1">[\\1]</a>',
- text)
-
- text = gsub(
- '\([\0- ,]\)\[\([0-9_a-zA-Z-]+\)\]\([\0- ,.:]\)',
- '\\1<a href="#\\2">[\\2]</a>\\3',
- text)
-
- text = gsub(
- '\([\0- ,]\)\[\([^]]+\)\.html\]\([\0- ,.:]\)',
- '\\1<a href="\\2.html">[\\2]</a>\\3',
- text)
-
- return HTML(text,level=level)
-
-
-def main():
- import sys, getopt
-
- opts,args=getopt.getopt(sys.argv[1:],'tw')
-
- if args:
- [infile]=args
- s=open(infile,'r').read()
- else:
- s=sys.stdin.read()
- if opts:
+if __name__=='__main__':
+ import getopt
- if filter(lambda o: o[0]=='-w', opts):
- print 'Content-Type: text/html\n'
+ opts,args = getopt.getopt(sys.argv[1:],'',[])
- if s[:2]=='#!':
- s=ts_regex.sub('^#![^\n]+','',s)
+ for k,v in opts:
+ pass
- r=ts_regex.compile('\([\0-\n]*\n\)')
- ts_results = r.match_group(s, (1,))
- if ts_results:
- s=s[len(ts_results[1]):]
- s=str(html_with_references(s))
- if s[:4]=='<h1>':
- t=s[4:find(s,'</h1>')]
- s='''<html><head><title>%s</title>
- </head><body>
- %s
- </body></html>
- ''' % (t,s)
- print s
- else:
- print html_with_references(s)
-if __name__=="__main__": main()
+ for f in args:
+ print HTML(open(f).read())
DocumentWithImages=DocumentWithImages.DocumentWithImages()
HTMLWithImages=HTMLWithImages.HTMLWithImages()
-DocBookBook=DocBookClass.DocBookBook
+DocBookBook=DocBookClass.DocBookBook()
DocBookChapter=DocBookClass.DocBookChapter()
DocBookChapterWithFigures=DocBookClass.DocBookChapterWithFigures()
DocBookArticle=DocBookClass.DocBookArticle()
+++ /dev/null
-##############################################################################
-#
-# Zope Public License (ZPL) Version 1.0
-# -------------------------------------
-#
-# Copyright (c) Digital Creations. All rights reserved.
-#
-# This license has been certified as Open Source(tm).
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-# 1. Redistributions in source code must retain the above copyright
-# notice, this list of conditions, and the following disclaimer.
-#
-# 2. Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions, and the following disclaimer in
-# the documentation and/or other materials provided with the
-# distribution.
-#
-# 3. Digital Creations requests that attribution be given to Zope
-# in any manner possible. Zope includes a "Powered by Zope"
-# button that is installed by default. While it is not a license
-# violation to remove this button, it is requested that the
-# attribution remain. A significant investment has been put
-# into Zope, and this effort will continue if the Zope community
-# continues to grow. This is one way to assure that growth.
-#
-# 4. All advertising materials and documentation mentioning
-# features derived from or use of this software must display
-# the following acknowledgement:
-#
-# "This product includes software developed by Digital Creations
-# for use in the Z Object Publishing Environment
-# (http://www.zope.org/)."
-#
-# In the event that the product being advertised includes an
-# intact Zope distribution (with copyright and license included)
-# then this clause is waived.
-#
-# 5. Names associated with Zope or Digital Creations must not be used to
-# endorse or promote products derived from this software without
-# prior written permission from Digital Creations.
-#
-# 6. Modified redistributions of any form whatsoever must retain
-# the following acknowledgment:
-#
-# "This product includes software developed by Digital Creations
-# for use in the Z Object Publishing Environment
-# (http://www.zope.org/)."
-#
-# Intact (re-)distributions of any official Zope release do not
-# require an external acknowledgement.
-#
-# 7. Modifications are encouraged but must be packaged separately as
-# patches to official Zope releases. Distributions that do not
-# clearly separate the patches from the original work must be clearly
-# labeled as unofficial distributions. Modifications which do not
-# carry the name Zope may be packaged in any form, as long as they
-# conform to all of the clauses above.
-#
-#
-# Disclaimer
-#
-# THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
-# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-# SUCH DAMAGE.
-#
-#
-# This software consists of contributions made by Digital Creations and
-# many individuals on behalf of Digital Creations. Specific
-# attributions are listed in the accompanying credits file.
-#
-##############################################################################
-"""Provide a thread-safe interface to regex
-"""
-import regex, regsub #, Sync
-from regex import *
-from regsub import split, sub, gsub, splitx, capwords
-
-try:
- import thread
-except:
- class allocate_lock:
- def acquire(*args): pass
- def release(*args): pass
-
-else:
- class SafeFunction:
- _l=thread.allocate_lock()
- _a=_l.acquire
- _r=_l.release
-
- def __init__(self, f):
- self._f=f
-
- def __call__(self, *args, **kw):
- self._a()
- try: return apply(self._f, args, kw)
- finally: self._r()
-
- split=SafeFunction(split)
- sub=SafeFunction(sub)
- gsub=SafeFunction(gsub)
- splitx=SafeFunction(splitx)
- capwords=SafeFunction(capwords)
-
- allocate_lock=thread.allocate_lock
-
-class compile:
-
- _r=None
- groupindex=None
-
- def __init__(self, *args):
- self._r=r=apply(regex.compile,args)
- self._init(r)
-
- def _init(self, r):
- lock=allocate_lock()
- self.__a=lock.acquire
- self.__r=lock.release
- self.translate=r.translate
- self.givenpat=r.givenpat
- self.realpat=r.realpat
-
- def match(self, string, pos=0):
- self.__a()
- try: return self._r.match(string, pos)
- finally: self.__r()
-
- def search(self, string, pos=0):
- self.__a()
- try: return self._r.search(string, pos)
- finally: self.__r()
-
- def search_group(self, str, group, pos=0):
- """Search a string for a pattern.
-
- If the pattern was not found, then None is returned,
- otherwise, the location where the pattern was found,
- as well as any specified group are returned.
- """
- self.__a()
- try:
- r=self._r
- l=r.search(str, pos)
- if l < 0: return None
- return l, apply(r.group, group)
- finally: self.__r()
-
- def match_group(self, str, group, pos=0):
- """Match a pattern against a string
-
- If the string does not match the pattern, then None is
- returned, otherwise, the length of the match, as well
- as any specified group are returned.
- """
- self.__a()
- try:
- r=self._r
- l=r.match(str, pos)
- if l < 0: return None
- return l, apply(r.group, group)
- finally: self.__r()
-
- def search_regs(self, str, pos=0):
- """Search a string for a pattern.
-
- If the pattern was not found, then None is returned,
- otherwise, the 'regs' attribute of the expression is
- returned.
- """
- self.__a()
- try:
- r=self._r
- r.search(str, pos)
- return r.regs
- finally: self.__r()
-
- def match_regs(self, str, pos=0):
- """Match a pattern against a string
-
- If the string does not match the pattern, then None is
- returned, otherwise, the 'regs' attribute of the expression is
- returned.
- """
- self.__a()
- try:
- r=self._r
- r.match(str, pos)
- return r.regs
- finally: self.__r()
-
-class symcomp(compile):
-
- def __init__(self, *args):
- self._r=r=apply(regex.symcomp,args)
- self._init(r)
- self.groupindex=r.groupindex
-
-
-
-
-
def LoadStxText(self, text):
# Old ST
- html = str(StructuredText.html_with_references(text))
+ #html = str(StructuredText.html_with_references(text))
# NG Version
- #st = StructuredText.Basic(text)
- #doc = StructuredText.Document(st)
- #html = StructuredText.HTML(doc)
+ st = StructuredText.Basic(text)
+ doc = StructuredText.Document(st)
+ html = StructuredText.HTMLNG(doc)
self.htmlWin.SetPage(html)
self.editWin.SetValue(text)