[wxWidgets.git] / wxPython / samples / stxview / StructuredText / StructuredText.py

#! /usr/bin/env python -- # -*- python -*-
##############################################################################
# 
# Zope Public License (ZPL) Version 1.0
# -------------------------------------
# 
# Copyright (c) Digital Creations.  All rights reserved.
# 
# This license has been certified as Open Source(tm).
# 
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# 
# 1. Redistributions in source code must retain the above copyright
#    notice, this list of conditions, and the following disclaimer.
# 
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions, and the following disclaimer in
#    the documentation and/or other materials provided with the
#    distribution.
# 
# 3. Digital Creations requests that attribution be given to Zope
#    in any manner possible. Zope includes a "Powered by Zope"
#    button that is installed by default. While it is not a license
#    violation to remove this button, it is requested that the
#    attribution remain. A significant investment has been put
#    into Zope, and this effort will continue if the Zope community
#    continues to grow. This is one way to assure that growth.
# 
# 4. All advertising materials and documentation mentioning
#    features derived from or use of this software must display
#    the following acknowledgement:
# 
#      "This product includes software developed by Digital Creations
#      for use in the Z Object Publishing Environment
#      (http://www.zope.org/)."
# 
#    In the event that the product being advertised includes an
#    intact Zope distribution (with copyright and license included)
#    then this clause is waived.
# 
# 5. Names associated with Zope or Digital Creations must not be used to
#    endorse or promote products derived from this software without
#    prior written permission from Digital Creations.
# 
# 6. Modified redistributions of any form whatsoever must retain
#    the following acknowledgment:
# 
#      "This product includes software developed by Digital Creations
#      for use in the Z Object Publishing Environment
#      (http://www.zope.org/)."
# 
#    Intact (re-)distributions of any official Zope release do not
#    require an external acknowledgement.
# 
# 7. Modifications are encouraged but must be packaged separately as
#    patches to official Zope releases.  Distributions that do not
#    clearly separate the patches from the original work must be clearly
#    labeled as unofficial distributions.  Modifications which do not
#    carry the name Zope may be packaged in any form, as long as they
#    conform to all of the clauses above.
# 
# 
# Disclaimer
# 
#   THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
#   EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
#   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
#   PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
#   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
#   USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
#   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
#   OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
#   OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
#   SUCH DAMAGE.
# 
# 
# This software consists of contributions made by Digital Creations and
# many individuals on behalf of Digital Creations.  Specific
# attributions are listed in the accompanying credits file.
# 
##############################################################################
'''Structured Text Manipulation

Parse a structured text string into a form that can be used with 
structured formats, like html.

Structured text is text that uses indentation and simple
symbology to indicate the structure of a document.  

A structured string consists of a sequence of paragraphs separated by
one or more blank lines.  Each paragraph has a level which is defined
as the minimum indentation of the paragraph.  A paragraph is a
sub-paragraph of another paragraph if the other paragraph is the last
preceding paragraph that has a lower level.

Special symbology is used to indicate special constructs:

- A single-line paragraph whose immediately succeeding paragraphs are lower
  level is treated as a header.

- A paragraph that begins with a '-', '*', or 'o' is treated as an
  unordered list (bullet) element.

- A paragraph that begins with a sequence of digits followed by a
  white-space character is treated as an ordered list element.

- A paragraph that begins with a sequence of sequences, where each
  sequence is a sequence of digits or a sequence of letters followed
  by a period, is treated as an ordered list element.

- A paragraph with a first line that contains some text, followed by
  some white-space and '--' is treated as
  a descriptive list element. The leading text is treated as the
  element title.

- Sub-paragraphs of a paragraph that ends in the word 'example' or the
  word 'examples', or '::' is treated as example code and is output as is.

- Text enclosed single quotes (with white-space to the left of the
  first quote and whitespace or puctuation to the right of the second quote)
  is treated as example code.

- Text surrounded by '*' characters (with white-space to the left of the
  first '*' and whitespace or puctuation to the right of the second '*')
  is emphasized.

- Text surrounded by '**' characters (with white-space to the left of the
  first '**' and whitespace or puctuation to the right of the second '**')
  is made strong.

- Text surrounded by '_' underscore characters (with whitespace to the left 
  and whitespace or punctuation to the right) is made underlined.

- Text encloded by double quotes followed by a colon, a URL, and concluded
  by punctuation plus white space, *or* just white space, is treated as a
  hyper link. For example:

    "Zope":http://www.zope.org/ is ...

  Is interpreted as '<a href="http://www.zope.org/">Zope</a> is ....'
  Note: This works for relative as well as absolute URLs.

- Text enclosed by double quotes followed by a comma, one or more spaces,
  an absolute URL and concluded by punctuation plus white space, or just
  white space, is treated as a hyper link. For example: 

    "mail me", mailto:amos@digicool.com.

  Is interpreted as '<a href="mailto:amos@digicool.com">mail me</a>.' 

- Text enclosed in brackets which consists only of letters, digits,
  underscores and dashes is treated as hyper links within the document.
  For example:
    
    As demonstrated by Smith [12] this technique is quite effective.

  Is interpreted as '... by Smith <a href="#12">[12]</a> this ...'. Together
  with the next rule this allows easy coding of references or end notes.

- Text enclosed in brackets which is preceded by the start of a line, two
  periods and a space is treated as a named link. For example:

    .. [12] "Effective Techniques" Smith, Joe ... 

  Is interpreted as '<a name="12">[12]</a> "Effective Techniques" ...'.
  Together with the previous rule this allows easy coding of references or
  end notes. 


- A paragraph that has blocks of text enclosed in '||' is treated as a
  table. The text blocks correspond to table cells and table rows are
  denoted by newlines. By default the cells are center aligned. A cell
  can span more than one column by preceding a block of text with an
  equivalent number of cell separators '||'. Newlines and '|' cannot
  be a part of the cell text. For example:

      |||| **Ingredients** ||
      || *Name* || *Amount* ||
      ||Spam||10||
      ||Eggs||3||

  is interpreted as::

    <TABLE BORDER=1 CELLPADDING=2>
     <TR>
      <TD ALIGN=CENTER COLSPAN=2> <strong>Ingredients</strong> </TD>
     </TR>
     <TR>
      <TD ALIGN=CENTER COLSPAN=1> <em>Name</em> </TD>
      <TD ALIGN=CENTER COLSPAN=1> <em>Amount</em> </TD>
     </TR>
     <TR>
      <TD ALIGN=CENTER COLSPAN=1>Spam</TD>
      <TD ALIGN=CENTER COLSPAN=1>10</TD>
     </TR>
     <TR>
      <TD ALIGN=CENTER COLSPAN=1>Eggs</TD>
      <TD ALIGN=CENTER COLSPAN=1>3</TD>
     </TR>
    </TABLE>

    
$Id$'''
#     Copyright 
#
#       Copyright 1996 Digital Creations, L.C., 910 Princess Anne
#       Street, Suite 300, Fredericksburg, Virginia 22401 U.S.A. All
#       rights reserved.  Copyright in this software is owned by DCLC,
#       unless otherwise indicated. Permission to use, copy and
#       distribute this software is hereby granted, provided that the
#       above copyright notice appear in all copies and that both that
#       copyright notice and this permission notice appear. Note that
#       any product, process or technology described in this software
#       may be the subject of other Intellectual Property rights
#       reserved by Digital Creations, L.C. and are not licensed
#       hereunder.
#
#     Trademarks 
#
#       Digital Creations & DCLC, are trademarks of Digital Creations, L.C..
#       All other trademarks are owned by their respective companies. 
#
#     No Warranty 
#
#       The software is provided "as is" without warranty of any kind,
#       either express or implied, including, but not limited to, the
#       implied warranties of merchantability, fitness for a particular
#       purpose, or non-infringement. This software could include
#       technical inaccuracies or typographical errors. Changes are
#       periodically made to the software; these changes will be
#       incorporated in new editions of the software. DCLC may make
#       improvements and/or changes in this software at any time
#       without notice.
#
#     Limitation Of Liability 
#
#       In no event will DCLC be liable for direct, indirect, special,
#       incidental, economic, cover, or consequential damages arising
#       out of the use of or inability to use this software even if
#       advised of the possibility of such damages. Some states do not
#       allow the exclusion or limitation of implied warranties or
#       limitation of liability for incidental or consequential
#       damages, so the above limitation or exclusion may not apply to
#       you.
#  
#
# If you have questions regarding this software,
# contact:
#
#   Jim Fulton, jim@digicool.com
#
#   (540) 371-6909
#
# $Log$
# Revision 1.1  2001/03/10 05:07:20  RD
# Added some simple sample apps
#
# Revision 1.27  2000/04/21 13:38:10  jim
# Added closing list tags. Woo hoo!
#
# Revision 1.26  2000/03/14 17:22:04  brian
# Allow ~ in hrefs.
#
# Revision 1.25  2000/02/17 00:53:24  klm
# HTML._str(): We were getting preformatted examples rendered twice,
# second time without preformatting.  Problem was a missing 'continue'
# in one of the cases.
#
# Revision 1.24  1999/12/13 16:32:48  klm
# Incorporated pavlos christoforou's mods to handle simple tables.  From
# his web page at http://www.zope.org/Members/gaaros/StructuredText:
#
#   Structured Text module with table support
#
#   A paragraph that has blocks of text enclosed in '||' is treated as a
#   table. The text blocks correspond to table cells and table rows are
#   denoted by newlines. By default the cells are center aligned. You can
#   change the defaults by modifying the CELL,ROW and TABLE class
#   attributes in class Table. A cell can span more than one column by
#   preceding a block of text with an equivalent number of cell separators
#   '||'. Newlines and '|' cannot be a part of the cell text. If you need
#   newlines use <BR>. For example:
#
#        |||| **Ingredients** ||
#        || *Name* || *Amount* ||
#        ||Spam||10||
#        ||Eggs||3||
#
# Revision 1.23  1999/08/03 20:49:05  jim
# Fixed to allow list elements to introduce examples.
#
# Restructured _str using continue to avoid excessive nesting.
#
# Revision 1.22  1999/08/02 22:01:28  jim
# Fixed a bunch of bugs introduced by making ts_regex actually thread
# safe.
#
# Also localized a bunch of regular expressions
# using "static" variables (aka always default arguments).
#
# Revision 1.21  1999/08/02 13:26:52  jim
# paragraph_divider needs to be a regular (thread-unsafe) regex
# since it gets passed to ts_regex.split, which is thread-safe
# and wants to use regs.
#
# Revision 1.20  1999/07/21 13:33:59  jim
# untabified.
#
# Revision 1.19  1999/07/15 16:43:15  jim
# Checked in Scott Robertson's thread-safety fixes.
#
# Revision 1.18  1999/03/24 00:03:18  klm
# Provide for relative links, eg <a href="file_in_same_dir">whatever</a>,
# as:
#
#   "whatever", :file_in_same_dir
#
# or
#
#   "whatever"::file_in_same_dir
#
# .__init__(): relax the second gsub, using a '*' instead of a '+', so
# the stuff before the ':' can be missing, and also do postprocessing so
# any resulting '<a href=":file_in_same_dir">'s have the superfluous ':'
# removed.  *Seems* good!
#
# Revision 1.17  1999/03/12 23:21:39  klm
# Gratuituous checkin to test my cvs *update* logging hook.
#
# Revision 1.16  1999/03/12 17:12:12  klm
# Added support for underlined elements, in the obvious way (and
# included an entry in the module docstring for it).
#
# Added an entry in the module docstring describing what i *guess* is
# the criterion for identifying header elements.  (I'm going to have to
# delve into and understand the framework a bit better before *knowing*
# this is the case.)
#
# Revision 1.15  1999/03/11 22:40:18  klm
# Handle links that include '#' named links.
#
# Revision 1.14  1999/03/11 01:35:19  klm
# Fixed a small typo, and refined the module docstring link example, in
# order to do a checkin to exercise the CVS repository mirroring.  Might
# as well include my last checkin message, with some substantial stuff:
#
# Links are now recognized whether or not the candidate strings are
# terminated with punctuation before the trailing whitespace.  The old
# form - trailing punctuation then whitespace - is preserved, but the
# punctuation is now unnecessary.
#
# The regular expressions are a bit more complicated, but i've factored
# out the common parts and but them in variables with suggestive names,
# which may make them easier to understand.
#
# Revision 1.13  1999/03/11 00:49:57  klm
# Links are now recognized whether or not the candidate strings are
# terminated with punctuation before the trailing whitespace.  The old
# form - trailing punctuation then whitespace - is preserved, but the
# punctuation is now unnecessary.
#
# The regular expressions are a bit more complicated, but i've factored
# out the common parts and but them in variables with suggestive names,
# which may make them easier to understand.
#
# Revision 1.12  1999/03/10 00:15:46  klm
# Committing with version 1.0 of the license.
#
# Revision 1.11  1999/02/08 18:13:12  klm
# Trival checkin (spelling fix "preceedeing" -> "preceding" and similar)
# to see what pitfalls my environment presents to accomplishing a
# successful checkin.  (It turns out that i can't do it from aldous because
# the new version of cvs doesn't support the '-t' and '-f' options in the
# cvswrappers file...)
#
# Revision 1.10  1998/12/29 22:30:43  amos
# Improved doc string to describe hyper link and references capabilities.
#
# Revision 1.9  1998/12/04 20:15:31  jim
# Detabification and new copyright.
#
# Revision 1.8  1998/02/27 18:45:22  jim
# Various updates, including new indentation utilities.
#
# Revision 1.7  1997/12/12 15:39:54  jim
# Added level as argument for html_with_references.
#
# Revision 1.6  1997/12/12 15:27:25  jim
# Added additional pattern matching for HTML references.
#
# Revision 1.5  1997/03/08 16:01:03  jim
# Moved code to recognize: "foo bar", url.
# into object initializer, so it gets applied in all cases.
#
# Revision 1.4  1997/02/17 23:36:35  jim
# Added support for "foo title", http:/foohost/foo
#
# Revision 1.3  1996/12/06 15:57:37  jim
# Fixed bugs in character tags.
#
# Added -t command-line option to generate title if:
#
#    - The first paragraph is one line (i.e. a heading) and
#
#    - All other paragraphs are indented.
#
# Revision 1.2  1996/10/28 13:56:02  jim
# Fixed bug in ordered lists.
# Added option for either HTML-style headings or descriptive-list style
# headings.
#
# Revision 1.1  1996/10/23 14:00:45  jim
# *** empty log message ***
#
#
#

import ts_regex, regex
from ts_regex import gsub
from string import split, join, strip, find

def untabify(aString,
             indent_tab=ts_regex.compile('\(\n\|^\)\( *\)\t').search_group,
             ):
    '''\
    Convert indentation tabs to spaces.
    '''
    result=''
    rest=aString
    while 1:
        ts_results = indent_tab(rest, (1,2))
        if ts_results:
            start, grps = ts_results
            lnl=len(grps[0])
            indent=len(grps[1])
            result=result+rest[:start]
            rest="\n%s%s" % (' ' * ((indent/8+1)*8),
                             rest[start+indent+1+lnl:])
        else:
            return result+rest

def indent(aString, indent=2):
    """Indent a string the given number of spaces"""
    r=split(untabify(aString),'\n')
    if not r: return ''
    if not r[-1]: del r[-1]
    tab=' '*level
    return "%s%s\n" % (tab,join(r,'\n'+tab))

def reindent(aString, indent=2, already_untabified=0):
    "reindent a block of text, so that the minimum indent is as given"

    if not already_untabified: aString=untabify(aString)

    l=indent_level(aString)[0]
    if indent==l: return aString

    r=[]

    append=r.append

    if indent > l:
        tab=' ' * (indent-l)
        for s in split(aString,'\n'): append(tab+s)
    else:
        l=l-indent
        for s in split(aString,'\n'): append(s[l:])

    return join(r,'\n')

def indent_level(aString,
                 indent_space=ts_regex.compile('\n\( *\)').search_group,
                 ):
    '''\
    Find the minimum indentation for a string, not counting blank lines.
    '''
    start=0
    text='\n'+aString
    indent=l=len(text)
    while 1:

        ts_results = indent_space(text, (1,2), start)
        if ts_results:
            start, grps = ts_results
            i=len(grps[0])
            start=start+i+1
            if start < l and text[start] != '\n':       # Skip blank lines
                if not i: return (0,aString)
                if i < indent: indent = i
        else:
            return (indent,aString)

def paragraphs(list,start):
    l=len(list)
    level=list[start][0]
    i=start+1
    while i < l and list[i][0] > level: i=i+1
    return i-1-start

def structure(list):
    if not list: return []
    i=0
    l=len(list)
    r=[]
    while i < l:
        sublen=paragraphs(list,i)
        i=i+1
        r.append((list[i-1][1],structure(list[i:i+sublen])))
        i=i+sublen
    return r


class Table:
    CELL='  <TD ALIGN=CENTER COLSPAN=%i>%s</TD>\n'
    ROW=' <TR>\n%s </TR>\n'
    TABLE='\n<TABLE BORDER=1 CELLPADDING=2>\n%s</TABLE>'
    
    def create(self,aPar,td=ts_regex.compile(
        '[ \t\n]*||\([^\0|]*\)').match_group):
        '''parses a table and returns nested list representing the
        table'''
        self.table=[]
        text=filter(None,split(aPar,'\n'))
        for line in text:
            row=[]
            while 1:
                pos=td(line,(1,))
                if not pos:return 0
                row.append(pos[1])
                if pos[0]==len(line):break
                line=line[pos[0]:]
            self.table.append(row)
        return 1

    def html(self):
        '''Creates an HTML representation of table'''
        htmltable=[]
        for row in self.table:
            htmlrow=[]
            colspan=1
            for cell in row:
                if cell=='':
                    colspan=colspan+1
                    continue
                else:
                    htmlrow.append(self.CELL%(colspan,cell))
                    colspan=1
            htmltable.append(self.ROW%join(htmlrow,''))
        return self.TABLE%join(htmltable,'')

optional_trailing_punctuation = '\(,\|\([.:?;]\)\)?'
trailing_space = '\([\0- ]\)'
not_punctuation_or_whitespace = "[^-,.?:\0- ]"
table=Table()

class StructuredText:

    """Model text as structured collection of paragraphs.

    Structure is implied by the indentation level.

    This class is intended as a base classes that do actual text
    output formatting.
    """

    def __init__(self, aStructuredString, level=0,
                 paragraph_divider=regex.compile('\(\n *\)+\n'),
                 ):
        '''Convert a structured text string into a structured text object.

        Aguments:

          aStructuredString -- The string to be parsed.
          level -- The level of top level headings to be created.
        '''

        aStructuredString = gsub(
            '\"\([^\"\0]+\)\":'         # title: <"text":>
            + ('\([-:a-zA-Z0-9_,./?=@#~]+%s\)'
               % not_punctuation_or_whitespace)
            + optional_trailing_punctuation
            + trailing_space,
            '<a href="\\2">\\1</a>\\4\\5\\6',
            aStructuredString)

        aStructuredString = gsub(
            '\"\([^\"\0]+\)\",[\0- ]+'            # title: <"text", >
            + ('\([a-zA-Z]*:[-:a-zA-Z0-9_,./?=@#~]*%s\)'
               % not_punctuation_or_whitespace)
            + optional_trailing_punctuation
            + trailing_space,
            '<a href="\\2">\\1</a>\\4\\5\\6',
            aStructuredString)

        protoless = find(aStructuredString, '<a href=":')
        if protoless != -1:
            aStructuredString = gsub('<a href=":', '<a href="',
                                     aStructuredString)

        self.level=level
        paragraphs=ts_regex.split(untabify(aStructuredString),
                                  paragraph_divider)
        paragraphs=map(indent_level,paragraphs)

        self.structure=structure(paragraphs)


    def __str__(self):
        return str(self.structure)


ctag_prefix="\([\0- (]\|^\)"
ctag_suffix="\([\0- ,.:;!?)]\|$\)"
ctag_middle="[%s]\([^\0- %s][^%s]*[^\0- %s]\|[^%s]\)[%s]"
ctag_middl2="[%s][%s]\([^\0- %s][^%s]*[^\0- %s]\|[^%s]\)[%s][%s]"
        
def ctag(s,
         em=regex.compile(
             ctag_prefix+(ctag_middle % (("*",)*6) )+ctag_suffix),
         strong=regex.compile(
             ctag_prefix+(ctag_middl2 % (("*",)*8))+ctag_suffix),
         under=regex.compile(
             ctag_prefix+(ctag_middle % (("_",)*6) )+ctag_suffix),
         code=regex.compile(
             ctag_prefix+(ctag_middle % (("\'",)*6))+ctag_suffix),
         ):
    if s is None: s=''
    s=gsub(strong,'\\1<strong>\\2</strong>\\3',s)
    s=gsub(under, '\\1<u>\\2</u>\\3',s)
    s=gsub(code,  '\\1<code>\\2</code>\\3',s)
    s=gsub(em,    '\\1<em>\\2</em>\\3',s)
    return s    

class HTML(StructuredText):

    '''\
    An HTML structured text formatter.
    '''\

    def __str__(self,
                extra_dl=regex.compile("</dl>\n<dl>"),
                extra_ul=regex.compile("</ul>\n<ul>"),
                extra_ol=regex.compile("</ol>\n<ol>"),
                ):
        '''\
        Return an HTML string representation of the structured text data.

        '''
        s=self._str(self.structure,self.level)
        s=gsub(extra_dl,'\n',s)
        s=gsub(extra_ul,'\n',s)
        s=gsub(extra_ol,'\n',s)
        return s

    def ul(self, before, p, after):
        if p: p="<p>%s</p>" % strip(ctag(p))
        return ('%s<ul><li>%s\n%s\n</li></ul>\n'
                % (before,p,after))

    def ol(self, before, p, after):
        if p: p="<p>%s</p>" % strip(ctag(p))
        return ('%s<ol><li>%s\n%s\n</li></ol>\n'
                % (before,p,after))

    def dl(self, before, t, d, after):
        return ('%s<dl><dt>%s</dt><dd><p>%s</p>\n%s\n</dd></dl>\n'
                % (before,ctag(t),ctag(d),after))

    def head(self, before, t, level, d):
        if level > 0 and level < 6:
            return ('%s<h%d>%s</h%d>\n%s\n'
                    % (before,level,strip(ctag(t)),level,d))
            
        t="<p><strong>%s</strong><p>" % strip(ctag(t))
        return ('%s<dl><dt>%s\n</dt><dd>%s\n</dd></dl>\n'
                % (before,t,d))

    def normal(self,before,p,after):
        return '%s<p>%s</p>\n%s\n' % (before,ctag(p),after)

    def pre(self,structure,tagged=0):
        if not structure: return ''
        if tagged:
            r=''
        else:
            r='<PRE>\n'
        for s in structure:
            r="%s%s\n\n%s" % (r,html_quote(s[0]),self.pre(s[1],1))
        if not tagged: r=r+'</PRE>\n'
        return r
    
    def table(self,before,table,after):
        return '%s<p>%s</p>\n%s\n' % (before,ctag(table),after)
    
    def _str(self,structure,level,
             # Static
             bullet=ts_regex.compile('[ \t\n]*[o*-][ \t\n]+\([^\0]*\)'
                                     ).match_group,
             example=ts_regex.compile('[\0- ]examples?:[\0- ]*$'
                                      ).search,
             dl=ts_regex.compile('\([^\n]+\)[ \t]+--[ \t\n]+\([^\0]*\)'
                                 ).match_group,
             nl=ts_regex.compile('\n').search,
             ol=ts_regex.compile(
                 '[ \t]*\(\([0-9]+\|[a-zA-Z]+\)[.)]\)+[ \t\n]+\([^\0]*\|$\)'
                 ).match_group,
             olp=ts_regex.compile('[ \t]*([0-9]+)[ \t\n]+\([^\0]*\|$\)'
                                  ).match_group,
             ):
        r=''
        for s in structure:

            ts_results = bullet(s[0], (1,))
            if ts_results:
                p = ts_results[1]
                if s[0][-2:]=='::' and s[1]: ps=self.pre(s[1])
                else: ps=self._str(s[1],level)
                r=self.ul(r,p,ps)
                continue
            ts_results = ol(s[0], (3,))
            if ts_results:
                p = ts_results[1]
                if s[0][-2:]=='::' and s[1]: ps=self.pre(s[1])
                else: ps=self._str(s[1],level)
                r=self.ol(r,p,ps)
                continue
            ts_results = olp(s[0], (1,))
            if ts_results:
                p = ts_results[1]
                if s[0][-2:]=='::' and s[1]: ps=self.pre(s[1])
                else: ps=self._str(s[1],level)
                r=self.ol(r,p,ps)
                continue
            ts_results = dl(s[0], (1,2))
            if ts_results:
                t,d = ts_results[1]
                r=self.dl(r,t,d,self._str(s[1],level))
                continue
            if example(s[0]) >= 0 and s[1]:
                # Introduce an example, using pre tags:
                r=self.normal(r,s[0],self.pre(s[1]))
                continue
            if s[0][-2:]=='::' and s[1]:
                # Introduce an example, using pre tags:
                r=self.normal(r,s[0][:-1],self.pre(s[1]))
                continue
            if table.create(s[0]):
                ## table support.
                r=self.table(r,table.html(),self._str(s[1],level))
                continue
            else:

                if nl(s[0]) < 0 and s[1] and s[0][-1:] != ':':
                    # Treat as a heading
                    t=s[0]
                    r=self.head(r,t,level,
                                self._str(s[1],level and level+1))
                else:
                    r=self.normal(r,s[0],self._str(s[1],level))
        return r
        

def html_quote(v,
               character_entities=(
                       (regex.compile('&'), '&amp;'),
                       (regex.compile("<"), '&lt;' ),
                       (regex.compile(">"), '&gt;' ),
                       (regex.compile('"'), '&quot;')
                       )): #"
        text=str(v)
        for re,name in character_entities:
            text=gsub(re,name,text)
        return text

def html_with_references(text, level=1):
    text = gsub(
        '[\0\n].. \[\([-_0-9_a-zA-Z-]+\)\]',
        '\n  <a name="\\1">[\\1]</a>',
        text)
    
    text = gsub(
        '\([\0- ,]\)\[\([0-9_a-zA-Z-]+\)\]\([\0- ,.:]\)',
        '\\1<a href="#\\2">[\\2]</a>\\3',
        text)
    
    text = gsub(
        '\([\0- ,]\)\[\([^]]+\)\.html\]\([\0- ,.:]\)',
        '\\1<a href="\\2.html">[\\2]</a>\\3',
        text)

    return HTML(text,level=level)
    

def main():
    import sys, getopt

    opts,args=getopt.getopt(sys.argv[1:],'tw')

    if args:
        [infile]=args
        s=open(infile,'r').read()
    else:
        s=sys.stdin.read()

    if opts:

        if filter(lambda o: o[0]=='-w', opts):
            print 'Content-Type: text/html\n'

        if s[:2]=='#!':
            s=ts_regex.sub('^#![^\n]+','',s)

        r=ts_regex.compile('\([\0-\n]*\n\)')
        ts_results = r.match_group(s, (1,))
        if ts_results:
            s=s[len(ts_results[1]):]
        s=str(html_with_references(s))
        if s[:4]=='<h1>':
            t=s[4:find(s,'</h1>')]
            s='''<html><head><title>%s</title>
            </head><body>
            %s
            </body></html>
            ''' % (t,s)
        print s
    else:
        print html_with_references(s)

if __name__=="__main__": main()