[wxWidgets.git] / wxPython / samples / stxview / StructuredText / ClassicDocumentClass.py

##############################################################################
# 
# Zope Public License (ZPL) Version 1.0
# -------------------------------------
# 
# Copyright (c) Digital Creations.  All rights reserved.
# 
# This license has been certified as Open Source(tm).
# 
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# 
# 1. Redistributions in source code must retain the above copyright
#    notice, this list of conditions, and the following disclaimer.
# 
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions, and the following disclaimer in
#    the documentation and/or other materials provided with the
#    distribution.
# 
# 3. Digital Creations requests that attribution be given to Zope
#    in any manner possible. Zope includes a "Powered by Zope"
#    button that is installed by default. While it is not a license
#    violation to remove this button, it is requested that the
#    attribution remain. A significant investment has been put
#    into Zope, and this effort will continue if the Zope community
#    continues to grow. This is one way to assure that growth.
# 
# 4. All advertising materials and documentation mentioning
#    features derived from or use of this software must display
#    the following acknowledgement:
# 
#       "This product includes software developed by Digital Creations
#       for use in the Z Object Publishing Environment
#       (http://www.zope.org/)."
# 
#    In the event that the product being advertised includes an
#    intact Zope distribution (with copyright and license included)
#    then this clause is waived.
# 
# 5. Names associated with Zope or Digital Creations must not be used to
#    endorse or promote products derived from this software without
#    prior written permission from Digital Creations.
# 
# 6. Modified redistributions of any form whatsoever must retain
#    the following acknowledgment:
# 
#       "This product includes software developed by Digital Creations
#       for use in the Z Object Publishing Environment
#       (http://www.zope.org/)."
# 
#    Intact (re-)distributions of any official Zope release do not
#    require an external acknowledgement.
# 
# 7. Modifications are encouraged but must be packaged separately as
#    patches to official Zope releases.  Distributions that do not
#    clearly separate the patches from the original work must be clearly
#    labeled as unofficial distributions.  Modifications which do not
#    carry the name Zope may be packaged in any form, as long as they
#    conform to all of the clauses above.
# 
# 
# Disclaimer
# 
#    THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
#    EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
#    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
#    PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
#    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
#    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
#    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
#    USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
#    ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
#    OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
#    OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
#    SUCH DAMAGE.
# 
# 
# This software consists of contributions made by Digital Creations and
# many individuals on behalf of Digital Creations.  Specific
# attributions are listed in the accompanying credits file.
# 
##############################################################################

import re, ST, STDOM
from string import split, join, replace, expandtabs, strip, find

StringType=type('')
ListType=type([])

class StructuredTextExample(ST.StructuredTextParagraph):
    """Represents a section of document with literal text, as for examples.

    The node values of the given sub-paragraphs are joined with blank
    lines into a single source string.  The resulting paragraph is
    created with no sub-paragraphs of its own and never offers text
    for inline colorizing.
    """

    def __init__(self, subs, **kw):
        """Build the example from the nodes in *subs*.

        subs -- sequence of nodes that provide getNodeValue()
        kw   -- keyword attributes forwarded to StructuredTextParagraph
        """
        text = '\n\n'.join([s.getNodeValue() for s in subs])
        # Extended call syntax replaces the deprecated apply() builtin.
        ST.StructuredTextParagraph.__init__(self, text, (), **kw)

    def getColorizableTexts(self):
        """Return an empty tuple: examples are never colorized."""
        return ()

    def setColorizableTexts(self, src):
        """Ignore *src*; the example text is kept literal."""
        pass

class StructuredTextBullet(ST.StructuredTextParagraph):
    """Represents a bulleted list item.

    DocumentClass.doc_bullet creates these from paragraphs whose text
    starts with a '-', '*' or 'o' marker followed by whitespace; the
    matched marker text is passed as the 'bullet' keyword argument.
    """

class StructuredTextNumbered(ST.StructuredTextParagraph):
    """Represents a numbered list item.

    DocumentClass.doc_numbered creates these from paragraphs whose text
    starts with letters or digits followed by a period, or with digits
    followed by whitespace; the matched prefix is passed as the
    'number' keyword argument.
    """

class StructuredTextDescriptionTitle(ST.StructuredTextParagraph):
    """Represents the title part of a description item.

    StructuredTextDescription.getChildren builds one of these from the
    description's title text.
    """

class StructuredTextDescriptionBody(ST.StructuredTextParagraph):
    """Represents the body part of a description item.

    StructuredTextDescription.getChildren builds one of these from the
    description's source text and sub-paragraphs.
    """

class StructuredTextDescription(ST.StructuredTextParagraph):
    """Represents a description item: a title and the text describing it.

    Both the title and the body text are exposed for inline colorizing.
    """

    def __init__(self, title, src, subs, **kw):
        """Create the description.

        title -- text of the term being described
        src   -- body text, passed to StructuredTextParagraph
        subs  -- sub-paragraphs, passed to StructuredTextParagraph
        kw    -- keyword attributes forwarded to StructuredTextParagraph
        """
        # Extended call syntax replaces the deprecated apply() builtin.
        ST.StructuredTextParagraph.__init__(self, src, subs, **kw)
        self._title = title

    def getColorizableTexts(self):
        """Return the pair (title, body text)."""
        return self._title, self._src

    def setColorizableTexts(self, src):
        """Store a two-item sequence back as (title, body text)."""
        self._title, self._src = src

    def getChildren(self):
        """Return a (title node, body node) tuple built on each call."""
        # NOTE(review): _src and _subs are presumably set by the base
        # class constructor -- they are not assigned in this class.
        return (StructuredTextDescriptionTitle(self._title),
                StructuredTextDescriptionBody(self._src, self._subs))

class StructuredTextSectionTitle(ST.StructuredTextParagraph):
    """Represents the title of a section.

    StructuredTextSection.__init__ wraps its source text in one of these.
    """

class StructuredTextSection(ST.StructuredTextParagraph):
    """Represents a section of a document with a title and a body"""

    def __init__(self, src, subs=None, **kw):
        """Create the section.

        src  -- title text; wrapped in a StructuredTextSectionTitle
        subs -- sub-paragraphs forming the section body (default None)
        kw   -- keyword attributes forwarded to StructuredTextParagraph
        """
        # Extended call syntax replaces the deprecated apply() builtin.
        ST.StructuredTextParagraph.__init__(
            self, StructuredTextSectionTitle(src), subs, **kw)

# a StructuredTextTable holds StructuredTextRows
class StructuredTextTable(ST.StructuredTextDocument):
    """
    A table node holding StructuredTextRow objects.

    rows is a list of lists containing tuples, which
    represent the columns/cells in each rows.
    EX
    rows = [[('row 1:column1',1)],[('row2:column1',1)]]
    """

    def __init__(self, rows, src, subs, **kw):
        """Create the table.

        rows -- list of rows as described on the class; empty rows
                are skipped
        src  -- accepted for interface compatibility; not stored here
        subs -- sub-paragraphs passed to StructuredTextDocument
        kw   -- keyword attributes; forwarded to the base class and
                handed to every row as a plain dictionary
        """
        # Extended call syntax replaces the deprecated apply() builtin.
        ST.StructuredTextDocument.__init__(self, subs, **kw)
        self._rows = [StructuredTextRow(row, kw) for row in rows if row]

    def getRows(self):
        """Return a one-element list whose only item is the row list."""
        return [self._rows]

    def _getRows(self):
        """Alias of getRows()."""
        return self.getRows()

    def getColorizableTexts(self):
        """
        return a list where each item is a column/cell's
        contents, row by row. The result will be of this format.
        ["r1 col1", "r1 col2", "r2 col1", "r2 col2"]
        """
        result = []
        for row in self._rows:
            for column in row.getColumns()[0]:
                result.append(column.getColorizableTexts()[0])
        return result

    def setColorizableTexts(self, texts):
        """
        texts is a sequence where each item is the result of
        coloring the matching item of getColorizableTexts().
        The items are handed back to the individual columns/cells
        in the same row-by-row order.
        """
        position = 0
        for row in self._rows:
            for column in row._columns:
                # each cell takes exactly one text, wrapped in a tuple
                column.setColorizableTexts((texts[position],))
                position = position + 1

    def _getColorizableTexts(self):
        """Alias of getColorizableTexts()."""
        return self.getColorizableTexts()

    def _setColorizableTexts(self, texts):
        """Alias of setColorizableTexts().

        The previous version took no argument and called the setter
        without one, so it could only raise TypeError.
        """
        return self.setColorizableTexts(texts)
    
# StructuredTextRow holds StructuredTextColumns
class StructuredTextRow(ST.StructuredTextDocument):
    """A table row holding StructuredTextColumn objects."""

    def __init__(self, row, kw):
        """
        row is a list of tuples, where each tuple is
        the raw text for a cell/column and the span
        of that cell/column.
        EX
        [('this is column one',1), ('this is column two',1)]

        kw is a dictionary of keyword attributes; note that it is
        passed positionally, not as **kw.
        """
        # Extended call syntax replaces the deprecated apply() builtin.
        ST.StructuredTextDocument.__init__(self, [], **kw)
        self._columns = [StructuredTextColumn(column[0], column[1], kw)
                         for column in row]

    def getColumns(self):
        """Return a one-element list whose only item is the column list."""
        return [self._columns]

    def _getColumns(self):
        """Same result as getColumns()."""
        return [self._columns]
    
# this holds the raw text of a table cell
class StructuredTextColumn(ST.StructuredTextParagraph):
    """
    StructuredTextColumn is a cell/column in a table.
    This contains the actual text of a column and is
    thus a StructuredTextParagraph. A StructuredTextColumn
    also holds the span of its column
    """

    def __init__(self, text, span, kw):
        """Create the cell.

        text -- raw text of the cell
        span -- span value stored for the cell
        kw   -- dictionary of keyword attributes (passed positionally)
        """
        # Extended call syntax replaces the deprecated apply() builtin.
        ST.StructuredTextParagraph.__init__(self, text, [], **kw)
        self._span = span

    def getSpan(self):
        """Return the span given at construction."""
        return self._span

    def _getSpan(self):
        """Same result as getSpan()."""
        return self._span
    
class StructuredTextMarkup(STDOM.Element):
    """Base class for inline markup nodes.

    A markup node wraps a value (a string, or a list once its text has
    been colorized) and stores any keyword arguments as attributes.
    """

    def __init__(self, v, **kw):
        """Store value *v* and set each keyword argument as an attribute."""
        self._value = v
        self._attributes = list(kw.keys())
        for name, value in kw.items():
            setattr(self, name, value)

    def getChildren(self, type=type, lt=type([])):
        """Return the value as a list, wrapping it unless it already is one.

        The type and lt parameters are lookup shortcuts bound at
        definition time; callers are not expected to pass them.
        """
        v = self._value
        if type(v) is not lt:
            v = [v]
        return v

    def getColorizableTexts(self):
        """Return a one-item tuple holding the value."""
        return self._value,

    def setColorizableTexts(self, v):
        """Replace the value with the first item of *v*."""
        self._value = v[0]

    def __repr__(self):
        # repr() replaces the backtick syntax, which newer Pythons reject.
        return '%s(%s)' % (self.__class__.__name__, repr(self._value))

class StructuredTextLiteral(StructuredTextMarkup):
    """Inline literal text; its value is never offered for colorizing."""

    def getColorizableTexts(self):
        """Return an empty tuple so nothing inside the literal is parsed."""
        return ()

    def setColorizableTexts(self, v):
        """Accept and discard *v*; the literal value is left untouched."""
        pass

# Inline emphasis node; DocumentClass.doc_emphasize creates one for text
# enclosed in single asterisks.
class StructuredTextEmphasis(StructuredTextMarkup): pass

# Inline strong node; DocumentClass.doc_strong creates one for text
# enclosed in double asterisks.
class StructuredTextStrong(StructuredTextMarkup): pass

# Inner link node; DocumentClass.doc_inner_link creates one for an
# alphanumeric name in square brackets, storing the name without brackets.
class StructuredTextInnerLink(StructuredTextMarkup): pass

# Named link node; DocumentClass.doc_named_link creates one for a
# bracketed alphanumeric name that directly follows ".. ".
class StructuredTextNamedLink(StructuredTextMarkup): pass

# Inline underline node; DocumentClass.doc_underline creates one for text
# enclosed in underscores.
class StructuredTextUnderline(StructuredTextMarkup): pass

class StructuredTextLink(StructuredTextMarkup):
    """A simple hyperlink.

    DocumentClass.doc_href creates these with the link title as the
    value and the link target passed as the 'href' keyword argument.
    """

class DocumentClass:
    """
    Turns raw structured text paragraphs into typed document nodes.

    Class instance calls [ex.=> x(doc)] accept either a string or a
    structured text structure. Each paragraph in the structure is
    tested against the paragraph types, and the special inline
    structures within each paragraph's text are then located.
    Each special structure is stored as an instance. Special
    structures found inside the text of another special structure
    are stored within the 'top' structure.
    EX : '_underline this_' => would be turned into an underline
    instance.
    """

    # Names of the methods tried, in this order, on every plain
    # paragraph by color_paragraphs; the first one returning a true
    # value decides the paragraph's type.
    paragraph_types  = [
        'doc_bullet',
        'doc_numbered',
        'doc_description',
        'doc_header',
        'doc_table',
        ]

    # Names of the inline handlers applied, in this order, by
    # color_text when no explicit list of types is given.
    text_types = [
        'doc_href',
        'doc_strong',
        'doc_emphasize',
        'doc_literal',
        'doc_inner_link',
        'doc_named_link',
        'doc_underline',
        ]

    def __call__(self, doc):
        """Colorize *doc* and return the resulting document.

        A plain string is first parsed with ST.StructuredText and its
        sub-paragraphs are replaced by their colorized versions.  Any
        other object is asked for its sub-paragraphs, which are
        colorized and wrapped in a new ST.StructuredTextDocument.
        """
        if type(doc) is not type(''):
            colored = self.color_paragraphs(doc.getSubparagraphs())
            return ST.StructuredTextDocument(colored)

        parsed = ST.StructuredText(doc)
        colored = self.color_paragraphs(parsed.getSubparagraphs())
        parsed.setSubparagraphs(colored)
        return parsed

    def parse(self, raw_string, text_type,
              type=type, st=type(''), lt=type([])):

       """
       Parse accepts a raw_string, an expr to test the raw_string,
       and the raw_string's subparagraphs.
       
       Parse will continue to search through raw_string until 
       all instances of expr in raw_string are found. 
       
       If no instances of expr are found, raw_string is returned.
       Otherwise a list of substrings and instances is returned
       """

       tmp = []    # the list to be returned if raw_string is split
       append=tmp.append

       if type(text_type) is st: text_type=getattr(self, text_type)

       while 1:
          t = text_type(raw_string)
          if not t: break
          #an instance of expr was found
          t, start, end    = t

          if start: append(raw_string[0:start])

          tt=type(t)
          if tt is st:
             # if we get a string back, add it to text to be parsed
             raw_string = t+raw_string[end:len(raw_string)]
          else:
             if tt is lt:
                # is we get a list, append it's elements
                tmp[len(tmp):]=t
             else:
                # normal case, an object
                append(t)
             raw_string = raw_string[end:len(raw_string)]

       if not tmp: return raw_string # nothing found
       
       if raw_string: append(raw_string)
       elif len(tmp)==1: return tmp[0]
       
       return tmp


    def color_text(self, str, types=None):
       """Search the paragraph for each special structure
       """
       if types is None: types=self.text_types

       for text_type in types:

          if type(str) is StringType:
             str = self.parse(str, text_type)
          elif type(str) is ListType:
             r=[]; a=r.append
             for s in str:
                if type(s) is StringType:
                    s=self.parse(s, text_type)
                    if type(s) is ListType: r[len(r):]=s
                    else: a(s)
                else:
                    s.setColorizableTexts(
                       map(self.color_text,
                           s.getColorizableTexts()
                           ))
                    a(s)
             str=r
          else:
             r=[]; a=r.append; color=self.color_text
             for s in str.getColorizableTexts():
                color(s, (text_type,))
                a(s)
                
             str.setColorizableTexts(r)

       return str

    def color_paragraphs(self, raw_paragraphs,
                           type=type, sequence_types=(type([]), type(())),
                           st=type('')):
       result=[]
       for paragraph in raw_paragraphs:
          
          if paragraph.getNodeName() != 'StructuredTextParagraph':
             result.append(paragraph)
             continue
          
          for pt in self.paragraph_types:
             if type(pt) is st:
                # grab the corresponding function
                pt=getattr(self, pt)
             # evaluate the paragraph
             r=pt(paragraph)
             if r:
                if type(r) not in sequence_types:
                    r=r,
                new_paragraphs=r
                for paragraph in new_paragraphs:
                    paragraph.setSubparagraphs(self.color_paragraphs(paragraph.getSubparagraphs()))
                break
          else:
             new_paragraphs=ST.StructuredTextParagraph(paragraph.getColorizableTexts()[0],
                                                          self.color_paragraphs(paragraph.getSubparagraphs()),
                                                          indent=paragraph.indent),
          # color the inline StructuredText types
          # for each StructuredTextParagraph
          for paragraph in new_paragraphs:
             paragraph.setColorizableTexts(
                map(self.color_text,
                    paragraph.getColorizableTexts()
                    ))
             result.append(paragraph)

       return result
    
    def doc_table(self,paragraph, expr = re.compile('(\s*)([||]+)').match):
        text    = paragraph.getColorizableTexts()[0]
        m       = expr(text)
        
        if not (m):
            return None
        rows = []
    
        # initial split
        for row in split(text,"\n"):
            rows.append(row)    
    
        # clean up the rows
        for index in range(len(rows)):
            tmp = []
            rows[index] = strip(rows[index])
            l = len(rows[index])-2
            result = split(rows[index][:l],"||")
            for text in result:
                if text:
                    tmp.append(text)
                    tmp.append('')
                else:
                    tmp.append(text)
            rows[index] = tmp
        # remove trailing '''s
        for index in range(len(rows)):
            l = len(rows[index])-1
            rows[index] = rows[index][:l]
        
        result = []
        for row in rows:
            cspan   = 0
            tmp     = []
            for item in row:
                if item:
                    tmp.append(item,cspan)
                    cspan = 0
                else:
                    cspan = cspan + 1
            result.append(tmp)
        
        subs = paragraph.getSubparagraphs()
        indent=paragraph.indent
        return StructuredTextTable(result,text,subs,indent=paragraph.indent)
            
    def doc_bullet(self, paragraph, expr = re.compile('\s*[-*o]\s+').match):
        top=paragraph.getColorizableTexts()[0]
        m=expr(top)

        if not m:
            return None
            
        subs=paragraph.getSubparagraphs()
        if top[-2:]=='::':
           subs=[StructuredTextExample(subs)]
           top=top[:-1]
        return StructuredTextBullet(top[m.span()[1]:], subs,
                                     indent=paragraph.indent,
                                     bullet=top[:m.span()[1]]
                                     )

    def doc_numbered(
        self, paragraph,
        expr = re.compile('(\s*[a-zA-Z]+\.)|(\s*[0-9]+\.)|(\s*[0-9]+\s+)').match):
        
        # This is the old expression. It had a nasty habit
        # of grabbing paragraphs that began with a single
        # letter word even if there was no following period.
        
        #expr = re.compile('\s*'
        #                   '(([a-zA-Z]|[0-9]+|[ivxlcdmIVXLCDM]+)\.)*'
        #                   '([a-zA-Z]|[0-9]+|[ivxlcdmIVXLCDM]+)\.?'
        #                   '\s+').match):
        
        top=paragraph.getColorizableTexts()[0]
        m=expr(top)
        if not m: return None
        subs=paragraph.getSubparagraphs()
        if top[-2:]=='::':
           subs=[StructuredTextExample(subs)]
           top=top[:-1]
        return StructuredTextNumbered(top[m.span()[1]:], subs,
                                        indent=paragraph.indent,
                                        number=top[:m.span()[1]])

    def doc_description(
        self, paragraph,
        delim = re.compile('\s+--\s+').search,
        nb=re.compile(r'[^\0- ]').search,
        ):

        top=paragraph.getColorizableTexts()[0]
        d=delim(top)
        if not d: return None
        start, end = d.span()
        title=top[:start]
        if find(title, '\n') >= 0: return None
        if not nb(title): return None
        d=top[start:end]
        top=top[end:]

        subs=paragraph.getSubparagraphs()
        if top[-2:]=='::':
           subs=[StructuredTextExample(subs)]
           top=top[:-1]

        return StructuredTextDescription(
           title, top, subs,
           indent=paragraph.indent,
           delim=d)

    def doc_header(self, paragraph,
                    expr    = re.compile('[ a-zA-Z0-9.:/,-_*<>\?\'\"]+').match
                    ):
        subs=paragraph.getSubparagraphs()
        if not subs: return None
        top=paragraph.getColorizableTexts()[0]
        if not strip(top): return None
        if top[-2:]=='::':
           subs=StructuredTextExample(subs)
           if strip(top)=='::': return subs
           return ST.StructuredTextParagraph(top[:-1],
                                             [subs],
                                             indent=paragraph.indent,
                                             level=paragraph.level)

        if find(top,'\n') >= 0: return None
        return StructuredTextSection(top, subs, indent=paragraph.indent, level=paragraph.level)

    def doc_literal(
        self, s,
        expr=re.compile(
          "(?:\s|^)'"                                                  # open
          "([^ \t\n\r\f\v']|[^ \t\n\r\f\v'][^\n']*[^ \t\n\r\f\v'])" # contents
          "'(?:\s|[,.;:!?]|$)"                                        # close
          ).search):
        
        r=expr(s)
        if r:
           start, end = r.span(1)
           return (StructuredTextLiteral(s[start:end]), start-1, end+1)
        else:
           return None

    def doc_emphasize(
        self, s,
        expr = re.compile('\s*\*([ \na-zA-Z0-9.:/;,\'\"\?]+)\*(?!\*|-)').search
        ):

        r=expr(s)
        if r:
           start, end = r.span(1)
           return (StructuredTextEmphasis(s[start:end]), start-1, end+1)
        else:
           return None
    
    def doc_inner_link(self,
                       s,
                       expr1 = re.compile("\.\.\s*").search,
                       expr2 = re.compile("\[[a-zA-Z0-9]+\]").search):
        
        # make sure we dont grab a named link
        if expr2(s) and expr1(s):
            start1,end1 = expr1(s).span()
            start2,end2 = expr2(s).span()
            if end1 == start2:
                # uh-oh, looks like a named link
                return None
            else:
                # the .. is somewhere else, ignore it
                return (StructuredTextInnerLink(s[start2+1,end2-1],start2,end2))
            return None
        elif expr2(s) and not expr1(s):
            start,end = expr2(s).span()
            return (StructuredTextInnerLink(s[start+1:end-1]),start,end)
        return None
    
    def doc_named_link(self,
                       s,
                       expr=re.compile("(\.\.\s)(\[[a-zA-Z0-9]+\])").search):
        
        result = expr(s)
        if result:
            start,end   = result.span(2)
            a,b         = result.span(1)
            str         = strip(s[a:b]) + s[start:end]
            str         = s[start+1:end-1]
            st,en       = result.span()
            return (StructuredTextNamedLink(str),st,en)
            #return (StructuredTextNamedLink(s[st:en]),st,en)
        return None
    
    def doc_underline(self,
                      s,
                      expr=re.compile("\_([a-zA-Z0-9\s\.,\?\/]+)\_").search):
        
        result = expr(s)
        if result:
            start,end = result.span(1)
            st,e = result.span()
            return (StructuredTextUnderline(s[start:end]),st,e)
        else:
            return None
    
    def doc_strong(self, 
                   s,
        expr = re.compile('\s*\*\*([ \na-zA-Z0-9.:/;\-,!\?\'\"]+)\*\*').search
        ):

        r=expr(s)
        if r:
           start, end = r.span(1)
           return (StructuredTextStrong(s[start:end]), start-2, end+2)
        else:
           return None
    
    def doc_href(
        
        self, s,
        expr1 = re.compile("(\"[ a-zA-Z0-9\n\-\.\,\;\(\)\/\:\/]+\")(:)([a-zA-Z0-9\:\/\.\~\-]+)([,]*\s*)").search,
        expr2 = re.compile('(\"[ a-zA-Z0-9\n\-\.\:\;\(\)\/]+\")([,]+\s+)([a-zA-Z0-9\@\.\,\?\!\/\:\;\-\#]+)(\s*)').search):
        
        #expr1=re.compile('\"([ a-zA-Z0-9.:/;,\n\~\(\)\-]+)\"'
        #                  ':'
        #                  '([a-zA-Z0-9.:/;,\n\~]+)(?=(\s+|\.|\!|\?))'
        #                  ).search,
        #expr2=re.compile('\"([ a-zA-Z0-9./:]+)\"'
        #                  ',\s+'
        #                  '([ a-zA-Z0-9@.:/;]+)(?=(\s+|\.|\!|\?))'
        #                  ).search,
        
        punctuation = re.compile("[\,\.\?\!\;]+").match
        r=expr1(s) or expr2(s)

        if r:
            # need to grab the href part and the
            # beginning part
                        
            start,e = r.span(1)
            name    = s[start:e]
            name    = replace(name,'"','',2)
            #start   = start + 1
            st,end   = r.span(3)
            if punctuation(s[end-1:end]):
                end = end -1
            link    = s[st:end]
            #end     = end - 1                        
            
            # name is the href title, link is the target
            # of the href
            return (StructuredTextLink(name, href=link),
                    start, end)
            
            #return (StructuredTextLink(s[start:end], href=s[start:end]),
            #        start, end)
        else:
            return None
Commit	Line	Data
c12bc4de RD	1	##############################################################################
	2	#
	3	# Zope Public License (ZPL) Version 1.0
	4	# -------------------------------------
	5	#
	6	# Copyright (c) Digital Creations. All rights reserved.
	7	#
	8	# This license has been certified as Open Source(tm).
	9	#
	10	# Redistribution and use in source and binary forms, with or without
	11	# modification, are permitted provided that the following conditions are
	12	# met:
	13	#
	14	# 1. Redistributions in source code must retain the above copyright
	15	# notice, this list of conditions, and the following disclaimer.
	16	#
	17	# 2. Redistributions in binary form must reproduce the above copyright
	18	# notice, this list of conditions, and the following disclaimer in
	19	# the documentation and/or other materials provided with the
	20	# distribution.
	21	#
	22	# 3. Digital Creations requests that attribution be given to Zope
	23	# in any manner possible. Zope includes a "Powered by Zope"
	24	# button that is installed by default. While it is not a license
	25	# violation to remove this button, it is requested that the
	26	# attribution remain. A significant investment has been put
	27	# into Zope, and this effort will continue if the Zope community
	28	# continues to grow. This is one way to assure that growth.
	29	#
	30	# 4. All advertising materials and documentation mentioning
	31	# features derived from or use of this software must display
	32	# the following acknowledgement:
	33	#
	34	# "This product includes software developed by Digital Creations
	35	# for use in the Z Object Publishing Environment
	36	# (http://www.zope.org/)."
	37	#
	38	# In the event that the product being advertised includes an
	39	# intact Zope distribution (with copyright and license included)
	40	# then this clause is waived.
	41	#
	42	# 5. Names associated with Zope or Digital Creations must not be used to
	43	# endorse or promote products derived from this software without
	44	# prior written permission from Digital Creations.
	45	#
	46	# 6. Modified redistributions of any form whatsoever must retain
	47	# the following acknowledgment:
	48	#
	49	# "This product includes software developed by Digital Creations
	50	# for use in the Z Object Publishing Environment
	51	# (http://www.zope.org/)."
	52	#
	53	# Intact (re-)distributions of any official Zope release do not
	54	# require an external acknowledgement.
	55	#
	56	# 7. Modifications are encouraged but must be packaged separately as
	57	# patches to official Zope releases. Distributions that do not
	58	# clearly separate the patches from the original work must be clearly
	59	# labeled as unofficial distributions. Modifications which do not
	60	# carry the name Zope may be packaged in any form, as long as they
	61	# conform to all of the clauses above.
	62	#
	63	#
	64	# Disclaimer
65	#
66	# THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
67	# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
68	# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
69	# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
70	# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
71	# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
72	# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
73	# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
74	# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
75	# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
76	# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
77	# SUCH DAMAGE.
78	#
79	#
80	# This software consists of contributions made by Digital Creations and
81	# many individuals on behalf of Digital Creations. Specific
82	# attributions are listed in the accompanying credits file.
83	#
84	##############################################################################
85
86	import re, ST, STDOM
87	from string import split, join, replace, expandtabs, strip, find
88
89	StringType=type('')
90	ListType=type([])
91
92	class StructuredTextExample(ST.StructuredTextParagraph):
93	"""Represents a section of document with literal text, as for examples"""
94
95	def __init__(self, subs, **kw):
96	t=[]; a=t.append
97	for s in subs: a(s.getNodeValue())
98	apply(ST.StructuredTextParagraph.__init__,
99	(self, join(t,'\n\n'), ()),
100	kw)
101
102	def getColorizableTexts(self): return ()
103	def setColorizableTexts(self, src): pass # never color examples
104
105	class StructuredTextBullet(ST.StructuredTextParagraph):
106	"""Represents a section of a document with a title and a body"""
107
108	class StructuredTextNumbered(ST.StructuredTextParagraph):
109	"""Represents a section of a document with a title and a body"""
110
111	class StructuredTextDescriptionTitle(ST.StructuredTextParagraph):
112	"""Represents a section of a document with a title and a body"""
113
114	class StructuredTextDescriptionBody(ST.StructuredTextParagraph):
115	"""Represents a section of a document with a title and a body"""
116
117	class StructuredTextDescription(ST.StructuredTextParagraph):
118	"""Represents a section of a document with a title and a body"""
119
120	def __init__(self, title, src, subs, **kw):
121	apply(ST.StructuredTextParagraph.__init__, (self, src, subs), kw)
122	self._title=title
123
124	def getColorizableTexts(self): return self._title, self._src
125	def setColorizableTexts(self, src): self._title, self._src = src
126
127	def getChildren(self):
128	return (StructuredTextDescriptionTitle(self._title),
129	StructuredTextDescriptionBody(self._src, self._subs))
130
131	class StructuredTextSectionTitle(ST.StructuredTextParagraph):
132	"""Represents a section of a document with a title and a body"""
133
134	class StructuredTextSection(ST.StructuredTextParagraph):
135	"""Represents a section of a document with a title and a body"""
136	def __init__(self, src, subs=None, **kw):
137	apply(ST.StructuredTextParagraph.__init__,
138	(self, StructuredTextSectionTitle(src), subs),
139	kw)
140
# a StructuredTextTable holds StructuredTextRows
class StructuredTextTable(ST.StructuredTextDocument):
    """
    A table node built from StructuredTextRow instances.

    rows is a list of lists containing tuples, which
    represent the columns/cells in each row.
    EX
    rows = [[('row 1:column1',1)],[('row2:column1',1)]]
    """

    def __init__(self, rows, src, subs, **kw):
        """
        Create one StructuredTextRow for every non-empty entry of rows.

        src is accepted but not stored; subs and the keyword
        arguments are passed to the StructuredTextDocument base.
        The keyword dictionary is also handed to each row.
        """
        ST.StructuredTextDocument.__init__(self, subs, **kw)
        self._rows = [StructuredTextRow(row, kw) for row in rows if row]

    def getRows(self):
        """Return the row list wrapped in a one-element list."""
        return [self._rows]

    def _getRows(self):
        return self.getRows()

    def getColorizableTexts(self):
        """
        Return a list where each item is a column/cell's
        contents, row by row. The result will be of this format.
        ["r1 col1", "r1 col2", "r2 col1", "r2 col2"]
        """
        result = []
        for row in self._rows:
            for column in row.getColumns()[0]:
                result.append(column.getColorizableTexts()[0])
        return result

    def setColorizableTexts(self, texts):
        """
        texts is a sequence where each item is the result of
        mapping the colortext function over the matching item of
        getColorizableTexts(). The items are handed back to the
        individual columns/cells in the same row-by-row order.
        """
        # Walk texts with a running index instead of re-slicing
        # the sequence once per cell.
        position = 0
        for row in self._rows:
            for column in row._columns:
                column.setColorizableTexts((texts[position],))
                position = position + 1

    def _getColorizableTexts(self):
        return self.getColorizableTexts()

    def _setColorizableTexts(self, texts):
        # Previously took no argument and called setColorizableTexts()
        # without one, so every call raised TypeError.
        return self.setColorizableTexts(texts)
195
# StructuredTextRow holds StructuredTextColumns
class StructuredTextRow(ST.StructuredTextDocument):
    """One table row, holding a StructuredTextColumn per cell."""

    def __init__(self, row, kw):
        """
        row is a list of tuples, where each tuple is
        the raw text for a cell/column and the span
        of that cell/column.
        EX
        [('this is column one',1), ('this is column two',1)]

        kw is a dictionary of keyword arguments; it is applied to
        the base class and passed on to every column.
        """
        ST.StructuredTextDocument.__init__(self, [], **kw)
        self._columns = [
            StructuredTextColumn(cell[0], cell[1], kw) for cell in row
        ]

    def getColumns(self):
        """Return the column list wrapped in a one-element list."""
        return [self._columns]

    def _getColumns(self):
        return [self._columns]
217
# this holds the raw text of a table cell
class StructuredTextColumn(ST.StructuredTextParagraph):
    """
    StructuredTextColumn is a cell/column in a table.
    It carries the actual text of the cell, which makes it a
    StructuredTextParagraph, and it also remembers the span
    of its column.
    """

    def __init__(self, text, span, kw):
        """
        text is the raw cell text, span the column span and kw a
        dictionary of keyword arguments for the paragraph base class.
        """
        ST.StructuredTextParagraph.__init__(self, text, [], **kw)
        self._span = span

    def getSpan(self):
        """Return the span given at construction time."""
        return self._span

    def _getSpan(self):
        return self._span
236
class StructuredTextMarkup(STDOM.Element):
    """Base class for inline markup nodes wrapping a single value."""

    def __init__(self, v, **kw):
        """
        Store v as the node value. Every keyword argument becomes an
        instance attribute and its name is recorded in _attributes.
        """
        self._value = v
        self._attributes = kw.keys()
        for name, value in kw.items():
            setattr(self, name, value)

    def getChildren(self, type=type, lt=type([])):
        """Return the value as a list, wrapping it if it is not one."""
        value = self._value
        if type(value) is lt:
            return value
        return [value]

    def getColorizableTexts(self):
        """Return the value as a one-element tuple."""
        return (self._value,)

    def setColorizableTexts(self, v):
        """Replace the value with the first item of v."""
        self._value = v[0]

    def __repr__(self):
        return '%s(%s)' % (self.__class__.__name__, repr(self._value))
254
class StructuredTextLiteral(StructuredTextMarkup):
    """Markup node whose text is never handed out for further parsing."""

    def getColorizableTexts(self):
        # Nothing to colorize: the contents stay exactly as written.
        return ()

    def setColorizableTexts(self, v):
        # Ignored, since getColorizableTexts() hands out nothing.
        pass
258
class StructuredTextEmphasis(StructuredTextMarkup):
    """Markup node returned by DocumentClass.doc_emphasize"""
260
class StructuredTextStrong(StructuredTextMarkup):
    """Markup node returned by DocumentClass.doc_strong"""
262
class StructuredTextInnerLink(StructuredTextMarkup):
    """Markup node returned by DocumentClass.doc_inner_link"""
264
class StructuredTextNamedLink(StructuredTextMarkup):
    """Markup node returned by DocumentClass.doc_named_link"""
266
class StructuredTextUnderline(StructuredTextMarkup):
    """Markup node returned by DocumentClass.doc_underline"""
268
class StructuredTextLink(StructuredTextMarkup):
    """A simple hyperlink; DocumentClass.doc_href passes the target as href"""
271
class DocumentClass:
    """
    Class instance calls [ex.=> x(doc)] require a structured text
    structure (or a raw string, see __call__). Doc will then parse
    each paragraph in the structure and will find the special
    structures within each paragraph. Each special structure will be
    stored as an instance. Special structures within another special
    structure are stored within the 'top' structure
    EX : '-underline this-' => would be turned into an underline
    instance. '-underline **this**' would be stored as an underline
    instance with a strong instance stored in its string

    NOTE(review): the '-...-' notation in the example looks historical;
    doc_underline in this class matches '_..._' - confirm before
    relying on the example.
    """

    # Names of the paragraph recognizers; color_paragraphs tries them
    # in this order and stops at the first one that returns a result.
    paragraph_types = [
        'doc_bullet',
        'doc_numbered',
        'doc_description',
        'doc_header',
        'doc_table',
        ]

    # Names of the inline recognizers; color_text applies them to the
    # text one after another, in this order.
    text_types = [
        'doc_href',
        'doc_strong',
        'doc_emphasize',
        'doc_literal',
        'doc_inner_link',
        'doc_named_link',
        'doc_underline',
        ]
302
303	def __call__(self, doc):
304	if type(doc) is type(''):
305	doc=ST.StructuredText(doc)
306	doc.setSubparagraphs(self.color_paragraphs(
307	doc.getSubparagraphs()))
308	else:
309	doc=ST.StructuredTextDocument(self.color_paragraphs(
310	doc.getSubparagraphs()))
311	return doc
312
313	def parse(self, raw_string, text_type,
314	type=type, st=type(''), lt=type([])):
315
316	"""
317	Parse accepts a raw_string, an expr to test the raw_string,
318	and the raw_string's subparagraphs.
319
320	Parse will continue to search through raw_string until
321	all instances of expr in raw_string are found.
322
323	If no instances of expr are found, raw_string is returned.
324	Otherwise a list of substrings and instances is returned
325	"""
326
327	tmp = [] # the list to be returned if raw_string is split
328	append=tmp.append
329
330	if type(text_type) is st: text_type=getattr(self, text_type)
331
332	while 1:
333	t = text_type(raw_string)
334	if not t: break
335	#an instance of expr was found
336	t, start, end = t
337
338	if start: append(raw_string[0:start])
339
340	tt=type(t)
341	if tt is st:
342	# if we get a string back, add it to text to be parsed
343	raw_string = t+raw_string[end:len(raw_string)]
344	else:
345	if tt is lt:
346	# is we get a list, append it's elements
347	tmp[len(tmp):]=t
348	else:
349	# normal case, an object
350	append(t)
351	raw_string = raw_string[end:len(raw_string)]
352
353	if not tmp: return raw_string # nothing found
354
355	if raw_string: append(raw_string)
356	elif len(tmp)==1: return tmp[0]
357
358	return tmp
359
360
361	def color_text(self, str, types=None):
362	"""Search the paragraph for each special structure
363	"""
364	if types is None: types=self.text_types
365
366	for text_type in types:
367
368	if type(str) is StringType:
369	str = self.parse(str, text_type)
370	elif type(str) is ListType:
371	r=[]; a=r.append
372	for s in str:
373	if type(s) is StringType:
374	s=self.parse(s, text_type)
375	if type(s) is ListType: r[len(r):]=s
376	else: a(s)
377	else:
378	s.setColorizableTexts(
379	map(self.color_text,
380	s.getColorizableTexts()
381	))
382	a(s)
383	str=r
384	else:
385	r=[]; a=r.append; color=self.color_text
386	for s in str.getColorizableTexts():
387	color(s, (text_type,))
388	a(s)
389
390	str.setColorizableTexts(r)
391
392	return str
393
394	def color_paragraphs(self, raw_paragraphs,
395	type=type, sequence_types=(type([]), type(())),
396	st=type('')):
397	result=[]
398	for paragraph in raw_paragraphs:
399
400	if paragraph.getNodeName() != 'StructuredTextParagraph':
401	result.append(paragraph)
402	continue
403
404	for pt in self.paragraph_types:
405	if type(pt) is st:
406	# grab the corresponding function
407	pt=getattr(self, pt)
408	# evaluate the paragraph
409	r=pt(paragraph)
410	if r:
411	if type(r) not in sequence_types:
412	r=r,
413	new_paragraphs=r
414	for paragraph in new_paragraphs:
415	paragraph.setSubparagraphs(self.color_paragraphs(paragraph.getSubparagraphs()))
416	break
417	else:
418	new_paragraphs=ST.StructuredTextParagraph(paragraph.getColorizableTexts()[0],
419	self.color_paragraphs(paragraph.getSubparagraphs()),
420	indent=paragraph.indent),
421	# color the inline StructuredText types
422	# for each StructuredTextParagraph
423	for paragraph in new_paragraphs:
424	paragraph.setColorizableTexts(
425	map(self.color_text,
426	paragraph.getColorizableTexts()
427	))
428	result.append(paragraph)
429
430	return result
431
432	def doc_table(self,paragraph, expr = re.compile('(\s*)([\|\|]+)').match):
433	text = paragraph.getColorizableTexts()[0]
434	m = expr(text)
435
436	if not (m):
437	return None
438	rows = []
439
440	# initial split
441	for row in split(text,"\n"):
442	rows.append(row)
443
444	# clean up the rows
445	for index in range(len(rows)):
446	tmp = []
447	rows[index] = strip(rows[index])
448	l = len(rows[index])-2
449	result = split(rows[index][:l],"\|\|")
450	for text in result:
451	if text:
452	tmp.append(text)
453	tmp.append('')
454	else:
455	tmp.append(text)
456	rows[index] = tmp
457	# remove trailing '''s
458	for index in range(len(rows)):
459	l = len(rows[index])-1
460	rows[index] = rows[index][:l]
461
462	result = []
463	for row in rows:
464	cspan = 0
465	tmp = []
466	for item in row:
467	if item:
468	tmp.append(item,cspan)
469	cspan = 0
470	else:
471	cspan = cspan + 1
472	result.append(tmp)
473
474	subs = paragraph.getSubparagraphs()
475	indent=paragraph.indent
476	return StructuredTextTable(result,text,subs,indent=paragraph.indent)
477
478	def doc_bullet(self, paragraph, expr = re.compile('\s[-o]\s+').match):
479	top=paragraph.getColorizableTexts()[0]
480	m=expr(top)
481
482	if not m:
483	return None
484
485	subs=paragraph.getSubparagraphs()
486	if top[-2:]=='::':
487	subs=[StructuredTextExample(subs)]
488	top=top[:-1]
489	return StructuredTextBullet(top[m.span()[1]:], subs,
490	indent=paragraph.indent,
491	bullet=top[:m.span()[1]]
492	)
493
494	def doc_numbered(
495	self, paragraph,
496	expr = re.compile('(\s[a-zA-Z]+\.)\|(\s[0-9]+\.)\|(\s*[0-9]+\s+)').match):
497
498	# This is the old expression. It had a nasty habit
499	# of grabbing paragraphs that began with a single
500	# letter word even if there was no following period.
501
502	#expr = re.compile('\s*'
503	# '(([a-zA-Z]\|[0-9]+\|[ivxlcdmIVXLCDM]+)\.)*'
504	# '([a-zA-Z]\|[0-9]+\|[ivxlcdmIVXLCDM]+)\.?'
505	# '\s+').match):
506
507	top=paragraph.getColorizableTexts()[0]
508	m=expr(top)
509	if not m: return None
510	subs=paragraph.getSubparagraphs()
511	if top[-2:]=='::':
512	subs=[StructuredTextExample(subs)]
513	top=top[:-1]
514	return StructuredTextNumbered(top[m.span()[1]:], subs,
515	indent=paragraph.indent,
516	number=top[:m.span()[1]])
517
518	def doc_description(
519	self, paragraph,
520	delim = re.compile('\s+--\s+').search,
521	nb=re.compile(r'[^\0- ]').search,
522	):
523
524	top=paragraph.getColorizableTexts()[0]
525	d=delim(top)
526	if not d: return None
527	start, end = d.span()
528	title=top[:start]
529	if find(title, '\n') >= 0: return None
530	if not nb(title): return None
531	d=top[start:end]
532	top=top[end:]
533
534	subs=paragraph.getSubparagraphs()
535	if top[-2:]=='::':
536	subs=[StructuredTextExample(subs)]
537	top=top[:-1]
538
539	return StructuredTextDescription(
540	title, top, subs,
541	indent=paragraph.indent,
542	delim=d)
543
544	def doc_header(self, paragraph,
545	expr = re.compile('[ a-zA-Z0-9.:/,-_*<>\?\'\"]+').match
546	):
547	subs=paragraph.getSubparagraphs()
548	if not subs: return None
549	top=paragraph.getColorizableTexts()[0]
550	if not strip(top): return None
551	if top[-2:]=='::':
552	subs=StructuredTextExample(subs)
553	if strip(top)=='::': return subs
554	return ST.StructuredTextParagraph(top[:-1],
555	[subs],
556	indent=paragraph.indent,
557	level=paragraph.level)
558
559	if find(top,'\n') >= 0: return None
560	return StructuredTextSection(top, subs, indent=paragraph.indent, level=paragraph.level)
561
562	def doc_literal(
563	self, s,
564	expr=re.compile(
565	"(?:\s\|^)'" # open
566	"([^ \t\n\r\f\v']\|[^ \t\n\r\f\v'][^\n']*[^ \t\n\r\f\v'])" # contents
567	"'(?:\s\|[,.;:!?]\|$)" # close
568	).search):
569
570	r=expr(s)
571	if r:
572	start, end = r.span(1)
573	return (StructuredTextLiteral(s[start:end]), start-1, end+1)
574	else:
575	return None
576
577	def doc_emphasize(
578	self, s,
579	expr = re.compile('\s\([ \na-zA-Z0-9.:/;,\'\"\?]+)\(?!\\|-)').search
580	):
581
582	r=expr(s)
583	if r:
584	start, end = r.span(1)
585	return (StructuredTextEmphasis(s[start:end]), start-1, end+1)
586	else:
587	return None
588
589	def doc_inner_link(self,
590	s,
591	expr1 = re.compile("\.\.\s*").search,
592	expr2 = re.compile("\[[a-zA-Z0-9]+\]").search):
593
594	# make sure we dont grab a named link
595	if expr2(s) and expr1(s):
596	start1,end1 = expr1(s).span()
597	start2,end2 = expr2(s).span()
598	if end1 == start2:
599	# uh-oh, looks like a named link
600	return None
601	else:
602	# the .. is somewhere else, ignore it
603	return (StructuredTextInnerLink(s[start2+1,end2-1],start2,end2))
604	return None
605	elif expr2(s) and not expr1(s):
606	start,end = expr2(s).span()
607	return (StructuredTextInnerLink(s[start+1:end-1]),start,end)
608	return None
609
610	def doc_named_link(self,
611	s,
612	expr=re.compile("(\.\.\s)(\[[a-zA-Z0-9]+\])").search):
613
614	result = expr(s)
615	if result:
616	start,end = result.span(2)
617	a,b = result.span(1)
618	str = strip(s[a:b]) + s[start:end]
619	str = s[start+1:end-1]
620	st,en = result.span()
621	return (StructuredTextNamedLink(str),st,en)
622	#return (StructuredTextNamedLink(s[st:en]),st,en)
623	return None
624
625	def doc_underline(self,
626	s,
627	expr=re.compile("\_([a-zA-Z0-9\s\.,\?\/]+)\_").search):
628
629	result = expr(s)
630	if result:
631	start,end = result.span(1)
632	st,e = result.span()
633	return (StructuredTextUnderline(s[start:end]),st,e)
634	else:
635	return None
636
637	def doc_strong(self,
638	s,
639	expr = re.compile('\s\\([ \na-zA-Z0-9.:/;\-,!\?\'\"]+)\\*').search
640	):
641
642	r=expr(s)
643	if r:
644	start, end = r.span(1)
645	return (StructuredTextStrong(s[start:end]), start-2, end+2)
646	else:
647	return None
648
649	def doc_href(
650
651	self, s,
652	expr1 = re.compile("(\"[ a-zA-Z0-9\n\-\.\,\;\(\)\/\:\/]+\")(:)([a-zA-Z0-9\:\/\.\~\-]+)([,]\s)").search,
653	expr2 = re.compile('(\"[ a-zA-Z0-9\n\-\.\:\;\(\)\/]+\")([,]+\s+)([a-zA-Z0-9\@\.\,\?\!\/\:\;\-\#]+)(\s*)').search):
654
655	#expr1=re.compile('\"([ a-zA-Z0-9.:/;,\n\~\(\)\-]+)\"'
656	# ':'
657	# '([a-zA-Z0-9.:/;,\n\~]+)(?=(\s+\|\.\|\!\|\?))'
658	# ).search,
659	#expr2=re.compile('\"([ a-zA-Z0-9./:]+)\"'
660	# ',\s+'
661	# '([ a-zA-Z0-9@.:/;]+)(?=(\s+\|\.\|\!\|\?))'
662	# ).search,
663
664	punctuation = re.compile("[\,\.\?\!\;]+").match
665	r=expr1(s) or expr2(s)
666
667	if r:
668	# need to grab the href part and the
669	# beginning part
670
671	start,e = r.span(1)
672	name = s[start:e]
673	name = replace(name,'"','',2)
674	#start = start + 1
675	st,end = r.span(3)
676	if punctuation(s[end-1:end]):
677	end = end -1
678	link = s[st:end]
679	#end = end - 1
680
681	# name is the href title, link is the target
682	# of the href
683	return (StructuredTextLink(name, href=link),
684	start, end)
685
686	#return (StructuredTextLink(s[start:end], href=s[start:end]),
687	# start, end)
688	else:
689	return None