wxPython/samples/stxview/StructuredText/DocumentClass.py

   1 ##############################################################################
   2 #
   3 # Zope Public License (ZPL) Version 1.0
   4 # -------------------------------------
   5 #
   6 # Copyright (c) Digital Creations.  All rights reserved.
   7 #
   8 # This license has been certified as Open Source(tm).
   9 #
  10 # Redistribution and use in source and binary forms, with or without
  11 # modification, are permitted provided that the following conditions are
  12 # met:
  13 #
  14 # 1. Redistributions in source code must retain the above copyright
  15 #    notice, this list of conditions, and the following disclaimer.
  16 #
  17 # 2. Redistributions in binary form must reproduce the above copyright
  18 #    notice, this list of conditions, and the following disclaimer in
  19 #    the documentation and/or other materials provided with the
  20 #    distribution.
  21 #
  22 # 3. Digital Creations requests that attribution be given to Zope
  23 #    in any manner possible. Zope includes a "Powered by Zope"
  24 #    button that is installed by default. While it is not a license
  25 #    violation to remove this button, it is requested that the
  26 #    attribution remain. A significant investment has been put
  27 #    into Zope, and this effort will continue if the Zope community
  28 #    continues to grow. This is one way to assure that growth.
  29 #
  30 # 4. All advertising materials and documentation mentioning
  31 #    features derived from or use of this software must display
  32 #    the following acknowledgement:
  33 #
  34 #       "This product includes software developed by Digital Creations
  35 #       for use in the Z Object Publishing Environment
  36 #       (http://www.zope.org/)."
  37 #
  38 #    In the event that the product being advertised includes an
  39 #    intact Zope distribution (with copyright and license included)
  40 #    then this clause is waived.
  41 #
  42 # 5. Names associated with Zope or Digital Creations must not be used to
  43 #    endorse or promote products derived from this software without
  44 #    prior written permission from Digital Creations.
  45 #
  46 # 6. Modified redistributions of any form whatsoever must retain
  47 #    the following acknowledgment:
  48 #
  49 #       "This product includes software developed by Digital Creations
  50 #       for use in the Z Object Publishing Environment
  51 #       (http://www.zope.org/)."
  52 #
  53 #    Intact (re-)distributions of any official Zope release do not
  54 #    require an external acknowledgement.
  55 #
  56 # 7. Modifications are encouraged but must be packaged separately as
  57 #    patches to official Zope releases.  Distributions that do not
  58 #    clearly separate the patches from the original work must be clearly
  59 #    labeled as unofficial distributions.  Modifications which do not
  60 #    carry the name Zope may be packaged in any form, as long as they
  61 #    conform to all of the clauses above.
  62 #
  63 #
  64 # Disclaimer
  65 #
  66 #    THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
  67 #    EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  68 #    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  69 #    PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
  70 #    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  71 #    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  72 #    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  73 #    USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  74 #    ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  75 #    OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  76 #    OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  77 #    SUCH DAMAGE.
  78 #
  79 #
  80 # This software consists of contributions made by Digital Creations and
  81 # many individuals on behalf of Digital Creations.  Specific
  82 # attributions are listed in the accompanying credits file.
  83 #
  84 ##############################################################################
  85
  86 import re, ST, STDOM
  87 from string import split, join, replace, expandtabs, strip, find, rstrip
  88 from STletters import *
  89
  90
  91 StringType=type('')
  92 ListType=type([])
  93
  94 def flatten(obj, append):
  95    if obj.getNodeType()==STDOM.TEXT_NODE:
  96       append(obj.getNodeValue())
  97    else:
  98       for child in obj.getChildNodes():
  99          flatten(child, append)
 100
 101
 102 class StructuredTextExample(ST.StructuredTextParagraph):
 103     """Represents a section of document with literal text, as for examples"""
 104
 105     def __init__(self, subs, **kw):
 106         t=[]
 107         a=t.append
 108         for s in subs:
 109             flatten(s, a)
 110         apply(ST.StructuredTextParagraph.__init__,
 111               (self, join(t,'\n\n'), ()),
 112               kw)
 113
 114     def getColorizableTexts(self): return ()
 115     def setColorizableTexts(self, src): pass # never color examples
 116
 117 class StructuredTextBullet(ST.StructuredTextParagraph):
 118     """Represents a section of a document with a title and a body"""
 119
 120 class StructuredTextNumbered(ST.StructuredTextParagraph):
 121     """Represents a section of a document with a title and a body"""
 122
 123 class StructuredTextDescriptionTitle(ST.StructuredTextParagraph):
 124     """Represents a section of a document with a title and a body"""
 125
 126 class StructuredTextDescriptionBody(ST.StructuredTextParagraph):
 127     """Represents a section of a document with a title and a body"""
 128
 129 class StructuredTextDescription(ST.StructuredTextParagraph):
 130     """Represents a section of a document with a title and a body"""
 131
 132     def __init__(self, title, src, subs, **kw):
 133        apply(ST.StructuredTextParagraph.__init__, (self, src, subs), kw)
 134        self._title=title
 135
 136     def getColorizableTexts(self): return self._title, self._src
 137     def setColorizableTexts(self, src): self._title, self._src = src
 138
 139     def getChildren(self):
 140        return (StructuredTextDescriptionTitle(self._title),
 141                StructuredTextDescriptionBody(self._src, self._subs))
 142
 143 class StructuredTextSectionTitle(ST.StructuredTextParagraph):
 144     """Represents a section of a document with a title and a body"""
 145
 146 class StructuredTextSection(ST.StructuredTextParagraph):
 147     """Represents a section of a document with a title and a body"""
 148     def __init__(self, src, subs=None, **kw):
 149        apply(ST.StructuredTextParagraph.__init__,
 150              (self, StructuredTextSectionTitle(src), subs),
 151              kw)
 152
 153     def getColorizableTexts(self):
 154         return self._src.getColorizableTexts()
 155
 156     def setColorizableTexts(self,src):
 157         self._src.setColorizableTexts(src)
 158
 159 # a StructuredTextTable holds StructuredTextRows
 160 class StructuredTextTable(ST.StructuredTextParagraph):
 161     """
 162     rows is a list of lists containing tuples, which
 163     represent the columns/cells in each rows.
 164     EX
 165     rows = [[('row 1:column1',1)],[('row2:column1',1)]]
 166     """
 167
 168     def __init__(self, rows, src, subs, **kw):
 169         apply(ST.StructuredTextParagraph.__init__,(self,subs),kw)
 170         self._rows = []
 171         for row in rows:
 172             if row:
 173                 self._rows.append(StructuredTextRow(row,kw))
 174
 175     def getRows(self):
 176         return [self._rows]
 177
 178     def _getRows(self):
 179         return self.getRows()
 180
 181     def getColumns(self):
 182         result = []
 183         for row in self._rows:
 184             result.append(row.getColumns())
 185         return result
 186
 187     def _getColumns(self):
 188         return self.getColumns()
 189
 190     def setColumns(self,columns):
 191         for index in range(len(self._rows)):
 192             self._rows[index].setColumns(columns[index])
 193
 194     def _setColumns(self,columns):
 195         return self.setColumns(columns)
 196
 197     def getColorizableTexts(self):
 198         """
 199         return a tuple where each item is a column/cell's
 200         contents. The tuple, result, will be of this format.
 201         ("r1 col1", "r1=col2", "r2 col1", "r2 col2")
 202         """
 203
 204         result = []
 205         for row in self._rows:
 206             for column in row.getColumns()[0]:
 207                 result.append(column.getColorizableTexts()[0])
 208         return result
 209
 210     def setColorizableTexts(self,texts):
 211         """
 212         texts is going to a tuple where each item is the
 213         result of being mapped to the colortext function.
 214         Need to insert the results appropriately into the
 215         individual columns/cells
 216         """
 217         for row_index in range(len(self._rows)):
 218             for column_index in range(len(self._rows[row_index]._columns)):
 219                 self._rows[row_index]._columns[column_index].setColorizableTexts((texts[0],))
 220                 texts = texts[1:]
 221
 222     def _getColorizableTexts(self):
 223         return self.getColorizableTexts()
 224
 225     def _setColorizableTexts(self):
 226         return self.setColorizableTexts()
 227
 228 # StructuredTextRow holds StructuredTextColumns
 229 class StructuredTextRow(ST.StructuredTextParagraph):
 230
 231     def __init__(self,row,kw):
 232         """
 233         row is a list of tuples, where each tuple is
 234         the raw text for a cell/column and the span
 235         of that cell/column.
 236         EX
 237         [('this is column one',1), ('this is column two',1)]
 238         """
 239
 240         apply(ST.StructuredTextParagraph.__init__,(self,[]),kw)
 241
 242         self._columns = []
 243         for column in row:
 244             self._columns.append(StructuredTextColumn(column[0],
 245                                                       column[1],
 246                                                       column[2],
 247                                                       column[3],
 248                                                       column[4],
 249                                                       kw))
 250
 251     def getColumns(self):
 252         return [self._columns]
 253
 254     def _getColumns(self):
 255         return [self._columns]
 256
 257     def setColumns(self,columns):
 258         self._columns = columns
 259
 260     def _setColumns(self,columns):
 261         return self.setColumns(columns)
 262
 263 # this holds the text of a table cell
 264 class StructuredTextColumn(ST.StructuredTextParagraph):
 265     """
 266     StructuredTextColumn is a cell/column in a table.
 267     A cell can hold multiple paragraphs. The cell
 268     is either classified as a StructuredTextTableHeader
 269     or StructuredTextTableData.
 270     """
 271
 272     def __init__(self,text,span,align,valign,typ,kw):
 273         apply(ST.StructuredTextParagraph.__init__,(self,text,[]),kw)
 274         self._span = span
 275         self._align = align
 276         self._valign = valign
 277         self._type = typ
 278
 279     def getSpan(self):
 280         return self._span
 281
 282     def _getSpan(self):
 283         return self._span
 284
 285     def getAlign(self):
 286         return self._align
 287
 288     def _getAlign(self):
 289         return self.getAlign()
 290
 291     def getValign(self):
 292         return self._valign
 293
 294     def _getValign(self):
 295         return self.getValign()
 296
 297     def getType(self):
 298         return self._type
 299
 300     def _getType(self):
 301         return self.getType()
 302
 303 class StructuredTextTableHeader(ST.StructuredTextParagraph): pass
 304
 305 class StructuredTextTableData(ST.StructuredTextParagraph): pass
 306
 307 class StructuredTextMarkup(STDOM.Element):
 308
 309     def __init__(self, v, **kw):
 310        self._value=v
 311        self._attributes=kw.keys()
 312        for k, v in kw.items(): setattr(self, k, v)
 313
 314     def getChildren(self, type=type, lt=type([])):
 315        v=self._value
 316        if type(v) is not lt: v=[v]
 317        return v
 318
 319     def getColorizableTexts(self): return self._value,
 320     def setColorizableTexts(self, v): self._value=v[0]
 321
 322     def __repr__(self):
 323        return '%s(%s)' % (self.__class__.__name__, `self._value`)
 324
 325 class StructuredTextLiteral(StructuredTextMarkup):
 326     def getColorizableTexts(self): return ()
 327     def setColorizableTexts(self, v): pass
 328
 329 class StructuredTextEmphasis(StructuredTextMarkup): pass
 330
 331 class StructuredTextStrong(StructuredTextMarkup): pass
 332
 333 class StructuredTextInnerLink(StructuredTextMarkup): pass
 334
 335 class StructuredTextNamedLink(StructuredTextMarkup): pass
 336
 337 class StructuredTextUnderline(StructuredTextMarkup): pass
 338
 339 class StructuredTextSGML(StructuredTextMarkup): pass
 340
 341 class StructuredTextLink(StructuredTextMarkup): pass
 342
 343 class StructuredTextXref(StructuredTextMarkup): pass
 344
 345 class DocumentClass:
 346     """
 347     Class instance calls [ex.=> x()] require a structured text
 348     structure. Doc will then parse each paragraph in the structure
 349     and will find the special structures within each paragraph.
 350     Each special structure will be stored as an instance. Special
 351     structures within another special structure are stored within
 352     the 'top' structure
 353     EX : '-underline this-' => would be turned into an underline
 354     instance. '-underline **this**' would be stored as an underline
 355     instance with a strong instance stored in its string
 356     """
 357
 358     paragraph_types  = [
 359         'doc_bullet',
 360         'doc_numbered',
 361         'doc_description',
 362         'doc_header',
 363         'doc_table',
 364         ]
 365
 366     #'doc_inner_link',
 367     #'doc_named_link',
 368     #'doc_underline',
 369     text_types = [
 370         'doc_sgml',
 371         'doc_href',
 372         'doc_strong',
 373         'doc_emphasize',
 374         'doc_literal',
 375         'doc_sgml',
 376         'doc_xref',
 377         ]
 378
 379     def __call__(self, doc):
 380         if type(doc) is type(''):
 381            doc=ST.StructuredText(doc)
 382            doc.setSubparagraphs(self.color_paragraphs(
 383               doc.getSubparagraphs()))
 384         else:
 385            doc=ST.StructuredTextDocument(self.color_paragraphs(
 386               doc.getSubparagraphs()))
 387         return doc
 388
 389     def parse(self, raw_string, text_type,
 390               type=type, st=type(''), lt=type([])):
 391
 392        """
 393        Parse accepts a raw_string, an expr to test the raw_string,
 394        and the raw_string's subparagraphs.
 395
 396        Parse will continue to search through raw_string until
 397        all instances of expr in raw_string are found.
 398
 399        If no instances of expr are found, raw_string is returned.
 400        Otherwise a list of substrings and instances is returned
 401        """
 402
 403        tmp = []    # the list to be returned if raw_string is split
 404        append=tmp.append
 405
 406        if type(text_type) is st: text_type=getattr(self, text_type)
 407
 408        while 1:
 409           t = text_type(raw_string)
 410           if not t: break
 411           #an instance of expr was found
 412           t, start, end    = t
 413
 414           if start: append(raw_string[0:start])
 415
 416           tt=type(t)
 417           if tt is st:
 418              # if we get a string back, add it to text to be parsed
 419              raw_string = t+raw_string[end:len(raw_string)]
 420           else:
 421              if tt is lt:
 422                 # is we get a list, append it's elements
 423                 tmp[len(tmp):]=t
 424              else:
 425                 # normal case, an object
 426                 append(t)
 427              raw_string = raw_string[end:len(raw_string)]
 428
 429        if not tmp: return raw_string # nothing found
 430
 431        if raw_string: append(raw_string)
 432        elif len(tmp)==1: return tmp[0]
 433
 434        return tmp
 435
 436
 437     def color_text(self, str, types=None):
 438        """Search the paragraph for each special structure
 439        """
 440        if types is None: types=self.text_types
 441
 442        for text_type in types:
 443
 444           if type(str) is StringType:
 445              str = self.parse(str, text_type)
 446           elif type(str) is ListType:
 447              r=[]; a=r.append
 448              for s in str:
 449                 if type(s) is StringType:
 450                     s=self.parse(s, text_type)
 451                     if type(s) is ListType: r[len(r):]=s
 452                     else: a(s)
 453                 else:
 454                     s.setColorizableTexts(
 455                        map(self.color_text,
 456                            s.getColorizableTexts()
 457                            ))
 458                     a(s)
 459              str=r
 460           else:
 461              r=[]; a=r.append; color=self.color_text
 462              for s in str.getColorizableTexts():
 463                 color(s, (text_type,))
 464                 a(s)
 465
 466              str.setColorizableTexts(r)
 467
 468        return str
 469
 470     def color_paragraphs(self, raw_paragraphs,
 471                            type=type, sequence_types=(type([]), type(())),
 472                            st=type('')):
 473        result=[]
 474        for paragraph in raw_paragraphs:
 475           if paragraph.getNodeName() != 'StructuredTextParagraph':
 476              result.append(paragraph)
 477              continue
 478
 479           for pt in self.paragraph_types:
 480              if type(pt) is st:
 481                 # grab the corresponding function
 482                 pt=getattr(self, pt)
 483              # evaluate the paragraph
 484              r=pt(paragraph)
 485              if r:
 486                 if type(r) not in sequence_types:
 487                     r=r,
 488                 new_paragraphs=r
 489                 for paragraph in new_paragraphs:
 490                     paragraph.setSubparagraphs(self.color_paragraphs(paragraph.getSubparagraphs()))
 491                 break
 492           else:
 493              new_paragraphs=ST.StructuredTextParagraph(paragraph.getColorizableTexts()[0],
 494                                                        self.color_paragraphs(paragraph.getSubparagraphs()),
 495                                                        indent=paragraph.indent),
 496
 497           # color the inline StructuredText types
 498           # for each StructuredTextParagraph
 499           for paragraph in new_paragraphs:
 500
 501              if paragraph.getNodeName() is "StructuredTextTable":
 502                 cells = paragraph.getColumns()
 503                 text = paragraph.getColorizableTexts()
 504                 text = map(ST.StructuredText,text)
 505                 text = map(self.__call__,text)
 506                 for t in range(len(text)):
 507                     text[t] = text[t].getSubparagraphs()
 508                 paragraph.setColorizableTexts(text)
 509
 510              paragraph.setColorizableTexts(
 511                 map(self.color_text,
 512                     paragraph.getColorizableTexts()
 513                     ))
 514              result.append(paragraph)
 515
 516        return result
 517
 518     def doc_table(self, paragraph, expr = re.compile(r'\s*\|[-]+\|').match):
 519         text    = paragraph.getColorizableTexts()[0]
 520         m       = expr(text)
 521
 522         subs = paragraph.getSubparagraphs()
 523
 524         if not (m):
 525             return None
 526         rows = []
 527
 528         spans   = []
 529         ROWS    = []
 530         COLS    = []
 531         indexes = []
 532         ignore  = []
 533
 534         TDdivider   = re.compile("[\-]+").match
 535         THdivider   = re.compile("[\=]+").match
 536         col         = re.compile('\|').search
 537         innertable  = re.compile('\|([-]+|[=]+)\|').search
 538
 539         text = strip(text)
 540         rows = split(text,'\n')
 541         foo  = ""
 542
 543         for row in range(len(rows)):
 544             rows[row] = strip(rows[row])
 545
 546         # have indexes store if a row is a divider
 547         # or a cell part
 548         for index in range(len(rows)):
 549             tmpstr = rows[index][1:len(rows[index])-1]
 550             if TDdivider(tmpstr):
 551                 indexes.append("TDdivider")
 552             elif THdivider(tmpstr):
 553                 indexes.append("THdivider")
 554             else:
 555                 indexes.append("cell")
 556
 557         for index in range(len(indexes)):
 558             if indexes[index] is "TDdivider" or indexes[index] is THdivider:
 559                 ignore = [] # reset ignore
 560                 #continue    # skip dividers
 561
 562             tmp     = strip(rows[index])    # clean the row up
 563             tmp     = tmp[1:len(tmp)-1]     # remove leading + trailing |
 564             offset  = 0
 565
 566             # find the start and end of inner
 567             # tables. ignore everything between
 568             if innertable(tmp):
 569                 tmpstr = strip(tmp)
 570                 while innertable(tmpstr):
 571                     start,end   = innertable(tmpstr).span()
 572                     if not (start,end-1) in ignore:
 573                         ignore.append(start,end-1)
 574                     tmpstr = " " + tmpstr[end:]
 575
 576             # find the location of column dividers
 577             # NOTE: |'s in inner tables do not count
 578             #   as column dividers
 579             if col(tmp):
 580                 while col(tmp):
 581                     bar         = 1   # true if start is not in ignore
 582                     start,end   = col(tmp).span()
 583
 584                     if not start+offset in spans:
 585                         for s,e in ignore:
 586                             if start+offset >= s or start+offset <= e:
 587                                 bar = None
 588                                 break
 589                         if bar:   # start is clean
 590                             spans.append(start+offset)
 591                     if not bar:
 592                         foo = foo + tmp[:end]
 593                         tmp = tmp[end:]
 594                         offset = offset + end
 595                     else:
 596                         COLS.append((foo + tmp[0:start],start+offset))
 597                         foo = ""
 598                         tmp = " " + tmp[end:]
 599                         offset = offset + start
 600             if not offset+len(tmp) in spans:
 601                 spans.append(offset+len(tmp))
 602             COLS.append((foo + tmp,offset+len(tmp)))
 603             foo = ""
 604             ROWS.append(COLS)
 605             COLS = []
 606
 607         spans.sort()
 608         ROWS = ROWS[1:len(ROWS)]
 609
 610         # find each column span
 611         cols    = []
 612         tmp     = []
 613
 614         for row in ROWS:
 615             for c in row:
 616                 tmp.append(c[1])
 617             cols.append(tmp)
 618             tmp = []
 619
 620         cur = 1
 621         tmp = []
 622         C   = []
 623         for col in cols:
 624             for span in spans:
 625                 if not span in col:
 626                     cur = cur + 1
 627                 else:
 628                     tmp.append(cur)
 629                     cur = 1
 630             C.append(tmp)
 631             tmp = []
 632
 633         for index in range(len(C)):
 634             for i in range(len(C[index])):
 635                 ROWS[index][i] = (ROWS[index][i][0],C[index][i])
 636         rows = ROWS
 637
 638         # label things as either TableData or
 639         # Table header
 640         TD  = []
 641         TH  = []
 642         all = []
 643         for index in range(len(indexes)):
 644             if indexes[index] is "TDdivider":
 645                 TD.append(index)
 646                 all.append(index)
 647             if indexes[index] is "THdivider":
 648                 TH.append(index)
 649                 all.append(index)
 650         TD = TD[1:]
 651         dividers = all[1:]
 652         #print "TD  => ", TD
 653         #print "TH  => ", TH
 654         #print "all => ", all, "\n"
 655
 656         for div in dividers:
 657             if div in TD:
 658                 index = all.index(div)
 659                 for rowindex in range(all[index-1],all[index]):
 660                     for i in range(len(rows[rowindex])):
 661                         rows[rowindex][i] = (rows[rowindex][i][0],
 662                                              rows[rowindex][i][1],
 663                                              "td")
 664             else:
 665                 index = all.index(div)
 666                 for rowindex in range(all[index-1],all[index]):
 667                     for i in range(len(rows[rowindex])):
 668                         rows[rowindex][i] = (rows[rowindex][i][0],
 669                                              rows[rowindex][i][1],
 670                                              "th")
 671
 672         # now munge the multi-line cells together
 673         # as paragraphs
 674         ROWS    = []
 675         COLS    = []
 676         for row in rows:
 677             for index in range(len(row)):
 678                 if not COLS:
 679                     COLS = range(len(row))
 680                     for i in range(len(COLS)):
 681                         COLS[i] = ["",1,""]
 682                 if TDdivider(row[index][0]) or THdivider(row[index][0]):
 683                     ROWS.append(COLS)
 684                     COLS = []
 685                 else:
 686                     COLS[index][0] = COLS[index][0] + (row[index][0]) + "\n"
 687                     COLS[index][1] = row[index][1]
 688                     COLS[index][2] = row[index][2]
 689
 690         # now that each cell has been munged together,
 691         # determine the cell's alignment.
 692         # Default is to center. Also determine the cell's
 693         # vertical alignment, top, middle, bottom. Default is
 694         # to middle
 695         rows = []
 696         cols = []
 697         for row in ROWS:
 698             for index in range(len(row)):
 699                 topindent       = 0
 700                 bottomindent    = 0
 701                 leftindent      = 0
 702                 rightindent     = 0
 703                 left            = []
 704                 right           = []
 705                 text            = row[index][0]
 706                 text            = split(text,'\n')
 707                 text            = text[:len(text)-1]
 708                 align           = ""
 709                 valign          = ""
 710                 for t in text:
 711                     t = strip(t)
 712                     if not t:
 713                         topindent = topindent + 1
 714                     else:
 715                         break
 716                 text.reverse()
 717                 for t in text:
 718                     t = strip(t)
 719                     if not t:
 720                         bottomindent = bottomindent + 1
 721                     else:
 722                         break
 723                 text.reverse()
 724                 tmp   = join(text[topindent:len(text)-bottomindent],"\n")
 725                 pars  = re.compile("\n\s*\n").split(tmp)
 726                 for par in pars:
 727                     if index > 0:
 728                         par = par[1:]
 729                     par = split(par, ' ')
 730                     for p in par:
 731                         if not p:
 732                             leftindent = leftindent+1
 733                         else:
 734                             break
 735                     left.append(leftindent)
 736                     leftindent = 0
 737                     par.reverse()
 738                     for p in par:
 739                         if not p:
 740                             rightindent = rightindent + 1
 741                         else:
 742                             break
 743                     right.append(rightindent)
 744                     rightindent = 0
 745                 left.sort()
 746                 right.sort()
 747
 748                 if topindent == bottomindent:
 749                     valign="middle"
 750                 elif topindent < 1:
 751                     valign="top"
 752                 elif bottomindent < 1:
 753                     valign="bottom"
 754                 else:
 755                     valign="middle"
 756
 757                 if left[0] < 1:
 758                     align = "left"
 759                 elif right[0] < 1:
 760                     align = "right"
 761                 elif left[0] > 1 and right[0] > 1:
 762                     align="center"
 763                 else:
 764                     align="left"
 765
 766                 cols.append(row[index][0],row[index][1],align,valign,row[index][2])
 767             rows.append(cols)
 768             cols = []
 769         return StructuredTextTable(rows,text,subs,indent=paragraph.indent)
 770
 771     def doc_bullet(self, paragraph, expr = re.compile(r'\s*[-*o]\s+').match):
 772         top=paragraph.getColorizableTexts()[0]
 773         m=expr(top)
 774
 775         if not m:
 776             return None
 777
 778         subs=paragraph.getSubparagraphs()
 779         if top[-2:]=='::':
 780            subs=[StructuredTextExample(subs)]
 781            top=top[:-1]
 782         return StructuredTextBullet(top[m.span()[1]:], subs,
 783                                      indent=paragraph.indent,
 784                                      bullet=top[:m.span()[1]]
 785                                      )
 786
 787     def doc_numbered(
 788         self, paragraph,
 789         expr = re.compile(r'(\s*[%s]+\.)|(\s*[0-9]+\.)|(\s*[0-9]+\s+)' % letters).match):
 790
 791         # This is the old expression. It had a nasty habit
 792         # of grabbing paragraphs that began with a single
 793         # letter word even if there was no following period.
 794
 795         #expr = re.compile('\s*'
 796         #                   '(([a-zA-Z]|[0-9]+|[ivxlcdmIVXLCDM]+)\.)*'
 797         #                   '([a-zA-Z]|[0-9]+|[ivxlcdmIVXLCDM]+)\.?'
 798         #                   '\s+').match):
 799
 800         top=paragraph.getColorizableTexts()[0]
 801         m=expr(top)
 802         if not m: return None
 803         subs=paragraph.getSubparagraphs()
 804         if top[-2:]=='::':
 805            subs=[StructuredTextExample(subs)]
 806            top=top[:-1]
 807         return StructuredTextNumbered(top[m.span()[1]:], subs,
 808                                         indent=paragraph.indent,
 809                                         number=top[:m.span()[1]])
 810
 811     def doc_description(
 812         self, paragraph,
 813         delim = re.compile(r'\s+--\s+').search,
 814         nb=re.compile(r'[^\000- ]').search,
 815         ):
 816
 817         top=paragraph.getColorizableTexts()[0]
 818         d=delim(top)
 819         if not d: return None
 820         start, end = d.span()
 821         title=top[:start]
 822         if find(title, '\n') >= 0: return None
 823         if not nb(title): return None
 824         d=top[start:end]
 825         top=top[end:]
 826
 827         subs=paragraph.getSubparagraphs()
 828         if top[-2:]=='::':
 829            subs=[StructuredTextExample(subs)]
 830            top=top[:-1]
 831
 832         return StructuredTextDescription(
 833            title, top, subs,
 834            indent=paragraph.indent,
 835            delim=d)
 836
 837     def doc_header(self, paragraph,
 838                     expr    = re.compile(r'[ %s0-9.:/,-_*<>\?\'\"]+' % letters).match
 839                     ):
 840         subs=paragraph.getSubparagraphs()
 841         if not subs: return None
 842         top=paragraph.getColorizableTexts()[0]
 843         if not strip(top): return None
 844         if top[-2:]=='::':
 845            subs=StructuredTextExample(subs)
 846            if strip(top)=='::': return subs
 847            return ST.StructuredTextParagraph(
 848               top[:-1], [subs], indent=paragraph.indent)
 849
 850         if find(top,'\n') >= 0: return None
 851         return StructuredTextSection(top, subs, indent=paragraph.indent)
 852
 853     def doc_literal(
 854         self, s,
 855         expr=re.compile(
 856           r"(?:\s|^)'"                                                  # open
 857           r"([^ \t\n\r\f\v']|[^ \t\n\r\f\v'][^\n']*[^ \t\n\r\f\v'])" # contents
 858           r"'(?:\s|[,.;:!?]|$)"                                        # close
 859           ).search):
 860
 861         r=expr(s)
 862         if r:
 863            start, end = r.span(1)
 864            return (StructuredTextLiteral(s[start:end]), start-1, end+1)
 865         else:
 866            return None
 867
 868     def doc_emphasize(
 869         self, s,
 870         expr = re.compile(r'\s*\*([ \n%s0-9]+)\*(?!\*|-)' % lettpunc).search
 871         ):
 872
 873         r=expr(s)
 874         if r:
 875            start, end = r.span(1)
 876            return (StructuredTextEmphasis(s[start:end]), start-1, end+1)
 877         else:
 878            return None
 879
 880     def doc_inner_link(self,
 881                        s,
 882                        expr1 = re.compile(r"\.\.\s*").search,
 883                        expr2 = re.compile(r"\[[%s0-9]+\]" % letters ).search):
 884
 885         # make sure we dont grab a named link
 886         if expr2(s) and expr1(s):
 887             start1,end1 = expr1(s).span()
 888             start2,end2 = expr2(s).span()
 889             if end1 == start2:
 890                 # uh-oh, looks like a named link
 891                 return None
 892             else:
 893                 # the .. is somewhere else, ignore it
 894                 return (StructuredTextInnerLink(s[start2+1,end2-1],start2,end2))
 895             return None
 896         elif expr2(s) and not expr1(s):
 897             start,end = expr2(s).span()
 898             return (StructuredTextInnerLink(s[start+1:end-1]),start,end)
 899         return None
 900
 901     def doc_named_link(self,
 902                        s,
 903                        expr=re.compile(r"(\.\.\s)(\[[%s0-9]+\])" % letters).search):
 904
 905         result = expr(s)
 906         if result:
 907             start,end   = result.span(2)
 908             a,b = result.span(1)
 909             str = strip(s[a:b]) + s[start:end]
 910             st,en       = result.span()
 911             return (StructuredTextNamedLink(str),st,en)
 912             #return (StructuredTextNamedLink(s[st:en]),st,en)
 913         return None
 914
 915     def doc_underline(self,
 916                       s,
 917                       expr=re.compile(r"\s+\_([%s0-9\s]+)\_" % lettpunc).search):
 918
 919         result = expr(s)
 920         if result:
 921             start,end = result.span(1)
 922             st,e = result.span()
 923             return (StructuredTextUnderline(s[start:end]),st,e)
 924         else:
 925             return None
 926
 927     def doc_strong(self,
 928                    s,
 929         expr = re.compile(r'\s*\*\*([ \n%s0-9]+)\*\*' % lettpunc).search
 930         ):
 931
 932         r=expr(s)
 933         if r:
 934            start, end = r.span(1)
 935            return (StructuredTextStrong(s[start:end]), start-2, end+2)
 936         else:
 937            return None
 938
    ## Some constants to make the doc_href() regex easier to read.
    ## Each fragment is exactly one capture group.  doc_href() joins them
    ## as quoted-text + separator + URL + spaces, so group 1 is the quoted
    ## text and group 3 is the URL.
    _DQUOTEDTEXT = r'("[%s0-9\n\-\.\,\;\(\)\/\:\/\*\']+")'  % letters ## double quoted text
    ## A run of URL characters; sentence punctuation is part of the class,
    ## so a match may end in punctuation.
    _URL_AND_PUNC = r'([%s0-9\@\.\,\?\!\/\:\;\-\#\~]+)' % letters
    ## Optional run of whitespace.
    _SPACES = r'(\s*)'
 943
 944     def doc_href(self, s,
 945                  expr1 = re.compile(_DQUOTEDTEXT + "(:)" + _URL_AND_PUNC + _SPACES).search,
 946                  expr2 = re.compile(_DQUOTEDTEXT + r'(\,\s+)' + _URL_AND_PUNC + _SPACES).search):
 947
 948         punctuation = re.compile(r"[\,\.\?\!\;]+").match
 949         r=expr1(s) or expr2(s)
 950
 951         if r:
 952             # need to grab the href part and the
 953             # beginning part
 954
 955             start,e = r.span(1)
 956             name    = s[start:e]
 957             name    = replace(name,'"','',2)
 958             #start   = start + 1
 959             st,end   = r.span(3)
 960             if punctuation(s[end-1:end]):
 961                 end = end -1
 962             link    = s[st:end]
 963             #end     = end - 1
 964
 965             # name is the href title, link is the target
 966             # of the href
 967             return (StructuredTextLink(name, href=link),
 968                     start, end)
 969
 970             #return (StructuredTextLink(s[start:end], href=s[start:end]),
 971             #        start, end)
 972         else:
 973             return None
 974
 975     def doc_sgml(self,s,expr=re.compile(r"\<[%s0-9\.\=\'\"\:\/\-\#\+\s\*]+\>" % letters).search):
 976         """
 977         SGML text is ignored and outputed as-is
 978         """
 979         r = expr(s)
 980         if r:
 981             start,end = r.span()
 982             text = s[start:end]
 983             return (StructuredTextSGML(text),start,end)
 984
 985
 986     def doc_xref(self, s,
 987         expr = re.compile('\[([%s0-9\-.:/;,\n\~]+)\]' % letters).search
 988         ):
 989         r = expr(s)
 990         if r:
 991             start, end = r.span(1)
 992             return (StructuredTextXref(s[start:end]), start-1, end+1)
 993         else:
 994             return None
 995
 996
 997
 998