wxPython/samples/stxview/StructuredText/StructuredText.py

   1 #! /usr/bin/env python -- # -*- python -*-
   2 ##############################################################################
   3 #
   4 # Zope Public License (ZPL) Version 1.0
   5 # -------------------------------------
   6 #
   7 # Copyright (c) Digital Creations.  All rights reserved.
   8 #
   9 # This license has been certified as Open Source(tm).
  10 #
  11 # Redistribution and use in source and binary forms, with or without
  12 # modification, are permitted provided that the following conditions are
  13 # met:
  14 #
  15 # 1. Redistributions in source code must retain the above copyright
  16 #    notice, this list of conditions, and the following disclaimer.
  17 #
  18 # 2. Redistributions in binary form must reproduce the above copyright
  19 #    notice, this list of conditions, and the following disclaimer in
  20 #    the documentation and/or other materials provided with the
  21 #    distribution.
  22 #
  23 # 3. Digital Creations requests that attribution be given to Zope
  24 #    in any manner possible. Zope includes a "Powered by Zope"
  25 #    button that is installed by default. While it is not a license
  26 #    violation to remove this button, it is requested that the
  27 #    attribution remain. A significant investment has been put
  28 #    into Zope, and this effort will continue if the Zope community
  29 #    continues to grow. This is one way to assure that growth.
  30 #
  31 # 4. All advertising materials and documentation mentioning
  32 #    features derived from or use of this software must display
  33 #    the following acknowledgement:
  34 #
  35 #      "This product includes software developed by Digital Creations
  36 #      for use in the Z Object Publishing Environment
  37 #      (http://www.zope.org/)."
  38 #
  39 #    In the event that the product being advertised includes an
  40 #    intact Zope distribution (with copyright and license included)
  41 #    then this clause is waived.
  42 #
  43 # 5. Names associated with Zope or Digital Creations must not be used to
  44 #    endorse or promote products derived from this software without
  45 #    prior written permission from Digital Creations.
  46 #
  47 # 6. Modified redistributions of any form whatsoever must retain
  48 #    the following acknowledgment:
  49 #
  50 #      "This product includes software developed by Digital Creations
  51 #      for use in the Z Object Publishing Environment
  52 #      (http://www.zope.org/)."
  53 #
  54 #    Intact (re-)distributions of any official Zope release do not
  55 #    require an external acknowledgement.
  56 #
  57 # 7. Modifications are encouraged but must be packaged separately as
  58 #    patches to official Zope releases.  Distributions that do not
  59 #    clearly separate the patches from the original work must be clearly
  60 #    labeled as unofficial distributions.  Modifications which do not
  61 #    carry the name Zope may be packaged in any form, as long as they
  62 #    conform to all of the clauses above.
  63 #
  64 #
  65 # Disclaimer
  66 #
  67 #   THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
  68 #   EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  69 #   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  70 #   PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
  71 #   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  72 #   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  73 #   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  74 #   USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  75 #   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  76 #   OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  77 #   OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  78 #   SUCH DAMAGE.
  79 #
  80 #
  81 # This software consists of contributions made by Digital Creations and
  82 # many individuals on behalf of Digital Creations.  Specific
  83 # attributions are listed in the accompanying credits file.
  84 #
  85 ##############################################################################
  86 '''Structured Text Manipulation
  87
  88 Parse a structured text string into a form that can be used with
  89 structured formats, like html.
  90
  91 Structured text is text that uses indentation and simple
  92 symbology to indicate the structure of a document.
  93
  94 A structured string consists of a sequence of paragraphs separated by
  95 one or more blank lines.  Each paragraph has a level which is defined
  96 as the minimum indentation of the paragraph.  A paragraph is a
  97 sub-paragraph of another paragraph if the other paragraph is the last
  98 preceding paragraph that has a lower level.
  99
 100 Special symbology is used to indicate special constructs:
 101
 102 - A single-line paragraph whose immediately succeeding paragraphs are lower
 103   level is treated as a header.
 104
 105 - A paragraph that begins with a '-', '*', or 'o' is treated as an
 106   unordered list (bullet) element.
 107
 108 - A paragraph that begins with a sequence of digits followed by a
 109   white-space character is treated as an ordered list element.
 110
 111 - A paragraph that begins with a sequence of sequences, where each
 112   sequence is a sequence of digits or a sequence of letters followed
 113   by a period, is treated as an ordered list element.
 114
 115 - A paragraph with a first line that contains some text, followed by
 116   some white-space and '--' is treated as
 117   a descriptive list element. The leading text is treated as the
 118   element title.
 119
 120 - Sub-paragraphs of a paragraph that ends in the word 'example' or the
  121   word 'examples', or '::' are treated as example code and are output as is.
 122
  123 - Text enclosed in single quotes (with white-space to the left of the
  124   first quote and whitespace or punctuation to the right of the second quote)
 125   is treated as example code.
 126
 127 - Text surrounded by '*' characters (with white-space to the left of the
  128   first '*' and whitespace or punctuation to the right of the second '*')
 129   is emphasized.
 130
 131 - Text surrounded by '**' characters (with white-space to the left of the
  132   first '**' and whitespace or punctuation to the right of the second '**')
 133   is made strong.
 134
 135 - Text surrounded by '_' underscore characters (with whitespace to the left
 136   and whitespace or punctuation to the right) is made underlined.
 137
  138 - Text enclosed by double quotes followed by a colon, a URL, and concluded
 139   by punctuation plus white space, *or* just white space, is treated as a
 140   hyper link. For example:
 141
 142     "Zope":http://www.zope.org/ is ...
 143
 144   Is interpreted as '<a href="http://www.zope.org/">Zope</a> is ....'
 145   Note: This works for relative as well as absolute URLs.
 146
 147 - Text enclosed by double quotes followed by a comma, one or more spaces,
 148   an absolute URL and concluded by punctuation plus white space, or just
 149   white space, is treated as a hyper link. For example:
 150
 151     "mail me", mailto:amos@digicool.com.
 152
 153   Is interpreted as '<a href="mailto:amos@digicool.com">mail me</a>.'
 154
 155 - Text enclosed in brackets which consists only of letters, digits,
 156   underscores and dashes is treated as hyper links within the document.
 157   For example:
 158
 159     As demonstrated by Smith [12] this technique is quite effective.
 160
 161   Is interpreted as '... by Smith <a href="#12">[12]</a> this ...'. Together
 162   with the next rule this allows easy coding of references or end notes.
 163
 164 - Text enclosed in brackets which is preceded by the start of a line, two
 165   periods and a space is treated as a named link. For example:
 166
 167     .. [12] "Effective Techniques" Smith, Joe ...
 168
 169   Is interpreted as '<a name="12">[12]</a> "Effective Techniques" ...'.
 170   Together with the previous rule this allows easy coding of references or
 171   end notes.
 172
 173
 174 - A paragraph that has blocks of text enclosed in '||' is treated as a
 175   table. The text blocks correspond to table cells and table rows are
 176   denoted by newlines. By default the cells are center aligned. A cell
 177   can span more than one column by preceding a block of text with an
 178   equivalent number of cell separators '||'. Newlines and '|' cannot
 179   be a part of the cell text. For example:
 180
 181       |||| **Ingredients** ||
 182       || *Name* || *Amount* ||
 183       ||Spam||10||
 184       ||Eggs||3||
 185
 186   is interpreted as::
 187
 188     <TABLE BORDER=1 CELLPADDING=2>
 189      <TR>
 190       <TD ALIGN=CENTER COLSPAN=2> <strong>Ingredients</strong> </TD>
 191      </TR>
 192      <TR>
 193       <TD ALIGN=CENTER COLSPAN=1> <em>Name</em> </TD>
 194       <TD ALIGN=CENTER COLSPAN=1> <em>Amount</em> </TD>
 195      </TR>
 196      <TR>
 197       <TD ALIGN=CENTER COLSPAN=1>Spam</TD>
 198       <TD ALIGN=CENTER COLSPAN=1>10</TD>
 199      </TR>
 200      <TR>
 201       <TD ALIGN=CENTER COLSPAN=1>Eggs</TD>
 202       <TD ALIGN=CENTER COLSPAN=1>3</TD>
 203      </TR>
 204     </TABLE>
 205
 206
 207 $Id$'''
 208 #     Copyright
 209 #
 210 #       Copyright 1996 Digital Creations, L.C., 910 Princess Anne
 211 #       Street, Suite 300, Fredericksburg, Virginia 22401 U.S.A. All
 212 #       rights reserved.  Copyright in this software is owned by DCLC,
 213 #       unless otherwise indicated. Permission to use, copy and
 214 #       distribute this software is hereby granted, provided that the
 215 #       above copyright notice appear in all copies and that both that
 216 #       copyright notice and this permission notice appear. Note that
 217 #       any product, process or technology described in this software
 218 #       may be the subject of other Intellectual Property rights
 219 #       reserved by Digital Creations, L.C. and are not licensed
 220 #       hereunder.
 221 #
 222 #     Trademarks
 223 #
 224 #       Digital Creations & DCLC, are trademarks of Digital Creations, L.C..
 225 #       All other trademarks are owned by their respective companies.
 226 #
 227 #     No Warranty
 228 #
 229 #       The software is provided "as is" without warranty of any kind,
 230 #       either express or implied, including, but not limited to, the
 231 #       implied warranties of merchantability, fitness for a particular
 232 #       purpose, or non-infringement. This software could include
 233 #       technical inaccuracies or typographical errors. Changes are
 234 #       periodically made to the software; these changes will be
 235 #       incorporated in new editions of the software. DCLC may make
 236 #       improvements and/or changes in this software at any time
 237 #       without notice.
 238 #
 239 #     Limitation Of Liability
 240 #
 241 #       In no event will DCLC be liable for direct, indirect, special,
 242 #       incidental, economic, cover, or consequential damages arising
 243 #       out of the use of or inability to use this software even if
 244 #       advised of the possibility of such damages. Some states do not
 245 #       allow the exclusion or limitation of implied warranties or
 246 #       limitation of liability for incidental or consequential
 247 #       damages, so the above limitation or exclusion may not apply to
 248 #       you.
 249 #
 250 #
 251 # If you have questions regarding this software,
 252 # contact:
 253 #
 254 #   Jim Fulton, jim@digicool.com
 255 #
 256 #   (540) 371-6909
 257 #
 258 # $Log$
 259 # Revision 1.1  2001/03/10 05:07:20  RD
 260 # Added some simple sample apps
 261 #
 262 # Revision 1.27  2000/04/21 13:38:10  jim
 263 # Added closing list tags. Woo hoo!
 264 #
 265 # Revision 1.26  2000/03/14 17:22:04  brian
 266 # Allow ~ in hrefs.
 267 #
 268 # Revision 1.25  2000/02/17 00:53:24  klm
 269 # HTML._str(): We were getting preformatted examples rendered twice,
 270 # second time without preformatting.  Problem was a missing 'continue'
 271 # in one of the cases.
 272 #
 273 # Revision 1.24  1999/12/13 16:32:48  klm
 274 # Incorporated pavlos christoforou's mods to handle simple tables.  From
 275 # his web page at http://www.zope.org/Members/gaaros/StructuredText:
 276 #
 277 #   Structured Text module with table support
 278 #
 279 #   A paragraph that has blocks of text enclosed in '||' is treated as a
 280 #   table. The text blocks correspond to table cells and table rows are
 281 #   denoted by newlines. By default the cells are center aligned. You can
 282 #   change the defaults by modifying the CELL,ROW and TABLE class
 283 #   attributes in class Table. A cell can span more than one column by
 284 #   preceding a block of text with an equivalent number of cell separators
 285 #   '||'. Newlines and '|' cannot be a part of the cell text. If you need
 286 #   newlines use <BR>. For example:
 287 #
 288 #        |||| **Ingredients** ||
 289 #        || *Name* || *Amount* ||
 290 #        ||Spam||10||
 291 #        ||Eggs||3||
 292 #
 293 # Revision 1.23  1999/08/03 20:49:05  jim
 294 # Fixed to allow list elements to introduce examples.
 295 #
 296 # Restructured _str using continue to avoid excessive nesting.
 297 #
 298 # Revision 1.22  1999/08/02 22:01:28  jim
 299 # Fixed a bunch of bugs introduced by making ts_regex actually thread
 300 # safe.
 301 #
 302 # Also localized a bunch of regular expressions
 303 # using "static" variables (aka always default arguments).
 304 #
 305 # Revision 1.21  1999/08/02 13:26:52  jim
 306 # paragraph_divider needs to be a regular (thread-unsafe) regex
 307 # since it gets passed to ts_regex.split, which is thread-safe
 308 # and wants to use regs.
 309 #
 310 # Revision 1.20  1999/07/21 13:33:59  jim
 311 # untabified.
 312 #
 313 # Revision 1.19  1999/07/15 16:43:15  jim
 314 # Checked in Scott Robertson's thread-safety fixes.
 315 #
 316 # Revision 1.18  1999/03/24 00:03:18  klm
 317 # Provide for relative links, eg <a href="file_in_same_dir">whatever</a>,
 318 # as:
 319 #
 320 #   "whatever", :file_in_same_dir
 321 #
 322 # or
 323 #
 324 #   "whatever"::file_in_same_dir
 325 #
 326 # .__init__(): relax the second gsub, using a '*' instead of a '+', so
 327 # the stuff before the ':' can be missing, and also do postprocessing so
 328 # any resulting '<a href=":file_in_same_dir">'s have the superfluous ':'
 329 # removed.  *Seems* good!
 330 #
 331 # Revision 1.17  1999/03/12 23:21:39  klm
 332 # Gratuituous checkin to test my cvs *update* logging hook.
 333 #
 334 # Revision 1.16  1999/03/12 17:12:12  klm
 335 # Added support for underlined elements, in the obvious way (and
 336 # included an entry in the module docstring for it).
 337 #
 338 # Added an entry in the module docstring describing what i *guess* is
 339 # the criterion for identifying header elements.  (I'm going to have to
 340 # delve into and understand the framework a bit better before *knowing*
 341 # this is the case.)
 342 #
 343 # Revision 1.15  1999/03/11 22:40:18  klm
 344 # Handle links that include '#' named links.
 345 #
 346 # Revision 1.14  1999/03/11 01:35:19  klm
 347 # Fixed a small typo, and refined the module docstring link example, in
 348 # order to do a checkin to exercise the CVS repository mirroring.  Might
 349 # as well include my last checkin message, with some substantial stuff:
 350 #
 351 # Links are now recognized whether or not the candidate strings are
 352 # terminated with punctuation before the trailing whitespace.  The old
 353 # form - trailing punctuation then whitespace - is preserved, but the
 354 # punctuation is now unnecessary.
 355 #
 356 # The regular expressions are a bit more complicated, but i've factored
 357 # out the common parts and but them in variables with suggestive names,
 358 # which may make them easier to understand.
 359 #
 360 # Revision 1.13  1999/03/11 00:49:57  klm
 361 # Links are now recognized whether or not the candidate strings are
 362 # terminated with punctuation before the trailing whitespace.  The old
 363 # form - trailing punctuation then whitespace - is preserved, but the
 364 # punctuation is now unnecessary.
 365 #
 366 # The regular expressions are a bit more complicated, but i've factored
 367 # out the common parts and but them in variables with suggestive names,
 368 # which may make them easier to understand.
 369 #
 370 # Revision 1.12  1999/03/10 00:15:46  klm
 371 # Committing with version 1.0 of the license.
 372 #
 373 # Revision 1.11  1999/02/08 18:13:12  klm
 374 # Trival checkin (spelling fix "preceedeing" -> "preceding" and similar)
 375 # to see what pitfalls my environment presents to accomplishing a
 376 # successful checkin.  (It turns out that i can't do it from aldous because
 377 # the new version of cvs doesn't support the '-t' and '-f' options in the
 378 # cvswrappers file...)
 379 #
 380 # Revision 1.10  1998/12/29 22:30:43  amos
 381 # Improved doc string to describe hyper link and references capabilities.
 382 #
 383 # Revision 1.9  1998/12/04 20:15:31  jim
 384 # Detabification and new copyright.
 385 #
 386 # Revision 1.8  1998/02/27 18:45:22  jim
 387 # Various updates, including new indentation utilities.
 388 #
 389 # Revision 1.7  1997/12/12 15:39:54  jim
 390 # Added level as argument for html_with_references.
 391 #
 392 # Revision 1.6  1997/12/12 15:27:25  jim
 393 # Added additional pattern matching for HTML references.
 394 #
 395 # Revision 1.5  1997/03/08 16:01:03  jim
 396 # Moved code to recognize: "foo bar", url.
 397 # into object initializer, so it gets applied in all cases.
 398 #
 399 # Revision 1.4  1997/02/17 23:36:35  jim
 400 # Added support for "foo title", http:/foohost/foo
 401 #
 402 # Revision 1.3  1996/12/06 15:57:37  jim
 403 # Fixed bugs in character tags.
 404 #
 405 # Added -t command-line option to generate title if:
 406 #
 407 #    - The first paragraph is one line (i.e. a heading) and
 408 #
 409 #    - All other paragraphs are indented.
 410 #
 411 # Revision 1.2  1996/10/28 13:56:02  jim
 412 # Fixed bug in ordered lists.
 413 # Added option for either HTML-style headings or descriptive-list style
 414 # headings.
 415 #
 416 # Revision 1.1  1996/10/23 14:00:45  jim
 417 # *** empty log message ***
 418 #
 419 #
 420 #
 421
 422 import ts_regex, regex
 423 from ts_regex import gsub
 424 from string import split, join, strip, find
 425
 426 def untabify(aString,
 427              indent_tab=ts_regex.compile('\(\n\|^\)\( *\)\t').search_group,
 428              ):
 429     '''\
 430     Convert indentation tabs to spaces.
 431     '''
 432     result=''
 433     rest=aString
 434     while 1:
 435         ts_results = indent_tab(rest, (1,2))
 436         if ts_results:
 437             start, grps = ts_results
 438             lnl=len(grps[0])
 439             indent=len(grps[1])
 440             result=result+rest[:start]
 441             rest="\n%s%s" % (' ' * ((indent/8+1)*8),
 442                              rest[start+indent+1+lnl:])
 443         else:
 444             return result+rest
 445
 446 def indent(aString, indent=2):
 447     """Indent a string the given number of spaces"""
 448     r=split(untabify(aString),'\n')
 449     if not r: return ''
 450     if not r[-1]: del r[-1]
 451     tab=' '*level
 452     return "%s%s\n" % (tab,join(r,'\n'+tab))
 453
 454 def reindent(aString, indent=2, already_untabified=0):
 455     "reindent a block of text, so that the minimum indent is as given"
 456
 457     if not already_untabified: aString=untabify(aString)
 458
 459     l=indent_level(aString)[0]
 460     if indent==l: return aString
 461
 462     r=[]
 463
 464     append=r.append
 465
 466     if indent > l:
 467         tab=' ' * (indent-l)
 468         for s in split(aString,'\n'): append(tab+s)
 469     else:
 470         l=l-indent
 471         for s in split(aString,'\n'): append(s[l:])
 472
 473     return join(r,'\n')
 474
 475 def indent_level(aString,
 476                  indent_space=ts_regex.compile('\n\( *\)').search_group,
 477                  ):
 478     '''\
 479     Find the minimum indentation for a string, not counting blank lines.
 480     '''
 481     start=0
 482     text='\n'+aString
 483     indent=l=len(text)
 484     while 1:
 485
 486         ts_results = indent_space(text, (1,2), start)
 487         if ts_results:
 488             start, grps = ts_results
 489             i=len(grps[0])
 490             start=start+i+1
 491             if start < l and text[start] != '\n':       # Skip blank lines
 492                 if not i: return (0,aString)
 493                 if i < indent: indent = i
 494         else:
 495             return (indent,aString)
 496
 497 def paragraphs(list,start):
 498     l=len(list)
 499     level=list[start][0]
 500     i=start+1
 501     while i < l and list[i][0] > level: i=i+1
 502     return i-1-start
 503
 504 def structure(list):
 505     if not list: return []
 506     i=0
 507     l=len(list)
 508     r=[]
 509     while i < l:
 510         sublen=paragraphs(list,i)
 511         i=i+1
 512         r.append((list[i-1][1],structure(list[i:i+sublen])))
 513         i=i+sublen
 514     return r
 515
 516
 517 class Table:
 518     CELL='  <TD ALIGN=CENTER COLSPAN=%i>%s</TD>\n'
 519     ROW=' <TR>\n%s </TR>\n'
 520     TABLE='\n<TABLE BORDER=1 CELLPADDING=2>\n%s</TABLE>'
 521
 522     def create(self,aPar,td=ts_regex.compile(
 523         '[ \t\n]*||\([^\0|]*\)').match_group):
 524         '''parses a table and returns nested list representing the
 525         table'''
 526         self.table=[]
 527         text=filter(None,split(aPar,'\n'))
 528         for line in text:
 529             row=[]
 530             while 1:
 531                 pos=td(line,(1,))
 532                 if not pos:return 0
 533                 row.append(pos[1])
 534                 if pos[0]==len(line):break
 535                 line=line[pos[0]:]
 536             self.table.append(row)
 537         return 1
 538
 539     def html(self):
 540         '''Creates an HTML representation of table'''
 541         htmltable=[]
 542         for row in self.table:
 543             htmlrow=[]
 544             colspan=1
 545             for cell in row:
 546                 if cell=='':
 547                     colspan=colspan+1
 548                     continue
 549                 else:
 550                     htmlrow.append(self.CELL%(colspan,cell))
 551                     colspan=1
 552             htmltable.append(self.ROW%join(htmlrow,''))
 553         return self.TABLE%join(htmltable,'')
 554
# Pattern fragments used to build the hyperlink expressions in
# StructuredText.__init__.  Groups are written \( ... \) here, so a bare
# parenthesis would be an ordinary character.

# Optionally a comma or one of ". : ? ;" (the latter in a group of its own).
optional_trailing_punctuation = '\(,\|\([.:?;]\)\)?'
# One character in the range NUL through space, captured as a group.
trailing_space = '\([\0- ]\)'
# One character that is none of "- , . ? :" and is outside NUL..space.
not_punctuation_or_whitespace = "[^-,.?:\0- ]"
# Module-level Table instance; HTML._str calls its create() and html().
table=Table()
 559
 560 class StructuredText:
 561
 562     """Model text as structured collection of paragraphs.
 563
 564     Structure is implied by the indentation level.
 565
 566     This class is intended as a base classes that do actual text
 567     output formatting.
 568     """
 569
 570     def __init__(self, aStructuredString, level=0,
 571                  paragraph_divider=regex.compile('\(\n *\)+\n'),
 572                  ):
 573         '''Convert a structured text string into a structured text object.
 574
 575         Aguments:
 576
 577           aStructuredString -- The string to be parsed.
 578           level -- The level of top level headings to be created.
 579         '''
 580
 581         aStructuredString = gsub(
 582             '\"\([^\"\0]+\)\":'         # title: <"text":>
 583             + ('\([-:a-zA-Z0-9_,./?=@#~]+%s\)'
 584                % not_punctuation_or_whitespace)
 585             + optional_trailing_punctuation
 586             + trailing_space,
 587             '<a href="\\2">\\1</a>\\4\\5\\6',
 588             aStructuredString)
 589
 590         aStructuredString = gsub(
 591             '\"\([^\"\0]+\)\",[\0- ]+'            # title: <"text", >
 592             + ('\([a-zA-Z]*:[-:a-zA-Z0-9_,./?=@#~]*%s\)'
 593                % not_punctuation_or_whitespace)
 594             + optional_trailing_punctuation
 595             + trailing_space,
 596             '<a href="\\2">\\1</a>\\4\\5\\6',
 597             aStructuredString)
 598
 599         protoless = find(aStructuredString, '<a href=":')
 600         if protoless != -1:
 601             aStructuredString = gsub('<a href=":', '<a href="',
 602                                      aStructuredString)
 603
 604         self.level=level
 605         paragraphs=ts_regex.split(untabify(aStructuredString),
 606                                   paragraph_divider)
 607         paragraphs=map(indent_level,paragraphs)
 608
 609         self.structure=structure(paragraphs)
 610
 611
 612     def __str__(self):
 613         return str(self.structure)
 614
 615
# Pattern fragments for the character markup ('*', '**', '_' and quote
# delimited text) that ctag() compiles into its default arguments.

# What may precede marked-up text: a character in NUL..space, a "(",
# or the "^" anchor.  Captured as a group.
ctag_prefix="\([\0- (]\|^\)"
# What may follow marked-up text: a character in NUL..space, one of
# ", . : ; ! ? )", or the "$" anchor.  Captured as a group.
ctag_suffix="\([\0- ,.:;!?)]\|$\)"
# Template with six %s slots, all filled with the delimiter character:
# delimiter, captured text, delimiter.  The text is either a single
# non-delimiter character, or a run of non-delimiter characters that
# neither starts nor ends with a character in NUL..space.
ctag_middle="[%s]\([^\0- %s][^%s]*[^\0- %s]\|[^%s]\)[%s]"
# Same as ctag_middle but with the delimiter doubled on both sides
# (eight %s slots).
ctag_middl2="[%s][%s]\([^\0- %s][^%s]*[^\0- %s]\|[^%s]\)[%s][%s]"
 620
 621 def ctag(s,
 622          em=regex.compile(
 623              ctag_prefix+(ctag_middle % (("*",)*6) )+ctag_suffix),
 624          strong=regex.compile(
 625              ctag_prefix+(ctag_middl2 % (("*",)*8))+ctag_suffix),
 626          under=regex.compile(
 627              ctag_prefix+(ctag_middle % (("_",)*6) )+ctag_suffix),
 628          code=regex.compile(
 629              ctag_prefix+(ctag_middle % (("\'",)*6))+ctag_suffix),
 630          ):
 631     if s is None: s=''
 632     s=gsub(strong,'\\1<strong>\\2</strong>\\3',s)
 633     s=gsub(under, '\\1<u>\\2</u>\\3',s)
 634     s=gsub(code,  '\\1<code>\\2</code>\\3',s)
 635     s=gsub(em,    '\\1<em>\\2</em>\\3',s)
 636     return s
 637
 638 class HTML(StructuredText):
 639
 640     '''\
 641     An HTML structured text formatter.
 642     '''\
 643
 644     def __str__(self,
 645                 extra_dl=regex.compile("</dl>\n<dl>"),
 646                 extra_ul=regex.compile("</ul>\n<ul>"),
 647                 extra_ol=regex.compile("</ol>\n<ol>"),
 648                 ):
 649         '''\
 650         Return an HTML string representation of the structured text data.
 651
 652         '''
 653         s=self._str(self.structure,self.level)
 654         s=gsub(extra_dl,'\n',s)
 655         s=gsub(extra_ul,'\n',s)
 656         s=gsub(extra_ol,'\n',s)
 657         return s
 658
 659     def ul(self, before, p, after):
 660         if p: p="<p>%s</p>" % strip(ctag(p))
 661         return ('%s<ul><li>%s\n%s\n</li></ul>\n'
 662                 % (before,p,after))
 663
 664     def ol(self, before, p, after):
 665         if p: p="<p>%s</p>" % strip(ctag(p))
 666         return ('%s<ol><li>%s\n%s\n</li></ol>\n'
 667                 % (before,p,after))
 668
 669     def dl(self, before, t, d, after):
 670         return ('%s<dl><dt>%s</dt><dd><p>%s</p>\n%s\n</dd></dl>\n'
 671                 % (before,ctag(t),ctag(d),after))
 672
 673     def head(self, before, t, level, d):
 674         if level > 0 and level < 6:
 675             return ('%s<h%d>%s</h%d>\n%s\n'
 676                     % (before,level,strip(ctag(t)),level,d))
 677
 678         t="<p><strong>%s</strong><p>" % strip(ctag(t))
 679         return ('%s<dl><dt>%s\n</dt><dd>%s\n</dd></dl>\n'
 680                 % (before,t,d))
 681
 682     def normal(self,before,p,after):
 683         return '%s<p>%s</p>\n%s\n' % (before,ctag(p),after)
 684
 685     def pre(self,structure,tagged=0):
 686         if not structure: return ''
 687         if tagged:
 688             r=''
 689         else:
 690             r='<PRE>\n'
 691         for s in structure:
 692             r="%s%s\n\n%s" % (r,html_quote(s[0]),self.pre(s[1],1))
 693         if not tagged: r=r+'</PRE>\n'
 694         return r
 695
 696     def table(self,before,table,after):
 697         return '%s<p>%s</p>\n%s\n' % (before,ctag(table),after)
 698
 699     def _str(self,structure,level,
 700              # Static
 701              bullet=ts_regex.compile('[ \t\n]*[o*-][ \t\n]+\([^\0]*\)'
 702                                      ).match_group,
 703              example=ts_regex.compile('[\0- ]examples?:[\0- ]*$'
 704                                       ).search,
 705              dl=ts_regex.compile('\([^\n]+\)[ \t]+--[ \t\n]+\([^\0]*\)'
 706                                  ).match_group,
 707              nl=ts_regex.compile('\n').search,
 708              ol=ts_regex.compile(
 709                  '[ \t]*\(\([0-9]+\|[a-zA-Z]+\)[.)]\)+[ \t\n]+\([^\0]*\|$\)'
 710                  ).match_group,
 711              olp=ts_regex.compile('[ \t]*([0-9]+)[ \t\n]+\([^\0]*\|$\)'
 712                                   ).match_group,
 713              ):
 714         r=''
 715         for s in structure:
 716
 717             ts_results = bullet(s[0], (1,))
 718             if ts_results:
 719                 p = ts_results[1]
 720                 if s[0][-2:]=='::' and s[1]: ps=self.pre(s[1])
 721                 else: ps=self._str(s[1],level)
 722                 r=self.ul(r,p,ps)
 723                 continue
 724             ts_results = ol(s[0], (3,))
 725             if ts_results:
 726                 p = ts_results[1]
 727                 if s[0][-2:]=='::' and s[1]: ps=self.pre(s[1])
 728                 else: ps=self._str(s[1],level)
 729                 r=self.ol(r,p,ps)
 730                 continue
 731             ts_results = olp(s[0], (1,))
 732             if ts_results:
 733                 p = ts_results[1]
 734                 if s[0][-2:]=='::' and s[1]: ps=self.pre(s[1])
 735                 else: ps=self._str(s[1],level)
 736                 r=self.ol(r,p,ps)
 737                 continue
 738             ts_results = dl(s[0], (1,2))
 739             if ts_results:
 740                 t,d = ts_results[1]
 741                 r=self.dl(r,t,d,self._str(s[1],level))
 742                 continue
 743             if example(s[0]) >= 0 and s[1]:
 744                 # Introduce an example, using pre tags:
 745                 r=self.normal(r,s[0],self.pre(s[1]))
 746                 continue
 747             if s[0][-2:]=='::' and s[1]:
 748                 # Introduce an example, using pre tags:
 749                 r=self.normal(r,s[0][:-1],self.pre(s[1]))
 750                 continue
 751             if table.create(s[0]):
 752                 ## table support.
 753                 r=self.table(r,table.html(),self._str(s[1],level))
 754                 continue
 755             else:
 756
 757                 if nl(s[0]) < 0 and s[1] and s[0][-1:] != ':':
 758                     # Treat as a heading
 759                     t=s[0]
 760                     r=self.head(r,t,level,
 761                                 self._str(s[1],level and level+1))
 762                 else:
 763                     r=self.normal(r,s[0],self._str(s[1],level))
 764         return r
 765
 766
 767 def html_quote(v,
 768                character_entities=(
 769                        (regex.compile('&'), '&amp;'),
 770                        (regex.compile("<"), '&lt;' ),
 771                        (regex.compile(">"), '&gt;' ),
 772                        (regex.compile('"'), '&quot;')
 773                        )): #"
 774         text=str(v)
 775         for re,name in character_entities:
 776             text=gsub(re,name,text)
 777         return text
 778
 779 def html_with_references(text, level=1):
 780     text = gsub(
 781         '[\0\n].. \[\([-_0-9_a-zA-Z-]+\)\]',
 782         '\n  <a name="\\1">[\\1]</a>',
 783         text)
 784
 785     text = gsub(
 786         '\([\0- ,]\)\[\([0-9_a-zA-Z-]+\)\]\([\0- ,.:]\)',
 787         '\\1<a href="#\\2">[\\2]</a>\\3',
 788         text)
 789
 790     text = gsub(
 791         '\([\0- ,]\)\[\([^]]+\)\.html\]\([\0- ,.:]\)',
 792         '\\1<a href="\\2.html">[\\2]</a>\\3',
 793         text)
 794
 795     return HTML(text,level=level)
 796
 797
 798 def main():
 799     import sys, getopt
 800
 801     opts,args=getopt.getopt(sys.argv[1:],'tw')
 802
 803     if args:
 804         [infile]=args
 805         s=open(infile,'r').read()
 806     else:
 807         s=sys.stdin.read()
 808
 809     if opts:
 810
 811         if filter(lambda o: o[0]=='-w', opts):
 812             print 'Content-Type: text/html\n'
 813
 814         if s[:2]=='#!':
 815             s=ts_regex.sub('^#![^\n]+','',s)
 816
 817         r=ts_regex.compile('\([\0-\n]*\n\)')
 818         ts_results = r.match_group(s, (1,))
 819         if ts_results:
 820             s=s[len(ts_results[1]):]
 821         s=str(html_with_references(s))
 822         if s[:4]=='<h1>':
 823             t=s[4:find(s,'</h1>')]
 824             s='''<html><head><title>%s</title>
 825             </head><body>
 826             %s
 827             </body></html>
 828             ''' % (t,s)
 829         print s
 830     else:
 831         print html_with_references(s)
 832
 833 if __name__=="__main__": main()