X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/c12bc4de5887421242de7f619b3c5e265bf631ac..5737d05f8665dfd3f62dd8e536b317f1f48202c5:/wxPython/samples/stxview/StructuredText/StructuredText.py diff --git a/wxPython/samples/stxview/StructuredText/StructuredText.py b/wxPython/samples/stxview/StructuredText/StructuredText.py index a1b3fd03ad..2408f2331c 100644 --- a/wxPython/samples/stxview/StructuredText/StructuredText.py +++ b/wxPython/samples/stxview/StructuredText/StructuredText.py @@ -1,4 +1,3 @@ -#! /usr/bin/env python -- # -*- python -*- ############################################################################## # # Zope Public License (ZPL) Version 1.0 @@ -83,751 +82,67 @@ # attributions are listed in the accompanying credits file. # ############################################################################## -'''Structured Text Manipulation -Parse a structured text string into a form that can be used with -structured formats, like html. +""" Alias module for StructuredTextClassic compatibility which makes +use of StructuredTextNG """ -Structured text is text that uses indentation and simple -symbology to indicate the structure of a document. -A structured string consists of a sequence of paragraphs separated by -one or more blank lines. Each paragraph has a level which is defined -as the minimum indentation of the paragraph. A paragraph is a -sub-paragraph of another paragraph if the other paragraph is the last -preceding paragraph that has a lower level. +import HTMLClass, DocumentClass, ClassicDocumentClass +from ST import Basic -Special symbology is used to indicate special constructs: +import re, string,sys +from STletters import letters -- A single-line paragraph whose immediately succeeding paragraphs are lower - level is treated as a header. +Document = ClassicDocumentClass.DocumentClass() +HTMLNG = HTMLClass.HTMLClass() -- A paragraph that begins with a '-', '*', or 'o' is treated as an - unordered list (bullet) element. +def HTML(aStructuredString, level=0): + st = Basic(aStructuredString) + doc = Document(st) + return HTMLNG(doc) -- A paragraph that begins with a sequence of digits followed by a - white-space character is treated as an ordered list element. +def StructuredText(aStructuredString, level=0): + return HTML(aStructuredString,level) -- A paragraph that begins with a sequence of sequences, where each - sequence is a sequence of digits or a sequence of letters followed - by a period, is treated as an ordered list element. - -- A paragraph with a first line that contains some text, followed by - some white-space and '--' is treated as - a descriptive list element. The leading text is treated as the - element title. - -- Sub-paragraphs of a paragraph that ends in the word 'example' or the - word 'examples', or '::' is treated as example code and is output as is. - -- Text enclosed single quotes (with white-space to the left of the - first quote and whitespace or puctuation to the right of the second quote) - is treated as example code. - -- Text surrounded by '*' characters (with white-space to the left of the - first '*' and whitespace or puctuation to the right of the second '*') - is emphasized. - -- Text surrounded by '**' characters (with white-space to the left of the - first '**' and whitespace or puctuation to the right of the second '**') - is made strong. - -- Text surrounded by '_' underscore characters (with whitespace to the left - and whitespace or punctuation to the right) is made underlined. - -- Text encloded by double quotes followed by a colon, a URL, and concluded - by punctuation plus white space, *or* just white space, is treated as a - hyper link. For example: - - "Zope":http://www.zope.org/ is ... - - Is interpreted as 'Zope is ....' - Note: This works for relative as well as absolute URLs. - -- Text enclosed by double quotes followed by a comma, one or more spaces, - an absolute URL and concluded by punctuation plus white space, or just - white space, is treated as a hyper link. For example: - - "mail me", mailto:amos@digicool.com. - - Is interpreted as 'mail me.' - -- Text enclosed in brackets which consists only of letters, digits, - underscores and dashes is treated as hyper links within the document. - For example: - - As demonstrated by Smith [12] this technique is quite effective. - - Is interpreted as '... by Smith [12] this ...'. Together - with the next rule this allows easy coding of references or end notes. - -- Text enclosed in brackets which is preceded by the start of a line, two - periods and a space is treated as a named link. For example: - - .. [12] "Effective Techniques" Smith, Joe ... - - Is interpreted as '[12] "Effective Techniques" ...'. - Together with the previous rule this allows easy coding of references or - end notes. - - -- A paragraph that has blocks of text enclosed in '||' is treated as a - table. The text blocks correspond to table cells and table rows are - denoted by newlines. By default the cells are center aligned. A cell - can span more than one column by preceding a block of text with an - equivalent number of cell separators '||'. Newlines and '|' cannot - be a part of the cell text. For example: - - |||| **Ingredients** || - || *Name* || *Amount* || - ||Spam||10|| - ||Eggs||3|| - - is interpreted as:: - - - - - - - - - - - - - - - - - -
Ingredients
Name Amount
Spam10
Eggs3
- - -$Id$''' -# Copyright -# -# Copyright 1996 Digital Creations, L.C., 910 Princess Anne -# Street, Suite 300, Fredericksburg, Virginia 22401 U.S.A. All -# rights reserved. Copyright in this software is owned by DCLC, -# unless otherwise indicated. Permission to use, copy and -# distribute this software is hereby granted, provided that the -# above copyright notice appear in all copies and that both that -# copyright notice and this permission notice appear. Note that -# any product, process or technology described in this software -# may be the subject of other Intellectual Property rights -# reserved by Digital Creations, L.C. and are not licensed -# hereunder. -# -# Trademarks -# -# Digital Creations & DCLC, are trademarks of Digital Creations, L.C.. -# All other trademarks are owned by their respective companies. -# -# No Warranty -# -# The software is provided "as is" without warranty of any kind, -# either express or implied, including, but not limited to, the -# implied warranties of merchantability, fitness for a particular -# purpose, or non-infringement. This software could include -# technical inaccuracies or typographical errors. Changes are -# periodically made to the software; these changes will be -# incorporated in new editions of the software. DCLC may make -# improvements and/or changes in this software at any time -# without notice. -# -# Limitation Of Liability -# -# In no event will DCLC be liable for direct, indirect, special, -# incidental, economic, cover, or consequential damages arising -# out of the use of or inability to use this software even if -# advised of the possibility of such damages. Some states do not -# allow the exclusion or limitation of implied warranties or -# limitation of liability for incidental or consequential -# damages, so the above limitation or exclusion may not apply to -# you. -# -# -# If you have questions regarding this software, -# contact: -# -# Jim Fulton, jim@digicool.com -# -# (540) 371-6909 -# -# $Log$ -# Revision 1.1 2001/03/10 05:07:20 RD -# Added some simple sample apps -# -# Revision 1.27 2000/04/21 13:38:10 jim -# Added closing list tags. Woo hoo! -# -# Revision 1.26 2000/03/14 17:22:04 brian -# Allow ~ in hrefs. -# -# Revision 1.25 2000/02/17 00:53:24 klm -# HTML._str(): We were getting preformatted examples rendered twice, -# second time without preformatting. Problem was a missing 'continue' -# in one of the cases. -# -# Revision 1.24 1999/12/13 16:32:48 klm -# Incorporated pavlos christoforou's mods to handle simple tables. From -# his web page at http://www.zope.org/Members/gaaros/StructuredText: -# -# Structured Text module with table support -# -# A paragraph that has blocks of text enclosed in '||' is treated as a -# table. The text blocks correspond to table cells and table rows are -# denoted by newlines. By default the cells are center aligned. You can -# change the defaults by modifying the CELL,ROW and TABLE class -# attributes in class Table. A cell can span more than one column by -# preceding a block of text with an equivalent number of cell separators -# '||'. Newlines and '|' cannot be a part of the cell text. If you need -# newlines use
. For example: -# -# |||| **Ingredients** || -# || *Name* || *Amount* || -# ||Spam||10|| -# ||Eggs||3|| -# -# Revision 1.23 1999/08/03 20:49:05 jim -# Fixed to allow list elements to introduce examples. -# -# Restructured _str using continue to avoid excessive nesting. -# -# Revision 1.22 1999/08/02 22:01:28 jim -# Fixed a bunch of bugs introduced by making ts_regex actually thread -# safe. -# -# Also localized a bunch of regular expressions -# using "static" variables (aka always default arguments). -# -# Revision 1.21 1999/08/02 13:26:52 jim -# paragraph_divider needs to be a regular (thread-unsafe) regex -# since it gets passed to ts_regex.split, which is thread-safe -# and wants to use regs. -# -# Revision 1.20 1999/07/21 13:33:59 jim -# untabified. -# -# Revision 1.19 1999/07/15 16:43:15 jim -# Checked in Scott Robertson's thread-safety fixes. -# -# Revision 1.18 1999/03/24 00:03:18 klm -# Provide for relative links, eg whatever, -# as: -# -# "whatever", :file_in_same_dir -# -# or -# -# "whatever"::file_in_same_dir -# -# .__init__(): relax the second gsub, using a '*' instead of a '+', so -# the stuff before the ':' can be missing, and also do postprocessing so -# any resulting ''s have the superfluous ':' -# removed. *Seems* good! -# -# Revision 1.17 1999/03/12 23:21:39 klm -# Gratuituous checkin to test my cvs *update* logging hook. -# -# Revision 1.16 1999/03/12 17:12:12 klm -# Added support for underlined elements, in the obvious way (and -# included an entry in the module docstring for it). -# -# Added an entry in the module docstring describing what i *guess* is -# the criterion for identifying header elements. (I'm going to have to -# delve into and understand the framework a bit better before *knowing* -# this is the case.) -# -# Revision 1.15 1999/03/11 22:40:18 klm -# Handle links that include '#' named links. -# -# Revision 1.14 1999/03/11 01:35:19 klm -# Fixed a small typo, and refined the module docstring link example, in -# order to do a checkin to exercise the CVS repository mirroring. Might -# as well include my last checkin message, with some substantial stuff: -# -# Links are now recognized whether or not the candidate strings are -# terminated with punctuation before the trailing whitespace. The old -# form - trailing punctuation then whitespace - is preserved, but the -# punctuation is now unnecessary. -# -# The regular expressions are a bit more complicated, but i've factored -# out the common parts and but them in variables with suggestive names, -# which may make them easier to understand. -# -# Revision 1.13 1999/03/11 00:49:57 klm -# Links are now recognized whether or not the candidate strings are -# terminated with punctuation before the trailing whitespace. The old -# form - trailing punctuation then whitespace - is preserved, but the -# punctuation is now unnecessary. -# -# The regular expressions are a bit more complicated, but i've factored -# out the common parts and but them in variables with suggestive names, -# which may make them easier to understand. -# -# Revision 1.12 1999/03/10 00:15:46 klm -# Committing with version 1.0 of the license. -# -# Revision 1.11 1999/02/08 18:13:12 klm -# Trival checkin (spelling fix "preceedeing" -> "preceding" and similar) -# to see what pitfalls my environment presents to accomplishing a -# successful checkin. (It turns out that i can't do it from aldous because -# the new version of cvs doesn't support the '-t' and '-f' options in the -# cvswrappers file...) -# -# Revision 1.10 1998/12/29 22:30:43 amos -# Improved doc string to describe hyper link and references capabilities. -# -# Revision 1.9 1998/12/04 20:15:31 jim -# Detabification and new copyright. -# -# Revision 1.8 1998/02/27 18:45:22 jim -# Various updates, including new indentation utilities. -# -# Revision 1.7 1997/12/12 15:39:54 jim -# Added level as argument for html_with_references. -# -# Revision 1.6 1997/12/12 15:27:25 jim -# Added additional pattern matching for HTML references. -# -# Revision 1.5 1997/03/08 16:01:03 jim -# Moved code to recognize: "foo bar", url. -# into object initializer, so it gets applied in all cases. -# -# Revision 1.4 1997/02/17 23:36:35 jim -# Added support for "foo title", http:/foohost/foo -# -# Revision 1.3 1996/12/06 15:57:37 jim -# Fixed bugs in character tags. -# -# Added -t command-line option to generate title if: -# -# - The first paragraph is one line (i.e. a heading) and -# -# - All other paragraphs are indented. -# -# Revision 1.2 1996/10/28 13:56:02 jim -# Fixed bug in ordered lists. -# Added option for either HTML-style headings or descriptive-list style -# headings. -# -# Revision 1.1 1996/10/23 14:00:45 jim -# *** empty log message *** -# -# -# - -import ts_regex, regex -from ts_regex import gsub -from string import split, join, strip, find - -def untabify(aString, - indent_tab=ts_regex.compile('\(\n\|^\)\( *\)\t').search_group, - ): - '''\ - Convert indentation tabs to spaces. - ''' - result='' - rest=aString - while 1: - ts_results = indent_tab(rest, (1,2)) - if ts_results: - start, grps = ts_results - lnl=len(grps[0]) - indent=len(grps[1]) - result=result+rest[:start] - rest="\n%s%s" % (' ' * ((indent/8+1)*8), - rest[start+indent+1+lnl:]) - else: - return result+rest - -def indent(aString, indent=2): - """Indent a string the given number of spaces""" - r=split(untabify(aString),'\n') - if not r: return '' - if not r[-1]: del r[-1] - tab=' '*level - return "%s%s\n" % (tab,join(r,'\n'+tab)) - -def reindent(aString, indent=2, already_untabified=0): - "reindent a block of text, so that the minimum indent is as given" - - if not already_untabified: aString=untabify(aString) - - l=indent_level(aString)[0] - if indent==l: return aString - - r=[] - - append=r.append - - if indent > l: - tab=' ' * (indent-l) - for s in split(aString,'\n'): append(tab+s) - else: - l=l-indent - for s in split(aString,'\n'): append(s[l:]) - - return join(r,'\n') - -def indent_level(aString, - indent_space=ts_regex.compile('\n\( *\)').search_group, - ): - '''\ - Find the minimum indentation for a string, not counting blank lines. - ''' - start=0 - text='\n'+aString - indent=l=len(text) - while 1: - - ts_results = indent_space(text, (1,2), start) - if ts_results: - start, grps = ts_results - i=len(grps[0]) - start=start+i+1 - if start < l and text[start] != '\n': # Skip blank lines - if not i: return (0,aString) - if i < indent: indent = i - else: - return (indent,aString) - -def paragraphs(list,start): - l=len(list) - level=list[start][0] - i=start+1 - while i < l and list[i][0] > level: i=i+1 - return i-1-start - -def structure(list): - if not list: return [] - i=0 - l=len(list) - r=[] - while i < l: - sublen=paragraphs(list,i) - i=i+1 - r.append((list[i-1][1],structure(list[i:i+sublen]))) - i=i+sublen - return r - - -class Table: - CELL=' %s\n' - ROW=' \n%s \n' - TABLE='\n\n%s
' - - def create(self,aPar,td=ts_regex.compile( - '[ \t\n]*||\([^\0|]*\)').match_group): - '''parses a table and returns nested list representing the - table''' - self.table=[] - text=filter(None,split(aPar,'\n')) - for line in text: - row=[] - while 1: - pos=td(line,(1,)) - if not pos:return 0 - row.append(pos[1]) - if pos[0]==len(line):break - line=line[pos[0]:] - self.table.append(row) - return 1 - - def html(self): - '''Creates an HTML representation of table''' - htmltable=[] - for row in self.table: - htmlrow=[] - colspan=1 - for cell in row: - if cell=='': - colspan=colspan+1 - continue - else: - htmlrow.append(self.CELL%(colspan,cell)) - colspan=1 - htmltable.append(self.ROW%join(htmlrow,'')) - return self.TABLE%join(htmltable,'') - -optional_trailing_punctuation = '\(,\|\([.:?;]\)\)?' -trailing_space = '\([\0- ]\)' -not_punctuation_or_whitespace = "[^-,.?:\0- ]" -table=Table() - -class StructuredText: - - """Model text as structured collection of paragraphs. - - Structure is implied by the indentation level. - - This class is intended as a base classes that do actual text - output formatting. - """ - - def __init__(self, aStructuredString, level=0, - paragraph_divider=regex.compile('\(\n *\)+\n'), - ): - '''Convert a structured text string into a structured text object. - - Aguments: - - aStructuredString -- The string to be parsed. - level -- The level of top level headings to be created. - ''' - - aStructuredString = gsub( - '\"\([^\"\0]+\)\":' # title: <"text":> - + ('\([-:a-zA-Z0-9_,./?=@#~]+%s\)' - % not_punctuation_or_whitespace) - + optional_trailing_punctuation - + trailing_space, - '
\\1\\4\\5\\6', - aStructuredString) - - aStructuredString = gsub( - '\"\([^\"\0]+\)\",[\0- ]+' # title: <"text", > - + ('\([a-zA-Z]*:[-:a-zA-Z0-9_,./?=@#~]*%s\)' - % not_punctuation_or_whitespace) - + optional_trailing_punctuation - + trailing_space, - '\\1\\4\\5\\6', - aStructuredString) - - protoless = find(aStructuredString, '\\2\\3',s) - s=gsub(under, '\\1\\2\\3',s) - s=gsub(code, '\\1\\2\\3',s) - s=gsub(em, '\\1\\2\\3',s) - return s - -class HTML(StructuredText): - - '''\ - An HTML structured text formatter. - '''\ - - def __str__(self, - extra_dl=regex.compile("\n
"), - extra_ul=regex.compile("\n