]>
Commit | Line | Data |
---|---|---|
c12bc4de RD |
1 | #! /usr/bin/env python -- # -*- python -*- |
2 | ############################################################################## | |
3 | # | |
4 | # Zope Public License (ZPL) Version 1.0 | |
5 | # ------------------------------------- | |
6 | # | |
7 | # Copyright (c) Digital Creations. All rights reserved. | |
8 | # | |
9 | # This license has been certified as Open Source(tm). | |
10 | # | |
11 | # Redistribution and use in source and binary forms, with or without | |
12 | # modification, are permitted provided that the following conditions are | |
13 | # met: | |
14 | # | |
15 | # 1. Redistributions in source code must retain the above copyright | |
16 | # notice, this list of conditions, and the following disclaimer. | |
17 | # | |
18 | # 2. Redistributions in binary form must reproduce the above copyright | |
19 | # notice, this list of conditions, and the following disclaimer in | |
20 | # the documentation and/or other materials provided with the | |
21 | # distribution. | |
22 | # | |
23 | # 3. Digital Creations requests that attribution be given to Zope | |
24 | # in any manner possible. Zope includes a "Powered by Zope" | |
25 | # button that is installed by default. While it is not a license | |
26 | # violation to remove this button, it is requested that the | |
27 | # attribution remain. A significant investment has been put | |
28 | # into Zope, and this effort will continue if the Zope community | |
29 | # continues to grow. This is one way to assure that growth. | |
30 | # | |
31 | # 4. All advertising materials and documentation mentioning | |
32 | # features derived from or use of this software must display | |
33 | # the following acknowledgement: | |
34 | # | |
35 | # "This product includes software developed by Digital Creations | |
36 | # for use in the Z Object Publishing Environment | |
37 | # (http://www.zope.org/)." | |
38 | # | |
39 | # In the event that the product being advertised includes an | |
40 | # intact Zope distribution (with copyright and license included) | |
41 | # then this clause is waived. | |
42 | # | |
43 | # 5. Names associated with Zope or Digital Creations must not be used to | |
44 | # endorse or promote products derived from this software without | |
45 | # prior written permission from Digital Creations. | |
46 | # | |
47 | # 6. Modified redistributions of any form whatsoever must retain | |
48 | # the following acknowledgment: | |
49 | # | |
50 | # "This product includes software developed by Digital Creations | |
51 | # for use in the Z Object Publishing Environment | |
52 | # (http://www.zope.org/)." | |
53 | # | |
54 | # Intact (re-)distributions of any official Zope release do not | |
55 | # require an external acknowledgement. | |
56 | # | |
57 | # 7. Modifications are encouraged but must be packaged separately as | |
58 | # patches to official Zope releases. Distributions that do not | |
59 | # clearly separate the patches from the original work must be clearly | |
60 | # labeled as unofficial distributions. Modifications which do not | |
61 | # carry the name Zope may be packaged in any form, as long as they | |
62 | # conform to all of the clauses above. | |
63 | # | |
64 | # | |
65 | # Disclaimer | |
66 | # | |
67 | # THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY | |
68 | # EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
69 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
70 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL CREATIONS OR ITS | |
71 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
72 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
73 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF | |
74 | # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | |
75 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
76 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT | |
77 | # OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
78 | # SUCH DAMAGE. | |
79 | # | |
80 | # | |
81 | # This software consists of contributions made by Digital Creations and | |
82 | # many individuals on behalf of Digital Creations. Specific | |
83 | # attributions are listed in the accompanying credits file. | |
84 | # | |
85 | ############################################################################## | |
86 | '''Structured Text Manipulation | |
87 | ||
88 | Parse a structured text string into a form that can be used with | |
89 | structured formats, like html. | |
90 | ||
91 | Structured text is text that uses indentation and simple | |
92 | symbology to indicate the structure of a document. | |
93 | ||
94 | A structured string consists of a sequence of paragraphs separated by | |
95 | one or more blank lines. Each paragraph has a level which is defined | |
96 | as the minimum indentation of the paragraph. A paragraph is a | |
97 | sub-paragraph of another paragraph if the other paragraph is the last | |
98 | preceding paragraph that has a lower level. | |
99 | ||
100 | Special symbology is used to indicate special constructs: | |
101 | ||
102 | - A single-line paragraph whose immediately succeeding paragraphs are lower | |
103 | level is treated as a header. | |
104 | ||
105 | - A paragraph that begins with a '-', '*', or 'o' is treated as an | |
106 | unordered list (bullet) element. | |
107 | ||
108 | - A paragraph that begins with a sequence of digits followed by a | |
109 | white-space character is treated as an ordered list element. | |
110 | ||
111 | - A paragraph that begins with a sequence of sequences, where each | |
112 | sequence is a sequence of digits or a sequence of letters followed | |
113 | by a period, is treated as an ordered list element. | |
114 | ||
115 | - A paragraph with a first line that contains some text, followed by | |
116 | some white-space and '--' is treated as | |
117 | a descriptive list element. The leading text is treated as the | |
118 | element title. | |
119 | ||
120 | - Sub-paragraphs of a paragraph that ends in the word 'example' or the | |
121 | word 'examples', or '::' is treated as example code and is output as is. | |
122 | ||
123 | - Text enclosed in single quotes (with white-space to the left of the | |
124 | first quote and whitespace or punctuation to the right of the second quote) | |
125 | is treated as example code. | |
126 | ||
127 | - Text surrounded by '*' characters (with white-space to the left of the | |
128 | first '*' and whitespace or punctuation to the right of the second '*') | |
129 | is emphasized. | |
130 | ||
131 | - Text surrounded by '**' characters (with white-space to the left of the | |
132 | first '**' and whitespace or punctuation to the right of the second '**') | |
133 | is made strong. | |
134 | ||
135 | - Text surrounded by '_' underscore characters (with whitespace to the left | |
136 | and whitespace or punctuation to the right) is made underlined. | |
137 | ||
138 | - Text enclosed by double quotes followed by a colon, a URL, and concluded | |
139 | by punctuation plus white space, *or* just white space, is treated as a | |
140 | hyper link. For example: | |
141 | ||
142 | "Zope":http://www.zope.org/ is ... | |
143 | ||
144 | Is interpreted as '<a href="http://www.zope.org/">Zope</a> is ....' | |
145 | Note: This works for relative as well as absolute URLs. | |
146 | ||
147 | - Text enclosed by double quotes followed by a comma, one or more spaces, | |
148 | an absolute URL and concluded by punctuation plus white space, or just | |
149 | white space, is treated as a hyper link. For example: | |
150 | ||
151 | "mail me", mailto:amos@digicool.com. | |
152 | ||
153 | Is interpreted as '<a href="mailto:amos@digicool.com">mail me</a>.' | |
154 | ||
155 | - Text enclosed in brackets which consists only of letters, digits, | |
156 | underscores and dashes is treated as hyper links within the document. | |
157 | For example: | |
158 | ||
159 | As demonstrated by Smith [12] this technique is quite effective. | |
160 | ||
161 | Is interpreted as '... by Smith <a href="#12">[12]</a> this ...'. Together | |
162 | with the next rule this allows easy coding of references or end notes. | |
163 | ||
164 | - Text enclosed in brackets which is preceded by the start of a line, two | |
165 | periods and a space is treated as a named link. For example: | |
166 | ||
167 | .. [12] "Effective Techniques" Smith, Joe ... | |
168 | ||
169 | Is interpreted as '<a name="12">[12]</a> "Effective Techniques" ...'. | |
170 | Together with the previous rule this allows easy coding of references or | |
171 | end notes. | |
172 | ||
173 | ||
174 | - A paragraph that has blocks of text enclosed in '||' is treated as a | |
175 | table. The text blocks correspond to table cells and table rows are | |
176 | denoted by newlines. By default the cells are center aligned. A cell | |
177 | can span more than one column by preceding a block of text with an | |
178 | equivalent number of cell separators '||'. Newlines and '|' cannot | |
179 | be a part of the cell text. For example: | |
180 | ||
181 | |||| **Ingredients** || | |
182 | || *Name* || *Amount* || | |
183 | ||Spam||10|| | |
184 | ||Eggs||3|| | |
185 | ||
186 | is interpreted as:: | |
187 | ||
188 | <TABLE BORDER=1 CELLPADDING=2> | |
189 | <TR> | |
190 | <TD ALIGN=CENTER COLSPAN=2> <strong>Ingredients</strong> </TD> | |
191 | </TR> | |
192 | <TR> | |
193 | <TD ALIGN=CENTER COLSPAN=1> <em>Name</em> </TD> | |
194 | <TD ALIGN=CENTER COLSPAN=1> <em>Amount</em> </TD> | |
195 | </TR> | |
196 | <TR> | |
197 | <TD ALIGN=CENTER COLSPAN=1>Spam</TD> | |
198 | <TD ALIGN=CENTER COLSPAN=1>10</TD> | |
199 | </TR> | |
200 | <TR> | |
201 | <TD ALIGN=CENTER COLSPAN=1>Eggs</TD> | |
202 | <TD ALIGN=CENTER COLSPAN=1>3</TD> | |
203 | </TR> | |
204 | </TABLE> | |
205 | ||
206 | ||
207 | $Id$''' | |
208 | # Copyright | |
209 | # | |
210 | # Copyright 1996 Digital Creations, L.C., 910 Princess Anne | |
211 | # Street, Suite 300, Fredericksburg, Virginia 22401 U.S.A. All | |
212 | # rights reserved. Copyright in this software is owned by DCLC, | |
213 | # unless otherwise indicated. Permission to use, copy and | |
214 | # distribute this software is hereby granted, provided that the | |
215 | # above copyright notice appear in all copies and that both that | |
216 | # copyright notice and this permission notice appear. Note that | |
217 | # any product, process or technology described in this software | |
218 | # may be the subject of other Intellectual Property rights | |
219 | # reserved by Digital Creations, L.C. and are not licensed | |
220 | # hereunder. | |
221 | # | |
222 | # Trademarks | |
223 | # | |
224 | # Digital Creations & DCLC, are trademarks of Digital Creations, L.C.. | |
225 | # All other trademarks are owned by their respective companies. | |
226 | # | |
227 | # No Warranty | |
228 | # | |
229 | # The software is provided "as is" without warranty of any kind, | |
230 | # either express or implied, including, but not limited to, the | |
231 | # implied warranties of merchantability, fitness for a particular | |
232 | # purpose, or non-infringement. This software could include | |
233 | # technical inaccuracies or typographical errors. Changes are | |
234 | # periodically made to the software; these changes will be | |
235 | # incorporated in new editions of the software. DCLC may make | |
236 | # improvements and/or changes in this software at any time | |
237 | # without notice. | |
238 | # | |
239 | # Limitation Of Liability | |
240 | # | |
241 | # In no event will DCLC be liable for direct, indirect, special, | |
242 | # incidental, economic, cover, or consequential damages arising | |
243 | # out of the use of or inability to use this software even if | |
244 | # advised of the possibility of such damages. Some states do not | |
245 | # allow the exclusion or limitation of implied warranties or | |
246 | # limitation of liability for incidental or consequential | |
247 | # damages, so the above limitation or exclusion may not apply to | |
248 | # you. | |
249 | # | |
250 | # | |
251 | # If you have questions regarding this software, | |
252 | # contact: | |
253 | # | |
254 | # Jim Fulton, jim@digicool.com | |
255 | # | |
256 | # (540) 371-6909 | |
257 | # | |
258 | # $Log$ | |
259 | # Revision 1.1 2001/03/10 05:07:20 RD | |
260 | # Added some simple sample apps | |
261 | # | |
262 | # Revision 1.27 2000/04/21 13:38:10 jim | |
263 | # Added closing list tags. Woo hoo! | |
264 | # | |
265 | # Revision 1.26 2000/03/14 17:22:04 brian | |
266 | # Allow ~ in hrefs. | |
267 | # | |
268 | # Revision 1.25 2000/02/17 00:53:24 klm | |
269 | # HTML._str(): We were getting preformatted examples rendered twice, | |
270 | # second time without preformatting. Problem was a missing 'continue' | |
271 | # in one of the cases. | |
272 | # | |
273 | # Revision 1.24 1999/12/13 16:32:48 klm | |
274 | # Incorporated pavlos christoforou's mods to handle simple tables. From | |
275 | # his web page at http://www.zope.org/Members/gaaros/StructuredText: | |
276 | # | |
277 | # Structured Text module with table support | |
278 | # | |
279 | # A paragraph that has blocks of text enclosed in '||' is treated as a | |
280 | # table. The text blocks correspond to table cells and table rows are | |
281 | # denoted by newlines. By default the cells are center aligned. You can | |
282 | # change the defaults by modifying the CELL,ROW and TABLE class | |
283 | # attributes in class Table. A cell can span more than one column by | |
284 | # preceding a block of text with an equivalent number of cell separators | |
285 | # '||'. Newlines and '|' cannot be a part of the cell text. If you need | |
286 | # newlines use <BR>. For example: | |
287 | # | |
288 | # |||| **Ingredients** || | |
289 | # || *Name* || *Amount* || | |
290 | # ||Spam||10|| | |
291 | # ||Eggs||3|| | |
292 | # | |
293 | # Revision 1.23 1999/08/03 20:49:05 jim | |
294 | # Fixed to allow list elements to introduce examples. | |
295 | # | |
296 | # Restructured _str using continue to avoid excessive nesting. | |
297 | # | |
298 | # Revision 1.22 1999/08/02 22:01:28 jim | |
299 | # Fixed a bunch of bugs introduced by making ts_regex actually thread | |
300 | # safe. | |
301 | # | |
302 | # Also localized a bunch of regular expressions | |
303 | # using "static" variables (aka always default arguments). | |
304 | # | |
305 | # Revision 1.21 1999/08/02 13:26:52 jim | |
306 | # paragraph_divider needs to be a regular (thread-unsafe) regex | |
307 | # since it gets passed to ts_regex.split, which is thread-safe | |
308 | # and wants to use regs. | |
309 | # | |
310 | # Revision 1.20 1999/07/21 13:33:59 jim | |
311 | # untabified. | |
312 | # | |
313 | # Revision 1.19 1999/07/15 16:43:15 jim | |
314 | # Checked in Scott Robertson's thread-safety fixes. | |
315 | # | |
316 | # Revision 1.18 1999/03/24 00:03:18 klm | |
317 | # Provide for relative links, eg <a href="file_in_same_dir">whatever</a>, | |
318 | # as: | |
319 | # | |
320 | # "whatever", :file_in_same_dir | |
321 | # | |
322 | # or | |
323 | # | |
324 | # "whatever"::file_in_same_dir | |
325 | # | |
326 | # .__init__(): relax the second gsub, using a '*' instead of a '+', so | |
327 | # the stuff before the ':' can be missing, and also do postprocessing so | |
328 | # any resulting '<a href=":file_in_same_dir">'s have the superfluous ':' | |
329 | # removed. *Seems* good! | |
330 | # | |
331 | # Revision 1.17 1999/03/12 23:21:39 klm | |
332 | # Gratuituous checkin to test my cvs *update* logging hook. | |
333 | # | |
334 | # Revision 1.16 1999/03/12 17:12:12 klm | |
335 | # Added support for underlined elements, in the obvious way (and | |
336 | # included an entry in the module docstring for it). | |
337 | # | |
338 | # Added an entry in the module docstring describing what i *guess* is | |
339 | # the criterion for identifying header elements. (I'm going to have to | |
340 | # delve into and understand the framework a bit better before *knowing* | |
341 | # this is the case.) | |
342 | # | |
343 | # Revision 1.15 1999/03/11 22:40:18 klm | |
344 | # Handle links that include '#' named links. | |
345 | # | |
346 | # Revision 1.14 1999/03/11 01:35:19 klm | |
347 | # Fixed a small typo, and refined the module docstring link example, in | |
348 | # order to do a checkin to exercise the CVS repository mirroring. Might | |
349 | # as well include my last checkin message, with some substantial stuff: | |
350 | # | |
351 | # Links are now recognized whether or not the candidate strings are | |
352 | # terminated with punctuation before the trailing whitespace. The old | |
353 | # form - trailing punctuation then whitespace - is preserved, but the | |
354 | # punctuation is now unnecessary. | |
355 | # | |
356 | # The regular expressions are a bit more complicated, but i've factored | |
357 | # out the common parts and but them in variables with suggestive names, | |
358 | # which may make them easier to understand. | |
359 | # | |
360 | # Revision 1.13 1999/03/11 00:49:57 klm | |
361 | # Links are now recognized whether or not the candidate strings are | |
362 | # terminated with punctuation before the trailing whitespace. The old | |
363 | # form - trailing punctuation then whitespace - is preserved, but the | |
364 | # punctuation is now unnecessary. | |
365 | # | |
366 | # The regular expressions are a bit more complicated, but i've factored | |
367 | # out the common parts and but them in variables with suggestive names, | |
368 | # which may make them easier to understand. | |
369 | # | |
370 | # Revision 1.12 1999/03/10 00:15:46 klm | |
371 | # Committing with version 1.0 of the license. | |
372 | # | |
373 | # Revision 1.11 1999/02/08 18:13:12 klm | |
374 | # Trival checkin (spelling fix "preceedeing" -> "preceding" and similar) | |
375 | # to see what pitfalls my environment presents to accomplishing a | |
376 | # successful checkin. (It turns out that i can't do it from aldous because | |
377 | # the new version of cvs doesn't support the '-t' and '-f' options in the | |
378 | # cvswrappers file...) | |
379 | # | |
380 | # Revision 1.10 1998/12/29 22:30:43 amos | |
381 | # Improved doc string to describe hyper link and references capabilities. | |
382 | # | |
383 | # Revision 1.9 1998/12/04 20:15:31 jim | |
384 | # Detabification and new copyright. | |
385 | # | |
386 | # Revision 1.8 1998/02/27 18:45:22 jim | |
387 | # Various updates, including new indentation utilities. | |
388 | # | |
389 | # Revision 1.7 1997/12/12 15:39:54 jim | |
390 | # Added level as argument for html_with_references. | |
391 | # | |
392 | # Revision 1.6 1997/12/12 15:27:25 jim | |
393 | # Added additional pattern matching for HTML references. | |
394 | # | |
395 | # Revision 1.5 1997/03/08 16:01:03 jim | |
396 | # Moved code to recognize: "foo bar", url. | |
397 | # into object initializer, so it gets applied in all cases. | |
398 | # | |
399 | # Revision 1.4 1997/02/17 23:36:35 jim | |
400 | # Added support for "foo title", http:/foohost/foo | |
401 | # | |
402 | # Revision 1.3 1996/12/06 15:57:37 jim | |
403 | # Fixed bugs in character tags. | |
404 | # | |
405 | # Added -t command-line option to generate title if: | |
406 | # | |
407 | # - The first paragraph is one line (i.e. a heading) and | |
408 | # | |
409 | # - All other paragraphs are indented. | |
410 | # | |
411 | # Revision 1.2 1996/10/28 13:56:02 jim | |
412 | # Fixed bug in ordered lists. | |
413 | # Added option for either HTML-style headings or descriptive-list style | |
414 | # headings. | |
415 | # | |
416 | # Revision 1.1 1996/10/23 14:00:45 jim | |
417 | # *** empty log message *** | |
418 | # | |
419 | # | |
420 | # | |
421 | ||
422 | import ts_regex, regex | |
423 | from ts_regex import gsub | |
424 | from string import split, join, strip, find | |
425 | ||
def untabify(aString,
             indent_tab=ts_regex.compile('\(\n\|^\)\( *\)\t').search_group,
             ):
    '''\
    Expand tabs that appear in leading indentation into spaces.

    aString    -- the text to process.
    indent_tab -- bound search function (a "static" default argument); it
                  locates a line start followed by optional spaces and a tab.

    Returns the text with each such tab replaced by enough spaces to reach
    the next multiple of eight columns.  Tabs elsewhere are left alone.
    '''
    done = ''
    pending = aString
    while 1:
        hit = indent_tab(pending, (1, 2))
        if not hit:
            # No indentation tab left: everything remaining is final.
            return done + pending
        pos, groups = hit
        newline_len = len(groups[0])    # 1 for a newline, 0 at string start
        width = len(groups[1])          # spaces that precede the tab
        done = done + pending[:pos]
        # Same value as (width/8+1)*8 for non-negative ints: the next
        # multiple of eight strictly above the current width.
        padding = ' ' * (width - width % 8 + 8)
        # The rebuilt text starts with a newline so that a further tab on
        # the same line is found by the next search.
        pending = "\n%s%s" % (padding,
                              pending[pos + width + 1 + newline_len:])
445 | ||
def indent(aString, indent=2):
    """Indent a string the given number of spaces.

    aString -- the text to indent; indentation tabs are expanded first.
    indent  -- the number of spaces to put in front of every line.

    Returns the indented text terminated by a single newline.  One trailing
    empty line (produced by a final newline in the input) is dropped before
    indenting so it does not become a line of bare spaces.
    """
    r=split(untabify(aString),'\n')
    if not r: return ''
    if not r[-1]: del r[-1]
    # The prefix is built from the 'indent' argument; the former code used
    # an undefined name ('level') here and raised NameError on every call.
    tab=' '*indent
    return "%s%s\n" % (tab,join(r,'\n'+tab))
453 | ||
def reindent(aString, indent=2, already_untabified=0):
    """Shift a block of text so that its minimum indentation equals 'indent'.

    aString            -- the text block.
    indent             -- the wanted minimum indentation, in spaces.
    already_untabified -- true when the caller has already run untabify().

    Returns the text unchanged when it already has the wanted indentation.
    Otherwise every line is either prefixed with the missing spaces or has
    the surplus leading characters sliced off.
    """
    if not already_untabified:
        aString = untabify(aString)

    current = indent_level(aString)[0]
    if current == indent:
        return aString

    shifted = []
    if indent > current:
        pad = ' ' * (indent - current)
        for line in split(aString, '\n'):
            shifted.append(pad + line)
    else:
        cut = current - indent
        for line in split(aString, '\n'):
            shifted.append(line[cut:])

    return join(shifted, '\n')
474 | ||
def indent_level(aString,
                 indent_space=ts_regex.compile('\n\( *\)').search_group,
                 ):
    '''\
    Compute the smallest indentation of any non-blank line.

    aString      -- the text to inspect.
    indent_space -- bound search function (a "static" default argument)
                    that finds a newline and the run of spaces after it.

    Returns a pair (indentation, aString).  If no non-blank line is found
    the indentation is the length of the text plus one.
    '''
    # A leading newline lets the first line be matched like any other.
    text = '\n' + aString
    size = len(text)
    smallest = size
    pos = 0
    while 1:
        found = indent_space(text, (1, 2), pos)
        if not found:
            return (smallest, aString)
        pos, groups = found
        width = len(groups[0])
        # Step over the newline and its spaces to the first real character.
        pos = pos + width + 1
        if pos >= size or text[pos] == '\n':
            continue                    # blank line: ignore it
        if width == 0:
            return (0, aString)         # cannot get any smaller
        if width < smallest:
            smallest = width
496 | ||
def paragraphs(list, start):
    """Count the entries after 'start' that are nested beneath it.

    list  -- sequence whose items carry their level at index 0.
    start -- index of the entry whose sub-entries are counted.

    Returns the number of consecutive following entries whose level is
    greater than the level of list[start].
    """
    base = list[start][0]
    count = 0
    for entry in list[start + 1:]:
        if not entry[0] > base:
            break
        count = count + 1
    return count
503 | ||
def structure(list):
    """Turn a flat list of (level, text) items into a nested tree.

    list -- sequence of items with the level at index 0 and the text at
            index 1.

    Returns a list of (text, children) pairs, where children is the same
    kind of list built from the items that directly follow with a greater
    level.
    """
    if not list: return []
    tree = []
    total = len(list)
    pos = 0
    while pos < total:
        level = list[pos][0]
        # Find the end of the run of deeper items that belong to this one.
        end = pos + 1
        while end < total and list[end][0] > level:
            end = end + 1
        tree.append((list[pos][1], structure(list[pos + 1:end])))
        pos = end
    return tree
515 | ||
516 | ||
class Table:
    """Parse a '||'-delimited paragraph and render it as an HTML table."""

    # Format templates for one cell, one row and the whole table.
    CELL=' <TD ALIGN=CENTER COLSPAN=%i>%s</TD>\n'
    ROW=' <TR>\n%s </TR>\n'
    TABLE='\n<TABLE BORDER=1 CELLPADDING=2>\n%s</TABLE>'

    def create(self,aPar,td=ts_regex.compile(
        '[ \t\n]*||\([^\0|]*\)').match_group):
        '''Parse a paragraph into self.table, a list of rows of cell texts.

        aPar -- the paragraph text; each non-empty line is one row.
        td   -- bound match function (a "static" default argument) that
                consumes one '||' separator and the cell text after it.

        Returns 1 when every line could be consumed cell by cell and 0 as
        soon as some part of a line does not match.'''
        self.table = []
        for line in filter(None, split(aPar, '\n')):
            cells = []
            remaining = line
            while 1:
                hit = td(remaining, (1,))
                if not hit:
                    return 0
                cells.append(hit[1])
                if hit[0] == len(remaining):
                    break               # the whole line has been consumed
                remaining = remaining[hit[0]:]
            self.table.append(cells)
        return 1

    def html(self):
        '''Return the HTML markup for the rows stored in self.table.

        An empty cell text produces no cell of its own; it widens the
        COLSPAN of the next non-empty cell by one.'''
        rows = []
        for row in self.table:
            cells = []
            span = 1
            for cell in row:
                if cell == '':
                    span = span + 1
                else:
                    cells.append(self.CELL % (span, cell))
                    span = 1
            rows.append(self.ROW % join(cells, ''))
        return self.TABLE % join(rows, '')
554 | ||
# Pattern fragments that are concatenated into the hyperlink patterns built
# in StructuredText.__init__.
optional_trailing_punctuation = '\(,\|\([.:?;]\)\)?'
trailing_space = '\([\0- ]\)'
not_punctuation_or_whitespace = "[^-,.?:\0- ]"

# Module-level Table instance; HTML._str uses it to recognise and render
# table paragraphs.
table=Table()
559 | ||
class StructuredText:

    """Model text as a structured collection of paragraphs.

    Structure is implied by the indentation level.

    This class is intended as a base class for classes that do the actual
    text output formatting.
    """

    def __init__(self, aStructuredString, level=0,
                 paragraph_divider=regex.compile('\(\n *\)+\n'),
                 ):
        '''Convert a structured text string into a structured text object.

        Arguments:

          aStructuredString -- The string to be parsed.
          level -- The level of top level headings to be created.
          paragraph_divider -- Compiled pattern used to split the text
             into paragraphs (a "static" default argument).

        The parsed result is stored in self.structure as a nested list of
        (text, sub-paragraphs) pairs.
        '''
        link_tail = optional_trailing_punctuation + trailing_space
        anchor = '<a href="\\2">\\1</a>\\4\\5\\6'

        # Links written as:  "text":url
        colon_target = ('\([-:a-zA-Z0-9_,./?=@#~]+%s\)'
                        % not_punctuation_or_whitespace)
        aStructuredString = gsub(
            '\"\([^\"\0]+\)\":' + colon_target + link_tail,
            anchor,
            aStructuredString)

        # Links written as:  "text", url
        comma_target = ('\([a-zA-Z]*:[-:a-zA-Z0-9_,./?=@#~]*%s\)'
                        % not_punctuation_or_whitespace)
        aStructuredString = gsub(
            '\"\([^\"\0]+\)\",[\0- ]+' + comma_target + link_tail,
            anchor,
            aStructuredString)

        # A link target given without anything before its ':' leaves a
        # superfluous leading colon in the href; strip it.
        if find(aStructuredString, '<a href=":') != -1:
            aStructuredString = gsub('<a href=":', '<a href="',
                                     aStructuredString)

        self.level = level
        pieces = ts_regex.split(untabify(aStructuredString),
                                paragraph_divider)
        self.structure = structure(map(indent_level, pieces))


    def __str__(self):
        """Return the string form of the parsed paragraph structure."""
        return str(self.structure)
614 | ||
615 | ||
# Pattern fragments for inline character markup; ctag() combines them.
# ctag_prefix / ctag_suffix describe what may surround a marked-up span.
ctag_prefix="\([\0- (]\|^\)"
ctag_suffix="\([\0- ,.:;!?)]\|$\)"
# Templates filled with the delimiter character: six slots for a single
# delimiter, eight slots for a doubled delimiter.
ctag_middle="[%s]\([^\0- %s][^%s]*[^\0- %s]\|[^%s]\)[%s]"
ctag_middl2="[%s][%s]\([^\0- %s][^%s]*[^\0- %s]\|[^%s]\)[%s][%s]"
620 | ||
def ctag(s,
         em=regex.compile(
             ctag_prefix+(ctag_middle % (("*",)*6) )+ctag_suffix),
         strong=regex.compile(
             ctag_prefix+(ctag_middl2 % (("*",)*8))+ctag_suffix),
         under=regex.compile(
             ctag_prefix+(ctag_middle % (("_",)*6) )+ctag_suffix),
         code=regex.compile(
             ctag_prefix+(ctag_middle % (("\'",)*6))+ctag_suffix),
         ):
    """Convert inline character markup in 's' to HTML tags.

    s -- the text to convert; None is treated as the empty string.

    The remaining parameters are precompiled patterns ("static" default
    arguments).  Substitutions run in a fixed order -- strong, underline,
    code, emphasis -- so the doubled '**' is handled before the single '*'.
    """
    text = s
    if text is None:
        text = ''
    for pattern, template in (
        (strong, '\\1<strong>\\2</strong>\\3'),
        (under, '\\1<u>\\2</u>\\3'),
        (code, '\\1<code>\\2</code>\\3'),
        (em, '\\1<em>\\2</em>\\3'),
        ):
        text = gsub(pattern, template, text)
    return text
637 | ||
class HTML(StructuredText):

    '''\
    An HTML structured text formatter.

    Each formatting method receives the HTML produced so far ('before'),
    appends its own markup and returns the combined string.
    '''

    def __str__(self,
                extra_dl=regex.compile("</dl>\n<dl>"),
                extra_ul=regex.compile("</ul>\n<ul>"),
                extra_ol=regex.compile("</ol>\n<ol>"),
                ):
        '''\
        Return an HTML string representation of the structured text data.

        Every list item is rendered as a complete list of its own; the
        close/open tag pairs between adjacent items are removed here so
        that neighbouring items end up in one list.
        '''
        s=self._str(self.structure,self.level)
        s=gsub(extra_dl,'\n',s)
        s=gsub(extra_ul,'\n',s)
        s=gsub(extra_ol,'\n',s)
        return s

    def ul(self, before, p, after):
        '''Append a one-item unordered list; 'after' holds nested content.'''
        if p: p="<p>%s</p>" % strip(ctag(p))
        return ('%s<ul><li>%s\n%s\n</li></ul>\n'
                % (before,p,after))

    def ol(self, before, p, after):
        '''Append a one-item ordered list; 'after' holds nested content.'''
        if p: p="<p>%s</p>" % strip(ctag(p))
        return ('%s<ol><li>%s\n%s\n</li></ol>\n'
                % (before,p,after))

    def dl(self, before, t, d, after):
        '''Append a descriptive list entry with title t and description d.'''
        return ('%s<dl><dt>%s</dt><dd><p>%s</p>\n%s\n</dd></dl>\n'
                % (before,ctag(t),ctag(d),after))

    def head(self, before, t, level, d):
        '''Append a heading t followed by its content d.

        Levels 1 to 5 produce an <hN> element.  Any other level produces a
        descriptive list whose title is the heading in bold.
        '''
        if level > 0 and level < 6:
            return ('%s<h%d>%s</h%d>\n%s\n'
                    % (before,level,strip(ctag(t)),level,d))

        # The paragraph is closed with </p>; the former markup ended with a
        # second opening <p>, leaving the title paragraph unterminated.
        t="<p><strong>%s</strong></p>" % strip(ctag(t))
        return ('%s<dl><dt>%s\n</dt><dd>%s\n</dd></dl>\n'
                % (before,t,d))

    def normal(self,before,p,after):
        '''Append an ordinary paragraph p followed by nested content.'''
        return '%s<p>%s</p>\n%s\n' % (before,ctag(p),after)

    def pre(self,structure,tagged=0):
        '''Render a paragraph structure as quoted, preformatted text.

        structure -- list of (text, sub-structure) pairs.
        tagged    -- true in recursive calls, where the surrounding <PRE>
                     element has already been opened by the outer call.
        '''
        if not structure: return ''
        if tagged:
            r=''
        else:
            r='<PRE>\n'
        for s in structure:
            r="%s%s\n\n%s" % (r,html_quote(s[0]),self.pre(s[1],1))
        if not tagged: r=r+'</PRE>\n'
        return r

    def table(self,before,table,after):
        '''Append already rendered table markup, wrapped in a paragraph.'''
        return '%s<p>%s</p>\n%s\n' % (before,ctag(table),after)

    def _list_body(self,s,level):
        '''Render the sub-paragraphs of list item s.

        They become preformatted example text when the item text ends in
        '::' and has sub-paragraphs, and regular structured text otherwise.
        '''
        if s[0][-2:]=='::' and s[1]: return self.pre(s[1])
        return self._str(s[1],level)

    def _str(self,structure,level,
             # Static
             bullet=ts_regex.compile('[ \t\n]*[o*-][ \t\n]+\([^\0]*\)'
                                     ).match_group,
             example=ts_regex.compile('[\0- ]examples?:[\0- ]*$'
                                      ).search,
             dl=ts_regex.compile('\([^\n]+\)[ \t]+--[ \t\n]+\([^\0]*\)'
                                 ).match_group,
             nl=ts_regex.compile('\n').search,
             ol=ts_regex.compile(
                 '[ \t]*\(\([0-9]+\|[a-zA-Z]+\)[.)]\)+[ \t\n]+\([^\0]*\|$\)'
                 ).match_group,
             olp=ts_regex.compile('[ \t]*([0-9]+)[ \t\n]+\([^\0]*\|$\)'
                                  ).match_group,
             ):
        '''Render a list of (text, sub-paragraphs) pairs as HTML.

        structure -- the paragraphs to render.
        level     -- heading level used for paragraphs treated as headings.

        The keyword arguments are precompiled matchers ("static" default
        arguments).  Each paragraph is classified by the first test that
        succeeds: bullet item, ordered item (two notations), descriptive
        list entry, example introduction, table, heading, plain paragraph.
        '''
        r=''
        for s in structure:

            ts_results = bullet(s[0], (1,))
            if ts_results:
                p = ts_results[1]
                r=self.ul(r,p,self._list_body(s,level))
                continue
            ts_results = ol(s[0], (3,))
            if ts_results:
                p = ts_results[1]
                r=self.ol(r,p,self._list_body(s,level))
                continue
            ts_results = olp(s[0], (1,))
            if ts_results:
                p = ts_results[1]
                r=self.ol(r,p,self._list_body(s,level))
                continue
            ts_results = dl(s[0], (1,2))
            if ts_results:
                t,d = ts_results[1]
                r=self.dl(r,t,d,self._str(s[1],level))
                continue
            if example(s[0]) >= 0 and s[1]:
                # Introduce an example, using pre tags:
                r=self.normal(r,s[0],self.pre(s[1]))
                continue
            if s[0][-2:]=='::' and s[1]:
                # Introduce an example, using pre tags; one of the two
                # colons is dropped from the introducing paragraph.
                r=self.normal(r,s[0][:-1],self.pre(s[1]))
                continue
            if table.create(s[0]):
                ## table support ('table' is the module-level Table
                ## instance, not the method of the same name).
                r=self.table(r,table.html(),self._str(s[1],level))
                continue

            if nl(s[0]) < 0 and s[1] and s[0][-1:] != ':':
                # Treat as a heading: a single line with sub-paragraphs.
                # A level of 0 stays 0 for nested headings; any other
                # level is increased by one.
                t=s[0]
                r=self.head(r,t,level,
                            self._str(s[1],level and level+1))
            else:
                r=self.normal(r,s[0],self._str(s[1],level))
        return r
765 | ||
766 | ||
def html_quote(v,
               character_entities=(
                   (regex.compile('&'), '&amp;'),
                   (regex.compile("<"), '&lt;' ),
                   (regex.compile(">"), '&gt;' ),
                   (regex.compile('"'), '&quot;')
                   )): #"
    """Return str(v) with HTML special characters replaced by entities.

    v                  -- any object; it is converted with str() first.
    character_entities -- sequence of (pattern, replacement) pairs that are
                          applied in order.

    The ampersand pair must stay first so that the ampersands introduced by
    the later replacements are not quoted a second time.  The replacements
    are entity references; with the bare characters as replacements the
    function returned its input unchanged.
    """
    text=str(v)
    for re,name in character_entities:
        text=gsub(re,name,text)
    return text
778 | ||
def html_with_references(text, level=1):
    """Build an HTML formatter after converting bracketed references.

    text  -- the structured text source.
    level -- heading level passed on to HTML.

    Three substitutions are applied in order before the text is handed to
    HTML: '.. [name]' at a line start becomes a named anchor, '[name]'
    becomes a link to that anchor, and '[name.html]' becomes a link to that
    file.  Returns the HTML instance.
    """
    rewrites = (
        # .. [12]  ->  named anchor
        ('[\0\n].. \[\([-_0-9_a-zA-Z-]+\)\]',
         '\n <a name="\\1">[\\1]</a>'),
        # [12]  ->  link to the named anchor
        ('\([\0- ,]\)\[\([0-9_a-zA-Z-]+\)\]\([\0- ,.:]\)',
         '\\1<a href="#\\2">[\\2]</a>\\3'),
        # [page.html]  ->  link to the file
        ('\([\0- ,]\)\[\([^]]+\)\.html\]\([\0- ,.:]\)',
         '\\1<a href="\\2.html">[\\2]</a>\\3'),
        )
    for pattern, template in rewrites:
        text = gsub(pattern, template, text)

    return HTML(text,level=level)
796 | ||
797 | ||
def main():
    """Command-line entry point: convert structured text to HTML.

    Reads the single file named on the command line, or standard input when
    no file is given, and writes the HTML to standard output.  Options:

      -w  first emit a 'Content-Type: text/html' header and a blank line
      -t  accepted by the option parser

    When any option is given, a leading '#!' line and leading blank lines
    are removed, and output that starts with an <h1> heading is wrapped in
    a complete HTML page titled with that heading.
    """
    # NOTE(review): the nesting of the statements under 'if opts:' was
    # reconstructed from a listing without indentation -- confirm it
    # against the original file.
    import sys, getopt

    opts,args=getopt.getopt(sys.argv[1:],'tw')

    if args:
        [infile]=args
        # Close the input file explicitly instead of leaving it open.
        f=open(infile,'r')
        try:
            s=f.read()
        finally:
            f.close()
    else:
        s=sys.stdin.read()

    emit=sys.stdout.write

    if not opts:
        emit('%s\n' % html_with_references(s))
        return

    for opt in opts:
        if opt[0]=='-w':
            emit('Content-Type: text/html\n\n')
            break

    if s[:2]=='#!':
        s=ts_regex.sub('^#![^\n]+','',s)

    # Drop everything up to and including the last newline of the leading
    # run of blank (control or space only) text.
    r=ts_regex.compile('\([\0-\n]*\n\)')
    ts_results = r.match_group(s, (1,))
    if ts_results:
        s=s[len(ts_results[1]):]

    s=str(html_with_references(s))
    if s[:4]=='<h1>':
        t=s[4:find(s,'</h1>')]
        s=('<html><head><title>%s</title>\n'
           '</head><body>\n'
           '%s\n'
           '</body></html>\n') % (t,s)
    emit('%s\n' % s)

if __name__=="__main__": main()