]> git.saurik.com Git - wxWidgets.git/blob - wxPython/samples/stxview/StructuredText/DocumentClass.py
Got a new version of StructuredText from Zope's CVS.
[wxWidgets.git] / wxPython / samples / stxview / StructuredText / DocumentClass.py
1 ##############################################################################
2 #
3 # Zope Public License (ZPL) Version 1.0
4 # -------------------------------------
5 #
6 # Copyright (c) Digital Creations. All rights reserved.
7 #
8 # This license has been certified as Open Source(tm).
9 #
10 # Redistribution and use in source and binary forms, with or without
11 # modification, are permitted provided that the following conditions are
12 # met:
13 #
14 # 1. Redistributions in source code must retain the above copyright
15 # notice, this list of conditions, and the following disclaimer.
16 #
17 # 2. Redistributions in binary form must reproduce the above copyright
18 # notice, this list of conditions, and the following disclaimer in
19 # the documentation and/or other materials provided with the
20 # distribution.
21 #
22 # 3. Digital Creations requests that attribution be given to Zope
23 # in any manner possible. Zope includes a "Powered by Zope"
24 # button that is installed by default. While it is not a license
25 # violation to remove this button, it is requested that the
26 # attribution remain. A significant investment has been put
27 # into Zope, and this effort will continue if the Zope community
28 # continues to grow. This is one way to assure that growth.
29 #
30 # 4. All advertising materials and documentation mentioning
31 # features derived from or use of this software must display
32 # the following acknowledgement:
33 #
34 # "This product includes software developed by Digital Creations
35 # for use in the Z Object Publishing Environment
36 # (http://www.zope.org/)."
37 #
38 # In the event that the product being advertised includes an
39 # intact Zope distribution (with copyright and license included)
40 # then this clause is waived.
41 #
42 # 5. Names associated with Zope or Digital Creations must not be used to
43 # endorse or promote products derived from this software without
44 # prior written permission from Digital Creations.
45 #
46 # 6. Modified redistributions of any form whatsoever must retain
47 # the following acknowledgment:
48 #
49 # "This product includes software developed by Digital Creations
50 # for use in the Z Object Publishing Environment
51 # (http://www.zope.org/)."
52 #
53 # Intact (re-)distributions of any official Zope release do not
54 # require an external acknowledgement.
55 #
56 # 7. Modifications are encouraged but must be packaged separately as
57 # patches to official Zope releases. Distributions that do not
58 # clearly separate the patches from the original work must be clearly
59 # labeled as unofficial distributions. Modifications which do not
60 # carry the name Zope may be packaged in any form, as long as they
61 # conform to all of the clauses above.
62 #
63 #
64 # Disclaimer
65 #
66 # THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
67 # EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
68 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
69 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
70 # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
71 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
72 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
73 # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
74 # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
75 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
76 # OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
77 # SUCH DAMAGE.
78 #
79 #
80 # This software consists of contributions made by Digital Creations and
81 # many individuals on behalf of Digital Creations. Specific
82 # attributions are listed in the accompanying credits file.
83 #
84 ##############################################################################
85
86 import re, ST, STDOM
87 from string import split, join, replace, expandtabs, strip, find, rstrip
88 from STletters import *
89
90
# Exact type objects used for ``type(x) is ...`` checks while coloring text.
StringType = str
ListType = list
93
def flatten(obj, append):
    """Feed the value of every text node under ``obj`` to ``append``.

    ``obj`` is a DOM-style node.  A node whose type equals
    ``STDOM.TEXT_NODE`` contributes its node value; any other node is
    descended into, child by child, in document order.
    """
    if obj.getNodeType() != STDOM.TEXT_NODE:
        # Not a text node: recurse into the children instead.
        for node in obj.getChildNodes():
            flatten(node, append)
        return
    append(obj.getNodeValue())
100
101
class StructuredTextExample(ST.StructuredTextParagraph):
    """A paragraph of literal text (an example block) that is never colored."""

    def __init__(self, subs, **kw):
        """Build the literal text from the text nodes found under ``subs``.

        The text of each sub-node is collected with ``flatten`` and the
        pieces are joined with a blank line between them.  The resulting
        paragraph is created with an empty tuple of subparagraphs; ``kw``
        is forwarded to the base constructor.
        """
        pieces = []
        for sub in subs:
            flatten(sub, pieces.append)
        literal = '\n\n'.join(pieces)
        ST.StructuredTextParagraph.__init__(self, literal, (), **kw)

    def getColorizableTexts(self):
        """Return an empty tuple: example text offers nothing to color."""
        return ()

    def setColorizableTexts(self, src):
        """Ignore ``src``; examples are never colored."""
        pass
116
class StructuredTextBullet(ST.StructuredTextParagraph):
    """A bulleted list item, as produced by ``DocumentClass.doc_bullet``."""
119
class StructuredTextNumbered(ST.StructuredTextParagraph):
    """A numbered list item, as produced by ``DocumentClass.doc_numbered``."""
122
class StructuredTextDescriptionTitle(ST.StructuredTextParagraph):
    """The title half of a description; see ``StructuredTextDescription.getChildren``."""
125
class StructuredTextDescriptionBody(ST.StructuredTextParagraph):
    """The body half of a description; see ``StructuredTextDescription.getChildren``."""
128
class StructuredTextDescription(ST.StructuredTextParagraph):
    """A description-list entry: a title paired with a body paragraph."""

    def __init__(self, title, src, subs, **kw):
        """Store ``title`` and initialise the base paragraph.

        ``src`` (the body text), ``subs`` (the subparagraphs) and ``kw``
        are handed to ``ST.StructuredTextParagraph.__init__``.
        """
        ST.StructuredTextParagraph.__init__(self, src, subs, **kw)
        self._title = title

    def getColorizableTexts(self):
        """Return the ``(title, body)`` pair of colorizable texts."""
        return (self._title, self._src)

    def setColorizableTexts(self, src):
        """Unpack the two-item sequence ``src`` into title and body."""
        title, body = src
        self._title = title
        self._src = body

    def getChildren(self):
        """Return a title node followed by a body node.

        The body node receives this paragraph's ``_src`` and ``_subs``
        (presumably set by the base constructor -- confirm in ``ST``).
        """
        title_node = StructuredTextDescriptionTitle(self._title)
        body_node = StructuredTextDescriptionBody(self._src, self._subs)
        return (title_node, body_node)
142
class StructuredTextSectionTitle(ST.StructuredTextParagraph):
    """The heading of a section; created by ``StructuredTextSection``."""
145
class StructuredTextSection(ST.StructuredTextParagraph):
    """A document section whose source is a ``StructuredTextSectionTitle``."""

    def __init__(self, src, subs=None, **kw):
        """Wrap ``src`` in a section title and initialise the paragraph.

        The title node, not the raw ``src`` text, is what gets passed to
        the base constructor together with ``subs`` and ``kw``.
        """
        heading = StructuredTextSectionTitle(src)
        ST.StructuredTextParagraph.__init__(self, heading, subs, **kw)

    def getColorizableTexts(self):
        """Return the colorizable texts of the title node."""
        heading = self._src
        return heading.getColorizableTexts()

    def setColorizableTexts(self, src):
        """Hand ``src`` to the title node."""
        heading = self._src
        heading.setColorizableTexts(src)
158
# a StructuredTextTable holds StructuredTextRows
class StructuredTextTable(ST.StructuredTextParagraph):
    """
    A table paragraph holding a list of StructuredTextRow objects.

    rows is a list of rows; each row is a list of cell tuples.  The
    first five items of a cell tuple are passed by StructuredTextRow to
    StructuredTextColumn as text, span, align, valign and type.
    EX
    rows = [[('row 1:column1', 1, 'left', 'top', 'td')],
            [('row 2:column1', 1, 'left', 'top', 'td')]]
    """

    def __init__(self, rows, src, subs, **kw):
        """Create one StructuredTextRow per non-empty entry of ``rows``.

        ``kw`` is forwarded both to the base constructor (as keyword
        arguments) and to every row (as a plain dict).
        """
        # NOTE(review): ``src`` is accepted but unused, and ``subs`` is the
        # only positional value given to the base constructor, whereas the
        # other paragraph classes in this file pass (src, subs).  Kept
        # as-is; confirm against ST.StructuredTextParagraph before changing.
        ST.StructuredTextParagraph.__init__(self, subs, **kw)
        self._rows = [StructuredTextRow(row, kw) for row in rows if row]

    def getRows(self):
        """Return a one-element list wrapping the list of rows."""
        return [self._rows]

    def _getRows(self):
        return self.getRows()

    def getColumns(self):
        """Return a list with the ``getColumns()`` result of every row."""
        return [row.getColumns() for row in self._rows]

    def _getColumns(self):
        return self.getColumns()

    def setColumns(self, columns):
        """Give row ``i`` the columns found at ``columns[i]``."""
        for index in range(len(self._rows)):
            self._rows[index].setColumns(columns[index])

    def _setColumns(self, columns):
        return self.setColumns(columns)

    def getColorizableTexts(self):
        """
        return a list where each item is a column/cell's
        contents. The list, result, will be of this format.
        ["r1 col1", "r1 col2", "r2 col1", "r2 col2"]
        """
        result = []
        for row in self._rows:
            # Row.getColumns() wraps its cell list in a one-element list.
            for column in row.getColumns()[0]:
                result.append(column.getColorizableTexts()[0])
        return result

    def setColorizableTexts(self, texts):
        """
        texts is a sequence where each item is the result of mapping
        one cell through the color function.  The items are handed out
        to the cells row by row, in the order getColorizableTexts
        produced them; each cell receives a one-item tuple.
        """
        position = 0
        for row in self._rows:
            for column in row._columns:
                column.setColorizableTexts((texts[position],))
                position = position + 1

    def _getColorizableTexts(self):
        return self.getColorizableTexts()

    def _setColorizableTexts(self, texts):
        # The original took no argument and called setColorizableTexts()
        # with none, so it could only ever raise TypeError.
        return self.setColorizableTexts(texts)
227
# StructuredTextRow holds StructuredTextColumns
class StructuredTextRow(ST.StructuredTextParagraph):
    """One table row: an ordered list of StructuredTextColumn cells."""

    def __init__(self,row,kw):
        """
        row is a list of tuples.  Items 0 through 4 of each tuple are
        passed to StructuredTextColumn as the cell's text, span, align,
        valign and type.
        EX
        [('this is column one', 1, 'left', 'top', 'td'),
         ('this is column two', 1, 'left', 'top', 'td')]

        kw is a plain dict; it is expanded into keyword arguments for
        the base constructor and handed unchanged to every column.
        """
        ST.StructuredTextParagraph.__init__(self, [], **kw)

        self._columns = [
            StructuredTextColumn(cell[0], cell[1], cell[2], cell[3], cell[4], kw)
            for cell in row
        ]

    def getColumns(self):
        """Return a one-element list wrapping the list of cells."""
        return [self._columns]

    def _getColumns(self):
        return [self._columns]

    def setColumns(self,columns):
        """Replace the list of cells with ``columns``."""
        self._columns = columns

    def _setColumns(self,columns):
        return self.setColumns(columns)
262
# this holds the text of a table cell
class StructuredTextColumn(ST.StructuredTextParagraph):
    """
    StructuredTextColumn is a cell/column in a table.
    A cell can hold multiple paragraphs. The cell
    is either classified as a StructuredTextTableHeader
    or StructuredTextTableData.
    """

    def __init__(self,text,span,align,valign,typ,kw):
        """Record the cell's layout attributes.

        ``text`` becomes the paragraph source (with an empty list of
        subparagraphs); ``kw`` is a plain dict expanded into keyword
        arguments for the base constructor.
        """
        ST.StructuredTextParagraph.__init__(self, text, [], **kw)
        self._span, self._align = span, align
        self._valign, self._type = valign, typ

    def getSpan(self):
        """Return the span value given at construction."""
        return self._span

    def _getSpan(self):
        return self._span

    def getAlign(self):
        """Return the horizontal alignment given at construction."""
        return self._align

    def _getAlign(self):
        return self.getAlign()

    def getValign(self):
        """Return the vertical alignment given at construction."""
        return self._valign

    def _getValign(self):
        return self.getValign()

    def getType(self):
        """Return the cell type given at construction."""
        return self._type

    def _getType(self):
        return self.getType()
302
class StructuredTextTableHeader(ST.StructuredTextParagraph):
    """Paragraph class naming a table header cell; adds no behaviour."""
304
class StructuredTextTableData(ST.StructuredTextParagraph):
    """Paragraph class naming a table data cell; adds no behaviour."""
306
class StructuredTextMarkup(STDOM.Element):
    """Base class for inline markup nodes that wrap a single value."""

    def __init__(self, v, **kw):
        """Store ``v`` as the value and every keyword as an attribute.

        The keyword names are also remembered in ``_attributes``.
        """
        self._value = v
        self._attributes = list(kw.keys())
        for name, setting in kw.items():
            setattr(self, name, setting)

    def getChildren(self, type=type, lt=type([])):
        """Return the value as a list, wrapping it when it is not one."""
        if type(self._value) is lt:
            return self._value
        return [self._value]

    def getColorizableTexts(self):
        """Return a one-item tuple holding the value."""
        return (self._value,)

    def setColorizableTexts(self, v):
        """Replace the value with the first item of ``v``."""
        self._value = v[0]

    def __repr__(self):
        return '%s(%s)' % (self.__class__.__name__, repr(self._value))
324
class StructuredTextLiteral(StructuredTextMarkup):
    """Inline literal text; its content is never colored."""

    def getColorizableTexts(self):
        """Return an empty tuple so nothing inside the literal is parsed."""
        return ()

    def setColorizableTexts(self, v):
        """Ignore ``v``; the literal value is left untouched."""
        pass
328
class StructuredTextEmphasis(StructuredTextMarkup):
    """Emphasized text, as produced by ``DocumentClass.doc_emphasize``."""
330
class StructuredTextStrong(StructuredTextMarkup):
    """Strong text, as produced by ``DocumentClass.doc_strong``."""
332
class StructuredTextInnerLink(StructuredTextMarkup):
    """A bracketed inner link, as produced by ``DocumentClass.doc_inner_link``."""
334
class StructuredTextNamedLink(StructuredTextMarkup):
    """A ``.. [name]`` link target, as produced by ``DocumentClass.doc_named_link``."""
336
class StructuredTextUnderline(StructuredTextMarkup):
    """Underlined text, as produced by ``DocumentClass.doc_underline``."""
338
class StructuredTextSGML(StructuredTextMarkup):
    """An SGML tag kept verbatim, as produced by ``DocumentClass.doc_sgml``."""
340
class StructuredTextLink(StructuredTextMarkup):
    """A hyperlink with an ``href`` attribute, as produced by ``DocumentClass.doc_href``."""
342
class StructuredTextXref(StructuredTextMarkup):
    """A bracketed cross reference, as produced by ``DocumentClass.doc_xref``."""
344
class DocumentClass:
    """
    Callable that turns structured text into a tree of typed nodes.

    Class instance calls [ex.=> x()] require a structured text
    structure. Doc will then parse each paragraph in the structure
    and will find the special structures within each paragraph.
    Each special structure will be stored as an instance. Special
    structures within another special structure are stored within
    the 'top' structure
    EX : '-underline this-' => would be turned into an underline
    instance. '-underline **this**' would be stored as an underline
    instance with a strong instance stored in its string
    """

    # Names of the paragraph recognisers, tried in this order; the first
    # one returning a true value wins (see color_paragraphs).
    paragraph_types = [
        'doc_bullet',
        'doc_numbered',
        'doc_description',
        'doc_header',
        'doc_table',
        ]

    # Names of the inline recognisers, applied in this order by color_text.
    # Not enabled: 'doc_inner_link', 'doc_named_link', 'doc_underline'.
    # NOTE(review): 'doc_sgml' is listed twice; kept as found.
    text_types = [
        'doc_sgml',
        'doc_href',
        'doc_strong',
        'doc_emphasize',
        'doc_literal',
        'doc_sgml',
        'doc_xref',
        ]

    def __call__(self, doc):
        """Color ``doc`` and return the resulting document.

        A plain string is first parsed with ``ST.StructuredText`` and its
        subparagraphs are replaced by their colored versions.  Anything
        else is expected to offer ``getSubparagraphs()``; a new
        ``ST.StructuredTextDocument`` is built from the colored
        subparagraphs.
        """
        if type(doc) is type(''):
            doc = ST.StructuredText(doc)
            doc.setSubparagraphs(
                self.color_paragraphs(doc.getSubparagraphs()))
        else:
            doc = ST.StructuredTextDocument(
                self.color_paragraphs(doc.getSubparagraphs()))
        return doc

    def parse(self, raw_string, text_type,
              type=type, st=type(''), lt=type([])):
        """
        Split ``raw_string`` around every match of one inline recogniser.

        ``text_type`` is either a recogniser callable or the name of one
        of this object's methods.  A recogniser returns a false value
        when nothing matches, otherwise ``(item, start, end)``.

        Parse will continue to search through raw_string until
        no further match is found.

        If no matches are found, raw_string is returned.  If the single
        match covers the whole string, the matched item itself is
        returned.  Otherwise a list of substrings and items is returned.
        """
        tmp = []    # the list to be returned if raw_string is split
        append = tmp.append

        if type(text_type) is st:
            text_type = getattr(self, text_type)

        while 1:
            t = text_type(raw_string)
            if not t:
                break
            # a match was found
            t, start, end = t

            if start:
                append(raw_string[0:start])

            tt = type(t)
            if tt is st:
                # if we get a string back, add it to text to be parsed
                raw_string = t + raw_string[end:]
            else:
                if tt is lt:
                    # if we get a list, append its elements
                    tmp.extend(t)
                else:
                    # normal case, an object
                    append(t)
                raw_string = raw_string[end:]

        if not tmp:
            return raw_string   # nothing found

        if raw_string:
            append(raw_string)
        elif len(tmp) == 1:
            return tmp[0]

        return tmp

    def color_text(self, str, types=None):
        """Search the paragraph for each special structure.

        ``str`` may be a string, a list of strings and nodes, or a single
        node.  ``types`` is the sequence of recognisers to apply and
        defaults to ``self.text_types``.  Returns the colored text, which
        may be a string, a list or a node.
        """
        if types is None:
            types = self.text_types

        for text_type in types:

            if type(str) is StringType:
                str = self.parse(str, text_type)
            elif type(str) is ListType:
                r = []
                for s in str:
                    if type(s) is StringType:
                        s = self.parse(s, text_type)
                        if type(s) is ListType:
                            r.extend(s)
                        else:
                            r.append(s)
                    else:
                        # A node: color its own texts with all text types.
                        s.setColorizableTexts(
                            [self.color_text(t)
                             for t in s.getColorizableTexts()])
                        r.append(s)
                str = r
            else:
                # A single node: color each of its texts with this one
                # type and store the *colored* results back.  The
                # original discarded the return value of the recursive
                # call, so freshly split strings were lost.
                colored = []
                for s in str.getColorizableTexts():
                    colored.append(self.color_text(s, (text_type,)))
                str.setColorizableTexts(colored)

        return str

    def color_paragraphs(self, raw_paragraphs,
                         type=type, sequence_types=(type([]), type(())),
                         st=type('')):
        """Classify each paragraph and color its inline text.

        Paragraphs whose node name is not 'StructuredTextParagraph' are
        passed through unchanged.  Every other paragraph is offered to
        the recognisers in ``self.paragraph_types``; the first true
        result replaces it (a single node is treated as a one-item
        sequence).  When no recogniser matches, a plain
        ``ST.StructuredTextParagraph`` is built.  Subparagraphs are
        colored recursively.  Returns the list of resulting paragraphs.
        """
        result = []
        for paragraph in raw_paragraphs:
            if paragraph.getNodeName() != 'StructuredTextParagraph':
                result.append(paragraph)
                continue

            for pt in self.paragraph_types:
                if type(pt) is st:
                    # grab the corresponding function
                    pt = getattr(self, pt)
                # evaluate the paragraph
                r = pt(paragraph)
                if r:
                    if type(r) not in sequence_types:
                        r = (r,)
                    new_paragraphs = r
                    for paragraph in new_paragraphs:
                        paragraph.setSubparagraphs(
                            self.color_paragraphs(
                                paragraph.getSubparagraphs()))
                    break
            else:
                new_paragraphs = (
                    ST.StructuredTextParagraph(
                        paragraph.getColorizableTexts()[0],
                        self.color_paragraphs(paragraph.getSubparagraphs()),
                        indent=paragraph.indent),
                    )

            # color the inline StructuredText types
            # for each StructuredTextParagraph
            for paragraph in new_paragraphs:

                # Compare by value: ``is`` on a string literal only works
                # when both strings happen to be interned.
                if paragraph.getNodeName() == "StructuredTextTable":
                    # Each cell is parsed and colored as its own small
                    # document; the cell then holds that document's
                    # subparagraphs.
                    text = paragraph.getColorizableTexts()
                    text = [ST.StructuredText(t) for t in text]
                    text = [self(t) for t in text]
                    text = [t.getSubparagraphs() for t in text]
                    paragraph.setColorizableTexts(text)

                paragraph.setColorizableTexts(
                    [self.color_text(t)
                     for t in paragraph.getColorizableTexts()])
                result.append(paragraph)

        return result

    def doc_table(self, paragraph, expr = re.compile(r'\s*\|[-]+\|').match):
        """Recognise an ASCII-art table and return a StructuredTextTable.

        The paragraph text must start with a ``|-----|`` divider line,
        otherwise None is returned.  Rows are separated by ``|---|``
        (data) or ``|===|`` (header) divider lines and cells by ``|``.
        For each cell the column span, the horizontal and vertical
        alignment and the cell type ("td" or "th") are worked out.
        """
        text = paragraph.getColorizableTexts()[0]
        m = expr(text)

        subs = paragraph.getSubparagraphs()

        if not m:
            return None

        spans = []
        ROWS = []
        COLS = []
        indexes = []
        ignore = []

        TDdivider = re.compile(r"[\-]+").match
        THdivider = re.compile(r"[\=]+").match
        col = re.compile(r'\|').search
        innertable = re.compile(r'\|([-]+|[=]+)\|').search
        blank_line = re.compile(r"\n\s*\n")

        text = text.strip()
        rows = [line.strip() for line in text.split('\n')]
        foo = ""

        # have indexes store if a row is a divider
        # or a cell part
        for index in range(len(rows)):
            tmpstr = rows[index][1:len(rows[index])-1]
            if TDdivider(tmpstr):
                indexes.append("TDdivider")
            elif THdivider(tmpstr):
                indexes.append("THdivider")
            else:
                indexes.append("cell")

        for index in range(len(indexes)):
            # The original tested ``is THdivider`` (the regex function),
            # which could never be true for a string label.
            if indexes[index] in ("TDdivider", "THdivider"):
                ignore = []     # reset ignore
                # dividers are deliberately not skipped here

            tmp = rows[index].strip()   # clean the row up
            tmp = tmp[1:len(tmp)-1]     # remove leading + trailing |
            offset = 0

            # find the start and end of inner
            # tables. ignore everything between
            if innertable(tmp):
                tmpstr = tmp.strip()
                while innertable(tmpstr):
                    start, end = innertable(tmpstr).span()
                    if not (start, end-1) in ignore:
                        # append() takes exactly one argument
                        ignore.append((start, end-1))
                    tmpstr = " " + tmpstr[end:]

            # find the location of column dividers
            # NOTE: |'s in inner tables do not count
            #   as column dividers
            if col(tmp):
                while col(tmp):
                    bar = 1     # true if start is not in ignore
                    start, end = col(tmp).span()

                    if not start+offset in spans:
                        for s, e in ignore:
                            # NOTE(review): ``or`` makes this true for
                            # any position once ``ignore`` is non-empty;
                            # ``and`` may have been intended.  Unchanged.
                            if start+offset >= s or start+offset <= e:
                                bar = None
                                break
                        if bar:     # start is clean
                            spans.append(start+offset)
                    if not bar:
                        foo = foo + tmp[:end]
                        tmp = tmp[end:]
                        offset = offset + end
                    else:
                        COLS.append((foo + tmp[0:start], start+offset))
                        foo = ""
                        tmp = " " + tmp[end:]
                        offset = offset + start
            if not offset+len(tmp) in spans:
                spans.append(offset+len(tmp))
            COLS.append((foo + tmp, offset+len(tmp)))
            foo = ""
            ROWS.append(COLS)
            COLS = []

        spans.sort()
        ROWS = ROWS[1:]

        # find each column span
        cols = []
        for row in ROWS:
            cols.append([c[1] for c in row])

        cur = 1
        tmp = []
        C = []
        for ends in cols:
            for span in spans:
                if not span in ends:
                    cur = cur + 1
                else:
                    tmp.append(cur)
                    cur = 1
            C.append(tmp)
            tmp = []

        for index in range(len(C)):
            for i in range(len(C[index])):
                ROWS[index][i] = (ROWS[index][i][0], C[index][i])
        rows = ROWS

        # label things as either TableData or
        # Table header
        TD = []
        TH = []
        all_dividers = []
        for index in range(len(indexes)):
            if indexes[index] == "TDdivider":
                TD.append(index)
                all_dividers.append(index)
            if indexes[index] == "THdivider":
                TH.append(index)
                all_dividers.append(index)
        TD = TD[1:]
        dividers = all_dividers[1:]

        for div in dividers:
            if div in TD:
                label = "td"
            else:
                label = "th"
            index = all_dividers.index(div)
            for rowindex in range(all_dividers[index-1], all_dividers[index]):
                for i in range(len(rows[rowindex])):
                    rows[rowindex][i] = (rows[rowindex][i][0],
                                         rows[rowindex][i][1],
                                         label)

        # now munge the multi-line cells together
        # as paragraphs
        ROWS = []
        COLS = []
        for row in rows:
            for index in range(len(row)):
                if not COLS:
                    # one [text, span, type] accumulator per cell
                    COLS = [["", 1, ""] for _unused in row]
                if TDdivider(row[index][0]) or THdivider(row[index][0]):
                    ROWS.append(COLS)
                    COLS = []
                else:
                    COLS[index][0] = COLS[index][0] + (row[index][0]) + "\n"
                    COLS[index][1] = row[index][1]
                    COLS[index][2] = row[index][2]

        # now that each cell has been munged together,
        # determine the cell's alignment.
        # Default is to center. Also determine the cell's
        # vertical alignment, top, middle, bottom. Default is
        # to middle
        rows = []
        cols = []
        for row in ROWS:
            for index in range(len(row)):
                topindent = 0
                bottomindent = 0
                leftindent = 0
                rightindent = 0
                left = []
                right = []
                text = row[index][0]
                text = text.split('\n')
                text = text[:len(text)-1]
                align = ""
                valign = ""
                # count blank lines at the top of the cell
                for t in text:
                    t = t.strip()
                    if not t:
                        topindent = topindent + 1
                    else:
                        break
                # count blank lines at the bottom of the cell
                text.reverse()
                for t in text:
                    t = t.strip()
                    if not t:
                        bottomindent = bottomindent + 1
                    else:
                        break
                text.reverse()
                tmp = "\n".join(text[topindent:len(text)-bottomindent])
                pars = blank_line.split(tmp)
                for par in pars:
                    if index > 0:
                        par = par[1:]
                    par = par.split(' ')
                    # leading empty pieces == leading spaces
                    for p in par:
                        if not p:
                            leftindent = leftindent + 1
                        else:
                            break
                    left.append(leftindent)
                    leftindent = 0
                    # trailing empty pieces == trailing spaces
                    par.reverse()
                    for p in par:
                        if not p:
                            rightindent = rightindent + 1
                        else:
                            break
                    right.append(rightindent)
                    rightindent = 0
                left.sort()
                right.sort()

                if topindent == bottomindent:
                    valign = "middle"
                elif topindent < 1:
                    valign = "top"
                elif bottomindent < 1:
                    valign = "bottom"
                else:
                    valign = "middle"

                if left[0] < 1:
                    align = "left"
                elif right[0] < 1:
                    align = "right"
                elif left[0] > 1 and right[0] > 1:
                    align = "center"
                else:
                    align = "left"

                # append() takes exactly one argument: the cell tuple
                cols.append((row[index][0], row[index][1],
                             align, valign, row[index][2]))
            rows.append(cols)
            cols = []
        return StructuredTextTable(rows, text, subs, indent=paragraph.indent)

    def doc_bullet(self, paragraph, expr = re.compile(r'\s*[-*o]\s+').match):
        """Recognise a paragraph starting with ``-``, ``*`` or ``o``.

        Returns a StructuredTextBullet holding the text after the bullet
        marker, or None.  When the text ends in ``::`` the subparagraphs
        become one StructuredTextExample and one colon is dropped.
        """
        top = paragraph.getColorizableTexts()[0]
        m = expr(top)

        if not m:
            return None

        subs = paragraph.getSubparagraphs()
        if top[-2:] == '::':
            subs = [StructuredTextExample(subs)]
            top = top[:-1]
        marker_end = m.span()[1]
        return StructuredTextBullet(top[marker_end:], subs,
                                    indent=paragraph.indent,
                                    bullet=top[:marker_end])

    def doc_numbered(
        self, paragraph,
        expr = re.compile(r'(\s*[%s]+\.)|(\s*[0-9]+\.)|(\s*[0-9]+\s+)' % letters).match):
        """Recognise a paragraph starting with a letter or number label.

        Returns a StructuredTextNumbered holding the text after the
        label, or None.  A trailing ``::`` is handled as in doc_bullet.
        """
        # An older expression was dropped because it had a nasty habit
        # of grabbing paragraphs that began with a single letter word
        # even if there was no following period.
        top = paragraph.getColorizableTexts()[0]
        m = expr(top)
        if not m:
            return None
        subs = paragraph.getSubparagraphs()
        if top[-2:] == '::':
            subs = [StructuredTextExample(subs)]
            top = top[:-1]
        label_end = m.span()[1]
        return StructuredTextNumbered(top[label_end:], subs,
                                      indent=paragraph.indent,
                                      number=top[:label_end])

    def doc_description(
        self, paragraph,
        delim = re.compile(r'\s+--\s+').search,
        nb=re.compile(r'[^\000- ]').search,
        ):
        """Recognise a ``title -- body`` paragraph.

        Returns a StructuredTextDescription, or None when there is no
        `` -- `` delimiter, when the title spans several lines, or when
        the title contains no non-blank character.
        """
        top = paragraph.getColorizableTexts()[0]
        d = delim(top)
        if not d:
            return None
        start, end = d.span()
        title = top[:start]
        if title.find('\n') >= 0:
            return None
        if not nb(title):
            return None
        d = top[start:end]
        top = top[end:]

        subs = paragraph.getSubparagraphs()
        if top[-2:] == '::':
            subs = [StructuredTextExample(subs)]
            top = top[:-1]

        return StructuredTextDescription(
            title, top, subs,
            indent=paragraph.indent,
            delim=d)

    def doc_header(self, paragraph,
                   expr = re.compile(r'[ %s0-9.:/,-_*<>\?\'\"]+' % letters).match
                   ):
        """Recognise a section header or an example introduction.

        Only paragraphs with subparagraphs and non-blank text qualify.
        Text ending in ``::`` turns the subparagraphs into a
        StructuredTextExample: the example alone is returned when the
        text is just ``::``, otherwise a plain paragraph wrapping it.
        Any other single-line text yields a StructuredTextSection.
        ``expr`` is not used by this method.
        """
        subs = paragraph.getSubparagraphs()
        if not subs:
            return None
        top = paragraph.getColorizableTexts()[0]
        if not top.strip():
            return None
        if top[-2:] == '::':
            subs = StructuredTextExample(subs)
            if top.strip() == '::':
                return subs
            return ST.StructuredTextParagraph(
                top[:-1], [subs], indent=paragraph.indent)

        if top.find('\n') >= 0:
            return None
        return StructuredTextSection(top, subs, indent=paragraph.indent)

    def doc_literal(
        self, s,
        expr=re.compile(
          r"(?:\s|^)'"                                                # open
          r"([^ \t\n\r\f\v']|[^ \t\n\r\f\v'][^\n']*[^ \t\n\r\f\v'])"  # contents
          r"'(?:\s|[,.;:!?]|$)"                                       # close
          ).search):
        """Find the first single-quoted literal in ``s``.

        Returns ``(StructuredTextLiteral, start, end)`` where the span
        covers the text and one character on each side of it, or None.
        """
        r = expr(s)
        if r:
            start, end = r.span(1)
            return (StructuredTextLiteral(s[start:end]), start-1, end+1)
        else:
            return None

    def doc_emphasize(
        self, s,
        expr = re.compile(r'\s*\*([ \n%s0-9]+)\*(?!\*|-)' % lettpunc).search
        ):
        """Find the first ``*emphasized*`` span in ``s``.

        Returns ``(StructuredTextEmphasis, start, end)`` with the span
        including both asterisks, or None.
        """
        r = expr(s)
        if r:
            start, end = r.span(1)
            return (StructuredTextEmphasis(s[start:end]), start-1, end+1)
        else:
            return None

    def doc_inner_link(self,
                       s,
                       expr1 = re.compile(r"\.\.\s*").search,
                       expr2 = re.compile(r"\[[%s0-9]+\]" % letters ).search):
        """Find the first ``[name]`` inner link in ``s``.

        Returns ``(StructuredTextInnerLink, start, end)`` with the span
        covering the brackets, or None when there is no bracketed name
        or when it directly follows ``..`` (which marks a named link).
        """
        bracket = expr2(s)
        if not bracket:
            return None
        start, end = bracket.span()

        dots = expr1(s)
        # make sure we dont grab a named link
        if dots and dots.span()[1] == start:
            # uh-oh, looks like a named link
            return None

        # Either there is no "..", or it is somewhere else and can be
        # ignored.  The original indexed ``s`` with a tuple here and put
        # the offsets inside the constructor call.
        return (StructuredTextInnerLink(s[start+1:end-1]), start, end)

    def doc_named_link(self,
                       s,
                       expr=re.compile(r"(\.\.\s)(\[[%s0-9]+\])" % letters).search):
        """Find the first ``.. [name]`` named link in ``s``.

        Returns ``(StructuredTextNamedLink, start, end)`` where the value
        is ``..`` followed by the bracketed name and the span covers the
        whole match, or None.
        """
        result = expr(s)
        if result:
            start, end = result.span(2)
            a, b = result.span(1)
            str = s[a:b].strip() + s[start:end]
            st, en = result.span()
            return (StructuredTextNamedLink(str), st, en)
        return None

    def doc_underline(self,
                      s,
                      expr=re.compile(r"\s+\_([%s0-9\s]+)\_" % lettpunc).search):
        """Find the first ``_underlined_`` span in ``s``.

        Returns ``(StructuredTextUnderline, start, end)`` where the span
        covers the whole match, leading whitespace included, or None.
        """
        result = expr(s)
        if result:
            start, end = result.span(1)
            st, e = result.span()
            return (StructuredTextUnderline(s[start:end]), st, e)
        else:
            return None

    def doc_strong(self,
                   s,
                   expr = re.compile(r'\s*\*\*([ \n%s0-9]+)\*\*' % lettpunc).search
                   ):
        """Find the first ``**strong**`` span in ``s``.

        Returns ``(StructuredTextStrong, start, end)`` with the span
        including both pairs of asterisks, or None.
        """
        r = expr(s)
        if r:
            start, end = r.span(1)
            return (StructuredTextStrong(s[start:end]), start-2, end+2)
        else:
            return None

    ## Some constants to make the doc_href() regex easier to read.
    _DQUOTEDTEXT = r'("[%s0-9\n\-\.\,\;\(\)\/\:\/\*\']+")' % letters ## double quoted text
    _URL_AND_PUNC = r'([%s0-9\@\.\,\?\!\/\:\;\-\#\~]+)' % letters
    _SPACES = r'(\s*)'

    def doc_href(self, s,
                 expr1 = re.compile(_DQUOTEDTEXT + "(:)" + _URL_AND_PUNC + _SPACES).search,
                 expr2 = re.compile(_DQUOTEDTEXT + r'(\,\s+)' + _URL_AND_PUNC + _SPACES).search):
        """Find the first ``"title":url`` or ``"title", url`` link in ``s``.

        Returns ``(StructuredTextLink, start, end)``.  The link value is
        the title with up to two double quotes removed, its ``href`` is
        the URL, and the span runs from the opening quote to the end of
        the URL.  One trailing punctuation character (``, . ? ! ;``) is
        left out of both the URL and the span.  Returns None when
        nothing matches.
        """
        punctuation = re.compile(r"[\,\.\?\!\;]+").match
        r = expr1(s) or expr2(s)

        if r:
            # need to grab the href part and the
            # beginning part
            start, e = r.span(1)
            name = s[start:e]
            name = name.replace('"', '', 2)
            st, end = r.span(3)
            if punctuation(s[end-1:end]):
                end = end - 1
            link = s[st:end]

            # name is the href title, link is the target
            # of the href
            return (StructuredTextLink(name, href=link),
                    start, end)
        else:
            return None

    def doc_sgml(self,s,expr=re.compile(r"\<[%s0-9\.\=\'\"\:\/\-\#\+\s\*]+\>" % letters).search):
        """
        SGML text is ignored and output as-is.

        Returns ``(StructuredTextSGML, start, end)`` for the first tag
        found in ``s``, or None.
        """
        r = expr(s)
        if r:
            start, end = r.span()
            text = s[start:end]
            return (StructuredTextSGML(text), start, end)
        return None

    def doc_xref(self, s,
                 expr = re.compile(r'\[([%s0-9\-.:/;,\n\~]+)\]' % letters).search
                 ):
        """Find the first ``[reference]`` in ``s``.

        Returns ``(StructuredTextXref, start, end)`` with the span
        including both brackets, or None.
        """
        r = expr(s)
        if r:
            start, end = r.span(1)
            return (StructuredTextXref(s[start:end]), start-1, end+1)
        else:
            return None
995
996
997
998