]> git.saurik.com Git - wxWidgets.git/blobdiff - wxPython/samples/stxview/StructuredText/DocumentClass.py
Replaced /'s with \'s as BCC requires \'s for path names
[wxWidgets.git] / wxPython / samples / stxview / StructuredText / DocumentClass.py
index ec6dc402a4febacf0d6e72f94091e80559e92910..405f35e25d27b0efeaae699cd8e7a582152d964f 100644 (file)
 
 import re, ST, STDOM
 from string import split, join, replace, expandtabs, strip, find, rstrip
 
 import re, ST, STDOM
 from string import split, join, replace, expandtabs, strip, find, rstrip
+from STletters import *
+
 
 StringType=type('')
 ListType=type([])
 
 
 StringType=type('')
 ListType=type([])
 
+def flatten(obj, append):
+   if obj.getNodeType()==STDOM.TEXT_NODE:
+      append(obj.getNodeValue())
+   else:
+      for child in obj.getChildNodes():
+         flatten(child, append)
+
+
 class StructuredTextExample(ST.StructuredTextParagraph):
     """Represents a section of document with literal text, as for examples"""
 
     def __init__(self, subs, **kw):
 class StructuredTextExample(ST.StructuredTextParagraph):
     """Represents a section of document with literal text, as for examples"""
 
     def __init__(self, subs, **kw):
-       t=[]; a=t.append
-       for s in subs: a(s.getNodeValue())
-       apply(ST.StructuredTextParagraph.__init__,
-             (self, join(t,'\n\n'), ()),
-             kw)
+        t=[]
+        a=t.append
+        for s in subs:
+            flatten(s, a)
+        apply(ST.StructuredTextParagraph.__init__,
+              (self, join(t,'\n\n'), ()),
+              kw)
 
     def getColorizableTexts(self): return ()
     def setColorizableTexts(self, src): pass # never color examples
 
     def getColorizableTexts(self): return ()
     def setColorizableTexts(self, src): pass # never color examples
@@ -137,9 +149,15 @@ class StructuredTextSection(ST.StructuredTextParagraph):
        apply(ST.StructuredTextParagraph.__init__,
              (self, StructuredTextSectionTitle(src), subs),
              kw)
        apply(ST.StructuredTextParagraph.__init__,
              (self, StructuredTextSectionTitle(src), subs),
              kw)
-
+    
+    def getColorizableTexts(self):
+        return self._src.getColorizableTexts()
+    
+    def setColorizableTexts(self,src):
+        self._src.setColorizableTexts(src)
+        
 # a StructuredTextTable holds StructuredTextRows
 # a StructuredTextTable holds StructuredTextRows
-class StructuredTextTable(ST.StructuredTextDocument):
+class StructuredTextTable(ST.StructuredTextParagraph):
     """
     rows is a list of lists containing tuples, which
     represent the columns/cells in each rows.
     """
     rows is a list of lists containing tuples, which
     represent the columns/cells in each rows.
@@ -148,7 +166,7 @@ class StructuredTextTable(ST.StructuredTextDocument):
     """
     
     def __init__(self, rows, src, subs, **kw):
     """
     
     def __init__(self, rows, src, subs, **kw):
-        apply(ST.StructuredTextDocument.__init__,(self,subs),kw)
+        apply(ST.StructuredTextParagraph.__init__,(self,subs),kw)
         self._rows = []
         for row in rows:
             if row:
         self._rows = []
         for row in rows:
             if row:
@@ -208,34 +226,40 @@ class StructuredTextTable(ST.StructuredTextDocument):
         return self.setColorizableTexts()
     
 # StructuredTextRow holds StructuredTextColumns
         return self.setColorizableTexts()
     
 # StructuredTextRow holds StructuredTextColumns
-class StructuredTextRow(ST.StructuredTextDocument):
+class StructuredTextRow(ST.StructuredTextParagraph):
     
     def __init__(self,row,kw):
         """
         row is a list of tuples, where each tuple is
         the raw text for a cell/column and the span
     
     def __init__(self,row,kw):
         """
         row is a list of tuples, where each tuple is
         the raw text for a cell/column and the span
-        of that cell/column"
+        of that cell/column. 
         EX 
         [('this is column one',1), ('this is column two',1)]
         """
         
         EX 
         [('this is column one',1), ('this is column two',1)]
         """
         
-        apply(ST.StructuredTextDocument.__init__,(self,[]),kw)
+        apply(ST.StructuredTextParagraph.__init__,(self,[]),kw)
+        
         self._columns = []
         self._columns = []
-        for column in row:            
-            self._columns.append(StructuredTextColumn(column[0],column[1],kw))
-
+        for column in row:
+            self._columns.append(StructuredTextColumn(column[0],
+                                                      column[1],
+                                                      column[2],
+                                                      column[3],
+                                                      column[4],
+                                                      kw))
+    
     def getColumns(self):
         return [self._columns]
     def getColumns(self):
         return [self._columns]
-
+    
     def _getColumns(self):
         return [self._columns]
     
     def setColumns(self,columns):
         self._columns = columns
     def _getColumns(self):
         return [self._columns]
     
     def setColumns(self,columns):
         self._columns = columns
-        
+    
     def _setColumns(self,columns):
         return self.setColumns(columns)
     def _setColumns(self,columns):
         return self.setColumns(columns)
-
+    
 # this holds the text of a table cell
 class StructuredTextColumn(ST.StructuredTextParagraph):
     """
 # this holds the text of a table cell
 class StructuredTextColumn(ST.StructuredTextParagraph):
     """
@@ -245,20 +269,40 @@ class StructuredTextColumn(ST.StructuredTextParagraph):
     or StructuredTextTableData.
     """
     
     or StructuredTextTableData.
     """
     
-    def __init__(self,text,span,kw):
-        # print "StructuredTextColumn", text, span
+    def __init__(self,text,span,align,valign,typ,kw):
         apply(ST.StructuredTextParagraph.__init__,(self,text,[]),kw)
         self._span = span
         apply(ST.StructuredTextParagraph.__init__,(self,text,[]),kw)
         self._span = span
+        self._align = align
+        self._valign = valign
+        self._type = typ
     
     def getSpan(self):
         return self._span
     
     def _getSpan(self):
         return self._span
     
     def getSpan(self):
         return self._span
     
     def _getSpan(self):
         return self._span
+    
+    def getAlign(self):
+        return self._align
+    
+    def _getAlign(self):
+        return self.getAlign()
+    
+    def getValign(self):
+        return self._valign
+    
+    def _getValign(self):
+        return self.getValign()
+    
+    def getType(self):
+        return self._type
+    
+    def _getType(self):
+        return self.getType()
+    
+class StructuredTextTableHeader(ST.StructuredTextParagraph): pass
 
 
-class StructuredTextTableHeader(ST.StructuredTextDocument): pass
-
-class StructuredTextTableData(ST.StructuredTextDocument): pass
+class StructuredTextTableData(ST.StructuredTextParagraph): pass
 
 class StructuredTextMarkup(STDOM.Element):
     
 
 class StructuredTextMarkup(STDOM.Element):
     
@@ -266,22 +310,22 @@ class StructuredTextMarkup(STDOM.Element):
        self._value=v
        self._attributes=kw.keys()
        for k, v in kw.items(): setattr(self, k, v)
        self._value=v
        self._attributes=kw.keys()
        for k, v in kw.items(): setattr(self, k, v)
-
+    
     def getChildren(self, type=type, lt=type([])):
        v=self._value
        if type(v) is not lt: v=[v]
        return v
     def getChildren(self, type=type, lt=type([])):
        v=self._value
        if type(v) is not lt: v=[v]
        return v
-
+    
     def getColorizableTexts(self): return self._value,
     def setColorizableTexts(self, v): self._value=v[0]
     def getColorizableTexts(self): return self._value,
     def setColorizableTexts(self, v): self._value=v[0]
-
+    
     def __repr__(self):
        return '%s(%s)' % (self.__class__.__name__, `self._value`)
     def __repr__(self):
        return '%s(%s)' % (self.__class__.__name__, `self._value`)
-
+    
 class StructuredTextLiteral(StructuredTextMarkup):
     def getColorizableTexts(self): return ()
     def setColorizableTexts(self, v): pass
 class StructuredTextLiteral(StructuredTextMarkup):
     def getColorizableTexts(self): return ()
     def setColorizableTexts(self, v): pass
-
+    
 class StructuredTextEmphasis(StructuredTextMarkup): pass
 
 class StructuredTextStrong(StructuredTextMarkup): pass
 class StructuredTextEmphasis(StructuredTextMarkup): pass
 
 class StructuredTextStrong(StructuredTextMarkup): pass
@@ -294,9 +338,11 @@ class StructuredTextUnderline(StructuredTextMarkup): pass
 
 class StructuredTextSGML(StructuredTextMarkup): pass
 
 
 class StructuredTextSGML(StructuredTextMarkup): pass
 
-class StructuredTextLink(StructuredTextMarkup): pass    
+class StructuredTextLink(StructuredTextMarkup): pass
+
+class StructuredTextXref(StructuredTextMarkup): pass
 
 
-class DocumentClass:    
+class DocumentClass:
     """
     Class instance calls [ex.=> x()] require a structured text
     structure. Doc will then parse each paragraph in the structure
     """
     Class instance calls [ex.=> x()] require a structured text
     structure. Doc will then parse each paragraph in the structure
@@ -309,7 +355,6 @@ class DocumentClass:
     instance with a strong instance stored in its string
     """
     
     instance with a strong instance stored in its string
     """
     
-    #'doc_table',
     paragraph_types  = [
         'doc_bullet',
         'doc_numbered',
     paragraph_types  = [
         'doc_bullet',
         'doc_numbered',
@@ -322,13 +367,15 @@ class DocumentClass:
     #'doc_named_link',
     #'doc_underline',
     text_types = [
     #'doc_named_link',
     #'doc_underline',
     text_types = [
+        'doc_sgml',
         'doc_href',
         'doc_strong',
         'doc_emphasize',
         'doc_literal',
         'doc_href',
         'doc_strong',
         'doc_emphasize',
         'doc_literal',
-        'doc_sgml'
+        'doc_sgml',
+        'doc_xref',
         ]
         ]
-
+    
     def __call__(self, doc):
         if type(doc) is type(''):
            doc=ST.StructuredText(doc)
     def __call__(self, doc):
         if type(doc) is type(''):
            doc=ST.StructuredText(doc)
@@ -338,10 +385,10 @@ class DocumentClass:
            doc=ST.StructuredTextDocument(self.color_paragraphs(
               doc.getSubparagraphs()))
         return doc
            doc=ST.StructuredTextDocument(self.color_paragraphs(
               doc.getSubparagraphs()))
         return doc
-
+    
     def parse(self, raw_string, text_type,
               type=type, st=type(''), lt=type([])):
     def parse(self, raw_string, text_type,
               type=type, st=type(''), lt=type([])):
-
+        
        """
        Parse accepts a raw_string, an expr to test the raw_string,
        and the raw_string's subparagraphs.
        """
        Parse accepts a raw_string, an expr to test the raw_string,
        and the raw_string's subparagraphs.
@@ -425,7 +472,6 @@ class DocumentClass:
                            st=type('')):
        result=[]
        for paragraph in raw_paragraphs:
                            st=type('')):
        result=[]
        for paragraph in raw_paragraphs:
-          #print type(paragraph)
           if paragraph.getNodeName() != 'StructuredTextParagraph':
              result.append(paragraph)
              continue
           if paragraph.getNodeName() != 'StructuredTextParagraph':
              result.append(paragraph)
              continue
@@ -445,23 +491,22 @@ class DocumentClass:
                 break
           else:
              new_paragraphs=ST.StructuredTextParagraph(paragraph.getColorizableTexts()[0],
                 break
           else:
              new_paragraphs=ST.StructuredTextParagraph(paragraph.getColorizableTexts()[0],
-                                                          self.color_paragraphs(paragraph.getSubparagraphs()),
-                                                          indent=paragraph.indent),
+                                                       self.color_paragraphs(paragraph.getSubparagraphs()),
+                                                       indent=paragraph.indent),
         
           # color the inline StructuredText types
           # for each StructuredTextParagraph
           for paragraph in new_paragraphs:
              
              if paragraph.getNodeName() is "StructuredTextTable":
         
           # color the inline StructuredText types
           # for each StructuredTextParagraph
           for paragraph in new_paragraphs:
              
              if paragraph.getNodeName() is "StructuredTextTable":
-                #print "we have a table"
                 cells = paragraph.getColumns()
                 text = paragraph.getColorizableTexts()
                 text = map(ST.StructuredText,text)
                 text = map(self.__call__,text)
                 cells = paragraph.getColumns()
                 text = paragraph.getColorizableTexts()
                 text = map(ST.StructuredText,text)
                 text = map(self.__call__,text)
-                #for index in range(len(text)):
-                #    text[index].setColorizableTexts(map(self.color_text,text[index].getColorizableTexts()))
+                for t in range(len(text)):
+                    text[t] = text[t].getSubparagraphs()
                 paragraph.setColorizableTexts(text)
                 paragraph.setColorizableTexts(text)
-                        
+                
              paragraph.setColorizableTexts(
                 map(self.color_text,
                     paragraph.getColorizableTexts()
              paragraph.setColorizableTexts(
                 map(self.color_text,
                     paragraph.getColorizableTexts()
@@ -470,7 +515,7 @@ class DocumentClass:
 
        return result
     
 
        return result
     
-    def doc_table(self, paragraph, expr = re.compile('\s*\|[-]+\|').match):
+    def doc_table(self, paragraph, expr = re.compile(r'\s*\|[-]+\|').match):
         text    = paragraph.getColorizableTexts()[0]
         m       = expr(text)
         
         text    = paragraph.getColorizableTexts()[0]
         m       = expr(text)
         
@@ -479,58 +524,102 @@ class DocumentClass:
         if not (m):
             return None
         rows = []
         if not (m):
             return None
         rows = []
-                                                                
-        rows = split(text,'\n')        
-        
+                
         spans   = []
         ROWS    = []
         COLS    = []
         spans   = []
         ROWS    = []
         COLS    = []
-    
-        TDdivider = re.compile("[\-]+").match
-        THdivider = re.compile("[\=]+").match
-    
-        # find where the column markers are located
-        col = re.compile('\|').search
+        indexes = []
+        ignore  = []
+        
+        TDdivider   = re.compile("[\-]+").match
+        THdivider   = re.compile("[\=]+").match
+        col         = re.compile('\|').search
+        innertable  = re.compile('\|([-]+|[=]+)\|').search
+        
         text = strip(text)
         rows = split(text,'\n')
         text = strip(text)
         rows = split(text,'\n')
+        foo  = ""
+        
         for row in range(len(rows)):
             rows[row] = strip(rows[row])
         for row in range(len(rows)):
             rows[row] = strip(rows[row])
-    
-        for row in rows:
-            tmp = strip(row)
-            tmp = row[1:len(tmp)-1] # remove leading and trailing |
-            offset = 0
+        
+        # have indexes store if a row is a divider
+        # or a cell part
+        for index in range(len(rows)):
+            tmpstr = rows[index][1:len(rows[index])-1]
+            if TDdivider(tmpstr):
+                indexes.append("TDdivider")
+            elif THdivider(tmpstr):
+                indexes.append("THdivider")
+            else:
+                indexes.append("cell")
+
+        for index in range(len(indexes)):
+            if indexes[index] is "TDdivider" or indexes[index] is THdivider:
+                ignore = [] # reset ignore
+                #continue    # skip dividers
+
+            tmp     = strip(rows[index])    # clean the row up
+            tmp     = tmp[1:len(tmp)-1]     # remove leading + trailing |
+            offset  = 0
+
+            # find the start and end of inner
+            # tables. ignore everything between
+            if innertable(tmp):
+                tmpstr = strip(tmp)
+                while innertable(tmpstr):
+                    start,end   = innertable(tmpstr).span()
+                    if not (start,end-1) in ignore:
+                        ignore.append(start,end-1)
+                    tmpstr = " " + tmpstr[end:]
+
+            # find the location of column dividers
+            # NOTE: |'s in inner tables do not count
+            #   as column dividers
             if col(tmp):
                 while col(tmp):
             if col(tmp):
                 while col(tmp):
-                    start,end = col(tmp).span()
+                    bar         = 1   # true if start is not in ignore
+                    start,end   = col(tmp).span()
+
                     if not start+offset in spans:
                     if not start+offset in spans:
-                        spans.append(start + offset)
-                    COLS.append((tmp[0:start],start+offset))
-                    tmp = " " + tmp[end:]
-                    offset = offset + (start)
+                        for s,e in ignore:
+                            if start+offset >= s or start+offset <= e:
+                                bar = None
+                                break
+                        if bar:   # start is clean
+                            spans.append(start+offset)
+                    if not bar:
+                        foo = foo + tmp[:end]
+                        tmp = tmp[end:]
+                        offset = offset + end
+                    else:
+                        COLS.append((foo + tmp[0:start],start+offset))
+                        foo = ""
+                        tmp = " " + tmp[end:]
+                        offset = offset + start
             if not offset+len(tmp) in spans:
                 spans.append(offset+len(tmp))
             if not offset+len(tmp) in spans:
                 spans.append(offset+len(tmp))
-            COLS.append((tmp,offset+len(tmp)))
+            COLS.append((foo + tmp,offset+len(tmp)))
+            foo = ""
             ROWS.append(COLS)
             COLS = []
             ROWS.append(COLS)
             COLS = []
-    
-        spans.sort()
-    
-        ROWS = ROWS[1:len(ROWS)]        
         
         
+        spans.sort()
+        ROWS = ROWS[1:len(ROWS)]
+
         # find each column span
         cols    = []
         tmp     = []
         # find each column span
         cols    = []
         tmp     = []
-    
+        
         for row in ROWS:
             for c in row:
                 tmp.append(c[1])
             cols.append(tmp)
             tmp = []
         for row in ROWS:
             for c in row:
                 tmp.append(c[1])
             cols.append(tmp)
             tmp = []
-    
-        cur = 1     # the current column span
-        tmp = []    
-        C   = []    # holds the span of each cell
+        
+        cur = 1
+        tmp = []
+        C   = []
         for col in cols:
             for span in spans:
                 if not span in col:
         for col in cols:
             for span in spans:
                 if not span in col:
@@ -541,14 +630,47 @@ class DocumentClass:
             C.append(tmp)
             tmp = []
         
             C.append(tmp)
             tmp = []
         
-        # make rows contain the cell's text and the span
-        # of that cell
         for index in range(len(C)):
             for i in range(len(C[index])):
                 ROWS[index][i] = (ROWS[index][i][0],C[index][i])
         rows = ROWS
         
         for index in range(len(C)):
             for i in range(len(C[index])):
                 ROWS[index][i] = (ROWS[index][i][0],C[index][i])
         rows = ROWS
         
-        # now munge the table cells together
+        # label things as either TableData or
+        # Table header
+        TD  = []
+        TH  = []
+        all = []
+        for index in range(len(indexes)):
+            if indexes[index] is "TDdivider":
+                TD.append(index)
+                all.append(index)
+            if indexes[index] is "THdivider":
+                TH.append(index)
+                all.append(index)
+        TD = TD[1:]
+        dividers = all[1:]
+        #print "TD  => ", TD
+        #print "TH  => ", TH
+        #print "all => ", all, "\n"
+        
+        for div in dividers:
+            if div in TD:
+                index = all.index(div)
+                for rowindex in range(all[index-1],all[index]):                    
+                    for i in range(len(rows[rowindex])):
+                        rows[rowindex][i] = (rows[rowindex][i][0],
+                                             rows[rowindex][i][1],
+                                             "td")
+            else:
+                index = all.index(div)
+                for rowindex in range(all[index-1],all[index]):
+                    for i in range(len(rows[rowindex])):
+                        rows[rowindex][i] = (rows[rowindex][i][0],
+                                             rows[rowindex][i][1],
+                                             "th")
+        
+        # now munge the multi-line cells together
+        # as paragraphs
         ROWS    = []
         COLS    = []
         for row in rows:
         ROWS    = []
         COLS    = []
         for row in rows:
@@ -556,16 +678,97 @@ class DocumentClass:
                 if not COLS:
                     COLS = range(len(row))
                     for i in range(len(COLS)):
                 if not COLS:
                     COLS = range(len(row))
                     for i in range(len(COLS)):
-                        COLS[i] = ["",1]
+                        COLS[i] = ["",1,""]
                 if TDdivider(row[index][0]) or THdivider(row[index][0]):
                     ROWS.append(COLS)
                     COLS = []
                 else:
                 if TDdivider(row[index][0]) or THdivider(row[index][0]):
                     ROWS.append(COLS)
                     COLS = []
                 else:
-                    COLS[index][0] = COLS[index][0] + rstrip(row[index][0]) + "\n"
+                    COLS[index][0] = COLS[index][0] + (row[index][0]) + "\n"
                     COLS[index][1] = row[index][1]
                     COLS[index][1] = row[index][1]
-        return StructuredTextTable(ROWS,text,subs,indent=paragraph.indent)
+                    COLS[index][2] = row[index][2]
+        
+        # now that each cell has been munged together,
+        # determine the cell's alignment.
+        # Default is to center. Also determine the cell's
+        # vertical alignment, top, middle, bottom. Default is
+        # to middle
+        rows = []
+        cols = []
+        for row in ROWS:
+            for index in range(len(row)):
+                topindent       = 0
+                bottomindent    = 0
+                leftindent      = 0
+                rightindent     = 0
+                left            = []
+                right           = []                                    
+                text            = row[index][0]
+                text            = split(text,'\n')
+                text            = text[:len(text)-1]
+                align           = ""
+                valign          = ""
+                for t in text:
+                    t = strip(t)
+                    if not t:
+                        topindent = topindent + 1
+                    else:
+                        break
+                text.reverse()
+                for t in text:
+                    t = strip(t)
+                    if not t:
+                        bottomindent = bottomindent + 1
+                    else:
+                        break
+                text.reverse()
+                tmp   = join(text[topindent:len(text)-bottomindent],"\n")
+                pars  = re.compile("\n\s*\n").split(tmp)
+                for par in pars:
+                    if index > 0:
+                        par = par[1:]
+                    par = split(par, ' ')
+                    for p in par:
+                        if not p:
+                            leftindent = leftindent+1
+                        else:
+                            break
+                    left.append(leftindent)
+                    leftindent = 0
+                    par.reverse()
+                    for p in par:
+                        if not p:
+                            rightindent = rightindent + 1
+                        else:
+                            break
+                    right.append(rightindent)
+                    rightindent = 0
+                left.sort()
+                right.sort()
+
+                if topindent == bottomindent:
+                    valign="middle"
+                elif topindent < 1:
+                    valign="top"
+                elif bottomindent < 1:
+                    valign="bottom"
+                else:
+                    valign="middle"
+
+                if left[0] < 1:
+                    align = "left"
+                elif right[0] < 1:
+                    align = "right"
+                elif left[0] > 1 and right[0] > 1:
+                    align="center"
+                else:
+                    align="left"
+                
+                cols.append(row[index][0],row[index][1],align,valign,row[index][2])
+            rows.append(cols)
+            cols = []
+        return StructuredTextTable(rows,text,subs,indent=paragraph.indent)
             
             
-    def doc_bullet(self, paragraph, expr = re.compile('\s*[-*o]\s+').match):
+    def doc_bullet(self, paragraph, expr = re.compile(r'\s*[-*o]\s+').match):
         top=paragraph.getColorizableTexts()[0]
         m=expr(top)
 
         top=paragraph.getColorizableTexts()[0]
         m=expr(top)
 
@@ -583,7 +786,7 @@ class DocumentClass:
 
     def doc_numbered(
         self, paragraph,
 
     def doc_numbered(
         self, paragraph,
-        expr = re.compile('(\s*[a-zA-Z]+\.)|(\s*[0-9]+\.)|(\s*[0-9]+\s+)').match):
+        expr = re.compile(r'(\s*[%s]+\.)|(\s*[0-9]+\.)|(\s*[0-9]+\s+)' % letters).match):
         
         # This is the old expression. It had a nasty habit
         # of grabbing paragraphs that began with a single
         
         # This is the old expression. It had a nasty habit
         # of grabbing paragraphs that began with a single
@@ -607,8 +810,8 @@ class DocumentClass:
 
     def doc_description(
         self, paragraph,
 
     def doc_description(
         self, paragraph,
-        delim = re.compile('\s+--\s+').search,
-        nb=re.compile(r'[^\0- ]').search,
+        delim = re.compile(r'\s+--\s+').search,
+        nb=re.compile(r'[^\000- ]').search,
         ):
 
         top=paragraph.getColorizableTexts()[0]
         ):
 
         top=paragraph.getColorizableTexts()[0]
@@ -632,7 +835,7 @@ class DocumentClass:
            delim=d)
 
     def doc_header(self, paragraph,
            delim=d)
 
     def doc_header(self, paragraph,
-                    expr    = re.compile('[ a-zA-Z0-9.:/,-_*<>\?\'\"]+').match
+                    expr    = re.compile(r'[ %s0-9.:/,-_*<>\?\'\"]+' % letters).match
                     ):
         subs=paragraph.getSubparagraphs()
         if not subs: return None
                     ):
         subs=paragraph.getSubparagraphs()
         if not subs: return None
@@ -650,9 +853,9 @@ class DocumentClass:
     def doc_literal(
         self, s,
         expr=re.compile(
     def doc_literal(
         self, s,
         expr=re.compile(
-          "(?:\s|^)'"                                                  # open
-          "([^ \t\n\r\f\v']|[^ \t\n\r\f\v'][^\n']*[^ \t\n\r\f\v'])" # contents
-          "'(?:\s|[,.;:!?]|$)"                                        # close
+          r"(?:\s|^)'"                                                  # open
+          r"([^ \t\n\r\f\v']|[^ \t\n\r\f\v'][^\n']*[^ \t\n\r\f\v'])" # contents
+          r"'(?:\s|[,.;:!?]|$)"                                        # close
           ).search):
         
         r=expr(s)
           ).search):
         
         r=expr(s)
@@ -664,7 +867,7 @@ class DocumentClass:
 
     def doc_emphasize(
         self, s,
 
     def doc_emphasize(
         self, s,
-        expr = re.compile('\s*\*([ \na-zA-Z0-9.:/;,\'\"\?]+)\*(?!\*|-)').search
+        expr = re.compile(r'\s*\*([ \n%s0-9]+)\*(?!\*|-)' % lettpunc).search
         ):
 
         r=expr(s)
         ):
 
         r=expr(s)
@@ -676,8 +879,8 @@ class DocumentClass:
     
     def doc_inner_link(self,
                        s,
     
     def doc_inner_link(self,
                        s,
-                       expr1 = re.compile("\.\.\s*").search,
-                       expr2 = re.compile("\[[a-zA-Z0-9]+\]").search):
+                       expr1 = re.compile(r"\.\.\s*").search,
+                       expr2 = re.compile(r"\[[%s0-9]+\]" % letters ).search):
         
         # make sure we dont grab a named link
         if expr2(s) and expr1(s):
         
         # make sure we dont grab a named link
         if expr2(s) and expr1(s):
@@ -697,7 +900,7 @@ class DocumentClass:
     
     def doc_named_link(self,
                        s,
     
     def doc_named_link(self,
                        s,
-                       expr=re.compile("(\.\.\s)(\[[a-zA-Z0-9]+\])").search):
+                       expr=re.compile(r"(\.\.\s)(\[[%s0-9]+\])" % letters).search):
         
         result = expr(s)
         if result:
         
         result = expr(s)
         if result:
@@ -711,7 +914,7 @@ class DocumentClass:
     
     def doc_underline(self,
                       s,
     
     def doc_underline(self,
                       s,
-                      expr=re.compile("\_([a-zA-Z0-9\s\.,\?]+)\_").search):
+                      expr=re.compile(r"\s+\_([%s0-9\s]+)\_" % lettpunc).search):
         
         result = expr(s)
         if result:
         
         result = expr(s)
         if result:
@@ -723,7 +926,7 @@ class DocumentClass:
     
     def doc_strong(self, 
                    s,
     
     def doc_strong(self, 
                    s,
-        expr = re.compile('\s*\*\*([ \na-zA-Z0-9.:/;\-,!\?\'\"]+)\*\*').search
+        expr = re.compile(r'\s*\*\*([ \n%s0-9]+)\*\*' % lettpunc).search
         ):
 
         r=expr(s)
         ):
 
         r=expr(s)
@@ -732,14 +935,17 @@ class DocumentClass:
            return (StructuredTextStrong(s[start:end]), start-2, end+2)
         else:
            return None
            return (StructuredTextStrong(s[start:end]), start-2, end+2)
         else:
            return None
+
+    ## Some constants to make the doc_href() regex easier to read.
+    _DQUOTEDTEXT = r'("[%s0-9\n\-\.\,\;\(\)\/\:\/\*\']+")'  % letters ## double quoted text
+    _URL_AND_PUNC = r'([%s0-9\@\.\,\?\!\/\:\;\-\#\~]+)' % letters 
+    _SPACES = r'(\s*)'
     
     
-    def doc_href(
-        
-        self, s,
-        expr1 = re.compile("(\"[ a-zA-Z0-9\n\-\.\,\;\(\)\/\:\/]+\")(:)([a-zA-Z0-9\:\/\.\~\-]+)([,]*\s*)").search,
-        expr2 = re.compile('(\"[ a-zA-Z0-9\n\-\.\:\;\(\)\/]+\")([,]+\s+)([a-zA-Z0-9\@\.\,\?\!\/\:\;\-\#]+)(\s*)').search):
+    def doc_href(self, s,
+                 expr1 = re.compile(_DQUOTEDTEXT + "(:)" + _URL_AND_PUNC + _SPACES).search,
+                 expr2 = re.compile(_DQUOTEDTEXT + r'(\,\s+)' + _URL_AND_PUNC + _SPACES).search):
         
         
-        punctuation = re.compile("[\,\.\?\!\;]+").match
+        punctuation = re.compile(r"[\,\.\?\!\;]+").match
         r=expr1(s) or expr2(s)
 
         if r:
         r=expr1(s) or expr2(s)
 
         if r:
@@ -766,7 +972,7 @@ class DocumentClass:
         else:
             return None
     
         else:
             return None
     
-    def doc_sgml(self,s,expr=re.compile("\<[a-zA-Z0-9\.\=\'\"\:\/\-\#\+\s]+\>").search):
+    def doc_sgml(self,s,expr=re.compile(r"\<[%s0-9\.\=\'\"\:\/\-\#\+\s\*]+\>" % letters).search):
         """
         SGML text is ignored and outputed as-is
         """
         """
         SGML text is ignored and outputed as-is
         """
@@ -775,3 +981,18 @@ class DocumentClass:
             start,end = r.span()
             text = s[start:end]
             return (StructuredTextSGML(text),start,end)
             start,end = r.span()
             text = s[start:end]
             return (StructuredTextSGML(text),start,end)
+
+
+    def doc_xref(self, s,
+        expr = re.compile('\[([%s0-9\-.:/;,\n\~]+)\]' % letters).search
+        ):
+        r = expr(s)
+        if r:
+            start, end = r.span(1)
+            return (StructuredTextXref(s[start:end]), start-1, end+1)
+        else:
+            return None
+
+
+
+