wxPython/samples/ide/activegrid/util/parser.py

   1 #----------------------------------------------------------------------------
   2 # Name:         parser.py
   3 # Purpose:      parsing utilities
   4 #
   5 # Author:       Jeff Norton
   6 #
   7 # Created:      8/9/05
   8 # CVS-ID:       $Id$
   9 # Copyright:    (c) 2004-2005 ActiveGrid, Inc.
  10 # License:      wxWindows License
  11 #----------------------------------------------------------------------------
  12
  13 import re
  14 from activegrid.util.lang import *
  15 ifDefPy()
  16 import string
  17 import array
  18 endIfDef()
  19
# Name of the variable that xpathToCode places in front of absolute paths
# (tokens starting with "/") in the code it generates.
XPATH_ROOT_VAR = '__rootObj__'
# Part names; not referenced by the code visible in this file.
# NOTE(review): presumably the named parts of a "get object" request -- confirm against callers.
GETOBJECTPARTNAMES  =   ["primaryRef", "ref", "orderings", "limit"]
  22
  23 class Tokenizer(object):
  24
  25     TOKEN_IDENT = 1
  26     TOKEN_STRING = 2
  27     TOKEN_OP = 3
  28     TOKEN_WS = 4
  29 ##    TOKEN_PLACEHOLDER = 5
  30
  31     def __init__(self, text, identStart=None, tokenSep=None, ignoreWhitespace=True):
  32         """
  33 Turn a string into individual tokens.  Three types of tokens are recognized:
  34     TOKEN_IDENT:   identifiers (those that start with the identStart pattern)
  35     TOKEN_STRING:  quoted string
  36     TOKEN_OP:      everything else
  37 Tokens are separated by white space or the tokenSep pattern.
  38 Constructor parameters:
  39     text:  The string to tokenize
  40     identStart:  A regular expression describing characters which start an identifier
  41                  The default expression accepts letters, "_", and "/".
  42     tokenSep:    A regular expression describing the characters which end a token
  43                  (in addition to whitespace).  The default expression accepts
  44                  anything except alpha-numerics, "_", "/", and ":".
  45 Usage:
  46     Invoke getNextToken (or next) to get the next token.  The instance variables
  47     token, and tokenVal will be populated with the current token type (TOKEN_IDENT,
  48     TOKEN_STRING, or TOEKN_OP) and value respectively.  nextToken and nextTokenVal
  49     will also be available for lookahead.   The next method is similar to
  50     getNextToken but also returns the token value.  A value of None signals end
  51     of stream.
  52         """
  53         self.ignoreWhitespace=ignoreWhitespace
  54         ifDefPy()
  55         if (isinstance(text, array.array)):
  56             text = text.tostring()
  57         endIfDef()
  58         self.text = asString(text)
  59         self.textIndex = 0
  60         self.textLen = len(self.text)
  61         self.token = None
  62         self.tokenVal = None
  63         self.nextToken = None
  64         self.nextTokenVal = None
  65         if (identStart == None):
  66             identStart = "[a-zA-Z_/]"
  67         if (tokenSep == None):
  68             tokenSep = "[^a-zA-Z0-9_/:]"
  69         self.identStart = re.compile(identStart)
  70         self.tokenSep = re.compile(tokenSep)
  71         self.getNextToken() # Prime the pump
  72
  73     def isEscaped(text, index):
  74         if ((index > 0) and (text[index-1] == '\\') and ((index < 2) or (text[index-2] != '\\'))):
  75             return True
  76         return False
  77     isEscaped = staticmethod(isEscaped)
  78
  79     def findClosingQuote(text, index, char):
  80         index = index + 1
  81         while True:
  82             endIndex = text.find(char, index)
  83             if (endIndex < 1):
  84                 return -1
  85             if (Tokenizer.isEscaped(text, endIndex)):
  86                 index = endIndex+1
  87             else:
  88                 break
  89         return endIndex + 1
  90     findClosingQuote = staticmethod(findClosingQuote)
  91
  92     def _findClosing(self, char):
  93         if (self.textIndex >= self.textLen):
  94             raise Exception("The text \"%s\" has an unmatched string starting at %d" % (self.text, self.textIndex))
  95         index = Tokenizer.findClosingQuote(self.text, self.textIndex, char)
  96         if (index < 0):
  97             raise Exception("The text \"%s\" has an unmatched string starting at %d" % (self.text, self.textIndex-1))
  98         return index
  99
 100     def next(self):
 101         self.getNextToken()
 102         if (self.token == None):
 103             raise StopIteration()
 104         return self.tokenVal
 105
 106     def getNextToken(self):
 107         self.token = self.nextToken
 108         self.tokenVal = self.nextTokenVal
 109         while (self.textIndex < self.textLen):
 110             c = self.text[self.textIndex]
 111             if (c not in string.whitespace):
 112                 if (c == '"' or c == "'" or c == '`'):
 113                     endIndex = self._findClosing(c)
 114                     self.nextToken = self.TOKEN_STRING
 115                     self.nextTokenVal = self.text[self.textIndex:endIndex]
 116                     self.textIndex = endIndex
 117                     return
 118                 elif (self.identStart.search(c)):
 119                     endMatch = self.tokenSep.search(self.text, self.textIndex+1)
 120                     if (endMatch):
 121                         endIndex = endMatch.start()
 122                     else:
 123                         endIndex = self.textLen
 124                     self.nextToken = self.TOKEN_IDENT
 125                     self.nextTokenVal = self.text[self.textIndex:endIndex]
 126                     self.textIndex = endIndex
 127                     return
 128                 else:
 129                     self.nextToken = self.TOKEN_OP
 130                     endIndex = self.textIndex + 1
 131                     if (c == '<' or c == '>' or c == '!' or c == '='):
 132                         if ((endIndex < self.textLen) and (self.text[endIndex] == '=')):
 133                             endIndex += 1
 134                     elif ((c == '%') and (endIndex < self.textLen)):
 135                         c = self.text[endIndex]
 136                         if (c in ['d', 'i', 'o', 'u', 'x', 'X', 'e', 'E', 'f', 'F', 'g', 'G', 'c', 'r', 's', '%']):
 137                             endIndex += 1
 138 ##                            self.nextToken = self.TOKEN_PLACEHOLDER # Should really be this but no one can handle it yet
 139                     self.nextTokenVal = self.text[self.textIndex:endIndex]
 140                     self.textIndex = endIndex
 141                     return
 142             elif not self.ignoreWhitespace:
 143                 self.nextToken=self.TOKEN_WS
 144                 self.nextTokenVal=""
 145                 while c in string.whitespace:
 146                     self.nextTokenVal+=c
 147                     self.textIndex+=1
 148                     if self.textIndex==len(self.text):
 149                         break
 150                     c=self.text[self.textIndex]
 151                 return
 152             self.textIndex += 1
 153         self.nextToken = None
 154         self.nextTokenVal = None
 155
 156 def isXPathNonVar(var):
 157     """Returns true iff var is a string ("foo" or 'foo') or a number."""
 158     if (var.startswith("'") and var.endswith("'")) or \
 159             (var.startswith('"') and var.endswith('"')):
 160         return True
 161
 162     # list from XPathToCode, below
 163     if var.lower() in ["count", "empty", "true", "false", "null", "and", "or", \
 164             "like", "not"]:
 165         return True
 166
 167     try:
 168         t=int(var)
 169         return True
 170     except TypeError, e:
 171         pass
 172     except ValueError, e:
 173         pass
 174
 175     return False
 176
 177 def xpathToCode(xpaths, convertBracket=True):
 178     if ((xpaths == None) or (len(xpaths) < 1)):
 179         return "True"
 180     if (not isinstance(xpaths, (list, tuple))):
 181         xpaths = [xpaths]
 182     result = []
 183     for xpath in xpaths:
 184         t = Tokenizer(xpath, "[a-zA-Z0-9_/:\.]", "[^a-zA-Z0-9_/:\.]", ignoreWhitespace=False)
 185         expr = []
 186         lastToken=None
 187         while t.nextToken != None:
 188             t.getNextToken()
 189             if (t.token == Tokenizer.TOKEN_WS):
 190                 expr.append(" ")
 191             elif (t.token == Tokenizer.TOKEN_OP):
 192                 if (t.tokenVal == "="):
 193                     expr.append("==")
 194                 elif (t.tokenVal == "[" and convertBracket):
 195                     expr.append("(")
 196                 elif (t.tokenVal == "]" and convertBracket):
 197                     expr.append(")")
 198                 else:
 199                     expr.append(t.tokenVal)
 200             elif (t.token == Tokenizer.TOKEN_IDENT):
 201                 if (t.tokenVal == "and"):
 202                     expr.append(" and ")
 203                 elif (t.tokenVal == "or"):
 204                     expr.append(" or ")
 205                 elif (t.tokenVal == "not"):
 206                     expr.append(" not ")
 207                 elif (t.tokenVal == "like"):
 208                     # REVIEW stoens@activegrid.com 02-Nov-05 --
 209                     # This is very limited support for like:
 210                     # typically like queries look like this: "foo like 'blah%'".
 211                     # So translate this into "foo.startswith(blah)".
 212                     # We should use a regular expression to support '%'s in
 213                     # arbitrary places in the string. After 1.1.
 214                     if t.nextToken and t.nextTokenVal.endswith("%'"):
 215                         t.getNextToken() # throw away the "like" token
 216                         last = len(expr) - 1
 217                         expr[last] = "%s.startswith(%s')"\
 218                             % (expr[last], t.tokenVal[:-2])
 219                     else:
 220                         # old behavior
 221                         expr.append(t.tokenVal)
 222
 223                 elif (t.tokenVal == "count"):
 224                     expr.append("len")
 225                 elif (t.tokenVal == 'empty'):
 226                     expr.append('ctx.isEmptyPath')
 227                 elif (t.tokenVal == 'true'):
 228                     expr.append(_parseConstantFunction(t, 'True'))
 229                 elif (t.tokenVal == 'false'):
 230                     expr.append(_parseConstantFunction(t, 'False'))
 231                 elif (t.tokenVal == 'null'):
 232                     expr.append(_parseConstantFunction(t, 'None'))
 233                 elif (-1!=t.tokenVal.find(':')):
 234                     serviceDef, args=_parseServiceFunction(t)
 235
 236                     # XXX handle serviceDef, args being None
 237
 238                     for i in range(len(args)):
 239                         args[i]=xpathToCode(args[i], False)
 240                     jargs="[%s]" % (",".join(args))
 241
 242                     # XXX should be processmodel.DATASERVICE_PROCESS_NAME, not "dataservice"
 243                     if serviceDef[0]=='dataservice':
 244                         expr.append("runtimesupport.invokeDataServiceWrapper(%s, %s, ctx, locals())" % \
 245                                 (serviceDef, jargs))
 246                     else:
 247                         expr.append("runtimesupport.invokeServiceWrapper(%s, %s, ctx)" % \
 248                                 (serviceDef, jargs))
 249                 else:
 250                     if (lastToken==')' or lastToken==']'):
 251                         wasFunc=True
 252                     else:
 253                         wasFunc=False
 254                     if (t.tokenVal.startswith('/')) and not wasFunc:
 255                         expr.append(XPATH_ROOT_VAR)
 256                     expr.append(t.tokenVal.replace('/','.'))
 257                 lastToken=t.tokenVal
 258             else:
 259                 expr.append(t.tokenVal)
 260
 261
 262         if (len(expr) == 2 and expr[0]==" "):
 263             expr = "".join(expr)
 264             result.append(expr)
 265         elif (len(expr) > 1):
 266             expr = "".join(expr)
 267             result.append("(%s)" % expr)
 268         elif (len(expr) > 0):
 269             result.append(expr[0])
 270
 271     return " and ".join(result)
 272
 273 def _parseArgs(t):
 274     args=[]
 275     argcon=""
 276
 277     if t.tokenVal!='(':
 278         return []
 279     if t.nextTokenVal==')':
 280         t.getNextToken()
 281         return []
 282
 283     depth=1
 284
 285     while(depth!=0):
 286         if not t.nextToken:
 287             raise Exception("parameters list with no closing ) after token: %s" % t.tokenVal)
 288         t.getNextToken()
 289
 290         if t.tokenVal=='(':
 291             depth+=1
 292         if t.tokenVal==')':
 293             depth-=1
 294
 295         if depth==0 or (depth==1 and t.tokenVal==','):
 296             args.append(argcon)
 297             argcon=""
 298         else:
 299             argcon+=t.tokenVal
 300     return args
 301
 302 def _parseServiceFunction(t):
 303     """Parses what appears to be a service function call into serviceDefs and args lists.
 304
 305     Returns None, None if the serviceFunction appears to be invalid.
 306     """
 307     if t.nextTokenVal!='(':
 308         return t.tokenVal, None
 309
 310     serviceDef=t.tokenVal.split(':')
 311     t.getNextToken()
 312     args=_parseArgs(t)
 313
 314     return serviceDef, args
 315
 316 def _parseConstantFunction(t, outputValue):
 317     firstVal = t.tokenVal
 318     if t.nextTokenVal != '(':
 319         return firstVal
 320     t.getNextToken()
 321     if t.nextTokenVal != ')':
 322         return "%s%s" % (firstVal, '(')
 323     t.getNextToken()
 324     return outputValue
 325
 326 def parseDSPredicate(ctx, str, vars, valueList=None):
 327     from activegrid.util.utillang import evalCode
 328     from activegrid.util.utillang import ObjAsDict
 329
 330     if valueList == None:
 331         valueList = []
 332     indexVar=0
 333     oldIndexVar=0
 334     sourceStr=str
 335     inlinedPredicate=[]
 336     qualifications=[]
 337     while True:
 338         oldIndexVar = indexVar
 339         dollarCurlForm = False
 340         quoted = False
 341         indexVar = sourceStr.find("bpws:getVariableData", indexVar)
 342         if indexVar == -1:
 343             indexVar = sourceStr.find("${", oldIndexVar)
 344             if indexVar == -1:
 345                 break
 346             dollarCurlForm = True
 347         if indexVar > 0 and sourceStr[indexVar-1] in ('"',"'"):
 348             quoted = True
 349         if not dollarCurlForm:
 350             openParen = sourceStr.find("(", indexVar)
 351             if openParen == -1:
 352                 break
 353             closeParen = sourceStr.find(")", openParen)
 354             if closeParen == -1:
 355                 break
 356         else:
 357             openParen = indexVar+1
 358             closeParen = sourceStr.find("}", openParen)
 359             if closeParen == -1:
 360                 break
 361         varRef = sourceStr[openParen+1: closeParen]
 362         if varRef.startswith('"') or varRef.startswith("'"):
 363             varRef = varRef[1:]
 364         if varRef.endswith('"') or varRef.endswith("'"):
 365             varRef = varRef[:-1]
 366         if isinstance(vars, dict) or isinstance(vars, ObjAsDict):
 367             varRefCode = xpathToCode(varRef)
 368             value = evalCode(varRefCode, vars)
 369         else:
 370             value = ctx.evalPath(vars, varRef)
 371         inlinedPredicate.append(sourceStr[oldIndexVar:indexVar])
 372         if quoted:
 373             inlinedPredicate.append("%s" % value)
 374         else:
 375             inlinedPredicate.append('%s')
 376             valueList.append(value)
 377         indexVar = closeParen+1
 378     inlinedPredicate.append(sourceStr[oldIndexVar:])
 379     qualifications.append(''.join(inlinedPredicate))
 380     return qualifications, valueList