--- /dev/null
+#----------------------------------------------------------------------------
+# Name: parser.py
+# Purpose: parsing utilities
+#
+# Author: Jeff Norton
+#
+# Created: 8/9/05
+# CVS-ID: $Id$
+# Copyright: (c) 2004-2005 ActiveGrid, Inc.
+# License: wxWindows License
+#----------------------------------------------------------------------------
+
+import re
+from activegrid.util.lang import *
+ifDefPy()
+import string
+import array
+endIfDef()
+
+XPATH_ROOT_VAR = '__rootObj__'
+GETOBJECTPARTNAMES = ["primaryRef", "ref", "orderings", "limit"]
+
+class Tokenizer(object):
+ """Lexer with one token of lookahead: splits text into identifier,
+ quoted-string, operator, and (optionally) whitespace tokens."""
+
+ # Token type codes stored in token/nextToken.
+ TOKEN_IDENT = 1
+ TOKEN_STRING = 2
+ TOKEN_OP = 3
+ TOKEN_WS = 4
+## TOKEN_PLACEHOLDER = 5
+
+ def __init__(self, text, identStart=None, tokenSep=None, ignoreWhitespace=True):
+ """
+Turn a string into individual tokens. Three types of tokens are recognized:
+ TOKEN_IDENT: identifiers (those that start with the identStart pattern)
+ TOKEN_STRING: quoted string
+ TOKEN_OP: everything else
+Tokens are separated by white space or the tokenSep pattern.
+Constructor parameters:
+ text: The string to tokenize
+ identStart: A regular expression describing characters which start an identifier
+ The default expression accepts letters, "_", and "/".
+ tokenSep: A regular expression describing the characters which end a token
+ (in addition to whitespace). The default expression accepts
+ anything except alpha-numerics, "_", "/", and ":".
+Usage:
+ Invoke getNextToken (or next) to get the next token. The instance variables
+ token, and tokenVal will be populated with the current token type (TOKEN_IDENT,
+ TOKEN_STRING, or TOKEN_OP) and value respectively. nextToken and nextTokenVal
+ will also be available for lookahead. The next method is similar to
+ getNextToken but also returns the token value. A value of None signals end
+ of stream.
+ """
+ self.ignoreWhitespace=ignoreWhitespace
+ ifDefPy()
+ # array.array input is converted to its raw string form before lexing.
+ if (isinstance(text, array.array)):
+ text = text.tostring()
+ endIfDef()
+ self.text = asString(text)
+ self.textIndex = 0
+ self.textLen = len(self.text)
+ self.token = None
+ self.tokenVal = None
+ self.nextToken = None
+ self.nextTokenVal = None
+ # Defaults: identifiers start with a letter, "_", or "/", and run
+ # until a character outside [a-zA-Z0-9_/:] is seen.
+ if (identStart == None):
+ identStart = "[a-zA-Z_/]"
+ if (tokenSep == None):
+ tokenSep = "[^a-zA-Z0-9_/:]"
+ self.identStart = re.compile(identStart)
+ self.tokenSep = re.compile(tokenSep)
+ self.getNextToken() # Prime the pump
+
+ def isEscaped(text, index):
+ # True iff text[index] is preceded by a single backslash that is not
+ # itself escaped.  NOTE(review): only two characters of lookbehind are
+ # examined, so longer backslash runs are not fully analyzed.
+ if ((index > 0) and (text[index-1] == '\\') and ((index < 2) or (text[index-2] != '\\'))):
+ return True
+ return False
+ isEscaped = staticmethod(isEscaped)
+
+ def findClosingQuote(text, index, char):
+ # Scan forward from the opening quote at index for the matching
+ # closing char, skipping backslash-escaped occurrences.  Returns the
+ # index just PAST the closing quote, or -1 if there is none.
+ index = index + 1
+ while True:
+ endIndex = text.find(char, index)
+ if (endIndex < 1):
+ return -1
+ if (Tokenizer.isEscaped(text, endIndex)):
+ index = endIndex+1
+ else:
+ break
+ return endIndex + 1
+ findClosingQuote = staticmethod(findClosingQuote)
+
+ def _findClosing(self, char):
+ # Like findClosingQuote, but raises if the quote is unmatched.
+ if (self.textIndex >= self.textLen):
+ raise Exception("The text \"%s\" has an unmatched string starting at %d" % (self.text, self.textIndex))
+ index = Tokenizer.findClosingQuote(self.text, self.textIndex, char)
+ if (index < 0):
+ raise Exception("The text \"%s\" has an unmatched string starting at %d" % (self.text, self.textIndex-1))
+ return index
+
+ def next(self):
+ # Iterator-style accessor: advance one token and return its value;
+ # raises StopIteration once the stream is exhausted.
+ self.getNextToken()
+ if (self.token == None):
+ raise StopIteration()
+ return self.tokenVal
+
+ def getNextToken(self):
+ # Advance one token: the lookahead (nextToken/nextTokenVal) becomes
+ # the current token (token/tokenVal), then the text is scanned for a
+ # new lookahead.  At end of input the lookahead is set to None.
+ self.token = self.nextToken
+ self.tokenVal = self.nextTokenVal
+ while (self.textIndex < self.textLen):
+ c = self.text[self.textIndex]
+ if (c not in string.whitespace):
+ if (c == '"' or c == "'" or c == '`'):
+ # Quoted string; the token value keeps its quotes.
+ endIndex = self._findClosing(c)
+ self.nextToken = self.TOKEN_STRING
+ self.nextTokenVal = self.text[self.textIndex:endIndex]
+ self.textIndex = endIndex
+ return
+ elif (self.identStart.search(c)):
+ # Identifier: runs until the first tokenSep match (or end of text).
+ endMatch = self.tokenSep.search(self.text, self.textIndex+1)
+ if (endMatch):
+ endIndex = endMatch.start()
+ else:
+ endIndex = self.textLen
+ self.nextToken = self.TOKEN_IDENT
+ self.nextTokenVal = self.text[self.textIndex:endIndex]
+ self.textIndex = endIndex
+ return
+ else:
+ self.nextToken = self.TOKEN_OP
+ endIndex = self.textIndex + 1
+ # Two-character operators: <=, >=, != and ==.
+ if (c == '<' or c == '>' or c == '!' or c == '='):
+ if ((endIndex < self.textLen) and (self.text[endIndex] == '=')):
+ endIndex += 1
+ # A "%" followed by a printf-style conversion character is
+ # kept together as a single token (e.g. "%s").
+ elif ((c == '%') and (endIndex < self.textLen)):
+ c = self.text[endIndex]
+ if (c in ['d', 'i', 'o', 'u', 'x', 'X', 'e', 'E', 'f', 'F', 'g', 'G', 'c', 'r', 's', '%']):
+ endIndex += 1
+## self.nextToken = self.TOKEN_PLACEHOLDER # Should really be this but no one can handle it yet
+ self.nextTokenVal = self.text[self.textIndex:endIndex]
+ self.textIndex = endIndex
+ return
+ elif not self.ignoreWhitespace:
+ # Collect the whole whitespace run as a single TOKEN_WS.
+ self.nextToken=self.TOKEN_WS
+ self.nextTokenVal=""
+ while c in string.whitespace:
+ self.nextTokenVal+=c
+ self.textIndex+=1
+ if self.textIndex==len(self.text):
+ break
+ c=self.text[self.textIndex]
+ return
+ # ignoreWhitespace: silently skip over whitespace characters.
+ self.textIndex += 1
+ self.nextToken = None
+ self.nextTokenVal = None
+
+def isXPathNonVar(var):
+ """Returns true iff var is a string ("foo" or 'foo') or a number."""
+ # NOTE(review): the numeric test below uses int(), so float literals
+ # such as "1.5" fall through and return False despite the docstring's
+ # "a number" -- confirm that integers-only is intended.
+ if (var.startswith("'") and var.endswith("'")) or \
+ (var.startswith('"') and var.endswith('"')):
+ return True
+
+ # list from XPathToCode, below
+ if var.lower() in ["count", "empty", "true", "false", "null", "and", "or", \
+ "like", "not"]:
+ return True
+
+ try:
+ t=int(var)
+ return True
+ except TypeError, e:
+ pass
+ except ValueError, e:
+ pass
+
+ return False
+
+def xpathToCode(xpaths, convertBracket=True):
+ """Translate an XPath-like predicate (or a list of predicates) into a
+ Python expression string; multiple predicates are joined with " and ".
+
+ Handles: "=" -> "==", "[...]" -> "(...)" (when convertBracket),
+ and/or/not/like keywords, count -> len, empty -> ctx.isEmptyPath,
+ true()/false()/null() -> True/False/None, namespaced service calls
+ (ns:func(...)), and absolute paths ("/a/b"), which are rewritten as
+ attribute access on the XPATH_ROOT_VAR object.
+ """
+ if ((xpaths == None) or (len(xpaths) < 1)):
+ # No predicates means an always-true condition.
+ return "True"
+ if (not isinstance(xpaths, (list, tuple))):
+ xpaths = [xpaths]
+ result = []
+ for xpath in xpaths:
+ # "." and ":" are identifier characters here so dotted paths and
+ # namespaced names survive as single tokens.
+ t = Tokenizer(xpath, "[a-zA-Z0-9_/:\.]", "[^a-zA-Z0-9_/:\.]", ignoreWhitespace=False)
+ expr = []
+ lastToken=None
+ while t.nextToken != None:
+ t.getNextToken()
+ if (t.token == Tokenizer.TOKEN_WS):
+ expr.append(" ")
+ elif (t.token == Tokenizer.TOKEN_OP):
+ # XPath "=" becomes Python "==".
+ if (t.tokenVal == "="):
+ expr.append("==")
+ elif (t.tokenVal == "[" and convertBracket):
+ expr.append("(")
+ elif (t.tokenVal == "]" and convertBracket):
+ expr.append(")")
+ else:
+ expr.append(t.tokenVal)
+ elif (t.token == Tokenizer.TOKEN_IDENT):
+ if (t.tokenVal == "and"):
+ expr.append(" and ")
+ elif (t.tokenVal == "or"):
+ expr.append(" or ")
+ elif (t.tokenVal == "not"):
+ expr.append(" not ")
+ elif (t.tokenVal == "like"):
+ # REVIEW stoens@activegrid.com 02-Nov-05 --
+ # This is very limited support for like:
+ # typically like queries look like this: "foo like 'blah%'".
+ # So translate this into "foo.startswith(blah)".
+ # We should use a regular expression to support '%'s in
+ # arbitrary places in the string. After 1.1.
+ if t.nextToken and t.nextTokenVal.endswith("%'"):
+ t.getNextToken() # throw away the "like" token
+ last = len(expr) - 1
+ expr[last] = "%s.startswith(%s')"\
+ % (expr[last], t.tokenVal[:-2])
+ else:
+ # old behavior
+ expr.append(t.tokenVal)
+
+ elif (t.tokenVal == "count"):
+ expr.append("len")
+ elif (t.tokenVal == 'empty'):
+ expr.append('ctx.isEmptyPath')
+ elif (t.tokenVal == 'true'):
+ expr.append(_parseConstantFunction(t, 'True'))
+ elif (t.tokenVal == 'false'):
+ expr.append(_parseConstantFunction(t, 'False'))
+ elif (t.tokenVal == 'null'):
+ expr.append(_parseConstantFunction(t, 'None'))
+ elif (-1!=t.tokenVal.find(':')):
+ # A ":" in the identifier marks a service function call;
+ # each argument is recursively translated.
+ serviceDef, args=_parseServiceFunction(t)
+
+ # XXX handle serviceDef, args being None
+
+ for i in range(len(args)):
+ args[i]=xpathToCode(args[i], False)
+ jargs="[%s]" % (",".join(args))
+
+ # XXX should be processmodel.DATASERVICE_PROCESS_NAME, not "dataservice"
+ if serviceDef[0]=='dataservice':
+ expr.append("runtimesupport.invokeDataServiceWrapper(%s, %s, ctx, locals())" % \
+ (serviceDef, jargs))
+ else:
+ expr.append("runtimesupport.invokeServiceWrapper(%s, %s, ctx)" % \
+ (serviceDef, jargs))
+ else:
+ if (lastToken==')' or lastToken==']'):
+ wasFunc=True
+ else:
+ wasFunc=False
+ # Absolute path: prefix with the root object variable and
+ # turn path separators into attribute access.
+ if (t.tokenVal.startswith('/')) and not wasFunc:
+ expr.append(XPATH_ROOT_VAR)
+ expr.append(t.tokenVal.replace('/','.'))
+ lastToken=t.tokenVal
+ else:
+ expr.append(t.tokenVal)
+
+
+ # Parenthesize multi-token expressions; pass single tokens (or a
+ # token preceded only by whitespace) through unwrapped.
+ if (len(expr) == 2 and expr[0]==" "):
+ expr = "".join(expr)
+ result.append(expr)
+ elif (len(expr) > 1):
+ expr = "".join(expr)
+ result.append("(%s)" % expr)
+ elif (len(expr) > 0):
+ result.append(expr[0])
+
+ return " and ".join(result)
+
+def _parseArgs(t):
+ """Collect the argument strings of a parenthesized argument list.
+
+ Expects t.tokenVal to be the opening "(" (returns [] otherwise).
+ Arguments are returned as raw strings of concatenated token values,
+ split on top-level commas; nested parentheses are tracked with a
+ depth counter.  Raises if the list is never closed.
+ """
+ args=[]
+ argcon=""
+
+ if t.tokenVal!='(':
+ return []
+ if t.nextTokenVal==')':
+ # Empty argument list: consume the ")" and return no args.
+ t.getNextToken()
+ return []
+
+ depth=1
+
+ while(depth!=0):
+ if not t.nextToken:
+ raise Exception("parameters list with no closing ) after token: %s" % t.tokenVal)
+ t.getNextToken()
+
+ if t.tokenVal=='(':
+ depth+=1
+ if t.tokenVal==')':
+ depth-=1
+
+ # Flush the accumulated argument at the closing ")" or at each
+ # top-level ",".
+ if depth==0 or (depth==1 and t.tokenVal==','):
+ args.append(argcon)
+ argcon=""
+ else:
+ argcon+=t.tokenVal
+ return args
+
+def _parseServiceFunction(t):
+ """Parses what appears to be a service function call into serviceDefs and args lists.
+
+ Returns (serviceDef, args) where serviceDef is the current token split
+ on ":" and args is the list produced by _parseArgs.  If the token is
+ not followed by "(", returns (t.tokenVal, None) -- i.e. the unsplit
+ token text and no argument list.
+ """
+ if t.nextTokenVal!='(':
+ return t.tokenVal, None
+
+ serviceDef=t.tokenVal.split(':')
+ t.getNextToken()
+ args=_parseArgs(t)
+
+ return serviceDef, args
+
+def _parseConstantFunction(t, outputValue):
+ """If the current token is followed by an empty argument list "()",
+ consume it and return outputValue (e.g. true() -> 'True').
+
+ If no "(" follows, the token text is returned unchanged.  If "(" is
+ present but not immediately closed, the consumed "(" is re-emitted
+ fused to the token text ("foo(") so no input is lost.
+ """
+ firstVal = t.tokenVal
+ if t.nextTokenVal != '(':
+ return firstVal
+ t.getNextToken()
+ if t.nextTokenVal != ')':
+ return "%s%s" % (firstVal, '(')
+ t.getNextToken()
+ return outputValue
+
+def parseDSPredicate(ctx, str, vars, valueList=None):
+ """Inline variable references in a predicate string.
+
+ Finds each "bpws:getVariableData(...)" or "${...}" reference in str,
+ evaluates the referenced path against vars (via xpathToCode/evalCode
+ when vars is a dict or ObjAsDict, otherwise via ctx.evalPath), and
+ replaces it: a reference preceded by a quote character is substituted
+ directly into the text, an unquoted one becomes a '%s' placeholder
+ with its value appended to valueList.
+ Returns (qualifications, valueList) where qualifications is a list
+ containing the single substituted string.
+ NOTE(review): the parameter name "str" shadows the builtin str.
+ """
+ from activegrid.util.utillang import evalCode
+ from activegrid.util.utillang import ObjAsDict
+
+ if valueList == None:
+ valueList = []
+ indexVar=0
+ oldIndexVar=0
+ sourceStr=str
+ inlinedPredicate=[]
+ qualifications=[]
+ while True:
+ oldIndexVar = indexVar
+ dollarCurlForm = False
+ quoted = False
+ # Look for the next reference in either form; stop when neither
+ # appears in the remaining text.
+ indexVar = sourceStr.find("bpws:getVariableData", indexVar)
+ if indexVar == -1:
+ indexVar = sourceStr.find("${", oldIndexVar)
+ if indexVar == -1:
+ break
+ dollarCurlForm = True
+ if indexVar > 0 and sourceStr[indexVar-1] in ('"',"'"):
+ quoted = True
+ if not dollarCurlForm:
+ # bpws:getVariableData( ... ) -- locate the parentheses.
+ openParen = sourceStr.find("(", indexVar)
+ if openParen == -1:
+ break
+ closeParen = sourceStr.find(")", openParen)
+ if closeParen == -1:
+ break
+ else:
+ # ${ ... } -- the reference runs to the closing brace.
+ openParen = indexVar+1
+ closeParen = sourceStr.find("}", openParen)
+ if closeParen == -1:
+ break
+ varRef = sourceStr[openParen+1: closeParen]
+ # Strip a single layer of surrounding quotes from the reference.
+ if varRef.startswith('"') or varRef.startswith("'"):
+ varRef = varRef[1:]
+ if varRef.endswith('"') or varRef.endswith("'"):
+ varRef = varRef[:-1]
+ if isinstance(vars, dict) or isinstance(vars, ObjAsDict):
+ varRefCode = xpathToCode(varRef)
+ value = evalCode(varRefCode, vars)
+ else:
+ value = ctx.evalPath(vars, varRef)
+ inlinedPredicate.append(sourceStr[oldIndexVar:indexVar])
+ if quoted:
+ # Quoted reference: substitute the value inline.
+ inlinedPredicate.append("%s" % value)
+ else:
+ # Unquoted reference: parameterize with a '%s' placeholder.
+ inlinedPredicate.append('%s')
+ valueList.append(value)
+ indexVar = closeParen+1
+ inlinedPredicate.append(sourceStr[oldIndexVar:])
+ qualifications.append(''.join(inlinedPredicate))
+ return qualifications, valueList