#----------------------------------------------------------------------------
# Name:         parser.py
# Purpose:      parsing utilities
#
# Author:       Jeff Norton
#
# Created:      8/9/05
# CVS-ID:       $Id$
# Copyright:    (c) 2004-2005 ActiveGrid, Inc.
# License:      wxWindows License
#----------------------------------------------------------------------------

import re
from activegrid.util.lang import *
ifDefPy()
import string
import array
endIfDef()

XPATH_ROOT_VAR = '__rootObj__'
GETOBJECTPARTNAMES = ["primaryRef", "ref", "orderings", "limit"]


class Tokenizer(object):
    """Split a string into identifier, quoted-string, operator and
    (optionally) whitespace tokens with one token of lookahead.

    See __init__ for the constructor parameters and usage notes.
    """

    TOKEN_IDENT = 1
    TOKEN_STRING = 2
    TOKEN_OP = 3
    TOKEN_WS = 4
##    TOKEN_PLACEHOLDER = 5

    def __init__(self, text, identStart=None, tokenSep=None, ignoreWhitespace=True):
        """
        Turn a string into individual tokens.  These token types are recognized:
            TOKEN_IDENT:   identifiers (those that start with the identStart pattern)
            TOKEN_STRING:  quoted string (delimited by ", ' or `; quotes are kept)
            TOKEN_OP:      everything else
            TOKEN_WS:      a run of whitespace (only when ignoreWhitespace is false)
        Tokens are separated by white space or the tokenSep pattern.
        Constructor parameters:
            text:        The string to tokenize
            identStart:  A regular expression describing characters which start an identifier
                         The default expression accepts letters, "_", and "/".
            tokenSep:    A regular expression describing the characters which end a token
                         (in addition to whitespace).  The default expression accepts
                         anything except alpha-numerics, "_", "/", and ":".
            ignoreWhitespace:  When true (the default) whitespace is skipped silently;
                         when false each run of whitespace is returned as a TOKEN_WS token.
        Usage:
            Invoke getNextToken (or next) to get the next token.  The instance variables
            token, and tokenVal will be populated with the current token type (TOKEN_IDENT,
            TOKEN_STRING, TOKEN_OP or TOKEN_WS) and value respectively.  nextToken and
            nextTokenVal will also be available for lookahead.  The next method is similar
            to getNextToken but also returns the token value and raises StopIteration at
            end of stream.  After getNextToken, a token of None signals end of stream.
        """
        self.ignoreWhitespace = ignoreWhitespace
        ifDefPy()
        if (isinstance(text, array.array)):
            text = text.tostring()
        endIfDef()
        self.text = asString(text)
        self.textIndex = 0
        self.textLen = len(self.text)
        self.token = None
        self.tokenVal = None
        self.nextToken = None
        self.nextTokenVal = None
        if (identStart is None):
            identStart = "[a-zA-Z_/]"
        if (tokenSep is None):
            tokenSep = "[^a-zA-Z0-9_/:]"
        self.identStart = re.compile(identStart)
        self.tokenSep = re.compile(tokenSep)
        self.getNextToken() # Prime the pump

    def isEscaped(text, index):
        """Return True when text[index] is escaped by a preceding backslash.

        The character is escaped when it is preceded by an odd number of
        consecutive backslashes (an even run is a sequence of escaped
        backslashes, which leaves the character itself unescaped).
        """
        backslashes = 0
        pos = index - 1
        while ((pos >= 0) and (text[pos] == '\\')):
            backslashes += 1
            pos -= 1
        return (backslashes % 2) == 1
    isEscaped = staticmethod(isEscaped)

    def findClosingQuote(text, index, char):
        """Find the unescaped closing quote for the quote at text[index].

        Parameters:
            text:   the string being scanned
            index:  position of the opening quote
            char:   the quote character to look for
        Returns the index just past the closing quote, or -1 when there
        is none.
        """
        index = index + 1
        while True:
            endIndex = text.find(char, index)
            # The search starts at index+1, so for a non-negative index any
            # hit is >= 1; anything smaller means "not found".
            if (endIndex < 1):
                return -1
            if (not Tokenizer.isEscaped(text, endIndex)):
                return endIndex + 1
            # Escaped quote: keep looking after it.
            index = endIndex + 1
    findClosingQuote = staticmethod(findClosingQuote)

    def _findClosing(self, char):
        """Return the index just past the quote closing the string that
        starts at self.textIndex; raise Exception if it is unterminated."""
        if (self.textIndex >= self.textLen):
            raise Exception("The text \"%s\" has an unmatched string starting at %d" % (self.text, self.textIndex))
        index = Tokenizer.findClosingQuote(self.text, self.textIndex, char)
        if (index < 0):
            # self.textIndex is the position of the opening quote itself.
            raise Exception("The text \"%s\" has an unmatched string starting at %d" % (self.text, self.textIndex))
        return index

    def __iter__(self):
        """Make the tokenizer usable directly in a for loop."""
        return self

    def next(self):
        """Advance to the next token and return its value.

        Raises StopIteration once the stream is exhausted.
        """
        self.getNextToken()
        if (self.token is None):
            raise StopIteration()
        return self.tokenVal

    # Same method under the name newer interpreters use for iteration.
    __next__ = next

    def getNextToken(self):
        """Shift the lookahead into token/tokenVal and scan a new lookahead.

        After the call, token/tokenVal hold what used to be the lookahead and
        nextToken/nextTokenVal hold the freshly scanned token, or None/None
        when the text is exhausted.  Raises Exception (via _findClosing) on
        an unterminated quoted string.
        """
        self.token = self.nextToken
        self.tokenVal = self.nextTokenVal
        while (self.textIndex < self.textLen):
            c = self.text[self.textIndex]
            if (c not in string.whitespace):
                if (c == '"' or c == "'" or c == '`'):
                    # Quoted string: the value keeps its surrounding quotes.
                    endIndex = self._findClosing(c)
                    self.nextToken = self.TOKEN_STRING
                elif (self.identStart.search(c)):
                    # Identifier: runs up to the next separator character.
                    endMatch = self.tokenSep.search(self.text, self.textIndex+1)
                    if (endMatch):
                        endIndex = endMatch.start()
                    else:
                        endIndex = self.textLen
                    self.nextToken = self.TOKEN_IDENT
                else:
                    self.nextToken = self.TOKEN_OP
                    endIndex = self.textIndex + 1
                    if (c == '<' or c == '>' or c == '!' or c == '='):
                        # Two-character comparison operators: <=, >=, !=, ==
                        if ((endIndex < self.textLen) and (self.text[endIndex] == '=')):
                            endIndex += 1
                    elif ((c == '%') and (endIndex < self.textLen)):
                        # printf-style placeholder such as %s or %d
                        c = self.text[endIndex]
                        if (c in ['d', 'i', 'o', 'u', 'x', 'X', 'e', 'E', 'f', 'F', 'g', 'G', 'c', 'r', 's', '%']):
                            endIndex += 1
##                            self.nextToken = self.TOKEN_PLACEHOLDER # Should really be this but no one can handle it yet
                self.nextTokenVal = self.text[self.textIndex:endIndex]
                self.textIndex = endIndex
                return
            elif not self.ignoreWhitespace:
                # Collect the whole run of whitespace as a single token.
                startIndex = self.textIndex
                while ((self.textIndex < self.textLen) and
                       (self.text[self.textIndex] in string.whitespace)):
                    self.textIndex += 1
                self.nextToken = self.TOKEN_WS
                self.nextTokenVal = self.text[startIndex:self.textIndex]
                return
            self.textIndex += 1
        self.nextToken = None
        self.nextTokenVal = None


def isXPathNonVar(var):
    """Returns true iff var is not a variable reference.

    That is the case when var is a quoted string ("foo" or 'foo'), one of
    the keywords handled by xpathToCode (compared case-insensitively), or
    an integer literal.
    """
    if (var.startswith("'") and var.endswith("'")) or \
            (var.startswith('"') and var.endswith('"')):
        return True

    # list from xpathToCode, below
    if var.lower() in ["count", "empty", "true", "false", "null", "and", "or", \
            "like", "not"]:
        return True

    try:
        int(var)
        return True
    except (TypeError, ValueError):
        return False


def xpathToCode(xpaths, convertBracket=True):
    """Translate one or more XPath-like expressions into an expression string.

    Parameters:
        xpaths:          a single expression string or a list/tuple of them
        convertBracket:  when true, "[" and "]" are emitted as "(" and ")"
    Returns the translated expressions joined with " and ".  None or an
    empty argument yields "True".
    Raises Exception (from the Tokenizer or _parseArgs) on an unterminated
    string or parameter list.
    """
    if ((xpaths is None) or (len(xpaths) < 1)):
        return "True"
    if (not isinstance(xpaths, (list, tuple))):
        xpaths = [xpaths]
    result = []
    for xpath in xpaths:
        t = Tokenizer(xpath, r"[a-zA-Z0-9_/:\.]", r"[^a-zA-Z0-9_/:\.]", ignoreWhitespace=False)
        expr = []
        lastToken = None
        while t.nextToken is not None:
            t.getNextToken()
            if (t.token == Tokenizer.TOKEN_WS):
                expr.append(" ")
            elif (t.token == Tokenizer.TOKEN_OP):
                if (t.tokenVal == "="):
                    expr.append("==")
                elif (t.tokenVal == "[" and convertBracket):
                    expr.append("(")
                elif (t.tokenVal == "]" and convertBracket):
                    expr.append(")")
                else:
                    expr.append(t.tokenVal)
            elif (t.token == Tokenizer.TOKEN_IDENT):
                if (t.tokenVal == "and"):
                    expr.append(" and ")
                elif (t.tokenVal == "or"):
                    expr.append(" or ")
                elif (t.tokenVal == "not"):
                    expr.append(" not ")
                elif (t.tokenVal == "like"):
                    # REVIEW stoens@activegrid.com 02-Nov-05 --
                    # This is very limited support for like:
                    # typically like queries look like this: "foo like 'blah%'".
                    # So translate this into "foo.startswith(blah)".
                    # We should use a regular expression to support '%'s in
                    # arbitrary places in the string. After 1.1.
                    #
                    # NOTE(review): the tokenizer above emits whitespace
                    # tokens, so when "like" is followed by a space the
                    # lookahead is that space and this branch is not taken;
                    # it only fires when the pattern directly follows "like".
                    # Confirm the intended behaviour before changing it.
                    if t.nextToken and t.nextTokenVal.endswith("%'"):
                        t.getNextToken() # throw away the "like" token
                        last = len(expr) - 1
                        expr[last] = "%s.startswith(%s')"\
                                     % (expr[last], t.tokenVal[:-2])
                    else:
                        # old behavior
                        expr.append(t.tokenVal)

                elif (t.tokenVal == "count"):
                    expr.append("len")
                elif (t.tokenVal == 'empty'):
                    expr.append('ctx.isEmptyPath')
                elif (t.tokenVal == 'true'):
                    expr.append(_parseConstantFunction(t, 'True'))
                elif (t.tokenVal == 'false'):
                    expr.append(_parseConstantFunction(t, 'False'))
                elif (t.tokenVal == 'null'):
                    expr.append(_parseConstantFunction(t, 'None'))
                elif ((':' in t.tokenVal) and (t.nextTokenVal == '(')):
                    # Service call "process:operation(arg, ...)".  The "("
                    # lookahead guarantees _parseServiceFunction returns a
                    # real argument list rather than None.
                    serviceDef, args = _parseServiceFunction(t)
                    args = [xpathToCode(arg, False) for arg in args]
                    jargs = "[%s]" % (",".join(args))

                    # XXX should be processmodel.DATASERVICE_PROCESS_NAME, not "dataservice"
                    if serviceDef[0] == 'dataservice':
                        expr.append("runtimesupport.invokeDataServiceWrapper(%s, %s, ctx, locals())" % \
                                    (serviceDef, jargs))
                    else:
                        expr.append("runtimesupport.invokeServiceWrapper(%s, %s, ctx)" % \
                                    (serviceDef, jargs))
                else:
                    # Plain identifier or path (including a ':' name that is
                    # not followed by an argument list).  A path that directly
                    # follows ")" or "]" continues the preceding expression,
                    # so it must not be re-anchored at the root object.
                    wasFunc = (lastToken == ')' or lastToken == ']')
                    if (t.tokenVal.startswith('/')) and not wasFunc:
                        expr.append(XPATH_ROOT_VAR)
                    expr.append(t.tokenVal.replace('/', '.'))
            else:
                expr.append(t.tokenVal)
            # Remember the last consumed token of any kind for the
            # wasFunc check above.
            lastToken = t.tokenVal

        if (len(expr) == 2 and expr[0] == " "):
            result.append("".join(expr))
        elif (len(expr) > 1):
            result.append("(%s)" % "".join(expr))
        elif (len(expr) > 0):
            result.append(expr[0])

    return " and ".join(result)


def _parseArgs(t):
    """Collect the comma-separated arguments of a parenthesised list.

    Expects t.tokenVal to be the opening "("; otherwise returns [].
    Consumes tokens up to and including the matching ")" and returns the
    raw text of each top-level argument.  Nested parentheses stay inside
    the argument they belong to.
    Raises Exception when the token stream ends before the closing ")".
    """
    if t.tokenVal != '(':
        return []
    if t.nextTokenVal == ')':
        t.getNextToken()
        return []

    args = []
    argcon = ""
    depth = 1

    while (depth != 0):
        if not t.nextToken:
            raise Exception("parameters list with no closing ) after token: %s" % t.tokenVal)
        t.getNextToken()

        if t.tokenVal == '(':
            depth += 1
        if t.tokenVal == ')':
            depth -= 1

        if depth == 0 or (depth == 1 and t.tokenVal == ','):
            # End of the list, or a top-level comma: the argument is complete.
            args.append(argcon)
            argcon = ""
        else:
            argcon += t.tokenVal
    return args


def _parseServiceFunction(t):
    """Parses what appears to be a service function call into serviceDefs and args lists.

    Returns (serviceDef, args) where serviceDef is the current token split
    on ":" and args is the list produced by _parseArgs.
    When the current token is not followed by "(", returns
    (t.tokenVal, None) without consuming anything.
    """
    if t.nextTokenVal != '(':
        return t.tokenVal, None

    serviceDef = t.tokenVal.split(':')
    t.getNextToken()
    args = _parseArgs(t)

    return serviceDef, args


def _parseConstantFunction(t, outputValue):
    """Translate a constant written as a call, e.g. "true()".

    Returns outputValue when the current token is followed by "()" (both
    parentheses are consumed).  Returns the token unchanged when no "("
    follows, and the token plus "(" when the "(" (which is consumed) is
    not immediately closed.
    """
    firstVal = t.tokenVal
    if t.nextTokenVal != '(':
        return firstVal
    t.getNextToken()
    if t.nextTokenVal != ')':
        return "%s%s" % (firstVal, '(')
    t.getNextToken()
    return outputValue


def parseDSPredicate(ctx, str, vars, valueList=None):
    """Replace variable references in a predicate string with their values.

    References have the form bpws:getVariableData(<ref>) or ${<ref>}.
    Parameters:
        ctx:        object providing evalPath(vars, ref); used when vars is
                    neither a dict nor an ObjAsDict
        str:        the predicate source string
        vars:       variables the references are evaluated against
        valueList:  optional list that evaluated values are appended to
    Returns (qualifications, valueList): qualifications is a one-element
    list holding the rewritten predicate.  A reference that is directly
    preceded by a quote character has its value inlined; any other
    reference becomes a "%s" placeholder and its value is appended to
    valueList.
    """
    from activegrid.util.utillang import evalCode
    from activegrid.util.utillang import ObjAsDict

    if valueList is None:
        valueList = []
    indexVar = 0
    oldIndexVar = 0
    sourceStr = str
    inlinedPredicate = []
    qualifications = []
    while True:
        oldIndexVar = indexVar
        dollarCurlForm = False
        quoted = False
        # NOTE(review): the ${...} form is only searched for once no further
        # bpws:getVariableData remains, so a ${...} that precedes a
        # getVariableData call is copied through unsubstituted.  Confirm
        # whether mixed forms can occur before changing the search order.
        indexVar = sourceStr.find("bpws:getVariableData", indexVar)
        if indexVar == -1:
            indexVar = sourceStr.find("${", oldIndexVar)
            if indexVar == -1:
                break
            dollarCurlForm = True
        if indexVar > 0 and sourceStr[indexVar-1] in ('"', "'"):
            quoted = True
        if not dollarCurlForm:
            openParen = sourceStr.find("(", indexVar)
            if openParen == -1:
                break
            closeParen = sourceStr.find(")", openParen)
            if closeParen == -1:
                break
        else:
            # For "${ref}" the "{" sits right after the "$".
            openParen = indexVar+1
            closeParen = sourceStr.find("}", openParen)
            if closeParen == -1:
                break
        varRef = sourceStr[openParen+1: closeParen]
        # Strip one leading and one trailing quote from the reference.
        if varRef.startswith('"') or varRef.startswith("'"):
            varRef = varRef[1:]
        if varRef.endswith('"') or varRef.endswith("'"):
            varRef = varRef[:-1]
        if isinstance(vars, (dict, ObjAsDict)):
            varRefCode = xpathToCode(varRef)
            value = evalCode(varRefCode, vars)
        else:
            value = ctx.evalPath(vars, varRef)
        inlinedPredicate.append(sourceStr[oldIndexVar:indexVar])
        if quoted:
            # NOTE(review): the value is inlined without escaping; verify
            # that callers never pass untrusted data through this path.
            inlinedPredicate.append("%s" % value)
        else:
            inlinedPredicate.append('%s')
            valueList.append(value)
        indexVar = closeParen+1
    # Copy whatever follows the last processed reference.
    inlinedPredicate.append(sourceStr[oldIndexVar:])
    qualifications.append(''.join(inlinedPredicate))
    return qualifications, valueList