--- /dev/null
+#----------------------------------------------------------------------------
+# Name: parser.py
+# Purpose: parsing utilities
+#
+# Author: Jeff Norton
+#
+# Created: 8/9/05
+# CVS-ID: $Id$
+# Copyright: (c) 2004-2005 ActiveGrid, Inc.
+# License: wxWindows License
+#----------------------------------------------------------------------------
+
+import re
+from activegrid.util.lang import *
+ifDefPy()
+import string
+import array
+endIfDef()
+
+XPATH_ROOT_VAR = '__rootObj__'
+GETOBJECTPARTNAMES = ["primaryRef", "ref", "orderings", "limit"]
+
+class Tokenizer(object):
+ """Lexer with one token of lookahead: splits text into identifier,
+ quoted-string, operator, and (optionally) whitespace tokens."""
+
+ # Token type codes stored in token/nextToken.
+ TOKEN_IDENT = 1
+ TOKEN_STRING = 2
+ TOKEN_OP = 3
+ TOKEN_WS = 4
+## TOKEN_PLACEHOLDER = 5
+
+ def __init__(self, text, identStart=None, tokenSep=None, ignoreWhitespace=True):
+ """
+Turn a string into individual tokens. Three types of tokens are recognized:
+ TOKEN_IDENT: identifiers (those that start with the identStart pattern)
+ TOKEN_STRING: quoted string
+ TOKEN_OP: everything else
+Tokens are separated by white space or the tokenSep pattern.
+Constructor parameters:
+ text: The string to tokenize
+ identStart: A regular expression describing characters which start an identifier
+ The default expression accepts letters, "_", and "/".
+ tokenSep: A regular expression describing the characters which end a token
+ (in addition to whitespace). The default expression accepts
+ anything except alpha-numerics, "_", "/", and ":".
+Usage:
+ Invoke getNextToken (or next) to get the next token. The instance variables
+ token, and tokenVal will be populated with the current token type (TOKEN_IDENT,
+ TOKEN_STRING, or TOKEN_OP) and value respectively. nextToken and nextTokenVal
+ will also be available for lookahead. The next method is similar to
+ getNextToken but also returns the token value. A value of None signals end
+ of stream.
+ """
+ self.ignoreWhitespace=ignoreWhitespace
+ ifDefPy()
+ # array.array input is converted to its raw string form before lexing.
+ if (isinstance(text, array.array)):
+ text = text.tostring()
+ endIfDef()
+ self.text = asString(text)
+ self.textIndex = 0
+ self.textLen = len(self.text)
+ self.token = None
+ self.tokenVal = None
+ self.nextToken = None
+ self.nextTokenVal = None
+ # Defaults: identifiers start with a letter, "_", or "/", and run
+ # until a character outside [a-zA-Z0-9_/:] is seen.
+ if (identStart == None):
+ identStart = "[a-zA-Z_/]"
+ if (tokenSep == None):
+ tokenSep = "[^a-zA-Z0-9_/:]"
+ self.identStart = re.compile(identStart)
+ self.tokenSep = re.compile(tokenSep)
+ self.getNextToken() # Prime the pump
+
+ def isEscaped(text, index):
+ # True iff text[index] is preceded by a single backslash that is not
+ # itself escaped.  NOTE(review): only two characters of lookbehind are
+ # examined, so longer backslash runs are not fully analyzed.
+ if ((index > 0) and (text[index-1] == '\\') and ((index < 2) or (text[index-2] != '\\'))):
+ return True
+ return False
+ isEscaped = staticmethod(isEscaped)
+
+ def findClosingQuote(text, index, char):
+ # Scan forward from the opening quote at index for the matching
+ # closing char, skipping backslash-escaped occurrences.  Returns the
+ # index just PAST the closing quote, or -1 if there is none.
+ index = index + 1
+ while True:
+ endIndex = text.find(char, index)
+ if (endIndex < 1):
+ return -1
+ if (Tokenizer.isEscaped(text, endIndex)):
+ index = endIndex+1
+ else:
+ break
+ return endIndex + 1
+ findClosingQuote = staticmethod(findClosingQuote)
+
+ def _findClosing(self, char):
+ # Like findClosingQuote, but raises if the quote is unmatched.
+ if (self.textIndex >= self.textLen):
+ raise Exception("The text \"%s\" has an unmatched string starting at %d" % (self.text, self.textIndex))
+ index = Tokenizer.findClosingQuote(self.text, self.textIndex, char)
+ if (index < 0):
+ raise Exception("The text \"%s\" has an unmatched string starting at %d" % (self.text, self.textIndex-1))
+ return index
+
+ def next(self):
+ # Iterator-style accessor: advance one token and return its value;
+ # raises StopIteration once the stream is exhausted.
+ self.getNextToken()
+ if (self.token == None):
+ raise StopIteration()
+ return self.tokenVal
+
+ def getNextToken(self):
+ # Advance one token: the lookahead (nextToken/nextTokenVal) becomes
+ # the current token (token/tokenVal), then the text is scanned for a
+ # new lookahead.  At end of input the lookahead is set to None.
+ self.token = self.nextToken
+ self.tokenVal = self.nextTokenVal
+ while (self.textIndex < self.textLen):
+ c = self.text[self.textIndex]
+ if (c not in string.whitespace):
+ if (c == '"' or c == "'" or c == '`'):
+ # Quoted string; the token value keeps its quotes.
+ endIndex = self._findClosing(c)
+ self.nextToken = self.TOKEN_STRING
+ self.nextTokenVal = self.text[self.textIndex:endIndex]
+ self.textIndex = endIndex
+ return
+ elif (self.identStart.search(c)):
+ # Identifier: runs until the first tokenSep match (or end of text).
+ endMatch = self.tokenSep.search(self.text, self.textIndex+1)
+ if (endMatch):
+ endIndex = endMatch.start()
+ else:
+ endIndex = self.textLen
+ self.nextToken = self.TOKEN_IDENT
+ self.nextTokenVal = self.text[self.textIndex:endIndex]
+ self.textIndex = endIndex
+ return
+ else:
+ self.nextToken = self.TOKEN_OP
+ endIndex = self.textIndex + 1
+ # Two-character operators: <=, >=, != and ==.
+ if (c == '<' or c == '>' or c == '!' or c == '='):
+ if ((endIndex < self.textLen) and (self.text[endIndex] == '=')):
+ endIndex += 1
+ # A "%" followed by a printf-style conversion character is
+ # kept together as a single token (e.g. "%s").
+ elif ((c == '%') and (endIndex < self.textLen)):
+ c = self.text[endIndex]
+ if (c in ['d', 'i', 'o', 'u', 'x', 'X', 'e', 'E', 'f', 'F', 'g', 'G', 'c', 'r', 's', '%']):
+ endIndex += 1
+## self.nextToken = self.TOKEN_PLACEHOLDER # Should really be this but no one can handle it yet
+ self.nextTokenVal = self.text[self.textIndex:endIndex]
+ self.textIndex = endIndex
+ return
+ elif not self.ignoreWhitespace:
+ # Collect the whole whitespace run as a single TOKEN_WS.
+ self.nextToken=self.TOKEN_WS
+ self.nextTokenVal=""
+ while c in string.whitespace:
+ self.nextTokenVal+=c
+ self.textIndex+=1
+ if self.textIndex==len(self.text):
+ break
+ c=self.text[self.textIndex]
+ return
+ # ignoreWhitespace: silently skip over whitespace characters.
+ self.textIndex += 1
+ self.nextToken = None
+ self.nextTokenVal = None
+
+def isXPathNonVar(var):
+ """Returns true iff var is a string ("foo" or 'foo') or a number."""
+ # NOTE(review): the numeric test below uses int(), so float literals
+ # such as "1.5" fall through and return False despite the docstring's
+ # "a number" -- confirm that integers-only is intended.
+ if (var.startswith("'") and var.endswith("'")) or \
+ (var.startswith('"') and var.endswith('"')):
+ return True
+
+ # list from XPathToCode, below
+ if var.lower() in ["count", "empty", "true", "false", "null", "and", "or", \
+ "like", "not"]:
+ return True
+
+ try:
+ t=int(var)
+ return True
+ except TypeError, e:
+ pass
+ except ValueError, e:
+ pass
+
+ return False
+
+def xpathToCode(xpaths, convertBracket=True):
+ """Translate an XPath-like predicate (or a list of predicates) into a
+ Python expression string; multiple predicates are joined with " and ".
+
+ Handles: "=" -> "==", "[...]" -> "(...)" (when convertBracket),
+ and/or/not/like keywords, count -> len, empty -> ctx.isEmptyPath,
+ true()/false()/null() -> True/False/None, namespaced service calls
+ (ns:func(...)), and absolute paths ("/a/b"), which are rewritten as
+ attribute access on the XPATH_ROOT_VAR object.
+ """
+ if ((xpaths == None) or (len(xpaths) < 1)):
+ # No predicates means an always-true condition.
+ return "True"
+ if (not isinstance(xpaths, (list, tuple))):
+ xpaths = [xpaths]
+ result = []
+ for xpath in xpaths:
+ # "." and ":" are identifier characters here so dotted paths and
+ # namespaced names survive as single tokens.
+ t = Tokenizer(xpath, "[a-zA-Z0-9_/:\.]", "[^a-zA-Z0-9_/:\.]", ignoreWhitespace=False)
+ expr = []
+ lastToken=None
+ while t.nextToken != None:
+ t.getNextToken()
+ if (t.token == Tokenizer.TOKEN_WS):
+ expr.append(" ")
+ elif (t.token == Tokenizer.TOKEN_OP):
+ # XPath "=" becomes Python "==".
+ if (t.tokenVal == "="):
+ expr.append("==")
+ elif (t.tokenVal == "[" and convertBracket):
+ expr.append("(")
+ elif (t.tokenVal == "]" and convertBracket):
+ expr.append(")")
+ else:
+ expr.append(t.tokenVal)
+ elif (t.token == Tokenizer.TOKEN_IDENT):
+ if (t.tokenVal == "and"):
+ expr.append(" and ")
+ elif (t.tokenVal == "or"):
+ expr.append(" or ")
+ elif (t.tokenVal == "not"):
+ expr.append(" not ")
+ elif (t.tokenVal == "like"):
+ # REVIEW stoens@activegrid.com 02-Nov-05 --
+ # This is very limited support for like:
+ # typically like queries look like this: "foo like 'blah%'".
+ # So translate this into "foo.startswith(blah)".
+ # We should use a regular expression to support '%'s in
+ # arbitrary places in the string. After 1.1.
+ if t.nextToken and t.nextTokenVal.endswith("%'"):
+ t.getNextToken() # throw away the "like" token
+ last = len(expr) - 1
+ expr[last] = "%s.startswith(%s')"\
+ % (expr[last], t.tokenVal[:-2])
+ else:
+ # old behavior
+ expr.append(t.tokenVal)
+
+ elif (t.tokenVal == "count"):
+ expr.append("len")
+ elif (t.tokenVal == 'empty'):
+ expr.append('ctx.isEmptyPath')
+ elif (t.tokenVal == 'true'):
+ expr.append(_parseConstantFunction(t, 'True'))
+ elif (t.tokenVal == 'false'):
+ expr.append(_parseConstantFunction(t, 'False'))
+ elif (t.tokenVal == 'null'):
+ expr.append(_parseConstantFunction(t, 'None'))
+ elif (-1!=t.tokenVal.find(':')):
+ # A ":" in the identifier marks a service function call;
+ # each argument is recursively translated.
+ serviceDef, args=_parseServiceFunction(t)
+
+ # XXX handle serviceDef, args being None
+
+ for i in range(len(args)):
+ args[i]=xpathToCode(args[i], False)
+ jargs="[%s]" % (",".join(args))
+
+ # XXX should be processmodel.DATASERVICE_PROCESS_NAME, not "dataservice"
+ if serviceDef[0]=='dataservice':
+ expr.append("runtimesupport.invokeDataServiceWrapper(%s, %s, ctx, locals())" % \
+ (serviceDef, jargs))
+ else:
+ expr.append("runtimesupport.invokeServiceWrapper(%s, %s, ctx)" % \
+ (serviceDef, jargs))
+ else:
+ if (lastToken==')' or lastToken==']'):
+ wasFunc=True
+ else:
+ wasFunc=False
+ # Absolute path: prefix with the root object variable and
+ # turn path separators into attribute access.
+ if (t.tokenVal.startswith('/')) and not wasFunc:
+ expr.append(XPATH_ROOT_VAR)
+ expr.append(t.tokenVal.replace('/','.'))
+ lastToken=t.tokenVal
+ else:
+ expr.append(t.tokenVal)
+
+
+ # Parenthesize multi-token expressions; pass single tokens (or a
+ # token preceded only by whitespace) through unwrapped.
+ if (len(expr) == 2 and expr[0]==" "):
+ expr = "".join(expr)
+ result.append(expr)
+ elif (len(expr) > 1):
+ expr = "".join(expr)
+ result.append("(%s)" % expr)
+ elif (len(expr) > 0):
+ result.append(expr[0])
+
+ return " and ".join(result)
+
+def _parseArgs(t):
+ """Collect the argument strings of a parenthesized argument list.
+
+ Expects t.tokenVal to be the opening "(" (returns [] otherwise).
+ Arguments are returned as raw strings of concatenated token values,
+ split on top-level commas; nested parentheses are tracked with a
+ depth counter.  Raises if the list is never closed.
+ """
+ args=[]
+ argcon=""
+
+ if t.tokenVal!='(':
+ return []
+ if t.nextTokenVal==')':
+ # Empty argument list: consume the ")" and return no args.
+ t.getNextToken()
+ return []
+
+ depth=1
+
+ while(depth!=0):
+ if not t.nextToken:
+ raise Exception("parameters list with no closing ) after token: %s" % t.tokenVal)
+ t.getNextToken()
+
+ if t.tokenVal=='(':
+ depth+=1
+ if t.tokenVal==')':
+ depth-=1
+
+ # Flush the accumulated argument at the closing ")" or at each
+ # top-level ",".
+ if depth==0 or (depth==1 and t.tokenVal==','):
+ args.append(argcon)
+ argcon=""
+ else:
+ argcon+=t.tokenVal
+ return args
+
+def _parseServiceFunction(t):
+ """Parses what appears to be a service function call into serviceDefs and args lists.
+
+ Returns (serviceDef, args) where serviceDef is the current token split
+ on ":" and args is the list produced by _parseArgs.  If the token is
+ not followed by "(", returns (t.tokenVal, None) -- i.e. the unsplit
+ token text and no argument list.
+ """
+ if t.nextTokenVal!='(':
+ return t.tokenVal, None
+
+ serviceDef=t.tokenVal.split(':')
+ t.getNextToken()
+ args=_parseArgs(t)
+
+ return serviceDef, args
+
+def _parseConstantFunction(t, outputValue):
+ """If the current token is followed by an empty argument list "()",
+ consume it and return outputValue (e.g. true() -> 'True').
+
+ If no "(" follows, the token text is returned unchanged.  If "(" is
+ present but not immediately closed, the consumed "(" is re-emitted
+ fused to the token text ("foo(") so no input is lost.
+ """
+ firstVal = t.tokenVal
+ if t.nextTokenVal != '(':
+ return firstVal
+ t.getNextToken()
+ if t.nextTokenVal != ')':
+ return "%s%s" % (firstVal, '(')
+ t.getNextToken()
+ return outputValue
+
+def parseDSPredicate(ctx, str, vars, valueList=None):
+ """Inline variable references in a predicate string.
+
+ Finds each "bpws:getVariableData(...)" or "${...}" reference in str,
+ evaluates the referenced path against vars (via xpathToCode/evalCode
+ when vars is a dict or ObjAsDict, otherwise via ctx.evalPath), and
+ replaces it: a reference preceded by a quote character is substituted
+ directly into the text, an unquoted one becomes a '%s' placeholder
+ with its value appended to valueList.
+ Returns (qualifications, valueList) where qualifications is a list
+ containing the single substituted string.
+ NOTE(review): the parameter name "str" shadows the builtin str.
+ """
+ from activegrid.util.utillang import evalCode
+ from activegrid.util.utillang import ObjAsDict
+
+ if valueList == None:
+ valueList = []
+ indexVar=0
+ oldIndexVar=0
+ sourceStr=str
+ inlinedPredicate=[]
+ qualifications=[]
+ while True:
+ oldIndexVar = indexVar
+ dollarCurlForm = False
+ quoted = False
+ # Look for the next reference in either form; stop when neither
+ # appears in the remaining text.
+ indexVar = sourceStr.find("bpws:getVariableData", indexVar)
+ if indexVar == -1:
+ indexVar = sourceStr.find("${", oldIndexVar)
+ if indexVar == -1:
+ break
+ dollarCurlForm = True
+ if indexVar > 0 and sourceStr[indexVar-1] in ('"',"'"):
+ quoted = True
+ if not dollarCurlForm:
+ # bpws:getVariableData( ... ) -- locate the parentheses.
+ openParen = sourceStr.find("(", indexVar)
+ if openParen == -1:
+ break
+ closeParen = sourceStr.find(")", openParen)
+ if closeParen == -1:
+ break
+ else:
+ # ${ ... } -- the reference runs to the closing brace.
+ openParen = indexVar+1
+ closeParen = sourceStr.find("}", openParen)
+ if closeParen == -1:
+ break
+ varRef = sourceStr[openParen+1: closeParen]
+ # Strip a single layer of surrounding quotes from the reference.
+ if varRef.startswith('"') or varRef.startswith("'"):
+ varRef = varRef[1:]
+ if varRef.endswith('"') or varRef.endswith("'"):
+ varRef = varRef[:-1]
+ if isinstance(vars, dict) or isinstance(vars, ObjAsDict):
+ varRefCode = xpathToCode(varRef)
+ value = evalCode(varRefCode, vars)
+ else:
+ value = ctx.evalPath(vars, varRef)
+ inlinedPredicate.append(sourceStr[oldIndexVar:indexVar])
+ if quoted:
+ # Quoted reference: substitute the value inline.
+ inlinedPredicate.append("%s" % value)
+ else:
+ # Unquoted reference: parameterize with a '%s' placeholder.
+ inlinedPredicate.append('%s')
+ valueList.append(value)
+ indexVar = closeParen+1
+ inlinedPredicate.append(sourceStr[oldIndexVar:])
+ qualifications.append(''.join(inlinedPredicate))
+ return qualifications, valueList