]> git.saurik.com Git - wxWidgets.git/blob - wxPython/samples/ide/activegrid/util/parser.py
DocView and ActiveGrid IDE updates from Morgan Hua:
[wxWidgets.git] / wxPython / samples / ide / activegrid / util / parser.py
1 #----------------------------------------------------------------------------
2 # Name: parser.py
3 # Purpose: parsing utilities
4 #
5 # Author: Jeff Norton
6 #
7 # Created: 8/9/05
8 # CVS-ID: $Id$
9 # Copyright: (c) 2004-2005 ActiveGrid, Inc.
10 # License: wxWindows License
11 #----------------------------------------------------------------------------
12
13 import re
14 from activegrid.util.lang import *
15 ifDefPy()
16 import string
17 import array
18 endIfDef()
19
# Name that xpathToCode() prepends to absolute ("/"-prefixed) paths in the
# code it generates.
XPATH_ROOT_VAR = '__rootObj__'
# NOTE(review): not referenced elsewhere in this part of the file; presumably
# the recognised part names of a getObject-style query -- confirm with callers.
GETOBJECTPARTNAMES = ["primaryRef", "ref", "orderings", "limit"]
22
class Tokenizer(object):
    """
    Split a string into identifier, quoted-string, operator and (optionally)
    whitespace tokens, with one token of lookahead.
    """

    TOKEN_IDENT = 1
    TOKEN_STRING = 2
    TOKEN_OP = 3
    TOKEN_WS = 4
##    TOKEN_PLACEHOLDER = 5

    def __init__(self, text, identStart=None, tokenSep=None, ignoreWhitespace=True):
        """
        Turn a string into individual tokens.  Three types of tokens are recognized:
            TOKEN_IDENT:   identifiers (those that start with the identStart pattern)
            TOKEN_STRING:  quoted string (delimited by ", ' or `)
            TOKEN_OP:      everything else
        A fourth type, TOKEN_WS (a run of whitespace), is produced only when
        ignoreWhitespace is false.
        Tokens are separated by white space or the tokenSep pattern.
        Constructor parameters:
            text:        The string to tokenize
            identStart:  A regular expression describing characters which start an identifier
                         The default expression accepts letters, "_", and "/".
            tokenSep:    A regular expression describing the characters which end a token
                         (in addition to whitespace).  The default expression accepts
                         anything except alpha-numerics, "_", "/", and ":".
            ignoreWhitespace:  When true (the default) whitespace is skipped silently;
                         when false each run of whitespace is returned as a TOKEN_WS token.
        Usage:
            Invoke getNextToken (or next) to get the next token.  The instance variables
            token, and tokenVal will be populated with the current token type (TOKEN_IDENT,
            TOKEN_STRING, or TOKEN_OP) and value respectively.  nextToken and nextTokenVal
            will also be available for lookahead.  The next method is similar to
            getNextToken but also returns the token value.  A value of None signals end
            of stream.
        """
        self.ignoreWhitespace = ignoreWhitespace
        # ifDefPy()/endIfDef() come from activegrid.util.lang.
        # NOTE(review): presumably they bracket Python-only code -- confirm.
        ifDefPy()
        if (isinstance(text, array.array)):
            text = text.tostring()
        endIfDef()
        self.text = asString(text)
        self.textIndex = 0
        self.textLen = len(self.text)
        self.token = None
        self.tokenVal = None
        self.nextToken = None
        self.nextTokenVal = None
        if (identStart is None):
            identStart = "[a-zA-Z_/]"
        if (tokenSep is None):
            tokenSep = "[^a-zA-Z0-9_/:]"
        self.identStart = re.compile(identStart)
        self.tokenSep = re.compile(tokenSep)
        self.getNextToken()  # Prime the pump

    def isEscaped(text, index):
        """
        Return True if the character at text[index] is backslash-escaped,
        i.e. it is immediately preceded by an odd number of backslashes.
        An even run means the backslashes escape each other and the
        character itself is not escaped.
        """
        backslashes = 0
        pos = index - 1
        while (pos >= 0) and (text[pos] == '\\'):
            backslashes += 1
            pos -= 1
        return (backslashes % 2) == 1
    isEscaped = staticmethod(isEscaped)

    def findClosingQuote(text, index, char):
        """
        Find the unescaped occurrence of char that closes the quoted string
        whose opening quote is at text[index].

        Returns the index just past the closing quote, or -1 if there is none.
        """
        searchFrom = index + 1
        while True:
            endIndex = text.find(char, searchFrom)
            # searchFrom is always >= 1, so any hit is >= 1; this only
            # catches find()'s "not found" result of -1.
            if (endIndex < 1):
                return -1
            if (not Tokenizer.isEscaped(text, endIndex)):
                return endIndex + 1
            searchFrom = endIndex + 1
    findClosingQuote = staticmethod(findClosingQuote)

    def _findClosing(self, char):
        """
        Return the index just past the quote closing the string that starts
        at the current position.  Raises Exception if the string is unmatched.
        """
        if (self.textIndex >= self.textLen):
            raise Exception("The text \"%s\" has an unmatched string starting at %d" % (self.text, self.textIndex))
        index = Tokenizer.findClosingQuote(self.text, self.textIndex, char)
        if (index < 0):
            raise Exception("The text \"%s\" has an unmatched string starting at %d" % (self.text, self.textIndex-1))
        return index

    def next(self):
        """
        Advance to the next token and return its value.
        Raises StopIteration once the end of the text has been reached.
        """
        self.getNextToken()
        if (self.token is None):
            raise StopIteration()
        return self.tokenVal

    def getNextToken(self):
        """
        Advance one token: the lookahead (nextToken/nextTokenVal) becomes the
        current token (token/tokenVal) and a new lookahead is scanned from
        the text.  At end of text the lookahead becomes None.
        """
        self.token = self.nextToken
        self.tokenVal = self.nextTokenVal
        while (self.textIndex < self.textLen):
            c = self.text[self.textIndex]
            if (c not in string.whitespace):
                if (c == '"' or c == "'" or c == '`'):
                    # Quoted string: the value keeps its surrounding quotes.
                    endIndex = self._findClosing(c)
                    self.nextToken = self.TOKEN_STRING
                elif (self.identStart.search(c)):
                    # Identifier: runs up to the next separator or end of text.
                    endMatch = self.tokenSep.search(self.text, self.textIndex+1)
                    if (endMatch):
                        endIndex = endMatch.start()
                    else:
                        endIndex = self.textLen
                    self.nextToken = self.TOKEN_IDENT
                else:
                    # Operator: one character, except for the two-character
                    # forms "<=", ">=", "!=", "==" and %-format placeholders.
                    self.nextToken = self.TOKEN_OP
                    endIndex = self.textIndex + 1
                    if (c == '<' or c == '>' or c == '!' or c == '='):
                        if ((endIndex < self.textLen) and (self.text[endIndex] == '=')):
                            endIndex += 1
                    elif ((c == '%') and (endIndex < self.textLen)):
                        formatChar = self.text[endIndex]
                        if (formatChar in ['d', 'i', 'o', 'u', 'x', 'X', 'e', 'E', 'f', 'F', 'g', 'G', 'c', 'r', 's', '%']):
                            endIndex += 1
##                            self.nextToken = self.TOKEN_PLACEHOLDER # Should really be this but no one can handle it yet
                self.nextTokenVal = self.text[self.textIndex:endIndex]
                self.textIndex = endIndex
                return
            elif not self.ignoreWhitespace:
                # Return the whole run of whitespace as a single token.
                start = self.textIndex
                while ((self.textIndex < self.textLen) and
                       (self.text[self.textIndex] in string.whitespace)):
                    self.textIndex += 1
                self.nextToken = self.TOKEN_WS
                self.nextTokenVal = self.text[start:self.textIndex]
                return
            # Whitespace that is being ignored: skip it.
            self.textIndex += 1
        self.nextToken = None
        self.nextTokenVal = None
155
def isXPathNonVar(var):
    """
    Returns true iff var is not a variable reference, i.e. it is one of:
      - a quoted string ("foo" or 'foo'),
      - one of the reserved words handled by xpathToCode (case-insensitive),
      - an integer literal (anything int() accepts; "1.5" does not qualify).
    """
    if (var.startswith("'") and var.endswith("'")) or \
       (var.startswith('"') and var.endswith('"')):
        return True

    # list from xpathToCode, below
    if var.lower() in ["count", "empty", "true", "false", "null", "and", "or",
                       "like", "not"]:
        return True

    try:
        int(var)
    except (TypeError, ValueError):
        return False
    return True
176
def xpathToCode(xpaths, convertBracket=True):
    """
    Translate one or more XPath-style expressions into a Python expression string.

    Parameters:
        xpaths:          A single expression string, or a list/tuple of them.
        convertBracket:  When true (the default) "[" and "]" are emitted as
                         "(" and ")".
    Returns:
        The generated code as a string.  Each expression that produced more
        than one output fragment is wrapped in parentheses, and the
        expressions are joined with " and ".  None or an empty
        string/sequence yields "True".
    Raises:
        Exception if an identifier containing ":" (a service function name)
        is not followed by a parenthesised argument list, or if a quoted
        string or argument list is unterminated.
    """
    if ((xpaths is None) or (len(xpaths) < 1)):
        return "True"
    if (not isinstance(xpaths, (list, tuple))):
        xpaths = [xpaths]
    result = []
    for xpath in xpaths:
        # Identifiers here may also start with digits, ":" or "." and may
        # contain "."; whitespace is kept so it can be echoed to the output.
        t = Tokenizer(xpath, r"[a-zA-Z0-9_/:\.]", r"[^a-zA-Z0-9_/:\.]", ignoreWhitespace=False)
        expr = []
        lastToken = None
        while t.nextToken is not None:
            t.getNextToken()
            if (t.token == Tokenizer.TOKEN_WS):
                # Any run of whitespace collapses to a single space.
                expr.append(" ")
            elif (t.token == Tokenizer.TOKEN_OP):
                if (t.tokenVal == "="):
                    expr.append("==")
                elif (t.tokenVal == "[" and convertBracket):
                    expr.append("(")
                elif (t.tokenVal == "]" and convertBracket):
                    expr.append(")")
                else:
                    expr.append(t.tokenVal)
            elif (t.token == Tokenizer.TOKEN_IDENT):
                if (t.tokenVal == "and"):
                    expr.append(" and ")
                elif (t.tokenVal == "or"):
                    expr.append(" or ")
                elif (t.tokenVal == "not"):
                    expr.append(" not ")
                elif (t.tokenVal == "like"):
                    # REVIEW stoens@activegrid.com 02-Nov-05 --
                    # This is very limited support for like:
                    # typically like queries look like this: "foo like 'blah%'".
                    # So translate this into "foo.startswith(blah)".
                    # We should use a regular expression to support '%'s in
                    # arbitrary places in the string. After 1.1.
                    # NOTE(review): whitespace is tokenized here, so when a
                    # space follows "like" the lookahead is that whitespace
                    # and the else branch runs -- confirm this is intended.
                    if t.nextToken and t.nextTokenVal.endswith("%'"):
                        t.getNextToken()  # throw away the "like" token
                        last = len(expr) - 1
                        expr[last] = "%s.startswith(%s')"\
                                     % (expr[last], t.tokenVal[:-2])
                    else:
                        # old behavior
                        expr.append(t.tokenVal)

                elif (t.tokenVal == "count"):
                    expr.append("len")
                elif (t.tokenVal == 'empty'):
                    expr.append('ctx.isEmptyPath')
                elif (t.tokenVal == 'true'):
                    expr.append(_parseConstantFunction(t, 'True'))
                elif (t.tokenVal == 'false'):
                    expr.append(_parseConstantFunction(t, 'False'))
                elif (t.tokenVal == 'null'):
                    expr.append(_parseConstantFunction(t, 'None'))
                elif (-1 != t.tokenVal.find(':')):
                    serviceName = t.tokenVal
                    serviceDef, args = _parseServiceFunction(t)

                    # _parseServiceFunction returns None for args when the
                    # name is not followed by "(".  Fail with a clear message
                    # rather than a TypeError from len(None).
                    if args is None:
                        raise Exception("service function \"%s\" is not followed by an argument list" % serviceName)

                    # Each argument is itself an expression; brackets inside
                    # arguments are left as they are.
                    args = [xpathToCode(arg, False) for arg in args]
                    jargs = "[%s]" % (",".join(args))

                    # XXX should be processmodel.DATASERVICE_PROCESS_NAME, not "dataservice"
                    if serviceDef[0] == 'dataservice':
                        expr.append("runtimesupport.invokeDataServiceWrapper(%s, %s, ctx, locals())" % \
                                    (serviceDef, jargs))
                    else:
                        expr.append("runtimesupport.invokeServiceWrapper(%s, %s, ctx)" % \
                                    (serviceDef, jargs))
                else:
                    # A path immediately after a function call continues that
                    # call's result; otherwise an absolute path is rooted at
                    # XPATH_ROOT_VAR.
                    if (lastToken == ')' or lastToken == ']'):
                        wasFunc = True
                    else:
                        wasFunc = False
                    if (t.tokenVal.startswith('/')) and not wasFunc:
                        expr.append(XPATH_ROOT_VAR)
                    expr.append(t.tokenVal.replace('/', '.'))
                # Recorded only for identifier tokens.  After a service or
                # constant function has been parsed, t.tokenVal is the last
                # token that parse consumed (")" for a complete call).
                lastToken = t.tokenVal
            else:
                expr.append(t.tokenVal)

        if (len(expr) == 2 and expr[0] == " "):
            # A leading space plus a single fragment: no parentheses needed.
            expr = "".join(expr)
            result.append(expr)
        elif (len(expr) > 1):
            expr = "".join(expr)
            result.append("(%s)" % expr)
        elif (len(expr) > 0):
            result.append(expr[0])

    return " and ".join(result)
272
273 def _parseArgs(t):
274 args=[]
275 argcon=""
276
277 if t.tokenVal!='(':
278 return []
279 if t.nextTokenVal==')':
280 t.getNextToken()
281 return []
282
283 depth=1
284
285 while(depth!=0):
286 if not t.nextToken:
287 raise Exception("parameters list with no closing ) after token: %s" % t.tokenVal)
288 t.getNextToken()
289
290 if t.tokenVal=='(':
291 depth+=1
292 if t.tokenVal==')':
293 depth-=1
294
295 if depth==0 or (depth==1 and t.tokenVal==','):
296 args.append(argcon)
297 argcon=""
298 else:
299 argcon+=t.tokenVal
300 return args
301
302 def _parseServiceFunction(t):
303 """Parses what appears to be a service function call into serviceDefs and args lists.
304
305 Returns None, None if the serviceFunction appears to be invalid.
306 """
307 if t.nextTokenVal!='(':
308 return t.tokenVal, None
309
310 serviceDef=t.tokenVal.split(':')
311 t.getNextToken()
312 args=_parseArgs(t)
313
314 return serviceDef, args
315
316 def _parseConstantFunction(t, outputValue):
317 firstVal = t.tokenVal
318 if t.nextTokenVal != '(':
319 return firstVal
320 t.getNextToken()
321 if t.nextTokenVal != ')':
322 return "%s%s" % (firstVal, '(')
323 t.getNextToken()
324 return outputValue
325
def parseDSPredicate(ctx, str, vars, valueList=None):
    """
    Replace variable references in a predicate string with their values.

    Two reference forms are recognised:
        bpws:getVariableData(<ref>)   and   ${<ref>}
    where <ref> may be wrapped in single or double quotes.

    Parameters:
        ctx:        Used via ctx.evalPath(vars, ref) to evaluate a reference
                    when vars is neither a dict nor an ObjAsDict.
        str:        The predicate text.
        vars:       The variables.  If a dict or ObjAsDict, each reference is
                    converted with xpathToCode and evaluated with evalCode.
        valueList:  Optional list that the values are appended to; a new
                    list is created when omitted.
    Returns:
        (qualifications, valueList).  qualifications is a one-element list
        holding the rewritten predicate.  A reference directly preceded by a
        quote character is replaced by the value's text; any other reference
        is replaced by a "%s" placeholder and its value appended to valueList.

    Scanning stops at the first reference whose closing ")" or "}" (or
    opening "(") is missing; the rest of the text is copied unchanged.
    """
    from activegrid.util.utillang import evalCode
    from activegrid.util.utillang import ObjAsDict

    if valueList is None:
        valueList = []
    indexVar = 0
    oldIndexVar = 0
    sourceStr = str
    inlinedPredicate = []
    qualifications = []
    while True:
        oldIndexVar = indexVar
        dollarCurlForm = False
        quoted = False
        # NOTE(review): "${" is only searched for once no further
        # "bpws:getVariableData" exists, so a "${...}" that precedes a
        # bpws reference is copied through unsubstituted -- confirm intended.
        indexVar = sourceStr.find("bpws:getVariableData", indexVar)
        if indexVar == -1:
            indexVar = sourceStr.find("${", oldIndexVar)
            if indexVar == -1:
                break
            dollarCurlForm = True
        if indexVar > 0 and sourceStr[indexVar-1] in ('"', "'"):
            quoted = True
        if not dollarCurlForm:
            openParen = sourceStr.find("(", indexVar)
            if openParen == -1:
                break
            closeParen = sourceStr.find(")", openParen)
            if closeParen == -1:
                break
        else:
            # For "${ref}" the "{" plays the role of the open paren.
            openParen = indexVar+1
            closeParen = sourceStr.find("}", openParen)
            if closeParen == -1:
                break
        varRef = sourceStr[openParen+1: closeParen]
        if varRef.startswith('"') or varRef.startswith("'"):
            varRef = varRef[1:]
        if varRef.endswith('"') or varRef.endswith("'"):
            varRef = varRef[:-1]
        if isinstance(vars, dict) or isinstance(vars, ObjAsDict):
            varRefCode = xpathToCode(varRef)
            value = evalCode(varRefCode, vars)
        else:
            value = ctx.evalPath(vars, varRef)
        inlinedPredicate.append(sourceStr[oldIndexVar:indexVar])
        if quoted:
            # NOTE(review): the value is spliced into the predicate text
            # unescaped; if values can be untrusted and the predicate reaches
            # a query engine this needs escaping -- confirm.
            # The one-element tuple keeps a tuple-valued result from being
            # unpacked as multiple format arguments.
            inlinedPredicate.append("%s" % (value,))
        else:
            inlinedPredicate.append('%s')
            valueList.append(value)
        indexVar = closeParen+1
    inlinedPredicate.append(sourceStr[oldIndexVar:])
    qualifications.append(''.join(inlinedPredicate))
    return qualifications, valueList