#----------------------------------------------------------------------------
# Name:         parser.py
# Purpose:      parsing utilities
#
# Author:       Jeff Norton
#
# Created:      8/9/05
# CVS-ID:       $Id$
# Copyright:    (c) 2004-2005 ActiveGrid, Inc.
# License:      wxWindows License
#----------------------------------------------------------------------------

import re
from activegrid.util.lang import *
ifDefPy()
import string
import array
endIfDef()

XPATH_ROOT_VAR = '__rootObj__'
GETOBJECTPARTNAMES = ["primaryRef", "ref", "orderings", "limit"]

class Tokenizer(object):

    TOKEN_IDENT = 1
    TOKEN_STRING = 2
    TOKEN_OP = 3
    TOKEN_WS = 4
##    TOKEN_PLACEHOLDER = 5

    def __init__(self, text, identStart=None, tokenSep=None, ignoreWhitespace=True):
32 | """ | |
33 | Turn a string into individual tokens. Three types of tokens are recognized: | |
34 | TOKEN_IDENT: identifiers (those that start with the identStart pattern) | |
35 | TOKEN_STRING: quoted string | |
36 | TOKEN_OP: everything else | |
37 | Tokens are separated by white space or the tokenSep pattern. | |
38 | Constructor parameters: | |
39 | text: The string to tokenize | |
40 | identStart: A regular expression describing characters which start an identifier | |
41 | The default expression accepts letters, "_", and "/". | |
42 | tokenSep: A regular expression describing the characters which end a token | |
43 | (in addition to whitespace). The default expression accepts | |
44 | anything except alpha-numerics, "_", "/", and ":". | |
45 | Usage: | |
46 | Invoke getNextToken (or next) to get the next token. The instance variables | |
47 | token, and tokenVal will be populated with the current token type (TOKEN_IDENT, | |
48 | TOKEN_STRING, or TOEKN_OP) and value respectively. nextToken and nextTokenVal | |
49 | will also be available for lookahead. The next method is similar to | |
50 | getNextToken but also returns the token value. A value of None signals end | |
51 | of stream. | |
52 | """ | |
        self.ignoreWhitespace=ignoreWhitespace
        ifDefPy()
        if (isinstance(text, array.array)):
            text = text.tostring()
        endIfDef()
        self.text = asString(text)
        self.textIndex = 0
        self.textLen = len(self.text)
        self.token = None
        self.tokenVal = None
        self.nextToken = None
        self.nextTokenVal = None
        if (identStart == None):
            identStart = "[a-zA-Z_/]"
        if (tokenSep == None):
            tokenSep = "[^a-zA-Z0-9_/:]"
        self.identStart = re.compile(identStart)
        self.tokenSep = re.compile(tokenSep)
        self.getNextToken()   # Prime the pump

    def isEscaped(text, index):
        # True if the character at text[index] is preceded by a single (itself unescaped) backslash.
        if ((index > 0) and (text[index-1] == '\\') and ((index < 2) or (text[index-2] != '\\'))):
            return True
        return False
    isEscaped = staticmethod(isEscaped)

    def findClosingQuote(text, index, char):
        # Returns the index just past the closing quote matching the opening quote at
        # text[index], skipping escaped quotes, or -1 if no closing quote is found.
        index = index + 1
        while True:
            endIndex = text.find(char, index)
            if (endIndex < 1):
                return -1
            if (Tokenizer.isEscaped(text, endIndex)):
                index = endIndex+1
            else:
                break
        return endIndex + 1
    findClosingQuote = staticmethod(findClosingQuote)

    def _findClosing(self, char):
        if (self.textIndex >= self.textLen):
            raise Exception("The text \"%s\" has an unmatched string starting at %d" % (self.text, self.textIndex))
        index = Tokenizer.findClosingQuote(self.text, self.textIndex, char)
        if (index < 0):
            raise Exception("The text \"%s\" has an unmatched string starting at %d" % (self.text, self.textIndex-1))
        return index

    def next(self):
        self.getNextToken()
        if (self.token == None):
            raise StopIteration()
        return self.tokenVal

    def getNextToken(self):
        self.token = self.nextToken
        self.tokenVal = self.nextTokenVal
        while (self.textIndex < self.textLen):
            c = self.text[self.textIndex]
            if (c not in string.whitespace):
                if (c == '"' or c == "'" or c == '`'):
                    endIndex = self._findClosing(c)
                    self.nextToken = self.TOKEN_STRING
                    self.nextTokenVal = self.text[self.textIndex:endIndex]
                    self.textIndex = endIndex
                    return
                elif (self.identStart.search(c)):
                    endMatch = self.tokenSep.search(self.text, self.textIndex+1)
                    if (endMatch):
                        endIndex = endMatch.start()
                    else:
                        endIndex = self.textLen
                    self.nextToken = self.TOKEN_IDENT
                    self.nextTokenVal = self.text[self.textIndex:endIndex]
                    self.textIndex = endIndex
                    return
                else:
                    self.nextToken = self.TOKEN_OP
                    endIndex = self.textIndex + 1
                    if (c == '<' or c == '>' or c == '!' or c == '='):
                        if ((endIndex < self.textLen) and (self.text[endIndex] == '=')):
                            endIndex += 1
                    elif ((c == '%') and (endIndex < self.textLen)):
                        c = self.text[endIndex]
                        if (c in ['d', 'i', 'o', 'u', 'x', 'X', 'e', 'E', 'f', 'F', 'g', 'G', 'c', 'r', 's', '%']):
                            endIndex += 1
##                            self.nextToken = self.TOKEN_PLACEHOLDER # Should really be this but no one can handle it yet
                    self.nextTokenVal = self.text[self.textIndex:endIndex]
                    self.textIndex = endIndex
                    return
            elif not self.ignoreWhitespace:
                self.nextToken=self.TOKEN_WS
                self.nextTokenVal=""
                while c in string.whitespace:
                    self.nextTokenVal+=c
                    self.textIndex+=1
                    if self.textIndex==len(self.text):
                        break
                    c=self.text[self.textIndex]
                return
            self.textIndex += 1
        self.nextToken = None
        self.nextTokenVal = None

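# Illustrative Tokenizer usage (an added sketch, not part of the original module; it
# mirrors the loop that xpathToCode() below uses to drive the tokenizer).  With the
# default patterns, "foo = 'bar'" yields an identifier, an operator, and a string:
#
#     t = Tokenizer("foo = 'bar'")
#     while t.nextToken != None:
#         t.getNextToken()
#         print t.token, t.tokenVal
#
# which prints 1 (TOKEN_IDENT) with 'foo', then 3 (TOKEN_OP) with '=', then
# 2 (TOKEN_STRING) with "'bar'".
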
def isXPathNonVar(var):
    """Returns True iff var is a quoted string ("foo" or 'foo'), a number, or a
    recognized XPath keyword -- i.e. anything that is not a variable path."""
    if (var.startswith("'") and var.endswith("'")) or \
       (var.startswith('"') and var.endswith('"')):
        return True

    # list from xpathToCode, below
    if var.lower() in ["count", "empty", "true", "false", "null", "and", "or", \
                       "like", "not"]:
        return True

    try:
        t=int(var)
        return True
    except TypeError, e:
        pass
    except ValueError, e:
        pass

    return False

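# Illustrative isXPathNonVar results (an added sketch, not from the original source):
#     isXPathNonVar("'foo'")     -> True   (quoted string)
#     isXPathNonVar("42")        -> True   (number)
#     isXPathNonVar("count")     -> True   (recognized keyword)
#     isXPathNonVar("/order/id") -> False  (variable path)
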
def xpathToCode(xpaths, convertBracket=True):
    """Translates one or more XPath-like expressions into Python expression strings,
    joined with " and ".  Returns "True" for an empty input."""
    if ((xpaths == None) or (len(xpaths) < 1)):
        return "True"
    if (not isinstance(xpaths, (list, tuple))):
        xpaths = [xpaths]
    result = []
    for xpath in xpaths:
        t = Tokenizer(xpath, "[a-zA-Z0-9_/:\.]", "[^a-zA-Z0-9_/:\.]", ignoreWhitespace=False)
        expr = []
        lastToken=None
        while t.nextToken != None:
            t.getNextToken()
            if (t.token == Tokenizer.TOKEN_WS):
                expr.append(" ")
            elif (t.token == Tokenizer.TOKEN_OP):
                if (t.tokenVal == "="):
                    expr.append("==")
                elif (t.tokenVal == "[" and convertBracket):
                    expr.append("(")
                elif (t.tokenVal == "]" and convertBracket):
                    expr.append(")")
                else:
                    expr.append(t.tokenVal)
            elif (t.token == Tokenizer.TOKEN_IDENT):
                if (t.tokenVal == "and"):
                    expr.append(" and ")
                elif (t.tokenVal == "or"):
                    expr.append(" or ")
                elif (t.tokenVal == "not"):
                    expr.append(" not ")
                elif (t.tokenVal == "like"):
                    # REVIEW stoens@activegrid.com 02-Nov-05 --
                    # This is very limited support for like:
                    # typically like queries look like this: "foo like 'blah%'".
                    # So translate this into "foo.startswith(blah)".
                    # We should use a regular expression to support '%'s in
                    # arbitrary places in the string. After 1.1.
                    if t.nextToken and t.nextTokenVal.endswith("%'"):
                        t.getNextToken() # throw away the "like" token
                        last = len(expr) - 1
                        expr[last] = "%s.startswith(%s')"\
                                     % (expr[last], t.tokenVal[:-2])
                    else:
                        # old behavior
                        expr.append(t.tokenVal)
                elif (t.tokenVal == "count"):
                    expr.append("len")
                elif (t.tokenVal == 'empty'):
                    expr.append('ctx.isEmptyPath')
                elif (t.tokenVal == 'true'):
                    expr.append(_parseConstantFunction(t, 'True'))
                elif (t.tokenVal == 'false'):
                    expr.append(_parseConstantFunction(t, 'False'))
                elif (t.tokenVal == 'null'):
                    expr.append(_parseConstantFunction(t, 'None'))
                elif (-1!=t.tokenVal.find(':')):
                    serviceDef, args=_parseServiceFunction(t)

                    # XXX handle serviceDef, args being None

                    for i in range(len(args)):
                        args[i]=xpathToCode(args[i], False)
                    jargs="[%s]" % (",".join(args))

                    # XXX should be processmodel.DATASERVICE_PROCESS_NAME, not "dataservice"
                    if serviceDef[0]=='dataservice':
                        expr.append("runtimesupport.invokeDataServiceWrapper(%s, %s, ctx, locals())" % \
                                    (serviceDef, jargs))
                    else:
                        expr.append("runtimesupport.invokeServiceWrapper(%s, %s, ctx)" % \
                                    (serviceDef, jargs))
                else:
                    if (lastToken==')' or lastToken==']'):
                        wasFunc=True
                    else:
                        wasFunc=False
                    if (t.tokenVal.startswith('/')) and not wasFunc:
                        expr.append(XPATH_ROOT_VAR)
                        expr.append(t.tokenVal.replace('/','.'))
                        lastToken=t.tokenVal
                    else:
                        expr.append(t.tokenVal)

        if (len(expr) == 2 and expr[0]==" "):
            expr = "".join(expr)
            result.append(expr)
        elif (len(expr) > 1):
            expr = "".join(expr)
            result.append("(%s)" % expr)
        elif (len(expr) > 0):
            result.append(expr[0])

    return " and ".join(result)

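# Illustrative xpathToCode translations (an added sketch, not from the original source;
# the spacing comes from the whitespace-preserving tokenizer used above):
#     xpathToCode("/order/quantity > 5") -> "(__rootObj__.order.quantity > 5)"
#     xpathToCode("/a = /b")             -> "(__rootObj__.a == __rootObj__.b)"
#     xpathToCode(["/a = /b", "/c"])     -> "(__rootObj__.a == __rootObj__.b) and (__rootObj__.c)"
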
def _parseArgs(t):
    # Collects the comma-separated arguments of a parenthesized list (tracking nested
    # parentheses), assuming t is currently positioned on the opening '('.
    args=[]
    argcon=""

    if t.tokenVal!='(':
        return []
    if t.nextTokenVal==')':
        t.getNextToken()
        return []

    depth=1

    while(depth!=0):
        if not t.nextToken:
            raise Exception("parameters list with no closing ) after token: %s" % t.tokenVal)
        t.getNextToken()

        if t.tokenVal=='(':
            depth+=1
        if t.tokenVal==')':
            depth-=1

        if depth==0 or (depth==1 and t.tokenVal==','):
            args.append(argcon)
            argcon=""
        else:
            argcon+=t.tokenVal
    return args

def _parseServiceFunction(t):
    """Parses what appears to be a service function call into a serviceDef list and an args list.

    Returns the raw token value and None for args if no argument list follows the token.
    """
    if t.nextTokenVal!='(':
        return t.tokenVal, None

    serviceDef=t.tokenVal.split(':')
    t.getNextToken()
    args=_parseArgs(t)

    return serviceDef, args

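# Illustrative service-function translation (an added sketch, not from the original
# source; "myns" and "lookup" are made-up names used only for illustration).  A token
# containing ':' followed by an argument list is split into a serviceDef list plus
# args, and xpathToCode() turns it into an invokeServiceWrapper call, roughly:
#     xpathToCode("myns:lookup(/order/id)")
#         -> "runtimesupport.invokeServiceWrapper(['myns', 'lookup'], [(__rootObj__.order.id)], ctx)"
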
def _parseConstantFunction(t, outputValue):
    firstVal = t.tokenVal
    if t.nextTokenVal != '(':
        return firstVal
    t.getNextToken()
    if t.nextTokenVal != ')':
        return "%s%s" % (firstVal, '(')
    t.getNextToken()
    return outputValue

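# Illustrative constant-function handling via xpathToCode (an added sketch):
#     xpathToCode("true()") -> "True"
#     xpathToCode("null()") -> "None"
#     xpathToCode("true")   -> "true"   (no parentheses, so the token passes through unchanged)
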
def parseDSPredicate(ctx, str, vars, valueList=None):
    """Replaces bpws:getVariableData(...) and ${...} references in a predicate string
    with '%s' placeholders (or inlines the value directly when the reference is quoted),
    evaluating each reference against vars (or via ctx.evalPath).  Returns the rewritten
    predicate (as a one-element qualifications list) together with valueList."""
    from activegrid.util.utillang import evalCode
    from activegrid.util.utillang import ObjAsDict

    if valueList == None:
        valueList = []
    indexVar=0
    oldIndexVar=0
    sourceStr=str
    inlinedPredicate=[]
    qualifications=[]
    while True:
        oldIndexVar = indexVar
        dollarCurlForm = False
        quoted = False
        indexVar = sourceStr.find("bpws:getVariableData", indexVar)
        if indexVar == -1:
            indexVar = sourceStr.find("${", oldIndexVar)
            if indexVar == -1:
                break
            dollarCurlForm = True
        if indexVar > 0 and sourceStr[indexVar-1] in ('"',"'"):
            quoted = True
        if not dollarCurlForm:
            openParen = sourceStr.find("(", indexVar)
            if openParen == -1:
                break
            closeParen = sourceStr.find(")", openParen)
            if closeParen == -1:
                break
        else:
            openParen = indexVar+1
            closeParen = sourceStr.find("}", openParen)
            if closeParen == -1:
                break
        varRef = sourceStr[openParen+1: closeParen]
        if varRef.startswith('"') or varRef.startswith("'"):
            varRef = varRef[1:]
        if varRef.endswith('"') or varRef.endswith("'"):
            varRef = varRef[:-1]
        if isinstance(vars, dict) or isinstance(vars, ObjAsDict):
            varRefCode = xpathToCode(varRef)
            value = evalCode(varRefCode, vars)
        else:
            value = ctx.evalPath(vars, varRef)
        inlinedPredicate.append(sourceStr[oldIndexVar:indexVar])
        if quoted:
            inlinedPredicate.append("%s" % value)
        else:
            inlinedPredicate.append('%s')
        valueList.append(value)
        indexVar = closeParen+1
    inlinedPredicate.append(sourceStr[oldIndexVar:])
    qualifications.append(''.join(inlinedPredicate))
    return qualifications, valueList
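
# Illustrative parseDSPredicate call (an added sketch, not from the original source).
# Assuming vars supplies the root object under XPATH_ROOT_VAR ('__rootObj__') so that
# evalCode can resolve the generated expression, something like:
#     parseDSPredicate(ctx, "quantity > ${/order/minQty}", vars)
# would return roughly:
#     (['quantity > %s'], [<value of /order/minQty evaluated against vars>])
# The bpws:getVariableData('...') reference form is handled the same way.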