]>
Commit | Line | Data |
---|---|---|
ce1245e1 RD |
1 | import sys, os, string, glob |
2 | import re | |
3 | from docparser.wxclasses import * | |
4 | import wx | |
5 | ||
6 | ||
# NOTE(review): not referenced anywhere in this part of the file; presumably
# the directory name used by whatever writes the generated output -- confirm.
outputdir = "output"
8 | ||
9 | # | |
10 | # Class REs | |
11 | # | |
12 | ||
# All patterns are raw strings so that regex escapes such as \s reach the re
# module untouched instead of relying on Python passing unknown string
# escapes through.

# group - everything between the class <H2> title and the first red heading
class_desc_re = r"""<H2>.*?</H2>(.*?)<B><FONT COLOR="#FF0000">"""
# group - body of the "Window styles" section, up to the next red heading
win_styles_re = r"""<B><FONT COLOR="#FF0000">Window styles</FONT></B><P>(.*?)<B><FONT COLOR="#FF0000">"""
# group - body of the "Extra window styles" section, up to the next red heading
win_styles_extra_re = r"""<B><FONT COLOR="#FF0000">Extra window styles</FONT></B><P>(.*?)<B><FONT COLOR="#FF0000">"""
# groups - first cell (bold text), second cell, of one two-column table row
win_style_re = r"""<TR><TD VALIGN=TOP WIDTH=.*?>\s*?<FONT FACE=".*?">\s*?<B>(.*?)</B>\s*?</FONT></TD>\s*?<TD VALIGN=TOP>\s*?<FONT FACE=".*?">(.*?)</FONT></TD></TR>"""
# group - body of the "Derived from" section, up to the next <P>
derived_re = r"""<B><FONT COLOR="#FF0000">Derived from</FONT></B><P>(.*?)<P>"""
# group - link text of one anchor inside the "Derived from" section
derived_class_re = r"""<A HREF=".*?">(.*?)</A>"""
19 | ||
20 | # | |
21 | # Method REs | |
22 | # | |
23 | ||
# Patterns are raw strings so that \s, \( and \) are regex escapes rather than
# unrecognised Python string escapes.

# groups - header, description
method_re = r"<H3>(.*?)</H3>\s*?<P>(.*?)<HR>"
# groups - header, description; terminated by <P></FONT> instead of <HR>
lastmethod_re = r"<H3>(.*?)</H3>\s*?<P>(.*?)\s*?<P>\s*?</FONT>"
# groups - heading text, following text
# NOTE(review): the trailing lazy group is not followed by anything, so it
# always matches the empty string; this pattern is unused in the visible code.
headings_re = '<B><FONT COLOR="#FF0000">(.*?)</FONT></B><P>(.*?)'
# groups = param name, param value
param_re = "<I>(.*?)</I><UL><UL>(.*?)</UL></UL>"
# groups - return type, method name, arguments
proto_re = r"<B>(.*?)</B>.*?<B>(.*?)</B>\s*?\((.*?)\)"
# groups - arg type, arg name
args_re = "<B>(.*?)</B>.*?<I>(.*?)</I>"
# group - contents of a preformatted (code sample) block
code_re = "<PRE>(.*?)</PRE>"
# groups - link target, link text
link_re = '<A href="(.*?)"><B>(.*?)</B></A><BR>'
36 | ||
37 | # | |
38 | # wxPython/wxPerl note REs | |
39 | # | |
40 | ||
# Patterns are raw strings so that \S and \s are regex escapes rather than
# unrecognised Python string escapes.

# any token starting with "wx" followed by a capital letter
wx_re = r"wx[A-Z]\S+"
# group - list body of a wxPerl "two methods instead of one overload" note
wxperl_overload_re = r'<B><FONT COLOR="#0000C8">wxPerl note:</FONT></B> In wxPerl there are two methods instead of a single overloaded method:<P>\s*?<UL><UL>(.*?)</UL></UL>'
# group - text of a generic wxPerl note, up to the next <P>
wxperl_re = '<B><FONT COLOR="#0000C8">wxPerl note:</FONT></B>(.*?)<P>'

# group - list body of a wxPython "Constructors supported" note
wxpython_constructors_re = r"""<B><FONT COLOR="#0000C8">wxPython note:</FONT></B> Constructors supported by wxPython are:<P>\s*?<UL><UL>(.*?)</UL></UL>"""
# groups - first cell (bold text), second cell, of one row inside such a list
wxpython_overload_re = r"""<TR><TD VALIGN=TOP.*?>\s*?<FONT FACE=".*?">\s*?<B>(.*?)</B>\s*?</FONT></TD>\s*?<TD VALIGN=TOP>\s*?<FONT FACE=".*?">(.*?)</FONT></TD></TR>"""

# group - list body of a wxPython "in place of a single overloaded method" note
wxpython_overloads_re = r'<B><FONT COLOR="#0000C8">wxPython note:</FONT></B> In place of a single overloaded method name, wxPython\s*?implements the following methods:<P>\s*?<UL><UL>(.*?)</UL></UL>'
# group - text of a generic wxPython note, up to the next <P>
wxpython_re = '<B><FONT COLOR="#0000C8">wxPython note:</FONT></B>(.*?)<P>'
50 | ||
51 | ||
52 | # convert wxWhatever to wx.Whatever | |
def namespacify_wxClasses(contents):
    """
    Return contents with every token matched by wx_re passed through
    wxReplaceFunc, which turns wxWhatever into wx.Whatever.
    """
    return re.compile(wx_re, re.MULTILINE | re.DOTALL).sub(wxReplaceFunc, contents)
58 | ||
def wxReplaceFunc(match):
    """
    Substitution callback: insert a dot after every "wx" in the matched text,
    unless the text mentions wxWidgets, wxPython or wxPerl, in which case it
    is returned unchanged.
    """
    token = match.group()
    for product in ("wxWidgets", "wxPython", "wxPerl"):
        if product in token:
            # product names are not class names; leave them alone
            return token
    return token.replace("wx", "wx.")
64 | ||
65 | ||
66 | ||
67 | # Methods to de-C++itize data. | |
def pythonize_text(contents):
    """
    Rewrite C++ spellings that should never appear in Python-facing text
    (false/true, NULL, const, ::), normalize line endings to "\\n", turn
    doubled single quotes into a double quote, and finally namespacify the
    wx class names.
    """
    # Applied strictly in this order: "non-NULL" has to be handled before
    # "NULL", and "\r\n" before a lone "\r".
    substitutions = (
        ("false", "False"),
        ("true", "True"),
        ("non-NULL", "not None"),
        ("NULL", "None"),
        ("const ", ""),
        ("::", "."),
        ("\r\n", "\n"),
        ("\r", "\n"),
        ("''", "\""),
    )
    for cpp_form, python_form in substitutions:
        contents = contents.replace(cpp_form, python_form)
    return namespacify_wxClasses(contents)
82 | ||
def pythonize_args(contents):
    """
    Remove C++isms from arguments (some of these terms may be used in other
    contexts in actual documentation, so we don't remove them there).

    contents is a fragment of the HTML docs holding a return type or an
    argument type. Returns the fragment with C++ qualifiers, pointer and
    reference markers removed and C++ types mapped to Python-ish names, after
    which it is passed through pythonize_text.
    """
    # Applied strictly in this order: longer spellings come before the
    # shorter ones they contain ("virtual void" before "virtual", "void*"
    # before "void" and "*", "&amp;" before "&").
    substitutions = (
        ("static", ""),
        ("virtual void", ""),
        ("virtual", ""),
        ("void*", "int"),
        ("void", ""),
        ("off_t", "long"),
        ("size_t", "long"),
        ("*", ""),
        # The input is HTML, where a C++ reference marker is written as the
        # entity; strip it whole so that no "amp;" residue is left behind.
        ("&amp;", ""),
        ("&", ""),
        ("char", "string"),
        ("wxChar", "string"),
        ("wxCoord", "int"),
        ("<A HREF=\"wx_wxstring.html#wxstring\">wxString</A>", "string"),
    )
    for cpp_form, python_form in substitutions:
        contents = contents.replace(cpp_form, python_form)

    return pythonize_text(contents)
105 | ||
def formatMethodProtos(protos):
    """
    Remove C++isms in the method prototypes.

    protos is a list of [return type, method name, args] lists, where args is
    a list of [arg type, arg name] lists (the shape built by
    getPrototypeFromMatch and getProtoArgsFromMatch). The lists are modified
    in place and protos is returned.
    """
    for proto in protos:
        proto[0] = pythonize_args(proto[0]).strip()
        proto[1] = namespacify_wxClasses(proto[1])

        for arg in proto[2]:
            # strip() returns a new string, so the result has to be stored
            arg[0] = pythonize_args(arg[0]).strip()

            # for arg names, we should be more careful about what we replace
            arg_name = pythonize_text(arg[1])
            arg_name = arg_name.replace("*", "")
            # drop the HTML entity first so that no "amp;" residue is left
            arg_name = arg_name.replace("&amp;", "").replace("&", "")
            arg[1] = arg_name

    return protos
125 | ||
126 | ||
127 | ||
128 | # functions for getting data from methods | |
def getMethodWxPythonOverrides(text, isConstructor=False):
    """
    Look for the wxPython note that lists replacement methods (or, when
    isConstructor is true, the supported constructors).

    Returns (overrides, returntext): overrides is a list of two-item lists
    built from the rows of the note's table (empty when there is no note),
    and returntext is text with the note removed.
    """
    if isConstructor:
        note_pattern = wxpython_constructors_re
    else:
        note_pattern = wxpython_overloads_re

    flags = re.MULTILINE | re.DOTALL | re.IGNORECASE
    found = re.compile(note_pattern, flags).search(text, 0)
    if not found:
        return [], text

    def getWxPythonOverridesFromMatch(row):
        return [namespacify_wxClasses(row.group(1)), pythonize_text(row.group(2))]

    # only the parsed rows matter here; the stripped list body is discarded
    overrides, _ = findAllMatches(wxpython_overload_re, found.group(1),
                                  getWxPythonOverridesFromMatch)

    #print "note is: " + found.group(0)
    return overrides, text.replace(found.group(0), "")
154 | ||
def getMethodWxPythonNote(text):
    """
    Extract the first generic wxPython note from text.

    Returns (note, returntext): note is the note's text ("" when there is
    none) and returntext is text with the note markup removed.
    """
    flags = re.MULTILINE | re.DOTALL | re.IGNORECASE
    found = re.compile(wxpython_re, flags).search(text)
    if not found:
        return "", text

    #print "note is: " + found.group(0)
    return found.group(1), text.replace(found.group(0), "")
173 | ||
def findAllMatches(re_string, text, handler, start=0):
    """
    findAllMatches finds matches for a given regex, then runs the handler function
    on each match, and returns a list of objects, along with a version of the
    text with the area matches were found stripped.
    Note the stripping of text is not generally usable yet, it assumes matches
    are in continuous blocks, which is true of the wx docs.

    re_string is compiled with MULTILINE, DOTALL and IGNORECASE; matching
    begins at index start (a negative start is treated as 0). Returns
    (results, returntext), where results holds handler(match) for every
    non-overlapping match in order, and returntext is text with the span from
    the first match's start to the last match's end removed (text itself when
    nothing matched).
    """
    regex = re.compile(re_string, re.MULTILINE | re.DOTALL | re.IGNORECASE)
    results = []

    startpoint = -1
    endpoint = -1

    # finditer always moves forward, so a pattern that can match the empty
    # string no longer makes the scan loop forever on one position
    for match in regex.finditer(text, max(start, 0)):
        if startpoint == -1:
            startpoint = match.start()
        results.append(handler(match))
        endpoint = match.end()

    if startpoint == -1:
        return results, text

    return results, text.replace(text[startpoint:endpoint], "")
203 | ||
def getMethodParams(text):
    """
    Collect the documented parameters of a method.

    The search position handed to findAllMatches is wherever the red
    "Parameters" heading is found in text (the result of str.find, so -1 when
    the heading is absent). Returns findAllMatches' (params, returntext)
    pair, params being a list of [name, description] lists.
    """
    heading = "<B><FONT COLOR=\"#FF0000\">Parameters</FONT></B><P>"
    return findAllMatches(param_re, text, getMethodParamsFromMatch, text.find(heading))
209 | ||
def getMethodParamsFromMatch(match):
    """
    Turn one param_re match into [name, description]; the description is
    pythonized and both items are stripped of surrounding whitespace.
    """
    param_name = match.group(1).strip()
    param_doc = pythonize_text(match.group(2)).strip()
    return [param_name, param_doc]
212 | ||
def getPrototypeFromMatch(match):
    """
    Turn one proto_re match into [return type, method name, args], where args
    is the list produced by getProtoArgs from the text between the
    parentheses.
    """
    return_type, method_name, raw_args = match.group(1, 2, 3)
    return [return_type, method_name, getProtoArgs(raw_args)]
215 | ||
def getProtoArgsFromMatch(match):
    """
    Turn one args_re match into an [arg type, arg name] list.
    """
    return list(match.group(1, 2))
218 | ||
219 | ||
220 | ||
221 | # These methods parse the docs, finding matches and then using the FromMatch | |
222 | # functions to parse the data. After that, the results are "Pythonized" | |
223 | # by removing C++isms. | |
def getMethodProtos(text):
    """
    Find every prototype in text and return (protos, returntext): the
    prototypes after formatMethodProtos has cleaned them up, and the text
    left over by findAllMatches.
    """
    raw_protos, remainder = findAllMatches(proto_re, text, getPrototypeFromMatch)
    cleaned = formatMethodProtos(raw_protos)
    return cleaned, remainder
227 | ||
def getProtoArgs(text):
    """
    Return the list of [arg type, arg name] lists found in a prototype's
    argument text; the stripped text from findAllMatches is not needed.
    """
    return findAllMatches(args_re, text, getProtoArgsFromMatch)[0]
231 | ||
def getMethodDesc(text):
    """
    Return the pythonized method description: everything in text before the
    first red section heading, or all of text when there is no heading.
    """
    heading_text = "<B><FONT COLOR=\"#FF0000\">"
    # split() hands back the whole text as its only item when the heading
    # does not occur
    description = text.split(heading_text, 1)[0]
    return pythonize_text(description)
240 | ||
241 | ||
def removeWxPerlNotes(text):
    """
    Return text with all wxPerl notes deleted: first the overload notes with
    their method lists, then whatever generic wxPerl notes remain.
    """
    flags = re.MULTILINE | re.DOTALL | re.IGNORECASE
    # the longer overload pattern has to go first, because the generic
    # pattern would otherwise consume just the opening of an overload note
    for note_pattern in (wxperl_overload_re, wxperl_re):
        text = re.compile(note_pattern, flags).sub("", text)
    return text
250 | ||
def removeCPPCode(text):
    """
    Return text with every <PRE>...</PRE> block (matched case-insensitively)
    deleted.
    """
    flags = re.MULTILINE | re.DOTALL | re.IGNORECASE
    return re.compile(code_re, flags).sub("", text)
256 | ||
257 | ||
def getMethod(match, parent):
    """
    Build a wxMethod from one method_re / lastmethod_re match.

    Group 1 of match is the method heading and group 2 its documentation
    body; parent is the class object the method belongs to (its name
    attribute is used to recognise constructors). The body is consumed in
    stages: prototypes, wxPython overrides, wxPython note, parameters, and
    what is left becomes the description.
    """
    heading, body = match.group(1), match.group(2)
    if "::" in heading:
        # keep the part after the class qualifier
        heading = heading.split("::")[1]
    name = namespacify_wxClasses(heading).strip()

    protos, remainder = getMethodProtos(body)

    #print "name: %s, parent name: %s" % (name, parent.name)
    isConstructor = (name == parent.name.replace("wx", "wx."))
    overrides, remainder = getMethodWxPythonOverrides(remainder, isConstructor)

    note, remainder = getMethodWxPythonNote(remainder)
    params, remainder = getMethodParams(remainder)
    desc = getMethodDesc(remainder)

    method = wxMethod(name, parent, protos, params, desc)
    method.pythonNote = note
    method.pythonOverrides = overrides
    if len(method.pythonOverrides) > 0:
        print("has overrides!\n\n\n\n")
    return method
281 | ||
def getClassDerivedFrom(text):
    """
    Return the namespacified names of the classes linked in the "Derived
    from" section of a class page, or an empty list when the page has no
    such section.
    """
    flags = re.MULTILINE | re.DOTALL | re.IGNORECASE
    section = re.compile(derived_re, flags).search(text)
    if not section:
        return []

    def getDerivedClassesFromMatch(link):
        return namespacify_wxClasses(link.group(1))

    base_classes, _ = findAllMatches(derived_class_re, section.group(1),
                                     getDerivedClassesFromMatch)
    return base_classes
294 | ||
def getClassDescription(text):
    """
    Return the pythonized class description: the text between the page's
    <H2> title and the first red section heading. Returns "" when the page
    has no such span, instead of failing with an IndexError.
    """

    def getClassDescriptionFromMatch(match):
        return match.group(1)

    desc, returntext = findAllMatches(class_desc_re, text, getClassDescriptionFromMatch)
    if not desc:
        # page without a recognisable description block
        return ""

    return pythonize_text(desc[0])
303 | ||
def getClassStyles(text, extraStyles=False):
    """
    Return the window styles documented on a class page as a list of
    [style name, description] lists. With extraStyles true the "Extra window
    styles" section is read instead of "Window styles". The list is empty
    when the section is missing.
    """
    if extraStyles:
        section_pattern = win_styles_extra_re
    else:
        section_pattern = win_styles_re

    flags = re.MULTILINE | re.DOTALL | re.IGNORECASE
    section = re.compile(section_pattern, flags).search(text)
    if not section:
        return []

    def getClassStyleFromMatch(row):
        return [namespacify_wxClasses(row.group(1)), pythonize_text(row.group(2))]

    styles, _ = findAllMatches(win_style_re, section.group(1), getClassStyleFromMatch)
    return styles
319 | ||
320 | # Main functions - these drive the process. | |
def getClassMethods(doc, parent):
    """
    Parse the methods documented on one class page.

    doc is the path of the class's HTML page and parent the class object the
    methods belong to. A method is kept only if it is the constructor or its
    name shows up in dir() of the corresponding wx class. For every kept
    GetXxx method, Xxx is appended to parent.props, or, if the wx class
    already has an attribute called Xxx, "<class>.Xxx" is appended to
    parent.propConflicts. Returns a dict mapping method name to method object,
    with the destructor and operator methods removed.
    """
    docfile = open(doc, "rb")
    try:
        contents = docfile.read()
    finally:
        # do not leave the handle open while the page is being parsed
        docfile.close()

    # get rid of some particularly tricky parts before parsing
    contents = contents.replace("<B>const</B>", "")
    contents = removeWxPerlNotes(contents)
    contents = removeCPPCode(contents)

    basename = parent.name.replace("wx", "")
    methods = {}

    def wrappedNames():
        # Plain attribute lookup instead of eval() on a string built from a
        # name that came out of the docs. Looked up lazily so that a class
        # missing from wx only fails when it is actually consulted.
        return dir(getattr(wx, basename))

    def addIfWrapped(match):
        # Parse one method and keep it when wxPython actually has it.
        newmethod = getMethod(match, parent)
        isConstructor = (basename == newmethod.name.replace("wx.", ""))
        if isConstructor or newmethod.name in wrappedNames():
            print("Adding %s.%s" % (parent.name, newmethod.name))
            methods[newmethod.name] = newmethod

    flags = re.MULTILINE | re.DOTALL | re.IGNORECASE
    start = 0
    for match in re.compile(method_re, flags).finditer(contents):
        start = match.end()
        addIfWrapped(match)

    # the final method is not followed by <HR>, so it needs its own pattern,
    # searched from where the last regular method ended
    match = re.compile(lastmethod_re, flags).search(contents, start)
    if match:
        addIfWrapped(match)

    for name in methods:
        if name[0:3] == "Get":
            propname = name[3:]
            if propname not in wrappedNames():
                parent.props.append(propname)
            else:
                parent.propConflicts.append(parent.name + "." + propname)

    # get rid of the destructor and operator methods
    ignore_methods = ["~" + namespacify_wxClasses(parent.name), "operator ==",
                      "operator <<", "operator >>", "operator =",
                      "operator !=", "operator*", "operator++"]
    for method in ignore_methods:
        methods.pop(method, None)

    return methods
370 | ||
def getClasses(doc):
    """
    Parse the class index page and every class page it links to.

    doc is the path of the HTML page containing the "Alphabetical class
    reference" list; the linked class pages are read from the same directory.
    Links are scanned from that heading onwards (from the top of the page if
    the heading is missing). Only classes whose name, minus the "wx", appears
    in dir(wx) are parsed. Returns a dict mapping the documented class name
    to its wxClass object, with the methods attribute filled in by
    getClassMethods.
    """
    indexfile = open(doc, "rb")
    try:
        contents = indexfile.read()
    finally:
        indexfile.close()

    link_regex = re.compile(link_re, re.MULTILINE | re.DOTALL | re.IGNORECASE)
    # find() gives -1 when the heading is absent; clamp so the scan then
    # explicitly covers the whole page
    start = max(contents.find("<H2>Alphabetical class reference</H2>"), 0)

    docsdir = os.path.dirname(doc)
    wx_names = dir(wx)
    classes = {}
    for result in link_regex.finditer(contents, start):
        name = result.group(2).strip()
        # the link target may carry an anchor; only the file name is needed
        classpage = result.group(1).split("#")[0]
        basename = name.replace("wx", "")
        if basename not in wx_names:
            continue

        classfile = os.path.join(docsdir, classpage)
        pagefile = open(classfile, "rb")
        try:
            classtext = pagefile.read()
        finally:
            pagefile.close()

        derivedClasses = getClassDerivedFrom(classtext)
        description = getClassDescription(classtext)
        styles = getClassStyles(classtext)
        extra_styles = getClassStyles(classtext, extraStyles=True)
        classes[name] = wxClass(name, description, derivedClasses, styles, extra_styles)
        classes[name].methods = getClassMethods(classfile, classes[name])

    return classes