]> git.saurik.com Git - wxWidgets.git/blob - wxPython/docs/bin/docparser/wxhtmlparse.py
fixed wxVsnprintf() to write as much as it can if the output buffer is too short
[wxWidgets.git] / wxPython / docs / bin / docparser / wxhtmlparse.py
1 import sys, os, string, glob
2 import re
3 from docparser.wxclasses import *
4 import wx
5
6
# Name of the output directory. Nothing in the visible part of this module
# reads it -- presumably used by code that imports this module; verify.
outputdir = "output"
8
9 #
10 # Class REs
11 #
12
# Raw strings are used so that regex escapes such as \s reach the re module
# unchanged instead of being treated as (invalid) string escape sequences.

# group 1 - text between the first <H2> heading and the first red section heading
class_desc_re = r"""<H2>.*?</H2>(.*?)<B><FONT COLOR="#FF0000">"""
# group 1 - body of the "Window styles" section
win_styles_re = r"""<B><FONT COLOR="#FF0000">Window styles</FONT></B><P>(.*?)<B><FONT COLOR="#FF0000">"""
# group 1 - body of the "Extra window styles" section
win_styles_extra_re = r"""<B><FONT COLOR="#FF0000">Extra window styles</FONT></B><P>(.*?)<B><FONT COLOR="#FF0000">"""
# groups - bold text of the first table cell, text of the second table cell
win_style_re = r"""<TR><TD VALIGN=TOP WIDTH=.*?>\s*?<FONT FACE=".*?">\s*?<B>(.*?)</B>\s*?</FONT></TD>\s*?<TD VALIGN=TOP>\s*?<FONT FACE=".*?">(.*?)</FONT></TD></TR>"""
# group 1 - body of the "Derived from" section
derived_re = r"""<B><FONT COLOR="#FF0000">Derived from</FONT></B><P>(.*?)<P>"""
# group 1 - link text of one anchor
derived_class_re = r"""<A HREF=".*?">(.*?)</A>"""
19
20 #
21 # Method REs
22 #
23
# Raw strings are used so that regex escapes such as \s and \( reach the re
# module unchanged instead of being treated as (invalid) string escapes.

# groups - header, description
method_re = r"<H3>(.*?)</H3>\s*?<P>(.*?)<HR>"
lastmethod_re = r"<H3>(.*?)</H3>\s*?<P>(.*?)\s*?<P>\s*?</FONT>"
headings_re = r'<B><FONT COLOR="#FF0000">(.*?)</FONT></B><P>(.*?)'
# groups = param name, param value
param_re = r"<I>(.*?)</I><UL><UL>(.*?)</UL></UL>"
# groups - return type, method name, arguments
proto_re = r"<B>(.*?)</B>.*?<B>(.*?)</B>\s*?\((.*?)\)"
# groups - arg type, arg name
args_re = r"<B>(.*?)</B>.*?<I>(.*?)</I>"
code_re = r"<PRE>(.*?)</PRE>"
# groups - link target, link text
link_re = r'<A href="(.*?)"><B>(.*?)</B></A><BR>'
36
37 #
38 # wxPython/wxPerl note REs
39 #
40
# Raw strings are used so that regex escapes such as \s and \S reach the re
# module unchanged instead of being treated as (invalid) string escapes.

# "wx" followed by an upper-case letter and at least one more non-space character
wx_re = r"wx[A-Z]\S+"
wxperl_overload_re = r'<B><FONT COLOR="#0000C8">wxPerl note:</FONT></B> In wxPerl there are two methods instead of a single overloaded method:<P>\s*?<UL><UL>(.*?)</UL></UL>'
wxperl_re = r'<B><FONT COLOR="#0000C8">wxPerl note:</FONT></B>(.*?)<P>'

wxpython_constructors_re = r"""<B><FONT COLOR="#0000C8">wxPython note:</FONT></B> Constructors supported by wxPython are:<P>\s*?<UL><UL>(.*?)</UL></UL>"""
# groups - bold text of the first table cell, text of the second table cell
wxpython_overload_re = r"""<TR><TD VALIGN=TOP.*?>\s*?<FONT FACE=".*?">\s*?<B>(.*?)</B>\s*?</FONT></TD>\s*?<TD VALIGN=TOP>\s*?<FONT FACE=".*?">(.*?)</FONT></TD></TR>"""

wxpython_overloads_re = r'<B><FONT COLOR="#0000C8">wxPython note:</FONT></B> In place of a single overloaded method name, wxPython\s*?implements the following methods:<P>\s*?<UL><UL>(.*?)</UL></UL>'
wxpython_re = r'<B><FONT COLOR="#0000C8">wxPython note:</FONT></B>(.*?)<P>'
50
51
52 # convert wxWhatever to wx.Whatever
def namespacify_wxClasses(contents):
    """
    Rewrite wx-prefixed identifiers found in contents.

    Every substring matching the module-level wx_re pattern is handed to
    wxReplaceFunc, whose return value replaces it. Returns the new string.
    """
    pattern = re.compile(wx_re, re.MULTILINE | re.DOTALL)
    return pattern.sub(wxReplaceFunc, contents)
58
def wxReplaceFunc(match):
    """
    Substitution callback: turn every "wx" in the matched text into "wx.".

    Matched text containing "wxWidgets", "wxPython" or "wxPerl" is returned
    unchanged.
    """
    matched = match.group()
    for product_name in ("wxWidgets", "wxPython", "wxPerl"):
        if product_name in matched:
            return matched
    return matched.replace("wx", "wx.")
64
65
66
67 # Methods to de-C++itize data.
def pythonize_text(contents):
    """
    Remove C++isms that definitely shouldn't be in any text.

    The substitutions below are plain substring replacements applied in the
    listed order; the result is then passed through namespacify_wxClasses.
    """
    substitutions = (
        ("false", "False"),
        ("true", "True"),
        # must run before the bare "NULL" rule
        ("non-NULL", "not None"),
        ("NULL", "None"),
        ("const ", ""),
        ("::", "."),
        # normalise line endings: CRLF first, then any remaining lone CR
        ("\r\n", "\n"),
        ("\r", "\n"),
        ("''", "\""),
    )
    for cpp_form, python_form in substitutions:
        contents = contents.replace(cpp_form, python_form)
    return namespacify_wxClasses(contents)
82
def pythonize_args(contents):
    """
    Remove C++isms from arguments (some of these terms may be used in other
    contexts in actual documentation, so we don't remove them there).

    The substitutions are plain substring replacements applied in the listed
    order; the result is then passed through pythonize_text.
    """
    substitutions = (
        ("static", ""),
        ("virtual void", ""),
        ("virtual", ""),
        # pointer-to-void becomes int before the bare "void" is dropped
        ("void*", "int"),
        ("void", ""),
        ("off_t", "long"),
        ("size_t", "long"),
        ("*", ""),
        ("&amp;", ""),
        ("&", ""),
        ("char", "string"),
        ("wxChar", "string"),
        ("wxCoord", "int"),
        ("<A HREF=\"wx_wxstring.html#wxstring\">wxString</A>", "string"),
    )
    for cpp_form, python_form in substitutions:
        contents = contents.replace(cpp_form, python_form)
    return pythonize_text(contents)
105
def formatMethodProtos(protos):
    """
    Remove C++isms in the method prototypes.

    protos is a list of mutable [return type, method name, args] lists, where
    args is a list of mutable [arg type, arg name] lists. The entries are
    rewritten in place and the same protos list is returned.
    """
    for proto in protos:
        proto[0] = pythonize_args(proto[0]).strip()
        proto[1] = namespacify_wxClasses(proto[1])

        for arg in proto[2]:
            # strip() returns a new string, so the result has to be stored
            # back; calling it as a bare statement leaves arg[0] unstripped.
            arg[0] = pythonize_args(arg[0]).strip()

            # for arg names, we should be more careful about what we replace
            arg[1] = pythonize_text(arg[1])
            arg[1] = arg[1].replace("*", "")
            arg[1] = arg[1].replace("&", "")

    return protos
125
126
127
128 # functions for getting data from methods
def getMethodWxPythonOverrides(text, isConstructor=False):
    """
    Extract the wxPython overload table from a method's text.

    Searches text with wxpython_constructors_re when isConstructor is true,
    otherwise with wxpython_overloads_re. Returns (overrides, returntext):
    overrides is a list of [first cell, second cell] pairs, one per table row
    matched by wxpython_overload_re, and returntext is text with every
    occurrence of the matched note removed. When no note is found the result
    is ([], text).
    """
    if isConstructor:
        note_pattern = wxpython_constructors_re
    else:
        note_pattern = wxpython_overloads_re

    flags = re.MULTILINE | re.DOTALL | re.IGNORECASE
    note_match = re.compile(note_pattern, flags).search(text, 0)
    if not note_match:
        return [], text

    def overrideFromRow(row):
        return [namespacify_wxClasses(row.group(1)), pythonize_text(row.group(2))]

    overrides = findAllMatches(wxpython_overload_re, note_match.group(1), overrideFromRow)[0]
    #print "note is: " + note_match.group(0)
    return overrides, text.replace(note_match.group(0), "")
154
def getMethodWxPythonNote(text):
    """
    Extract the first wxPython note from a method's text.

    Returns (note, returntext): note is group 1 of the first wxpython_re
    match, and returntext is text with every occurrence of the matched span
    removed. When there is no match the result is ("", text).
    """
    flags = re.MULTILINE | re.DOTALL | re.IGNORECASE
    note_match = re.compile(wxpython_re, flags).search(text)
    if note_match is None:
        return "", text

    #print "note is: " + note_match.group(0)
    return note_match.group(1), text.replace(note_match.group(0), "")
173
def findAllMatches(re_string, text, handler, start=0):
    """
    findAllMatches finds matches for a given regex, then runs the handler function
    on each match, and returns a list of objects, along with a version of the
    text with the area matches were found stripped.
    Note the stripping of text is not generally usable yet, it assumes matches
    are in continuous blocks, which is true of the wx docs.

    re_string is compiled with MULTILINE, DOTALL and IGNORECASE. The search
    begins at index start; a negative start (for example the -1 returned by a
    failed str.find) is treated as 0. handler receives each match object, in
    order, and its return values form the result list.

    Returns (results, returntext). returntext is text with the single region
    from the start of the first match to the end of the last match removed,
    or text unchanged when nothing matched.
    """
    regex = re.compile(re_string, re.MULTILINE | re.DOTALL | re.IGNORECASE)
    results = []

    startpoint = -1
    endpoint = -1

    # finditer always moves past a zero-width match, whereas re-searching
    # from match.end() by hand would find the same empty match forever.
    for match in regex.finditer(text, max(start, 0)):
        if startpoint == -1:
            startpoint = match.start()
        endpoint = match.end()
        results.append(handler(match))

    if startpoint == -1:
        return results, text

    # Cut by position rather than with str.replace, which would also delete
    # any identical copy of the span elsewhere in the text.
    return results, text[:startpoint] + text[endpoint:]
203
def getMethodParams(text):
    """
    Collect the parameter entries of a method.

    Runs findAllMatches with param_re, starting at the position of the
    "Parameters" heading (str.find yields -1 when the heading is absent and
    that value is passed on as-is). Returns (params, returntext) exactly as
    produced by findAllMatches, with getMethodParamsFromMatch as the handler.
    """
    heading_pos = text.find("<B><FONT COLOR=\"#FF0000\">Parameters</FONT></B><P>")
    return findAllMatches(param_re, text, getMethodParamsFromMatch, heading_pos)
209
def getMethodParamsFromMatch(match):
    """
    Build a [name, value] pair from a param_re match: group 1 stripped, and
    group 2 run through pythonize_text and then stripped.
    """
    param_name = match.group(1).strip()
    param_value = pythonize_text(match.group(2)).strip()
    return [param_name, param_value]
212
def getPrototypeFromMatch(match):
    """
    Build [group 1, group 2, parsed args] from a prototype match, where the
    args are obtained by passing group 3 to getProtoArgs.
    """
    return_type, method_name, raw_args = match.group(1, 2, 3)
    return [return_type, method_name, getProtoArgs(raw_args)]
215
def getProtoArgsFromMatch(match):
    """
    Return groups 1 and 2 of the match as a two-element list.
    """
    return list(match.group(1, 2))
218
219
220
221 # These methods parse the docs, finding matches and then using the FromMatch
222 # functions to parse the data. After that, the results are "Pythonized"
223 # by removing C++isms.
def getMethodProtos(text):
    """
    Find every prototype in text (proto_re via findAllMatches) and clean the
    results with formatMethodProtos.

    Returns (protos, returntext), where returntext is the stripped text
    reported by findAllMatches.
    """
    raw_protos, remainder = findAllMatches(proto_re, text, getPrototypeFromMatch)
    cleaned_protos = formatMethodProtos(raw_protos)
    return cleaned_protos, remainder
227
def getProtoArgs(text):
    """
    Return the list of results produced by getProtoArgsFromMatch for every
    args_re match in text; the stripped text from findAllMatches is ignored.
    """
    return findAllMatches(args_re, text, getProtoArgsFromMatch)[0]
231
def getMethodDesc(text):
    """
    Return the method description: everything in text before the first red
    section heading (or all of text when there is none), run through
    pythonize_text.
    """
    # partition() yields the whole string as its first item when the
    # separator is absent, matching a "not found" slice of the full text.
    description = text.partition("<B><FONT COLOR=\"#FF0000\">")[0]
    return pythonize_text(description)
240
241
def removeWxPerlNotes(text):
    """
    Delete wxPerl notes from text.

    Matches of wxperl_overload_re are removed first, then matches of
    wxperl_re in what remains. Returns the resulting string.
    """
    flags = re.MULTILINE | re.DOTALL | re.IGNORECASE
    for note_pattern in (wxperl_overload_re, wxperl_re):
        text = re.compile(note_pattern, flags).sub("", text)
    return text
250
def removeCPPCode(text):
    """
    Return text with every match of code_re (a <PRE>...</PRE> block) removed.
    """
    flags = re.MULTILINE | re.DOTALL | re.IGNORECASE
    return re.compile(code_re, flags).sub("", text)
256
257
def getMethod(match, parent):
    """
    Build a wxMethod from a method match.

    match supplies the method header in group 1 and the method body in
    group 2. parent is the owning class object; its name attribute is read to
    detect constructors. The body is consumed step by step: prototypes, the
    wxPython overload table, the wxPython note and the parameters are each
    extracted and stripped from the text, and what is left becomes the
    description.

    Returns the wxMethod, with pythonNote and pythonOverrides set.
    """
    name = match.group(1)
    if name.find("::") != -1:
        # keep the part after the first "::"
        name = name.split("::")[1]
    name = namespacify_wxClasses(name).strip()
    protos, remainder = getMethodProtos(match.group(2))

    # a method named like its (namespacified) class is the constructor
    #print "name: %s, parent name: %s" % (name, parent.name)
    isConstructor = (name == parent.name.replace("wx", "wx."))
    overrides, remainder = getMethodWxPythonOverrides(remainder, isConstructor)

    note, remainder = getMethodWxPythonNote(remainder)
    params, remainder = getMethodParams(remainder)
    desc = getMethodDesc(remainder)

    method = wxMethod(name, parent, protos, params, desc)
    method.pythonNote = note
    method.pythonOverrides = overrides
    if len(method.pythonOverrides) > 0:
        # single-argument call form: same output under the Python 2 print
        # statement and the Python 3 print function
        print("has overrides!\n\n\n\n")
    return method
281
def getClassDerivedFrom(text):
    """
    Return the names listed in the "Derived from" section of a class page.

    The section is located with derived_re; each link text inside it
    (derived_class_re) is passed through namespacify_wxClasses. Returns an
    empty list when the section is not found.
    """
    flags = re.MULTILINE | re.DOTALL | re.IGNORECASE
    section = re.compile(derived_re, flags).search(text)
    if not section:
        return []

    def baseNameFromLink(link):
        return namespacify_wxClasses(link.group(1))

    return findAllMatches(derived_class_re, section.group(1), baseNameFromLink)[0]
294
def getClassDescription(text):
    """
    Return the class description from a class page.

    The description is group 1 of the first class_desc_re match, run through
    pythonize_text. Returns an empty string when the page has no match,
    rather than failing with IndexError on an empty result list.
    """
    flags = re.MULTILINE | re.DOTALL | re.IGNORECASE
    # only the first match is used, so a single search is sufficient
    match = re.compile(class_desc_re, flags).search(text)
    if match is None:
        return ""
    return pythonize_text(match.group(1))
303
def getClassStyles(text, extraStyles=False):
    """
    Return the window styles listed on a class page.

    The section is located with win_styles_extra_re when extraStyles is true,
    otherwise with win_styles_re. Each table row inside it (win_style_re)
    yields a [first cell, second cell] pair: the first cell is passed through
    namespacify_wxClasses and the second through pythonize_text. Returns an
    empty list when the section is not found.
    """
    if extraStyles:
        section_pattern = win_styles_extra_re
    else:
        section_pattern = win_styles_re

    flags = re.MULTILINE | re.DOTALL | re.IGNORECASE
    section = re.compile(section_pattern, flags).search(text)
    if not section:
        return []

    def styleFromRow(row):
        return [namespacify_wxClasses(row.group(1)), pythonize_text(row.group(2))]

    return findAllMatches(win_style_re, section.group(1), styleFromRow)[0]
319
320 # Main functions - these drive the process.
def _addMethodIfWrapped(match, parent, methods):
    """Parse one method match and store it in methods if it is the constructor or an attribute of the wx class."""
    newmethod = getMethod(match, parent)
    basename = parent.name.replace("wx", "")
    isConstructor = (basename == newmethod.name.replace("wx.", ""))
    # getattr replaces the former eval() of a "wx.<basename>" expression
    if isConstructor or newmethod.name in dir(getattr(wx, basename)):
        print("Adding %s.%s" % (parent.name, newmethod.name))
        methods[newmethod.name] = newmethod


def getClassMethods(doc, parent):
    """
    Parse the methods documented in the class page file doc.

    parent is the class object the methods belong to. Its name attribute is
    read, and its props and propConflicts lists are appended to: for every
    kept method called GetXxx, "Xxx" goes to props unless the wx class
    already has an attribute of that name, in which case
    "<parent.name>.Xxx" goes to propConflicts.

    Returns a dict mapping method name to method object, without the
    destructor and operator entries.
    """
    # the with block closes the file even if read() fails
    with open(doc, "rb") as docfile:
        contents = docfile.read()

    # get rid of some particularly tricky parts before parsing
    contents = contents.replace("<B>const</B>", "")
    contents = removeWxPerlNotes(contents)
    contents = removeCPPCode(contents)

    flags = re.MULTILINE | re.DOTALL | re.IGNORECASE
    methods = {}

    method_regex = re.compile(method_re, flags)
    start = 0
    match = method_regex.search(contents)
    while match:
        start = match.end()
        _addMethodIfWrapped(match, parent, methods)
        match = method_regex.search(contents, start)

    # method_re needs a trailing <HR>; lastmethod_re picks up one more method
    # after the last such match (presumably the final one on the page).
    match = re.compile(lastmethod_re, flags).search(contents, start)
    if match:
        _addMethodIfWrapped(match, parent, methods)

    basename = parent.name.replace("wx", "")
    wx_members = None
    for name in methods:
        if name[0:3] != "Get":
            continue
        if wx_members is None:
            # looked up lazily so nothing is resolved when there are no getters
            wx_members = dir(getattr(wx, basename))
        propname = name[3:]
        if propname not in wx_members:
            parent.props.append(propname)
        else:
            parent.propConflicts.append(parent.name + "." + propname)

    # get rid of the destructor and operator methods
    ignore_methods = ["~" + namespacify_wxClasses(parent.name), "operator ==",
                      "operator &lt;&lt;", "operator &gt;&gt;", "operator =",
                      "operator !=", "operator*", "operator++"]
    for method in ignore_methods:
        methods.pop(method, None)

    return methods
370
def getClasses(doc):
    """
    Parse the class index page doc and every class page it links to.

    Links are read from the "Alphabetical class reference" heading onwards
    (from the start of the page when the heading is missing). A link is only
    followed when its text, with "wx" removed, names an attribute of the wx
    module; the linked page is looked up in the same directory as doc.

    Returns a dict mapping the class name as written in the index to a
    wxClass whose methods attribute has been filled in by getClassMethods.
    """
    # the with blocks close each file as soon as it has been read
    with open(doc, "rb") as indexfile:
        contents = indexfile.read()

    link_regex = re.compile(link_re, re.MULTILINE | re.DOTALL | re.IGNORECASE)
    # find() gives -1 when the heading is absent; search from 0 in that case
    start = max(contents.find("<H2>Alphabetical class reference</H2>"), 0)
    docdir = os.path.dirname(doc)
    wx_names = set(dir(wx))

    classes = {}
    for result in link_regex.finditer(contents, start):
        name = result.group(2).strip()
        classpage = result.group(1).split("#")[0]
        basename = name.replace("wx", "")
        if basename not in wx_names:
            continue

        classfile = os.path.join(docdir, classpage)
        with open(classfile, "rb") as pagefile:
            classtext = pagefile.read()

        derivedClasses = getClassDerivedFrom(classtext)
        description = getClassDescription(classtext)
        styles = getClassStyles(classtext)
        extra_styles = getClassStyles(classtext, extraStyles=True)
        classes[name] = wxClass(name, description, derivedClasses, styles, extra_styles)
        classes[name].methods = getClassMethods(classfile, classes[name])

    return classes
392 result = link_regex.search(contents, start)
393
394 return classes