+import sys, os, string, glob
+import re
+from docparser.wxclasses import *
+import wx
+
+
outputdir = "output"

#
# Class REs
#
# Every pattern below is a raw string so that regex escapes such as \s and \(
# are handed to the re module verbatim instead of depending on Python passing
# unrecognised string escapes through unchanged.
#

class_desc_re = r"""<H2>.*?</H2>(.*?)<B><FONT COLOR="#FF0000">"""
win_styles_re = r"""<B><FONT COLOR="#FF0000">Window styles</FONT></B><P>(.*?)<B><FONT COLOR="#FF0000">"""
win_styles_extra_re = r"""<B><FONT COLOR="#FF0000">Extra window styles</FONT></B><P>(.*?)<B><FONT COLOR="#FF0000">"""
# groups - style name, style description
win_style_re = r"""<TR><TD VALIGN=TOP WIDTH=.*?>\s*?<FONT FACE=".*?">\s*?<B>(.*?)</B>\s*?</FONT></TD>\s*?<TD VALIGN=TOP>\s*?<FONT FACE=".*?">(.*?)</FONT></TD></TR>"""
derived_re = r"""<B><FONT COLOR="#FF0000">Derived from</FONT></B><P>(.*?)<P>"""
derived_class_re = r"""<A HREF=".*?">(.*?)</A>"""

#
# Method REs
#

# groups - header, description
method_re = r"<H3>(.*?)</H3>\s*?<P>(.*?)<HR>"
lastmethod_re = r"<H3>(.*?)</H3>\s*?<P>(.*?)\s*?<P>\s*?</FONT>"
headings_re = r'<B><FONT COLOR="#FF0000">(.*?)</FONT></B><P>(.*?)'
# groups = param name, param value
param_re = r"<I>(.*?)</I><UL><UL>(.*?)</UL></UL>"
# groups - return type, method name, arguments
proto_re = r"<B>(.*?)</B>.*?<B>(.*?)</B>\s*?\((.*?)\)"
# groups - arg type, arg name
args_re = r"<B>(.*?)</B>.*?<I>(.*?)</I>"
code_re = r"<PRE>(.*?)</PRE>"
# groups - link target, link text
link_re = r'<A href="(.*?)"><B>(.*?)</B></A><BR>'

#
# wxPython/wxPerl note REs
#

wx_re = r"wx[A-Z]\S+"
wxperl_overload_re = r'<B><FONT COLOR="#0000C8">wxPerl note:</FONT></B> In wxPerl there are two methods instead of a single overloaded method:<P>\s*?<UL><UL>(.*?)</UL></UL>'
wxperl_re = r'<B><FONT COLOR="#0000C8">wxPerl note:</FONT></B>(.*?)<P>'

wxpython_constructors_re = r"""<B><FONT COLOR="#0000C8">wxPython note:</FONT></B> Constructors supported by wxPython are:<P>\s*?<UL><UL>(.*?)</UL></UL>"""
wxpython_overload_re = r"""<TR><TD VALIGN=TOP.*?>\s*?<FONT FACE=".*?">\s*?<B>(.*?)</B>\s*?</FONT></TD>\s*?<TD VALIGN=TOP>\s*?<FONT FACE=".*?">(.*?)</FONT></TD></TR>"""

wxpython_overloads_re = r'<B><FONT COLOR="#0000C8">wxPython note:</FONT></B> In place of a single overloaded method name, wxPython\s*?implements the following methods:<P>\s*?<UL><UL>(.*?)</UL></UL>'
wxpython_re = r'<B><FONT COLOR="#0000C8">wxPython note:</FONT></B>(.*?)<P>'
+
+
+# convert wxWhatever to wx.Whatever
def namespacify_wxClasses(contents):
    """
    Return contents with every wx_re match rewritten by wxReplaceFunc.
    """
    pattern = re.compile(wx_re, re.MULTILINE | re.DOTALL)
    return pattern.sub(wxReplaceFunc, contents)
+
def wxReplaceFunc(match):
    """
    Substitution callback: insert a dot after each "wx" in the matched text.

    Matches containing the product names wxWidgets, wxPython or wxPerl are
    returned untouched.
    """
    matched = match.group()
    for product in ("wxWidgets", "wxPython", "wxPerl"):
        if product in matched:
            return matched
    return matched.replace("wx", "wx.")
+
+
+
+# Methods to de-C++itize data.
def pythonize_text(contents):
    """
    Rewrite C++ spellings that should never appear in Python-facing text,
    normalise line endings, then namespacify wx class names.
    """
    # Applied strictly in this order: "non-NULL" must be handled before
    # "NULL", and "\r\n" before the lone "\r".
    substitutions = (
        ("false", "False"),
        ("true", "True"),
        ("non-NULL", "not None"),
        ("NULL", "None"),
        ("const ", ""),
        ("::", "."),
        ("\r\n", "\n"),
        ("\r", "\n"),
        ("''", "\""),
    )
    for old, new in substitutions:
        contents = contents.replace(old, new)
    return namespacify_wxClasses(contents)
+
def pythonize_args(contents):
    """
    Remove C++isms from arguments (some of these terms may be used in other
    contexts in actual documentation, so we don't remove them there).

    contents is a fragment of the HTML docs holding a return type or an
    argument type. The cleaned text is finally passed through
    pythonize_text and returned.
    """
    # Applied strictly in this order: longer phrases come before the shorter
    # ones they contain ("virtual void" before "virtual"/"void", "void*"
    # before "void" and "*").
    substitutions = (
        ("static", ""),
        ("virtual void", ""),
        ("virtual", ""),
        ("void*", "int"),
        ("void", ""),
        ("off_t", "long"),
        ("size_t", "long"),
        ("*", ""),
        # The input is HTML, so a C++ reference marker shows up as the entity
        # "&amp;". Strip the entity first; stripping only the bare "&" would
        # leave a stray "amp;" glued to the type name.
        ("&amp;", ""),
        ("&", ""),
        ("char", "string"),
        ("wxChar", "string"),
        ("wxCoord", "int"),
        ("<A HREF=\"wx_wxstring.html#wxstring\">wxString</A>", "string"),
    )
    for old, new in substitutions:
        contents = contents.replace(old, new)

    return pythonize_text(contents)
+
def formatMethodProtos(protos):
    """
    Remove C++isms in the method prototypes.

    protos is a list of [return type, method name, args] lists, where args
    is a list of [arg type, arg name] lists. The lists are modified in place
    and protos itself is returned.
    """
    for proto in protos:
        proto[0] = pythonize_args(proto[0]).strip()
        proto[1] = namespacify_wxClasses(proto[1])

        for arg in proto[2]:
            # str.strip() returns a new string, so the result has to be
            # stored back for the whitespace to actually be removed.
            arg[0] = pythonize_args(arg[0]).strip()

            # for arg names, we should be more careful about what we replace
            arg[1] = pythonize_text(arg[1])
            arg[1] = arg[1].replace("*", "")
            arg[1] = arg[1].replace("&", "")

    return protos
+
+
+
+# functions for getting data from methods
def getMethodWxPythonOverrides(text, isConstructor=False):
    """
    Extract the wxPython overload table from a method description.

    Uses the constructor-specific note pattern when isConstructor is true,
    otherwise the generic overload note pattern. Returns (overrides, text
    with the note removed); overrides is a list of two-item lists built from
    the two groups of each wxpython_overload_re match. When no note is
    present the result is ([], text).
    """
    if isConstructor:
        note_pattern = wxpython_constructors_re
    else:
        note_pattern = wxpython_overloads_re

    note_regex = re.compile(note_pattern, re.MULTILINE | re.DOTALL | re.IGNORECASE)
    note_match = note_regex.search(text, 0)
    if not note_match:
        return [], text

    def _overrideFromRow(row):
        return [namespacify_wxClasses(row.group(1)), pythonize_text(row.group(2))]

    # Only the parsed rows are needed; the stripped table text is discarded.
    overrides = findAllMatches(wxpython_overload_re, note_match.group(1), _overrideFromRow)[0]

    #print "note is: " + note_match.group(0)
    return overrides, text.replace(note_match.group(0), "")
+
def getMethodWxPythonNote(text):
    """
    Pull the first generic "wxPython note" out of a method description.

    Returns (note, text with the note removed), or ("", text) when the
    description contains no such note.
    """
    note_regex = re.compile(wxpython_re, re.MULTILINE | re.DOTALL | re.IGNORECASE)
    found = note_regex.search(text)
    if not found:
        return "", text

    #print "note is: " + found.group(0)
    return found.group(1), text.replace(found.group(0), "")
+
def findAllMatches(re_string, text, handler, start=0):
    """
    findAllMatches finds matches for a given regex, then runs the handler function
    on each match, and returns a list of objects, along with a version of the
    text with the area matches were found stripped.
    Note the stripping of text is not generally usable yet, it assumes matches
    are in continuous blocks, which is true of the wx docs.

    re_string -- pattern source; compiled with MULTILINE, DOTALL and IGNORECASE
    text      -- string to scan
    handler   -- callable taking a match object; its return values are collected
    start     -- index in text at which scanning begins

    Returns (results, returntext).
    """
    regex = re.compile(re_string, re.MULTILINE | re.DOTALL | re.IGNORECASE)
    results = []

    startpoint = -1
    endpoint = -1

    # finditer always advances, even past zero-width matches; re-running
    # search() from match.end() would spin forever on such a match.
    for match in regex.finditer(text, start):
        if startpoint == -1:
            startpoint = match.start()
        results.append(handler(match))
        endpoint = match.end()

    returntext = text
    # Strip the block running from the first match to the end of the last.
    # The comparison is false both when nothing matched and when the block
    # is empty.
    if endpoint > startpoint:
        returntext = text.replace(text[startpoint:endpoint], "")

    return results, returntext
+
def getMethodParams(text):
    """
    Collect the [name, description] parameter entries of a method.

    Scanning begins at the "Parameters" heading. Returns (params, text with
    the matched parameter block removed).
    """
    # NOTE(review): when the heading is absent, find() returns -1 and that
    # value is passed on as the scan start -- confirm this is intended.
    heading_pos = text.find("<B><FONT COLOR=\"#FF0000\">Parameters</FONT></B><P>")
    return findAllMatches(param_re, text, getMethodParamsFromMatch, heading_pos)
+
def getMethodParamsFromMatch(match):
    """
    Turn one param_re match into [stripped name, pythonized stripped text].
    """
    param_name = match.group(1).strip()
    param_text = pythonize_text(match.group(2)).strip()
    return [param_name, param_text]
+
def getPrototypeFromMatch(match):
    """
    Turn one proto_re match into [return type, method name, parsed args].
    """
    return_type, method_name, raw_args = match.group(1, 2, 3)
    return [return_type, method_name, getProtoArgs(raw_args)]
+
def getProtoArgsFromMatch(match):
    """
    Turn one args_re match into a two-item list of its first two groups.
    """
    return list(match.group(1, 2))
+
+
+
+# These methods parse the docs, finding matches and then using the FromMatch
+# functions to parse the data. After that, the results are "Pythonized"
+# by removing C++isms.
def getMethodProtos(text):
    """
    Parse every prototype in text and pythonize it.

    Returns (formatted prototypes, text with the prototype block removed).
    """
    raw_protos, leftover = findAllMatches(proto_re, text, getPrototypeFromMatch)
    cleaned = formatMethodProtos(raw_protos)
    return cleaned, leftover
+
def getProtoArgs(text):
    """
    Return the list of [arg type, arg name] pairs found in text.
    """
    # findAllMatches also returns the stripped text, which is not needed here.
    return findAllMatches(args_re, text, getProtoArgsFromMatch)[0]
+
def getMethodDesc(text):
    """
    Return the pythonized part of text that precedes the first red section
    heading, or all of text when there is no such heading.
    """
    heading_marker = "<B><FONT COLOR=\"#FF0000\">"
    # Splitting once keeps everything before the first marker; without a
    # marker the single piece is the whole text.
    before_heading = text.split(heading_marker, 1)[0]
    return pythonize_text(before_heading)
+
+
def removeWxPerlNotes(text):
    """
    Delete wxPerl notes from text: first the overload notes together with
    their method list, then any remaining plain wxPerl notes.
    """
    flags = re.MULTILINE | re.DOTALL | re.IGNORECASE
    # The overload pattern must run first, since the generic pattern would
    # otherwise consume only its opening part.
    for note_pattern in (wxperl_overload_re, wxperl_re):
        text = re.compile(note_pattern, flags).sub("", text)
    return text
+
def removeCPPCode(text):
    """
    Return text with every <PRE>...</PRE> block removed.
    """
    flags = re.MULTILINE | re.DOTALL | re.IGNORECASE
    return re.compile(code_re, flags).sub("", text)
+
+
def getMethod(match, parent):
    """
    Build a wxMethod from one method-section match.

    match  -- match whose group 1 is the section header and group 2 the body
    parent -- the owning class object; its name attribute is read to detect
              the constructor

    The body is consumed in stages (prototypes, wxPython overrides, wxPython
    note, parameters), each stage working on the text left by the previous
    one; the description is taken from what remains.
    """
    name = match.group(1)
    if "::" in name:
        # Keep the part after the first class qualifier.
        name = name.split("::")[1]
    name = namespacify_wxClasses(name).strip()

    protos, remainder = getMethodProtos(match.group(2))

    #print "name: %s, parent name: %s" % (name, parent.name)
    isConstructor = (name == parent.name.replace("wx", "wx."))
    overrides, remainder = getMethodWxPythonOverrides(remainder, isConstructor)

    note, remainder = getMethodWxPythonNote(remainder)
    params, remainder = getMethodParams(remainder)
    desc = getMethodDesc(remainder)

    method = wxMethod(name, parent, protos, params, desc)
    method.pythonNote = note
    method.pythonOverrides = overrides
    if len(method.pythonOverrides) > 0:
        print("has overrides!\n\n\n\n")
    return method
+
def getClassDerivedFrom(text):
    """
    Return the namespacified link texts found in the "Derived from" section
    of a class page, or an empty list when the section is missing.
    """
    section_regex = re.compile(derived_re, re.MULTILINE | re.DOTALL | re.IGNORECASE)
    section = section_regex.search(text)
    if not section:
        return []

    def _classFromLink(link):
        return namespacify_wxClasses(link.group(1))

    return findAllMatches(derived_class_re, section.group(1), _classFromLink)[0]
+
def getClassDescription(text):
    """
    Return the pythonized class description captured by class_desc_re: the
    text between the page's <H2> heading and the first red section heading.

    Returns an empty string when the page has no such section.
    """

    def getClassDescriptionFromMatch(match):
        return match.group(1)

    desc, returntext = findAllMatches(class_desc_re, text, getClassDescriptionFromMatch)

    # Indexing desc[0] unconditionally would raise IndexError on a page
    # without a matching section.
    if not desc:
        return ""

    return pythonize_text(desc[0])
+
def getClassStyles(text, extraStyles=False):
    """
    Return the [style name, description] rows of a class page's
    "Window styles" table, or of its "Extra window styles" table when
    extraStyles is true. Returns an empty list when the section is missing.
    """
    if extraStyles:
        section_pattern = win_styles_extra_re
    else:
        section_pattern = win_styles_re

    section_regex = re.compile(section_pattern, re.MULTILINE | re.DOTALL | re.IGNORECASE)
    section = section_regex.search(text)
    if not section:
        return []

    def _styleFromRow(row):
        return [namespacify_wxClasses(row.group(1)), pythonize_text(row.group(2))]

    return findAllMatches(win_style_re, section.group(1), _styleFromRow)[0]
+
+# Main functions - these drive the process.
def _addWrappedMethod(methods, newmethod, parent):
    """Record newmethod in methods if it is the constructor or exists on the wx class."""
    basename = parent.name.replace("wx", "")
    isConstructor = (basename == newmethod.name.replace("wx.", ""))
    # getattr performs the same attribute lookup the old eval() did, without
    # executing text that originates from the parsed documentation.
    if isConstructor or newmethod.name in dir(getattr(wx, basename)):
        # Single parenthesised argument: prints identically whether print is
        # a statement or a function.
        print("Adding %s.%s" % (parent.name, newmethod.name))
        methods[newmethod.name] = newmethod


def getClassMethods(doc, parent):
    """
    Parse the methods documented in a class HTML page.

    doc    -- path of the class page
    parent -- class object owning the methods; its name is read, and its
              props / propConflicts lists are appended to

    Only the constructor and methods that exist on the matching wx class are
    kept. For every kept "GetXxx" method, "Xxx" is appended to parent.props
    unless the wx class already has an attribute of that name, in which case
    "<class name>.Xxx" is appended to parent.propConflicts.

    Returns a dict mapping method name to method object.
    """
    docfile = open(doc, "rb")
    try:
        contents = docfile.read()
    finally:
        # Always release the handle, even if the read fails.
        docfile.close()

    # get rid of some particularly tricky parts before parsing
    contents = contents.replace("<B>const</B>", "")
    contents = removeWxPerlNotes(contents)
    contents = removeCPPCode(contents)

    methods = {}
    start = 0
    method_regex = re.compile(method_re, re.MULTILINE | re.DOTALL | re.IGNORECASE)
    match = method_regex.search(contents)
    while match:
        start = match.end()
        _addWrappedMethod(methods, getMethod(match, parent), parent)
        match = method_regex.search(contents, start)

    # The final method on a page is not followed by <HR>, so it is picked up
    # with a separate pattern, scanning from where the main loop stopped.
    lastmethod_regex = re.compile(lastmethod_re, re.MULTILINE | re.DOTALL | re.IGNORECASE)
    match = lastmethod_regex.search(contents, start)
    if match:
        _addWrappedMethod(methods, getMethod(match, parent), parent)

    basename = parent.name.replace("wx", "")
    for name in methods:
        if name[0:3] == "Get":
            propname = name[3:]
            if propname not in dir(getattr(wx, basename)):
                parent.props.append(propname)
            else:
                parent.propConflicts.append(parent.name + "." + propname)

    # get rid of the destructor and operator methods
    ignore_methods = ["~" + namespacify_wxClasses(parent.name), "operator ==",
                      "operator <<", "operator >>", "operator =",
                      "operator !=", "operator*", "operator++"]
    for method in ignore_methods:
        methods.pop(method, None)

    return methods
+
def _readDocFile(path):
    """Return the full contents of path, closing the file afterwards."""
    handle = open(path, "rb")
    try:
        return handle.read()
    finally:
        handle.close()


def getClasses(doc):
    """
    Build class objects for the classes linked from the class index page.

    doc -- path of the index page; class pages are resolved relative to its
           directory

    Links are scanned starting at the "Alphabetical class reference"
    heading. A linked class is processed only when its name, with "wx"
    removed, is an attribute of the wx module.

    Returns a dict mapping the documented class name to a wxClass whose
    methods attribute has been filled in by getClassMethods.
    """
    contents = _readDocFile(doc)
    link_regex = re.compile(link_re, re.MULTILINE | re.DOTALL | re.IGNORECASE)
    start = contents.find("<H2>Alphabetical class reference</H2>")
    docdir = os.path.dirname(doc)
    # dir(wx) does not change while parsing, so list it once rather than on
    # every link.
    wx_names = dir(wx)

    classes = {}
    result = link_regex.search(contents, start)
    while result:
        start = result.end()
        name = result.group(2).strip()
        # Drop the "#anchor" part of the link target to get the page file.
        classpage = result.group(1).split("#")[0]
        basename = name.replace("wx", "")
        if basename in wx_names:
            classfile = os.path.join(docdir, classpage)
            classtext = _readDocFile(classfile)
            derivedClasses = getClassDerivedFrom(classtext)
            description = getClassDescription(classtext)
            styles = getClassStyles(classtext)
            extra_styles = getClassStyles(classtext, extraStyles=True)
            classes[name] = wxClass(name, description, derivedClasses, styles, extra_styles)
            classes[name].methods = getClassMethods(classfile, classes[name])
        result = link_regex.search(contents, start)

    return classes