From ce1245e1fa3d59428e5f3c9bb78d77bcfe9e586c Mon Sep 17 00:00:00 2001 From: Robin Dunn Date: Sat, 9 Sep 2006 19:42:51 +0000 Subject: [PATCH] It's not in use yet, but add Kevin's docparser code so it doesn't get lost... git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@41113 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775 --- wxPython/docs/bin/docparser/__init__.py | 0 wxPython/docs/bin/docparser/restconvert.py | 32 ++ wxPython/docs/bin/docparser/wxclasses.py | 173 +++++++++ wxPython/docs/bin/docparser/wxhtmlparse.py | 394 +++++++++++++++++++++ wxPython/docs/bin/pythonize_docs.py | 119 +++++++ 5 files changed, 718 insertions(+) create mode 100644 wxPython/docs/bin/docparser/__init__.py create mode 100644 wxPython/docs/bin/docparser/restconvert.py create mode 100644 wxPython/docs/bin/docparser/wxclasses.py create mode 100644 wxPython/docs/bin/docparser/wxhtmlparse.py create mode 100644 wxPython/docs/bin/pythonize_docs.py diff --git a/wxPython/docs/bin/docparser/__init__.py b/wxPython/docs/bin/docparser/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/wxPython/docs/bin/docparser/restconvert.py b/wxPython/docs/bin/docparser/restconvert.py new file mode 100644 index 0000000000..e3f98d99c9 --- /dev/null +++ b/wxPython/docs/bin/docparser/restconvert.py @@ -0,0 +1,32 @@ +import re + +conversion_table = { + "B" : "**", + "I" : "*", + "TT": "``", + "P" : "\n", + "BR": "\n", + } + +html_classlink_re = "(.*?)" + +def htmlToReST(html): + # \n is useless in the HTML docs, we'll use P tags to break paragraphs. + restText = html.replace("\n", "") + restText = restText.replace("*", "\\*") + if restText.find("

") == 0: + restText = restText[3:] + + link_regex = re.compile(html_classlink_re, re.DOTALL | re.MULTILINE | re.IGNORECASE) + restText = link_regex.sub("`\g<1>`", restText) + + + for htmltag in conversion_table: + + for tagname in [htmltag, htmltag.lower()]: + restText = restText.replace("<%s>" % tagname, conversion_table[htmltag]) + restText = restText.replace("" % tagname, conversion_table[htmltag]) + + # we need to escape any remaining double-quotes + restText = restText.replace('"', '\\"') + return restText.strip() \ No newline at end of file diff --git a/wxPython/docs/bin/docparser/wxclasses.py b/wxPython/docs/bin/docparser/wxclasses.py new file mode 100644 index 0000000000..db10e80076 --- /dev/null +++ b/wxPython/docs/bin/docparser/wxclasses.py @@ -0,0 +1,173 @@ +import wx +import restconvert +html_heading = "

%s

" + +def stylesAsHtml(styles, extraStyles=False): + heading = "Window styles" + if extraStyles: + heading = "Extra window styles" + + html = html_heading % heading + html += """""" + for style in styles: + html += "" % (style[0], style[1]) + + html += "
%s%s
" + + return html + +class wxClass: + def __init__(self, name, description="", derivedFrom=[], styles=[], extrastyles=[]): + self.name = name + self.description = description + self.derivedFrom = derivedFrom + self.styles = styles + self.extrastyles = extrastyles + self.methods = {} + self.propConflicts = [] + self.props = [] + + def asHtml(self): + html = "

%s

" % self.name + html += self.description + if len(self.derivedFrom) > 0: + html += html_heading % "Derived from" + for der in self.derivedFrom: + derurl = der.replace("wx.", "wx").lower() + html += "%s
" % (derurl, der) + + if len(self.styles) > 0: + html += stylesAsHtml(self.styles) + + if len(self.extrastyles) > 0: + html += stylesAsHtml(self.extrastyles, extraStyles=True) + + return html + + def asReST(self): + restText = "DocStr(%s,\n" % (self.name) + + restText += ");" + return restText + + def createProps(self): + propsText = "" + propList = self.props + for conflict in self.propConflicts: + if conflict in propList: + propList.remove(conflict) + + basename = self.name.replace("wx", "") + for prop in propList: + if prop != "": + propname = prop + if propname[0] == "3": + propname = "Three" + propname[1:] + + getter = "wx.%s.Get%s" % (basename, prop) + setter = "wx.%s.Set%s" % (basename, prop) + propsText += "wx.%s.%s = property(%s" % (basename, propname, getter) + hasSetter = eval("(\"%s\" in dir(wx.%s))" % ("Set" + prop, basename)) + if hasSetter: + propsText += ", %s" % setter + propsText += ")\n" + + if propsText != "": + propsText += "\n\n" + + return propsText + +class wxMethod: + def __init__(self, name, parent, prototypes=[], params={}, description="", remarks=""): + self.name = name + self.parent = parent + self.prototypes = prototypes + self.params = params + self.description = description + self.remarks = remarks + self.pythonNote = "" + self.pythonOverrides = [] + + def asReST(self): + restText = "" + + # The below code converts prototypes into ReST, but currently isn't + # needed. Left here in case we change approach later. + + #for proto in self.prototypes: + # restText += proto[1] + "(" + # counter = 1 + # for arg in proto[2]: + # restText += "%s %s" % (arg[0].replace("wx.", ""), arg[1]) + # if counter < len(proto[2]): + # restText += ", " + # counter += 1 + # if proto[0] != "": + # restText += "-> " + proto[0] + # restText += "\n" + #restText += "\n" + + if len(self.params) > 0: + + for param in self.params: + restText += "\n:param %s: %s" % (param[0], restconvert.htmlToReST(param[1])) + restText += "\n\n" + + restText += restconvert.htmlToReST(self.description.strip()) + return restText + + def asHtml(self): + anchorname = self.getAnchorName() + retval = "" % (anchorname) + retval += "

%s

" % self.name + if len(self.pythonOverrides) > 0: + for myfunc in self.pythonOverrides: + retval += "

%s
%s

" % (myfunc[0], myfunc[1]) + else: + for proto in self.prototypes: + retval += "

" + if proto[0] != "": + retval += proto[0] + " " + retval += proto[1] + "(" + counter = 1 + for arg in proto[2]: + retval += "%s %s" % (arg[0], arg[1]) + if counter < len(proto[2]): + retval += ", " + counter += 1 + retval += ")

" + + if len(self.params) > 0: + retval += "" + for param in self.params: + retval += "" % (param[0], param[1]) + retval += "
%s%s
" + + retval += "

%s

" % self.description + + if self.remarks != "": + retval += "Remarks

%s

" % self.remarks + + return retval + + def getAnchorName(self): + anchorname = self.parent.name.lower() + self.name.lower() + if self.parent.name == self.name: + anchorname = self.name.lower() + + return anchorname + + def asString(self): + retval = "method: " + self.name + retval += "\n\nprototypes: " + for proto in self.prototypes: + retval += "\t%s" % `proto` + retval += "\n\nparams: " + for param in self.params: + retval += "%s: %s" % (param, self.params[param]) + + retval += "\n\ndescription: \n" + self.description + + retval += "remarks: \n" + self.remarks + + return retval diff --git a/wxPython/docs/bin/docparser/wxhtmlparse.py b/wxPython/docs/bin/docparser/wxhtmlparse.py new file mode 100644 index 0000000000..dc3ff81bb5 --- /dev/null +++ b/wxPython/docs/bin/docparser/wxhtmlparse.py @@ -0,0 +1,394 @@ +import sys, os, string, glob +import re +from docparser.wxclasses import * +import wx + + +outputdir = "output" + +# +# Class REs +# + +class_desc_re = """

.*?

(.*?)""" +win_styles_re = """Window styles

(.*?)""" +win_styles_extra_re = """Extra window styles

(.*?)""" +win_style_re = """\s*?\s*?(.*?)\s*?\s*?\s*?(.*?)""" +derived_re = """Derived from

(.*?)

""" +derived_class_re = """(.*?)""" + +# +# Method REs +# + +# groups - header, description +method_re = "

(.*?)

\s*?

(.*?)


" +lastmethod_re = "

(.*?)

\s*?

(.*?)\s*?

\s*?" +headings_re = "(.*?)

(.*?)" +# groups = param name, param value +param_re = "(.*?)

      (.*?)
" +# groups - return type, method name, arguments +proto_re = "(.*?).*?(.*?)\s*?\((.*?)\)" +# groups - arg type, arg name +args_re = "(.*?).*?(.*?)" +code_re = "
(.*?)
" +link_re = "(.*?)
" + +# +# wxPython/wxPerl note REs +# + +wx_re = "wx[A-Z]\S+" +wxperl_overload_re = "wxPerl note: In wxPerl there are two methods instead of a single overloaded method:

\s*?

      (.*?)
" +wxperl_re = "wxPerl note:(.*?)

" + +wxpython_constructors_re = """wxPython note: Constructors supported by wxPython are:

\s*?

      (.*?)
""" +wxpython_overload_re = """\s*?\s*?(.*?)\s*?\s*?\s*?(.*?)""" + +wxpython_overloads_re = "wxPython note: In place of a single overloaded method name, wxPython\s*?implements the following methods:

\s*?

      (.*?)
" +wxpython_re = "wxPython note:(.*?)

" + + +# convert wxWhatever to wx.Whatever +def namespacify_wxClasses(contents): + wx_regex = re.compile(wx_re, re.MULTILINE | re.DOTALL) + + result = wx_regex.sub(wxReplaceFunc, contents) + return result + +def wxReplaceFunc(match): + text = match.group() + if text.find("wxWidgets") == -1 and text.find("wxPython") == -1 and text.find("wxPerl") == -1: + text = text.replace("wx", "wx.") + return text + + + +# Methods to de-C++itize data. +def pythonize_text(contents): + """ + Remove C++isms that definitely shouldn't be in any text. + """ + contents = contents.replace("false", "False") + contents = contents.replace("true", "True") + contents = contents.replace("non-NULL", "not None") + contents = contents.replace("NULL", "None") + contents = contents.replace("const ", "") + contents = contents.replace("::", ".") + contents = contents.replace("\r\n", "\n") + contents = contents.replace("\r", "\n") + contents = contents.replace("''", "\"") + return namespacify_wxClasses(contents) + +def pythonize_args(contents): + """ + Remove C++isms from arguments (some of these terms may be used in other + contexts in actual documentation, so we don't remove them there). + """ + contents = contents.replace("static", "") + contents = contents.replace("virtual void", "") + contents = contents.replace("virtual", "") + contents = contents.replace("void*", "int") + contents = contents.replace("void", "") + + contents = contents.replace("off_t", "long") + contents = contents.replace("size_t", "long") + contents = contents.replace("*", "") + contents = contents.replace("&", "") + contents = contents.replace("&", "") + contents = contents.replace("char", "string") + contents = contents.replace("wxChar", "string") + contents = contents.replace("wxCoord", "int") + contents = contents.replace("wxString", "string") + + return pythonize_text(contents) + +def formatMethodProtos(protos): + """ + Remove C++isms in the method prototypes. + """ + for proto in protos: + proto[0] = pythonize_args(proto[0]) + proto[0] = proto[0].strip() + + proto[1] = namespacify_wxClasses(proto[1]) + for arg in proto[2]: + arg[0] = pythonize_args(arg[0]) + arg[0].strip() + + # for arg names, we should be more careful about what we replace + arg[1] = pythonize_text(arg[1]) + arg[1] = arg[1].replace("*", "") + arg[1] = arg[1].replace("&", "") + + return protos + + + +# functions for getting data from methods +def getMethodWxPythonOverrides(text, isConstructor=False): + overloads_re = wxpython_overloads_re + if isConstructor: + overloads_re = wxpython_constructors_re + overload_regex = re.compile(overloads_re, re.MULTILINE | re.DOTALL | re.IGNORECASE) + match = overload_regex.search(text, 0) + note = "" + start = -1 + end = -1 + overrides = [] + if match: + def getWxPythonOverridesFromMatch(match): + return [namespacify_wxClasses(match.group(1)), pythonize_text(match.group(2))] + + start = match.start() + end = match.end() + overrides, returntext = findAllMatches(wxpython_overload_re, match.group(1), getWxPythonOverridesFromMatch) + + returntext = text + + if start != -1 and end != -1: + #print "note is: " + text[start:end] + returntext = text.replace(text[start:end], "") + + return overrides, returntext + +def getMethodWxPythonNote(text): + python_regex = re.compile(wxpython_re, re.MULTILINE | re.DOTALL | re.IGNORECASE) + match = python_regex.search(text) + start = -1 + end = -1 + note = "" + if match: + start = match.start() + end = match.end() + note = match.group(1) + + returntext = text + + if start != -1 and end != -1: + #print "note is: " + text[start:end] + returntext = text.replace(text[start:end], "") + + return note, returntext + +def findAllMatches(re_string, text, handler, start=0): + """ + findAllMatches finds matches for a given regex, then runs the handler function + on each match, and returns a list of objects, along with a version of the + text with the area matches were found stripped. + Note the stripping of text is not generally usable yet, it assumes matches + are in continuous blocks, which is true of the wx docs. + """ + regex = re.compile(re_string, re.MULTILINE | re.DOTALL | re.IGNORECASE) + match = regex.search(text, start) + results = [] + + startpoint = -1 + endpoint = -1 + + if match: + startpoint = match.start() + + while match: + start = match.end() + results.append(handler(match)) + endpoint = match.end() + match = regex.search(text, start) + + returntext = text + if startpoint != -1 and endpoint != -1: + returntext = text.replace(text[startpoint:endpoint], "") + + return results, returntext + +def getMethodParams(text): + paramstart = text.find("Parameters

") + params, returntext = findAllMatches(param_re, text, getMethodParamsFromMatch, paramstart) + + return params, returntext + +def getMethodParamsFromMatch(match): + return [match.group(1).strip(), pythonize_text(match.group(2)).strip()] + +def getPrototypeFromMatch(match): + return [match.group(1), match.group(2), getProtoArgs(match.group(3))] + +def getProtoArgsFromMatch(match): + return [match.group(1), match.group(2)] + + + +# These methods parse the docs, finding matches and then using the FromMatch +# functions to parse the data. After that, the results are "Pythonized" +# by removing C++isms. +def getMethodProtos(text): + protos, returntext = findAllMatches(proto_re, text, getPrototypeFromMatch) + return formatMethodProtos(protos), returntext + +def getProtoArgs(text): + args, returntext = findAllMatches(args_re, text, getProtoArgsFromMatch) + return args + +def getMethodDesc(text): + heading_text = "" + return_text = text + end = text.find(heading_text) + if end != -1: + return_text = text[0:end] + + return pythonize_text(return_text) + + +def removeWxPerlNotes(text): + perl_overload_regex = re.compile(wxperl_overload_re, re.MULTILINE | re.DOTALL | re.IGNORECASE) + result = perl_overload_regex.sub("", text) + + perl_regex = re.compile(wxperl_re, re.MULTILINE | re.DOTALL | re.IGNORECASE) + result = perl_regex.sub("", result) + + return result + +def removeCPPCode(text): + code_regex = re.compile(code_re, re.MULTILINE | re.DOTALL | re.IGNORECASE) + + result = code_regex.sub("", text) + return result + + +def getMethod(match, parent): + name = match.group(1) + if name.find("::") != -1: + name = name.split("::")[1] + name = namespacify_wxClasses(name).strip() + start = match.end() + protos, remainder = getMethodProtos(match.group(2)) + + isConstructor = False + #print "name: %s, parent name: %s" % (name, parent.name) + if name == parent.name.replace("wx", "wx."): + isConstructor = True + overrides, remainder = getMethodWxPythonOverrides(remainder, isConstructor) + + note, remainder = getMethodWxPythonNote(remainder) + params, remainder = getMethodParams(remainder) + desc = getMethodDesc(remainder) + method = wxMethod(name, parent, protos, params, desc) + method.pythonNote = note + method.pythonOverrides = overrides + if len(method.pythonOverrides) > 0: + print "has overrides!\n\n\n\n" + return method + +def getClassDerivedFrom(text): + + def getDerivedClassesFromMatch(match): + return namespacify_wxClasses(match.group(1)) + + derived_classes = [] + derived_regex = re.compile(derived_re, re.MULTILINE | re.DOTALL | re.IGNORECASE) + match = derived_regex.search(text) + if match: + derived_classes, returntext = findAllMatches(derived_class_re, match.group(1), getDerivedClassesFromMatch) + + return derived_classes + +def getClassDescription(text): + + def getClassDescriptionFromMatch(match): + return match.group(1) + + desc, returntext = findAllMatches(class_desc_re, text, getClassDescriptionFromMatch) + + return pythonize_text(desc[0]) + +def getClassStyles(text, extraStyles=False): + styles_re = win_styles_re + if extraStyles: + styles_re = win_styles_extra_re + styles_regex = re.compile(styles_re, re.MULTILINE | re.DOTALL | re.IGNORECASE) + match = styles_regex.search(text) + + styles = [] + if match: + def getClassStyleFromMatch(match): + return [namespacify_wxClasses(match.group(1)), pythonize_text(match.group(2))] + + styles, remainder = findAllMatches(win_style_re, match.group(1), getClassStyleFromMatch) + + return styles + +# Main functions - these drive the process. +def getClassMethods(doc, parent): + contents = open(doc, "rb").read() + + # get rid of some particularly tricky parts before parsing + contents = contents.replace("const", "") + contents = removeWxPerlNotes(contents) + contents = removeCPPCode(contents) + + method_regex = re.compile(method_re, re.MULTILINE | re.DOTALL | re.IGNORECASE) + match = method_regex.search(contents) + start = 0 + methods = {} + while match: + start = match.end() + newmethod = getMethod(match, parent) + basename = parent.name.replace("wx", "") + isConstructor = (basename == newmethod.name.replace("wx.", "")) + if isConstructor or eval("newmethod.name in dir(wx.%s)" % basename): + print "Adding %s.%s" % (parent.name, newmethod.name) + methods[newmethod.name] = newmethod + match = method_regex.search(contents, start) + + lastmethod_regex = re.compile(lastmethod_re, re.MULTILINE | re.DOTALL | re.IGNORECASE) + match = lastmethod_regex.search(contents, start) + if match: + newmethod = getMethod(match, parent) + basename = parent.name.replace("wx", "") + isConstructor = (basename == newmethod.name.replace("wx.", "")) + if isConstructor or eval("newmethod.name in dir(wx.%s)" % basename): + print "Adding %s.%s" % (parent.name, newmethod.name) + methods[newmethod.name] = newmethod + + for name in methods: + if name[0:3] == "Get": + propname = name[3:] + basename = parent.name.replace("wx", "") + if not propname in eval("dir(wx.%s)" % basename): + parent.props.append(propname) + else: + parent.propConflicts.append(parent.name + "." + propname) + # get rid of the destructor and operator methods + ignore_methods = ["~" + namespacify_wxClasses(parent.name), "operator ==", + "operator <<", "operator >>", "operator =", + "operator !=", "operator*", "operator++" ] + for method in ignore_methods: + if method in methods: + methods.pop(method) + + return methods + +def getClasses(doc): + global docspath + contents = open(doc, "rb").read() + link_regex = re.compile(link_re, re.MULTILINE | re.DOTALL | re.IGNORECASE) + start = contents.find("

Alphabetical class reference

") + result = link_regex.search(contents, start) + classes = {} + while result: + start = result.end() + name = result.group(2).strip() + classpage = result.group(1).split("#")[0] + basename = name.replace("wx", "") + if basename in dir(wx): + classfile = os.path.join(os.path.dirname(doc), classpage) + classtext = open(classfile, "rb").read() + derivedClasses = getClassDerivedFrom(classtext) + description = getClassDescription(classtext) + styles = getClassStyles(classtext) + extra_styles = getClassStyles(classtext, extraStyles=True) + classes[name] = wxClass(name, description, derivedClasses, styles, extra_styles) + classes[name].methods = getClassMethods(classfile, classes[name]) + result = link_regex.search(contents, start) + + return classes diff --git a/wxPython/docs/bin/pythonize_docs.py b/wxPython/docs/bin/pythonize_docs.py new file mode 100644 index 0000000000..52259bcd06 --- /dev/null +++ b/wxPython/docs/bin/pythonize_docs.py @@ -0,0 +1,119 @@ +import sys, os, string, glob +import re +from docparser.wxclasses import * +from docparser.wxhtmlparse import * +import wx + +# HTML macros +html_heading = "

%s

" + +def classToHTML(name, thisclass): + global outdir, classes + page = open(os.path.join(outdir, "wx_" + name.lower() + ".html"), "w") + classname = namespacify_wxClasses(name) + page.write(thisclass.asHtml()) + page.write("
" + html_heading % "Methods") + + methods = thisclass.methods + if len(thisclass.derivedFrom) > 0: + for parentclass in thisclass.derivedFrom: + classname = parentclass.replace("wx.", "wx") + if classname in classes.keys(): + derivedmethods = classes[classname].methods + if parentclass in derivedmethods: + derivedmethods.pop(parentclass) + methods.update(derivedmethods) + + methodnames = sortMethods(classname, methods.keys()) + + for method in methodnames: + page.write("%s
" % (methods[method].getAnchorName(), method)) + + page.write("
") + + for method in methodnames: + page.write(methods[method].asHtml()) + page.write("
") + page.close() + +def sortMethods(classname, methodnames): + names = methodnames + names.sort() + # bump the constructor to the top of the list. + if classname in names: + names.remove(classname) + names.insert(0, classname) + + return names + +def makeDocString(name, docstring, longdocs=""): + myname = name.replace("wx.", "wx") + return "DocStr(%s, \"%s\", \"%s\");\n\n" % (myname, docstring, longdocs) + +def classToReST(name, thisclass): + global restdir + page = open(os.path.join(restdir, "_" + name + "_docstrings.i"), "w") + page.write(makeDocString(thisclass.name, thisclass.description)) + + classname = namespacify_wxClasses(name) + methodnames = sortMethods(classname, thisclass.methods.keys()) + + for method in methodnames: + docstr = makeDocString(name + "::" + method.replace("wx.", "wx"), thisclass.methods[method].asReST()) + page.write(docstr) + + page.close() + + +docspath = sys.argv[1] +if not os.path.isdir(docspath): + # get default directory + print "Please specify the directory where docs are located." + +outdir = os.path.join(docspath, outputdir) +if not os.path.exists(outdir): + os.makedirs(outdir) + +restdir = os.path.join(docspath, "docstrings") +if not os.path.exists(restdir): + os.makedirs(restdir) + +classes_page = os.path.join(docspath, "wx_classref.html") +print "docspath: %s" % (classes_page) +if os.path.exists(classes_page): + + # first, add namespace conventions to classes page. + output = open(os.path.join(outdir, os.path.basename(classes_page)), "w") + output.write("") + + propsfile = open(os.path.join(outdir, "props.py"), "w") + propsfile.write("import wx\n\n") + + # now, change the classes. + print "parsing wx HTML docs..." + classes = getClasses(classes_page) + names = classes.keys() + names.sort() + propConflicts = [] + for name in names: + basename = name.replace("wx", "") + urlname = "wx_%s.html" % name.lower() + output.write("%s
" % (urlname, basename)) + print "creating HTML docs for " + name + classToHTML(name, classes[name]) + print "creating rest docs for " + name + classToReST(name, classes[name]) + propsfile.write(classes[name].createProps()) + + propsfile.close() + output.write("") + output.close() + + print "prop conflicts: " + `propConflicts` + +#for doc in glob.glob(os.path.join(docspath, "wx_*.html")): +# print "doc is: %s" % (doc) +# pythonize_doc(doc) + + + \ No newline at end of file -- 2.45.2