[wxWidgets.git] / wxPython / docs / bin / docparser / wxhtmlparse.py

import sys, os, string, glob
import re
from docparser.wxclasses import *
import wx


# Name of the output directory.
# NOTE(review): nothing in the code visible here reads this value -- presumably
# it is consumed by a module that imports this one; confirm before removing.
outputdir = "output"

#
# Class REs
#
# Every pattern in this file is a plain string that gets compiled at the call
# site with re.MULTILINE | re.DOTALL (most call sites also add re.IGNORECASE),
# so "." spans newlines and each lazy ".*?" stops at the next literal marker.
#

# group 1 - text between the first </H2> and the next red section heading
class_desc_re = """<H2>.*?</H2>(.*?)<B><FONT COLOR="#FF0000">"""
# group 1 - body of the "Window styles" section, up to the next red heading
win_styles_re = """<B><FONT COLOR="#FF0000">Window styles</FONT></B><P>(.*?)<B><FONT COLOR="#FF0000">"""
# group 1 - body of the "Extra window styles" section, up to the next red heading
win_styles_extra_re = """<B><FONT COLOR="#FF0000">Extra window styles</FONT></B><P>(.*?)<B><FONT COLOR="#FF0000">"""
# groups - bold text of the first table cell, text of the second table cell
win_style_re = """<TR><TD VALIGN=TOP WIDTH=.*?>\s*?<FONT FACE=".*?">\s*?<B>(.*?)</B>\s*?</FONT></TD>\s*?<TD VALIGN=TOP>\s*?<FONT FACE=".*?">(.*?)</FONT></TD></TR>"""
# group 1 - body of the "Derived from" section, up to the next <P>
derived_re = """<B><FONT COLOR="#FF0000">Derived from</FONT></B><P>(.*?)<P>"""
# group 1 - link text of one anchor inside the "Derived from" section
derived_class_re = """<A HREF=".*?">(.*?)</A>"""

#
# Method REs
#

# groups - header, description (one method entry, terminated by <HR>)
method_re = "<H3>(.*?)</H3>\s*?<P>(.*?)<HR>"
# Same groups as method_re, but terminated by "<P> </FONT>" instead of <HR>;
# getClassMethods applies it once, after the last method_re match.
lastmethod_re = "<H3>(.*?)</H3>\s*?<P>(.*?)\s*?<P>\s*?</FONT>"
# groups - heading text, (always empty: the trailing lazy group has nothing
# after it to stop at).  Not referenced by the code visible in this file.
headings_re = "<B><FONT COLOR=\"#FF0000\">(.*?)</FONT></B><P>(.*?)"
# groups - param name, param value
param_re = "<I>(.*?)</I><UL><UL>(.*?)</UL></UL>"
# groups - return type, method name, arguments
proto_re = "<B>(.*?)</B>.*?<B>(.*?)</B>\s*?\((.*?)\)"
# groups - arg type, arg name
args_re = "<B>(.*?)</B>.*?<I>(.*?)</I>"
# group 1 - contents of a <PRE> block (removed wholesale by removeCPPCode)
code_re = "<PRE>(.*?)</PRE>"
# groups - link target, bold link text (used by getClasses on the index page)
link_re = "<A href=\"(.*?)\"><B>(.*?)</B></A><BR>"

#
# wxPython/wxPerl note REs
#

# A "wx" prefix followed by an uppercase letter and a run of non-whitespace.
# Note that \S+ is greedy, so trailing punctuation or markup is part of the match.
wx_re = "wx[A-Z]\S+"
# group 1 - list body of a wxPerl "two methods instead of one overload" note
wxperl_overload_re = "<B><FONT COLOR=\"#0000C8\">wxPerl note:</FONT></B> In wxPerl there are two methods instead of a single overloaded method:<P>\s*?<UL><UL>(.*?)</UL></UL>"
# group 1 - text of any other wxPerl note, up to the next <P>
wxperl_re = "<B><FONT COLOR=\"#0000C8\">wxPerl note:</FONT></B>(.*?)<P>"

# group 1 - list body of a wxPython "Constructors supported" note
wxpython_constructors_re = """<B><FONT COLOR="#0000C8">wxPython note:</FONT></B> Constructors supported by wxPython are:<P>\s*?<UL><UL>(.*?)</UL></UL>"""
# groups - bold text of the first table cell, text of the second table cell
# (same shape as win_style_re, except that WIDTH= is not required)
wxpython_overload_re = """<TR><TD VALIGN=TOP.*?>\s*?<FONT FACE=".*?">\s*?<B>(.*?)</B>\s*?</FONT></TD>\s*?<TD VALIGN=TOP>\s*?<FONT FACE=".*?">(.*?)</FONT></TD></TR>"""

# group 1 - list body of a wxPython "In place of a single overloaded method" note
wxpython_overloads_re = "<B><FONT COLOR=\"#0000C8\">wxPython note:</FONT></B> In place of a single overloaded method name, wxPython\s*?implements the following methods:<P>\s*?<UL><UL>(.*?)</UL></UL>"
# group 1 - text of any other wxPython note, up to the next <P>
wxpython_re = "<B><FONT COLOR=\"#0000C8\">wxPython note:</FONT></B>(.*?)<P>"


# Rewrite C++-style wx identifiers (wxWhatever) as Python-style (wx.Whatever).
def namespacify_wxClasses(contents):
    """
    Return contents with every token matched by wx_re passed through
    wxReplaceFunc, which decides whether the token gets rewritten.
    """
    return re.compile(wx_re, re.MULTILINE | re.DOTALL).sub(wxReplaceFunc, contents)

def wxReplaceFunc(match):
    """
    Substitution callback for namespacify_wxClasses.

    Returns the matched text with every "wx" turned into "wx.", unless the
    matched text contains one of the product names "wxWidgets", "wxPython"
    or "wxPerl", in which case it is returned untouched.
    """
    token = match.group()
    for product in ("wxWidgets", "wxPython", "wxPerl"):
        if product in token:
            return token
    return token.replace("wx", "wx.")


# Methods to de-C++itize data.
def pythonize_text(contents):
    """
    Remove C++isms that definitely shouldn't be in any text.

    Rewrites the C++ literals false/true/NULL to their Python spellings,
    drops "const " qualifiers, turns the "::" scope operator into ".",
    normalises line endings to "\\n", turns two consecutive single quotes
    into one double quote, and finally namespacifies wx class names.

    contents -- the text to convert (a string).
    Returns the converted string.
    """
    # Match whole words only: a plain substring replace also rewrote ordinary
    # prose such as "construed" -> "consTrued" or "untrue" -> "unTrue".
    contents = re.sub(r"\bfalse\b", "False", contents)
    contents = re.sub(r"\btrue\b", "True", contents)
    # "non-NULL" must be handled before the bare "NULL" replacement below.
    contents = contents.replace("non-NULL", "not None")
    contents = contents.replace("NULL", "None")
    contents = contents.replace("const ", "")
    contents = contents.replace("::", ".")
    # "\r\n" first, so a Windows line ending does not become two newlines.
    contents = contents.replace("\r\n", "\n")
    contents = contents.replace("\r", "\n")
    contents = contents.replace("''", "\"")
    return namespacify_wxClasses(contents)

def pythonize_args(contents):
    """
    Strip C++-only syntax from argument/return type text and map a few C++
    type names to Python-ish ones, then apply pythonize_text.

    These substitutions are kept out of pythonize_text because the same
    words may legitimately appear in ordinary documentation prose.
    """
    # Applied strictly in this order: longer spellings ("virtual void",
    # "void*", "&amp;") come before the shorter strings they contain.
    substitutions = (
        ("static", ""),
        ("virtual void", ""),
        ("virtual", ""),
        ("void*", "int"),
        ("void", ""),
        ("off_t", "long"),
        ("size_t", "long"),
        ("*", ""),
        ("&amp;", ""),
        ("&", ""),
        ("char", "string"),
        ("wxChar", "string"),
        ("wxCoord", "int"),
        ("<A HREF=\"wx_wxstring.html#wxstring\">wxString</A>", "string"),
    )
    for cpp_text, py_text in substitutions:
        contents = contents.replace(cpp_text, py_text)

    return pythonize_text(contents)
    
def formatMethodProtos(protos):
    """
    Remove C++isms in the method prototypes.

    protos -- list of [return type, method name, args] lists, where args is
              a list of [arg type, arg name] lists (the shape produced by
              getPrototypeFromMatch / getProtoArgsFromMatch).

    The entries are modified in place; the same list object is returned.
    """
    for proto in protos:
        proto[0] = pythonize_args(proto[0]).strip()
        proto[1] = namespacify_wxClasses(proto[1])

        for arg in proto[2]:
            # The stripped value has to be stored back: strings are immutable,
            # so a bare arg[0].strip() leaves the surrounding whitespace in place.
            arg[0] = pythonize_args(arg[0]).strip()

            # for arg names, we should be more careful about what we replace
            arg[1] = pythonize_text(arg[1])
            arg[1] = arg[1].replace("*", "")
            arg[1] = arg[1].replace("&", "")

    return protos


# functions for getting data from methods
def getMethodWxPythonOverrides(text, isConstructor=False):
    """
    Extract the wxPython overload table from a method's documentation.

    Searches text for the "wxPython note" that introduces replacement method
    names (or, when isConstructor is true, the supported constructors) and
    parses each table row inside it with wxpython_overload_re.

    Returns (overrides, returntext): overrides is a list of
    [first cell, second cell] pairs, and returntext is text with the text
    of the matched note removed.  When no note is found the result is
    ([], text).
    """
    if isConstructor:
        note_pattern = wxpython_constructors_re
    else:
        note_pattern = wxpython_overloads_re

    note_regex = re.compile(note_pattern, re.MULTILINE | re.DOTALL | re.IGNORECASE)
    found = note_regex.search(text, 0)
    if not found:
        return [], text

    def rowToOverride(row):
        return [namespacify_wxClasses(row.group(1)), pythonize_text(row.group(2))]

    # Only the parsed rows are needed; the stripped table text is discarded.
    overrides = findAllMatches(wxpython_overload_re, found.group(1), rowToOverride)[0]
    return overrides, text.replace(found.group(0), "")

def getMethodWxPythonNote(text):
    """
    Pull the first generic "wxPython note" out of a method's documentation.

    Returns (note, returntext): note is the text captured by wxpython_re
    (empty string when there is no note) and returntext is text with the
    text of the matched note removed.
    """
    note_regex = re.compile(wxpython_re, re.MULTILINE | re.DOTALL | re.IGNORECASE)
    found = note_regex.search(text)
    if not found:
        return "", text

    return found.group(1), text.replace(found.group(0), "")
    
def findAllMatches(re_string, text, handler, start=0):
    """
    findAllMatches finds matches for a given regex, then runs the handler function
    on each match, and returns a list of objects, along with a version of the
    text with the area matches were found stripped.
    Note the stripping of text is not generally usable yet, it assumes matches
    are in continuous blocks, which is true of the wx docs.

    re_string -- pattern source; compiled with MULTILINE | DOTALL | IGNORECASE.
    text      -- the string to search.
    handler   -- callable taking a match object; its return values are
                 collected, in match order, into the result list.
    start     -- position in text at which searching begins.

    Returns (results, returntext).  returntext is text with the single span
    running from the start of the first match to the end of the last match
    cut out; when nothing matches it is text unchanged.
    """
    regex = re.compile(re_string, re.MULTILINE | re.DOTALL | re.IGNORECASE)
    results = []

    startpoint = -1
    endpoint = -1

    # finditer always advances, so a pattern that can match the empty string
    # no longer loops forever the way a manual search-from-match.end() does.
    for match in regex.finditer(text, start):
        if startpoint == -1:
            startpoint = match.start()
        endpoint = match.end()
        results.append(handler(match))

    if startpoint == -1:
        return results, text

    # Cut the span out by position.  str.replace() on the span's text would
    # also delete identical text elsewhere, e.g. before `start`, which was
    # never searched.
    return results, text[:startpoint] + text[endpoint:]

def getMethodParams(text):
    """
    Collect the documented parameters of a method.

    Matching of param_re starts at the "Parameters" heading.  Returns the
    (params, returntext) pair produced by findAllMatches, where each param
    is built by getMethodParamsFromMatch.

    NOTE(review): when the heading is absent str.find returns -1 and that
    value is passed on as the search start -- presumably the regex engine
    treats it as the beginning of the text; confirm.
    """
    heading = "<B><FONT COLOR=\"#FF0000\">Parameters</FONT></B><P>"
    return findAllMatches(param_re, text, getMethodParamsFromMatch, text.find(heading))
    
def getMethodParamsFromMatch(match):
    """
    Turn one param_re match into [name, description], both stripped of
    surrounding whitespace; the description is also run through pythonize_text.
    """
    param_name = match.group(1).strip()
    param_desc = pythonize_text(match.group(2)).strip()
    return [param_name, param_desc]

def getPrototypeFromMatch(match):
    """
    Turn one proto_re match into [return type, method name, args], where
    args is the list parsed from the argument text by getProtoArgs.
    """
    return_type, method_name, raw_args = match.group(1, 2, 3)
    return [return_type, method_name, getProtoArgs(raw_args)]

def getProtoArgsFromMatch(match):
    """
    Turn one args_re match into a two-element list holding the text of its
    first and second groups (arg type, arg name).
    """
    return list(match.group(1, 2))


# The functions below scan the docs for regex matches and hand each match to
# the corresponding *FromMatch function to build the raw data; the results
# are then "Pythonized" by removing C++isms.
def getMethodProtos(text):
    """
    Parse every prototype matched by proto_re in text.

    Returns (protos, returntext): the prototypes after formatMethodProtos
    has cleaned them, and the text with the matched area stripped by
    findAllMatches.
    """
    raw_protos, leftover = findAllMatches(proto_re, text, getPrototypeFromMatch)
    cleaned = formatMethodProtos(raw_protos)
    return cleaned, leftover
    
def getProtoArgs(text):
    """
    Return the list of [arg type, arg name] pairs matched by args_re in text.
    """
    # findAllMatches also returns the stripped text, which is not needed here.
    return findAllMatches(args_re, text, getProtoArgsFromMatch)[0]
    
def getMethodDesc(text):
    """
    Return the pythonized description of a method: everything in text that
    precedes the first red section heading, or all of text when there is none.
    """
    cut = text.find("<B><FONT COLOR=\"#FF0000\">")
    if cut == -1:
        description = text
    else:
        description = text[:cut]

    return pythonize_text(description)
    

def removeWxPerlNotes(text):
    """
    Return text with all wxPerl notes deleted.

    The overload-style notes (wxperl_overload_re) are removed first, then
    whatever generic wxPerl notes (wxperl_re) remain.
    """
    flags = re.MULTILINE | re.DOTALL | re.IGNORECASE
    for pattern in (wxperl_overload_re, wxperl_re):
        text = re.compile(pattern, flags).sub("", text)

    return text
    
def removeCPPCode(text):
    """
    Return text with every block matched by code_re (<PRE>...</PRE>) deleted.
    """
    return re.compile(code_re, re.MULTILINE | re.DOTALL | re.IGNORECASE).sub("", text)


def getMethod(match, parent):
    """
    Build a wxMethod from one method_re / lastmethod_re match.

    match  -- match object whose group 1 is the method heading and group 2
              the method's documentation body.
    parent -- the owning class object; only its .name is read here.

    The body is consumed step by step: prototypes, wxPython overrides,
    wxPython note, parameters, and finally the description, each step
    working on the text left over by the previous one.
    """
    heading, body = match.group(1), match.group(2)
    if "::" in heading:
        # "Class::Method" -> "Method"
        heading = heading.split("::")[1]
    name = namespacify_wxClasses(heading).strip()

    protos, remainder = getMethodProtos(body)

    # A method named like its (namespacified) class is the constructor.
    isConstructor = (name == parent.name.replace("wx", "wx."))
    overrides, remainder = getMethodWxPythonOverrides(remainder, isConstructor)
    note, remainder = getMethodWxPythonNote(remainder)
    params, remainder = getMethodParams(remainder)

    method = wxMethod(name, parent, protos, params, getMethodDesc(remainder))
    method.pythonNote = note
    method.pythonOverrides = overrides
    if len(method.pythonOverrides) > 0:
        print("has overrides!\n\n\n\n")
    return method

def getClassDerivedFrom(text):
    """
    Return the namespacified link texts found in the "Derived from" section
    of a class page, or an empty list when the page has no such section.
    """
    section_regex = re.compile(derived_re, re.MULTILINE | re.DOTALL | re.IGNORECASE)
    section = section_regex.search(text)
    if not section:
        return []

    def toPythonName(link):
        return namespacify_wxClasses(link.group(1))

    return findAllMatches(derived_class_re, section.group(1), toPythonName)[0]
    
def getClassDescription(text):
    """
    Return the pythonized class description from a class page.

    The description is the text captured by class_desc_re: everything
    between the first </H2> and the next red section heading.

    Returns an empty string when the page does not contain that structure;
    indexing the first element of an empty match list used to raise
    IndexError in that case.
    """
    desc_regex = re.compile(class_desc_re, re.MULTILINE | re.DOTALL | re.IGNORECASE)
    # Only the first match is ever used, so a single search is enough.
    match = desc_regex.search(text)
    if not match:
        return ""

    return pythonize_text(match.group(1))
    
def getClassStyles(text, extraStyles=False):
    """
    Return the window styles listed on a class page.

    Looks at the "Window styles" section, or at "Extra window styles" when
    extraStyles is true, and returns one [first cell, second cell] pair per
    table row matched by win_style_re.  Returns an empty list when the page
    has no such section.
    """
    if extraStyles:
        section_pattern = win_styles_extra_re
    else:
        section_pattern = win_styles_re

    section_regex = re.compile(section_pattern, re.MULTILINE | re.DOTALL | re.IGNORECASE)
    section = section_regex.search(text)
    if not section:
        return []

    def rowToStyle(row):
        return [namespacify_wxClasses(row.group(1)), pythonize_text(row.group(2))]

    return findAllMatches(win_style_re, section.group(1), rowToStyle)[0]

# Main functions - these drive the process.
def getClassMethods(doc, parent):
    """
    Parse the methods documented on one class page.

    doc    -- path of the class's HTML page.
    parent -- the class object the methods belong to; its .name is read and
              its .props / .propConflicts lists are appended to.

    A documented method is kept only if it is the constructor or its name
    appears in dir() of the corresponding class in the wx module.  For each
    kept method named GetXxx, "Xxx" is appended to parent.props unless the
    wx class already has an attribute of that name, in which case
    "<class name>.Xxx" is appended to parent.propConflicts.  The destructor
    and operator entries are dropped afterwards.

    Returns a dict mapping method name to the method object built by getMethod.
    Raises AttributeError if the wx module has no attribute named after the
    class (parent.name with "wx" removed).
    """
    # Close the file explicitly instead of leaving it to garbage collection.
    docfile = open(doc, "rb")
    try:
        contents = docfile.read()
    finally:
        docfile.close()

    # get rid of some particularly tricky parts before parsing
    contents = contents.replace("<B>const</B>", "")
    contents = removeWxPerlNotes(contents)
    contents = removeCPPCode(contents)

    basename = parent.name.replace("wx", "")
    # The class name comes from scraped HTML: look it up with getattr rather
    # than eval()-ing a string built from it, and list its attributes once.
    wrapped_names = dir(getattr(wx, basename))

    flags = re.MULTILINE | re.DOTALL | re.IGNORECASE
    methods = {}

    def addIfWrapped(match):
        # Shared by the regular entries and the final, differently
        # terminated entry.
        newmethod = getMethod(match, parent)
        isConstructor = (basename == newmethod.name.replace("wx.", ""))
        if isConstructor or newmethod.name in wrapped_names:
            print("Adding %s.%s" % (parent.name, newmethod.name))
            methods[newmethod.name] = newmethod

    start = 0
    for match in re.compile(method_re, flags).finditer(contents):
        start = match.end()
        addIfWrapped(match)

    # The last entry on the page is not followed by <HR>, so it is picked up
    # separately, starting where the last regular entry ended.
    match = re.compile(lastmethod_re, flags).search(contents, start)
    if match:
        addIfWrapped(match)

    for name in methods:
        if name.startswith("Get"):
            propname = name[3:]
            if propname not in wrapped_names:
                parent.props.append(propname)
            else:
                parent.propConflicts.append(parent.name + "." + propname)

    # get rid of the destructor and operator methods
    ignore_methods = ["~" + namespacify_wxClasses(parent.name), "operator ==",
                      "operator &lt;&lt;", "operator &gt;&gt;", "operator =",
                      "operator !=", "operator*", "operator++"]
    for method in ignore_methods:
        methods.pop(method, None)

    return methods
        
def getClasses(doc):
    """
    Parse every class linked from the class index page.

    doc -- path of the index page.  Links are read starting at the
           "<H2>Alphabetical class reference</H2>" heading, and each linked
           class page is resolved relative to doc's directory.

    Only classes whose name, with "wx" removed, appears in dir(wx) are
    processed.  For each of those, the derived-from list, description,
    styles and extra styles are parsed from the class page, a wxClass is
    built, and its .methods is filled in by getClassMethods.

    Returns a dict mapping the class name as written in the index to its
    wxClass object.
    """
    def readFile(path):
        # Close the file explicitly instead of leaving it to garbage collection.
        handle = open(path, "rb")
        try:
            return handle.read()
        finally:
            handle.close()

    contents = readFile(doc)
    link_regex = re.compile(link_re, re.MULTILINE | re.DOTALL | re.IGNORECASE)
    start = contents.find("<H2>Alphabetical class reference</H2>")
    docdir = os.path.dirname(doc)
    wx_names = dir(wx)  # invariant across the loop, so list it once

    classes = {}
    for result in link_regex.finditer(contents, start):
        name = result.group(2).strip()
        # Drop any "#anchor" part of the link target.
        classpage = result.group(1).split("#")[0]
        if name.replace("wx", "") not in wx_names:
            continue

        classfile = os.path.join(docdir, classpage)
        classtext = readFile(classfile)
        derivedClasses = getClassDerivedFrom(classtext)
        description = getClassDescription(classtext)
        styles = getClassStyles(classtext)
        extra_styles = getClassStyles(classtext, extraStyles=True)
        classes[name] = wxClass(name, description, derivedClasses, styles, extra_styles)
        classes[name].methods = getClassMethods(classfile, classes[name])

    return classes
Commit	Line	Data
ce1245e1 RD	1	import sys, os, string, glob
	2	import re
	3	from docparser.wxclasses import *
	4	import wx
	5
	6
	7	outputdir = "output"
	8
	9	#
	10	# Class REs
	11	#
	12
	13	class_desc_re = """<H2>.?</H2>(.?)<B><FONT COLOR="#FF0000">"""
	14	win_styles_re = """<B><FONT COLOR="#FF0000">Window styles</FONT></B><P>(.*?)<B><FONT COLOR="#FF0000">"""
	15	win_styles_extra_re = """<B><FONT COLOR="#FF0000">Extra window styles</FONT></B><P>(.*?)<B><FONT COLOR="#FF0000">"""
	16	win_style_re = """<TR><TD VALIGN=TOP WIDTH=.?>\s?<FONT FACE=".?">\s?<B>(.?)</B>\s?</FONT></TD>\s?<TD VALIGN=TOP>\s?<FONT FACE=".?">(.?)</FONT></TD></TR>"""
	17	derived_re = """<B><FONT COLOR="#FF0000">Derived from</FONT></B><P>(.*?)<P>"""
	18	derived_class_re = """<A HREF=".?">(.?)</A>"""
	19
	20	#
	21	# Method REs
	22	#
	23
	24	# groups - header, description
	25	method_re = "<H3>(.?)</H3>\s?<P>(.*?)<HR>"
	26	lastmethod_re = "<H3>(.?)</H3>\s?<P>(.?)\s?<P>\s*?</FONT>"
	27	headings_re = "<B><FONT COLOR=\"#FF0000\">(.?)</FONT></B><P>(.?)"
	28	# groups = param name, param value
	29	param_re = "<I>(.?)</I><UL><UL>(.?)</UL></UL>"
	30	# groups - return type, method name, arguments
	31	proto_re = "<B>(.?)</B>.?<B>(.?)</B>\s?\((.*?)\)"
	32	# groups - arg type, arg name
	33	args_re = "<B>(.?)</B>.?<I>(.*?)</I>"
	34	code_re = "<PRE>(.*?)</PRE>"
	35	link_re = "<A href=\"(.?)\"><B>(.?)</B></A><BR>"
	36
	37	#
	38	# wxPython/wxPerl note REs
	39	#
	40
	41	wx_re = "wx[A-Z]\S+"
	42	wxperl_overload_re = "<B><FONT COLOR=\"#0000C8\">wxPerl note:</FONT></B> In wxPerl there are two methods instead of a single overloaded method:<P>\s?<UL><UL>(.?)</UL></UL>"
	43	wxperl_re = "<B><FONT COLOR=\"#0000C8\">wxPerl note:</FONT></B>(.*?)<P>"
	44
	45	wxpython_constructors_re = """<B><FONT COLOR="#0000C8">wxPython note:</FONT></B> Constructors supported by wxPython are:<P>\s?<UL><UL>(.?)</UL></UL>"""
	46	wxpython_overload_re = """<TR><TD VALIGN=TOP.?>\s?<FONT FACE=".?">\s?<B>(.?)</B>\s?</FONT></TD>\s?<TD VALIGN=TOP>\s?<FONT FACE=".?">(.?)</FONT></TD></TR>"""
	47
	48	wxpython_overloads_re = "<B><FONT COLOR=\"#0000C8\">wxPython note:</FONT></B> In place of a single overloaded method name, wxPython\s?implements the following methods:<P>\s?<UL><UL>(.*?)</UL></UL>"
	49	wxpython_re = "<B><FONT COLOR=\"#0000C8\">wxPython note:</FONT></B>(.*?)<P>"
	50
	51
	52	# convert wxWhatever to wx.Whatever
	53	def namespacify_wxClasses(contents):
	54	wx_regex = re.compile(wx_re, re.MULTILINE \| re.DOTALL)
	55
	56	result = wx_regex.sub(wxReplaceFunc, contents)
	57	return result
	58
	59	def wxReplaceFunc(match):
	60	text = match.group()
	61	if text.find("wxWidgets") == -1 and text.find("wxPython") == -1 and text.find("wxPerl") == -1:
	62	text = text.replace("wx", "wx.")
	63	return text
	64
65
66
67	# Methods to de-C++itize data.
68	def pythonize_text(contents):
69	"""
70	Remove C++isms that definitely shouldn't be in any text.
71	"""
72	contents = contents.replace("false", "False")
73	contents = contents.replace("true", "True")
74	contents = contents.replace("non-NULL", "not None")
75	contents = contents.replace("NULL", "None")
76	contents = contents.replace("const ", "")
77	contents = contents.replace("::", ".")
78	contents = contents.replace("\r\n", "\n")
79	contents = contents.replace("\r", "\n")
80	contents = contents.replace("''", "\"")
81	return namespacify_wxClasses(contents)
82
83	def pythonize_args(contents):
84	"""
85	Remove C++isms from arguments (some of these terms may be used in other
86	contexts in actual documentation, so we don't remove them there).
87	"""
88	contents = contents.replace("static", "")
89	contents = contents.replace("virtual void", "")
90	contents = contents.replace("virtual", "")
91	contents = contents.replace("void*", "int")
92	contents = contents.replace("void", "")
93
94	contents = contents.replace("off_t", "long")
95	contents = contents.replace("size_t", "long")
96	contents = contents.replace("*", "")
97	contents = contents.replace("&", "")
98	contents = contents.replace("&", "")
99	contents = contents.replace("char", "string")
100	contents = contents.replace("wxChar", "string")
101	contents = contents.replace("wxCoord", "int")
102	contents = contents.replace("<A HREF=\"wx_wxstring.html#wxstring\">wxString</A>", "string")
103
104	return pythonize_text(contents)
105
106	def formatMethodProtos(protos):
107	"""
108	Remove C++isms in the method prototypes.
109	"""
110	for proto in protos:
111	proto[0] = pythonize_args(proto[0])
112	proto[0] = proto[0].strip()
113
114	proto[1] = namespacify_wxClasses(proto[1])
115	for arg in proto[2]:
116	arg[0] = pythonize_args(arg[0])
117	arg[0].strip()
118
119	# for arg names, we should be more careful about what we replace
120	arg[1] = pythonize_text(arg[1])
121	arg[1] = arg[1].replace("*", "")
122	arg[1] = arg[1].replace("&", "")
123
124	return protos
125
126
127
128	# functions for getting data from methods
129	def getMethodWxPythonOverrides(text, isConstructor=False):
130	overloads_re = wxpython_overloads_re
131	if isConstructor:
132	overloads_re = wxpython_constructors_re
133	overload_regex = re.compile(overloads_re, re.MULTILINE \| re.DOTALL \| re.IGNORECASE)
134	match = overload_regex.search(text, 0)
135	note = ""
136	start = -1
137	end = -1
138	overrides = []
139	if match:
140	def getWxPythonOverridesFromMatch(match):
141	return [namespacify_wxClasses(match.group(1)), pythonize_text(match.group(2))]
142
143	start = match.start()
144	end = match.end()
145	overrides, returntext = findAllMatches(wxpython_overload_re, match.group(1), getWxPythonOverridesFromMatch)
146
147	returntext = text
148
149	if start != -1 and end != -1:
150	#print "note is: " + text[start:end]
151	returntext = text.replace(text[start:end], "")
152
153	return overrides, returntext
154
155	def getMethodWxPythonNote(text):
156	python_regex = re.compile(wxpython_re, re.MULTILINE \| re.DOTALL \| re.IGNORECASE)
157	match = python_regex.search(text)
158	start = -1
159	end = -1
160	note = ""
161	if match:
162	start = match.start()
163	end = match.end()
164	note = match.group(1)
165
166	returntext = text
167
168	if start != -1 and end != -1:
169	#print "note is: " + text[start:end]
170	returntext = text.replace(text[start:end], "")
171
172	return note, returntext
173
174	def findAllMatches(re_string, text, handler, start=0):
175	"""
176	findAllMatches finds matches for a given regex, then runs the handler function
177	on each match, and returns a list of objects, along with a version of the
178	text with the area matches were found stripped.
179	Note the stripping of text is not generally usable yet, it assumes matches
180	are in continuous blocks, which is true of the wx docs.
181	"""
182	regex = re.compile(re_string, re.MULTILINE \| re.DOTALL \| re.IGNORECASE)
183	match = regex.search(text, start)
184	results = []
185
186	startpoint = -1
187	endpoint = -1
188
189	if match:
190	startpoint = match.start()
191
192	while match:
193	start = match.end()
194	results.append(handler(match))
195	endpoint = match.end()
196	match = regex.search(text, start)
197
198	returntext = text
199	if startpoint != -1 and endpoint != -1:
200	returntext = text.replace(text[startpoint:endpoint], "")
201
202	return results, returntext
203
204	def getMethodParams(text):
205	paramstart = text.find("<B><FONT COLOR=\"#FF0000\">Parameters</FONT></B><P>")
206	params, returntext = findAllMatches(param_re, text, getMethodParamsFromMatch, paramstart)
207
208	return params, returntext
209
210	def getMethodParamsFromMatch(match):
211	return [match.group(1).strip(), pythonize_text(match.group(2)).strip()]
212
213	def getPrototypeFromMatch(match):
214	return [match.group(1), match.group(2), getProtoArgs(match.group(3))]
215
216	def getProtoArgsFromMatch(match):
217	return [match.group(1), match.group(2)]
218
219
220
221	# These methods parse the docs, finding matches and then using the FromMatch
222	# functions to parse the data. After that, the results are "Pythonized"
223	# by removing C++isms.
224	def getMethodProtos(text):
225	protos, returntext = findAllMatches(proto_re, text, getPrototypeFromMatch)
226	return formatMethodProtos(protos), returntext
227
228	def getProtoArgs(text):
229	args, returntext = findAllMatches(args_re, text, getProtoArgsFromMatch)
230	return args
231
232	def getMethodDesc(text):
233	heading_text = "<B><FONT COLOR=\"#FF0000\">"
234	return_text = text
235	end = text.find(heading_text)
236	if end != -1:
237	return_text = text[0:end]
238
239	return pythonize_text(return_text)
240
241
242	def removeWxPerlNotes(text):
243	perl_overload_regex = re.compile(wxperl_overload_re, re.MULTILINE \| re.DOTALL \| re.IGNORECASE)
244	result = perl_overload_regex.sub("", text)
245
246	perl_regex = re.compile(wxperl_re, re.MULTILINE \| re.DOTALL \| re.IGNORECASE)
247	result = perl_regex.sub("", result)
248
249	return result
250
251	def removeCPPCode(text):
252	code_regex = re.compile(code_re, re.MULTILINE \| re.DOTALL \| re.IGNORECASE)
253
254	result = code_regex.sub("", text)
255	return result
256
257
258	def getMethod(match, parent):
259	name = match.group(1)
260	if name.find("::") != -1:
261	name = name.split("::")[1]
262	name = namespacify_wxClasses(name).strip()
263	start = match.end()
264	protos, remainder = getMethodProtos(match.group(2))
265
266	isConstructor = False
267	#print "name: %s, parent name: %s" % (name, parent.name)
268	if name == parent.name.replace("wx", "wx."):
269	isConstructor = True
270	overrides, remainder = getMethodWxPythonOverrides(remainder, isConstructor)
271
272	note, remainder = getMethodWxPythonNote(remainder)
273	params, remainder = getMethodParams(remainder)
274	desc = getMethodDesc(remainder)
275	method = wxMethod(name, parent, protos, params, desc)
276	method.pythonNote = note
277	method.pythonOverrides = overrides
278	if len(method.pythonOverrides) > 0:
279	print "has overrides!\n\n\n\n"
280	return method
281
282	def getClassDerivedFrom(text):
283
284	def getDerivedClassesFromMatch(match):
285	return namespacify_wxClasses(match.group(1))
286
287	derived_classes = []
288	derived_regex = re.compile(derived_re, re.MULTILINE \| re.DOTALL \| re.IGNORECASE)
289	match = derived_regex.search(text)
290	if match:
291	derived_classes, returntext = findAllMatches(derived_class_re, match.group(1), getDerivedClassesFromMatch)
292
293	return derived_classes
294
295	def getClassDescription(text):
296
297	def getClassDescriptionFromMatch(match):
298	return match.group(1)
299
300	desc, returntext = findAllMatches(class_desc_re, text, getClassDescriptionFromMatch)
301
302	return pythonize_text(desc[0])
303
304	def getClassStyles(text, extraStyles=False):
305	styles_re = win_styles_re
306	if extraStyles:
307	styles_re = win_styles_extra_re
308	styles_regex = re.compile(styles_re, re.MULTILINE \| re.DOTALL \| re.IGNORECASE)
309	match = styles_regex.search(text)
310
311	styles = []
312	if match:
313	def getClassStyleFromMatch(match):
314	return [namespacify_wxClasses(match.group(1)), pythonize_text(match.group(2))]
315
316	styles, remainder = findAllMatches(win_style_re, match.group(1), getClassStyleFromMatch)
317
318	return styles
319
320	# Main functions - these drive the process.
321	def getClassMethods(doc, parent):
322	contents = open(doc, "rb").read()
323
324	# get rid of some particularly tricky parts before parsing
325	contents = contents.replace("<B>const</B>", "")
326	contents = removeWxPerlNotes(contents)
327	contents = removeCPPCode(contents)
328
329	method_regex = re.compile(method_re, re.MULTILINE \| re.DOTALL \| re.IGNORECASE)
330	match = method_regex.search(contents)
331	start = 0
332	methods = {}
333	while match:
334	start = match.end()
335	newmethod = getMethod(match, parent)
336	basename = parent.name.replace("wx", "")
337	isConstructor = (basename == newmethod.name.replace("wx.", ""))
338	if isConstructor or eval("newmethod.name in dir(wx.%s)" % basename):
339	print "Adding %s.%s" % (parent.name, newmethod.name)
340	methods[newmethod.name] = newmethod
341	match = method_regex.search(contents, start)
342
343	lastmethod_regex = re.compile(lastmethod_re, re.MULTILINE \| re.DOTALL \| re.IGNORECASE)
344	match = lastmethod_regex.search(contents, start)
345	if match:
346	newmethod = getMethod(match, parent)
347	basename = parent.name.replace("wx", "")
348	isConstructor = (basename == newmethod.name.replace("wx.", ""))
349	if isConstructor or eval("newmethod.name in dir(wx.%s)" % basename):
350	print "Adding %s.%s" % (parent.name, newmethod.name)
351	methods[newmethod.name] = newmethod
352
353	for name in methods:
354	if name[0:3] == "Get":
355	propname = name[3:]
356	basename = parent.name.replace("wx", "")
357	if not propname in eval("dir(wx.%s)" % basename):
358	parent.props.append(propname)
359	else:
360	parent.propConflicts.append(parent.name + "." + propname)
361	# get rid of the destructor and operator methods
362	ignore_methods = ["~" + namespacify_wxClasses(parent.name), "operator ==",
363	"operator <<", "operator >>", "operator =",
364	"operator !=", "operator*", "operator++" ]
365	for method in ignore_methods:
366	if method in methods:
367	methods.pop(method)
368
369	return methods
370
371	def getClasses(doc):
372	global docspath
373	contents = open(doc, "rb").read()
374	link_regex = re.compile(link_re, re.MULTILINE \| re.DOTALL \| re.IGNORECASE)
375	start = contents.find("<H2>Alphabetical class reference</H2>")
376	result = link_regex.search(contents, start)
377	classes = {}
378	while result:
379	start = result.end()
380	name = result.group(2).strip()
381	classpage = result.group(1).split("#")[0]
382	basename = name.replace("wx", "")
383	if basename in dir(wx):
384	classfile = os.path.join(os.path.dirname(doc), classpage)
385	classtext = open(classfile, "rb").read()
386	derivedClasses = getClassDerivedFrom(classtext)
387	description = getClassDescription(classtext)
388	styles = getClassStyles(classtext)
389	extra_styles = getClassStyles(classtext, extraStyles=True)
390	classes[name] = wxClass(name, description, derivedClasses, styles, extra_styles)
391	classes[name].methods = getClassMethods(classfile, classes[name])
392	result = link_regex.search(contents, start)
393
394	return classes