# LexGen.py - implemented 2002 by Neil Hodgson neilh@scintilla.org
# Released to the public domain.

# Regenerate the Scintilla and SciTE source files that list
# all the lexers and all the properties files.
# Should be run whenever a lexer is added or removed.
# Requires Python 2.4 or later.
# Most files are regenerated in place with templates stored in comments.
# The VS .NET project file is generated into a different file as the
# VS .NET environment will not retain comments when modifying the file.
# Each file is copied to a string, apart from the sections between a
# ++Autogenerated comment and a --Autogenerated comment, which are
# generated by the CopyWithInsertion function. After the whole
# string is instantiated, it is compared with the target file and,
# if different, the file is rewritten.
# Does not regenerate the Visual C++ 6 project files but does regenerate
# the VS .NET project file.

import string
import sys
import os
import glob

# EOL constants
CR = "\r"
LF = "\n"
CRLF = "\r\n"
if sys.platform == "win32":
    NATIVE = CRLF
else:
    # Yes, LF is the native EOL even on Mac OS X. CR is just for
    # Mac OS <=9 (a.k.a. "Mac Classic")
    NATIVE = LF

# Automatically generated sections contain start and end comments,
# a definition line and the results.
# The results are replaced by regenerating based on the definition line.
# The definition line is a comment prefix followed by "**".
# If there is a digit after the ** then it indicates which list to use,
# and the digit and the next character are not part of the definition.
# Backslash is used as an escape within the definition line.
# The part between \( and \) is repeated for each item in the list.
# \* is replaced by each list item. \t and \n are tab and newline.
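# A hypothetical illustration (not copied from a real target file; the
# LINK_LEXER macro and module names are assumed): with commentPrefix "//"
# and a list ["lmAda", "lmCPP"], a section such as
#   //++Autogenerated -- run LexGen.py to regenerate
#   //**\(\tLINK_LEXER(\*);\n\)
#       LINK_LEXER(lmAda);
#       LINK_LEXER(lmCPP);
#   //--Autogenerated -- end of automatically generated section
# is rewritten by CopyWithInsertion keeping the definition line and
# repeating the \(...\) part once per list item with \* substituted.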
def CopyWithInsertion(input, commentPrefix, retainDefs, eolType, *lists):
    copying = 1
    listid = 0
    output = []
    for line in input.splitlines(0):
        isStartGenerated = line.startswith(commentPrefix + "++Autogenerated")
        if copying and not isStartGenerated:
            output.append(line)
        if isStartGenerated:
            if retainDefs:
                output.append(line)
            copying = 0
            definition = ""
        elif not copying and line.startswith(commentPrefix + "**"):
            if retainDefs:
                output.append(line)
            definition = line[len(commentPrefix + "**"):]
            if (commentPrefix == "<!--") and (" -->" in definition):
                definition = definition.replace(" -->", "")
            listid = 0
            if definition[0] in string.digits:
                listid = int(definition[:1])
                definition = definition[2:]
            # Hide doubled backslashes as a control character
            definition = definition.replace("\\\\", "\001")
            # Do some normal C style transforms
            definition = definition.replace("\\n", "\n")
            definition = definition.replace("\\t", "\t")
            # Get the doubled backslashes back as single backslashes
            definition = definition.replace("\001", "\\")
            startRepeat = definition.find("\\(")
            endRepeat = definition.find("\\)")
            intro = definition[:startRepeat]
            out = ""
            if intro.endswith("\n"):
                pos = 0
            else:
                pos = len(intro)
            out += intro
            middle = definition[startRepeat+2:endRepeat]
            for i in lists[listid]:
                item = middle.replace("\\*", i)
                if pos and (pos + len(item) >= 80):
                    out += "\\\n"
                    pos = 0
                out += item
                pos += len(item)
                if item.endswith("\n"):
                    pos = 0
            outro = definition[endRepeat+2:]
            out += outro
            out = out.replace("\n", eolType)    # correct EOLs in generated content
            output.append(out)
        elif line.startswith(commentPrefix + "--Autogenerated"):
            copying = 1
            if retainDefs:
                output.append(line)
    output = [line.rstrip(" \t") for line in output]    # trim trailing whitespace
    return eolType.join(output) + eolType

def UpdateFile(filename, updated):
    """ If the file is different from updated then copy updated
    into the file; otherwise leave it alone so CVS and make don't
    treat it as modified. """
    try:
        infile = open(filename, "rb")
    except IOError:    # File is not there yet
        out = open(filename, "wb")
        out.write(updated.encode('utf-8'))
        out.close()
        print("New %s" % filename)
        return
    original = infile.read()
    infile.close()
    original = original.decode('utf-8')
    if updated != original:
        os.unlink(filename)
        out = open(filename, "wb")
        out.write(updated.encode('utf-8'))
        out.close()
        print("Changed %s " % filename)
    #~ else:
    #~     print "Unchanged", filename

def Generate(inpath, outpath, commentPrefix, eolType, *lists):
    """Generate 'outpath' from 'inpath'.

    "eolType" indicates the type of EOLs to use in the generated
    file. It should be one of the following constants: LF, CRLF,
    CR, or NATIVE.
    """
    #print "generate '%s' -> '%s' (comment prefix: %r, eols: %r)"\
    #    % (inpath, outpath, commentPrefix, eolType)
    try:
        infile = open(inpath, "rb")
    except IOError:
        print("Can not open %s" % inpath)
        return
    original = infile.read()
    infile.close()
    original = original.decode('utf-8')
    updated = CopyWithInsertion(original, commentPrefix,
        inpath == outpath, eolType, *lists)
    UpdateFile(outpath, updated)

def Regenerate(filename, commentPrefix, eolType, *lists):
    """Regenerate the given file in place.

    "eolType" indicates the type of EOLs to use in the generated
    file. It should be one of the following constants: LF, CRLF,
    CR, or NATIVE.
    """
    Generate(filename, filename, commentPrefix, eolType, *lists)

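# FindModules scans a lexer source file for LexerModule declarations and
# collects the module names (the second token once "(" is split off).
# A hypothetical example of the kind of line matched (identifiers assumed,
# not taken from the sources):
#   LexerModule lmAda(SCLEX_ADA, ColouriseAdaDoc, "ada");
# would contribute "lmAda" to the returned list.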
def FindModules(lexFile):
    modules = []
    f = open(lexFile)
    for l in f.readlines():
        if l.startswith("LexerModule"):
            l = l.replace("(", " ")
            modules.append(l.split()[1])
    return modules

knownIrregularProperties = [
    "fold",
    "styling.within.preprocessor",
    "tab.timmy.whinge.level",
    "asp.default.language",
    "html.tags.case.sensitive",
    "ps.level",
    "ps.tokenize",
    "sql.backslash.escapes",
    "nsis.uservars",
    "nsis.ignorecase"
]

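# FindProperties records every lower-case property name passed to a
# GetProperty-style call, provided it is a known irregular property or starts
# with "fold." or "lexer.". A hypothetical example of a matching line (the
# call and property name are assumed, not taken from the sources):
#   bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
# would record the property name "fold.compact".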
def FindProperties(lexFile):
    properties = {}
    f = open(lexFile)
    for l in f.readlines():
        if "GetProperty" in l:
            l = l.strip()
            if not l.startswith("//"):    # Drop comments
                propertyName = l.split("\"")[1]
                if propertyName.lower() == propertyName:
                    # Only allow lower case property names
                    if propertyName in knownIrregularProperties or \
                            propertyName.startswith("fold.") or \
                            propertyName.startswith("lexer."):
                        properties[propertyName] = 1
    return properties

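# FindPropertyDocumentation gathers the comment text that follows a
# "// property <name>" marker in a lexer source file. A hypothetical example
# of the convention parsed below (property name and text assumed):
#   // property fold.compact
#   //   Folding is more compact when this option is on.
# would map "fold.compact" to the sentence that follows it.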
def FindPropertyDocumentation(lexFile):
    documents = {}
    f = open(lexFile)
    name = ""
    for l in f.readlines():
        l = l.strip()
        if "// property " in l:
            propertyName = l.split()[2]
            if propertyName.lower() == propertyName:
                # Only allow lower case property names
                name = propertyName
                documents[name] = ""
        elif name:
            if l.startswith("//"):
                if documents[name]:
                    documents[name] += " "
                documents[name] += l[2:].strip()
            else:
                name = ""
    return documents

def ciCompare(a, b):
    return cmp(a.lower(), b.lower())

def ciKey(a):
    return a.lower()

def sortListInsensitive(l):
    try:    # Try key function
        l.sort(key=ciKey)
    except TypeError:    # Earlier version of Python, so use comparison function
        l.sort(ciCompare)
def RegenerateAll():
    root = "../../"

    # Find all the lexer source code files
    lexFilePaths = glob.glob(root + "scintilla/src/Lex*.cxx")
    sortListInsensitive(lexFilePaths)
    lexFiles = [os.path.basename(f)[:-4] for f in lexFilePaths]
    print(lexFiles)
    lexerModules = []
    lexerProperties = {}
    propertyDocuments = {}
    for lexFile in lexFilePaths:
        lexerModules.extend(FindModules(lexFile))
        for k in FindProperties(lexFile).keys():
            lexerProperties[k] = 1
        documents = FindPropertyDocumentation(lexFile)
        for k in documents.keys():
            propertyDocuments[k] = documents[k]
    sortListInsensitive(lexerModules)
    del lexerProperties["fold.comment.python"]
    lexerProperties = list(lexerProperties.keys())
    sortListInsensitive(lexerProperties)

    # Generate HTML to document each property
    # This is done because tags can not be safely put inside comments in HTML
    documentProperties = list(propertyDocuments.keys())
    sortListInsensitive(documentProperties)
    propertiesHTML = []
    for k in documentProperties:
        propertiesHTML.append("\t<tr>\n\t<td>%s</td>\n\t<td>%s</td>\n\t</tr>" %
            (k, propertyDocuments[k]))

    # Find all the SciTE properties files
    otherProps = ["abbrev.properties", "Embedded.properties", "SciTEGlobal.properties", "SciTE.properties"]
    if os.path.exists(root + "scite"):
        propFilePaths = glob.glob(root + "scite/src/*.properties")
        sortListInsensitive(propFilePaths)
        propFiles = [os.path.basename(f) for f in propFilePaths if os.path.basename(f) not in otherProps]
        sortListInsensitive(propFiles)
        print(propFiles)

    Regenerate(root + "scintilla/src/KeyWords.cxx", "//", NATIVE, lexerModules)
    Regenerate(root + "scintilla/win32/makefile", "#", NATIVE, lexFiles)
    Regenerate(root + "scintilla/win32/scintilla.mak", "#", NATIVE, lexFiles)
    Regenerate(root + "scintilla/win32/scintilla_vc6.mak", "#", NATIVE, lexFiles)
    # Use Unix EOLs for gtk Makefiles so they work for Linux users when
    # extracted from the Scintilla source ZIP (typically created on
    # Windows).
    Regenerate(root + "scintilla/gtk/makefile", "#", LF, lexFiles)
    Regenerate(root + "scintilla/gtk/scintilla.mak", "#", NATIVE, lexFiles)
    Regenerate(root + "scintilla/macosx/makefile", "#", LF, lexFiles)
    if os.path.exists(root + "scite"):
        Regenerate(root + "scite/win32/makefile", "#", NATIVE, lexFiles, propFiles)
        Regenerate(root + "scite/win32/scite.mak", "#", NATIVE, lexFiles, propFiles)
        Regenerate(root + "scite/src/SciTEProps.cxx", "//", NATIVE, lexerProperties)
        Regenerate(root + "scite/doc/SciTEDoc.html", "<!--", NATIVE, propertiesHTML)
        Generate(root + "scite/boundscheck/vcproj.gen",
            root + "scite/boundscheck/SciTE.vcproj", "#", NATIVE, lexFiles)

RegenerateAll()