# LexGen.py - implemented 2002 by Neil Hodgson neilh@scintilla.org
# Released to the public domain.

# Regenerate the Scintilla and SciTE source files that list
# all the lexers and all the properties files.
# Should be run whenever a lexer is added or removed.
# Requires Python 2.4 or later.
# Most files are regenerated in place with templates stored in comments.
# The VS .NET project file is generated into a different file as the
# VS .NET environment will not retain comments when modifying the file.
# Each file is copied to a string, apart from the sections between a
# ++Autogenerated comment and a --Autogenerated comment, which are
# generated by the CopyWithInsertion function. After the whole
# string is instantiated, it is compared with the target file and,
# if different, the file is rewritten.
# Does not regenerate the Visual C++ 6 project files but does regenerate
# the VS .NET project file.

import string
import sys
import os
import glob

# EOL constants
CR = "\r"
LF = "\n"
CRLF = "\r\n"
if sys.platform == "win32":
    NATIVE = CRLF
else:
    # Yes, LF is the native EOL even on Mac OS X. CR is just for
    # Mac OS <=9 (a.k.a. "Mac Classic")
    NATIVE = LF

# Automatically generated sections contain start and end comments,
# a definition line and the results.
# The results are replaced by regenerating based on the definition line.
# The definition line is a comment prefix followed by "**".
# If there is a digit after the ** then it indicates which list to use,
# and the digit and the next character are not part of the definition.
# Backslash is used as an escape within the definition line.
# The part between \( and \) is repeated for each item in the list.
# \* is replaced by each list item. \t and \n are tab and newline.
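# A hypothetical illustration (not copied from a real target file; the
# LINK_LEXER macro and module names are assumed): with commentPrefix "//"
# and a list ["lmAda", "lmCPP"], a section such as
#   //++Autogenerated -- run LexGen.py to regenerate
#   //**\(\tLINK_LEXER(\*);\n\)
#       LINK_LEXER(lmAda);
#       LINK_LEXER(lmCPP);
#   //--Autogenerated -- end of automatically generated section
# is rewritten by CopyWithInsertion keeping the definition line and
# repeating the \(...\) part once per list item with \* substituted.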
def CopyWithInsertion(input, commentPrefix, retainDefs, eolType, *lists):
    copying = 1
    listid = 0
    output = []
    for line in input.splitlines(0):
        isStartGenerated = line.startswith(commentPrefix + "++Autogenerated")
        if copying and not isStartGenerated:
            output.append(line)
        if isStartGenerated:
            if retainDefs:
                output.append(line)
            copying = 0
            definition = ""
        elif not copying and line.startswith(commentPrefix + "**"):
            if retainDefs:
                output.append(line)
            definition = line[len(commentPrefix + "**"):]
            if (commentPrefix == "<!--") and (" -->" in definition):
                definition = definition.replace(" -->", "")
            listid = 0
            if definition[0] in string.digits:
                listid = int(definition[:1])
                definition = definition[2:]
            # Hide doubled backslashes as a control character
            definition = definition.replace("\\\\", "\001")
            # Do some normal C style transforms
            definition = definition.replace("\\n", "\n")
            definition = definition.replace("\\t", "\t")
            # Get the doubled backslashes back as single backslashes
            definition = definition.replace("\001", "\\")
            startRepeat = definition.find("\\(")
            endRepeat = definition.find("\\)")
            intro = definition[:startRepeat]
            out = ""
            if intro.endswith("\n"):
                pos = 0
            else:
                pos = len(intro)
            out += intro
            middle = definition[startRepeat+2:endRepeat]
            for i in lists[listid]:
                item = middle.replace("\\*", i)
                if pos and (pos + len(item) >= 80):
                    out += "\\\n"
                    pos = 0
                out += item
                pos += len(item)
                if item.endswith("\n"):
                    pos = 0
            outro = definition[endRepeat+2:]
            out += outro
            out = out.replace("\n", eolType)    # correct EOLs in generated content
            output.append(out)
        elif line.startswith(commentPrefix + "--Autogenerated"):
            copying = 1
            if retainDefs:
                output.append(line)
    output = [line.rstrip(" \t") for line in output]    # trim trailing whitespace
    return eolType.join(output) + eolType

def UpdateFile(filename, updated):
    """ If the file is different from updated then copy updated
    into the file; otherwise leave it alone so CVS and make don't
    treat it as modified. """
    try:
        infile = open(filename, "rb")
    except IOError:    # File is not there yet
        out = open(filename, "wb")
        out.write(updated.encode('utf-8'))
        out.close()
        print("New %s" % filename)
        return
    original = infile.read()
    infile.close()
    original = original.decode('utf-8')
    if updated != original:
        os.unlink(filename)
        out = open(filename, "wb")
        out.write(updated.encode('utf-8'))
        out.close()
        print("Changed %s " % filename)
    #~ else:
    #~     print "Unchanged", filename

def Generate(inpath, outpath, commentPrefix, eolType, *lists):
    """Generate 'outpath' from 'inpath'.

    "eolType" indicates the type of EOLs to use in the generated
    file. It should be one of the following constants: LF, CRLF,
    CR, or NATIVE.
    """
    #print "generate '%s' -> '%s' (comment prefix: %r, eols: %r)"\
    #    % (inpath, outpath, commentPrefix, eolType)
    try:
        infile = open(inpath, "rb")
    except IOError:
        print("Can not open %s" % inpath)
        return
    original = infile.read()
    infile.close()
    original = original.decode('utf-8')
    updated = CopyWithInsertion(original, commentPrefix,
        inpath == outpath, eolType, *lists)
    UpdateFile(outpath, updated)

def Regenerate(filename, commentPrefix, eolType, *lists):
    """Regenerate the given file in place.

    "eolType" indicates the type of EOLs to use in the generated
    file. It should be one of the following constants: LF, CRLF,
    CR, or NATIVE.
    """
    Generate(filename, filename, commentPrefix, eolType, *lists)

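# FindModules scans a lexer source file for LexerModule declarations and
# collects the module names (the second token once "(" is split off).
# A hypothetical example of the kind of line matched (identifiers assumed,
# not taken from the sources):
#   LexerModule lmAda(SCLEX_ADA, ColouriseAdaDoc, "ada");
# would contribute "lmAda" to the returned list.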
def FindModules(lexFile):
    modules = []
    f = open(lexFile)
    for l in f.readlines():
        if l.startswith("LexerModule"):
            l = l.replace("(", " ")
            modules.append(l.split()[1])
    return modules

knownIrregularProperties = [
    "fold",
    "styling.within.preprocessor",
    "tab.timmy.whinge.level",
    "asp.default.language",
    "html.tags.case.sensitive",
    "ps.level",
    "ps.tokenize",
    "sql.backslash.escapes",
    "nsis.uservars",
    "nsis.ignorecase"
]

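# FindProperties records every lower-case property name passed to a
# GetProperty-style call, provided it is a known irregular property or starts
# with "fold." or "lexer.". A hypothetical example of a matching line (the
# call and property name are assumed, not taken from the sources):
#   bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
# would record the property name "fold.compact".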
def FindProperties(lexFile):
    properties = {}
    f = open(lexFile)
    for l in f.readlines():
        if "GetProperty" in l:
            l = l.strip()
            if not l.startswith("//"):    # Drop comments
                propertyName = l.split("\"")[1]
                if propertyName.lower() == propertyName:
                    # Only allow lower case property names
                    if propertyName in knownIrregularProperties or \
                            propertyName.startswith("fold.") or \
                            propertyName.startswith("lexer."):
                        properties[propertyName] = 1
    return properties

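# FindPropertyDocumentation gathers the comment text that follows a
# "// property <name>" marker in a lexer source file. A hypothetical example
# of the convention parsed below (property name and text assumed):
#   // property fold.compact
#   //   Folding is more compact when this option is on.
# would map "fold.compact" to the sentence that follows it.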
def FindPropertyDocumentation(lexFile):
    documents = {}
    f = open(lexFile)
    name = ""
    for l in f.readlines():
        l = l.strip()
        if "// property " in l:
            propertyName = l.split()[2]
            if propertyName.lower() == propertyName:
                # Only allow lower case property names
                name = propertyName
                documents[name] = ""
        elif name:
            if l.startswith("//"):
                if documents[name]:
                    documents[name] += " "
                documents[name] += l[2:].strip()
            else:
                name = ""
    return documents

def ciCompare(a, b):
    return cmp(a.lower(), b.lower())

def ciKey(a):
    return a.lower()

def sortListInsensitive(l):
    try:    # Try key function
        l.sort(key=ciKey)
    except TypeError:    # Earlier version of Python, so use comparison function
        l.sort(ciCompare)
def RegenerateAll():
    root = "../../"

    # Find all the lexer source code files
    lexFilePaths = glob.glob(root + "scintilla/src/Lex*.cxx")
    sortListInsensitive(lexFilePaths)
    lexFiles = [os.path.basename(f)[:-4] for f in lexFilePaths]
    print(lexFiles)
    lexerModules = []
    lexerProperties = {}
    propertyDocuments = {}
    for lexFile in lexFilePaths:
        lexerModules.extend(FindModules(lexFile))
        for k in FindProperties(lexFile).keys():
            lexerProperties[k] = 1
        documents = FindPropertyDocumentation(lexFile)
        for k in documents.keys():
            propertyDocuments[k] = documents[k]
    sortListInsensitive(lexerModules)
    del lexerProperties["fold.comment.python"]
    lexerProperties = list(lexerProperties.keys())
    sortListInsensitive(lexerProperties)

    # Generate HTML to document each property
    # This is done because tags can not be safely put inside comments in HTML
    documentProperties = list(propertyDocuments.keys())
    sortListInsensitive(documentProperties)
    propertiesHTML = []
    for k in documentProperties:
        propertiesHTML.append("\t<tr>\n\t<td>%s</td>\n\t<td>%s</td>\n\t</tr>" %
            (k, propertyDocuments[k]))

    # Find all the SciTE properties files
    otherProps = ["abbrev.properties", "Embedded.properties", "SciTEGlobal.properties", "SciTE.properties"]
    if os.path.exists(root + "scite"):
        propFilePaths = glob.glob(root + "scite/src/*.properties")
        sortListInsensitive(propFilePaths)
        propFiles = [os.path.basename(f) for f in propFilePaths if os.path.basename(f) not in otherProps]
        sortListInsensitive(propFiles)
        print(propFiles)

    Regenerate(root + "scintilla/src/KeyWords.cxx", "//", NATIVE, lexerModules)
    Regenerate(root + "scintilla/win32/makefile", "#", NATIVE, lexFiles)
    Regenerate(root + "scintilla/win32/scintilla.mak", "#", NATIVE, lexFiles)
    Regenerate(root + "scintilla/win32/scintilla_vc6.mak", "#", NATIVE, lexFiles)
    # Use Unix EOLs for gtk Makefiles so they work for Linux users when
    # extracted from the Scintilla source ZIP (typically created on
    # Windows).
    Regenerate(root + "scintilla/gtk/makefile", "#", LF, lexFiles)
    Regenerate(root + "scintilla/gtk/scintilla.mak", "#", NATIVE, lexFiles)
    Regenerate(root + "scintilla/macosx/makefile", "#", LF, lexFiles)
    if os.path.exists(root + "scite"):
        Regenerate(root + "scite/win32/makefile", "#", NATIVE, lexFiles, propFiles)
        Regenerate(root + "scite/win32/scite.mak", "#", NATIVE, lexFiles, propFiles)
        Regenerate(root + "scite/src/SciTEProps.cxx", "//", NATIVE, lexerProperties)
        Regenerate(root + "scite/doc/SciTEDoc.html", "<!--", NATIVE, propertiesHTML)
        Generate(root + "scite/boundscheck/vcproj.gen",
            root + "scite/boundscheck/SciTE.vcproj", "#", NATIVE, lexFiles)

RegenerateAll()