]>
Commit | Line | Data |
---|---|---|
729e4ab9 A |
1 | #! /usr/bin/python |
2 | ||
4388f060 | 3 | # Copyright (C) 2009-2011, International Business Machines Corporation, Google and Others. |
729e4ab9 A |
4 | # All rights reserved. |
5 | ||
6 | # | |
7 | # Script to check and fix svn property settings for ICU source files. | |
8 | # Also check for the correct line endings on files with svn:eol-style = native | |
9 | # | |
10 | # THIS SCRIPT DOES NOT WORK ON WINDOWS | |
11 | # It only works correctly on platforms where the native line ending is a plain \n | |
12 | # | |
13 | # usage: | |
14 | # icu-svnprops-check.py [options] | |
15 | # | |
16 | # options: | |
17 | # -f | --fix Fix any problems that are found | |
18 | # -h | --help Print a usage line and exit. | |
19 | # | |
20 | # The tool operates recursively on the directory from which it is run. | |
21 | # Only files from the svn repository are checked. | |
22 | # No changes are made to the repository; only the working copy will be altered. | |
23 | ||
24 | import sys | |
25 | import os | |
26 | import os.path | |
27 | import re | |
28 | import getopt | |
29 | ||
30 | # | |
31 | # svn autoprops definitions. | |
32 | # Copy and paste here the ICU recommended auto-props from | |
33 | # http://icu-project.org/docs/subversion_howto/index.html | |
34 | # | |
35 | # This program will parse this autoprops string, and verify that files in | |
36 | # the repository have the recommended properties set. | |
37 | # | |
# Raw auto-props text. parse_auto_props() splits it into lines, skips the
# comment, blank and [auto-props] lines, and parses every remaining
# "pattern = prop[=value][;prop[=value]...]" entry.
# NOTE(review): the *.rtf and *.pdf entries name the property "mime-type"
# rather than "svn:mime-type" like every other entry -- possibly a typo in
# the copied definitions; confirm against the upstream page before changing.
svn_auto_props = """
### Section for configuring automatic properties.
[auto-props]
### The format of the entries is:
### file-name-pattern = propname[=value][;propname[=value]...]
### The file-name-pattern can contain wildcards (such as '*' and
### '?'). All entries which match will be applied to the file.
### Note that auto-props functionality must be enabled, which
### is typically done by setting the 'enable-auto-props' option.
*.c = svn:eol-style=native
*.cc = svn:eol-style=native
*.cpp = svn:eol-style=native
*.h = svn:eol-style=native
*.rc = svn:eol-style=native
*.dsp = svn:eol-style=native
*.dsw = svn:eol-style=native
*.sln = svn:eol-style=native
*.vcproj = svn:eol-style=native
configure = svn:eol-style=native;svn:executable
*.sh = svn:eol-style=native;svn:executable
*.pl = svn:eol-style=native;svn:executable
*.py = svn:eol-style=native;svn:executable
*.txt = svn:mime-type=text/plain;svn:eol-style=native
*.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8
*.ucm = svn:eol-style=native
*.html = svn:eol-style=native;svn:mime-type=text/html
*.htm = svn:eol-style=native;svn:mime-type=text/html
*.xml = svn:eol-style=native
Makefile = svn:eol-style=native
*.in = svn:eol-style=native
*.mak = svn:eol-style=native
*.mk = svn:eol-style=native
*.png = svn:mime-type=image/png
*.jpeg = svn:mime-type=image/jpeg
*.jpg = svn:mime-type=image/jpeg
*.bin = svn:mime-type=application/octet-stream
*.brk = svn:mime-type=application/octet-stream
*.cnv = svn:mime-type=application/octet-stream
*.dat = svn:mime-type=application/octet-stream
*.icu = svn:mime-type=application/octet-stream
*.res = svn:mime-type=application/octet-stream
*.spp = svn:mime-type=application/octet-stream
# new additions 2007-dec-5 srl
*.rtf = mime-type=text/rtf
*.pdf = mime-type=application/pdf
# changed 2008-04-08: modified .txt, above, adding mime-type
# changed 2010-11-09: modified .java, adding mime-type
# Note: The escape syntax for semicolon (";;") is supported since subversion 1.6.1
"""
87 | ||
88 | ||
# file_types: The parsed form of the svn auto-props specification.
#   A list of file types - .cc, .cpp, .txt, etc.
#   Each element is a (type, proplist) tuple:
#     "type" is a regular expression string that will match a file name.
#     proplist is a list with one element per property.
#     Each property item is a two-element tuple, (prop name, prop value);
#     prop value is "" for a property given without a value.
#   Filled in by parse_auto_props().
file_types = list()
96 | ||
def parse_auto_props(auto_props=None, types=None):
    """Parse an svn auto-props specification into (pattern, properties) pairs.

    Each entry of the form "file-name-pattern = prop[=value][;prop[=value]...]"
    becomes a tuple (regexp_string, [(prop_name, prop_value), ...]).
    Comment lines, blank lines and the "[auto-props]" header are skipped;
    a line without a "<pattern> =" prefix is reported on stdout and skipped.

    Args:
        auto_props: the specification text. Defaults to the module-level
            svn_auto_props string.
        types: the list that parsed entries are appended to. Defaults to the
            module-level file_types list.

    Returns:
        The list that was appended to.
    """
    # Resolve the defaults at call time so that a plain parse_auto_props()
    # call keeps filling the module-level file_types from svn_auto_props.
    if auto_props is None:
        auto_props = svn_auto_props
    if types is None:
        types = file_types

    for propline in auto_props.splitlines():
        if re.match(r"\s*(#.*)?$", propline):           # Match comment and blank lines
            continue
        if re.match(r"\s*\[auto-props\]", propline):    # Match the [auto-props] line.
            continue
        if not re.match(r"\s*[^\s]+\s*=", propline):    # minimal syntax check for <file-type> =
            print("Bad line from autoprops definitions: " + propline)
            continue
        file_type, string_proplist = propline.split("=", 1)

        # Transform the file type expression from autoprops into a normal
        # regular expression, e.g. "*.cpp" ==> ".*\.cpp$".
        # Everything is escaped first so that only the two auto-props
        # wildcards, '*' and '?', carry a special meaning in the result.
        file_type = re.escape(file_type.strip())
        file_type = file_type.replace(r"\*", ".*").replace(r"\?", ".")
        file_type += "$"

        # Example string_proplist at this point:
        #   " svn:eol-style=native;svn:executable"
        # Split on ';' into a list of properties. The negative lookahead and
        # lookbehind keep the split from matching ';;', which is an escaped
        # ';' within a property value.
        proplist = []
        for prop in re.split("(?<!;);(?!;)", string_proplist):
            # Properties with no explicit value (e.g. svn:executable) get "".
            prop_name, _, prop_val = prop.partition("=")
            prop_name = prop_name.strip()
            if not prop_name:
                # Nothing between two separators (e.g. a trailing ';').
                continue
            # Unescape any ";;" in a property value, e.g. the mime-type from
            #   *.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8
            prop_val = prop_val.strip().replace(";;", ";")
            proplist.append((prop_name, prop_val))

        types.append((file_type, proplist))
    return types
137 | ||
138 | ||
def runCommand(cmd):
    """Run a shell command and return everything it wrote to standard output.

    Args:
        cmd: the command line, as a single string handed to the shell.

    Returns:
        The command's standard output as text.

    If the command does not exit with status 0, a message is written to
    stderr and the script terminates via sys.exit() with a non-zero status.
    """
    # Function-scope import: this is the only place subprocess is needed.
    import subprocess

    proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE,
                            universal_newlines=True)
    output_text = proc.communicate()[0]
    status = proc.returncode
    if status:
        sys.stderr.write('" %s " failed. Exiting.\n' % cmd)
        # returncode is the real exit code (negative when the child was
        # killed by a signal), so it can be passed on without the
        # wait()-status encoding that made e.g. 256 wrap around to 0.
        sys.exit(status if status > 0 else 1)
    return output_text
147 | ||
148 | ||
def usage():
    """Print the one-line command synopsis to standard output."""
    # A single parenthesised argument prints identically on Python 2 and 3.
    print("usage: " + sys.argv[0] + " [-f | --fix] [-h | --help]")
151 | ||
152 | ||
153 | # | |
154 | # UTF-8 file check. For text files, add a charset to the mime-type if their contents are UTF-8 | |
155 | # file_name: name of a text file. | |
156 | # base_mime_type: svn:mime-type property value from the auto-props file (no charset= part) | |
157 | # actual_mime_type: existing svn:mime-type property value for the file. | |
158 | # return: svn:mime-type property value, with charset added when appropriate. | |
159 | # | |
def check_utf8(file_name, base_mime_type, actual_mime_type):
    """Return the svn:mime-type a text file should have, adding a charset for UTF-8.

    Args:
        file_name: name of a text file.
        base_mime_type: svn:mime-type value from the auto-props definitions
            (no charset= part).
        actual_mime_type: the file's existing svn:mime-type property value.

    Returns:
        actual_mime_type unchanged if it already carries a charset;
        base_mime_type if the file is pure ASCII or is not valid UTF-8 (the
        latter also prints a warning); otherwise base_mime_type with
        ";charset=utf-8" appended.
    """
    # If the file already has a charset in its mime-type, don't make any change.
    if actual_mime_type.find("charset=") > 0:
        return actual_mime_type

    # Read raw bytes so the checks below see the file exactly as stored.
    with open(file_name, 'rb') as f:
        data = f.read()

    try:
        data.decode("ascii")
    except UnicodeDecodeError:
        pass
    else:
        # Pure ASCII (this includes an empty file).
        return base_mime_type

    try:
        data.decode("UTF-8")
    except UnicodeDecodeError:
        print("warning: %s: not ASCII, not UTF-8" % file_name)
        return base_mime_type

    # Compare against the complete three-byte BOM; the lead byte 0xEF alone
    # also starts ordinary characters in the range U+F000..U+FFFF.
    if not data.startswith(b"\xef\xbb\xbf"):
        print("UTF-8 file with no BOM: " + file_name)

    # Append charset=utf-8.
    return base_mime_type + ';charset=utf-8'
729e4ab9 A |
187 | |
188 | ||
def _contains_cr(path):
    """Return True if the file at path contains at least one CR byte."""
    with open(path, 'rb') as fh:
        return b"\r" in fh.read()


def _convert_cr_to_lf(path):
    """Rewrite the file at path with CRLF and lone-CR line endings turned into LF."""
    with open(path, 'rb') as fh:
        data = fh.read()
    with open(path, 'wb') as fh:
        fh.write(data.replace(b"\r\n", b"\n").replace(b"\r", b"\n"))


def main(argv):
    """Check, and optionally fix, svn properties of the files below the current directory.

    Lists the repository files with "svn ls -R". For every file whose base
    name matches an auto-props pattern, each expected property is compared
    with the value reported by "svn propget"; for every mismatch the
    corrective "svn propset" command is printed. Files that should have
    svn:eol-style=native are also checked for CR characters.

    Args:
        argv: command line arguments without the program name.
            -f / --fix   run the printed propset commands and convert CR
                         line endings in the working copy.
            -h / --help  print a usage line and exit.

    Exits with status 2 on an invalid option or an unexpected argument.
    """
    # Shell quoting for file names and property values; the helper lives in
    # different modules on Python 3 and Python 2.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    fix_problems = False
    try:
        opts, args = getopt.getopt(argv, "fh", ("fix", "help"))
    except getopt.GetoptError as err:
        # The exception text names the offending option; argv[0] need not be it.
        print(str(err))
        usage()
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        if opt in ("-f", "--fix"):
            fix_problems = True
    if args:
        print("unexpected command line argument")
        usage()
        sys.exit(2)

    parse_auto_props()
    output = runCommand("svn ls -R ")
    file_list = output.splitlines()

    for f in file_list:
        if os.path.isdir(f):
            continue
        if not os.path.isfile(f):
            print("Repository file not in working copy: " + f)
            continue

        # Auto-props patterns apply to the file name, not to the path, so
        # that e.g. "configure" or "Makefile" also match in subdirectories.
        base_name = os.path.basename(f)
        quoted_f = quote(f)

        for file_pattern, props in file_types:
            if not re.match(file_pattern, base_name):
                continue
            for propname, propval in props:
                actual_propval = runCommand(
                    "svn propget --strict %s %s" % (quote(propname), quoted_f))
                if propname == "svn:mime-type" and propval.startswith("text/"):
                    # check for UTF-8 text files, should have
                    # svn:mime-type=text/something;charset=utf-8
                    propval = check_utf8(f, propval, actual_propval)
                # A value-less property (e.g. svn:executable) is reported
                # by svn as "*", which counts as a match for "".
                matches = (propval == actual_propval
                           or (propval == "" and actual_propval == "*"))
                if not matches:
                    propset_cmd = "svn propset %s %s %s" % (
                        quote(propname), quote(propval), quoted_f)
                    print(propset_cmd)
                    if fix_problems:
                        os.system(propset_cmd)
                if propname == "svn:eol-style" and propval == "native":
                    if _contains_cr(f):
                        if fix_problems:
                            print(f + ": Removing DOS CR characters.")
                            _convert_cr_to_lf(f)
                        else:
                            print(f + " contains DOS CR characters.")
240 | ||
241 | ||
# Run the check only when this file is executed as a script; the program
# name is dropped so that main() receives just the options.
if __name__ == "__main__":
    main(sys.argv[1:])