]> git.saurik.com Git - apple/icu.git/blame - icuSources/tools/icu-svnprops-check.py
ICU-57165.0.1.tar.gz
[apple/icu.git] / icuSources / tools / icu-svnprops-check.py
CommitLineData
729e4ab9
A
1#! /usr/bin/python
2
4388f060 3# Copyright (C) 2009-2011, International Business Machines Corporation, Google and Others.
729e4ab9
A
4# All rights reserved.
5
6#
7# Script to check and fix svn property settings for ICU source files.
8# Also check for the correct line endings on files with svn:eol-style = native
9#
10# THIS SCRIPT DOES NOT WORK ON WINDOWS
11# It only works correctly on platforms where the native line ending is a plain \n
12#
13# usage:
14# icu-svnprops-check.py [options]
15#
16# options:
17# -f | --fix Fix any problems that are found
18# -h | --help Print a usage line and exit.
19#
20# The tool operates recursively on the directory from which it is run.
21# Only files from the svn repository are checked.
22# No changes are made to the repository; only the working copy will be altered.
23
24import sys
25import os
26import os.path
27import re
28import getopt
29
30#
31# svn autoprops definitions.
32# Copy and paste here the ICU recommended auto-props from
33# http://icu-project.org/docs/subversion_howto/index.html
34#
35# This program will parse this autoprops string, and verify that files in
36# the repository have the recommended properties set.
37#
# Raw text of the recommended [auto-props] section.  parse_auto_props() skips
# the comment, blank and "[auto-props]" lines and splits every remaining
# "pattern = prop[=value][;prop[=value]...]" line at its first "=".
# NOTE(review): the *.rtf and *.pdf entries name the property "mime-type"
# rather than "svn:mime-type" like every other entry -- confirm against the
# upstream auto-props list before changing them.
svn_auto_props = """
### Section for configuring automatic properties.
[auto-props]
### The format of the entries is:
### file-name-pattern = propname[=value][;propname[=value]...]
### The file-name-pattern can contain wildcards (such as '*' and
### '?'). All entries which match will be applied to the file.
### Note that auto-props functionality must be enabled, which
### is typically done by setting the 'enable-auto-props' option.
*.c = svn:eol-style=native
*.cc = svn:eol-style=native
*.cpp = svn:eol-style=native
*.h = svn:eol-style=native
*.rc = svn:eol-style=native
*.dsp = svn:eol-style=native
*.dsw = svn:eol-style=native
*.sln = svn:eol-style=native
*.vcproj = svn:eol-style=native
configure = svn:eol-style=native;svn:executable
*.sh = svn:eol-style=native;svn:executable
*.pl = svn:eol-style=native;svn:executable
*.py = svn:eol-style=native;svn:executable
*.txt = svn:mime-type=text/plain;svn:eol-style=native
*.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8
*.ucm = svn:eol-style=native
*.html = svn:eol-style=native;svn:mime-type=text/html
*.htm = svn:eol-style=native;svn:mime-type=text/html
*.xml = svn:eol-style=native
Makefile = svn:eol-style=native
*.in = svn:eol-style=native
*.mak = svn:eol-style=native
*.mk = svn:eol-style=native
*.png = svn:mime-type=image/png
*.jpeg = svn:mime-type=image/jpeg
*.jpg = svn:mime-type=image/jpeg
*.bin = svn:mime-type=application/octet-stream
*.brk = svn:mime-type=application/octet-stream
*.cnv = svn:mime-type=application/octet-stream
*.dat = svn:mime-type=application/octet-stream
*.icu = svn:mime-type=application/octet-stream
*.res = svn:mime-type=application/octet-stream
*.spp = svn:mime-type=application/octet-stream
# new additions 2007-dec-5 srl
*.rtf = mime-type=text/rtf
*.pdf = mime-type=application/pdf
# changed 2008-04-08: modified .txt, above, adding mime-type
# changed 2010-11-09: modified .java, adding mime-type
# Note: The escape syntax for semicolon (";;") is supported since subversion 1.6.1
"""
87
88
# file_types: The parsed form of the svn auto-props specification.
#   A list of file types - .cc, .cpp, .txt, etc.
#   Each element is a (type, proplist) tuple:
#     "type" is a regular expression string that will match a file name.
#     "proplist" is a list with one element per property; each property
#     item is a two element tuple, (prop name, prop value).
file_types = list()


def parse_auto_props(auto_props=None):
    """Parse an svn auto-props definition and append the result to file_types.

    auto_props: text of an [auto-props] section.  When omitted, the
                module-level svn_auto_props string is parsed.

    Comment lines, blank lines and the "[auto-props]" header are skipped.
    A line without a "<file-type> =" prefix is reported on stdout and
    ignored.  Nothing is returned; one (regexp, proplist) tuple is appended
    to the global file_types list per accepted line.
    """
    if auto_props is None:
        auto_props = svn_auto_props

    for propline in auto_props.splitlines():
        if re.match(r"\s*(#.*)?$", propline):           # Match comment and blank lines
            continue
        if re.match(r"\s*\[auto-props\]", propline):    # Match the [auto-props] line.
            continue
        if not re.match(r"\s*[^\s]+\s*=", propline):    # minimal syntax check for <file-type> =
            print("Bad line from autoprops definitions: " + propline)
            continue
        file_type, string_proplist = propline.split("=", 1)

        # Transform the file type expression from autoprops into a normal
        # regular expression, e.g. "*.cpp" ==> ".*\.cpp$".
        # Everything is escaped first so that only the two auto-props
        # wildcards, '*' and '?', keep a special meaning.
        file_type = re.escape(file_type.strip())
        file_type = file_type.replace(r"\*", ".*").replace(r"\?", ".")
        file_type = file_type + "$"

        # Example string_proplist at this point: " svn:eol-style=native;svn:executable"
        # Split on ';' into a list of properties.  The negative lookahead and
        # lookbehind in the split regexp prevent matching on ';;', which is an
        # escaped ';' within a property value.
        proplist = list()
        for prop in re.split(r"(?<!;);(?!;)", string_proplist):
            # Properties with no explicit value, e.g. svn:executable, get "".
            prop_name, _, prop_val = prop.partition("=")
            prop_name = prop_name.strip()
            if not prop_name:
                # A stray or trailing ';' yields an empty item; not a property.
                continue
            # Unescape any ";;" in a property value, e.g. the mime-type from
            #   *.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8
            prop_val = prop_val.strip().replace(";;", ";")
            proplist.append((prop_name, prop_val))

        file_types.append((file_type, proplist))
137
138
def runCommand(cmd):
    """Run cmd through the shell and return everything it wrote to stdout.

    cmd: the complete shell command line, as a string.

    If the command does not finish successfully, a message is written to
    stderr and the script exits with the command's own exit code (or 1 when
    the command was killed by a signal).
    """
    output_file = os.popen(cmd)
    try:
        output_text = output_file.read()
    finally:
        exit_status = output_file.close()
    if exit_status:
        sys.stderr.write('" %s " failed. Exiting.\n' % cmd)
        # close() returns an encoded wait status, not an exit code.  Handing
        # it straight to sys.exit() would truncate e.g. 256 (exit code 1)
        # to 0, i.e. report success.
        exit_code = 1
        if os.WIFEXITED(exit_status):
            exit_code = os.WEXITSTATUS(exit_status) or 1
        sys.exit(exit_code)
    return output_text
147
148
def usage():
    """Print a one-line summary of the command-line options to stdout."""
    # Parenthesised single-argument print works under Python 2 and Python 3.
    print("usage: " + sys.argv[0] + " [-f | --fix] [-h | --help]")
151
152
153#
154# UTF-8 file check. For text files, add a charset to the mime-type if their contents are UTF-8
155# file_name: name of a text file.
156# base_mime_type: svn:mime-type property value from the auto-props file (no charset= part)
157# actual_mime_type: existing svn:mime-type property value for the file.
158# return: svn:mime-type property value, with charset added when appropriate.
159#
def check_utf8(file_name, base_mime_type, actual_mime_type):
    """Return the svn:mime-type value a text file should have.

    file_name:        name of a text file.
    base_mime_type:   svn:mime-type value from the auto-props definitions.
    actual_mime_type: existing svn:mime-type property value for the file.

    Returns actual_mime_type unchanged when it already names a charset (the
    file is not read in that case).  Otherwise returns base_mime_type, with
    ";charset=utf-8" appended when the file holds non-ASCII, valid UTF-8
    data and base_mime_type does not already carry a charset.  Warnings
    about non-UTF-8 content and a missing BOM are printed to stdout.
    """
    # If the file already has a charset in its mime-type, don't make any change.
    if "charset=" in actual_mime_type:
        return actual_mime_type

    # Read raw bytes: the checks below are about the encoding itself.
    with open(file_name, "rb") as f:
        data = f.read()

    try:
        data.decode("ascii")
    except UnicodeDecodeError:
        pass
    else:
        # Pure ASCII (this includes an empty file).
        return base_mime_type

    try:
        data.decode("UTF-8")
    except UnicodeDecodeError:
        print("warning: %s: not ASCII, not UTF-8" % file_name)
        return base_mime_type

    # Compare against the complete three-byte UTF-8 BOM, not just 0xEF.
    if not data.startswith(b"\xef\xbb\xbf"):
        print("UTF-8 file with no BOM: " + file_name)

    # Some auto-props entries (e.g. *.java) already include a charset;
    # do not append a second one.
    if "charset=" in base_mime_type:
        return base_mime_type

    # Append charset=utf-8.
    return base_mime_type + ";charset=utf-8"
729e4ab9
A
187
188
def _shell_quote(text):
    """Return text single-quoted so the shell passes it on as one literal word."""
    return "'" + text.replace("'", "'\\''") + "'"


def _has_crlf(path):
    """Return True if the file at path contains a DOS (CR LF) line ending."""
    with open(path, "rb") as f:
        return b"\r\n" in f.read()


def _convert_crlf(path):
    """Rewrite the file at path with every CR LF replaced by a plain LF."""
    with open(path, "rb") as f:
        data = f.read()
    with open(path, "wb") as f:
        f.write(data.replace(b"\r\n", b"\n"))


def main(argv):
    """Check, and optionally fix, the svn properties of the working copy.

    argv: command-line arguments without the program name.
          -f | --fix   fix any problems that are found
          -h | --help  print a usage line and exit

    Every file listed by "svn ls -R" is compared with the auto-props
    definitions.  For each property that differs, the corrective
    "svn propset" command is printed, and run when --fix is given.  Files
    that should have svn:eol-style=native are also checked for DOS line
    endings.  Exits with status 2 on a command-line error.
    """
    fix_problems = False
    try:
        opts, args = getopt.getopt(argv, "fh", ("fix", "help"))
    except getopt.GetoptError as err:
        # The exception text names the offending option; argv[0] need not be it.
        print(str(err))
        usage()
        sys.exit(2)
    for opt, _arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        if opt in ("-f", "--fix"):
            fix_problems = True
    if args:
        print("unexpected command line argument")
        usage()
        sys.exit(2)

    parse_auto_props()
    file_list = runCommand("svn ls -R ").splitlines()

    for f in file_list:
        if os.path.isdir(f):
            continue
        if not os.path.isfile(f):
            print("Repository file not in working copy: " + f)
            continue

        # Auto-props patterns describe file names, not paths: match on the
        # last path component so that "configure" and "Makefile" are also
        # found below the top-level directory.
        base_name = os.path.basename(f)
        # Quote the path once; it is interpolated into shell command lines.
        quoted_f = _shell_quote(f)

        for file_pattern, props in file_types:
            if not re.match(file_pattern, base_name):
                continue
            for propname, propval in props:
                actual_propval = runCommand(
                    "svn propget --strict %s %s" % (propname, quoted_f))
                if propname == "svn:mime-type" and propval.startswith("text/"):
                    # UTF-8 text files should have
                    # svn:mime-type=text/something;charset=utf-8
                    propval = check_utf8(f, propval, actual_propval)
                # A property defined without a value (svn:executable) is
                # reported by svn as "*".
                matches = (propval == actual_propval or
                           (propval == "" and actual_propval == "*"))
                if not matches:
                    propset_cmd = "svn propset %s %s %s" % (
                        propname, _shell_quote(propval), quoted_f)
                    print(propset_cmd)
                    if fix_problems:
                        os.system(propset_cmd)
                if propname == "svn:eol-style" and propval == "native":
                    if _has_crlf(f):
                        if fix_problems:
                            print(f + ": Removing DOS CR characters.")
                            _convert_crlf(f)
                        else:
                            print(f + " contains DOS CR characters.")
240
241
# Run the check only when this file is executed as a script; main() receives
# the command-line arguments without the program name.
if __name__ == "__main__":
    main(sys.argv[1:])