]> git.saurik.com Git - apple/icu.git/blame - icuSources/tools/icu-svnprops-check.py
ICU-62135.0.1.tar.gz
[apple/icu.git] / icuSources / tools / icu-svnprops-check.py
CommitLineData
729e4ab9
A
1#! /usr/bin/python
2
f3c0d7a5
A
3# Copyright (C) 2016 and later: Unicode, Inc. and others.
4# License & terms of use: http://www.unicode.org/copyright.html
5
4388f060 6# Copyright (C) 2009-2011, International Business Machines Corporation, Google and Others.
729e4ab9
A
7# All rights reserved.
8
9#
10# Script to check and fix svn property settings for ICU source files.
11# Also check for the correct line endings on files with svn:eol-style = native
12#
13# THIS SCRIPT DOES NOT WORK ON WINDOWS
14# It only works correctly on platforms where the native line ending is a plain \n
15#
16# usage:
17# icu-svnprops-check.py [options]
18#
19# options:
20# -f | --fix Fix any problems that are found
21# -h | --help Print a usage line and exit.
22#
23# The tool operates recursively on the directory from which it is run.
24# Only files from the svn repository are checked.
25# No changes are made to the repository; only the working copy will be altered.
26
27import sys
28import os
29import os.path
30import re
31import getopt
32
729e4ab9
A
33
# file_types: The parsed form of the svn auto-props specification.
#    A list of file types - .cc, .cpp, .txt, etc.
#    Each element is a (type, proplist) pair:
#       "type" is a regular expression string that will match a file name.
#       "proplist" is another list, one element per property.
#       Each property item is a two-element (prop name, prop value) tuple.
#    The list starts out empty and is filled in by parse_auto_props().
file_types = []
41
def parse_auto_props(auto_props_text=None, type_list=None):
    """Parse an svn auto-props specification into (pattern, proplist) entries.

    auto_props_text: the text of the auto-props definition, one
                     "<file-type> = prop[=value][;prop[=value]...]" entry per
                     line. Defaults to the module-level svn_auto_props.
    type_list:       the list that parsed entries are appended to. Defaults to
                     the module-level file_types.

    Each appended entry is a (regex_string, proplist) tuple, where proplist is
    a list of (prop_name, prop_value) tuples. Properties given without an
    explicit value (e.g. svn:executable) get the value "".
    Lines that fail a minimal syntax check are reported and skipped.
    """
    if auto_props_text is None:
        auto_props_text = svn_auto_props
    if type_list is None:
        type_list = file_types

    for propline in auto_props_text.splitlines():
        if re.match(r"\s*(#.*)?$", propline):           # comment and blank lines
            continue
        if re.match(r"\s*\[auto-props\]", propline):    # the [auto-props] section line
            continue
        if not re.match(r"\s*[^\s]+\s*=", propline):    # minimal check for "<file-type> ="
            print("Bad line from autoprops definitions: " + propline)
            continue
        file_type, string_proplist = propline.split("=", 1)

        # Transform the file type expression from autoprops into a normal
        # regular expression, e.g. "*.cpp" ==> ".*\.cpp$"
        file_type = file_type.strip()
        file_type = file_type.replace(".", r"\.")
        file_type = file_type.replace("*", ".*")
        file_type = file_type + "$"

        # Example string_proplist at this point: " svn:eol-style=native;svn:executable"
        # Split on ';' into a list of properties. The negative lookbehind and
        # lookahead keep the split from matching ';;', which is an escaped ';'
        # within a property value.
        proplist = []
        for prop in re.split(r"(?<!;);(?!;)", string_proplist):
            if "=" in prop:
                prop_name, prop_val = prop.split("=", 1)
            else:
                # Properties with no explicit value, e.g. svn:executable
                prop_name, prop_val = prop, ""
            prop_name = prop_name.strip()
            if not prop_name:
                # A trailing or doubled-up separator leaves an empty item;
                # there is no property to record for it.
                continue
            # Unescape any ";;" in a property value, e.g. the mime-type from
            #   *.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8
            prop_val = prop_val.strip().replace(";;", ";")
            # If the prop value "is quoted", remove the quotes.
            # See svn:keywords for an example of a quoted prop value.
            quoted = re.match('^"(.+)"$', prop_val)
            if quoted:
                prop_val = quoted.group(1)
            proplist.append((prop_name, prop_val))

        type_list.append((file_type, proplist))
87
88
def runCommand(cmd):
    """Run cmd through the shell and return everything it wrote to stdout.

    cmd: the command line, as a single string handed to os.popen().

    If the command fails, a message is written to stderr and the script exits
    with the command's own exit code (or 1 when the command was killed by a
    signal and so has no exit code of its own).
    """
    pipe = os.popen(cmd)
    output_text = pipe.read()
    exit_status = pipe.close()
    if exit_status:
        sys.stderr.write('" %s " failed. Exiting.\n' % cmd)
        # close() reports a wait status: the exit code shifted left by one
        # byte. Passing that straight to sys.exit() would be truncated by the
        # OS to its low byte, turning e.g. exit code 1 (status 256) into 0.
        exit_code = exit_status >> 8 if exit_status > 0 else 0
        sys.exit(exit_code or 1)
    return output_text
97
# The auto-props specification text, read from the svn:auto-props property of
# the ICU repository URL below. NOTE: this runs the svn client when the module
# is loaded; runCommand() exits the script if that command fails.
svn_auto_props = runCommand("svn propget svn:auto-props http://source.icu-project.org/repos/icu")
729e4ab9
A
99
def usage():
    """Print a one-line usage summary for this script to stdout."""
    print("usage: " + sys.argv[0] + " [-f | --fix] [-h | --help]")
102
103
#
#  UTF-8 file check.  For text files with svn:mime-type=text/anything, check the specified charset
#
def check_utf8(file_name, base_mime_type, actual_mime_type):
    """Check a text file's contents against the charset in its svn:mime-type.

    file_name:        name of a text file.
    base_mime_type:   svn:mime-type property from the auto-props settings for
                      this file type.
    actual_mime_type: existing svn:mime-type property value for the file.

    Returns the svn:mime-type value the file should have: the existing value
    when it already specifies a charset, otherwise the value from auto-props.
    A file that is not valid UTF-8 but is labelled utf-8 is reported as an
    error and its existing value is returned unchanged.
    """
    # Read raw bytes: the check below is about the encoding itself, so the
    # contents must not be decoded (or newline-translated) on the way in.
    with open(file_name, 'rb') as f:
        content = f.read()

    try:
        content.decode("UTF-8")
        file_is_utf8 = True
    except UnicodeDecodeError:
        file_is_utf8 = False

    if not file_is_utf8 and "utf-8" in actual_mime_type:
        print("Error: %s is not valid utf-8, but has a utf-8 mime type." % file_name)
        return actual_mime_type

    if file_is_utf8 and "charset" in actual_mime_type and "utf-8" not in actual_mime_type:
        print("Warning: %s is valid utf-8, but has a mime-type of %s." % (file_name, actual_mime_type))

    # Test for the complete three-byte BOM. Looking at the first byte alone
    # fails on an empty file and misreports ordinary text that starts with a
    # character in the U+F000..U+FFFF range (which also begins with 0xEF).
    if content.startswith(b"\xef\xbb\xbf") and not file_name.endswith(".txt"):
        print("Warning: file %s contains a UTF-8 BOM: " % file_name)

    # If the file already has a charset in its mime-type, don't make any change.
    if "charset=" in actual_mime_type:
        return actual_mime_type

    return base_mime_type
729e4ab9
A
140
141
def main(argv):
    """Check (and optionally fix) svn properties on every file under the cwd.

    argv: the command line arguments, without the program name.
          -f | --fix   apply the "svn propset" commands instead of only
                       printing them.
          -h | --help  print a usage line and exit.

    For each file reported by "svn ls -R" that exists in the working copy, and
    each auto-props pattern that matches it, the file's actual property values
    are compared with the auto-props values; every mismatch is printed as the
    "svn propset" command that would correct it.
    Exits with status 2 on a command line error.
    """
    # shlex.quote is the Python 3 name; Python 2 only has pipes.quote.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    fix_problems = False
    try:
        opts, args = getopt.getopt(argv, "fh", ("fix", "help"))
    except getopt.GetoptError as err:
        # Report what getopt actually rejected; the offending option is not
        # necessarily the first argument.
        print("unrecognized option: " + str(err))
        usage()
        sys.exit(2)
    for opt, _arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        if opt in ("-f", "--fix"):
            fix_problems = True
    if args:
        print("unexpected command line argument")
        usage()
        sys.exit(2)

    parse_auto_props()
    file_list = runCommand("svn ls -R ").splitlines()

    for f in file_list:
        if os.path.isdir(f):
            continue
        if not os.path.isfile(f):
            print("Repository file not in working copy: " + f)
            continue

        for file_pattern, props in file_types:
            if not re.match(file_pattern, f):
                continue
            for propname, propval in props:
                # File names go through the shell, so quote them: names with
                # spaces or shell metacharacters would otherwise break (or
                # alter) the command.
                actual_propval = runCommand(
                    "svn propget --strict %s %s" % (quote(propname), quote(f)))
                if propname == "svn:mime-type" and propval.startswith("text/"):
                    # Check for UTF-8 text files; they should have
                    # svn:mime-type=text/something; charset=utf8
                    propval = check_utf8(f, propval, actual_propval)
                # A value-less property (e.g. svn:executable) reads back as "*".
                if propval == actual_propval or (propval == "" and actual_propval == "*"):
                    continue
                fix_cmd = "svn propset %s %s %s" % (quote(propname), quote(propval), quote(f))
                print(fix_cmd)
                if fix_problems:
                    os.system(fix_cmd)
729e4ab9
A
186
187
# Script entry point: pass everything after the program name to main().
if __name__ == "__main__":
    main(sys.argv[1:])