]>
Commit | Line | Data |
---|---|---|
729e4ab9 A |
1 | #! /usr/bin/python |
2 | ||
f3c0d7a5 A |
3 | # Copyright (C) 2016 and later: Unicode, Inc. and others. |
4 | # License & terms of use: http://www.unicode.org/copyright.html | |
5 | ||
4388f060 | 6 | # Copyright (C) 2009-2011, International Business Machines Corporation, Google and Others. |
729e4ab9 A |
7 | # All rights reserved. |
8 | ||
9 | # | |
10 | # Script to check and fix svn property settings for ICU source files. | |
11 | # Also check for the correct line endings on files with svn:eol-style = native | |
12 | # | |
13 | # THIS SCRIPT DOES NOT WORK ON WINDOWS | |
14 | # It only works correctly on platforms where the native line ending is a plain \n | |
15 | # | |
16 | # usage: | |
17 | # icu-svnprops-check.py [options] | |
18 | # | |
19 | # options: | |
20 | # -f | --fix Fix any problems that are found | |
21 | # -h | --help Print a usage line and exit. | |
22 | # | |
23 | # The tool operates recursively on the directory from which it is run. | |
24 | # Only files from the svn repository are checked. | |
25 | # No changes are made to the repository; only the working copy will be altered. | |
26 | ||
27 | import sys | |
28 | import os | |
29 | import os.path | |
30 | import re | |
31 | import getopt | |
32 | ||
729e4ab9 A |
33 | |
34 | # file_types: The parsed form of the svn auto-props specification. | |
35 | # A list of file types - .cc, .cpp, .txt, etc. | |
36 | # each element is a [type, proplist] | |
37 | # "type" is a regular expression string that will match a file name | |
38 | # prop list is another list, one element per property. | |
39 | # Each property item is a two element list, [prop name, prop value] | |
# Registry filled in by parse_auto_props(); starts out empty.
file_types = []
41 | ||
def parse_auto_props():
    """Parse the svn auto-props text into the module-level ``file_types`` list.

    Reads the global ``svn_auto_props`` string and, for every definition line
    of the form ``<file-type> = <prop>[;<prop>...]``, appends one
    ``(pattern, proplist)`` tuple to ``file_types``:

      * ``pattern`` is a regular expression string built from the file-type
        glob, e.g. "*.cpp" ==> ".*\\.cpp$".
      * ``proplist`` is a list of ``(prop_name, prop_val)`` tuples; a property
        with no explicit value (e.g. svn:executable) gets the value "".

    Comment lines, blank lines and the ``[auto-props]`` header are ignored.
    Lines failing the minimal syntax check are reported on stdout and skipped.
    Empty property items (e.g. produced by a trailing ';') are dropped rather
    than being recorded as a property with an empty name.
    """
    for propline in svn_auto_props.splitlines():
        if re.match(r"\s*(#.*)?$", propline):           # comment and blank lines
            continue
        if re.match(r"\s*\[auto-props\]", propline):    # the [auto-props] line
            continue
        if not re.match(r"\s*[^\s]+\s*=", propline):    # minimal syntax check for <file-type> =
            print("Bad line from autoprops definitions: " + propline)
            continue
        file_type, string_proplist = propline.split("=", 1)

        # Transform the file type expression from autoprops into a normal
        # regular expression, e.g. "*.cpp" ==> ".*\.cpp$".
        # The '.' must be escaped before '*' is expanded to '.*'.
        file_type = file_type.strip()
        file_type = file_type.replace(".", r"\.")
        file_type = file_type.replace("*", ".*")
        file_type = file_type + "$"

        # Example string_proplist at this point: " svn:eol-style=native;svn:executable"
        # Split on ';' into a list of properties.  The negative lookahead and
        # lookbehind in the split regexp prevent matching on ';;', which is an
        # escaped ';' within a property value.
        proplist = []
        for prop in re.split("(?<!;);(?!;)", string_proplist):
            if "=" in prop:
                prop_name, prop_val = prop.split("=", 1)
            else:
                # Properties with no explicit value, e.g. svn:executable
                prop_name, prop_val = prop, ""
            prop_name = prop_name.strip()
            if not prop_name:
                # Nothing between two separators (or after a trailing ';'):
                # there is no property to record.
                continue
            # Unescape any ";;" in a property value, e.g. the mime-type from
            #   *.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8
            prop_val = prop_val.strip().replace(";;", ";")
            # If the prop value "is quoted", remove the quotes.
            # See svn:keywords for an example of a quoted prop value.
            match = re.match('^"(.+)"$', prop_val)
            if match:
                prop_val = match.group(1)
            proplist.append((prop_name, prop_val))

        file_types.append((file_type, proplist))
87 | ||
88 | ||
def runCommand(cmd):
    """Run a shell command and return everything it wrote to stdout.

    cmd:    a complete shell command line (it is passed to the shell as-is,
            so callers are responsible for quoting any arguments).
    return: the command's standard output as a string.

    If the command exits with a non-zero status, a message is written to
    stderr and this script exits with the command's exit code.  The real exit
    code is used rather than an encoded wait status: a wait status such as 256
    would be truncated to 0 by the operating system and reported as success.
    """
    # Imported locally so this function does not depend on the file's
    # top-level import list.
    import subprocess

    process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
    output_text = process.communicate()[0]
    exit_status = process.returncode
    if exit_status:
        sys.stderr.write('" ' + cmd + ' " failed. Exiting.\n')
        # Commands killed by a signal report a negative code; map anything
        # outside the valid exit-code range to a plain failure.
        if 0 < exit_status < 256:
            sys.exit(exit_status)
        sys.exit(1)
    if not isinstance(output_text, str):
        # Python 3 returns bytes from the pipe; callers expect text.
        output_text = output_text.decode("utf-8", "replace")
    return output_text
97 | ||
# Raw text of the repository's svn:auto-props property, fetched once when the
# script starts; parse_auto_props() turns it into the file_types list.
svn_auto_props = runCommand(
    "svn propget svn:auto-props http://source.icu-project.org/repos/icu")
729e4ab9 A |
99 | |
def usage():
    """Print the one-line command usage summary to stdout."""
    script_name = sys.argv[0]
    print("usage: " + script_name + " [-f | --fix] [-h | --help]")
102 | ||
103 | ||
104 | # | |
f3c0d7a5 | 105 | # UTF-8 file check. For text files with svn:mime-type=text/anything, check the specified charset |
729e4ab9 | 106 | # file_name: name of a text file. |
f3c0d7a5 | 107 | # base_mime_type: svn:mime-type property from the auto-props settings for this file type. |
729e4ab9 | 108 | # actual_mime_type: existing svn:mime-type property value for the file. |
f3c0d7a5 A |
109 | # return: The correct svn:mime-type property value, |
110 | # either the original, if it looks OK, otherwise the value from auto-props | |
729e4ab9 A |
111 | # |
def check_utf8(file_name, base_mime_type, actual_mime_type):
    """Decide the svn:mime-type value a text file should have.

    file_name:        name of a text file.
    base_mime_type:   svn:mime-type value from the auto-props settings for
                      this file type.
    actual_mime_type: svn:mime-type value currently set on the file.
    return:           actual_mime_type when it already names a charset (or
                      when the file is not valid UTF-8 but claims to be, which
                      is reported as an error and left alone); otherwise
                      base_mime_type.

    Diagnostics are printed to stdout for: content that is not valid UTF-8
    but carries a utf-8 mime type; valid UTF-8 content with a non-utf-8
    charset; and a UTF-8 byte order mark in any file other than *.txt.
    Empty files are handled (they are valid UTF-8 and have no BOM).
    """
    # Read the raw bytes: the check is about the on-disk encoding, so no
    # text-mode decoding or newline translation may happen first.
    with open(file_name, 'rb') as f:
        data = f.read()

    try:
        data.decode("UTF-8")
        file_is_utf8 = True
    except UnicodeDecodeError:
        file_is_utf8 = False

    if not file_is_utf8 and "utf-8" in actual_mime_type:
        print("Error: %s is not valid utf-8, but has a utf-8 mime type." % file_name)
        return actual_mime_type

    if file_is_utf8 and "charset" in actual_mime_type and "utf-8" not in actual_mime_type:
        print("Warning: %s is valid utf-8, but has a mime-type of %s." % (file_name, actual_mime_type))

    # Test for the complete three-byte BOM; startswith() is also safe on an
    # empty file, where indexing the first byte would raise IndexError.
    if data.startswith(b"\xef\xbb\xbf") and not file_name.endswith(".txt"):
        print("Warning: file %s contains a UTF-8 BOM: " % file_name)

    # If the file already has a charset in its mime-type, don't make any change.
    if "charset=" in actual_mime_type:
        return actual_mime_type

    return base_mime_type
729e4ab9 A |
140 | |
141 | ||
def main(argv):
    """Check (and optionally fix) svn properties of the files under the cwd.

    argv: the command line arguments, without the program name.
          -f | --fix   run the svn propset commands instead of only printing them
          -h | --help  print a usage line and exit

    For every file reported by "svn ls -R" that exists in the working copy,
    each auto-props pattern matching the file name is applied: the expected
    value of every listed property is compared with the value svn reports,
    and an "svn propset" command is printed for each difference.  With --fix
    the command is also executed.

    Exits with status 2 on a command line error.
    """
    # Shell quoting helper; its location differs between Python 2 and 3.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    fix_problems = False
    try:
        opts, args = getopt.getopt(argv, "fh", ("fix", "help"))
    except getopt.GetoptError as err:
        # Report what getopt actually rejected; argv[0] is not necessarily
        # the offending argument.
        print(err.msg)
        usage()
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        if opt in ("-f", "--fix"):
            fix_problems = True
    if args:
        print("unexpected command line argument")
        usage()
        sys.exit(2)

    parse_auto_props()
    file_list = runCommand("svn ls -R ").splitlines()

    for f in file_list:
        if os.path.isdir(f):
            continue
        if not os.path.isfile(f):
            print("Repository file not in working copy: " + f)
            continue

        # File names may contain spaces or shell metacharacters, and every
        # svn invocation below goes through the shell.
        quoted_f = quote(f)

        for file_pattern, props in file_types:
            if not re.match(file_pattern, f):
                continue
            for propname, propval in props:
                actual_propval = runCommand(
                    "svn propget --strict %s %s" % (quote(propname), quoted_f))
                if propname == "svn:mime-type" and propval.find("text/") == 0:
                    # Check for UTF-8 text files, should have
                    # svn:mime-type=text/something; charset=utf8
                    propval = check_utf8(f, propval, actual_propval)
                # svn reports "*" for a value-less property such as
                # svn:executable, which auto-props represents as "".
                if propval == actual_propval or (propval == "" and actual_propval == "*"):
                    continue
                fix_command = "svn propset %s %s %s" % (
                    quote(propname), quote(propval), quoted_f)
                print(fix_command)
                if fix_problems:
                    os.system(fix_command)
729e4ab9 A |
186 | |
187 | ||
# Script entry point: hand everything after the program name to main().
if __name__ == "__main__":
    cli_arguments = sys.argv[1:]
    main(cli_arguments)