]>
git.saurik.com Git - apple/icu.git/blob - icuSources/tools/icu-svnprops-check.py
3 # Copyright (C) 2016 and later: Unicode, Inc. and others.
4 # License & terms of use: http://www.unicode.org/copyright.html
6 # Copyright (C) 2009-2011, International Business Machines Corporation, Google and Others.
10 # Script to check and fix svn property settings for ICU source files.
11 # Also check for the correct line endings on files with svn:eol-style = native
13 # THIS SCRIPT DOES NOT WORK ON WINDOWS
14 # It only works correctly on platforms where the native line ending is a plain \n
17 # icu-svnprops-check.py [options]
20 # -f | --fix Fix any problems that are found
21 # -h | --help Print a usage line and exit.
23 # The tool operates recursively on the directory from which it is run.
24 # Only files from the svn repository are checked.
25 # No changes are made to the repository; only the working copy will be altered.
# file_types: The parsed form of the svn auto-props specification.
#             A list with one entry per file type - .cc, .cpp, .txt, etc.
#             Each entry is a (type, proplist) tuple:
#               "type" is a regular expression string that will match a file name.
#               "proplist" is a list with one element per property.
#               Each property item is a two-element tuple, (prop name, prop value).
def parse_auto_props():
    """Parse the svn auto-props text into the module-level file_types list.

    Reads the module-level string svn_auto_props line by line.  Comment
    lines, blank lines and the "[auto-props]" section header are skipped.
    Every remaining line must look like "<file-type> = <prop>;<prop>...";
    lines that do not are reported on stdout and skipped.

    For each accepted line a (file_type_regex, proplist) tuple is appended
    to file_types, where proplist is a list of (prop name, prop value)
    tuples.  Properties given without a value (e.g. svn:executable) get the
    value "".  Nothing is returned.
    """
    for propline in svn_auto_props.splitlines():
        if re.match(r"\s*(#.*)?$", propline):          # comment and blank lines
            continue
        if re.match(r"\s*\[auto-props\]", propline):   # the [auto-props] line
            continue
        if not re.match(r"\s*[^\s]+\s*=", propline):   # minimal syntax check for <file-type> =
            print("Bad line from autoprops definitions: " + propline)
            continue
        file_type, string_proplist = propline.split("=", 1)

        # Transform the file type expression from autoprops into a normal
        # regular expression, e.g. "*.cpp"  ==>  ".*\.cpp$"
        file_type = file_type.strip().replace(".", r"\.").replace("*", ".*") + "$"

        # Example string_proplist at this point:
        #     " svn:eol-style=native;svn:executable"
        # Split on ';' into a list of properties.  The negative lookbehind and
        # lookahead keep the split from matching ';;', which is an escaped ';'
        # within a property value.
        proplist = []
        for prop in re.split("(?<!;);(?!;)", string_proplist):
            # partition() leaves the value empty when there is no "=",
            # which is the form used by valueless properties.
            prop_name, _, prop_val = prop.partition("=")
            prop_name = prop_name.strip()
            if not prop_name:
                # An empty segment (trailing ';' or nothing after '=') is not
                # a property; recording it would later ask svn about a
                # property with an empty name.
                continue
            # Unescape any ";;" in a property value, e.g. the mime-type from
            #   *.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8
            prop_val = prop_val.strip().replace(";;", ";")
            # If the prop value "is quoted", remove the quotes.
            # See svn:keywords for an example of a quoted prop value.
            match = re.match('^"(.+)"$', prop_val)
            if match:
                prop_val = match.group(1)
            proplist.append((prop_name, prop_val))

        file_types.append((file_type, proplist))
def runCommand(cmd):
    """Run a shell command and return what it wrote to standard output.

    cmd:    the command line; it is handed to the shell through os.popen.
    return: the complete standard output of the command, as one string.

    If the command reports failure, a message naming the command is written
    to stderr and the script exits with a non-zero exit code.
    """
    output_file = os.popen(cmd)
    try:
        output_text = output_file.read()
    finally:
        # Always reap the child, even if reading its output failed.
        exit_status = output_file.close()
    if exit_status:
        sys.stderr.write('" %s " failed. Exiting.\n' % cmd)
        # close() reports a wait()-style status (exit code shifted left by
        # one byte on POSIX).  Handing that straight to sys.exit() would
        # truncate e.g. 256 to 0 and make a failure look like success.
        exit_code = 1
        if hasattr(os, "WIFEXITED") and os.WIFEXITED(exit_status):
            exit_code = os.WEXITSTATUS(exit_status) or 1
        sys.exit(exit_code)
    return output_text
# The raw auto-props definitions, fetched with "svn propget" from the ICU
# repository URL.  parse_auto_props() splits this string into lines.
svn_auto_props = runCommand(
    "svn propget svn:auto-props http://source.icu-project.org/repos/icu")
def usage():
    """Print a one-line summary of the command line options to stdout."""
    print("".join(("usage: ", sys.argv[0], " [-f | --fix] [-h | --help]")))
105 # UTF-8 file check. For text files with svn:mime-type=text/anything, check the specified charset
106 # file_name: name of a text file.
107 # base_mime_type: svn:mime-type property from the auto-props settings for this file type.
108 # actual_mime_type: existing svn:mime-type property value for the file.
109 # return: The correct svn:mime-type property value,
110 # either the original, if it looks OK, otherwise the value from auto-props
def check_utf8(file_name, base_mime_type, actual_mime_type):
    """Work out the svn:mime-type value a text file should have.

    file_name:        name of a text file.
    base_mime_type:   svn:mime-type property from the auto-props settings
                      for this file type.
    actual_mime_type: existing svn:mime-type property value for the file.
    return:           actual_mime_type if it already names a charset (or if
                      the file is not valid UTF-8 but is labelled utf-8, in
                      which case an error is printed); otherwise
                      base_mime_type.

    Warnings are printed when a valid UTF-8 file is labelled with some other
    charset, and when a file other than *.txt starts with a UTF-8 BOM.
    """
    # Read raw bytes so the UTF-8 validity check sees the file unmodified.
    with open(file_name, 'rb') as f:
        contents = f.read()

    try:
        contents.decode("UTF-8")
        file_is_utf8 = True
    except UnicodeDecodeError:
        file_is_utf8 = False

    if not file_is_utf8 and actual_mime_type.find("utf-8") >= 0:
        print("Error: %s is not valid utf-8, but has a utf-8 mime type." % file_name)
        return actual_mime_type

    if file_is_utf8 and "charset" in actual_mime_type and "utf-8" not in actual_mime_type:
        print("Warning: %s is valid utf-8, but has a mime-type of %s." % (file_name, actual_mime_type))

    # Compare against the whole three-byte BOM.  Looking only at the first
    # byte fails on empty files and gives false warnings for any text whose
    # first character happens to encode with the lead byte 0xEF.
    if contents.startswith(b"\xef\xbb\xbf") and not file_name.endswith(".txt"):
        print("Warning: file %s contains a UTF-8 BOM: " % file_name)

    # If the file already has a charset in its mime-type, don't make any change.
    if "charset=" in actual_mime_type:
        return actual_mime_type

    return base_mime_type
def main(argv):
    """Check, and optionally fix, the svn properties of the working copy.

    argv: the command line arguments, without the program name.
          -f / --fix   apply the needed "svn propset" commands
          -h / --help  print the usage line and exit

    Every file reported by "svn ls -R" is compared against the auto-props
    rules in file_types.  For each property whose value differs from the
    rule, the corresponding "svn propset" command is printed, and it is
    executed as well when --fix was given.
    """
    # Function-scope import: shell-quoting helper, under whichever name this
    # Python version provides.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    fix_problems = False
    try:
        opts, args = getopt.getopt(argv, "fh", ("fix", "help"))
    except getopt.GetoptError:
        print("unrecognized option: " + argv[0])
        usage()
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        if opt in ("-f", "--fix"):
            fix_problems = True
    if args:
        print("unexpected command line argument")
        usage()
        sys.exit()

    parse_auto_props()
    output = runCommand("svn ls -R ")
    file_list = output.splitlines()

    for f in file_list:
        if os.path.isdir(f):
            # print "Skipping dir " + f
            continue
        if not os.path.isfile(f):
            print("Repository file not in working copy: " + f)
            continue

        for file_pattern, props in file_types:
            if not re.match(file_pattern, f):
                continue
            for propname, propval in props:
                # File names come straight from the repository listing and may
                # contain spaces or shell metacharacters, so every argument
                # is quoted before it is spliced into a shell command line.
                actual_propval = runCommand(
                    "svn propget --strict %s %s" % (quote(propname), quote(f)))
                #print propname + ": " + actual_propval
                if propname == "svn:mime-type" and propval.find("text/") == 0:
                    # check for UTF-8 text files, should have
                    # svn:mime-type=text/something; charset=utf8
                    propval = check_utf8(f, propval, actual_propval)
                # A valueless property (rule value "") is reported by svn as "*".
                if not (propval == actual_propval or (propval == "" and actual_propval == "*")):
                    print("svn propset %s '%s' %s" % (propname, propval, f))
                    if fix_problems:
                        os.system("svn propset %s %s %s"
                                  % (quote(propname), quote(propval), quote(f)))
188 if __name__
== "__main__":