]>
git.saurik.com Git - apple/icu.git/blob - icuSources/tools/icu-file-utf8-check.py
3 # Copyright (C) 2016 and later: Unicode, Inc. and others.
4 # License & terms of use: http://www.unicode.org/copyright.html
6 # Copyright (C) 2009-2011, International Business Machines Corporation, Google and Others.
10 # Script to check that ICU source files contain only valid UTF-8 encoded text,
11 # and that all files except '.txt' files do not contain a Byte Order Mark (BOM).
13 # THIS SCRIPT DOES NOT WORK ON WINDOWS
14 # It only works correctly on platforms where the native line ending is a plain \n
17 # icu-file-utf8-check.py [options]
20 # -h | --help Print a usage line and exit.
22 # The tool operates recursively on the directory from which it is run.
23 # Only files from the ICU github repository are checked.
24 # No changes are made to the repository; only the working copy will be altered.
26 from __future__
import print_function
36 output_file
= os
.popen(cmd
);
37 output_text
= output_file
.read();
38 exit_status
= output_file
.close();
40 print('"', cmd
, '" failed. Exiting.', file=sys
.stderr
)
46 print("usage: " + sys
.argv
[0] + " [-h | --help]")
50 # File check. Check source code files for UTF-8 and all except text files for not containing a BOM
51 # file_name: name of a text file.
52 # is_source: Flag, set to True if file is a source code file (.c, .cpp, .h, .java).
54 def check_file(file_name
, is_source
):
55 f
= open(file_name
, 'rb')
62 except UnicodeDecodeError:
63 print("Error: %s is a source code file but contains non-utf-8 bytes." % file_name
)
66 if not (file_name
.endswith(".txt") or file_name
.endswith(".sln")
67 or file_name
.endswith(".targets")
68 or ".vcxproj" in file_name
):
69 print("Warning: file %s contains a UTF-8 BOM: " % file_name
)
75 opts
, args
= getopt
.getopt(argv
, "h", ("help"))
76 except getopt
.GetoptError
:
77 print("unrecognized option: " + argv
[0])
81 if opt
in ("-h", "--help"):
85 print("unexpected command line argument")
89 output
= runCommand("git ls-files ");
90 file_list
= output
.splitlines()
92 source_file_re
= re
.compile(".*((?:\\.c$)|(?:\\.cpp$)|(?:\\.h$)|(?:\\.java$))")
96 print("Skipping dir " + f
)
98 if not os
.path
.isfile(f
):
99 print("Repository file not in working copy: " + f
)
102 source_file
= source_file_re
.match(f
)
103 check_file(f
, source_file
)
105 if __name__
== "__main__":