]>
Commit | Line | Data |
---|---|---|
1e4a197e RD |
1 | """distutils.filelist |
2 | ||
3 | Provides the FileList class, used for poking about the filesystem | |
4 | and building lists of files. | |
5 | """ | |
6 | ||
7 | # This module should be kept compatible with Python 1.5.2. | |
8 | ||
9 | __revision__ = "$Id$" | |
10 | ||
11 | import os, string, re | |
12 | import fnmatch | |
13 | from types import * | |
14 | from glob import glob | |
15 | from distutils.util import convert_path | |
16 | from distutils.errors import DistutilsTemplateError, DistutilsInternalError | |
17 | from distutils import log | |
18 | ||
class FileList:
    """A list of files built by exploring the filesystem and filtered by
    applying various patterns to what we find there.

    Instance attributes:
      files
        list of filenames currently being built/filtered/manipulated
      allfiles
        complete list of files under consideration (ie. without any
        filtering applied); None until set_allfiles() or findall() is
        called -- include_pattern() calls findall() on demand
    """

    def __init__(self, warn=None, debug_print=None):
        # Both arguments are ignored; they are kept only for backwards
        # compatibility.
        self.allfiles = None
        self.files = []

    def set_allfiles(self, allfiles):
        """Use 'allfiles' as the complete list of candidate files."""
        self.allfiles = allfiles

    def findall(self, dir=os.curdir):
        """Set 'allfiles' to the result of calling the module-level
        findall() function on 'dir'.
        """
        self.allfiles = findall(dir)

    def debug_print(self, msg):
        """Print 'msg' to stdout if the global DEBUG (taken from the
        DISTUTILS_DEBUG environment variable) flag is true.
        """
        from distutils.debug import DEBUG
        if DEBUG:
            # A single parenthesised argument prints the same text whether
            # 'print' is a statement or a function.
            print(msg)

    # -- List-like methods ---------------------------------------------

    def append(self, item):
        """Add the single filename 'item' to 'files'."""
        self.files.append(item)

    def extend(self, items):
        """Add every filename in 'items' to 'files'."""
        self.files.extend(items)

    def sort(self):
        """Sort 'files' by (directory, basename) pairs.

        Not a strict lexical sort!  'files' is rebound to a new list.
        """
        split_names = sorted(map(os.path.split, self.files))
        self.files = [os.path.join(*parts) for parts in split_names]

    # -- Other miscellaneous utility methods ---------------------------

    def remove_duplicates(self):
        """Remove adjacent duplicate entries from 'files' in place.

        Assumes list has been sorted!
        """
        # Walk backwards so deletions do not disturb the indices that are
        # still to be visited.
        for i in range(len(self.files) - 1, 0, -1):
            if self.files[i] == self.files[i - 1]:
                del self.files[i]

    # -- "File template" methods ---------------------------------------

    def _parse_template_line(self, line):
        """Split a template line into (action, patterns, dir, dir_pattern).

        'action' is the first word of the line.  Which of the other three
        items are not None depends on the action: 'patterns' for the
        include/exclude family, 'dir' and 'patterns' for the recursive
        family, 'dir_pattern' for graft/prune.  Every path-like word is
        passed through convert_path().

        Raises DistutilsTemplateError if the line is blank, the action is
        unknown, or the number of words does not fit the action.
        """
        words = line.split()
        if not words:
            # Without this guard words[0] fails with a bare IndexError.
            raise DistutilsTemplateError(
                "empty template line: expected an action")
        action = words[0]

        patterns = directory = dir_pattern = None

        if action in ('include', 'exclude',
                      'global-include', 'global-exclude'):
            if len(words) < 2:
                raise DistutilsTemplateError(
                    "'%s' expects <pattern1> <pattern2> ..." % action)
            patterns = [convert_path(word) for word in words[1:]]

        elif action in ('recursive-include', 'recursive-exclude'):
            if len(words) < 3:
                raise DistutilsTemplateError(
                    "'%s' expects <dir> <pattern1> <pattern2> ..." % action)
            directory = convert_path(words[1])
            patterns = [convert_path(word) for word in words[2:]]

        elif action in ('graft', 'prune'):
            if len(words) != 2:
                raise DistutilsTemplateError(
                    "'%s' expects a single <dir_pattern>" % action)
            dir_pattern = convert_path(words[1])

        else:
            raise DistutilsTemplateError("unknown action '%s'" % action)

        return (action, patterns, directory, dir_pattern)

    # _parse_template_line ()

    def process_template_line(self, line):
        """Parse one template line and apply its action to 'files'.

        A warning is logged for every pattern that selects nothing.
        Raises DistutilsTemplateError (from _parse_template_line()) if the
        line is malformed.
        """
        # Parse the line: split it up, make sure the right number of words
        # is there, and return the relevant words.  'action' is always
        # defined: it's the first word of the line.  Which of the other
        # three are defined depends on the action; it'll be either
        # patterns, (dir and patterns), or (dir_pattern).
        (action, patterns, directory, dir_pattern) = \
            self._parse_template_line(line)

        # OK, now we know that the action is valid and we have the
        # right number of words on the line for that action -- so we
        # can proceed with minimal error-checking.
        if action == 'include':
            self.debug_print("include " + ' '.join(patterns))
            for pattern in patterns:
                if not self.include_pattern(pattern, anchor=1):
                    log.warn("warning: no files found matching '%s'",
                             pattern)

        elif action == 'exclude':
            self.debug_print("exclude " + ' '.join(patterns))
            for pattern in patterns:
                if not self.exclude_pattern(pattern, anchor=1):
                    log.warn(("warning: no previously-included files "
                              "found matching '%s'"), pattern)

        elif action == 'global-include':
            self.debug_print("global-include " + ' '.join(patterns))
            for pattern in patterns:
                if not self.include_pattern(pattern, anchor=0):
                    log.warn(("warning: no files found matching '%s' "
                              "anywhere in distribution"), pattern)

        elif action == 'global-exclude':
            self.debug_print("global-exclude " + ' '.join(patterns))
            for pattern in patterns:
                if not self.exclude_pattern(pattern, anchor=0):
                    log.warn(("warning: no previously-included files matching "
                              "'%s' found anywhere in distribution"),
                             pattern)

        elif action == 'recursive-include':
            self.debug_print("recursive-include %s %s" %
                             (directory, ' '.join(patterns)))
            for pattern in patterns:
                if not self.include_pattern(pattern, prefix=directory):
                    log.warn(("warning: no files found matching '%s' "
                              "under directory '%s'"),
                             pattern, directory)

        elif action == 'recursive-exclude':
            self.debug_print("recursive-exclude %s %s" %
                             (directory, ' '.join(patterns)))
            for pattern in patterns:
                if not self.exclude_pattern(pattern, prefix=directory):
                    log.warn(("warning: no previously-included files matching "
                              "'%s' found under directory '%s'"),
                             pattern, directory)

        elif action == 'graft':
            self.debug_print("graft " + dir_pattern)
            if not self.include_pattern(None, prefix=dir_pattern):
                log.warn("warning: no directories found matching '%s'",
                         dir_pattern)

        elif action == 'prune':
            self.debug_print("prune " + dir_pattern)
            if not self.exclude_pattern(None, prefix=dir_pattern):
                log.warn(("warning: no previously-included directories "
                          "found matching '%s'"), dir_pattern)
        else:
            raise DistutilsInternalError(
                "this cannot happen: invalid action '%s'" % action)

    # process_template_line ()

    # -- Filtering/selection methods -----------------------------------

    def include_pattern(self, pattern,
                        anchor=1, prefix=None, is_regex=0):
        """Select strings (presumably filenames) from 'self.allfiles' that
        match 'pattern', a Unix-style wildcard (glob) pattern.  Patterns
        are not quite the same as implemented by the 'fnmatch' module: '*'
        and '?'  match non-special characters, where "special" is platform-
        dependent: slash on Unix; colon, slash, and backslash on
        DOS/Windows; and colon on Mac OS.

        If 'anchor' is true (the default), then the pattern match is more
        stringent: "*.py" will match "foo.py" but not "foo/bar.py".  If
        'anchor' is false, both of these will match.

        If 'prefix' is supplied, then only filenames starting with 'prefix'
        (itself a pattern) and ending with 'pattern', with anything in between
        them, will match.  'anchor' is ignored in this case.

        If 'is_regex' is true, 'anchor' and 'prefix' are ignored, and
        'pattern' is assumed to be either a string containing a regex or a
        regex object -- no translation is done, the regex is just compiled
        and used as-is.

        Selected strings will be added to self.files.  If 'self.allfiles'
        is still None it is first filled in by calling self.findall().

        Return 1 if files are found, 0 otherwise.
        """
        files_found = 0
        pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
        self.debug_print("include_pattern: applying regex r'%s'" %
                         pattern_re.pattern)

        # delayed loading of allfiles list
        if self.allfiles is None:
            self.findall()

        for name in self.allfiles:
            if pattern_re.search(name):
                self.debug_print(" adding " + name)
                self.files.append(name)
                files_found = 1

        return files_found

    # include_pattern ()

    def exclude_pattern(self, pattern,
                        anchor=1, prefix=None, is_regex=0):
        """Remove strings (presumably filenames) from 'files' that match
        'pattern'.  Other parameters are the same as for
        'include_pattern()', above.
        The list 'self.files' is modified in place.
        Return 1 if files are found, 0 otherwise.
        """
        files_found = 0
        pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
        self.debug_print("exclude_pattern: applying regex r'%s'" %
                         pattern_re.pattern)
        # Iterate from the end so that deleting an entry does not shift
        # the entries that are still to be examined.
        for i in range(len(self.files) - 1, -1, -1):
            if pattern_re.search(self.files[i]):
                self.debug_print(" removing " + self.files[i])
                del self.files[i]
                files_found = 1

        return files_found

    # exclude_pattern ()

# class FileList
271 | ||
272 | ||
273 | # ---------------------------------------------------------------------- | |
274 | # Utility functions | |
275 | ||
def findall(dir=os.curdir):
    """Find all regular files under 'dir' and return the list of their
    filenames.

    Each returned name is 'dir' joined with the path below it, except
    when 'dir' is os.curdir, in which case the names carry no leading
    "./".  Entries are examined with os.stat(), which follows symbolic
    links, so linked files are listed and linked directories are
    descended into.  Entries that cannot be stat'ed (for example dangling
    symbolic links) are skipped instead of aborting the whole scan.
    """
    from stat import S_ISREG, S_ISDIR

    found = []
    pending = [dir]

    while pending:
        current = pending.pop()

        for name in os.listdir(current):
            if current != os.curdir:    # avoid the dreaded "./" syndrome
                fullname = os.path.join(current, name)
            else:
                fullname = name

            # Avoid excess stat calls -- just one will do, thank you!
            try:
                mode = os.stat(fullname).st_mode
            except OSError:
                # Dangling symlink, or the entry vanished after listdir().
                continue

            if S_ISREG(mode):
                found.append(fullname)
            elif S_ISDIR(mode):
                # No S_ISLNK() test here: os.stat() reports the mode of a
                # link's target, so that test could never be true.
                pending.append(fullname)

    return found
306 | ||
307 | ||
def glob_to_re(pattern):
    """Translate a shell-like glob pattern to a regular expression; return
    a string containing the regex.  Differs from 'fnmatch.translate()' in
    that '*' and '?' do not match the path separator (os.sep).
    """
    pattern_re = fnmatch.translate(pattern)

    # '?' and '*' in the glob pattern become '.' and '.*' in the RE, which
    # IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix,
    # and by extension they shouldn't match such "special characters" under
    # any OS.  So change all non-escaped dots in the RE to match any
    # character except the path separator.
    if os.sep == '\\':
        non_sep = r'[^\\]'          # a backslash must be escaped in a class
    else:
        non_sep = '[^%s]' % os.sep

    def _restrict(match):
        # group(1) is the (possibly empty) run of escaped backslashes that
        # precedes the dot; keep it and replace only the dot itself.
        return match.group(1) + non_sep

    # A dot is unescaped when it is preceded by an even number of
    # backslashes.  The lookbehind consumes no text, so adjacent dots
    # (e.g. from "??" or "?*") are all rewritten.
    # NOTE(review): a '.' inside a bracket expression such as "[.]" is
    # rewritten as well -- confirm whether such globs need support.
    pattern_re = re.sub(r'((?<!\\)(\\\\)*)\.', _restrict, pattern_re)
    return pattern_re

# glob_to_re ()
327 | ||
328 | ||
def translate_pattern(pattern, anchor=1, prefix=None, is_regex=0):
    """Translate a shell-like wildcard pattern to a compiled regular
    expression.  Return the compiled regex.  If 'is_regex' true,
    then 'pattern' is directly compiled to a regex (if it's a string)
    or just returned as-is (assumes it's a regex object).

    Otherwise 'pattern' and 'prefix' are translated with glob_to_re().
    With a 'prefix', the result matches names that start with the prefix,
    followed by a path separator, anything, and finally 'pattern' (if
    any); 'anchor' is ignored.  Without a prefix, 'anchor' decides whether
    'pattern' must match from the start of the name.  With neither a
    pattern nor a prefix the result matches every name.
    """
    if is_regex:
        if isinstance(pattern, str):
            return re.compile(pattern)
        return pattern

    # glob_to_re() surrounds the translated glob with boilerplate whose
    # exact text depends on the Python version (e.g. a bare trailing "$",
    # or a flag group closed by an end-of-string anchor).  Translate a
    # one-character probe to learn what that boilerplate is, rather than
    # assuming it is exactly one trailing character.
    wrapped = glob_to_re('0')
    cut = wrapped.index('0')
    start, end = wrapped[:cut], wrapped[cut + 1:]

    def _unwrap(regex):
        # Return the translated glob without the surrounding boilerplate.
        return regex[len(start):len(regex) - len(end)]

    if pattern:
        pattern_body = _unwrap(glob_to_re(pattern))
    else:
        pattern_body = ''

    if prefix is not None:
        prefix_body = _unwrap(glob_to_re(prefix))
        # Join the pieces the way os.path.join() would join paths: add a
        # separator only after a non-empty prefix that does not already
        # end with one.  The separator is inserted as regex text, so a
        # backslash has to be escaped.
        if prefix_body and not prefix.endswith(os.sep):
            if os.sep == '\\':
                prefix_body = prefix_body + r'\\'
            else:
                prefix_body = prefix_body + os.sep
        body = r'\A' + prefix_body + '.*' + pattern_body
    elif not pattern:
        # Neither pattern nor prefix: every name matches.
        return re.compile('')
    elif anchor:
        # The anchor goes inside the boilerplate so that it stays within
        # any flag group opened by 'start'.
        body = r'\A' + pattern_body
    else:
        body = pattern_body

    return re.compile(start + body + end)

# translate_pattern ()