1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 """File and file-path manipulation utilities.
19
20 :group path manipulation: first_level_directory, relative_path, is_binary,\
21 get_by_ext, remove_dead_links
22 :group file manipulation: norm_read, norm_open, lines, stream_lines,\
23 write_open_mode, ensure_fs_mode, export
24 :sort: path manipulation, file manipulation
25 """
26
27 from __future__ import print_function
28
29 __docformat__ = "restructuredtext en"
30
31 import sys
32 import shutil
33 import mimetypes
34 from os.path import isabs, isdir, islink, split, exists, normpath, join
35 from os.path import abspath
36 from os import sep, mkdir, remove, listdir, stat, chmod, walk
37 from stat import ST_MODE, S_IWRITE
38
39 from logilab.common import STD_BLACKLIST as BASE_BLACKLIST, IGNORED_EXTENSIONS
40 from logilab.common.shellutils import find
41 from logilab.common.deprecation import deprecated
42 from logilab.common.compat import FileIO
43
def first_level_directory(path):
    """Return the first level directory of a path.

    >>> first_level_directory('home/syt/work')
    'home'
    >>> first_level_directory('/home/syt/work')
    '/'
    >>> first_level_directory('work')
    'work'
    >>>

    :type path: str
    :param path: the path for which we want the first level directory

    :rtype: str
    :return: the first level directory appearing in `path`
    """
    head, tail = split(path)
    # Peel trailing components off until one side of the split is empty.
    while head and tail:
        head, tail = split(head)
    if tail:
        # relative path: the last remaining component is the first level
        return tail
    # absolute path (or empty input): `head` holds the root (or '')
    return head
68
# NOTE(review): the `def` line of this function is missing from the listing;
# the name `abspath_listdir` is an assumption to confirm against the
# original module. The single `path` parameter is taken from the body.
def abspath_listdir(path):
    """Lists path's content using absolute paths.

    :type path: str
    :param path: directory whose entries should be listed

    :rtype: list
    :return:
      the entries of `path`, each one joined to the absolute form of `path`
    """
    path = abspath(path)
    return [join(path, filename) for filename in listdir(path)]
73
74
def is_binary(filename):
    """Return true if filename may be a binary file, according to its
    extension.

    :type filename: str
    :param filename: the name of the file

    :rtype: bool
    :return:
      true if the file is a binary file (actually if its mime type
      doesn't begin with text/, or if no mime type can be guessed)
    """
    mime_type = mimetypes.guess_type(filename)[0]
    if mime_type is None:
        # nothing could be guessed from the extension: assume binary
        return True
    return not mime_type.startswith('text')
91
92
def write_open_mode(filename):
    """Return the write mode that should be used to open the file.

    :type filename: str
    :param filename: the name of the file

    :rtype: str
    :return: the mode that should be used to open the file ('w' or 'wb')
    """
    return 'wb' if is_binary(filename) else 'w'
105
106
# NOTE(review): the `def` line is missing from the listing; the parameter
# names come from the docstring, the `S_IWRITE` default is an assumption
# to confirm.
def ensure_fs_mode(filepath, desired_mode=S_IWRITE):
    """Check that the given file has the given mode(s) set, else try to
    set it.

    :type filepath: str
    :param filepath: path of the file

    :type desired_mode: int
    :param desired_mode:
      ORed flags describing the desired mode. Use constants from the
      `stat` module for file permission's modes
    """
    mode = stat(filepath)[ST_MODE]
    # Every requested flag must be present; testing `not mode & desired_mode`
    # would only notice the case where none of them is set.
    if mode & desired_mode != desired_mode:
        chmod(filepath, mode | desired_mode)
122
123
124
class ProtectedFile(FileIO):
    """A special file-object class that automatically does a 'chmod +w' when
    needed.

    XXX: for now, the way it is done allows 'normal file-objects' to be
    created during the ProtectedFile object lifetime.
    One way to circumvent this would be to chmod / unchmod on each
    write operation.

    One other way would be to :

    - catch the IOError in the __init__

    - if IOError, then create a StringIO object

    - each write operation writes in this StringIO object

    - on close()/del(), write/append the StringIO content to the file and
      do the chmod only once
    """

    def __init__(self, filepath, mode):
        """Open `filepath` with `mode`, making the file writable first when
        a write/append mode is requested on a file lacking `S_IWRITE`.

        :type filepath: str
        :param filepath: path of an existing file

        :type mode: str
        :param mode: opening mode, handed over to `FileIO`
        """
        self.original_mode = stat(filepath)[ST_MODE]
        self.mode_changed = False
        if mode in ('w', 'a', 'wb', 'ab'):
            if not self.original_mode & S_IWRITE:
                chmod(filepath, self.original_mode | S_IWRITE)
                self.mode_changed = True
        try:
            FileIO.__init__(self, filepath, mode)
        except Exception:
            # Opening failed: don't leave the file with widened permissions.
            # `self.name` may not be set at this point, hence `filepath`.
            if self.mode_changed:
                chmod(filepath, self.original_mode)
                self.mode_changed = False
            raise

    def _restore_mode(self):
        """restores the original mode if needed"""
        if self.mode_changed:
            chmod(self.name, self.original_mode)
            # don't chmod again if the mode is restored several times
            self.mode_changed = False

    def close(self):
        """restore mode before closing"""
        self._restore_mode()
        FileIO.close(self)

    def __del__(self):
        """close the file (hence restore its mode) if still opened"""
        if not self.closed:
            self.close()
169
170
# NOTE(review): the `class` line is missing from the listing; the name comes
# from the `raise UnresolvableError()` statement in this file, the
# `Exception` base is an assumption to confirm.
class UnresolvableError(Exception):
    """Exception raised by relative path when it's unable to compute relative
    path between two paths.
    """
175
def relative_path(from_file, to_file):
    """Try to get a relative path from `from_file` to `to_file`
    (path will be absolute if to_file is an absolute file). This function
    is useful to create link in `from_file` to `to_file`. This typical use
    case is used in this function description.

    If both files are relative, they're expected to be relative to the same
    directory.

    >>> relative_path( from_file='toto/index.html', to_file='index.html')
    '../index.html'
    >>> relative_path( from_file='index.html', to_file='toto/index.html')
    'toto/index.html'
    >>> relative_path( from_file='tutu/index.html', to_file='toto/index.html')
    '../toto/index.html'
    >>> relative_path( from_file='toto/index.html', to_file='/index.html')
    '/index.html'
    >>> relative_path( from_file='/toto/index.html', to_file='/index.html')
    '../index.html'
    >>> relative_path( from_file='/toto/index.html', to_file='/toto/summary.html')
    'summary.html'
    >>> relative_path( from_file='index.html', to_file='index.html')
    ''
    >>> relative_path( from_file='/index.html', to_file='toto/index.html')
    Traceback (most recent call last):
      File "<string>", line 1, in ?
      File "<stdin>", line 37, in relative_path
    UnresolvableError
    >>> relative_path( from_file='/index.html', to_file='/index.html')
    ''
    >>>

    :type from_file: str
    :param from_file: source file (where links will be inserted)

    :type to_file: str
    :param to_file: target file (on which links point)

    :raise UnresolvableError: if it has been unable to guess a correct path

    :rtype: str
    :return: the relative path of `to_file` from `from_file`
    """
    from_file = normpath(from_file)
    to_file = normpath(to_file)
    if from_file == to_file:
        return ''
    if isabs(to_file):
        if not isabs(from_file):
            # relative source, absolute target: the target is returned as is
            return to_file
    elif isabs(from_file):
        # absolute source, relative target: no common base to work with
        raise UnresolvableError()
    from_parts = from_file.split(sep)
    to_parts = to_file.split(sep)
    in_common_prefix = True
    result = []
    # Walk the directories of `from_file` (its last part is the file name):
    # shared leading directories are dropped from the target, every other
    # one costs a '..'.
    while len(from_parts) > 1:
        dirname = from_parts.pop(0)
        if in_common_prefix and len(to_parts) > 1 and dirname == to_parts[0]:
            to_parts.pop(0)
        else:
            in_common_prefix = False
            result.append('..')
    result += to_parts
    return sep.join(result)
241
242
def norm_read(path):
    """Return the content of the file with normalized line feeds.

    :type path: str
    :param path: path to the file to read

    :rtype: str
    :return: the content of the file with normalized line feeds
    """
    # 'U' is only needed on Python 2; Python 3 text mode already normalizes
    # line feeds and rejects 'U' since 3.11.
    mode = 'U' if sys.version_info[0] < 3 else 'r'
    # the context manager closes the file, which the bare
    # `open(...).read()` form left to the garbage collector
    with open(path, mode) as stream:
        return stream.read()
norm_read = deprecated("use \"open(path, 'U').read()\"")(norm_read)
254
def norm_open(path):
    """Return a stream for a file with content with normalized line feeds.

    The caller is responsible for closing the returned stream.

    :type path: str
    :param path: path to the file to open

    :rtype: file or StringIO
    :return: the opened file with normalized line feeds
    """
    # 'U' is only needed on Python 2; Python 3 text mode already normalizes
    # line feeds and rejects 'U' since 3.11.
    mode = 'U' if sys.version_info[0] < 3 else 'r'
    return open(path, mode)
norm_open = deprecated("use \"open(path, 'U')\"")(norm_open)
266
def lines(path, comments=None):
    """Return a list of non empty lines in the file located at `path`.

    :type path: str
    :param path: path to the file

    :type comments: str or None
    :param comments:
      optional string which can be used to comment a line in the file
      (i.e. lines starting with this string won't be returned)

    :rtype: list
    :return:
      a list of stripped line in the file, without empty and commented
      lines

    :warning: at some point this function will probably return an iterator
    """
    # 'U' is only needed on Python 2; Python 3 text mode already normalizes
    # line feeds and rejects 'U' since 3.11.
    mode = 'U' if sys.version_info[0] < 3 else 'r'
    # `with` guarantees the file is closed even if reading fails
    with open(path, mode) as stream:
        return stream_lines(stream, comments)
289
290
def stream_lines(stream, comments=None):
    """Return a list of non empty lines in the given `stream`.

    :type stream: object implementing 'xreadlines' or 'readlines'
    :param stream: file like object

    :type comments: str or None
    :param comments:
      optional string which can be used to comment a line in the file
      (i.e. lines starting with this string won't be returned)

    :rtype: list
    :return:
      a list of stripped line in the file, without empty and commented
      lines

    :warning: at some point this function will probably return an iterator
    """
    # prefer `xreadlines` when the stream offers it, else use `readlines`
    try:
        readlines = stream.xreadlines
    except AttributeError:
        readlines = stream.readlines
    stripped_lines = (line.strip() for line in readlines())
    return [line for line in stripped_lines
            if line and (comments is None or not line.startswith(comments))]
319
320
def export(from_dir, to_dir,
           blacklist=BASE_BLACKLIST, ignore_ext=IGNORED_EXTENSIONS,
           verbose=False):
    """Make a mirror of `from_dir` in `to_dir`, omitting directories and
    files listed in the black list or ending with one of the given
    extensions.

    :type from_dir: str
    :param from_dir: directory to export

    :type to_dir: str
    :param to_dir: destination directory

    :type blacklist: list or tuple
    :param blacklist:
      list of files or directories to ignore, default to the content of
      `BASE_BLACKLIST`

    :type ignore_ext: list or tuple
    :param ignore_ext:
      list of extensions to ignore, default to the content of
      `IGNORED_EXTENSIONS`

    :type verbose: bool
    :param verbose:
      flag indicating whether information about exported files should be
      printed to stderr, default to False

    :raise OSError: if `to_dir` can't be created and doesn't already exist
    """
    # local import: `relpath` isn't part of this module's top-level imports
    from os.path import relpath

    def destination(src):
        # Mirror of `src` below `to_dir`. Going through `relpath` instead of
        # slicing `src` by len(from_dir) stays correct when `from_dir` ends
        # with a separator.
        return join(to_dir, relpath(src, from_dir))

    try:
        mkdir(to_dir)
    except OSError:
        # an already existing destination is fine, anything else is an error
        if not isdir(to_dir):
            raise
    ignored_suffixes = tuple(ignore_ext)
    for directory, dirnames, filenames in walk(from_dir):
        # prune in place so that `walk` doesn't recurse into blacklisted
        # directories
        dirnames[:] = [name for name in dirnames if name not in blacklist]
        for dirname in dirnames:
            src = join(directory, dirname)
            dest = destination(src)
            if isdir(src) and not exists(dest):
                mkdir(dest)
        for filename in filenames:
            # the black list applies to files as well as to directories
            if filename in blacklist:
                continue
            if filename.endswith(ignored_suffixes):
                continue
            src = join(directory, filename)
            dest = destination(src)
            if verbose:
                print(src, '->', dest, file=sys.stderr)
            if exists(dest):
                remove(dest)
            shutil.copy2(src, dest)
378
379
def remove_dead_links(directory, verbose=False):
    """Recursively traverse directory and remove all dead links.

    :type directory: str
    :param directory: directory to cleanup

    :type verbose: bool
    :param verbose:
      flag indicating whether information about deleted links should be
      printed to stderr, default to False
    """
    for dirpath, dirnames, filenames in walk(directory):
        for name in dirnames + filenames:
            src = join(dirpath, name)
            # a link whose target doesn't exist: `exists` follows the link
            if islink(src) and not exists(src):
                if verbose:
                    print('remove dead link', src, file=sys.stderr)
                remove(src)
398