Get file size during os.walk
我正在使用 os.walk 遍历目录,想在遍历的同时获取每个文件的大小。
方法与不使用 os.walk 时相同:用 os.stat(或 os.path.getsize)获取文件大小:
# Visit every file below some_directory and look up its size.
for dirpath, _dirnames, filenames in os.walk(some_directory):
    for filename in filenames:
        # os.walk yields bare file names; join with the directory to get a usable path.
        path = os.path.join(dirpath, filename)
        info = os.stat(path)
        size = info.st_size  # in bytes
        # ...
os.path.getsize(path) 可以返回文件的大小,但两个文件大小相同并不意味着它们的内容一定相同。您可以读取文件内容,计算其 MD5 等哈希值后再进行比较。
正如其他人所说:你可以用 os.stat 获取文件大小。
仅供参考,Python 3 中有一个更高效的解决方案(os.scandir):
import os

# Look only at the direct children of rootdir; the with-block closes the
# scandir iterator once the loop is done.
with os.scandir(rootdir) as entries:
    for entry in entries:
        if not entry.is_file():
            continue
        # entry.path is rootdir joined with the entry's name, so it is
        # absolute only when rootdir itself is absolute.
        filepath = entry.path
        filesize = entry.stat().st_size
有关变量 entry 的更多细节,请参阅 os.DirEntry 的文档。
请注意,上面的代码不是递归的(不会进入子文件夹)。为了获得类似于 os.walk 的行为,可以使用下面的代码:
import os
from collections import namedtuple
from os.path import join as pathjoin, normpath, realpath

# Record yielded for every non-directory entry:
#   name   - path relative to the directory the walk started from
#   path   - path as reported by os.scandir (scanned directory + entry name)
#   islink - True when the entry itself is a symbolic link
#   size   - st_size of the entry, in bytes
_wrap_entry = namedtuple('DirEntryWrapper', 'name path islink size')


def scantree(rootdir, follow_links=False, reldir=''):
    """Recursively yield a DirEntryWrapper for every non-directory entry.

    Args:
        rootdir: Directory to scan.
        follow_links: When true, descend into directories reached through
            symbolic links. When false, such directories are skipped
            entirely (they are neither entered nor yielded).
        reldir: Prefix joined in front of each yielded ``name``.

    Yields:
        ``DirEntryWrapper(name, path, islink, size)`` tuples, in the order
        ``os.scandir`` reports the entries.

    Every real directory is entered at most once per walk, so a symbolic
    link that points back to an ancestor cannot cause endless recursion.
    """
    rootdir = normpath(rootdir)
    # Seed with the root itself so a link pointing back at it is not re-entered.
    visited = {realpath(rootdir)}
    yield from _scantree(rootdir, follow_links, reldir, visited)


def _scantree(rootdir, follow_links, reldir, visited):
    """Scan one directory level; ``visited`` is shared by the whole walk."""
    with os.scandir(normpath(rootdir)) as it:
        for entry in it:
            relpath = pathjoin(reldir, entry.name)
            if not entry.is_dir():
                yield _wrap_entry(
                    relpath, entry.path, entry.is_symlink(), _entry_size(entry)
                )
                continue
            if entry.is_symlink() and not follow_links:
                continue
            absdir = realpath(entry.path)
            if absdir in visited:
                continue
            visited.add(absdir)
            yield from _scantree(entry.path, follow_links, relpath, visited)


def _entry_size(entry):
    """Return the entry's size, using the link's own size for dangling links."""
    try:
        return entry.stat().st_size
    except FileNotFoundError:
        if not entry.is_symlink():
            raise
        # The link target does not exist; stat the link itself instead of
        # aborting the whole walk.
        return entry.stat(follow_symlinks=False).st_size
并像下面这样使用它:
# Read the path and size off each record produced by scantree.
for entry in scantree(rootdir, follow_links=False):
    filepath, filesize = entry.path, entry.size