# -*- coding: gbk -*-
# Example Input: F:\, F:\temp
import os
def get_file_info_list(path):
    """Collect the size and modification time of every file under *path*.

    The tree is walked recursively with ``os.walk``.  Each file is keyed by
    its full path with the leading *path* prefix removed, so that the results
    for two different roots can be compared entry by entry.

    Args:
        path: Root directory to scan.  Pass it with a trailing separator if
            the keys should not start with one.

    Returns:
        A dict mapping the prefix-stripped file path to
        ``{'file_size': <size in bytes>, 'file_mtime': <whole-second mtime>}``.
        Files that cannot be stat'ed are reported on stdout and left out.
    """
    import stat  # named indexes into the os.stat() result tuple

    prefix_length = len(path)
    file_info_list = {}
    for root, _dir_list, file_list in os.walk(path):
        for file_name in file_list:
            cur_file = os.path.join(root, file_name)
            # os.stat() can fail for individual entries (e.g. the
            # "WindowsError" raised for file names that cannot be resolved).
            # Only that OS-level failure is handled, so programming errors
            # and Ctrl-C are no longer swallowed; the scan then continues.
            try:
                file_stat = os.stat(cur_file)
            except OSError:
                print("Error: " + cur_file)
                continue
            # Every root produced by os.walk() starts with `path`, so slicing
            # off its length yields the path relative to the scanned root.
            file_info_list[cur_file[prefix_length:]] = {
                'file_size': file_stat[stat.ST_SIZE],
                'file_mtime': file_stat[stat.ST_MTIME],
            }
    return file_info_list
def file_contrast(path1, path2):
    """Return the sorted full paths of the files that differ between two trees.

    Both trees are scanned with ``get_file_info_list``.  A file is treated as
    identical only when the same relative name is present in both scans with
    equal info (size and modification time); such entries are dropped.
    Everything left over -- files present on one side only, or present on
    both sides with different info -- is reported with its root prepended.

    Args:
        path1: Root of the first directory tree.
        path2: Root of the second directory tree.

    Returns:
        A sorted list of full paths; a file that differs on both sides
        appears twice, once under each root.
    """
    file_info_list1 = get_file_info_list(path1)
    file_info_list2 = get_file_info_list(path2)
    # Iterate over a snapshot of the keys because entries are deleted inside
    # the loop, and test membership on the dict itself (a hash lookup) rather
    # than on a keys() list, which made the comparison quadratic.
    for name in list(file_info_list1):
        if name not in file_info_list2:
            continue
        if file_info_list1[name] == file_info_list2[name]:
            del file_info_list1[name]
            del file_info_list2[name]
    different_files = add_root_path(path1, file_info_list1)
    different_files += add_root_path(path2, file_info_list2)
    different_files.sort()
    return different_files
def add_root_path(path, file_info_list):
    """Join *path* in front of every key of *file_info_list*.

    Args:
        path: Directory to prepend.
        file_info_list: Mapping whose keys are file names relative to *path*.

    Returns:
        A list with one ``os.path.join(path, key)`` entry per key.
    """
    full_paths = []
    for relative_name in file_info_list.keys():
        full_paths.append(os.path.join(path, relative_name))
    return full_paths
def format_path(path):
    """Return *path* ending in exactly one backslash.

    Any run of trailing backslashes is removed first and a single one is then
    appended, so an empty string becomes a lone backslash.
    """
    separator = '\\'
    trimmed = path.rstrip(separator)
    return trimmed + separator
def write_file(content):
    """Write every entry of *content* on its own line to the result file.

    The output always goes to ``file_contrast_result.txt`` in the current
    working directory and replaces any previous content.  "Done!" is printed
    once the file has been written.

    Args:
        content: Iterable of strings, one per output line, without line
            endings.
    """
    # The file is opened in text mode, which already converts "\n" to the
    # platform line ending.  Writing os.linesep here would be translated a
    # second time and end up as "\r\r\n" on Windows.
    with open("file_contrast_result.txt", "w") as f:
        f.writelines(line + "\n" for line in content)
    print("Done!")
if __name__ == '__main__':
    # Ask for the two directories, normalise each one as soon as it is
    # entered, then compare the trees and save the list of differing files.
    first_input = raw_input('Please input PATH 1: ')
    path1 = format_path(first_input)
    second_input = raw_input('Please input PATH 2: ')
    path2 = format_path(second_input)
    different_files = file_contrast(path1, path2)
    write_file(different_files)
楼主可以采用标准库的 filecmp 模块。
谢谢，果然我又自己造轮子了。
楼主,你还在么?
如果我想比较文件夹 1 和文件夹 2 里的 PDF 文档是否有重复，并把重复的文件清单列出来（显示在生成的 txt 中），该如何操作呢？急盼回复 :)
对取到的文件名判断一下扩展名是否是 pdf，只比较 pdf 文件即可。
楼主,还在吗?急求源码
是指 write_file 那个函数么？