# -*- coding: utf-8 -*-
import sys
import os
import time
import hashlib
# Directory to scan for duplicates (edit as needed)
target_path = "./"
# MD5 digests of files seen so far, for fast duplicate lookup
exist_md5 = set()
# Running statistics, updated by main()
total_files = 0
del_files = 0
# Absolute path of this script, so the scan never deletes the script itself
script_path = os.path.abspath(sys.argv[0])
def get_file_md5(file_path):
    """
    Compute the MD5 hex digest of a file.

    Reads the file in 4 MB chunks so arbitrarily large files can be
    hashed without loading them into memory at once.

    :param file_path: path of the file to hash
    :return: hex digest string, or None if the file cannot be read
    """
    md5 = hashlib.md5()
    try:
        with open(file_path, 'rb') as f:
            # 4 MB per read keeps memory usage bounded for huge files.
            while chunk := f.read(4 * 1024 * 1024):
                md5.update(chunk)
        return md5.hexdigest()
    except OSError as e:
        # Narrowed from a blanket `except Exception`: only I/O failures
        # (missing file, permission denied, ...) mean "unreadable";
        # genuine programming errors should propagate.
        print(f"[无法读取] {file_path} => {str(e)}")
        return None
def scan_files(path):
    """
    Recursively collect the absolute paths of every file under *path*.

    The running script itself (module-level ``script_path``) is excluded
    so the deduplicator can never delete its own source file.

    :param path: root directory to walk
    :return: list of absolute file paths
    """
    collected = []
    for root, _dirs, names in os.walk(path):
        candidates = (os.path.abspath(os.path.join(root, name)) for name in names)
        # Keep everything except the script that is currently running.
        collected.extend(p for p in candidates if p != script_path)
    return collected
def main():
    """
    Scan *target_path* for duplicate files, delete every file whose MD5
    digest has already been seen, and print a summary.

    Updates the module-level counters ``total_files`` / ``del_files``
    and the ``exist_md5`` set as side effects.
    """
    global total_files, del_files
    print(f"[{time.strftime('%Y-%m-%d %H:%M:%S')}] 开始扫描重复文件...")
    # Gather all candidate files first, then hash them one by one.
    all_files = scan_files(target_path)
    for file in all_files:
        total_files += 1
        print(f"[{total_files}] 检查: {file}")
        md5 = get_file_md5(file)
        if not md5:
            # Unreadable file: skip it rather than risk a false match.
            continue
        if md5 in exist_md5:
            # Digest already seen — this file is a duplicate, remove it.
            try:
                os.remove(file)
                del_files += 1
                print(f"[重复已删除] => {file}")
            except OSError:
                # Narrowed from a bare `except:` so KeyboardInterrupt and
                # SystemExit are no longer swallowed; deletion failures
                # are still reported without aborting the scan.
                print(f"[删除失败] => {file}")
        else:
            exist_md5.add(md5)
    # Final summary.
    print("\n" + "=" * 50)
    print("扫描完成!")
    print(f"总文件数: {total_files}")
    print(f"唯一文件数: {len(exist_md5)}")
    print(f"删除重复文件: {del_files} 个")
    print("=" * 50)


if __name__ == "__main__":
    main()
# Source: https://zhaoshuman.cn/%E6%8A%80%E6%9C%AF%E5%88%86%E4%BA%AB/12.html
# Disclaimer: this is an original article; copyright belongs to zhaoshuman.
# You are welcome to share it, but please keep the attribution when reposting.
# NOTE(review): this web-page footer (including a duplicated disclaimer line and
# a "post a comment" link label) was pasted into the .py file as bare text,
# which is a Python syntax error — it is preserved here as comments instead.