1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119
| import os import shutil import hashlib from PIL import Image import sys
def get_file_hash(file_path):
    """Return the hexadecimal MD5 digest of the file at ``file_path``.

    The file is opened in binary mode and consumed in 4096-byte pieces, so
    the whole content is never held in memory at once. The digest serves as
    a content fingerprint for deciding whether two files are identical.
    """
    digest = hashlib.md5()
    with open(file_path, "rb") as stream:
        while True:
            piece = stream.read(4096)
            if not piece:
                # An empty read means end of file.
                break
            digest.update(piece)
    return digest.hexdigest()
def convert_to_jpeg(image_path, output_path):
    """Convert the image at ``image_path`` into a JPEG file at ``output_path``.

    An image whose mode is not ``RGB`` is converted to ``RGB`` before it is
    saved; the JPEG is written with quality 95.

    Args:
        image_path: Path of the source image to read.
        output_path: Path the JPEG file is written to.

    Returns:
        True when the JPEG was saved, False when any step failed. Failures
        are reported on stdout instead of being raised, so a caller can keep
        going with the remaining files.
    """
    try:
        # Opening the image as a context manager releases the source file
        # handle on every path, including when conversion or saving fails.
        with Image.open(image_path) as img:
            rgb_img = img if img.mode == 'RGB' else img.convert('RGB')
            rgb_img.save(output_path, 'JPEG', quality=95)
    except Exception as e:
        # Deliberately broad: this is a best-effort conversion and every
        # failure is reported to the caller through the return value.
        print(f"转换图片失败 {image_path}: {e}")
        return False
    return True
# Extensions (lower-case, with dot) treated as images, and the subset that is
# already JPEG and can be copied without re-encoding.
_IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.gif', '.bmp')
_JPEG_EXTENSIONS = ('.jpg', '.jpeg')


def _list_images(folder):
    """Return the sorted paths of regular files in ``folder`` with an image extension."""
    paths = []
    # Sorting makes the numbering of imported files reproducible between runs.
    for name in sorted(os.listdir(folder)):
        path = os.path.join(folder, name)
        ext = os.path.splitext(name)[1].lower()
        if os.path.isfile(path) and ext in _IMAGE_EXTENSIONS:
            paths.append(path)
    return paths


def _next_free_path(folder, counter):
    """Return ``(path, counter)`` for the first unused ``NNNNNNN.jpg`` name at or after ``counter``."""
    while True:
        path = os.path.join(folder, f"{counter:07d}.jpg")
        if not os.path.exists(path):
            return path, counter
        counter += 1


def _remove_partial_output(path):
    """Delete a half-written output file left behind by a failed import, if any."""
    if os.path.exists(path):
        try:
            os.remove(path)
        except OSError as e:
            print(f"删除重复文件失败 {path}: {e}")


def main(argv=None, start_index=6290):
    """Merge the images of the collection sub-folder into the main folder.

    The sub-folder is ``<main folder>/可用图片-网上收集``. Every image in it
    whose MD5 digest is not already known is placed in the main folder as
    ``NNNNNNN.jpg``: JPEG files are copied, other formats are converted with
    ``convert_to_jpeg``. Images whose content matches a main-folder image, or
    an image imported earlier in the same run, are treated as duplicates and
    deleted. A summary is printed at the end.

    The sub-folder is removed afterwards only when every image was handled
    successfully; if any copy or conversion failed it is kept so the
    originals are not lost.

    Args:
        argv: Argument list laid out like ``sys.argv`` (program name followed
            by the main folder path). Defaults to ``sys.argv``.
        start_index: First number tried for the generated file names. Names
            already present in the main folder are skipped, never
            overwritten.

    Raises:
        SystemExit: With code 1 when the argument count is wrong or when the
            main folder or the sub-folder is missing.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) != 2:
        print("请提供主文件夹路径")
        print("使用方法: python image_organizer.py <主文件夹路径>")
        sys.exit(1)

    main_folder = argv[1]
    sub_folder = os.path.join(main_folder, "可用图片-网上收集")

    # isdir (rather than exists) also rejects a path that names a plain file,
    # which would otherwise crash in os.listdir below.
    if not os.path.isdir(main_folder):
        print(f"主文件夹不存在: {main_folder}")
        sys.exit(1)
    if not os.path.isdir(sub_folder):
        print(f"子文件夹不存在: {sub_folder}")
        sys.exit(1)

    main_image_files = _list_images(main_folder)
    known_hashes = {get_file_hash(path) for path in main_image_files}

    rename_counter = start_index
    processed_files = []
    duplicate_files = []
    failed_files = []

    for file_path in _list_images(sub_folder):
        file_hash = get_file_hash(file_path)
        if file_hash in known_hashes:
            duplicate_files.append(file_path)
            print(f"发现重复文件,将删除: {file_path}")
            continue

        new_file_path, rename_counter = _next_free_path(main_folder, rename_counter)
        ext = os.path.splitext(file_path)[1].lower()
        if ext in _JPEG_EXTENSIONS:
            try:
                shutil.copy2(file_path, new_file_path)
                succeeded = True
            except OSError as e:
                print(f"复制图片失败 {file_path}: {e}")
                succeeded = False
        else:
            succeeded = convert_to_jpeg(file_path, new_file_path)

        if not succeeded:
            failed_files.append(file_path)
            # The target name was unused before this attempt, so anything
            # found there now is a partial file from the failed import.
            _remove_partial_output(new_file_path)
            continue

        processed_files.append(new_file_path)
        # Remember the digest so a second copy inside the sub-folder is
        # recognised as a duplicate instead of being imported again.
        known_hashes.add(file_hash)
        rename_counter += 1

    for file_path in duplicate_files:
        try:
            os.remove(file_path)
        except OSError as e:
            print(f"删除重复文件失败 {file_path}: {e}")

    if failed_files:
        # Keep the sub-folder: it still holds the only copy of the images
        # that could not be imported.
        print(f"有 {len(failed_files)} 个文件处理失败,保留子文件夹: {sub_folder}")
    else:
        try:
            shutil.rmtree(sub_folder)
            print(f"已删除子文件夹: {sub_folder}")
        except OSError as e:
            print(f"删除子文件夹失败: {e}")

    print("\n处理完成!")
    print(f"主文件夹中的图片数量: {len(main_image_files)}")
    print(f"子文件夹中处理的图片数量: {len(processed_files)}")
    print(f"发现并删除的重复图片数量: {len(duplicate_files)}")
# Run the organizer only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
|