Rename artschedrov.yml to artschedrov.yaml
[rms-support-letter.git] / find-duplicates.py
blob04031bd094c43d3f8815293c82741d3c225c4381
1 import os
2 from collections import defaultdict
def read_field(lines, key, file_name):
    """Return the stripped value of the first "<key>:" line in *lines*.

    Only lines that *start* with the prefix are considered, so indented
    (nested) keys are ignored.

    Args:
        lines: The lines of one signature file, without line terminators.
        key: Field name without the trailing colon, e.g. ``"name"``.
        file_name: Name of the file being parsed; used only in the error
            message.

    Raises:
        ValueError: If no line starts with ``"<key>:"``.  This replaces the
            bare ``StopIteration`` that would otherwise escape without saying
            which file is malformed.
    """
    prefix = f"{key}:"
    for line in lines:
        if line.startswith(prefix):
            return line[len(prefix):].strip()
    raise ValueError(f"{file_name}: no line starting with {prefix!r}")


def looks_like_person_name(name):
    """Return True if *name* looks like a nickname or a full name.

    A name qualifies when its first character is unchanged by ``lower()``
    (lowercase letters, digits, uncased scripts) or when it contains a space.
    A single capitalised word is therefore not indexed.  An empty name never
    qualifies (and no longer raises ``IndexError``).
    """
    return bool(name) and (name[0] == name[0].lower() or " " in name)


def index_signers(entries):
    """Group file names by signer name and by signer link.

    Args:
        entries: Iterable of ``(file_name, contents)`` pairs, where
            *contents* is the full text of one signature file.

    Returns:
        A ``(file_name_by_name, file_name_by_link)`` pair of
        ``defaultdict(list)`` objects.  Names are indexed only when
        ``looks_like_person_name`` accepts them; the placeholder link
        ``"/#"`` is never indexed.

    Raises:
        ValueError: If a file has no ``name:`` or no ``link:`` line.
    """
    file_name_by_name = defaultdict(list)
    file_name_by_link = defaultdict(list)

    for file_name, contents in entries:
        # Normalise CRLF line endings and split once for both lookups.
        lines = contents.replace("\r", "").split("\n")
        name = read_field(lines, "name", file_name)
        link = read_field(lines, "link", file_name)

        if looks_like_person_name(name):
            file_name_by_name[name].append(file_name)
        if link != "/#":
            file_name_by_link[link].append(file_name)

    return file_name_by_name, file_name_by_link


def read_signed_files(directory):
    """Yield ``(file_name, contents)`` for each entry of *directory*, sorted.

    Files are decoded as UTF-8 explicitly so that the result does not depend
    on the platform's locale encoding.
    """
    for file_name in sorted(os.listdir(directory)):
        with open(f"{directory}/{file_name}", encoding="utf-8") as f:
            yield file_name, f.read()


def report_duplicates(index):
    """Print every key of *index* that maps to more than one file name."""
    for key, file_names in index.items():
        if len(file_names) > 1:
            print(key, "duplicates:", file_names)


def main():
    """Scan ``_data/signed`` and print duplicated names, then duplicated links."""
    file_name_by_name, file_name_by_link = index_signers(
        read_signed_files("_data/signed")
    )
    report_duplicates(file_name_by_name)
    report_duplicates(file_name_by_link)


if __name__ == "__main__":
    main()