Last active 1670065438

A tool that generates checksums for all files under the current directory and reports any mismatches against an existing checksum file.

recursive_checksum.py Raw
#!/usr/bin/env python

import os
import json
from hashlib import md5

# Root of the tree to checksum: the directory the script is run from.
rootdir = os.getcwd()

# Name of the JSON file that stores the recorded checksums (configurable).
CHECKSUMS = "checksums.json"

# Load previously recorded checksums if a valid file exists; otherwise
# start with an empty map.  The original bare `except:` swallowed every
# exception (including KeyboardInterrupt); catch only the two expected
# failure modes: file missing, or file present but not valid JSON.
try:
    with open(CHECKSUMS) as f:
        output = json.load(f)
except (FileNotFoundError, json.JSONDecodeError):
    output = {}
def md5sum(filename):
    """Return the hexadecimal MD5 digest of the file at *filename*.

    Reads the file in chunks so the complete contents never need to be
    loaded into RAM at once.
    http://stackoverflow.com/a/24847608
    """
    # Renamed from `hash` to avoid shadowing the builtin of the same name.
    digest = md5()
    with open(filename, "rb") as f:
        # iter() with a sentinel calls the lambda until it returns b"" (EOF).
        for chunk in iter(lambda: f.read(128 * digest.block_size), b""):
            digest.update(chunk)
    return digest.hexdigest()
27
def main():
    """Checksum every file under *rootdir* and sync with ``output``.

    For each file already present in the loaded ``output`` dict, the hash
    is recomputed and compared — a mismatch is printed (the marked spot is
    a hook for e.g. re-downloading the file).  Files not yet recorded are
    added.  Finally the updated map is written back to CHECKSUMS as
    pretty-printed JSON.  Uses the module-level ``rootdir``, ``output``
    and ``CHECKSUMS``.
    """
    for folder, subs, files in os.walk(rootdir):
        for filename in files:
            # Never checksum the checksum file itself.
            if filename == CHECKSUMS:
                continue
            # Hoisted: the original rebuilt the joined path and the
            # relative-path slice up to three times per file.
            path = os.path.join(folder, filename)
            # Key entries by path relative to rootdir (drop "rootdir/").
            relpath = path[len(rootdir) + 1:]
            checksum = md5sum(path)
            if relpath in output:
                if output[relpath] != checksum:
                    # replace with code to redownload given file?
                    print("Mismatch on {}".format(relpath))
            else:
                output[relpath] = checksum

    # Pretty prints json to output file
    with open(os.path.join(rootdir, CHECKSUMS), "w") as f:
        json.dump(output, f, indent=4)
49
50main()