Steven Smith revised this gist . Go to revision
1 file changed, 3 insertions, 1 deletion
recursive_checksum.py
| @@ -45,4 +45,6 @@ def main(): | |||
| 45 | 45 | ||
| 46 | 46 | # Pretty prints json to output file | |
| 47 | 47 | with open(os.path.join(rootdir, CHECKSUMS), "w") as f: | |
| 48 | - | json.dump(output, f, indent=4) | |
| 48 | + | json.dump(output, f, indent=4) | |
| 49 | + | ||
# Run the scan only when this file is executed as a script, so that merely
# importing the module does not walk the directory tree or rewrite the
# checksum manifest as a side effect.
if __name__ == "__main__":
    main()
Steven Smith revised this gist . Go to revision
1 file changed, 48 insertions
recursive_checksum.py(file created)
| @@ -0,0 +1,48 @@ | |||
| 1 | + | #!/usr/bin/env python | |
| 2 | + | ||
| 3 | + | import os | |
| 4 | + | import json | |
| 5 | + | from hashlib import md5 | |
| 6 | + | ||
# Directory tree to scan: the working directory at the time the module loads.
rootdir = os.getcwd()

# File name of the JSON manifest mapping relative file paths to MD5 digests.
CHECKSUMS = "checksums.json"

# Load previously recorded checksums.  A missing, unreadable or malformed
# manifest is treated as "nothing recorded yet"; only I/O and parse errors
# are caught so that Ctrl-C and genuine programming errors still surface.
try:
    with open(os.path.join(rootdir, CHECKSUMS)) as f:
        output = json.load(f)
except (IOError, OSError, ValueError):
    # json reports malformed input as ValueError (JSONDecodeError subclasses it).
    output = {}

# The rest of the script indexes and assigns into ``output`` by path, so any
# JSON payload that is not an object is discarded rather than crashing later.
if not isinstance(output, dict):
    output = {}
| 16 | + | ||
def md5sum(filename, chunk_size=None):
    """Return the MD5 hex digest of a file's contents.

    The file is read in binary mode and fed to the hash chunk by chunk, so
    the complete contents are never held in memory at once
    (approach from http://stackoverflow.com/a/24847608).

    Args:
        filename: Path of the file to hash.
        chunk_size: Number of bytes to read per iteration.  Defaults to
            128 times the hash's block size.

    Returns:
        The digest as a lowercase hexadecimal string.

    Raises:
        ValueError: If ``chunk_size`` is not a positive integer.
    """
    digest = md5()
    if chunk_size is None:
        chunk_size = 128 * digest.block_size
    elif chunk_size <= 0:
        # read(0) would end the loop immediately and silently yield the
        # digest of an empty file; read(-1) would load the whole file.
        raise ValueError("chunk_size must be a positive integer")

    with open(filename, "rb") as f:
        # iter() with a sentinel keeps calling read() until it returns b"".
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()
| 27 | + | ||
def main():
    """Record or verify MD5 checksums for every file under ``rootdir``.

    Walks ``rootdir`` recursively.  Each file is keyed in the module-level
    ``output`` dict by its path relative to ``rootdir``:

    * a file with no entry yet gets its checksum recorded;
    * a file whose stored checksum differs from the current one is reported
      on stdout, and the stored value is left unchanged.

    Any file named like the manifest (``CHECKSUMS``) is skipped, at any
    depth.  The resulting dict is then written back, pretty-printed, to the
    manifest in ``rootdir``.
    """
    for folder, _subs, files in os.walk(rootdir):
        for filename in files:
            if filename == CHECKSUMS:
                continue

            path = os.path.join(folder, filename)
            # relpath instead of slicing off len(rootdir) + 1 characters:
            # the slice drops the first character of the name when rootdir
            # already ends with a separator (e.g. the filesystem root).
            fn = os.path.relpath(path, rootdir)

            if fn not in output:
                output[fn] = md5sum(path)
            elif output[fn] != md5sum(path):
                # replace with code to redownload given file?
                print("Mismatch on {}".format(fn))

    # Pretty prints json to output file
    with open(os.path.join(rootdir, CHECKSUMS), "w") as f:
        json.dump(output, f, indent=4)