recursive_checksum.py
#!/usr/bin/env python
import os
import json
from hashlib import md5

rootdir = os.getcwd()
CHECKSUMS = "checksums.json"

# Load an existing checksum file if present; otherwise start from scratch
try:
    with open(CHECKSUMS) as f:
        output = json.load(f)
except (IOError, ValueError):
    output = {}


def md5sum(filename):
    """Opens a file and progressively generates an MD5 hash
    from its contents, avoiding loading the complete
    contents into RAM at once.
    http://stackoverflow.com/a/24847608"""
    file_hash = md5()
    with open(filename, "rb") as f:
        # Read in fixed-size chunks so large files never sit in memory whole
        for chunk in iter(lambda: f.read(128 * file_hash.block_size), b""):
            file_hash.update(chunk)
    return file_hash.hexdigest()


def main():
    """Iterates over files in subdirectories of the current dir, uses
    md5sum() to generate a checksum for each and adds it to a dict
    for inclusion in checksums.json (name configurable), or checks
    each file against an existing checksums.json entry for conflicts."""
    for folder, subs, files in os.walk(rootdir):
        for filename in files:
            if filename == CHECKSUMS:
                continue
            path = os.path.join(folder, filename)
            # Key entries by path relative to rootdir
            fn = path[len(rootdir) + 1:]
            if fn in output:
                if output[fn] != md5sum(path):
                    # replace with code to redownload given file?
                    print("Mismatch on {}".format(fn))
            else:
                output[fn] = md5sum(path)

    # Pretty-print the JSON to the output file
    with open(os.path.join(rootdir, CHECKSUMS), "w") as f:
        json.dump(output, f, indent=4)


if __name__ == "__main__":
    main()
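
The checksums.json that main() writes is a flat mapping of paths (relative to the working directory) to hex digests, so it can be re-checked later without repeating the full walk. A minimal sketch of that kind of spot check, assuming the layout above has already been generated; the verify_file() helper below is illustrative and not part of the gist:

import json
import os
from hashlib import md5


def verify_file(relpath, checksums="checksums.json"):
    """Return True if relpath's current MD5 matches the one stored in
    checksums.json (hypothetical helper; assumes the script above has run)."""
    with open(checksums) as f:
        stored = json.load(f)
    file_hash = md5()
    with open(os.path.join(os.getcwd(), relpath), "rb") as f:
        for chunk in iter(lambda: f.read(128 * file_hash.block_size), b""):
            file_hash.update(chunk)
    return stored.get(relpath) == file_hash.hexdigest()


# e.g. verify_file("subdir/archive.zip") -> True while the file is unchanged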