recursive_checksum.py
                        
                             · 1.6 KiB · Python
                        
                    
                    
                      
                        Raw
                      
                      
                        
                          
                        
                    
                    
                
                
            #!/usr/bin/env python
import os
import json
from hashlib import md5
# Checksums are keyed relative to the directory the script is run from.
rootdir = os.getcwd()

# Name of the JSON manifest file (configurable).
CHECKSUMS = "checksums.json"

# Load an existing manifest so known files get verified instead of re-added;
# start from an empty dict when the manifest is missing or unreadable.
# NOTE: narrowed from a bare `except:` — that would also swallow
# KeyboardInterrupt/SystemExit. OSError covers a missing/unreadable file,
# ValueError covers corrupt JSON (json.JSONDecodeError subclasses it).
try:
    with open(CHECKSUMS) as f:
        output = json.load(f)
except (OSError, ValueError):
    output = {}
def md5sum(filename):
    """Return the hex MD5 digest of *filename*'s contents.

    The file is consumed in fixed-size chunks so arbitrarily large
    files can be hashed without loading them into RAM all at once.
    http://stackoverflow.com/a/24847608
    """
    digest = md5()
    chunk_size = 128 * digest.block_size
    with open(filename, "rb") as f:
        chunk = f.read(chunk_size)
        while chunk:
            digest.update(chunk)
            chunk = f.read(chunk_size)
    return digest.hexdigest()
def main():
    """Walk all files under ``rootdir`` and reconcile them with the manifest.

    Files whose relative path is already in ``output`` are re-hashed and
    compared against the stored checksum (a mismatch is reported but not
    repaired); unseen files are hashed and added. The updated manifest is
    then pretty-printed back to ``CHECKSUMS`` inside ``rootdir``.
    """
    for folder, subs, files in os.walk(rootdir):
        for filename in files:
            if filename == CHECKSUMS:
                # Never checksum the manifest itself.
                continue
            path = os.path.join(folder, filename)
            # Key by path relative to rootdir so the manifest is portable
            # (equivalent to the old manual prefix slice, but clearer).
            fn = os.path.relpath(path, rootdir)
            if fn in output:
                if output[fn] != md5sum(path):
                    # replace with code to redownload given file?
                    print("Mismatch on {}".format(fn))
            else:
                # Reuse the already-computed key/path instead of rebuilding
                # them (the original recomputed both joins here).
                output[fn] = md5sum(path)
    # Pretty-print the manifest to the output file.
    with open(os.path.join(rootdir, CHECKSUMS), "w") as f:
        json.dump(output, f, indent=4)


# Guarded so importing this module no longer triggers a full walk/write.
if __name__ == "__main__":
    main()
                | 1 | #!/usr/bin/env python | 
| 2 | |
| 3 | import os | 
| 4 | import json | 
| 5 | from hashlib import md5 | 
| 6 | |
| 7 | rootdir = os.getcwd() | 
| 8 | |
| 9 | CHECKSUMS = "checksums.json" | 
| 10 | |
| 11 | try: | 
| 12 | with open(CHECKSUMS) as f: | 
| 13 | output = json.load(f) | 
| 14 | except: | 
| 15 | output = {} | 
| 16 | |
| 17 | def md5sum(filename): | 
| 18 | """ Opens a file and progressively generates an MD5 hash | 
| 19 | from its contents, avoiding loading the complete | 
| 20 | contents into ram at once | 
| 21 | http://stackoverflow.com/a/24847608 """ | 
| 22 | hash = md5() | 
| 23 | with open(filename, "rb") as f: | 
| 24 | for chunk in iter(lambda: f.read(128 * hash.block_size), b""): | 
| 25 | hash.update(chunk) | 
| 26 | return hash.hexdigest() | 
| 27 | |
| 28 | def main(): | 
| 29 | """ Iterate over files in subdirectories of current dir, use | 
| 30 | md5sum() to generate checksums for each, append to a dict | 
| 31 | for inclusion in checksums.json (name configurable) or | 
| 32 | checks an existing checksums.json for conflicts """ | 
| 33 | for folder, subs, files in os.walk(rootdir): | 
| 34 | for filename in files: | 
| 35 | if filename == CHECKSUMS: | 
| 36 | continue | 
| 37 | fn = os.path.join(folder, filename)[len(rootdir)+1:] | 
| 38 | if fn in output: | 
| 39 | if output[fn] != md5sum(os.path.join(folder, filename)): | 
| 40 | # replace with code to redownload given file? | 
| 41 | print("Mismatch on {}".format(fn)) | 
| 42 | else: | 
| 43 | output[os.path.join(folder, filename)[len(rootdir)+1:]] = \ | 
| 44 | md5sum(os.path.join(folder, filename)) | 
| 45 | |
| 46 | # Pretty prints json to output file | 
| 47 | with open(os.path.join(rootdir, CHECKSUMS), "w") as f: | 
| 48 | json.dump(output, f, indent=4) | 
| 49 | |
| 50 | main() |