-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathchecksum.py
executable file
·181 lines (153 loc) · 6.69 KB
/
checksum.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
#!/usr/bin/env python3
"""checksum.py
Calculate a sha256 checksum of the concatenation of all .py files in this repository.
[//]: # (markdown comment # noqa)
Usage:
checksum.py [ --ends-with=".py" ]
[ --verbose | -v ]
[ --debug | -d ]
[ -n ]
checksum.py (-h | --help)
[ --verbose | -v ]
[ --debug | -d ]
[ -n ]
Options:
-h --help Show this screen.
--debug -d printouts while running, extra debugging.
--ends-with=".py" defaults to ".py". Allows program to run on other kinds of files.
-n Do not print the trailing newline character for final output.
Example:
$ python checksum.py
A_64_CHARACTER_HEXADECIMAL_STRING_GENERATED_BY_SHA_256_ALGORITHM
$ python checksum.py --version
Checksum 1.0.0
# TEST CASE 1 (verifying basic input/output and concatenation property)
# note that sha256("") = 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'
# note that sha256("hi") = '8f434346648f6b96df89dda901c5176b10a6d83961dd3c1ac88b59b2dc327aa4'
# note that sha256("hihi") = '27e6f695d734689575e2a063b77668a1fab9c7a83071134630f6a02ebf697592'
$ rm *.TEST_FOO
$ python checksum.py --ends-with=".TEST_FOO"
e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
$ echo -n "hi" > 1.TEST_FOO
$ python checksum.py --ends-with=".TEST_FOO"
8f434346648f6b96df89dda901c5176b10a6d83961dd3c1ac88b59b2dc327aa4
$ echo -n "hi" > 2.TEST_FOO
$ python checksum.py --ends-with=".TEST_FOO"
27e6f695d734689575e2a063b77668a1fab9c7a83071134630f6a02ebf697592
$ rm *.TEST_FOO
# TEST CASE 2 (comparing against another sha256 tool, expect same results)
$ python checksum.py --verbose
...
...
py_files: ['./A.py', './B.py', './C.py', './D/efg.py']
...
...
len(catted_files): 48114
...
A_64_CHARACTER_HEXADECIMAL_STRING_GENERATED_BY_SHA_256_ALGORITHM
$ cat A.py B.py C.py D/efg.py | shasum -a 256 -U
A_64_CHARACTER_HEXADECIMAL_STRING_GENERATED_BY_SHA_256_ALGORITHM
Resources:
* docopt is cool
* http://docopt.org
Changelog:
All notable changes to this file `checksum.py` will be documented here.
The format is _loosely_ based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
## [1.0.0] - 2020-04-18 (latest)
### Added/Changed/Fixed
- what was `checksum.sh` is now `checksum.py`
- much more readable and hopefully future-proof.
- also would work on windows because python is portable.
- also works correctly as shown by simple "hihi" test case in Example
"""
import hashlib
import os
from docopt import docopt
from utils.terminal_colors import print_colored_doc, print_debug, print_verbose
def print_help():
    """Print the module docstring through ``print_colored_doc``.

    Each keyword below names a colour argument of ``print_colored_doc`` and
    carries the literal phrases (or, for the ``*_patterns`` keys, regular
    expressions) from the docstring that are passed for that colour.
    """
    color_rules = {
        # Program name and the help flag as they appear in the Usage section.
        "to_color_green_bold": ("checksum.py", "(-h | --help)"),
        # Nothing is passed for yellow.
        "to_color_yellow_bold": (),
        # The one-line summary plus every section heading of the docstring.
        "to_color_white_bold": (
            "Calculate a sha256 checksum of the concatenation of all .py files in this repository.",  # noqa
            "Usage:",
            "Options:",
            "Example:",
            "Resources:",
            "Changelog:",
        ),
        # Shell example lines: everything from a "$" onwards.
        "to_color_white_bold_patterns": (r"(\$.*)",),
        # Default-value notes in the Options section.
        "to_color_red_bold_patterns": (r"(defaults to.*)",),
        # The markdown-comment marker line of the docstring.
        "to_color_grey_out": ("[//]: # (markdown comment # noqa)",),
    }
    print_colored_doc(doc=__doc__, **color_rules)
if __name__ == "__main__":
    # Parse the command line against the module docstring.  help=False keeps
    # docopt from printing the plain docstring itself, so that --help can be
    # routed to the colourised print_help() below.
    arguments = docopt(__doc__, version="Checksum 1.0.0", help=False)
    VERBOSE = arguments["--verbose"] or arguments["-v"]
    DEBUG = arguments["--debug"]
    if DEBUG:
        print(arguments)
        print("VERBOSE:", VERBOSE)
        print("DEBUG:", DEBUG)

    if arguments["--help"]:
        print_help()
        # Raise SystemExit directly instead of calling exit(): exit() is a
        # helper injected by the `site` module for interactive sessions and
        # is not guaranteed to exist (e.g. under `python -S`).
        raise SystemExit(0)

    # main code...
    THIS_FILENAME = "checksum.py"
    if DEBUG:
        print_debug("THIS_FILENAME", THIS_FILENAME)

    # ASSUMPTION 1 - this program is running from root directory
    # Check this assumption before anything else.  An explicit test is used
    # rather than `assert`, because assert statements are removed when Python
    # runs with -O and the check would then be skipped silently.
    if THIS_FILENAME not in os.listdir():
        msg = f"{THIS_FILENAME} expects to run in same directory that it exists"
        msg += "\n and should run in the ROOT directory."
        # SystemExit with a string prints it to stderr and exits with status 1.
        raise SystemExit(msg)

    # ASSUMPTION 2 - the files we care about can be nested at arbitrary depth
    # Recursively get all file paths from the current directory downward.
    FILEPATHS = [
        os.path.join(dirpath, filename)
        for dirpath, _, filenames in os.walk(".")
        for filename in filenames
    ]
    if DEBUG:
        print_debug("FILEPATHS", FILEPATHS)
    if VERBOSE:
        print_verbose("FILEPATHS[:5]", FILEPATHS[:5])
    if VERBOSE or DEBUG:
        print_verbose("len(FILEPATHS)", len(FILEPATHS))

    # ASSUMPTION 3 - only python files need be considered for checksum
    # (unless another suffix is requested with --ends-with).
    ENDS_WITH = arguments["--ends-with"] or ".py"
    py_files = [path for path in FILEPATHS if path.endswith(ENDS_WITH)]
    if VERBOSE or DEBUG:
        print_verbose("py_files", py_files)
        print_verbose("len(py_files)", len(py_files))

    # ASSUMPTION 4 - ORDER MATTERS
    # once all python filepaths are found from current directory,
    # then they will be sorted in alphabetical order
    # e.g. py_files: ['./checksum.py', './clubs.py', './main.py',
    #                 './ntfp/ntfp_types.py', './ntfp/__init__.py',
    #                 './ntfp/ntfp.py', './utils/__init__.py',
    #                 './utils/terminal_colors.py']
    #      sorted_py_files: ['./checksum.py', './clubs.py', './main.py',
    #                        './ntfp/__init__.py', './ntfp/ntfp.py',
    #                        './ntfp/ntfp_types.py', './utils/__init__.py',
    #                        './utils/terminal_colors.py']
    sorted_py_files = sorted(py_files)

    # ASSUMPTION 5 - the following code equivalent to `cat file.py **.py ...py`
    # Read each file as raw bytes and join once at the end; repeated `+=` on
    # bytes re-copies everything accumulated so far on every iteration.
    file_contents = []
    for path in sorted_py_files:
        with open(path, "rb") as handle:
            file_contents.append(handle.read())
    catted_files = b"".join(file_contents)
    if DEBUG:
        print_debug("catted_files", catted_files)
    if VERBOSE:
        print_verbose("catted_files[:99]", catted_files[:99])
    if VERBOSE or DEBUG:
        print_verbose("len(catted_files)", len(catted_files))

    # ASSUMPTION 6 - hashlib.sha256 equiv to `shasum -a 256 -U` on macOS
    sha = hashlib.sha256(catted_files).hexdigest()
    if VERBOSE or DEBUG:
        print_verbose("sha", sha)

    # -n suppresses the trailing newline of the final output.
    if arguments["-n"]:
        print(sha, end="")
    else:
        print(sha)