-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlab3.py
167 lines (142 loc) · 3.97 KB
/
lab3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# Created by: Kevin Nguyen
# Class: Special Topics in CS
# Lab 3
import operator, sys, json, hashlib, os, struct
from heapq import merge
from math import exp
def calculateHistogram(lst):
histogram = {}
i = 0
while i < len(lst):
# keys must be a string
char = str(lst[i])
if char not in histogram:
histogram[char] = 1
else:
histogram[char] += 1
i += 1
return histogram
"""
The histogram should be an array of tuples where the tuples contain (key, value)
"""
def formatHist(hist):
formattedHist = []
for key,value in hist.items():
formattedHist.append((key,value))
return formattedHist
"""
Huff compression assigns a binary code to each key/char
"""
def huffCompress(hist):
lst = sorted(hist, key = lambda y: y[1])
while len(lst) > 1:
fst = lst[0]
snd = lst[1]
val = fst[1] + snd[1]
key = [fst[0], snd[0]]
lst = lst[2:]
lst.append([key,val])
lst = sorted(lst, key = lambda y: y[1])
tree = lst[0][0]
code = []
huffCodeGenerator(tree, "", code)
dictCode = dict(code)
return dictCode
def huffCodeGenerator(inputList, side, code):
# could've been a numpy.float32 so if it's anything but a list then it's over
if not isinstance(inputList, list):
code.append((inputList, side))
else:
left = huffCodeGenerator(inputList[0], side + "0", code)
right = huffCodeGenerator(inputList[1], side + "1", code)
"""
the keys should be the char order value not the actual char itself
"""
def formatCodeDict(codeDict):
printableHist = {}
for key,value in codeDict.items():
printableHist[key] = value
return printableHist
"""
Given any input, outputs the JSON equivalent
"""
def JSONFormatter(input):
return json.dumps(input)
"""
Given a textfile and the dictionary of char to binary value,
convert it into a sequence of binary bits
"""
def textFileToBinary(lst, printableHist):
encodedFile = ""
i = 0
while i < len(lst):
# keys must be a string
char = str(lst[i])
if not char:
break
charVal = char
encodedFile += printableHist[charVal]
i += 1
isMultipleOfEight = 8 - (len(encodedFile) % 8)
if isMultipleOfEight != 8:
for i in range(isMultipleOfEight):
encodedFile += "0"
return encodedFile
"""
Given an 8 bit binary string, generate the integer equivalent
"""
def binStringToInt(binString):
reversedString = binString[::-1]
val = 0
for i in range(len(reversedString)):
val += (int(reversedString[i]) * pow(2, i))
return val
"""
Create a header for the compressed file
"""
def writeToNewFile(out, headerMap, JSONHist, encodedFile):
# Write everything to new file
f = open(out, "wb")
f.write(headerMap + "\n")
f.write(JSONHist + "\n")
for i in range(0,len(encodedFile), 8):
f.write(struct.pack("B",binStringToInt(encodedFile[i:i+8])))
f.close()
return
def fullHuffman(lst, headerMap, out):
try:
hist = calculateHistogram(lst)
formattedHist = formatHist(hist)
codeDict = huffCompress(formattedHist)
printableHist = formatCodeDict(codeDict)
JSONHist = JSONFormatter(printableHist)
JSONHeaderMap = JSONFormatter(headerMap)
#sys.stdout.write(JSONHist + "\n")
encodedFile = textFileToBinary(lst, printableHist)
writeToNewFile(out, JSONHeaderMap, JSONHist, encodedFile)
return codeDict
except IOError:
print "\nError. Quitting..."
return
def decode(input_file_name, output_file_name, code_dict):
#quantization factor. Might change
input_file = open(input_file_name, 'rb')
decode_dict = {v.encode() : k for k, v in code_dict.iteritems()}
#print decode_dict
binary_data = input_file.read()
binary_string = ""
for byte in binary_data:
binary_string += format(ord(byte),'08b')
decdoed_data = []
while len(decdoed_data) != 8:
sub_str = ""
i = 0
while sub_str not in decode_dict:
sub_str = binary_string[0:i]
i=i+1
decdoed_data += [int(decode_dict[sub_str])]
binary_string = binary_string[i-1:]
print decdoed_data
if __name__ == '__main__':
codeDict = fullHuffman([1,1,1,1,2,2,3,1], "t", "test.txt")
decode("test.txt", "testDecoded.txt", codeDict)