-
Notifications
You must be signed in to change notification settings - Fork 0
/
LZ78.py
70 lines (62 loc) · 1.91 KB
/
LZ78.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# LZ78 implementation
# Université de Fribourg, Suisse
# AUTHOR: Noé Zufferey - [email protected]
# CREATION: april 2018
# USAGE: python3 LZ78.py   (takes no arguments: reads 'freq17_320ver.txt', writes 'freq17_com.txt')
import sys
def compress(data):
    """Encode *data* as a list of LZ78 phrases.

    The phrase dictionary starts with the empty phrase at index 0 and gains
    one entry (the next free index) each time a new phrase is emitted.

    Args:
        data: The string to compress.

    Returns:
        A list of phrases. Each phrase is ``[index, char]``, where *index*
        refers to the longest previously seen phrase and *char* is the
        character that extends it. If the input ends in the middle of an
        already known phrase, the last element is the one-item list
        ``[index]`` with no character.
    """
    # A dict gives O(1) membership tests and index lookups; the value is the
    # phrase's position in insertion order, with '' fixed at index 0.
    phrase_index = {'': 0}
    comp_data = []
    word = ''
    for char in data:
        word += char
        if word in phrase_index:
            # Known phrase: keep extending it with the next character.
            continue
        comp_data.append([phrase_index[word[:-1]], word[-1]])
        phrase_index[word] = len(phrase_index)
        word = ''
    if word:
        # Input ended on a known phrase: emit its index without a character.
        comp_data.append([phrase_index[word]])
    return comp_data
def add_zeros(code, nbr):
    """Left-pad *code* with ``'0'`` characters up to a total length of *nbr*.

    Args:
        code: The string to pad (typically a string of binary digits).
        nbr: The minimum length of the result.

    Returns:
        *code* prefixed with as many ``'0'`` characters as needed to reach
        *nbr* characters. If *code* is already at least *nbr* characters
        long, it is returned without padding (it is never truncated).
    """
    return code.rjust(nbr, '0')
def to_bits(data, h=False):
    """Render a list of LZ78 phrases as a string of ``'0'``/``'1'`` characters.

    Args:
        data: Iterable of phrases. Each phrase is ``[index, char]``; a phrase
            may also be ``[index]`` or ``[index, '']`` when it carries no
            character (the trailing phrase of an input that ended inside a
            known phrase).
        h: When true, produce a human-readable form: a ``','`` separates the
            index from the character and a ``'|'`` terminates every phrase.

    Returns:
        The concatenated bit string. For the first phrase only the character
        is written; its index is omitted. For every later phrase the index is
        written first, left-padded with zeros to the current index width,
        followed by the character as its code point in binary, left-padded to
        8 digits.

    Notes:
        The index width starts at 1 and grows to the width of the widest
        index written so far. Characters whose code point exceeds 255 take
        more than 8 digits, because padding never truncates.
    """
    pieces = []
    index_width = 1
    for position, entry in enumerate(data):
        # A phrase without a character may be a one-element list.
        char = entry[1] if len(entry) > 1 else ''
        if position > 0:
            index_bits = bin(entry[0])[2:].rjust(index_width, '0')
            pieces.append(index_bits)
            # An index wider than the current width enlarges it from now on.
            index_width = len(index_bits)
            if h and char:
                pieces.append(',')
        if char:
            pieces.append(bin(ord(char))[2:].rjust(8, '0'))
        if h:
            pieces.append('|')
    return ''.join(pieces)
def main(input_path='freq17_320ver.txt', output_path='freq17_com.txt'):
    """Compress the text in *input_path* and write the phrases to *output_path*.

    The phrase list returned by ``compress`` is converted with ``str()`` and
    every ``[``, ``,``, ``'`` and space character is removed before writing,
    so each phrase appears as ``<index><char>]`` and the output ends with one
    extra ``]`` from the outer list.

    Args:
        input_path: Path of the text file to read.
        output_path: Path of the file to create or overwrite.
    """
    with open(input_path, 'r') as content_file:
        content = content_file.read()

    serialized = str(compress(content))

    # Debug output kept from the original script: the 4th character of the
    # stringified phrase list. Guarded because an empty input yields '[]',
    # which is too short to index at position 3.
    if len(serialized) > 3:
        print(serialized[3])

    # NOTE(review): this filter also removes '[', ',', "'" and ' ' when they
    # occur as compressed characters, so the output is lossy for inputs
    # containing them — confirm the input alphabet excludes them.
    stripped = serialized.translate(str.maketrans('', '', "[,' "))

    # The context manager guarantees the output is flushed and closed.
    with open(output_path, 'w') as out_file:
        out_file.write(stripped)


if __name__ == '__main__':
    main()