-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlz77.c
156 lines (139 loc) · 6.09 KB
/
lz77.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
#include "lz77.h"
bit_string_t *lz77_encode(bit_string_t *src, size_t nw, bit_string_t *window, size_t *longest_match)
{
bit_string_t *result = bit_string_init(src->size * 4);
size_t offset = nw; /* Next bit to code */
size_t first = nw; /* First bit that was not coded yet */
size_t lognw = ceil(log2(nw));
size_t best_match, best_offset, current_match;
size_t tmp_longest_match = 0;
if(window == NULL) {
/* Write first nw bits as is without compression */
/* We write nw bits as is and decoder later uses this information to */
/* recover initial nw */
bit_string_concat_and_destroy(result, cfc_encode(nw));
bit_string_copy(result,src,0,nw);
//DEBUG_PRINT("lz77_encode start nw = %zu lognw = %zu offset = %zu first = %zu src->offset %zu\n",nw,lognw,offset,first,src->offset);
} else {
/* This is not a first block, so the window was passed to this
* function. Encode next block using that window as initial window. We
* do it simply by adding window in front of source bitstring. */
bit_string_t *tmp = bit_string_init(src->size + window->size);
bit_string_concat(tmp,window);
bit_string_concat(tmp,src);
src = tmp;
offset = window->offset; /* Next bit to code and first uncoded bit are */
first = window->offset; /* now right after window */
}
/* After copying first nw bits we start main cycle. We go through our bit */
/* string and search for matches in previous nw bits. If match is found */
/* and it is big enough >log(nw), then we compress it. Otherwise we check */
/* if number of bits we have is not too big. If it becomes bigger than */
/* log(nw), we have to write them uncompressed. */
while(offset < src->offset) {
PRINT_DEBUG("lz77_encode offset %zu first %zu\n",offset,first);
if(offset - first >= lognw) {
PRINT_DEBUG("lz77_encode %zu bits without matches, sending them uncompressed\n",lognw);
bit_string_concat_and_destroy(result, cfc_encode(lognw));
bit_string_copy(result,src,first,lognw);
first += lognw;
}
best_match = best_offset = 0;
for(int i = 1; i < nw; ++i) {
current_match = bit_string_sub_cmp(src,src,offset,offset - i);
if(current_match > best_match) {
best_match = current_match;
best_offset = i;
}
}
PRINT_DEBUG("lz77_encode best_match %zu best_offset %zu\n",best_match,best_offset);
if(tmp_longest_match < best_match) {
tmp_longest_match = best_match;
}
if(best_match > lognw) {
if(first != offset) {
bit_string_concat_and_destroy(result, cfc_encode(offset - first));
bit_string_copy(result,src,first,offset - first);
}
bit_string_concat_and_destroy(result, cfc_encode(best_match));
for(int i = lognw - 1; i >= 0; --i) {
bit_string_append_bit(result, best_offset >> i & 1);
}
offset += best_match;
first = offset;
} else {
offset++;
}
}
if(offset != first) {
PRINT_DEBUG("lz77_encode write %zu last bits\n",offset - first);
bit_string_concat_and_destroy(result, cfc_encode(offset - first));
bit_string_copy(result,src,first,offset - first);
}
if(longest_match != NULL) {
*longest_match = tmp_longest_match;
}
if(window != NULL) {
bit_string_destroy(src);
}
return result;
}
bit_string_t *lz77_decode(bit_string_t *src, size_t size, size_t *enc_size, bit_string_t *window,size_t *window_size)
{
if(window != NULL) {
size += window->offset;
}
bit_string_t *result = bit_string_init(size);
size_t len, data_offset, offset = 0;
if(window == NULL) {
*window_size = cfc_decode(src,&offset);
}
size_t nw = *window_size;
size_t lognw = ceil(log2(nw));
PRINT_DEBUG("lz77_decode nw %zu lognw %zu\n",nw,lognw);
if(window == NULL) {
/* Window is null, therefore this is first block. Copy first nw bits as
* is to result. */
for(int i = 0; i < nw; ++i) {
bit_string_append_bit(result,bit_string_get_bit(src,offset++));
}
} else {
/* Window was passed. Copy it as is to result to be able to decode
* everything properly. After decoding first window bits should be cut. */
for(int i = 0; i < window->offset; ++i) {
bit_string_append_bit(result,bit_string_get_bit(window,i));
}
}
while(offset < src->offset && result->offset < size) {
PRINT_DEBUG("lz77_decode offset %zu src->offset %zu\n",offset,src->offset);
len = cfc_decode(src,&offset);
if(len == 0 || offset > src->offset) {
PRINT_DEBUG("lz77_decode 0 length block or out of bounds - stopping\n");
break;
}
if(len <= lognw) {
// Uncompressed data follows
PRINT_DEBUG("lz77_decode uncompressed data length %zu\n",len);
bit_string_copy(result,src,offset,len);
offset += len;
} else {
// Offset follows (lognw bits number)
data_offset = 0;
for(int i = 0; i < lognw; ++i) {
data_offset = (data_offset << 1) | bit_string_get_bit(src,offset++);
}
PRINT_DEBUG("lz77_decode compressed data legnth %zu offset %zu\n",len,data_offset);
for(int i = 0; i < len; ++i) {
bit_string_append_bit(result,bit_string_get_bit(result,result->offset - data_offset));
}
}
}
*enc_size = offset;
if(window != NULL) {
/* Window is not null, so strip first window size bits of result. */
bit_string_t *tmp = bit_string_substr(result,window->offset,result->offset - window->offset);
bit_string_destroy(result);
result = tmp;
}
return result;
}