Skip to content

Commit 47d559e

Browse files
committed
new file: Compression/High-level/lz255_file_compression.pl
new file: Compression/High-level/lzbw4_file_compression.pl
new file: Compression/High-level/lzhd2_file_compression.pl
1 parent 7755125 commit 47d559e

File tree

4 files changed

+493
-0
lines changed

4 files changed

+493
-0
lines changed
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,153 @@
1+
#!/usr/bin/perl
2+
3+
# Author: Trizen
4+
# Date: 15 December 2022
5+
# Edit: 01 September 2024
6+
# https://github.com/trizen
7+
8+
# Compress/decompress files using LZSS compression + MRL + Huffman coding, using a maximum match distance of 255.
9+
10+
use 5.036;
11+
12+
use Getopt::Std qw(getopts);
13+
use File::Basename qw(basename);
14+
use Compression::Util qw(:all);
15+
16+
# Program identity. FORMAT is also used as the archive file extension.
use constant PKGNAME => 'LZ255';
use constant VERSION => '0.01';
use constant FORMAT  => 'lz255';

# Number of bytes read from the input per compression round
# (higher value = better compression).
use constant CHUNK_SIZE => 1 << 18;

# Container signature: the upper-cased format name followed by byte 0x01.
use constant SIGNATURE => uc(FORMAT) . "\x01";
26+
27+
# Print the command-line help text and terminate the program.
#
# The single optional argument is the process exit status; it defaults
# to 0 when omitted or undefined.
sub usage {
    my $status = shift;

    # The here-document body is printed verbatim, so it stays left-aligned.
    print <<"EOH";
usage: $0 [options] [input file] [output file]

options:
-e : extract
-i <filename> : input filename
-o <filename> : output filename
-r : rewrite output

-v : version number
-h : this message

examples:
$0 document.txt
$0 document.txt archive.${\FORMAT}
$0 archive.${\FORMAT} document.txt
$0 -e -i archive.${\FORMAT} -o document.txt

EOH

    exit(defined($status) ? $status : 0);
}
51+
52+
# Print "<package name> <version>" on one line and terminate the program.
sub version {
    say join(' ', PKGNAME, VERSION);
    exit 0;
}
56+
57+
# Check that the stream behind $fh starts with the container signature.
#
# Consumes length(SIGNATURE) bytes from $fh. Returns 1 when those bytes
# equal SIGNATURE; returns nothing (false) when they differ, when the
# stream is shorter than the signature, or when the read fails.
sub valid_archive {
    my ($fh) = @_;

    my $wanted = length(SIGNATURE);
    my $got    = read($fh, my $sig, $wanted);

    # read() gives undef on error and a short count on a truncated file.
    # Neither can be a valid archive, so do not fall through to "valid".
    return if !defined($got) || $got != $wanted;

    return if $sig ne SIGNATURE;
    return 1;
}
66+
67+
# Command-line entry point: parse the options, decide between extraction
# and compression, and run the chosen operation.
#
# Extraction is chosen when -e is given or the input name ends in the
# archive extension; otherwise the input is compressed. Positional
# arguments take precedence over the -i / -o options.
sub main {
    my %flags;
    getopts('ei:o:vhr', \%flags);

    usage(0)  if $flags{h};
    version() if $flags{v};

    my ($input, $output) = @ARGV;
    $input  //= $flags{i} // usage(2);    # usage() exits, so $input is set past here
    $output //= $flags{o};

    my $ext = qr{\.${\FORMAT}\z}io;

    if ($flags{e} or $input =~ $ext) {

        # No explicit target: derive it by stripping the archive extension.
        unless (defined $output) {
            $output = basename($input);
            $output =~ s{$ext}{}
              or die "$0: no output file specified!\n";
        }

        # Ask before replacing an existing file, unless -r was given.
        if (!$flags{r} && -e $output) {
            print "'$output' already exists! -- Replace? [y/N] ";
            <STDIN> =~ /^y/i or exit 17;
        }

        decompress_file($input, $output)
          or die "$0: error: decompression failed!\n";
    }
    elsif ($input !~ $ext or (defined($output) and $output =~ $ext)) {

        # Default archive name: the input's base name plus the format extension.
        $output //= basename($input) . '.' . FORMAT;

        compress_file($input, $output)
          or die "$0: error: compression failed!\n";
    }
    else {
        warn "$0: don't know what to do...\n";
        usage(1);
    }
}
104+
105+
# Compress file
106+
# Compress the file $input into the archive file $output.
#
# The archive starts with SIGNATURE, followed by one lzss_compress()
# record for every slice of at most CHUNK_SIZE bytes read from the input.
#
# Dies when either file cannot be opened or when reading the input fails.
# Returns the result of closing the output handle, which the caller
# tests for truth.
sub compress_file ($input, $output) {

    open my $fh, '<:raw', $input
      or die "Can't open file <<$input>> for reading: $!";

    # Open the output file for writing
    open my $out_fh, '>:raw', $output
      or die "Can't open file <<$output>> for write: $!";

    # Print the header
    print {$out_fh} SIGNATURE;

    # Cap the match distance at 255 for every chunk of this run.
    local $Compression::Util::LZ_MAX_DIST = 255;

    # Compress data
    while (1) {
        my $got = read($fh, my $chunk, CHUNK_SIZE);

        # read() returns undef on failure and 0 at end of file. Treating a
        # failure as end of file would silently write a truncated archive.
        defined($got)
          or die "Can't read from file <<$input>>: $!";
        last if $got == 0;

        print {$out_fh} lzss_compress($chunk, \&mrl_compress);
    }

    # Close the files
    close $fh;
    return close($out_fh);
}
129+
130+
# Decompress file
131+
# Restore the original data from the archive $input into the file $output.
#
# Dies when a file cannot be opened or when the signature check fails.
# Returns the result of closing the output handle, which the caller
# tests for truth.
sub decompress_file ($input, $output) {

    open(my $fh, '<:raw', $input)
      || die "Can't open file <<$input>> for reading: $!";

    # Refuse anything that does not pass the container signature check
    if (!valid_archive($fh)) {
        die "$0: file `$input' is not a \U${\FORMAT}\E v${\VERSION} archive!\n";
    }

    open(my $out_fh, '>:raw', $output)
      || die "Can't open file <<$output>> for writing: $!";

    # Decode record after record until the archive is exhausted
    until (eof($fh)) {
        print {$out_fh} lzss_decompress($fh, \&mrl_decompress_symbolic);
    }

    close($fh);
    return close($out_fh);
}
151+
152+
# Run the command-line interface, then exit with status 0.
main();
exit 0;
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,169 @@
1+
#!/usr/bin/perl
2+
3+
# Author: Trizen
4+
# Date: 15 December 2022
5+
# Edit: 01 September 2024
6+
# https://github.com/trizen
7+
8+
# Compress/decompress files using LZ77 compression + MRL + BWT + Huffman coding, using a maximum match distance of 255.
9+
10+
use 5.036;
11+
12+
use Getopt::Std qw(getopts);
13+
use File::Basename qw(basename);
14+
use Compression::Util qw(:all);
15+
16+
# Program identity. FORMAT is also used as the archive file extension.
use constant PKGNAME => 'LZBW4';
use constant VERSION => '0.01';
use constant FORMAT  => 'lzbw4';

# Number of bytes read from the input per compression round
# (higher value = better compression).
use constant CHUNK_SIZE => 1 << 18;

# Container signature: the upper-cased format name followed by byte 0x01.
use constant SIGNATURE => uc(FORMAT) . "\x01";
26+
27+
# Print the command-line help text and terminate the program.
#
# The single optional argument is the process exit status; it defaults
# to 0 when omitted or undefined.
sub usage {
    my $status = shift;

    # The here-document body is printed verbatim, so it stays left-aligned.
    print <<"EOH";
usage: $0 [options] [input file] [output file]

options:
-e : extract
-i <filename> : input filename
-o <filename> : output filename
-r : rewrite output

-v : version number
-h : this message

examples:
$0 document.txt
$0 document.txt archive.${\FORMAT}
$0 archive.${\FORMAT} document.txt
$0 -e -i archive.${\FORMAT} -o document.txt

EOH

    exit(defined($status) ? $status : 0);
}
51+
52+
# Print "<package name> <version>" on one line and terminate the program.
sub version {
    say join(' ', PKGNAME, VERSION);
    exit 0;
}
56+
57+
# Check that the stream behind $fh starts with the container signature.
#
# Consumes length(SIGNATURE) bytes from $fh. Returns 1 when those bytes
# equal SIGNATURE; returns nothing (false) when they differ, when the
# stream is shorter than the signature, or when the read fails.
sub valid_archive {
    my ($fh) = @_;

    my $wanted = length(SIGNATURE);
    my $got    = read($fh, my $sig, $wanted);

    # read() gives undef on error and a short count on a truncated file.
    # Neither can be a valid archive, so do not fall through to "valid".
    return if !defined($got) || $got != $wanted;

    return if $sig ne SIGNATURE;
    return 1;
}
66+
67+
# Command-line entry point: parse the options, decide between extraction
# and compression, and run the chosen operation.
#
# Extraction is chosen when -e is given or the input name ends in the
# archive extension; otherwise the input is compressed. Positional
# arguments take precedence over the -i / -o options.
sub main {
    my %flags;
    getopts('ei:o:vhr', \%flags);

    usage(0)  if $flags{h};
    version() if $flags{v};

    my ($input, $output) = @ARGV;
    $input  //= $flags{i} // usage(2);    # usage() exits, so $input is set past here
    $output //= $flags{o};

    my $ext = qr{\.${\FORMAT}\z}io;

    if ($flags{e} or $input =~ $ext) {

        # No explicit target: derive it by stripping the archive extension.
        unless (defined $output) {
            $output = basename($input);
            $output =~ s{$ext}{}
              or die "$0: no output file specified!\n";
        }

        # Ask before replacing an existing file, unless -r was given.
        if (!$flags{r} && -e $output) {
            print "'$output' already exists! -- Replace? [y/N] ";
            <STDIN> =~ /^y/i or exit 17;
        }

        decompress_file($input, $output)
          or die "$0: error: decompression failed!\n";
    }
    elsif ($input !~ $ext or (defined($output) and $output =~ $ext)) {

        # Default archive name: the input's base name plus the format extension.
        $output //= basename($input) . '.' . FORMAT;

        compress_file($input, $output)
          or die "$0: error: compression failed!\n";
    }
    else {
        warn "$0: don't know what to do...\n";
        usage(1);
    }
}
104+
105+
# Compress file
106+
# Compress the file $input into the archive file $output.
#
# The archive starts with SIGNATURE. For every slice of at most
# CHUNK_SIZE bytes read from the input, four streams are appended in
# this order: mrl_compress() of the literals, fibonacci_encode() of the
# lengths, then bwt_compress() of the matches and of the distances.
# decompress_file() reads them back in the same order.
#
# A "<literal count> -> <estimated ratio>" line is printed to the
# selected output handle for each chunk.
#
# Dies when either file cannot be opened or when reading the input fails.
# Returns the result of closing the output handle, which the caller
# tests for truth.
sub compress_file ($input, $output) {

    open my $fh, '<:raw', $input
      or die "Can't open file <<$input>> for reading: $!";

    # Open the output file for writing
    open my $out_fh, '>:raw', $output
      or die "Can't open file <<$output>> for write: $!";

    # Print the header
    print {$out_fh} SIGNATURE;

    # Compress data
    while (1) {
        my $got = read($fh, my $chunk, CHUNK_SIZE);

        # read() returns undef on failure and 0 at end of file. Treating a
        # failure as end of file would silently write a truncated archive.
        defined($got)
          or die "Can't read from file <<$input>>: $!";
        last if $got == 0;

        # The distance cap is localized to the encoder call only, so the
        # later stages run with the library's own setting.
        my ($uncompressed, $distances, $lengths, $matches) = do {
            local $Compression::Util::LZ_MAX_DIST = 255;
            lz77_encode($chunk);
        };

        my $est_ratio = length($chunk) / (4 * scalar(@$uncompressed));
        say(scalar(@$uncompressed), ' -> ', $est_ratio);

        print {$out_fh} mrl_compress($uncompressed);
        print {$out_fh} fibonacci_encode($lengths);
        print {$out_fh} bwt_compress(symbols2string($matches));
        print {$out_fh} bwt_compress(symbols2string($distances));
    }

    # Close the files
    close $fh;
    return close($out_fh);
}
139+
140+
# Decompress file
141+
# Restore the original data from the archive $input into the file $output.
#
# Dies when a file cannot be opened or when the signature check fails.
# Returns the result of closing the output handle, which the caller
# tests for truth.
sub decompress_file ($input, $output) {

    open(my $fh, '<:raw', $input)
      || die "Can't open file <<$input>> for reading: $!";

    # Refuse anything that does not pass the container signature check
    if (!valid_archive($fh)) {
        die "$0: file `$input' is not a \U${\FORMAT}\E v${\VERSION} archive!\n";
    }

    open(my $out_fh, '>:raw', $output)
      || die "Can't open file <<$output>> for writing: $!";

    until (eof($fh)) {

        # The four streams must be read in exactly the order that
        # compress_file() wrote them: literals, lengths, matches, distances.
        my $literals     = mrl_decompress_symbolic($fh);
        my $literal_lens = fibonacci_decode($fh);
        my $match_lens   = bwt_decompress_symbolic($fh);
        my $match_dists  = bwt_decompress_symbolic($fh);

        print {$out_fh} lz77_decode($literals, $match_dists, $literal_lens, $match_lens);
    }

    close($fh);
    return close($out_fh);
}
167+
168+
# Run the command-line interface, then exit with status 0.
main();
exit 0;

0 commit comments

Comments
 (0)