Skip to content

Commit

Permalink
Implement chunk decompression for #3.
Browse files Browse the repository at this point in the history
  • Loading branch information
bmarwell committed May 20, 2019
1 parent 078b75b commit 6edfaf4
Show file tree
Hide file tree
Showing 17 changed files with 348 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@
import io.github.zchunk.compressedint.CompressedInt;
import io.github.zchunk.compressedint.CompressedIntFactory;
import io.github.zchunk.compression.api.CompressionAlgorithm;
import io.github.zchunk.compression.api.err.DecompressionException;
import java.io.InputStream;
import java.util.function.Function;
import java.util.function.BiFunction;

public class UnknownAlgorithm implements CompressionAlgorithm {

Expand All @@ -35,7 +36,7 @@ public String getName() {
}

@Override
public Function<InputStream, InputStream> getOutputStreamSupplier() {
throw new UnsupportedOperationException("not implemented");
public BiFunction<InputStream, byte[], InputStream> getOutputStreamSupplier() {
  // This implementation never supplies a decompressor; it always fails with a descriptive error.
  throw new DecompressionException("Could not find a valid decompressor implementation.");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,8 @@

import io.github.zchunk.compressedint.CompressedInt;
import java.io.InputStream;
import java.util.function.Function;
import org.immutables.value.Value;
import java.util.function.BiFunction;

@Value.Immutable
public interface CompressionAlgorithm {

/**
Expand All @@ -41,6 +39,7 @@ public interface CompressionAlgorithm {
/**
* A method that will take in a stream and output an uncompressed stream.
* @return a stream conversion method.
* @throws io.github.zchunk.compression.api.err.DecompressionException if an exception occurs.
*/
Function<InputStream, InputStream> getOutputStreamSupplier();
BiFunction<InputStream, byte[], InputStream> getOutputStreamSupplier();
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import static java.util.stream.Collectors.toList;
import static java.util.stream.Collectors.toMap;

import io.github.zchunk.compressedint.CompressedInt;
import io.github.zchunk.compression.algo.unknown.UnknownAlgorithm;
import io.github.zchunk.compression.api.internal.ReflectionUtil;
import java.util.AbstractMap;
Expand All @@ -43,7 +44,7 @@ private CompressionAlgorithmFactory() {

private static Map.@Nullable Entry<Long, Class<CompressionAlgorithm>> mapEntryOrNull(final Class<CompressionAlgorithm> clazz) {
return ReflectionUtil.newInstance(clazz)
.map(compInstance -> new AbstractMap.SimpleEntry<>(compInstance.getCompressionTypeValue().getLongValue(), clazz))
.map(compInstance -> new AbstractMap.SimpleEntry<>(compInstance.getCompressionTypeValue().getUnsignedLongValue(), clazz))
.orElse(null);
}

Expand Down Expand Up @@ -72,6 +73,10 @@ private static Map<Long, Class<CompressionAlgorithm>> getTypeMappings() {
return ResourceHolder.newInstance(ROOT_PACKAGE).getTypeMapping();
}

/**
 * Looks up the compression algorithm for a compression type given as a {@link CompressedInt}.
 *
 * <p>The unsigned long value is used for the lookup because the type mapping entries
 * are keyed by {@code getUnsignedLongValue()} as well.
 *
 * @param compressedInt
 *     the compression type value.
 * @return the result of the numeric {@code forType} lookup for that value.
 */
public static CompressionAlgorithm forType(final CompressedInt compressedInt) {
  return forType(compressedInt.getUnsignedLongValue());
}


private static class ResourceHolder {

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package io.github.zchunk.compression.api.err;

/**
 * Unchecked exception thrown when a decompressor cannot be created or used.
 */
public class DecompressionException extends RuntimeException {

  private static final long serialVersionUID = 8784156714006343592L;

  /**
   * Creates an exception with a message only.
   *
   * @param message
   *     a description of what went wrong.
   */
  public DecompressionException(String message) {
    super(message);
  }

  /**
   * Creates an exception that keeps the underlying failure as its cause.
   *
   * @param message
   *     a description of what went wrong.
   * @param cause
   *     the exception that triggered this one; may be {@code null}.
   */
  public DecompressionException(String message, Throwable cause) {
    super(message, cause);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ public static <T> List<Class<T>> getClasses(final String rootPackage, final Clas
.flatMap(Collection::stream)
.collect(toList());
} catch (final IOException ioEx) {
LOG.log(Level.SEVERE, ioEx, () -> String.format("Unable to load classes in root package [%s].", rootPackage));
return emptyList();
}
}
Expand Down Expand Up @@ -106,10 +107,12 @@ private static <T> List<Class<T>> findClasses(final String packageName, final Cl
private static <T> Optional<Class<T>> loadClass(final String packageName, final Class<T> clazzType, final File file) {
try {
final Class<?> aClass = Class.forName(packageName + '.' + file.getName().substring(0, file.getName().length() - 6));
if (classImplementsCompressionAlgorithm(clazzType).test(aClass)) {
final boolean classImplementsType = classImplementsCompressionAlgorithm(clazzType).test(aClass);

if (classImplementsType) {
@SuppressWarnings("unchecked")
final Class<T> castedClass = (Class<T>) aClass;
return Optional.ofNullable(castedClass);
return Optional.of(castedClass);
}
} catch (final ClassNotFoundException e) {
LOG.log(Level.WARNING, e, () -> String.format("Class file [%s] found, but unable to create instance.", file.getAbsolutePath()));
Expand Down Expand Up @@ -143,7 +146,7 @@ public static <T> Optional<T> newInstance(final Class<T> clazz) {
}

public static <T> Predicate<Class<?>> classImplementsCompressionAlgorithm(final Class<T> type) {
return clazz -> getListFromArray(type.getInterfaces()).contains(type);
return clazz -> getListFromArray(clazz.getInterfaces()).contains(type);
}

}
24 changes: 24 additions & 0 deletions compression/compression-none/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven module for the zchunk-compression-none artifact; its only declared dependency is compression-api. -->
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://maven.apache.org/POM/4.0.0"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>zchunk-parent</artifactId>
<groupId>io.github.zchunk</groupId>
<version>1.0.0-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>
<modelVersion>4.0.0</modelVersion>

<artifactId>zchunk-compression-none</artifactId>

<dependencies>
<!-- API module that declares the CompressionAlgorithm interface implemented by this module. -->
<dependency>
<groupId>io.github.zchunk</groupId>
<artifactId>compression-api</artifactId>
<version>1.0.0-SNAPSHOT</version>
</dependency>
</dependencies>


</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package io.github.zchunk.compression.algo.none;

import io.github.zchunk.compressedint.CompressedInt;
import io.github.zchunk.compressedint.CompressedIntFactory;
import io.github.zchunk.compression.api.CompressionAlgorithm;

import java.io.InputStream;
import java.util.function.BiFunction;

/**
 * Compression algorithm that leaves the data untouched.
 *
 * <p>It reports the compression type value {@code 0} and the name {@code "none"}.
 */
public class NoneCompressionAlgorithm implements CompressionAlgorithm {

  /**
   * @return the name of this algorithm, {@code "none"}.
   */
  @Override
  public String getName() {
    return "none";
  }

  /**
   * @return a compressed int holding the value {@code 0}.
   */
  @Override
  public CompressedInt getCompressionTypeValue() {
    final CompressedInt typeValue = CompressedIntFactory.valueOf(0L);
    return typeValue;
  }

  /**
   * @return a function that hands back the given input stream unchanged and ignores the dictionary.
   */
  @Override
  public BiFunction<InputStream, byte[], InputStream> getOutputStreamSupplier() {
    final BiFunction<InputStream, byte[], InputStream> passThrough =
        (rawStream, ignoredDict) -> rawStream;
    return passThrough;
  }
}
33 changes: 33 additions & 0 deletions compression/compression-zstd/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven module for the zchunk-compression-zstd artifact; depends on compression-api and on zstd-jni. -->
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://maven.apache.org/POM/4.0.0"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>zchunk-parent</artifactId>
<groupId>io.github.zchunk</groupId>
<version>1.0.0-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>
<modelVersion>4.0.0</modelVersion>

<artifactId>zchunk-compression-zstd</artifactId>

<dependencies>
<!-- API module that declares the CompressionAlgorithm interface implemented by this module. -->
<dependency>
<groupId>io.github.zchunk</groupId>
<artifactId>compression-api</artifactId>
<version>1.0.0-SNAPSHOT</version>
</dependency>

<!-- actual implementation -->
<!-- https://mvnrepository.com/artifact/com.github.luben/zstd-jni -->
<dependency>
<groupId>com.github.luben</groupId>
<artifactId>zstd-jni</artifactId>
<version>1.4.0-1</version>
</dependency>

</dependencies>


</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package io.github.zchunk.compression.algo.zstd;

import com.github.luben.zstd.ZstdInputStream;
import io.github.zchunk.compressedint.CompressedInt;
import io.github.zchunk.compressedint.CompressedIntFactory;
import io.github.zchunk.compression.api.CompressionAlgorithm;
import io.github.zchunk.compression.api.err.DecompressionException;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.function.BiFunction;

/**
 * Compression algorithm backed by {@link ZstdInputStream}.
 *
 * <p>It reports the compression type value {@code 2} and the name {@code "zstd"}.
 */
public class ZStdCompressionAlgorithm implements CompressionAlgorithm {

  private static final CompressedInt TWO = CompressedIntFactory.valueOf(2L);
  private static final String ALGORITHM_NAME_ZSTD = "zstd";

  @Override
  public CompressedInt getCompressionTypeValue() {
    return TWO;
  }

  @Override
  public String getName() {
    return ALGORITHM_NAME_ZSTD;
  }

  /**
   * @return a function wrapping a compressed stream into a {@link ZstdInputStream};
   *     the second argument is the dictionary to apply, where {@code null} or an
   *     empty array means that no dictionary is set.
   * @throws DecompressionException
   *     thrown by the returned function if the zstd stream cannot be created.
   */
  @Override
  public BiFunction<InputStream, byte[], InputStream> getOutputStreamSupplier() {
    return createZstdInputStream();
  }

  private BiFunction<InputStream, byte[], InputStream> createZstdInputStream() {
    return (compressedInputStream, dict) -> {
      try {
        final ZstdInputStream zstdInputStream = new ZstdInputStream(compressedInputStream);
        // Only hand a dictionary to zstd when there actually is one; a null check
        // keeps a missing dictionary from reaching setDict.
        if (dict != null && dict.length > 0) {
          zstdInputStream.setDict(dict);
        }

        return zstdInputStream;
      } catch (final IOException e) {
        // Keep the original I/O failure attached so the root cause is not lost.
        final DecompressionException wrapped = new DecompressionException("Unable to create input stream.");
        wrapped.initCause(e);
        throw wrapped;
      }
    };
  }

}
13 changes: 13 additions & 0 deletions fileformat/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,19 @@
<version>1.0.0-SNAPSHOT</version>
</dependency>

<!-- these are optional dependencies, only used by tests. -->
<dependency>
<groupId>io.github.zchunk</groupId>
<artifactId>zchunk-compression-none</artifactId>
<version>1.0.0-SNAPSHOT</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>io.github.zchunk</groupId>
<artifactId>zchunk-compression-zstd</artifactId>
<version>1.0.0-SNAPSHOT</version>
<scope>test</scope>
</dependency>

<!-- 3rd party -->
<dependency>
Expand Down
65 changes: 64 additions & 1 deletion fileformat/src/main/java/io/github/zchunk/fileformat/ZChunk.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,21 @@

package io.github.zchunk.fileformat;

import io.github.zchunk.compression.api.CompressionAlgorithm;
import io.github.zchunk.fileformat.err.InvalidFileException;
import io.github.zchunk.fileformat.io.BoundedInputStream;
import io.github.zchunk.fileformat.util.ChecksumUtil;
import io.github.zchunk.fileformat.util.OffsetUtil;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.function.BiFunction;
import java.util.logging.Logger;

public class ZChunk {
public final class ZChunk {

private static final Logger LOG = Logger.getLogger("io.github.zchunk.fileformat.ZChunk");

/**
* Reads in a zchunk file.
Expand Down Expand Up @@ -52,4 +62,57 @@ public static boolean validateFile(final File file) {
&& ChecksumUtil.isValidData(header, file);
}

/**
 * Get a chunk info item.
 *
 * @param header
 *     the header to extract the info from.
 * @param chunkNumber
 *     the chunk number to extract.
 * @return the first chunk info whose current index equals {@code chunkNumber}.
 * @throws IllegalArgumentException
 *     if the chunk was not found.
 */
public static ZChunkHeaderChunkInfo getChunkInfo(final ZChunkHeader header, final long chunkNumber) {
  return header.getIndex().getChunkInfoSortedByIndex().stream()
      .filter(currChunk -> currChunk.getCurrentIndex() == chunkNumber)
      .findFirst()
      // Name the missing chunk so the caller can tell which lookup failed.
      .orElseThrow(() -> new IllegalArgumentException(
          String.format("Chunk number [%d] not found in header index.", chunkNumber)));
}

/**
 * Reads the dictionary from a zchunk file and returns it decompressed.
 *
 * @param header
 *     the header providing the dictionary offset, the uncompressed dictionary length
 *     and the compression algorithm.
 * @param input
 *     the file to read the dictionary from.
 * @return a buffer of exactly the uncompressed dictionary length, completely filled.
 * @throws IllegalArgumentException
 *     if the dictionary cannot be read completely from the file.
 */
public static byte[] getDecompressedDict(final ZChunkHeader header, final File input) {
  final long offset = OffsetUtil.getDictOffset(header);
  final CompressionAlgorithm compressionAlgorithm = header.getPreface().getCompressionAlgorithm();
  final BiFunction<InputStream, byte[], InputStream> decompressor = compressionAlgorithm.getOutputStreamSupplier();

  try (final FileInputStream fis = new FileInputStream(input)) {
    // Position the raw stream first, so the decompressor never sees bytes before the dictionary.
    skipFully(fis, offset);
    try (final InputStream decompressedStream = decompressor.apply(fis, new byte[0])) {
      final byte[] dictBuffer = new byte[header.getIndex().getUncompressedDictLength().getIntValue()];
      // A single read() may return fewer bytes than requested, so read until the buffer is full.
      readFully(decompressedStream, dictBuffer);
      return dictBuffer;
    }
  } catch (final IOException ioEx) {
    final String message = String.format("Unable to read dictionary at offset [%d] from file [%s].", offset, input.getAbsolutePath());
    throw new IllegalArgumentException(message, ioEx);
  }
}

/**
 * Opens a decompressing stream over a single chunk of a zchunk file.
 *
 * <p>The caller owns the returned stream and is responsible for closing it.
 *
 * @param header
 *     the header providing the chunk offset, the chunk length and the compression algorithm.
 * @param testFile
 *     the file to read the chunk from.
 * @param dict
 *     the dictionary handed to the decompressor.
 * @param chunkNumber
 *     the number of the chunk to read.
 * @return a stream created by the decompressor on top of the bounded file stream.
 * @throws IOException
 *     if the file cannot be opened or the chunk offset cannot be reached.
 * @throws IllegalArgumentException
 *     if the chunk was not found.
 */
public static InputStream getDecompressedChunk(final ZChunkHeader header,
                                               final File testFile,
                                               final byte[] dict,
                                               final long chunkNumber) throws IOException {
  final long chunkOffset = OffsetUtil.getChunkOffset(header, chunkNumber);
  final ZChunkHeaderChunkInfo chunk = getChunkInfo(header, chunkNumber);
  final CompressionAlgorithm compressionAlgorithm = header.getPreface().getCompressionAlgorithm();
  final BiFunction<InputStream, byte[], InputStream> decompressor = compressionAlgorithm.getOutputStreamSupplier();

  // including skip
  final long compressedBytesReadLimit = chunkOffset + chunk.getChunkLength().getLongValue();
  final FileInputStream fileStream = new FileInputStream(testFile);
  try {
    final BoundedInputStream fis = new BoundedInputStream(fileStream, compressedBytesReadLimit);
    skipFully(fis, chunkOffset);

    return decompressor.apply(fis, dict);
  } catch (final IOException | RuntimeException ex) {
    // The stream is only handed to the caller on success; release the file handle otherwise.
    try {
      fileStream.close();
    } catch (final IOException closeEx) {
      ex.addSuppressed(closeEx);
    }
    throw ex;
  }
}

/**
 * Skips exactly the given number of bytes, retrying when {@code skip} skips fewer bytes.
 */
private static void skipFully(final InputStream stream, final long bytesToSkip) throws IOException {
  long remaining = bytesToSkip;
  while (remaining > 0) {
    final long skipped = stream.skip(remaining);
    if (skipped > 0) {
      remaining -= skipped;
    } else if (stream.read() == -1) {
      // skip() may legitimately return 0; a failed single-byte read proves end of stream.
      throw new IOException(String.format("Unexpected end of stream with [%d] bytes left to skip.", remaining));
    } else {
      remaining--;
    }
  }
}

/**
 * Fills the whole buffer from the stream, failing if the stream ends early.
 */
private static void readFully(final InputStream stream, final byte[] buffer) throws IOException {
  int filled = 0;
  while (filled < buffer.length) {
    final int read = stream.read(buffer, filled, buffer.length - filled);
    if (read < 0) {
      throw new IOException(String.format("Unexpected end of stream after [%d] of [%d] bytes.", filled, buffer.length));
    }
    filled += read;
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
import io.github.zchunk.compressedint.CompressedIntFactory;
import io.github.zchunk.compression.api.CompressionAlgorithm;
import io.github.zchunk.compression.api.CompressionAlgorithmFactory;
import io.github.zchunk.compression.api.ImmutableCompressionAlgorithm;
import io.github.zchunk.fileformat.err.InvalidFileException;
import io.github.zchunk.fileformat.parser.ZChunkIndexParser;
import io.github.zchunk.fileformat.parser.ZChunkLeadParser;
Expand Down Expand Up @@ -157,11 +156,7 @@ private static ZChunkHeaderPreface getZChunkFileHeaderPrefaceFromParser(final ZC
final CompressedInt prefaceFlagsInt = prefaceParser.readFlagsInt();
final Set<PrefaceFlag> flags = PrefaceFlag.getPrefaceFlags(prefaceFlagsInt);

final CompressionAlgorithm compressionAlgorithm = ImmutableCompressionAlgorithm.builder()
.compressionTypeValue(prefaceParser.readCompressionType())
.name("unknown")
.outputStreamSupplier(a -> a)
.build();
final CompressionAlgorithm compressionAlgorithm = CompressionAlgorithmFactory.forType(prefaceParser.readCompressionType());

return ImmutableZChunkHeaderPreface.builder()
.totalDataChecksum(prefaceParser.readTotalDataCksum())
Expand Down
Loading

0 comments on commit 6edfaf4

Please sign in to comment.