From ecb02df07d38182c4bb7d7228d52899e14474404 Mon Sep 17 00:00:00 2001 From: mh-northlander Date: Thu, 24 Oct 2024 12:15:44 +0900 Subject: [PATCH 1/2] add byteorder arg to readAllBytes --- src/main/java/com/worksap/nlp/sudachi/Config.java | 6 ++++-- .../java/com/worksap/nlp/sudachi/StringUtil.java | 12 +++++++++++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/worksap/nlp/sudachi/Config.java b/src/main/java/com/worksap/nlp/sudachi/Config.java index f18fd53d..28b2fc43 100644 --- a/src/main/java/com/worksap/nlp/sudachi/Config.java +++ b/src/main/java/com/worksap/nlp/sudachi/Config.java @@ -29,6 +29,7 @@ import java.lang.reflect.InvocationTargetException; import java.net.URL; import java.nio.ByteBuffer; +import java.nio.ByteOrder; import java.nio.file.Files; import java.nio.file.Path; import java.util.*; @@ -866,7 +867,8 @@ public InputStream asInputStream() throws IOException { /** * Get view of this resource as a ByteBuffer. When it is possible, the data will * be memory mapped, if it is not possible, it will be fully read into the - * memory. Will not work for files more than 2^31 bytes (2 GB) in size. + * memory. Will not work for files more than 2^31 bytes (2 GB) in size. The + * ByteOrder is set to little endian. * * @return ByteBuffer containing the whole contents of the file * @throws IOException @@ -958,7 +960,7 @@ public ByteBuffer asByteBuffer() throws IOException { if (Objects.equals(url.getProtocol(), "file")) { return MMap.map(url.getPath()); } - return StringUtil.readAllBytes(url); + return StringUtil.readAllBytes(url, ByteOrder.LITTLE_ENDIAN); } @Override diff --git a/src/main/java/com/worksap/nlp/sudachi/StringUtil.java b/src/main/java/com/worksap/nlp/sudachi/StringUtil.java index c5108c79..af255db7 100644 --- a/src/main/java/com/worksap/nlp/sudachi/StringUtil.java +++ b/src/main/java/com/worksap/nlp/sudachi/StringUtil.java @@ -21,6 +21,7 @@ import java.io.InputStreamReader; import java.net.URL; import java.nio.ByteBuffer; +import java.nio.ByteOrder; import java.nio.CharBuffer; import java.nio.charset.StandardCharsets; import java.nio.file.Files; @@ -56,12 +57,20 @@ public static String readFully(InputStream stream) throws IOException { } public static ByteBuffer readAllBytes(URL url) throws IOException { + return readAllBytes(url, ByteOrder.BIG_ENDIAN); + } + + public static ByteBuffer readAllBytes(URL url, ByteOrder order) throws IOException { try (InputStream is = url.openStream()) { - return readAllBytes(is); + return readAllBytes(is, order); } } public static ByteBuffer readAllBytes(InputStream inputStream) throws IOException { + return readAllBytes(inputStream, ByteOrder.BIG_ENDIAN); + } + + public static ByteBuffer readAllBytes(InputStream inputStream, ByteOrder order) throws IOException { byte[] buffer = new byte[inputStream.available() + 1024]; int offset = 0; @@ -78,6 +87,7 @@ public static ByteBuffer readAllBytes(InputStream inputStream) throws IOExceptio } ByteBuffer bbuf = ByteBuffer.wrap(buffer); bbuf.limit(offset); + bbuf.order(order); return bbuf; } } From 8b540b10562ab6fdb20210b689b1a7bc87531a42 Mon Sep 17 00:00:00 2001 From: mh-northlander Date: Wed, 30 Oct 2024 16:13:39 +0900 Subject: [PATCH 2/2] set default byte order to little endian --- src/main/java/com/worksap/nlp/sudachi/Config.java | 2 +- src/main/java/com/worksap/nlp/sudachi/StringUtil.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/worksap/nlp/sudachi/Config.java b/src/main/java/com/worksap/nlp/sudachi/Config.java index 28b2fc43..9a1c9e7c 100644 --- a/src/main/java/com/worksap/nlp/sudachi/Config.java +++ b/src/main/java/com/worksap/nlp/sudachi/Config.java @@ -960,7 +960,7 @@ public ByteBuffer asByteBuffer() throws IOException { if (Objects.equals(url.getProtocol(), "file")) { return MMap.map(url.getPath()); } - return StringUtil.readAllBytes(url, ByteOrder.LITTLE_ENDIAN); + return StringUtil.readAllBytes(url); } @Override diff --git a/src/main/java/com/worksap/nlp/sudachi/StringUtil.java b/src/main/java/com/worksap/nlp/sudachi/StringUtil.java index af255db7..b5c811d4 100644 --- a/src/main/java/com/worksap/nlp/sudachi/StringUtil.java +++ b/src/main/java/com/worksap/nlp/sudachi/StringUtil.java @@ -57,7 +57,7 @@ public static String readFully(InputStream stream) throws IOException { } public static ByteBuffer readAllBytes(URL url) throws IOException { - return readAllBytes(url, ByteOrder.BIG_ENDIAN); + return readAllBytes(url, ByteOrder.LITTLE_ENDIAN); } public static ByteBuffer readAllBytes(URL url, ByteOrder order) throws IOException { @@ -67,7 +67,7 @@ public static ByteBuffer readAllBytes(URL url, ByteOrder order) throws IOExcepti } public static ByteBuffer readAllBytes(InputStream inputStream) throws IOException { - return readAllBytes(inputStream, ByteOrder.BIG_ENDIAN); + return readAllBytes(inputStream, ByteOrder.LITTLE_ENDIAN); } public static ByteBuffer readAllBytes(InputStream inputStream, ByteOrder order) throws IOException {