From 822df59443a255cf04e79f9d66dfa3a2f9bebfa9 Mon Sep 17 00:00:00 2001
From: Arne Juul
Date: Mon, 3 Feb 2025 14:24:59 +0000
Subject: [PATCH] wire format.tensors="hex" into /document/v1 API

---
 .../com/yahoo/search/result/FeatureData.java  |  6 +--
 .../json/JsonSerializationHelper.java         |  9 +++-
 .../com/yahoo/document/json/JsonWriter.java   | 48 +++++++++++++++----
 .../resource/DocumentV1ApiHandler.java        | 39 +++++++++++----
 .../tensor/serialization/JsonFormat.java      |  5 ++
 5 files changed, 85 insertions(+), 22 deletions(-)

diff --git a/container-search/src/main/java/com/yahoo/search/result/FeatureData.java b/container-search/src/main/java/com/yahoo/search/result/FeatureData.java
index 09b98623d9a0..8de48ac8e270 100644
--- a/container-search/src/main/java/com/yahoo/search/result/FeatureData.java
+++ b/container-search/src/main/java/com/yahoo/search/result/FeatureData.java
@@ -68,15 +68,15 @@ public Inspector inspect() {
 
     @Override
     public String toJson() {
-        return toJson(new JsonFormat.EncodeOptions(false, false, false));
+        return toJson(new JsonFormat.EncodeOptions());
     }
 
     public String toJson(boolean tensorShortForm) {
-        return toJson(new JsonFormat.EncodeOptions(tensorShortForm, false, false));
+        return toJson(new JsonFormat.EncodeOptions(tensorShortForm));
     }
 
     public String toJson(boolean tensorShortForm, boolean tensorDirectValues) {
-        return toJson(new JsonFormat.EncodeOptions(tensorShortForm, tensorDirectValues, false));
+        return toJson(new JsonFormat.EncodeOptions(tensorShortForm, tensorDirectValues));
     }
 
     public String toJson(JsonFormat.EncodeOptions tensorOptions) {
diff --git a/document/src/main/java/com/yahoo/document/json/JsonSerializationHelper.java b/document/src/main/java/com/yahoo/document/json/JsonSerializationHelper.java
index ed6bdc721a02..c35418d88d32 100644
--- a/document/src/main/java/com/yahoo/document/json/JsonSerializationHelper.java
+++ b/document/src/main/java/com/yahoo/document/json/JsonSerializationHelper.java
@@ -72,13 +72,20 @@ static void wrapIOException(SubroutineThrowingIOException lambda) {
         }
     }
 
+    // TODO: remove; callers should use the EncodeOptions overload below
     public static void serializeTensorField(JsonGenerator generator, FieldBase field, TensorFieldValue value,
                                             boolean shortForm, boolean directValues) {
+        serializeTensorField(generator, field, value, new JsonFormat.EncodeOptions(shortForm, directValues));
+
+    }
+
+    public static void serializeTensorField(JsonGenerator generator, FieldBase field, TensorFieldValue value,
+                                            JsonFormat.EncodeOptions tensorOptions) {
         wrapIOException(() -> {
             fieldNameIfNotNull(generator, field);
             if (value.getTensor().isPresent()) {
                 Tensor tensor = value.getTensor().get();
-                byte[] encoded = JsonFormat.encode(tensor, shortForm, directValues);
+                byte[] encoded = JsonFormat.encode(tensor, tensorOptions);
                 generator.writeRawValue(new String(encoded, StandardCharsets.UTF_8));
             }
             else {
diff --git a/document/src/main/java/com/yahoo/document/json/JsonWriter.java b/document/src/main/java/com/yahoo/document/json/JsonWriter.java
index 2b0ba1384664..d9eec1020d19 100644
--- a/document/src/main/java/com/yahoo/document/json/JsonWriter.java
+++ b/document/src/main/java/com/yahoo/document/json/JsonWriter.java
@@ -31,6 +31,7 @@
 import com.yahoo.document.datatypes.TensorFieldValue;
 import com.yahoo.document.datatypes.WeightedSet;
 import com.yahoo.document.serialization.DocumentWriter;
+import com.yahoo.tensor.serialization.JsonFormat;
 import com.yahoo.vespa.objects.FieldBase;
 import com.yahoo.vespa.objects.Serializer;
 
@@ -83,9 +84,7 @@ public class JsonWriter implements DocumentWriter {
             .build();
 
     private final JsonGenerator generator;
-
-    private final boolean tensorShortForm;
-    private final boolean tensorDirectValues;
+    private final JsonFormat.EncodeOptions tensorOptions;
 
     /**
      * Creates a JsonWriter.
@@ -97,26 +96,42 @@ public JsonWriter(OutputStream out) {
         this(createPrivateGenerator(out));
     }
 
+    // TODO: remove; do not use, prefer the EncodeOptions constructor
     public JsonWriter(OutputStream out, boolean tensorShortForm, boolean tensorDirectValues) {
         this(createPrivateGenerator(out), tensorShortForm, tensorDirectValues);
     }
 
+    public JsonWriter(OutputStream out, JsonFormat.EncodeOptions tensorOptions) {
+        this(createPrivateGenerator(out), tensorOptions);
+    }
+
     /**
      * Create a Document writer which will write to the input JSON generator.
-     * JsonWriter will not close the generator and only flush it explicitly
-     * after having written a full Document instance. In other words, JsonWriter
-     * will not take ownership of the generator.
+     * TODO: remove.
      *
      * @param generator the output JSON generator
      * @param tensorShortForm whether to use the short type-dependent form for tensor values
      * @param tensorDirectValues whether to output tensor values directly or wrapped in a map also containing the type
      */
     public JsonWriter(JsonGenerator generator, boolean tensorShortForm, boolean tensorDirectValues) {
+        this(generator, new JsonFormat.EncodeOptions(tensorShortForm, tensorDirectValues));
+    }
+
+    /**
+     * Create a Document writer which will write to the input JSON generator.
+     * JsonWriter will not close the generator and only flush it explicitly
+     * after having written a full Document instance. In other words, JsonWriter
+     * will not take ownership of the generator.
+     *
+     * @param generator the output JSON generator
+     * @param tensorOptions tensor formatting options (short/long, direct/wrapped, hexdump or not)
+     */
+    public JsonWriter(JsonGenerator generator, JsonFormat.EncodeOptions tensorOptions) {
         this.generator = generator;
-        this.tensorShortForm = tensorShortForm;
-        this.tensorDirectValues = tensorDirectValues;
+        this.tensorOptions = tensorOptions;
     }
 
+
     private static JsonGenerator createPrivateGenerator(OutputStream out) {
         try {
             return jsonFactory.createGenerator(out);
@@ -221,7 +236,7 @@ public void write(FieldBase field, StringFieldValue value) {
 
     @Override
     public void write(FieldBase field, TensorFieldValue value) {
-        serializeTensorField(generator, field, value, tensorShortForm, tensorDirectValues);
+        serializeTensorField(generator, field, value, tensorOptions);
     }
 
     @Override
@@ -287,6 +302,7 @@ public void write(DocumentUpdate documentUpdate) {
 
     /**
      * Utility method to easily serialize a single document.
+     * TODO: remove
      *
      * @param document the document to be serialized
      * @param tensorShortForm whether tensors should be serialized in a type-dependent short form
@@ -301,6 +317,20 @@ public static byte[] toByteArray(Document document, boolean tensorShortForm, boo
         return out.toByteArray();
     }
 
+    /**
+     * Utility method to easily serialize a single document.
+     *
+     * @param document the document to be serialized
+     * @param tensorOptions tensor formatting options (short/long, direct/wrapped, hexdump or not)
+     * @return the input document serialised as UTF-8 encoded JSON
+     */
+    public static byte[] toByteArray(Document document, JsonFormat.EncodeOptions tensorOptions) {
+        ByteArrayOutputStream out = new ByteArrayOutputStream();
+        JsonWriter writer = new JsonWriter(out, tensorOptions);
+        writer.write(document);
+        return out.toByteArray();
+    }
+
     /**
      * Utility method to easily serialize a single document.
      *
diff --git a/vespaclient-container-plugin/src/main/java/com/yahoo/document/restapi/resource/DocumentV1ApiHandler.java b/vespaclient-container-plugin/src/main/java/com/yahoo/document/restapi/resource/DocumentV1ApiHandler.java
index 6cc389c3612c..17a63473ff79 100644
--- a/vespaclient-container-plugin/src/main/java/com/yahoo/document/restapi/resource/DocumentV1ApiHandler.java
+++ b/vespaclient-container-plugin/src/main/java/com/yahoo/document/restapi/resource/DocumentV1ApiHandler.java
@@ -70,6 +70,7 @@
 import com.yahoo.metrics.simple.MetricReceiver;
 import com.yahoo.restapi.Path;
 import com.yahoo.search.query.ParameterParser;
+import com.yahoo.tensor.serialization.JsonFormat;
 import com.yahoo.text.Text;
 import com.yahoo.vespa.config.content.AllClustersBucketSpacesConfig;
 import com.yahoo.vespa.http.server.Headers;
@@ -771,22 +772,42 @@ private void writeTrace(TraceNode node) throws IOException {
             }
         }
 
+        private JsonFormat.EncodeOptions tensorOptions() {
+            // TODO: Flip default on Vespa 9 to "short-value"
+            String format = "short";
+            if (request != null && request.parameters().containsKey("format.tensors")) {
+                var params = request.parameters().get("format.tensors");
+                if (params.size() == 1) {
+                    format = params.get(0);
+                }
+            }
+            return switch (format) {
+                case "hex" ->
+                        new JsonFormat.EncodeOptions(true, false, true);
+                case "hex-value" ->
+                        new JsonFormat.EncodeOptions(true, true, true);
+                case "short-value" ->
+                        new JsonFormat.EncodeOptions(true, true, false);
+                case "long" ->
+                        new JsonFormat.EncodeOptions(false, false, false);
+                case "long-value" ->
+                        new JsonFormat.EncodeOptions(false, true, false);
+                default ->
+                        // "short"; also the fallback for any unrecognized value
+                        new JsonFormat.EncodeOptions(true, false, false);
+            };
+        }
+
         private boolean tensorShortForm() {
-            return request == null ||
-                   !request.parameters().containsKey("format.tensors") ||
-                   (!request.parameters().get("format.tensors").contains("long")
-                    && !request.parameters().get("format.tensors").contains("long-value"));// default
+            return tensorOptions().shortForm();
         }
 
         private boolean tensorDirectValues() {
-            return request != null &&
-                   request.parameters().containsKey("format.tensors") &&
-                   (request.parameters().get("format.tensors").contains("short-value")
-                    || request.parameters().get("format.tensors").contains("long-value"));// TODO: Flip default on Vespa 9
+            return tensorOptions().directValues();
         }
 
         synchronized void writeSingleDocument(Document document) throws IOException {
-            new JsonWriter(json, tensorShortForm(), tensorDirectValues()).writeFields(document);
+            new JsonWriter(json, tensorOptions()).writeFields(document);
         }
 
         synchronized void writeDocumentsArrayStart() throws IOException {
diff --git a/vespajlib/src/main/java/com/yahoo/tensor/serialization/JsonFormat.java b/vespajlib/src/main/java/com/yahoo/tensor/serialization/JsonFormat.java
index 43a4136cb641..718d528c241e 100644
--- a/vespajlib/src/main/java/com/yahoo/tensor/serialization/JsonFormat.java
+++ b/vespajlib/src/main/java/com/yahoo/tensor/serialization/JsonFormat.java
@@ -35,6 +35,11 @@ public class JsonFormat {
     /** Options for encode */
     public record EncodeOptions(boolean shortForm, boolean directValues, boolean hexForDensePart) {
         // TODO - consider "compact" flag
+        public EncodeOptions() { this(false); }
+        public EncodeOptions(boolean shortForm) { this(shortForm, false); }
+        public EncodeOptions(boolean shortForm, boolean directValues) {
+            this(shortForm, directValues, false);
+        }
     }
     /**
      * Serializes the given tensor value into JSON format.