Skip to content

Commit

Permalink
Merge pull request #33250 from vespa-engine/arnej/wire-hex-tensors-to…
Browse files Browse the repository at this point in the history
…-document-v1

wire format.tensors="hex" into /document/v1 API
  • Loading branch information
bratseth authored Feb 3, 2025
2 parents b5f590b + 822df59 commit 124622f
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -68,15 +68,15 @@ public Inspector inspect() {

/**
 * Returns this value as JSON, encoding tensors with the default options
 * (every flag of {@link JsonFormat.EncodeOptions} left at {@code false}).
 */
@Override
public String toJson() {
    // Only one return may remain: a second, equivalent return statement
    // after this one is unreachable and does not compile.
    return toJson(new JsonFormat.EncodeOptions());
}

/**
 * Returns this value as JSON, choosing only the tensor short-form flag.
 * The remaining encode options keep their {@code false} defaults.
 *
 * @param tensorShortForm passed on as the {@code shortForm} encode option
 */
public String toJson(boolean tensorShortForm) {
    // Single return: the earlier three-argument spelling was equivalent and
    // left the second return statement unreachable.
    return toJson(new JsonFormat.EncodeOptions(tensorShortForm));
}

/**
 * Returns this value as JSON, choosing the tensor short-form and
 * direct-values flags. The hex option keeps its {@code false} default.
 *
 * @param tensorShortForm    passed on as the {@code shortForm} encode option
 * @param tensorDirectValues passed on as the {@code directValues} encode option
 */
public String toJson(boolean tensorShortForm, boolean tensorDirectValues) {
    // Single return: the earlier three-argument spelling was equivalent and
    // left the second return statement unreachable.
    return toJson(new JsonFormat.EncodeOptions(tensorShortForm, tensorDirectValues));
}

public String toJson(JsonFormat.EncodeOptions tensorOptions) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,20 @@ static void wrapIOException(SubroutineThrowingIOException lambda) {
}
}

/**
 * Legacy overload that takes the tensor format as two booleans. It packs them
 * into a {@link JsonFormat.EncodeOptions} and hands over to the options-based
 * overload.
 * TODO: remove
 */
public static void serializeTensorField(JsonGenerator generator, FieldBase field, TensorFieldValue value,
                                        boolean shortForm, boolean directValues) {
    JsonFormat.EncodeOptions tensorOptions = new JsonFormat.EncodeOptions(shortForm, directValues);
    serializeTensorField(generator, field, value, tensorOptions);
}

public static void serializeTensorField(JsonGenerator generator, FieldBase field, TensorFieldValue value,
JsonFormat.EncodeOptions tensorOptions) {
wrapIOException(() -> {
fieldNameIfNotNull(generator, field);
if (value.getTensor().isPresent()) {
Tensor tensor = value.getTensor().get();
byte[] encoded = JsonFormat.encode(tensor, shortForm, directValues);
byte[] encoded = JsonFormat.encode(tensor, tensorOptions);
generator.writeRawValue(new String(encoded, StandardCharsets.UTF_8));
}
else {
Expand Down
48 changes: 39 additions & 9 deletions document/src/main/java/com/yahoo/document/json/JsonWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import com.yahoo.document.datatypes.TensorFieldValue;
import com.yahoo.document.datatypes.WeightedSet;
import com.yahoo.document.serialization.DocumentWriter;
import com.yahoo.tensor.serialization.JsonFormat;
import com.yahoo.vespa.objects.FieldBase;
import com.yahoo.vespa.objects.Serializer;

Expand Down Expand Up @@ -83,9 +84,7 @@ public class JsonWriter implements DocumentWriter {
.build();

// The JSON generator all output is written to.
private final JsonGenerator generator;
// Tensor formatting options; replaces the former pair of boolean fields
// (tensorShortForm / tensorDirectValues), which no constructor assigns anymore.
private final JsonFormat.EncodeOptions tensorOptions;

/**
* Creates a JsonWriter.
Expand All @@ -97,26 +96,42 @@ public JsonWriter(OutputStream out) {
this(createPrivateGenerator(out));
}

/**
 * Legacy constructor taking the tensor format as two booleans.
 * Do not use; pass a {@link JsonFormat.EncodeOptions} instead.
 */
public JsonWriter(OutputStream out, boolean tensorShortForm, boolean tensorDirectValues) {
    this(out, new JsonFormat.EncodeOptions(tensorShortForm, tensorDirectValues));
}

/**
* Creates a JsonWriter that writes to the given stream through a generator
* it creates itself, formatting tensors according to the given options.
*
* @param out the stream the privately created JSON generator writes to
* @param tensorOptions tensor formatting options (short/long, direct/wrapped, hexdump or not)
*/
public JsonWriter(OutputStream out, JsonFormat.EncodeOptions tensorOptions) {
this(createPrivateGenerator(out), tensorOptions);
}

/**
* Create a Document writer which will write to the input JSON generator.
* JsonWriter will not close the generator and only flush it explicitly
* after having written a full Document instance. In other words, JsonWriter
* will <i>not</i> take ownership of the generator.
* <p>
* Legacy variant: the two booleans are wrapped in a
* {@link JsonFormat.EncodeOptions} and passed to the options-based constructor.
* TODO: remove.
*
* @param generator the output JSON generator
* @param tensorShortForm whether to use the short type-dependent form for tensor values
* @param tensorDirectValues whether to output tensor values directly or wrapped in a map also containing the type
*/
public JsonWriter(JsonGenerator generator, boolean tensorShortForm, boolean tensorDirectValues) {
this(generator, new JsonFormat.EncodeOptions(tensorShortForm, tensorDirectValues));
}

/**
 * Create a Document writer which will write to the input JSON generator.
 * JsonWriter will not close the generator and only flush it explicitly
 * after having written a full Document instance. In other words, JsonWriter
 * will <i>not</i> take ownership of the generator.
 *
 * @param generator the output JSON generator
 * @param tensorOptions tensor formatting options (short/long, direct/wrapped, hexdump or not)
 */
public JsonWriter(JsonGenerator generator, JsonFormat.EncodeOptions tensorOptions) {
    this.generator = generator;
    // The options object is the only tensor-format state kept. The former
    // assignments from tensorShortForm / tensorDirectValues are dropped,
    // since this constructor has no such parameters.
    this.tensorOptions = tensorOptions;
}


private static JsonGenerator createPrivateGenerator(OutputStream out) {
try {
return jsonFactory.createGenerator(out);
Expand Down Expand Up @@ -221,7 +236,7 @@ public void write(FieldBase field, StringFieldValue value) {

/**
 * Writes a tensor field using this writer's tensor formatting options.
 *
 * @param field the field being written
 * @param value the tensor value to serialize
 */
@Override
public void write(FieldBase field, TensorFieldValue value) {
    // Serialize exactly once, through the options-based overload; a second
    // call using the old boolean pair would emit the field twice.
    serializeTensorField(generator, field, value, tensorOptions);
}

@Override
Expand Down Expand Up @@ -287,6 +302,7 @@ public void write(DocumentUpdate documentUpdate) {

/**
* Utility method to easily serialize a single document.
* TODO: remove
*
* @param document the document to be serialized
* @param tensorShortForm whether tensors should be serialized in a type-dependent short form
Expand All @@ -301,6 +317,20 @@ public static byte[] toByteArray(Document document, boolean tensorShortForm, boo
return out.toByteArray();
}

/**
 * Serializes one document to JSON in memory.
 *
 * @param document the document to be serialized
 * @param tensorOptions tensor formatting options (short/long, direct/wrapped, hexdump or not)
 * @return the bytes the writer produced for the document (UTF-8 encoded JSON)
 */
public static byte[] toByteArray(Document document, JsonFormat.EncodeOptions tensorOptions) {
    var buffer = new ByteArrayOutputStream();
    new JsonWriter(buffer, tensorOptions).write(document);
    return buffer.toByteArray();
}

/**
* Utility method to easily serialize a single document.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@
import com.yahoo.metrics.simple.MetricReceiver;
import com.yahoo.restapi.Path;
import com.yahoo.search.query.ParameterParser;
import com.yahoo.tensor.serialization.JsonFormat;
import com.yahoo.text.Text;
import com.yahoo.vespa.config.content.AllClustersBucketSpacesConfig;
import com.yahoo.vespa.http.server.Headers;
Expand Down Expand Up @@ -771,22 +772,42 @@ private void writeTrace(TraceNode node) throws IOException {
}
}

/**
 * Resolves the tensor encoding options from the {@code format.tensors}
 * request parameter. The parameter is used only when it carries exactly one
 * value; with no request, no parameter, several values, or an unrecognised
 * value, the "short" encoding is chosen.
 */
private JsonFormat.EncodeOptions tensorOptions() {
    // TODO: Flip default on Vespa 9 to "short-value"
    String chosen = "short";
    boolean hasParameter = request != null && request.parameters().containsKey("format.tensors");
    if (hasParameter) {
        var values = request.parameters().get("format.tensors");
        if (values.size() == 1) {
            chosen = values.get(0);
        }
    }
    // Constructor arguments are (shortForm, directValues, hexForDensePart).
    return switch (chosen) {
        case "hex"         -> new JsonFormat.EncodeOptions(true, false, true);
        case "hex-value"   -> new JsonFormat.EncodeOptions(true, true, true);
        case "short-value" -> new JsonFormat.EncodeOptions(true, true, false);
        case "long"        -> new JsonFormat.EncodeOptions(false, false, false);
        case "long-value"  -> new JsonFormat.EncodeOptions(false, true, false);
        default            -> new JsonFormat.EncodeOptions(true, false, false); // aka "short"
    };
}

/**
 * Returns the short-form flag of the tensor options resolved from the request.
 */
private boolean tensorShortForm() {
    // Delegate to tensorOptions() so the format.tensors parameter is parsed
    // in one place. The former inline parsing returned first and made this
    // statement unreachable.
    return tensorOptions().shortForm();
}

/**
 * Returns the direct-values flag of the tensor options resolved from the request.
 */
private boolean tensorDirectValues() {
    // Delegate to tensorOptions() so the format.tensors parameter is parsed
    // in one place. The former inline parsing returned first and made this
    // statement unreachable.
    return tensorOptions().directValues();
}

/**
 * Writes the fields of a single document to the response JSON, formatting
 * tensors according to the options resolved from the request.
 *
 * @param document the document whose fields are written
 * @throws IOException declared for callers; see the writer for when it is raised
 */
synchronized void writeSingleDocument(Document document) throws IOException {
    // Write the fields exactly once, with the full option set (including
    // hex); a second writer built from the two booleans would emit them again.
    new JsonWriter(json, tensorOptions()).writeFields(document);
}

synchronized void writeDocumentsArrayStart() throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ public class JsonFormat {
/**
 * Options for encode.
 *
 * @param shortForm       the short-form flag
 * @param directValues    the direct-values flag
 * @param hexForDensePart the hex-for-dense-part flag
 */
public record EncodeOptions(boolean shortForm, boolean directValues, boolean hexForDensePart) {
    // TODO - consider "compact" flag

    /** All flags {@code false}. */
    public EncodeOptions() {
        this(false, false, false);
    }

    /** Only {@code shortForm} chosen; the other flags are {@code false}. */
    public EncodeOptions(boolean shortForm) {
        this(shortForm, false, false);
    }

    /** {@code shortForm} and {@code directValues} chosen; {@code hexForDensePart} is {@code false}. */
    public EncodeOptions(boolean shortForm, boolean directValues) {
        this(shortForm, directValues, false);
    }
}
/**
* Serializes the given tensor value into JSON format.
Expand Down

0 comments on commit 124622f

Please sign in to comment.