diff --git a/protobuf/grpc_service.proto b/protobuf/grpc_service.proto index 5c1acee..260a005 100644 --- a/protobuf/grpc_service.proto +++ b/protobuf/grpc_service.proto @@ -429,6 +429,13 @@ message ModelMetadataResponse //@@ The model's outputs. //@@ repeated TensorMetadata outputs = 5; + + //@@ + //@@ .. cpp:var:: string card + //@@ + //@@ The model card. + //@@ + string card = 6; } //@@ @@ -923,7 +930,7 @@ message InferStatistics //@@ .. cpp:var:: StatisticDuration queue //@@ //@@ The count and cumulative duration that inference requests wait in - //@@ scheduling or other queues. The "queue" count and cumulative + //@@ scheduling or other queues. The "queue" count and cumulative //@@ duration includes cache hits. //@@ StatisticDuration queue = 3; @@ -965,7 +972,7 @@ message InferStatistics //@@ and extract output tensor data from the Response Cache on a cache //@@ hit. For example, this duration should include the time to copy //@@ output tensor data from the Response Cache to the response object. - //@@ On cache hits, triton does not need to go to the model/backend + //@@ On cache hits, triton does not need to go to the model/backend //@@ for the output tensor data, so the "compute_input", "compute_infer", //@@ and "compute_output" fields are not updated. Assuming the response //@@ cache is enabled for a given model, a cache hit occurs for a