From abfd3e070bdea802d4547fe20234915d53a8484e Mon Sep 17 00:00:00 2001 From: Abhishek Jindal Date: Fri, 2 Aug 2024 16:49:12 -0700 Subject: [PATCH 01/35] add GetOutput for C API --- src/ort_genai.h | 6 ++++++ src/ort_genai_c.cpp | 9 +++++++++ src/ort_genai_c.h | 2 ++ 3 files changed, 17 insertions(+) diff --git a/src/ort_genai.h b/src/ort_genai.h index d0c1d0c75..198792fd2 100644 --- a/src/ort_genai.h +++ b/src/ort_genai.h @@ -232,6 +232,12 @@ struct OgaGenerator : OgaAbstract { return OgaGenerator_GetSequenceData(this, index); } + OgaTensor* GetOutput(const char* name) { + OgaTensor* out; + OgaCheckResult(OgaGenerator_GetOutput(this, name, &out)); + return std::unique_ptr(out); + } + #if __cplusplus >= 202002L std::span GetSequence(size_t index) const { return {GetSequenceData(index), GetSequenceCount(index)}; diff --git a/src/ort_genai_c.cpp b/src/ort_genai_c.cpp index 6f26d2857..9f03bab53 100644 --- a/src/ort_genai_c.cpp +++ b/src/ort_genai_c.cpp @@ -208,6 +208,15 @@ OgaResult* OGA_API_CALL OgaGenerator_GenerateNextToken(OgaGenerator* generator) OGA_CATCH } +OgaResult* OgaGenerator_GetOutput OgaGenerator_GetOutput(const OgaGenerator* generator, const char* name, OgaTensor** out) { + OGA_TRY + auto& generator = *reinterpret_cast(generator); + auto* OrtValue = generator.State.GetOutput(name); + *out = reinterpret_cast(OrtValue); + return nullptr; + OGA_CATCH +} + size_t OGA_API_CALL OgaGenerator_GetSequenceCount(const OgaGenerator* oga_generator, size_t index) { auto& generator = *reinterpret_cast(oga_generator); return generator.GetSequence(static_cast(index)).GetCPU().size(); diff --git a/src/ort_genai_c.h b/src/ort_genai_c.h index ec97ce4e5..d1ee27509 100644 --- a/src/ort_genai_c.h +++ b/src/ort_genai_c.h @@ -224,6 +224,8 @@ OGA_EXPORT bool OGA_API_CALL OgaGenerator_IsDone(const OgaGenerator* generator); OGA_EXPORT OgaResult* OGA_API_CALL OgaGenerator_ComputeLogits(OgaGenerator* generator); OGA_EXPORT OgaResult* OGA_API_CALL OgaGenerator_GenerateNextToken(OgaGenerator* generator); +OGA_EXPORT OgaResult* OGA_API_CALL OgaGenerator_GetOutput(const OgaGenerator* generator, const char* name, OgaTensor** out); + /* * \brief Returns the number of tokens in the sequence at the given index. * \param[in] generator The generator to get the count of the tokens for the sequence at the given index. From fe423407539c3b475253099787aa7c7351eee346 Mon Sep 17 00:00:00 2001 From: Abhishek Jindal Date: Fri, 2 Aug 2024 16:57:35 -0700 Subject: [PATCH 02/35] update return var --- src/ort_genai.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ort_genai.h b/src/ort_genai.h index 198792fd2..f3a5dd1ca 100644 --- a/src/ort_genai.h +++ b/src/ort_genai.h @@ -235,7 +235,7 @@ struct OgaGenerator : OgaAbstract { OgaTensor* GetOutput(const char* name) { OgaTensor* out; OgaCheckResult(OgaGenerator_GetOutput(this, name, &out)); - return std::unique_ptr(out); + return out; } #if __cplusplus >= 202002L From 1b100335e3cc432b9e08c174fb0a8bc5d028172b Mon Sep 17 00:00:00 2001 From: Abhishek Jindal Date: Fri, 2 Aug 2024 17:00:07 -0700 Subject: [PATCH 03/35] fix typo --- src/ort_genai_c.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ort_genai_c.cpp b/src/ort_genai_c.cpp index 9f03bab53..30bda6dbd 100644 --- a/src/ort_genai_c.cpp +++ b/src/ort_genai_c.cpp @@ -208,7 +208,7 @@ OgaResult* OGA_API_CALL OgaGenerator_GenerateNextToken(OgaGenerator* generator) OGA_CATCH } -OgaResult* OgaGenerator_GetOutput OgaGenerator_GetOutput(const OgaGenerator* generator, const char* name, OgaTensor** out) { +OgaResult* OGA_API_CALL OgaGenerator_GetOutput(const OgaGenerator* generator, const char* name, OgaTensor** out) { OGA_TRY auto& generator = *reinterpret_cast(generator); auto* OrtValue = generator.State.GetOutput(name); From b042eab323f21ddde21b8568143526a5a83998e1 Mon Sep 17 00:00:00 2001 From: Abhishek Jindal Date: Fri, 2 Aug 2024 17:05:05 -0700 Subject: [PATCH 04/35] change gen name --- src/ort_genai_c.cpp | 4 ++-- src/ort_genai_c.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ort_genai_c.cpp b/src/ort_genai_c.cpp index 30bda6dbd..b21d00a4e 100644 --- a/src/ort_genai_c.cpp +++ b/src/ort_genai_c.cpp @@ -208,9 +208,9 @@ OgaResult* OGA_API_CALL OgaGenerator_GenerateNextToken(OgaGenerator* generator) OGA_CATCH } -OgaResult* OGA_API_CALL OgaGenerator_GetOutput(const OgaGenerator* generator, const char* name, OgaTensor** out) { +OgaResult* OGA_API_CALL OgaGenerator_GetOutput(const OgaGenerator* oga_generator, const char* name, OgaTensor** out) { OGA_TRY - auto& generator = *reinterpret_cast(generator); + auto& generator = *reinterpret_cast(oga_generator); auto* OrtValue = generator.State.GetOutput(name); *out = reinterpret_cast(OrtValue); return nullptr; diff --git a/src/ort_genai_c.h b/src/ort_genai_c.h index d1ee27509..0e1f12aae 100644 --- a/src/ort_genai_c.h +++ b/src/ort_genai_c.h @@ -224,7 +224,7 @@ OGA_EXPORT bool OGA_API_CALL OgaGenerator_IsDone(const OgaGenerator* generator); OGA_EXPORT OgaResult* OGA_API_CALL OgaGenerator_ComputeLogits(OgaGenerator* generator); OGA_EXPORT OgaResult* OGA_API_CALL OgaGenerator_GenerateNextToken(OgaGenerator* generator); -OGA_EXPORT OgaResult* OGA_API_CALL OgaGenerator_GetOutput(const OgaGenerator* generator, const char* name, OgaTensor** out); +OGA_EXPORT OgaResult* OGA_API_CALL OgaGenerator_GetOutput(const OgaGenerator* oga_generator, const char* name, OgaTensor** out); /* * \brief Returns the number of tokens in the sequence at the given index. From a658c9db8bfbcd858a73d5c7d2b523e981365df4 Mon Sep 17 00:00:00 2001 From: Abhishek Jindal Date: Fri, 2 Aug 2024 17:11:18 -0700 Subject: [PATCH 05/35] change state --- src/ort_genai_c.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ort_genai_c.cpp b/src/ort_genai_c.cpp index b21d00a4e..92a2df035 100644 --- a/src/ort_genai_c.cpp +++ b/src/ort_genai_c.cpp @@ -211,7 +211,7 @@ OgaResult* OGA_API_CALL OgaGenerator_GenerateNextToken(OgaGenerator* generator) OgaResult* OGA_API_CALL OgaGenerator_GetOutput(const OgaGenerator* oga_generator, const char* name, OgaTensor** out) { OGA_TRY auto& generator = *reinterpret_cast(oga_generator); - auto* OrtValue = generator.State.GetOutput(name); + auto* OrtValue = generator.state_.GetOutput(name); *out = reinterpret_cast(OrtValue); return nullptr; OGA_CATCH From 2f9c734736a12e77874ca5c1d00dec3f273354e2 Mon Sep 17 00:00:00 2001 From: Abhishek Jindal Date: Sun, 4 Aug 2024 22:39:19 -0700 Subject: [PATCH 06/35] change var name --- src/ort_genai_c.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ort_genai_c.cpp b/src/ort_genai_c.cpp index 92a2df035..b90991b59 100644 --- a/src/ort_genai_c.cpp +++ b/src/ort_genai_c.cpp @@ -211,8 +211,8 @@ OgaResult* OGA_API_CALL OgaGenerator_GenerateNextToken(OgaGenerator* generator) OgaResult* OGA_API_CALL OgaGenerator_GetOutput(const OgaGenerator* oga_generator, const char* name, OgaTensor** out) { OGA_TRY auto& generator = *reinterpret_cast(oga_generator); - auto* OrtValue = generator.state_.GetOutput(name); - *out = reinterpret_cast(OrtValue); + auto* ortValueOutput = generator.state_->GetOutput(name); + *out = reinterpret_cast(ortValueOutput); return nullptr; OGA_CATCH } From 2134e24fa45d119272fdeb53282961d06bf2764a Mon Sep 17 00:00:00 2001 From: Abhishek Jindal Date: Mon, 5 Aug 2024 17:09:10 -0700 Subject: [PATCH 07/35] make a copy of buffer --- src/ort_genai.h | 4 ++-- src/ort_genai_c.cpp | 20 ++++++++++++++++++-- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/src/ort_genai.h b/src/ort_genai.h index f3a5dd1ca..4a83b69e2 100644 --- a/src/ort_genai.h +++ b/src/ort_genai.h @@ -232,10 +232,10 @@ struct OgaGenerator : OgaAbstract { return OgaGenerator_GetSequenceData(this, index); } - OgaTensor* GetOutput(const char* name) { + std::unique_ptr GetOutput(const char* name) { OgaTensor* out; OgaCheckResult(OgaGenerator_GetOutput(this, name, &out)); - return out; + return std::unique_ptr(out); } #if __cplusplus >= 202002L diff --git a/src/ort_genai_c.cpp b/src/ort_genai_c.cpp index b90991b59..5a84684ab 100644 --- a/src/ort_genai_c.cpp +++ b/src/ort_genai_c.cpp @@ -211,8 +211,24 @@ OgaResult* OGA_API_CALL OgaGenerator_GenerateNextToken(OgaGenerator* generator) OgaResult* OGA_API_CALL OgaGenerator_GetOutput(const OgaGenerator* oga_generator, const char* name, OgaTensor** out) { OGA_TRY auto& generator = *reinterpret_cast(oga_generator); - auto* ortValueOutput = generator.state_->GetOutput(name); - *out = reinterpret_cast(ortValueOutput); + auto* ortvalue_output = generator.state_->GetOutput(name); + auto type_info = ortvalue_output->GetTensorTypeAndShapeInfo(); + std::unique_ptr ortvalue_clone = OrtValue::CreateTensor(generator.model_->allocator_device_, + type_info->GetShape(), + type_info->ElementType()); + // Copy data to ortvalue_clone + auto element_size = Generators::SizeOf(type_info->GetElementType()); + auto data_size = type_info->GetElementCount() * element_size; + + if (ortvalue_output->GetTensorMemoryInfo().GetDeviceType() == OrtMemoryInfoDeviceType_GPU && generator.model_.device_type_ == Generators::DeviceType::CUDA) { + Generators::CudaCheck() == cudaMemcpy(ortvalue_clone.get(), ortvalue_output->GetTensorMutableRawData(), data_size, cudaMemcpyDeviceToHost); + } else{ + std::copy(ortvalue_output->GetTensorMutableRawData(), ortvalue_output->GetTensorMutableRawData() + data_size, ortvalue_clone.get()); + } + + auto tensor = std::make_shared(std::move(ortvalue_clone)); + tensor->external_owner_ = tensor; + *out = reinterpret_cast(tensor.get()); return nullptr; OGA_CATCH } From 65fd9c0b3eb5e2a8ed1dc8ac6c3e34208c130d2e Mon Sep 17 00:00:00 2001 From: Abhishek Jindal Date: Mon, 5 Aug 2024 17:22:33 -0700 Subject: [PATCH 08/35] modify copy of data --- src/ort_genai_c.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/ort_genai_c.cpp b/src/ort_genai_c.cpp index 5a84684ab..140171aed 100644 --- a/src/ort_genai_c.cpp +++ b/src/ort_genai_c.cpp @@ -215,15 +215,14 @@ OgaResult* OGA_API_CALL OgaGenerator_GetOutput(const OgaGenerator* oga_generator auto type_info = ortvalue_output->GetTensorTypeAndShapeInfo(); std::unique_ptr ortvalue_clone = OrtValue::CreateTensor(generator.model_->allocator_device_, type_info->GetShape(), - type_info->ElementType()); + type_info->GetElementType()); // Copy data to ortvalue_clone auto element_size = Generators::SizeOf(type_info->GetElementType()); auto data_size = type_info->GetElementCount() * element_size; - - if (ortvalue_output->GetTensorMemoryInfo().GetDeviceType() == OrtMemoryInfoDeviceType_GPU && generator.model_.device_type_ == Generators::DeviceType::CUDA) { - Generators::CudaCheck() == cudaMemcpy(ortvalue_clone.get(), ortvalue_output->GetTensorMutableRawData(), data_size, cudaMemcpyDeviceToHost); - } else{ - std::copy(ortvalue_output->GetTensorMutableRawData(), ortvalue_output->GetTensorMutableRawData() + data_size, ortvalue_clone.get()); + if (ortvalue_output->GetTensorMemoryInfo().GetDeviceType() == OrtMemoryInfoDeviceType_GPU && generator.model_->device_type_ == Generators::DeviceType::CUDA) { + cudaMemcpy(ortvalue_clone->GetTensorMutableRawData(), ortvalue_output->GetTensorMutableRawData(), data_size, cudaMemcpyDeviceToHost); + } else { + std::copy(ortvalue_output->GetTensorMutableRawData(), ortvalue_output->GetTensorMutableRawData() + data_size, ortvalue_clone->GetTensorMutableRawData()); } auto tensor = std::make_shared(std::move(ortvalue_clone)); From 545034f9b8968ab5bf637d788647bf4b17cb41ce Mon Sep 17 00:00:00 2001 From: Abhishek Jindal Date: Mon, 5 Aug 2024 17:53:03 -0700 Subject: [PATCH 09/35] add use cuda --- src/ort_genai_c.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/ort_genai_c.cpp b/src/ort_genai_c.cpp index 140171aed..26d5ed9d6 100644 --- a/src/ort_genai_c.cpp +++ b/src/ort_genai_c.cpp @@ -213,17 +213,19 @@ OgaResult* OGA_API_CALL OgaGenerator_GetOutput(const OgaGenerator* oga_generator auto& generator = *reinterpret_cast(oga_generator); auto* ortvalue_output = generator.state_->GetOutput(name); auto type_info = ortvalue_output->GetTensorTypeAndShapeInfo(); - std::unique_ptr ortvalue_clone = OrtValue::CreateTensor(generator.model_->allocator_device_, + std::unique_ptr ortvalue_clone = OrtValue::CreateTensor(*generator.model_->allocator_device_, type_info->GetShape(), type_info->GetElementType()); // Copy data to ortvalue_clone auto element_size = Generators::SizeOf(type_info->GetElementType()); auto data_size = type_info->GetElementCount() * element_size; +#if USE_CUDA if (ortvalue_output->GetTensorMemoryInfo().GetDeviceType() == OrtMemoryInfoDeviceType_GPU && generator.model_->device_type_ == Generators::DeviceType::CUDA) { cudaMemcpy(ortvalue_clone->GetTensorMutableRawData(), ortvalue_output->GetTensorMutableRawData(), data_size, cudaMemcpyDeviceToHost); - } else { - std::copy(ortvalue_output->GetTensorMutableRawData(), ortvalue_output->GetTensorMutableRawData() + data_size, ortvalue_clone->GetTensorMutableRawData()); } +#else + std::copy(ortvalue_output->GetTensorMutableRawData(), ortvalue_output->GetTensorMutableRawData() + data_size, ortvalue_clone->GetTensorMutableRawData()); +#endif auto tensor = std::make_shared(std::move(ortvalue_clone)); tensor->external_owner_ = tensor; From ec4f6cf3afbd200ad8a12e4cff806243dcea23cb Mon Sep 17 00:00:00 2001 From: Abhishek Jindal Date: Tue, 6 Aug 2024 10:22:48 -0700 Subject: [PATCH 10/35] modify std copy --- src/ort_genai_c.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/ort_genai_c.cpp b/src/ort_genai_c.cpp index 26d5ed9d6..c6e21f46a 100644 --- a/src/ort_genai_c.cpp +++ b/src/ort_genai_c.cpp @@ -224,7 +224,9 @@ OgaResult* OGA_API_CALL OgaGenerator_GetOutput(const OgaGenerator* oga_generator cudaMemcpy(ortvalue_clone->GetTensorMutableRawData(), ortvalue_output->GetTensorMutableRawData(), data_size, cudaMemcpyDeviceToHost); } #else - std::copy(ortvalue_output->GetTensorMutableRawData(), ortvalue_output->GetTensorMutableRawData() + data_size, ortvalue_clone->GetTensorMutableRawData()); + std::copy(static_cast(ortvalue_output->GetTensorMutableRawData()), + static_cast(ortvalue_output->GetTensorMutableRawData()) + data_size, + static_cast(ortvalue_clone->GetTensorMutableRawData())); #endif auto tensor = std::make_shared(std::move(ortvalue_clone)); From 471d52bb2b6c0727706da7507f7e41962c2a8030 Mon Sep 17 00:00:00 2001 From: Abhishek Jindal Date: Thu, 8 Aug 2024 11:20:23 -0700 Subject: [PATCH 11/35] change allocator device and add for DML --- src/ort_genai_c.cpp | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/ort_genai_c.cpp b/src/ort_genai_c.cpp index c6e21f46a..9f11aa3f3 100644 --- a/src/ort_genai_c.cpp +++ b/src/ort_genai_c.cpp @@ -213,7 +213,7 @@ OgaResult* OGA_API_CALL OgaGenerator_GetOutput(const OgaGenerator* oga_generator auto& generator = *reinterpret_cast(oga_generator); auto* ortvalue_output = generator.state_->GetOutput(name); auto type_info = ortvalue_output->GetTensorTypeAndShapeInfo(); - std::unique_ptr ortvalue_clone = OrtValue::CreateTensor(*generator.model_->allocator_device_, + std::unique_ptr ortvalue_clone = OrtValue::CreateTensor(*generator.model_->allocator_cpu_, type_info->GetShape(), type_info->GetElementType()); // Copy data to ortvalue_clone @@ -223,6 +223,20 @@ OgaResult* OGA_API_CALL OgaGenerator_GetOutput(const OgaGenerator* oga_generator if (ortvalue_output->GetTensorMemoryInfo().GetDeviceType() == OrtMemoryInfoDeviceType_GPU && generator.model_->device_type_ == Generators::DeviceType::CUDA) { cudaMemcpy(ortvalue_clone->GetTensorMutableRawData(), ortvalue_output->GetTensorMutableRawData(), data_size, cudaMemcpyDeviceToHost); } +#elif USE_DML + if (*generator.model_->device_type_ == DeviceType::DML) { + ComPtr gpu_resource; + Ort::ThrowOnError(generator.model_->GetOrtDmlApi()->GetD3D12ResourceFromAllocation( + *generator.model_->allocator_device_, + ortvalue_output->GetTensorMutableRawData(), + &gpu_resource)); + auto cpu_tensor = ortvalue_clone->GetTensorMutableRawData(); + generator.model_->GetDmlReadbackHeap()->ReadbackFromGpu( + std::span(reinterpret_cast(cpu_tensor), data_size), + gpu_resource.Get(), + 0, + D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + } #else std::copy(static_cast(ortvalue_output->GetTensorMutableRawData()), static_cast(ortvalue_output->GetTensorMutableRawData()) + data_size, From eaa921195a921e6e7422bf3150898c98b508e8ac Mon Sep 17 00:00:00 2001 From: Abhishek Jindal Date: Thu, 8 Aug 2024 11:33:53 -0700 Subject: [PATCH 12/35] change allocator cpu --- src/ort_genai_c.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ort_genai_c.cpp b/src/ort_genai_c.cpp index 9f11aa3f3..b33adf516 100644 --- a/src/ort_genai_c.cpp +++ b/src/ort_genai_c.cpp @@ -213,7 +213,7 @@ OgaResult* OGA_API_CALL OgaGenerator_GetOutput(const OgaGenerator* oga_generator auto& generator = *reinterpret_cast(oga_generator); auto* ortvalue_output = generator.state_->GetOutput(name); auto type_info = ortvalue_output->GetTensorTypeAndShapeInfo(); - std::unique_ptr ortvalue_clone = OrtValue::CreateTensor(*generator.model_->allocator_cpu_, + std::unique_ptr ortvalue_clone = OrtValue::CreateTensor(generator.model_->allocator_cpu_, type_info->GetShape(), type_info->GetElementType()); // Copy data to ortvalue_clone @@ -224,7 +224,7 @@ OgaResult* OGA_API_CALL OgaGenerator_GetOutput(const OgaGenerator* oga_generator cudaMemcpy(ortvalue_clone->GetTensorMutableRawData(), ortvalue_output->GetTensorMutableRawData(), data_size, cudaMemcpyDeviceToHost); } #elif USE_DML - if (*generator.model_->device_type_ == DeviceType::DML) { + if (ortvalue_output->GetTensorMemoryInfo().GetDeviceType() == OrtMemoryInfoDeviceType_GPU && *generator.model_->device_type_ == DeviceType::DML) { ComPtr gpu_resource; Ort::ThrowOnError(generator.model_->GetOrtDmlApi()->GetD3D12ResourceFromAllocation( *generator.model_->allocator_device_, From c2c2816410d2fbfcd98355960141fd72639a37be Mon Sep 17 00:00:00 2001 From: Abhishek Jindal Date: Thu, 8 Aug 2024 12:05:55 -0700 Subject: [PATCH 13/35] update dml code --- src/ort_genai_c.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ort_genai_c.cpp b/src/ort_genai_c.cpp index b33adf516..60b0f92f1 100644 --- a/src/ort_genai_c.cpp +++ b/src/ort_genai_c.cpp @@ -224,10 +224,10 @@ OgaResult* OGA_API_CALL OgaGenerator_GetOutput(const OgaGenerator* oga_generator cudaMemcpy(ortvalue_clone->GetTensorMutableRawData(), ortvalue_output->GetTensorMutableRawData(), data_size, cudaMemcpyDeviceToHost); } #elif USE_DML - if (ortvalue_output->GetTensorMemoryInfo().GetDeviceType() == OrtMemoryInfoDeviceType_GPU && *generator.model_->device_type_ == DeviceType::DML) { + if (ortvalue_output->GetTensorMemoryInfo().GetDeviceType() == OrtMemoryInfoDeviceType_GPU && *generator.model_->device_type_ == Generators::DeviceType::DML) { ComPtr gpu_resource; Ort::ThrowOnError(generator.model_->GetOrtDmlApi()->GetD3D12ResourceFromAllocation( - *generator.model_->allocator_device_, + generator.model_->allocator_device_, ortvalue_output->GetTensorMutableRawData(), &gpu_resource)); auto cpu_tensor = ortvalue_clone->GetTensorMutableRawData(); From 63ece2bd50c8bdd40279f6f0f1ccdfda9fba0d01 Mon Sep 17 00:00:00 2001 From: Abhishek Jindal Date: Thu, 8 Aug 2024 12:15:56 -0700 Subject: [PATCH 14/35] update dml device type --- src/ort_genai_c.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ort_genai_c.cpp b/src/ort_genai_c.cpp index 60b0f92f1..4cfd5a735 100644 --- a/src/ort_genai_c.cpp +++ b/src/ort_genai_c.cpp @@ -224,7 +224,7 @@ OgaResult* OGA_API_CALL OgaGenerator_GetOutput(const OgaGenerator* oga_generator cudaMemcpy(ortvalue_clone->GetTensorMutableRawData(), ortvalue_output->GetTensorMutableRawData(), data_size, cudaMemcpyDeviceToHost); } #elif USE_DML - if (ortvalue_output->GetTensorMemoryInfo().GetDeviceType() == OrtMemoryInfoDeviceType_GPU && *generator.model_->device_type_ == Generators::DeviceType::DML) { + if (ortvalue_output->GetTensorMemoryInfo().GetDeviceType() == OrtMemoryInfoDeviceType_GPU && generator.model_->device_type_ == Generators::DeviceType::DML) { ComPtr gpu_resource; Ort::ThrowOnError(generator.model_->GetOrtDmlApi()->GetD3D12ResourceFromAllocation( generator.model_->allocator_device_, From cbeaefe17169d627823bae409debb203865e9c06 Mon Sep 17 00:00:00 2001 From: Abhishek Jindal Date: Thu, 8 Aug 2024 15:49:51 -0700 Subject: [PATCH 15/35] add test for GetOutput --- test/c_api_tests.cpp | 55 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/test/c_api_tests.cpp b/test/c_api_tests.cpp index eba5aff15..d86336486 100644 --- a/test/c_api_tests.cpp +++ b/test/c_api_tests.cpp @@ -173,6 +173,61 @@ TEST(CAPITests, GreedySearchGptFp32CAPI) { } #endif +TEST(CAPITests, GetOutputCAPI) { + std::vector input_ids_shape{2, 4}; + std::vector input_ids{0, 0, 0, 52, 0, 0, 195, 731}; + + auto input_sequence_length = input_ids_shape[1]; + auto batch_size = input_ids_shape[0]; + int max_length = 10; + + // To generate this file: + // python convert_generation.py --model_type gpt2 -m hf-internal-testing/tiny-random-gpt2 --output tiny_gpt2_greedysearch_fp16.onnx --use_gpu --max_length 20 + // And copy the resulting gpt2_init_past_fp32.onnx file into these two files (as it's the same for gpt2) + + auto model = OgaModel::Create(MODEL_PATH "hf-internal-testing/tiny-random-gpt2-fp32"); + + auto params = OgaGeneratorParams::Create(*model); + params->SetSearchOption("max_length", max_length); + params->SetInputIDs(input_ids.data(), input_ids.size(), input_sequence_length, batch_size); + + auto generator = OgaGenerator::Create(*model, *params); + generator->ComputeLogits(); + auto prompt_logits = generator->GetOutput('logits'); + + // check prompt + // full logits has shape [2, 4, 1000]. Sample 1 for every 200 tokens and the expected sampled logits has shape [2, 4, 5] + std::vector expected_sampled_logits_prompt{0.29694548, 0.00955007, 0.0430819, 0.10063869, 0.0437237, + 0.27329233, 0.00841076, -0.1060291, 0.11328877, 0.13369876, + 0.30323744, 0.0545997, 0.03894716, 0.11702324, 0.0410665, + -0.12675379, -0.04443946, 0.14492269, 0.03021223, -0.03212897, + 0.29694548, 0.00955007, 0.0430819, 0.10063869, 0.0437237, + 0.27329233, 0.00841076, -0.1060291, 0.11328877, 0.13369876, + -0.04699047, 0.17915794, 0.20838135, 0.10888482, -0.00277808, + 0.2938929, -0.10538938, -0.00226692, 0.12050669, -0.10622668}; + + int num_prompt_outputs_to_check = 40; + int sample_size = 200; + float tolerance = 0.001; + // Verify outputs match expected outputs + for (int i = 0; i < num_prompt_outputs_to_check; i++) { + EXPECT_NEAR(expected_sampled_logits_prompt[i], prompt_logits[i*sample_size], tolerance); + } + + generator->GenerateNextToken(); + auto token_gen_logits = generator->GetOutput('logits'); + + // check for the 1st token generation + // full logits has shape [2, 1, 1000]. Sample 1 for every 200 tokens and the expected sampled logits has shape [2, 1, 5] + std::vector expected_sampled_logits_token_gen{0.03742531, -0.05752287, 0.14159015, 0.04210977, -0.1484456, + 0.3041716, -0.08701379, -0.03778192, 0.07471392, -0.02049096}; + int num_token_gen_outputs_to_check = 10; + + for (int i = 0; i < num_token_gen_outputs_to_check; i++) { + EXPECT_NEAR(expected_sampled_logits_token_gen[i], token_gen_logits[i*sample_size], tolerance); + } +} + #if TEST_PHI2 struct Phi2Test { From 5eabc4d0f8d594dd43a2d9eaf6f441c7dc9a0bbc Mon Sep 17 00:00:00 2001 From: Abhishek Jindal Date: Thu, 8 Aug 2024 15:59:57 -0700 Subject: [PATCH 16/35] update copy type --- src/ort_genai_c.cpp | 6 +++--- test/c_api_tests.cpp | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ort_genai_c.cpp b/src/ort_genai_c.cpp index 4cfd5a735..ceafd057a 100644 --- a/src/ort_genai_c.cpp +++ b/src/ort_genai_c.cpp @@ -238,9 +238,9 @@ OgaResult* OGA_API_CALL OgaGenerator_GetOutput(const OgaGenerator* oga_generator D3D12_RESOURCE_STATE_UNORDERED_ACCESS); } #else - std::copy(static_cast(ortvalue_output->GetTensorMutableRawData()), - static_cast(ortvalue_output->GetTensorMutableRawData()) + data_size, - static_cast(ortvalue_clone->GetTensorMutableRawData())); + std::copy(static_cast(ortvalue_output->GetTensorMutableRawData()), + static_cast(ortvalue_output->GetTensorMutableRawData()) + data_size, + static_cast(ortvalue_clone->GetTensorMutableRawData())); #endif auto tensor = std::make_shared(std::move(ortvalue_clone)); diff --git a/test/c_api_tests.cpp b/test/c_api_tests.cpp index d86336486..652ec48ec 100644 --- a/test/c_api_tests.cpp +++ b/test/c_api_tests.cpp @@ -193,7 +193,7 @@ TEST(CAPITests, GetOutputCAPI) { auto generator = OgaGenerator::Create(*model, *params); generator->ComputeLogits(); - auto prompt_logits = generator->GetOutput('logits'); + auto prompt_logits = generator->GetOutput("logits"); // check prompt // full logits has shape [2, 4, 1000]. Sample 1 for every 200 tokens and the expected sampled logits has shape [2, 4, 5] @@ -215,7 +215,7 @@ TEST(CAPITests, GetOutputCAPI) { } generator->GenerateNextToken(); - auto token_gen_logits = generator->GetOutput('logits'); + auto token_gen_logits = generator->GetOutput("logits"); // check for the 1st token generation // full logits has shape [2, 1, 1000]. Sample 1 for every 200 tokens and the expected sampled logits has shape [2, 1, 5] From b44602270753b6518d2652d30ffed408cd19e1ff Mon Sep 17 00:00:00 2001 From: Abhishek Jindal Date: Thu, 8 Aug 2024 16:48:28 -0700 Subject: [PATCH 17/35] use output data --- test/c_api_tests.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/c_api_tests.cpp b/test/c_api_tests.cpp index 652ec48ec..5dc520d5f 100644 --- a/test/c_api_tests.cpp +++ b/test/c_api_tests.cpp @@ -193,7 +193,7 @@ TEST(CAPITests, GetOutputCAPI) { auto generator = OgaGenerator::Create(*model, *params); generator->ComputeLogits(); - auto prompt_logits = generator->GetOutput("logits"); + auto prompt_logits = generator->GetOutput("logits")->Data(); // check prompt // full logits has shape [2, 4, 1000]. Sample 1 for every 200 tokens and the expected sampled logits has shape [2, 4, 5] @@ -215,7 +215,7 @@ TEST(CAPITests, GetOutputCAPI) { } generator->GenerateNextToken(); - auto token_gen_logits = generator->GetOutput("logits"); + auto token_gen_logits = generator->GetOutput("logits")->Data(); // check for the 1st token generation // full logits has shape [2, 1, 1000]. Sample 1 for every 200 tokens and the expected sampled logits has shape [2, 1, 5] From 3ac992d29697135f4cc33e8fdcd00736e3ef17f3 Mon Sep 17 00:00:00 2001 From: Abhishek Jindal Date: Thu, 8 Aug 2024 17:00:33 -0700 Subject: [PATCH 18/35] cast to float --- test/c_api_tests.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/c_api_tests.cpp b/test/c_api_tests.cpp index 5dc520d5f..d25181fa6 100644 --- a/test/c_api_tests.cpp +++ b/test/c_api_tests.cpp @@ -193,7 +193,7 @@ TEST(CAPITests, GetOutputCAPI) { auto generator = OgaGenerator::Create(*model, *params); generator->ComputeLogits(); - auto prompt_logits = generator->GetOutput("logits")->Data(); + auto prompt_logits = static_cast(generator->GetOutput("logits")->Data()); // check prompt // full logits has shape [2, 4, 1000]. Sample 1 for every 200 tokens and the expected sampled logits has shape [2, 4, 5] @@ -215,7 +215,7 @@ TEST(CAPITests, GetOutputCAPI) { } generator->GenerateNextToken(); - auto token_gen_logits = generator->GetOutput("logits")->Data(); + auto token_gen_logits = static_cast(generator->GetOutput("logits")->Data();) // check for the 1st token generation // full logits has shape [2, 1, 1000]. Sample 1 for every 200 tokens and the expected sampled logits has shape [2, 1, 5] From 3df46f1fe400f087e8e660b3e64d9e0a3f2da217 Mon Sep 17 00:00:00 2001 From: Abhishek Jindal Date: Thu, 8 Aug 2024 17:09:02 -0700 Subject: [PATCH 19/35] fix typo --- test/c_api_tests.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/c_api_tests.cpp b/test/c_api_tests.cpp index d25181fa6..41aa54b13 100644 --- a/test/c_api_tests.cpp +++ b/test/c_api_tests.cpp @@ -215,7 +215,7 @@ TEST(CAPITests, GetOutputCAPI) { } generator->GenerateNextToken(); - auto token_gen_logits = static_cast(generator->GetOutput("logits")->Data();) + auto token_gen_logits = static_cast(generator->GetOutput("logits")->Data()); // check for the 1st token generation // full logits has shape [2, 1, 1000]. Sample 1 for every 200 tokens and the expected sampled logits has shape [2, 1, 5] From 20115b9bd0c4109d1b4f90509ce22827cbf5f1e4 Mon Sep 17 00:00:00 2001 From: Abhishek Jindal Date: Thu, 8 Aug 2024 23:07:18 -0700 Subject: [PATCH 20/35] logging --- src/ort_genai_c.cpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/ort_genai_c.cpp b/src/ort_genai_c.cpp index ceafd057a..6caa2d679 100644 --- a/src/ort_genai_c.cpp +++ b/src/ort_genai_c.cpp @@ -219,12 +219,15 @@ OgaResult* OGA_API_CALL OgaGenerator_GetOutput(const OgaGenerator* oga_generator // Copy data to ortvalue_clone auto element_size = Generators::SizeOf(type_info->GetElementType()); auto data_size = type_info->GetElementCount() * element_size; + std::cout << "Data count:" << type_info->GetElementCount() << " size:" << element_size << std::endl; #if USE_CUDA if (ortvalue_output->GetTensorMemoryInfo().GetDeviceType() == OrtMemoryInfoDeviceType_GPU && generator.model_->device_type_ == Generators::DeviceType::CUDA) { + std::cout << "OrtValue Output on CUDA device" << std::endl; cudaMemcpy(ortvalue_clone->GetTensorMutableRawData(), ortvalue_output->GetTensorMutableRawData(), data_size, cudaMemcpyDeviceToHost); } #elif USE_DML if (ortvalue_output->GetTensorMemoryInfo().GetDeviceType() == OrtMemoryInfoDeviceType_GPU && generator.model_->device_type_ == Generators::DeviceType::DML) { + std::cout << "OrtValue Output on DML device" << std::endl; ComPtr gpu_resource; Ort::ThrowOnError(generator.model_->GetOrtDmlApi()->GetD3D12ResourceFromAllocation( generator.model_->allocator_device_, @@ -238,9 +241,13 @@ OgaResult* OGA_API_CALL OgaGenerator_GetOutput(const OgaGenerator* oga_generator D3D12_RESOURCE_STATE_UNORDERED_ACCESS); } #else - std::copy(static_cast(ortvalue_output->GetTensorMutableRawData()), - static_cast(ortvalue_output->GetTensorMutableRawData()) + data_size, - static_cast(ortvalue_clone->GetTensorMutableRawData())); + if (ortvalue_output->GetTensorMemoryInfo().GetDeviceType() == OrtMemoryInfoDeviceType_CPU) { + std::cout << "OrtValue Output on CPU device" << std::endl; + std::copy(static_cast(ortvalue_output->GetTensorMutableRawData()), + static_cast(ortvalue_output->GetTensorMutableRawData()) + data_size, + static_cast(ortvalue_clone->GetTensorMutableRawData())); + } + std::cout << "Data type:" << type_info->GetElementType() << std::endl; #endif auto tensor = std::make_shared(std::move(ortvalue_clone)); From b4aaf5b0b54253d03aa94ed037e25a6611e291ce Mon Sep 17 00:00:00 2001 From: Abhishek Jindal Date: Thu, 8 Aug 2024 23:31:30 -0700 Subject: [PATCH 21/35] add logging --- test/c_api_tests.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/c_api_tests.cpp b/test/c_api_tests.cpp index 41aa54b13..224aab183 100644 --- a/test/c_api_tests.cpp +++ b/test/c_api_tests.cpp @@ -211,6 +211,7 @@ TEST(CAPITests, GetOutputCAPI) { float tolerance = 0.001; // Verify outputs match expected outputs for (int i = 0; i < num_prompt_outputs_to_check; i++) { + std::cout << "Output:" << i << "exp:" << expected_sampled_logits_prompt[i] << " act:" << prompt_logits[i*sample_size] << std:endl; EXPECT_NEAR(expected_sampled_logits_prompt[i], prompt_logits[i*sample_size], tolerance); } @@ -224,8 +225,10 @@ TEST(CAPITests, GetOutputCAPI) { int num_token_gen_outputs_to_check = 10; for (int i = 0; i < num_token_gen_outputs_to_check; i++) { + std::cout << "Output:" << i << "exp:" << expected_sampled_logits_token_gen[i] << " act:" << token_gen_logits[i*sample_size] << std:endl; EXPECT_NEAR(expected_sampled_logits_token_gen[i], token_gen_logits[i*sample_size], tolerance); } + generator->GenerateNextToken(); } #if TEST_PHI2 From 687369367b1206ec3ef782e1155cdd4a2d9235dc Mon Sep 17 00:00:00 2001 From: Abhishek Jindal Date: Thu, 8 Aug 2024 23:39:35 -0700 Subject: [PATCH 22/35] typo fix --- test/c_api_tests.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/c_api_tests.cpp b/test/c_api_tests.cpp index 224aab183..711f4b35f 100644 --- a/test/c_api_tests.cpp +++ b/test/c_api_tests.cpp @@ -211,7 +211,7 @@ TEST(CAPITests, GetOutputCAPI) { float tolerance = 0.001; // Verify outputs match expected outputs for (int i = 0; i < num_prompt_outputs_to_check; i++) { - std::cout << "Output:" << i << "exp:" << expected_sampled_logits_prompt[i] << " act:" << prompt_logits[i*sample_size] << std:endl; + std::cout << "Output:" << i << "exp:" << expected_sampled_logits_prompt[i] << " act:" << prompt_logits[i*sample_size] << std::endl; EXPECT_NEAR(expected_sampled_logits_prompt[i], prompt_logits[i*sample_size], tolerance); } @@ -225,7 +225,7 @@ TEST(CAPITests, GetOutputCAPI) { int num_token_gen_outputs_to_check = 10; for (int i = 0; i < num_token_gen_outputs_to_check; i++) { - std::cout << "Output:" << i << "exp:" << expected_sampled_logits_token_gen[i] << " act:" << token_gen_logits[i*sample_size] << std:endl; + std::cout << "Output:" << i << "exp:" << expected_sampled_logits_token_gen[i] << " act:" << token_gen_logits[i*sample_size] << std::endl; EXPECT_NEAR(expected_sampled_logits_token_gen[i], token_gen_logits[i*sample_size], tolerance); } generator->GenerateNextToken(); From 5fa23a033302f852ba3ecdb9ca2468a7e20b22ff Mon Sep 17 00:00:00 2001 From: Abhishek Jindal Date: Thu, 8 Aug 2024 23:56:41 -0700 Subject: [PATCH 23/35] add missing code for compute logits --- test/c_api_tests.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/test/c_api_tests.cpp b/test/c_api_tests.cpp index 711f4b35f..7e5819389 100644 --- a/test/c_api_tests.cpp +++ b/test/c_api_tests.cpp @@ -192,8 +192,6 @@ TEST(CAPITests, GetOutputCAPI) { params->SetInputIDs(input_ids.data(), input_ids.size(), input_sequence_length, batch_size); auto generator = OgaGenerator::Create(*model, *params); - generator->ComputeLogits(); - auto prompt_logits = static_cast(generator->GetOutput("logits")->Data()); // check prompt // full logits has shape [2, 4, 1000]. Sample 1 for every 200 tokens and the expected sampled logits has shape [2, 4, 5] @@ -206,6 +204,8 @@ TEST(CAPITests, GetOutputCAPI) { -0.04699047, 0.17915794, 0.20838135, 0.10888482, -0.00277808, 0.2938929, -0.10538938, -0.00226692, 0.12050669, -0.10622668}; + generator->ComputeLogits(); + auto prompt_logits = static_cast(generator->GetOutput("logits")->Data()); int num_prompt_outputs_to_check = 40; int sample_size = 200; float tolerance = 0.001; @@ -214,14 +214,15 @@ TEST(CAPITests, GetOutputCAPI) { std::cout << "Output:" << i << "exp:" << expected_sampled_logits_prompt[i] << " act:" << prompt_logits[i*sample_size] << std::endl; EXPECT_NEAR(expected_sampled_logits_prompt[i], prompt_logits[i*sample_size], tolerance); } - generator->GenerateNextToken(); - auto token_gen_logits = static_cast(generator->GetOutput("logits")->Data()); // check for the 1st token generation // full logits has shape [2, 1, 1000]. Sample 1 for every 200 tokens and the expected sampled logits has shape [2, 1, 5] std::vector expected_sampled_logits_token_gen{0.03742531, -0.05752287, 0.14159015, 0.04210977, -0.1484456, 0.3041716, -0.08701379, -0.03778192, 0.07471392, -0.02049096}; + + generator->ComputeLogits(); + auto token_gen_logits = static_cast(generator->GetOutput("logits")->Data()); int num_token_gen_outputs_to_check = 10; for (int i = 0; i < num_token_gen_outputs_to_check; i++) { From c02241233271dd192357dd37d54f1c4bc4a0859f Mon Sep 17 00:00:00 2001 From: Abhishek Jindal Date: Fri, 9 Aug 2024 00:14:28 -0700 Subject: [PATCH 24/35] add more logs and conditions --- src/ort_genai_c.cpp | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/ort_genai_c.cpp b/src/ort_genai_c.cpp index 6caa2d679..adfd45659 100644 --- a/src/ort_genai_c.cpp +++ b/src/ort_genai_c.cpp @@ -224,6 +224,13 @@ OgaResult* OGA_API_CALL OgaGenerator_GetOutput(const OgaGenerator* oga_generator if (ortvalue_output->GetTensorMemoryInfo().GetDeviceType() == OrtMemoryInfoDeviceType_GPU && generator.model_->device_type_ == Generators::DeviceType::CUDA) { std::cout << "OrtValue Output on CUDA device" << std::endl; cudaMemcpy(ortvalue_clone->GetTensorMutableRawData(), ortvalue_output->GetTensorMutableRawData(), data_size, cudaMemcpyDeviceToHost); + } else if (ortvalue_output->GetTensorMemoryInfo().GetDeviceType() == OrtMemoryInfoDeviceType_CPU) { + std::cout << "OrtValue Output on GPU and CPU device" << std::endl; + std::copy(static_cast(ortvalue_output->GetTensorMutableRawData()), + static_cast(ortvalue_output->GetTensorMutableRawData()) + data_size, + static_cast(ortvalue_clone->GetTensorMutableRawData())); + } else { + std::cout << "OrtValue Output on Unknown USE_CUDA device" << std::endl; } #elif USE_DML if (ortvalue_output->GetTensorMemoryInfo().GetDeviceType() == OrtMemoryInfoDeviceType_GPU && generator.model_->device_type_ == Generators::DeviceType::DML) { @@ -239,6 +246,13 @@ OgaResult* OGA_API_CALL OgaGenerator_GetOutput(const OgaGenerator* oga_generator gpu_resource.Get(), 0, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + } else if (ortvalue_output->GetTensorMemoryInfo().GetDeviceType() == OrtMemoryInfoDeviceType_CPU) { + std::cout << "OrtValue Output on DML and CPU device" << std::endl; + std::copy(static_cast(ortvalue_output->GetTensorMutableRawData()), + static_cast(ortvalue_output->GetTensorMutableRawData()) + data_size, + static_cast(ortvalue_clone->GetTensorMutableRawData())); + } else { + std::cout << "OrtValue Output on Unknown USE_DML device" << std::endl; } #else if (ortvalue_output->GetTensorMemoryInfo().GetDeviceType() == OrtMemoryInfoDeviceType_CPU) { @@ -247,9 +261,9 @@ OgaResult* OGA_API_CALL OgaGenerator_GetOutput(const OgaGenerator* oga_generator static_cast(ortvalue_output->GetTensorMutableRawData()) + data_size, static_cast(ortvalue_clone->GetTensorMutableRawData())); } - std::cout << "Data type:" << type_info->GetElementType() << std::endl; #endif + std::cout << "Data type:" << type_info->GetElementType() << std::endl; auto tensor = std::make_shared(std::move(ortvalue_clone)); tensor->external_owner_ = tensor; *out = reinterpret_cast(tensor.get()); From df482ad9e791ef46b2f9d868eacfdfe8ac748a64 Mon Sep 17 00:00:00 2001 From: Abhishek Jindal Date: Fri, 9 Aug 2024 00:42:37 -0700 Subject: [PATCH 25/35] restructure --- src/ort_genai_c.cpp | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/src/ort_genai_c.cpp b/src/ort_genai_c.cpp index adfd45659..6f511d659 100644 --- a/src/ort_genai_c.cpp +++ b/src/ort_genai_c.cpp @@ -224,13 +224,6 @@ OgaResult* OGA_API_CALL OgaGenerator_GetOutput(const OgaGenerator* oga_generator if (ortvalue_output->GetTensorMemoryInfo().GetDeviceType() == OrtMemoryInfoDeviceType_GPU && generator.model_->device_type_ == Generators::DeviceType::CUDA) { std::cout << "OrtValue Output on CUDA device" << std::endl; cudaMemcpy(ortvalue_clone->GetTensorMutableRawData(), ortvalue_output->GetTensorMutableRawData(), data_size, cudaMemcpyDeviceToHost); - } else if (ortvalue_output->GetTensorMemoryInfo().GetDeviceType() == OrtMemoryInfoDeviceType_CPU) { - std::cout << "OrtValue Output on GPU and CPU device" << std::endl; - std::copy(static_cast(ortvalue_output->GetTensorMutableRawData()), - static_cast(ortvalue_output->GetTensorMutableRawData()) + data_size, - static_cast(ortvalue_clone->GetTensorMutableRawData())); - } else { - std::cout << "OrtValue Output on Unknown USE_CUDA device" << std::endl; } #elif USE_DML if (ortvalue_output->GetTensorMemoryInfo().GetDeviceType() == OrtMemoryInfoDeviceType_GPU && generator.model_->device_type_ == Generators::DeviceType::DML) { @@ -246,22 +239,15 @@ OgaResult* OGA_API_CALL OgaGenerator_GetOutput(const OgaGenerator* oga_generator gpu_resource.Get(), 0, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); - } else if (ortvalue_output->GetTensorMemoryInfo().GetDeviceType() == OrtMemoryInfoDeviceType_CPU) { - std::cout << "OrtValue Output on DML and CPU device" << std::endl; - std::copy(static_cast(ortvalue_output->GetTensorMutableRawData()), - static_cast(ortvalue_output->GetTensorMutableRawData()) + data_size, - static_cast(ortvalue_clone->GetTensorMutableRawData())); - } else { - std::cout << "OrtValue Output on Unknown USE_DML device" << std::endl; } -#else +#endif + if (ortvalue_output->GetTensorMemoryInfo().GetDeviceType() == OrtMemoryInfoDeviceType_CPU) { std::cout << "OrtValue Output on CPU device" << std::endl; std::copy(static_cast(ortvalue_output->GetTensorMutableRawData()), static_cast(ortvalue_output->GetTensorMutableRawData()) + data_size, static_cast(ortvalue_clone->GetTensorMutableRawData())); } -#endif std::cout << "Data type:" << type_info->GetElementType() << std::endl; auto tensor = std::make_shared(std::move(ortvalue_clone)); From 5f63d009ef11ad1aa584a9413096b2c86f95a0cf Mon Sep 17 00:00:00 2001 From: Abhishek Jindal Date: Fri, 9 Aug 2024 11:24:14 -0700 Subject: [PATCH 26/35] use float and restructure code --- src/ort_genai_c.cpp | 13 +++++++------ test/c_api_tests.cpp | 22 ++++++++++++---------- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/src/ort_genai_c.cpp b/src/ort_genai_c.cpp index 6f511d659..de8cc048a 100644 --- a/src/ort_genai_c.cpp +++ b/src/ort_genai_c.cpp @@ -220,13 +220,14 @@ OgaResult* OGA_API_CALL OgaGenerator_GetOutput(const OgaGenerator* oga_generator auto element_size = Generators::SizeOf(type_info->GetElementType()); auto data_size = type_info->GetElementCount() * element_size; std::cout << "Data count:" << type_info->GetElementCount() << " size:" << element_size << std::endl; -#if USE_CUDA if (ortvalue_output->GetTensorMemoryInfo().GetDeviceType() == OrtMemoryInfoDeviceType_GPU && generator.model_->device_type_ == Generators::DeviceType::CUDA) { +#if USE_CUDA std::cout << "OrtValue Output on CUDA device" << std::endl; cudaMemcpy(ortvalue_clone->GetTensorMutableRawData(), ortvalue_output->GetTensorMutableRawData(), data_size, cudaMemcpyDeviceToHost); +#endif } -#elif USE_DML - if (ortvalue_output->GetTensorMemoryInfo().GetDeviceType() == OrtMemoryInfoDeviceType_GPU && generator.model_->device_type_ == Generators::DeviceType::DML) { + else if (ortvalue_output->GetTensorMemoryInfo().GetDeviceType() == OrtMemoryInfoDeviceType_GPU && generator.model_->device_type_ == Generators::DeviceType::DML) { +if USE_DML std::cout << "OrtValue Output on DML device" << std::endl; ComPtr gpu_resource; Ort::ThrowOnError(generator.model_->GetOrtDmlApi()->GetD3D12ResourceFromAllocation( @@ -239,16 +240,16 @@ OgaResult* OGA_API_CALL OgaGenerator_GetOutput(const OgaGenerator* oga_generator gpu_resource.Get(), 0, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); - } #endif - - if (ortvalue_output->GetTensorMemoryInfo().GetDeviceType() == OrtMemoryInfoDeviceType_CPU) { + } else if (ortvalue_output->GetTensorMemoryInfo().GetDeviceType() == OrtMemoryInfoDeviceType_CPU) { std::cout << "OrtValue Output on CPU device" << std::endl; std::copy(static_cast(ortvalue_output->GetTensorMutableRawData()), static_cast(ortvalue_output->GetTensorMutableRawData()) + data_size, static_cast(ortvalue_clone->GetTensorMutableRawData())); } + // Add else statement for no recognized device found above + std::cout << "Data type:" << type_info->GetElementType() << std::endl; auto tensor = std::make_shared(std::move(ortvalue_clone)); tensor->external_owner_ = tensor; diff --git a/test/c_api_tests.cpp b/test/c_api_tests.cpp index 7e5819389..128b3af82 100644 --- a/test/c_api_tests.cpp +++ b/test/c_api_tests.cpp @@ -173,6 +173,7 @@ TEST(CAPITests, GreedySearchGptFp32CAPI) { } #endif +#if !USE_DML TEST(CAPITests, GetOutputCAPI) { std::vector input_ids_shape{2, 4}; std::vector input_ids{0, 0, 0, 52, 0, 0, 195, 731}; @@ -195,14 +196,14 @@ TEST(CAPITests, GetOutputCAPI) { // check prompt // full logits has shape [2, 4, 1000]. Sample 1 for every 200 tokens and the expected sampled logits has shape [2, 4, 5] - std::vector expected_sampled_logits_prompt{0.29694548, 0.00955007, 0.0430819, 0.10063869, 0.0437237, - 0.27329233, 0.00841076, -0.1060291, 0.11328877, 0.13369876, - 0.30323744, 0.0545997, 0.03894716, 0.11702324, 0.0410665, - -0.12675379, -0.04443946, 0.14492269, 0.03021223, -0.03212897, - 0.29694548, 0.00955007, 0.0430819, 0.10063869, 0.0437237, - 0.27329233, 0.00841076, -0.1060291, 0.11328877, 0.13369876, - -0.04699047, 0.17915794, 0.20838135, 0.10888482, -0.00277808, - 0.2938929, -0.10538938, -0.00226692, 0.12050669, -0.10622668}; + std::vector expected_sampled_logits_prompt{0.29694548f, 0.00955007f, 0.0430819f, 0.10063869f, 0.0437237f, + 0.27329233f, 0.00841076f, -0.1060291f, 0.11328877f, 0.13369876f, + 0.30323744f, 0.0545997f, 0.03894716f, 0.11702324f, 0.0410665f, + -0.12675379f, -0.04443946f, 0.14492269f, 0.03021223f, -0.03212897f, + 0.29694548f, 0.00955007f, 0.0430819f, 0.10063869f, 0.0437237f, + 0.27329233f, 0.00841076f, -0.1060291f, 0.11328877f, 0.13369876f, + -0.04699047f, 0.17915794f, 0.20838135f, 0.10888482f, -0.00277808f, + 0.2938929f, -0.10538938f, -0.00226692f, 0.12050669f, -0.10622668f}; generator->ComputeLogits(); auto prompt_logits = static_cast(generator->GetOutput("logits")->Data()); @@ -218,8 +219,8 @@ TEST(CAPITests, GetOutputCAPI) { // check for the 1st token generation // full logits has shape [2, 1, 1000]. Sample 1 for every 200 tokens and the expected sampled logits has shape [2, 1, 5] - std::vector expected_sampled_logits_token_gen{0.03742531, -0.05752287, 0.14159015, 0.04210977, -0.1484456, - 0.3041716, -0.08701379, -0.03778192, 0.07471392, -0.02049096}; + std::vector expected_sampled_logits_token_gen{0.03742531f, -0.05752287f, 0.14159015f, 0.04210977f, -0.1484456f, + 0.3041716f, -0.08701379f, -0.03778192f, 0.07471392f, -0.02049096f}; generator->ComputeLogits(); auto token_gen_logits = static_cast(generator->GetOutput("logits")->Data()); @@ -231,6 +232,7 @@ TEST(CAPITests, GetOutputCAPI) { } generator->GenerateNextToken(); } +#endif #if TEST_PHI2 From 533911dc451a168ba2a29bbf607e2e47b88d6be3 Mon Sep 17 00:00:00 2001 From: Abhishek Jindal Date: Fri, 9 Aug 2024 11:25:57 -0700 Subject: [PATCH 27/35] typo --- src/ort_genai_c.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/ort_genai_c.cpp b/src/ort_genai_c.cpp index de8cc048a..f31667d72 100644 --- a/src/ort_genai_c.cpp +++ b/src/ort_genai_c.cpp @@ -225,8 +225,7 @@ OgaResult* OGA_API_CALL OgaGenerator_GetOutput(const OgaGenerator* oga_generator std::cout << "OrtValue Output on CUDA device" << std::endl; cudaMemcpy(ortvalue_clone->GetTensorMutableRawData(), ortvalue_output->GetTensorMutableRawData(), data_size, cudaMemcpyDeviceToHost); #endif - } - else if (ortvalue_output->GetTensorMemoryInfo().GetDeviceType() == OrtMemoryInfoDeviceType_GPU && generator.model_->device_type_ == Generators::DeviceType::DML) { + } else if (ortvalue_output->GetTensorMemoryInfo().GetDeviceType() == OrtMemoryInfoDeviceType_GPU && generator.model_->device_type_ == Generators::DeviceType::DML) { if USE_DML std::cout << "OrtValue Output on DML device" << std::endl; ComPtr gpu_resource; From 19e97748fe7a3271afde08a99eb7b3f23274dff4 Mon Sep 17 00:00:00 2001 From: Abhishek Jindal Date: Fri, 9 Aug 2024 12:00:55 -0700 Subject: [PATCH 28/35] fix typo --- src/ort_genai_c.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ort_genai_c.cpp b/src/ort_genai_c.cpp index f31667d72..025b1917d 100644 --- a/src/ort_genai_c.cpp +++ b/src/ort_genai_c.cpp @@ -226,7 +226,7 @@ OgaResult* OGA_API_CALL OgaGenerator_GetOutput(const OgaGenerator* oga_generator cudaMemcpy(ortvalue_clone->GetTensorMutableRawData(), ortvalue_output->GetTensorMutableRawData(), data_size, cudaMemcpyDeviceToHost); #endif } else if (ortvalue_output->GetTensorMemoryInfo().GetDeviceType() == OrtMemoryInfoDeviceType_GPU && generator.model_->device_type_ == Generators::DeviceType::DML) { -if USE_DML +#if USE_DML std::cout << "OrtValue Output on DML device" << std::endl; ComPtr gpu_resource; Ort::ThrowOnError(generator.model_->GetOrtDmlApi()->GetD3D12ResourceFromAllocation( From 175a5c50b949ff61e1455c5b9fe21d96f0defabb Mon Sep 17 00:00:00 2001 From: Abhishek Jindal Date: Mon, 12 Aug 2024 00:01:30 -0700 Subject: [PATCH 29/35] try more loggin --- src/ort_genai_c.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/ort_genai_c.cpp b/src/ort_genai_c.cpp index 025b1917d..8a2f4f0d7 100644 --- a/src/ort_genai_c.cpp +++ b/src/ort_genai_c.cpp @@ -249,6 +249,11 @@ OgaResult* OGA_API_CALL OgaGenerator_GetOutput(const OgaGenerator* oga_generator // Add else statement for no recognized device found above + for (int i =0; i < 10; i++) { + std::cout << "ORTValue Output:" << i << "act:" << ortvalue_output[i*200] << std::endl; + std::cout << "Copied Output:" << i << "act:" << ortvalue_clone[i*200] << std::endl; + } + std::cout << "Data type:" << type_info->GetElementType() << std::endl; auto tensor = std::make_shared(std::move(ortvalue_clone)); tensor->external_owner_ = tensor; From 692fa3cd60e84bfb92faf76869f94875b43ff15e Mon Sep 17 00:00:00 2001 From: Abhishek Jindal Date: Mon, 12 Aug 2024 00:13:38 -0700 Subject: [PATCH 30/35] modify type --- src/ort_genai_c.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/ort_genai_c.cpp b/src/ort_genai_c.cpp index 8a2f4f0d7..1b16ce3a7 100644 --- a/src/ort_genai_c.cpp +++ b/src/ort_genai_c.cpp @@ -249,9 +249,11 @@ OgaResult* OGA_API_CALL OgaGenerator_GetOutput(const OgaGenerator* oga_generator // Add else statement for no recognized device found above + ortvalue_output->GetTensorMutableData() + for (int i =0; i < 10; i++) { - std::cout << "ORTValue Output:" << i << "act:" << ortvalue_output[i*200] << std::endl; - std::cout << "Copied Output:" << i << "act:" << ortvalue_clone[i*200] << std::endl; + std::cout << "ORTValue Output:" << i << "act:" << static_cast(ortvalue_output->GetTensorMutableData())[i*200] << std::endl; + std::cout << "Copied Output:" << i << "act:" << static_cast(ortvalue_clone->GetTensorMutableData())[i*200] << std::endl; } std::cout << "Data type:" << type_info->GetElementType() << std::endl; From ac841571bda3f36907d01bfcf4e1a45195c05b57 Mon Sep 17 00:00:00 2001 From: Abhishek Jindal Date: Mon, 12 Aug 2024 10:54:24 -0700 Subject: [PATCH 31/35] logits separate ptr --- src/ort_genai_c.cpp | 12 ------------ test/c_api_tests.cpp | 10 +++++----- 2 files changed, 5 insertions(+), 17 deletions(-) diff --git a/src/ort_genai_c.cpp b/src/ort_genai_c.cpp index 1b16ce3a7..96e54cdb9 100644 --- a/src/ort_genai_c.cpp +++ b/src/ort_genai_c.cpp @@ -219,15 +219,12 @@ OgaResult* OGA_API_CALL OgaGenerator_GetOutput(const OgaGenerator* oga_generator // Copy data to ortvalue_clone auto element_size = Generators::SizeOf(type_info->GetElementType()); auto data_size = type_info->GetElementCount() * element_size; - std::cout << "Data count:" << type_info->GetElementCount() << " size:" << element_size << std::endl; if (ortvalue_output->GetTensorMemoryInfo().GetDeviceType() == OrtMemoryInfoDeviceType_GPU && generator.model_->device_type_ == Generators::DeviceType::CUDA) { #if USE_CUDA - std::cout << "OrtValue Output on CUDA device" << std::endl; cudaMemcpy(ortvalue_clone->GetTensorMutableRawData(), ortvalue_output->GetTensorMutableRawData(), data_size, cudaMemcpyDeviceToHost); #endif } else if (ortvalue_output->GetTensorMemoryInfo().GetDeviceType() == OrtMemoryInfoDeviceType_GPU && generator.model_->device_type_ == Generators::DeviceType::DML) { #if USE_DML - std::cout << "OrtValue Output on DML device" << std::endl; ComPtr gpu_resource; Ort::ThrowOnError(generator.model_->GetOrtDmlApi()->GetD3D12ResourceFromAllocation( generator.model_->allocator_device_, @@ -241,7 +238,6 @@ OgaResult* OGA_API_CALL OgaGenerator_GetOutput(const OgaGenerator* oga_generator D3D12_RESOURCE_STATE_UNORDERED_ACCESS); #endif } else if (ortvalue_output->GetTensorMemoryInfo().GetDeviceType() == OrtMemoryInfoDeviceType_CPU) { - std::cout << "OrtValue Output on CPU device" << std::endl; std::copy(static_cast(ortvalue_output->GetTensorMutableRawData()), static_cast(ortvalue_output->GetTensorMutableRawData()) + data_size, static_cast(ortvalue_clone->GetTensorMutableRawData())); @@ -249,14 +245,6 @@ OgaResult* OGA_API_CALL OgaGenerator_GetOutput(const OgaGenerator* oga_generator // Add else statement for no recognized device found above - ortvalue_output->GetTensorMutableData() - - for (int i =0; i < 10; i++) { - std::cout << "ORTValue Output:" << i << "act:" << static_cast(ortvalue_output->GetTensorMutableData())[i*200] << std::endl; - std::cout << "Copied Output:" << i << "act:" << static_cast(ortvalue_clone->GetTensorMutableData())[i*200] << std::endl; - } - - std::cout << "Data type:" << type_info->GetElementType() << std::endl; auto tensor = std::make_shared(std::move(ortvalue_clone)); tensor->external_owner_ = tensor; *out = reinterpret_cast(tensor.get()); diff --git a/test/c_api_tests.cpp b/test/c_api_tests.cpp index 128b3af82..0e20653a7 100644 --- a/test/c_api_tests.cpp +++ b/test/c_api_tests.cpp @@ -206,28 +206,28 @@ TEST(CAPITests, GetOutputCAPI) { 0.2938929f, -0.10538938f, -0.00226692f, 0.12050669f, -0.10622668f}; generator->ComputeLogits(); - auto prompt_logits = static_cast(generator->GetOutput("logits")->Data()); + auto prompt_logits_ptr = generator->GetOutput("logits"); + auto prompt_logits = static_cast(prompt_logits_ptr->Data()); int num_prompt_outputs_to_check = 40; int sample_size = 200; float tolerance = 0.001; // Verify outputs match expected outputs for (int i = 0; i < num_prompt_outputs_to_check; i++) { - std::cout << "Output:" << i << "exp:" << expected_sampled_logits_prompt[i] << " act:" << prompt_logits[i*sample_size] << std::endl; EXPECT_NEAR(expected_sampled_logits_prompt[i], prompt_logits[i*sample_size], tolerance); } - generator->GenerateNextToken(); + generator->GenerateNextToken(); // check for the 1st token generation // full logits has shape [2, 1, 1000]. Sample 1 for every 200 tokens and the expected sampled logits has shape [2, 1, 5] std::vector expected_sampled_logits_token_gen{0.03742531f, -0.05752287f, 0.14159015f, 0.04210977f, -0.1484456f, 0.3041716f, -0.08701379f, -0.03778192f, 0.07471392f, -0.02049096f}; generator->ComputeLogits(); - auto token_gen_logits = static_cast(generator->GetOutput("logits")->Data()); + auto token_gen_logits_ptr = generator->GetOutput("logits"); + auto token_gen_logits = static_cast(token_gen_logits_ptr->Data()); int num_token_gen_outputs_to_check = 10; for (int i = 0; i < num_token_gen_outputs_to_check; i++) { - std::cout << "Output:" << i << "exp:" << expected_sampled_logits_token_gen[i] << " act:" << token_gen_logits[i*sample_size] << std::endl; EXPECT_NEAR(expected_sampled_logits_token_gen[i], token_gen_logits[i*sample_size], tolerance); } generator->GenerateNextToken(); From 38d9e6829e3e89c62cdc3f10b91ae8c752101088 Mon Sep 17 00:00:00 2001 From: Abhishek Jindal Date: Mon, 12 Aug 2024 12:13:03 -0700 Subject: [PATCH 32/35] convert double to float --- test/c_api_tests.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/c_api_tests.cpp b/test/c_api_tests.cpp index 0e20653a7..c0fb71066 100644 --- a/test/c_api_tests.cpp +++ b/test/c_api_tests.cpp @@ -210,7 +210,7 @@ TEST(CAPITests, GetOutputCAPI) { auto prompt_logits = static_cast(prompt_logits_ptr->Data()); int num_prompt_outputs_to_check = 40; int sample_size = 200; - float tolerance = 0.001; + float tolerance = 0.001f; // Verify outputs match expected outputs for (int i = 0; i < num_prompt_outputs_to_check; i++) { EXPECT_NEAR(expected_sampled_logits_prompt[i], prompt_logits[i*sample_size], tolerance); From 5f8d9bf9678d1c4de4b821bb1395c21a7267a22e Mon Sep 17 00:00:00 2001 From: Abhishek Jindal Date: Mon, 12 Aug 2024 23:00:15 -0700 Subject: [PATCH 33/35] add info about the fn --- src/ort_genai_c.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/ort_genai_c.h b/src/ort_genai_c.h index 0e1f12aae..65a0fdd63 100644 --- a/src/ort_genai_c.h +++ b/src/ort_genai_c.h @@ -224,6 +224,11 @@ OGA_EXPORT bool OGA_API_CALL OgaGenerator_IsDone(const OgaGenerator* generator); OGA_EXPORT OgaResult* OGA_API_CALL OgaGenerator_ComputeLogits(OgaGenerator* generator); OGA_EXPORT OgaResult* OGA_API_CALL OgaGenerator_GenerateNextToken(OgaGenerator* generator); +/* + * \brief Runs GetOutput on the name provided and copies the data in another tensor and creates OgaTensor for it + * \param[in] generator The generator to run the GetOutput on the name provided and the out pointer to store the output + * \return OgaResult containing the error message if the computation failed. + */ OGA_EXPORT OgaResult* OGA_API_CALL OgaGenerator_GetOutput(const OgaGenerator* oga_generator, const char* name, OgaTensor** out); /* From cf2d2e7624f507b36be8f81a41f8098a2f590c26 Mon Sep 17 00:00:00 2001 From: Abhishek Jindal Date: Thu, 15 Aug 2024 12:56:01 -0700 Subject: [PATCH 34/35] updata comments --- src/ort_genai_c.cpp | 4 ++-- src/ort_genai_c.h | 7 +++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/ort_genai_c.cpp b/src/ort_genai_c.cpp index 96e54cdb9..a40807845 100644 --- a/src/ort_genai_c.cpp +++ b/src/ort_genai_c.cpp @@ -241,10 +241,10 @@ OgaResult* OGA_API_CALL OgaGenerator_GetOutput(const OgaGenerator* oga_generator std::copy(static_cast(ortvalue_output->GetTensorMutableRawData()), static_cast(ortvalue_output->GetTensorMutableRawData()) + data_size, static_cast(ortvalue_clone->GetTensorMutableRawData())); + } else { + throw std::runtime_error("Unsupported Device type: " + ortvalue_output->GetTensorMemoryInfo().GetDeviceType()); } - // Add else statement for no recognized device found above - auto tensor = std::make_shared(std::move(ortvalue_clone)); tensor->external_owner_ = tensor; *out = reinterpret_cast(tensor.get()); diff --git a/src/ort_genai_c.h b/src/ort_genai_c.h index 65a0fdd63..76ccc7993 100644 --- a/src/ort_genai_c.h +++ b/src/ort_genai_c.h @@ -225,7 +225,7 @@ OGA_EXPORT OgaResult* OGA_API_CALL OgaGenerator_ComputeLogits(OgaGenerator* gene OGA_EXPORT OgaResult* OGA_API_CALL OgaGenerator_GenerateNextToken(OgaGenerator* generator); /* - * \brief Runs GetOutput on the name provided and copies the data in another tensor and creates OgaTensor for it + * \brief Returns a copy of the model output identified by the given name as an OgaTensor on CPU * \param[in] generator The generator to run the GetOutput on the name provided and the out pointer to store the output * \return OgaResult containing the error message if the computation failed. */ @@ -280,10 +280,9 @@ OGA_EXPORT void OGA_API_CALL OgaDestroyTokenizerStream(OgaTokenizerStream*); */ OGA_EXPORT OgaResult* OGA_API_CALL OgaTokenizerStreamDecode(OgaTokenizerStream*, int32_t token, const char** out); -/* Create an OgaTensor from a user owned buffer. The OgaTensor does not own the memory (as it has no way to free it) so - * the 'data' parameter must be valid for the lifetime of the OgaTensor. +/* Create an OgaTensor from a user owned buffer. * - * \param[in] data User supplied memory pointer, must remain valid for lifetime of the OgaTensor + * \param[in] data Pointer to store the data * \param[in] shape_dims Pointer to array of int64_t values that define the tensor shape, example [1 20 30] would be equivalent to a C array of [1][20][30] * \param[in] shape_dims_count Count of elements in the shape_dims array * \param[in] element_type The data type that 'data' points to. From 2887997f6a897dbea3a24e280e148da5fbb7e9d1 Mon Sep 17 00:00:00 2001 From: Abhishek Jindal Date: Thu, 15 Aug 2024 13:08:01 -0700 Subject: [PATCH 35/35] test for DML --- src/ort_genai_c.h | 8 +++++--- test/c_api_tests.cpp | 2 -- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ort_genai_c.h b/src/ort_genai_c.h index 76ccc7993..7b1f084c2 100644 --- a/src/ort_genai_c.h +++ b/src/ort_genai_c.h @@ -225,7 +225,8 @@ OGA_EXPORT OgaResult* OGA_API_CALL OgaGenerator_ComputeLogits(OgaGenerator* gene OGA_EXPORT OgaResult* OGA_API_CALL OgaGenerator_GenerateNextToken(OgaGenerator* generator); /* - * \brief Returns a copy of the model output identified by the given name as an OgaTensor on CPU + * \brief Returns a copy of the model output identified by the given name as an OgaTensor on CPU. The buffer is owned by returned OgaTensor + * and will be released when the OgaTensor is destroyed * \param[in] generator The generator to run the GetOutput on the name provided and the out pointer to store the output * \return OgaResult containing the error message if the computation failed. */ @@ -280,9 +281,10 @@ OGA_EXPORT void OGA_API_CALL OgaDestroyTokenizerStream(OgaTokenizerStream*); */ OGA_EXPORT OgaResult* OGA_API_CALL OgaTokenizerStreamDecode(OgaTokenizerStream*, int32_t token, const char** out); -/* Create an OgaTensor from a user owned buffer. +/* Create an OgaTensor from a user owned buffer. The OgaTensor does not own the memory (as it has no way to free it) so + * the 'data' parameter must be valid for the lifetime of the OgaTensor. * - * \param[in] data Pointer to store the data + * \param[in] data User supplied memory pointer, must remain valid for lifetime of the OgaTensor * \param[in] shape_dims Pointer to array of int64_t values that define the tensor shape, example [1 20 30] would be equivalent to a C array of [1][20][30] * \param[in] shape_dims_count Count of elements in the shape_dims array * \param[in] element_type The data type that 'data' points to. diff --git a/test/c_api_tests.cpp b/test/c_api_tests.cpp index c0fb71066..8e8cc13cb 100644 --- a/test/c_api_tests.cpp +++ b/test/c_api_tests.cpp @@ -173,7 +173,6 @@ TEST(CAPITests, GreedySearchGptFp32CAPI) { } #endif -#if !USE_DML TEST(CAPITests, GetOutputCAPI) { std::vector input_ids_shape{2, 4}; std::vector input_ids{0, 0, 0, 52, 0, 0, 195, 731}; @@ -232,7 +231,6 @@ TEST(CAPITests, GetOutputCAPI) { } generator->GenerateNextToken(); } -#endif #if TEST_PHI2