VLM: add get_tokenizer() (openvinotoolkit#977)
Co-authored-by: Ilya Lavrenov <[email protected]>
Wovchena and ilya-lavrenov authored Oct 15, 2024
1 parent 2d144b1 commit bc73e62
Showing 4 changed files with 14 additions and 0 deletions.
src/cpp/include/openvino/genai/visual_language/pipeline.hpp (3 additions & 0 deletions)
@@ -89,6 +89,9 @@ class OPENVINO_GENAI_EXPORTS VLMPipeline {
     /// model chat_template.
     /// @param new_template A new template to override with.
     void set_chat_template(const std::string& new_template);
+    /// @brief Get a Tokenizer used to tokenize input and detokenize
+    /// output.
+    ov::genai::Tokenizer get_tokenizer() const;
     /// @brief Extract GenerationConfig used to get default values.
     /// @return Default values used.
     GenerationConfig get_generation_config() const;
src/cpp/src/visual_language/pipeline.cpp (8 additions & 0 deletions)
@@ -527,6 +527,10 @@ class ov::genai::VLMPipeline::VLMPipelineImpl {
 
     void finish_chat() {m_is_chat_conversation = false;}
 
+    Tokenizer get_tokenizer() const {
+        return m_tokenizer;
+    }
+
     void set_chat_template(const std::string& new_template) {
         m_tokenizer.set_chat_template(new_template);
     }
@@ -773,6 +777,10 @@ void VLMPipeline::set_chat_template(const std::string& new_template) {
     m_pimpl->set_chat_template(new_template);
 }
 
+Tokenizer VLMPipeline::get_tokenizer() const {
+    return m_pimpl->get_tokenizer();
+}
+
 GenerationConfig VLMPipeline::get_generation_config() const {
     return m_pimpl->get_generation_config();
 }
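For context, a minimal C++ sketch of how a caller might use the new accessor. This is an illustration, not part of the commit: the model directory, device, and prompt are placeholders, and it assumes the existing ov::genai::Tokenizer encode()/decode() overloads (encode() returning TokenizedInputs, decode() accepting its input_ids tensor and returning one string per batch row).

#include "openvino/genai/visual_language/pipeline.hpp"

#include <iostream>

int main() {
    // Placeholder path to a locally exported VLM; device choice is arbitrary.
    ov::genai::VLMPipeline pipe("./vlm_model_dir", "CPU");

    // New in this commit: expose the tokenizer the pipeline already owns.
    ov::genai::Tokenizer tokenizer = pipe.get_tokenizer();

    // Round-trip a prompt: encode() yields TokenizedInputs; decoding the
    // input_ids tensor (a batch of one) recovers the text.
    ov::genai::TokenizedInputs tokens = tokenizer.encode("What is on the picture?");
    std::cout << tokenizer.decode(tokens.input_ids).front() << '\n';
    return 0;
}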
src/python/py_vlm_pipeline.cpp (1 addition & 0 deletions)
@@ -120,6 +120,7 @@ void init_vlm_pipeline(py::module_& m) {
 
         .def("start_chat", &ov::genai::VLMPipeline::start_chat, py::arg("system_message") = "")
         .def("finish_chat", &ov::genai::VLMPipeline::finish_chat)
+        .def("get_tokenizer", &ov::genai::VLMPipeline::get_tokenizer)
         .def("get_generation_config", &ov::genai::VLMPipeline::get_generation_config)
         .def("set_generation_config", &ov::genai::VLMPipeline::set_generation_config)
         .def(
tests/python_tests/test_vlm_api.py (2 additions & 0 deletions)
@@ -105,6 +105,8 @@ def streamer(word: str) -> bool:
 
     pipe.finish_chat()
     gc.collect()
+    tokenizer = pipe.get_tokenizer()
+    tokenizer.encode("")
     del pipe
     gc.collect()
 
