Merge branch 'main' into mkulakow/llm_model_context_length
michalkulakowski authored Nov 27, 2024
2 parents 152eb3c + c880550 commit d47ae16
Showing 66 changed files with 1,148 additions and 278 deletions.
3 changes: 1 addition & 2 deletions .bazelrc
@@ -122,7 +122,6 @@ build:windows --define=use_fast_cpp_protos=true
build:windows --define=allow_oversize_protos=true

build:windows --spawn_strategy=standalone
build:windows -c opt

# Make Bazel print out all options from rc files.
build:windows --announce_rc
@@ -288,5 +287,5 @@ build:coverity --disk_cache=

# Windows config default flags
build:windows --define=CLOUD_DISABLE=1
build:windows --define=PYTHON_DISABLE=1
build:windows --define=PYTHON_DISABLE=0
build:windows --define=MEDIAPIPE_DISABLE=0
2 changes: 1 addition & 1 deletion BUILD.bazel
@@ -77,7 +77,7 @@ cc_library(
})
+ select({
"//:not_disable_python": [
"@python3_linux//:python3-lib",
"//third_party:python3",
"@pybind11//:pybind11_embed",
],
"//:disable_python": []
17 changes: 17 additions & 0 deletions WORKSPACE
@@ -33,6 +33,14 @@ bazel_skylib_workspace()
load("@bazel_skylib//lib:versions.bzl", "versions")
versions.check(minimum_bazel_version = "6.0.0")

http_archive(
name = "zlib",
build_file = "@mediapipe//third_party:zlib.BUILD",
sha256 = "9a93b2b7dfdac77ceba5a558a580e74667dd6fede4585b91eefb60f03b72df23",
strip_prefix = "zlib-1.3.1",
url = "http://zlib.net/fossils/zlib-1.3.1.tar.gz",
)

# RapidJSON
# Must be defined earlier than tensorflow_serving because TFS is using older rapidjson
# Version must match openvino.genai -> jinja2cpp -> rapidjson
@@ -235,12 +243,21 @@ new_local_repository(
load("@//third_party/python:python_repo.bzl", "python_repository")
python_repository(name = "_python3-linux")

load("@//third_party/python:python_repo_win.bzl", "python_repository")
python_repository(name = "_python3-windows")

new_local_repository(
name = "python3_linux",
path = "/usr",
build_file = "@_python3-linux//:BUILD"
)

new_local_repository(
name = "python3_windows",
path = "C:\\opt\\",
build_file = "@_python3-windows//:BUILD"
)
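The `python_repository` rules above generate the BUILD files that wrap the local Python installations. The generated files are not part of this diff; a hypothetical sketch of what the Windows one might expose (the target name and install layout under `C:\opt` are assumptions, not actual output):
```python
# Hypothetical sketch of a BUILD file @_python3-windows might generate --
# names and paths are assumptions, not the rule's actual output.
cc_library(
    name = "python3-lib",
    srcs = ["Python39/libs/python39.lib"],      # assumed layout under C:\opt
    hdrs = glob(["Python39/include/**/*.h"]),
    includes = ["Python39/include"],
    visibility = ["//visibility:public"],
)
```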

http_archive(
name = "pybind11_bazel",
strip_prefix = "pybind11_bazel-b162c7c88a253e3f6b673df0c621aca27596ce6b",
6 changes: 5 additions & 1 deletion build_windows.bat
@@ -22,7 +22,8 @@ set "bazelStartupCmd=--output_user_root=%BAZEL_SHORT_PATH%"

set "buildCommand=bazel %bazelStartupCmd% build --config=windows --jobs=%NUMBER_OF_PROCESSORS% --verbose_failures //src:ovms 2>&1 | tee win_build.log"
set "buildTestCommand=bazel %bazelStartupCmd% build --config=windows --jobs=%NUMBER_OF_PROCESSORS% --verbose_failures //src:ovms_test 2>&1 | tee win_build_test.log"
set "runTest=%cd%\bazel-bin\src\ovms_test.exe --gtest_filter=-OvmsConfigTest 2>&1 | tee win_full_test.log"
set "changeConfigsCmd=windows_change_test_configs.py"
set "runTest=%cd%\bazel-bin\src\ovms_test.exe --gtest_filter=* 2>&1 | tee win_full_test.log"

:: Setting PATH environment variable based on default windows node settings: Added ovms_windows specific python settings and c:/opt and removed unused Nvidia and OCL specific tools.
:: When changing the values here you can print the node default PATH value and base your changes on it.
@@ -49,6 +50,9 @@ set > %envPath%
:: Start bazel build test
%buildTestCommand%

:: Change tests configs to windows paths
%changeConfigsCmd%

:: Start unit test
%runTest%
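The script `windows_change_test_configs.py` invoked above is not included in this diff; per the comment it rewrites test configs to Windows paths before the unit tests run. A minimal sketch of what such a script might do (file locations and path mapping are assumptions):
```python
# Hypothetical sketch of windows_change_test_configs.py -- the real script
# is not shown here. It would rewrite Linux-style test paths to Windows ones.
import glob

for path in glob.glob("src/test/**/*.json", recursive=True):  # assumed location
    with open(path) as f:
        text = f.read()
    # Assumed mapping; forward slashes keep the JSON escaping simple.
    new_text = text.replace("/ovms/src/test/", "C:/git/model_server/src/test/")
    if new_text != text:
        with open(path, "w") as f:
            f.write(new_text)
```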

6 changes: 3 additions & 3 deletions ci/cppclean.sh
@@ -39,10 +39,10 @@ errors=""
if [ ${NO_WARNINGS_FORWARD} -gt 9 ]; then
errors+="Failed due to not using forward declarations where possible: ${NO_WARNINGS_FORWARD}"$'\n'
fi
if [ ${NO_WARNINGS_DIRECT} -gt 20 ]; then
if [ ${NO_WARNINGS_DIRECT} -gt 21 ]; then
errors+="Failed probably due to not using static keyword with functions definitions: ${NO_WARNINGS_DIRECT}"$'\n'
fi
if [ ${NO_WARNINGS_NOTUSED} -gt 5 ]; then
if [ ${NO_WARNINGS_NOTUSED} -gt 6 ]; then
errors+="Failed probably due to unnecessary forward includes: ${NO_WARNINGS_NOTUSED}"$'\n'
fi
if [ ${NO_WARNINGS_TEST_FORWARD} -gt 1 ]; then
@@ -54,7 +54,7 @@ fi
if [ ${NO_WARNINGS_TEST_NOTUSED} -gt 0 ]; then
errors+="Failed probably due to unnecessary forward includes: ${NO_WARNINGS_TEST_NOTUSED}"$'\n'
fi
if [ ${NO_WARNINGS} -gt 193 ]; then
if [ ${NO_WARNINGS} -gt 194 ]; then
errors+="Failed due to higher than allowed number of issues in code: ${NO_WARNINGS}"$'\n'
fi
if [ ${NO_WARNINGS_TEST} -gt 52 ]; then
Expand Down
8 changes: 4 additions & 4 deletions ci/loadWin.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,13 @@ def check_tests(){

status = bat(returnStatus: true, script: 'grep " OK " win_test.log')
if (status != 0) {
error "Error: Windows run test failed ${status}. Check win_test.log for details."
error "Error: Windows run test failed ${status}. Expecting passed tests and no passed tests detected. Check win_test.log for details."
}

// TODO Windows: Currently some tests fail change to no fail when fixed.
status = bat(returnStatus: true, script: 'grep " FAILED " win_test.log')
if (status != 0) {
error "Error: Windows run test failed ${status}. Check win_test.log for details."
if (status == 0) {
def failed = bat(returnStatus: false, returnStdout: true, script: 'grep " FAILED " win_test.log | wc -l')
error "Error: Windows run test failed ${status}. ${failed} failed tests . Check win_test.log for details."
} else {
echo "Run test successful."
}
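The corrected condition hinges on grep's exit code: 0 means a match was found, so `status == 0` on ` FAILED ` is the failure case (the old `status != 0` had it backwards). The same check as a standalone Python sketch, assuming grep is on PATH and using the log name from the pipeline above:
```python
# Mirrors the corrected Groovy check: grep exits 0 on a match, so finding
# " FAILED " lines means the run failed; finding no " OK " lines also fails.
import subprocess
import sys

ok = subprocess.run(["grep", " OK ", "win_test.log"], capture_output=True)
if ok.returncode != 0:
    sys.exit("Error: expecting passed tests and no passed tests detected.")

failed = subprocess.run(["grep", " FAILED ", "win_test.log"],
                        capture_output=True, text=True)
if failed.returncode == 0:
    count = len(failed.stdout.splitlines())
    sys.exit(f"Error: {count} failed tests. Check win_test.log for details.")
print("Run test successful.")
```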
4 changes: 2 additions & 2 deletions client/cpp/kserve-api/README.md
@@ -388,7 +388,7 @@ Usage:
```Bash
./http_model_metadata --http_port 8000 --http_address localhost
{"name":"dummy","versions":["1"],"platform":"OpenVINO","inputs":[{"name":"b","datatype":"FP32","shape":[1,10]}],"outputs":[{"name":"a","datatype":"FP32","shape":[1,10]}]}
{"name":"dummy","versions":["1"],"platform":"OpenVINO","inputs":[{"name":"b","datatype":"FP32","shape":[1,10]}],"outputs":[{"name":"a","datatype":"FP32","shape":[1,10]}],"rt_info":{"model_info":{"precision":"FP16","resolution":{"height":"200","width":"300"}}}}
```
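The updated output shows that model metadata can now include the model's `rt_info`. A minimal Python sketch fetching the same metadata over the KServe REST API (server address and model name taken from the example above):
```python
# Fetch model metadata and read the rt_info section shown in this change.
import json
import urllib.request

with urllib.request.urlopen("http://localhost:8000/v2/models/dummy") as resp:
    metadata = json.load(resp)

# rt_info appears only for models that define runtime info.
print(metadata.get("rt_info", {}).get("model_info", {}))
```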
### Run the Client to perform inference
@@ -644,4 +644,4 @@ Number of requests: 10
Total processing time: 178 ms
Latency: 42.5398 ms
Requests per second: 23.5074
```
```
2 changes: 1 addition & 1 deletion client/python/kserve-api/samples/README.md
@@ -955,4 +955,4 @@ imagenet top results in a single batch:
imagenet top results in a single batch:
0 zebra 340 ; Correct match.
Classification accuracy: 100.00
```
```
9 changes: 8 additions & 1 deletion common_settings.bzl
@@ -80,11 +80,18 @@ def create_config_settings():
negate = ":fuzzer_build",
)

# is windows or mediapipe is disabled (no_http dependency)
selects.config_setting_group(
name = "is_windows_or_mediapipe_is_disabled_no_http",
match_any = ["//src:windows", "//:disable_mediapipe"]
)

# is windows or python is disabled"(no llm dependency)
selects.config_setting_group(
name = "is_windows_or_python_is_disabled_no_llm",
match_any = ["//src:windows", "//:disable_python"]
)


###############################
# compilation settings
@@ -165,6 +172,6 @@ COMMON_FUZZER_LINKOPTS = [
]
COMMON_LOCAL_DEFINES = ["SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_TRACE"]
PYBIND_DEPS = [
"@python3_linux//:python3-lib",
"//third_party:python3",
"@pybind11//:pybind11_embed",
]
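The new config setting groups let a single `select()` drop whole dependency sets on Windows or when Python is disabled. A hypothetical usage sketch in Starlark (the dependency label is an assumption, not a target from this diff):
```python
# Hypothetical BUILD usage of the new setting group.
cc_library(
    name = "llm_support",
    deps = select({
        "//:is_windows_or_python_is_disabled_no_llm": [],   # skip LLM deps
        "//conditions:default": ["//src/llm:llm_engine"],   # assumed label
    }),
)
```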
3 changes: 2 additions & 1 deletion demos/continuous_batching/accuracy/README.md
@@ -10,12 +10,13 @@ It reports end to end quality of served model from the client application point
Install the framework via:
```bash
export PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"
pip3 install lm_eval[api]
pip3 install lm_eval[api] langdetect immutabledict
```
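Once the model is served (next section), `lm_eval` can target the OpenAI-compatible endpoint. A hedged sketch via the harness's Python API — the backend name and arguments are assumptions based on typical `lm_eval` usage, not taken from this demo:
```python
# Assumed lm_eval usage against an OpenAI-compatible endpoint; verify the
# backend name and model_args against your installed lm_eval version.
import lm_eval

results = lm_eval.simple_evaluate(
    model="local-chat-completions",
    model_args=(
        "model=meta-llama/Meta-Llama-3-8B-Instruct,"
        "base_url=http://localhost:8000/v3/chat/completions"
    ),
    tasks=["gsm8k"],
    limit=10,  # small sample as a smoke test
)
print(results["results"])
```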

## Exporting the models and starting the model server
```bash
git clone https://github.com/openvinotoolkit/model_server.git
cd model_server
pip3 install -U -r demos/common/export_models/requirements.txt
mkdir models
python demos/common/export_models/export_model.py text_generation --source_model meta-llama/Meta-Llama-3-8B-Instruct --weight-format fp16 --kv_cache_precision u8 --config_file_path models/config.json --model_repository_path models
12 changes: 6 additions & 6 deletions demos/continuous_batching/scaling/README.md
@@ -11,17 +11,17 @@ It deploys two instances of the model server allocated to difference CPU sockets
## Start the Model Server instances

Let's assume we have two CPU sockets server with two NUMA nodes.
```
```bash
lscpu | grep NUMA
NUMA node(s): 2
NUMA node0 CPU(s): 0-31,64-95
NUMA node1 CPU(s): 32-63,96-127
```
Following the prework from [demo](../README.md) start the instances like below:
```
docker run --cpuset-cpus $(lscpu | grep node0 | cut -d: -f2) -d --rm -p 8003:8003 -v $(pwd)/:/workspace:ro openvino/model_server:latest --rest_port 8003 --config_path /workspace/config.json
```bash
docker run --cpuset-cpus $(lscpu | grep node0 | cut -d: -f2) -d --rm -p 8003:8003 -v $(pwd)/models:/workspace:ro openvino/model_server:latest --rest_port 8003 --config_path /workspace/config.json

docker run --cpuset-cpus $(lscpu | grep node1 | cut -d: -f2) -d --rm -p 8004:8004 -v $(pwd)/:/workspace:ro openvino/model_server:latest --rest_port 8004 --config_path /workspace/config.json
docker run --cpuset-cpus $(lscpu | grep node1 | cut -d: -f2) -d --rm -p 8004:8004 -v $(pwd)/models:/workspace:ro openvino/model_server:latest --rest_port 8004 --config_path /workspace/config.json
```
Confirm in logs if the containers loaded the models successfully.

@@ -42,14 +42,14 @@ stream {
}
```
Start the Nginx container with:
```
```bash
docker run -v $(pwd)/nginx.conf:/etc/nginx/nginx.conf:ro -d --net=host -p 80:80 nginx
```
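Before benchmarking, you can confirm the balancer forwards requests to the instances — a minimal Python smoke test (model name and endpoint path as used in the benchmark command below):
```python
# Send a few requests through the nginx balancer; with least_conn they
# should be spread across both model server instances.
import json
import urllib.request

payload = json.dumps({
    "model": "meta-llama/Meta-Llama-3-8B-Instruct",
    "messages": [{"role": "user", "content": "Say hello"}],
    "max_tokens": 8,
}).encode()

for i in range(4):
    req = urllib.request.Request(
        "http://localhost:80/v3/chat/completions", data=payload,
        headers={"Content-Type": "application/json"})
    with urllib.request.urlopen(req) as resp:
        print(i, resp.status)
```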

## Testing the scalability

Start benchmarking script like in [demo](../README.md), pointing to the load balancer port and host.
```
```bash
python benchmark_serving.py --host localhost --port 80 --endpoint /v3/chat/completions --backend openai-chat --model meta-llama/Meta-Llama-3-8B-Instruct --dataset-path ShareGPT_V3_unfiltered_cleaned_split.json --num-prompts 2000 --request-rate inf --save-result --seed 10
Initial test run completed. Starting main benchmark run...
Traffic request rate: inf
6 changes: 6 additions & 0 deletions demos/embeddings/README.md
@@ -8,6 +8,12 @@ Text generation use case is exposed via OpenAI API `embeddings` endpoint.
Here, the original Pytorch LLM model and the tokenizer will be converted to IR format and optionally quantized.
That ensures faster initialization time, better performance and lower memory consumption.

Clone model server repository:
```bash
git clone https://github.com/openvinotoolkit/model_server.git
cd model_server
```

Install python dependencies for the conversion script:
```bash
pushd .
10 changes: 8 additions & 2 deletions demos/rerank/README.md
@@ -5,6 +5,12 @@
Here, the original Pytorch LLM model and the tokenizer will be converted to IR format and optionally quantized.
That ensures faster initialization time, better performance and lower memory consumption.

Clone model server repository:
```bash
git clone https://github.com/openvinotoolkit/model_server.git
cd model_server
```

Install python dependencies for the conversion script:
```bash
pip3 install -r demos/common/export_models/requirements.txt
@@ -46,7 +52,7 @@ docker run -d --rm -p 8000:8000 -v $(pwd)/models:/workspace:ro openvino/model_se

Readiness of the model can be reported with a simple curl command.
```bash
curl -i http://localhost:8000/v3/models/BAAI%2Fbge-reranker-large/ready
curl -i http://localhost:8000/v2/models/BAAI%2Fbge-reranker-large/ready
HTTP/1.1 200 OK
Content-Type: application/json
Date: Sat, 09 Nov 2024 23:19:27 GMT
@@ -57,7 +63,7 @@ Content-Length: 0
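The rerank endpoint scores each candidate document against the query. For reference, the same request as the curl call below, as a minimal Python sketch (assumes the demo server on localhost:8000):
```python
# Python equivalent of the curl rerank request below.
import json
import urllib.request

payload = json.dumps({
    "model": "BAAI/bge-reranker-large",
    "query": "welcome",
    "documents": ["good morning", "farewell"],
}).encode()

req = urllib.request.Request(
    "http://localhost:8000/v3/rerank", data=payload,
    headers={"Content-Type": "application/json"})
with urllib.request.urlopen(req) as resp:
    print(json.load(resp))  # one relevance score per document
```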


```bash
curl http://localhost:8000/v2/rerank -H "Content-Type: application/json" \
curl http://localhost:8000/v3/rerank -H "Content-Type: application/json" \
-d '{ "model": "BAAI/bge-reranker-large", "query": "welcome", "documents":["good morning","farewell"]}' | jq .
```
```json
4 changes: 2 additions & 2 deletions docs/windows_binary_guide.md
@@ -32,7 +32,7 @@ md c:\opt
Visual Studio 2019 with C++ - https://visualstudio.microsoft.com/downloads/

## PYTHON: https://www.python.org/ftp/python/3.9.0/python-3.9.0-amd64.exe in C:\opt\Python39
Python3. (Python 3.11.9 is tested)
Python3.9
```
pip install numpy==1.23
```
@@ -56,7 +56,7 @@ Open cmd.exe in c:\opt
md test\model\1
C:\opt\intel\openvino_2024\setupvars.bat
C:\opt\opencv\build\setup_vars_opencv4.cmd
xcopy /r /s /e /Y ovms.exe c:\opt\test
xcopy /r /Y ovms.exe c:\opt\test
cd c:\opt\test
wget https://www.kaggle.com/api/v1/models/tensorflow/faster-rcnn-resnet-v1/tensorFlow2/faster-rcnn-resnet50-v1-640x640/1/download -O 1.tar.gz
tar xzf 1.tar.gz -C model\1
26 changes: 23 additions & 3 deletions docs/windows_developer_guide.md
@@ -30,7 +30,7 @@ md c:\opt
Visual Studio 2019 with C++ - https://visualstudio.microsoft.com/downloads/

## PYTHON: https://www.python.org/ftp/python/3.9.0/python-3.9.0-amd64.exe in C:\opt\Python39
Python3. (Python 3.11.9 is tested)
Python3.9
pip install numpy==1.23
make sure you install numpy for the python version you pass as build argument
make sure default "python --version" gets you 3.9
@@ -56,6 +56,14 @@ nvm use 22.9.0
npm cache clean --force
```

# Building without proxy
Clear the Windows proxy environment variables when building without a proxy:
```
set HTTP_PROXY=
set HTTPS_PROXY=
```
Also remove the proxy settings from your .gitconfig.

If you want to compile without proxy, npm proxy needs to be reset:
```
set http_proxy=
@@ -66,9 +74,21 @@ npm i --global yarn
yarn
```

## Building with proxy
Set the Windows proxy environment variables when building behind a proxy:
```
set HTTP_PROXY=my.proxy.com:123
set HTTPS_PROXY=my.proxy.com:122
```

## OPENCV install to - "C:\\opt\\opencv\\"
https://github.com/opencv/opencv/releases/download/4.10.0/opencv-4.10.0-windows.exe

# OPENCV contrib for optflow
cd c:\opt
git clone https://github.com/opencv/opencv_contrib.git
xcopy /s /r /Y opencv_contrib\modules\optflow\include\opencv2\* C:\opt\opencv\build\include\opencv2

## WGET
https://eternallybored.org/misc/wget/1.21.4/64/wget.exe download to c:\opt
Add c:\opt to system env PATH
@@ -97,7 +117,7 @@ cd model_server

## COMPILE
```
bazel build --config=windows --jobs=8 --subcommands --repo_env PYTHON_BIN_PATH=C:/opt/Python39/python.exe --verbose_failures --define CLOUD_DISABLE=1 --define MEDIAPIPE_DISABLE=1 --define PYTHON_DISABLE=1 //src:ovms > compilation.log 2>&1
bazel build --config=windows --jobs=8 --subcommands --repo_env PYTHON_BIN_PATH=C:/opt/Python39/python.exe --verbose_failures --define CLOUD_DISABLE=1 --define MEDIAPIPE_DISABLE=0 --define PYTHON_DISABLE=0 //src:ovms > compilation.log 2>&1
```

## To run ovms in developer command line
@@ -113,7 +133,7 @@ bazel-out\x64_windows-opt\bin\src\ovms.exe --help
Open cmd.exe in c:\opt
```
md test\model\1
xcopy /r /s /e /Y C:\git\model_server\bazel-out\x64_windows-opt\bin\src\ovms.exe c:\opt\test
xcopy /r /Y C:\git\model_server\bazel-out\x64_windows-opt\bin\src\ovms.exe c:\opt\test
c:\opt\intel\openvino_2024\setupvars.bat
C:\opt\opencv\build\setup_vars_opencv4.cmd
cd c:\opt\test
13 changes: 7 additions & 6 deletions spelling-whitelist.txt
@@ -9,14 +9,15 @@ src/shape.cpp:438: strIn
src/shape.cpp:488: strIn
src/shape.cpp:507: strIn
src/shape.hpp:121: strIn
src/test/modelconfig_test.cpp:472: OptionA
src/test/modelconfig_test.cpp:478: OptionA
src/test/modelconfig_test.cpp:484: OptionA
src/test/modelconfig_test.cpp:490: OptionA
src/test/modelconfig_test.cpp:496: OptionA
src/test/modelconfig_test.cpp:502: OptionA
src/test/modelconfig_test.cpp:473: OptionA
src/test/modelconfig_test.cpp:479: OptionA
src/test/modelconfig_test.cpp:485: OptionA
src/test/modelconfig_test.cpp:491: OptionA
src/test/modelconfig_test.cpp:497: OptionA
src/test/modelconfig_test.cpp:503: OptionA
src/test/modelinstance_test.cpp:1093: THROUGHTPUT
third_party/aws-sdk-cpp/aws-sdk-cpp.bz
third_party/llm_engine/llm_engine.bzl
WORKSPACE:39: thirdparty
WORKSPACE:47: thirdparty
demos/classification_using_paddlepaddle_model/python/utils/imagenet_class_index.json