Merge branch 'main' into mkulakow/llm_model_context_length
michalkulakowski authored Nov 27, 2024
2 parents 152eb3c + c880550 commit d47ae16
Showing 66 changed files with 1,148 additions and 278 deletions.
3 changes: 1 addition & 2 deletions .bazelrc
@@ -122,7 +122,6 @@ build:windows --define=use_fast_cpp_protos=true
build:windows --define=allow_oversize_protos=true

build:windows --spawn_strategy=standalone
build:windows -c opt

# Make Bazel print out all options from rc files.
build:windows --announce_rc
@@ -288,5 +287,5 @@ build:coverity --disk_cache=

# Windows config default flags
build:windows --define=CLOUD_DISABLE=1
build:windows --define=PYTHON_DISABLE=1
build:windows --define=PYTHON_DISABLE=0
build:windows --define=MEDIAPIPE_DISABLE=0
2 changes: 1 addition & 1 deletion BUILD.bazel
@@ -77,7 +77,7 @@ cc_library(
})
+ select({
"//:not_disable_python": [
"@python3_linux//:python3-lib",
"//third_party:python3",
"@pybind11//:pybind11_embed",
],
"//:disable_python": []
17 changes: 17 additions & 0 deletions WORKSPACE
@@ -33,6 +33,14 @@ bazel_skylib_workspace()
load("@bazel_skylib//lib:versions.bzl", "versions")
versions.check(minimum_bazel_version = "6.0.0")

http_archive(
name = "zlib",
build_file = "@mediapipe//third_party:zlib.BUILD",
sha256 = "9a93b2b7dfdac77ceba5a558a580e74667dd6fede4585b91eefb60f03b72df23",
strip_prefix = "zlib-1.3.1",
url = "http://zlib.net/fossils/zlib-1.3.1.tar.gz",
)

# RapidJSON
# Must be defined earlier than tensorflow_serving because TFS is using older rapidjson
# Version must match openvino.genai -> jinja2cpp -> rapidjson
@@ -235,12 +243,21 @@ new_local_repository(
load("@//third_party/python:python_repo.bzl", "python_repository")
python_repository(name = "_python3-linux")

load("@//third_party/python:python_repo_win.bzl", "python_repository")
python_repository(name = "_python3-windows")

new_local_repository(
name = "python3_linux",
path = "/usr",
build_file = "@_python3-linux//:BUILD"
)

new_local_repository(
name = "python3_windows",
path = "C:\\opt\\",
build_file = "@_python3-windows//:BUILD"
)
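The `python_repository` rules above generate the BUILD files that wrap the local Python installations. The generated files are not part of this diff; a hypothetical sketch of what the Windows one might expose (the target name and install layout under `C:\opt` are assumptions, not actual output):
```python
# Hypothetical sketch of a BUILD file @_python3-windows might generate --
# names and paths are assumptions, not the rule's actual output.
cc_library(
    name = "python3-lib",
    srcs = ["Python39/libs/python39.lib"],      # assumed layout under C:\opt
    hdrs = glob(["Python39/include/**/*.h"]),
    includes = ["Python39/include"],
    visibility = ["//visibility:public"],
)
```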

http_archive(
name = "pybind11_bazel",
strip_prefix = "pybind11_bazel-b162c7c88a253e3f6b673df0c621aca27596ce6b",
6 changes: 5 additions & 1 deletion build_windows.bat
@@ -22,7 +22,8 @@ set "bazelStartupCmd=--output_user_root=%BAZEL_SHORT_PATH%"

set "buildCommand=bazel %bazelStartupCmd% build --config=windows --jobs=%NUMBER_OF_PROCESSORS% --verbose_failures //src:ovms 2>&1 | tee win_build.log"
set "buildTestCommand=bazel %bazelStartupCmd% build --config=windows --jobs=%NUMBER_OF_PROCESSORS% --verbose_failures //src:ovms_test 2>&1 | tee win_build_test.log"
set "runTest=%cd%\bazel-bin\src\ovms_test.exe --gtest_filter=-OvmsConfigTest 2>&1 | tee win_full_test.log"
set "changeConfigsCmd=windows_change_test_configs.py"
set "runTest=%cd%\bazel-bin\src\ovms_test.exe --gtest_filter=* 2>&1 | tee win_full_test.log"

:: Setting PATH environment variable based on default windows node settings: Added ovms_windows specific python settings and c:/opt and removed unused Nvidia and OCL specific tools.
:: When changing the values here you can print the node default PATH value and base your changes on it.
@@ -49,6 +50,9 @@ set > %envPath%
:: Start bazel build test
%buildTestCommand%

:: Change tests configs to windows paths
%changeConfigsCmd%

:: Start unit test
%runTest%
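The script `windows_change_test_configs.py` invoked above is not included in this diff; per the comment it rewrites test configs to Windows paths before the unit tests run. A minimal sketch of what such a script might do (file locations and path mapping are assumptions):
```python
# Hypothetical sketch of windows_change_test_configs.py -- the real script
# is not shown here. It would rewrite Linux-style test paths to Windows ones.
import glob

for path in glob.glob("src/test/**/*.json", recursive=True):  # assumed location
    with open(path) as f:
        text = f.read()
    # Assumed mapping; forward slashes keep the JSON escaping simple.
    new_text = text.replace("/ovms/src/test/", "C:/git/model_server/src/test/")
    if new_text != text:
        with open(path, "w") as f:
            f.write(new_text)
```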

6 changes: 3 additions & 3 deletions ci/cppclean.sh
@@ -39,10 +39,10 @@ errors=""
if [ ${NO_WARNINGS_FORWARD} -gt 9 ]; then
errors+="Failed due to not using forward declarations where possible: ${NO_WARNINGS_FORWARD}"$'\n'
fi
if [ ${NO_WARNINGS_DIRECT} -gt 20 ]; then
if [ ${NO_WARNINGS_DIRECT} -gt 21 ]; then
errors+="Failed probably due to not using static keyword with functions definitions: ${NO_WARNINGS_DIRECT}"$'\n'
fi
if [ ${NO_WARNINGS_NOTUSED} -gt 5 ]; then
if [ ${NO_WARNINGS_NOTUSED} -gt 6 ]; then
errors+="Failed probably due to unnecessary forward includes: ${NO_WARNINGS_NOTUSED}"$'\n'
fi
if [ ${NO_WARNINGS_TEST_FORWARD} -gt 1 ]; then
@@ -54,7 +54,7 @@ fi
if [ ${NO_WARNINGS_TEST_NOTUSED} -gt 0 ]; then
errors+="Failed probably due to unnecessary forward includes: ${NO_WARNINGS_TEST_NOTUSED}"$'\n'
fi
if [ ${NO_WARNINGS} -gt 193 ]; then
if [ ${NO_WARNINGS} -gt 194 ]; then
errors+="Failed due to higher than allowed number of issues in code: ${NO_WARNINGS}"$'\n'
fi
if [ ${NO_WARNINGS_TEST} -gt 52 ]; then
Expand Down
8 changes: 4 additions & 4 deletions ci/loadWin.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,13 @@ def check_tests(){

status = bat(returnStatus: true, script: 'grep " OK " win_test.log')
if (status != 0) {
error "Error: Windows run test failed ${status}. Check win_test.log for details."
error "Error: Windows run test failed ${status}. Expecting passed tests and no passed tests detected. Check win_test.log for details."
}

// TODO Windows: Currently some tests fail change to no fail when fixed.
status = bat(returnStatus: true, script: 'grep " FAILED " win_test.log')
if (status != 0) {
error "Error: Windows run test failed ${status}. Check win_test.log for details."
if (status == 0) {
def failed = bat(returnStatus: false, returnStdout: true, script: 'grep " FAILED " win_test.log | wc -l')
error "Error: Windows run test failed ${status}. ${failed} failed tests . Check win_test.log for details."
} else {
echo "Run test successful."
}
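The corrected condition hinges on grep's exit code: 0 means a match was found, so `status == 0` on ` FAILED ` is the failure case (the old `status != 0` had it backwards). The same check as a standalone Python sketch, assuming grep is on PATH and using the log name from the pipeline above:
```python
# Mirrors the corrected Groovy check: grep exits 0 on a match, so finding
# " FAILED " lines means the run failed; finding no " OK " lines also fails.
import subprocess
import sys

ok = subprocess.run(["grep", " OK ", "win_test.log"], capture_output=True)
if ok.returncode != 0:
    sys.exit("Error: expecting passed tests and no passed tests detected.")

failed = subprocess.run(["grep", " FAILED ", "win_test.log"],
                        capture_output=True, text=True)
if failed.returncode == 0:
    count = len(failed.stdout.splitlines())
    sys.exit(f"Error: {count} failed tests. Check win_test.log for details.")
print("Run test successful.")
```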
4 changes: 2 additions & 2 deletions client/cpp/kserve-api/README.md
@@ -388,7 +388,7 @@ Usage:
```Bash
./http_model_metadata --http_port 8000 --http_address localhost
{"name":"dummy","versions":["1"],"platform":"OpenVINO","inputs":[{"name":"b","datatype":"FP32","shape":[1,10]}],"outputs":[{"name":"a","datatype":"FP32","shape":[1,10]}]}
{"name":"dummy","versions":["1"],"platform":"OpenVINO","inputs":[{"name":"b","datatype":"FP32","shape":[1,10]}],"outputs":[{"name":"a","datatype":"FP32","shape":[1,10]}],"rt_info":{"model_info":{"precision":"FP16","resolution":{"height":"200","width":"300"}}}}
```
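The updated output shows that model metadata can now include the model's `rt_info`. A minimal Python sketch fetching the same metadata over the KServe REST API (server address and model name taken from the example above):
```python
# Fetch model metadata and read the rt_info section shown in this change.
import json
import urllib.request

with urllib.request.urlopen("http://localhost:8000/v2/models/dummy") as resp:
    metadata = json.load(resp)

# rt_info appears only for models that define runtime info.
print(metadata.get("rt_info", {}).get("model_info", {}))
```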
### Run the Client to perform inference
@@ -644,4 +644,4 @@ Number of requests: 10
Total processing time: 178 ms
Latency: 42.5398 ms
Requests per second: 23.5074
```
```
2 changes: 1 addition & 1 deletion client/python/kserve-api/samples/README.md
@@ -955,4 +955,4 @@ imagenet top results in a single batch:
imagenet top results in a single batch:
0 zebra 340 ; Correct match.
Classification accuracy: 100.00
```
```
9 changes: 8 additions & 1 deletion common_settings.bzl
@@ -80,11 +80,18 @@ def create_config_settings():
negate = ":fuzzer_build",
)

# is windows or mediapipe is disabled (no_http dependency)
selects.config_setting_group(
name = "is_windows_or_mediapipe_is_disabled_no_http",
match_any = ["//src:windows", "//:disable_mediapipe"]
)

# is windows or python is disabled"(no llm dependency)
selects.config_setting_group(
name = "is_windows_or_python_is_disabled_no_llm",
match_any = ["//src:windows", "//:disable_python"]
)


###############################
# compilation settings
@@ -165,6 +172,6 @@ COMMON_FUZZER_LINKOPTS = [
]
COMMON_LOCAL_DEFINES = ["SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_TRACE"]
PYBIND_DEPS = [
"@python3_linux//:python3-lib",
"//third_party:python3",
"@pybind11//:pybind11_embed",
]
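The new config setting groups let a single `select()` drop whole dependency sets on Windows or when Python is disabled. A hypothetical usage sketch in Starlark (the dependency label is an assumption, not a target from this diff):
```python
# Hypothetical BUILD usage of the new setting group.
cc_library(
    name = "llm_support",
    deps = select({
        "//:is_windows_or_python_is_disabled_no_llm": [],   # skip LLM deps
        "//conditions:default": ["//src/llm:llm_engine"],   # assumed label
    }),
)
```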
3 changes: 2 additions & 1 deletion demos/continuous_batching/accuracy/README.md
@@ -10,12 +10,13 @@ It reports end to end quality of served model from the client application point
Install the framework via:
```bash
export PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"
pip3 install lm_eval[api]
pip3 install lm_eval[api] langdetect immutabledict
```
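Once the model is served (next section), `lm_eval` can target the OpenAI-compatible endpoint. A hedged sketch via the harness's Python API — the backend name and arguments are assumptions based on typical `lm_eval` usage, not taken from this demo:
```python
# Assumed lm_eval usage against an OpenAI-compatible endpoint; verify the
# backend name and model_args against your installed lm_eval version.
import lm_eval

results = lm_eval.simple_evaluate(
    model="local-chat-completions",
    model_args=(
        "model=meta-llama/Meta-Llama-3-8B-Instruct,"
        "base_url=http://localhost:8000/v3/chat/completions"
    ),
    tasks=["gsm8k"],
    limit=10,  # small sample as a smoke test
)
print(results["results"])
```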

## Exporting the models and starting the model server
```bash
git clone https://github.com/openvinotoolkit/model_server.git
cd model_server
pip3 install -U -r demos/common/export_models/requirements.txt
mkdir models
python demos/common/export_models/export_model.py text_generation --source_model meta-llama/Meta-Llama-3-8B-Instruct --weight-format fp16 --kv_cache_precision u8 --config_file_path models/config.json --model_repository_path models
12 changes: 6 additions & 6 deletions demos/continuous_batching/scaling/README.md
@@ -11,17 +11,17 @@ It deploys two instances of the model server allocated to difference CPU sockets
## Start the Model Server instances

Let's assume we have two CPU sockets server with two NUMA nodes.
```
```bash
lscpu | grep NUMA
NUMA node(s): 2
NUMA node0 CPU(s): 0-31,64-95
NUMA node1 CPU(s): 32-63,96-127
```
Following the prework from [demo](../README.md) start the instances like below:
```
docker run --cpuset-cpus $(lscpu | grep node0 | cut -d: -f2) -d --rm -p 8003:8003 -v $(pwd)/:/workspace:ro openvino/model_server:latest --rest_port 8003 --config_path /workspace/config.json
```bash
docker run --cpuset-cpus $(lscpu | grep node0 | cut -d: -f2) -d --rm -p 8003:8003 -v $(pwd)/models:/workspace:ro openvino/model_server:latest --rest_port 8003 --config_path /workspace/config.json

docker run --cpuset-cpus $(lscpu | grep node1 | cut -d: -f2) -d --rm -p 8004:8004 -v $(pwd)/:/workspace:ro openvino/model_server:latest --rest_port 8004 --config_path /workspace/config.json
docker run --cpuset-cpus $(lscpu | grep node1 | cut -d: -f2) -d --rm -p 8004:8004 -v $(pwd)/models:/workspace:ro openvino/model_server:latest --rest_port 8004 --config_path /workspace/config.json
```
Confirm in logs if the containers loaded the models successfully.

@@ -42,14 +42,14 @@ stream {
}
```
Start the Nginx container with:
```
```bash
docker run -v $(pwd)/nginx.conf:/etc/nginx/nginx.conf:ro -d --net=host -p 80:80 nginx
```
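Before benchmarking, you can confirm the balancer forwards requests to the instances — a minimal Python smoke test (model name and endpoint path as used in the benchmark command below):
```python
# Send a few requests through the nginx balancer; with least_conn they
# should be spread across both model server instances.
import json
import urllib.request

payload = json.dumps({
    "model": "meta-llama/Meta-Llama-3-8B-Instruct",
    "messages": [{"role": "user", "content": "Say hello"}],
    "max_tokens": 8,
}).encode()

for i in range(4):
    req = urllib.request.Request(
        "http://localhost:80/v3/chat/completions", data=payload,
        headers={"Content-Type": "application/json"})
    with urllib.request.urlopen(req) as resp:
        print(i, resp.status)
```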

## Testing the scalability

Start benchmarking script like in [demo](../README.md), pointing to the load balancer port and host.
```
```bash
python benchmark_serving.py --host localhost --port 80 --endpoint /v3/chat/completions --backend openai-chat --model meta-llama/Meta-Llama-3-8B-Instruct --dataset-path ShareGPT_V3_unfiltered_cleaned_split.json --num-prompts 2000 --request-rate inf --save-result --seed 10
Initial test run completed. Starting main benchmark run...
Traffic request rate: inf
6 changes: 6 additions & 0 deletions demos/embeddings/README.md
@@ -8,6 +8,12 @@ Text generation use case is exposed via OpenAI API `embeddings` endpoint.
Here, the original Pytorch LLM model and the tokenizer will be converted to IR format and optionally quantized.
That ensures faster initialization time, better performance and lower memory consumption.

Clone model server repository:
```bash
git clone https://github.com/openvinotoolkit/model_server.git
cd model_server
```

Install python dependencies for the conversion script:
```bash
pushd .
10 changes: 8 additions & 2 deletions demos/rerank/README.md
@@ -5,6 +5,12 @@
Here, the original Pytorch LLM model and the tokenizer will be converted to IR format and optionally quantized.
That ensures faster initialization time, better performance and lower memory consumption.

Clone model server repository:
```bash
git clone https://github.com/openvinotoolkit/model_server.git
cd model_server
```

Install python dependencies for the conversion script:
```bash
pip3 install -r demos/common/export_models/requirements.txt
@@ -46,7 +52,7 @@ docker run -d --rm -p 8000:8000 -v $(pwd)/models:/workspace:ro openvino/model_se

Readiness of the model can be reported with a simple curl command.
```bash
curl -i http://localhost:8000/v3/models/BAAI%2Fbge-reranker-large/ready
curl -i http://localhost:8000/v2/models/BAAI%2Fbge-reranker-large/ready
HTTP/1.1 200 OK
Content-Type: application/json
Date: Sat, 09 Nov 2024 23:19:27 GMT
@@ -57,7 +63,7 @@ Content-Length: 0
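The rerank endpoint scores each candidate document against the query. For reference, the same request as the curl call below, as a minimal Python sketch (assumes the demo server on localhost:8000):
```python
# Python equivalent of the curl rerank request below.
import json
import urllib.request

payload = json.dumps({
    "model": "BAAI/bge-reranker-large",
    "query": "welcome",
    "documents": ["good morning", "farewell"],
}).encode()

req = urllib.request.Request(
    "http://localhost:8000/v3/rerank", data=payload,
    headers={"Content-Type": "application/json"})
with urllib.request.urlopen(req) as resp:
    print(json.load(resp))  # one relevance score per document
```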


```bash
curl http://localhost:8000/v2/rerank -H "Content-Type: application/json" \
curl http://localhost:8000/v3/rerank -H "Content-Type: application/json" \
-d '{ "model": "BAAI/bge-reranker-large", "query": "welcome", "documents":["good morning","farewell"]}' | jq .
```
```json
4 changes: 2 additions & 2 deletions docs/windows_binary_guide.md
@@ -32,7 +32,7 @@ md c:\opt
Visual Studio 2019 with C++ - https://visualstudio.microsoft.com/downloads/

## PYTHON: https://www.python.org/ftp/python/3.9.0/python-3.9.0-amd64.exe in C:\opt\Python39
Python3. (Python 3.11.9 is tested)
Python3.9
```
pip install numpy==1.23
```
@@ -56,7 +56,7 @@ Open cmd.exe in c:\opt
md test\model\1
C:\opt\intel\openvino_2024\setupvars.bat
C:\opt\opencv\build\setup_vars_opencv4.cmd
xcopy /r /s /e /Y ovms.exe c:\opt\test
xcopy /r /Y ovms.exe c:\opt\test
cd c:\opt\test
wget https://www.kaggle.com/api/v1/models/tensorflow/faster-rcnn-resnet-v1/tensorFlow2/faster-rcnn-resnet50-v1-640x640/1/download -O 1.tar.gz
tar xzf 1.tar.gz -C model\1
26 changes: 23 additions & 3 deletions docs/windows_developer_guide.md
@@ -30,7 +30,7 @@ md c:\opt
Visual Studio 2019 with C++ - https://visualstudio.microsoft.com/downloads/

## PYTHON: https://www.python.org/ftp/python/3.9.0/python-3.9.0-amd64.exe in C:\opt\Python39
Python3. (Python 3.11.9 is tested)
Python3.9
pip install numpy==1.23
make sure you install numpy for the python version you pass as build argument
make sure default "python --version" gets you 3.9
@@ -56,6 +56,14 @@ nvm use 22.9.0
npm cache clean --force
```

# Building without proxy
Clear the Windows proxy environment variables when building without a proxy:
```
set HTTP_PROXY=
set HTTPS_PROXY=
```
Also remove the proxy settings from your .gitconfig.

If you want to compile without proxy, npm proxy needs to be reset:
```
set http_proxy=
@@ -66,9 +74,21 @@ npm i --global yarn
yarn
```

## Building with proxy
Set the Windows proxy environment variables when building behind a proxy:
```
set HTTP_PROXY=my.proxy.com:123
set HTTPS_PROXY=my.proxy.com:122
```

## OPENCV install to - "C:\\opt\\opencv\\"
https://github.com/opencv/opencv/releases/download/4.10.0/opencv-4.10.0-windows.exe

# OPENCV contrib for optflow
cd c:\opt
git clone https://github.com/opencv/opencv_contrib.git
xcopy /s /r /Y opencv_contrib\modules\optflow\include\opencv2\* C:\opt\opencv\build\include\opencv2

## WGET
https://eternallybored.org/misc/wget/1.21.4/64/wget.exe download to c:\opt
Add c:\opt to system env PATH
@@ -97,7 +117,7 @@ cd model_server

## COMPILE
```
bazel build --config=windows --jobs=8 --subcommands --repo_env PYTHON_BIN_PATH=C:/opt/Python39/python.exe --verbose_failures --define CLOUD_DISABLE=1 --define MEDIAPIPE_DISABLE=1 --define PYTHON_DISABLE=1 //src:ovms > compilation.log 2>&1
bazel build --config=windows --jobs=8 --subcommands --repo_env PYTHON_BIN_PATH=C:/opt/Python39/python.exe --verbose_failures --define CLOUD_DISABLE=1 --define MEDIAPIPE_DISABLE=0 --define PYTHON_DISABLE=0 //src:ovms > compilation.log 2>&1
```

## To run ovms in developer command line
@@ -113,7 +133,7 @@ bazel-out\x64_windows-opt\bin\src\ovms.exe --help
Open cmd.exe in c:\opt
```
md test\model\1
xcopy /r /s /e /Y C:\git\model_server\bazel-out\x64_windows-opt\bin\src\ovms.exe c:\opt\test
xcopy /r /Y C:\git\model_server\bazel-out\x64_windows-opt\bin\src\ovms.exe c:\opt\test
c:\opt\intel\openvino_2024\setupvars.bat
C:\opt\opencv\build\setup_vars_opencv4.cmd
cd c:\opt\test
13 changes: 7 additions & 6 deletions spelling-whitelist.txt
@@ -9,14 +9,15 @@ src/shape.cpp:438: strIn
src/shape.cpp:488: strIn
src/shape.cpp:507: strIn
src/shape.hpp:121: strIn
src/test/modelconfig_test.cpp:472: OptionA
src/test/modelconfig_test.cpp:478: OptionA
src/test/modelconfig_test.cpp:484: OptionA
src/test/modelconfig_test.cpp:490: OptionA
src/test/modelconfig_test.cpp:496: OptionA
src/test/modelconfig_test.cpp:502: OptionA
src/test/modelconfig_test.cpp:473: OptionA
src/test/modelconfig_test.cpp:479: OptionA
src/test/modelconfig_test.cpp:485: OptionA
src/test/modelconfig_test.cpp:491: OptionA
src/test/modelconfig_test.cpp:497: OptionA
src/test/modelconfig_test.cpp:503: OptionA
src/test/modelinstance_test.cpp:1093: THROUGHTPUT
third_party/aws-sdk-cpp/aws-sdk-cpp.bz
third_party/llm_engine/llm_engine.bzl
WORKSPACE:39: thirdparty
WORKSPACE:47: thirdparty
demos/classification_using_paddlepaddle_model/python/utils/imagenet_class_index.json