DOCA Source Stage improvements (#1475)
Upgrade DOCA source stage API from 2.2 to 2.5 + code restyle

```[tasklist]
### Waiting on
- [ ] nv-morpheus/MRC#433
- [ ] #1468
```

Authors:
  - eago (https://github.com/e-ago)
  - https://github.com/eagonv
  - Christopher Harris (https://github.com/cwharris)

Approvers:
  - Christopher Harris (https://github.com/cwharris)

URL: #1475
e-ago authored Mar 22, 2024
1 parent 7bb4ec2 commit b939b3d
Showing 22 changed files with 931 additions and 772 deletions.
5 changes: 1 addition & 4 deletions docker/Dockerfile
@@ -149,11 +149,8 @@ FROM conda_env as base_extended
# Add one or more optional dependencies to the base environment
ARG MORPHEUS_ROOT_HOST
ARG MORPHEUS_SUPPORT_DOCA="FALSE"
ARG DOCA_ARTIFACTS_HOST
ARG DOCA_REPO_HOST
ARG DOCA_BUILD_ID=7930666
ARG DOCA_VERSION=2.2.0002-1
ARG DPDK_VERSION=22.11.0-1.4.1
ARG DOCA_VERSION=2.6.0-0.0.1

# Set this environment variable so it auto builds DOCA
ENV MORPHEUS_SUPPORT_DOCA=${MORPHEUS_SUPPORT_DOCA}
3 changes: 0 additions & 3 deletions docker/build_container.sh
@@ -31,7 +31,6 @@ DOCKER_EXTRA_ARGS=${DOCKER_EXTRA_ARGS:-""}
CUDA_MAJOR_VER=${CUDA_MAJOR_VER:-12}
CUDA_MINOR_VER=${CUDA_MINOR_VER:-1}
CUDA_REV_VER=${CUDA_REV_VER:-1}
DOCA_ARTIFACTS_HOST=${DOCA_ARTIFACTS_HOST:-""}
DOCA_REPO_HOST=${DOCA_REPO_HOST:-""}
FROM_IMAGE=${FROM_IMAGE:-"nvidia/cuda"}
LINUX_DISTRO=${LINUX_DISTRO:-ubuntu}
@@ -48,7 +47,6 @@ DOCKER_ARGS="${DOCKER_ARGS} --target ${DOCKER_TARGET}"
DOCKER_ARGS="${DOCKER_ARGS} --build-arg CUDA_MAJOR_VER=${CUDA_MAJOR_VER}"
DOCKER_ARGS="${DOCKER_ARGS} --build-arg CUDA_MINOR_VER=${CUDA_MINOR_VER}"
DOCKER_ARGS="${DOCKER_ARGS} --build-arg CUDA_REV_VER=${CUDA_REV_VER}"
DOCKER_ARGS="${DOCKER_ARGS} --build-arg DOCA_ARTIFACTS_HOST=${DOCA_ARTIFACTS_HOST}"
DOCKER_ARGS="${DOCKER_ARGS} --build-arg DOCA_REPO_HOST=${DOCA_REPO_HOST}"
DOCKER_ARGS="${DOCKER_ARGS} --build-arg FROM_IMAGE=${FROM_IMAGE}"
DOCKER_ARGS="${DOCKER_ARGS} --build-arg LINUX_DISTRO=${LINUX_DISTRO}"
@@ -68,7 +66,6 @@ echo "Building morpheus:${DOCKER_TAG} with args..."
echo " CUDA_MAJOR_VER : ${CUDA_MAJOR_VER}"
echo " CUDA_MINOR_VER : ${CUDA_MINOR_VER}"
echo " CUDA_REV_VER : ${CUDA_REV_VER}"
echo " DOCA_ARTIFACTS_HOST : ${DOCA_ARTIFACTS_HOST}"
echo " DOCA_REPO_HOST : ${DOCA_REPO_HOST}"
echo " FROM_IMAGE : ${FROM_IMAGE}"
echo " LINUX_DISTRO : ${LINUX_DISTRO}"
29 changes: 4 additions & 25 deletions docker/optional_deps/doca.sh
@@ -24,12 +24,7 @@ if [[ ${MORPHEUS_SUPPORT_DOCA} != @(TRUE|ON) ]]; then
fi

DOCA_REPO_HOST=${DOCA_REPO_HOST:?"Must set \$DOCA_REPO_HOST to build DOCA."}
DOCA_ARTIFACTS_HOST=${DOCA_ARTIFACTS_HOST:?"Must set \$DOCA_ARTIFACTS_HOST to build DOCA."}

DOCA_BUILD_ID=${DOCA_BUILD_ID:-7930666}
DOCA_VERSION=${DOCA_VERSION:-2.2.0002-1}
DPDK_VERSION=${DPDK_VERSION:-22.11.0-1.4.1}

DOCA_VERSION=${DOCA_VERSION:-2.6.0-0.0.1}
WORKING_DIR=$1

echo "Installing DOCA using directory: ${WORKING_DIR}"
@@ -39,30 +34,14 @@ DEB_DIR=${WORKING_DIR}/deb
mkdir -p ${DEB_DIR}

# Download all files with -nc to skip the download if it's already there
wget -nc -P ${DEB_DIR} https://${DOCA_REPO_HOST}/doca-repo-2.2.0/doca-repo-2.2.0-0.0.1-230405-143032-daily/doca-host-repo-ubuntu2204_2.2.0-0.0.1-230405-143032-daily.2.0.2004.2devflexio.23.04.0.2.3.0_amd64.deb
wget -nc -P ${DEB_DIR} https://${DOCA_ARTIFACTS_HOST}/doca-gpunet/${DOCA_BUILD_ID}/doca-gpu-mlnx-dpdk/doca-apps-dev_${DOCA_VERSION}_amd64.deb
wget -nc -P ${DEB_DIR} https://${DOCA_ARTIFACTS_HOST}/doca-gpunet/${DOCA_BUILD_ID}/doca-gpu-mlnx-dpdk/doca-apps_${DOCA_VERSION}_amd64.deb
wget -nc -P ${DEB_DIR} https://${DOCA_ARTIFACTS_HOST}/doca-gpunet/${DOCA_BUILD_ID}/doca-gpu-mlnx-dpdk/doca-grpc-dev_${DOCA_VERSION}_amd64.deb
wget -nc -P ${DEB_DIR} https://${DOCA_ARTIFACTS_HOST}/doca-gpunet/${DOCA_BUILD_ID}/doca-gpu-mlnx-dpdk/doca-grpc_${DOCA_VERSION}_amd64.deb
wget -nc -P ${DEB_DIR} https://${DOCA_ARTIFACTS_HOST}/doca-gpunet/${DOCA_BUILD_ID}/doca-gpu-mlnx-dpdk/doca-libs_${DOCA_VERSION}_amd64.deb
wget -nc -P ${DEB_DIR} https://${DOCA_ARTIFACTS_HOST}/doca-gpunet/${DOCA_BUILD_ID}/doca-gpu-mlnx-dpdk/doca-prime-runtime_${DOCA_VERSION}_amd64.deb
wget -nc -P ${DEB_DIR} https://${DOCA_ARTIFACTS_HOST}/doca-gpunet/${DOCA_BUILD_ID}/doca-gpu-mlnx-dpdk/doca-prime-sdk_${DOCA_VERSION}_amd64.deb
wget -nc -P ${DEB_DIR} https://${DOCA_ARTIFACTS_HOST}/doca-gpunet/${DOCA_BUILD_ID}/doca-gpu-mlnx-dpdk/doca-prime-tools_${DOCA_VERSION}_amd64.deb
wget -nc -P ${DEB_DIR} https://${DOCA_ARTIFACTS_HOST}/doca-gpunet/${DOCA_BUILD_ID}/doca-gpu-mlnx-dpdk/doca-samples_${DOCA_VERSION}_amd64.deb
wget -nc -P ${DEB_DIR} https://${DOCA_ARTIFACTS_HOST}/doca-gpunet/${DOCA_BUILD_ID}/doca-gpu-mlnx-dpdk/doca-services_${DOCA_VERSION}_amd64.deb
wget -nc -P ${DEB_DIR} https://${DOCA_ARTIFACTS_HOST}/doca-gpunet/${DOCA_BUILD_ID}/doca-gpu-mlnx-dpdk/libdoca-libs-dev_${DOCA_VERSION}_amd64.deb
wget -nc -P ${DEB_DIR} https://${DOCA_ARTIFACTS_HOST}/doca-gpunet/${DOCA_BUILD_ID}/doca-gpu-mlnx-dpdk/mlnx-dpdk-dev_${DPDK_VERSION}_amd64.deb
wget -nc -P ${DEB_DIR} https://${DOCA_ARTIFACTS_HOST}/doca-gpunet/${DOCA_BUILD_ID}/doca-gpu-mlnx-dpdk/mlnx-dpdk-doc_${DPDK_VERSION}_all.deb
wget -nc -P ${DEB_DIR} https://${DOCA_ARTIFACTS_HOST}/doca-gpunet/${DOCA_BUILD_ID}/doca-gpu-mlnx-dpdk/mlnx-dpdk_${DPDK_VERSION}_amd64.deb

wget -nc -P ${DEB_DIR} https://${DOCA_REPO_HOST}/doca-repo-2.6.0/doca-repo-2.6.0-0.0.1-240205-083002-daily/doca-host-repo-ubuntu2204_2.6.0-0.0.1-240205-083002-daily.2.6.0058.1.24.01.0.3.3.1_amd64.deb
# Install the doca host repo
dpkg -i ${DEB_DIR}/doca-host-repo*.deb

# Install all other packages
apt-get update
# apt-get install -y libjson-c-dev meson cmake pkg-config
apt-get install -y ${DEB_DIR}/mlnx-dpdk*.deb
apt-get install -y ${DEB_DIR}/*doca*.deb
apt-get install -y doca-sdk doca-runtime doca-gpu doca-gpu-dev

# Now install the gdrcopy library according to: https://github.com/NVIDIA/gdrcopy
GDRCOPY_DIR=${WORKING_DIR}/gdrcopy
@@ -71,7 +50,7 @@ if [[ ! -d "${GDRCOPY_DIR}" ]] ; then
git clone https://github.com/NVIDIA/gdrcopy.git ${GDRCOPY_DIR}
cd ${GDRCOPY_DIR}
else
cd cd ${GDRCOPY_DIR}
cd ${GDRCOPY_DIR}
git pull https://github.com/NVIDIA/gdrcopy.git
fi

47 changes: 45 additions & 2 deletions examples/doca/README.md
@@ -69,7 +69,7 @@ conda remove --force rdma-core

To run the example from the Morpheus root directory and capture all TCP network traffic from the given NIC, use the following command and replace the `nic_addr` and `gpu_addr` arguments with your NIC and GPU PCIe addresses.
```
# python examples/doca/run.py --nic_addr cc:00.1 --gpu_addr cf:00.0
# python examples/doca/run.py --nic_addr cc:00.1 --gpu_addr cf:00.0 --traffic_type tcp
```
```
====Registering Pipeline====
@@ -117,4 +117,47 @@ Tokenize rate: 0 pkts [00:09, ? pkts/s]
Inference rate: 0 pkts [00:09, ? pkts/s]
AddClass rate: 0 pkts [00:09, ? pkts/s]
```
The output can be found in `doca_output.csv`
The output can be found in `doca_output.csv`
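
As a quick sanity check, the capture can be inspected with pandas. This is a minimal sketch, not part of the example itself: it assumes `doca_output.csv` sits in the working directory and was written by the `WriteToFileStage` above, and the exact column names depend on which fields were serialized.
```
import pandas as pd

# Load the packets written by WriteToFileStage (path taken from the command above)
df = pd.read_csv("doca_output.csv")

print(f"Captured rows: {len(df)}")
print(df.columns.tolist())  # column names depend on the serialized fields
print(df.head())            # first few captured packets
```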

## Running the Example for UDP traffic

For UDP traffic, the example launches a simple pipeline with the DOCA Source Stage followed by a Monitor Stage that reports the number of received packets.
The command line is similar to the TCP example.

```
python3 ./examples/doca/run.py --nic_addr 17:00.1 --gpu_addr ca:00.0 --traffic_type udp
```
UDP traffic can easily be sent with `nping` to the interface Morpheus is listening on:
```
nping --udp -c 100000 -p 4100 192.168.2.27 --data-length 1024 --delay 0.1ms
```
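
If `nping` is not available, an equivalent stream can be generated with a short Python script. This is an illustrative sketch only: the destination address, port, payload size, count, and delay simply mirror the `nping` command above and must be adapted to your setup.
```
import socket
import time

DEST_IP = "192.168.2.27"    # address Morpheus is listening on (from the nping example)
DEST_PORT = 4100
PAYLOAD = b"\x00" * 1024    # 1024-byte payload, matching --data-length 1024

sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
for _ in range(100_000):    # matching -c 100000
    sock.sendto(PAYLOAD, (DEST_IP, DEST_PORT))
    time.sleep(0.0001)      # ~0.1 ms between packets, matching --delay 0.1ms
sock.close()
```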

The Morpheus output should look similar to:
```
====Pipeline Pre-build====
====Pre-Building Segment: linear_segment_0====
====Pre-Building Segment Complete!====
====Pipeline Pre-build Complete!====
====Registering Pipeline====
====Building Pipeline====
EAL: Detected CPU lcores: 64
EAL: Detected NUMA nodes: 2
EAL: Detected shared linkage of DPDK
EAL: Multi-process socket /var/run/dpdk/rte/mp_socket
EAL: Selected IOVA mode 'PA'
EAL: VFIO support initialized
TELEMETRY: No legacy callbacks, legacy socket not created
EAL: Probe PCI driver: mlx5_pci (15b3:a2dc) device: 0000:ca:00.0 (socket 1)
EAL: Probe PCI driver: gpu_cuda (10de:2331) device: 0000:17:00.0 (socket 0)
====Building Pipeline Complete!====
DOCA GPUNetIO rate: 0 pkts [00:00, ? pkts/s]====Registering Pipeline Complete!====
====Starting Pipeline====
====Pipeline Started====
====Building Segment: linear_segment_0====
Added source: <from-doca-0; DocaSourceStage(nic_pci_address=ca:00.0, gpu_pci_address=17:00.0, traffic_type=udp)>
└─> morpheus.MessageMeta
Added stage: <monitor-1; MonitorStage(description=DOCA GPUNetIO rate, smoothing=0.05, unit=pkts, delayed_start=False, determine_count_fn=None, log_level=LogLevels.INFO)>
└─ morpheus.MessageMeta -> morpheus.MessageMeta
====Building Segment Complete!====
DOCA GPUNetIO rate: 100000 pkts [00:12, 10963.39 pkts/s]
```
104 changes: 55 additions & 49 deletions examples/doca/run.py
@@ -32,12 +32,6 @@


@click.command()
@click.option(
"--num_threads",
default=1,
type=click.IntRange(min=1),
help="Number of internal pipeline threads to use",
)
@click.option(
"--pipeline_batch_size",
default=1024,
@@ -72,8 +66,18 @@
help="GPU PCI Address",
required=True,
)
def run_pipeline(num_threads, pipeline_batch_size, model_max_batch_size, model_fea_length, out_file, nic_addr,
gpu_addr):
@click.option(
"--traffic_type",
help="UDP or TCP traffic",
required=True,
)
def run_pipeline(pipeline_batch_size,
model_max_batch_size,
model_fea_length,
out_file,
nic_addr,
gpu_addr,
traffic_type):
# Enable the default logger
configure_logging(log_level=logging.DEBUG)

@@ -83,7 +87,7 @@ def run_pipeline(num_threads, pipeline_batch_size, model_max_batch_size, model_f
config.mode = PipelineModes.NLP

# Below properties are specified by the command line
config.num_threads = num_threads
config.num_threads = 4
config.pipeline_batch_size = pipeline_batch_size
config.model_max_batch_size = model_max_batch_size
config.feature_length = model_fea_length
@@ -107,48 +111,50 @@ def run_pipeline(num_threads, pipeline_batch_size, model_max_batch_size, model_f
pipeline = LinearPipeline(config)

# add doca source stage
pipeline.set_source(DocaSourceStage(config, nic_addr, gpu_addr))
pipeline.set_source(DocaSourceStage(config, nic_addr, gpu_addr, traffic_type))
pipeline.add_stage(MonitorStage(config, description="DOCA GPUNetIO rate", unit='pkts'))

# add deserialize stage
pipeline.add_stage(DeserializeStage(config))
pipeline.add_stage(MonitorStage(config, description="Deserialize rate", unit='pkts'))

# add preprocessing stage
pipeline.add_stage(
PreprocessNLPStage(
config,
vocab_hash_file='/workspace/models/training-tuning-scripts/sid-models/resources/bert-base-uncased-hash.txt',
do_lower_case=True,
truncation=True,
add_special_tokens=False,
column='data'))

pipeline.add_stage(MonitorStage(config, description="Tokenize rate", unit='pkts'))

# add inference stage
pipeline.add_stage(
TritonInferenceStage(
config,
# model_name="sid-minibert-trt",
model_name="sid-minibert-onnx",
server_url="localhost:8000",
force_convert_inputs=True,
use_shared_memory=True))

pipeline.add_stage(MonitorStage(config, description="Inference rate", unit='pkts'))

# add class stage
pipeline.add_stage(AddClassificationsStage(config))
pipeline.add_stage(MonitorStage(config, description="AddClass rate", unit='pkts'))

# serialize
pipeline.add_stage(SerializeStage(config))
pipeline.add_stage(MonitorStage(config, description="Serialize rate", unit='pkts'))

# write to file
pipeline.add_stage(WriteToFileStage(config, filename=out_file, overwrite=True))
pipeline.add_stage(MonitorStage(config, description="Write to file rate", unit='pkts'))
if traffic_type == 'tcp':
# add deserialize stage
pipeline.add_stage(DeserializeStage(config))
pipeline.add_stage(MonitorStage(config, description="Deserialize rate", unit='pkts'))

hashfile = '/workspace/models/training-tuning-scripts/sid-models/resources/bert-base-uncased-hash.txt'

# add preprocessing stage
pipeline.add_stage(
PreprocessNLPStage(config,
vocab_hash_file=hashfile,
do_lower_case=True,
truncation=True,
add_special_tokens=False,
column='data'))

pipeline.add_stage(MonitorStage(config, description="Tokenize rate", unit='pkts'))

# add inference stage
pipeline.add_stage(
TritonInferenceStage(
config,
# model_name="sid-minibert-trt",
model_name="sid-minibert-onnx",
server_url="localhost:8000",
force_convert_inputs=True,
use_shared_memory=True))

pipeline.add_stage(MonitorStage(config, description="Inference rate", unit='pkts'))

# add class stage
pipeline.add_stage(AddClassificationsStage(config))
pipeline.add_stage(MonitorStage(config, description="AddClass rate", unit='pkts'))

# serialize
pipeline.add_stage(SerializeStage(config))
pipeline.add_stage(MonitorStage(config, description="Serialize rate", unit='pkts'))

# write to file
pipeline.add_stage(WriteToFileStage(config, filename=out_file, overwrite=True))
pipeline.add_stage(MonitorStage(config, description="Write to file rate", unit='pkts'))

# Build the pipeline here to see types in the visualization
pipeline.build()
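
One note on the new `--traffic_type` option: as written it accepts any string, and only the exact value `'tcp'` enables the deserialize/inference stages. A hedged sketch, not part of this commit, of how the value could be constrained and normalized with `click.Choice`:
```
import click


@click.command()
@click.option(
    "--traffic_type",
    type=click.Choice(["tcp", "udp"], case_sensitive=False),
    default="tcp",
    show_default=True,
    help="Traffic type to capture with the DOCA source stage",
)
def run_pipeline(traffic_type: str):
    # Normalize once so downstream checks like `traffic_type == 'tcp'` stay simple
    traffic_type = traffic_type.lower()
    click.echo(f"Selected traffic type: {traffic_type}")


if __name__ == "__main__":
    run_pipeline()
```
With `click.Choice`, an unsupported value fails at argument parsing instead of silently building the UDP-only pipeline.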
2 changes: 1 addition & 1 deletion morpheus/_lib/doca/__init__.pyi
@@ -10,5 +10,5 @@ __all__ = [


class DocaSourceStage(mrc.core.segment.SegmentObject):
def __init__(self, builder: mrc.core.segment.Builder, name: str, nic_pci_address: str, gpu_pci_address: str) -> None: ...
def __init__(self, builder: mrc.core.segment.Builder, name: str, nic_pci_address: str, gpu_pci_address: str, traffic_type: str) -> None: ...
pass
40 changes: 36 additions & 4 deletions morpheus/_lib/doca/include/common.hpp
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -18,12 +18,44 @@
#pragma once

#include <cstdint>
#include <memory>
#include <string>
#include <type_traits>
#include <vector>

uint32_t const PACKETS_PER_THREAD = 4;
uint32_t const THREADS_PER_BLOCK = 512;
uint32_t const THREADS_PER_BLOCK = 1024; // 512
uint32_t const PACKETS_PER_BLOCK = PACKETS_PER_THREAD * THREADS_PER_BLOCK;
uint32_t const PACKET_RX_TIMEOUT_NS = 5000;
uint32_t const PACKET_RX_TIMEOUT_NS = 1000000; // 1ms //500us

uint32_t const MAX_PKT_RECEIVE = PACKETS_PER_BLOCK;
uint32_t const MAX_PKT_SIZE = 8192;
uint32_t const MAX_PKT_SIZE = 4096;
uint32_t const MAX_PKT_NUM = 65536;
uint32_t const MAX_QUEUE = 4;
uint32_t const MAX_SEM_X_QUEUE = 32;

enum doca_traffic_type
{
DOCA_TRAFFIC_TYPE_UDP = 0,
DOCA_TRAFFIC_TYPE_TCP = 1,
};

struct packets_info
{
int32_t packet_count_out;
int32_t payload_size_total_out;

char* payload_buffer_out;
int32_t* payload_sizes_out;

int64_t* src_mac_out;
int64_t* dst_mac_out;
int64_t* src_ip_out;
int64_t* dst_ip_out;
uint16_t* src_port_out;
uint16_t* dst_port_out;
int32_t* tcp_flags_out;
int32_t* ether_type_out;
int32_t* next_proto_id_out;
uint32_t* timestamp_out;
};
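
The revised constants shift the sizing trade-offs (more threads per block, smaller maximum packet, longer receive timeout). A rough back-of-the-envelope sketch of what they imply, assuming `MAX_PKT_RECEIVE` bounds a single receive burst and `MAX_PKT_NUM` bounds a per-queue packet ring; the actual allocations are made inside the DOCA source stage and may differ:
```
# Mirror of the constants in common.hpp (illustrative only)
PACKETS_PER_THREAD = 4
THREADS_PER_BLOCK = 1024                                    # was 512
PACKETS_PER_BLOCK = PACKETS_PER_THREAD * THREADS_PER_BLOCK  # 4096

MAX_PKT_RECEIVE = PACKETS_PER_BLOCK  # packets per receive burst
MAX_PKT_SIZE = 4096                  # bytes, was 8192
MAX_PKT_NUM = 65536
MAX_QUEUE = 4

burst_bytes = MAX_PKT_RECEIVE * MAX_PKT_SIZE  # worst-case payload per burst
ring_bytes = MAX_PKT_NUM * MAX_PKT_SIZE       # worst-case bytes per packet ring
print(f"burst: {burst_bytes / 2**20:.0f} MiB")                                # 16 MiB
print(f"ring:  {ring_bytes / 2**20:.0f} MiB per queue, "
      f"{MAX_QUEUE * ring_bytes / 2**30:.0f} GiB across {MAX_QUEUE} queues")  # 256 MiB, 1 GiB
```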
10 changes: 4 additions & 6 deletions morpheus/_lib/doca/include/doca_context.hpp
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -17,16 +17,14 @@

#pragma once

#include "common.hpp"
#include "error.hpp"
#include "rte_context.hpp"

#include <doca_eth_rxq.h>
#include <doca_flow.h>
#include <doca_gpunetio.h>

#include <memory>
#include <string>
#include <type_traits>
#include <doca_log.h>

#define GPU_PAGE_SIZE (1UL << 16)

@@ -40,11 +38,11 @@ struct DocaContext
private:
doca_gpu* m_gpu;
doca_dev* m_dev;
doca_pci_bdf m_pci_bdf;
doca_flow_port* m_flow_port;
uint16_t m_nic_port;
uint32_t m_max_queue_count;
std::unique_ptr<RTEContext> m_rte_context;
doca_log_backend* sdk_log;

public:
DocaContext(std::string nic_addr, std::string gpu_addr);