DOCA Source Stage improvements (#1475)
Upgrade DOCA source stage API from 2.2 to 2.5 + code restyle

```[tasklist]
### Waiting on
- [ ] nv-morpheus/MRC#433
- [ ] #1468
```

Authors:
  - eago (https://github.com/e-ago)
  - https://github.com/eagonv
  - Christopher Harris (https://github.com/cwharris)

Approvers:
  - Christopher Harris (https://github.com/cwharris)

URL: #1475
e-ago authored Mar 22, 2024
1 parent 7bb4ec2 commit b939b3d
Showing 22 changed files with 931 additions and 772 deletions.
5 changes: 1 addition & 4 deletions docker/Dockerfile
@@ -149,11 +149,8 @@ FROM conda_env as base_extended
# Add one or more optional dependencies to the base environment
ARG MORPHEUS_ROOT_HOST
ARG MORPHEUS_SUPPORT_DOCA="FALSE"
ARG DOCA_ARTIFACTS_HOST
ARG DOCA_REPO_HOST
ARG DOCA_BUILD_ID=7930666
ARG DOCA_VERSION=2.2.0002-1
ARG DPDK_VERSION=22.11.0-1.4.1
ARG DOCA_VERSION=2.6.0-0.0.1

# Set this environment variable so it auto builds DOCA
ENV MORPHEUS_SUPPORT_DOCA=${MORPHEUS_SUPPORT_DOCA}
3 changes: 0 additions & 3 deletions docker/build_container.sh
@@ -31,7 +31,6 @@ DOCKER_EXTRA_ARGS=${DOCKER_EXTRA_ARGS:-""}
CUDA_MAJOR_VER=${CUDA_MAJOR_VER:-12}
CUDA_MINOR_VER=${CUDA_MINOR_VER:-1}
CUDA_REV_VER=${CUDA_REV_VER:-1}
DOCA_ARTIFACTS_HOST=${DOCA_ARTIFACTS_HOST:-""}
DOCA_REPO_HOST=${DOCA_REPO_HOST:-""}
FROM_IMAGE=${FROM_IMAGE:-"nvidia/cuda"}
LINUX_DISTRO=${LINUX_DISTRO:-ubuntu}
@@ -48,7 +47,6 @@ DOCKER_ARGS="${DOCKER_ARGS} --target ${DOCKER_TARGET}"
DOCKER_ARGS="${DOCKER_ARGS} --build-arg CUDA_MAJOR_VER=${CUDA_MAJOR_VER}"
DOCKER_ARGS="${DOCKER_ARGS} --build-arg CUDA_MINOR_VER=${CUDA_MINOR_VER}"
DOCKER_ARGS="${DOCKER_ARGS} --build-arg CUDA_REV_VER=${CUDA_REV_VER}"
DOCKER_ARGS="${DOCKER_ARGS} --build-arg DOCA_ARTIFACTS_HOST=${DOCA_ARTIFACTS_HOST}"
DOCKER_ARGS="${DOCKER_ARGS} --build-arg DOCA_REPO_HOST=${DOCA_REPO_HOST}"
DOCKER_ARGS="${DOCKER_ARGS} --build-arg FROM_IMAGE=${FROM_IMAGE}"
DOCKER_ARGS="${DOCKER_ARGS} --build-arg LINUX_DISTRO=${LINUX_DISTRO}"
@@ -68,7 +66,6 @@ echo "Building morpheus:${DOCKER_TAG} with args..."
echo " CUDA_MAJOR_VER : ${CUDA_MAJOR_VER}"
echo " CUDA_MINOR_VER : ${CUDA_MINOR_VER}"
echo " CUDA_REV_VER : ${CUDA_REV_VER}"
echo " DOCA_ARTIFACTS_HOST : ${DOCA_ARTIFACTS_HOST}"
echo " DOCA_REPO_HOST : ${DOCA_REPO_HOST}"
echo " FROM_IMAGE : ${FROM_IMAGE}"
echo " LINUX_DISTRO : ${LINUX_DISTRO}"
29 changes: 4 additions & 25 deletions docker/optional_deps/doca.sh
@@ -24,12 +24,7 @@ if [[ ${MORPHEUS_SUPPORT_DOCA} != @(TRUE|ON) ]]; then
fi

DOCA_REPO_HOST=${DOCA_REPO_HOST:?"Must set \$DOCA_REPO_HOST to build DOCA."}
DOCA_ARTIFACTS_HOST=${DOCA_ARTIFACTS_HOST:?"Must set \$DOCA_ARTIFACTS_HOST to build DOCA."}

DOCA_BUILD_ID=${DOCA_BUILD_ID:-7930666}
DOCA_VERSION=${DOCA_VERSION:-2.2.0002-1}
DPDK_VERSION=${DPDK_VERSION:-22.11.0-1.4.1}

DOCA_VERSION=${DOCA_VERSION:-2.6.0-0.0.1}
WORKING_DIR=$1

echo "Installing DOCA using directory: ${WORKING_DIR}"
@@ -39,30 +34,14 @@ DEB_DIR=${WORKING_DIR}/deb
mkdir -p ${DEB_DIR}

# Download all files with -nc to skip the download if it's already there
wget -nc -P ${DEB_DIR} https://${DOCA_REPO_HOST}/doca-repo-2.2.0/doca-repo-2.2.0-0.0.1-230405-143032-daily/doca-host-repo-ubuntu2204_2.2.0-0.0.1-230405-143032-daily.2.0.2004.2devflexio.23.04.0.2.3.0_amd64.deb
wget -nc -P ${DEB_DIR} https://${DOCA_ARTIFACTS_HOST}/doca-gpunet/${DOCA_BUILD_ID}/doca-gpu-mlnx-dpdk/doca-apps-dev_${DOCA_VERSION}_amd64.deb
wget -nc -P ${DEB_DIR} https://${DOCA_ARTIFACTS_HOST}/doca-gpunet/${DOCA_BUILD_ID}/doca-gpu-mlnx-dpdk/doca-apps_${DOCA_VERSION}_amd64.deb
wget -nc -P ${DEB_DIR} https://${DOCA_ARTIFACTS_HOST}/doca-gpunet/${DOCA_BUILD_ID}/doca-gpu-mlnx-dpdk/doca-grpc-dev_${DOCA_VERSION}_amd64.deb
wget -nc -P ${DEB_DIR} https://${DOCA_ARTIFACTS_HOST}/doca-gpunet/${DOCA_BUILD_ID}/doca-gpu-mlnx-dpdk/doca-grpc_${DOCA_VERSION}_amd64.deb
wget -nc -P ${DEB_DIR} https://${DOCA_ARTIFACTS_HOST}/doca-gpunet/${DOCA_BUILD_ID}/doca-gpu-mlnx-dpdk/doca-libs_${DOCA_VERSION}_amd64.deb
wget -nc -P ${DEB_DIR} https://${DOCA_ARTIFACTS_HOST}/doca-gpunet/${DOCA_BUILD_ID}/doca-gpu-mlnx-dpdk/doca-prime-runtime_${DOCA_VERSION}_amd64.deb
wget -nc -P ${DEB_DIR} https://${DOCA_ARTIFACTS_HOST}/doca-gpunet/${DOCA_BUILD_ID}/doca-gpu-mlnx-dpdk/doca-prime-sdk_${DOCA_VERSION}_amd64.deb
wget -nc -P ${DEB_DIR} https://${DOCA_ARTIFACTS_HOST}/doca-gpunet/${DOCA_BUILD_ID}/doca-gpu-mlnx-dpdk/doca-prime-tools_${DOCA_VERSION}_amd64.deb
wget -nc -P ${DEB_DIR} https://${DOCA_ARTIFACTS_HOST}/doca-gpunet/${DOCA_BUILD_ID}/doca-gpu-mlnx-dpdk/doca-samples_${DOCA_VERSION}_amd64.deb
wget -nc -P ${DEB_DIR} https://${DOCA_ARTIFACTS_HOST}/doca-gpunet/${DOCA_BUILD_ID}/doca-gpu-mlnx-dpdk/doca-services_${DOCA_VERSION}_amd64.deb
wget -nc -P ${DEB_DIR} https://${DOCA_ARTIFACTS_HOST}/doca-gpunet/${DOCA_BUILD_ID}/doca-gpu-mlnx-dpdk/libdoca-libs-dev_${DOCA_VERSION}_amd64.deb
wget -nc -P ${DEB_DIR} https://${DOCA_ARTIFACTS_HOST}/doca-gpunet/${DOCA_BUILD_ID}/doca-gpu-mlnx-dpdk/mlnx-dpdk-dev_${DPDK_VERSION}_amd64.deb
wget -nc -P ${DEB_DIR} https://${DOCA_ARTIFACTS_HOST}/doca-gpunet/${DOCA_BUILD_ID}/doca-gpu-mlnx-dpdk/mlnx-dpdk-doc_${DPDK_VERSION}_all.deb
wget -nc -P ${DEB_DIR} https://${DOCA_ARTIFACTS_HOST}/doca-gpunet/${DOCA_BUILD_ID}/doca-gpu-mlnx-dpdk/mlnx-dpdk_${DPDK_VERSION}_amd64.deb

wget -nc -P ${DEB_DIR} https://${DOCA_REPO_HOST}/doca-repo-2.6.0/doca-repo-2.6.0-0.0.1-240205-083002-daily/doca-host-repo-ubuntu2204_2.6.0-0.0.1-240205-083002-daily.2.6.0058.1.24.01.0.3.3.1_amd64.deb
# Install the doca host repo
dpkg -i ${DEB_DIR}/doca-host-repo*.deb

# Install all other packages
apt-get update
# apt-get install -y libjson-c-dev meson cmake pkg-config
apt-get install -y ${DEB_DIR}/mlnx-dpdk*.deb
apt-get install -y ${DEB_DIR}/*doca*.deb
apt-get install -y doca-sdk doca-runtime doca-gpu doca-gpu-dev

# Now install the gdrcopy library according to: https://github.com/NVIDIA/gdrcopy
GDRCOPY_DIR=${WORKING_DIR}/gdrcopy
@@ -71,7 +50,7 @@ if [[ ! -d "${GDRCOPY_DIR}" ]] ; then
git clone https://github.com/NVIDIA/gdrcopy.git ${GDRCOPY_DIR}
cd ${GDRCOPY_DIR}
else
cd cd ${GDRCOPY_DIR}
cd ${GDRCOPY_DIR}
git pull https://github.com/NVIDIA/gdrcopy.git
fi

47 changes: 45 additions & 2 deletions examples/doca/README.md
@@ -69,7 +69,7 @@ conda remove --force rdma-core

To run the example from the Morpheus root directory and capture all TCP network traffic from the given NIC, use the following command and replace the `nic_addr` and `gpu_addr` arguments with your NIC and GPU PCIe addresses.
```
# python examples/doca/run.py --nic_addr cc:00.1 --gpu_addr cf:00.0
# python examples/doca/run.py --nic_addr cc:00.1 --gpu_addr cf:00.0 --traffic_type tcp
```
```
====Registering Pipeline====
@@ -117,4 +117,47 @@ Tokenize rate: 0 pkts [00:09, ? pkts/s]
Inference rate: 0 pkts [00:09, ? pkts/s]
AddClass rate: 0 pkts [00:09, ? pkts/s]
```
The output can be found in `doca_output.csv`
The output can be found in `doca_output.csv`
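
As a quick sanity check, the capture can be inspected with pandas. This is a minimal sketch, not part of the example itself: it assumes `doca_output.csv` sits in the working directory and was written by the `WriteToFileStage` above, and the exact column names depend on which fields were serialized.
```
import pandas as pd

# Load the packets written by WriteToFileStage (path taken from the command above)
df = pd.read_csv("doca_output.csv")

print(f"Captured rows: {len(df)}")
print(df.columns.tolist())  # column names depend on the serialized fields
print(df.head())            # first few captured packets
```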

## Running the Example for UDP traffic

For UDP traffic, the example launches a simple pipeline with the DOCA Source Stage followed by a Monitor Stage that reports the number of received packets.
The command line is similar to the TCP example.

```
python3 ./examples/doca/run.py --nic_addr 17:00.1 --gpu_addr ca:00.0 --traffic_type udp
```
UDP traffic can easily be sent with `nping` to the interface Morpheus is listening on:
```
nping --udp -c 100000 -p 4100 192.168.2.27 --data-length 1024 --delay 0.1ms
```
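
If `nping` is not available, an equivalent stream can be generated with a short Python script. This is an illustrative sketch only: the destination address, port, payload size, count, and delay simply mirror the `nping` command above and must be adapted to your setup.
```
import socket
import time

DEST_IP = "192.168.2.27"    # address Morpheus is listening on (from the nping example)
DEST_PORT = 4100
PAYLOAD = b"\x00" * 1024    # 1024-byte payload, matching --data-length 1024

sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
for _ in range(100_000):    # matching -c 100000
    sock.sendto(PAYLOAD, (DEST_IP, DEST_PORT))
    time.sleep(0.0001)      # ~0.1 ms between packets, matching --delay 0.1ms
sock.close()
```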

The Morpheus output should look similar to:
```
====Pipeline Pre-build====
====Pre-Building Segment: linear_segment_0====
====Pre-Building Segment Complete!====
====Pipeline Pre-build Complete!====
====Registering Pipeline====
====Building Pipeline====
EAL: Detected CPU lcores: 64
EAL: Detected NUMA nodes: 2
EAL: Detected shared linkage of DPDK
EAL: Multi-process socket /var/run/dpdk/rte/mp_socket
EAL: Selected IOVA mode 'PA'
EAL: VFIO support initialized
TELEMETRY: No legacy callbacks, legacy socket not created
EAL: Probe PCI driver: mlx5_pci (15b3:a2dc) device: 0000:ca:00.0 (socket 1)
EAL: Probe PCI driver: gpu_cuda (10de:2331) device: 0000:17:00.0 (socket 0)
====Building Pipeline Complete!====
DOCA GPUNetIO rate: 0 pkts [00:00, ? pkts/s]====Registering Pipeline Complete!====
====Starting Pipeline====
====Pipeline Started====
====Building Segment: linear_segment_0====
Added source: <from-doca-0; DocaSourceStage(nic_pci_address=ca:00.0, gpu_pci_address=17:00.0, traffic_type=udp)>
└─> morpheus.MessageMeta
Added stage: <monitor-1; MonitorStage(description=DOCA GPUNetIO rate, smoothing=0.05, unit=pkts, delayed_start=False, determine_count_fn=None, log_level=LogLevels.INFO)>
└─ morpheus.MessageMeta -> morpheus.MessageMeta
====Building Segment Complete!====
DOCA GPUNetIO rate: 100000 pkts [00:12, 10963.39 pkts/s]
```
104 changes: 55 additions & 49 deletions examples/doca/run.py
@@ -32,12 +32,6 @@


@click.command()
@click.option(
"--num_threads",
default=1,
type=click.IntRange(min=1),
help="Number of internal pipeline threads to use",
)
@click.option(
"--pipeline_batch_size",
default=1024,
@@ -72,8 +66,18 @@
help="GPU PCI Address",
required=True,
)
def run_pipeline(num_threads, pipeline_batch_size, model_max_batch_size, model_fea_length, out_file, nic_addr,
gpu_addr):
@click.option(
"--traffic_type",
help="UDP or TCP traffic",
required=True,
)
def run_pipeline(pipeline_batch_size,
model_max_batch_size,
model_fea_length,
out_file,
nic_addr,
gpu_addr,
traffic_type):
# Enable the default logger
configure_logging(log_level=logging.DEBUG)

@@ -83,7 +87,7 @@ def run_pipeline(num_threads, pipeline_batch_size, model_max_batch_size, model_f
config.mode = PipelineModes.NLP

# Below properties are specified by the command line
config.num_threads = num_threads
config.num_threads = 4
config.pipeline_batch_size = pipeline_batch_size
config.model_max_batch_size = model_max_batch_size
config.feature_length = model_fea_length
@@ -107,48 +111,50 @@ def run_pipeline(num_threads, pipeline_batch_size, model_max_batch_size, model_f
pipeline = LinearPipeline(config)

# add doca source stage
pipeline.set_source(DocaSourceStage(config, nic_addr, gpu_addr))
pipeline.set_source(DocaSourceStage(config, nic_addr, gpu_addr, traffic_type))
pipeline.add_stage(MonitorStage(config, description="DOCA GPUNetIO rate", unit='pkts'))

# add deserialize stage
pipeline.add_stage(DeserializeStage(config))
pipeline.add_stage(MonitorStage(config, description="Deserialize rate", unit='pkts'))

# add preprocessing stage
pipeline.add_stage(
PreprocessNLPStage(
config,
vocab_hash_file='/workspace/models/training-tuning-scripts/sid-models/resources/bert-base-uncased-hash.txt',
do_lower_case=True,
truncation=True,
add_special_tokens=False,
column='data'))

pipeline.add_stage(MonitorStage(config, description="Tokenize rate", unit='pkts'))

# add inference stage
pipeline.add_stage(
TritonInferenceStage(
config,
# model_name="sid-minibert-trt",
model_name="sid-minibert-onnx",
server_url="localhost:8000",
force_convert_inputs=True,
use_shared_memory=True))

pipeline.add_stage(MonitorStage(config, description="Inference rate", unit='pkts'))

# add class stage
pipeline.add_stage(AddClassificationsStage(config))
pipeline.add_stage(MonitorStage(config, description="AddClass rate", unit='pkts'))

# serialize
pipeline.add_stage(SerializeStage(config))
pipeline.add_stage(MonitorStage(config, description="Serialize rate", unit='pkts'))

# write to file
pipeline.add_stage(WriteToFileStage(config, filename=out_file, overwrite=True))
pipeline.add_stage(MonitorStage(config, description="Write to file rate", unit='pkts'))
if traffic_type == 'tcp':
# add deserialize stage
pipeline.add_stage(DeserializeStage(config))
pipeline.add_stage(MonitorStage(config, description="Deserialize rate", unit='pkts'))

hashfile = '/workspace/models/training-tuning-scripts/sid-models/resources/bert-base-uncased-hash.txt'

# add preprocessing stage
pipeline.add_stage(
PreprocessNLPStage(config,
vocab_hash_file=hashfile,
do_lower_case=True,
truncation=True,
add_special_tokens=False,
column='data'))

pipeline.add_stage(MonitorStage(config, description="Tokenize rate", unit='pkts'))

# add inference stage
pipeline.add_stage(
TritonInferenceStage(
config,
# model_name="sid-minibert-trt",
model_name="sid-minibert-onnx",
server_url="localhost:8000",
force_convert_inputs=True,
use_shared_memory=True))

pipeline.add_stage(MonitorStage(config, description="Inference rate", unit='pkts'))

# add class stage
pipeline.add_stage(AddClassificationsStage(config))
pipeline.add_stage(MonitorStage(config, description="AddClass rate", unit='pkts'))

# serialize
pipeline.add_stage(SerializeStage(config))
pipeline.add_stage(MonitorStage(config, description="Serialize rate", unit='pkts'))

# write to file
pipeline.add_stage(WriteToFileStage(config, filename=out_file, overwrite=True))
pipeline.add_stage(MonitorStage(config, description="Write to file rate", unit='pkts'))

# Build the pipeline here to see types in the visualization
pipeline.build()
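
One note on the new `--traffic_type` option: as written it accepts any string, and only the exact value `'tcp'` enables the deserialize/inference stages. A hedged sketch, not part of this commit, of how the value could be constrained and normalized with `click.Choice`:
```
import click


@click.command()
@click.option(
    "--traffic_type",
    type=click.Choice(["tcp", "udp"], case_sensitive=False),
    default="tcp",
    show_default=True,
    help="Traffic type to capture with the DOCA source stage",
)
def run_pipeline(traffic_type: str):
    # Normalize once so downstream checks like `traffic_type == 'tcp'` stay simple
    traffic_type = traffic_type.lower()
    click.echo(f"Selected traffic type: {traffic_type}")


if __name__ == "__main__":
    run_pipeline()
```
With `click.Choice`, an unsupported value fails at argument parsing instead of silently building the UDP-only pipeline.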
2 changes: 1 addition & 1 deletion morpheus/_lib/doca/__init__.pyi
@@ -10,5 +10,5 @@ __all__ = [


class DocaSourceStage(mrc.core.segment.SegmentObject):
def __init__(self, builder: mrc.core.segment.Builder, name: str, nic_pci_address: str, gpu_pci_address: str) -> None: ...
def __init__(self, builder: mrc.core.segment.Builder, name: str, nic_pci_address: str, gpu_pci_address: str, traffic_type: str) -> None: ...
pass
40 changes: 36 additions & 4 deletions morpheus/_lib/doca/include/common.hpp
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -18,12 +18,44 @@
#pragma once

#include <cstdint>
#include <memory>
#include <string>
#include <type_traits>
#include <vector>

uint32_t const PACKETS_PER_THREAD = 4;
uint32_t const THREADS_PER_BLOCK = 512;
uint32_t const THREADS_PER_BLOCK = 1024; // 512
uint32_t const PACKETS_PER_BLOCK = PACKETS_PER_THREAD * THREADS_PER_BLOCK;
uint32_t const PACKET_RX_TIMEOUT_NS = 5000;
uint32_t const PACKET_RX_TIMEOUT_NS = 1000000; // 1ms //500us

uint32_t const MAX_PKT_RECEIVE = PACKETS_PER_BLOCK;
uint32_t const MAX_PKT_SIZE = 8192;
uint32_t const MAX_PKT_SIZE = 4096;
uint32_t const MAX_PKT_NUM = 65536;
uint32_t const MAX_QUEUE = 4;
uint32_t const MAX_SEM_X_QUEUE = 32;

enum doca_traffic_type
{
DOCA_TRAFFIC_TYPE_UDP = 0,
DOCA_TRAFFIC_TYPE_TCP = 1,
};

struct packets_info
{
int32_t packet_count_out;
int32_t payload_size_total_out;

char* payload_buffer_out;
int32_t* payload_sizes_out;

int64_t* src_mac_out;
int64_t* dst_mac_out;
int64_t* src_ip_out;
int64_t* dst_ip_out;
uint16_t* src_port_out;
uint16_t* dst_port_out;
int32_t* tcp_flags_out;
int32_t* ether_type_out;
int32_t* next_proto_id_out;
uint32_t* timestamp_out;
};
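
The revised constants shift the sizing trade-offs (more threads per block, smaller maximum packet, longer receive timeout). A rough back-of-the-envelope sketch of what they imply, assuming `MAX_PKT_RECEIVE` bounds a single receive burst and `MAX_PKT_NUM` bounds a per-queue packet ring; the actual allocations are made inside the DOCA source stage and may differ:
```
# Mirror of the constants in common.hpp (illustrative only)
PACKETS_PER_THREAD = 4
THREADS_PER_BLOCK = 1024                                    # was 512
PACKETS_PER_BLOCK = PACKETS_PER_THREAD * THREADS_PER_BLOCK  # 4096

MAX_PKT_RECEIVE = PACKETS_PER_BLOCK  # packets per receive burst
MAX_PKT_SIZE = 4096                  # bytes, was 8192
MAX_PKT_NUM = 65536
MAX_QUEUE = 4

burst_bytes = MAX_PKT_RECEIVE * MAX_PKT_SIZE  # worst-case payload per burst
ring_bytes = MAX_PKT_NUM * MAX_PKT_SIZE       # worst-case bytes per packet ring
print(f"burst: {burst_bytes / 2**20:.0f} MiB")                                # 16 MiB
print(f"ring:  {ring_bytes / 2**20:.0f} MiB per queue, "
      f"{MAX_QUEUE * ring_bytes / 2**30:.0f} GiB across {MAX_QUEUE} queues")  # 256 MiB, 1 GiB
```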
10 changes: 4 additions & 6 deletions morpheus/_lib/doca/include/doca_context.hpp
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -17,16 +17,14 @@

#pragma once

#include "common.hpp"
#include "error.hpp"
#include "rte_context.hpp"

#include <doca_eth_rxq.h>
#include <doca_flow.h>
#include <doca_gpunetio.h>

#include <memory>
#include <string>
#include <type_traits>
#include <doca_log.h>

#define GPU_PAGE_SIZE (1UL << 16)

@@ -40,11 +38,11 @@ struct DocaContext
private:
doca_gpu* m_gpu;
doca_dev* m_dev;
doca_pci_bdf m_pci_bdf;
doca_flow_port* m_flow_port;
uint16_t m_nic_port;
uint32_t m_max_queue_count;
std::unique_ptr<RTEContext> m_rte_context;
doca_log_backend* sdk_log;

public:
DocaContext(std::string nic_addr, std::string gpu_addr);