Skip to content

Commit

Permalink
Implement mTLS support for the monitor gRPC server
Browse files Browse the repository at this point in the history
Signed-off-by: Alexander Indenbaum <[email protected]>
  • Loading branch information
Alexander Indenbaum authored and baum committed Jun 27, 2024
1 parent 2145b4b commit 0cf123d
Show file tree
Hide file tree
Showing 10 changed files with 144 additions and 25 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build-container.yml
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,7 @@ jobs:
strategy:
fail-fast: false
matrix:
test: ["sanity", "state_transitions", "state_transitions_both_gws", "state_transitions_loop", "state_transitions_rand_loop", "late_registration", "late_registration_loop", "4gws", "4gws_loop", "namespaces", "namespaces_loop"]
test: ["sanity", "state_transitions", "state_transitions_both_gws", "state_transitions_loop", "state_transitions_rand_loop", "late_registration", "late_registration_loop", "4gws", "4gws_loop", "namespaces", "namespaces_loop", "mtls"]
runs-on: ubuntu-latest
env:
HUGEPAGES: 1024 # 4 spdk instances
Expand Down
35 changes: 20 additions & 15 deletions control/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,13 +144,8 @@ def set_group_id(self, id: int):

def _wait_for_group_id(self):
"""Waits for the monitor notification of this gateway's group id"""
# Python 3.8: Default value of max_workers is min(32, os.cpu_count() + 4).
# This default value preserves at least 5 workers for I/O bound tasks. It utilizes at
# most 32 CPU cores for CPU bound tasks which release the GIL. And it avoids using
# very large resources implicitly on many-core machines.
self.monitor_server = grpc.server(futures.ThreadPoolExecutor())
self.monitor_server = self._grpc_server(self._monitor_address())
monitor_pb2_grpc.add_MonitorGroupServicer_to_server(MonitorGroupService(self.set_group_id), self.monitor_server)
self.monitor_server.add_insecure_port(self._monitor_address())
self.monitor_server.start()
self.logger.info(f"MonitorGroup server is listening on {self._monitor_address()} for group id")
self.monitor_event.wait()
Expand Down Expand Up @@ -188,12 +183,9 @@ def serve(self):
gateway_state = GatewayStateHandler(self.config, local_state, omap_state, self.gateway_rpc_caller, f"gateway-{self.name}")
omap_lock = OmapLock(omap_state, gateway_state, self.rpc_lock)
self.gateway_rpc = GatewayService(self.config, gateway_state, self.rpc_lock, omap_lock, self.group_id, self.spdk_rpc_client, self.ceph_utils)
self.server = grpc.server(futures.ThreadPoolExecutor(max_workers=1))
self.server = self._grpc_server(self._gateway_address())
pb2_grpc.add_GatewayServicer_to_server(self.gateway_rpc, self.server)

# Add listener port
self._add_server_listener()

# Check for existing NVMeoF target state
gateway_state.start_update()

Expand Down Expand Up @@ -243,6 +235,11 @@ def _start_monitor_client(self):
'-c', '/etc/ceph/ceph.conf',
'-n', rados_id,
'-k', '/etc/ceph/keyring']
if self.config.getboolean("gateway", "enable_auth"):
cmd += [
"--server-cert", self.config.get("mtls", "server_cert"),
"--client-key", self.config.get("mtls", "client_key"),
"--client-cert", self.config.get("mtls", "client_cert") ]
self.logger.info(f"Starting {' '.join(cmd)}")
try:
# start monitor client process
Expand Down Expand Up @@ -293,8 +290,14 @@ def _monitor_address(self):
monitor_addr = GatewayUtils.escape_address_if_ipv6(monitor_addr)
return "{}:{}".format(monitor_addr, monitor_port)

def _add_server_listener(self):
"""Adds listener port to server."""
def _grpc_server(self, address):
"""Construct grpc server"""

# Python 3.8: Default value of max_workers is min(32, os.cpu_count() + 4).
# This default value preserves at least 5 workers for I/O bound tasks. It utilizes at
# most 32 CPU cores for CPU bound tasks which release the GIL. And it avoids using
# very large resources implicitly on many-core machines.
server = grpc.server(futures.ThreadPoolExecutor())

enable_auth = self.config.getboolean("gateway", "enable_auth")
if enable_auth:
Expand All @@ -321,11 +324,13 @@ def _add_server_listener(self):
)

# Add secure port using credentials
self.server.add_secure_port(
self._gateway_address(), server_credentials)
server.add_secure_port(
address, server_credentials)
else:
# Authentication is not enabled
self.server.add_insecure_port(self._gateway_address())
server.add_insecure_port(address)

return server

def _get_spdk_rpc_socket_path(self, omap_state) -> str:
# For backward compatibility, try first to get the old attribute
Expand Down
6 changes: 6 additions & 0 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ services:
sh -c './vstart.sh --new $$CEPH_VSTART_ARGS &&
ceph osd pool create rbd &&
echo ceph dashboard nvmeof-gateway-add -i <(echo nvmeof-devel:5500) nvmeof.1 &&
pushd /etc/ceph &&
openssl req -x509 -newkey rsa:4096 -nodes -keyout server.key -out server.crt -days 3650 -subj /CN=my.server -addext "subjectAltName = IP:192.168.13.3, IP:0.0.0.0" &&
openssl req -x509 -newkey rsa:4096 -nodes -keyout client.key -out client.crt -days 3650 -subj /CN=client1 &&
popd &&
sleep infinity'
healthcheck:
test: ceph osd pool stats rbd
Expand Down Expand Up @@ -240,6 +244,8 @@ services:
HUGEPAGES_DIR:
labels:
io.ceph.nvmeof:
volumes:
- ceph-conf:/etc/ceph
volumes:
ceph-conf:
networks:
Expand Down
78 changes: 78 additions & 0 deletions tests/ceph-nvmeof.tls.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#
# Copyright (c) 2021 International Business Machines
# All rights reserved.
#
# SPDX-License-Identifier: LGPL-3.0-or-later
#
# Authors: [email protected], [email protected]
#

[gateway]
name =
group =
addr = 192.168.13.3
port = 5500
enable_auth = True
state_update_notify = True
state_update_interval_sec = 5
enable_spdk_discovery_controller = False
#omap_file_lock_duration = 20
#omap_file_lock_retries = 30
#omap_file_lock_retry_sleep_interval = 1.0
#omap_file_update_reloads = 10
#enable_prometheus_exporter = True
#prometheus_exporter_ssl = True
#prometheus_port = 10008
#prometheus_bdev_pools = rbd
#prometheus_stats_interval = 10
#verify_nqns = True
#allowed_consecutive_spdk_ping_failures = 1
#spdk_ping_interval_in_seconds = 2.0
#ping_spdk_under_lock = False

[gateway-logs]
log_level=debug
#log_files_enabled = True
#log_files_rotation_enabled = True
#verbose_log_messages = True
#max_log_file_size_in_mb=10
#max_log_files_count=20
#max_log_directory_backups=10
#
# Note that if you change the log directory, the log files will only be visible inside the container
#
#log_directory = /var/log/ceph/

[discovery]
addr = 0.0.0.0
port = 8009

[ceph]
pool = rbd
config_file = /etc/ceph/ceph.conf

[mtls]
server_key = /etc/ceph/server.key
client_key = /etc/ceph/client.key
server_cert = /etc/ceph/server.crt
client_cert = /etc/ceph/client.crt

[spdk]
bdevs_per_cluster = 32
tgt_path = /usr/local/bin/nvmf_tgt
#rpc_socket_dir = /var/tmp/
#rpc_socket_name = spdk.sock
#tgt_cmd_extra_args = --env-context="--no-huge -m1024" --iova-mode=va
timeout = 60.0
log_level = WARNING

# Example value: -m 0x3 -L all
# tgt_cmd_extra_args =

# transports = tcp

# Example value: {"max_queue_depth" : 16, "max_io_size" : 4194304, "io_unit_size" : 1048576, "zcopy" : false}
transport_tcp_options = {"in_capsule_data_size" : 8192, "max_io_qpairs_per_ctrlr" : 7}

[monitor]
#timeout = 1.0
1 change: 1 addition & 0 deletions tests/ha/mtls.sh
11 changes: 4 additions & 7 deletions tests/ha/sanity.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,17 @@ set -xe
# See
# - https://github.com/spdk/spdk/blob/master/doc/jsonrpc.md
# - https://spdk.io/doc/nvmf_multipath_howto.html
. .env
container_ip() {
docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$1"
}

GW1_NAME=$(docker ps --format '{{.ID}}\t{{.Names}}' | awk '$2 ~ /nvmeof/ && $2 ~ /1/ {print $1}')
ip="$(docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$GW1_NAME")"
echo -n "ℹ️ Starting bdevperf container"
docker-compose up -d bdevperf
sleep 10
echo "ℹ️ bdevperf start up logs"
make logs SVC=bdevperf
eval $(make run SVC=bdevperf OPTS="--entrypoint=env" | grep BDEVPERF_SOCKET | tr -d '\n\r' )
BDEVPERF_SOCKET=/tmp/bdevperf.sock
NVMEOF_DISC_PORT=8009


ip=$(container_ip $GW1)
echo "ℹ️ Using discovery service in gateway $GW1 ip $ip"
rpc="/usr/libexec/spdk/scripts/rpc.py"
echo "ℹ️ bdevperf bdev_nvme_set_options"
Expand Down
13 changes: 13 additions & 0 deletions tests/ha/setup_mtls.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Configure gateway 1 over an mTLS-protected gRPC channel: create a subsystem,
# add a namespace, a listener and a wildcard host entry, then list subsystems.
# Every CLI call presents the client key/cert and verifies the server cert.
set -xe

# awk prints column 1 of the `docker ps` output, i.e. the container ID of the
# container whose name contains both "nvmeof" and "1".
GW1_NAME=$(docker ps --format '{{.ID}}\t{{.Names}}' | awk '$2 ~ /nvmeof/ && $2 ~ /1/ {print $1}')
if [ -z "$GW1_NAME" ]; then
    # Fail with a clear message instead of letting `docker inspect ""` error out.
    echo "No running nvmeof gateway container found" >&2
    exit 1
fi
GW1_IP="$(docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$GW1_NAME")"
NQN="nqn.2016-06.io.spdk:cnode1"

# Run a single nvmeof-cli command against gateway 1 with the mTLS credentials.
# All arguments are forwarded verbatim as the CLI sub-command and its options.
cli() {
    docker-compose run --rm nvmeof-cli \
        --server-address "$GW1_IP" --server-port 5500 \
        --server-cert /etc/ceph/server.crt \
        --client-key /etc/ceph/client.key \
        --client-cert /etc/ceph/client.crt \
        "$@"
}

cli subsystem add --subsystem "$NQN"
cli namespace add --subsystem "$NQN" --rbd-pool rbd --rbd-image demo_image1 --size 10M --rbd-create-image -l 1
#cli namespace add --subsystem "$NQN" --rbd-pool rbd --rbd-image demo_image2 --size 10M --rbd-create-image -l 2
cli listener add --subsystem "$NQN" --host-name "$GW1_NAME" --traddr "$GW1_IP" --trsvcid 4420
cli host add --subsystem "$NQN" --host "*"
cli get_subsystems

9 changes: 9 additions & 0 deletions tests/ha/start_up_mtls.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Start a single gateway using the mTLS-enabled configuration file.
#
# Locate the tests/ha directory: use GITHUB_WORKSPACE when it is set and
# non-empty, otherwise fall back to the directory containing this script.
if [ -n "$GITHUB_WORKSPACE" ]; then
    test_dir="$GITHUB_WORKSPACE/tests/ha"
else
    # Quote "$0" so a path containing whitespace is not word-split.
    test_dir=$(dirname "$0")
fi

# Exported so the start_up.sh child process sees the TLS configuration path.
export NVMEOF_CONFIG=./tests/ceph-nvmeof.tls.conf
"$test_dir/start_up.sh" 1
5 changes: 3 additions & 2 deletions tests/ha/wait_gateways.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
SCALE=2
echo CLI_TLS_ARGS $CLI_TLS_ARGS
# Check if argument is provided
if [ $# -ge 1 ]; then
# Check if argument is an integer greater than or equal to 1
Expand All @@ -22,12 +23,12 @@ for i in $(seq $SCALE); do
continue
fi
GW_IP="$(docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$GW_NAME")"
if docker-compose run --rm nvmeof-cli --server-address $GW_IP --server-port 5500 get_subsystems 2>&1 | grep -i failed; then
if docker-compose run --rm nvmeof-cli $CLI_TLS_ARGS --server-address $GW_IP --server-port 5500 get_subsystems 2>&1 | grep -i failed; then
echo "Container $i $GW_NAME $GW_IP no subsystems. Waiting..."
continue
fi
echo "Container $i $GW_NAME $GW_IP subsystems:"
docker-compose run --rm nvmeof-cli --server-address $GW_IP --server-port 5500 get_subsystems
docker-compose run --rm nvmeof-cli $CLI_TLS_ARGS --server-address $GW_IP --server-port 5500 get_subsystems
break;
done
done
9 changes: 9 additions & 0 deletions tests/ha/wait_gateways_mtls.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Wait for a single gateway to come up, talking to it over mTLS.
#
# Locate the tests/ha directory: use GITHUB_WORKSPACE when it is set and
# non-empty, otherwise fall back to the directory containing this script.
if [ -n "$GITHUB_WORKSPACE" ]; then
    test_dir="$GITHUB_WORKSPACE/tests/ha"
else
    # Quote "$0" so a path containing whitespace is not word-split.
    test_dir=$(dirname "$0")
fi

# Exported so the wait_gateways.sh child process inherits the TLS flags;
# kept as one space-separated string for the consumer to expand.
export CLI_TLS_ARGS="--server-cert /etc/ceph/server.crt --client-key /etc/ceph/client.key --client-cert /etc/ceph/client.crt"
"$test_dir/wait_gateways.sh" 1

0 comments on commit 0cf123d

Please sign in to comment.