From c9583fca0201578e39cdfc6ac69074621c6cce84 Mon Sep 17 00:00:00 2001 From: Alexander Indenbaum Date: Tue, 4 Jun 2024 17:04:30 +0000 Subject: [PATCH] Implement mTLS support for the monitor gRPC server Signed-off-by: Alexander Indenbaum --- .env | 4 +- control/server.py | 35 +++++++++-------- docker-compose.yaml | 4 ++ tests/ceph-nvmeof.tls.conf | 78 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 104 insertions(+), 17 deletions(-) create mode 100644 tests/ceph-nvmeof.tls.conf diff --git a/.env b/.env index cb21db3b2..7fd00dbff 100644 --- a/.env +++ b/.env @@ -58,8 +58,8 @@ SPDK_CENTOS_REPO_VER="9.0-21.el9" # Ceph Cluster CEPH_CLUSTER_VERSION="${CEPH_VERSION}" -CEPH_BRANCH=ceph-nvmeof-mon -CEPH_SHA=ab10a8e374137883a2fe698d72bd58149031917f +CEPH_BRANCH=wip-baum-20240609-00 +CEPH_SHA=dbbc483066c53d15a54038f30317f9ea93073439 CEPH_VSTART_ARGS="--memstore" CEPH_DEVEL_MGR_PATH=../ceph diff --git a/control/server.py b/control/server.py index 409d5c977..309a4e44b 100644 --- a/control/server.py +++ b/control/server.py @@ -139,13 +139,8 @@ def set_group_id(self, id: int): def _wait_for_group_id(self): """Waits for the monitor notification of this gatway's group id""" - # Python 3.8: Default value of max_workers is min(32, os.cpu_count() + 4). - # This default value preserves at least 5 workers for I/O bound tasks. It utilizes at - # most 32 CPU cores for CPU bound tasks which release the GIL. And it avoids using - # very large resources implicitly on many-core machines. - self.monitor_server = grpc.server(futures.ThreadPoolExecutor()) + self.monitor_server = self._grpc_server(self._monitor_address()) monitor_pb2_grpc.add_MonitorGroupServicer_to_server(MonitorGroupService(self.set_group_id), self.monitor_server) - self.monitor_server.add_insecure_port(self._monitor_address()) self.monitor_server.start() self.logger.info(f"MonitorGroup server is listening on {self._monitor_address()} for group id") self.monitor_event.wait() @@ -182,12 +177,9 @@ def serve(self): gateway_state = GatewayStateHandler(self.config, local_state, omap_state, self.gateway_rpc_caller) omap_lock = OmapLock(omap_state, gateway_state, self.rpc_lock) self.gateway_rpc = GatewayService(self.config, gateway_state, self.rpc_lock, omap_lock, self.group_id, self.spdk_rpc_client, self.ceph_utils) - self.server = grpc.server(futures.ThreadPoolExecutor(max_workers=1)) + self.server = self._grpc_server(self._gateway_address()) pb2_grpc.add_GatewayServicer_to_server(self.gateway_rpc, self.server) - # Add listener port - self._add_server_listener() - # Check for existing NVMeoF target state gateway_state.start_update() @@ -237,6 +229,11 @@ def _start_monitor_client(self): '-c', '/etc/ceph/ceph.conf', '-n', rados_id, '-k', '/etc/ceph/keyring'] + if self.config.getboolean("gateway", "enable_auth"): + cmd += [ + "--server-cert", self.config.get("mtls", "server_cert"), + "--client-key", self.config.get("mtls", "client_key"), + "--client-cert", self.config.get("mtls", "client_cert") ] self.logger.info(f"Starting {' '.join(cmd)}") try: # start monitor client process @@ -287,8 +284,14 @@ def _monitor_address(self): monitor_addr = GatewayUtils.escape_address_if_ipv6(monitor_addr) return "{}:{}".format(monitor_addr, monitor_port) - def _add_server_listener(self): - """Adds listener port to server.""" + def _grpc_server(self, address): + """Construct grpc server""" + + # Python 3.8: Default value of max_workers is min(32, os.cpu_count() + 4). + # This default value preserves at least 5 workers for I/O bound tasks. It utilizes at + # most 32 CPU cores for CPU bound tasks which release the GIL. And it avoids using + # very large resources implicitly on many-core machines. + server = grpc.server(futures.ThreadPoolExecutor()) enable_auth = self.config.getboolean("gateway", "enable_auth") if enable_auth: @@ -315,11 +318,13 @@ def _add_server_listener(self): ) # Add secure port using credentials - self.server.add_secure_port( - self._gateway_address(), server_credentials) + server.add_secure_port( + address, server_credentials) else: # Authentication is not enabled - self.server.add_insecure_port(self._gateway_address()) + server.add_insecure_port(address) + + return server def _get_spdk_rpc_socket_path(self, omap_state) -> str: # For backward compatibility, try first to get the old attribute diff --git a/docker-compose.yaml b/docker-compose.yaml index a0e3029ee..9d29896a2 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -68,6 +68,10 @@ services: sh -c './vstart.sh --new $$CEPH_VSTART_ARGS && ceph osd pool create rbd && echo ceph dashboard nvmeof-gateway-add -i <(echo nvmeof-devel:5500) nvmeof.1 && + pushd /etc/ceph && + openssl req -x509 -newkey rsa:4096 -nodes -keyout server.key -out server.crt -days 3650 -subj /CN=my.server && + openssl req -x509 -newkey rsa:4096 -nodes -keyout client.key -out client.crt -days 3650 -subj /CN=client1 && + popd && sleep infinity' healthcheck: test: ceph osd pool stats rbd diff --git a/tests/ceph-nvmeof.tls.conf b/tests/ceph-nvmeof.tls.conf new file mode 100644 index 000000000..fcce445d3 --- /dev/null +++ b/tests/ceph-nvmeof.tls.conf @@ -0,0 +1,78 @@ +# +# Copyright (c) 2021 International Business Machines +# All rights reserved. +# +# SPDX-License-Identifier: LGPL-3.0-or-later +# +# Authors: anita.shekar@ibm.com, sandy.kaur@ibm.com +# + +[gateway] +name = +group = +addr = 0.0.0.0 +port = 5500 +enable_auth = True +state_update_notify = True +state_update_interval_sec = 5 +enable_spdk_discovery_controller = False +#omap_file_lock_duration = 20 +#omap_file_lock_retries = 30 +#omap_file_lock_retry_sleep_interval = 1.0 +#omap_file_update_reloads = 10 +#enable_prometheus_exporter = True +#prometheus_exporter_ssl = True +#prometheus_port = 10008 +#prometheus_bdev_pools = rbd +#prometheus_stats_interval = 10 +#verify_nqns = True +#allowed_consecutive_spdk_ping_failures = 1 +#spdk_ping_interval_in_seconds = 2.0 +#ping_spdk_under_lock = False + +[gateway-logs] +log_level=debug +#log_files_enabled = True +#log_files_rotation_enabled = True +#verbose_log_messages = True +#max_log_file_size_in_mb=10 +#max_log_files_count=20 +#max_log_directory_backups=10 +# +# Notice that if you change the log directory the log files will only be visible inside the container +# +#log_directory = /var/log/ceph/ + +[discovery] +addr = 0.0.0.0 +port = 8009 + +[ceph] +pool = rbd +config_file = /etc/ceph/ceph.conf + +[mtls] +server_key = /etc/ceph/server.key +client_key = /etc/ceph/client.key +server_cert = /etc/ceph/server.crt +client_cert = /etc/ceph/client.crt + +[spdk] +bdevs_per_cluster = 32 +tgt_path = /usr/local/bin/nvmf_tgt +#rpc_socket_dir = /var/tmp/ +#rpc_socket_name = spdk.sock +#tgt_cmd_extra_args = --env-context="--no-huge -m1024" --iova-mode=va +timeout = 60.0 +log_level = WARNING + +# Example value: -m 0x3 -L all +# tgt_cmd_extra_args = + +# transports = tcp + +# Example value: {"max_queue_depth" : 16, "max_io_size" : 4194304, "io_unit_size" : 1048576, "zcopy" : false} +transport_tcp_options = {"in_capsule_data_size" : 8192, "max_io_qpairs_per_ctrlr" : 7} + +[monitor] +#timeout = 1.0