Skip to content

Commit

Permalink
Encrypt keys before saving in OMAP file.
Browse files Browse the repository at this point in the history
Fixes #960

Signed-off-by: Gil Bregman <[email protected]>
  • Loading branch information
gbregman committed Dec 1, 2024
1 parent 9f1c300 commit 4c333c9
Show file tree
Hide file tree
Showing 17 changed files with 674 additions and 356 deletions.
2 changes: 2 additions & 0 deletions .env
Original file line number Diff line number Diff line change
Expand Up @@ -101,3 +101,5 @@ DHCHAP_KEY6="DHHC-1:01:Bu4tZd7X2oW7XxmVH5tGCdoS30pDX6bZvexHYoudeVlJW9yz:"
DHCHAP_KEY7="DHHC-1:01:JPJkDQ2po2FfLmKYlTF/sJ2HzVO/FKWxgXKE/H6XfL8ogQ1T:"
DHCHAP_KEY8="DHHC-1:01:e0B0vDxKleDzYVtG42xqFvoWZfiufkoywmfRKrETzayRdf1j:"
DHCHAP_KEY9="DHHC-1:01:KD+sfH3/o2bRQoV0ESjBUywQlMnSaYpZISUbVa0k0nsWpNST:"
DHCHAP_KEY10="DHHC-1:00:rWf0ZFYO7IgWGttM8w6jUrAY4cTQyqyXPdmxHeOSve3w5QU9:"
DHCHAP_KEY11="DHHC-1:02:j3uUz05r5aQy42vX4tDXqVf9HgUPPdEp3kXTgUWl9EphsG7jwpr9KSIt3bmRLXBijPTIDQ==:"
4 changes: 3 additions & 1 deletion ceph-nvmeof.conf
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ state_update_notify = True
state_update_timeout_in_msec = 2000
state_update_interval_sec = 5
enable_spdk_discovery_controller = False
enable_key_encryption = True
encryption_key = /etc/ceph/encryption.key
#omap_file_lock_duration = 20
#omap_file_lock_retries = 30
#omap_file_lock_retry_sleep_interval = 1.0
Expand All @@ -29,7 +31,7 @@ enable_spdk_discovery_controller = False
#verify_nqns = True
#allowed_consecutive_spdk_ping_failures = 1
#spdk_ping_interval_in_seconds = 2.0
#max_hosts_per_namespace = 1
#max_hosts_per_namespace = 8
#max_namespaces_with_netmask = 1000
#max_subsystems = 128
#max_namespaces = 1024
Expand Down
5 changes: 4 additions & 1 deletion control/discovery.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from .config import GatewayConfig
from .state import GatewayState, LocalGatewayState, OmapGatewayState, GatewayStateHandler
from .utils import GatewayLogger
from .utils import GatewayUtilsCrypto
from .proto import gateway_pb2 as pb2

import rados
Expand Down Expand Up @@ -1129,8 +1130,10 @@ def start_service(self):
t.start()

local_state = LocalGatewayState()
dummy_crypto = GatewayUtilsCrypto(None)
gateway_state = GatewayStateHandler(self.config, local_state,
self.omap_state, self._state_notify_update, f"discovery-{socket.gethostname()}")
self.omap_state, self._state_notify_update,
dummy_crypto, f"discovery-{socket.gethostname()}")
gateway_state.start_update()

try:
Expand Down
133 changes: 73 additions & 60 deletions control/grpc.py

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions control/proto/gateway.proto
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ message create_subsystem_req {
bool enable_ha = 4;
optional bool no_group_append = 5;
optional string dhchap_key = 6;
optional bool key_encrypted = 7;
}

message delete_subsystem_req {
Expand All @@ -215,6 +216,8 @@ message add_host_req {
string host_nqn = 2;
optional string psk = 3;
optional string dhchap_key = 4;
optional bool psk_encrypted = 5;
optional bool key_encrypted = 6;
}

message change_host_key_req {
Expand Down
27 changes: 26 additions & 1 deletion control/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from .config import GatewayConfig
from .utils import GatewayLogger
from .utils import GatewayUtils
from .utils import GatewayUtilsCrypto
from .cephutils import CephUtils
from .prometheus import start_exporter

Expand Down Expand Up @@ -114,6 +115,20 @@ def __init__(self, config: GatewayConfig):
self.monitor_client_log_file_path = None
self.omap_state = None
self.omap_lock = None
enc_key = None
enc_key_file = self.config.get_with_default("gateway", "encryption_key", "")
if enc_key_file:
try:
enc_key = GatewayUtilsCrypto.read_encryption_key(enc_key_file)
except Exception:
self.logger.exception(f"Got an error trying to read encryption key {enc_key_file}, will use fallback key")
if enc_key:
self.logger.info(f"Read encryption key from {enc_key_file}")
else:
# Use this key as the fallback in case the user didn't set his/her own key
self.logger.warning(f"No valid key file was set, will use fallback encryption key")
enc_key = b"Ubp_Ll9pnyXtluCvqyBrnta2Y7ju9qexc_f0iWYnhWI="
self.crypto = GatewayUtilsCrypto(enc_key)

self.name = self.config.get("gateway", "name")
if not self.name:
Expand Down Expand Up @@ -235,7 +250,8 @@ def serve(self):
self._start_discovery_service()

# Register service implementation with server
gateway_state = GatewayStateHandler(self.config, local_state, omap_state, self.gateway_rpc_caller, f"gateway-{self.name}")
gateway_state = GatewayStateHandler(self.config, local_state, omap_state,
self.gateway_rpc_caller, self.crypto, f"gateway-{self.name}")
self.omap_lock = OmapLock(omap_state, gateway_state, self.rpc_lock)
self.gateway_rpc = GatewayService(self.config, gateway_state, self.rpc_lock, self.omap_lock, self.group_id, self.spdk_rpc_client, self.spdk_rpc_subsystems_client, self.ceph_utils)
self.server = self._grpc_server(self._gateway_address())
Expand Down Expand Up @@ -764,6 +780,9 @@ def gateway_rpc_caller(self, requests, is_add_req):
if key.startswith(GatewayState.SUBSYSTEM_PREFIX):
if is_add_req:
req = json_format.Parse(val, pb2.create_subsystem_req(), ignore_unknown_fields=True)
if req.key_encrypted and req.dhchap_key:
req.dhchap_key = self.crypto.decrypt_text(req.dhchap_key)
req.key_encrypted = False
self.gateway_rpc.create_subsystem(req)
else:
req = json_format.Parse(val,
Expand All @@ -789,6 +808,12 @@ def gateway_rpc_caller(self, requests, is_add_req):
elif key.startswith(GatewayState.HOST_PREFIX):
if is_add_req:
req = json_format.Parse(val, pb2.add_host_req(), ignore_unknown_fields=True)
if req.key_encrypted and req.dhchap_key:
req.dhchap_key = self.crypto.decrypt_text(req.dhchap_key)
req.key_encrypted = False
if req.psk_encrypted and req.psk:
req.psk = self.crypto.decrypt_text(req.psk)
req.psk_encrypted = False
self.gateway_rpc.add_host(req)
else:
req = json_format.Parse(val, pb2.remove_host_req(), ignore_unknown_fields=True)
Expand Down
90 changes: 48 additions & 42 deletions control/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from abc import ABC, abstractmethod
from .utils import GatewayLogger
from .utils import GatewayUtils
from .utils import GatewayUtilsCrypto
from google.protobuf import json_format
from .proto import gateway_pb2 as pb2

Expand Down Expand Up @@ -629,8 +630,9 @@ class GatewayStateHandler:
use_notify: Flag to indicate use of OMAP watch/notify
"""

def __init__(self, config, local, omap, gateway_rpc_caller, id_text=""):
def __init__(self, config, local, omap, gateway_rpc_caller, crypto, id_text=""):
self.config = config
self.crypto = crypto
self.local = local
self.omap = omap
self.gateway_rpc_caller = gateway_rpc_caller
Expand Down Expand Up @@ -770,26 +772,35 @@ def host_only_key_changed(self, old_val, new_val):
old_req = json_format.Parse(old_val, pb2.add_host_req(), ignore_unknown_fields=True )
except json_format.ParseError:
self.logger.exception(f"Got exception parsing {old_val}")
return (False, None)
return (False, None, b"")
try:
new_req = json_format.Parse(new_val, pb2.add_host_req(), ignore_unknown_fields=True)
except json_format.ParseError:
self.logger.exeption(f"Got exception parsing {new_val}")
return (False, None)
return (False, None, b"")
if not old_req or not new_req:
self.logger.debug(f"Failed to parse requests, old: {old_val} -> {old_req}, new: {new_val} -> {new_req}")
return (False, None)
return (False, None, b"")
assert old_req != new_req, f"Something was wrong we shouldn't get identical old and new values ({old_req})"
# Because of Json formatting of empty fields we might get a difference here, so just use the same values for empty
if not old_req.dhchap_key:
old_req.dhchap_key = ""
if not new_req.dhchap_key:
new_req.dhchap_key = ""
if not old_req.key_encrypted:
old_req.key_encrypted = False
if not new_req.key_encrypted:
new_req.key_encrypted = False
if not old_req.psk_encrypted:
old_req.psk_encrypted = False
if not new_req.psk_encrypted:
new_req.psk_encrypted = False
old_req.dhchap_key = new_req.dhchap_key
old_req.key_encrypted = new_req.key_encrypted
if old_req != new_req:
# Something besides the keys is different
return (False, None)
return (True, new_req.dhchap_key)
return (False, None, b"")
return (True, new_req.dhchap_key, new_req.key_encrypted)

def subsystem_only_key_changed(self, old_val, new_val):
# If only the dhchap key field has changed we can use change_key request instead of re-adding the subsystem
Expand All @@ -799,26 +810,27 @@ def subsystem_only_key_changed(self, old_val, new_val):
old_req = json_format.Parse(old_val, pb2.create_subsystem_req(), ignore_unknown_fields=True )
except json_format.ParseError:
self.logger.exception(f"Got exception parsing {old_val}")
return (False, None)
return (False, None, b"")
try:
new_req = json_format.Parse(new_val, pb2.create_subsystem_req(), ignore_unknown_fields=True)
except json_format.ParseError:
self.logger.exeption(f"Got exception parsing {new_val}")
return (False, None)
return (False, None, b"")
if not old_req or not new_req:
self.logger.debug(f"Failed to parse requests, old: {old_val} -> {old_req}, new: {new_val} -> {new_req}")
return (False, None)
return (False, None, b"")
assert old_req != new_req, f"Something was wrong we shouldn't get identical old and new values ({old_req})"
# Because of Json formatting of empty fields we might get a difference here, so just use the same values for empty
if not old_req.dhchap_key:
old_req.dhchap_key = ""
if not new_req.dhchap_key:
new_req.dhchap_key = ""
old_req.dhchap_key = new_req.dhchap_key
old_req.key_encrypted = new_req.key_encrypted
if old_req != new_req:
# Something besides the keys is different
return (False, None)
return (True, new_req.dhchap_key)
return (False, None, b"")
return (True, new_req.dhchap_key, new_req.key_encrypted)

def break_namespace_key(self, ns_key: str):
if not ns_key.startswith(GatewayState.NAMESPACE_PREFIX):
Expand Down Expand Up @@ -940,35 +952,26 @@ def update(self) -> bool:
subsystem_prefix = GatewayState.build_subsystem_key("nqn")
for key in changed.keys():
if key.startswith(ns_prefix):
try:
(should_process, new_lb_grp_id) = self.namespace_only_lb_group_id_changed(local_state_dict[key],
omap_state_dict[key])
if should_process:
assert new_lb_grp_id, "Shouldn't get here with an empty lb group id"
self.logger.debug(f"Found {key} where only the load balancing group id has changed. The new group id is {new_lb_grp_id}")
only_lb_group_changed.append((key, new_lb_grp_id))
except Exception as ex:
self.logger.warning("Got exception checking namespace for load balancing group id change")
(should_process,
new_lb_grp_id) = self.namespace_only_lb_group_id_changed(local_state_dict[key], omap_state_dict[key])
if should_process:
assert new_lb_grp_id, "Shouldn't get here with an empty load balancing group id"
self.logger.debug(f"Found {key} where only the load balancing group id has changed. The new group id is {new_lb_grp_id}")
only_lb_group_changed.append((key, new_lb_grp_id))
elif key.startswith(host_prefix):
try:
(should_process,
new_dhchap_key) = self.host_only_key_changed(local_state_dict[key], omap_state_dict[key])
if should_process:
assert new_dhchap_key, "Shouldn't get here with an empty dhchap key"
self.logger.debug(f"Found {key} where only the key has changed. The new DHCHAP key is {new_dhchap_key}")
only_host_key_changed.append((key, new_dhchap_key))
except Exception as ex:
self.logger.warning("Got exception checking host for key change")
(should_process,
new_dhchap_key,
new_key_encrypted) = self.host_only_key_changed(local_state_dict[key], omap_state_dict[key])
if should_process:
self.logger.debug(f"Found {key} where only the key has changed.")
only_host_key_changed.append((key, new_dhchap_key, new_key_encrypted))
elif key.startswith(subsystem_prefix):
try:
(should_process,
new_dhchap_key) = self.subsystem_only_key_changed(local_state_dict[key], omap_state_dict[key])
if should_process:
assert new_dhchap_key, "Shouldn't get here with an empty dhchap key"
self.logger.debug(f"Found {key} where only the key has changed. The new DHCHAP key is {new_dhchap_key}")
only_subsystem_key_changed.append((key, new_dhchap_key))
except Exception as ex:
self.logger.warning("Got exception checking subsystem for key change")
(should_process,
new_dhchap_key,
new_key_encrypted) = self.subsystem_only_key_changed(local_state_dict[key], omap_state_dict[key])
if should_process:
self.logger.debug(f"Found {key} where only the key has changed.")
only_subsystem_key_changed.append((key, new_dhchap_key, new_key_encrypted))

for ns_key, new_lb_grp in only_lb_group_changed:
ns_nqn = None
Expand All @@ -989,7 +992,7 @@ def update(self) -> bool:
except Exception as ex:
self.logger.error(f"Exception formatting change namespace load balancing group request:\n{ex}")

for host_key, new_dhchap_key in only_host_key_changed:
for host_key, new_dhchap_key, new_key_encrypted in only_host_key_changed:
subsys_nqn = None
host_nqn = None
try:
Expand All @@ -1003,15 +1006,16 @@ def update(self) -> bool:
if subsys_nqn and host_nqn:
try:
host_key_key = GatewayState.build_host_key_key(subsys_nqn, host_nqn)
req = pb2.change_host_key_req(subsystem_nqn=subsys_nqn, host_nqn=host_nqn,
dhchap_key=new_dhchap_key)
if new_key_encrypted and new_dhchap_key:
new_dhchap_key = self.crypto.decrypt_text(new_dhchap_key)
req = pb2.change_host_key_req(subsystem_nqn=subsys_nqn, host_nqn=host_nqn, dhchap_key=new_dhchap_key)
json_req = json_format.MessageToJson(req, preserving_proto_field_name=True,
including_default_value_fields=True)
added[host_key_key] = json_req
except Exception as ex:
self.logger.error(f"Exception formatting change host key request:\n{ex}")

for subsys_key, new_dhchap_key in only_subsystem_key_changed:
for subsys_key, new_dhchap_key, new_key_encrypted in only_subsystem_key_changed:
subsys_nqn = None
try:
changed.pop(subsys_key)
Expand All @@ -1021,6 +1025,8 @@ def update(self) -> bool:
if subsys_nqn:
try:
subsys_key_key = GatewayState.build_subsystem_key_key(subsys_nqn)
if new_key_encrypted and new_dhchap_key:
new_dhchap_key = self.crypto.decrypt_text(new_dhchap_key)
req = pb2.change_subsystem_key_req(subsystem_nqn=subsys_nqn, dhchap_key=new_dhchap_key)
json_req = json_format.MessageToJson(req, preserving_proto_field_name=True,
including_default_value_fields=True)
Expand Down
45 changes: 44 additions & 1 deletion control/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
import shutil
import netifaces
from typing import Tuple, List

from cryptography.fernet import Fernet
import base64

class GatewayEnumUtils:
def get_value_from_key(e_type, keyval, ignore_case = False):
Expand Down Expand Up @@ -182,6 +183,48 @@ def is_valid_nqn(nqn):

return (0, os.strerror(0))

class GatewayUtilsCrypto:
KEY_SIZE = 32

def __init__(self, encryption_key: bytes):
if encryption_key:
self.__secret_box = Fernet(encryption_key)
else:
self.__secret_box = None

@classmethod
def read_encryption_key(cls, keyfile : str) -> bytes:
keyval = ""
encoded_key = None
with open(keyfile) as f:
for line in f:
if line.startswith("-----BEGIN PRIVATE KEY-----"):
continue
if line.startswith("-----END PRIVATE KEY-----"):
continue
keyval += line.rstrip('\n')

keybytes = base64.b64decode(keyval)
if len(keybytes) < cls.KEY_SIZE:
raise RuntimeError(f"Encryption key has length {len(keybytes)} which is too short. The minimal length is {cls.KEY_SIZE}")
encoded_key = base64.urlsafe_b64encode(keybytes[:cls.KEY_SIZE])

return encoded_key

def encrypt_text(self, msg : str) -> str:
if self.__secret_box:
encrypted = base64.b64encode(self.__secret_box.encrypt(msg.encode("utf-8"))).decode("utf-8")
else:
encrypted = msg
return encrypted

def decrypt_text(self, msg : str) -> str:
if self.__secret_box:
plain = self.__secret_box.decrypt(base64.b64decode(msg.encode("utf-8"))).decode("utf-8")
else:
plain = msg
return plain

class GatewayLogger:
CEPH_LOG_DIRECTORY = "/var/log/ceph/"
MAX_LOG_FILE_SIZE_DEFAULT = 10
Expand Down
Loading

0 comments on commit 4c333c9

Please sign in to comment.