Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a CLI command to show SPDK ANA states #1018

Merged
merged 1 commit into from
Jan 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 83 additions & 0 deletions control/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -511,10 +511,88 @@ def gw_set_log_level(self, args):
else:
assert False

def gw_listener_info(self, args):
"""Show gateway's listeners info"""

out_func, err_func = self.get_output_functions(args)
listeners_info = None
try:
list_req = pb2.show_gateway_listeners_info_req(subsystem_nqn=args.subsystem)
listeners_info = self.stub.show_gateway_listeners_info(list_req)
except Exception as ex:
listeners_info = pb2.gateway_listeners_info(status=errno.EINVAL,
error_message=f"Failure listing gateway "
f"listeners info:\n{ex}",
gw_listeners=[])

if args.format == "text" or args.format == "plain":
if listeners_info.status == 0:
listeners_list = []
for lstnr in listeners_info.gw_listeners:
ana_states = ""
for ana in lstnr.lb_states:
if not args.verbose and ana.state != pb2.ana_state.OPTIMIZED:
continue
state_str = GatewayEnumUtils.get_key_from_value(pb2.ana_state, ana.state)
if state_str is None:
ana_states += str(ana.grp_id) + ": " + str(ana.state) + "\n"
else:
ana_states += str(ana.grp_id) + ": " + state_str.title() + "\n"
adrfam = GatewayEnumUtils.get_key_from_value(pb2.AddressFamily,
lstnr.listener.adrfam)
adrfam = self.format_adrfam(adrfam)
secure = "Yes" if lstnr.listener.secure else "No"
ana_states = ana_states.removesuffix("\n")
listeners_list.append([lstnr.listener.host_name,
lstnr.listener.trtype,
adrfam,
f"{lstnr.listener.traddr}:{lstnr.listener.trsvcid}",
secure,
ana_states])
if len(listeners_list) > 0:
if args.format == "text":
table_format = "fancy_grid"
else:
table_format = "plain"
listeners_out = tabulate(listeners_list,
headers=["Host",
"Transport",
"Address Family",
"Address",
"Secure",
"Load Balancing Group ID/State"],
tablefmt=table_format)
out_func(f"Gateway listeners for {args.subsystem}:\n{listeners_out}")
else:
out_func(f"No gateway listeners for {args.subsystem}")
else:
err_func(f"{listeners_info.error_message}")
elif args.format == "json" or args.format == "yaml":
ret_str = json_format.MessageToJson(listeners_info, indent=4,
including_default_value_fields=True,
preserving_proto_field_name=True)
if args.format == "json":
out_func(ret_str)
elif args.format == "yaml":
obj = json.loads(ret_str)
out_func(yaml.dump(obj))
elif args.format == "python":
return listeners_info
else:
assert False

return listeners_info.status

gw_set_log_level_args = [
argument("--level", "-l", help="Gateway log level", required=True,
type=str, choices=get_enum_keys_list(pb2.GwLogLevel, False)),
]
gw_listener_info_args = [
argument("--subsystem",
"-n",
help="Subsystem NQN",
required=True),
]
gw_actions = []
gw_actions.append({"name": "version",
"args": [],
Expand All @@ -528,6 +606,9 @@ def gw_set_log_level(self, args):
gw_actions.append({"name": "set_log_level",
"args": gw_set_log_level_args,
"help": "Set gateway's log level"})
gw_actions.append({"name": "listener_info",
"args": gw_listener_info_args,
"help": "Show listeners information for the gateway"})
gw_choices = get_actions(gw_actions)

@cli.cmd(gw_actions)
Expand All @@ -542,6 +623,8 @@ def gw(self, args):
return self.gw_get_log_level(args)
elif args.action == "set_log_level":
return self.gw_set_log_level(args)
elif args.action == "listener_info":
return self.gw_listener_info(args)
if not args.action:
self.cli.parser.error(f"missing action for gw command (choose from "
f"{GatewayClient.gw_choices})")
Expand Down
114 changes: 109 additions & 5 deletions control/grpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -565,6 +565,17 @@ def __init__(self, config: GatewayConfig, gateway_state: GatewayStateHandler,
self.ana_grp_ns_load = {}
self.ana_grp_subs_load = defaultdict(dict)
self.max_ana_grps = self.config.getint_with_default("gateway", "max_gws_in_grp", 16)
if self.max_ana_grps > self.max_namespaces:
self.logger.warning(f"Maximal number of load balancing groups can't be greather "
f"than the maximal number of namespaces, will truncate "
f"to {self.max_namespaces}")
self.max_ana_grps = self.max_namespaces

if self.max_namespaces_per_subsystem > self.max_namespaces:
self.logger.warning(f"Maximal number of namespace per subsystem can't be greater "
f"than the global maximal number of namespaces, will truncate "
f"to {self.max_namespaces}")
self.max_namespaces_per_subsystem = self.max_namespaces

for i in range(self.max_ana_grps + 1):
self.ana_grp_ns_load[i] = 0
Expand Down Expand Up @@ -1500,8 +1511,8 @@ def create_namespace(self, subsystem_nqn, bdev_name, nsid, anagrpid, uuid,
add_namespace_error_prefix = f"Failure adding namespace{nsid_msg} to {subsystem_nqn}"

peer_msg = self.get_peer_message(context)
self.logger.info(f"Received request to add {bdev_name} to {subsystem_nqn} with ANA group "
f"id {anagrpid}{nsid_msg}, auto_visible: {auto_visible}, "
self.logger.info(f"Received request to add {bdev_name} to {subsystem_nqn} with load "
f"balancing group id {anagrpid}{nsid_msg}, auto_visible: {auto_visible}, "
f"context: {context}{peer_msg}")

if subsystem_nqn not in self.subsys_max_ns:
Expand Down Expand Up @@ -1746,7 +1757,7 @@ def namespace_add_safe(self, request, context):
self.gateway_group)
else:
anagrp = self.choose_anagrpid_for_namespace(request.nsid)
assert anagrp != 0, "Chosen ANA group is 0"
assert anagrp != 0, "Chosen load balancing group is 0"

if request.nsid:
ns = self.subsystem_nsid_bdev_and_uuid.find_namespace(request.subsystem_nqn,
Expand Down Expand Up @@ -1788,7 +1799,7 @@ def namespace_add_safe(self, request, context):
# If an explicit load balancing group was passed, make sure it exists
if request.anagrpid != 0:
if request.anagrpid not in grps_list:
self.logger.debug(f"ANA groups: {grps_list}")
self.logger.debug(f"Load balancing groups: {grps_list}")
errmsg = f"Failure adding namespace {nsid_msg}to " \
f"{request.subsystem_nqn}: Load balancing group " \
f"{request.anagrpid} doesn't exist"
Expand Down Expand Up @@ -1897,7 +1908,7 @@ def namespace_change_load_balancing_group_safe(self, request, context):
grps_list = self.ceph_utils.get_number_created_gateways(
self.gateway_pool, self.gateway_group)
if request.anagrpid not in grps_list:
self.logger.debug(f"ANA groups: {grps_list}")
self.logger.debug(f"Load balancing groups: {grps_list}")
errmsg = f"{change_lb_group_failure_prefix}: Load balancing group " \
f"{request.anagrpid} doesn't exist"
self.logger.error(errmsg)
Expand Down Expand Up @@ -4168,6 +4179,99 @@ def list_listeners_safe(self, request, context):
def list_listeners(self, request, context=None):
return self.execute_grpc_function(self.list_listeners_safe, request, context)

def show_gateway_listeners_info_safe(self, request, context):
"""Show gateway's listeners info."""

peer_msg = self.get_peer_message(context)
self.logger.info(f"Received request to show gateway listeners info for "
f"{request.subsystem_nqn}, context: {context}{peer_msg}")

if self.ana_grp_state[0] != pb2.ana_state.INACCESSIBLE:
errmsg = "Internal error, we shouldn't have a real state for load balancing group 0"
self.logger.error(errmsg)
return pb2.gateway_listeners_info(status=errno.EINVAL,
error_message=errmsg,
gw_listeners=[])

try:
ret = rpc_nvmf.nvmf_subsystem_get_listeners(self.spdk_rpc_client,
nqn=request.subsystem_nqn)
self.logger.debug(f"get_listeners: {ret}")
except Exception as ex:
errmsg = "Failure listing gateway listeners"
self.logger.exception(errmsg)
errmsg = f"{errmsg}:\n{ex}"
resp = self.parse_json_exeption(ex)
status = errno.ENODEV
if resp:
status = resp["code"]
errmsg = f"Failure listing gateway listeners: {resp['message']}"
return pb2.gateway_listeners_info(status=status,
error_message=errmsg,
gw_listeners=[])

gw_listeners = []
for lstnr in ret:
try:
secure = False
if request.subsystem_nqn in self.subsystem_listeners:
local_lstnr = (lstnr["address"]["adrfam"].lower(),
lstnr["address"]["traddr"],
int(lstnr["address"]["trsvcid"]),
True)
if local_lstnr in self.subsystem_listeners[request.subsystem_nqn]:
secure = True
lstnr_part = pb2.listener_info(host_name=self.host_name,
trtype=lstnr["address"]["trtype"].upper(),
adrfam=lstnr["address"]["adrfam"].lower(),
traddr=lstnr["address"]["traddr"],
trsvcid=int(lstnr["address"]["trsvcid"]),
secure=secure)
except Exception:
self.logger.exception(f"Error getting address from {lstnr}")
continue

ana_states = []
try:
for ana_state in lstnr["ana_states"]:
spdk_group = ana_state["ana_group"]
if spdk_group > self.max_ana_grps:
continue
spdk_state = ana_state["ana_state"]
spdk_state_enum_val = GatewayEnumUtils.get_value_from_key(pb2.ana_state,
spdk_state.upper())
if spdk_state_enum_val is None:
self.logger.error(f"Unknown state \"{spdk_state}\" for "
f"load balancing group {spdk_group} in SPDK")
continue

ana_states.append(pb2.ana_group_state(grp_id=spdk_group,
state=spdk_state_enum_val))
if spdk_group in self.ana_grp_state:
if self.ana_grp_state[spdk_group] != spdk_state_enum_val:
gw_state_str = GatewayEnumUtils.get_key_from_value(
pb2.ana_state, self.ana_grp_state[spdk_group])
if gw_state_str is None:
self.logger.error(f'State for load balancing group {spdk_group} '
f'is "{self.ana_grp_state[spdk_group]}" '
f'but is {spdk_state_enum_val} in SPDK')
else:
self.logger.error(f'State for load balancing group {spdk_group} '
f'is "{gw_state_str}" '
f'but is "{spdk_state}" in SPDK')
except Exception:
self.logger.exception(f"Error parsing load balancing state {ana_state}")
continue

gw_lstnr = pb2.gateway_listener_info(listener=lstnr_part, lb_states=ana_states)
gw_listeners.append(gw_lstnr)

return pb2.gateway_listeners_info(status=0, error_message=os.strerror(0),
gw_listeners=gw_listeners)

def show_gateway_listeners_info(self, request, context=None):
return self.execute_grpc_function(self.show_gateway_listeners_info_safe, request, context)

def list_subsystems_safe(self, request, context):
"""List subsystems."""

Expand Down
18 changes: 18 additions & 0 deletions control/proto/gateway.proto
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,9 @@ service Gateway {

// Set gateway log level
rpc set_gateway_log_level(set_gateway_log_level_req) returns(req_status) {}

// Show gateway listeners info
rpc show_gateway_listeners_info(show_gateway_listeners_info_req) returns(gateway_listeners_info) {}
}

// Request messages
Expand Down Expand Up @@ -299,6 +302,10 @@ message set_gateway_log_level_req {
GwLogLevel log_level = 1;
}

message show_gateway_listeners_info_req {
string subsystem_nqn = 1;
}

// From https://nvmexpress.org/wp-content/uploads/NVM-Express-1_4-2019.06.10-Ratified.pdf page 138
// Asymmetric Namespace Access state for all namespaces in this ANA
// Group when accessed through this controller.
Expand Down Expand Up @@ -455,6 +462,17 @@ message listeners_info {
repeated listener_info listeners = 3;
}

message gateway_listener_info {
listener_info listener = 1;
repeated ana_group_state lb_states = 2;
}

message gateway_listeners_info {
int32 status = 1;
string error_message = 2;
repeated gateway_listener_info gw_listeners = 3;
}

message host {
string nqn = 1;
optional bool use_psk = 2;
Expand Down
Loading
Loading