From ddc55789d716e6b7253105a37ded176ac6917513 Mon Sep 17 00:00:00 2001 From: Stephen Bates Date: Fri, 22 Nov 2024 13:49:35 -0700 Subject: [PATCH] ocp-nvme: Add a different formatting for JSON output The current OCP JSON format for the SMART extended log page is not condusive to metric collection via tools like Prometheus. So we add a new output mode that uses all lower case and underscores (instead of spaces). This should help with metric collection. At the same time we clean up some of the field names. We add a new argument (--output-format-version) to allow us to select which output version we want. Documentation updated to reflect this change and mark this as experimental. Fixes #2577. Signed-off-by: Stephen Bates --- Documentation/nvme-ocp-smart-add-log.txt | 10 +- plugins/ocp/ocp-print-json.c | 125 ++++++++++++++++++++++- plugins/ocp/ocp-print-stdout.c | 2 +- plugins/ocp/ocp-print.c | 4 +- plugins/ocp/ocp-print.h | 4 +- plugins/ocp/ocp-smart-extended-log.c | 11 +- 6 files changed, 146 insertions(+), 10 deletions(-) diff --git a/Documentation/nvme-ocp-smart-add-log.txt b/Documentation/nvme-ocp-smart-add-log.txt index 66a55a90ec..1b9b746e2d 100644 --- a/Documentation/nvme-ocp-smart-add-log.txt +++ b/Documentation/nvme-ocp-smart-add-log.txt @@ -9,7 +9,7 @@ compliant device SYNOPSIS -------- [verse] -'nvme ocp smart-add-log' [--output-format= | -o ] +'nvme ocp smart-add-log' [--output-format= | -o ] [--output-format-version=] DESCRIPTION ----------- @@ -22,6 +22,10 @@ device (ex: /dev/nvme0) or block device (ex: /dev/nvme0n1). This will only work on OCP compliant devices supporting this feature. Results for any other device are undefined. +EXPERIMENTAL. The --output-format-version can be set to 2 to generate field names +for the outputs that are easier to process via scripts. Note this is +experimental and the field names are subject to change. + On success it returns 0, error code otherwise. OPTIONS @@ -31,6 +35,10 @@ OPTIONS Set the reporting format to 'normal' or 'json'. Only one output format can be used at a time. The default is normal. +--output-format-version=:: + Set the field labels in the reporting format to either '1' + (the original) or '2'. The default is 1. + EXAMPLES -------- * Has the program issue a smart-add-log command to retrieve the 0xC0 log page. diff --git a/plugins/ocp/ocp-print-json.c b/plugins/ocp/ocp-print-json.c index e62dfb3ab8..acb3f89610 100644 --- a/plugins/ocp/ocp-print-json.c +++ b/plugins/ocp/ocp-print-json.c @@ -136,7 +136,7 @@ static void json_fw_activation_history(const struct fw_activation_history *fw_hi printf("\n"); } -static void json_smart_extended_log(void *data) +static void json_smart_extended_log_v1(void *data) { struct json_object *root; struct json_object *pmuw; @@ -248,6 +248,129 @@ static void json_smart_extended_log(void *data) json_free_object(root); } +static void json_smart_extended_log_v2(void *data) +{ + struct json_object *root; + struct json_object *pmuw; + struct json_object *pmur; + uint16_t smart_log_ver = 0; + __u8 *log_data = data; + char guid[40]; + + root = json_create_object(); + pmuw = json_create_object(); + pmur = json_create_object(); + + json_object_add_value_uint64(pmuw, "hi", + (uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_PMUW + 8] & 0xFFFFFFFFFFFFFFFF)); + json_object_add_value_uint64(pmuw, "lo", + (uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_PMUW] & 0xFFFFFFFFFFFFFFFF)); + json_object_add_value_object(root, "physical_media_units_written", pmuw); + json_object_add_value_uint64(pmur, "hi", + (uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_PMUR + 8] & 0xFFFFFFFFFFFFFFFF)); + json_object_add_value_uint64(pmur, "lo", + (uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_PMUR] & 0xFFFFFFFFFFFFFFFF)); + json_object_add_value_object(root, "physical_media_units_read", pmur); + json_object_add_value_uint64(root, "bad_user_nand_blocks_raw", + (uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_BUNBR] & 0x0000FFFFFFFFFFFF)); + json_object_add_value_uint(root, "bad_user_nand_blocks_normalized", + (uint16_t)le16_to_cpu(*(uint16_t *)&log_data[SCAO_BUNBN])); + json_object_add_value_uint64(root, "bad_system_nand_blocks_raw", + (uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_BSNBR] & 0x0000FFFFFFFFFFFF)); + json_object_add_value_uint(root, "bad_system_nand_blocks_normalized", + (uint16_t)le16_to_cpu(*(uint16_t *)&log_data[SCAO_BSNBN])); + json_object_add_value_uint64(root, "xor_recovery_count", + (uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_XRC])); + json_object_add_value_uint64(root, "uncorrectable_read_errors", + (uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_UREC])); + json_object_add_value_uint64(root, "soft_ecc_error_count", + (uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_SEEC])); + json_object_add_value_uint(root, "end_to_end_detected_errors", + (uint32_t)le32_to_cpu(*(uint32_t *)&log_data[SCAO_EEDC])); + json_object_add_value_uint(root, "end_to_end_corrected_errors", + (uint32_t)le32_to_cpu(*(uint32_t *)&log_data[SCAO_EECE])); + json_object_add_value_uint(root, "system_data_percent_used", + (__u8)log_data[SCAO_SDPU]); + json_object_add_value_uint64(root, "refresh_count", + (uint64_t)(le64_to_cpu(*(uint64_t *)&log_data[SCAO_RFSC]) & 0x00FFFFFFFFFFFFFF)); + json_object_add_value_uint(root, "max_user_data_erase_count", + (uint32_t)le32_to_cpu(*(uint32_t *)&log_data[SCAO_MXUDEC])); + json_object_add_value_uint(root, "min_user_data_erase_count", + (uint32_t)le32_to_cpu(*(uint32_t *)&log_data[SCAO_MNUDEC])); + json_object_add_value_uint(root, "thermal_throttling_events", + (__u8)log_data[SCAO_NTTE]); + json_object_add_value_uint(root, "current_throttling_status", + (__u8)log_data[SCAO_CTS]); + json_object_add_value_uint64(root, "pcie_correctable_errors", + (uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_PCEC])); + json_object_add_value_uint(root, "incomplete_shutdowns", + (uint32_t)le32_to_cpu(*(uint32_t *)&log_data[SCAO_ICS])); + json_object_add_value_uint(root, "percent_free_blocks", + (__u8)log_data[SCAO_PFB]); + json_object_add_value_uint(root, "capacitor_health", + (uint16_t)le16_to_cpu(*(uint16_t *)&log_data[SCAO_CPH])); + json_object_add_value_uint64(root, "unaligned_io", + (uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_UIO])); + json_object_add_value_uint64(root, "security_version_number", + (uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_SVN])); + json_object_add_value_uint64(root, "nuse_namespace_utilization", + (uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_NUSE])); + json_object_add_value_uint128(root, "plp_start_count", + le128_to_cpu(&log_data[SCAO_PSC])); + json_object_add_value_uint128(root, "endurance_estimate", + le128_to_cpu(&log_data[SCAO_EEST])); + smart_log_ver = (uint16_t)le16_to_cpu(*(uint16_t *)&log_data[SCAO_LPV]); + + json_object_add_value_uint(root, "log_page_version", smart_log_ver); + + memset((void *)guid, 0, 40); + sprintf((char *)guid, "0x%"PRIx64"%"PRIx64"", + (uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_LPG + 8]), + (uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_LPG])); + json_object_add_value_string(root, "log_page_guid", guid); + + switch (smart_log_ver) { + case 0 ... 1: + break; + default: + case 4: + json_object_add_value_uint(root, "nvme_command_set_errata_version", + (__u8)log_data[SCAO_NCSEV]); + json_object_add_value_uint(root, "lowest_permitted_firmware_revision", + le64_to_cpu(*(uint64_t *)&log_data[SCAO_PSCC])); + fallthrough; + case 2 ... 3: + json_object_add_value_uint(root, "errata_version_field", + (__u8)log_data[SCAO_EVF]); + json_object_add_value_uint(root, "point_version_field", + le16_to_cpu(*(uint16_t *)&log_data[SCAO_PVF])); + json_object_add_value_uint(root, "minor_version_field", + le16_to_cpu(*(uint16_t *)&log_data[SCAO_MIVF])); + json_object_add_value_uint(root, "major_version_field", + (__u8)log_data[SCAO_MAVF]); + json_object_add_value_uint(root, "nvme_base_errata_version", + (__u8)log_data[SCAO_NBEV]); + json_object_add_value_uint(root, "pcie_link_retraining_count", + le64_to_cpu(*(uint64_t *)&log_data[SCAO_PLRC])); + json_object_add_value_uint(root, "power_state_change_count", + le64_to_cpu(*(uint64_t *)&log_data[SCAO_PSCC])); + } + json_print_object(root, NULL); + printf("\n"); + json_free_object(root); +} + +static void json_smart_extended_log(void *data, unsigned version) +{ + switch (version) { + case 1: + default: + json_smart_extended_log_v1(data); + break; + case 2: + json_smart_extended_log_v2(data); + } +} static void json_telemetry_log(struct ocp_telemetry_parse_options *options) { print_ocp_telemetry_json(options); diff --git a/plugins/ocp/ocp-print-stdout.c b/plugins/ocp/ocp-print-stdout.c index f0bd78f790..e66a1af4ff 100644 --- a/plugins/ocp/ocp-print-stdout.c +++ b/plugins/ocp/ocp-print-stdout.c @@ -98,7 +98,7 @@ static void stdout_fw_activation_history(const struct fw_activation_history *fw_ printf("\n"); } -static void stdout_smart_extended_log(void *data) +static void stdout_smart_extended_log(void *data, unsigned ignored) { uint16_t smart_log_ver = 0; __u8 *log_data = data; diff --git a/plugins/ocp/ocp-print.c b/plugins/ocp/ocp-print.c index 916c653d4a..7004f42675 100644 --- a/plugins/ocp/ocp-print.c +++ b/plugins/ocp/ocp-print.c @@ -36,9 +36,9 @@ void ocp_fw_act_history(const struct fw_activation_history *fw_history, nvme_pri ocp_print(fw_act_history, flags, fw_history); } -void ocp_smart_extended_log(void *data, nvme_print_flags_t flags) +void ocp_smart_extended_log(void *data, unsigned version, nvme_print_flags_t flags) { - ocp_print(smart_extended_log, flags, data); + ocp_print(smart_extended_log, flags, data, version); } void ocp_show_telemetry_log(struct ocp_telemetry_parse_options *options, nvme_print_flags_t flags) diff --git a/plugins/ocp/ocp-print.h b/plugins/ocp/ocp-print.h index 85655b9340..48ea9b4e8c 100644 --- a/plugins/ocp/ocp-print.h +++ b/plugins/ocp/ocp-print.h @@ -10,7 +10,7 @@ struct ocp_print_ops { void (*hwcomp_log)(struct hwcomp_log *log, __u32 id, bool list); void (*fw_act_history)(const struct fw_activation_history *fw_history); - void (*smart_extended_log)(void *data); + void (*smart_extended_log)(void *data, unsigned version); void (*telemetry_log)(struct ocp_telemetry_parse_options *options); void (*c3_log)(struct nvme_dev *dev, struct ssd_latency_monitor_log *log_data); void (*c5_log)(struct nvme_dev *dev, struct unsupported_requirement_log *log_data); @@ -36,7 +36,7 @@ static inline struct ocp_print_ops *ocp_get_json_print_ops(nvme_print_flags_t fl void ocp_show_hwcomp_log(struct hwcomp_log *log, __u32 id, bool list, nvme_print_flags_t flags); void ocp_fw_act_history(const struct fw_activation_history *fw_history, nvme_print_flags_t flags); -void ocp_smart_extended_log(void *data, nvme_print_flags_t flags); +void ocp_smart_extended_log(void *data, unsigned version, nvme_print_flags_t flags); void ocp_show_telemetry_log(struct ocp_telemetry_parse_options *options, nvme_print_flags_t flags); void ocp_c3_log(struct nvme_dev *dev, struct ssd_latency_monitor_log *log_data, nvme_print_flags_t flags); diff --git a/plugins/ocp/ocp-smart-extended-log.c b/plugins/ocp/ocp-smart-extended-log.c index 42e77771df..7ea7c97968 100644 --- a/plugins/ocp/ocp-smart-extended-log.c +++ b/plugins/ocp/ocp-smart-extended-log.c @@ -27,7 +27,8 @@ static __u8 scao_guid[GUID_LEN] = { 0xC9, 0x14, 0xD5, 0xAF }; -static int get_c0_log_page(struct nvme_dev *dev, char *format) +static int get_c0_log_page(struct nvme_dev *dev, char *format, + unsigned format_version) { nvme_print_flags_t fmt; __u8 *data; @@ -86,7 +87,7 @@ static int get_c0_log_page(struct nvme_dev *dev, char *format) } /* print the data */ - ocp_smart_extended_log(data, fmt); + ocp_smart_extended_log(data, format_version, fmt); } else { fprintf(stderr, "ERROR : OCP : Unable to read C0 data from buffer\n"); } @@ -105,14 +106,17 @@ int ocp_smart_add_log(int argc, char **argv, struct command *cmd, struct config { char *output_format; + unsigned int output_format_version; }; struct config cfg = { .output_format = "normal", + .output_format_version = 1, }; OPT_ARGS(opts) = { OPT_FMT("output-format", 'o', &cfg.output_format, "output Format: normal|json"), + OPT_UINT("output-format-version", 0, &cfg.output_format_version, "output Format version: 1|2"), OPT_END() }; @@ -120,7 +124,8 @@ int ocp_smart_add_log(int argc, char **argv, struct command *cmd, if (ret) return ret; - ret = get_c0_log_page(dev, cfg.output_format); + ret = get_c0_log_page(dev, cfg.output_format, + cfg.output_format_version); if (ret) fprintf(stderr, "ERROR : OCP : Failure reading the C0 Log Page, ret = %d\n", ret);