Skip to content

Commit

Permalink
ocp-nvme: Update ocp v2.6 Smart Information Extended
Browse files Browse the repository at this point in the history
Log Identifier C0h.
Spec Documents:
https://www.opencompute.org/documents/datacenter-nvme-ssd-specification-v2-6-2-pdf

Signed-off-by: Minsik Jeon <[email protected]>
Co-authored-by: Steven Seungcheol Lee <[email protected]>
  • Loading branch information
hmi-jeon and sc108-lee committed Jan 8, 2025
1 parent 60e9fb5 commit 4135424
Show file tree
Hide file tree
Showing 3 changed files with 231 additions and 45 deletions.
92 changes: 92 additions & 0 deletions plugins/ocp/ocp-print-json.c
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,10 @@ static void json_smart_extended_log_v1(struct ocp_smart_extended_log *log)
struct json_object *pmur;
uint16_t smart_log_ver = 0;
uint16_t dssd_version = 0;
int i = 0;
char guid[40];
char ascii_arr[65];
char *ascii = ascii_arr;

root = json_create_object();
pmuw = json_create_object();
Expand Down Expand Up @@ -228,6 +231,49 @@ static void json_smart_extended_log_v1(struct ocp_smart_extended_log *log)
log->nvme_cmdset_errata_version);
json_object_add_value_uint(root, "Lowest Permitted Firmware Revision",
le64_to_cpu(log->lowest_permitted_fw_rev));
json_object_add_value_uint(root, "NVMe Over Pcie Errata Version",
log->nvme_over_pcie_errate_version);
json_object_add_value_uint(root, "NVMe Mi Errata Version",
log->nvme_mi_errata_version);
json_object_add_value_uint(root, "Total media dies",
le16_to_cpu(log->total_media_dies));
json_object_add_value_uint(root, "Total die failure tolerance",
le16_to_cpu(log->total_die_failure_tolerance));
json_object_add_value_uint(root, "Max temperature recorded",
le16_to_cpu(log->max_temperature_recorded));
json_object_add_value_uint64(root, "Nand avg erase count",
le64_to_cpu(log->nand_avg_erase_count));
json_object_add_value_uint(root, "Command timeouts",
le32_to_cpu(log->command_timeouts));
json_object_add_value_uint(root, "Sys area program fail count raw",
le32_to_cpu(log->sys_area_program_fail_count_raw));
json_object_add_value_uint(root, "Sys area program fail count noralized",
log->sys_area_program_fail_count_normalized);
json_object_add_value_uint(root, "Sys area uncorrectable read count raw",
le32_to_cpu(log->sys_area_uncorr_read_count_raw));
json_object_add_value_uint(root, "Sys area uncorrectable read count noralized",
log->sys_area_uncorr_read_count_normalized);
json_object_add_value_uint(root, "Sys area erase fail count raw",
le32_to_cpu(log->sys_area_erase_fail_count_raw));
json_object_add_value_uint(root, "Sys area erase fail count noralized",
log->sys_area_erase_fail_count_normalized);
json_object_add_value_uint(root, "Max peak power capability",
le16_to_cpu(log->max_peak_power_capability));
json_object_add_value_uint(root, "Current max avg power",
le16_to_cpu(log->current_max_avg_power));
json_object_add_value_uint64(root, "Lifetime power consumed",
int48_to_long(log->lifetime_power_consumed));
memset((void *)ascii, 0, 65);
for (i = 0; i < 8; i++)
ascii += sprintf(ascii, "%c", log->dssd_firmware_revision[i]);
json_object_add_value_string(root, "Dssd firmware revision", ascii_arr);
json_object_add_value_string(root, "Dssd firmware build UUID",
util_uuid_to_string(log->dssd_firmware_build_uuid));
ascii = ascii_arr;
memset((void *)ascii, 0, 65);
for (i = 0; i < 64; i++)
ascii += sprintf(ascii, "%c", log->dssd_firmware_build_label[i]);
json_object_add_value_string(root, "Dssd firmware build label", ascii_arr);
fallthrough;
case 2 ... 3:
json_object_add_value_uint(root, "Errata Version Field",
Expand Down Expand Up @@ -257,9 +303,12 @@ static void json_smart_extended_log_v2(struct ocp_smart_extended_log *log)
struct json_object *root;
struct json_object *pmuw;
struct json_object *pmur;
int i = 0;
uint16_t smart_log_ver = 0;
uint16_t dssd_version = 0;
char guid[40];
char ascii_arr[65];
char *ascii = ascii_arr;

root = json_create_object();
pmuw = json_create_object();
Expand Down Expand Up @@ -342,6 +391,49 @@ static void json_smart_extended_log_v2(struct ocp_smart_extended_log *log)
log->nvme_cmdset_errata_version);
json_object_add_value_uint(root, "lowest_permitted_firmware_revision",
le64_to_cpu(log->lowest_permitted_fw_rev));
json_object_add_value_uint(root, "nvme_over_pcie_errata_version",
log->nvme_over_pcie_errate_version);
json_object_add_value_uint(root, "nvme_mi_errata_version",
log->nvme_mi_errata_version);
json_object_add_value_uint(root, "total_media_dies",
le16_to_cpu(log->total_media_dies));
json_object_add_value_uint(root, "total_die_failure_tolerance",
le16_to_cpu(log->total_die_failure_tolerance));
json_object_add_value_uint(root, "max_temperature_recorded",
le16_to_cpu(log->max_temperature_recorded));
json_object_add_value_uint64(root, "nand_avg_erase_count",
le64_to_cpu(log->nand_avg_erase_count));
json_object_add_value_uint(root, "command_timeouts",
le32_to_cpu(log->command_timeouts));
json_object_add_value_uint(root, "sys_area_program_fail_count_raw",
le32_to_cpu(log->sys_area_program_fail_count_raw));
json_object_add_value_uint(root, "sys_area_program_fail_count_noralized",
log->sys_area_program_fail_count_normalized);
json_object_add_value_uint(root, "sys_area_uncorrectable_read_count_raw",
le32_to_cpu(log->sys_area_uncorr_read_count_raw));
json_object_add_value_uint(root, "sys_area_uncorrectable_read_count_noralized",
log->sys_area_uncorr_read_count_normalized);
json_object_add_value_uint(root, "sys_area_erase_fail_count_raw",
le32_to_cpu(log->sys_area_erase_fail_count_raw));
json_object_add_value_uint(root, "sys_area_erase_fail_count_noralized",
log->sys_area_erase_fail_count_normalized);
json_object_add_value_uint(root, "max_peak_power_capability",
le16_to_cpu(log->max_peak_power_capability));
json_object_add_value_uint(root, "current_max_avg_power",
le16_to_cpu(log->current_max_avg_power));
json_object_add_value_uint64(root, "lifetime_power_consumed",
int48_to_long(log->lifetime_power_consumed));
memset((void *)ascii, 0, 65);
for (i = 0; i < 8; i++)
ascii += sprintf(ascii, "%c", log->dssd_firmware_revision[i]);
json_object_add_value_string(root, "dssd_firmware_revision", ascii_arr);
json_object_add_value_string(root, "dssd_firmware_build_uuid",
util_uuid_to_string(log->dssd_firmware_build_uuid));
ascii = ascii_arr;
memset((void *)ascii, 0, 65);
for (i = 0; i < 64; i++)
ascii += sprintf(ascii, "%c", log->dssd_firmware_build_label[i]);
json_object_add_value_string(root, "dssd_firmware_build_label", ascii_arr);
fallthrough;
case 2 ... 3:
json_object_add_value_uint(root, "errata_version_field",
Expand Down
45 changes: 45 additions & 0 deletions plugins/ocp/ocp-print-stdout.c
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ static void stdout_smart_extended_log(struct ocp_smart_extended_log *log, unsign
{
uint16_t smart_log_ver = 0;
uint16_t dssd_version = 0;
int i = 0;

printf("SMART Cloud Attributes :-\n");

Expand Down Expand Up @@ -172,6 +173,50 @@ static void stdout_smart_extended_log(struct ocp_smart_extended_log *log, unsign
log->nvme_cmdset_errata_version);
printf(" Lowest Permitted Firmware Revision %"PRIu64"\n",
le64_to_cpu(log->lowest_permitted_fw_rev));
printf(" NVMe Over Pcie Errata Version %d\n",
log->nvme_over_pcie_errate_version);
printf(" NVMe Mi Errata Version %d\n",
log->nvme_mi_errata_version);
printf(" Total media dies %"PRIu16"\n",
le16_to_cpu(log->total_media_dies));
printf(" Total die failure tolerance %"PRIu16"\n",
le16_to_cpu(log->total_die_failure_tolerance));
printf(" Media dies offline %"PRIu16"\n",
le16_to_cpu(log->media_dies_offline));
printf(" Max temperature recorded %d\n",
le16_to_cpu(log->max_temperature_recorded));
printf(" Nand avg erase count %"PRIu64"\n",
le64_to_cpu(log->nand_avg_erase_count));
printf(" Command timeouts %"PRIu32"\n",
le32_to_cpu(log->command_timeouts));
printf(" Sys area program fail count raw %"PRIu32"\n",
le32_to_cpu(log->sys_area_program_fail_count_raw));
printf(" Sys area program fail count noralized %d\n",
le32_to_cpu(log->sys_area_program_fail_count_normalized));
printf(" Sys area uncorrectable read count raw %"PRIu32"\n",
le32_to_cpu(log->sys_area_uncorr_read_count_raw));
printf(" Sys area uncorrectable read count noralized %d\n",
le32_to_cpu(log->sys_area_uncorr_read_count_normalized));
printf(" Sys area erase fail count raw %"PRIu32"\n",
le32_to_cpu(log->sys_area_erase_fail_count_raw));
printf(" Sys area erase fail count noralized %d\n",
le32_to_cpu(log->sys_area_erase_fail_count_normalized));
printf(" Max peak power capability %"PRIu16"\n",
le16_to_cpu(log->max_peak_power_capability));
printf(" Current max avg power %"PRIu16"\n",
le16_to_cpu(log->current_max_avg_power));
printf(" Lifetime power consumed %"PRIu64"\n",
int48_to_long(log->lifetime_power_consumed));
printf(" Dssd firmware revision ");
for (i = 0; i < sizeof(log->dssd_firmware_revision); i++)
printf("%c", log->dssd_firmware_revision[i]);
printf("\n");
printf(" Dssd firmware build UUID %s\n",
util_uuid_to_string(log->dssd_firmware_build_uuid));
printf(" Dssd firmware build label ");
for (i = 0; i < sizeof(log->dssd_firmware_build_label); i++)
printf("%c", log->dssd_firmware_build_label[i]);
printf("\n");
fallthrough;
case 2 ... 3:
printf(" Errata Version Field %d\n",
Expand Down
139 changes: 94 additions & 45 deletions plugins/ocp/ocp-smart-extended-log.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,47 +16,72 @@ struct command;
struct plugin;

/**
* struct ocp_smart_extended_log - SMART / Health Information Extended
* @physical_media_units_written: Physical Media Units Written
* @physical_media_units_read: Physical Media Units Read
* @bad_user_nand_blocks_raw: Bad User NAND Blocks raw
* @bad_user_nand_blocks_normalized: Bad User NAND Blocks normalized
* @bad_system_nand_blocks_raw: Bad System NAND Blocks raw
* @bad_system_nand_blocks_normalized: Bad System NAND Blocks normalized
* @xor_recovery_count: XOR Recovery Count
* @uncorrectable_read_err_count: Uncorrectable Read Error Count
* @soft_ecc_err_count: Soft ECC Error Count
* @end_to_end_detected_err: End to End detected errors
* @end_to_end_corrected_err: End to End corrected errors
* @system_data_used_percent: System data percent used
* @refresh_counts: Refresh Counts
* @user_data_erase_count_max: Max User data erase counts
* @user_data_erase_count_min: Min User data erase counts
* @thermal_throttling_event_count: Number of Thermal throttling events
* @dssd_errata_version: DSSD Errata Version
* @dssd_point_version: DSSD Point Version
* @dssd_minor_version: DSSD Minor Version
* @dssd_major_version: DSSD Major Version
* @pcie_correctable_err_count: PCIe Correctable Error Count
* @incomplete_shoutdowns: Incomplete Shutdowns
* @rsvd116: Reserved
* @percent_free_blocks: Percent free blocks
* @rsvd121: Reserved
* @capacitor_health: Capacitor health
* @nvme_base_errata_version: NVM Express Base Errata Version
* @nvme_cmdset_errata_version: NVMe Command Set Errata Version
* @rsvd132: Reserved
* @unaligned_io: Unaligned I/O
* @security_version: Security Version Number
* @total_nuse: Total NUSE - Namespace utilization
* @plp_start_count: PLP start count
* @endurance_estimate: Endurance Estimate
* @pcie_link_retaining_count: PCIe Link Retraining Count
* @power_state_change_count: Power State Change Count
* @lowest_permitted_fw_rev: Lowest Permitted Firmware Revision -------------
* @rsvd216: Reserved
* @log_page_version: Log page version
* @log_page_guid: Log page GUID
* struct ocp_smart_extended_log - SMART / Health Information Extended
* @physical_media_units_written: Physical Media Units Written
* @physical_media_units_read: Physical Media Units Read
* @bad_user_nand_blocks_raw: Bad User NAND Blocks raw
* @bad_user_nand_blocks_normalized: Bad User NAND Blocks normalized
* @bad_system_nand_blocks_raw: Bad System NAND Blocks raw
* @bad_system_nand_blocks_normalized: Bad System NAND Blocks normalized
* @xor_recovery_count: XOR Recovery Count
* @uncorrectable_read_err_count: Uncorrectable Read Error Count
* @soft_ecc_err_count: Soft ECC Error Count
* @end_to_end_detected_err: End to End detected errors
* @end_to_end_corrected_err: End to End corrected errors
* @system_data_used_percent: System data percent used
* @refresh_counts: Refresh Counts
* @user_data_erase_count_max: Max User data erase counts
* @user_data_erase_count_min: Min User data erase counts
* @thermal_throttling_event_count: Number of Thermal throttling events
* @dssd_errata_version: DSSD Errata Version
* @dssd_point_version: DSSD Point Version
* @dssd_minor_version: DSSD Minor Version
* @dssd_major_version: DSSD Major Version
* @pcie_correctable_err_count: PCIe Correctable Error Count
* @incomplete_shoutdowns: Incomplete Shutdowns
* @rsvd116: Reserved
* @percent_free_blocks: Percent free blocks
* @rsvd121: Reserved
* @capacitor_health: Capacitor health
* @nvme_base_errata_version: NVM Express Base Errata Version
* @nvme_cmdset_errata_version: NVMe Command Set Errata Version
* @rsvd134: Reserved
* @nvme_over_pcie_errate_version: NVMe Over Pcie Errata Version
* @nvme_mi_errata_version: NVMe MI Errata Version
* @unaligned_io: Unaligned I/O
* @security_version: Security Version Number
* @total_nuse: Total NUSE - Namespace utilization
* @plp_start_count: PLP start count
* @endurance_estimate: Endurance Estimate
* @pcie_link_retaining_count: PCIe Link Retraining Count
* @power_state_change_count: Power State Change Count
* @lowest_permitted_fw_rev: Lowest Permitted Firmware Revision -------------
* @rsvd216: Reserved
* @total_media_dies: Total media dies
* @total_die_failure_tolerance: Total die failure tolerance
* @media_dies_offline: Media dies offline
* @max_temperature_recorded: Max temperature recorded
* @rsvd223: Reserved
* @nand_avg_erase_count: Nand avg erase count
* @command_timeouts: Command timeouts
* @sys_area_program_fail_count_raw: Sys area program fail count raw
* @sys_area_program_fail_count_normalized: Sys area program fail count normalized
* @rsvd241: Reserved
* @sys_area_uncorr_read_count_raw: Sys area uncorrectable read count raw
* @sys_area_uncorr_read_count_normalized: Sys area uncorrectable read count normalized
* @rsvd249: Reserved
* @sys_area_erase_fail_count_raw: Sys area erase fail count raw
* @sys_area_erase_fail_count_normalized: Sys area erase fail count normalized
* @rsvd257: Reserved
* @max_peak_power_capability: Max peak power capability
* @current_max_avg_power: Current max avg power
* @lifetime_power_consumed: Lifetime power consumed
* @dssd_firmware_revision: Dssd firmware revision
* @dssd_firmware_build_uuid: Dssd firmware build UUID
* @dssd_firmware_build_label: Dssd firmware build label
* @rsvd358: Reserved
* @log_page_version: Log page version
* @log_page_guid: Log page GUID
*/
struct ocp_smart_extended_log {
__u8 physical_media_units_written[16]; /* [15:0] */
Expand Down Expand Up @@ -88,7 +113,9 @@ struct ocp_smart_extended_log {
__le16 capacitor_health; /* [129:128] */
__u8 nvme_base_errata_version; /* [130] */
__u8 nvme_cmdset_errata_version; /* [131] */
__u8 rsvd132[4]; /* [135:132] */
__u8 nvme_over_pcie_errate_version; /* [132] */
__u8 nvme_mi_errata_version; /* [133] */
__u8 rsvd134[2]; /* [135:134] */
__le64 unaligned_io; /* [143:136] */
__le64 security_version; /* [151:144] */
__le64 total_nuse; /* [159:152] */
Expand All @@ -97,9 +124,31 @@ struct ocp_smart_extended_log {
__le64 pcie_link_retaining_count; /* [199:192] */
__le64 power_state_change_count; /* [207:200] */
__le64 lowest_permitted_fw_rev; /* [215:208] */
__u8 rsvd216[278]; /* [493:216] */
__le16 log_page_version; /* [495:494] */
__u8 log_page_guid[16]; /* [511:496] */
__le16 total_media_dies; /* [217:216] */
__le16 total_die_failure_tolerance; /* [219:218] */
__le16 media_dies_offline; /* [221:220] */
__u8 max_temperature_recorded; /* [222] */
__u8 rsvd223; /* [223] */
__le64 nand_avg_erase_count; /* [231:224] */
__le32 command_timeouts; /* [235:232] */
__le32 sys_area_program_fail_count_raw; /* [239:236] */
__u8 sys_area_program_fail_count_normalized; /* [240] */
__u8 rsvd241[3]; /* [243:241] */
__le32 sys_area_uncorr_read_count_raw; /* [247:244] */
__u8 sys_area_uncorr_read_count_normalized; /* [248] */
__u8 rsvd249[3]; /* [251:249] */
__le32 sys_area_erase_fail_count_raw; /* [255:252] */
__u8 sys_area_erase_fail_count_normalized; /* [256] */
__u8 rsvd257[3]; /* [259:257] */
__le16 max_peak_power_capability; /* [261:260] */
__le16 current_max_avg_power; /* [263:262] */
__u8 lifetime_power_consumed[6]; /* [269:264] */
__u8 dssd_firmware_revision[8]; /* [277:270] */
__u8 dssd_firmware_build_uuid[16]; /* [293:278] */
__u8 dssd_firmware_build_label[64]; /* [357:294] */
__u8 rsvd358[136]; /* [493:358] */
__le16 log_page_version; /* [495:494] */
__u8 log_page_guid[16]; /* [511:496] */
};

int ocp_smart_add_log(int argc, char **argv, struct command *cmd,
Expand Down

0 comments on commit 4135424

Please sign in to comment.