Skip to content

Commit

Permalink
plugins/solidigm: Automatically retry with a smaller log chunk size.
Browse files Browse the repository at this point in the history
Retry telemetry and PEL retrieval with progressively smaller chunk sizes,
because some systems fail to retrieve telemetry in 256KB chunks.

Signed-off-by: Leonardo da Cunha <[email protected]>
  • Loading branch information
lgdacunh authored and igaw committed Oct 9, 2024
1 parent a5f155c commit 9465ffc
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 22 deletions.
31 changes: 16 additions & 15 deletions plugins/solidigm/solidigm-internal-logs.c
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,6 @@ struct ilog {
int count;
struct nvme_id_ctrl id_ctrl;
enum nvme_telemetry_da max_da;
__u32 max_tx;
};

static void print_nlog_header(__u8 *buffer)
Expand Down Expand Up @@ -522,19 +521,14 @@ static int ilog_ensure_dump_id_ctrl(struct ilog *ilog)
if (ilog->id_ctrl.lpa & 0x40)
ilog->max_da = NVME_TELEMETRY_DA_4;

/* assuming CAP.MPSMIN is zero minimum Memory Page Size is at least 4096 bytes */
ilog->max_tx = (1 << ilog->id_ctrl.mdts) * NVME_LOG_PAGE_PDU_SIZE;
if (ilog->max_tx > DRIVER_MAX_TX_256K)
ilog->max_tx = DRIVER_MAX_TX_256K;

return err;
}

static int ilog_dump_telemetry(struct ilog *ilog, enum log_type ttype)
{
int err = 0;
enum nvme_telemetry_da da;
size_t max_data_tx;
size_t mdts;
const char *file_name;
struct nvme_feat_host_behavior prev = {0};
bool host_behavior_changed = false;
Expand All @@ -545,7 +539,7 @@ static int ilog_dump_telemetry(struct ilog *ilog, enum log_type ttype)
return err;

da = ilog->max_da;
max_data_tx = ilog->max_tx;
mdts = ilog->id_ctrl.mdts;

if (da == 4) {
__u32 result;
Expand All @@ -564,16 +558,16 @@ static int ilog_dump_telemetry(struct ilog *ilog, enum log_type ttype)
case HIT:
file_name = "lid_0x07_lsp_0x01_lsi_0x0000.bin";
log.desc = "Host Initiated Telemetry";
err = nvme_get_telemetry_log(dev_fd(ilog->dev), true, false, false, max_data_tx, da,
(struct nvme_telemetry_log **) &log.buffer,
&log.buffer_size);
err = sldgm_dynamic_telemetry(dev_fd(ilog->dev), true, false, false, mdts,
da, (struct nvme_telemetry_log **) &log.buffer,
&log.buffer_size);
break;
case CIT:
file_name = "lid_0x08_lsp_0x00_lsi_0x0000.bin";
log.desc = "Controller Initiated Telemetry";
err = nvme_get_telemetry_log(dev_fd(ilog->dev), false, true, true, max_data_tx, da,
(struct nvme_telemetry_log **) &log.buffer,
&log.buffer_size);
err = sldgm_dynamic_telemetry(dev_fd(ilog->dev), false, true, true, mdts,
da, (struct nvme_telemetry_log **) &log.buffer,
&log.buffer_size);
break;
default:
return -EINVAL;
Expand Down Expand Up @@ -749,6 +743,7 @@ static int ilog_dump_pel(struct ilog *ilog)
void *pevent_log_full;
int err;
struct nvme_get_log_args args;
size_t max_data_tx;

_cleanup_free_ struct nvme_persistent_event_log *pevent = NULL;

Expand Down Expand Up @@ -794,7 +789,13 @@ static int ilog_dump_pel(struct ilog *ilog)
.rae = false,
.ot = false,
};
err = nvme_get_log_page(dev_fd(ilog->dev), ilog->max_tx, &args);

max_data_tx = (1 << ilog->id_ctrl.mdts) * NVME_LOG_PAGE_PDU_SIZE;
do {
err = nvme_get_log_page(dev_fd(ilog->dev), max_data_tx, &args);
max_data_tx /= 2;
} while (err == -EPERM && max_data_tx >= NVME_LOG_PAGE_PDU_SIZE);

if (err)
return err;

Expand Down
2 changes: 1 addition & 1 deletion plugins/solidigm/solidigm-nvme.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

#include "cmd.h"

#define SOLIDIGM_PLUGIN_VERSION "1.7"
#define SOLIDIGM_PLUGIN_VERSION "1.8"

PLUGIN(NAME("solidigm", "Solidigm vendor specific extensions", SOLIDIGM_PLUGIN_VERSION),
COMMAND_LIST(
Expand Down
13 changes: 9 additions & 4 deletions plugins/solidigm/solidigm-telemetry.c
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,8 @@ int solidigm_get_telemetry_log(int argc, char **argv, struct command *cmd, struc

if (!cfg.is_input_file) {
size_t max_data_tx;
size_t power2;
__u8 mdts = 0;

err = nvme_get_telemetry_max(dev_fd(dev), NULL, &max_data_tx);
if (err < 0) {
Expand All @@ -155,11 +157,14 @@ int solidigm_get_telemetry_log(int argc, char **argv, struct command *cmd, struc
SOLIDIGM_LOG_WARNING("Failed to acquire identify ctrl %d!", err);
goto close_fd;
}
if (max_data_tx > DRIVER_MAX_TX_256K)
max_data_tx = DRIVER_MAX_TX_256K;
power2 = max_data_tx / NVME_LOG_PAGE_PDU_SIZE;
while (power2 && !(1 & power2)) {
power2 >>= 1;
mdts++;
}

err = nvme_get_telemetry_log(dev_fd(dev), cfg.host_gen, cfg.ctrl_init, true,
max_data_tx, cfg.data_area, &tl.log, &tl.log_size);
err = sldgm_dynamic_telemetry(dev_fd(dev), cfg.host_gen, cfg.ctrl_init, true,
mdts, cfg.data_area, &tl.log, &tl.log_size);
if (err < 0) {
SOLIDIGM_LOG_WARNING("get-telemetry-log: %s",
nvme_strerror(errno));
Expand Down
16 changes: 16 additions & 0 deletions plugins/solidigm/solidigm-util.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,19 @@ int sldgm_get_uuid_index(struct nvme_dev *dev, __u8 *index)

return sldgm_find_uuid_index(&uuid_list, index);
}

/*
 * Fetch a telemetry log, retrying with progressively smaller transfer chunks.
 *
 * The first attempt uses a chunk of NVME_LOG_PAGE_PDU_SIZE << mtds bytes
 * (saturated, see below). While nvme_get_telemetry_log() returns -EPERM the
 * chunk size is halved and the call is repeated, never going below
 * NVME_LOG_PAGE_PDU_SIZE bytes.
 *
 * @dev_fd:           device file descriptor passed to nvme_get_telemetry_log()
 * @create:           forwarded on the first attempt only; every retry passes
 *                    false (presumably so a retry does not trigger a second
 *                    telemetry capture -- confirm against libnvme)
 * @ctrl:             forwarded unchanged
 * @log_page:         forwarded unchanged
 * @mtds:             power-of-two exponent of the initial chunk size, in units
 *                    of NVME_LOG_PAGE_PDU_SIZE
 * @da:               forwarded unchanged
 * @log_buffer:       forwarded unchanged
 * @log_buffer_size:  forwarded unchanged
 *
 * Return: the result of the last nvme_get_telemetry_log() call.
 */
int sldgm_dynamic_telemetry(int dev_fd, bool create, bool ctrl, bool log_page, __u8 mtds,
			    enum nvme_telemetry_da da, struct nvme_telemetry_log **log_buffer,
			    size_t *log_buffer_size)
{
	/*
	 * Upper bound for the initial chunk size. The previous expression
	 * "(1 << mtds) * NVME_LOG_PAGE_PDU_SIZE" was evaluated as int, so a
	 * large exponent overflowed (undefined behaviour) and could yield a
	 * zero or garbage chunk size. Saturate at 2 GiB instead.
	 * NOTE(review): libnvme's nvme_get_log_page() appears to take a 32-bit
	 * transfer length, hence the 2^31 bound -- confirm.
	 */
	const size_t tx_limit = (size_t)1 << 31;
	size_t max_data_tx = NVME_LOG_PAGE_PDU_SIZE;
	unsigned int doubled;
	int err;

	/* Same value as PDU << mtds whenever that fits below the limit. */
	for (doubled = 0; doubled < mtds && max_data_tx < tx_limit; doubled++)
		max_data_tx *= 2;

	do {
		err = nvme_get_telemetry_log(dev_fd, create, ctrl, log_page, max_data_tx, da,
					     log_buffer, log_buffer_size);
		/* Prepare the next, smaller attempt. */
		max_data_tx /= 2;
		create = false;
		/*
		 * NOTE(review): -EPERM is -1 on Linux, which may also be the
		 * generic "failed, see errno" return of the library call --
		 * confirm which failures are meant to trigger a retry.
		 */
	} while (err == -EPERM && max_data_tx >= NVME_LOG_PAGE_PDU_SIZE);

	return err;
}
5 changes: 3 additions & 2 deletions plugins/solidigm/solidigm-util.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@

#include "nvme.h"

#define DRIVER_MAX_TX_256K (256 * 1024)

int sldgm_find_uuid_index(struct nvme_id_uuid_list *uuid_list, __u8 *index);
int sldgm_get_uuid_index(struct nvme_dev *dev, __u8 *index);
/*
 * Retrieve a telemetry log via nvme_get_telemetry_log(), starting with a
 * transfer chunk of (1 << mtds) * NVME_LOG_PAGE_PDU_SIZE bytes and halving it
 * after each attempt that returns -EPERM, down to NVME_LOG_PAGE_PDU_SIZE.
 * "create" is only honoured on the first attempt; retries pass false.
 * Returns the result of the last nvme_get_telemetry_log() call.
 */
int sldgm_dynamic_telemetry(int dev_fd, bool create, bool ctrl, bool log_page, __u8 mtds,
enum nvme_telemetry_da da, struct nvme_telemetry_log **log_buffer,
size_t *log_buffer_size);

0 comments on commit 9465ffc

Please sign in to comment.