Skip to content

Commit

Permalink
Merge pull request #23 from opencomputeproject/bugfix/dwell_time
Browse files Browse the repository at this point in the history
Fixed the LMT sequence issue to actually wait for "dwell_time"
  • Loading branch information
sksekar authored Aug 27, 2024
2 parents 0f2e04a + b0e34ca commit 1278d80
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 39 deletions.
57 changes: 30 additions & 27 deletions src/pci_lmt/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def info_lane_margin_on_device_list(self, args: argparse.Namespace):
for lane in range(dev.device_info.width):
dev.lane_errors[lane] = status_str

def setup_lane_margin_on_device_list(self):
def setup_lane_margin_on_device_list(self, voltage_or_timing="TIMING", steps=16, up_down=0, left_right_none=0):
for dev in self._primed_devices:
for lane in range(dev.device_info.width):
dev.set_error_count_limit(
Expand All @@ -92,6 +92,21 @@ def setup_lane_margin_on_device_list(self):
error_count_limit=self.error_count_limit,
)

if voltage_or_timing == "TIMING":
dev.step_margin_timing_offset_right_left_of_default(
lane=lane,
receiver_number=self.receiver_number,
left_right_none=left_right_none,
steps=steps,
)
else:
dev.step_margin_voltage_offset_up_down_of_default(
lane=lane,
receiver_number=self.receiver_number,
up_down=up_down,
steps=steps,
)

# FIXME: `voltage_or_timing` should be an enum; dont use strings for magic constants
# pylint: disable=too-many-branches
def collect_lane_margin_on_device_list(
Expand All @@ -109,7 +124,7 @@ def collect_lane_margin_on_device_list(
receiver_number=ty.cast(int, self.receiver_number),
step=steps,
)
stepper = {}

if voltage_or_timing == "TIMING":
# FIXME: move this string construction in a separate method; or better make it
# into a typed enum
Expand All @@ -120,45 +135,28 @@ def collect_lane_margin_on_device_list(
lane_result.margin_type += "left"
else:
lane_result.margin_type += "none"
stepper = dev.step_margin_timing_offset_right_left_of_default(
lane=lane,
receiver_number=self.receiver_number,
left_right_none=left_right_none,
steps=steps,
)

if voltage_or_timing == "VOLTAGE":
margin_status = dev.decode_step_margin_timing_offset_right_left_of_default(lane=lane)
else:
lane_result.margin_type = "voltage_"
if up_down == 0:
lane_result.margin_type += "up"
elif up_down == 1:
lane_result.margin_type += "down"
else:
lane_result.margin_type += "none"
stepper = dev.step_margin_voltage_offset_up_down_of_default(
lane=lane,
receiver_number=self.receiver_number,
up_down=up_down,
steps=steps,
)
margin_status = dev.decode_step_margin_voltage_offset_up_down_of_default(lane=lane)

sampler = dev.fetch_sample_count(lane=lane, receiver_number=self.receiver_number)
if stepper["error"] or sampler["error"]:
if margin_status["error"] or sampler["error"]:
lane_result.error = True
lane_result.error_msg = stepper["error"] if stepper["error"] else sampler["error"]
elif stepper["error_count"] == 0:
lane_result.error = False
lane_result.sample_count = sampler["sample_count"]
lane_result.sample_count_bits = sampler["sample_count_bits"]
lane_result.error_count = 0
lane_result.ber = 0.0
lane_result.error_msg = margin_status["error"] if margin_status["error"] else sampler["error"]
else:
# TODO Check if this needs to be divided by Sampling (aka, 64)
lane_result.error = False
lane_result.sample_count = sampler["sample_count"]
lane_result.sample_count_bits = sampler["sample_count_bits"]
lane_result.error_count = stepper["error_count"]
lane_result.ber = stepper["error_count"] / sampler["sample_count_bits"]
lane_result.error_count = margin_status["error_count"]
lane_result.ber = margin_status["error_count"] / sampler["sample_count_bits"]

results.append(lane_result)

Expand Down Expand Up @@ -238,7 +236,12 @@ def collect_lmt_on_bdfs(
devices.no_command_on_device_list()
devices.clear_error_log_on_device_list()
devices.normal_settings_on_device_list()
devices.setup_lane_margin_on_device_list()
devices.setup_lane_margin_on_device_list(
voltage_or_timing=devices.voltage_or_timing,
steps=devices.steps,
up_down=devices.up_down,
left_right_none=devices.left_right_none,
)

start_time = time.time()
time.sleep(args.dwell_time)
Expand Down
55 changes: 43 additions & 12 deletions src/pci_lmt/pcie_lane_margining.py
Original file line number Diff line number Diff line change
Expand Up @@ -631,8 +631,14 @@ def step_margin_timing_offset_right_left_of_default(
if receiver_number not in [*range(0x1, 0x7)]:
return {"error": f"ERROR: StepMarginTimingOffsetRightLeftOfDefault - BAD receiver_number {receiver_number}"}

if left_right_none == -1:
margin_payload = steps
if not self.device_info.ind_left_right_timing:
if left_right_none == -1:
margin_payload = steps
else:
return {
"error": "ERROR: StepMarginTimingOffsetRightLeftOfDefault - "
"Rcvr doesn't support independent left/right margining"
}
elif left_right_none in (0, 1):
margin_payload = left_right_none << 6 | steps
else:
Expand Down Expand Up @@ -673,16 +679,24 @@ def decode_step_margin_timing_offset_right_left_of_default(self, lane: int):
# number of errors detected as defined in Section 8.4.4. Note that MErrorCount might be greater than
# Error Count Limit.
# Margin Payload[5:0] = MErrorCount
ret = self.decode_margining_lane_status_register(lane=lane)
start_time = time.time()
while ret["margin_type_status"] != 0x3:
while True:
ret = self.decode_margining_lane_status_register(lane=lane)
step_margin_execution_status = (ret["margin_payload_status"] & 0xC0) >> 6
error_count = (ret["margin_payload_status"] & 0x3F) >> 0
margin_type_status = ret["margin_type_status"]

if margin_type_status == 0x3:
if step_margin_execution_status == 0x2:
# Setup done. Margining in progress.
break
if step_margin_execution_status == 0x3:
# Unsupported operation
return {"error": "ERROR: decode_StepMarginTimingOffsetRightLeftOfDefault - unsupported operation"}

if time.time() - start_time > TIMEOUT:
return {"error": "ERROR: decode_StepMarginTimingOffsetRightLeftOfDefault - timedout"}

step_margin_execution_status = (ret["margin_payload_status"] & 0xC0) >> 6
error_count = (ret["margin_payload_status"] & 0x3F) >> 0
margin_type_status = ret["margin_type_status"]
return {
"error": None,
"margin_type_status": margin_type_status,
Expand Down Expand Up @@ -719,7 +733,15 @@ def step_margin_voltage_offset_up_down_of_default(
if receiver_number not in [*range(0x1, 0x7)]:
return {"error": f"ERROR: StepMarginVoltageOffsetUpDownOfDefault - BAD receiver_number {receiver_number}"}

if up_down in (0, 1):
if not self.device_info.ind_up_down_voltage:
if up_down == -1:
margin_payload = steps
else:
return {
"error": "ERROR: StepMarginVoltageOffsetUpDownOfDefault - "
"Rcvr doesn't support independent up/down margining"
}
elif up_down in (0, 1):
margin_payload = up_down << 6 | steps
else:
return {"error": f"ERROR: StepMarginVoltageOffsetUpDownOfDefault - BAD UpDown {up_down}"}
Expand Down Expand Up @@ -759,14 +781,23 @@ def decode_step_margin_voltage_offset_up_down_of_default(self, lane: int):
# Margin Payload[5:0] = MErrorCount
ret = self.decode_margining_lane_status_register(lane=lane)
start_time = time.time()
while ret["margin_type_status"] != 0x4:
while True:
ret = self.decode_margining_lane_status_register(lane=lane)
step_margin_execution_status = (ret["margin_payload_status"] & 0xC0) >> 6
error_count = (ret["margin_payload_status"] & 0x3F) >> 0
margin_type_status = ret["margin_type_status"]

if margin_type_status == 0x4:
if step_margin_execution_status == 0x2:
# Setup done. Margining in progress.
break
if step_margin_execution_status == 0x3:
# Unsupported operation
return {"error": "ERROR: decode_StepMarginVoltageOffsetUpDownOfDefault - unsupported operation"}

if time.time() - start_time > TIMEOUT:
return {"error": "ERROR: decode_StepMarginVoltageOffsetUpDownOfDefault - timedout"}

step_margin_execution_status = (ret["margin_payload_status"] & 0xC0) >> 6
error_count = (ret["margin_payload_status"] & 0x3F) >> 0
margin_type_status = ret["margin_type_status"]
return {
"error": None,
"margin_type_status": margin_type_status,
Expand Down

0 comments on commit 1278d80

Please sign in to comment.