Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Mellanox] implement platform wait in python code #201

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 32 additions & 68 deletions device/mellanox/x86_64-mlnx_msn2700-r0/platform_wait
Original file line number Diff line number Diff line change
@@ -1,68 +1,32 @@
#!/bin/bash

# logger binary, syslog tag and priority names used by the log_* helpers.
declare -r SYSLOG_LOGGER="/usr/bin/logger"
declare -r SYSLOG_IDENTIFIER="platform_wait"
declare -r SYSLOG_ERROR="error"
declare -r SYSLOG_NOTICE="notice"
declare -r SYSLOG_INFO="info"

# Directory that holds the hw-management state files listed below.
declare -r HW_MGMT_CONFIG="/var/run/hw-management/config"

# State files read by wait_for_asic_chipup on every poll.
declare -r ASIC_INIT_DONE="${HW_MGMT_CONFIG}/asics_init_done"
declare -r NUM_ASICS="${HW_MGMT_CONFIG}/asic_num"
declare -r ASIC_CHIPUP_COMPLETED="${HW_MGMT_CONFIG}/asic_chipup_completed"

# Script exit statuses: success, or the wait timed out.
declare -r EXIT_SUCCESS="0"
declare -r EXIT_TIMEOUT="1"

# Send a message to syslog with the SYSLOG_ERROR priority and the
# SYSLOG_IDENTIFIER tag.
# Arguments: the message; multiple arguments are joined into one message.
function log_error() {
    # Call logger directly rather than through eval: eval re-parses the
    # message as shell code, so quotes or metacharacters in it would break
    # the command or be executed. "--" keeps a leading "-" in the message
    # from being taken as an option.
    "${SYSLOG_LOGGER}" -t "${SYSLOG_IDENTIFIER}" -p "${SYSLOG_ERROR}" -- "$*"
}

# Send a message to syslog with the SYSLOG_NOTICE priority and the
# SYSLOG_IDENTIFIER tag.
# Arguments: the message; multiple arguments are joined into one message.
function log_notice() {
    # Call logger directly rather than through eval: eval re-parses the
    # message as shell code, so quotes or metacharacters in it would break
    # the command or be executed. "--" keeps a leading "-" in the message
    # from being taken as an option.
    "${SYSLOG_LOGGER}" -t "${SYSLOG_IDENTIFIER}" -p "${SYSLOG_NOTICE}" -- "$*"
}

# Send a message to syslog with the SYSLOG_INFO priority and the
# SYSLOG_IDENTIFIER tag.
# Arguments: the message; multiple arguments are joined into one message.
function log_info() {
    # Call logger directly rather than through eval: eval re-parses the
    # message as shell code, so quotes or metacharacters in it would break
    # the command or be executed. "--" keeps a leading "-" in the message
    # from being taken as an option.
    "${SYSLOG_LOGGER}" -t "${SYSLOG_IDENTIFIER}" -p "${SYSLOG_INFO}" -- "$*"
}

# Poll the hw-management state files once per second, up to 300 times, until
# asics_init_done reads 1 and asic_num equals asic_chipup_completed.
# Returns: EXIT_SUCCESS when the condition is met, EXIT_TIMEOUT otherwise
# (after logging the last values read).
function wait_for_asic_chipup() {

    # Last values read; reported in the timeout message.
    local _ASIC_INIT="0"
    local _ASIC_COUNT="0"
    local _ASICS_CHIPUP="0"

    local -i _WDOG_CNT="1"
    local -ir _WDOG_MAX="300"

    local -r _TIMEOUT="1s"
    local -r _NUMERIC='^[0-9]+$'

    while [[ "${_WDOG_CNT}" -le "${_WDOG_MAX}" ]]; do
        # Drop cat's stderr: a missing file must yield an empty value, not
        # an error message that later gets evaluated as a number.
        _ASIC_INIT="$(cat "${ASIC_INIT_DONE}" 2>/dev/null)"
        _ASIC_COUNT="$(cat "${NUM_ASICS}" 2>/dev/null)"
        _ASICS_CHIPUP="$(cat "${ASIC_CHIPUP_COMPLETED}" 2>/dev/null)"

        # Compare only real numbers: inside [[ ]] an empty string evaluates
        # to 0 for -eq, so two unreadable/empty files would otherwise
        # compare as equal.
        if [[ "${_ASIC_INIT}" =~ ${_NUMERIC} \
            && "${_ASIC_COUNT}" =~ ${_NUMERIC} \
            && "${_ASICS_CHIPUP}" =~ ${_NUMERIC} ]] \
            && [[ "${_ASIC_INIT}" -eq 1 && "${_ASIC_COUNT}" -eq "${_ASICS_CHIPUP}" ]]; then
            return "${EXIT_SUCCESS}"
        fi

        (( _WDOG_CNT += 1 ))
        sleep "${_TIMEOUT}"
    done

    log_error "Mellanox ASIC is not ready: INIT: ${_ASIC_INIT}, NUM_ASIC: ${_ASIC_COUNT}, CHIPUP: ${_ASICS_CHIPUP} timeout...."
    return "${EXIT_TIMEOUT}"
}

# Script body: announce the wait, block until the ASIC is ready, then report.
log_info "Wait for Mellanox ASIC to be ready"

# On failure, exit with the status wait_for_asic_chipup returned.
wait_for_asic_chipup || exit "$?"

log_notice "Mellanox ASIC is ready"
exit "${EXIT_SUCCESS}"
#!/usr/bin/python3

#
# Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES.
# Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import sys
from sonic_platform.device_data import DeviceDataManager
from sonic_py_common.logger import Logger


# Wait for DeviceDataManager.wait_platform_ready() and turn its result into
# the process exit status: 0 when it returns a true value, -1 otherwise.
logger = Logger(log_identifier='platform_wait')
logger.log_notice('Nvidia: Wait for PMON dependencies to be ready')

platform_ready = DeviceDataManager.wait_platform_ready()
if not platform_ready:
    logger.log_error('Nvidia: PMON dependencies are not ready: timeout')
    sys.exit(-1)

logger.log_notice('Nvidia: PMON dependencies are ready')
sys.exit(0)
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

import glob
import os
import time

from . import utils

Expand Down Expand Up @@ -167,7 +168,11 @@ def is_psu_hotswapable(cls):
@classmethod
@utils.read_only_cache()
def get_sfp_count(cls):
    """Return the number of SFP entries listed in the platform's platform.json.

    The value is the length of the ``sfps`` list under ``chassis`` in
    ``platform.json`` located in the directory reported by
    ``device_info.get_path_to_platform_dir()``.

    Returns:
        int: number of entries in platform_data['chassis']['sfps'].
    """
    # The earlier `return utils.read_int_from_file(.../module_counter)` was
    # removed: left in place it made the platform.json lookup unreachable.
    from sonic_py_common import device_info
    platform_path = device_info.get_path_to_platform_dir()
    platform_json_path = os.path.join(platform_path, 'platform.json')
    platform_data = utils.load_json_file(platform_json_path)
    return len(platform_data['chassis']['sfps'])

@classmethod
def get_linecard_sfp_count(cls, lc_index):
Expand Down Expand Up @@ -234,3 +239,32 @@ def get_cpld_component_list(cls):
# Currently, only fetching BIOS version is supported
return ComponentCPLDSN2201.get_component_list()
return ComponentCPLD.get_component_list()

@classmethod
@utils.read_only_cache()
def is_independent_mode(cls):
    """Tell whether the HwSKU's sai.profile sets SAI_INDEPENDENT_MODULE_MODE to '1'.

    Returns:
        bool: True only when the key is present with the exact value '1'.
    """
    from sonic_py_common import device_info
    _platform_dir, hwsku_path = device_info.get_paths_to_platform_and_hwsku_dirs()
    profile = utils.read_key_value_file(
        os.path.join(hwsku_path, 'sai.profile'),
        delimeter='=',
    )
    return profile.get('SAI_INDEPENDENT_MODULE_MODE') == '1'

@classmethod
def wait_platform_ready(cls):
    """
    Wait for Nvidia platform related services(SDK, hw-management) ready

    Builds a list of readiness checks and polls them with
    utils.wait_until_conditions (timeout 300, interval 1):
      - for every module index below get_sfp_count(), each expected sysfs
        node under /sys/module/sx_core/asic0/module<index>/ must exist;
        independent mode adds further nodes to that set;
      - when not in independent mode, asics_init_done must read 1.

    Returns:
        bool: True if wait success else timeout
    """
    conditions = []
    sysfs_nodes = ['power_mode', 'power_mode_policy', 'present', 'reset', 'status', 'statuserror']
    if cls.is_independent_mode():
        sysfs_nodes.extend(['control', 'frequency', 'frequency_support', 'hw_present', 'hw_reset',
                            'power_good', 'power_limit', 'power_on', 'temperature/input'])
    else:
        conditions.append(lambda: utils.read_int_from_file('/var/run/hw-management/config/asics_init_done') == 1)
    sfp_count = cls.get_sfp_count()
    for sfp_index in range(sfp_count):
        for sysfs_node in sysfs_nodes:
            sysfs_path = f'/sys/module/sx_core/asic0/module{sfp_index}/{sysfs_node}'
            # Bind the path as a default argument. A plain closure reads the
            # loop variables only when it is called, i.e. after the loops
            # have finished, so every condition would test the last node of
            # the last module instead of its own path.
            conditions.append(lambda path=sysfs_path: os.path.exists(path))
    return utils.wait_until_conditions(conditions, 300, 1)
16 changes: 16 additions & 0 deletions platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@
SFP_SYSFS_STATUS_ERROR = 'statuserror'
SFP_SYSFS_PRESENT = 'present'
SFP_SYSFS_RESET = 'reset'
SFP_SYSFS_HWRESET = 'hw_reset'
SFP_SYSFS_POWER_MODE = 'power_mode'
SFP_SYSFS_POWER_MODE_POLICY = 'power_mode_policy'
POWER_MODE_POLICY_HIGH = 1
Expand Down Expand Up @@ -792,6 +793,21 @@ def get_xcvr_api(self):
self._xcvr_api.get_tx_fault = self.get_tx_fault
return self._xcvr_api

def is_sw_control(self):
    """Report whether this module is under software control.

    Outside independent mode the answer is always False. Otherwise the
    'control_type' field of the module's TRANSCEIVER_MODULES_MGMT entry in
    STATE_DB is compared with the integer read from the module's sysfs
    'control' file.

    Returns:
        bool: True for ('SW_CONTROL', 1), False for ('FW_CONTROL', 0) or
        when not in independent mode.

    Raises:
        Exception: for any other combination of the two values.
    """
    if not DeviceDataManager.is_independent_mode():
        return False

    module = self.sdk_index
    state_db = utils.DbUtils.get_db_instance('STATE_DB')
    control_type = state_db.get('STATE_DB', f'TRANSCEIVER_MODULES_MGMT|{module}', 'control_type')
    control_value = utils.read_int_from_file(f'/sys/module/sx_core/asic0/module{module}/control')

    observed = (control_type, control_value)
    if observed == ('SW_CONTROL', 1):
        return True
    if observed == ('FW_CONTROL', 0):
        return False
    raise Exception(f'Module {module} is in initialization, please retry later')


class RJ45Port(NvidiaSFPCommon):
"""class derived from SFP, representing RJ45 ports"""
Expand Down
59 changes: 54 additions & 5 deletions platform/mellanox/mlnx-platform-api/sonic_platform/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2020-2021 NVIDIA CORPORATION & AFFILIATES.
# Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES.
# Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand All @@ -19,6 +19,7 @@
import subprocess
import json
import sys
import threading
import time
import os
from sonic_py_common import device_info
Expand Down Expand Up @@ -100,15 +101,15 @@ def read_float_from_file(file_path, default=0.0, raise_exception=False, log_func
return read_from_file(file_path=file_path, target_type=float, default=default, raise_exception=raise_exception, log_func=log_func)


def _key_value_converter(content):
def _key_value_converter(content, delimeter):
ret = {}
for line in content.splitlines():
k,v = line.split(':')
k,v = line.split(delimeter)
ret[k.strip()] = v.strip()
return ret


def read_key_value_file(file_path, default={}, raise_exception=False, log_func=logger.log_error):
def read_key_value_file(file_path, default={}, raise_exception=False, log_func=logger.log_error, delimeter=':'):
"""Read file content and parse the content to a dict. The file content should like:
key1:value1
key2:value2
Expand All @@ -119,7 +120,8 @@ def read_key_value_file(file_path, default={}, raise_exception=False, log_func=l
raise_exception (bool, optional): If exception should be raised or hidden. Defaults to False.
log_func (optional): logger function. Defaults to logger.log_error.
"""
return read_from_file(file_path=file_path, target_type=_key_value_converter, default=default, raise_exception=raise_exception, log_func=log_func)
converter = lambda content: _key_value_converter(content, delimeter)
return read_from_file(file_path=file_path, target_type=converter, default=default, raise_exception=raise_exception, log_func=log_func)


def write_file(file_path, content, raise_exception=False, log_func=logger.log_error):
Expand Down Expand Up @@ -285,3 +287,50 @@ def wait_until(predict, timeout, interval=1, *args, **kwargs):
time.sleep(interval)
timeout -= interval
return False


def wait_until_conditions(conditions, timeout, interval=1):
    """
    Wait until all the conditions become true

    Conditions are evaluated in order on each round; a condition that has
    returned a true value is not evaluated again. The conditions are always
    checked at least once, and once more after the final sleep, so a
    condition that is already true (even with timeout <= 0) or that becomes
    true during the last interval is not reported as a timeout.

    Args:
        conditions (list): a list of callable which generate True|False
        timeout (int): wait time in seconds
        interval (int, optional): interval to check the predict. Defaults to 1.
            Must be positive: the remaining timeout is reduced by this
            amount on every round.

    Returns:
        bool: True if wait success else False
    """
    pending_conditions = list(conditions)
    while True:
        pending_conditions = [condition for condition in pending_conditions if not condition()]
        if not pending_conditions:
            return True
        if timeout <= 0:
            return False
        time.sleep(interval)
        timeout -= interval


class DbUtils:
    """Cache of connected SonicV2Connector objects, kept in thread-local storage and keyed by DB name."""
    lock = threading.Lock()
    db_instances = threading.local()

    @classmethod
    def get_db_instance(cls, db_name, **kargs):
        """Return the calling thread's connector for db_name, connecting on first use.

        Args:
            db_name (str): name passed to SonicV2Connector.connect and used as cache key.
            **kargs: accepted but not used.

        Returns:
            The cached connector, or a newly created and connected one.

        Raises:
            Exception: any failure is logged via logger.log_error and re-raised.
        """
        try:
            local_store = cls.db_instances
            if not hasattr(local_store, 'data'):
                with cls.lock:
                    if not hasattr(local_store, 'data'):
                        local_store.data = {}

            cache = local_store.data
            if db_name in cache:
                return cache[db_name]

            from swsscommon.swsscommon import SonicV2Connector
            connector = SonicV2Connector(use_unix_socket_path=True)
            connector.connect(db_name)
            cache[db_name] = connector
            return connector
        except Exception as e:
            logger.log_error(f'Failed to get DB instance for DB {db_name} - {e}')
            raise e
29 changes: 29 additions & 0 deletions platform/mellanox/mlnx-platform-api/tests/test_device_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,5 +52,34 @@ def test_get_linecard_max_port_count(self):
def test_get_bios_component(self):
    # Only checks that something other than None is returned.
    assert DeviceDataManager.get_bios_component() is not None

@mock.patch('sonic_py_common.device_info.get_paths_to_platform_and_hwsku_dirs', mock.MagicMock(return_value=('', '/tmp')))
@mock.patch('sonic_platform.device_data.utils.read_key_value_file')
def test_is_independent_mode(self, mock_read):
    """is_independent_mode follows the SAI_INDEPENDENT_MODULE_MODE key of the parsed sai.profile."""
    # Key absent -> not independent mode.
    mock_read.return_value = {}
    assert not DeviceDataManager.is_independent_mode()
    # Key present with value '1' -> independent mode.
    mock_read.return_value = {'SAI_INDEPENDENT_MODULE_MODE': '1'}
    assert DeviceDataManager.is_independent_mode()

@mock.patch('sonic_py_common.device_info.get_path_to_platform_dir', mock.MagicMock(return_value='/tmp'))
@mock.patch('sonic_platform.device_data.utils.load_json_file')
def test_get_sfp_count(self, mock_load_json):
    """get_sfp_count returns the length of chassis.sfps from the loaded JSON."""
    mock_load_json.return_value = {'chassis': {'sfps': [1, 2, 3]}}
    assert DeviceDataManager.get_sfp_count() == 3

@mock.patch('sonic_platform.device_data.time.sleep', mock.MagicMock())
@mock.patch('sonic_platform.device_data.DeviceDataManager.get_sfp_count', mock.MagicMock(return_value=3))
@mock.patch('sonic_platform.device_data.utils.read_int_from_file', mock.MagicMock(return_value=1))
@mock.patch('sonic_platform.device_data.os.path.exists')
@mock.patch('sonic_platform.device_data.DeviceDataManager.is_independent_mode')
def test_wait_platform_ready(self, mock_is_indep, mock_exists):
    """wait_platform_ready succeeds when every path exists and fails when none does."""
    # time.sleep is mocked, so the failing case below does not really wait.
    mock_exists.return_value = True
    # Independent mode, all sysfs paths reported as existing.
    mock_is_indep.return_value = True
    assert DeviceDataManager.wait_platform_ready()
    # Non-independent mode; read_int_from_file is mocked to return 1.
    mock_is_indep.return_value = False
    assert DeviceDataManager.wait_platform_ready()
    # No sysfs path exists -> the wait runs out and reports failure.
    mock_exists.return_value = False
    assert not DeviceDataManager.wait_platform_ready()
25 changes: 25 additions & 0 deletions platform/mellanox/mlnx-platform-api/tests/test_sfp.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,3 +299,28 @@ def test_set_lpmode(self):
assert not sfp.set_lpmode(True)
assert sfp.set_lpmode(False)
assert not sfp.set_lpmode(False)

@mock.patch('sonic_platform.utils.read_int_from_file')
@mock.patch('sonic_platform.device_data.DeviceDataManager.is_independent_mode')
@mock.patch('sonic_platform.utils.DbUtils.get_db_instance')
def test_is_sw_control(self, mock_get_db, mock_mode, mock_read):
    """is_sw_control across mode, STATE_DB control_type and sysfs control value combinations."""
    sfp = SFP(0)
    # Not in independent mode -> always False.
    mock_mode.return_value = False
    assert not sfp.is_sw_control()
    mock_mode.return_value = True

    # Independent mode, but STATE_DB has no control_type yet -> raises.
    mock_db = mock.MagicMock()
    mock_get_db.return_value = mock_db
    mock_db.get = mock.MagicMock(return_value=None)
    with pytest.raises(Exception):
        sfp.is_sw_control()

    # FW_CONTROL with control file value 0 -> False.
    mock_read.return_value = 0
    mock_db.get.return_value = 'FW_CONTROL'
    assert not sfp.is_sw_control()
    # SW_CONTROL with control file value 1 -> True.
    mock_read.return_value = 1
    mock_db.get.return_value = 'SW_CONTROL'
    assert sfp.is_sw_control()
    # SW_CONTROL in STATE_DB but control file value 0 -> mismatch, raises.
    mock_read.return_value = 0
    with pytest.raises(Exception):
        sfp.is_sw_control()
10 changes: 10 additions & 0 deletions platform/mellanox/mlnx-platform-api/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,3 +191,13 @@ def test_read_key_value_file(self):
mock_os_open = mock.mock_open(read_data='a:b')
with mock.patch('sonic_platform.utils.open', mock_os_open):
assert utils.read_key_value_file('some_file') == {'a':'b'}
mock_os_open = mock.mock_open(read_data='a=b')
with mock.patch('sonic_platform.utils.open', mock_os_open):
assert utils.read_key_value_file('some_file', delimeter='=') == {'a':'b'}

@mock.patch('sonic_platform.utils.time.sleep', mock.MagicMock())
def test_wait_until_conditions(self):
    """wait_until_conditions is truthy for a satisfied condition and falsy for one that never holds."""
    always_true = [lambda: True]
    never_true = [lambda: False]
    assert utils.wait_until_conditions(always_true, 1)
    assert not utils.wait_until_conditions(never_true, 1)