Skip to content

Commit

Permalink
fix: corrections in subprocess and backends
Browse files Browse the repository at this point in the history
  • Loading branch information
JuanPedroGHM committed Jul 31, 2023
1 parent 35a0b3d commit bb895cd
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 18 deletions.
32 changes: 18 additions & 14 deletions perun/backend/nvml.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""Nvidia Management Library Source definition."""
from typing import Callable, Set
from typing import Callable, List, Set

import numpy as np
import pynvml
Expand Down Expand Up @@ -40,28 +40,26 @@ def close(self):
pynvml.nvmlShutdown()

def visibleSensors(self) -> Set[str]:
"""
Return string ids of visible devices.
"""Return string ids of visible devices.
Returns:
Set[str]: Set with device string ids
:return: Set with string ids.
:rtype: Set[str]
"""
devices = set()
for i in range(pynvml.nvmlDeviceGetCount()):
handle = pynvml.nvmlDeviceGetHandleByIndex(i)
devices.add(pynvml.nvmlDeviceGetUUID(handle))
return devices

def getSensors(self, deviceList: Set[str]):
"""
Gather device objects based on a set of device ids.
Args:
deviceList (Set[str]): Set containing devices ids
def getSensors(self, deviceList: Set[str]) -> List[Sensor]:
"""Gather sensor objects based on a set of device ids.
Returns:
List[Device]: Device objects
:param deviceList: Set containing device ids.
:type deviceList: Set[str]
:return: List with sensor objects
:rtype: List[Sensor]
"""
pynvml.nvmlInit()

def getCallback(handle) -> Callable[[], np.number]:
def func() -> np.number:
Expand All @@ -70,10 +68,13 @@ def func() -> np.number:
return func

devices = []

for deviceId in deviceList:
try:
log.debug(f"Getting handle from '{deviceId}'")
handle = pynvml.nvmlDeviceGetHandleByUUID(deviceId)
index = pynvml.nvmlDeviceGetIndex(handle)
log.debug(f"Index: {index} - Handle : {handle}")

name = f"CUDA:{index}"
device_type = DeviceType.GPU
Expand All @@ -82,7 +83,10 @@ def func() -> np.number:
"name": pynvml.nvmlDeviceGetName(handle),
**self.metadata,
}
max_power = np.uint32(pynvml.nvmlDeviceGetPowerManagementLimit(handle))
max_power = np.uint32(
pynvml.nvmlDeviceGetPowerManagementDefaultLimit(handle)
)
log.debug(f"Device {deviceId} Max Power : {max_power}")

data_type = MetricMetaData(
Unit.WATT,
Expand Down
1 change: 1 addition & 0 deletions perun/backend/psutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
class PSUTILBackend(Backend):
"""PSUTIL Backend class."""

id: str = "psutil"
name: str = "PSUTIL"
description: str = "Obtain hardware data from psutil"

Expand Down
2 changes: 1 addition & 1 deletion perun/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
"price_factor": 32.51, # Cent/kWh
},
"monitor": {
"sampling_rate": 5,
"sampling_rate": 1,
},
"output": {
"app_name": None,
Expand Down
7 changes: 4 additions & 3 deletions perun/subprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
def perunSubprocess(
queue: Queue,
rank: int,
backends: List[Backend],
backends: Dict[str, Backend],
l_sensors_config: Dict[str, Set[str]],
sp_ready_event,
start_event,
Expand Down Expand Up @@ -46,9 +46,10 @@ def perunSubprocess(
Sampling rate in seconds
"""
lSensors: List[Sensor] = []
for backend in backends:
log.debug(f"SP: backends -- {backends}")
log.debug(f"SP: l_sensor_config -- {l_sensors_config}")
for backend in backends.values():
if backend.name in l_sensors_config:
backend.setup()
lSensors += backend.getSensors(l_sensors_config[backend.name])

timesteps = []
Expand Down

0 comments on commit bb895cd

Please sign in to comment.