diff --git a/.github/workflows/test_api_cpu.yaml b/.github/workflows/test_api_cpu.yaml index d2b36ad2..126e500b 100644 --- a/.github/workflows/test_api_cpu.yaml +++ b/.github/workflows/test_api_cpu.yaml @@ -51,4 +51,4 @@ jobs: HF_TOKEN: ${{ secrets.HF_TOKEN }} PUSH_REPO_ID: optimum-benchmark/cpu run: | - pytest -s -k "api and cpu" + pytest tests/test_api.py -s -k "api and cpu" diff --git a/.github/workflows/test_api_cuda.yaml b/.github/workflows/test_api_cuda.yaml index f69c1f33..c62b69a0 100644 --- a/.github/workflows/test_api_cuda.yaml +++ b/.github/workflows/test_api_cuda.yaml @@ -48,4 +48,4 @@ jobs: HF_TOKEN: ${{ secrets.HF_TOKEN }} PUSH_REPO_ID: optimum-benchmark/cuda run: | - pytest -s -x -k "api and cuda" + pytest tests/test_api.py -x -s -k "api and cuda" diff --git a/.github/workflows/test_api_misc.yaml b/.github/workflows/test_api_misc.yaml index d3175df1..29339eab 100644 --- a/.github/workflows/test_api_misc.yaml +++ b/.github/workflows/test_api_misc.yaml @@ -59,4 +59,4 @@ jobs: HF_TOKEN: ${{ secrets.HF_TOKEN }} PUSH_REPO_ID: optimum-benchmark/misc-${{ matrix.os }}-${{ matrix.python }} run: | - pytest -s -k "api and not (cpu or cuda or rocm or mps)" + pytest tests/test_api.py -s -k "api and not (cpu or cuda or rocm or mps)" diff --git a/.github/workflows/test_api_rocm.yaml b/.github/workflows/test_api_rocm.yaml index 26c1d74f..170c2a0e 100644 --- a/.github/workflows/test_api_rocm.yaml +++ b/.github/workflows/test_api_rocm.yaml @@ -54,4 +54,4 @@ jobs: HF_TOKEN: ${{ secrets.HF_TOKEN }} PUSH_REPO_ID: optimum-benchmark/rocm run: | - pytest -s -x -k "api and cuda" + pytest tests/test_api.py -x -s -k "api and cuda" diff --git a/.github/workflows/test_cli_cpu_ipex.yaml b/.github/workflows/test_cli_cpu_ipex.yaml index ea64c2e1..d6b94d3e 100644 --- a/.github/workflows/test_cli_cpu_ipex.yaml +++ b/.github/workflows/test_cli_cpu_ipex.yaml @@ -48,4 +48,4 @@ jobs: pip install -e .[testing,ipex,diffusers,timm] - name: Run tests - run: pytest -s -k "cli and cpu and ipex" + run: pytest tests/test_cli.py -s -k "cli and cpu and ipex" diff --git a/.github/workflows/test_cli_cpu_llama_cpp.yaml b/.github/workflows/test_cli_cpu_llama_cpp.yaml index 97ae1b0d..05d43683 100644 --- a/.github/workflows/test_cli_cpu_llama_cpp.yaml +++ b/.github/workflows/test_cli_cpu_llama_cpp.yaml @@ -48,4 +48,4 @@ jobs: pip install -e .[testing,llama-cpp] - name: Run tests - run: pytest -s -k "llama_cpp" + run: pytest tests/test_cli.py -s -k "llama_cpp" diff --git a/.github/workflows/test_cli_cpu_neural_compressor.yaml b/.github/workflows/test_cli_cpu_neural_compressor.yaml index 243be796..435f4216 100644 --- a/.github/workflows/test_cli_cpu_neural_compressor.yaml +++ b/.github/workflows/test_cli_cpu_neural_compressor.yaml @@ -48,4 +48,4 @@ jobs: pip install -e .[testing,neural-compressor,diffusers,timm] - name: Run tests - run: pytest -s -k "cli and cpu and neural_compressor" + run: pytest tests/test_cli.py -s -k "cli and cpu and neural_compressor" diff --git a/.github/workflows/test_cli_cpu_onnxruntime.yaml b/.github/workflows/test_cli_cpu_onnxruntime.yaml index 7cdee6eb..21e65235 100644 --- a/.github/workflows/test_cli_cpu_onnxruntime.yaml +++ b/.github/workflows/test_cli_cpu_onnxruntime.yaml @@ -48,4 +48,4 @@ jobs: pip install -e .[testing,onnxruntime,diffusers,timm] - name: Run tests - run: pytest -s -k "cli and cpu and onnxruntime" + run: pytest tests/test_cli.py -s -k "cli and cpu and onnxruntime" diff --git a/.github/workflows/test_cli_cpu_openvino.yaml b/.github/workflows/test_cli_cpu_openvino.yaml index d13659b5..4612370c 100644 --- a/.github/workflows/test_cli_cpu_openvino.yaml +++ b/.github/workflows/test_cli_cpu_openvino.yaml @@ -48,4 +48,4 @@ jobs: pip install -e .[testing,openvino,diffusers,timm] - name: Run tests - run: pytest -s -k "cli and cpu and openvino" + run: pytest tests/test_cli.py -s -k "cli and cpu and openvino" diff --git a/.github/workflows/test_cli_cpu_py_txi.yaml b/.github/workflows/test_cli_cpu_py_txi.yaml index b8ed69a9..d07f6170 100644 --- a/.github/workflows/test_cli_cpu_py_txi.yaml +++ b/.github/workflows/test_cli_cpu_py_txi.yaml @@ -48,4 +48,4 @@ jobs: pip install -e .[testing,py-txi] - name: Run tests - run: pytest -s -k "cli and cpu and py_txi" + run: pytest tests/test_cli.py -s -k "cli and cpu and py_txi" diff --git a/.github/workflows/test_cli_cpu_pytorch.yaml b/.github/workflows/test_cli_cpu_pytorch.yaml index 85f18ff4..fef2a772 100644 --- a/.github/workflows/test_cli_cpu_pytorch.yaml +++ b/.github/workflows/test_cli_cpu_pytorch.yaml @@ -48,4 +48,4 @@ jobs: pip install -e .[testing,diffusers,timm,peft] - name: Run tests - run: pytest -s -k "cli and cpu and pytorch" + run: pytest tests/test_cli.py -s -k "cli and cpu and pytorch" diff --git a/.github/workflows/test_cli_cuda_onnxruntime.yaml b/.github/workflows/test_cli_cuda_onnxruntime.yaml index 78a0a120..db71c24a 100644 --- a/.github/workflows/test_cli_cuda_onnxruntime.yaml +++ b/.github/workflows/test_cli_cuda_onnxruntime.yaml @@ -46,4 +46,4 @@ jobs: - name: Run tests run: | - pytest -x -s -k "cli and cuda and onnxruntime" + pytest tests/test_cli.py -x -s -k "cli and cuda and onnxruntime" diff --git a/.github/workflows/test_cli_cuda_py_txi.yaml b/.github/workflows/test_cli_cuda_py_txi.yaml index 6c6848e8..d653893e 100644 --- a/.github/workflows/test_cli_cuda_py_txi.yaml +++ b/.github/workflows/test_cli_cuda_py_txi.yaml @@ -47,4 +47,4 @@ jobs: pip install -e .[testing,py-txi] - name: Run tests - run: pytest -s -k "cli and cuda and py_txi" + run: pytest tests/test_cli.py -x -s -k "cli and cuda and py_txi" diff --git a/.github/workflows/test_cli_cuda_pytorch.yaml b/.github/workflows/test_cli_cuda_pytorch.yaml index 1577a4f0..a6aac2c6 100644 --- a/.github/workflows/test_cli_cuda_pytorch.yaml +++ b/.github/workflows/test_cli_cuda_pytorch.yaml @@ -47,7 +47,7 @@ jobs: - name: Run tests run: | - pytest -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed)" + pytest tests/test_cli.py -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed)" run_cli_cuda_pytorch_multi_gpu_tests: if: ${{ @@ -76,4 +76,4 @@ jobs: - name: Run tests run: | - pytest -x -s -k "cli and cuda and pytorch and (dp or ddp or device_map or deepspeed)" + FORCE_SERIAL=1 pytest tests/test_cli.py -x -s -k "cli and cuda and pytorch and (dp or ddp or device_map or deepspeed)" diff --git a/.github/workflows/test_cli_cuda_tensorrt_llm.yaml b/.github/workflows/test_cli_cuda_tensorrt_llm.yaml index 6a8f7829..9d8126e1 100644 --- a/.github/workflows/test_cli_cuda_tensorrt_llm.yaml +++ b/.github/workflows/test_cli_cuda_tensorrt_llm.yaml @@ -46,4 +46,4 @@ jobs: - name: Run tests run: | - pytest -x -s -k "cli and cuda and tensorrt_llm" + pytest tests/test_cli.py -x -s -k "cli and cuda and tensorrt_llm" diff --git a/.github/workflows/test_cli_cuda_torch_ort.yaml b/.github/workflows/test_cli_cuda_torch_ort.yaml index 9b4ede42..8f80a190 100644 --- a/.github/workflows/test_cli_cuda_torch_ort.yaml +++ b/.github/workflows/test_cli_cuda_torch_ort.yaml @@ -48,7 +48,7 @@ jobs: - name: Run tests run: | - pytest -x -s -k "cli and cuda and torch_ort and not (dp or ddp or device_map) and not (peft)" + pytest tests/test_cli.py -x -s -k "cli and cuda and torch_ort and not (dp or ddp or device_map) and not (peft)" run_cli_cuda_torch_ort_multi_gpu_tests: if: ${{ @@ -78,4 +78,4 @@ jobs: - name: Run tests run: | - pytest -x -s -k "cli and cuda and torch_ort and (dp or ddp or device_map) and not (peft)" + FORCE_SERIAL=1 pytest tests/test_cli.py -x -s -k "cli and cuda and torch_ort and (dp or ddp or device_map) and not (peft)" diff --git a/.github/workflows/test_cli_cuda_vllm.yaml b/.github/workflows/test_cli_cuda_vllm.yaml index b6b991e2..7868950c 100644 --- a/.github/workflows/test_cli_cuda_vllm.yaml +++ b/.github/workflows/test_cli_cuda_vllm.yaml @@ -47,7 +47,7 @@ jobs: - name: Run tests run: | - FORCE_SERIAL=1 pytest -x -s -k "cli and cuda and vllm and not (tp or pp)" + FORCE_SERIAL=1 pytest tests/test_cli.py -x -s -k "cli and cuda and vllm and not (tp or pp)" run_cli_cuda_vllm_multi_gpu_tests: if: ${{ @@ -76,4 +76,4 @@ jobs: - name: Run tests run: | - FORCE_SERIAL=1 pytest -x -s -k "cli and cuda and vllm and (tp or pp)" + FORCE_SERIAL=1 pytest tests/test_cli.py -x -s -k "cli and cuda and vllm and (tp or pp)" diff --git a/.github/workflows/test_cli_misc.yaml b/.github/workflows/test_cli_misc.yaml index ff4faa62..cad1c7b3 100644 --- a/.github/workflows/test_cli_misc.yaml +++ b/.github/workflows/test_cli_misc.yaml @@ -61,4 +61,4 @@ jobs: pip install -e .[testing] - name: Run tests - run: pytest -s -k "cli and not (cpu or cuda or rocm or mps)" + run: pytest tests/test_cli.py -s -k "cli and not (cpu or cuda or rocm or mps)" diff --git a/.github/workflows/test_cli_rocm_pytorch.yaml b/.github/workflows/test_cli_rocm_pytorch.yaml index 675d8792..d7616135 100644 --- a/.github/workflows/test_cli_rocm_pytorch.yaml +++ b/.github/workflows/test_cli_rocm_pytorch.yaml @@ -40,7 +40,6 @@ jobs: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES - --env HIP_VISIBLE_DEVICES=0 steps: - name: Checkout code @@ -52,7 +51,7 @@ jobs: - name: Run tests run: | - pytest -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not bnb" + pytest tests/test_cli.py -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not bnb" run_cli_rocm_pytorch_multi_gpu_tests: if: ${{ @@ -74,7 +73,6 @@ jobs: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES - --env HIP_VISIBLE_DEVICES=0,1 steps: - name: Checkout code @@ -86,4 +84,4 @@ jobs: - name: Run tests run: | - pytest -x -s -k "cli and cuda and pytorch and (dp or ddp or device_map or deepspeed) and not bnb" + FORCE_SERIAL=1 pytest tests/test_cli.py -x -s -k "cli and cuda and pytorch and (dp or ddp or device_map or deepspeed) and not bnb" diff --git a/optimum_benchmark/trackers/energy.py b/optimum_benchmark/trackers/energy.py index a02a0adc..6f904deb 100644 --- a/optimum_benchmark/trackers/energy.py +++ b/optimum_benchmark/trackers/energy.py @@ -5,14 +5,11 @@ from logging import getLogger from typing import List, Literal, Optional, Union -from ..import_utils import is_codecarbon_available, is_torch_available, is_torch_distributed_available +from ..import_utils import is_codecarbon_available, is_torch_available if is_torch_available(): import torch -if is_torch_distributed_available(): - import torch.distributed - if is_codecarbon_available(): from codecarbon import EmissionsTracker, OfflineEmissionsTracker from codecarbon.output import EmissionsData @@ -115,9 +112,7 @@ def __init__(self, backend: str, device: str, device_ids: Optional[Union[str, in self.device_ids = device_ids self.is_gpu = self.device == "cuda" - self.is_engine = self.backend in ["vllm", "tensorrt-llm"] self.is_pytorch_cuda = (self.backend, self.device) == ("pytorch", "cuda") - self.is_distributed = is_torch_distributed_available() and torch.distributed.is_initialized() LOGGER.info("\t+ Tracking CPU and RAM energy") @@ -188,9 +183,6 @@ def __init__(self, backend: str, device: str, device_ids: Optional[Union[str, in @contextmanager def track(self, file_prefix: str = "task"): - if not self.is_engine and self.is_distributed: - torch.distributed.barrier() - if self.is_pytorch_cuda: torch.cuda.synchronize() @@ -198,9 +190,6 @@ def track(self, file_prefix: str = "task"): yield - if not self.is_engine and self.is_distributed: - torch.distributed.barrier() - if self.is_pytorch_cuda: torch.cuda.synchronize() diff --git a/optimum_benchmark/trackers/latency.py b/optimum_benchmark/trackers/latency.py index abad0b87..343a04d7 100644 --- a/optimum_benchmark/trackers/latency.py +++ b/optimum_benchmark/trackers/latency.py @@ -4,11 +4,6 @@ from logging import getLogger from typing import List, Literal, Optional, Union -from ..import_utils import is_torch_distributed_available - -if is_torch_distributed_available(): - import torch.distributed - import numpy as np import torch from transformers import LogitsProcessor, TrainerCallback @@ -123,9 +118,7 @@ def __init__(self, device: str, backend: str): self.device = device self.backend = backend - self.is_engine = self.backend in ["vllm", "tensorrt-llm"] self.is_pytorch_cuda = (self.backend, self.device) == ("pytorch", "cuda") - self.is_distributed = is_torch_distributed_available() and torch.distributed.is_initialized() if self.is_pytorch_cuda: LOGGER.info("\t+ Tracking latency using Pytorch CUDA events") @@ -143,17 +136,11 @@ def reset(self): @contextmanager def track(self): - if not self.is_engine and self.is_distributed: - torch.distributed.barrier() - if self.is_pytorch_cuda: yield from self._pytorch_cuda_latency() else: yield from self._cpu_latency() - if not self.is_engine and self.is_distributed: - torch.distributed.barrier() - def _pytorch_cuda_latency(self): self.start_events.append(torch.cuda.Event(enable_timing=True)) self.start_events[-1].record() @@ -259,9 +246,7 @@ def __init__(self, device: str, backend: str): self.device = device self.backend = backend - self.is_engine = self.backend in ["vllm", "tensorrt-llm"] self.is_pytorch_cuda = (self.backend, self.device) == ("pytorch", "cuda") - self.is_distributed = is_torch_distributed_available() and torch.distributed.is_initialized() if self.is_pytorch_cuda: LOGGER.info("\t+ Tracking latency using Pytorch CUDA events") @@ -292,17 +277,12 @@ def track(self): self.prefilled = False self.per_token_events.append([]) - if not self.is_engine and self.is_distributed: - torch.distributed.barrier() - if self.is_pytorch_cuda: self.prefill_start_events.append(torch.cuda.Event(enable_timing=True)) self.prefill_start_events[-1].record() else: self.prefill_start_events.append(time.perf_counter()) - # this is where generate is called, - # and for each decoded token, we record an event yield if self.is_pytorch_cuda: @@ -311,9 +291,6 @@ def track(self): else: self.decode_end_events.append(time.perf_counter()) - if not self.is_engine and self.is_distributed: - torch.distributed.barrier() - self.prefilled = False def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor): diff --git a/optimum_benchmark/trackers/memory.py b/optimum_benchmark/trackers/memory.py index b6330051..ba515d51 100644 --- a/optimum_benchmark/trackers/memory.py +++ b/optimum_benchmark/trackers/memory.py @@ -11,15 +11,12 @@ is_pynvml_available, is_pyrsmi_available, is_torch_available, - is_torch_distributed_available, ) from ..system_utils import is_nvidia_system, is_rocm_system if is_rocm_system() and is_pyrsmi_available(): from pyrsmi import rocml -if is_torch_distributed_available(): - import torch.distributed if is_nvidia_system() and is_pynvml_available(): import pynvml @@ -102,9 +99,7 @@ def __init__(self, device: str, backend: str, device_ids: Optional[Union[str, in self.monitored_pid = os.getpid() self.is_gpu = device == "cuda" - self.is_engine = backend in ["vllm", "tensorrt-llm"] self.is_pytorch_cuda = (self.backend, self.device) == ("pytorch", "cuda") - self.is_distributed = is_torch_distributed_available() and torch.distributed.is_initialized() LOGGER.info(f"\t+ Tracking RAM memory of process [{self.monitored_pid}]") @@ -147,9 +142,6 @@ def reset(self): @contextmanager def track(self): - if not self.is_engine and self.is_distributed: - torch.distributed.barrier() - if self.is_pytorch_cuda: yield from self._cuda_pytorch_memory() elif self.is_gpu: @@ -157,21 +149,18 @@ def track(self): else: yield from self._cpu_memory() - if not self.is_engine and self.is_distributed: - torch.distributed.barrier() - def _cuda_pytorch_memory(self): self.max_allocated_memory = 0 self.max_reserved_memory = 0 - torch.cuda.synchronize() - for device in range(self.num_pytorch_devices): try: torch.cuda.reset_peak_memory_stats(device=device) except Exception as e: LOGGER.warning(f"\t\t+ Could not reset max memory stats for device {device}: {e}") + torch.cuda.synchronize() + yield from self._gpu_memory() torch.cuda.synchronize() diff --git a/tests/configs/_inference_.yaml b/tests/configs/_inference_.yaml index 29148001..82b2fcd6 100644 --- a/tests/configs/_inference_.yaml +++ b/tests/configs/_inference_.yaml @@ -11,10 +11,11 @@ scenario: input_shapes: batch_size: 1 + sequence_length: 16 generate_kwargs: - max_new_tokens: 5 - min_new_tokens: 5 + max_new_tokens: 16 + min_new_tokens: 16 call_kwargs: num_inference_steps: 2