From a8acc048836b277a215d1eaebd5ff796700b86d6 Mon Sep 17 00:00:00 2001 From: bachvudinh Date: Fri, 19 Jul 2024 11:21:28 +0700 Subject: [PATCH 1/2] add gpu utils --- tests/test_case.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_case.py b/tests/test_case.py index 6743849..7185dc0 100644 --- a/tests/test_case.py +++ b/tests/test_case.py @@ -115,7 +115,7 @@ def setUpClass(cls): print(f"Found {model_save_dir}. Skipping download.") # Model loading using vllm cls.tokenizer = AutoTokenizer.from_pretrained(model_save_dir) - cls.llm = LLM(model_save_dir, tokenizer=model_save_dir) + cls.llm = LLM(model_save_dir, tokenizer=model_save_dir, gpu_memory_utilization=0.3) # Load dataset data_save_dir = os.path.join(args.cache_dir, args.data_dir) From f729efed9218604361ed516605faabb3575d4c0b Mon Sep 17 00:00:00 2001 From: bachvudinh Date: Fri, 19 Jul 2024 12:06:11 +0700 Subject: [PATCH 2/2] decrease offload memory --- tests/test_case.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_case.py b/tests/test_case.py index 7185dc0..39e8800 100644 --- a/tests/test_case.py +++ b/tests/test_case.py @@ -115,7 +115,7 @@ def setUpClass(cls): print(f"Found {model_save_dir}. Skipping download.") # Model loading using vllm cls.tokenizer = AutoTokenizer.from_pretrained(model_save_dir) - cls.llm = LLM(model_save_dir, tokenizer=model_save_dir, gpu_memory_utilization=0.3) + cls.llm = LLM(model_save_dir, tokenizer=model_save_dir, gpu_memory_utilization=0.6) # Load dataset data_save_dir = os.path.join(args.cache_dir, args.data_dir)