Commit 433c4a4 (1 parent: ef533d2)
2 files changed: +1 -8 lines changed
Original file line number | Diff line number | Diff line change
@@ -1024,13 +1024,6 @@ def initialize_model_parallel(
1024
1024
backend = backend or torch .distributed .get_backend (
1025
1025
get_world_group ().device_group )
1026
1026
1027
- if (world_size
1028
- != tensor_model_parallel_size * pipeline_model_parallel_size ):
1029
- raise RuntimeError (
1030
- f"world_size ({ world_size } ) is not equal to "
1031
- f"tensor_model_parallel_size ({ tensor_model_parallel_size } ) x "
1032
- f"pipeline_model_parallel_size ({ pipeline_model_parallel_size } )" )
1033
-
1034
1027
# Build the tensor model-parallel groups.
1035
1028
num_tensor_model_parallel_groups : int = (world_size //
1036
1029
tensor_model_parallel_size )
Original file line number Diff line number Diff line change @@ -101,7 +101,7 @@ def _init_executor(self) -> None:
101
101
# - MASTER_PORT
102
102
distributed_init_method = "env://"
103
103
rank = int (os .environ ["RANK" ])
104
- local_rank = rank
104
+ local_rank = int ( os . environ [ "LOCAL_RANK" ])
105
105
is_driver_worker = True
106
106
kwargs = dict (
107
107
vllm_config = self .vllm_config ,
You can’t perform that action at this time.
0 commit comments