diff --git a/src/axolotl/core/trainer_builder.py b/src/axolotl/core/trainer_builder.py index f4a2f90019..c656392de9 100755 --- a/src/axolotl/core/trainer_builder.py +++ b/src/axolotl/core/trainer_builder.py @@ -1301,7 +1301,7 @@ def build(self, total_num_steps): else max(min(int(0.005 * total_num_steps), 10), 1) ) - training_arguments_kwargs = {} + training_arguments_kwargs = {"average_tokens_across_devices": True} if self.cfg.bf16 == "full": training_arguments_kwargs["bf16_full_eval"] = True else: @@ -1808,7 +1808,7 @@ def get_post_trainer_create_callbacks(self, trainer): return callbacks def build_training_arguments(self, total_num_steps): - training_args_kwargs = {} + training_args_kwargs = {"average_tokens_across_devices": True} for arg in [ "adam_beta1", "adam_beta2",