From 88d78a454bc7b76166970ce4e00a59798b6b5871 Mon Sep 17 00:00:00 2001 From: Richard Gong Date: Thu, 15 Feb 2024 23:52:28 +0000 Subject: [PATCH] Add inference to CI --- .github/workflows/ci-cd.yml | 4 + ci/check_inference.py | 21 +++ ci/check_loss.py | 5 +- ci/prep_for_ci.py | 9 +- config/mixtral.yml | 4 +- config/mixtral_out_of_box.yml | 104 +++++++++++++ mixtral_error.txt | 276 ++++++++++++++++++++++++++++++++++ src/inference.py | 20 ++- 8 files changed, 430 insertions(+), 13 deletions(-) create mode 100644 ci/check_inference.py create mode 100644 config/mixtral_out_of_box.yml create mode 100644 mixtral_error.txt diff --git a/.github/workflows/ci-cd.yml b/.github/workflows/ci-cd.yml index 129b996..a02b56a 100644 --- a/.github/workflows/ci-cd.yml +++ b/.github/workflows/ci-cd.yml @@ -43,3 +43,7 @@ jobs: - name: Check training results run: | python ci/check_loss.py + + - name: Check inference results + run: | + python ci/check_inference.py diff --git a/ci/check_inference.py b/ci/check_inference.py new file mode 100644 index 0000000..a2a925b --- /dev/null +++ b/ci/check_inference.py @@ -0,0 +1,21 @@ +import subprocess + + +if __name__ == "__main__": + + with open(".last_run_name", "r") as f: + run_name = f.read().strip() + + prompt = """[INST] Using the schema context below, generate a SQL query that answers the question. +CREATE TABLE head (age INTEGER) +How many heads of the departments are older than 56 ? [/INST] """ + + p = subprocess.Popen(["modal", "run", "src.inference", "--run-folder", f"/runs/{run_name}", "--prompt", prompt], stdout=subprocess.PIPE) + output = "" + + for line in iter(p.stdout.readline, b''): + output += line.decode() + print(line.decode()) + + print("Asserting that the output contains the expected SQL query") + assert "[SQL] SELECT" in output and "[/SQL]" in output diff --git a/ci/check_loss.py b/ci/check_loss.py index 848b1f3..4013056 100644 --- a/ci/check_loss.py +++ b/ci/check_loss.py @@ -27,5 +27,8 @@ train_loss = float(results["TrainingLoss"].iloc[-1]) val_loss = float(results["ValidationLoss"].iloc[-1]) + # Arbitrary threshold + max_loss = 10 if b"Mixtral" in contents else 0.25 + print(f"Loss: {train_loss:.2f} (training), {val_loss:.2f} (validation)") - sys.exit(val_loss > 0.25) # Arbitrary threshold + sys.exit(val_loss > max_loss) diff --git a/ci/prep_for_ci.py b/ci/prep_for_ci.py index 06cb132..fdb3bad 100644 --- a/ci/prep_for_ci.py +++ b/ci/prep_for_ci.py @@ -7,10 +7,15 @@ @click.option("--data") def main(config: str, data: str): """Set the config to train for only one epoch and truncate the dataset.""" - train_set_size = 1000 - val_set_size = 64 with open(config) as f: cfg = yaml.safe_load(f.read()) + + if cfg["sample_packing"]: + train_set_size = 2048 + else: + train_set_size = 1024 + val_set_size = 64 + cfg["val_set_size"] = val_set_size cfg["num_epochs"] = 1 cfg.pop("eval_steps", None) # Evaluate once at the end of the epoch diff --git a/config/mixtral.yml b/config/mixtral.yml index 301842d..2bd6b8e 100644 --- a/config/mixtral.yml +++ b/config/mixtral.yml @@ -1,4 +1,4 @@ -base_model: mistralai/Mixtral-8x7B-v0.1 +base_model: mistralai/Mixtral-8x7B-Instruct-v0.1 model_type: AutoModelForCausalLM tokenizer_type: LlamaTokenizer trust_remote_code: true @@ -69,7 +69,7 @@ wandb_name: wandb_log_model: gradient_accumulation_steps: 1 -micro_batch_size: 16 +micro_batch_size: 8 num_epochs: 1 optimizer: adamw_bnb_8bit lr_scheduler: cosine diff --git a/config/mixtral_out_of_box.yml b/config/mixtral_out_of_box.yml new file mode 100644 index 0000000..888653f --- 
/dev/null +++ b/config/mixtral_out_of_box.yml @@ -0,0 +1,104 @@ +base_model: mistralai/Mixtral-8x7B-v0.1 +model_type: AutoModelForCausalLM +tokenizer_type: LlamaTokenizer +trust_remote_code: true + +load_in_8bit: false +load_in_4bit: true +strict: false + +datasets: + # This will be the path used for the data when it is saved to the Volume in the cloud. + - path: data.jsonl + ds_type: json + type: + # JSONL file contains question, context, answer fields per line. + # This gets mapped to instruction, input, output axolotl tags. + field_instruction: question + field_input: context + field_output: answer + # Format is used by axolotl to generate the prompt. + format: |- + [INST] Using the schema context below, generate a SQL query that answers the question. + {input} + {instruction} [/INST] + +dataset_prepared_path: last_run_prepared +val_set_size: 0.0 +output_dir: ./qlora-out + +## You can optionally freeze the entire model and unfreeze a subset of parameters +unfrozen_parameters: +# - lm_head.* +# - model.embed_tokens.* +# - model.layers.2[0-9]+.block_sparse_moe.gate.* +# - model.layers.2[0-9]+.block_sparse_moe.experts.* +# - model.layers.3[0-9]+.block_sparse_moe.gate.* +# - model.layers.3[0-9]+.block_sparse_moe.experts.* + +model_config: + output_router_logits: true + +adapter: qlora +lora_model_dir: + +sequence_len: 4096 +sample_packing: true +pad_to_sequence_len: true + +lora_r: 32 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_linear: true +lora_fan_in_fan_out: +#lora_target_modules: +# - gate +# - q_proj +# - k_proj +# - v_proj +# - o_proj +# - w1 +# - w2 +# - w3 + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 2 +micro_batch_size: 1 +num_epochs: 1 +optimizer: adamw_bnb_8bit +lr_scheduler: cosine +learning_rate: 0.0002 + +train_on_inputs: false +group_by_length: false +bf16: auto +fp16: +tf32: false + +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: true + +loss_watchdog_threshold: 5.0 +loss_watchdog_patience: 3 + +warmup_steps: 10 +evals_per_epoch: 4 +eval_table_size: +eval_max_new_tokens: 128 +saves_per_epoch: 1 +debug: +deepspeed: /root/axolotl/deepspeed_configs/zero2.json +weight_decay: 0.0 +fsdp: +fsdp_config: +special_tokens: diff --git a/mixtral_error.txt b/mixtral_error.txt new file mode 100644 index 0000000..72b4106 --- /dev/null +++ b/mixtral_error.txt @@ -0,0 +1,276 @@ + +Starting training run in /runs/axo-2024-02-14-16-25-53-8ebe. +Using 2 NVIDIA H100 80GB HBM3 GPU(s). +The following values were not passed to `accelerate launch` and had defaults used instead: + `--num_processes` was set to a value of `2` + More than one GPU was found, enabling multi-GPU training. + If this was unintended please pass in `--num_processes=1`. + `--num_machines` was set to a value of `1` + `--mixed_precision` was set to a value of `'no'` + `--dynamo_backend` was set to a value of `'no'` +To avoid this warning pass in values for each of the problematic parameters or run `accelerate config`. +/root/miniconda3/envs/py3.11/lib/python3.11/site-packages/bitsandbytes/cuda_setup/main.py:107: UserWarning: + +================================================================================ +WARNING: Manual override via BNB_CUDA_VERSION env variable detected! +BNB_CUDA_VERSION=XXX can be used to load a bitsandbytes version that is different from the PyTorch CUDA version. 
+If this was unintended set the BNB_CUDA_VERSION variable to an empty string: export BNB_CUDA_VERSION= +If you use the manual override make sure the right libcudart.so is in your LD_LIBRARY_PATH +For example by adding the following to your .bashrc: export LD_LIBRARY_PATH=$LD_LIBRARY_PATH::58] [PID:34] PyTorch version 2.1.2+cu121 available. +[2024-02-14 16:26:13,926] [INFO] [datasets.:58] [PID:33] PyTorch version 2.1.2+cu121 available. +[2024-02-14 16:26:15,357] [INFO] [real_accelerator.py:191:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2024-02-14 16:26:15,357] [INFO] [real_accelerator.py:191:get_accelerator] Setting ds_accelerator to cuda (auto detect) +[2024-02-14 16:26:17,670] [WARNING] [axolotl.validate_config:309] [PID:34] [RANK:1] `trust_remote_code` is set to true. Please make sure that you reviewed the remote code/model. +[2024-02-14 16:26:17,670] [WARNING] [axolotl.validate_config:309] [PID:33] [RANK:0] `trust_remote_code` is set to true. Please make sure that you reviewed the remote code/model. +[2024-02-14 16:26:17,671] [WARNING] [axolotl.validate_config:547] [PID:33] [RANK:0] conflicting optimizer: adamw_bnb_8bit used alongside deepspeed optimizer. +[2024-02-14 16:26:17,671] [WARNING] [axolotl.validate_config:547] [PID:34] [RANK:1] conflicting optimizer: adamw_bnb_8bit used alongside deepspeed optimizer. +[2024-02-14 16:26:17,671] [DEBUG] [axolotl.normalize_config:74] [PID:33] [RANK:0] bf16 support detected, enabling for this configuration. +[2024-02-14 16:26:17,671] [DEBUG] [axolotl.normalize_config:74] [PID:34] [RANK:1] bf16 support detected, enabling for this configuration. +[2024-02-14 16:26:17,754] [INFO] [axolotl.normalize_config:176] [PID:34] [RANK:1] GPU memory usage baseline: 0.000GB (+0.546GB misc) +[2024-02-14 16:26:17,965] [INFO] [axolotl.normalize_config:176] [PID:33] [RANK:0] GPU memory usage baseline: 0.000GB (+0.546GB misc) +[2024-02-14 16:26:17,976] [WARNING] [axolotl.scripts.check_user_token:433] [PID:34] [RANK:1] Error verifying HuggingFace token. Remember to log in using `huggingface-cli login` and get your access token from https://huggingface.co/settings/tokens if you want to use gated models or datasets. + dP dP dP + 88 88 88 + .d8888b. dP. .dP .d8888b. 88 .d8888b. d8888P 88 + 88' `88 `8bd8' 88' `88 88 88' `88 88 88 + 88. .88 .d88b. 88. .88 88 88. .88 88 88 + `88888P8 dP' `dP `88888P' dP `88888P' dP dP + + + +[2024-02-14 16:26:17,999] [WARNING] [axolotl.scripts.check_user_token:433] [PID:33] [RANK:0] Error verifying HuggingFace token. Remember to log in using `huggingface-cli login` and get your access token from https://huggingface.co/settings/tokens if you want to use gated models or datasets. +[2024-02-14 16:26:18,271] [DEBUG] [axolotl.load_tokenizer:245] [PID:34] [RANK:1] EOS: 2 / +[2024-02-14 16:26:18,271] [DEBUG] [axolotl.load_tokenizer:246] [PID:34] [RANK:1] BOS: 1 / +[2024-02-14 16:26:18,271] [DEBUG] [axolotl.load_tokenizer:247] [PID:34] [RANK:1] PAD: 2 / +[2024-02-14 16:26:18,271] [DEBUG] [axolotl.load_tokenizer:248] [PID:34] [RANK:1] UNK: 0 / +[2024-02-14 16:26:18,271] [INFO] [axolotl.load_tokenizer:259] [PID:34] [RANK:1] No Chat template selected. Consider adding a chat template for easier inference. 
+[2024-02-14 16:26:18,297] [DEBUG] [axolotl.load_tokenizer:245] [PID:33] [RANK:0] EOS: 2 / +[2024-02-14 16:26:18,297] [DEBUG] [axolotl.load_tokenizer:246] [PID:33] [RANK:0] BOS: 1 / +[2024-02-14 16:26:18,297] [DEBUG] [axolotl.load_tokenizer:247] [PID:33] [RANK:0] PAD: 2 / +[2024-02-14 16:26:18,297] [DEBUG] [axolotl.load_tokenizer:248] [PID:33] [RANK:0] UNK: 0 / +[2024-02-14 16:26:18,297] [INFO] [axolotl.load_tokenizer:259] [PID:33] [RANK:0] No Chat template selected. Consider adding a chat template for easier inference. +[2024-02-14 16:26:18,297] [INFO] [axolotl.load_tokenized_prepared_datasets:191] [PID:33] [RANK:0] Unable to find prepared dataset in last_run_prepared/f296ca80661a80bf05a90dcbd89b0525 +[2024-02-14 16:26:18,297] [INFO] [axolotl.load_tokenized_prepared_datasets:192] [PID:33] [RANK:0] Loading raw datasets... +[2024-02-14 16:26:18,297] [WARNING] [axolotl.load_tokenized_prepared_datasets:194] [PID:33] [RANK:0] Processing datasets during training can lead to VRAM instability. Please pre-process your dataset. +[2024-02-14 16:26:18,297] [INFO] [axolotl.load_tokenized_prepared_datasets:201] [PID:33] [RANK:0] No seed provided, using default seed of 42 +Generating train split: 0 examples [00:00, ? examples/s]Generating train split: 4000 examples [00:00, 177704.04 examples/s] +Tokenizing Prompts (num_proc=64): 97%|█████████▋| 3899/4000 [00:01<00:00, 4913.35 examples/s]Tokenizing Prompts (num_proc=64): 100%|██████████| 4000/4000 [00:01<00:00, 2704.15 examples/s] +[2024-02-14 16:26:20,939] [INFO] [axolotl.load_tokenized_prepared_datasets:414] [PID:33] [RANK:0] merging datasets +Dropping Long Sequences (num_proc=208): 79%|███████▉ | 3164/4000 [00:02<00:00, 3194.24 examples/s]Dropping Long Sequences (num_proc=208): 100%|██████████| 4000/4000 [00:02<00:00, 1871.49 examples/s] +[2024-02-14 16:26:28,469] [INFO] [axolotl.load_tokenized_prepared_datasets:424] [PID:33] [RANK:0] Saving merged prepared dataset to disk... last_run_prepared/f296ca80661a80bf05a90dcbd89b0525 +[2024-02-14 16:26:28,469] [INFO] [axolotl.load_tokenized_prepared_datasets:191] [PID:34] [RANK:1] Unable to find prepared dataset in last_run_prepared/f296ca80661a80bf05a90dcbd89b0525 +[2024-02-14 16:26:28,469] [INFO] [axolotl.load_tokenized_prepared_datasets:192] [PID:34] [RANK:1] Loading raw datasets... +[2024-02-14 16:26:28,469] [WARNING] [axolotl.load_tokenized_prepared_datasets:194] [PID:34] [RANK:1] Processing datasets during training can lead to VRAM instability. Please pre-process your dataset. +[2024-02-14 16:26:28,469] [INFO] [axolotl.load_tokenized_prepared_datasets:201] [PID:34] [RANK:1] No seed provided, using default seed of 42 +Saving the dataset (1/1 shards): 100%|██████████| 4000/4000 [00:00<00:00, 74271.95 examples/s]Saving the dataset (1/1 shards): 100%|██████████| 4000/4000 [00:00<00:00, 73209.56 examples/s] +[2024-02-14 16:26:28,650] [INFO] [axolotl.load_tokenized_prepared_datasets:414] [PID:34] [RANK:1] merging datasets +[2024-02-14 16:26:28,662] [DEBUG] [axolotl.log:61] [PID:33] [RANK:0] total_num_tokens: 456586 +[2024-02-14 16:26:28,679] [DEBUG] [axolotl.log:61] [PID:33] [RANK:0] `total_supervised_tokens: 184809` +[2024-02-14 16:26:28,679] [DEBUG] [axolotl.log:61] [PID:33] [RANK:0] total_num_steps: 975 +[2024-02-14 16:26:28,686] [DEBUG] [axolotl.train.log:61] [PID:33] [RANK:0] loading tokenizer... 
mistralai/Mixtral-8x7B-v0.1 +[2024-02-14 16:26:28,755] [DEBUG] [axolotl.load_tokenizer:245] [PID:33] [RANK:0] EOS: 2 / +[2024-02-14 16:26:28,756] [DEBUG] [axolotl.load_tokenizer:246] [PID:33] [RANK:0] BOS: 1 / +[2024-02-14 16:26:28,756] [DEBUG] [axolotl.load_tokenizer:247] [PID:33] [RANK:0] PAD: 2 / +[2024-02-14 16:26:28,756] [DEBUG] [axolotl.load_tokenizer:248] [PID:33] [RANK:0] UNK: 0 / +[2024-02-14 16:26:28,756] [INFO] [axolotl.load_tokenizer:259] [PID:33] [RANK:0] No Chat template selected. Consider adding a chat template for easier inference. +[2024-02-14 16:26:28,756] [DEBUG] [axolotl.train.log:61] [PID:33] [RANK:0] loading model and peft_config... +[2024-02-14 16:26:29,006] [DEBUG] [axolotl.load_tokenizer:245] [PID:34] [RANK:1] EOS: 2 / +[2024-02-14 16:26:29,006] [DEBUG] [axolotl.load_tokenizer:246] [PID:34] [RANK:1] BOS: 1 / +[2024-02-14 16:26:29,006] [DEBUG] [axolotl.load_tokenizer:247] [PID:34] [RANK:1] PAD: 2 / +[2024-02-14 16:26:29,006] [DEBUG] [axolotl.load_tokenizer:248] [PID:34] [RANK:1] UNK: 0 / +[2024-02-14 16:26:29,006] [INFO] [axolotl.load_tokenizer:259] [PID:34] [RANK:1] No Chat template selected. Consider adding a chat template for easier inference. +Loading checkpoint shards: 0%| | 0/19 [00:00", line 198, in _run_module_as_main + File "", line 88, in _run_code + File "/workspace/axolotl/src/axolotl/cli/train.py", line 59, in +[2024-02-14 16:26:30,495] [ERROR] [axolotl.load_model:612] [PID:34] [RANK:1] Trying to set a tensor of shape torch.Size([32000, 4096]) in "weight" (which has shape torch.Size([0])), this look incorrect. +Traceback (most recent call last): + File "/workspace/axolotl/src/axolotl/utils/models.py", line 605, in load_model + model = AutoModelForCausalLM.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/root/miniconda3/envs/py3.11/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py", line 567, in from_pretrained + return model_class.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/root/miniconda3/envs/py3.11/lib/python3.11/site-packages/transformers/modeling_utils.py", line 3504, in from_pretrained + ) = cls._load_pretrained_model( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/root/miniconda3/envs/py3.11/lib/python3.11/site-packages/transformers/modeling_utils.py", line 3924, in _load_pretrained_model + new_error_msgs, offload_index, state_dict_index = _load_state_dict_into_meta_model( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/root/miniconda3/envs/py3.11/lib/python3.11/site-packages/transformers/modeling_utils.py", line 805, in _load_state_dict_into_meta_model + set_module_tensor_to_device(model, param_name, param_device, **set_module_kwargs) + File "/root/miniconda3/envs/py3.11/lib/python3.11/site-packages/accelerate/utils/modeling.py", line 310, in set_module_tensor_to_device + raise ValueError( +ValueError: Trying to set a tensor of shape torch.Size([32000, 4096]) in "weight" (which has shape torch.Size([0])), this look incorrect. +[2024-02-14 16:26:30,495] [ERROR] [axolotl.load_model:612] [PID:33] [RANK:0] Trying to set a tensor of shape torch.Size([32000, 4096]) in "weight" (which has shape torch.Size([0])), this look incorrect. 
+Traceback (most recent call last): + File "/workspace/axolotl/src/axolotl/utils/models.py", line 605, in load_model + model = AutoModelForCausalLM.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/root/miniconda3/envs/py3.11/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py", line 567, in from_pretrained + return model_class.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/root/miniconda3/envs/py3.11/lib/python3.11/site-packages/transformers/modeling_utils.py", line 3504, in from_pretrained + ) = cls._load_pretrained_model( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/root/miniconda3/envs/py3.11/lib/python3.11/site-packages/transformers/modeling_utils.py", line 3924, in _load_pretrained_model + new_error_msgs, offload_index, state_dict_index = _load_state_dict_into_meta_model( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/root/miniconda3/envs/py3.11/lib/python3.11/site-packages/transformers/modeling_utils.py", line 805, in _load_state_dict_into_meta_model + set_module_tensor_to_device(model, param_name, param_device, **set_module_kwargs) + File "/root/miniconda3/envs/py3.11/lib/python3.11/site-packages/accelerate/utils/modeling.py", line 310, in set_module_tensor_to_device + raise ValueError( +ValueError: Trying to set a tensor of shape torch.Size([32000, 4096]) in "weight" (which has shape torch.Size([0])), this look incorrect. +Traceback (most recent call last): + File "", line 198, in _run_module_as_main + File "", line 88, in _run_code + File "/workspace/axolotl/src/axolotl/cli/train.py", line 59, in + fire.Fire(do_cli)fire.Fire(do_cli) + + File "/root/miniconda3/envs/py3.11/lib/python3.11/site-packages/fire/core.py", line 141, in Fire + File "/root/miniconda3/envs/py3.11/lib/python3.11/site-packages/fire/core.py", line 141, in Fire + component_trace = _Fire(component, args, parsed_flag_args, context, name)component_trace = _Fire(component, args, parsed_flag_args, context, name) + + ^ ^ ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^ File "/root/miniconda3/envs/py3.11/lib/python3.11/site-packages/fire/core.py", line 475, in _Fire +^ + File "/root/miniconda3/envs/py3.11/lib/python3.11/site-packages/fire/core.py", line 475, in _Fire + component, remaining_args = _CallAndUpdateTrace( + component, remaining_args = _CallAndUpdateTrace( + ^ ^ ^ ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^ File "/root/miniconda3/envs/py3.11/lib/python3.11/site-packages/fire/core.py", line 691, in _CallAndUpdateTrace +^^ + File "/root/miniconda3/envs/py3.11/lib/python3.11/site-packages/fire/core.py", line 691, in _CallAndUpdateTrace + component = fn(*varargs, **kwargs) + component = fn(*varargs, **kwargs) + ^ ^ ^ ^ ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^ File "/workspace/axolotl/src/axolotl/cli/train.py", line 35, in do_cli +^^^ + File "/workspace/axolotl/src/axolotl/cli/train.py", line 35, in do_cli + return do_train(parsed_cfg, parsed_cli_args) + return do_train(parsed_cfg, parsed_cli_args) + ^ ^^ ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^ File "/workspace/axolotl/src/axolotl/cli/train.py", line 55, in do_train +^ + File "/workspace/axolotl/src/axolotl/cli/train.py", line 55, in do_train + return train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta) + return train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta) + ^^ ^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^ File 
"/workspace/axolotl/src/axolotl/train.py", line 84, in train +^^^ + File "/workspace/axolotl/src/axolotl/train.py", line 84, in train + model, peft_config = load_model(cfg, tokenizer, inference=cli_args.inference)model, peft_config = load_model(cfg, tokenizer, inference=cli_args.inference) + + ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^ + File "/workspace/axolotl/src/axolotl/utils/models.py", line 613, in load_model + File "/workspace/axolotl/src/axolotl/utils/models.py", line 613, in load_model + raise err +raise err + File "/workspace/axolotl/src/axolotl/utils/models.py", line 605, in load_model + File "/workspace/axolotl/src/axolotl/utils/models.py", line 605, in load_model + model = AutoModelForCausalLM.from_pretrained( +model = AutoModelForCausalLM.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^ + File "/root/miniconda3/envs/py3.11/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py", line 567, in from_pretrained + File "/root/miniconda3/envs/py3.11/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py", line 567, in from_pretrained + return model_class.from_pretrained(return model_class.from_pretrained( + + ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^ + File "/root/miniconda3/envs/py3.11/lib/python3.11/site-packages/transformers/modeling_utils.py", line 3504, in from_pretrained + File "/root/miniconda3/envs/py3.11/lib/python3.11/site-packages/transformers/modeling_utils.py", line 3504, in from_pretrained + ) = cls._load_pretrained_model() = cls._load_pretrained_model( + + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^ + File "/root/miniconda3/envs/py3.11/lib/python3.11/site-packages/transformers/modeling_utils.py", line 3924, in _load_pretrained_model + File "/root/miniconda3/envs/py3.11/lib/python3.11/site-packages/transformers/modeling_utils.py", line 3924, in _load_pretrained_model + new_error_msgs, offload_index, state_dict_index = _load_state_dict_into_meta_model( + new_error_msgs, offload_index, state_dict_index = _load_state_dict_into_meta_model( + ^ ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^ File "/root/miniconda3/envs/py3.11/lib/python3.11/site-packages/transformers/modeling_utils.py", line 805, in _load_state_dict_into_meta_model + + File "/root/miniconda3/envs/py3.11/lib/python3.11/site-packages/transformers/modeling_utils.py", line 805, in _load_state_dict_into_meta_model + set_module_tensor_to_device(model, param_name, param_device, **set_module_kwargs) +set_module_tensor_to_device(model, param_name, param_device, **set_module_kwargs) + File "/root/miniconda3/envs/py3.11/lib/python3.11/site-packages/accelerate/utils/modeling.py", line 310, in set_module_tensor_to_device + File "/root/miniconda3/envs/py3.11/lib/python3.11/site-packages/accelerate/utils/modeling.py", line 310, in set_module_tensor_to_device + raise ValueError( + ValueErrorraise ValueError( +: ValueErrorTrying to set a tensor of shape torch.Size([32000, 4096]) in "weight" (which has shape torch.Size([0])), this look incorrect. +: Trying to set a tensor of shape torch.Size([32000, 4096]) in "weight" (which has shape torch.Size([0])), this look incorrect. 
+[2024-02-14 16:26:33,404] torch.distributed.elastic.multiprocessing.api: [ERROR] failed (exitcode: 1) local_rank: 0 (pid: 33) of binary: /root/miniconda3/envs/py3.11/bin/python3 +Traceback (most recent call last): + File "/root/miniconda3/envs/py3.11/bin/accelerate", line 8, in + sys.exit(main()) + ^^^^^^ + File "/root/miniconda3/envs/py3.11/lib/python3.11/site-packages/accelerate/commands/accelerate_cli.py", line 47, in main + args.func(args) + File "/root/miniconda3/envs/py3.11/lib/python3.11/site-packages/accelerate/commands/launch.py", line 1014, in launch_command + multi_gpu_launcher(args) + File "/root/miniconda3/envs/py3.11/lib/python3.11/site-packages/accelerate/commands/launch.py", line 672, in multi_gpu_launcher + distrib_run.run(args) + File "/root/miniconda3/envs/py3.11/lib/python3.11/site-packages/torch/distributed/run.py", line 797, in run + elastic_launch( + File "/root/miniconda3/envs/py3.11/lib/python3.11/site-packages/torch/distributed/launcher/api.py", line 134, in __call__ + return launch_agent(self._config, self._entrypoint, list(args)) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/root/miniconda3/envs/py3.11/lib/python3.11/site-packages/torch/distributed/launcher/api.py", line 264, in launch_agent + raise ChildFailedError( +torch.distributed.elastic.multiprocessing.errors.ChildFailedError: +============================================================ +axolotl.cli.train FAILED +------------------------------------------------------------ +Failures: +[1]: + time : 2024-02-14_16:26:33 + host : localhost + rank : 1 (local_rank: 1) + exitcode : 1 (pid: 34) + error_file: + traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html +------------------------------------------------------------ +Root Cause (first observed failure): +[0]: + time : 2024-02-14_16:26:33 + host : localhost + rank : 0 (local_rank: 0) + exitcode : 1 (pid: 33) + error_file: + traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html +============================================================ +Traceback (most recent call last): + File "/pkg/modal/_container_entrypoint.py", line 397, in handle_input_exception + yield + File "/pkg/modal/_container_entrypoint.py", line 535, in run_inputs + res = imp_fun.fun(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/root/src/train.py", line 62, in train + run_cmd(TRAIN_CMD, run_folder) + File "/root/src/train.py", line 42, in run_cmd + exit(exit_code) + File "", line 26, in __call__ +SystemExit: 1 diff --git a/src/inference.py b/src/inference.py index b4f0481..bd05c6d 100644 --- a/src/inference.py +++ b/src/inference.py @@ -43,7 +43,7 @@ async def completion(self, input: str): t0 = time.time() index, tokens = 0, 0 async for request_output in results_generator: - if "\ufffd" == request_output.outputs[0].text[-1]: + if request_output.outputs[0].text and "\ufffd" == request_output.outputs[0].text[-1]: continue yield request_output.outputs[0].text[index:] index = len(request_output.outputs[0].text) @@ -58,10 +58,14 @@ async def completion(self, input: str): @stub.local_entrypoint() -def inference_main(run_folder: str): - text = input( - "Enter a prompt (including the prompt template, e.g. [INST] ... 
[/INST]):\n" - ) - print("Loading model ...") - for chunk in Inference(f"{run_folder}/lora-out/merged").completion.remote_gen(text): - print(chunk, end="") +def inference_main(run_folder: str, prompt: str = ""): + if prompt: + for chunk in Inference(f"{run_folder}/lora-out/merged").completion.remote_gen(prompt): + print(chunk, end="") + else: + prompt = input( + "Enter a prompt (including the prompt template, e.g. [INST] ... [/INST]):\n" + ) + print("Loading model ...") + for chunk in Inference(f"{run_folder}/lora-out/merged").completion.remote_gen(prompt): + print(chunk, end="")
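---
For local verification outside CI, the sketch below mirrors what `ci/check_inference.py` does, assuming the Modal CLI is installed, a `.last_run_name` file exists from a previous training run, and the `--prompt` flag added to `src.inference` above is available; the helper name `run_inference_check` is illustrative only, and output is captured in one shot rather than streamed.

import subprocess


def run_inference_check(run_name: str, prompt: str) -> str:
    """Invoke the Modal inference entrypoint and return its captured stdout."""
    proc = subprocess.run(
        ["modal", "run", "src.inference",
         "--run-folder", f"/runs/{run_name}", "--prompt", prompt],
        capture_output=True, text=True, check=True,
    )
    return proc.stdout


if __name__ == "__main__":
    with open(".last_run_name") as f:
        run_name = f.read().strip()

    prompt = (
        "[INST] Using the schema context below, generate a SQL query that answers the question.\n"
        "CREATE TABLE head (age INTEGER)\n"
        "How many heads of the departments are older than 56 ? [/INST] "
    )

    output = run_inference_check(run_name, prompt)
    # Same acceptance criterion as ci/check_inference.py: the fine-tuned model
    # should wrap its answer in [SQL] ... [/SQL] tags.
    assert "[SQL] SELECT" in output and "[/SQL]" in output, output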