Example of Unsloth training
```python
from unsloth import FastLanguageModel, is_bfloat16_supported
from trl import SFTTrainer
from transformers import TrainingArguments, DataCollatorForSeq2Seq
from datasets import load_dataset
import torch


class AdaptiveTrainer(SFTTrainer):
    """SFTTrainer with a simple adaptive learning-rate heuristic based on eval loss."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.prev_eval_loss = float("inf")
        self.no_improve_count = 0

    def evaluate(self, *args, **kwargs):
        # Trainer has no `evaluation_step` hook; override `evaluate` instead,
        # which returns a metrics dict containing "eval_loss".
        metrics = super().evaluate(*args, **kwargs)
        current_eval_loss = metrics["eval_loss"]

        # Adaptive learning-rate adjustment. Mutating self.args.learning_rate
        # alone has no effect on a running optimizer, so update the optimizer's
        # param groups directly. Note that the cosine scheduler configured below
        # will keep rescaling the rate on subsequent steps.
        if current_eval_loss > self.prev_eval_loss:
            new_lr = self.args.learning_rate * 0.9  # reduce LR if eval loss increased
            print(f"Decreased learning rate to: {new_lr}")
            self.no_improve_count += 1
        else:
            new_lr = self.args.learning_rate * 1.05  # slightly increase if it decreased
            print(f"Increased learning rate to: {new_lr}")
            self.no_improve_count = 0
        self.args.learning_rate = new_lr
        for param_group in self.optimizer.param_groups:
            param_group["lr"] = new_lr

        self.prev_eval_loss = current_eval_loss
        return metrics

    def training_step(self, *args, **kwargs):
        # Periodically report the gradient norm. Trainer already clips gradients
        # to max_grad_norm internally, so this only logs the norm for monitoring.
        if self.state.global_step > 0 and self.state.global_step % self.args.eval_steps == 0:
            grad_norm = torch.nn.utils.clip_grad_norm_(
                self.model.parameters(), self.args.max_grad_norm
            )
            print(f"Gradient norm (clipped at {self.args.max_grad_norm}): {grad_norm}")
        return super().training_step(*args, **kwargs)


def print_memory_stats(stage):
    gpu_stats = torch.cuda.get_device_properties(0)
    used_memory = round(torch.cuda.memory_reserved() / 1024 / 1024 / 1024, 3)
    max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
    print(f"Stage: {stage}")
    print(f"GPU: {gpu_stats.name}")
    print(f"Max memory: {max_memory} GB")
    print(f"Memory reserved: {used_memory} GB")


max_seq_length = 2048
dtype = None          # auto-detect (bfloat16 on Ampere+, float16 otherwise)
load_in_4bit = True

print("Loading model")
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Mistral-Nemo-Instruct-2407-bnb-4bit",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
    token="",  # add your Hugging Face token here if the model requires one
)

print("Loading LoRA adapters")
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)

print("Loading dataset")
# Note: this template is unused below; formatting_prompts_func builds the
# prompt inline instead.
custom_prompt = """Source: {}
Repository: {}
File: {}
Label: {}
Content:
{}
"""
EOS_TOKEN = tokenizer.eos_token


def formatting_prompts_func(examples):
    sources = examples["source"]
    repositories = examples["repository"]
    files = examples["file"]
    labels = examples["label"]
    contents = examples["content"]
    texts = []
    for source, repository, file, label, content in zip(
        sources, repositories, files, labels, contents
    ):
        # Append EOS_TOKEN so the model learns where a completion ends.
        text = (
            f"Source: {source}\nRepository: {repository}\nFile: {file}\n"
            f"Label: {label}\nContent:\n```\n{content}\n```\n" + EOS_TOKEN
        )
        texts.append(text)
    tokenized_texts = tokenizer(
        texts, truncation=True, padding=True, max_length=max_seq_length
    )
    # Use the input IDs as labels for causal-LM loss. (Padded positions are
    # included in the labels here; masking them with -100 would exclude them
    # from the loss.)
    tokenized_texts["labels"] = tokenized_texts["input_ids"].copy()
    return tokenized_texts

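# Hypothetical example of one record in the JSON file loaded below (the field
# names are inferred from formatting_prompts_func; the values are made up):
#   {"source": "github", "repository": "microsoft/autogen",
#    "file": "autogen/agent.py", "label": "python",
#    "content": "class Agent:\n    ..."}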
dataset_path = "autogen_python_dataset.json"
dataset = load_dataset("json", data_files=dataset_path, split="train")
dataset = dataset.map(
    formatting_prompts_func, batched=True, remove_columns=dataset.column_names
)

# Use the datasets-native splitter rather than torch.utils.data.random_split,
# which would wrap the Hugging Face Dataset in torch Subset objects.
split = dataset.train_test_split(test_size=0.2, seed=3407)
train_dataset, val_dataset = split["train"], split["test"]

data_collator = DataCollatorForSeq2Seq(tokenizer)

print_memory_stats("After loading dataset")  # check memory after loading dataset

trainer = AdaptiveTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    data_collator=data_collator,
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=False,
    dataset_text_field="input_ids",  # dataset is pre-tokenized; keeps SFTTrainer from re-tokenizing
    args=TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        warmup_steps=5,
        num_train_epochs=1,  # one epoch per trainer.train() call; the loop below repeats it
        learning_rate=2e-4,
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        logging_steps=1,
        evaluation_strategy="steps",
        eval_steps=10,
        save_steps=10,
        save_total_limit=2,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="cosine",  # cosine annealing schedule
        gradient_checkpointing=True,
        seed=3407,
        output_dir="outputs",
        fp16_full_eval=True,
        per_device_eval_batch_size=2,
        eval_accumulation_steps=4,
        resume_from_checkpoint=True,  # note: Trainer ignores this field; pass it to trainer.train() to actually resume
        max_grad_norm=1.0,  # gradient clipping to stabilize training
    ),
)

print_memory_stats("After initializing trainer")  # check memory after initializing trainer

target_loss = 1.0  # set your target loss here
patience = 3       # number of epochs to wait for improvement before stopping

# Optimizer steps in one epoch, accounting for gradient accumulation.
steps_per_epoch = len(train_dataset) // (
    trainer.args.per_device_train_batch_size * trainer.args.gradient_accumulation_steps
)

# Loop over epochs dynamically: stop once the step budget is spent, the target
# loss is reached, or eval loss has not improved for `patience` epochs.
while True:
    trainer_stats = trainer.train()
    if trainer.state.global_step >= trainer.args.num_train_epochs * steps_per_epoch:
        break
    if trainer.prev_eval_loss <= target_loss:
        print(f"Target loss of {target_loss} achieved.")
        break
    if trainer.no_improve_count >= patience:
        print(f"No improvement for {patience} epochs, stopping training.")
        break

used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
gpu_stats = torch.cuda.get_device_properties(0)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
used_percentage = round(used_memory / max_memory * 100, 3)
print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
print(f"{round(trainer_stats.metrics['train_runtime'] / 60, 2)} minutes used for training.")
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")

print_memory_stats("After training")  # check memory after training

model.save_pretrained("lora_model")
tokenizer.save_pretrained("lora_model")
model.save_pretrained_gguf("model", tokenizer, quantization_method="q4_k_m")
```
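For anyone trying this end to end, here is a minimal sketch of loading the adapters for inference afterwards. It assumes the `lora_model` directory written by the script above; the example prompt is hypothetical and just mirrors the training format:

```python
from unsloth import FastLanguageModel

# Load the LoRA adapters saved by the training script above.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="lora_model",  # directory written by model.save_pretrained(...)
    max_seq_length=2048,
    load_in_4bit=True,
)
FastLanguageModel.for_inference(model)  # enable Unsloth's faster inference mode

# Hypothetical prompt matching the training format.
prompt = "Source: github\nRepository: microsoft/autogen\nFile: example.py\nLabel: python\nContent:\n"
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```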