Slurm training #8

Closed
wants to merge 16 commits
6 changes: 6 additions & 0 deletions .gitignore
@@ -127,3 +127,9 @@ dmypy.json

# Pyre type checker
.pyre/

# Exclude .DS_Store files everywhere
*.DS_Store

# Exclude editor settings
.vscode/
1 change: 1 addition & 0 deletions checkthat/task1/__init__.py
@@ -0,0 +1 @@
"""init file for main module."""
37 changes: 37 additions & 0 deletions checkthat/task1/auth.sh
@@ -0,0 +1,37 @@
#!/bin/bash
#SBATCH --gres=gpu:0
#SBATCH --partition=gpuA100
#SBATCH --time=1:00:00
#SBATCH --job-name=setup_authenticator.sh
#SBATCH --output=hf_test.out
# Load necessary modules, if required
# module load python/3.9 # Adjust this according to your environment

# Activate your Python environment
# source ~/bhome/env/checkthat2024_env/bin/activate

# Explicitly specify the path to the correct Python executable
# PYTHON="~/bhome/env/checkthat2024_env/bin/"
PYTHON="~/.conda/envs/CLEF_checkthat2024/bin"
uenv miniconda3-py39

# Activate the Conda environment
#conda activate ~/bhome/env/checkthat2024_env
conda activate ~/.conda/envs/CLEF_checkthat2024
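
# NOTE: the 'KEY' strings used below are placeholders; replace them with real
# Hugging Face and wandb credentials before submitting this script.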

export HF_HOME=~/bhome/clef2024-checkthat/checkthat/task1
export WANDB_CACHE_DIR=~/bhome/clef2024-checkthat/checkthat/task1

# Create necessary directories
mkdir -p "$HF_HOME" "$WANDB_CACHE_DIR"

# Store the Hugging Face token
echo 'KEY' > "$HF_HOME/token"
chmod 600 "$HF_HOME/token"

# Log in to wandb
export WANDB_API_KEY='KEY'
wandb login "$WANDB_API_KEY"

# Test the Hugging Face API with a Python script
#$PYTHON test_start.py
python -u test_hf_login.py
21 changes: 21 additions & 0 deletions checkthat/task1/conda_setup.sh
@@ -0,0 +1,21 @@
#!/bin/bash
#SBATCH --gres=gpu:0
#SBATCH --partition=gpuA100
#SBATCH --time=1:00:00
#SBATCH --job-name=conda_setup
#SBATCH --output=conda_setup.out
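# Submit this script as a batch job, e.g.: sbatch conda_setup.sh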

module load cuda/12.2.0 cudnn/8.8.0 # Load CUDA and cuDNN modules

# Activate the user environment (uenv)
uenv verbose cuda-12.2.0 cudnn-12.x-8.8.0
uenv miniconda3-py39

# Create and activate the Conda environment
conda create -n CLEF_checkthat2024 -c pytorch pytorch torchvision torchaudio pytorch-cuda=12.1 -c nvidia -y
conda activate CLEF_checkthat2024

# Install Python packages
pip install torch torchvision torchaudio
pip install transformers[torch]
pip install -r requirements.txt
116 changes: 116 additions & 0 deletions checkthat/task1/depricated_scripts/main_test_all.py
@@ -0,0 +1,116 @@
"""Deprecated script: run each trained checkpoint on its test split and write predictions to a .tsv file."""
import os
import torch
import pandas as pd
import wandb
from datasets import load_dataset
from transformers import AutoTokenizer
from models.custom_model import CustomModel
from tokenization.tokenizer import TextDataset
from metrics.compute_metrics import compute_metrics

def find_latest_checkpoint(model_dir):
"""Find the latest checkpoint in the given directory."""
checkpoint_dirs = [os.path.join(model_dir, d) for d in os.listdir(model_dir) if os.path.isdir(os.path.join(model_dir, d)) and 'checkpoint' in d]
if not checkpoint_dirs:
raise ValueError("No checkpoint directories found in the given model directory.")
latest_checkpoint = sorted(checkpoint_dirs, key=lambda x: int(x.split('-')[-1]))[-1]
return latest_checkpoint

def find_models_with_checkpoints(base_dir):
"""Find models and their latest checkpoints in a structured directory."""
model_info = []
for model_name in os.listdir(base_dir):
model_path = os.path.join(base_dir, model_name)
if not os.path.isdir(model_path):
continue
try:
latest_checkpoint = find_latest_checkpoint(model_path)
model_info.append((model_name, latest_checkpoint))
print(f"Model: {model_name}, Latest Checkpoint: {latest_checkpoint}")
except Exception as e:
print(f"Failed to find checkpoint for model {model_name}: {str(e)}")
return model_info

def run_prediction(model_name, dataset_list, tokenizer, model_path, has_labels: bool):
"""Run prediction on the dataset, compute metrics if labels are present, and write results to a .tsv file."""
device = 'cuda'
label_map = {0: 'no', 1: 'yes'}

# Detect the language from the model name and load the matching dataset
lang = model_name.split('_')[-2]  # Assumes format like 'modelname_lang_'
dataset = load_dataset(dataset_list[lang])

# Initialize Weights & Biases
run_name = f"TEST__{model_path}"
wandb.init(project="Clef2024", entity="aarnes", name=run_name)

# Load the model from the checkpoint
model = CustomModel.from_pretrained(model_path)
model.to(device)
model.eval()

# Load the dataset, with or without labels
test_dataset = TextDataset(dataset["test"], tokenizer, None if not has_labels else label_map)
all_logits = []
all_labels = []
results = []

with torch.no_grad():
for i, batch in enumerate(test_dataset):
input_ids, attention_mask = batch['input_ids'].to(device), batch['attention_mask'].to(device)
output = model(input_ids=input_ids, attention_mask=attention_mask)
logits = output.logits
predictions = logits.argmax(-1).cpu().numpy()

# Collect logits and labels for metric calculation if labels are present
if has_labels and 'labels' in batch:
labels = batch['labels'].cpu().numpy()
all_logits.append(logits)
all_labels.append(torch.tensor(labels))
for label, pred in zip(labels, predictions):
results.append((i, label_map[pred], model_name))
else:
for pred in predictions:
results.append((i, label_map[pred], model_name))

# If labels were present, calculate metrics
if has_labels:
all_logits = torch.cat(all_logits).cpu()
all_labels = torch.cat(all_labels)
metrics = compute_metrics((all_logits, all_labels))
wandb.log(metrics)

# Save results to a .tsv file
df = pd.DataFrame(results, columns=['sentence_id', 'prediction', 'model_name'])
df.to_csv(f"{model_path}_predictions.tsv", sep='\t', index=False)

# Finish Weights & Biases logging
wandb.finish()


if __name__ == "__main__":
# Define the dataset list for each language
dataset_list = {
"en":"iai-group/clef2024_checkthat_task1_en",
"ar":"iai-group/clef2024_checkthat_task1_ar",
"es":"iai-group/clef2024_checkthat_task1_es",
"nl":"iai-group/clef2024_checkthat_task1_nl",
}

label_map = {"Yes": 1, "No": 0}


# Load models and run prediction
base_dir = "./trained_models"

model_info = find_models_with_checkpoints(base_dir)
for model_name, checkpoint_path in model_info:
    tokenizer = AutoTokenizer.from_pretrained(checkpoint_path)  # Tokenizer saved with the checkpoint
    run_prediction(model_name, dataset_list, tokenizer, checkpoint_path, has_labels=True)


44 changes: 44 additions & 0 deletions checkthat/task1/depricated_scripts/main_train_all.py
@@ -0,0 +1,44 @@
"""Main training script for training on all languages."""
from datasets import load_dataset
from tokenization.normalize_DatasetDict_featues import rename_features
from training_scripts.training import run_training
from transformers import AutoTokenizer
from training_scripts.train_config import get_training_arguments

def main():
dataset_list = [
"iai-group/clef2024_checkthat_task1_en",
"iai-group/clef2024_checkthat_task1_ar",
"iai-group/clef2024_checkthat_task1_es",
"iai-group/clef2024_checkthat_task1_nl",
]
label_map = {"Yes": 1, "No": 0}

model_name_en = "FacebookAI/roberta-large"
multilingual_model = "FacebookAI/xlm-roberta-large"
seeds = [42, 81, 1024, 6, 10]
tokenizer = AutoTokenizer.from_pretrained(model_name_en)

"""Training model on trainset for each seed and each language"""
for seed in seeds:
for dataset_name in dataset_list:
dataset = load_dataset(dataset_name)
if "tweet_text" in dataset["train"].column_names:
dataset = rename_features(dataset)
tokenizer = AutoTokenizer.from_pretrained(multilingual_model)
training_args = get_training_arguments(multilingual_model, seed, dataset_name)
run_training(seed, dataset, model_name_en, tokenizer, label_map, training_args)
else:
training_args = get_training_arguments(model_name_en, seed, dataset_name)
run_training(seed, dataset, model_name_en, tokenizer, label_map, training_args)


if __name__ == "__main__":
import torch

print(torch.cuda.is_available())
print(torch.cuda.current_device())
print(torch.cuda.device(0))
print(torch.cuda.device_count())
print(torch.cuda.get_device_name(0))
main()
8 changes: 8 additions & 0 deletions checkthat/task1/depricated_scripts/metrics/__init__.py
@@ -0,0 +1,8 @@
from .compute_metrics import (
compute_metrics,
accuracy_metric,
precision_metric,
recall_metric,
f1_metric,
)
from .metrics_logger import MetricsLoggerCallback, compute_custom_metrics
38 changes: 38 additions & 0 deletions checkthat/task1/depricated_scripts/metrics/compute_metrics.py
@@ -0,0 +1,38 @@
"""Function to compute four metrics: accuracy, precision, recall, and F1-score.

Metrics will be passed to wandb for logging.
"""
from evaluate import load

"""Compute accuracy, precision, recall, and F1-score metrics."""
accuracy_metric = load("accuracy")
precision_metric = load("precision")
recall_metric = load("recall")
f1_metric = load("f1")


def compute_metrics(eval_pred):
"""Compute accuracy, precision, recall, and F1-score metrics.

Args:
eval_pred: Tuple of logits and labels.

Returns:
dict: Dictionary containing the computed metrics.
"""
logits, labels = eval_pred
predictions = logits.argmax(-1)
return {
"accuracy": accuracy_metric.compute(
predictions=predictions, references=labels
)["accuracy"],
"precision": precision_metric.compute(
predictions=predictions, references=labels, average="weighted"
)["precision"],
"recall": recall_metric.compute(
predictions=predictions, references=labels, average="weighted"
)["recall"],
"f1": f1_metric.compute(
predictions=predictions, references=labels, average="weighted"
)["f1"],
}
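
# Usage note: this function can be passed to the Hugging Face Trainer
# (e.g. Trainer(..., compute_metrics=compute_metrics)), which calls it with (logits, labels).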
61 changes: 61 additions & 0 deletions checkthat/task1/depricated_scripts/metrics/metrics_logger.py
@@ -0,0 +1,61 @@
"""Sets up the logging for the metrics using Weights and Biases."""
import wandb
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score
from transformers import TrainerCallback


def compute_custom_metrics(logits, labels):
"""Compute precision, recall, and F1-score from model logits and true
labels.

Args:
logits (np.array): Logits returned by the model. Shape (num_samples, num_classes).
labels (np.array): True labels. Shape (num_samples,).

Returns:
tuple: precision, recall, F1-score
"""

predictions = np.argmax(logits, axis=1) # Convert logits to predictions

# Calculate metrics
precision = precision_score(labels, predictions, average="binary", pos_label=1)
recall = recall_score(labels, predictions, average="binary", pos_label=1)
f1 = f1_score(labels, predictions, average="binary", pos_label=1)

return precision, recall, f1


class MetricsLoggerCallback(TrainerCallback):
"""Custom callback for logging additional metrics to wandb."""

def on_evaluate(self, args, state, control, **kwargs):
# Assuming 'logits' and 'labels' are part of the outputs collected during evaluation
logits = kwargs["logits"]
labels = kwargs["labels"]

# Compute custom metrics
precision, recall, f1 = compute_custom_metrics(logits, labels)

# Log custom metrics to wandb
wandb.log(
{
"precision": precision,
"recall": recall,
"f1_score": f1,
"epoch": state.epoch,
}
)


callback_map = {
"MetricsLoggerCallback": MetricsLoggerCallback,
}


def get_callbacks(callback_names):
"""Create a list of callback instances from a list of callback names."""
return [
callback_map[name]() for name in callback_names if name in callback_map
]
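
# Example (hypothetical usage): Trainer(..., callbacks=get_callbacks(["MetricsLoggerCallback"]))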
50 changes: 50 additions & 0 deletions checkthat/task1/main.py
@@ -0,0 +1,50 @@
"""Will run script to run training and testing. (unlabeled tests yet to ble implemented)

Argument parser is used to specify the model name and dataset name.
"""
import argparse
from datasets import load_dataset
from training_scripts.training import run_training
from transformers import AutoTokenizer
from tokenization.tokenizer import TextDataset


def main(args):
"""Run training."""
label_map = {"Yes": 1, "No": 0}

tokenizer = AutoTokenizer.from_pretrained(args.model_name)

dataset = load_dataset(args.dataset)

dataset_language = args.dataset.split("_")[-2:]
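# e.g. "iai-group/clef2024_checkthat_task1_en" -> ["task1", "en"]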

train_dataset = TextDataset(dataset["train"], tokenizer, label_map)
eval_dataset = TextDataset(dataset["validation"], tokenizer, label_map)
test_dataset = TextDataset(dataset["test"], tokenizer, label_map)

run_training(train_dataset, eval_dataset, args.model_name, label_map, dataset_language, test_dataset)


if __name__ == "__main__":

parser = argparse.ArgumentParser(description="Run training and testing.")

# parser.add_argument(
# "--test", action="store_true", help="Whether to run testing"
# )
parser.add_argument(
"--model_name",
type=str,
default="FacebookAI/roberta-large", # For English language
help="Name of the model",
)
parser.add_argument(
"--dataset",
type=str,
default="iai-group/clef2024_checkthat_task1_en", # For English language
help="Name of the dataset from the iai-group/clef2024_checkthat_task1_* datasets",
)

args = parser.parse_args()
main(args)
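
# Example invocation (using the argparse defaults above):
#   python main.py --model_name FacebookAI/roberta-large --dataset iai-group/clef2024_checkthat_task1_en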