Slurm training #8

Closed
wants to merge 16 commits
6 changes: 6 additions & 0 deletions .gitignore
@@ -127,3 +127,9 @@ dmypy.json

# Pyre type checker
.pyre/

# Exclude .DS_Store files everywhere
*.DS_Store

# Exclude editor settings
.vscode/
1 change: 1 addition & 0 deletions checkthat/task1/__init__.py
@@ -0,0 +1 @@
"""init file for main module."""
37 changes: 37 additions & 0 deletions checkthat/task1/auth.sh
@@ -0,0 +1,37 @@
#!/bin/bash
#SBATCH --gres=gpu:0
#SBATCH --partition=gpuA100
#SBATCH --time=1:00:00
#SBATCH --job-name=setup_authenticator.sh
#SBATCH --output=hf_test.out
# Load necessary modules, if required
# module load python/3.9 # Adjust this according to your environment

# Activate your Python environment
# source ~/bhome/env/checkthat2024_env/bin/activate

# Explicitly specify the path to the correct Python executable
# PYTHON="~/bhome/env/checkthat2024_env/bin/"
PYTHON="~/.conda/envs/CLEF_checkthat2024/bin"
uenv miniconda3-py39

# Activate the Conda environment
#conda activate ~/bhome/env/checkthat2024_env
conda activate ~/.conda/envs/CLEF_checkthat2024
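
# NOTE: the 'KEY' strings used below are placeholders; replace them with real
# Hugging Face and wandb credentials before submitting this script.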

export HF_HOME=~/bhome/clef2024-checkthat/checkthat/task1
export WANDB_CACHE_DIR=~/bhome/clef2024-checkthat/checkthat/task1

# Create necessary directories
mkdir -p "$HF_HOME" "$WANDB_CACHE_DIR"

# Store the Hugging Face token
echo 'KEY' > "$HF_HOME/token"
chmod 600 "$HF_HOME/token"

# Log in to wandb
export WANDB_API_KEY='KEY'
wandb login "$WANDB_API_KEY"

# Test the Hugging Face API with a Python script
#$PYTHON test_start.py
python -u test_hf_login.py
21 changes: 21 additions & 0 deletions checkthat/task1/conda_setup.sh
@@ -0,0 +1,21 @@
#!/bin/bash
#SBATCH --gres=gpu:0
#SBATCH --partition=gpuA100
#SBATCH --time=1:00:00
#SBATCH --job-name=conda_setup
#SBATCH --output=conda_setup.out
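# Submit this script as a batch job, e.g.: sbatch conda_setup.sh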

module load cuda/12.2.0 cudnn/8.8.0 # Load CUDA and cuDNN modules

# Activate the user environment (uenv)
uenv verbose cuda-12.2.0 cudnn-12.x-8.8.0
uenv miniconda3-py39

# Create and activate the Conda environment
conda create -n CLEF_checkthat2024 -c pytorch pytorch torchvision torchaudio pytorch-cuda=12.1 -c nvidia -y
conda activate CLEF_checkthat2024

# Install Python packages
pip install torch torchvision torchaudio
pip install transformers[torch]
pip install -r requirements.txt
116 changes: 116 additions & 0 deletions checkthat/task1/depricated_scripts/main_test_all.py
@@ -0,0 +1,116 @@
"""Deprecated script: run each trained checkpoint on its test split and write predictions to a .tsv file."""
import os
import torch
import pandas as pd
import wandb
from datasets import load_dataset
from transformers import AutoTokenizer
from models.custom_model import CustomModel
from tokenization.tokenizer import TextDataset
from metrics.compute_metrics import compute_metrics

def find_latest_checkpoint(model_dir):
"""Find the latest checkpoint in the given directory."""
checkpoint_dirs = [os.path.join(model_dir, d) for d in os.listdir(model_dir) if os.path.isdir(os.path.join(model_dir, d)) and 'checkpoint' in d]
if not checkpoint_dirs:
raise ValueError("No checkpoint directories found in the given model directory.")
latest_checkpoint = sorted(checkpoint_dirs, key=lambda x: int(x.split('-')[-1]))[-1]
return latest_checkpoint

def find_models_with_checkpoints(base_dir):
"""Find models and their latest checkpoints in a structured directory."""
model_info = []
for model_name in os.listdir(base_dir):
model_path = os.path.join(base_dir, model_name)
if not os.path.isdir(model_path):
continue
try:
latest_checkpoint = find_latest_checkpoint(model_path)
model_info.append((model_name, latest_checkpoint))
print(f"Model: {model_name}, Latest Checkpoint: {latest_checkpoint}")
except Exception as e:
print(f"Failed to find checkpoint for model {model_name}: {str(e)}")
return model_info

def run_prediction(model_name, dataset_list, tokenizer, model_path, has_labels: bool):
"""Run prediction on the dataset, compute metrics if labels are present, and write results to a .tsv file."""
device = 'cuda'
label_map = {0: 'no', 1: 'yes'}

# Detect the language from the model name and load the matching dataset
lang = model_name.split('_')[-2]  # Assumes format like 'modelname_lang_'
dataset = load_dataset(dataset_list[lang])

# Initialize Weights & Biases
run_name = f"TEST__{model_path}"
wandb.init(project="Clef2024", entity="aarnes", name=run_name)

# Load the model from the checkpoint
model = CustomModel.from_pretrained(model_path)
model.to(device)
model.eval()

# Load the dataset, with or without labels
test_dataset = TextDataset(dataset["test"], tokenizer, None if not has_labels else label_map)
all_logits = []
all_labels = []
results = []

with torch.no_grad():
for i, batch in enumerate(test_dataset):
input_ids, attention_mask = batch['input_ids'].to(device), batch['attention_mask'].to(device)
output = model(input_ids=input_ids, attention_mask=attention_mask)
logits = output.logits
predictions = logits.argmax(-1).cpu().numpy()

# Collect logits and labels for metric calculation if labels are present
if has_labels and 'labels' in batch:
labels = batch['labels'].cpu().numpy()
all_logits.append(logits)
all_labels.append(torch.tensor(labels))
for label, pred in zip(labels, predictions):
results.append((i, label_map[pred], model_name))
else:
for pred in predictions:
results.append((i, label_map[pred], model_name))

# If labels were present, calculate metrics
if has_labels:
all_logits = torch.cat(all_logits).cpu()
all_labels = torch.cat(all_labels)
metrics = compute_metrics((all_logits, all_labels))
wandb.log(metrics)

# Save results to a .tsv file
df = pd.DataFrame(results, columns=['sentence_id', 'prediction', 'model_name'])
df.to_csv(f"{model_path}_predictions.tsv", sep='\t', index=False)

# Finish Weights & Biases logging
wandb.finish()


if __name__ == "__main__":
# Define the dataset list for each language
dataset_list = {
"en":"iai-group/clef2024_checkthat_task1_en",
"ar":"iai-group/clef2024_checkthat_task1_ar",
"es":"iai-group/clef2024_checkthat_task1_es",
"nl":"iai-group/clef2024_checkthat_task1_nl",
}

label_map = {"Yes": 1, "No": 0}


# Load models and run prediction
base_dir = "./trained_models"

model_info = find_models_with_checkpoints(base_dir)
for model_name, checkpoint_path in model_info:
    tokenizer = AutoTokenizer.from_pretrained(checkpoint_path)  # Tokenizer saved with the checkpoint
    run_prediction(model_name, dataset_list, tokenizer, checkpoint_path, has_labels=True)


44 changes: 44 additions & 0 deletions checkthat/task1/depricated_scripts/main_train_all.py
@@ -0,0 +1,44 @@
"""Main training script for training on all languages."""
from datasets import load_dataset
from tokenization.normalize_DatasetDict_featues import rename_features
from training_scripts.training import run_training
from transformers import AutoTokenizer
from training_scripts.train_config import get_training_arguments

def main():
dataset_list = [
"iai-group/clef2024_checkthat_task1_en",
"iai-group/clef2024_checkthat_task1_ar",
"iai-group/clef2024_checkthat_task1_es",
"iai-group/clef2024_checkthat_task1_nl",
]
label_map = {"Yes": 1, "No": 0}

model_name_en = "FacebookAI/roberta-large"
multilingual_model = "FacebookAI/xlm-roberta-large"
seeds = [42, 81, 1024, 6, 10]
tokenizer = AutoTokenizer.from_pretrained(model_name_en)

"""Training model on trainset for each seed and each language"""
for seed in seeds:
for dataset_name in dataset_list:
dataset = load_dataset(dataset_name)
if "tweet_text" in dataset["train"].column_names:
dataset = rename_features(dataset)
tokenizer = AutoTokenizer.from_pretrained(multilingual_model)
training_args = get_training_arguments(multilingual_model, seed, dataset_name)
run_training(seed, dataset, model_name_en, tokenizer, label_map, training_args)
else:
training_args = get_training_arguments(model_name_en, seed, dataset_name)
run_training(seed, dataset, model_name_en, tokenizer, label_map, training_args)


if __name__ == "__main__":
import torch

print(torch.cuda.is_available())
print(torch.cuda.current_device())
print(torch.cuda.device(0))
print(torch.cuda.device_count())
print(torch.cuda.get_device_name(0))
main()
8 changes: 8 additions & 0 deletions checkthat/task1/depricated_scripts/metrics/__init__.py
@@ -0,0 +1,8 @@
from .compute_metrics import (
compute_metrics,
accuracy_metric,
precision_metric,
recall_metric,
f1_metric,
)
from .metrics_logger import MetricsLoggerCallback, compute_custom_metrics
38 changes: 38 additions & 0 deletions checkthat/task1/depricated_scripts/metrics/compute_metrics.py
@@ -0,0 +1,38 @@
"""Function to compute four metrics: accuracy, precision, recall, and F1-score.

Metrics will be passed to wandb for logging.
"""
from evaluate import load

"""Compute accuracy, precision, recall, and F1-score metrics."""
accuracy_metric = load("accuracy")
precision_metric = load("precision")
recall_metric = load("recall")
f1_metric = load("f1")


def compute_metrics(eval_pred):
"""Compute accuracy, precision, recall, and F1-score metrics.

Args:
eval_pred: Tuple of logits and labels.

Returns:
dict: Dictionary containing the computed metrics.
"""
logits, labels = eval_pred
predictions = logits.argmax(-1)
return {
"accuracy": accuracy_metric.compute(
predictions=predictions, references=labels
)["accuracy"],
"precision": precision_metric.compute(
predictions=predictions, references=labels, average="weighted"
)["precision"],
"recall": recall_metric.compute(
predictions=predictions, references=labels, average="weighted"
)["recall"],
"f1": f1_metric.compute(
predictions=predictions, references=labels, average="weighted"
)["f1"],
}
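
# Usage note: this function can be passed to the Hugging Face Trainer
# (e.g. Trainer(..., compute_metrics=compute_metrics)), which calls it with (logits, labels).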
61 changes: 61 additions & 0 deletions checkthat/task1/depricated_scripts/metrics/metrics_logger.py
@@ -0,0 +1,61 @@
"""Sets up the logging for the metrics using Weights and Biases."""
import wandb
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score
from transformers import TrainerCallback


def compute_custom_metrics(logits, labels):
"""Compute precision, recall, and F1-score from model logits and true
labels.

Args:
logits (np.array): Logits returned by the model. Shape (num_samples, num_classes).
labels (np.array): True labels. Shape (num_samples,).

Returns:
tuple: precision, recall, F1-score
"""

predictions = np.argmax(logits, axis=1) # Convert logits to predictions

# Calculate metrics
precision = precision_score(labels, predictions, average="binary", pos_label=1)
recall = recall_score(labels, predictions, average="binary", pos_label=1)
f1 = f1_score(labels, predictions, average="binary", pos_label=1)

return precision, recall, f1


class MetricsLoggerCallback(TrainerCallback):
"""Custom callback for logging additional metrics to wandb."""

def on_evaluate(self, args, state, control, **kwargs):
# Assuming 'logits' and 'labels' are part of the outputs collected during evaluation
logits = kwargs["logits"]
labels = kwargs["labels"]

# Compute custom metrics
precision, recall, f1 = compute_custom_metrics(logits, labels)

# Log custom metrics to wandb
wandb.log(
{
"precision": precision,
"recall": recall,
"f1_score": f1,
"epoch": state.epoch,
}
)


callback_map = {
"MetricsLoggerCallback": MetricsLoggerCallback,
}


def get_callbacks(callback_names):
"""Create a list of callback instances from a list of callback names."""
return [
callback_map[name]() for name in callback_names if name in callback_map
]
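
# Example (hypothetical usage): Trainer(..., callbacks=get_callbacks(["MetricsLoggerCallback"]))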
50 changes: 50 additions & 0 deletions checkthat/task1/main.py
@@ -0,0 +1,50 @@
"""Will run script to run training and testing. (unlabeled tests yet to ble implemented)

Argument parser is used to specify the model name and dataset name.
"""
import argparse
from datasets import load_dataset
from training_scripts.training import run_training
from transformers import AutoTokenizer
from tokenization.tokenizer import TextDataset


def main(args):
"""Run training."""
label_map = {"Yes": 1, "No": 0}

tokenizer = AutoTokenizer.from_pretrained(args.model_name)

dataset = load_dataset(args.dataset)

dataset_language = args.dataset.split("_")[-2:]
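# e.g. "iai-group/clef2024_checkthat_task1_en" -> ["task1", "en"]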

train_dataset = TextDataset(dataset["train"], tokenizer, label_map)
eval_dataset = TextDataset(dataset["validation"], tokenizer, label_map)
test_dataset = TextDataset(dataset["test"], tokenizer, label_map)

run_training(train_dataset, eval_dataset, args.model_name, label_map, dataset_language, test_dataset)


if __name__ == "__main__":

parser = argparse.ArgumentParser(description="Run training and testing.")

# parser.add_argument(
# "--test", action="store_true", help="Whether to run testing"
# )
parser.add_argument(
"--model_name",
type=str,
default="FacebookAI/roberta-large", # For English language
help="Name of the model",
)
parser.add_argument(
"--dataset",
type=str,
default="iai-group/clef2024_checkthat_task1_en", # For English language
help="Name of the dataset from the iai-group/clef2024_checkthat_task1_* datasets",
)

args = parser.parse_args()
main(args)
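
# Example invocation (using the argparse defaults above):
#   python main.py --model_name FacebookAI/roberta-large --dataset iai-group/clef2024_checkthat_task1_en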