From dcad4a97f68c2644536d40596fe65837e5c6b6ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20R=C3=B8ysland=20Aarnes?= Date: Sun, 21 Apr 2024 20:27:40 +0200 Subject: [PATCH 01/16] Abstract model class. Added conftest string so commit wouldn't bug out --- checkthat/task1/models/transformer_model.py | 22 ++++++++++ conftest.py | 1 + tests/test_models/test_transformer_model.py | 47 +++++++++++++++++++++ 3 files changed, 70 insertions(+) create mode 100644 checkthat/task1/models/transformer_model.py create mode 100644 tests/test_models/test_transformer_model.py diff --git a/checkthat/task1/models/transformer_model.py b/checkthat/task1/models/transformer_model.py new file mode 100644 index 0000000..d82c05d --- /dev/null +++ b/checkthat/task1/models/transformer_model.py @@ -0,0 +1,22 @@ +"""Module contains the transformer model for Task 1. + +Abstract class for later use. +""" + +from abc import ABC, abstractmethod +import torch.nn as nn + + +class Model(ABC, nn.Module): + def __init__(self): + """Constructor for the Model class.""" + super(Model, self).__init__() + + @abstractmethod + def forward(self, x): + """Forward pass of the model. + + Args: + x: Input tensor. + """ + pass diff --git a/conftest.py b/conftest.py index e69de29..5405deb 100644 --- a/conftest.py +++ b/conftest.py @@ -0,0 +1 @@ +"""Module for pytest configuration and fixtures.""" diff --git a/tests/test_models/test_transformer_model.py b/tests/test_models/test_transformer_model.py new file mode 100644 index 0000000..4381d36 --- /dev/null +++ b/tests/test_models/test_transformer_model.py @@ -0,0 +1,47 @@ +"""Tests for the transformer model.""" + +import pytest +import torch +from checkthat.task1.models.transformer_model import Model + + +class ConcreteModel(Model): + """Concrete model for testing purposes.""" + + def forward(self, x): + """Forward pass of the model. + + This method takes an input tensor and returns an output tensor where + each element is doubled. + + Args: + x (torch.Tensor): Input tensor. + + Returns: + torch.Tensor: Output tensor, where each element is doubled. + """ + return x * 2 + + +def test_model_cannot_be_instantiated(): + """Test if the Model class cannot be instantiated. + + This test checks whether attempting to instantiate the abstract + Model class raises a TypeError, as expected for abstract classes. + """ + with pytest.raises(TypeError): + Model() # Directly test instantiation without assignment + + +def test_concrete_model(): + """Test if the ConcreteModel class works correctly. + + This test verifies that the ConcreteModel class's forward method + processes input tensors correctly by doubling each element. + """ + x = torch.tensor([1.0, 2.0, 3.0]) + model = ConcreteModel() + expected_output = torch.tensor([2.0, 4.0, 6.0]) + assert torch.equal( + model(x), expected_output + ), "The output tensor does not match the expected doubled values." From 12172f28258cb6293394ecdc7542cbb24ad66b69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20R=C3=B8ysland=20Aarnes?= Date: Wed, 1 May 2024 16:46:30 +0200 Subject: [PATCH 02/16] Added files for training and most tests. Made changes to requirements.txt. 
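For reference, the training entry point added in this patch (checkthat/task1/main.py) wires the new pieces together. A minimal usage sketch, assuming the listed requirements are installed and the wandb project configured in training.py is reachable; it only mirrors the defaults that appear in the diff below and adds no new behaviour:

    from datasets import load_dataset
    from transformers import AutoTokenizer
    from training_scripts.training import run_training

    # Defaults taken from main.py in this patch (English subtask).
    model_name = "FacebookAI/roberta-large"
    dataset = load_dataset("iai-group/clef2024_checkthat_task1_en")
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    label_map = {"No": 0, "Yes": 1}

    # Seeds used for reproducibility, as in main.py.
    for seed in [42, 81, 1024, 6, 10]:
        run_training(seed, dataset, model_name, tokenizer, label_map)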
--- .gitignore | 3 + checkthat/task1/__init__.py | 1 + checkthat/task1/main.py | 48 +++++++++++ checkthat/task1/main_train_all.py | 37 ++++++++ checkthat/task1/metrics/__init__.py | 8 ++ checkthat/task1/metrics/compute_metrics.py | 38 +++++++++ checkthat/task1/metrics/metrics_logger.py | 61 ++++++++++++++ checkthat/task1/models/__init__.py | 3 + ...transformer_model.py => abstract_model.py} | 17 ++-- checkthat/task1/models/custom_model.py | 29 +++++++ checkthat/task1/tokenization/__init__.py | 4 + .../normalize_DatasetDict_featues.py | 22 +++++ checkthat/task1/tokenization/tokenizer.py | 41 +++++++++ checkthat/task1/training_config.yaml | 18 ++++ checkthat/task1/training_scripts/__init__.py | 4 + .../task1/training_scripts/train_config.py | 19 +++++ checkthat/task1/training_scripts/training.py | 72 ++++++++++++++++ requirements.txt | 76 +++++++++++++++++ tests/__init__.py | 2 +- tests/metrics/test_compute_metrics.py | 50 +++++++++++ tests/metrics/test_metrics_logger.py | 64 ++++++++++++++ tests/models/__init__.py | 0 .../test_abstract_model.py} | 12 +-- tests/models/test_custom_model.py | 41 +++++++++ .../test_normalize_DatasetDict_features.py | 84 +++++++++++++++++++ tests/tokenization/test_tokenizer.py | 34 ++++++++ tests/training_scripts/test_train_config.py | 23 +++++ tests/training_scripts/test_training.py | 34 ++++++++ 28 files changed, 824 insertions(+), 21 deletions(-) create mode 100644 checkthat/task1/__init__.py create mode 100644 checkthat/task1/main.py create mode 100644 checkthat/task1/main_train_all.py create mode 100644 checkthat/task1/metrics/__init__.py create mode 100644 checkthat/task1/metrics/compute_metrics.py create mode 100644 checkthat/task1/metrics/metrics_logger.py create mode 100644 checkthat/task1/models/__init__.py rename checkthat/task1/models/{transformer_model.py => abstract_model.py} (52%) create mode 100644 checkthat/task1/models/custom_model.py create mode 100644 checkthat/task1/tokenization/__init__.py create mode 100644 checkthat/task1/tokenization/normalize_DatasetDict_featues.py create mode 100644 checkthat/task1/tokenization/tokenizer.py create mode 100644 checkthat/task1/training_config.yaml create mode 100644 checkthat/task1/training_scripts/__init__.py create mode 100644 checkthat/task1/training_scripts/train_config.py create mode 100644 checkthat/task1/training_scripts/training.py create mode 100644 tests/metrics/test_compute_metrics.py create mode 100644 tests/metrics/test_metrics_logger.py create mode 100644 tests/models/__init__.py rename tests/{test_models/test_transformer_model.py => models/test_abstract_model.py} (76%) create mode 100644 tests/models/test_custom_model.py create mode 100644 tests/tokenization/test_normalize_DatasetDict_features.py create mode 100644 tests/tokenization/test_tokenizer.py create mode 100644 tests/training_scripts/test_train_config.py create mode 100644 tests/training_scripts/test_training.py diff --git a/.gitignore b/.gitignore index b6e4761..4059efb 100644 --- a/.gitignore +++ b/.gitignore @@ -127,3 +127,6 @@ dmypy.json # Pyre type checker .pyre/ + +# Exclude .DS_Store files everywhere +*.DS_Store \ No newline at end of file diff --git a/checkthat/task1/__init__.py b/checkthat/task1/__init__.py new file mode 100644 index 0000000..a1ef5bd --- /dev/null +++ b/checkthat/task1/__init__.py @@ -0,0 +1 @@ +"""init file for main module.""" diff --git a/checkthat/task1/main.py b/checkthat/task1/main.py new file mode 100644 index 0000000..cc7ff58 --- /dev/null +++ b/checkthat/task1/main.py @@ -0,0 +1,48 @@ +"""Will run 
script to run training and testing. (test yet to be implemented) + +Argument parser is used to specify the model name and dataset name. +""" +import argparse +from datasets import load_dataset +from training_scripts.training import run_training +from transformers import AutoTokenizer + + +def main(args): + """Run training.""" + tokenizer = AutoTokenizer.from_pretrained(args.model_name) + + dataset = load_dataset(args.dataset) + label_map = {"No": 0, "Yes": 1} # Label map for the dataset + + seeds = [42, 81, 1024, 6, 10] # Seeds for reproducibility + if args.train: + for seed in seeds: + run_training(seed, dataset, args.model_name, tokenizer, label_map) + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser(description="Run training and testing.") + + parser.add_argument( + "--train", action="store_true", help="Whether to run training" + ) + parser.add_argument( + "--test", action="store_true", help="Whether to run testing" + ) + parser.add_argument( + "--model_name", + type=str, + default="FacebookAI/roberta-large", # For English language + help="Name of the model", + ) + parser.add_argument( + "--dataset", + type=str, + default="iai-group/clef2024_checkthat_task1_en", # For English language + help="Name of the dataset", + ) + + args = parser.parse_args() + main(args) diff --git a/checkthat/task1/main_train_all.py b/checkthat/task1/main_train_all.py new file mode 100644 index 0000000..2725a2f --- /dev/null +++ b/checkthat/task1/main_train_all.py @@ -0,0 +1,37 @@ +"""Main training script for training on all languages.""" +from datasets import load_dataset +from tokenization.normalize_DatasetDict_featues import rename_features +from transformers import AutoTokenizer +from training_scripts.training import run_training + + +def main(): + en, ar, es, nl = ( + "iai-group/clef2024_checkthat_task1_en", + "iai-group/clef2024_checkthat_task1_ar", + "iai-group/clef2024_checkthat_task1_es", + "iai-group/clef2024_checkthat_task1_nl", + ) + + dataset_list = [en, ar, es, nl] + label_map = {"No": 0, "Yes": 1} # Label map for the dataset + + model_name_en = "FacebookAI/roberta-large" + multilingual_model = "FacebookAI/xlm-roberta-large" + + seeds = [42, 81, 1024, 6, 10] # Seeds for reproducibility + + tokenizer = AutoTokenizer.from_pretrained(model_name_en) + + for seed, dataset in zip(seeds, dataset_list): + dataset = load_dataset(dataset) + # Normalize dataset features if not already normalized (intended for twitter dataset) + if dataset["train"]["tweet_text"]: + dataset = rename_features(dataset) + tokenizer = AutoTokenizer.from_pretrained(multilingual_model) + + run_training(seed, dataset, model_name_en, tokenizer, label_map) + + +if __name__ == "__main__": + main() diff --git a/checkthat/task1/metrics/__init__.py b/checkthat/task1/metrics/__init__.py new file mode 100644 index 0000000..f910853 --- /dev/null +++ b/checkthat/task1/metrics/__init__.py @@ -0,0 +1,8 @@ +from .compute_metrics import ( + compute_metrics, + accuracy_metric, + precision_metric, + recall_metric, + f1_metric, +) +from .metrics_logger import MetricsLoggerCallback, compute_custom_metrics diff --git a/checkthat/task1/metrics/compute_metrics.py b/checkthat/task1/metrics/compute_metrics.py new file mode 100644 index 0000000..2ec982b --- /dev/null +++ b/checkthat/task1/metrics/compute_metrics.py @@ -0,0 +1,38 @@ +"""Function to compute four metrics: accuracy, precision, recall, and F1-score. + +Metrics will be passed to wandb for logging. 
+""" +from evaluate import load + +"""Compute accuracy, precision, recall, and F1-score metrics.""" +accuracy_metric = load("accuracy") +precision_metric = load("precision") +recall_metric = load("recall") +f1_metric = load("f1") + + +def compute_metrics(eval_pred): + """Compute accuracy, precision, recall, and F1-score metrics. + + Args: + eval_pred: Tuple of logits and labels. + + Returns: + dict: Dictionary containing the computed metrics. + """ + logits, labels = eval_pred + predictions = logits.argmax(-1) + return { + "accuracy": accuracy_metric.compute( + predictions=predictions, references=labels + )["accuracy"], + "precision": precision_metric.compute( + predictions=predictions, references=labels, average="weighted" + )["precision"], + "recall": recall_metric.compute( + predictions=predictions, references=labels, average="weighted" + )["recall"], + "f1": f1_metric.compute( + predictions=predictions, references=labels, average="weighted" + )["f1"], + } diff --git a/checkthat/task1/metrics/metrics_logger.py b/checkthat/task1/metrics/metrics_logger.py new file mode 100644 index 0000000..fedcd90 --- /dev/null +++ b/checkthat/task1/metrics/metrics_logger.py @@ -0,0 +1,61 @@ +"""Sets up the logging for the metrics using Weights and Biases.""" +import wandb +import numpy as np +from sklearn.metrics import precision_score, recall_score, f1_score +from transformers import TrainerCallback + + +def compute_custom_metrics(logits, labels): + """Compute precision, recall, and F1-score from model logits and true + labels. + + Args: + logits (np.array): Logits returned by the model. Shape (num_samples, num_classes). + labels (np.array): True labels. Shape (num_samples,). + + Returns: + tuple: precision, recall, F1-score + """ + + predictions = np.argmax(logits, axis=1) # Convert logits to predictions + + # Calculate metrics + precision = precision_score(labels, predictions, average="binary") + recall = recall_score(labels, predictions, average="binary") + f1 = f1_score(labels, predictions, average="binary") + + return precision, recall, f1 + + +class MetricsLoggerCallback(TrainerCallback): + """Custom callback for logging additional metrics to wandb.""" + + def on_evaluate(self, args, state, **kwargs): + # Assuming 'logits' and 'labels' are part of the outputs collected during evaluation + logits = kwargs["logits"] + labels = kwargs["labels"] + + # Compute custom metrics + precision, recall, f1 = compute_custom_metrics(logits, labels) + + # Log custom metrics to wandb + wandb.log( + { + "precision": precision, + "recall": recall, + "f1_score": f1, + "epoch": state.epoch, + } + ) + + +callback_map = { + "MetricsLoggerCallback": MetricsLoggerCallback, +} + + +def get_callbacks(callback_names): + """Create a list of callback instances from a list of callback names.""" + return [ + callback_map[name]() for name in callback_names if name in callback_map + ] diff --git a/checkthat/task1/models/__init__.py b/checkthat/task1/models/__init__.py new file mode 100644 index 0000000..a0e880a --- /dev/null +++ b/checkthat/task1/models/__init__.py @@ -0,0 +1,3 @@ +"""init file for models module.""" +from .custom_model import CustomModel +from .abstract_model import Model diff --git a/checkthat/task1/models/transformer_model.py b/checkthat/task1/models/abstract_model.py similarity index 52% rename from checkthat/task1/models/transformer_model.py rename to checkthat/task1/models/abstract_model.py index d82c05d..290fd34 100644 --- a/checkthat/task1/models/transformer_model.py +++ 
b/checkthat/task1/models/abstract_model.py @@ -1,22 +1,19 @@ """Module contains the transformer model for Task 1. -Abstract class for later use. +Abstract indended as blueprint custom masked language model class. """ - from abc import ABC, abstractmethod +import torch import torch.nn as nn class Model(ABC, nn.Module): - def __init__(self): + def __init__(self) -> None: """Constructor for the Model class.""" super(Model, self).__init__() + return None @abstractmethod - def forward(self, x): - """Forward pass of the model. - - Args: - x: Input tensor. - """ - pass + def forward(self, x) -> torch.Tensor: + """Forward pass of the model.""" + return x diff --git a/checkthat/task1/models/custom_model.py b/checkthat/task1/models/custom_model.py new file mode 100644 index 0000000..cb5ed31 --- /dev/null +++ b/checkthat/task1/models/custom_model.py @@ -0,0 +1,29 @@ +"""Custom model for sequence classification tasks dervied from abstract class +model.py.""" +from .abstract_model import Model +from transformers import AutoModelForSequenceClassification + + +class CustomModel(Model): + def __init__(self, model_name: str, num_labels: int): + """Constructor for the CustomModel class. + + Args: + model_name (str): Accepts huggingface model name + num_labels (int): Number of labels in the dataset + """ + super(CustomModel, self).__init__() + self.model = AutoModelForSequenceClassification.from_pretrained( + model_name, num_labels=num_labels + ) + + def forward(self, input_ids, attention_mask=None, labels=None): + """Forward pass of the model. + + Including labels in the forward pass so the model can calculate + loss. + """ + output = self.model( + input_ids=input_ids, attention_mask=attention_mask, labels=labels + ) + return output diff --git a/checkthat/task1/tokenization/__init__.py b/checkthat/task1/tokenization/__init__.py new file mode 100644 index 0000000..089eb1f --- /dev/null +++ b/checkthat/task1/tokenization/__init__.py @@ -0,0 +1,4 @@ +"""init file for the tokenization module.""" + +from .tokenizer import TextDataset +from .normalize_DatasetDict_featues import rename_features diff --git a/checkthat/task1/tokenization/normalize_DatasetDict_featues.py b/checkthat/task1/tokenization/normalize_DatasetDict_featues.py new file mode 100644 index 0000000..a953dd6 --- /dev/null +++ b/checkthat/task1/tokenization/normalize_DatasetDict_featues.py @@ -0,0 +1,22 @@ +"""For the datasets that do not follow the english dataset format, we need to +rename the features to match the english dataset format.""" + + +def rename_features(data) -> dict: + """Hacky function intended to use for twitter data to it uses same features + as other english dataset.""" + # Iterate over each split (train, validation, test) + feature_name_mapping = { + "tweet_text": "Text", + } + for split_name in data.keys(): + # Get the dataset for the current split + split_dataset = data[split_name] + + # Rename each feature in the dataset using the mapping + for old_name, new_name in feature_name_mapping.items(): + split_dataset = split_dataset.rename_column(old_name, new_name) + + # Update the dataset in the DatasetDict + data[split_name] = split_dataset + return data diff --git a/checkthat/task1/tokenization/tokenizer.py b/checkthat/task1/tokenization/tokenizer.py new file mode 100644 index 0000000..b4e5848 --- /dev/null +++ b/checkthat/task1/tokenization/tokenizer.py @@ -0,0 +1,41 @@ +"""Tokenizer for the task1 datasets.""" +import torch +from torch.utils.data import Dataset + + +class TextDataset(Dataset): + """Takes a list of dictionaries 
containing text and class labels. + + Args: + Dataset: Dataset class from torch.utils.data + """ + + def __init__(self, data, tokenizer, label_map): + """Initialize the TextDataset class.""" + self.data = data + self.tokenizer = tokenizer + self.label_map = label_map + + def __len__(self): + """Return the length of the dataset.""" + return len(self.data) + + def __getitem__(self, idx): + """Tokenize the text and return a dictionary containing the + tokenized.""" + item = self.data[idx] + encoded = self.tokenizer.encode_plus( + item["Text"], + add_special_tokens=True, + truncation=True, + padding="max_length", + return_attention_mask=True, + return_tensors="pt", + ) + + label_id = self.label_map[item["class_label"]] + return { + "input_ids": encoded["input_ids"].squeeze(0), + "attention_mask": encoded["attention_mask"].squeeze(0), + "labels": torch.tensor(label_id), + } diff --git a/checkthat/task1/training_config.yaml b/checkthat/task1/training_config.yaml new file mode 100644 index 0000000..a9b9f41 --- /dev/null +++ b/checkthat/task1/training_config.yaml @@ -0,0 +1,18 @@ +training_arguments: + evaluation_strategy: 'IntervalStrategy.epoch' # To change evaluation strategy comment out the line and uncomment the next line two lines + # eval_steps: 500 + # evaluation_strategy: 'steps' # evaluate after some number of steps + output_dir: './results' # output directory + save_total_limit: 5 # number of maximum checkpoints to save + num_train_epochs: 2 # number of training epochs + per_device_train_batch_size: 32 # batch size for training + per_device_eval_batch_size: 32 # batch size for evaluation + warmup_steps: 500 # number of warmup steps for learning rate scheduler + weight_decay: 0.01 # strength of weight decay + logging_dir: './logs' # directory for storing logs + logging_steps: 10 + load_best_model_at_end: True + metric_for_best_model: 'loss' # metric to use for saving best model + report_to: 'wandb' # report to wandb + + diff --git a/checkthat/task1/training_scripts/__init__.py b/checkthat/task1/training_scripts/__init__.py new file mode 100644 index 0000000..422193e --- /dev/null +++ b/checkthat/task1/training_scripts/__init__.py @@ -0,0 +1,4 @@ +"""init file for training module.""" + +from .training import run_training +from .train_config import get_training_arguments diff --git a/checkthat/task1/training_scripts/train_config.py b/checkthat/task1/training_scripts/train_config.py new file mode 100644 index 0000000..96c8651 --- /dev/null +++ b/checkthat/task1/training_scripts/train_config.py @@ -0,0 +1,19 @@ +"""Module to load training arguments from a yaml file.""" + +import yaml +from transformers import TrainingArguments + + +def load_config(file_path): + """Load configuration from a yaml file.""" + with open(file_path, "r") as file: + config = yaml.safe_load(file) + return config + + +def get_training_arguments(): + """Unpack training arguments from the config file and return as a + TrainingArguments object.""" + config = load_config("training_config.yaml") + training_args = config["training_arguments"] + return TrainingArguments(**training_args) diff --git a/checkthat/task1/training_scripts/training.py b/checkthat/task1/training_scripts/training.py new file mode 100644 index 0000000..c6e849a --- /dev/null +++ b/checkthat/task1/training_scripts/training.py @@ -0,0 +1,72 @@ +"""Training script for the model. + +This script trains the model for a single seed. 
+""" +import wandb +from transformers import Trainer, EarlyStoppingCallback +from checkthat.task1.tokenization.tokenizer import TextDataset +from checkthat.task1.models.custom_model import CustomModel +from checkthat.task1.metrics.compute_metrics import compute_metrics +from checkthat.task1.training_scripts.train_config import get_training_arguments +import random +import numpy as np +import torch + + +def set_seed(seed): + """Set seed for reproducibility.""" + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed_all(seed) + + +def run_training(seed, dataset, model_name, tokenizer, label_map): + """Start training the model for a single seed. + + Args: + seed: seed for reproducibility + dataset: dataset dictionary containing train and validation splits + model_name: huggingface model name + tokenizer: huggerface tokenizer/same as model name + label_map: dictionary mapping labels to integers + """ + # Initialize wandb run + set_seed(seed) + run_name = f"{model_name}_{seed}" + wandb.init( + project="Clef2024", + entity="aarnes", + name=run_name, + config={"seed": seed}, + ) + + # Prepare datasets + train_dataset = TextDataset(dataset["train"], tokenizer, label_map) + eval_dataset = TextDataset(dataset["validation"], tokenizer, label_map) + test_dataset = TextDataset(dataset["test"], tokenizer, label_map) + + training_arguments = get_training_arguments() + training_arguments.run_name = ( + run_name # Optional, sync the name with Trainer's internal wandb run + ) + + # Creating a Trainer instance with training arguments and datasets + trainer = Trainer( + model=CustomModel(model_name, num_labels=len(label_map)), + args=training_arguments, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + test_dataset=test_dataset, + compute_metrics=compute_metrics, + callbacks=[ + EarlyStoppingCallback(early_stopping_patience=3) + ], # Early stopping callback + ) + + # Train the model + trainer.train() + + # Finish the wandb run after each seed + wandb.finish() diff --git a/requirements.txt b/requirements.txt index 7cd7337..7f5bd59 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,3 +6,79 @@ docformatter pre-commit pydocstyle==6.1.1 deep_translator + +accelerate==0.29.3 +aiohttp==3.9.5 +aiosignal==1.3.1 +appdirs==1.4.4 +async-timeout==4.0.3 +attrs==23.2.0 +Brotli==1.0.9 +certifi==2024.2.2 +chardet==4.0.0 +charset-normalizer==2.0.4 +click==8.1.7 +datasets==2.19.0 +dill==0.3.8 +docker-pycreds==0.4.0 +evaluate==0.4.2 +exceptiongroup==1.2.1 +filelock==3.13.1 +frozenlist==1.4.1 +fsspec==2023.10.0 +huggingface-hub==0.22.2 +idna==3.4 +importlib-metadata==7.0.1 +iniconfig==2.0.0 +Jinja2==3.1.3 +joblib==1.4.0 +MarkupSafe==2.1.5 +mpmath==1.3.0 +multidict==6.0.5 +multiprocess==0.70.16 +networkx==3.2.1 +numpy==1.26.4 +packaging==23.2 +pandas==2.2.2 +pillow==10.3.0 +pip==23.3.1 +pluggy==1.5.0 +protobuf==4.25.3 +psutil==5.9.8 +pyarrow==16.0.0 +pyarrow-hotfix==0.6 +PySocks==1.7.1 +pytest==8.1.1 +pytest-mock==3.14.0 +python-dateutil==2.9.0.post0 +pytz==2024.1 +PyYAML==6.0.1 +regex==2023.10.3 +requests==2.31.0 +safetensors==0.4.2 +scikit-learn==1.4.2 +scipy==1.13.0 +sentry-sdk==1.45.0 +setproctitle==1.3.3 +setuptools==68.2.2 +six==1.16.0 +smmap==5.0.1 +sympy==1.12 +threadpoolctl==3.4.0 +tokenizers==0.19.0 +toml==0.10.2 +tomli==2.0.1 +torch==2.2.2 +torchaudio==2.2.2 +torchvision==0.17.2 +tqdm==4.65.0 +transformers==4.40.0 +types-PyYAML==6.0.12.20240311 +typing_extensions==4.11.0 +tzdata==2024.1 +urllib3==2.1.0 +wandb==0.16.6 +wheel==0.41.2 
+xxhash==3.4.1 +yarl==1.9.4 +zipp==3.17.0 diff --git a/tests/__init__.py b/tests/__init__.py index f4c5f52..8e84bfd 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1 +1 @@ -"""Module level init for tests.""" +"""Module level init for tests.""" diff --git a/tests/metrics/test_compute_metrics.py b/tests/metrics/test_compute_metrics.py new file mode 100644 index 0000000..ac8d3bb --- /dev/null +++ b/tests/metrics/test_compute_metrics.py @@ -0,0 +1,50 @@ +import pytest +import numpy as np +from unittest.mock import MagicMock +from evaluate import load +from checkthat.task1.metrics.compute_metrics import compute_metrics + + +@pytest.fixture +def mock_metrics(mocker): + """Mock the load function to return predefined metric values. + + The mock function returns predefined values for accuracy, precision, + recall, and f1. + """ + mocker.patch( + "evaluate.load", + side_effect=lambda metric_name: MagicMock( + compute=MagicMock( + return_value={ + "accuracy": 0.75, + "precision": 0.875, + "recall": 0.75, + "f1": 0.7666666666666667, + } + if metric_name in ["accuracy", "precision", "recall", "f1"] + else None + ) + ), + ) + + +def test_compute_metrics(mock_metrics): + """Test the compute_metrics function.""" + logits = np.array( + [ + [0.1, 0.9], # Predicted class 1 + [0.8, 0.2], # Predicted class 0 + [0.6, 0.4], # Predicted class 0 (incorrect, should be 1) + [0.3, 0.7], # Predicted class 1 (correct) + ] + ) + labels = np.array([1, 0, 1, 1]) + eval_pred = (logits, labels) + + results = compute_metrics(eval_pred) + + assert results["accuracy"] == 0.75 + assert results["precision"] == pytest.approx(0.875) + assert results["recall"] == 0.75 + assert results["f1"] == pytest.approx(0.7666666666666667) diff --git a/tests/metrics/test_metrics_logger.py b/tests/metrics/test_metrics_logger.py new file mode 100644 index 0000000..b6191eb --- /dev/null +++ b/tests/metrics/test_metrics_logger.py @@ -0,0 +1,64 @@ +"""Test for the metrics_logger module.""" + +import numpy as np +from unittest.mock import patch +from checkthat.task1.metrics.metrics_logger import ( + compute_custom_metrics, + MetricsLoggerCallback, + get_callbacks, +) # Replace 'your_module' with the actual module name + +# Test for compute_custom_metrics +def test_compute_custom_metrics(): + """Mock test for compute_custom_metrics.""" + # Define mock logits and labels + logits = np.array([[0.1, 0.9], [0.8, 0.2], [0.55, 0.45]]) + labels = np.array([1, 0, 1]) + + # Expected results + precision, recall, f1 = compute_custom_metrics(logits, labels) + + # Assert conditions + assert precision >= 0, "Precision should be non-negative" + assert recall >= 0, "Recall should be non-negative" + assert f1 >= 0, "F1 score should be non-negative" + # You can add more detailed assertions here based on known input and output + + +# Test for MetricsLoggerCallback +@patch( + "checkthat.task1.metrics.metrics_logger.wandb.log" +) # Mock the wandb.log method +def test_metrics_logger_callback(mock_log): + """Mock test for MetricsLoggerCallback.""" + # Create an instance of MetricsLoggerCallback + callback = MetricsLoggerCallback() + + # Create mock arguments + args = None # Depending on the real use case, populate this correctly + state = type( + "state", (object,), {"epoch": 1} + ) # Mock state with an epoch attribute + logits = np.array( + [[10, 0], [0, 10]] + ) # Very clear separation of class predictions + labels = np.array([0, 1]) # Correct labels aligning with logits + + # Execute the on_evaluate method + callback.on_evaluate(args, state, logits=logits, 
labels=labels) + + # Check that wandb.log was called with expected values + mock_log.assert_called_with( + {"precision": 1.0, "recall": 1.0, "f1_score": 1.0, "epoch": 1} + ) + + +# Test for get_callbacks +def test_get_callbacks(): + """Test for get_callbacks.""" + # Get callback instances + callbacks = get_callbacks(["MetricsLoggerCallback"]) + # Check that the correct callbacks are returned + assert len(callbacks) == 1 and isinstance( + callbacks[0], MetricsLoggerCallback + ), "Should return an instance of MetricsLoggerCallback" diff --git a/tests/models/__init__.py b/tests/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_models/test_transformer_model.py b/tests/models/test_abstract_model.py similarity index 76% rename from tests/test_models/test_transformer_model.py rename to tests/models/test_abstract_model.py index 4381d36..e741ff6 100644 --- a/tests/test_models/test_transformer_model.py +++ b/tests/models/test_abstract_model.py @@ -1,8 +1,7 @@ """Tests for the transformer model.""" - import pytest import torch -from checkthat.task1.models.transformer_model import Model +from checkthat.task1.models.abstract_model import Model class ConcreteModel(Model): @@ -14,9 +13,6 @@ def forward(self, x): This method takes an input tensor and returns an output tensor where each element is doubled. - Args: - x (torch.Tensor): Input tensor. - Returns: torch.Tensor: Output tensor, where each element is doubled. """ @@ -24,11 +20,7 @@ def forward(self, x): def test_model_cannot_be_instantiated(): - """Test if the Model class cannot be instantiated. - - This test checks whether attempting to instantiate the abstract - Model class raises a TypeError, as expected for abstract classes. - """ + """Test if the Model class cannot be instantiated.""" with pytest.raises(TypeError): Model() # Directly test instantiation without assignment diff --git a/tests/models/test_custom_model.py b/tests/models/test_custom_model.py new file mode 100644 index 0000000..380ec6e --- /dev/null +++ b/tests/models/test_custom_model.py @@ -0,0 +1,41 @@ +"""Tests for the CustomModel class.""" +import pytest +from unittest.mock import patch +from transformers import AutoModelForSequenceClassification +import torch +from checkthat.task1.models.custom_model import CustomModel + + +def test_custom_model_initialization(): + """Test for the initialization of the CustomModel class. + + The model should be initialized with the correct model name and + number of labels. + """ + model_name = "bert-base-uncased" + num_labels = 2 + with patch.object( + AutoModelForSequenceClassification, "from_pretrained", return_value=None + ) as mock_method: + model = CustomModel(model_name, num_labels) + mock_method.assert_called_once_with(model_name, num_labels=num_labels) + + +def test_custom_model_forward(): + """Test for the forward method of the CustomModel class. + + The forward method should return a dictionary with the key 'loss' + when labels are provided. 
+ """ + model_name = "bert-base-uncased" + num_labels = 2 + model = CustomModel(model_name, num_labels) + input_ids = torch.randint(0, 1000, (1, 10)) + attention_mask = torch.ones(1, 10) + labels = torch.tensor([1]) + + with patch.object( + AutoModelForSequenceClassification, "from_pretrained", return_value=None + ): + output = model.forward(input_ids, attention_mask, labels) + assert "loss" in output.keys() diff --git a/tests/tokenization/test_normalize_DatasetDict_features.py b/tests/tokenization/test_normalize_DatasetDict_features.py new file mode 100644 index 0000000..ff3228d --- /dev/null +++ b/tests/tokenization/test_normalize_DatasetDict_features.py @@ -0,0 +1,84 @@ +"""Test cases for the normalize_DatasetDict_features function.""" +import pytest +from datasets import DatasetDict, Dataset +from checkthat.task1.tokenization.normalize_DatasetDict_featues import ( + rename_features, +) + + +@pytest.fixture +def sample_data(): + """Fixture providing sample data.""" + # Create a DatasetDict with sample data + train_data = { + "tweet_id": [1, 2, 3], + "tweet_text": ["text1", "text2", "text3"], + "class_label": [0, 1, 0], + } + validation_data = { + "tweet_id": [4, 5], + "tweet_text": ["text4", "text5"], + "class_label": [1, 0], + } + test_data = { + "tweet_id": [6, 7], + "tweet_text": ["text6", "text7"], + "class_label": [0, 1], + } + return DatasetDict( + { + "train": Dataset.from_dict(train_data), + "validation": Dataset.from_dict(validation_data), + "test": Dataset.from_dict(test_data), + } + ) + + +@pytest.fixture +def expected_data(): + """Fixture providing the expected data after renaming 'tweet_text' to + 'Text'.""" + # Define the expected result after renaming 'tweet_text' to 'Text' + train_data = { + "tweet_id": [1, 2, 3], + "Text": ["text1", "text2", "text3"], + "class_label": [0, 1, 0], + } + validation_data = { + "tweet_id": [4, 5], + "Text": ["text4", "text5"], + "class_label": [1, 0], + } + test_data = { + "tweet_id": [6, 7], + "Text": ["text6", "text7"], + "class_label": [0, 1], + } + return DatasetDict( + { + "train": Dataset.from_dict(train_data), + "validation": Dataset.from_dict(validation_data), + "test": Dataset.from_dict(test_data), + } + ) + + +def test_rename_features(sample_data, expected_data): + """Test for the rename_features function.""" + # Call the function to rename features + result = rename_features(sample_data) + + # Compare individual datasets within result and expected_data + for split_name in sample_data.keys(): + result_dataset = result[split_name] + expected_dataset = expected_data[split_name] + + # Check if feature names are the same + assert result_dataset.features == expected_dataset.features + + # Check if number of rows is the same + assert len(result_dataset) == len(expected_dataset) + + # Check if each row in result matches corresponding row in expected_data + for result_row, expected_row in zip(result_dataset, expected_dataset): + assert result_row == expected_row diff --git a/tests/tokenization/test_tokenizer.py b/tests/tokenization/test_tokenizer.py new file mode 100644 index 0000000..3973e75 --- /dev/null +++ b/tests/tokenization/test_tokenizer.py @@ -0,0 +1,34 @@ +import pytest +from checkthat.task1.tokenization.tokenizer import TextDataset +from transformers import AutoTokenizer + + +def test_text_dataset_length(): + """Test the length of the TextDataset. + + The length of the dataset should be equal to the number of data + samples. 
+ """ + data = [{"Text": "Example text", "class_label": "Yes"}] + tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased") + label_map = {"Yes": 1} + + dataset = TextDataset(data, tokenizer, label_map) + assert len(dataset) == 1 + + +def test_text_dataset_getitem(): + """Test the __getitem__ method of the TextDataset. + + The __getitem__ method should return a dictionary with the keys + 'input_ids', 'attention_mask', and 'labels'. + """ + data = [{"Text": "Example text", "class_label": "Yes"}] + tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased") + label_map = {"Yes": 1} + + dataset = TextDataset(data, tokenizer, label_map) + item = dataset[0] + assert "input_ids" in item + assert "attention_mask" in item + assert "labels" in item diff --git a/tests/training_scripts/test_train_config.py b/tests/training_scripts/test_train_config.py new file mode 100644 index 0000000..a41a361 --- /dev/null +++ b/tests/training_scripts/test_train_config.py @@ -0,0 +1,23 @@ +import pytest +from unittest.mock import patch, mock_open +from checkthat.task1.training_scripts.train_config import ( + load_config, + get_training_arguments, +) + + +def test_load_config(): + """Test for load_config.""" + with patch("builtins.open", mock_open(read_data="training_arguments: {}")): + config = load_config("dummy_path") + assert config == {"training_arguments": {}} + + +def test_get_training_arguments(): + """Test for get_training_arguments.""" + with patch( + "checkthat.task1.training_scripts.train_config.load_config", + return_value={"training_arguments": {"output_dir": "test"}}, + ): + training_args = get_training_arguments() + assert training_args.output_dir == "test" diff --git a/tests/training_scripts/test_training.py b/tests/training_scripts/test_training.py new file mode 100644 index 0000000..5c743fc --- /dev/null +++ b/tests/training_scripts/test_training.py @@ -0,0 +1,34 @@ +import pytest +from unittest.mock import patch, MagicMock +from checkthat.task1.training_scripts.training import run_training + + +@patch("checkthat.task1.training_scripts.training.wandb.init") +@patch("checkthat.task1.training_scripts.training.Trainer") +@patch( + "checkthat.task1.training_scripts.train_config.load_config" +) # Mock load_config function +def test_run_training(mock_load_config, mock_trainer, mock_wandb_init): + # Setup the mocks + mock_trainer.return_value.train.return_value = None + mock_wandb_init.return_value = None + mock_load_config.return_value = { + "training_arguments": { + "output_dir": "some/path", + "evaluation_strategy": "steps", + # Add other needed arguments + } + } + + # Call the function + run_training( + seed=42, + dataset={"train": [], "validation": [], "test": []}, + model_name="bert-base-uncased", + tokenizer=MagicMock(), + label_map={}, + ) + + # Assertions + mock_wandb_init.assert_called_once() + mock_trainer.assert_called_once() From ce8166b0b02b286ec65ba369c570487411e958cc Mon Sep 17 00:00:00 2001 From: = Date: Wed, 1 May 2024 19:15:58 +0200 Subject: [PATCH 03/16] Small fixes --- checkthat/task1/main_train_all.py | 27 ++++++++++++------- checkthat/task1/models/custom_model.py | 4 ++- checkthat/task1/training_config.yaml | 8 +++--- .../task1/training_scripts/train_config.py | 2 +- checkthat/task1/training_scripts/training.py | 17 +++++++----- requirements.txt | 1 - 6 files changed, 36 insertions(+), 23 deletions(-) diff --git a/checkthat/task1/main_train_all.py b/checkthat/task1/main_train_all.py index 2725a2f..97e37dd 100644 --- a/checkthat/task1/main_train_all.py +++ 
b/checkthat/task1/main_train_all.py @@ -1,8 +1,8 @@ """Main training script for training on all languages.""" from datasets import load_dataset from tokenization.normalize_DatasetDict_featues import rename_features -from transformers import AutoTokenizer from training_scripts.training import run_training +from transformers import AutoTokenizer def main(): @@ -23,15 +23,24 @@ def main(): tokenizer = AutoTokenizer.from_pretrained(model_name_en) - for seed, dataset in zip(seeds, dataset_list): - dataset = load_dataset(dataset) - # Normalize dataset features if not already normalized (intended for twitter dataset) - if dataset["train"]["tweet_text"]: - dataset = rename_features(dataset) - tokenizer = AutoTokenizer.from_pretrained(multilingual_model) - - run_training(seed, dataset, model_name_en, tokenizer, label_map) + for dataset in dataset_list: + for seed in seeds: + dataset = load_dataset(dataset) + # Normalize dataset features if not already normalized (intended for twitter dataset) + if "tweet_text" in dataset["train"].column_names: + dataset = rename_features(dataset) + tokenizer = AutoTokenizer.from_pretrained(multilingual_model) + run_training(seed, dataset, multilingual_model, tokenizer, label_map) + else: + run_training(seed, dataset, model_name_en, tokenizer, label_map) if __name__ == "__main__": + import torch + + print(torch.cuda.is_available()) + print(torch.cuda.current_device()) + print(torch.cuda.device(0)) + print(torch.cuda.device_count()) + print(torch.cuda.get_device_name(0)) main() diff --git a/checkthat/task1/models/custom_model.py b/checkthat/task1/models/custom_model.py index cb5ed31..07f7cf6 100644 --- a/checkthat/task1/models/custom_model.py +++ b/checkthat/task1/models/custom_model.py @@ -5,7 +5,7 @@ class CustomModel(Model): - def __init__(self, model_name: str, num_labels: int): + def __init__(self, model_name: str, num_labels: int, device: str): """Constructor for the CustomModel class. 
Args: @@ -13,6 +13,8 @@ def __init__(self, model_name: str, num_labels: int): num_labels (int): Number of labels in the dataset """ super(CustomModel, self).__init__() + if device is not None: + self.to(device) self.model = AutoModelForSequenceClassification.from_pretrained( model_name, num_labels=num_labels ) diff --git a/checkthat/task1/training_config.yaml b/checkthat/task1/training_config.yaml index a9b9f41..b74fa79 100644 --- a/checkthat/task1/training_config.yaml +++ b/checkthat/task1/training_config.yaml @@ -1,12 +1,12 @@ training_arguments: - evaluation_strategy: 'IntervalStrategy.epoch' # To change evaluation strategy comment out the line and uncomment the next line two lines + evaluation_strategy: 'epoch' # To change evaluation strategy comment out the line and uncomment the next line two lines # eval_steps: 500 # evaluation_strategy: 'steps' # evaluate after some number of steps output_dir: './results' # output directory save_total_limit: 5 # number of maximum checkpoints to save num_train_epochs: 2 # number of training epochs - per_device_train_batch_size: 32 # batch size for training - per_device_eval_batch_size: 32 # batch size for evaluation + per_device_train_batch_size: 16 # batch size for training + per_device_eval_batch_size: 16 # batch size for evaluation warmup_steps: 500 # number of warmup steps for learning rate scheduler weight_decay: 0.01 # strength of weight decay logging_dir: './logs' # directory for storing logs @@ -14,5 +14,5 @@ training_arguments: load_best_model_at_end: True metric_for_best_model: 'loss' # metric to use for saving best model report_to: 'wandb' # report to wandb - + save_strategy: 'epoch' # save model after each epoch diff --git a/checkthat/task1/training_scripts/train_config.py b/checkthat/task1/training_scripts/train_config.py index 96c8651..69121ba 100644 --- a/checkthat/task1/training_scripts/train_config.py +++ b/checkthat/task1/training_scripts/train_config.py @@ -14,6 +14,6 @@ def load_config(file_path): def get_training_arguments(): """Unpack training arguments from the config file and return as a TrainingArguments object.""" - config = load_config("training_config.yaml") + config = load_config("checkthat/task1/training_config.yaml") training_args = config["training_arguments"] return TrainingArguments(**training_args) diff --git a/checkthat/task1/training_scripts/training.py b/checkthat/task1/training_scripts/training.py index c6e849a..b8847c4 100644 --- a/checkthat/task1/training_scripts/training.py +++ b/checkthat/task1/training_scripts/training.py @@ -4,14 +4,18 @@ """ import wandb from transformers import Trainer, EarlyStoppingCallback -from checkthat.task1.tokenization.tokenizer import TextDataset -from checkthat.task1.models.custom_model import CustomModel -from checkthat.task1.metrics.compute_metrics import compute_metrics -from checkthat.task1.training_scripts.train_config import get_training_arguments +from tokenization.tokenizer import TextDataset +from models.custom_model import CustomModel +from metrics.compute_metrics import compute_metrics +from training_scripts.train_config import get_training_arguments import random import numpy as np import torch - +import os +import torch.cuda +import torch +torch.backends.cuda.matmul.allow_tf32 = True +torch.backends.cudnn.allow_tf32 = True def set_seed(seed): """Set seed for reproducibility.""" @@ -54,11 +58,10 @@ def run_training(seed, dataset, model_name, tokenizer, label_map): # Creating a Trainer instance with training arguments and datasets trainer = Trainer( - 
model=CustomModel(model_name, num_labels=len(label_map)), + model=CustomModel(model_name, num_labels=len(label_map), device='cuda'), args=training_arguments, train_dataset=train_dataset, eval_dataset=eval_dataset, - test_dataset=test_dataset, compute_metrics=compute_metrics, callbacks=[ EarlyStoppingCallback(early_stopping_patience=3) diff --git a/requirements.txt b/requirements.txt index 7f5bd59..0bb079a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -41,7 +41,6 @@ numpy==1.26.4 packaging==23.2 pandas==2.2.2 pillow==10.3.0 -pip==23.3.1 pluggy==1.5.0 protobuf==4.25.3 psutil==5.9.8 From 856c8df6bb85c78da11c54e1819303dc17b506e7 Mon Sep 17 00:00:00 2001 From: = Date: Wed, 1 May 2024 21:42:14 +0200 Subject: [PATCH 04/16] Optimized code and fixed save for main_train_all.py so folders get organised for each model and language --- checkthat/task1/main_train_all.py | 31 ++++++++---------- checkthat/task1/training_config.yaml | 4 +-- .../task1/training_scripts/train_config.py | 18 ++++++---- checkthat/task1/training_scripts/training.py | 6 ++-- requirements.txt | Bin 1336 -> 3520 bytes 5 files changed, 30 insertions(+), 29 deletions(-) diff --git a/checkthat/task1/main_train_all.py b/checkthat/task1/main_train_all.py index 97e37dd..9498045 100644 --- a/checkthat/task1/main_train_all.py +++ b/checkthat/task1/main_train_all.py @@ -3,37 +3,34 @@ from tokenization.normalize_DatasetDict_featues import rename_features from training_scripts.training import run_training from transformers import AutoTokenizer - +from training_scripts.train_config import get_training_arguments def main(): - en, ar, es, nl = ( + dataset_list = [ "iai-group/clef2024_checkthat_task1_en", "iai-group/clef2024_checkthat_task1_ar", "iai-group/clef2024_checkthat_task1_es", "iai-group/clef2024_checkthat_task1_nl", - ) - - dataset_list = [en, ar, es, nl] - label_map = {"No": 0, "Yes": 1} # Label map for the dataset - - model_name_en = "FacebookAI/roberta-large" - multilingual_model = "FacebookAI/xlm-roberta-large" - - seeds = [42, 81, 1024, 6, 10] # Seeds for reproducibility + ] + label_map = {"No": 0, "Yes": 1} + model_name_en = "distilbert/distilroberta-base" + multilingual_model = "FacebookAI/xlm-roberta-base" + seeds = [42, 81, 1024, 6, 10] tokenizer = AutoTokenizer.from_pretrained(model_name_en) - for dataset in dataset_list: + for dataset_name in dataset_list: for seed in seeds: - dataset = load_dataset(dataset) - # Normalize dataset features if not already normalized (intended for twitter dataset) + dataset = load_dataset(dataset_name) if "tweet_text" in dataset["train"].column_names: dataset = rename_features(dataset) tokenizer = AutoTokenizer.from_pretrained(multilingual_model) - run_training(seed, dataset, multilingual_model, tokenizer, label_map) + training_args = get_training_arguments(multilingual_model, seed, dataset_name) + # run training with these arguments else: - run_training(seed, dataset, model_name_en, tokenizer, label_map) - + training_args = get_training_arguments(model_name_en, seed, dataset_name) + run_training(seed, dataset, model_name_en, tokenizer, label_map, training_args) + # run training with these arguments if __name__ == "__main__": import torch diff --git a/checkthat/task1/training_config.yaml b/checkthat/task1/training_config.yaml index b74fa79..3480178 100644 --- a/checkthat/task1/training_config.yaml +++ b/checkthat/task1/training_config.yaml @@ -1,10 +1,11 @@ training_arguments: + save_strategy: 'epoch' # save model after each epoch evaluation_strategy: 'epoch' # To change evaluation 
strategy comment out the line and uncomment the next line two lines # eval_steps: 500 # evaluation_strategy: 'steps' # evaluate after some number of steps output_dir: './results' # output directory save_total_limit: 5 # number of maximum checkpoints to save - num_train_epochs: 2 # number of training epochs + num_train_epochs: 5 # number of training epochs per_device_train_batch_size: 16 # batch size for training per_device_eval_batch_size: 16 # batch size for evaluation warmup_steps: 500 # number of warmup steps for learning rate scheduler @@ -14,5 +15,4 @@ training_arguments: load_best_model_at_end: True metric_for_best_model: 'loss' # metric to use for saving best model report_to: 'wandb' # report to wandb - save_strategy: 'epoch' # save model after each epoch diff --git a/checkthat/task1/training_scripts/train_config.py b/checkthat/task1/training_scripts/train_config.py index 69121ba..277f7fb 100644 --- a/checkthat/task1/training_scripts/train_config.py +++ b/checkthat/task1/training_scripts/train_config.py @@ -1,19 +1,23 @@ -"""Module to load training arguments from a yaml file.""" - import yaml from transformers import TrainingArguments - def load_config(file_path): """Load configuration from a yaml file.""" with open(file_path, "r") as file: config = yaml.safe_load(file) return config - -def get_training_arguments(): +def get_training_arguments(model_name, seed, dataset_name): """Unpack training arguments from the config file and return as a - TrainingArguments object.""" + TrainingArguments object, with dynamically adjusted output directory based on model name, seed, and dataset.""" config = load_config("checkthat/task1/training_config.yaml") training_args = config["training_arguments"] - return TrainingArguments(**training_args) + + # Extract a short language identifier from the dataset name + language_code = dataset_name.split('_')[-1] # Assuming the dataset name ends with a language code + + # Modify the output_dir dynamically + model_name_safe = model_name.replace('/', '_') # Replace '/' with '_' for filesystem compatibility + training_args['output_dir'] = f"./results/{model_name_safe}_seed_{seed}_{language_code}" + + return TrainingArguments(**training_args) \ No newline at end of file diff --git a/checkthat/task1/training_scripts/training.py b/checkthat/task1/training_scripts/training.py index b8847c4..3b9a744 100644 --- a/checkthat/task1/training_scripts/training.py +++ b/checkthat/task1/training_scripts/training.py @@ -26,7 +26,7 @@ def set_seed(seed): torch.cuda.manual_seed_all(seed) -def run_training(seed, dataset, model_name, tokenizer, label_map): +def run_training(seed, dataset, model_name, tokenizer, label_map, training_arguments): """Start training the model for a single seed. 
Args: @@ -44,14 +44,14 @@ def run_training(seed, dataset, model_name, tokenizer, label_map): entity="aarnes", name=run_name, config={"seed": seed}, - ) + ) # Prepare datasets train_dataset = TextDataset(dataset["train"], tokenizer, label_map) eval_dataset = TextDataset(dataset["validation"], tokenizer, label_map) test_dataset = TextDataset(dataset["test"], tokenizer, label_map) - training_arguments = get_training_arguments() + # training_arguments = get_training_arguments() training_arguments.run_name = ( run_name # Optional, sync the name with Trainer's internal wandb run ) diff --git a/requirements.txt b/requirements.txt index 0bb079ace081eaecc8180d3892136ecee15c0951..5d2c97de202f66d51c1e76a6f556f6e2f28e23e0 100644 GIT binary patch literal 3520 zcmZ{nOOF#t5QOWDw7&!y;r1{LhdnHpl~y1japJV{8-pK>+lHC>@$ANz(dlZ>va*JG zWL9QmWMo$H&(CR@>$5CdeI3iRob>&7eO{Iqr73Up@1*=zp2@N)hw@qWQ)$CvA{+Ks z1A8a0b=k{%8$RPW|Due0pH^oV@;R37V!x5kO>ghYgHF%ONLe;zCoG3@*3Tfq4^)W< zBpIjCU%`t9XlFV(jgy9`Qj|qD&(Wr>O@_x8bpn<)km?bgzg%j*c4_c$Z?7UJ+?8 zZ&*A<N#*%<=5UX%e^8{CyG3%%C+KIOPSrwkR1guNwAyTQ{_1H=_y#r zIt_$(f#|jDuy0mZ5uL8?%_D{t=S(%$TIWxy+>E+vA~?pMVf`S6 zPEsJ1k;gI6f+yp%xLND7iON?(4jUt#IQMbM*M$14^i3#vAAGG8ov1KGk0!mo@L@}2 zM(cx#Z-s{@V$`Tvj#@R&pCB8K{eih3U)y)9(3BvA4k&6EfFN z_}qrQ#!sI%6har!K+Q$QanJ8gyrcchH6AD|Z6igvS1y#s-iE)rl`D69d7R3R=*w4~ zAST$W>e)!%2K#=)!3#Ub_3c$33{*T#ROha@;)Pk6UQ|Yg+~}}}$37HLHxb#bH>~hF zoCkAJ5w@z2&2vy4<^+%0fmSx6&oMX8rWGW1|xxxttZcaZwM>xl(kI>-)w zP{yIc>7^$PPw&@jKQdo?)g9-KrCyBfgW`NtM!H@V2R}6rx+Dwi_vNm)_OrHop{`cNwhYK-k}U{bGq`>6IMI}@LgwsyPmCm zlzpVA3&l*oo7moo3rx9%P$=8liXk5nT_sz^ELa`p$(WgrsiKg<^ON20gaTF1WM^jS z$ug!XZ+y>w&Ro9i$i{tMwd>8yoAP@5@bUu%+MpBME_9z)5!=@jM;%p5Oe?Pn?i#E0QYgVJ!5Qx1JT}{o0i0`rBDi8hu^+@xEnG9iE)H_WCZ5 z-0nnnMrJKBeN&WO#Gu;SL8t4xF83lGaNYRcL^P{{y}^5=lXm`pKHND^(@Uu&-y%Nk zH^$$J^6PtvoPJMc&PQQqZ~G-t-`(cw9D2U)Tje12c2-U3jN5;5;S$9<3k>oi;f;vS z^Nu4MEPDIF`VV<?ipOX zu{T=0x$zsNL%0ZSM>=SZc8Pg2BAh8@Zfx8(Cy0Tk$l-aK&TZfJG&85~l1(VyNX$gW zTK1fcAf#)D_jV-X;wqA+X>8xy7d4B!2IHrxZ%5QHDa|eyHBaq{OgVyW#BCgRnWj_Q zX$=vL7{+#JJNi3E@lF%E_MI$eaSP0$8&2>KWLJ}C{!H$wSy-B&n7n&%Ml4cKWWBSu zX?g>eSDK9&glU_$e@C0yc?^m6?VD0^#oW#X&&2b0f z9^>@1IL{vW%0RD(mmD7)yek+{LF1*A#I`~}K!?)ne!aSIU5w>s-7l3*#pdQ)E=aFDBpi^vx_#4AVlP zEkXKK{UZk1b}w#r7PFpF9d3O{Ia)52h*z{cQ(*as`F7VP3OejtIw}Ka%$l%iM)9Br z{IJ7pD$|N7f#~>wVCA}_DXv(GO6@$$tM`~s?^9EnTw2f-+h8}G)xS1;D-JKvQ}6vmj)Ln)TW^A_rN z(9@nhwtduD)cb+WtJefh&h)V|S4=0ID&6m^DZrr&^n^u%tB3L@dbm_SFpS>O{~$V( AjsO4v From cc4c9705a77300aee5a8c9846d99e3c8fe078589 Mon Sep 17 00:00:00 2001 From: = Date: Thu, 2 May 2024 00:04:42 +0200 Subject: [PATCH 05/16] Small fix and changed traning config to evaluate on step instead of epoch --- checkthat/task1/main_train_all.py | 7 +++---- checkthat/task1/training_config.yaml | 10 +++++----- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/checkthat/task1/main_train_all.py b/checkthat/task1/main_train_all.py index 9498045..500573a 100644 --- a/checkthat/task1/main_train_all.py +++ b/checkthat/task1/main_train_all.py @@ -14,8 +14,8 @@ def main(): ] label_map = {"No": 0, "Yes": 1} - model_name_en = "distilbert/distilroberta-base" - multilingual_model = "FacebookAI/xlm-roberta-base" + model_name_en = "FacebookAI/roberta-large" + multilingual_model = "FacebookAI/xlm-roberta-large" seeds = [42, 81, 1024, 6, 10] tokenizer = AutoTokenizer.from_pretrained(model_name_en) @@ -26,11 +26,10 @@ def main(): dataset = rename_features(dataset) tokenizer = AutoTokenizer.from_pretrained(multilingual_model) training_args = get_training_arguments(multilingual_model, seed, dataset_name) - # run training with 
these arguments + run_training(seed, dataset, model_name_en, tokenizer, label_map, training_args) else: training_args = get_training_arguments(model_name_en, seed, dataset_name) run_training(seed, dataset, model_name_en, tokenizer, label_map, training_args) - # run training with these arguments if __name__ == "__main__": import torch diff --git a/checkthat/task1/training_config.yaml b/checkthat/task1/training_config.yaml index 3480178..a4e9d66 100644 --- a/checkthat/task1/training_config.yaml +++ b/checkthat/task1/training_config.yaml @@ -1,11 +1,11 @@ training_arguments: - save_strategy: 'epoch' # save model after each epoch - evaluation_strategy: 'epoch' # To change evaluation strategy comment out the line and uncomment the next line two lines - # eval_steps: 500 - # evaluation_strategy: 'steps' # evaluate after some number of steps + # save_strategy: 'epoch' # save model after each epoch + # evaluation_strategy: 'epoch' # To change evaluation strategy comment out the line and uncomment the next line two lines + eval_steps: 500 + evaluation_strategy: 'steps' # evaluate after some number of steps output_dir: './results' # output directory save_total_limit: 5 # number of maximum checkpoints to save - num_train_epochs: 5 # number of training epochs + num_train_epochs: 50 # number of training epochs per_device_train_batch_size: 16 # batch size for training per_device_eval_batch_size: 16 # batch size for evaluation warmup_steps: 500 # number of warmup steps for learning rate scheduler From 232757ab8db90dba3376a842851f14c873a530a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20R=C3=B8ysland=20Aarnes?= Date: Thu, 2 May 2024 10:06:40 +0200 Subject: [PATCH 06/16] Added so that wandb will report what language model is being used --- checkthat/task1/training_scripts/__init__.py | 1 + checkthat/task1/training_scripts/train_config.py | 7 ++++++- checkthat/task1/training_scripts/training.py | 5 +++-- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/checkthat/task1/training_scripts/__init__.py b/checkthat/task1/training_scripts/__init__.py index 422193e..57c4fe1 100644 --- a/checkthat/task1/training_scripts/__init__.py +++ b/checkthat/task1/training_scripts/__init__.py @@ -2,3 +2,4 @@ from .training import run_training from .train_config import get_training_arguments +from .train_config import get_language diff --git a/checkthat/task1/training_scripts/train_config.py b/checkthat/task1/training_scripts/train_config.py index 277f7fb..9de1546 100644 --- a/checkthat/task1/training_scripts/train_config.py +++ b/checkthat/task1/training_scripts/train_config.py @@ -20,4 +20,9 @@ def get_training_arguments(model_name, seed, dataset_name): model_name_safe = model_name.replace('/', '_') # Replace '/' with '_' for filesystem compatibility training_args['output_dir'] = f"./results/{model_name_safe}_seed_{seed}_{language_code}" - return TrainingArguments(**training_args) \ No newline at end of file + return TrainingArguments(**training_args) + +def get_language(dataset_name): + """Extract the language code from the dataset name.""" + dataset_language = dataset_name.split('_')[-1] # Assuming the dataset name ends with a language code + return dataset_language \ No newline at end of file diff --git a/checkthat/task1/training_scripts/training.py b/checkthat/task1/training_scripts/training.py index 3b9a744..f34fb2d 100644 --- a/checkthat/task1/training_scripts/training.py +++ b/checkthat/task1/training_scripts/training.py @@ -8,6 +8,7 @@ from models.custom_model import CustomModel from 
metrics.compute_metrics import compute_metrics from training_scripts.train_config import get_training_arguments +from training_scripts.train_config import get_language import random import numpy as np import torch @@ -26,7 +27,7 @@ def set_seed(seed): torch.cuda.manual_seed_all(seed) -def run_training(seed, dataset, model_name, tokenizer, label_map, training_arguments): +def run_training(seed, dataset, model_name, tokenizer, label_map, training_arguments, dataset_language): """Start training the model for a single seed. Args: @@ -38,7 +39,7 @@ def run_training(seed, dataset, model_name, tokenizer, label_map, training_argum """ # Initialize wandb run set_seed(seed) - run_name = f"{model_name}_{seed}" + run_name = f"{model_name}_{seed}_{dataset_language}" wandb.init( project="Clef2024", entity="aarnes", From aac9f8f4c076b6043549d9c607aa55d4e5434e8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20R=C3=B8ysland=20Aarnes?= Date: Thu, 2 May 2024 14:10:58 +0200 Subject: [PATCH 07/16] Fixing some errors in logic --- checkthat/task1/main_train_all.py | 6 +-- checkthat/task1/metrics/metrics_logger.py | 6 +-- checkthat/task1/test_scripts/__init__.py | 0 .../test_scripts/load_from_checkpoints.py | 37 +++++++++++++++++++ .../task1/test_scripts/test_devtest_test.py | 15 ++++++++ checkthat/task1/training_config.yaml | 13 ++++--- checkthat/task1/training_scripts/training.py | 2 - 7 files changed, 65 insertions(+), 14 deletions(-) create mode 100644 checkthat/task1/test_scripts/__init__.py create mode 100644 checkthat/task1/test_scripts/load_from_checkpoints.py create mode 100644 checkthat/task1/test_scripts/test_devtest_test.py diff --git a/checkthat/task1/main_train_all.py b/checkthat/task1/main_train_all.py index 500573a..6a30194 100644 --- a/checkthat/task1/main_train_all.py +++ b/checkthat/task1/main_train_all.py @@ -12,15 +12,15 @@ def main(): "iai-group/clef2024_checkthat_task1_es", "iai-group/clef2024_checkthat_task1_nl", ] - label_map = {"No": 0, "Yes": 1} + label_map = {"Yes": 1, "No": 0} model_name_en = "FacebookAI/roberta-large" multilingual_model = "FacebookAI/xlm-roberta-large" seeds = [42, 81, 1024, 6, 10] tokenizer = AutoTokenizer.from_pretrained(model_name_en) - for dataset_name in dataset_list: - for seed in seeds: + for seed in seeds: + for dataset_name in dataset_list: dataset = load_dataset(dataset_name) if "tweet_text" in dataset["train"].column_names: dataset = rename_features(dataset) diff --git a/checkthat/task1/metrics/metrics_logger.py b/checkthat/task1/metrics/metrics_logger.py index fedcd90..309b438 100644 --- a/checkthat/task1/metrics/metrics_logger.py +++ b/checkthat/task1/metrics/metrics_logger.py @@ -20,9 +20,9 @@ def compute_custom_metrics(logits, labels): predictions = np.argmax(logits, axis=1) # Convert logits to predictions # Calculate metrics - precision = precision_score(labels, predictions, average="binary") - recall = recall_score(labels, predictions, average="binary") - f1 = f1_score(labels, predictions, average="binary") + precision = precision_score(labels, predictions, average="macro", pos_label=1) + recall = recall_score(labels, predictions, average="macro", pos_label=1) + f1 = f1_score(labels, predictions, average="macro", pos_label=1) return precision, recall, f1 diff --git a/checkthat/task1/test_scripts/__init__.py b/checkthat/task1/test_scripts/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/checkthat/task1/test_scripts/load_from_checkpoints.py b/checkthat/task1/test_scripts/load_from_checkpoints.py new file mode 100644 index 
0000000..eee440f --- /dev/null +++ b/checkthat/task1/test_scripts/load_from_checkpoints.py @@ -0,0 +1,37 @@ +import os +from transformers import AutoModelForSequenceClassification +import torch + +def find_latest_checkpoint(model_dir): + """Find the latest checkpoint in the given directory.""" + checkpoint_dirs = [os.path.join(model_dir, d) for d in os.listdir(model_dir) if os.path.isdir(os.path.join(model_dir, d)) and 'checkpoint' in d] + if not checkpoint_dirs: + raise ValueError("No checkpoint directories found in the given model directory.") + + # Sort directories to find the one with the highest step (assuming naming convention includes "checkpoint-") + latest_checkpoint = sorted(checkpoint_dirs, key=lambda x: int(x.split('-')[-1]))[-1] + return latest_checkpoint + +def load_model_from_dir(base_dir): + """Load models from a structured directory of models. + Args: + base_dir (str): Directory containing subdirectories of models named like 'FacebookAI/xlm-roberta-base_10_en' + Returns: + models (dict): Dictionary with keys as model names and values as loaded model objects. + """ + models = {} + for model_name in os.listdir(base_dir): + model_path = os.path.join(base_dir, model_name) + if not os.path.isdir(model_path): + continue + + try: + latest_checkpoint = find_latest_checkpoint(model_path) + model = AutoModelForSequenceClassification.from_pretrained(latest_checkpoint) + models[model_name] = model + print(f"Loaded model from {latest_checkpoint}") + except Exception as e: + print(f"Failed to load model from {model_path}: {str(e)}") + + return models + diff --git a/checkthat/task1/test_scripts/test_devtest_test.py b/checkthat/task1/test_scripts/test_devtest_test.py new file mode 100644 index 0000000..456abac --- /dev/null +++ b/checkthat/task1/test_scripts/test_devtest_test.py @@ -0,0 +1,15 @@ +from load_from_checkpoints import load_model_from_dir + + +base_dir = "./results" +models = load_model_from_dir(base_dir) + + +dataset_list = [ + "iai-group/clef2024_checkthat_task1_en", + "iai-group/clef2024_checkthat_task1_ar", + ] + + + +dataset = load_dataset(dataset_name) diff --git a/checkthat/task1/training_config.yaml b/checkthat/task1/training_config.yaml index a4e9d66..a6680ad 100644 --- a/checkthat/task1/training_config.yaml +++ b/checkthat/task1/training_config.yaml @@ -1,11 +1,12 @@ training_arguments: - # save_strategy: 'epoch' # save model after each epoch - # evaluation_strategy: 'epoch' # To change evaluation strategy comment out the line and uncomment the next line two lines - eval_steps: 500 - evaluation_strategy: 'steps' # evaluate after some number of steps + save_strategy: 'epoch' # save model after each epoch + evaluation_strategy: 'epoch' # To change evaluation strategy comment out the line and uncomment the next line two lines + greater_is_better: True + # eval_steps: 500 + # evaluation_strategy: 'steps' # evaluate after some number of steps output_dir: './results' # output directory save_total_limit: 5 # number of maximum checkpoints to save - num_train_epochs: 50 # number of training epochs + num_train_epochs: 100 # number of training epochs per_device_train_batch_size: 16 # batch size for training per_device_eval_batch_size: 16 # batch size for evaluation warmup_steps: 500 # number of warmup steps for learning rate scheduler @@ -13,6 +14,6 @@ training_arguments: logging_dir: './logs' # directory for storing logs logging_steps: 10 load_best_model_at_end: True - metric_for_best_model: 'loss' # metric to use for saving best model + metric_for_best_model: "f1" # metric to 
use for saving best model report_to: 'wandb' # report to wandb diff --git a/checkthat/task1/training_scripts/training.py b/checkthat/task1/training_scripts/training.py index f34fb2d..5cfa9d9 100644 --- a/checkthat/task1/training_scripts/training.py +++ b/checkthat/task1/training_scripts/training.py @@ -12,7 +12,6 @@ import random import numpy as np import torch -import os import torch.cuda import torch torch.backends.cuda.matmul.allow_tf32 = True @@ -50,7 +49,6 @@ def run_training(seed, dataset, model_name, tokenizer, label_map, training_argum # Prepare datasets train_dataset = TextDataset(dataset["train"], tokenizer, label_map) eval_dataset = TextDataset(dataset["validation"], tokenizer, label_map) - test_dataset = TextDataset(dataset["test"], tokenizer, label_map) # training_arguments = get_training_arguments() training_arguments.run_name = ( From d3f35a6f02d91fb2bf153e7aed151cb6146eb412 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20R=C3=B8ysland=20Aarnes?= Date: Thu, 2 May 2024 15:35:31 +0200 Subject: [PATCH 08/16] Creating run tests that should handle both labeled and unlabeled data --- .gitignore | 5 ++- checkthat/task1/main_train_all.py | 20 +++++++++++ .../test_scripts/load_from_checkpoints.py | 1 + checkthat/task1/test_scripts/run_tests.py | 34 +++++++++++++++++++ .../task1/test_scripts/test_devtest_test.py | 15 -------- checkthat/task1/training_config.yaml | 5 ++- checkthat/task1/training_scripts/training.py | 11 +++--- 7 files changed, 66 insertions(+), 25 deletions(-) create mode 100644 checkthat/task1/test_scripts/run_tests.py delete mode 100644 checkthat/task1/test_scripts/test_devtest_test.py diff --git a/.gitignore b/.gitignore index 4059efb..1f08207 100644 --- a/.gitignore +++ b/.gitignore @@ -129,4 +129,7 @@ dmypy.json .pyre/ # Exclude .DS_Store files everywhere -*.DS_Store \ No newline at end of file +*.DS_Store + +# Exclude node_modules +.vscode/ \ No newline at end of file diff --git a/checkthat/task1/main_train_all.py b/checkthat/task1/main_train_all.py index 6a30194..cb40b3f 100644 --- a/checkthat/task1/main_train_all.py +++ b/checkthat/task1/main_train_all.py @@ -4,6 +4,8 @@ from training_scripts.training import run_training from transformers import AutoTokenizer from training_scripts.train_config import get_training_arguments +from test_scripts.load_from_checkpoints import load_model_from_dir +from test_scripts.run_tests import run_testing def main(): dataset_list = [ @@ -19,6 +21,7 @@ def main(): seeds = [42, 81, 1024, 6, 10] tokenizer = AutoTokenizer.from_pretrained(model_name_en) + """Training model on trainset for each seed and each language""" for seed in seeds: for dataset_name in dataset_list: dataset = load_dataset(dataset_name) @@ -31,6 +34,23 @@ def main(): training_args = get_training_arguments(model_name_en, seed, dataset_name) run_training(seed, dataset, model_name_en, tokenizer, label_map, training_args) + + + """Testing model on testset""" + base_dir = "./results" + models = load_model_from_dir(base_dir) + for model_name, model in models.items(): + i += 1 # Incrementing i to get the model name for each model + for dataset_name in dataset_list: + dataset = load_dataset(dataset_name) + if "tweet_text" in dataset["test"].column_names: + dataset = rename_features(dataset) + tokenizer = AutoTokenizer.from_pretrained(multilingual_model) + run_testing(model, dataset, tokenizer, label_map) + else: + tokenizer = AutoTokenizer.from_pretrained(model_name) + run_testing(model, dataset, tokenizer, label_map, model.keys()[i]) # model.keys()[i] to get the model 
name + if __name__ == "__main__": import torch diff --git a/checkthat/task1/test_scripts/load_from_checkpoints.py b/checkthat/task1/test_scripts/load_from_checkpoints.py index eee440f..2c8cb6f 100644 --- a/checkthat/task1/test_scripts/load_from_checkpoints.py +++ b/checkthat/task1/test_scripts/load_from_checkpoints.py @@ -35,3 +35,4 @@ def load_model_from_dir(base_dir): return models + \ No newline at end of file diff --git a/checkthat/task1/test_scripts/run_tests.py b/checkthat/task1/test_scripts/run_tests.py new file mode 100644 index 0000000..d4b101d --- /dev/null +++ b/checkthat/task1/test_scripts/run_tests.py @@ -0,0 +1,34 @@ +import torch +import wandb +from tokenization.tokenizer import TextDataset +from models.custom_model import CustomModel +from metrics.compute_metrics import compute_metrics + +def run_testing(model_name, dataset, tokenizer, label_map, model_named_trained): + """Run testing on the given model and dataset.""" + + run_name = f"TEST__{model_named_trained}" + wandb.init(project="Clef2024", entity="aarnes", name=run_name) + + # Assuming TextDataset provides the input in the correct format + test_dataset = TextDataset(dataset["test"], tokenizer, label_map) + model = CustomModel(model_name=model_name, num_labels=len(label_map), device='cuda') + model.eval() + + logits = [] + labels = [] + + with torch.no_grad(): + for batch in test_dataset: + input_ids, attention_mask, label = batch['input_ids'].to('cuda'), batch['attention_mask'].to('cuda'), batch['labels'].to('cuda') + output = model(input_ids=input_ids, attention_mask=attention_mask) + logits.append(output.logits) # Adjust according to how outputs are structured + labels.append(label) + + logits = torch.cat(logits) + labels = torch.cat(labels) + predictions = logits.argmax(-1) + metrics = compute_metrics((predictions, labels)) + wandb.log(metrics) + wandb.finish() + diff --git a/checkthat/task1/test_scripts/test_devtest_test.py b/checkthat/task1/test_scripts/test_devtest_test.py deleted file mode 100644 index 456abac..0000000 --- a/checkthat/task1/test_scripts/test_devtest_test.py +++ /dev/null @@ -1,15 +0,0 @@ -from load_from_checkpoints import load_model_from_dir - - -base_dir = "./results" -models = load_model_from_dir(base_dir) - - -dataset_list = [ - "iai-group/clef2024_checkthat_task1_en", - "iai-group/clef2024_checkthat_task1_ar", - ] - - - -dataset = load_dataset(dataset_name) diff --git a/checkthat/task1/training_config.yaml b/checkthat/task1/training_config.yaml index a6680ad..e59b0e1 100644 --- a/checkthat/task1/training_config.yaml +++ b/checkthat/task1/training_config.yaml @@ -5,7 +5,7 @@ training_arguments: # eval_steps: 500 # evaluation_strategy: 'steps' # evaluate after some number of steps output_dir: './results' # output directory - save_total_limit: 5 # number of maximum checkpoints to save + save_total_limit: 3 # number of maximum checkpoints to save num_train_epochs: 100 # number of training epochs per_device_train_batch_size: 16 # batch size for training per_device_eval_batch_size: 16 # batch size for evaluation @@ -15,5 +15,4 @@ training_arguments: logging_steps: 10 load_best_model_at_end: True metric_for_best_model: "f1" # metric to use for saving best model - report_to: 'wandb' # report to wandb - + report_to: 'wandb' # report to wandb \ No newline at end of file diff --git a/checkthat/task1/training_scripts/training.py b/checkthat/task1/training_scripts/training.py index 5cfa9d9..cc23944 100644 --- a/checkthat/task1/training_scripts/training.py +++ 
b/checkthat/task1/training_scripts/training.py @@ -25,20 +25,19 @@ def set_seed(seed): if torch.cuda.is_available(): torch.cuda.manual_seed_all(seed) - -def run_training(seed, dataset, model_name, tokenizer, label_map, training_arguments, dataset_language): +def run_training(seed, dataset, model, tokenizer, label_map, training_arguments, dataset_language): """Start training the model for a single seed. Args: seed: seed for reproducibility dataset: dataset dictionary containing train and validation splits - model_name: huggingface model name + model: huggingface model name tokenizer: huggerface tokenizer/same as model name label_map: dictionary mapping labels to integers """ # Initialize wandb run set_seed(seed) - run_name = f"{model_name}_{seed}_{dataset_language}" + run_name = f"{model}_{seed}_{dataset_language}" wandb.init( project="Clef2024", entity="aarnes", @@ -57,11 +56,11 @@ def run_training(seed, dataset, model_name, tokenizer, label_map, training_argum # Creating a Trainer instance with training arguments and datasets trainer = Trainer( - model=CustomModel(model_name, num_labels=len(label_map), device='cuda'), + model=CustomModel(model, num_labels=len(label_map), device='cuda'), args=training_arguments, train_dataset=train_dataset, eval_dataset=eval_dataset, - compute_metrics=compute_metrics, + compute_metrics=compute_metrics(), callbacks=[ EarlyStoppingCallback(early_stopping_patience=3) ], # Early stopping callback From 8aca646eca22d89a6c93ae8b50564c1dbe692e93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20R=C3=B8ysland=20Aarnes?= Date: Thu, 2 May 2024 17:01:26 +0200 Subject: [PATCH 09/16] Starting to create an script to test all models. Next iteration will do it dynamically according to language. --- checkthat/task1/main_test_all.py | 116 ++++++++++++++++++ checkthat/task1/main_train_all.py | 20 +-- .../test_scripts/load_from_checkpoints.py | 38 ------ checkthat/task1/test_scripts/run_tests.py | 99 ++++++++++++--- checkthat/task1/tokenization/tokenizer.py | 24 ++-- checkthat/task1/training_config.yaml | 2 +- 6 files changed, 213 insertions(+), 86 deletions(-) create mode 100644 checkthat/task1/main_test_all.py diff --git a/checkthat/task1/main_test_all.py b/checkthat/task1/main_test_all.py new file mode 100644 index 0000000..efe38ba --- /dev/null +++ b/checkthat/task1/main_test_all.py @@ -0,0 +1,116 @@ +import os +import torch +import pandas as pd +from transformers import AutoModelForSequenceClassification, AutoTokenizer +from task1.models.custom_model import CustomModel +from task1.tokenization.tokenizer import TextDataset +from task1.metrics.compute_metrics import compute_metrics +import wandb +from tokenization.tokenizer import TextDataset + +def find_latest_checkpoint(model_dir): + """Find the latest checkpoint in the given directory.""" + checkpoint_dirs = [os.path.join(model_dir, d) for d in os.listdir(model_dir) if os.path.isdir(os.path.join(model_dir, d)) and 'checkpoint' in d] + if not checkpoint_dirs: + raise ValueError("No checkpoint directories found in the given model directory.") + latest_checkpoint = sorted(checkpoint_dirs, key=lambda x: int(x.split('-')[-1]))[-1] + return latest_checkpoint + +def find_models_with_checkpoints(base_dir): + """Find models and their latest checkpoints in a structured directory.""" + model_info = [] + for model_name in os.listdir(base_dir): + model_path = os.path.join(base_dir, model_name) + if not os.path.isdir(model_path): + continue + try: + latest_checkpoint = find_latest_checkpoint(model_path) + model_info.append((model_name, 
latest_checkpoint)) + print(f"Model: {model_name}, Latest Checkpoint: {latest_checkpoint}") + except Exception as e: + print(f"Failed to find checkpoint for model {model_name}: {str(e)}") + return model_info + +def run_prediction(model_name, dataset_list, tokenizer, model_path, has_labels: bool): + """Run prediction on the dataset, compute metrics if labels are present, and write results to a .tsv file.""" + device = 'cuda' + label_map = {0: 'no', 1: 'yes'} + + # Detect language from model name and select dataset + lang = model_name.split('_')[-2] # Assumes format like 'modelname_lang_' + dataset = dataset_list[lang] + + # Initialize Weights & Biases + run_name = f"TEST__{model_path}" + wandb.init(project="Clef2024", entity="aarnes", name=run_name) + + # Load the model from the checkpoint + model = CustomModel.from_pretrained(model_path) + model.to(device) + model.eval() + + # Load the dataset, with or without labels + test_dataset = TextDataset(dataset["test"], tokenizer, None if not has_labels else label_map) + all_logits = [] + all_labels = [] + results = [] + + with torch.no_grad(): + for i, batch in enumerate(test_dataset): + input_ids, attention_mask = batch['input_ids'].to(device), batch['attention_mask'].to(device) + output = model(input_ids=input_ids, attention_mask=attention_mask) + logits = output.logits + predictions = logits.argmax(-1).cpu().numpy() + + # Collect logits and labels for metric calculation if labels are present + if has_labels and 'labels' in batch: + labels = batch['labels'].cpu().numpy() + all_logits.append(logits) + all_labels.append(torch.tensor(labels)) + for label, pred in zip(labels, predictions): + results.append((i, label_map[pred], model_name)) + else: + for pred in predictions: + results.append((i, label_map[pred], model_name)) + + # If labels were present, calculate metrics + if has_labels: + all_logits = torch.cat(all_logits) + all_labels = torch.cat(all_labels) + predictions = all_logits.argmax(-1) + metrics = compute_metrics((predictions, all_labels)) + wandb.log(metrics) + + # Save results to a .tsv file + df = pd.DataFrame(results, columns=['sentence_id', 'prediction', 'model_name']) + df.to_csv(f"{model_path}_predictions.tsv", sep='\t', index=False) + + # Finish Weights & Biases logging + wandb.finish() + + +if __name__ == "__main__": + # Define the dataset list for each language + dataset_list = { + "en":"iai-group/clef2024_checkthat_task1_en", + "ar":"iai-group/clef2024_checkthat_task1_ar", + "es":"iai-group/clef2024_checkthat_task1_es", + "nl":"iai-group/clef2024_checkthat_task1_nl", + } + + label_map = {"Yes": 1, "No": 0} + + + # Load models and run prediction + base_dir = "./trained_models" + i = 0 + + model_info = find_models_with_checkpoints(base_dir) + for model_name, checkpoint_path in model_info: + tokenizer = AutoTokenizer.from_pretrained(checkpoint_path) # General tokenizer + model = AutoModelForSequenceClassification.from_pretrained(checkpoint_path) # Model for prediction + tokenized_data = TextDataset(dataset_list.values()[i], tokenizer, label_map) + run_prediction(model_name, dataset_list, tokenizer, checkpoint_path, has_labels=True) + i += 1 + + diff --git a/checkthat/task1/main_train_all.py b/checkthat/task1/main_train_all.py index cb40b3f..6303fce 100644 --- a/checkthat/task1/main_train_all.py +++ b/checkthat/task1/main_train_all.py @@ -4,8 +4,8 @@ from training_scripts.training import run_training from transformers import AutoTokenizer from training_scripts.train_config import get_training_arguments -from 
test_scripts.load_from_checkpoints import load_model_from_dir -from test_scripts.run_tests import run_testing +from test_scripts.load_from_checkpoints import find_latest_checkpoint +from test_scripts.run_tests import run_prediction def main(): dataset_list = [ @@ -35,22 +35,6 @@ def main(): run_training(seed, dataset, model_name_en, tokenizer, label_map, training_args) - - """Testing model on testset""" - base_dir = "./results" - models = load_model_from_dir(base_dir) - for model_name, model in models.items(): - i += 1 # Incrementing i to get the model name for each model - for dataset_name in dataset_list: - dataset = load_dataset(dataset_name) - if "tweet_text" in dataset["test"].column_names: - dataset = rename_features(dataset) - tokenizer = AutoTokenizer.from_pretrained(multilingual_model) - run_testing(model, dataset, tokenizer, label_map) - else: - tokenizer = AutoTokenizer.from_pretrained(model_name) - run_testing(model, dataset, tokenizer, label_map, model.keys()[i]) # model.keys()[i] to get the model name - if __name__ == "__main__": import torch diff --git a/checkthat/task1/test_scripts/load_from_checkpoints.py b/checkthat/task1/test_scripts/load_from_checkpoints.py index 2c8cb6f..e69de29 100644 --- a/checkthat/task1/test_scripts/load_from_checkpoints.py +++ b/checkthat/task1/test_scripts/load_from_checkpoints.py @@ -1,38 +0,0 @@ -import os -from transformers import AutoModelForSequenceClassification -import torch - -def find_latest_checkpoint(model_dir): - """Find the latest checkpoint in the given directory.""" - checkpoint_dirs = [os.path.join(model_dir, d) for d in os.listdir(model_dir) if os.path.isdir(os.path.join(model_dir, d)) and 'checkpoint' in d] - if not checkpoint_dirs: - raise ValueError("No checkpoint directories found in the given model directory.") - - # Sort directories to find the one with the highest step (assuming naming convention includes "checkpoint-") - latest_checkpoint = sorted(checkpoint_dirs, key=lambda x: int(x.split('-')[-1]))[-1] - return latest_checkpoint - -def load_model_from_dir(base_dir): - """Load models from a structured directory of models. - Args: - base_dir (str): Directory containing subdirectories of models named like 'FacebookAI/xlm-roberta-base_10_en' - Returns: - models (dict): Dictionary with keys as model names and values as loaded model objects. 
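The checkpoint helpers here rely on the Hugging Face Trainer's checkpoint-<global_step> directory naming, so the newest save is simply the entry with the largest numeric suffix. A self-contained sketch of that selection step (the directory names are made up for illustration):

# Illustrative sketch only: pick the newest "checkpoint-<step>" directory by its numeric suffix.
def pick_latest_checkpoint(checkpoint_dirs):
    """Return the path whose trailing step number is highest."""
    return max(checkpoint_dirs, key=lambda path: int(path.rsplit("-", 1)[-1]))

example_dirs = [
    "results/roberta-large_seed_42_en/checkpoint-500",   # hypothetical paths
    "results/roberta-large_seed_42_en/checkpoint-1500",
    "results/roberta-large_seed_42_en/checkpoint-1000",
]
print(pick_latest_checkpoint(example_dirs))  # -> .../checkpoint-1500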
- """ - models = {} - for model_name in os.listdir(base_dir): - model_path = os.path.join(base_dir, model_name) - if not os.path.isdir(model_path): - continue - - try: - latest_checkpoint = find_latest_checkpoint(model_path) - model = AutoModelForSequenceClassification.from_pretrained(latest_checkpoint) - models[model_name] = model - print(f"Loaded model from {latest_checkpoint}") - except Exception as e: - print(f"Failed to load model from {model_path}: {str(e)}") - - return models - - \ No newline at end of file diff --git a/checkthat/task1/test_scripts/run_tests.py b/checkthat/task1/test_scripts/run_tests.py index d4b101d..1768771 100644 --- a/checkthat/task1/test_scripts/run_tests.py +++ b/checkthat/task1/test_scripts/run_tests.py @@ -1,34 +1,95 @@ import torch -import wandb -from tokenization.tokenizer import TextDataset +import pandas as pd from models.custom_model import CustomModel +from tokenization.tokenizer import TextDataset +import wandb from metrics.compute_metrics import compute_metrics -def run_testing(model_name, dataset, tokenizer, label_map, model_named_trained): - """Run testing on the given model and dataset.""" - + +import os +from transformers import AutoModelForSequenceClassification +import torch + +def find_latest_checkpoint(model_dir): + """Find the latest checkpoint in the given directory.""" + checkpoint_dirs = [os.path.join(model_dir, d) for d in os.listdir(model_dir) if os.path.isdir(os.path.join(model_dir, d)) and 'checkpoint' in d] + if not checkpoint_dirs: + raise ValueError("No checkpoint directories found in the given model directory.") + latest_checkpoint = sorted(checkpoint_dirs, key=lambda x: int(x.split('-')[-1]))[-1] + return latest_checkpoint + +def find_models_with_checkpoints(base_dir): + """Find models and their latest checkpoints in a structured directory. + Args: + base_dir (str): Directory containing subdirectories of models + Returns: + model_info (list): List of tuples containing model name and path to the latest checkpoint. 
+ """ + model_info = [] + for model_name in os.listdir(base_dir): + model_path = os.path.join(base_dir, model_name) + if not os.path.isdir(model_path): + continue + + try: + latest_checkpoint = find_latest_checkpoint(model_path) + model_info.append((model_name, latest_checkpoint)) + print(f"Model: {model_name}, Latest Checkpoint: {latest_checkpoint}") + except Exception as e: + print(f"Failed to find checkpoint for model {model_name}: {str(e)}") + + return model_info + + + +def run_prediction(model_name, dataset, tokenizer, model_named_trained, has_labels: bool): + """Run prediction on the dataset, compute metrics if labels are present, and write results to a .tsv file.""" + + device = 'cuda' + label_map = {0: 'no', 1: 'yes'} # Ensure this mapping is correct for your model + + # Initialize Weights & Biases run_name = f"TEST__{model_named_trained}" wandb.init(project="Clef2024", entity="aarnes", name=run_name) - # Assuming TextDataset provides the input in the correct format - test_dataset = TextDataset(dataset["test"], tokenizer, label_map) - model = CustomModel(model_name=model_name, num_labels=len(label_map), device='cuda') + model = CustomModel(model_name=model_name, num_labels=len(label_map), device=device) model.eval() - logits = [] - labels = [] + # Load the dataset, with or without labels + test_dataset = TextDataset(dataset["test"], tokenizer, None if not has_labels else label_map) + all_logits = [] + all_labels = [] + results = [] with torch.no_grad(): - for batch in test_dataset: - input_ids, attention_mask, label = batch['input_ids'].to('cuda'), batch['attention_mask'].to('cuda'), batch['labels'].to('cuda') + for i, batch in enumerate(test_dataset): + input_ids, attention_mask = batch['input_ids'].to(device), batch['attention_mask'].to(device) output = model(input_ids=input_ids, attention_mask=attention_mask) - logits.append(output.logits) # Adjust according to how outputs are structured - labels.append(label) + logits = output.logits + predictions = logits.argmax(-1).cpu().numpy() + + # Collect logits and labels for metric calculation if labels are present + if has_labels and 'labels' in batch: + labels = batch['labels'].cpu().numpy() + all_logits.append(logits) + all_labels.append(torch.tensor(labels)) + for label, pred in zip(labels, predictions): + results.append((i, label_map[pred], model_named_trained)) + else: + for pred in predictions: + results.append((i, label_map[pred], model_named_trained)) - logits = torch.cat(logits) - labels = torch.cat(labels) - predictions = logits.argmax(-1) - metrics = compute_metrics((predictions, labels)) + # If labels were present, calculate metrics + if has_labels: + all_logits = torch.cat(all_logits) + all_labels = torch.cat(all_labels) + predictions = all_logits.argmax(-1) + metrics = compute_metrics((predictions, all_labels)) wandb.log(metrics) - wandb.finish() + # Save results to a .tsv file + df = pd.DataFrame(results, columns=['sentence_id', 'prediction', 'model_name']) + df.to_csv(f"{model_named_trained}_predictions.tsv", sep='\t', index=False) + + # Finish Weights & Biases logging + wandb.finish() diff --git a/checkthat/task1/tokenization/tokenizer.py b/checkthat/task1/tokenization/tokenizer.py index b4e5848..27c9303 100644 --- a/checkthat/task1/tokenization/tokenizer.py +++ b/checkthat/task1/tokenization/tokenizer.py @@ -1,16 +1,16 @@ -"""Tokenizer for the task1 datasets.""" import torch from torch.utils.data import Dataset - class TextDataset(Dataset): - """Takes a list of dictionaries containing text and class labels. 
+ """Takes a list of dictionaries containing text and optionally class labels. Args: - Dataset: Dataset class from torch.utils.data + data (list): A list of dictionaries with keys 'Text' and optionally 'class_label'. + tokenizer: Tokenizer instance for text processing. + label_map (dict, optional): A dictionary mapping class labels to integers. None if unlabeled. """ - def __init__(self, data, tokenizer, label_map): + def __init__(self, data, tokenizer, label_map=None): """Initialize the TextDataset class.""" self.data = data self.tokenizer = tokenizer @@ -21,8 +21,8 @@ def __len__(self): return len(self.data) def __getitem__(self, idx): - """Tokenize the text and return a dictionary containing the - tokenized.""" + """Tokenize the text and return a dictionary containing the tokenized data. + If labels are present, include them, otherwise only return inputs.""" item = self.data[idx] encoded = self.tokenizer.encode_plus( item["Text"], @@ -33,9 +33,13 @@ def __getitem__(self, idx): return_tensors="pt", ) - label_id = self.label_map[item["class_label"]] - return { + result = { "input_ids": encoded["input_ids"].squeeze(0), "attention_mask": encoded["attention_mask"].squeeze(0), - "labels": torch.tensor(label_id), } + + if 'class_label' in item and self.label_map is not None: + label_id = self.label_map[item["class_label"]] + result["labels"] = torch.tensor(label_id) + + return result diff --git a/checkthat/task1/training_config.yaml b/checkthat/task1/training_config.yaml index e59b0e1..4ec4bfc 100644 --- a/checkthat/task1/training_config.yaml +++ b/checkthat/task1/training_config.yaml @@ -4,7 +4,7 @@ training_arguments: greater_is_better: True # eval_steps: 500 # evaluation_strategy: 'steps' # evaluate after some number of steps - output_dir: './results' # output directory + output_dir: './trained_models' # output directory save_total_limit: 3 # number of maximum checkpoints to save num_train_epochs: 100 # number of training epochs per_device_train_batch_size: 16 # batch size for training From ef31efb9d223365f24401c72d17ff347f2f8b087 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20R=C3=B8ysland=20Aarnes?= Date: Thu, 2 May 2024 17:04:56 +0200 Subject: [PATCH 10/16] Paths fix --- checkthat/task1/main_test_all.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/checkthat/task1/main_test_all.py b/checkthat/task1/main_test_all.py index efe38ba..cd4615b 100644 --- a/checkthat/task1/main_test_all.py +++ b/checkthat/task1/main_test_all.py @@ -2,9 +2,9 @@ import torch import pandas as pd from transformers import AutoModelForSequenceClassification, AutoTokenizer -from task1.models.custom_model import CustomModel -from task1.tokenization.tokenizer import TextDataset -from task1.metrics.compute_metrics import compute_metrics +from models.custom_model import CustomModel +from tokenization.tokenizer import TextDataset +from metrics.compute_metrics import compute_metrics import wandb from tokenization.tokenizer import TextDataset @@ -112,5 +112,5 @@ def run_prediction(model_name, dataset_list, tokenizer, model_path, has_labels: tokenized_data = TextDataset(dataset_list.values()[i], tokenizer, label_map) run_prediction(model_name, dataset_list, tokenizer, checkpoint_path, has_labels=True) i += 1 - + From 448b27793371d159982af247cd931043b0499624 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20R=C3=B8ysland=20Aarnes?= Date: Thu, 2 May 2024 17:20:33 +0200 Subject: [PATCH 11/16] Added slurm --- checkthat/task1/main_train_all.py | 2 -- checkthat/task1/slurm/conda_setup.sh | 14 
++++++++++++++ checkthat/task1/slurm/start_train_all.sh | 15 +++++++++++++++ checkthat/task1/training_config.yaml | 2 +- 4 files changed, 30 insertions(+), 3 deletions(-) create mode 100644 checkthat/task1/slurm/conda_setup.sh create mode 100644 checkthat/task1/slurm/start_train_all.sh diff --git a/checkthat/task1/main_train_all.py b/checkthat/task1/main_train_all.py index 6303fce..f9a398d 100644 --- a/checkthat/task1/main_train_all.py +++ b/checkthat/task1/main_train_all.py @@ -4,8 +4,6 @@ from training_scripts.training import run_training from transformers import AutoTokenizer from training_scripts.train_config import get_training_arguments -from test_scripts.load_from_checkpoints import find_latest_checkpoint -from test_scripts.run_tests import run_prediction def main(): dataset_list = [ diff --git a/checkthat/task1/slurm/conda_setup.sh b/checkthat/task1/slurm/conda_setup.sh new file mode 100644 index 0000000..92721eb --- /dev/null +++ b/checkthat/task1/slurm/conda_setup.sh @@ -0,0 +1,14 @@ +#!/bin/bash +#SBATCH --gres=gpu:0 +#SBATCH --partition=gpuA100 +#SBATCH --time=1:00:00 +#SBATCH --job-name=conda_setup +#SBATCH --output=conda_setup.out + +uenv verbose cuda-12.2.0 cudnn-12.x-8.8.0 +uenv miniconda3-py39 +conda create -n transformer_cuda12 -c pytorch pytorch torchvision torchaudio pytorch-cuda=12.1 -c nvidia -y +conda activate transformer_cuda12 +pip3 install torch torchvision torchaudio +pip3 install transformers[torch] +pip3 install -r requirements.txt \ No newline at end of file diff --git a/checkthat/task1/slurm/start_train_all.sh b/checkthat/task1/slurm/start_train_all.sh new file mode 100644 index 0000000..3781a0d --- /dev/null +++ b/checkthat/task1/slurm/start_train_all.sh @@ -0,0 +1,15 @@ +#!/bin/bash +#SBATCH --gres=gpu:6 +#SBATCH --partition=gpuA100 +#SBATCH --time=1:00:00 +#SBATCH --job-name=CLEF2024_task1_training +#SBATCH --output=start_train_all.out + +# Activate environment +uenv verbose cuda-12.2.0 cudnn-12.x-8.8.0 +uenv miniconda3-py39 +conda activate transformer_cuda12 +PATH=~/.local/bin:$PATH +echo $PATH +# Run the Python script that uses the GPU +TOKENIZERS_PARALLELISM=false python -u main_train_all.py \ No newline at end of file diff --git a/checkthat/task1/training_config.yaml b/checkthat/task1/training_config.yaml index 4ec4bfc..955fc59 100644 --- a/checkthat/task1/training_config.yaml +++ b/checkthat/task1/training_config.yaml @@ -6,7 +6,7 @@ training_arguments: # evaluation_strategy: 'steps' # evaluate after some number of steps output_dir: './trained_models' # output directory save_total_limit: 3 # number of maximum checkpoints to save - num_train_epochs: 100 # number of training epochs + num_train_epochs: 1 # number of training epochs per_device_train_batch_size: 16 # batch size for training per_device_eval_batch_size: 16 # batch size for evaluation warmup_steps: 500 # number of warmup steps for learning rate scheduler From ec3da9e76bc427dd5e3259393e033f455f4292bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20R=C3=B8ysland=20Aarnes?= Date: Fri, 3 May 2024 11:22:27 +0200 Subject: [PATCH 12/16] Moving thing around and simplifying requirements.txt --- checkthat/task1/{slurm => }/conda_setup.sh | 0 checkthat/task1/requirements.txt | Bin 0 -> 416 bytes checkthat/task1/{slurm => }/start_train_all.sh | 0 requirements.txt | Bin 3520 -> 0 bytes 4 files changed, 0 insertions(+), 0 deletions(-) rename checkthat/task1/{slurm => }/conda_setup.sh (100%) create mode 100644 checkthat/task1/requirements.txt rename checkthat/task1/{slurm => }/start_train_all.sh (100%) 
delete mode 100644 requirements.txt diff --git a/checkthat/task1/slurm/conda_setup.sh b/checkthat/task1/conda_setup.sh similarity index 100% rename from checkthat/task1/slurm/conda_setup.sh rename to checkthat/task1/conda_setup.sh diff --git a/checkthat/task1/requirements.txt b/checkthat/task1/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..54eeae829d4928f2fcc2b3e8983e5d72456cf401 GIT binary patch literal 416 zcmY*V%L>9U5S+8%rzmc8eTKEn#CDf_T_wg)*Ttf?8m;I@rrm$tVS{RTW{<$#TTb}?0FKb zvDJ;rp7jnFa(XH^oNrqWnq`a_U2)U2U*>S9enH+p>%m%3y%~{7ciP`J4>-+~&te98 zDEK?(V5NN>nNcOvPL6$^Xb}IYeKVx;wUH-tmk2%JNY{=(QYoLu9uFcF9hg%u{0p4i BN0a~n literal 0 HcmV?d00001 diff --git a/checkthat/task1/slurm/start_train_all.sh b/checkthat/task1/start_train_all.sh similarity index 100% rename from checkthat/task1/slurm/start_train_all.sh rename to checkthat/task1/start_train_all.sh diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 5d2c97de202f66d51c1e76a6f556f6e2f28e23e0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3520 zcmZ{nOOF#t5QOWDw7&!y;r1{LhdnHpl~y1japJV{8-pK>+lHC>@$ANz(dlZ>va*JG zWL9QmWMo$H&(CR@>$5CdeI3iRob>&7eO{Iqr73Up@1*=zp2@N)hw@qWQ)$CvA{+Ks z1A8a0b=k{%8$RPW|Due0pH^oV@;R37V!x5kO>ghYgHF%ONLe;zCoG3@*3Tfq4^)W< zBpIjCU%`t9XlFV(jgy9`Qj|qD&(Wr>O@_x8bpn<)km?bgzg%j*c4_c$Z?7UJ+?8 zZ&*A<N#*%<=5UX%e^8{CyG3%%C+KIOPSrwkR1guNwAyTQ{_1H=_y#r zIt_$(f#|jDuy0mZ5uL8?%_D{t=S(%$TIWxy+>E+vA~?pMVf`S6 zPEsJ1k;gI6f+yp%xLND7iON?(4jUt#IQMbM*M$14^i3#vAAGG8ov1KGk0!mo@L@}2 zM(cx#Z-s{@V$`Tvj#@R&pCB8K{eih3U)y)9(3BvA4k&6EfFN z_}qrQ#!sI%6har!K+Q$QanJ8gyrcchH6AD|Z6igvS1y#s-iE)rl`D69d7R3R=*w4~ zAST$W>e)!%2K#=)!3#Ub_3c$33{*T#ROha@;)Pk6UQ|Yg+~}}}$37HLHxb#bH>~hF zoCkAJ5w@z2&2vy4<^+%0fmSx6&oMX8rWGW1|xxxttZcaZwM>xl(kI>-)w zP{yIc>7^$PPw&@jKQdo?)g9-KrCyBfgW`NtM!H@V2R}6rx+Dwi_vNm)_OrHop{`cNwhYK-k}U{bGq`>6IMI}@LgwsyPmCm zlzpVA3&l*oo7moo3rx9%P$=8liXk5nT_sz^ELa`p$(WgrsiKg<^ON20gaTF1WM^jS z$ug!XZ+y>w&Ro9i$i{tMwd>8yoAP@5@bUu%+MpBME_9z)5!=@jM;%p5Oe?Pn?i#E0QYgVJ!5Qx1JT}{o0i0`rBDi8hu^+@xEnG9iE)H_WCZ5 z-0nnnMrJKBeN&WO#Gu;SL8t4xF83lGaNYRcL^P{{y}^5=lXm`pKHND^(@Uu&-y%Nk zH^$$J^6PtvoPJMc&PQQqZ~G-t-`(cw9D2U)Tje12c2-U3jN5;5;S$9<3k>oi;f;vS z^Nu4MEPDIF`VV< Date: Fri, 3 May 2024 19:45:21 +0200 Subject: [PATCH 13/16] Tweaks so model and tokenizer get's saved --- checkthat/task1/conda_setup.sh | 2 +- checkthat/task1/requirements.txt | Bin 416 -> 404 bytes checkthat/task1/test_load.py | 17 ++++ checkthat/task1/test_scripts/__init__.py | 0 .../test_scripts/load_from_checkpoints.py | 0 checkthat/task1/test_scripts/run_tests.py | 95 ------------------ checkthat/task1/training_scripts/training.py | 4 + 7 files changed, 22 insertions(+), 96 deletions(-) create mode 100644 checkthat/task1/test_load.py delete mode 100644 checkthat/task1/test_scripts/__init__.py delete mode 100644 checkthat/task1/test_scripts/load_from_checkpoints.py delete mode 100644 checkthat/task1/test_scripts/run_tests.py diff --git a/checkthat/task1/conda_setup.sh b/checkthat/task1/conda_setup.sh index 92721eb..a8e1890 100644 --- a/checkthat/task1/conda_setup.sh +++ b/checkthat/task1/conda_setup.sh @@ -8,7 +8,7 @@ uenv verbose cuda-12.2.0 cudnn-12.x-8.8.0 uenv miniconda3-py39 conda create -n transformer_cuda12 -c pytorch pytorch torchvision torchaudio pytorch-cuda=12.1 -c nvidia -y -conda activate transformer_cuda12 +conda activate transformer_cudlsa12 pip3 install torch torchvision torchaudio pip3 install transformers[torch] pip3 install -r requirements.txt \ No newline at end of file diff --git 
a/checkthat/task1/requirements.txt b/checkthat/task1/requirements.txt index 54eeae829d4928f2fcc2b3e8983e5d72456cf401..73315d450939b1db79a5e93494b33502eba93f4c 100644 GIT binary patch delta 11 ScmZ3$JcW5f1LNc#MiBrQFaw|f delta 11 ScmbQjynuN_1LNcpMg;&D+yj&V diff --git a/checkthat/task1/test_load.py b/checkthat/task1/test_load.py new file mode 100644 index 0000000..739dc20 --- /dev/null +++ b/checkthat/task1/test_load.py @@ -0,0 +1,17 @@ +import torch +from models.custom_model import CustomModel +from transformers import AutoConfig, AutoModelForSequenceClassification, AutoTokenizer + +path = "checkthat/task1/test_model_xlm_roberta/model.safetensors" + +print("test") +print(path) + +model = CustomModel("xlm-roberta-base", 2, "cpu") +model.load_state_dict(torch.load(path, map_location="cpu")) + + +# config = AutoConfig.from_pretrained(path) +# tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base") +# model = AutoModelForSequenceClassification.from_pretrained(path, config=config) + diff --git a/checkthat/task1/test_scripts/__init__.py b/checkthat/task1/test_scripts/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/checkthat/task1/test_scripts/load_from_checkpoints.py b/checkthat/task1/test_scripts/load_from_checkpoints.py deleted file mode 100644 index e69de29..0000000 diff --git a/checkthat/task1/test_scripts/run_tests.py b/checkthat/task1/test_scripts/run_tests.py deleted file mode 100644 index 1768771..0000000 --- a/checkthat/task1/test_scripts/run_tests.py +++ /dev/null @@ -1,95 +0,0 @@ -import torch -import pandas as pd -from models.custom_model import CustomModel -from tokenization.tokenizer import TextDataset -import wandb -from metrics.compute_metrics import compute_metrics - - -import os -from transformers import AutoModelForSequenceClassification -import torch - -def find_latest_checkpoint(model_dir): - """Find the latest checkpoint in the given directory.""" - checkpoint_dirs = [os.path.join(model_dir, d) for d in os.listdir(model_dir) if os.path.isdir(os.path.join(model_dir, d)) and 'checkpoint' in d] - if not checkpoint_dirs: - raise ValueError("No checkpoint directories found in the given model directory.") - latest_checkpoint = sorted(checkpoint_dirs, key=lambda x: int(x.split('-')[-1]))[-1] - return latest_checkpoint - -def find_models_with_checkpoints(base_dir): - """Find models and their latest checkpoints in a structured directory. - Args: - base_dir (str): Directory containing subdirectories of models - Returns: - model_info (list): List of tuples containing model name and path to the latest checkpoint. 
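The training change in this patch saves the fine-tuned weights and tokenizer with save_pretrained() so they can be reloaded without digging through Trainer checkpoints. A minimal round-trip sketch; the base checkpoint and output directory are placeholders:

# Illustrative sketch: save_pretrained()/from_pretrained() round trip.
from transformers import AutoModelForSequenceClassification, AutoTokenizer

save_dir = "./trained_models/example_run"  # hypothetical output directory
model = AutoModelForSequenceClassification.from_pretrained("xlm-roberta-base", num_labels=2)
tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")

model.save_pretrained(save_dir)      # writes config.json and the weights (safetensors)
tokenizer.save_pretrained(save_dir)  # writes the tokenizer files next to the model

reloaded_model = AutoModelForSequenceClassification.from_pretrained(save_dir)
reloaded_tokenizer = AutoTokenizer.from_pretrained(save_dir)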
- """ - model_info = [] - for model_name in os.listdir(base_dir): - model_path = os.path.join(base_dir, model_name) - if not os.path.isdir(model_path): - continue - - try: - latest_checkpoint = find_latest_checkpoint(model_path) - model_info.append((model_name, latest_checkpoint)) - print(f"Model: {model_name}, Latest Checkpoint: {latest_checkpoint}") - except Exception as e: - print(f"Failed to find checkpoint for model {model_name}: {str(e)}") - - return model_info - - - -def run_prediction(model_name, dataset, tokenizer, model_named_trained, has_labels: bool): - """Run prediction on the dataset, compute metrics if labels are present, and write results to a .tsv file.""" - - device = 'cuda' - label_map = {0: 'no', 1: 'yes'} # Ensure this mapping is correct for your model - - # Initialize Weights & Biases - run_name = f"TEST__{model_named_trained}" - wandb.init(project="Clef2024", entity="aarnes", name=run_name) - - model = CustomModel(model_name=model_name, num_labels=len(label_map), device=device) - model.eval() - - # Load the dataset, with or without labels - test_dataset = TextDataset(dataset["test"], tokenizer, None if not has_labels else label_map) - all_logits = [] - all_labels = [] - results = [] - - with torch.no_grad(): - for i, batch in enumerate(test_dataset): - input_ids, attention_mask = batch['input_ids'].to(device), batch['attention_mask'].to(device) - output = model(input_ids=input_ids, attention_mask=attention_mask) - logits = output.logits - predictions = logits.argmax(-1).cpu().numpy() - - # Collect logits and labels for metric calculation if labels are present - if has_labels and 'labels' in batch: - labels = batch['labels'].cpu().numpy() - all_logits.append(logits) - all_labels.append(torch.tensor(labels)) - for label, pred in zip(labels, predictions): - results.append((i, label_map[pred], model_named_trained)) - else: - for pred in predictions: - results.append((i, label_map[pred], model_named_trained)) - - # If labels were present, calculate metrics - if has_labels: - all_logits = torch.cat(all_logits) - all_labels = torch.cat(all_labels) - predictions = all_logits.argmax(-1) - metrics = compute_metrics((predictions, all_labels)) - wandb.log(metrics) - - # Save results to a .tsv file - df = pd.DataFrame(results, columns=['sentence_id', 'prediction', 'model_name']) - df.to_csv(f"{model_named_trained}_predictions.tsv", sep='\t', index=False) - - # Finish Weights & Biases logging - wandb.finish() diff --git a/checkthat/task1/training_scripts/training.py b/checkthat/task1/training_scripts/training.py index cc23944..b2f3833 100644 --- a/checkthat/task1/training_scripts/training.py +++ b/checkthat/task1/training_scripts/training.py @@ -68,6 +68,10 @@ def run_training(seed, dataset, model, tokenizer, label_map, training_arguments, # Train the model trainer.train() + model.save_pretrained("./trained_models") + + # Save the tokenizer + tokenizer.save_pretrained("./trained_models") # Finish the wandb run after each seed wandb.finish() From 06c4ac3c1d265fdbbb007d17e515dfedde6a72bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20R=C3=B8ysland=20Aarnes?= Date: Sun, 5 May 2024 21:05:34 +0200 Subject: [PATCH 14/16] Implemented sweep functionality for training --- checkthat/task1/auth.sh | 37 +++++++ checkthat/task1/conda_setup.sh | 19 ++-- .../{ => depricated_scripts}/main_test_all.py | 0 .../main_train_all.py | 0 checkthat/task1/main.py | 28 +++--- checkthat/task1/requirements.txt | Bin 404 -> 434 bytes checkthat/task1/start_train.sh | 29 ++++++ checkthat/task1/sweep.yaml | 29 
++++++ checkthat/task1/test_load.py | 17 ---- checkthat/task1/training_scripts/training.py | 95 ++++++++++-------- 10 files changed, 177 insertions(+), 77 deletions(-) create mode 100644 checkthat/task1/auth.sh rename checkthat/task1/{ => depricated_scripts}/main_test_all.py (100%) rename checkthat/task1/{ => depricated_scripts}/main_train_all.py (100%) create mode 100644 checkthat/task1/start_train.sh create mode 100644 checkthat/task1/sweep.yaml delete mode 100644 checkthat/task1/test_load.py diff --git a/checkthat/task1/auth.sh b/checkthat/task1/auth.sh new file mode 100644 index 0000000..cbbf433 --- /dev/null +++ b/checkthat/task1/auth.sh @@ -0,0 +1,37 @@ +#!/bin/bash +#SBATCH --gres=gpu:0 +#SBATCH --partition=gpuA100 +#SBATCH --time=1:00:00 +#SBATCH --job-name=setup_authenticator.sh +#SBATCH --output=hf_test.out +# Load necessary modules, if required +# module load python/3.9 # Adjust this according to your environment + +# Activate your Python environment +# source ~/bhome/env/checkthat2024_env/bin/activate + +# Explicitly specify the path to the correct Python executable +# PYTHON="~/bhome/env/checkthat2024_env/bin/" +PYTHON="~/.conda/envs/CLEF_checkthat2024/bin" +uenv miniconda3-py39 + +# Activate the Conda environment +#conda activate ~/bhome/env/checkthat2024_env +conda activate ~/.conda/envs/CLEF_checkthat2024 + +export HF_HOME=~/bhome/clef2024-checkthat/checkthat/task1 +# Create necessary directories +mkdir -p $HF_HOME $WANDB_CACHE_DIR + +# Store the Hugging Face token +echo 'KEY' > $HF_HOME/token +chmod 600 $HF_HOME/token + +# Log in to wandb +export WANDB_API_KEY='KEY' +export WANDB_CACHE_DIR=~/bhome/clef2024-checkthat/checkthat/task1 +wandb login KEY + +# Test the Hugging Face API with a Python script +#$PYTHON test_start.py +python -u test_hf_login.py diff --git a/checkthat/task1/conda_setup.sh b/checkthat/task1/conda_setup.sh index a8e1890..6305e07 100644 --- a/checkthat/task1/conda_setup.sh +++ b/checkthat/task1/conda_setup.sh @@ -1,14 +1,21 @@ #!/bin/bash #SBATCH --gres=gpu:0 -#SBATCH --partition=gpuA100 +#SBATCH --partition=gpuA100 #SBATCH --time=1:00:00 #SBATCH --job-name=conda_setup #SBATCH --output=conda_setup.out +module load cuda/12.2.0 cudnn/8.8.0 # Load CUDA and cuDNN modules + +# Activate the user environment (uenv) uenv verbose cuda-12.2.0 cudnn-12.x-8.8.0 uenv miniconda3-py39 -conda create -n transformer_cuda12 -c pytorch pytorch torchvision torchaudio pytorch-cuda=12.1 -c nvidia -y -conda activate transformer_cudlsa12 -pip3 install torch torchvision torchaudio -pip3 install transformers[torch] -pip3 install -r requirements.txt \ No newline at end of file + +# Create and activate the Conda environment +conda create -n CLEF_checkthat2024 -c pytorch pytorch torchvision torchaudio pytorch-cuda=12.1 -c nvidia -y +conda activate CLEF_checkthat2024 + +# Install Python packages +pip install torch torchvision torchaudio +pip install transformers[torch] +pip install -r requirements.txt \ No newline at end of file diff --git a/checkthat/task1/main_test_all.py b/checkthat/task1/depricated_scripts/main_test_all.py similarity index 100% rename from checkthat/task1/main_test_all.py rename to checkthat/task1/depricated_scripts/main_test_all.py diff --git a/checkthat/task1/main_train_all.py b/checkthat/task1/depricated_scripts/main_train_all.py similarity index 100% rename from checkthat/task1/main_train_all.py rename to checkthat/task1/depricated_scripts/main_train_all.py diff --git a/checkthat/task1/main.py b/checkthat/task1/main.py index cc7ff58..738f2fe 100644 --- 
a/checkthat/task1/main.py +++ b/checkthat/task1/main.py @@ -1,4 +1,4 @@ -"""Will run script to run training and testing. (test yet to be implemented) +"""Will run script to run training and testing. (unlabeled tests yet to ble implemented) Argument parser is used to specify the model name and dataset name. """ @@ -6,31 +6,33 @@ from datasets import load_dataset from training_scripts.training import run_training from transformers import AutoTokenizer +from tokenization.tokenizer import TextDataset def main(args): """Run training.""" + label_map = {"Yes": 1, "No": 0} + tokenizer = AutoTokenizer.from_pretrained(args.model_name) dataset = load_dataset(args.dataset) - label_map = {"No": 0, "Yes": 1} # Label map for the dataset - seeds = [42, 81, 1024, 6, 10] # Seeds for reproducibility - if args.train: - for seed in seeds: - run_training(seed, dataset, args.model_name, tokenizer, label_map) + dataset_language = args.dataset.split("_")[-2:] + + train_dataset = TextDataset(dataset["train"], tokenizer, label_map) + eval_dataset = TextDataset(dataset["validation"], tokenizer, label_map) + test_dataset = TextDataset(dataset["test"], tokenizer, label_map) + + run_training(train_dataset, eval_dataset, args.model_name, label_map, dataset_language, test_dataset) if __name__ == "__main__": parser = argparse.ArgumentParser(description="Run training and testing.") - parser.add_argument( - "--train", action="store_true", help="Whether to run training" - ) - parser.add_argument( - "--test", action="store_true", help="Whether to run testing" - ) + # parser.add_argument( + # "--test", action="store_true", help="Whether to run testing" + # ) parser.add_argument( "--model_name", type=str, @@ -41,7 +43,7 @@ def main(args): "--dataset", type=str, default="iai-group/clef2024_checkthat_task1_en", # For English language - help="Name of the dataset", + help="Name of the dataset from the iai-group/clef2024_checkthat_task1_* datasets", ) args = parser.parse_args() diff --git a/checkthat/task1/requirements.txt b/checkthat/task1/requirements.txt index 73315d450939b1db79a5e93494b33502eba93f4c..34938acef0ef25d67c97c989227235f80a7bfaa8 100644 GIT binary patch delta 38 ocmbQjyoq_k6h^rWhEj%fAk1XQ1CnVBi44gMsSLUdAn_yy0KVP`lmGw# delta 7 OcmdnQJcW6~6h;6GAOeyA diff --git a/checkthat/task1/start_train.sh b/checkthat/task1/start_train.sh new file mode 100644 index 0000000..f2dd707 --- /dev/null +++ b/checkthat/task1/start_train.sh @@ -0,0 +1,29 @@ +#!/bin/bash +#SBATCH --gres=gpu:1 +#SBATCH --partition=gpuA100 +#SBATCH --time=1:00:00 +#SBATCH --job-name=CLEF2024_task1_training +#SBATCH --output=start_train_all.out + +# Load CUDA and cuDNN modules +module load cuda/12.2.0 cudnn/8.8.0 + +# Activate the user environment (uenv) +uenv verbose cuda-12.2.0 cudnn-12.x-8.8.0 +uenv miniconda3-py39 + +# Activate the Conda environment +#conda activate ~/bhome/env/checkthat2024_env + +conda ~/.conda/envs/CLEF_checkthat2024 + + +# Add user's local bin directory to the PATH +PATH=~/.local/bin:$PATH +echo $PATH + +# Disable tokenizers parallelism for better GPU utilization +export TOKENIZERS_PARALLELISM=false + +# Run the Python script that uses the GPU +python -u main_train_all.py \ No newline at end of file diff --git a/checkthat/task1/sweep.yaml b/checkthat/task1/sweep.yaml new file mode 100644 index 0000000..f4a5e5d --- /dev/null +++ b/checkthat/task1/sweep.yaml @@ -0,0 +1,29 @@ +# Porgram to run +program: main.py + +# Sweep method can be grid, random, bayesian +method: random + +# Project for sweep +project: testsweep +entity: iai-group + +# 
Metrics to optimize +metric: + name: f1, + goal: maximize + + +parameters: + metric_for_best_model: + values: ["f1"] + hidden_dropout_prob: + values: [0.1, 0.2, 0.3] # Define discrete steps for grid search + epochs: + values: [10, 20, 50] # Convert range to discrete values + batch_size: + values: [16, 32] + learning_rate: + values: [0.000025, 0.00005, 0.000075, 0.0001] # Define steps for learning rate +seed: + values: [42, 123, 2023, 1002, 95] \ No newline at end of file diff --git a/checkthat/task1/test_load.py b/checkthat/task1/test_load.py deleted file mode 100644 index 739dc20..0000000 --- a/checkthat/task1/test_load.py +++ /dev/null @@ -1,17 +0,0 @@ -import torch -from models.custom_model import CustomModel -from transformers import AutoConfig, AutoModelForSequenceClassification, AutoTokenizer - -path = "checkthat/task1/test_model_xlm_roberta/model.safetensors" - -print("test") -print(path) - -model = CustomModel("xlm-roberta-base", 2, "cpu") -model.load_state_dict(torch.load(path, map_location="cpu")) - - -# config = AutoConfig.from_pretrained(path) -# tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base") -# model = AutoModelForSequenceClassification.from_pretrained(path, config=config) - diff --git a/checkthat/task1/training_scripts/training.py b/checkthat/task1/training_scripts/training.py index b2f3833..0f1e10b 100644 --- a/checkthat/task1/training_scripts/training.py +++ b/checkthat/task1/training_scripts/training.py @@ -9,69 +9,82 @@ from metrics.compute_metrics import compute_metrics from training_scripts.train_config import get_training_arguments from training_scripts.train_config import get_language -import random import numpy as np import torch import torch.cuda import torch torch.backends.cuda.matmul.allow_tf32 = True torch.backends.cudnn.allow_tf32 = True +from transformers import AutoModelForSequenceClassification, AutoTokenizer, TrainingArguments -def set_seed(seed): - """Set seed for reproducibility.""" - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - if torch.cuda.is_available(): - torch.cuda.manual_seed_all(seed) -def run_training(seed, dataset, model, tokenizer, label_map, training_arguments, dataset_language): - """Start training the model for a single seed. - Args: - seed: seed for reproducibility - dataset: dataset dictionary containing train and validation splits - model: huggingface model name - tokenizer: huggerface tokenizer/same as model name - label_map: dictionary mapping labels to integers - """ - # Initialize wandb run - set_seed(seed) - run_name = f"{model}_{seed}_{dataset_language}" - wandb.init( - project="Clef2024", +def run_training(train_dataset, eval_dataset, model_name, label_map, dataset_language, test_dataset=None,): + """Run training sweep. 
Evaluate on validation set and test set.""" + + run_name = wandb.init( + project="sweep_test", entity="aarnes", - name=run_name, - config={"seed": seed}, - ) + reinit=True + ).name - # Prepare datasets - train_dataset = TextDataset(dataset["train"], tokenizer, label_map) - eval_dataset = TextDataset(dataset["validation"], tokenizer, label_map) + # Load model and tokenizer from Hugging Face + hf_model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=len(label_map)) + hf_tokenizer = AutoTokenizer.from_pretrained(model_name) - # training_arguments = get_training_arguments() - training_arguments.run_name = ( - run_name # Optional, sync the name with Trainer's internal wandb run + + # Define training arguments + training_arguments = TrainingArguments( + output_dir="./results", # Directory to save model and tokenizer + evaluation_strategy="epoch", + learning_rate=wandb.config.learning_rate, + per_device_train_batch_size=wandb.config.batch_size, + num_train_epochs=wandb.config.epochs, + logging_dir='./logs', + logging_steps=10, + do_train=True, + do_eval=True, + load_best_model_at_end=True, + save_strategy="epoch", # Save model at the end of each epoch + save_total_limit=1, # Optional: limits the total amount of checkpoints, deleting older + report_to="wandb", + run_name=run_name, ) - # Creating a Trainer instance with training arguments and datasets + # Create a Trainer instance trainer = Trainer( - model=CustomModel(model, num_labels=len(label_map), device='cuda'), + model=hf_model, args=training_arguments, train_dataset=train_dataset, eval_dataset=eval_dataset, - compute_metrics=compute_metrics(), - callbacks=[ - EarlyStoppingCallback(early_stopping_patience=3) - ], # Early stopping callback + compute_metrics=compute_metrics, + callbacks=[EarlyStoppingCallback(early_stopping_patience=3)] ) # Train the model trainer.train() - model.save_pretrained("./trained_models") + # Evaluate the model on the test dataset + test_output = trainer.predict(test_dataset) + test_results = {f"test_{k}": v for k, v in test_output.metrics.items()} + + + + # Evaluate the model + eval_results = trainer.evaluate() + + # Log evaluation and test results to W&B + wandb.log({"eval_results": eval_results}) + wandb.log({"test_results": test_results}) - # Save the tokenizer - tokenizer.save_pretrained("./trained_models") + # Save model and tokenizer at the end of training + model_path = f"{training_arguments.output_dir}/{run_name}_model_{dataset_language}" + tokenizer_path = f"{training_arguments.output_dir}/{run_name}_tokenizer_{dataset_language}" - # Finish the wandb run after each seed + hf_model.save_pretrained(model_path) + hf_tokenizer.save_pretrained(tokenizer_path) + + # Ensure the W&B run is finished wandb.finish() + + # Return paths for model and tokenizer for user reference + return model_path, tokenizer_path \ No newline at end of file From 9476daeba1f4084ca5676f290f26ec8c3de2cc26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20R=C3=B8ysland=20Aarnes?= Date: Mon, 6 May 2024 10:55:05 +0200 Subject: [PATCH 15/16] Restructuring folders (simplified compute metrics). Minor alterations to training scripts. 
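This patch also switches the averaging mode once more: the standalone metrics logger goes back to 'binary' while the Trainer-side compute_metrics uses 'macro'. A short sketch of the difference, with made-up labels, since the two can diverge noticeably on imbalanced check-worthiness data:

# Illustrative sketch: "binary" scores only the positive class, "macro" averages per-class scores.
from sklearn.metrics import f1_score

labels      = [1, 0, 0, 0, 1, 0]   # made-up gold labels
predictions = [1, 0, 0, 1, 0, 0]   # made-up model output

print(f1_score(labels, predictions, average="binary", pos_label=1))  # F1 of the positive class only
print(f1_score(labels, predictions, average="macro"))                # mean of per-class F1 scores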
--- .../metrics/__init__.py | 0 .../metrics/compute_metrics.py | 0 .../metrics/metrics_logger.py | 6 +-- checkthat/task1/main.py | 2 +- checkthat/task1/start_train.sh | 50 ++++++++++++++++--- checkthat/task1/training_scripts/training.py | 31 ++++++++---- 6 files changed, 70 insertions(+), 19 deletions(-) rename checkthat/task1/{ => depricated_scripts}/metrics/__init__.py (100%) rename checkthat/task1/{ => depricated_scripts}/metrics/compute_metrics.py (100%) rename checkthat/task1/{ => depricated_scripts}/metrics/metrics_logger.py (84%) diff --git a/checkthat/task1/metrics/__init__.py b/checkthat/task1/depricated_scripts/metrics/__init__.py similarity index 100% rename from checkthat/task1/metrics/__init__.py rename to checkthat/task1/depricated_scripts/metrics/__init__.py diff --git a/checkthat/task1/metrics/compute_metrics.py b/checkthat/task1/depricated_scripts/metrics/compute_metrics.py similarity index 100% rename from checkthat/task1/metrics/compute_metrics.py rename to checkthat/task1/depricated_scripts/metrics/compute_metrics.py diff --git a/checkthat/task1/metrics/metrics_logger.py b/checkthat/task1/depricated_scripts/metrics/metrics_logger.py similarity index 84% rename from checkthat/task1/metrics/metrics_logger.py rename to checkthat/task1/depricated_scripts/metrics/metrics_logger.py index 309b438..c8a2e4d 100644 --- a/checkthat/task1/metrics/metrics_logger.py +++ b/checkthat/task1/depricated_scripts/metrics/metrics_logger.py @@ -20,9 +20,9 @@ def compute_custom_metrics(logits, labels): predictions = np.argmax(logits, axis=1) # Convert logits to predictions # Calculate metrics - precision = precision_score(labels, predictions, average="macro", pos_label=1) - recall = recall_score(labels, predictions, average="macro", pos_label=1) - f1 = f1_score(labels, predictions, average="macro", pos_label=1) + precision = precision_score(labels, predictions, average="binary", pos_label=1) + recall = recall_score(labels, predictions, average="binary", pos_label=1) + f1 = f1_score(labels, predictions, average="binary", pos_label=1) return precision, recall, f1 diff --git a/checkthat/task1/main.py b/checkthat/task1/main.py index 738f2fe..f7394f4 100644 --- a/checkthat/task1/main.py +++ b/checkthat/task1/main.py @@ -45,6 +45,6 @@ def main(args): default="iai-group/clef2024_checkthat_task1_en", # For English language help="Name of the dataset from the iai-group/clef2024_checkthat_task1_* datasets", ) - + args = parser.parse_args() main(args) diff --git a/checkthat/task1/start_train.sh b/checkthat/task1/start_train.sh index f2dd707..2e16f63 100644 --- a/checkthat/task1/start_train.sh +++ b/checkthat/task1/start_train.sh @@ -1,9 +1,9 @@ #!/bin/bash -#SBATCH --gres=gpu:1 +#SBATCH --gres=gpu:8 #SBATCH --partition=gpuA100 -#SBATCH --time=1:00:00 -#SBATCH --job-name=CLEF2024_task1_training -#SBATCH --output=start_train_all.out +#SBATCH --time=24:00:00 +#SBATCH --job-name=checkthat_training +#SBATCH --output=checkthat_training.out # Load CUDA and cuDNN modules module load cuda/12.2.0 cudnn/8.8.0 @@ -25,5 +25,43 @@ echo $PATH # Disable tokenizers parallelism for better GPU utilization export TOKENIZERS_PARALLELISM=false -# Run the Python script that uses the GPU -python -u main_train_all.py \ No newline at end of file +PROJECT_NAME="EN-SWEEP-no-data-alter" + +run_sweep_and_agent () { + # Ensure the PROJECT_NAME environment variable is set + if [[ -z "$PROJECT_NAME" ]]; then + echo "Error: PROJECT_NAME must be set." 
+ return 1 + fi + + echo "Initializing sweep using sweep.yaml in project: $PROJECT_NAME..." + + # Run the wandb sweep command using a fixed file path + wandb sweep --project "$PROJECT_NAME" "sweep.yaml" > temp_output.txt 2>&1 + + # Check if the wandb sweep command succeeded + if [ $? -ne 0 ]; then + echo "Error: Failed to initialize sweep. See output below:" + cat temp_output.txt + return 1 + fi + + # Extract the sweep ID using awk + SWEEP_ID=$(awk '/wandb agent/{ match($0, /wandb agent (.+)/, arr); print arr[1]; }' temp_output.txt) + + # Check if the sweep ID was extracted successfully + if [[ -z "$SWEEP_ID" ]]; then + echo "Error: Failed to extract sweep ID from output." + cat temp_output.txt + return 1 + fi + + # Cleanup: Remove the temporary output file + rm temp_output.txt + + # Run the wandb agent command + echo "Starting wandb agent for sweep ID: $SWEEP_ID" + wandb agent $SWEEP_ID +} + +run_sweep_and_agent = EN-SWEEP-no-data-alter \ No newline at end of file diff --git a/checkthat/task1/training_scripts/training.py b/checkthat/task1/training_scripts/training.py index 0f1e10b..dd314bb 100644 --- a/checkthat/task1/training_scripts/training.py +++ b/checkthat/task1/training_scripts/training.py @@ -3,12 +3,16 @@ This script trains the model for a single seed. """ import wandb -from transformers import Trainer, EarlyStoppingCallback +from transformers import Trainer, EarlyStoppingCallback, EvalPrediction +from sklearn.metrics import precision_recall_fscore_support, accuracy_score + from tokenization.tokenizer import TextDataset from models.custom_model import CustomModel -from metrics.compute_metrics import compute_metrics +#from metrics.compute_metrics import compute_metrics from training_scripts.train_config import get_training_arguments from training_scripts.train_config import get_language +from sklearn.metrics import precision_recall_fscore_support, accuracy_score + import numpy as np import torch import torch.cuda @@ -17,6 +21,12 @@ torch.backends.cudnn.allow_tf32 = True from transformers import AutoModelForSequenceClassification, AutoTokenizer, TrainingArguments +def compute_metrics(p: EvalPrediction): + preds = np.argmax(p.predictions, axis=1) + labels = p.label_ids + precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='macro', pos_label=1) + acc = accuracy_score(labels, preds) + return {"accuracy": acc, "f1": f1, "precision": precision, "recall": recall} def run_training(train_dataset, eval_dataset, model_name, label_map, dataset_language, test_dataset=None,): @@ -35,7 +45,7 @@ def run_training(train_dataset, eval_dataset, model_name, label_map, dataset_lan # Define training arguments training_arguments = TrainingArguments( - output_dir="./results", # Directory to save model and tokenizer + output_dir=f"./results_{dataset_language}", # Directory to save model and tokenizer evaluation_strategy="epoch", learning_rate=wandb.config.learning_rate, per_device_train_batch_size=wandb.config.batch_size, @@ -61,6 +71,7 @@ def run_training(train_dataset, eval_dataset, model_name, label_map, dataset_lan callbacks=[EarlyStoppingCallback(early_stopping_patience=3)] ) + # Train the model trainer.train() # Evaluate the model on the test dataset @@ -69,12 +80,7 @@ def run_training(train_dataset, eval_dataset, model_name, label_map, dataset_lan - # Evaluate the model - eval_results = trainer.evaluate() - - # Log evaluation and test results to W&B - wandb.log({"eval_results": eval_results}) - wandb.log({"test_results": test_results}) + # Save model and tokenizer at 
the end of training model_path = f"{training_arguments.output_dir}/{run_name}_model_{dataset_language}" @@ -83,6 +89,13 @@ def run_training(train_dataset, eval_dataset, model_name, label_map, dataset_lan hf_model.save_pretrained(model_path) hf_tokenizer.save_pretrained(tokenizer_path) + # Evaluate the model + eval_results = trainer.evaluate() + + # Log evaluation and test results to W&B + wandb.log({"eval_results": eval_results}) + wandb.log({"test_results": test_results}) + # Ensure the W&B run is finished wandb.finish() From 799b544e464b86556f2d7f7d8fa28c91bc63e2f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20R=C3=B8ysland=20Aarnes?= Date: Mon, 6 May 2024 13:20:16 +0200 Subject: [PATCH 16/16] Tweaks --- checkthat/task1/sweep.yaml | 2 +- checkthat/task1/training_scripts/training.py | 13 +++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/checkthat/task1/sweep.yaml b/checkthat/task1/sweep.yaml index f4a5e5d..f3de7d0 100644 --- a/checkthat/task1/sweep.yaml +++ b/checkthat/task1/sweep.yaml @@ -20,7 +20,7 @@ parameters: hidden_dropout_prob: values: [0.1, 0.2, 0.3] # Define discrete steps for grid search epochs: - values: [10, 20, 50] # Convert range to discrete values + values: [50] # Convert range to discrete values batch_size: values: [16, 32] learning_rate: diff --git a/checkthat/task1/training_scripts/training.py b/checkthat/task1/training_scripts/training.py index dd314bb..ee3ce84 100644 --- a/checkthat/task1/training_scripts/training.py +++ b/checkthat/task1/training_scripts/training.py @@ -43,24 +43,25 @@ def run_training(train_dataset, eval_dataset, model_name, label_map, dataset_lan hf_tokenizer = AutoTokenizer.from_pretrained(model_name) - # Define training arguments training_arguments = TrainingArguments( - output_dir=f"./results_{dataset_language}", # Directory to save model and tokenizer + output_dir=f"./results_{dataset_language}", evaluation_strategy="epoch", learning_rate=wandb.config.learning_rate, per_device_train_batch_size=wandb.config.batch_size, num_train_epochs=wandb.config.epochs, logging_dir='./logs', - logging_steps=10, + logging_steps=100, do_train=True, do_eval=True, load_best_model_at_end=True, - save_strategy="epoch", # Save model at the end of each epoch - save_total_limit=1, # Optional: limits the total amount of checkpoints, deleting older + metric_for_best_model="f1", # Here you specify the metric from your sweep config + greater_is_better=True, # Since the goal is to maximize + save_strategy="epoch", + save_total_limit=1, report_to="wandb", run_name=run_name, ) - + # Create a Trainer instance trainer = Trainer( model=hf_model,