From 67ad7adef80116c2f3e23eedff0d1931bfc11b8f Mon Sep 17 00:00:00 2001 From: porteratzo Date: Thu, 5 Oct 2023 08:46:33 -0700 Subject: [PATCH 1/7] llm tutorial --- openfl-tutorials/Federated_PyTorch_LLM.ipynb | 474 +++++++++++++++++++ 1 file changed, 474 insertions(+) create mode 100644 openfl-tutorials/Federated_PyTorch_LLM.ipynb diff --git a/openfl-tutorials/Federated_PyTorch_LLM.ipynb b/openfl-tutorials/Federated_PyTorch_LLM.ipynb new file mode 100644 index 0000000000..4ace9f3d30 --- /dev/null +++ b/openfl-tutorials/Federated_PyTorch_LLM.ipynb @@ -0,0 +1,474 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Federated PyTorch LLM Tutorial" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook is an example of Transfer Learning \n", + "\n", + "Custom DataLoader is used with OpenFL Python API" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Install dependencies if not already installed\n", + "!pip install torch torchvision peft transformers sentencepiece huggingface_hub accelerate datasets evaluate seqeval\n", + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from typing import Any, Mapping\n", + "import numpy as np\n", + "import openfl.native as fx\n", + "import torch\n", + "import torch as pt\n", + "from accelerate import Accelerator\n", + "from datasets import Dataset, load_dataset, load_metric\n", + "from openfl.federated import PyTorchTaskRunner, TaskRunner\n", + "from openfl.federated.task.runner_pt import change_tags\n", + "from openfl.utilities import Metric, TensorKey\n", + "from openfl.utilities.data_splitters import EqualNumPyDataSplitter\n", + "from peft import LoraConfig, TaskType, get_peft_model\n", + "from peft.utils import get_peft_model_state_dict, set_peft_model_state_dict\n", + "from torch.nn 
import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss\n", + "from torch.optim import AdamW\n", + "from torch.utils.data import DataLoader\n", + "from tqdm import tqdm\n", + "import torch.nn as nn\n", + "\n", + "from transformers import (AutoConfig, AutoModelForSequenceClassification,\n", + " AutoTokenizer, DataCollatorWithPadding)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "After importing the required packages, the next step is setting up our openfl workspace. To do this, simply run the `fx.init()` command as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Setup default workspace, logging, etc.\n", + "fx.init('torch_cnn_mnist')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we are ready to define our dataset and model to perform federated learning on. We fine-tune a pretrained RoBERTa model with LoRA adapters on the GLUE MRPC sentence-pair dataset. 
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Download the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_glue_mrpc_dataset(tokenizer):\n", + " dataset = load_dataset(\"glue\", \"mrpc\")\n", + "\n", + " def tokenize_function(examples):\n", + " # max_length=None => use the model max length (it's actually the default)\n", + " outputs = tokenizer(\n", + " examples[\"sentence1\"],\n", + " examples[\"sentence2\"],\n", + " truncation=True,\n", + " max_length=None,\n", + " )\n", + " return outputs\n", + "\n", + " tokenized_datasets = dataset.map(\n", + " tokenize_function,\n", + " batched=True,\n", + " remove_columns=[\"idx\", \"sentence1\", \"sentence2\"],\n", + " )\n", + " tokenized_datasets = tokenized_datasets.rename_column(\"label\", \"labels\")\n", + " tokenized_datasets.set_format(\"torch\")\n", + " data_collator = DataCollatorWithPadding(tokenizer=tokenizer, padding=\"longest\")\n", + " return data_collator, tokenized_datasets\n", + "\n", + "base_model_name = \"roberta-large\"\n", + "padding_side = \"right\"\n", + "tokenizer = AutoTokenizer.from_pretrained(base_model_name, padding_side=padding_side)\n", + "if getattr(tokenizer, \"pad_token_id\") is None:\n", + " tokenizer.pad_token_id = tokenizer.eos_token_id\n", + "data_collator, tokenized_datasets = get_glue_mrpc_dataset(tokenizer)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Describe the dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class GlueMrpc(Dataset):\n", + " \"\"\"\n", + " Has 5.8k pairs of sentences with annotations if the two sentences are equivalent\n", + " \"\"\" \n", + " def get_shape(self):\n", + " \n", + " if not hasattr(self, 'saved_shape'):\n", + " self.saved_shape = max([len(i) for i in self.data['input_ids']])\n", + " return self.saved_shape\n", + "\n", + "train_set 
= GlueMrpc.from_dict(tokenized_datasets['train'].to_dict())\n", + "valid_set = GlueMrpc.from_dict(tokenized_datasets['test'].to_dict())\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Implement Federated dataset\n", + "We have to implement `split` method" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class GlueMrpcFederatedDataset(DataLoader):\n", + " def __init__(self, train_set, valid_set, batch_size, data_collator=None):\n", + " self.data_splitter = EqualNumPyDataSplitter()\n", + " if isinstance(train_set,Dataset):\n", + " self.train_set = GlueMrpc.from_dict(train_set.to_dict())\n", + " else:\n", + " self.train_set = train_set\n", + " \n", + " if isinstance(valid_set,Dataset):\n", + " self.valid_set = GlueMrpc.from_dict(valid_set.to_dict())\n", + " else:\n", + " self.valid_set = valid_set \n", + " \n", + " self.batch_size = batch_size\n", + " self.data_collator = data_collator\n", + " \n", + " def split(self, num_collaborators):\n", + " train_split = self.data_splitter.split(self.train_set, num_collaborators)\n", + " valid_split = self.data_splitter.split(self.valid_set, num_collaborators)\n", + " return [\n", + " GlueMrpcFederatedDataset(\n", + " self.train_set.select(train_split[i]),\n", + " self.valid_set.select(valid_split[i]),\n", + " self.batch_size\n", + " )\n", + " for i in range(num_collaborators)\n", + " ]\n", + " \n", + " def get_feature_shape(self):\n", + " return self.train_set.get_shape()\n", + " \n", + " def get_train_loader(self, num_batches=None):\n", + " return DataLoader(self.train_set, batch_size=self.batch_size, collate_fn=data_collator)\n", + " \n", + " def get_valid_loader(self):\n", + " return DataLoader(self.valid_set, collate_fn=data_collator)\n", + " \n", + " def get_train_data_size(self):\n", + " return len(self.train_set)\n", + " \n", + " def get_valid_data_size(self):\n", + " return len(self.valid_set)\n", + " \n", + "fl_data = 
GlueMrpcFederatedDataset(train_set, valid_set, batch_size=32)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Define model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class LLMTaskRunner(PyTorchTaskRunner):\n", + " def __init__(self, base_model_name, data_loader, device=None, metric=None, **kwargs):\n", + " kwargs['data_loader'] = data_loader\n", + " super().__init__(device, **kwargs)\n", + " self.base_model_name = base_model_name\n", + " self.metric = metric\n", + " self._init_model()\n", + " self._init_optimizer()\n", + " \n", + " def _init_model(self):\n", + " model = AutoModelForSequenceClassification.from_pretrained(\n", + " self.base_model_name, return_dict=True)\n", + " peft_config = LoraConfig(task_type=TaskType.SEQ_CLS, inference_mode=False, r=16, lora_alpha=16, lora_dropout=0.1, bias=\"all\")\n", + " self.model = get_peft_model(model, peft_config)\n", + " \n", + " def _init_optimizer(self):\n", + " no_decay = [\"bias\", \"LayerNorm.weight\"]\n", + " optimizer_grouped_parameters = [\n", + " {\n", + " \"params\": [p for n, p in self.model.named_parameters() if not any(nd in n for nd in no_decay)],\n", + " \"weight_decay\": 0.01,\n", + " },\n", + " {\n", + " \"params\": [p for n, p in self.model.named_parameters() if any(nd in n for nd in no_decay)],\n", + " \"weight_decay\": 0.0,\n", + " },\n", + " ]\n", + " self.optimizer = AdamW(optimizer_grouped_parameters, lr=0.01)\n", + " \n", + " self.training_round_completed = False\n", + " self.initialize_tensorkeys_for_functions()\n", + " \n", + " def state_dict(self):\n", + " return get_peft_model_state_dict(self.model)\n", + " \n", + " def load_state_dict(self, state_dict: Mapping[str, Any], strict: bool = True):\n", + " return set_peft_model_state_dict(\n", + " self.model, state_dict\n", + " )\n", + " \n", + " def validate(self, col_name, round_num, input_tensor_dict,\n", + " use_tqdm=False, **kwargs):\n", + 
" \"\"\"Validate.\n", + "\n", + " Run validation of the model on the local data.\n", + "\n", + " Args:\n", + " col_name: Name of the collaborator\n", + " round_num: What round is it\n", + " input_tensor_dict: Required input tensors (for model)\n", + " use_tqdm (bool): Use tqdm to print a progress bar (Default=True)\n", + "\n", + " Returns:\n", + " global_output_dict: Tensors to send back to the aggregator\n", + " local_output_dict: Tensors to maintain in the local TensorDB\n", + "\n", + " \"\"\"\n", + " self.rebuild_model(round_num, input_tensor_dict, validation=True)\n", + " self.model.eval()\n", + " self.model.to(self.device)\n", + " val_score = 0\n", + " total_samples = 0\n", + "\n", + " loader = self.data_loader.get_valid_loader()\n", + " if use_tqdm:\n", + " loader = tqdm(loader, desc='validate')\n", + "\n", + " with pt.no_grad():\n", + " for sample in loader:\n", + " samples = sample['input_ids'].shape[0]\n", + " total_samples += samples\n", + " output = self.model(**sample)\n", + " # get the index of the max log-probability\n", + " logits = output.logits\n", + " predictions = torch.argmax(logits, dim=-1)\n", + " metric.add_batch(predictions=predictions, references=sample['labels'])\n", + " val_score = metric.compute()['accuracy']\n", + "\n", + " origin = col_name\n", + " suffix = 'validate'\n", + " if kwargs['apply'] == 'local':\n", + " suffix += '_local'\n", + " else:\n", + " suffix += '_agg'\n", + " tags = ('metric',)\n", + " tags = change_tags(tags, add_field=suffix)\n", + " # TODO figure out a better way to pass in metric for this pytorch\n", + " # validate function\n", + " output_tensor_dict = {\n", + " TensorKey('acc', origin, round_num, True, tags):\n", + " np.array(val_score)\n", + " }\n", + "\n", + " # Empty list represents metrics that should only be stored locally\n", + " return output_tensor_dict, {}\n", + "\n", + " def train_epoch(self, batch_generator) -> Metric:\n", + " \"\"\"Train single epoch.\n", + "\n", + " Override this function in order 
to use custom training.\n", + "\n", + " Args:\n", + " batch_generator: Train dataset batch generator. Yields (samples, targets) tuples of\n", + " size = `self.data_loader.batch_size`.\n", + " Returns:\n", + " Metric: An object containing name and np.ndarray value.\n", + " \"\"\"\n", + " losses = []\n", + " for sample in batch_generator:\n", + " self.optimizer.zero_grad()\n", + " output = self.model(**sample)\n", + " loss = output.loss\n", + " loss.backward()\n", + " torch.nn.utils.clip_grad_norm_(self.model.parameters(),1.0)\n", + " self.model.step()\n", + " losses.append(loss.detach().cpu().numpy())\n", + " loss = np.mean(losses)\n", + " if self.model.config.problem_type == \"regression\":\n", + " loss_fct = MSELoss()\n", + " elif self.model.config.problem_type == \"single_label_classification\":\n", + " loss_fct = CrossEntropyLoss()\n", + " elif self.model.config.problem_type == \"multi_label_classification\":\n", + " loss_fct = BCEWithLogitsLoss()\n", + " return Metric(name=loss_fct._get_name(), value=np.array(loss))\n", + " \n", + " \n", + " def save_native(self, filepath, model_state_dict_key='model_state_dict',\n", + " optimizer_state_dict_key='optimizer_state_dict', **kwargs):\n", + " \"\"\"\n", + " Save model and optimizer states in a picked file specified by the \\\n", + " filepath. model_/optimizer_state_dicts are stored in the keys provided. 
\\\n", + " Uses pt.save().\n", + "\n", + " Args:\n", + " filepath (string) : Path to pickle file to be\n", + " created by pt.save().\n", + " model_state_dict_key (string) : key for model state dict\n", + " in pickled file.\n", + " optimizer_state_dict_key (string) : key for optimizer state\n", + " dict in picked file.\n", + " kwargs : unused\n", + "\n", + " Returns:\n", + " None\n", + " \"\"\"\n", + " pickle_dict = {\n", + " model_state_dict_key: get_peft_model_state_dict(self.model),\n", + " optimizer_state_dict_key: self.optimizer.state_dict()\n", + " }\n", + " pt.save(pickle_dict, filepath)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "num_collaborators = 2\n", + "metric = load_metric('glue', \"mrpc\")\n", + "collaborator_models = [\n", + " LLMTaskRunner(\n", + " base_model_name,\n", + " data_loader=data_slice,\n", + " metric=metric\n", + " )\n", + " for data_slice in fl_data.split(num_collaborators)]\n", + "collaborators = {'one':collaborator_models[0],'two':collaborator_models[1]}#, 'three':collaborator_models[2]}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Original TinyImageNet dataset\n", + "print(f'Original training data size: {len(fl_data.train_set)}')\n", + "print(f'Original validation data size: {len(fl_data.valid_set)}\\n')\n", + "\n", + "#Collaborator one's data\n", + "for i, model in enumerate(collaborator_models):\n", + " print(f'Collaborator {i}\\'s training data size: {len(model.data_loader.train_set)}')\n", + " print(f'Collaborator {i}\\'s validation data size: {len(model.data_loader.valid_set)}\\n')\n", + "\n", + "#Collaborator three's data\n", + "#print(f'Collaborator three\\'s training data size: {len(collaborator_models[2].data_loader.X_train)}')\n", + "#print(f'Collaborator three\\'s validation data size: {len(collaborator_models[2].data_loader.X_valid)}')" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Run experiment, return trained FederatedModel\n", + "final_fl_model = fx.run_experiment(collaborators,{'aggregator.settings.rounds_to_train':3})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Save final model\n", + "final_fl_model.save_native('final_model.pth')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "llama-env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From 600bfc97ce0bfd43121148efc7f485ba5052a8c2 Mon Sep 17 00:00:00 2001 From: porteratzo Date: Thu, 5 Oct 2023 08:50:59 -0700 Subject: [PATCH 2/7] small fix --- openfl-tutorials/Federated_PyTorch_LLM.ipynb | 534 ++++++++++++++++++- 1 file changed, 520 insertions(+), 14 deletions(-) diff --git a/openfl-tutorials/Federated_PyTorch_LLM.ipynb b/openfl-tutorials/Federated_PyTorch_LLM.ipynb index 4ace9f3d30..ca41b3c13d 100644 --- a/openfl-tutorials/Federated_PyTorch_LLM.ipynb +++ b/openfl-tutorials/Federated_PyTorch_LLM.ipynb @@ -30,7 +30,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -66,9 +66,134 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Creating Workspace Directories\n", + "Creating Workspace 
Templates\n", + "Collecting torch==1.13.1 (from -r /home/oamontoy/.local/workspace/requirements.txt (line 1))\n", + " Using cached torch-1.13.1-cp38-cp38-manylinux1_x86_64.whl (887.4 MB)\n", + "Requirement already satisfied: torchvision==0.14.1 in ./llama-env/lib/python3.8/site-packages (from -r /home/oamontoy/.local/workspace/requirements.txt (line 2)) (0.14.1)\n", + "Requirement already satisfied: tensorboard in ./llama-env/lib/python3.8/site-packages (from -r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (2.14.0)\n", + "Requirement already satisfied: wheel>=0.38.0 in ./llama-env/lib/python3.8/site-packages (from -r /home/oamontoy/.local/workspace/requirements.txt (line 4)) (0.41.2)\n", + "Requirement already satisfied: typing-extensions in ./llama-env/lib/python3.8/site-packages (from torch==1.13.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 1)) (4.8.0)\n", + "Requirement already satisfied: nvidia-cuda-runtime-cu11==11.7.99 in ./llama-env/lib/python3.8/site-packages (from torch==1.13.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 1)) (11.7.99)\n", + "Requirement already satisfied: nvidia-cudnn-cu11==8.5.0.96 in ./llama-env/lib/python3.8/site-packages (from torch==1.13.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 1)) (8.5.0.96)\n", + "Requirement already satisfied: nvidia-cublas-cu11==11.10.3.66 in ./llama-env/lib/python3.8/site-packages (from torch==1.13.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 1)) (11.10.3.66)\n", + "Requirement already satisfied: nvidia-cuda-nvrtc-cu11==11.7.99 in ./llama-env/lib/python3.8/site-packages (from torch==1.13.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 1)) (11.7.99)\n", + "Requirement already satisfied: numpy in ./llama-env/lib/python3.8/site-packages (from torchvision==0.14.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 2)) (1.24.4)\n", + "Requirement already satisfied: requests in ./llama-env/lib/python3.8/site-packages 
(from torchvision==0.14.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 2)) (2.31.0)\n", + "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in ./llama-env/lib/python3.8/site-packages (from torchvision==0.14.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 2)) (10.0.1)\n", + "Requirement already satisfied: setuptools in ./llama-env/lib/python3.8/site-packages (from nvidia-cublas-cu11==11.10.3.66->torch==1.13.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 1)) (68.2.2)\n", + "Requirement already satisfied: absl-py>=0.4 in ./llama-env/lib/python3.8/site-packages (from tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (2.0.0)\n", + "Requirement already satisfied: grpcio>=1.48.2 in ./llama-env/lib/python3.8/site-packages (from tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (1.48.2)\n", + "Requirement already satisfied: google-auth<3,>=1.6.3 in ./llama-env/lib/python3.8/site-packages (from tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (2.23.0)\n", + "Requirement already satisfied: google-auth-oauthlib<1.1,>=0.5 in ./llama-env/lib/python3.8/site-packages (from tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (1.0.0)\n", + "Requirement already satisfied: markdown>=2.6.8 in ./llama-env/lib/python3.8/site-packages (from tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (3.4.4)\n", + "Requirement already satisfied: protobuf>=3.19.6 in ./llama-env/lib/python3.8/site-packages (from tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (3.19.6)\n", + "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in ./llama-env/lib/python3.8/site-packages (from tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (0.7.1)\n", + "Requirement already satisfied: werkzeug>=1.0.1 in ./llama-env/lib/python3.8/site-packages (from tensorboard->-r 
/home/oamontoy/.local/workspace/requirements.txt (line 3)) (2.3.7)\n", + "Requirement already satisfied: cachetools<6.0,>=2.0.0 in ./llama-env/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (5.3.1)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in ./llama-env/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (0.3.0)\n", + "Requirement already satisfied: rsa<5,>=3.1.4 in ./llama-env/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (4.9)\n", + "Requirement already satisfied: urllib3<2.0 in ./llama-env/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (1.26.16)\n", + "Requirement already satisfied: requests-oauthlib>=0.7.0 in ./llama-env/lib/python3.8/site-packages (from google-auth-oauthlib<1.1,>=0.5->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (1.3.1)\n", + "Requirement already satisfied: six>=1.5.2 in ./llama-env/lib/python3.8/site-packages (from grpcio>=1.48.2->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (1.16.0)\n", + "Requirement already satisfied: importlib-metadata>=4.4 in ./llama-env/lib/python3.8/site-packages (from markdown>=2.6.8->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (6.8.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in ./llama-env/lib/python3.8/site-packages (from requests->torchvision==0.14.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 2)) (3.2.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in ./llama-env/lib/python3.8/site-packages (from requests->torchvision==0.14.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 2)) (3.4)\n", + "Requirement already satisfied: 
certifi>=2017.4.17 in ./llama-env/lib/python3.8/site-packages (from requests->torchvision==0.14.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 2)) (2023.7.22)\n", + "Requirement already satisfied: MarkupSafe>=2.1.1 in ./llama-env/lib/python3.8/site-packages (from werkzeug>=1.0.1->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (2.1.3)\n", + "Requirement already satisfied: zipp>=0.5 in ./llama-env/lib/python3.8/site-packages (from importlib-metadata>=4.4->markdown>=2.6.8->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (3.17.0)\n", + "Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in ./llama-env/lib/python3.8/site-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (0.5.0)\n", + "Requirement already satisfied: oauthlib>=3.0.0 in ./llama-env/lib/python3.8/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<1.1,>=0.5->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (3.2.2)\n", + "Installing collected packages: torch\n", + " Attempting uninstall: torch\n", + " Found existing installation: torch 2.1.0\n", + " Uninstalling torch-2.1.0:\n", + " Successfully uninstalled torch-2.1.0\n", + "Successfully installed torch-1.13.1\n", + "Successfully installed packages from /home/oamontoy/.local/workspace/requirements.txt.\n", + "\n", + "New workspace directory structure:\n", + "workspace\n", + "├── logs\n", + "│ └── cnn_mnist\n", + "│ ├── events.out.tfevents.1695850586.M50CYP2SBSTD.111429.0\n", + "│ ├── events.out.tfevents.1695942084.M50CYP2SBSTD.4924.0\n", + "│ ├── events.out.tfevents.1695849809.M50CYP2SBSTD.107313.0\n", + "│ ├── events.out.tfevents.1695850472.M50CYP2SBSTD.110437.0\n", + "│ ├── events.out.tfevents.1695942744.M50CYP2SBSTD.15635.0\n", + "│ ├── events.out.tfevents.1696008244.M50CYP2SBSTD.98097.0\n", + "│ ├── events.out.tfevents.1695850981.M50CYP2SBSTD.114740.0\n", + "│ 
├── events.out.tfevents.1695939101.M50CYP2SBSTD.143673.0\n", + "│ ├── events.out.tfevents.1695850850.M50CYP2SBSTD.113094.0\n", + "│ ├── events.out.tfevents.1695850404.M50CYP2SBSTD.109391.0\n", + "│ ├── events.out.tfevents.1695942232.M50CYP2SBSTD.7126.0\n", + "│ └── events.out.tfevents.1695849986.M50CYP2SBSTD.107937.0\n", + "├── .workspace\n", + "├── final_model.pth\n", + "├── plan\n", + "│ ├── plan.yaml\n", + "│ ├── defaults\n", + "│ ├── data.yaml\n", + "│ └── cols.yaml\n", + "├── agg_to_col_two_signed_cert.zip\n", + "├── requirements.txt\n", + "├── data\n", + "├── save\n", + "│ ├── torch_cnn_mnist_best.pbuf\n", + "│ ├── torch_cnn_mnist_last.pbuf\n", + "│ └── torch_cnn_mnist_init.pbuf\n", + "├── agg_to_col_one_signed_cert.zip\n", + "├── src\n", + "│ ├── pt_cnn.py\n", + "│ ├── mnist_utils.py\n", + "│ ├── __pycache__\n", + "│ │ ├── __init__.cpython-38.pyc\n", + "│ │ └── mnist_utils.cpython-38.pyc\n", + "│ ├── ptmnist_inmemory.py\n", + "│ └── __init__.py\n", + "└── cert\n", + "\n", + "8 directories, 30 files\n", + "Setting Up Certificate Authority...\n", + "\n", + "1. Create Root CA\n", + "1.1 Create Directories\n", + "1.2 Create Database\n", + "1.3 Create CA Request and Certificate\n", + "2. 
Create Signing Certificate\n", + "2.1 Create Directories\n", + "2.2 Create Database\n", + "2.3 Create Signing Certificate CSR\n", + "2.4 Sign Signing Certificate CSR\n", + "3 Create Certificate Chain\n", + "\n", + "Done.\n", + "Creating AGGREGATOR certificate key pair with following settings: CN=\u001b[31mm50cyp2sbstd\u001b[0m, SAN=\u001b[31mDNS:m50cyp2sbstd\u001b[0m\n", + " Writing AGGREGATOR certificate key pair to: \u001b[32m/home/oamontoy/workspace/cert/server\u001b[0m\n", + "The CSR Hash for file \u001b[32mserver/agg_m50cyp2sbstd.csr\u001b[0m = \u001b[31md49a1328c9e8ccfb65a4d583018704fd9d24b3301bb800ceb9f50b591937e1a5f8f419238b5e4c24af732693d37ce088\u001b[0m\n", + " Signing AGGREGATOR certificate\n", + "Creating COLLABORATOR certificate key pair with following settings: CN=\u001b[31mone\u001b[0m, SAN=\u001b[31mDNS:one\u001b[0m\n", + " Moving COLLABORATOR certificate to: \u001b[32m/home/oamontoy/workspace/cert/col_one\u001b[0m\n", + "The CSR Hash for file \u001b[32mcol_one.csr\u001b[0m = \u001b[31m0caea6371d4b13f51be51507794c4c18e0a9cb408f286f2f81a4b179380b15b3215e94d739ec952065fbc7eb3b2edbba\u001b[0m\n", + " Signing COLLABORATOR certificate\n", + "\n", + "Registering \u001b[32mone\u001b[0m in \u001b[32m/home/oamontoy/.local/workspace/plan/cols.yaml\u001b[0m\n", + "Creating COLLABORATOR certificate key pair with following settings: CN=\u001b[31mtwo\u001b[0m, SAN=\u001b[31mDNS:two\u001b[0m\n", + " Moving COLLABORATOR certificate to: \u001b[32m/home/oamontoy/workspace/cert/col_two\u001b[0m\n", + "The CSR Hash for file \u001b[32mcol_two.csr\u001b[0m = \u001b[31m3e6ffe3d25d39bb6f3f1fb851eb8da60d4cbf4e0bee78ad0f7731cc0e6bb47433830523f2c39dc0ca7f0ce79b69cc6c3\u001b[0m\n", + " Signing COLLABORATOR certificate\n", + "\n", + "Registering \u001b[32mtwo\u001b[0m in \u001b[32m/home/oamontoy/.local/workspace/plan/cols.yaml\u001b[0m\n" + ] + } + ], "source": [ "#Setup default workspace, logging, etc.\n", "fx.init('torch_cnn_mnist')" @@ -90,7 +215,7 @@ }, { "cell_type": 
"code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -134,7 +259,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -162,7 +287,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -221,7 +346,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -339,7 +464,7 @@ " loss = output.loss\n", " loss.backward()\n", " torch.nn.utils.clip_grad_norm_(self.model.parameters(),1.0)\n", - " self.model.step()\n", + " self.model.zero_grad()\n", " losses.append(loss.detach().cpu().numpy())\n", " loss = np.mean(losses)\n", " if self.model.config.problem_type == \"regression\":\n", @@ -379,9 +504,54 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_159004/1723172838.py:2: FutureWarning: load_metric is deprecated and will be removed in the next major version of datasets. Use 'evaluate.load' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate\n", + " metric = load_metric('glue', \"mrpc\")\n", + "Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" + ] + }, + { + "data": { + "text/html": [ + "
[08:48:31] WARNING  tried to remove tensor: __opt_state_needed not present in the tensor dict                                                       utils.py:172\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m[08:48:31]\u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m tried to remove tensor: __opt_state_needed not present in the tensor dict \u001b]8;id=932122;file:///home/oamontoy/workspace/sec-openfl/openfl/utilities/utils.py\u001b\\\u001b[2mutils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=685149;file:///home/oamontoy/workspace/sec-openfl/openfl/utilities/utils.py#172\u001b\\\u001b[2m172\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" + ] + }, + { + "data": { + "text/html": [ + "
[08:48:34] WARNING  tried to remove tensor: __opt_state_needed not present in the tensor dict                                                       utils.py:172\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m[08:48:34]\u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m tried to remove tensor: __opt_state_needed not present in the tensor dict \u001b]8;id=38894;file:///home/oamontoy/workspace/sec-openfl/openfl/utilities/utils.py\u001b\\\u001b[2mutils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=682120;file:///home/oamontoy/workspace/sec-openfl/openfl/utilities/utils.py#172\u001b\\\u001b[2m172\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "num_collaborators = 2\n", "metric = load_metric('glue', \"mrpc\")\n", @@ -397,9 +567,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Original training data size: 3668\n", + "Original validation data size: 1725\n", + "\n", + "Collaborator 0's training data size: 1834\n", + "Collaborator 0's validation data size: 863\n", + "\n", + "Collaborator 1's training data size: 1834\n", + "Collaborator 1's validation data size: 862\n", + "\n" + ] + } + ], "source": [ "#Original TinyImageNet dataset\n", "print(f'Original training data size: {len(fl_data.train_set)}')\n", @@ -417,9 +603,329 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
           INFO     Updating aggregator.settings.rounds_to_train to 3...                                                                           native.py:102\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Updating aggregator.settings.rounds_to_train to \u001b[1;36m3\u001b[0m\u001b[33m...\u001b[0m \u001b]8;id=22181;file:///home/oamontoy/workspace/sec-openfl/openfl/native/native.py\u001b\\\u001b[2mnative.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=265323;file:///home/oamontoy/workspace/sec-openfl/openfl/native/native.py#102\u001b\\\u001b[2m102\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
           INFO     FL-Plan hash is 86b08340e96ba9e485169da1f860ea968811d1bf2e6867774fae4398426dd33c6ae56ca202002d393e3a4d91f946c1bc                 plan.py:235\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m FL-Plan hash is \u001b[34m86b08340e96ba9e485169da1f860ea968811d1bf2e6867774fae4398426dd33c6ae56ca202002d393e3a4d91f946c1bc\u001b[0m \u001b]8;id=965130;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py\u001b\\\u001b[2mplan.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=969631;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py#235\u001b\\\u001b[2m235\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
           INFO     Building 🡆 Object NoCompressionPipeline from openfl.pipelines Module.                                                            plan.py:171\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Building \u001b[31m🡆\u001b[0m Object \u001b[31mNoCompressionPipeline\u001b[0m from \u001b[31mopenfl.pipelines\u001b[0m Module. \u001b]8;id=418157;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py\u001b\\\u001b[2mplan.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=469081;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py#171\u001b\\\u001b[2m171\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
           INFO     Creating Initial Weights File    🠆 save/torch_cnn_mnist_init.pbuf                                                              native.py:277\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Creating Initial Weights File 🠆 save/torch_cnn_mnist_init.pbuf \u001b]8;id=949808;file:///home/oamontoy/workspace/sec-openfl/openfl/native/native.py\u001b\\\u001b[2mnative.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=806002;file:///home/oamontoy/workspace/sec-openfl/openfl/native/native.py#277\u001b\\\u001b[2m277\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
           INFO     Starting Experiment...                                                                                                         native.py:281\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Starting Experiment\u001b[33m...\u001b[0m \u001b]8;id=768304;file:///home/oamontoy/workspace/sec-openfl/openfl/native/native.py\u001b\\\u001b[2mnative.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=820559;file:///home/oamontoy/workspace/sec-openfl/openfl/native/native.py#281\u001b\\\u001b[2m281\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
           INFO     Building 🡆 Object RandomGroupedAssigner from openfl.component Module.                                                            plan.py:171\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Building \u001b[31m🡆\u001b[0m Object \u001b[31mRandomGroupedAssigner\u001b[0m from \u001b[31mopenfl.component\u001b[0m Module. \u001b]8;id=959697;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py\u001b\\\u001b[2mplan.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=588178;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py#171\u001b\\\u001b[2m171\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
           INFO     Building 🡆 Object CutoffTimeBasedStragglerHandling from openfl.component.straggler_handling_functions Module.                    plan.py:171\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Building \u001b[31m🡆\u001b[0m Object \u001b[31mCutoffTimeBasedStragglerHandling\u001b[0m from \u001b[31mopenfl.component.straggler_handling_functions\u001b[0m Module. \u001b]8;id=199471;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py\u001b\\\u001b[2mplan.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=897448;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py#171\u001b\\\u001b[2m171\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
           INFO     Importing 🡆 Object write_metric from src.mnist_utils Module.                                                                     plan.py:199\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Importing \u001b[31m🡆\u001b[0m Object \u001b[31mwrite_metric\u001b[0m from \u001b[31msrc.mnist_utils\u001b[0m Module. \u001b]8;id=125418;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py\u001b\\\u001b[2mplan.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=640295;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py#199\u001b\\\u001b[2m199\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/oamontoy/workspace/llama-env/lib/python3.8/site-packages/torchvision/io/image.py:13: UserWarning: Failed to load image Python extension: libtorch_cuda_cu.so: cannot open shared object file: No such file or directory\n", + " warn(f\"Failed to load image Python extension: {e}\")\n" + ] + }, + { + "data": { + "text/html": [ + "
[08:48:35] INFO     Building 🡆 Object Aggregator from openfl.component Module.                                                                       plan.py:171\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m[08:48:35]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Building \u001b[31m🡆\u001b[0m Object \u001b[31mAggregator\u001b[0m from \u001b[31mopenfl.component\u001b[0m Module. \u001b]8;id=299428;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py\u001b\\\u001b[2mplan.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=109248;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py#171\u001b\\\u001b[2m171\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
           INFO     Using custom log metric: <function write_metric at 0x7f5c68ad28b0>                                                          aggregator.py:97\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Using custom log metric: \u001b[1m<\u001b[0m\u001b[1;95mfunction\u001b[0m\u001b[39m write_metric at \u001b[0m\u001b[1;36m0x7f5c68ad28b0\u001b[0m\u001b[1m>\u001b[0m \u001b]8;id=655419;file:///home/oamontoy/workspace/sec-openfl/openfl/component/aggregator/aggregator.py\u001b\\\u001b[2maggregator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=161984;file:///home/oamontoy/workspace/sec-openfl/openfl/component/aggregator/aggregator.py#97\u001b\\\u001b[2m97\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
           INFO     Building 🡆 Object Collaborator from openfl.component Module.                                                                     plan.py:171\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Building \u001b[31m🡆\u001b[0m Object \u001b[31mCollaborator\u001b[0m from \u001b[31mopenfl.component\u001b[0m Module. \u001b]8;id=390053;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py\u001b\\\u001b[2mplan.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=853120;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py#171\u001b\\\u001b[2m171\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
           INFO     Building 🡆 Object Collaborator from openfl.component Module.                                                                     plan.py:171\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Building \u001b[31m🡆\u001b[0m Object \u001b[31mCollaborator\u001b[0m from \u001b[31mopenfl.component\u001b[0m Module. \u001b]8;id=612439;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py\u001b\\\u001b[2mplan.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=601521;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py#171\u001b\\\u001b[2m171\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
           INFO     Waiting for tasks...                                                                                                     collaborator.py:178\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Waiting for tasks\u001b[33m...\u001b[0m \u001b]8;id=806029;file:///home/oamontoy/workspace/sec-openfl/openfl/component/collaborator/collaborator.py\u001b\\\u001b[2mcollaborator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=928412;file:///home/oamontoy/workspace/sec-openfl/openfl/component/collaborator/collaborator.py#178\u001b\\\u001b[2m178\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
           INFO     Sending tasks to collaborator one for round 0                                                                              aggregator.py:329\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Sending tasks to collaborator one for round \u001b[1;36m0\u001b[0m \u001b]8;id=515202;file:///home/oamontoy/workspace/sec-openfl/openfl/component/aggregator/aggregator.py\u001b\\\u001b[2maggregator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=103453;file:///home/oamontoy/workspace/sec-openfl/openfl/component/aggregator/aggregator.py#329\u001b\\\u001b[2m329\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
           INFO     Received the following tasks: ['aggregated_model_validation', 'train', 'locally_tuned_model_validation']                 collaborator.py:168\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Received the following tasks: \u001b[1m[\u001b[0m\u001b[32m'aggregated_model_validation'\u001b[0m, \u001b[32m'train'\u001b[0m, \u001b[32m'locally_tuned_model_validation'\u001b[0m\u001b[1m]\u001b[0m \u001b]8;id=266582;file:///home/oamontoy/workspace/sec-openfl/openfl/component/collaborator/collaborator.py\u001b\\\u001b[2mcollaborator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=901781;file:///home/oamontoy/workspace/sec-openfl/openfl/component/collaborator/collaborator.py#168\u001b\\\u001b[2m168\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
[08:48:36] INFO     Using TaskRunner subclassing API                                                                                         collaborator.py:253\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m[08:48:36]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Using TaskRunner subclassing API \u001b]8;id=457102;file:///home/oamontoy/workspace/sec-openfl/openfl/component/collaborator/collaborator.py\u001b\\\u001b[2mcollaborator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=223379;file:///home/oamontoy/workspace/sec-openfl/openfl/component/collaborator/collaborator.py#253\u001b\\\u001b[2m253\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/oamontoy/workspace/sec-openfl/openfl/federated/task/runner_pt.py:284: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined behavior. You may want to copy the array to protect its data or make it writable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at ../torch/csrc/utils/tensor_numpy.cpp:206.)\n", + " new_state[k] = pt.from_numpy(tensor_dict.pop(k)).to(device)\n", + "You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n" + ] + }, + { + "data": { + "text/html": [ + "
[08:49:31] METRIC   Round 0, collaborator one is sending metric for task aggregated_model_validation: acc   0.673233                         collaborator.py:415\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m[08:49:31]\u001b[0m\u001b[2;36m \u001b[0mMETRIC Round \u001b[1;36m0\u001b[0m, collaborator one is sending metric for task aggregated_model_validation: acc \u001b[1;36m0.673233\u001b[0m \u001b]8;id=512008;file:///home/oamontoy/workspace/sec-openfl/openfl/component/collaborator/collaborator.py\u001b\\\u001b[2mcollaborator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=179476;file:///home/oamontoy/workspace/sec-openfl/openfl/component/collaborator/collaborator.py#415\u001b\\\u001b[2m415\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
           INFO     Collaborator one is sending task results for aggregated_model_validation, round 0                                          aggregator.py:520\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Collaborator one is sending task results for aggregated_model_validation, round \u001b[1;36m0\u001b[0m \u001b]8;id=921737;file:///home/oamontoy/workspace/sec-openfl/openfl/component/aggregator/aggregator.py\u001b\\\u001b[2maggregator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=943670;file:///home/oamontoy/workspace/sec-openfl/openfl/component/aggregator/aggregator.py#520\u001b\\\u001b[2m520\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
           METRIC   Round 0, collaborator validate_agg aggregated_model_validation result acc:      0.673233                                   aggregator.py:559\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0mMETRIC Round \u001b[1;36m0\u001b[0m, collaborator validate_agg aggregated_model_validation result acc: \u001b[1;36m0.673233\u001b[0m \u001b]8;id=778930;file:///home/oamontoy/workspace/sec-openfl/openfl/component/aggregator/aggregator.py\u001b\\\u001b[2maggregator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=605180;file:///home/oamontoy/workspace/sec-openfl/openfl/component/aggregator/aggregator.py#559\u001b\\\u001b[2m559\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
           INFO     Using TaskRunner subclassing API                                                                                         collaborator.py:253\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Using TaskRunner subclassing API \u001b]8;id=239383;file:///home/oamontoy/workspace/sec-openfl/openfl/component/collaborator/collaborator.py\u001b\\\u001b[2mcollaborator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=814426;file:///home/oamontoy/workspace/sec-openfl/openfl/component/collaborator/collaborator.py#253\u001b\\\u001b[2m253\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
           INFO     Run 0 epoch of 0 round                                                                                                      runner_pt.py:155\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Run \u001b[1;36m0\u001b[0m epoch of \u001b[1;36m0\u001b[0m round \u001b]8;id=118270;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/task/runner_pt.py\u001b\\\u001b[2mrunner_pt.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=848215;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/task/runner_pt.py#155\u001b\\\u001b[2m155\u001b[0m\u001b]8;;\u001b\\\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "ename": "AttributeError", + "evalue": "'RobertaForSequenceClassification' object has no attribute 'step'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m~/workspace/llama-env/lib/python3.8/site-packages/peft/peft_model.py:434\u001b[0m, in \u001b[0;36mPeftModel.__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 433\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 434\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39msuper\u001b[39;49m()\u001b[39m.\u001b[39;49m\u001b[39m__getattr__\u001b[39;49m(name) \u001b[39m# defer to nn.Module's logic\u001b[39;00m\n\u001b[1;32m 435\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mAttributeError\u001b[39;00m:\n", + "File \u001b[0;32m~/workspace/llama-env/lib/python3.8/site-packages/torch/nn/modules/module.py:1695\u001b[0m, in \u001b[0;36m__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 1688\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mparameters\u001b[39m(\u001b[39mself\u001b[39m, recurse: \u001b[39mbool\u001b[39m \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Iterator[Parameter]:\n\u001b[1;32m 1689\u001b[0m \u001b[39m \u001b[39m\u001b[39mr\u001b[39m\u001b[39m\"\"\"Returns an iterator over module 
parameters.\u001b[39;00m\n\u001b[1;32m 1690\u001b[0m \n\u001b[1;32m 1691\u001b[0m \u001b[39m This is typically passed to an optimizer.\u001b[39;00m\n\u001b[1;32m 1692\u001b[0m \n\u001b[1;32m 1693\u001b[0m \u001b[39m Args:\u001b[39;00m\n\u001b[1;32m 1694\u001b[0m \u001b[39m recurse (bool): if True, then yields parameters of this module\u001b[39;00m\n\u001b[0;32m-> 1695\u001b[0m \u001b[39m and all submodules. Otherwise, yields only parameters that\u001b[39;00m\n\u001b[1;32m 1696\u001b[0m \u001b[39m are direct members of this module.\u001b[39;00m\n\u001b[1;32m 1697\u001b[0m \n\u001b[1;32m 1698\u001b[0m \u001b[39m Yields:\u001b[39;00m\n\u001b[1;32m 1699\u001b[0m \u001b[39m Parameter: module parameter\u001b[39;00m\n\u001b[1;32m 1700\u001b[0m \n\u001b[1;32m 1701\u001b[0m \u001b[39m Example::\u001b[39;00m\n\u001b[1;32m 1702\u001b[0m \n\u001b[1;32m 1703\u001b[0m \u001b[39m >>> # xdoctest: +SKIP(\"undefined vars\")\u001b[39;00m\n\u001b[1;32m 1704\u001b[0m \u001b[39m >>> for param in model.parameters():\u001b[39;00m\n\u001b[1;32m 1705\u001b[0m \u001b[39m >>> print(type(param), param.size())\u001b[39;00m\n\u001b[1;32m 1706\u001b[0m \u001b[39m (20L,)\u001b[39;00m\n\u001b[1;32m 1707\u001b[0m \u001b[39m (20L, 1L, 5L, 5L)\u001b[39;00m\n\u001b[1;32m 1708\u001b[0m \n\u001b[1;32m 1709\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m 1710\u001b[0m \u001b[39mfor\u001b[39;00m name, param \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnamed_parameters(recurse\u001b[39m=\u001b[39mrecurse):\n", + "\u001b[0;31mAttributeError\u001b[0m: 'PeftModelForSequenceClassification' object has no attribute 'step'", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m~/workspace/llama-env/lib/python3.8/site-packages/peft/tuners/lora.py:492\u001b[0m, in \u001b[0;36mLoraModel.__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 491\u001b[0m 
\u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 492\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39msuper\u001b[39;49m()\u001b[39m.\u001b[39;49m\u001b[39m__getattr__\u001b[39;49m(name) \u001b[39m# defer to nn.Module's logic\u001b[39;00m\n\u001b[1;32m 493\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mAttributeError\u001b[39;00m:\n", + "File \u001b[0;32m~/workspace/llama-env/lib/python3.8/site-packages/torch/nn/modules/module.py:1695\u001b[0m, in \u001b[0;36m__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 1688\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mparameters\u001b[39m(\u001b[39mself\u001b[39m, recurse: \u001b[39mbool\u001b[39m \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Iterator[Parameter]:\n\u001b[1;32m 1689\u001b[0m \u001b[39m \u001b[39m\u001b[39mr\u001b[39m\u001b[39m\"\"\"Returns an iterator over module parameters.\u001b[39;00m\n\u001b[1;32m 1690\u001b[0m \n\u001b[1;32m 1691\u001b[0m \u001b[39m This is typically passed to an optimizer.\u001b[39;00m\n\u001b[1;32m 1692\u001b[0m \n\u001b[1;32m 1693\u001b[0m \u001b[39m Args:\u001b[39;00m\n\u001b[1;32m 1694\u001b[0m \u001b[39m recurse (bool): if True, then yields parameters of this module\u001b[39;00m\n\u001b[0;32m-> 1695\u001b[0m \u001b[39m and all submodules. 
Otherwise, yields only parameters that\u001b[39;00m\n\u001b[1;32m 1696\u001b[0m \u001b[39m are direct members of this module.\u001b[39;00m\n\u001b[1;32m 1697\u001b[0m \n\u001b[1;32m 1698\u001b[0m \u001b[39m Yields:\u001b[39;00m\n\u001b[1;32m 1699\u001b[0m \u001b[39m Parameter: module parameter\u001b[39;00m\n\u001b[1;32m 1700\u001b[0m \n\u001b[1;32m 1701\u001b[0m \u001b[39m Example::\u001b[39;00m\n\u001b[1;32m 1702\u001b[0m \n\u001b[1;32m 1703\u001b[0m \u001b[39m >>> # xdoctest: +SKIP(\"undefined vars\")\u001b[39;00m\n\u001b[1;32m 1704\u001b[0m \u001b[39m >>> for param in model.parameters():\u001b[39;00m\n\u001b[1;32m 1705\u001b[0m \u001b[39m >>> print(type(param), param.size())\u001b[39;00m\n\u001b[1;32m 1706\u001b[0m \u001b[39m (20L,)\u001b[39;00m\n\u001b[1;32m 1707\u001b[0m \u001b[39m (20L, 1L, 5L, 5L)\u001b[39;00m\n\u001b[1;32m 1708\u001b[0m \n\u001b[1;32m 1709\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m 1710\u001b[0m \u001b[39mfor\u001b[39;00m name, param \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnamed_parameters(recurse\u001b[39m=\u001b[39mrecurse):\n", + "\u001b[0;31mAttributeError\u001b[0m: 'LoraModel' object has no attribute 'step'", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m/home/oamontoy/workspace/sec-openfl/openfl-tutorials/Federated_PyTorch_LLM.ipynb Cell 18\u001b[0m line \u001b[0;36m2\n\u001b[1;32m 1\u001b[0m \u001b[39m#Run experiment, return trained FederatedModel\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m final_fl_model \u001b[39m=\u001b[39m fx\u001b[39m.\u001b[39;49mrun_experiment(collaborators,{\u001b[39m'\u001b[39;49m\u001b[39maggregator.settings.rounds_to_train\u001b[39;49m\u001b[39m'\u001b[39;49m:\u001b[39m3\u001b[39;49m})\n", + "File \u001b[0;32m~/workspace/sec-openfl/openfl/native/native.py:295\u001b[0m, in \u001b[0;36mrun_experiment\u001b[0;34m(collaborator_dict, 
override_config)\u001b[0m\n\u001b[1;32m 293\u001b[0m \u001b[39mfor\u001b[39;00m col \u001b[39min\u001b[39;00m plan\u001b[39m.\u001b[39mauthorized_cols:\n\u001b[1;32m 294\u001b[0m collaborator \u001b[39m=\u001b[39m collaborators[col]\n\u001b[0;32m--> 295\u001b[0m collaborator\u001b[39m.\u001b[39;49mrun_simulation()\n\u001b[1;32m 297\u001b[0m \u001b[39m# Set the weights for the final model\u001b[39;00m\n\u001b[1;32m 298\u001b[0m model\u001b[39m.\u001b[39mrebuild_model(\n\u001b[1;32m 299\u001b[0m rounds_to_train \u001b[39m-\u001b[39m \u001b[39m1\u001b[39m, aggregator\u001b[39m.\u001b[39mlast_tensor_dict, validation\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m)\n", + "File \u001b[0;32m~/workspace/sec-openfl/openfl/component/collaborator/collaborator.py:170\u001b[0m, in \u001b[0;36mCollaborator.run_simulation\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 168\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlogger\u001b[39m.\u001b[39minfo(\u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39mReceived the following tasks: \u001b[39m\u001b[39m{\u001b[39;00mtasks\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m)\n\u001b[1;32m 169\u001b[0m \u001b[39mfor\u001b[39;00m task \u001b[39min\u001b[39;00m tasks:\n\u001b[0;32m--> 170\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mdo_task(task, round_number)\n\u001b[1;32m 171\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlogger\u001b[39m.\u001b[39minfo(\u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39mAll tasks completed on \u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcollaborator_name\u001b[39m}\u001b[39;00m\u001b[39m \u001b[39m\u001b[39m'\u001b[39m\n\u001b[1;32m 172\u001b[0m \u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39mfor round \u001b[39m\u001b[39m{\u001b[39;00mround_number\u001b[39m}\u001b[39;00m\u001b[39m...\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[1;32m 173\u001b[0m \u001b[39mbreak\u001b[39;00m\n", + "File 
\u001b[0;32m~/workspace/sec-openfl/openfl/component/collaborator/collaborator.py:255\u001b[0m, in \u001b[0;36mCollaborator.do_task\u001b[0;34m(self, task, round_number)\u001b[0m\n\u001b[1;32m 252\u001b[0m func \u001b[39m=\u001b[39m \u001b[39mgetattr\u001b[39m(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtask_runner, func_name)\n\u001b[1;32m 253\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlogger\u001b[39m.\u001b[39minfo(\u001b[39m'\u001b[39m\u001b[39mUsing TaskRunner subclassing API\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[0;32m--> 255\u001b[0m global_output_tensor_dict, local_output_tensor_dict \u001b[39m=\u001b[39m func(\n\u001b[1;32m 256\u001b[0m col_name\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mcollaborator_name,\n\u001b[1;32m 257\u001b[0m round_num\u001b[39m=\u001b[39;49mround_number,\n\u001b[1;32m 258\u001b[0m input_tensor_dict\u001b[39m=\u001b[39;49minput_tensor_dict,\n\u001b[1;32m 259\u001b[0m \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 261\u001b[0m \u001b[39m# Save global and local output_tensor_dicts to TensorDB\u001b[39;00m\n\u001b[1;32m 262\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtensor_db\u001b[39m.\u001b[39mcache_tensor(global_output_tensor_dict)\n", + "File \u001b[0;32m~/workspace/sec-openfl/openfl/federated/task/runner_pt.py:159\u001b[0m, in \u001b[0;36mPyTorchTaskRunner.train_batches\u001b[0;34m(self, col_name, round_num, input_tensor_dict, use_tqdm, epochs, **kwargs)\u001b[0m\n\u001b[1;32m 157\u001b[0m \u001b[39mif\u001b[39;00m use_tqdm:\n\u001b[1;32m 158\u001b[0m loader \u001b[39m=\u001b[39m tqdm\u001b[39m.\u001b[39mtqdm(loader, desc\u001b[39m=\u001b[39m\u001b[39m'\u001b[39m\u001b[39mtrain epoch\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[0;32m--> 159\u001b[0m metric \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mtrain_epoch(loader)\n\u001b[1;32m 160\u001b[0m \u001b[39m# Output metric tensors (scalar)\u001b[39;00m\n\u001b[1;32m 161\u001b[0m 
origin \u001b[39m=\u001b[39m col_name\n", + "\u001b[1;32m/home/oamontoy/workspace/sec-openfl/openfl-tutorials/Federated_PyTorch_LLM.ipynb Cell 18\u001b[0m line \u001b[0;36m1\n\u001b[1;32m 113\u001b[0m loss\u001b[39m.\u001b[39mbackward()\n\u001b[1;32m 114\u001b[0m torch\u001b[39m.\u001b[39mnn\u001b[39m.\u001b[39mutils\u001b[39m.\u001b[39mclip_grad_norm_(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mmodel\u001b[39m.\u001b[39mparameters(),\u001b[39m1.0\u001b[39m)\n\u001b[0;32m--> 115\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mmodel\u001b[39m.\u001b[39;49mstep()\n\u001b[1;32m 116\u001b[0m losses\u001b[39m.\u001b[39mappend(loss\u001b[39m.\u001b[39mdetach()\u001b[39m.\u001b[39mcpu()\u001b[39m.\u001b[39mnumpy())\n\u001b[1;32m 117\u001b[0m loss \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39mmean(losses)\n", + "File \u001b[0;32m~/workspace/llama-env/lib/python3.8/site-packages/peft/peft_model.py:436\u001b[0m, in \u001b[0;36mPeftModel.__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 434\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39m\u001b[39m__getattr__\u001b[39m(name) \u001b[39m# defer to nn.Module's logic\u001b[39;00m\n\u001b[1;32m 435\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mAttributeError\u001b[39;00m:\n\u001b[0;32m--> 436\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mgetattr\u001b[39;49m(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mbase_model, name)\n", + "File \u001b[0;32m~/workspace/llama-env/lib/python3.8/site-packages/peft/tuners/lora.py:494\u001b[0m, in \u001b[0;36mLoraModel.__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 492\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39m\u001b[39m__getattr__\u001b[39m(name) \u001b[39m# defer to nn.Module's logic\u001b[39;00m\n\u001b[1;32m 493\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mAttributeError\u001b[39;00m:\n\u001b[0;32m--> 494\u001b[0m \u001b[39mreturn\u001b[39;00m 
\u001b[39mgetattr\u001b[39;49m(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mmodel, name)\n", + "File \u001b[0;32m~/workspace/llama-env/lib/python3.8/site-packages/torch/nn/modules/module.py:1695\u001b[0m, in \u001b[0;36m__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 1688\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mparameters\u001b[39m(\u001b[39mself\u001b[39m, recurse: \u001b[39mbool\u001b[39m \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Iterator[Parameter]:\n\u001b[1;32m 1689\u001b[0m \u001b[39m \u001b[39m\u001b[39mr\u001b[39m\u001b[39m\"\"\"Returns an iterator over module parameters.\u001b[39;00m\n\u001b[1;32m 1690\u001b[0m \n\u001b[1;32m 1691\u001b[0m \u001b[39m This is typically passed to an optimizer.\u001b[39;00m\n\u001b[1;32m 1692\u001b[0m \n\u001b[1;32m 1693\u001b[0m \u001b[39m Args:\u001b[39;00m\n\u001b[1;32m 1694\u001b[0m \u001b[39m recurse (bool): if True, then yields parameters of this module\u001b[39;00m\n\u001b[0;32m-> 1695\u001b[0m \u001b[39m and all submodules. 
Otherwise, yields only parameters that\u001b[39;00m\n\u001b[1;32m 1696\u001b[0m \u001b[39m are direct members of this module.\u001b[39;00m\n\u001b[1;32m 1697\u001b[0m \n\u001b[1;32m 1698\u001b[0m \u001b[39m Yields:\u001b[39;00m\n\u001b[1;32m 1699\u001b[0m \u001b[39m Parameter: module parameter\u001b[39;00m\n\u001b[1;32m 1700\u001b[0m \n\u001b[1;32m 1701\u001b[0m \u001b[39m Example::\u001b[39;00m\n\u001b[1;32m 1702\u001b[0m \n\u001b[1;32m 1703\u001b[0m \u001b[39m >>> # xdoctest: +SKIP(\"undefined vars\")\u001b[39;00m\n\u001b[1;32m 1704\u001b[0m \u001b[39m >>> for param in model.parameters():\u001b[39;00m\n\u001b[1;32m 1705\u001b[0m \u001b[39m >>> print(type(param), param.size())\u001b[39;00m\n\u001b[1;32m 1706\u001b[0m \u001b[39m (20L,)\u001b[39;00m\n\u001b[1;32m 1707\u001b[0m \u001b[39m (20L, 1L, 5L, 5L)\u001b[39;00m\n\u001b[1;32m 1708\u001b[0m \n\u001b[1;32m 1709\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m 1710\u001b[0m \u001b[39mfor\u001b[39;00m name, param \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnamed_parameters(recurse\u001b[39m=\u001b[39mrecurse):\n\u001b[1;32m 1711\u001b[0m \u001b[39myield\u001b[39;00m param\n", + "\u001b[0;31mAttributeError\u001b[0m: 'RobertaForSequenceClassification' object has no attribute 'step'" + ] + } + ], "source": [ "#Run experiment, return trained FederatedModel\n", "final_fl_model = fx.run_experiment(collaborators,{'aggregator.settings.rounds_to_train':3})" From 3cacb715548646461bd9d19bb3c814a4f098af0d Mon Sep 17 00:00:00 2001 From: porteratzo Date: Tue, 17 Oct 2023 10:57:43 -0700 Subject: [PATCH 3/7] fixes --- openfl-tutorials/Federated_PyTorch_LLM.ipynb | 680 +++---------------- 1 file changed, 105 insertions(+), 575 deletions(-) diff --git a/openfl-tutorials/Federated_PyTorch_LLM.ipynb b/openfl-tutorials/Federated_PyTorch_LLM.ipynb index ca41b3c13d..78b9978604 100644 --- a/openfl-tutorials/Federated_PyTorch_LLM.ipynb +++ b/openfl-tutorials/Federated_PyTorch_LLM.ipynb @@ -30,7 +30,7 @@ }, 
{ "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -39,9 +39,8 @@ "import openfl.native as fx\n", "import torch\n", "import torch as pt\n", - "from accelerate import Accelerator\n", "from datasets import Dataset, load_dataset, load_metric\n", - "from openfl.federated import PyTorchTaskRunner, TaskRunner\n", + "from openfl.federated import PyTorchTaskRunner\n", "from openfl.federated.task.runner_pt import change_tags\n", "from openfl.utilities import Metric, TensorKey\n", "from openfl.utilities.data_splitters import EqualNumPyDataSplitter\n", @@ -52,9 +51,9 @@ "from torch.utils.data import DataLoader\n", "from tqdm import tqdm\n", "import torch.nn as nn\n", - "\n", - "from transformers import (AutoConfig, AutoModelForSequenceClassification,\n", - " AutoTokenizer, DataCollatorWithPadding)" + "from transformers.trainer_pt_utils import get_parameter_names\n", + "from transformers import (AutoModelForSequenceClassification,\n", + " AutoTokenizer, DataCollatorWithPadding, get_scheduler)" ] }, { @@ -66,134 +65,9 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Creating Workspace Directories\n", - "Creating Workspace Templates\n", - "Collecting torch==1.13.1 (from -r /home/oamontoy/.local/workspace/requirements.txt (line 1))\n", - " Using cached torch-1.13.1-cp38-cp38-manylinux1_x86_64.whl (887.4 MB)\n", - "Requirement already satisfied: torchvision==0.14.1 in ./llama-env/lib/python3.8/site-packages (from -r /home/oamontoy/.local/workspace/requirements.txt (line 2)) (0.14.1)\n", - "Requirement already satisfied: tensorboard in ./llama-env/lib/python3.8/site-packages (from -r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (2.14.0)\n", - "Requirement already satisfied: wheel>=0.38.0 in ./llama-env/lib/python3.8/site-packages (from -r 
/home/oamontoy/.local/workspace/requirements.txt (line 4)) (0.41.2)\n", - "Requirement already satisfied: typing-extensions in ./llama-env/lib/python3.8/site-packages (from torch==1.13.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 1)) (4.8.0)\n", - "Requirement already satisfied: nvidia-cuda-runtime-cu11==11.7.99 in ./llama-env/lib/python3.8/site-packages (from torch==1.13.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 1)) (11.7.99)\n", - "Requirement already satisfied: nvidia-cudnn-cu11==8.5.0.96 in ./llama-env/lib/python3.8/site-packages (from torch==1.13.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 1)) (8.5.0.96)\n", - "Requirement already satisfied: nvidia-cublas-cu11==11.10.3.66 in ./llama-env/lib/python3.8/site-packages (from torch==1.13.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 1)) (11.10.3.66)\n", - "Requirement already satisfied: nvidia-cuda-nvrtc-cu11==11.7.99 in ./llama-env/lib/python3.8/site-packages (from torch==1.13.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 1)) (11.7.99)\n", - "Requirement already satisfied: numpy in ./llama-env/lib/python3.8/site-packages (from torchvision==0.14.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 2)) (1.24.4)\n", - "Requirement already satisfied: requests in ./llama-env/lib/python3.8/site-packages (from torchvision==0.14.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 2)) (2.31.0)\n", - "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in ./llama-env/lib/python3.8/site-packages (from torchvision==0.14.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 2)) (10.0.1)\n", - "Requirement already satisfied: setuptools in ./llama-env/lib/python3.8/site-packages (from nvidia-cublas-cu11==11.10.3.66->torch==1.13.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 1)) (68.2.2)\n", - "Requirement already satisfied: absl-py>=0.4 in ./llama-env/lib/python3.8/site-packages (from tensorboard->-r 
/home/oamontoy/.local/workspace/requirements.txt (line 3)) (2.0.0)\n", - "Requirement already satisfied: grpcio>=1.48.2 in ./llama-env/lib/python3.8/site-packages (from tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (1.48.2)\n", - "Requirement already satisfied: google-auth<3,>=1.6.3 in ./llama-env/lib/python3.8/site-packages (from tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (2.23.0)\n", - "Requirement already satisfied: google-auth-oauthlib<1.1,>=0.5 in ./llama-env/lib/python3.8/site-packages (from tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (1.0.0)\n", - "Requirement already satisfied: markdown>=2.6.8 in ./llama-env/lib/python3.8/site-packages (from tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (3.4.4)\n", - "Requirement already satisfied: protobuf>=3.19.6 in ./llama-env/lib/python3.8/site-packages (from tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (3.19.6)\n", - "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in ./llama-env/lib/python3.8/site-packages (from tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (0.7.1)\n", - "Requirement already satisfied: werkzeug>=1.0.1 in ./llama-env/lib/python3.8/site-packages (from tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (2.3.7)\n", - "Requirement already satisfied: cachetools<6.0,>=2.0.0 in ./llama-env/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (5.3.1)\n", - "Requirement already satisfied: pyasn1-modules>=0.2.1 in ./llama-env/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (0.3.0)\n", - "Requirement already satisfied: rsa<5,>=3.1.4 in ./llama-env/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->-r 
/home/oamontoy/.local/workspace/requirements.txt (line 3)) (4.9)\n", - "Requirement already satisfied: urllib3<2.0 in ./llama-env/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (1.26.16)\n", - "Requirement already satisfied: requests-oauthlib>=0.7.0 in ./llama-env/lib/python3.8/site-packages (from google-auth-oauthlib<1.1,>=0.5->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (1.3.1)\n", - "Requirement already satisfied: six>=1.5.2 in ./llama-env/lib/python3.8/site-packages (from grpcio>=1.48.2->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (1.16.0)\n", - "Requirement already satisfied: importlib-metadata>=4.4 in ./llama-env/lib/python3.8/site-packages (from markdown>=2.6.8->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (6.8.0)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in ./llama-env/lib/python3.8/site-packages (from requests->torchvision==0.14.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 2)) (3.2.0)\n", - "Requirement already satisfied: idna<4,>=2.5 in ./llama-env/lib/python3.8/site-packages (from requests->torchvision==0.14.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 2)) (3.4)\n", - "Requirement already satisfied: certifi>=2017.4.17 in ./llama-env/lib/python3.8/site-packages (from requests->torchvision==0.14.1->-r /home/oamontoy/.local/workspace/requirements.txt (line 2)) (2023.7.22)\n", - "Requirement already satisfied: MarkupSafe>=2.1.1 in ./llama-env/lib/python3.8/site-packages (from werkzeug>=1.0.1->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (2.1.3)\n", - "Requirement already satisfied: zipp>=0.5 in ./llama-env/lib/python3.8/site-packages (from importlib-metadata>=4.4->markdown>=2.6.8->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (3.17.0)\n", - "Requirement already satisfied: 
pyasn1<0.6.0,>=0.4.6 in ./llama-env/lib/python3.8/site-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (0.5.0)\n", - "Requirement already satisfied: oauthlib>=3.0.0 in ./llama-env/lib/python3.8/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<1.1,>=0.5->tensorboard->-r /home/oamontoy/.local/workspace/requirements.txt (line 3)) (3.2.2)\n", - "Installing collected packages: torch\n", - " Attempting uninstall: torch\n", - " Found existing installation: torch 2.1.0\n", - " Uninstalling torch-2.1.0:\n", - " Successfully uninstalled torch-2.1.0\n", - "Successfully installed torch-1.13.1\n", - "Successfully installed packages from /home/oamontoy/.local/workspace/requirements.txt.\n", - "\n", - "New workspace directory structure:\n", - "workspace\n", - "├── logs\n", - "│ └── cnn_mnist\n", - "│ ├── events.out.tfevents.1695850586.M50CYP2SBSTD.111429.0\n", - "│ ├── events.out.tfevents.1695942084.M50CYP2SBSTD.4924.0\n", - "│ ├── events.out.tfevents.1695849809.M50CYP2SBSTD.107313.0\n", - "│ ├── events.out.tfevents.1695850472.M50CYP2SBSTD.110437.0\n", - "│ ├── events.out.tfevents.1695942744.M50CYP2SBSTD.15635.0\n", - "│ ├── events.out.tfevents.1696008244.M50CYP2SBSTD.98097.0\n", - "│ ├── events.out.tfevents.1695850981.M50CYP2SBSTD.114740.0\n", - "│ ├── events.out.tfevents.1695939101.M50CYP2SBSTD.143673.0\n", - "│ ├── events.out.tfevents.1695850850.M50CYP2SBSTD.113094.0\n", - "│ ├── events.out.tfevents.1695850404.M50CYP2SBSTD.109391.0\n", - "│ ├── events.out.tfevents.1695942232.M50CYP2SBSTD.7126.0\n", - "│ └── events.out.tfevents.1695849986.M50CYP2SBSTD.107937.0\n", - "├── .workspace\n", - "├── final_model.pth\n", - "├── plan\n", - "│ ├── plan.yaml\n", - "│ ├── defaults\n", - "│ ├── data.yaml\n", - "│ └── cols.yaml\n", - "├── agg_to_col_two_signed_cert.zip\n", - "├── requirements.txt\n", - "├── data\n", - "├── save\n", - "│ ├── torch_cnn_mnist_best.pbuf\n", - "│ ├── 
torch_cnn_mnist_last.pbuf\n", - "│ └── torch_cnn_mnist_init.pbuf\n", - "├── agg_to_col_one_signed_cert.zip\n", - "├── src\n", - "│ ├── pt_cnn.py\n", - "│ ├── mnist_utils.py\n", - "│ ├── __pycache__\n", - "│ │ ├── __init__.cpython-38.pyc\n", - "│ │ └── mnist_utils.cpython-38.pyc\n", - "│ ├── ptmnist_inmemory.py\n", - "│ └── __init__.py\n", - "└── cert\n", - "\n", - "8 directories, 30 files\n", - "Setting Up Certificate Authority...\n", - "\n", - "1. Create Root CA\n", - "1.1 Create Directories\n", - "1.2 Create Database\n", - "1.3 Create CA Request and Certificate\n", - "2. Create Signing Certificate\n", - "2.1 Create Directories\n", - "2.2 Create Database\n", - "2.3 Create Signing Certificate CSR\n", - "2.4 Sign Signing Certificate CSR\n", - "3 Create Certificate Chain\n", - "\n", - "Done.\n", - "Creating AGGREGATOR certificate key pair with following settings: CN=\u001b[31mm50cyp2sbstd\u001b[0m, SAN=\u001b[31mDNS:m50cyp2sbstd\u001b[0m\n", - " Writing AGGREGATOR certificate key pair to: \u001b[32m/home/oamontoy/workspace/cert/server\u001b[0m\n", - "The CSR Hash for file \u001b[32mserver/agg_m50cyp2sbstd.csr\u001b[0m = \u001b[31md49a1328c9e8ccfb65a4d583018704fd9d24b3301bb800ceb9f50b591937e1a5f8f419238b5e4c24af732693d37ce088\u001b[0m\n", - " Signing AGGREGATOR certificate\n", - "Creating COLLABORATOR certificate key pair with following settings: CN=\u001b[31mone\u001b[0m, SAN=\u001b[31mDNS:one\u001b[0m\n", - " Moving COLLABORATOR certificate to: \u001b[32m/home/oamontoy/workspace/cert/col_one\u001b[0m\n", - "The CSR Hash for file \u001b[32mcol_one.csr\u001b[0m = \u001b[31m0caea6371d4b13f51be51507794c4c18e0a9cb408f286f2f81a4b179380b15b3215e94d739ec952065fbc7eb3b2edbba\u001b[0m\n", - " Signing COLLABORATOR certificate\n", - "\n", - "Registering \u001b[32mone\u001b[0m in \u001b[32m/home/oamontoy/.local/workspace/plan/cols.yaml\u001b[0m\n", - "Creating COLLABORATOR certificate key pair with following settings: CN=\u001b[31mtwo\u001b[0m, SAN=\u001b[31mDNS:two\u001b[0m\n", 
- " Moving COLLABORATOR certificate to: \u001b[32m/home/oamontoy/workspace/cert/col_two\u001b[0m\n", - "The CSR Hash for file \u001b[32mcol_two.csr\u001b[0m = \u001b[31m3e6ffe3d25d39bb6f3f1fb851eb8da60d4cbf4e0bee78ad0f7731cc0e6bb47433830523f2c39dc0ca7f0ce79b69cc6c3\u001b[0m\n", - " Signing COLLABORATOR certificate\n", - "\n", - "Registering \u001b[32mtwo\u001b[0m in \u001b[32m/home/oamontoy/.local/workspace/plan/cols.yaml\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ "#Setup default workspace, logging, etc.\n", "fx.init('torch_cnn_mnist')" @@ -215,7 +89,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -242,7 +116,7 @@ " data_collator = DataCollatorWithPadding(tokenizer=tokenizer, padding=\"longest\")\n", " return data_collator, tokenized_datasets\n", "\n", - "base_model_name = \"roberta-large\"\n", + "base_model_name = \"roberta-base\"\n", "padding_side = \"right\"\n", "tokenizer = AutoTokenizer.from_pretrained(base_model_name, padding_side=padding_side)\n", "if getattr(tokenizer, \"pad_token_id\") is None:\n", @@ -259,7 +133,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -287,13 +161,13 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "class GlueMrpcFederatedDataset(DataLoader):\n", " def __init__(self, train_set, valid_set, batch_size, data_collator=None):\n", - " self.data_splitter = EqualNumPyDataSplitter()\n", + " self.data_splitter = EqualNumPyDataSplitter(shuffle=True)\n", " if isinstance(train_set,Dataset):\n", " self.train_set = GlueMrpc.from_dict(train_set.to_dict())\n", " else:\n", @@ -326,7 +200,7 @@ " return DataLoader(self.train_set, batch_size=self.batch_size, collate_fn=data_collator)\n", " \n", " def get_valid_loader(self):\n", - " return DataLoader(self.valid_set, collate_fn=data_collator)\n", + " return 
DataLoader(self.valid_set, batch_size=self.batch_size, collate_fn=data_collator)\n", " \n", " def get_train_data_size(self):\n", " return len(self.train_set)\n", @@ -334,7 +208,8 @@ " def get_valid_data_size(self):\n", " return len(self.valid_set)\n", " \n", - "fl_data = GlueMrpcFederatedDataset(train_set, valid_set, batch_size=32)" + "fl_data = GlueMrpcFederatedDataset(train_set, valid_set, batch_size=32)\n", + "metric = load_metric('glue', \"mrpc\")" ] }, { @@ -346,52 +221,82 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "class LLMTaskRunner(PyTorchTaskRunner):\n", - " def __init__(self, base_model_name, data_loader, device=None, metric=None, **kwargs):\n", - " kwargs['data_loader'] = data_loader\n", + " def __init__(\n", + " self, base_model_name, data_loader, device=None, metric=None, **kwargs\n", + " ):\n", + " kwargs[\"data_loader\"] = data_loader\n", " super().__init__(device, **kwargs)\n", " self.base_model_name = base_model_name\n", " self.metric = metric\n", " self._init_model()\n", " self._init_optimizer()\n", - " \n", + " self.save_models = []\n", + "\n", " def _init_model(self):\n", " model = AutoModelForSequenceClassification.from_pretrained(\n", - " self.base_model_name, return_dict=True)\n", - " peft_config = LoraConfig(task_type=TaskType.SEQ_CLS, inference_mode=False, r=16, lora_alpha=16, lora_dropout=0.1, bias=\"all\")\n", + " self.base_model_name, return_dict=True\n", + " )\n", + " peft_config = LoraConfig(\n", + " task_type=TaskType.SEQ_CLS,\n", + " inference_mode=False,\n", + " r=16,\n", + " lora_alpha=16,\n", + " lora_dropout=0.1,\n", + " bias=\"lora_only\",\n", + " )\n", " self.model = get_peft_model(model, peft_config)\n", - " \n", + "\n", " def _init_optimizer(self):\n", - " no_decay = [\"bias\", \"LayerNorm.weight\"]\n", + " ALL_LAYERNORM_LAYERS = [nn.LayerNorm]\n", + " decay_parameters = get_parameter_names(self.model, ALL_LAYERNORM_LAYERS)\n", + " 
decay_parameters = [name for name in decay_parameters if \"bias\" not in name]\n", + "\n", " optimizer_grouped_parameters = [\n", - " {\n", - " \"params\": [p for n, p in self.model.named_parameters() if not any(nd in n for nd in no_decay)],\n", - " \"weight_decay\": 0.01,\n", - " },\n", - " {\n", - " \"params\": [p for n, p in self.model.named_parameters() if any(nd in n for nd in no_decay)],\n", - " \"weight_decay\": 0.0,\n", - " },\n", - " ]\n", - " self.optimizer = AdamW(optimizer_grouped_parameters, lr=0.01)\n", - " \n", + " {\n", + " \"params\": [\n", + " p\n", + " for n, p in self.model.named_parameters()\n", + " if (n in decay_parameters and p.requires_grad)\n", + " ],\n", + " \"weight_decay\": 0.01,\n", + " },\n", + " {\n", + " \"params\": [\n", + " p\n", + " for n, p in self.model.named_parameters()\n", + " if (n not in decay_parameters and p.requires_grad)\n", + " ],\n", + " \"weight_decay\": 0.0,\n", + " },\n", + " ]\n", + " self.optimizer = AdamW(optimizer_grouped_parameters, lr=0.001)\n", + " self.lr_scheduler = get_scheduler(\n", + " name=\"linear\",\n", + " optimizer=self.optimizer,\n", + " num_warmup_steps=0,\n", + " num_training_steps=len(self.data_loader.train_set) * 5,\n", + " )\n", + "\n", " self.training_round_completed = False\n", " self.initialize_tensorkeys_for_functions()\n", - " \n", + "\n", + " def train(self):\n", + " return self.model.train()\n", + "\n", " def state_dict(self):\n", " return get_peft_model_state_dict(self.model)\n", - " \n", + "\n", " def load_state_dict(self, state_dict: Mapping[str, Any], strict: bool = True):\n", - " return set_peft_model_state_dict(\n", - " self.model, state_dict\n", - " )\n", - " \n", - " def validate(self, col_name, round_num, input_tensor_dict,\n", - " use_tqdm=False, **kwargs):\n", + " return set_peft_model_state_dict(self.model, state_dict)\n", + "\n", + " def validate(\n", + " self, col_name, round_num, input_tensor_dict, use_tqdm=False, **kwargs\n", + " ):\n", " \"\"\"Validate.\n", "\n", " 
Run validation of the model on the local data.\n", @@ -407,40 +312,42 @@ " local_output_dict: Tensors to maintain in the local TensorDB\n", "\n", " \"\"\"\n", + " self.save_models.append(input_tensor_dict.copy())\n", " self.rebuild_model(round_num, input_tensor_dict, validation=True)\n", " self.model.eval()\n", + " \n", + "\n", " self.model.to(self.device)\n", " val_score = 0\n", " total_samples = 0\n", "\n", " loader = self.data_loader.get_valid_loader()\n", " if use_tqdm:\n", - " loader = tqdm(loader, desc='validate')\n", + " loader = tqdm(loader, desc=\"validate\")\n", "\n", " with pt.no_grad():\n", " for sample in loader:\n", - " samples = sample['input_ids'].shape[0]\n", + " samples = sample[\"input_ids\"].shape[0]\n", " total_samples += samples\n", " output = self.model(**sample)\n", " # get the index of the max log-probability\n", " logits = output.logits\n", " predictions = torch.argmax(logits, dim=-1)\n", - " metric.add_batch(predictions=predictions, references=sample['labels'])\n", - " val_score = metric.compute()['accuracy']\n", + " metric.add_batch(predictions=predictions, references=sample[\"labels\"])\n", + " val_score = metric.compute()[\"accuracy\"]\n", "\n", " origin = col_name\n", - " suffix = 'validate'\n", - " if kwargs['apply'] == 'local':\n", - " suffix += '_local'\n", + " suffix = \"validate\"\n", + " if kwargs[\"apply\"] == \"local\":\n", + " suffix += \"_local\"\n", " else:\n", - " suffix += '_agg'\n", - " tags = ('metric',)\n", + " suffix += \"_agg\"\n", + " tags = (\"metric\",)\n", " tags = change_tags(tags, add_field=suffix)\n", " # TODO figure out a better way to pass in metric for this pytorch\n", " # validate function\n", " output_tensor_dict = {\n", - " TensorKey('acc', origin, round_num, True, tags):\n", - " np.array(val_score)\n", + " TensorKey(\"acc\", origin, round_num, True, tags): np.array(val_score)\n", " }\n", "\n", " # Empty list represents metrics that should only be stored locally\n", @@ -459,12 +366,13 @@ " \"\"\"\n", " 
losses = []\n", " for sample in batch_generator:\n", - " self.optimizer.zero_grad()\n", + " self.model.zero_grad()\n", " output = self.model(**sample)\n", " loss = output.loss\n", " loss.backward()\n", - " torch.nn.utils.clip_grad_norm_(self.model.parameters(),1.0)\n", - " self.model.zero_grad()\n", + " torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)\n", + " self.optimizer.step()\n", + " self.lr_scheduler.step()\n", " losses.append(loss.detach().cpu().numpy())\n", " loss = np.mean(losses)\n", " if self.model.config.problem_type == \"regression\":\n", @@ -474,10 +382,14 @@ " elif self.model.config.problem_type == \"multi_label_classification\":\n", " loss_fct = BCEWithLogitsLoss()\n", " return Metric(name=loss_fct._get_name(), value=np.array(loss))\n", - " \n", - " \n", - " def save_native(self, filepath, model_state_dict_key='model_state_dict',\n", - " optimizer_state_dict_key='optimizer_state_dict', **kwargs):\n", + "\n", + " def save_native(\n", + " self,\n", + " filepath,\n", + " model_state_dict_key=\"model_state_dict\",\n", + " optimizer_state_dict_key=\"optimizer_state_dict\",\n", + " **kwargs,\n", + " ):\n", " \"\"\"\n", " Save model and optimizer states in a picked file specified by the \\\n", " filepath. model_/optimizer_state_dicts are stored in the keys provided. \\\n", @@ -497,64 +409,18 @@ " \"\"\"\n", " pickle_dict = {\n", " model_state_dict_key: get_peft_model_state_dict(self.model),\n", - " optimizer_state_dict_key: self.optimizer.state_dict()\n", + " optimizer_state_dict_key: self.optimizer.state_dict(),\n", " }\n", " pt.save(pickle_dict, filepath)" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_159004/1723172838.py:2: FutureWarning: load_metric is deprecated and will be removed in the next major version of datasets. 
Use 'evaluate.load' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate\n", - " metric = load_metric('glue', \"mrpc\")\n", - "Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']\n", - "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" - ] - }, - { - "data": { - "text/html": [ - "
[08:48:31] WARNING  tried to remove tensor: __opt_state_needed not present in the tensor dict                                                       utils.py:172\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[08:48:31]\u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m tried to remove tensor: __opt_state_needed not present in the tensor dict \u001b]8;id=932122;file:///home/oamontoy/workspace/sec-openfl/openfl/utilities/utils.py\u001b\\\u001b[2mutils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=685149;file:///home/oamontoy/workspace/sec-openfl/openfl/utilities/utils.py#172\u001b\\\u001b[2m172\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']\n", - "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" - ] - }, - { - "data": { - "text/html": [ - "
[08:48:34] WARNING  tried to remove tensor: __opt_state_needed not present in the tensor dict                                                       utils.py:172\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[08:48:34]\u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m tried to remove tensor: __opt_state_needed not present in the tensor dict \u001b]8;id=38894;file:///home/oamontoy/workspace/sec-openfl/openfl/utilities/utils.py\u001b\\\u001b[2mutils.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=682120;file:///home/oamontoy/workspace/sec-openfl/openfl/utilities/utils.py#172\u001b\\\u001b[2m172\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "num_collaborators = 2\n", - "metric = load_metric('glue', \"mrpc\")\n", "collaborator_models = [\n", " LLMTaskRunner(\n", " base_model_name,\n", @@ -567,25 +433,9 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Original training data size: 3668\n", - "Original validation data size: 1725\n", - "\n", - "Collaborator 0's training data size: 1834\n", - "Collaborator 0's validation data size: 863\n", - "\n", - "Collaborator 1's training data size: 1834\n", - "Collaborator 1's validation data size: 862\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "#Original TinyImageNet dataset\n", "print(f'Original training data size: {len(fl_data.train_set)}')\n", @@ -603,332 +453,12 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
           INFO     Updating aggregator.settings.rounds_to_train to 3...                                                                           native.py:102\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Updating aggregator.settings.rounds_to_train to \u001b[1;36m3\u001b[0m\u001b[33m...\u001b[0m \u001b]8;id=22181;file:///home/oamontoy/workspace/sec-openfl/openfl/native/native.py\u001b\\\u001b[2mnative.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=265323;file:///home/oamontoy/workspace/sec-openfl/openfl/native/native.py#102\u001b\\\u001b[2m102\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
           INFO     FL-Plan hash is 86b08340e96ba9e485169da1f860ea968811d1bf2e6867774fae4398426dd33c6ae56ca202002d393e3a4d91f946c1bc                 plan.py:235\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m FL-Plan hash is \u001b[34m86b08340e96ba9e485169da1f860ea968811d1bf2e6867774fae4398426dd33c6ae56ca202002d393e3a4d91f946c1bc\u001b[0m \u001b]8;id=965130;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py\u001b\\\u001b[2mplan.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=969631;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py#235\u001b\\\u001b[2m235\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
           INFO     Building 🡆 Object NoCompressionPipeline from openfl.pipelines Module.                                                            plan.py:171\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Building \u001b[31m🡆\u001b[0m Object \u001b[31mNoCompressionPipeline\u001b[0m from \u001b[31mopenfl.pipelines\u001b[0m Module. \u001b]8;id=418157;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py\u001b\\\u001b[2mplan.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=469081;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py#171\u001b\\\u001b[2m171\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
           INFO     Creating Initial Weights File    🠆 save/torch_cnn_mnist_init.pbuf                                                              native.py:277\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Creating Initial Weights File 🠆 save/torch_cnn_mnist_init.pbuf \u001b]8;id=949808;file:///home/oamontoy/workspace/sec-openfl/openfl/native/native.py\u001b\\\u001b[2mnative.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=806002;file:///home/oamontoy/workspace/sec-openfl/openfl/native/native.py#277\u001b\\\u001b[2m277\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
           INFO     Starting Experiment...                                                                                                         native.py:281\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Starting Experiment\u001b[33m...\u001b[0m \u001b]8;id=768304;file:///home/oamontoy/workspace/sec-openfl/openfl/native/native.py\u001b\\\u001b[2mnative.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=820559;file:///home/oamontoy/workspace/sec-openfl/openfl/native/native.py#281\u001b\\\u001b[2m281\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
           INFO     Building 🡆 Object RandomGroupedAssigner from openfl.component Module.                                                            plan.py:171\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Building \u001b[31m🡆\u001b[0m Object \u001b[31mRandomGroupedAssigner\u001b[0m from \u001b[31mopenfl.component\u001b[0m Module. \u001b]8;id=959697;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py\u001b\\\u001b[2mplan.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=588178;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py#171\u001b\\\u001b[2m171\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
           INFO     Building 🡆 Object CutoffTimeBasedStragglerHandling from openfl.component.straggler_handling_functions Module.                    plan.py:171\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Building \u001b[31m🡆\u001b[0m Object \u001b[31mCutoffTimeBasedStragglerHandling\u001b[0m from \u001b[31mopenfl.component.straggler_handling_functions\u001b[0m Module. \u001b]8;id=199471;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py\u001b\\\u001b[2mplan.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=897448;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py#171\u001b\\\u001b[2m171\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
           INFO     Importing 🡆 Object write_metric from src.mnist_utils Module.                                                                     plan.py:199\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Importing \u001b[31m🡆\u001b[0m Object \u001b[31mwrite_metric\u001b[0m from \u001b[31msrc.mnist_utils\u001b[0m Module. \u001b]8;id=125418;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py\u001b\\\u001b[2mplan.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=640295;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py#199\u001b\\\u001b[2m199\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/oamontoy/workspace/llama-env/lib/python3.8/site-packages/torchvision/io/image.py:13: UserWarning: Failed to load image Python extension: libtorch_cuda_cu.so: cannot open shared object file: No such file or directory\n", - " warn(f\"Failed to load image Python extension: {e}\")\n" - ] - }, - { - "data": { - "text/html": [ - "
[08:48:35] INFO     Building 🡆 Object Aggregator from openfl.component Module.                                                                       plan.py:171\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[08:48:35]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Building \u001b[31m🡆\u001b[0m Object \u001b[31mAggregator\u001b[0m from \u001b[31mopenfl.component\u001b[0m Module. \u001b]8;id=299428;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py\u001b\\\u001b[2mplan.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=109248;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py#171\u001b\\\u001b[2m171\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
           INFO     Using custom log metric: <function write_metric at 0x7f5c68ad28b0>                                                          aggregator.py:97\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Using custom log metric: \u001b[1m<\u001b[0m\u001b[1;95mfunction\u001b[0m\u001b[39m write_metric at \u001b[0m\u001b[1;36m0x7f5c68ad28b0\u001b[0m\u001b[1m>\u001b[0m \u001b]8;id=655419;file:///home/oamontoy/workspace/sec-openfl/openfl/component/aggregator/aggregator.py\u001b\\\u001b[2maggregator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=161984;file:///home/oamontoy/workspace/sec-openfl/openfl/component/aggregator/aggregator.py#97\u001b\\\u001b[2m97\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
           INFO     Building 🡆 Object Collaborator from openfl.component Module.                                                                     plan.py:171\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Building \u001b[31m🡆\u001b[0m Object \u001b[31mCollaborator\u001b[0m from \u001b[31mopenfl.component\u001b[0m Module. \u001b]8;id=390053;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py\u001b\\\u001b[2mplan.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=853120;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py#171\u001b\\\u001b[2m171\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
           INFO     Building 🡆 Object Collaborator from openfl.component Module.                                                                     plan.py:171\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Building \u001b[31m🡆\u001b[0m Object \u001b[31mCollaborator\u001b[0m from \u001b[31mopenfl.component\u001b[0m Module. \u001b]8;id=612439;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py\u001b\\\u001b[2mplan.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=601521;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/plan/plan.py#171\u001b\\\u001b[2m171\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
           INFO     Waiting for tasks...                                                                                                     collaborator.py:178\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Waiting for tasks\u001b[33m...\u001b[0m \u001b]8;id=806029;file:///home/oamontoy/workspace/sec-openfl/openfl/component/collaborator/collaborator.py\u001b\\\u001b[2mcollaborator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=928412;file:///home/oamontoy/workspace/sec-openfl/openfl/component/collaborator/collaborator.py#178\u001b\\\u001b[2m178\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
           INFO     Sending tasks to collaborator one for round 0                                                                              aggregator.py:329\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Sending tasks to collaborator one for round \u001b[1;36m0\u001b[0m \u001b]8;id=515202;file:///home/oamontoy/workspace/sec-openfl/openfl/component/aggregator/aggregator.py\u001b\\\u001b[2maggregator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=103453;file:///home/oamontoy/workspace/sec-openfl/openfl/component/aggregator/aggregator.py#329\u001b\\\u001b[2m329\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
           INFO     Received the following tasks: ['aggregated_model_validation', 'train', 'locally_tuned_model_validation']                 collaborator.py:168\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Received the following tasks: \u001b[1m[\u001b[0m\u001b[32m'aggregated_model_validation'\u001b[0m, \u001b[32m'train'\u001b[0m, \u001b[32m'locally_tuned_model_validation'\u001b[0m\u001b[1m]\u001b[0m \u001b]8;id=266582;file:///home/oamontoy/workspace/sec-openfl/openfl/component/collaborator/collaborator.py\u001b\\\u001b[2mcollaborator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=901781;file:///home/oamontoy/workspace/sec-openfl/openfl/component/collaborator/collaborator.py#168\u001b\\\u001b[2m168\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[08:48:36] INFO     Using TaskRunner subclassing API                                                                                         collaborator.py:253\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[08:48:36]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Using TaskRunner subclassing API \u001b]8;id=457102;file:///home/oamontoy/workspace/sec-openfl/openfl/component/collaborator/collaborator.py\u001b\\\u001b[2mcollaborator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=223379;file:///home/oamontoy/workspace/sec-openfl/openfl/component/collaborator/collaborator.py#253\u001b\\\u001b[2m253\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/oamontoy/workspace/sec-openfl/openfl/federated/task/runner_pt.py:284: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined behavior. You may want to copy the array to protect its data or make it writable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at ../torch/csrc/utils/tensor_numpy.cpp:206.)\n", - " new_state[k] = pt.from_numpy(tensor_dict.pop(k)).to(device)\n", - "You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n" - ] - }, - { - "data": { - "text/html": [ - "
[08:49:31] METRIC   Round 0, collaborator one is sending metric for task aggregated_model_validation: acc   0.673233                         collaborator.py:415\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[08:49:31]\u001b[0m\u001b[2;36m \u001b[0mMETRIC Round \u001b[1;36m0\u001b[0m, collaborator one is sending metric for task aggregated_model_validation: acc \u001b[1;36m0.673233\u001b[0m \u001b]8;id=512008;file:///home/oamontoy/workspace/sec-openfl/openfl/component/collaborator/collaborator.py\u001b\\\u001b[2mcollaborator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=179476;file:///home/oamontoy/workspace/sec-openfl/openfl/component/collaborator/collaborator.py#415\u001b\\\u001b[2m415\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
           INFO     Collaborator one is sending task results for aggregated_model_validation, round 0                                          aggregator.py:520\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Collaborator one is sending task results for aggregated_model_validation, round \u001b[1;36m0\u001b[0m \u001b]8;id=921737;file:///home/oamontoy/workspace/sec-openfl/openfl/component/aggregator/aggregator.py\u001b\\\u001b[2maggregator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=943670;file:///home/oamontoy/workspace/sec-openfl/openfl/component/aggregator/aggregator.py#520\u001b\\\u001b[2m520\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
           METRIC   Round 0, collaborator validate_agg aggregated_model_validation result acc:      0.673233                                   aggregator.py:559\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0mMETRIC Round \u001b[1;36m0\u001b[0m, collaborator validate_agg aggregated_model_validation result acc: \u001b[1;36m0.673233\u001b[0m \u001b]8;id=778930;file:///home/oamontoy/workspace/sec-openfl/openfl/component/aggregator/aggregator.py\u001b\\\u001b[2maggregator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=605180;file:///home/oamontoy/workspace/sec-openfl/openfl/component/aggregator/aggregator.py#559\u001b\\\u001b[2m559\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
           INFO     Using TaskRunner subclassing API                                                                                         collaborator.py:253\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Using TaskRunner subclassing API \u001b]8;id=239383;file:///home/oamontoy/workspace/sec-openfl/openfl/component/collaborator/collaborator.py\u001b\\\u001b[2mcollaborator.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=814426;file:///home/oamontoy/workspace/sec-openfl/openfl/component/collaborator/collaborator.py#253\u001b\\\u001b[2m253\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
           INFO     Run 0 epoch of 0 round                                                                                                      runner_pt.py:155\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Run \u001b[1;36m0\u001b[0m epoch of \u001b[1;36m0\u001b[0m round \u001b]8;id=118270;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/task/runner_pt.py\u001b\\\u001b[2mrunner_pt.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=848215;file:///home/oamontoy/workspace/sec-openfl/openfl/federated/task/runner_pt.py#155\u001b\\\u001b[2m155\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "ename": "AttributeError", - "evalue": "'RobertaForSequenceClassification' object has no attribute 'step'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "File \u001b[0;32m~/workspace/llama-env/lib/python3.8/site-packages/peft/peft_model.py:434\u001b[0m, in \u001b[0;36mPeftModel.__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 433\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 434\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39msuper\u001b[39;49m()\u001b[39m.\u001b[39;49m\u001b[39m__getattr__\u001b[39;49m(name) \u001b[39m# defer to nn.Module's logic\u001b[39;00m\n\u001b[1;32m 435\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mAttributeError\u001b[39;00m:\n", - "File \u001b[0;32m~/workspace/llama-env/lib/python3.8/site-packages/torch/nn/modules/module.py:1695\u001b[0m, in \u001b[0;36m__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 1688\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mparameters\u001b[39m(\u001b[39mself\u001b[39m, recurse: \u001b[39mbool\u001b[39m \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Iterator[Parameter]:\n\u001b[1;32m 1689\u001b[0m \u001b[39m \u001b[39m\u001b[39mr\u001b[39m\u001b[39m\"\"\"Returns an iterator over module 
parameters.\u001b[39;00m\n\u001b[1;32m 1690\u001b[0m \n\u001b[1;32m 1691\u001b[0m \u001b[39m This is typically passed to an optimizer.\u001b[39;00m\n\u001b[1;32m 1692\u001b[0m \n\u001b[1;32m 1693\u001b[0m \u001b[39m Args:\u001b[39;00m\n\u001b[1;32m 1694\u001b[0m \u001b[39m recurse (bool): if True, then yields parameters of this module\u001b[39;00m\n\u001b[0;32m-> 1695\u001b[0m \u001b[39m and all submodules. Otherwise, yields only parameters that\u001b[39;00m\n\u001b[1;32m 1696\u001b[0m \u001b[39m are direct members of this module.\u001b[39;00m\n\u001b[1;32m 1697\u001b[0m \n\u001b[1;32m 1698\u001b[0m \u001b[39m Yields:\u001b[39;00m\n\u001b[1;32m 1699\u001b[0m \u001b[39m Parameter: module parameter\u001b[39;00m\n\u001b[1;32m 1700\u001b[0m \n\u001b[1;32m 1701\u001b[0m \u001b[39m Example::\u001b[39;00m\n\u001b[1;32m 1702\u001b[0m \n\u001b[1;32m 1703\u001b[0m \u001b[39m >>> # xdoctest: +SKIP(\"undefined vars\")\u001b[39;00m\n\u001b[1;32m 1704\u001b[0m \u001b[39m >>> for param in model.parameters():\u001b[39;00m\n\u001b[1;32m 1705\u001b[0m \u001b[39m >>> print(type(param), param.size())\u001b[39;00m\n\u001b[1;32m 1706\u001b[0m \u001b[39m (20L,)\u001b[39;00m\n\u001b[1;32m 1707\u001b[0m \u001b[39m (20L, 1L, 5L, 5L)\u001b[39;00m\n\u001b[1;32m 1708\u001b[0m \n\u001b[1;32m 1709\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m 1710\u001b[0m \u001b[39mfor\u001b[39;00m name, param \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnamed_parameters(recurse\u001b[39m=\u001b[39mrecurse):\n", - "\u001b[0;31mAttributeError\u001b[0m: 'PeftModelForSequenceClassification' object has no attribute 'step'", - "\nDuring handling of the above exception, another exception occurred:\n", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "File \u001b[0;32m~/workspace/llama-env/lib/python3.8/site-packages/peft/tuners/lora.py:492\u001b[0m, in \u001b[0;36mLoraModel.__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 491\u001b[0m 
\u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 492\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39msuper\u001b[39;49m()\u001b[39m.\u001b[39;49m\u001b[39m__getattr__\u001b[39;49m(name) \u001b[39m# defer to nn.Module's logic\u001b[39;00m\n\u001b[1;32m 493\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mAttributeError\u001b[39;00m:\n", - "File \u001b[0;32m~/workspace/llama-env/lib/python3.8/site-packages/torch/nn/modules/module.py:1695\u001b[0m, in \u001b[0;36m__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 1688\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mparameters\u001b[39m(\u001b[39mself\u001b[39m, recurse: \u001b[39mbool\u001b[39m \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Iterator[Parameter]:\n\u001b[1;32m 1689\u001b[0m \u001b[39m \u001b[39m\u001b[39mr\u001b[39m\u001b[39m\"\"\"Returns an iterator over module parameters.\u001b[39;00m\n\u001b[1;32m 1690\u001b[0m \n\u001b[1;32m 1691\u001b[0m \u001b[39m This is typically passed to an optimizer.\u001b[39;00m\n\u001b[1;32m 1692\u001b[0m \n\u001b[1;32m 1693\u001b[0m \u001b[39m Args:\u001b[39;00m\n\u001b[1;32m 1694\u001b[0m \u001b[39m recurse (bool): if True, then yields parameters of this module\u001b[39;00m\n\u001b[0;32m-> 1695\u001b[0m \u001b[39m and all submodules. 
Otherwise, yields only parameters that\u001b[39;00m\n\u001b[1;32m 1696\u001b[0m \u001b[39m are direct members of this module.\u001b[39;00m\n\u001b[1;32m 1697\u001b[0m \n\u001b[1;32m 1698\u001b[0m \u001b[39m Yields:\u001b[39;00m\n\u001b[1;32m 1699\u001b[0m \u001b[39m Parameter: module parameter\u001b[39;00m\n\u001b[1;32m 1700\u001b[0m \n\u001b[1;32m 1701\u001b[0m \u001b[39m Example::\u001b[39;00m\n\u001b[1;32m 1702\u001b[0m \n\u001b[1;32m 1703\u001b[0m \u001b[39m >>> # xdoctest: +SKIP(\"undefined vars\")\u001b[39;00m\n\u001b[1;32m 1704\u001b[0m \u001b[39m >>> for param in model.parameters():\u001b[39;00m\n\u001b[1;32m 1705\u001b[0m \u001b[39m >>> print(type(param), param.size())\u001b[39;00m\n\u001b[1;32m 1706\u001b[0m \u001b[39m (20L,)\u001b[39;00m\n\u001b[1;32m 1707\u001b[0m \u001b[39m (20L, 1L, 5L, 5L)\u001b[39;00m\n\u001b[1;32m 1708\u001b[0m \n\u001b[1;32m 1709\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m 1710\u001b[0m \u001b[39mfor\u001b[39;00m name, param \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnamed_parameters(recurse\u001b[39m=\u001b[39mrecurse):\n", - "\u001b[0;31mAttributeError\u001b[0m: 'LoraModel' object has no attribute 'step'", - "\nDuring handling of the above exception, another exception occurred:\n", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m/home/oamontoy/workspace/sec-openfl/openfl-tutorials/Federated_PyTorch_LLM.ipynb Cell 18\u001b[0m line \u001b[0;36m2\n\u001b[1;32m 1\u001b[0m \u001b[39m#Run experiment, return trained FederatedModel\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m final_fl_model \u001b[39m=\u001b[39m fx\u001b[39m.\u001b[39;49mrun_experiment(collaborators,{\u001b[39m'\u001b[39;49m\u001b[39maggregator.settings.rounds_to_train\u001b[39;49m\u001b[39m'\u001b[39;49m:\u001b[39m3\u001b[39;49m})\n", - "File \u001b[0;32m~/workspace/sec-openfl/openfl/native/native.py:295\u001b[0m, in \u001b[0;36mrun_experiment\u001b[0;34m(collaborator_dict, 
override_config)\u001b[0m\n\u001b[1;32m 293\u001b[0m \u001b[39mfor\u001b[39;00m col \u001b[39min\u001b[39;00m plan\u001b[39m.\u001b[39mauthorized_cols:\n\u001b[1;32m 294\u001b[0m collaborator \u001b[39m=\u001b[39m collaborators[col]\n\u001b[0;32m--> 295\u001b[0m collaborator\u001b[39m.\u001b[39;49mrun_simulation()\n\u001b[1;32m 297\u001b[0m \u001b[39m# Set the weights for the final model\u001b[39;00m\n\u001b[1;32m 298\u001b[0m model\u001b[39m.\u001b[39mrebuild_model(\n\u001b[1;32m 299\u001b[0m rounds_to_train \u001b[39m-\u001b[39m \u001b[39m1\u001b[39m, aggregator\u001b[39m.\u001b[39mlast_tensor_dict, validation\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m)\n", - "File \u001b[0;32m~/workspace/sec-openfl/openfl/component/collaborator/collaborator.py:170\u001b[0m, in \u001b[0;36mCollaborator.run_simulation\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 168\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlogger\u001b[39m.\u001b[39minfo(\u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39mReceived the following tasks: \u001b[39m\u001b[39m{\u001b[39;00mtasks\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m)\n\u001b[1;32m 169\u001b[0m \u001b[39mfor\u001b[39;00m task \u001b[39min\u001b[39;00m tasks:\n\u001b[0;32m--> 170\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mdo_task(task, round_number)\n\u001b[1;32m 171\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlogger\u001b[39m.\u001b[39minfo(\u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39mAll tasks completed on \u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcollaborator_name\u001b[39m}\u001b[39;00m\u001b[39m \u001b[39m\u001b[39m'\u001b[39m\n\u001b[1;32m 172\u001b[0m \u001b[39mf\u001b[39m\u001b[39m'\u001b[39m\u001b[39mfor round \u001b[39m\u001b[39m{\u001b[39;00mround_number\u001b[39m}\u001b[39;00m\u001b[39m...\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[1;32m 173\u001b[0m \u001b[39mbreak\u001b[39;00m\n", - "File 
\u001b[0;32m~/workspace/sec-openfl/openfl/component/collaborator/collaborator.py:255\u001b[0m, in \u001b[0;36mCollaborator.do_task\u001b[0;34m(self, task, round_number)\u001b[0m\n\u001b[1;32m 252\u001b[0m func \u001b[39m=\u001b[39m \u001b[39mgetattr\u001b[39m(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtask_runner, func_name)\n\u001b[1;32m 253\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlogger\u001b[39m.\u001b[39minfo(\u001b[39m'\u001b[39m\u001b[39mUsing TaskRunner subclassing API\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[0;32m--> 255\u001b[0m global_output_tensor_dict, local_output_tensor_dict \u001b[39m=\u001b[39m func(\n\u001b[1;32m 256\u001b[0m col_name\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mcollaborator_name,\n\u001b[1;32m 257\u001b[0m round_num\u001b[39m=\u001b[39;49mround_number,\n\u001b[1;32m 258\u001b[0m input_tensor_dict\u001b[39m=\u001b[39;49minput_tensor_dict,\n\u001b[1;32m 259\u001b[0m \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 261\u001b[0m \u001b[39m# Save global and local output_tensor_dicts to TensorDB\u001b[39;00m\n\u001b[1;32m 262\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtensor_db\u001b[39m.\u001b[39mcache_tensor(global_output_tensor_dict)\n", - "File \u001b[0;32m~/workspace/sec-openfl/openfl/federated/task/runner_pt.py:159\u001b[0m, in \u001b[0;36mPyTorchTaskRunner.train_batches\u001b[0;34m(self, col_name, round_num, input_tensor_dict, use_tqdm, epochs, **kwargs)\u001b[0m\n\u001b[1;32m 157\u001b[0m \u001b[39mif\u001b[39;00m use_tqdm:\n\u001b[1;32m 158\u001b[0m loader \u001b[39m=\u001b[39m tqdm\u001b[39m.\u001b[39mtqdm(loader, desc\u001b[39m=\u001b[39m\u001b[39m'\u001b[39m\u001b[39mtrain epoch\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[0;32m--> 159\u001b[0m metric \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mtrain_epoch(loader)\n\u001b[1;32m 160\u001b[0m \u001b[39m# Output metric tensors (scalar)\u001b[39;00m\n\u001b[1;32m 161\u001b[0m 
origin \u001b[39m=\u001b[39m col_name\n", - "\u001b[1;32m/home/oamontoy/workspace/sec-openfl/openfl-tutorials/Federated_PyTorch_LLM.ipynb Cell 18\u001b[0m line \u001b[0;36m1\n\u001b[1;32m 113\u001b[0m loss\u001b[39m.\u001b[39mbackward()\n\u001b[1;32m 114\u001b[0m torch\u001b[39m.\u001b[39mnn\u001b[39m.\u001b[39mutils\u001b[39m.\u001b[39mclip_grad_norm_(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mmodel\u001b[39m.\u001b[39mparameters(),\u001b[39m1.0\u001b[39m)\n\u001b[0;32m--> 115\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mmodel\u001b[39m.\u001b[39;49mstep()\n\u001b[1;32m 116\u001b[0m losses\u001b[39m.\u001b[39mappend(loss\u001b[39m.\u001b[39mdetach()\u001b[39m.\u001b[39mcpu()\u001b[39m.\u001b[39mnumpy())\n\u001b[1;32m 117\u001b[0m loss \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39mmean(losses)\n", - "File \u001b[0;32m~/workspace/llama-env/lib/python3.8/site-packages/peft/peft_model.py:436\u001b[0m, in \u001b[0;36mPeftModel.__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 434\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39m\u001b[39m__getattr__\u001b[39m(name) \u001b[39m# defer to nn.Module's logic\u001b[39;00m\n\u001b[1;32m 435\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mAttributeError\u001b[39;00m:\n\u001b[0;32m--> 436\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mgetattr\u001b[39;49m(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mbase_model, name)\n", - "File \u001b[0;32m~/workspace/llama-env/lib/python3.8/site-packages/peft/tuners/lora.py:494\u001b[0m, in \u001b[0;36mLoraModel.__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 492\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39m\u001b[39m__getattr__\u001b[39m(name) \u001b[39m# defer to nn.Module's logic\u001b[39;00m\n\u001b[1;32m 493\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mAttributeError\u001b[39;00m:\n\u001b[0;32m--> 494\u001b[0m \u001b[39mreturn\u001b[39;00m 
\u001b[39mgetattr\u001b[39;49m(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mmodel, name)\n", - "File \u001b[0;32m~/workspace/llama-env/lib/python3.8/site-packages/torch/nn/modules/module.py:1695\u001b[0m, in \u001b[0;36m__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 1688\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mparameters\u001b[39m(\u001b[39mself\u001b[39m, recurse: \u001b[39mbool\u001b[39m \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Iterator[Parameter]:\n\u001b[1;32m 1689\u001b[0m \u001b[39m \u001b[39m\u001b[39mr\u001b[39m\u001b[39m\"\"\"Returns an iterator over module parameters.\u001b[39;00m\n\u001b[1;32m 1690\u001b[0m \n\u001b[1;32m 1691\u001b[0m \u001b[39m This is typically passed to an optimizer.\u001b[39;00m\n\u001b[1;32m 1692\u001b[0m \n\u001b[1;32m 1693\u001b[0m \u001b[39m Args:\u001b[39;00m\n\u001b[1;32m 1694\u001b[0m \u001b[39m recurse (bool): if True, then yields parameters of this module\u001b[39;00m\n\u001b[0;32m-> 1695\u001b[0m \u001b[39m and all submodules. 
Otherwise, yields only parameters that\u001b[39;00m\n\u001b[1;32m 1696\u001b[0m \u001b[39m are direct members of this module.\u001b[39;00m\n\u001b[1;32m 1697\u001b[0m \n\u001b[1;32m 1698\u001b[0m \u001b[39m Yields:\u001b[39;00m\n\u001b[1;32m 1699\u001b[0m \u001b[39m Parameter: module parameter\u001b[39;00m\n\u001b[1;32m 1700\u001b[0m \n\u001b[1;32m 1701\u001b[0m \u001b[39m Example::\u001b[39;00m\n\u001b[1;32m 1702\u001b[0m \n\u001b[1;32m 1703\u001b[0m \u001b[39m >>> # xdoctest: +SKIP(\"undefined vars\")\u001b[39;00m\n\u001b[1;32m 1704\u001b[0m \u001b[39m >>> for param in model.parameters():\u001b[39;00m\n\u001b[1;32m 1705\u001b[0m \u001b[39m >>> print(type(param), param.size())\u001b[39;00m\n\u001b[1;32m 1706\u001b[0m \u001b[39m (20L,)\u001b[39;00m\n\u001b[1;32m 1707\u001b[0m \u001b[39m (20L, 1L, 5L, 5L)\u001b[39;00m\n\u001b[1;32m 1708\u001b[0m \n\u001b[1;32m 1709\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m 1710\u001b[0m \u001b[39mfor\u001b[39;00m name, param \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnamed_parameters(recurse\u001b[39m=\u001b[39mrecurse):\n\u001b[1;32m 1711\u001b[0m \u001b[39myield\u001b[39;00m param\n", - "\u001b[0;31mAttributeError\u001b[0m: 'RobertaForSequenceClassification' object has no attribute 'step'" - ] - } - ], + "outputs": [], "source": [ "#Run experiment, return trained FederatedModel\n", - "final_fl_model = fx.run_experiment(collaborators,{'aggregator.settings.rounds_to_train':3})" + "final_fl_model = fx.run_experiment(collaborators,{'aggregator.settings.rounds_to_train':1,\"tasks.train.kwargs.epochs\":10})" ] }, { From 00a7d92a978400071c1036828d7ff23c5dde8542 Mon Sep 17 00:00:00 2001 From: porteratzo Date: Thu, 26 Oct 2023 09:52:32 -0700 Subject: [PATCH 4/7] changes --- openfl-tutorials/Federated_PyTorch_LLM.ipynb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/openfl-tutorials/Federated_PyTorch_LLM.ipynb b/openfl-tutorials/Federated_PyTorch_LLM.ipynb index 
78b9978604..ba5b2bede9 100644 --- a/openfl-tutorials/Federated_PyTorch_LLM.ipynb +++ b/openfl-tutorials/Federated_PyTorch_LLM.ipynb @@ -4,14 +4,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Federated PyTorch TinyImageNet Tutorial" + "# Federated PyTorch LLM Tutorial" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "This notebook is an example of Transfer Learning \n", + "This notebook is an example of LLM fine-tuning\n", "\n", "Custom DataLoader is used with OpenFL Python API" ] @@ -77,7 +77,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Now we are ready to define our dataset and model to perform federated learning on. The dataset should be composed of a numpy arrayWe start with a simple fully connected model that is trained on the MNIST dataset. " + "Now we are ready to define our dataset and model to perform federated learning on. The dataset should be composed of a numpy array. We start with a simple RoBERTa model that is trained on the GLUE MRPC dataset. 
" ] }, { @@ -458,7 +458,7 @@ "outputs": [], "source": [ "#Run experiment, return trained FederatedModel\n", - "final_fl_model = fx.run_experiment(collaborators,{'aggregator.settings.rounds_to_train':1,\"tasks.train.kwargs.epochs\":10})" + "final_fl_model = fx.run_experiment(collaborators,{'aggregator.settings.rounds_to_train':10,\"tasks.train.kwargs.epochs\":2})" ] }, { From d1b0b1e9d9a6e7f420d32242b0e22c1932f14a43 Mon Sep 17 00:00:00 2001 From: porteratzo Date: Mon, 27 May 2024 10:58:41 -0700 Subject: [PATCH 5/7] Phi finetuning demo --- .../Phi3/Workflow_Interface_Phi3.ipynb | 639 ++++++++++++++++++ 1 file changed, 639 insertions(+) create mode 100644 openfl-tutorials/experimental/Phi3/Workflow_Interface_Phi3.ipynb diff --git a/openfl-tutorials/experimental/Phi3/Workflow_Interface_Phi3.ipynb b/openfl-tutorials/experimental/Phi3/Workflow_Interface_Phi3.ipynb new file mode 100644 index 0000000000..ebba043085 --- /dev/null +++ b/openfl-tutorials/experimental/Phi3/Workflow_Interface_Phi3.ipynb @@ -0,0 +1,639 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "14821d97", + "metadata": {}, + "source": [ + "# Workflow Interface\n", + "## Fine-tuning Phi3 using OpenFL\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/intel/openfl/blob/develop/openfl-tutorials/experimental/Phi3/Workflow_Interface_Phi3.ipynb)" + ] + }, + { + "cell_type": "markdown", + "id": "bd059520", + "metadata": {}, + "source": [ + "In this tutorial, we build on the ideas from the [first](https://github.com/intel/openfl/blob/develop/openfl-tutorials/experimental/Workflow_Interface_101_MNIST.ipynb) quick start notebook, and demonstrate how to fine-tune a Large Language Model (LLM) in a federated learning workflow. 
\n", + "\n", + "We will fine-tune **Microsoft's [Phi3](https://huggingface.co/docs/transformers/main/en/model_doc/phi3)** model on the [Math_10k](https://github.com/AGI-Edgerunners/LLM-Adapters/tree/main) dataset, an open-source mathematical question-answer pair dataset collected from multiple smaller math datasets." + ] + }, + { + "cell_type": "markdown", + "id": "39c3d86a", + "metadata": {}, + "source": [ + "# What is it?" + ] + }, + { + "cell_type": "markdown", + "id": "a7989e72", + "metadata": {}, + "source": [ + "The workflow interface is a new way of composing federated learning experiments with OpenFL. It was born out of conversations with researchers and existing users who had novel use cases that didn't quite fit the standard horizontal federated learning paradigm. " + ] + }, + { + "cell_type": "markdown", + "id": "124ae236-2e33-4349-9979-f506d796276d", + "metadata": {}, + "source": [ + "### Installing OpenFL\n", + "- Let's now install OpenFL and the necessary dependencies for the workflow interface by running the cell below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c808dd12-6795-4203-9221-0f6b43fc785f", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install git+https://github.com/intel/openfl.git\n", + "!cd /home/oamontoy/workspace/openfl/openfl-tutorials/experimental/Phi3 && pip install -r ../requirements_workflow_interface.txt\n", + "!pip install numpy --upgrade\n", + "!pip install transformers peft datasets trl" + ] + }, + { + "cell_type": "markdown", + "id": "fc8e35da", + "metadata": {}, + "source": [ + "## Import libraries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0c5c9347", + "metadata": {}, + "outputs": [], + "source": [ + "import hashlib\n", + "import os\n", + "\n", + "import numpy as np\n", + "import requests\n", + "import torch\n", + "import transformers\n", + "from datasets import load_dataset\n", + "from peft import LoraConfig, get_peft_model\n", + "from peft.utils 
import get_peft_model_state_dict, set_peft_model_state_dict\n", + "from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments\n", + "from transformers.trainer_callback import PrinterCallback\n", + "from trl import SFTTrainer\n", + "\n", + "from openfl.experimental.interface import Aggregator, Collaborator, FLSpec\n", + "from openfl.experimental.placement import aggregator, collaborator\n", + "from openfl.experimental.runtime import LocalRuntime" + ] + }, + { + "cell_type": "markdown", + "id": "b8c24994-1b30-4f03-82ba-5a58bb347b70", + "metadata": {}, + "source": [ + "### Acquiring and preprocessing dataset\n", + "We can download the dataset directly from the [LLM-Adapters\n", + " repository](https://github.com/AGI-Edgerunners/LLM-Adapters)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6674c17-1652-4e87-a885-bc10bf3624c6", + "metadata": {}, + "outputs": [], + "source": [ + "def file_checksum(file_path, algorithm=\"sha256\"):\n", + " hash_func = hashlib.new(algorithm)\n", + " with open(file_path, \"rb\") as f:\n", + " for chunk in iter(lambda: f.read(4096), b\"\"):\n", + " hash_func.update(chunk)\n", + " return hash_func.hexdigest()\n", + "\n", + "\n", + "if not os.path.exists(\"math_10k.json\"):\n", + " r = requests.get(\n", + " \"https://raw.githubusercontent.com/AGI-Edgerunners/LLM-Adapters/main/ft-training_set/math_10k.json\",\n", + " # \"math_10k.json\", timeout=10\n", + " )\n", + " with open(\n", + " \"math_10k.json\",\n", + " \"wb\",\n", + " ) as f:\n", + " f.write(r.content)\n", + "\n", + " actual_checksum = file_checksum(\"math_10k.json\")\n", + " if (\n", + " actual_checksum\n", + " != \"0342d0d860ad8592b579329337c90e42eefd3d9f2898043140cbd120630418b8\"\n", + " ):\n", + " raise ValueError(\n", + " \"Checksum verification failed. 
The file may have been altered.\"\n", + " )\n", + "\n", + "raw_dataset = load_dataset(\"json\", data_files=\"math_10k.json\")" + ] + }, + { + "cell_type": "markdown", + "id": "c9c16b3f-963e-4531-94a5-258d7f61fe08", + "metadata": {}, + "source": [ + "## Initialize arguments and configurations" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8a9fb316-c274-4236-a468-8fadccf27c1f", + "metadata": {}, + "outputs": [], + "source": [ + "training_config = {\n", + " \"bf16\": True,\n", + " \"use_ipex\": False,\n", + " \"use_cpu\": True,\n", + " \"do_eval\": False,\n", + " \"learning_rate\": 5.0e-06,\n", + " \"log_level\": \"info\",\n", + " \"logging_steps\": 20,\n", + " \"logging_strategy\": \"steps\",\n", + " \"lr_scheduler_type\": \"cosine\",\n", + " \"num_train_epochs\": 1,\n", + " \"max_steps\": -1,\n", + " \"output_dir\": \"./checkpoint_dir\",\n", + " \"overwrite_output_dir\": True,\n", + " \"per_device_eval_batch_size\": 1,\n", + " \"per_device_train_batch_size\": 1,\n", + " \"remove_unused_columns\": True,\n", + " \"save_steps\": 100,\n", + " \"save_total_limit\": 1,\n", + " \"seed\": 0,\n", + " \"gradient_checkpointing\": True,\n", + " \"gradient_checkpointing_kwargs\": {\"use_reentrant\": False},\n", + " \"gradient_accumulation_steps\": 1,\n", + " \"warmup_ratio\": 0.2,\n", + "}\n", + "\n", + "peft_config = {\n", + " \"r\": 1,\n", + " \"lora_alpha\": 2,\n", + " \"lora_dropout\": 0.05,\n", + " \"bias\": \"none\",\n", + " \"task_type\": \"CAUSAL_LM\",\n", + " \"target_modules\": \"all-linear\",\n", + " \"modules_to_save\": None,\n", + "}\n", + "model_kwargs = dict(\n", + " use_cache=False,\n", + " trust_remote_code=True,\n", + " torch_dtype=torch.bfloat16,\n", + " device_map=None,\n", + ")\n", + "train_conf = TrainingArguments(**training_config)\n", + "peft_conf = LoraConfig(**peft_config)" + ] + }, + { + "cell_type": "markdown", + "id": "ab360bb3-bdf6-4aed-966c-e21fc4d51847", + "metadata": {}, + "source": [ + "## Load and initialize model" + ] 
+ }, + { + "cell_type": "code", + "execution_count": null, + "id": "7fddae57-ba98-4445-a37d-2f9e188c2cd8", + "metadata": {}, + "outputs": [], + "source": [ + "checkpoint_path = \"microsoft/Phi-3-mini-4k-instruct\"\n", + "model = AutoModelForCausalLM.from_pretrained(\n", + " checkpoint_path, return_dict=True, **model_kwargs\n", + ")\n", + "model = get_peft_model(model, peft_conf)\n", + "\n", + "tokenizer = AutoTokenizer.from_pretrained(checkpoint_path)\n", + "sequence_max_length = 512\n", + "val_set_size = 2000\n", + "tokenizer.pad_token_id = 0 # unk. we want this to be different from the eos token\n", + "tokenizer.padding_side = \"left\" # Allow batched inference" + ] + }, + { + "cell_type": "markdown", + "id": "ee99692c-0f1f-46d7-86c9-c2e0bfc52839", + "metadata": {}, + "source": [ + "## Preprocess dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ee344bba-89c2-45e9-946f-262ca81b93fc", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "def generate_prompt(data_point):\n", + " if data_point[\"input\"]:\n", + " return f\"\"\"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. \n", + "\n", + " ### Instruction:\n", + " {data_point[\"instruction\"]}\n", + " \n", + " ### Input:\n", + " {data_point[\"input\"]}\n", + " \n", + " ### Response:\n", + " {data_point[\"output\"]}\"\"\"\n", + " else:\n", + " return f\"\"\"Below is an instruction that describes a task. Write a response that appropriately completes the request. 
\n", + "\n", + " ### Instruction:\n", + " {data_point[\"instruction\"]}\n", + " \n", + " ### Response:\n", + " {data_point[\"output\"]}\"\"\"\n", + "\n", + "\n", + "def tokenize(prompt, add_eos_token=True):\n", + " # there's probably a way to do this with the tokenizer settings\n", + " # but again, gotta move fast\n", + " result = tokenizer(\n", + " prompt,\n", + " truncation=True,\n", + " max_length=sequence_max_length,\n", + " padding=False,\n", + " return_tensors=None,\n", + " )\n", + " if (\n", + " result[\"input_ids\"][-1] != tokenizer.eos_token_id\n", + " and len(result[\"input_ids\"]) < sequence_max_length\n", + " and add_eos_token\n", + " ):\n", + " result[\"input_ids\"].append(tokenizer.eos_token_id)\n", + " result[\"attention_mask\"].append(1)\n", + "\n", + " result[\"labels\"] = result[\"input_ids\"].copy()\n", + "\n", + " return result\n", + "\n", + "\n", + "def generate_and_tokenize_prompt(data_point):\n", + " full_prompt = generate_prompt(data_point)\n", + " tokenized_full_prompt = tokenize(full_prompt)\n", + " user_prompt = generate_prompt({**data_point, \"output\": \"\"})\n", + " tokenized_user_prompt = tokenize(user_prompt, add_eos_token=False)\n", + " user_prompt_len = len(tokenized_user_prompt[\"input_ids\"])\n", + "\n", + " tokenized_full_prompt[\"labels\"] = [-100] * user_prompt_len + tokenized_full_prompt[\n", + " \"labels\"\n", + " ][user_prompt_len:]\n", + " return tokenized_full_prompt\n", + "\n", + "\n", + "train_val = raw_dataset[\"train\"].train_test_split(\n", + " test_size=val_set_size, shuffle=True, seed=42\n", + ")\n", + "\n", + "processed_train_dataset = train_val[\"train\"].shuffle().map(generate_and_tokenize_prompt)\n", + "processed_test_dataset = train_val[\"test\"].shuffle().map(generate_and_tokenize_prompt)\n" + ] + }, + { + "cell_type": "markdown", + "id": "df64996d-3afa-443e-b897-761af04793c0", + "metadata": {}, + "source": [ + "Next we import the `FLSpec`, `LocalRuntime`, and placement decorators.\n", + "\n", + "- `FLSpec` – 
Defines the flow specification. User defined flows are subclasses of this.\n", + "- `Runtime` – Defines where the flow runs, infrastructure for task transitions (how information gets sent). The `LocalRuntime` runs the flow on a single node.\n", + "- `aggregator/collaborator` - placement decorators that define where the task will be assigned" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4064a34-f152-4da3-a5c6-285ac5ffc280", + "metadata": {}, + "outputs": [], + "source": [ + "def FedAvg(peft_params, model, weights=None):\n", + " state_dicts = peft_params\n", + " state_dict = get_peft_model_state_dict(model)\n", + " for key in peft_params[0]:\n", + " dtype = state_dicts[0][key].dtype\n", + " state_dict[key] = torch.from_numpy(\n", + " np.average(\n", + " [state[key].to(torch.float).numpy() for state in state_dicts], axis=0, weights=weights\n", + " )\n", + " ).to(dtype)\n", + " set_peft_model_state_dict(model, state_dict)\n", + " return model" + ] + }, + { + "attachments": { + "image.png": { + "image/png": "" + } + }, + "cell_type": "markdown", + "id": "8e406db6", + "metadata": { + "scrolled": true + }, + "source": [ + "Now we come to the flow definition. The OpenFL Workflow Interface adopts the conventions set by Metaflow, that every workflow begins with `start` and concludes with the `end` task. The aggregator begins with an optionally passed in model and optimizer. The aggregator begins the flow with the `start` task, where the list of collaborators is extracted from the runtime (`self.collaborators = self.runtime.collaborators`) and is then used as the list of participants to run the task listed in `self.next`, `aggregated_model_validation`. The model, optimizer, and anything that is not explicitly excluded from the next function will be passed from the `start` function on the aggregator to the `aggregated_model_validation` task on the collaborator. 
Where the tasks run is determined by the placement decorator that precedes each task definition (`@aggregator` or `@collaborator`). Once each of the collaborators (defined in the runtime) complete the `aggregated_model_validation` task, they pass their current state onto the `train` task, from `train` to `local_model_validation`, and then finally to `join` at the aggregator. It is in `join` that an average is taken of the model weights, and the next round can begin.\n", + "\n", + "![image.png](attachment:image.png)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "689e3373-a782-402b-b30e-f63648127ca4", + "metadata": {}, + "outputs": [], + "source": [ + "class FederatedFlow(FLSpec):\n", + " def __init__(self, model=None, optimizer=None, rounds=3, **kwargs):\n", + " super().__init__(**kwargs)\n", + " if model is not None:\n", + " self.model = model\n", + " self.peft_params = get_peft_model_state_dict(self.model)\n", + " self.optimizer = optimizer\n", + " else:\n", + " raise ValueError(\"No model inputted\")\n", + "\n", + " self.rounds = rounds\n", + "\n", + " @aggregator\n", + " def start(self):\n", + " print(f\"Performing initialization for model\")\n", + " self.collaborators = self.runtime.collaborators\n", + " self.private = 10\n", + " self.current_round = 0\n", + " self.next(\n", + " self.aggregated_model_validation,\n", + " foreach=\"collaborators\",\n", + " exclude=[\"model\"],\n", + " )\n", + "\n", + " @collaborator\n", + " def aggregated_model_validation(self):\n", + " print(f\"Performing aggregated model validation for collaborator {self.input}\")\n", + " self.model = AutoModelForCausalLM.from_pretrained(\n", + " checkpoint_path, return_dict=True, **model_kwargs\n", + " )\n", + " self.model = get_peft_model(self.model, peft_conf)\n", + " set_peft_model_state_dict(self.model, self.peft_params)\n", + " trainer = SFTTrainer(\n", + " model=self.model,\n", + " args=train_conf,\n", + " peft_config=peft_conf,\n", + " 
train_dataset=self.train_dataset,\n", + " eval_dataset=self.eval_dataset,\n", + " max_seq_length=sequence_max_length,\n", + " dataset_text_field=\"text\",\n", + " tokenizer=tokenizer,\n", + " packing=True,\n", + " data_collator=transformers.DataCollatorForSeq2Seq(\n", + " tokenizer, pad_to_multiple_of=8, return_tensors=\"pt\", padding=True\n", + " ),\n", + " )\n", + "\n", + " trainer.remove_callback(PrinterCallback)\n", + " out = trainer.evaluate()\n", + " self.agg_validation_score = out[\"eval_loss\"]\n", + " print(f\"{self.input} value of {self.agg_validation_score}\")\n", + " self.next(self.train)\n", + "\n", + " @collaborator\n", + " def train(self):\n", + " trainer = SFTTrainer(\n", + " model=self.model,\n", + " args=train_conf,\n", + " peft_config=peft_conf,\n", + " train_dataset=self.train_dataset,\n", + " eval_dataset=self.eval_dataset,\n", + " max_seq_length=sequence_max_length,\n", + " dataset_text_field=\"text\",\n", + " tokenizer=tokenizer,\n", + " packing=True,\n", + " data_collator=transformers.DataCollatorForSeq2Seq(\n", + " tokenizer, pad_to_multiple_of=8, return_tensors=\"pt\", padding=True\n", + " ),\n", + " )\n", + "\n", + " out = trainer.train()\n", + " self.loss = out.training_loss\n", + " trainer.save_model()\n", + " self.training_completed = True\n", + " self.next(self.local_model_validation)\n", + "\n", + " @collaborator\n", + " def local_model_validation(self):\n", + " trainer = SFTTrainer(\n", + " model=self.model,\n", + " args=train_conf,\n", + " peft_config=peft_conf,\n", + " train_dataset=processed_train_dataset,\n", + " eval_dataset=processed_test_dataset,\n", + " max_seq_length=sequence_max_length,\n", + " dataset_text_field=\"text\",\n", + " tokenizer=tokenizer,\n", + " packing=True,\n", + " data_collator=transformers.DataCollatorForSeq2Seq(\n", + " tokenizer, pad_to_multiple_of=8, return_tensors=\"pt\", padding=True\n", + " ),\n", + " )\n", + " out = trainer.evaluate()\n", + " self.local_validation_score = out[\"eval_loss\"]\n", + " 
self.peft_params = get_peft_model_state_dict(self.model)\n", + " print(f\"Doing local model validation for collaborator {self.input}\")\n", + " self.next(self.join, exclude=[\"training_completed\", \"model\"])\n", + "\n", + " @aggregator\n", + " def join(self, inputs):\n", + " self.average_loss = sum(input.loss for input in inputs) / len(inputs)\n", + " self.aggregated_model_accuracy = sum(\n", + " input.agg_validation_score for input in inputs\n", + " ) / len(inputs)\n", + " self.local_model_accuracy = sum(\n", + " input.local_validation_score for input in inputs\n", + " ) / len(inputs)\n", + " print(\n", + " f\"Average aggregated model validation values = {self.aggregated_model_accuracy}\"\n", + " )\n", + " print(f\"Average training loss = {self.average_loss}\")\n", + " print(f\"Average local model validation values = {self.local_model_accuracy}\")\n", + "\n", + " self.model = FedAvg([input.peft_params for input in inputs], self.model)\n", + "\n", + " self.model.save_pretrained(\"./aggregated/model\")\n", + " tokenizer.save_pretrained(\"./aggregated/tokenizer\")\n", + " self.current_round += 1\n", + " if self.current_round < self.rounds:\n", + " self.next(\n", + " self.aggregated_model_validation,\n", + " foreach=\"collaborators\",\n", + " exclude=[\"private\"],\n", + " )\n", + " else:\n", + " self.next(self.end)\n", + "\n", + " @aggregator\n", + " def end(self):\n", + " print(f\"This is the end of the flow\")" + ] + }, + { + "cell_type": "markdown", + "id": "4376c157-9f1f-412b-a3d4-adb6e8b39425", + "metadata": {}, + "source": [ + "You'll notice in the `FederatedFlow` definition above that there were certain attributes that the flow was not initialized with, namely the `train_dataset` and `eval_dataset` for each of the collaborators. These are **private_attributes** that are exposed only through the runtime. 
Each participant has its own set of private attributes: a dictionary where the key is the attribute name, and the value is the object that will be made accessible through that participant's task. \n", + "\n", + "Below, we segment shards of the Math_10k dataset for **two collaborators**: Portland and Seattle. Each has their own slice of the dataset that's accessible via the `train_dataset` or `eval_dataset` attribute. Note that the private attributes are flexible, and you can choose to pass in a completely different type of object to any of the collaborators or aggregator (with an arbitrary name). These private attributes will always be filtered out of the current state when transferring from collaborator to aggregator, or vice versa. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4b53fb67-5b44-4bdf-a9ea-b6d10b20aad1", + "metadata": {}, + "outputs": [], + "source": [ + "# Setup participants\n", + "aggregator = Aggregator()\n", + "aggregator.private_attributes = {}\n", + "\n", + "# Setup collaborators with private attributes\n", + "collaborator_names = [\n", + " \"Portland\",\n", + " \"Seattle\",\n", + "]\n", + "collaborators = [Collaborator(name=name) for name in collaborator_names]\n", + "\n", + "for idx, current_collaborator in enumerate(collaborators):\n", + " # Set the private attributes of the Collaborator to include their specific training and testing data loaders\n", + " current_collaborator.private_attributes = {\n", + " \"train_dataset\": processed_train_dataset.shard(\n", + " num_shards=len(collaborators), index=idx\n", + " ),\n", + " \"eval_dataset\": processed_test_dataset.shard(\n", + " num_shards=len(collaborators), index=idx\n", + " ),\n", + " }\n", + "\n", + "local_runtime = LocalRuntime(\n", + " aggregator=aggregator, collaborators=collaborators, backend=\"single_process\"\n", + ")\n", + "print(f\"Local runtime collaborators = {local_runtime.collaborators}\")" + ] + }, + { + "cell_type": "markdown", + "id": 
"0bc693d1-1e16-43ad-aeb8-3af50fca14f2", + "metadata": {}, + "source": [ + "Now that we have our flow and runtime defined, let's run the experiment! " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "674def50-5e31-4abb-ad2d-ac74cb5d093d", + "metadata": {}, + "outputs": [], + "source": [ + "flflow = FederatedFlow(model, rounds=2)\n", + "flflow.runtime = local_runtime\n", + "flflow.run()" + ] + }, + { + "cell_type": "markdown", + "id": "c32e0844", + "metadata": {}, + "source": [ + "Now that the flow has completed, let's get the final model accuracy:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "863761fe", + "metadata": {}, + "outputs": [], + "source": [ + "print(f'\\nFinal aggregated model accuracy for {flflow.rounds} rounds of training: {flflow.aggregated_model_accuracy}')" + ] + }, + { + "cell_type": "markdown", + "id": "426f2395", + "metadata": {}, + "source": [ + "# Congratulations!\n", + "Now that you've completed this notebook, check out our [other tutorials](https://github.com/securefederatedai/openfl/tree/886704508b8b3b0638372003d72e0bcf7f2e7114/openfl-tutorials/experimental), including:\n", + "\n", + "- Using the LocalRuntime Ray Backend for dedicated GPU access\n", + "- Vertical Federated Learning\n", + "- Model Watermarking\n", + "- Differential Privacy\n", + "- And More!" 
+ ] + }, + { + "cell_type": "markdown", + "id": "6d29bbc9-ccc2-4185-b36a-bc6dfc1a9753", + "metadata": {}, + "source": [ + "# Reference\n", + "\n", + " @ARTICLE{hu2023llm, \n", + " author = {Zhiqiang Hu and Yihuai Lan and Lei Wang and Wanyu Xu and Ee-Peng Lim and Roy Ka-Wei Lee and Lidong Bing and Soujanya Poria},\n", + " title = {LLM-Adapters: An Adapter Family for Parameter-Efficient Fine-Tuning of Large Language Models},\n", + " journal = {arXiv preprint arXiv:2304.01933},\n", + " year = {2023}\n", + "}\n", + "\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 0a2ce1796e0699711d55b3756460fe8e00b7f837 Mon Sep 17 00:00:00 2001 From: porteratzo Date: Wed, 5 Jun 2024 08:26:24 -0700 Subject: [PATCH 6/7] change variables --- .../Phi3/Workflow_Interface_Phi3.ipynb | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/openfl-tutorials/experimental/Phi3/Workflow_Interface_Phi3.ipynb b/openfl-tutorials/experimental/Phi3/Workflow_Interface_Phi3.ipynb index ebba043085..613ca2fc42 100644 --- a/openfl-tutorials/experimental/Phi3/Workflow_Interface_Phi3.ipynb +++ b/openfl-tutorials/experimental/Phi3/Workflow_Interface_Phi3.ipynb @@ -307,8 +307,8 @@ " test_size=val_set_size, shuffle=True, seed=42\n", ")\n", "\n", - "processed_train_dataset = train_val[\"train\"].shuffle().map(generate_and_tokenize_prompt)\n", - "processed_test_dataset = train_val[\"test\"].shuffle().map(generate_and_tokenize_prompt)\n" + "processed_train_dataset = train_val[\"train\"].shuffle().map(generate_and_tokenize_prompt).select(range(3))\n", + "processed_test_dataset = 
train_val[\"test\"].shuffle().map(generate_and_tokenize_prompt).select(range(3))\n" ] }, { @@ -482,6 +482,7 @@ " print(f\"Average local model validation values = {self.local_model_accuracy}\")\n", "\n", " self.model = FedAvg([input.peft_params for input in inputs], self.model)\n", + " self.peft_params = get_peft_model_state_dict(self.model)\n", "\n", " self.model.save_pretrained(\"./aggregated/model\")\n", " tokenizer.save_pretrained(\"./aggregated/tokenizer\")\n", @@ -490,7 +491,7 @@ " self.next(\n", " self.aggregated_model_validation,\n", " foreach=\"collaborators\",\n", - " exclude=[\"private\"],\n", + " exclude=[\"model\"],\n", " )\n", " else:\n", " self.next(self.end)\n", @@ -518,29 +519,29 @@ "outputs": [], "source": [ "# Setup participants\n", - "aggregator = Aggregator()\n", - "aggregator.private_attributes = {}\n", + "_aggregator = Aggregator()\n", + "_aggregator.private_attributes = {}\n", "\n", "# Setup collaborators with private attributes\n", "collaborator_names = [\n", " \"Portland\",\n", " \"Seattle\",\n", "]\n", - "collaborators = [Collaborator(name=name) for name in collaborator_names]\n", + "_collaborators = [Collaborator(name=name) for name in collaborator_names]\n", "\n", - "for idx, current_collaborator in enumerate(collaborators):\n", + "for idx, current_collaborator in enumerate(_collaborators):\n", " # Set the private attributes of the Collaborator to include their specific training and testing data loaders\n", " current_collaborator.private_attributes = {\n", " \"train_dataset\": processed_train_dataset.shard(\n", - " num_shards=len(collaborators), index=idx\n", + " num_shards=len(_collaborators), index=idx\n", " ),\n", " \"eval_dataset\": processed_test_dataset.shard(\n", - " num_shards=len(collaborators), index=idx\n", + " num_shards=len(_collaborators), index=idx\n", " ),\n", " }\n", "\n", "local_runtime = LocalRuntime(\n", - " aggregator=aggregator, collaborators=collaborators, backend=\"single_process\"\n", + " aggregator=_aggregator, 
collaborators=_collaborators, backend=\"single_process\"\n", ")\n", "print(f\"Local runtime collaborators = {local_runtime.collaborators}\")" ] From 1f96a4b57a9d1adbaefafc53fbcc5ec10975ef95 Mon Sep 17 00:00:00 2001 From: porteratzo Date: Mon, 10 Jun 2024 15:30:37 -0700 Subject: [PATCH 7/7] deleted unnecesary file --- openfl-tutorials/Federated_PyTorch_LLM.ipynb | 510 ------------------- 1 file changed, 510 deletions(-) delete mode 100644 openfl-tutorials/Federated_PyTorch_LLM.ipynb diff --git a/openfl-tutorials/Federated_PyTorch_LLM.ipynb b/openfl-tutorials/Federated_PyTorch_LLM.ipynb deleted file mode 100644 index ba5b2bede9..0000000000 --- a/openfl-tutorials/Federated_PyTorch_LLM.ipynb +++ /dev/null @@ -1,510 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Federated PyTorch LLM Tutorial" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This notebook is an example of LLM fine-tuning\n", - "\n", - "Custom DataLoader is used with OpenFL Python API" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#Install dependencies if not already installed\n", - "!pip install torch torchvision peft transformers sentencepiece huggingface_hub accelerate datasets evaluate seqeval\n", - "%load_ext autoreload\n", - "%autoreload 2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from typing import Any, Mapping\n", - "import numpy as np\n", - "import openfl.native as fx\n", - "import torch\n", - "import torch as pt\n", - "from datasets import Dataset, load_dataset, load_metric\n", - "from openfl.federated import PyTorchTaskRunner\n", - "from openfl.federated.task.runner_pt import change_tags\n", - "from openfl.utilities import Metric, TensorKey\n", - "from openfl.utilities.data_splitters import EqualNumPyDataSplitter\n", - "from peft import LoraConfig, TaskType, get_peft_model\n", - 
"from peft.utils import get_peft_model_state_dict, set_peft_model_state_dict\n", - "from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss\n", - "from torch.optim import AdamW\n", - "from torch.utils.data import DataLoader\n", - "from tqdm import tqdm\n", - "import torch.nn as nn\n", - "from transformers.trainer_pt_utils import get_parameter_names\n", - "from transformers import (AutoModelForSequenceClassification,\n", - " AutoTokenizer, DataCollatorWithPadding, get_scheduler)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "After importing the required packages, the next step is setting up our openfl workspace. To do this, simply run the `fx.init()` command as follows:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#Setup default workspace, logging, etc.\n", - "fx.init('torch_cnn_mnist')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now we are ready to define our dataset and model to perform federated learning on. The dataset should be composed of a numpy arrayWe start with a simple Roberta model that is trained on the glue mrpc dataset. 
" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Download the data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def get_glue_mrpc_dataset(tokenizer):\n", - " dataset = load_dataset(\"glue\", \"mrpc\")\n", - "\n", - " def tokenize_function(examples):\n", - " # max_length=None => use the model max length (it's actually the default)\n", - " outputs = tokenizer(\n", - " examples[\"sentence1\"],\n", - " examples[\"sentence2\"],\n", - " truncation=True,\n", - " max_length=None,\n", - " )\n", - " return outputs\n", - "\n", - " tokenized_datasets = dataset.map(\n", - " tokenize_function,\n", - " batched=True,\n", - " remove_columns=[\"idx\", \"sentence1\", \"sentence2\"],\n", - " )\n", - " tokenized_datasets = tokenized_datasets.rename_column(\"label\", \"labels\")\n", - " tokenized_datasets.set_format(\"torch\")\n", - " data_collator = DataCollatorWithPadding(tokenizer=tokenizer, padding=\"longest\")\n", - " return data_collator, tokenized_datasets\n", - "\n", - "base_model_name = \"roberta-base\"\n", - "padding_side = \"right\"\n", - "tokenizer = AutoTokenizer.from_pretrained(base_model_name, padding_side=padding_side)\n", - "if getattr(tokenizer, \"pad_token_id\") is None:\n", - " tokenizer.pad_token_id = tokenizer.eos_token_id\n", - "data_collator, tokenized_datasets = get_glue_mrpc_dataset(tokenizer)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Describe the dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "class GlueMrpc(Dataset):\n", - " \"\"\"\n", - " Has 5.8k pairs of sentences with annotations if the two sentences are equivalent\n", - " \"\"\" \n", - " def get_shape(self):\n", - " \n", - " if not hasattr(self, 'saved_shape'):\n", - " self.saved_shape = max([len(i) for i in self.data['input_ids']])\n", - " return self.saved_shape\n", - "\n", - "train_set 
= GlueMrpc.from_dict(tokenized_datasets['train'].to_dict())\n", - "valid_set = GlueMrpc.from_dict(tokenized_datasets['test'].to_dict())\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Implement Federated dataset\n", - "We have to implement `split` method" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "class GlueMrpcFederatedDataset(DataLoader):\n", - " def __init__(self, train_set, valid_set, batch_size, data_collator=None):\n", - " self.data_splitter = EqualNumPyDataSplitter(shuffle=True)\n", - " if isinstance(train_set,Dataset):\n", - " self.train_set = GlueMrpc.from_dict(train_set.to_dict())\n", - " else:\n", - " self.train_set = train_set\n", - " \n", - " if isinstance(valid_set,Dataset):\n", - " self.valid_set = GlueMrpc.from_dict(valid_set.to_dict())\n", - " else:\n", - " self.valid_set = valid_set \n", - " \n", - " self.batch_size = batch_size\n", - " self.data_collator = data_collator\n", - " \n", - " def split(self, num_collaborators):\n", - " train_split = self.data_splitter.split(self.train_set, num_collaborators)\n", - " valid_split = self.data_splitter.split(self.valid_set, num_collaborators)\n", - " return [\n", - " GlueMrpcFederatedDataset(\n", - " self.train_set.select(train_split[i]),\n", - " self.valid_set.select(valid_split[i]),\n", - " self.batch_size\n", - " )\n", - " for i in range(num_collaborators)\n", - " ]\n", - " \n", - " def get_feature_shape(self):\n", - " return self.train_set.get_shape()\n", - " \n", - " def get_train_loader(self, num_batches=None):\n", - " return DataLoader(self.train_set, batch_size=self.batch_size, collate_fn=data_collator)\n", - " \n", - " def get_valid_loader(self):\n", - " return DataLoader(self.valid_set, batch_size=self.batch_size, collate_fn=data_collator)\n", - " \n", - " def get_train_data_size(self):\n", - " return len(self.train_set)\n", - " \n", - " def get_valid_data_size(self):\n", - " return 
len(self.valid_set)\n", - " \n", - "fl_data = GlueMrpcFederatedDataset(train_set, valid_set, batch_size=32)\n", - "metric = load_metric('glue', \"mrpc\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Define model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "class LLMTaskRunner(PyTorchTaskRunner):\n", - " def __init__(\n", - " self, base_model_name, data_loader, device=None, metric=None, **kwargs\n", - " ):\n", - " kwargs[\"data_loader\"] = data_loader\n", - " super().__init__(device, **kwargs)\n", - " self.base_model_name = base_model_name\n", - " self.metric = metric\n", - " self._init_model()\n", - " self._init_optimizer()\n", - " self.save_models = []\n", - "\n", - " def _init_model(self):\n", - " model = AutoModelForSequenceClassification.from_pretrained(\n", - " self.base_model_name, return_dict=True\n", - " )\n", - " peft_config = LoraConfig(\n", - " task_type=TaskType.SEQ_CLS,\n", - " inference_mode=False,\n", - " r=16,\n", - " lora_alpha=16,\n", - " lora_dropout=0.1,\n", - " bias=\"lora_only\",\n", - " )\n", - " self.model = get_peft_model(model, peft_config)\n", - "\n", - " def _init_optimizer(self):\n", - " ALL_LAYERNORM_LAYERS = [nn.LayerNorm]\n", - " decay_parameters = get_parameter_names(self.model, ALL_LAYERNORM_LAYERS)\n", - " decay_parameters = [name for name in decay_parameters if \"bias\" not in name]\n", - "\n", - " optimizer_grouped_parameters = [\n", - " {\n", - " \"params\": [\n", - " p\n", - " for n, p in self.model.named_parameters()\n", - " if (n in decay_parameters and p.requires_grad)\n", - " ],\n", - " \"weight_decay\": 0.01,\n", - " },\n", - " {\n", - " \"params\": [\n", - " p\n", - " for n, p in self.model.named_parameters()\n", - " if (n not in decay_parameters and p.requires_grad)\n", - " ],\n", - " \"weight_decay\": 0.0,\n", - " },\n", - " ]\n", - " self.optimizer = AdamW(optimizer_grouped_parameters, lr=0.001)\n", - " 
self.lr_scheduler = get_scheduler(\n", - " name=\"linear\",\n", - " optimizer=self.optimizer,\n", - " num_warmup_steps=0,\n", - " num_training_steps=len(self.data_loader.train_set) * 5,\n", - " )\n", - "\n", - " self.training_round_completed = False\n", - " self.initialize_tensorkeys_for_functions()\n", - "\n", - " def train(self):\n", - " return self.model.train()\n", - "\n", - " def state_dict(self):\n", - " return get_peft_model_state_dict(self.model)\n", - "\n", - " def load_state_dict(self, state_dict: Mapping[str, Any], strict: bool = True):\n", - " return set_peft_model_state_dict(self.model, state_dict)\n", - "\n", - " def validate(\n", - " self, col_name, round_num, input_tensor_dict, use_tqdm=False, **kwargs\n", - " ):\n", - " \"\"\"Validate.\n", - "\n", - " Run validation of the model on the local data.\n", - "\n", - " Args:\n", - " col_name: Name of the collaborator\n", - " round_num: What round is it\n", - " input_tensor_dict: Required input tensors (for model)\n", - " use_tqdm (bool): Use tqdm to print a progress bar (Default=True)\n", - "\n", - " Returns:\n", - " global_output_dict: Tensors to send back to the aggregator\n", - " local_output_dict: Tensors to maintain in the local TensorDB\n", - "\n", - " \"\"\"\n", - " self.save_models.append(input_tensor_dict.copy())\n", - " self.rebuild_model(round_num, input_tensor_dict, validation=True)\n", - " self.model.eval()\n", - " \n", - "\n", - " self.model.to(self.device)\n", - " val_score = 0\n", - " total_samples = 0\n", - "\n", - " loader = self.data_loader.get_valid_loader()\n", - " if use_tqdm:\n", - " loader = tqdm(loader, desc=\"validate\")\n", - "\n", - " with pt.no_grad():\n", - " for sample in loader:\n", - " samples = sample[\"input_ids\"].shape[0]\n", - " total_samples += samples\n", - " output = self.model(**sample)\n", - " # get the index of the max log-probability\n", - " logits = output.logits\n", - " predictions = torch.argmax(logits, dim=-1)\n", - " 
metric.add_batch(predictions=predictions, references=sample[\"labels\"])\n", - " val_score = metric.compute()[\"accuracy\"]\n", - "\n", - " origin = col_name\n", - " suffix = \"validate\"\n", - " if kwargs[\"apply\"] == \"local\":\n", - " suffix += \"_local\"\n", - " else:\n", - " suffix += \"_agg\"\n", - " tags = (\"metric\",)\n", - " tags = change_tags(tags, add_field=suffix)\n", - " # TODO figure out a better way to pass in metric for this pytorch\n", - " # validate function\n", - " output_tensor_dict = {\n", - " TensorKey(\"acc\", origin, round_num, True, tags): np.array(val_score)\n", - " }\n", - "\n", - " # Empty list represents metrics that should only be stored locally\n", - " return output_tensor_dict, {}\n", - "\n", - " def train_epoch(self, batch_generator) -> Metric:\n", - " \"\"\"Train single epoch.\n", - "\n", - " Override this function in order to use custom training.\n", - "\n", - " Args:\n", - " batch_generator: Train dataset batch generator. Yields (samples, targets) tuples of\n", - " size = `self.data_loader.batch_size`.\n", - " Returns:\n", - " Metric: An object containing name and np.ndarray value.\n", - " \"\"\"\n", - " losses = []\n", - " for sample in batch_generator:\n", - " self.model.zero_grad()\n", - " output = self.model(**sample)\n", - " loss = output.loss\n", - " loss.backward()\n", - " torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)\n", - " self.optimizer.step()\n", - " self.lr_scheduler.step()\n", - " losses.append(loss.detach().cpu().numpy())\n", - " loss = np.mean(losses)\n", - " if self.model.config.problem_type == \"regression\":\n", - " loss_fct = MSELoss()\n", - " elif self.model.config.problem_type == \"single_label_classification\":\n", - " loss_fct = CrossEntropyLoss()\n", - " elif self.model.config.problem_type == \"multi_label_classification\":\n", - " loss_fct = BCEWithLogitsLoss()\n", - " return Metric(name=loss_fct._get_name(), value=np.array(loss))\n", - "\n", - " def save_native(\n", - " self,\n", - " 
filepath,\n", - " model_state_dict_key=\"model_state_dict\",\n", - " optimizer_state_dict_key=\"optimizer_state_dict\",\n", - " **kwargs,\n", - " ):\n", - " \"\"\"\n", - " Save model and optimizer states in a picked file specified by the \\\n", - " filepath. model_/optimizer_state_dicts are stored in the keys provided. \\\n", - " Uses pt.save().\n", - "\n", - " Args:\n", - " filepath (string) : Path to pickle file to be\n", - " created by pt.save().\n", - " model_state_dict_key (string) : key for model state dict\n", - " in pickled file.\n", - " optimizer_state_dict_key (string) : key for optimizer state\n", - " dict in picked file.\n", - " kwargs : unused\n", - "\n", - " Returns:\n", - " None\n", - " \"\"\"\n", - " pickle_dict = {\n", - " model_state_dict_key: get_peft_model_state_dict(self.model),\n", - " optimizer_state_dict_key: self.optimizer.state_dict(),\n", - " }\n", - " pt.save(pickle_dict, filepath)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "num_collaborators = 2\n", - "collaborator_models = [\n", - " LLMTaskRunner(\n", - " base_model_name,\n", - " data_loader=data_slice,\n", - " metric=metric\n", - " )\n", - " for data_slice in fl_data.split(num_collaborators)]\n", - "collaborators = {'one':collaborator_models[0],'two':collaborator_models[1]}#, 'three':collaborator_models[2]}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#Original TinyImageNet dataset\n", - "print(f'Original training data size: {len(fl_data.train_set)}')\n", - "print(f'Original validation data size: {len(fl_data.valid_set)}\\n')\n", - "\n", - "#Collaborator one's data\n", - "for i, model in enumerate(collaborator_models):\n", - " print(f'Collaborator {i}\\'s training data size: {len(model.data_loader.train_set)}')\n", - " print(f'Collaborator {i}\\'s validation data size: {len(model.data_loader.valid_set)}\\n')\n", - "\n", - "#Collaborator three's 
data\n", - "#print(f'Collaborator three\\'s training data size: {len(collaborator_models[2].data_loader.X_train)}')\n", - "#print(f'Collaborator three\\'s validation data size: {len(collaborator_models[2].data_loader.X_valid)}')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#Run experiment, return trained FederatedModel\n", - "final_fl_model = fx.run_experiment(collaborators,{'aggregator.settings.rounds_to_train':10,\"tasks.train.kwargs.epochs\":2})" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#Save final model\n", - "final_fl_model.save_native('final_model.pth')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "llama-env", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.0" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -}