Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New iterator #4

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
329 changes: 329 additions & 0 deletions examples/torch-parallel.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,329 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import os\n",
"import requests\n",
"import pyxis as px"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Let us start by downloading the data\n",
"\n",
"We will use the MNIST dataset. This version is already stored as NumPy arrays and is hosted by TensorFlow."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Downloads dataset\n",
"MNIST_URL = \"https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz\"\n",
"MNIST_PATH = \"mnist.npz\"\n",
"\n",
"if not os.path.exists(MNIST_PATH):\n",
"    # Stream into a temporary file and rename it only after the download has\n",
"    # finished. Opening `mnist.npz` before the request succeeds would leave an\n",
"    # empty or truncated file behind on failure, and the `exists` check above\n",
"    # would then treat it as a valid cached copy on every later run.\n",
"    partial_path = MNIST_PATH + \".part\"\n",
"    # The `with` block closes the streamed connection; the timeout keeps a\n",
"    # stalled server from hanging the notebook indefinitely.\n",
"    with requests.get(MNIST_URL, stream=True, timeout=60) as response:\n",
"        response.raise_for_status()\n",
"        with open(partial_path, 'wb') as fout:\n",
"            for block in response.iter_content(4096):\n",
"                fout.write(block)\n",
"    os.replace(partial_path, MNIST_PATH)\n",
"\n",
"# Loads arrays\n",
"with np.load(MNIST_PATH) as f:\n",
"    x_train, y_train = f['x_train'], f['y_train']\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Creates train database\n",
"\n",
"We will begin by creating a small dataset to test with. It will consist of `60000` samples from the training partition of MNIST."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Labels are stored as explicit int64. The `np.long` alias was deprecated in\n",
"# NumPy 1.20 and removed in 1.24, and where it exists its width depends on the\n",
"# platform. int64 is the dtype the Reader summary below reports for 'y', and\n",
"# it is what `torch.nn.CrossEntropyLoss` needs for class-index targets.\n",
"with px.Writer(dirpath='mnist_train', map_size_limit=256, ram_gb_limit=1) as db:\n",
"    db.put_samples('X', x_train.astype(np.float32), 'y', y_train.astype(np.int64))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Checks dataset"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"pyxis.Reader\n",
"Location:\t\t'mnist_train'\n",
"Number of samples:\t60000\n",
"Data keys (0th sample):\n",
"\t'X' <- dtype: float32, shape: (28, 28)\n",
"\t'y' <- dtype: int64, shape: ()\n"
]
}
],
"source": [
"with px.Reader('mnist_train') as db:\n",
" print(db)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Train 1 epoch using pytorch dataloader\n",
"\n",
"We will train 1 epoch using the regular pyxis `TorchDataset` wrapped in a `torch.utils.data.DataLoader`."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"try:\n",
" import torch\n",
" import torch.utils.data\n",
"except ImportError:\n",
" raise ImportError('Could not import the PyTorch library `torch` or '\n",
" '`torch.utils.data`. Please refer to '\n",
" 'https://pytorch.org/ for installation instructions.')\n",
"import pyxis.torch as pxt"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Initialize pytorch methods"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Prefer the GPU when CUDA is usable; fall back to the CPU otherwise.\n",
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
"\n",
"# Small fully-connected classifier (784 -> 32 -> 32 -> 32 -> 10). It is not\n",
"# meant to be a good model, only big enough to simulate some gradient load.\n",
"network = torch.nn.Sequential(\n",
"    torch.nn.Flatten(),\n",
"    torch.nn.Linear(28 * 28, 32),\n",
"    torch.nn.ReLU(),\n",
"    torch.nn.Linear(32, 32),\n",
"    torch.nn.ReLU(),\n",
"    torch.nn.Linear(32, 32),\n",
"    torch.nn.ReLU(),\n",
"    torch.nn.Linear(32, 10),\n",
").to(device)  # Module.to() returns the module itself\n",
"\n",
"criterion = torch.nn.CrossEntropyLoss()\n",
"optimizer = torch.optim.RMSprop(\n",
"    network.parameters(),\n",
"    lr=1e-5,\n",
"    weight_decay=1e-2,\n",
")\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Initialize classical dataloaders"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"dataset = pxt.TorchDataset('mnist_train')\n",
"use_cuda = torch.cuda.is_available()\n",
"\n",
"# Worker processes and pinned memory are only requested when CUDA is present;\n",
"# without it the DataLoader is built with its default settings.\n",
"if use_cuda:\n",
"    kwargs = {\"num_workers\": 2, \"pin_memory\": True}\n",
"else:\n",
"    kwargs = {}\n",
"\n",
"loader = torch.utils.data.DataLoader(\n",
"    dataset,\n",
"    batch_size=32,\n",
"    shuffle=False,\n",
"    **kwargs,\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"def train_epoch():\n",
"    \"\"\"Run one optimisation pass over `loader` and print a one-line summary.\n",
"\n",
"    Relies on the notebook-level `loader`, `network`, `criterion`,\n",
"    `optimizer` and `device`. The summary is printed once, after the last\n",
"    batch, using that batch's index, size and loss.\n",
"    \"\"\"\n",
"    for i, batch in enumerate(loader):\n",
"        # Each batch is a dict keyed by the names used when writing the database.\n",
"        data = batch['X'].to(device)\n",
"        target = batch['y'].to(device)\n",
"\n",
"        optimizer.zero_grad()\n",
"        loss = criterion(network(data), target)\n",
"        loss.backward()\n",
"        optimizer.step()\n",
"\n",
"    samples_done = i * len(data)\n",
"    percent_done = 100. * i / len(loader)\n",
"    print('Train[{}/{} ({:.0f}%)]\\tLoss: {:.6f}'.format(\n",
"        samples_done, len(loader.dataset), percent_done, loss.item()))"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Train[59968/60000 (100%)]\tLoss: 0.682663\n",
"Train[59968/60000 (100%)]\tLoss: 0.292598\n",
"Train[59968/60000 (100%)]\tLoss: 0.182662\n",
"Train[59968/60000 (100%)]\tLoss: 0.138513\n",
"Train[59968/60000 (100%)]\tLoss: 0.125213\n",
"Train[59968/60000 (100%)]\tLoss: 0.114652\n",
"Train[59968/60000 (100%)]\tLoss: 0.108128\n",
"Train[59968/60000 (100%)]\tLoss: 0.103621\n",
"4.44 s ± 62.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%timeit train_epoch()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Initialize pytorch methods again "
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Rebuild the device handle, model, loss and optimizer so the pyxis run below\n",
"# starts from freshly initialised weights rather than the ones trained above.\n",
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
"\n",
"# Small fully-connected classifier (784 -> 32 -> 32 -> 32 -> 10). It is not\n",
"# meant to be a good model, only big enough to simulate some gradient load.\n",
"network = torch.nn.Sequential(\n",
"    torch.nn.Flatten(),\n",
"    torch.nn.Linear(28 * 28, 32),\n",
"    torch.nn.ReLU(),\n",
"    torch.nn.Linear(32, 32),\n",
"    torch.nn.ReLU(),\n",
"    torch.nn.Linear(32, 32),\n",
"    torch.nn.ReLU(),\n",
"    torch.nn.Linear(32, 10),\n",
").to(device)  # Module.to() returns the module itself\n",
"\n",
"criterion = torch.nn.CrossEntropyLoss()\n",
"optimizer = torch.optim.RMSprop(\n",
"    network.parameters(),\n",
"    lr=1e-5,\n",
"    weight_decay=1e-2,\n",
")\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Initialize pyxis iterator"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"iterator = pxt.TorchIterator(\n",
" device = device,\n",
" dir_path = 'mnist_train',\n",
" keys=('X','y'),\n",
" batch_size=32,\n",
" num_worker=2,\n",
" pre_fetcher_queue=100,\n",
" device_transfer_queue=2,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"def train_epoch_with_pyxis():\n",
"    \"\"\"Run one optimisation pass over the pyxis `iterator` and print a summary.\n",
"\n",
"    Relies on the notebook-level `iterator`, `network`, `criterion`,\n",
"    `optimizer` and `loader`. Each item yielded by `iterator` is unpacked as\n",
"    `(data, target)`; no explicit `.to(device)` is done here.\n",
"    \"\"\"\n",
"    # Presumably the iterator already delivers tensors on `device` (it is\n",
"    # constructed with `device=device`) -- TODO confirm.\n",
"    for i, (data, target) in enumerate(iterator):\n",
"        optimizer.zero_grad()\n",
"        loss = criterion(network(data), target)\n",
"        loss.backward()\n",
"        optimizer.step()\n",
"\n",
"    # NOTE(review): the dataset size and batch count below are read from the\n",
"    # classical `loader`, not from `iterator`, and the recorded output shows\n",
"    # 599680/60000 -- confirm what `len(data)` is for a pyxis batch.\n",
"    samples_done = i * len(data)\n",
"    percent_done = 100. * i / len(loader)\n",
"    print('Train[{}/{} ({:.0f}%)]\\tLoss: {:.6f}'.format(\n",
"        samples_done, len(loader.dataset), percent_done, loss.item()))"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Train[599680/60000 (100%)]\tLoss: 0.418909\n",
"Train[599680/60000 (100%)]\tLoss: 0.345567\n",
"Train[599680/60000 (100%)]\tLoss: 0.131335\n",
"Train[599680/60000 (100%)]\tLoss: 0.163470\n",
"Train[599680/60000 (100%)]\tLoss: 0.200821\n",
"Train[599680/60000 (100%)]\tLoss: 0.117903\n",
"Train[599680/60000 (100%)]\tLoss: 0.149772\n",
"Train[599680/60000 (100%)]\tLoss: 0.104580\n",
"3.6 s ± 67.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%timeit train_epoch_with_pyxis()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
2 changes: 1 addition & 1 deletion pyxis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@
from .pyxis import *
from .iterators import *

__version__ = "0.4.dev0"
__version__ = "0.5.dev0"
Loading