diff --git a/atac/PeakVI.ipynb b/atac/PeakVI.ipynb index 5d0b603..bd772d5 100644 --- a/atac/PeakVI.ipynb +++ b/atac/PeakVI.ipynb @@ -23,10 +23,10 @@ "execution_count": 1, "metadata": { "execution": { - "iopub.execute_input": "2023-12-06T20:43:14.570844Z", - "iopub.status.busy": "2023-12-06T20:43:14.570715Z", - "iopub.status.idle": "2023-12-06T20:43:14.573054Z", - "shell.execute_reply": "2023-12-06T20:43:14.572638Z" + "iopub.execute_input": "2024-02-11T16:35:17.727642Z", + "iopub.status.busy": "2024-02-11T16:35:17.727504Z", + "iopub.status.idle": "2024-02-11T16:35:17.730028Z", + "shell.execute_reply": "2024-02-11T16:35:17.729749Z" } }, "outputs": [], @@ -45,10 +45,10 @@ "base_uri": "https://localhost:8080/" }, "execution": { - "iopub.execute_input": "2023-12-06T20:43:14.574525Z", - "iopub.status.busy": "2023-12-06T20:43:14.574425Z", - "iopub.status.idle": "2023-12-06T20:43:17.694239Z", - "shell.execute_reply": "2023-12-06T20:43:17.693766Z" + "iopub.execute_input": "2024-02-11T16:35:17.731483Z", + "iopub.status.busy": "2024-02-11T16:35:17.731323Z", + "iopub.status.idle": "2024-02-11T16:35:20.803056Z", + "shell.execute_reply": "2024-02-11T16:35:20.802672Z" }, "id": "tcbBdgYoMf8O", "outputId": "c7fc0a25-9760-42d6-8768-e17b560cbdee" @@ -72,10 +72,10 @@ "execution_count": 3, "metadata": { "execution": { - "iopub.execute_input": "2023-12-06T20:43:17.696057Z", - "iopub.status.busy": "2023-12-06T20:43:17.695775Z", - "iopub.status.idle": "2023-12-06T20:43:17.699616Z", - "shell.execute_reply": "2023-12-06T20:43:17.699175Z" + "iopub.execute_input": "2024-02-11T16:35:20.804966Z", + "iopub.status.busy": "2024-02-11T16:35:20.804676Z", + "iopub.status.idle": "2024-02-11T16:35:20.808174Z", + "shell.execute_reply": "2024-02-11T16:35:20.807846Z" } }, "outputs": [ @@ -111,10 +111,10 @@ "execution_count": 4, "metadata": { "execution": { - "iopub.execute_input": "2023-12-06T20:43:17.718097Z", - "iopub.status.busy": "2023-12-06T20:43:17.717974Z", - "iopub.status.idle": "2023-12-06T20:43:17.726194Z", - "shell.execute_reply": "2023-12-06T20:43:17.725734Z" + "iopub.execute_input": "2024-02-11T16:35:20.826929Z", + "iopub.status.busy": "2024-02-11T16:35:20.826798Z", + "iopub.status.idle": "2024-02-11T16:35:20.835069Z", + "shell.execute_reply": "2024-02-11T16:35:20.834730Z" } }, "outputs": [], @@ -148,10 +148,10 @@ "execution_count": 5, "metadata": { "execution": { - "iopub.execute_input": "2023-12-06T20:43:17.727688Z", - "iopub.status.busy": "2023-12-06T20:43:17.727579Z", - "iopub.status.idle": "2023-12-06T20:43:17.730030Z", - "shell.execute_reply": "2023-12-06T20:43:17.729733Z" + "iopub.execute_input": "2024-02-11T16:35:20.836658Z", + "iopub.status.busy": "2024-02-11T16:35:20.836538Z", + "iopub.status.idle": "2024-02-11T16:35:20.839148Z", + "shell.execute_reply": "2024-02-11T16:35:20.838815Z" } }, "outputs": [], @@ -174,10 +174,10 @@ "execution_count": 6, "metadata": { "execution": { - "iopub.execute_input": "2023-12-06T20:43:17.731355Z", - "iopub.status.busy": "2023-12-06T20:43:17.731248Z", - "iopub.status.idle": "2023-12-06T20:43:28.011364Z", - "shell.execute_reply": "2023-12-06T20:43:28.010929Z" + "iopub.execute_input": "2024-02-11T16:35:20.840644Z", + "iopub.status.busy": "2024-02-11T16:35:20.840512Z", + "iopub.status.idle": "2024-02-11T16:35:28.097334Z", + "shell.execute_reply": "2024-02-11T16:35:28.096955Z" } }, "outputs": [ @@ -185,9 +185,309 @@ "name": "stderr", "output_type": "stream", "text": [ - "Downloading data from 'https://cf.10xgenomics.com/samples/cell-atac/1.2.0/atac_pbmc_5k_nextgem/atac_pbmc_5k_nextgem_filtered_peak_bc_matrix.tar.gz' to file '/tmp/tmpzthhoioq/atac_pbmc_5k'.\n", - "100%|████████████████████████████████████████| 114M/114M [00:00<00:00, 410GB/s]\n", - "Untarring contents of '/tmp/tmpzthhoioq/atac_pbmc_5k' to '/tmp/tmpzthhoioq/atac_pbmc_5k.untar'\n" + "Downloading data from 'https://cf.10xgenomics.com/samples/cell-atac/1.2.0/atac_pbmc_5k_nextgem/atac_pbmc_5k_nextgem_filtered_peak_bc_matrix.tar.gz' to file '/tmp/tmpam52suaq/atac_pbmc_5k'.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " 0%| | 0.00/114M [00:00:119: FutureWarning: SparseDataset is deprecated and will be removed in late 2024. It has been replaced by the public classes CSRDataset and CSCDataset.\n", + "\n", + "For instance checks, use `isinstance(X, (anndata.experimental.CSRDataset, anndata.experimental.CSCDataset))` instead.\n", + "\n", + "For creation, use `anndata.experimental.sparse_dataset(X)` instead.\n", + "\n" + ] + } + ], + "source": [ + "scvi.model.PEAKVI.setup_anndata(adata)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "onvBxehyMf8R" + }, + "source": [ + "We can now create a PeakVI model object and train it!" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```{important}\n", + "The default `max_epochs` is set to `500`, but in practice PeakVI stops early once the model converges (we quantify convergence with the model's validation reconstruction loss). This is especially the case for larger datasets, which require fewer training epochs to converge since each epoch lets the model view more data. \n", + "\n", + "This means that the estimated training runtime is usually an overestimate of the actual runtime. For the data used in this tutorial, it typically converges with around half of `max_epochs`!\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "execution": { + "iopub.execute_input": "2024-02-11T16:35:29.457031Z", + "iopub.status.busy": "2024-02-11T16:35:29.456917Z", + "iopub.status.idle": "2024-02-11T16:37:28.123776Z", + "shell.execute_reply": "2024-02-11T16:37:28.123348Z" + }, + "id": "cclCyKFOMf8R", + "outputId": "c6ae6f35-d3bb-4efb-e246-fb4ac0130bc4" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (cuda), used: True\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "TPU available: False, using: 0 TPU cores\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "IPU available: False, using: 0 IPUs\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "HPU available: False, using: 0 HPUs\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/env/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.\n", + "/env/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Training: 0%| | 0/500 [00:00:119: FutureWarning: SparseDataset is deprecated and will be removed in late 2024. It has been replaced by the public classes CSRDataset and CSCDataset.\n", - "\n", - "For instance checks, use `isinstance(X, (anndata.experimental.CSRDataset, anndata.experimental.CSCDataset))` instead.\n", - "\n", - "For creation, use `anndata.experimental.sparse_dataset(X)` instead.\n", - "\n" + "\r", + "Epoch 273/500: 55%|█████▍ | 273/500 [01:57<01:38, 2.31it/s, v_num=1, train_loss_step=1.19e+7, train_loss_epoch=2.01e+8]" ] - } - ], - "source": [ - "scvi.model.PEAKVI.setup_anndata(adata)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "onvBxehyMf8R" - }, - "source": [ - "We can now create a PeakVI model object and train it!" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "```{important}\n", - "The default `max_epochs` is set to `500`, but in practice PeakVI stops early once the model converges (we quantify convergence with the model's validation reconstruction loss). This is especially the case for larger datasets, which require fewer training epochs to converge since each epoch lets the model view more data. \n", - "\n", - "This means that the estimated training runtime is usually an overestimate of the actual runtime. For the data used in this tutorial, it typically converges with around half of `max_epochs`!\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" }, - "execution": { - "iopub.execute_input": "2023-12-06T20:43:54.117583Z", - "iopub.status.busy": "2023-12-06T20:43:54.117467Z", - "iopub.status.idle": "2023-12-06T20:45:52.177173Z", - "shell.execute_reply": "2023-12-06T20:45:52.176804Z" + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Epoch 274/500: 55%|█████▍ | 273/500 [01:57<01:38, 2.31it/s, v_num=1, train_loss_step=1.19e+7, train_loss_epoch=2.01e+8]" + ] }, - "id": "cclCyKFOMf8R", - "outputId": "c6ae6f35-d3bb-4efb-e246-fb4ac0130bc4" - }, - "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "GPU available: True (cuda), used: True\n", - "TPU available: False, using: 0 TPU cores\n", - "IPU available: False, using: 0 IPUs\n", - "HPU available: False, using: 0 HPUs\n", - "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n", - "/env/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.\n", - "/env/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.\n" + "\r", + "Epoch 274/500: 55%|█████▍ | 274/500 [01:57<01:38, 2.31it/s, v_num=1, train_loss_step=1.19e+7, train_loss_epoch=2.01e+8]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Epoch 274/500: 55%|█████▍ | 274/500 [01:57<01:38, 2.31it/s, v_num=1, train_loss_step=1.14e+7, train_loss_epoch=2.01e+8]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r", + "Epoch 274/500: 55%|█████▍ | 274/500 [01:57<01:37, 2.33it/s, v_num=1, train_loss_step=1.14e+7, train_loss_epoch=2.01e+8]" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch 274/500: 55%|█████▍ | 274/500 [01:56<01:36, 2.34it/s, v_num=1, train_loss_step=1.14e+7, train_loss_epoch=2.01e+8]\n", + "\n", "Monitored metric reconstruction_loss_validation did not improve in the last 50 records. Best score: 13018.062. Signaling Trainer to stop.\n" ] } @@ -410,10 +7338,10 @@ "execution_count": 11, "metadata": { "execution": { - "iopub.execute_input": "2023-12-06T20:45:52.178735Z", - "iopub.status.busy": "2023-12-06T20:45:52.178625Z", - "iopub.status.idle": "2023-12-06T20:45:52.332641Z", - "shell.execute_reply": "2023-12-06T20:45:52.332186Z" + "iopub.execute_input": "2024-02-11T16:37:28.125386Z", + "iopub.status.busy": "2024-02-11T16:37:28.125236Z", + "iopub.status.idle": "2024-02-11T16:37:28.197454Z", + "shell.execute_reply": "2024-02-11T16:37:28.197010Z" }, "id": "v5w3u7ZgMf8S" }, @@ -437,10 +7365,10 @@ "execution_count": 12, "metadata": { "execution": { - "iopub.execute_input": "2023-12-06T20:45:52.334337Z", - "iopub.status.busy": "2023-12-06T20:45:52.334222Z", - "iopub.status.idle": "2023-12-06T20:45:52.456531Z", - "shell.execute_reply": "2023-12-06T20:45:52.456180Z" + "iopub.execute_input": "2024-02-11T16:37:28.199070Z", + "iopub.status.busy": "2024-02-11T16:37:28.198950Z", + "iopub.status.idle": "2024-02-11T16:37:28.321578Z", + "shell.execute_reply": "2024-02-11T16:37:28.321224Z" }, "id": "_q34gv_hMf8S" }, @@ -449,7 +7377,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[34mINFO \u001b[0m File \u001b[35m/tmp/tmpzthhoioq/peakvi_pbmc/\u001b[0m\u001b[95mmodel.pt\u001b[0m already downloaded \n" + "\u001b[34mINFO \u001b[0m File \u001b[35m/tmp/tmpam52suaq/peakvi_pbmc/\u001b[0m\u001b[95mmodel.pt\u001b[0m already downloaded \n" ] } ], @@ -476,10 +7404,10 @@ "base_uri": "https://localhost:8080/" }, "execution": { - "iopub.execute_input": "2023-12-06T20:45:52.458359Z", - "iopub.status.busy": "2023-12-06T20:45:52.458212Z", - "iopub.status.idle": "2023-12-06T20:45:52.683851Z", - "shell.execute_reply": "2023-12-06T20:45:52.683441Z" + "iopub.execute_input": "2024-02-11T16:37:28.323353Z", + "iopub.status.busy": "2024-02-11T16:37:28.323200Z", + "iopub.status.idle": "2024-02-11T16:37:28.560014Z", + "shell.execute_reply": "2024-02-11T16:37:28.559606Z" }, "id": "shzoLMjQMf8S", "outputId": "acce7cb6-4c20-43a1-8b18-967805f30fa6" @@ -518,10 +7446,10 @@ "execution_count": 14, "metadata": { "execution": { - "iopub.execute_input": "2023-12-06T20:45:52.685311Z", - "iopub.status.busy": "2023-12-06T20:45:52.685198Z", - "iopub.status.idle": "2023-12-06T20:46:04.514331Z", - "shell.execute_reply": "2023-12-06T20:46:04.513874Z" + "iopub.execute_input": "2024-02-11T16:37:28.561504Z", + "iopub.status.busy": "2024-02-11T16:37:28.561394Z", + "iopub.status.idle": "2024-02-11T16:37:40.377583Z", + "shell.execute_reply": "2024-02-11T16:37:40.377140Z" }, "id": "CAzMk_4qMf8T" }, @@ -546,10 +7474,10 @@ "height": 367 }, "execution": { - "iopub.execute_input": "2023-12-06T20:46:04.515920Z", - "iopub.status.busy": "2023-12-06T20:46:04.515806Z", - "iopub.status.idle": "2023-12-06T20:46:04.661965Z", - "shell.execute_reply": "2023-12-06T20:46:04.661536Z" + "iopub.execute_input": "2024-02-11T16:37:40.379195Z", + "iopub.status.busy": "2024-02-11T16:37:40.379083Z", + "iopub.status.idle": "2024-02-11T16:37:40.502265Z", + "shell.execute_reply": "2024-02-11T16:37:40.501967Z" }, "id": "gPm9j0aaMf8T", "outputId": "edee858e-9f14-4ca6-a039-888b9de13c2c" @@ -559,7 +7487,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/env/lib/python3.11/site-packages/scanpy/plotting/_tools/scatterplots.py:1251: FutureWarning: The default value of 'ignore' for the `na_action` parameter in pandas.Categorical.map is deprecated and will be changed to 'None' in a future version. Please set na_action to the desired value to avoid seeing this warning\n", + "/env/lib/python3.11/site-packages/scanpy/plotting/_tools/scatterplots.py:1234: FutureWarning: The default value of 'ignore' for the `na_action` parameter in pandas.Categorical.map is deprecated and will be changed to 'None' in a future version. Please set na_action to the desired value to avoid seeing this warning\n", " color_vector = pd.Categorical(values.map(color_map))\n", "/env/lib/python3.11/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored\n", " cax = scatter(\n" @@ -612,10 +7540,10 @@ "height": 277 }, "execution": { - "iopub.execute_input": "2023-12-06T20:46:04.663421Z", - "iopub.status.busy": "2023-12-06T20:46:04.663308Z", - "iopub.status.idle": "2023-12-06T20:46:18.373440Z", - "shell.execute_reply": "2023-12-06T20:46:18.373010Z" + "iopub.execute_input": "2024-02-11T16:37:40.503797Z", + "iopub.status.busy": "2024-02-11T16:37:40.503606Z", + "iopub.status.idle": "2024-02-11T16:37:55.381555Z", + "shell.execute_reply": "2024-02-11T16:37:55.381143Z" }, "id": "YptioB5PMf8T", "outputId": "3d0d93f1-1254-4cff-ab03-ac117b3944b2" @@ -625,10 +7553,124 @@ "name": "stdout", "output_type": "stream", "text": [ - "DE...: 100%|██████████| 1/1 [00:03<00:00, 3.49s/it]\n", - "DE...: 100%|██████████| 1/1 [00:03<00:00, 3.47s/it]\n", - "DE...: 100%|██████████| 1/1 [00:03<00:00, 3.35s/it]\n", - "DE...: 100%|██████████| 1/1 [00:03<00:00, 3.38s/it]\n" + "\r", + "DE...: 0%| | 0/1 [00:00