From c5544ba020c0731a3e5934a93d6b7b4cb36bd58e Mon Sep 17 00:00:00 2001 From: Elizabeth Berrigan Date: Sun, 31 Mar 2024 12:42:09 -0700 Subject: [PATCH] Delete notebooks for now --- MultiDicotPipeline.ipynb | 733 --------------------------------------- 1 file changed, 733 deletions(-) delete mode 100644 MultiDicotPipeline.ipynb diff --git a/MultiDicotPipeline.ipynb b/MultiDicotPipeline.ipynb deleted file mode 100644 index 7939493..0000000 --- a/MultiDicotPipeline.ipynb +++ /dev/null @@ -1,733 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "from sleap_roots import Series, find_all_series\n", - "from sleap_roots import MultipleDicotPipeline\n", - "from sleap_roots.trait_pipelines import Pipeline\n", - "\n", - "import numpy as np\n", - "import pandas as pd\n", - "import json\n", - "\n", - "from pathlib import Path" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "csv_path = \"tests/data/multiple_arabidopsis_11do/merged_proofread_samples_03122024.csv\" # For sample information (count, group)\n", - "folder_path = \"tests/data/multiple_arabidopsis_11do\" # Location of h5 files and predictions\n", - "primary_name = \"primary\" # For loading primary root predictions\n", - "lateral_name = \"lateral\" # For loading lateral root predictions" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['tests/data/multiple_arabidopsis_11do/6039_1.h5',\n", - " 'tests/data/multiple_arabidopsis_11do/7327_2.h5',\n", - " 'tests/data/multiple_arabidopsis_11do/9535_1.h5',\n", - " 'tests/data/multiple_arabidopsis_11do/997_1.h5']" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Find all h5 files in the folder\n", - "all_h5s = find_all_series(folder_path)\n", - "all_h5s" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[Series(h5_path='tests/data/multiple_arabidopsis_11do/6039_1.h5', primary_labels=Labels(labeled_frames=67, videos=1, skeletons=1, tracks=0), lateral_labels=Labels(labeled_frames=68, videos=1, skeletons=1, tracks=0), crown_labels=None, video=Video(filename=\"tests/data/multiple_arabidopsis_11do/6039_1.h5\", shape=(72, 1088, 2048, 1), dataset=vol, backend=HDF5Video), csv_path='tests/data/multiple_arabidopsis_11do/merged_proofread_samples_03122024.csv'),\n", - " Series(h5_path='tests/data/multiple_arabidopsis_11do/7327_2.h5', primary_labels=Labels(labeled_frames=43, videos=1, skeletons=1, tracks=0), lateral_labels=Labels(labeled_frames=31, videos=1, skeletons=1, tracks=0), crown_labels=None, video=Video(filename=\"tests/data/multiple_arabidopsis_11do/7327_2.h5\", shape=(72, 1088, 2048, 1), dataset=vol, backend=HDF5Video), csv_path='tests/data/multiple_arabidopsis_11do/merged_proofread_samples_03122024.csv'),\n", - " Series(h5_path='tests/data/multiple_arabidopsis_11do/9535_1.h5', primary_labels=Labels(labeled_frames=42, videos=1, skeletons=1, tracks=0), lateral_labels=Labels(labeled_frames=36, videos=1, skeletons=1, tracks=0), crown_labels=None, video=Video(filename=\"tests/data/multiple_arabidopsis_11do/9535_1.h5\", shape=(72, 1088, 2048, 1), dataset=vol, backend=HDF5Video), csv_path='tests/data/multiple_arabidopsis_11do/merged_proofread_samples_03122024.csv'),\n", - " Series(h5_path='tests/data/multiple_arabidopsis_11do/997_1.h5', primary_labels=Labels(labeled_frames=72, videos=1, skeletons=1, tracks=0), lateral_labels=Labels(labeled_frames=72, videos=1, skeletons=1, tracks=0), crown_labels=None, video=Video(filename=\"tests/data/multiple_arabidopsis_11do/997_1.h5\", shape=(72, 1088, 2048, 1), dataset=vol, backend=HDF5Video), csv_path='tests/data/multiple_arabidopsis_11do/merged_proofread_samples_03122024.csv')]" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load the cylinder series (one per h5 file)\n", - "all_series = [Series.load(h5_path=h5, primary_name=primary_name, lateral_name=lateral_name, csv_path=csv_path) for h5 in all_h5s]\n", - "all_series" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "# Get the first series in the list\n", - "series = all_series[0]" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "First sample has name 6039_1\n", - "First sample has genotype 6039\n" - ] - } - ], - "source": [ - "print(f\"First sample has name {series.series_name}\")\n", - "print(f\"First sample has genotype {series.group}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "# Initialize the pipeline\n", - "pipeline = MultipleDicotPipeline()" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Aggregated traits saved to 6039_1.all_frames_traits.json\n", - "Summary statistics saved to 6039_1.all_frames_summary.csv\n" - ] - } - ], - "source": [ - "# Get the traits of the first sample\n", - "first_sample_traits = pipeline.compute_multiple_dicots_traits(series=series, write_json=True, write_csv=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
lateral_count_minlateral_count_maxlateral_count_meanlateral_count_medianlateral_count_stdlateral_count_p5lateral_count_p25lateral_count_p75lateral_count_p95lateral_lengths_min...network_distribution_ratio_p95network_solidity_minnetwork_solidity_maxnetwork_solidity_meannetwork_solidity_mediannetwork_solidity_stdnetwork_solidity_p5network_solidity_p25network_solidity_p75network_solidity_p95
0175.082095.01.2401763.04.06.07.03.777593...0.7571330.0411210.1505040.0622550.0572760.019820.0428150.0482310.0700950.098175
\n", - "

1 rows × 315 columns

\n", - "
" - ], - "text/plain": [ - " lateral_count_min lateral_count_max lateral_count_mean \\\n", - "0 1 7 5.08209 \n", - "\n", - " lateral_count_median lateral_count_std lateral_count_p5 \\\n", - "0 5.0 1.240176 3.0 \n", - "\n", - " lateral_count_p25 lateral_count_p75 lateral_count_p95 \\\n", - "0 4.0 6.0 7.0 \n", - "\n", - " lateral_lengths_min ... network_distribution_ratio_p95 \\\n", - "0 3.777593 ... 0.757133 \n", - "\n", - " network_solidity_min network_solidity_max network_solidity_mean \\\n", - "0 0.041121 0.150504 0.062255 \n", - "\n", - " network_solidity_median network_solidity_std network_solidity_p5 \\\n", - "0 0.057276 0.01982 0.042815 \n", - "\n", - " network_solidity_p25 network_solidity_p75 network_solidity_p95 \n", - "0 0.048231 0.070095 0.098175 \n", - "\n", - "[1 rows x 315 columns]" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pd.DataFrame([first_sample_traits[\"summary_stats\"]])" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Processing series '6039_1'\n", - "Finished processing group '6039'\n", - "Aggregated traits for group 6039 saved to 6039.grouped_traits.json\n", - "Finished processing group '6039'\n", - "Summary statistics for group 6039 saved to 6039.grouped_summary.csv\n", - "Processing series '7327_2'\n", - "Finished processing group '7327'\n", - "Aggregated traits for group 7327 saved to 7327.grouped_traits.json\n", - "Finished processing group '7327'\n", - "Summary statistics for group 7327 saved to 7327.grouped_summary.csv\n", - "Processing series '9535_1'\n", - "Finished processing group '9535'\n", - "Aggregated traits for group 9535 saved to 9535.grouped_traits.json\n", - "Finished processing group '9535'\n", - "Summary statistics for group 9535 saved to 9535.grouped_summary.csv\n", - "Processing series '997_1'\n", - "Finished processing group '997'\n", - "Aggregated traits for group 997 saved to 997.grouped_traits.json\n", - "Finished processing group '997'\n", - "Summary statistics for group 997 saved to 997.grouped_summary.csv\n" - ] - } - ], - "source": [ - "# Get the traits grouped by genotype\n", - "grouped_traits = pipeline.compute_multiple_dicots_traits_for_groups(series_list=list(all_series), write_json=True, write_csv=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "4" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(grouped_traits)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
lateral_count_minlateral_count_maxlateral_count_meanlateral_count_medianlateral_count_stdlateral_count_p5lateral_count_p25lateral_count_p75lateral_count_p95lateral_lengths_min...network_distribution_ratio_p95network_solidity_minnetwork_solidity_maxnetwork_solidity_meannetwork_solidity_mediannetwork_solidity_stdnetwork_solidity_p5network_solidity_p25network_solidity_p75network_solidity_p95
0175.082095.01.2401763.04.06.07.03.777593...0.7571330.0411210.1505040.0622550.0572760.019820.0428150.0482310.0700950.098175
\n", - "

1 rows × 315 columns

\n", - "
" - ], - "text/plain": [ - " lateral_count_min lateral_count_max lateral_count_mean \\\n", - "0 1 7 5.08209 \n", - "\n", - " lateral_count_median lateral_count_std lateral_count_p5 \\\n", - "0 5.0 1.240176 3.0 \n", - "\n", - " lateral_count_p25 lateral_count_p75 lateral_count_p95 \\\n", - "0 4.0 6.0 7.0 \n", - "\n", - " lateral_lengths_min ... network_distribution_ratio_p95 \\\n", - "0 3.777593 ... 0.757133 \n", - "\n", - " network_solidity_min network_solidity_max network_solidity_mean \\\n", - "0 0.041121 0.150504 0.062255 \n", - "\n", - " network_solidity_median network_solidity_std network_solidity_p5 \\\n", - "0 0.057276 0.01982 0.042815 \n", - "\n", - " network_solidity_p25 network_solidity_p75 network_solidity_p95 \n", - "0 0.048231 0.070095 0.098175 \n", - "\n", - "[1 rows x 315 columns]" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pd.DataFrame([grouped_traits[0][\"summary_stats\"]])" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "grouped_summary_df = pd.DataFrame([grouped_trait[\"summary_stats\"] for grouped_trait in grouped_traits])" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
lateral_count_minlateral_count_maxlateral_count_meanlateral_count_medianlateral_count_stdlateral_count_p5lateral_count_p25lateral_count_p75lateral_count_p95lateral_lengths_min...network_distribution_ratio_p95network_solidity_minnetwork_solidity_maxnetwork_solidity_meannetwork_solidity_mediannetwork_solidity_stdnetwork_solidity_p5network_solidity_p25network_solidity_p75network_solidity_p95
0175.0820905.01.2401763.004.06.007.03.777593...0.7571330.0411210.1505040.0622550.0572760.0198200.0428150.0482310.0700950.098175
1193.4341091.02.8252601.001.06.008.04.345694...0.6798400.0241680.2934890.0929200.0873950.0620090.0305210.0411960.1255390.214581
21136.0079376.03.0276401.004.08.0011.04.431438...0.6775140.0323770.1665380.0550980.0488880.0230230.0333930.0384700.0658400.092981
3497.0000007.51.9148544.255.58.759.017.140351...0.5503920.0176350.0288670.0211030.0192850.0040370.0176990.0179870.0228160.027564
\n", - "

4 rows × 315 columns

\n", - "
" - ], - "text/plain": [ - " lateral_count_min lateral_count_max lateral_count_mean \\\n", - "0 1 7 5.082090 \n", - "1 1 9 3.434109 \n", - "2 1 13 6.007937 \n", - "3 4 9 7.000000 \n", - "\n", - " lateral_count_median lateral_count_std lateral_count_p5 \\\n", - "0 5.0 1.240176 3.00 \n", - "1 1.0 2.825260 1.00 \n", - "2 6.0 3.027640 1.00 \n", - "3 7.5 1.914854 4.25 \n", - "\n", - " lateral_count_p25 lateral_count_p75 lateral_count_p95 \\\n", - "0 4.0 6.00 7.0 \n", - "1 1.0 6.00 8.0 \n", - "2 4.0 8.00 11.0 \n", - "3 5.5 8.75 9.0 \n", - "\n", - " lateral_lengths_min ... network_distribution_ratio_p95 \\\n", - "0 3.777593 ... 0.757133 \n", - "1 4.345694 ... 0.679840 \n", - "2 4.431438 ... 0.677514 \n", - "3 17.140351 ... 0.550392 \n", - "\n", - " network_solidity_min network_solidity_max network_solidity_mean \\\n", - "0 0.041121 0.150504 0.062255 \n", - "1 0.024168 0.293489 0.092920 \n", - "2 0.032377 0.166538 0.055098 \n", - "3 0.017635 0.028867 0.021103 \n", - "\n", - " network_solidity_median network_solidity_std network_solidity_p5 \\\n", - "0 0.057276 0.019820 0.042815 \n", - "1 0.087395 0.062009 0.030521 \n", - "2 0.048888 0.023023 0.033393 \n", - "3 0.019285 0.004037 0.017699 \n", - "\n", - " network_solidity_p25 network_solidity_p75 network_solidity_p95 \n", - "0 0.048231 0.070095 0.098175 \n", - "1 0.041196 0.125539 0.214581 \n", - "2 0.038470 0.065840 0.092981 \n", - "3 0.017987 0.022816 0.027564 \n", - "\n", - "[4 rows x 315 columns]" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "grouped_summary_df" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Processing series '6039_1'\n", - "Finished processing group '6039'\n", - "Aggregated traits for group 6039 saved to 6039.grouped_traits.json\n", - "Finished processing group '6039'\n", - "Processing series '7327_2'\n", - "Finished processing group '7327'\n", - "Aggregated traits for group 7327 saved to 7327.grouped_traits.json\n", - "Finished processing group '7327'\n", - "Processing series '9535_1'\n", - "Finished processing group '9535'\n", - "Aggregated traits for group 9535 saved to 9535.grouped_traits.json\n", - "Finished processing group '9535'\n", - "Processing series '997_1'\n", - "Finished processing group '997'\n", - "Aggregated traits for group 997 saved to 997.grouped_traits.json\n", - "Finished processing group '997'\n", - "Computed traits for all groups saved to group_summarized_traits.csv\n" - ] - } - ], - "source": [ - "grouped_summary_df = pipeline.compute_batch_multiple_dicots_traits_for_groups(all_series=list(all_series), write_json=True, write_csv=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'6039'" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "grouped_traits[0][\"group\"]" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(4, 316)" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "grouped_summary_df.shape" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "sleap_roots", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.18" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}