From 1bdc3d81ec7134a0f05129bc5d642a4ddbb8eb48 Mon Sep 17 00:00:00 2001 From: Kevin Barnard Date: Tue, 20 Aug 2024 15:07:39 -0700 Subject: [PATCH] style: apply ruff linting & formatting --- examples/tutorial.ipynb | 3410 ++++++++++++++++++++------------------- src/fathomnet/dto.py | 2 +- 2 files changed, 1712 insertions(+), 1700 deletions(-) diff --git a/examples/tutorial.ipynb b/examples/tutorial.ipynb index 78cf272..69fcc5a 100644 --- a/examples/tutorial.ipynb +++ b/examples/tutorial.ipynb @@ -1,1701 +1,1713 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "ifmDW0dirq9D" - }, - "source": [ - "# FathomNet Python API Tutorial\n", - "*So you want to use FathomNet data...*\n", - "\n", - "\"FathomNet" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "QNYBxwg3UsZe" - }, - "source": [ - "## Introduction\n", - "\n", - "> `fathomnet-py` is a client-side API to help scientists, researchers, and developers interact with FathomNet data.\n", - "\n", - "[![tests](https://github.com/fathomnet/fathomnet-py/actions/workflows/tests.yml/badge.svg)](https://github.com/fathomnet/fathomnet-py/actions/workflows/tests.yml)\n", - "[![Documentation Status](https://readthedocs.org/projects/fathomnet-py/badge/?version=latest)](https://fathomnet-py.readthedocs.io/en/latest/?badge=latest)\n", - "\n", - "The [fathomnet-py](https://github.com/fathomnet/fathomnet-py) API offers native Python interaction with the FathomNet REST API, abstracting away the underlying HTTP requests.\n", - "This notebook is designed to walk you through some of the core functionality of the API. \n", - "\n", - "It's split into three parts:\n", - "1. [**API**](#api): API overview and data visualizations\n", - "2. [**Data**](#building-and-exploring-a-dataset-from-fathomnet): Building and exploring a dataset from FathomNet\n", - "3. [**Models**](#inference-with-a-pre-trained-model): Running images from FathomNet through a pre-trained model available on the [FathomNet Model Zoo](https://github.com/fathomnet/models)\n", - "\n", - "This notebook is by no means exhaustive; it serves to show some common \"recipes\" for pulling down and handling FathomNet data in Python. **Full documentation for fathomnet-py is available at [fathomnet-py.readthedocs.io](https://fathomnet-py.readthedocs.io).**\n", - "\n", - "[FathomNet GitHub](https://github.com/fathomnet)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "tM6WRbgWWkV1" - }, - "source": [ - "### Installing `fathomnet-py`\n", - "\n", - "To install fathomnet-py, you will need to have Python 3.7 or greater installed first (as of the time of writing, this notebook ships with Python 3.9). Then, from the command-line:\n", - "\n", - "```bash\n", - "pip install fathomnet\n", - "```\n", - "\n", - "This notebook installs fathomnet-py in the [Setup](#setup) section next, along with some relevant packages for data manipulation and visualization." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ZkYCmUX6r0su" - }, - "source": [ - "\n", - "## Setup" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-uNeHCauXyvC" - }, - "source": [ - "Note: this notebook assumes you are running from a colab environment. If this is not the case you may have to manually install a few packages that are pre-installed in colab such as numpy and pandas. 
\n", - "\n", - "First, import the auxiliary modules we need for part 1:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import ipywidgets as widgets # Provides embedded widgets\n", - "import ipyleaflet # Provides map widgets\n", - "import requests # Manages HTTP requests\n", - "import numpy as np # Facilitates array/matrix operations\n", - "import plotly.express as px # Generates nice plots\n", - "import random # Generates pseudo-random numbers\n", - "from PIL import Image, ImageFont, ImageDraw # Facilitates image operations\n", - "from io import BytesIO # Interfaces byte data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Next, we'll install a few packages via pip:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "4Q--V_Xrr3js" - }, - "outputs": [], - "source": [ - "!pip install -q -U fathomnet ipyleaflet" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Iazt9ae-sDgm" - }, - "source": [ - "\n", - "## The API\n", - "\n", - "Now that we have fathomnet-py installed, let's see what it can do!\n", - "\n", - "This section will show some of the common calls to pull down FathomNet data, and then we'll render some visualizations of the results." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pkLXdOx-F6Gm" - }, - "source": [ - "### Overview\n", - "\n", - "The two main parts of fathomnet-py are the **modules** and the **data classes**." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "PlGSfHJkKKsy" - }, - "source": [ - "#### Modules\n", - "\n", - "fathomnet-py offers a variety of modules that encapsulate their relevant API operations. In brief:\n", - "\n", - "- `boundingboxes` --- find & manage bounding boxes\n", - "- `darwincore` --- list owner institutions\n", - "- `images` --- find & manage images\n", - "- `geoimages` --- query for geo-images (geographic info only of images)\n", - "- `imagesetuploads` --- find & manage image set uploads\n", - "- `regions` --- list marine regions\n", - "- `stats` --- compute summary statistics\n", - "- `tags` --- find & manage custom image tags\n", - "- `taxa` --- get taxonomic information via a taxa provider\n", - "- `users` --- manage user accounts & list contributors\n", - "- `firebase` & `xapikey` -- authenticate for write-level operations\n", - "\n", - "*Note: We will repeatedly import some of these modules in the notebook to highlight what's being used in each step. In your code, you only need to import a module once.*" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "dSVuXOeAFYcO" - }, - "source": [ - "Each operation (API call) is represented as a function in its given module. For example, to get an image by its universally-unique identifier (UUID), we can import the `fathomnet.api.images` module and call the `find_by_uuid` function." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "AqyGxrO-LSxP" - }, - "outputs": [], - "source": [ - "from fathomnet.api import images\n", - "\n", - "example_image = images.find_by_uuid('79958ac5-832a-488c-9b48-cce7db346497')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "GCkHPRDcFzJn" - }, - "source": [ - "#### Data classes\n", - "\n", - "To facilitate parsing and saving FathomNet data, native Python dataclasses are provided in the `fathomnet.dto` module." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "yGwVcnkqGsXE" - }, - "source": [ - "For example, we can see that the returned image from the `find_by_uuid` call above is of type `AImageDTO`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "nneeYpzEG0EV" - }, - "outputs": [], - "source": [ - "type(example_image)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "s1pBTzGQIQVL" - }, - "source": [ - "These native data representations make it easier to write Python programs around FathomNet data. We'll print out some of the fields here." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "0IzMAU88IpGy" - }, - "outputs": [], - "source": [ - "print('Image URL:', example_image.url)\n", - "\n", - "print('Captured at latitude/longitude', example_image.latitude, example_image.longitude)\n", - "\n", - "print('There are', len(example_image.boundingBoxes), 'bounding boxes:')\n", - "for box in example_image.boundingBoxes:\n", - " print('-', box.concept, 'has area', box.width * box.height)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2nLFG4fNHfhL" - }, - "source": [ - "We can convert (serialize/deserialize) any of the FathomNet dataclasses to/from JSON or Python dictionaries. Let's print out the contents of that example image as JSON." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "OvT2OyrcHy_K" - }, - "outputs": [], - "source": [ - "print(example_image.to_json(indent=2))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "em4qC6YUKade" - }, - "source": [ - "### Bar chart of concepts with the most bounding boxes\n", - "\n", - "Here we will use a `boundingboxes` operation, called `count_total_by_concept`, to get a quick count of the total number of bounding boxes for every concept in FathomNet. To visualize, we'll make a bar chart of the top `N`.\n", - "\n", - "⚙ Try changing the value of `N` on the right to show more concepts!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "jLcJ0cHaPVoC" - }, - "outputs": [], - "source": [ - "from fathomnet.api import boundingboxes\n", - "\n", - "# Make a bar chart of the top N concepts by bounding boxes\n", - "N = 11 #@param {type:\"slider\", min:5, max:20, step:1}\n", - "\n", - "# Get the number of bounding boxes for all concepts\n", - "concept_counts = boundingboxes.count_total_by_concept()\n", - "\n", - "# Sort by number of bounding boxes\n", - "concept_counts.sort(key=lambda cc: cc.count, reverse=True)\n", - "\n", - "# Get the top N concepts and their counts\n", - "concepts, counts = zip(*((cc.concept, cc.count) for cc in concept_counts[:N]))\n", - "\n", - "# Make a bar chart\n", - "fig = px.bar(\n", - " x=concepts, y=counts, \n", - " labels={'x': 'Concept', 'y': 'Bounding box count'}, \n", - " title=f'Top {N} concepts', \n", - " text_auto=True\n", - ")\n", - "fig.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "nL-_pukeZcXB" - }, - "source": [ - "### Listing images for a concept\n", - "\n", - "Let's say we want to list all of the available images in FathomNet for a given concept. Here, we'll\n", - "1. List all the available concepts (again, using the `boundingboxes` module)\n", - "2. Pick one\n", - "3. 
Get a list of images for that concept using the `images` module" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "oS585C0JZnqD" - }, - "source": [ - "First, let's list all the available concepts in a choosable box.\n", - "\n", - "We'll call the `find_concepts` function and put the results in a combo box.\n", - "\n", - "⚙ **Pick a concept after running this cell!**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "gOc40XMNaItV" - }, - "outputs": [], - "source": [ - "from fathomnet.api import boundingboxes\n", - "\n", - "# Get a list of all concepts that have at least 1 bounding box\n", - "all_concepts = boundingboxes.find_concepts()\n", - "\n", - "# Print how many there are\n", - "print('FathomNet has', len(concept_counts), 'localized concepts!')\n", - "\n", - "# Pick one!\n", - "concept_combo = widgets.Combobox(\n", - " options=all_concepts,\n", - " description='Pick one:',\n", - " placeholder='Double-click or type here',\n", - " ensure_option=True,\n", - " disabled=False\n", - ")\n", - "concept_combo" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "CJJzxjPYOZQY" - }, - "source": [ - "With our concept selected (if you didn't put anything, it will default to *Chionoecetes tanneri*), we can call the `images` module `find_by_concept` function to get back a list of all images containing a bounding box for that concept." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "0NJoDLJ6ueCx" - }, - "outputs": [], - "source": [ - "from fathomnet.api import images\n", - "\n", - "# Get the selected concept\n", - "selected_concept = concept_combo.value or 'Chionoecetes tanneri'\n", - "\n", - "# List the images FathomNet for that concept\n", - "concept_images = images.find_by_concept(selected_concept)\n", - "\n", - "# Print the total number\n", - "print('Found', len(concept_images), 'images of', selected_concept)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "4BzezsyGPJ0w" - }, - "source": [ - "This next cell will pick a random image, fetch it by its URL, and display it. \n", - "\n", - "⚙ If you want a different image, just re-run this cell." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "xNmguhajsqzV" - }, - "outputs": [], - "source": [ - "# Pick a random image\n", - "random_image = concept_images[random.randrange(len(concept_images))]\n", - "\n", - "# Fetch and show the image\n", - "image_data = requests.get(random_image.url).content\n", - "pil_image = Image.open(BytesIO(image_data))\n", - "display(pil_image)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "YUOvebwiqHzM" - }, - "source": [ - "Then, we'll loop over each bounding box listed and render it (drawing a box & label for it) on the image." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "JTD8_IRhjIjp" - }, - "outputs": [], - "source": [ - "# Concept -> color mapping for bounding boxes\n", - "def color_for_concept(concept: str):\n", - " hash = sum(map(ord, concept)) << 5\n", - " return f'hsl({hash % 360}, 100%, 85%)'\n", - "\n", - "# Draw the bounding boxes and labels on the image\n", - "draw_image = ImageDraw.Draw(pil_image)\n", - "font = ImageFont.truetype('/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf', size=18)\n", - "for box in random_image.boundingBoxes:\n", - " color = color_for_concept(box.concept)\n", - " draw_image.rectangle((box.x, box.y, box.x + box.width, box.y + box.height), width=3, outline=color)\n", - " draw_image.text((box.x, box.y + box.height), box.concept, fill=color, font=font)\n", - "\n", - "# Show the image with overlay\n", - "display(pil_image)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qKZhbKeXWgHd" - }, - "source": [ - "### Depth histogram\n", - "\n", - "Let's generate a depth histogram; we'll extract the `depthMeters` field from each image (where present) and plot it." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "dUhMWpTzv-es" - }, - "outputs": [], - "source": [ - "# Extract the depth (in meters) from each image\n", - "depths = [\n", - " image.depthMeters\n", - " for image in concept_images \n", - " if image.depthMeters is not None\n", - "]\n", - "\n", - "# Make a horizontal histogram\n", - "fig = px.histogram(y=depths, title=f'{selected_concept} images by depth', labels={'y': 'depth (m)'})\n", - "fig['layout']['yaxis']['autorange'] = 'reversed'\n", - "fig.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HrtWydOLWXM0" - }, - "source": [ - "### Geographic heatmap\n", - "\n", - "We can use the `latitude` and `longitude` fields to georeference each image. Here, we're generating a heatmap of the images overlaid on the Esri ocean basemap.\n", - "\n", - "⚙ Zoom and pan around -- although the map is centered on the Monterey Bay, see if you can find where other \"hotspots\" are for your concept." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "AeR_4Ad-wT-t" - }, - "outputs": [], - "source": [ - "# Extract the latitude/longitude from each image\n", - "locations = [\n", - " (image.latitude, image.longitude)\n", - " for image in concept_images\n", - " if image.latitude is not None and image.longitude is not None\n", - "]\n", - "\n", - "# Create a map from the Esri Ocean basemap\n", - "center = (36.807, -121.988) # Monterey Bay\n", - "map = ipyleaflet.Map(\n", - " basemap=ipyleaflet.basemaps.Esri.OceanBasemap, \n", - " center=center, \n", - " zoom=10\n", - ")\n", - "map.layout.height = \"800px\"\n", - "\n", - "# Overlay the image locations as a heatmap\n", - "heatmap = ipyleaflet.Heatmap(\n", - " locations=locations,\n", - " radius=20,\n", - " min_opacity=0.5\n", - ")\n", - "map.add_layer(heatmap)\n", - "\n", - "map" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "p4oqvJXTWq__" - }, - "source": [ - "\n", - "## Building and exploring a dataset from FathomNet\n", - "\n", - "FathomNet was built to support researchers seeking underwater imagery to train and test machine learning models. We'll demonstrate some of that functionality here. We'll build a VOC formatted dataset using the FathomNey Python API. 
Then we'll download a COCO formatted dataset using the `fathomnet-generate` command line tool and explore the contents programmatically.\n",
- "\n",
- "There are loads of software tools out there to train and run deep learning models. Later in this tutorial we will use [Detectron2](https://github.com/facebookresearch/detectron2), Facebook Research's machine learning library. This is just one of many options. For example, our MBARI colleague [Danelle Cline](https://www.mbari.org/person/danelle-e-cline/) put together a great notebook demonstrating [how to set up FathomNet data to train a YOLOv5 model](https://docs.mbari.org/deepsea-ai/notebooks/fathomnet_train/). \n",
- "\n",
- "Before getting started, we'll need to install some packages in this workspace."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "KA_RszjyCHJ1"
- },
- "outputs": [],
- "source": [
- "!pip install -q -U fathomnet pycocotools pandas plotly"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "cIoypIxuR1W1"
- },
- "source": [
- "Then import all the needed libraries into the workspace."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "7kAT9rIzD-nN"
- },
- "outputs": [],
- "source": [
- "import pickle # Data serialization library\n",
- "import json # Data storage (JavaScript Object Notation)\n",
- "import matplotlib.pyplot as plt # Plotting utilities\n",
- "import requests # Manages HTTP requests\n",
- "import random # Random number generator\n",
- "import numpy as np # Array manipulations\n",
- "import pandas as pd # More array manipulations\n",
- "import plotly.express as px # Plotting library\n",
- "\n",
- "# Import coco dataset tools\n",
- "from pycocotools.coco import COCO\n",
- "\n",
- "# Import from pyplot and PIL for easy plotting\n",
- "from matplotlib.pyplot import imshow\n",
- "from PIL import Image\n",
- "import skimage.io as io"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "\n",
- "### Manually create a VOC dataset\n",
- " \n",
- "Let's say we want to train an object detector to detect *Gersemia juliepackardae* only. We don't want too much training data, so let's limit our query to just 10 images of *G. juliepackardae*.\n",
- "\n",
- "We can find this data by specifying a *constraint object* in fathomnet-py, then calling a generalized image querying function. To do this, we need to grab `GeoImageConstraints` from the `fathomnet.dto` module:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "from fathomnet.dto import GeoImageConstraints"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Now, we can make a constraint object for this query."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "gersemia_constraints = GeoImageConstraints(concept='Gersemia juliepackardae', limit=10)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "To query for image data according to these constraints, we'll call the `fathomnet.api.images.find` function."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "from fathomnet.api import images\n",
- "\n",
- "gersemia_images = images.find(gersemia_constraints)\n",
- "print(f'Gersemia juliepackardae images: {len(gersemia_images)}')"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "In order to get this data ready for training, we still need to do three things:\n",
- "1. **Download** the images themselves\n",
- "2. **Format** the bounding boxes into something the model can understand\n",
- "3. **Structure** the directory according to the [prescribed VOC format](https://detectron2.readthedocs.io/en/latest/tutorials/builtin_datasets.html#expected-dataset-structure-for-pascal-voc)."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Download the images\n",
- "\n",
- "No magic here; we just need to download the images (via HTTP) to somewhere the notebook can find them.\n",
- "\n",
- "*Note: there are more efficient ways to do this.*"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import requests\n",
- "from pathlib import Path\n",
- "from progressbar import progressbar\n",
- "from io import BytesIO\n",
- "\n",
- "# Create a directory for the images\n",
- "data_dir = Path('/content/gersemia_voc')\n",
- "image_dir = data_dir / 'JPEGImages'\n",
- "image_dir.mkdir(exist_ok=True, parents=True)\n",
- "\n",
- "# Download each image, saving each new file path to a list\n",
- "image_paths = []\n",
- "for image in progressbar(gersemia_images, redirect_stdout=True):\n",
- " # Format our image file name as the image UUID + .jpg\n",
- " image_path = image_dir / f'{image.uuid}.jpg'\n",
- " image_paths.append(image_path)\n",
- " if image_path.exists(): # Skip re-downloading images\n",
- " continue\n",
- " \n",
- " # Download the image\n",
- " image_raw = requests.get(image.url, stream=True).raw\n",
- " pil_image = Image.open(image_raw)\n",
- " \n",
- " # Convert to RGB (ensures consistent colorspace)\n",
- " pil_image = pil_image.convert('RGB')\n",
- "\n",
- " # Save the image\n",
- " pil_image.save(image_path)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Format the bounding boxes\n",
- "\n",
- "We need to get the bounding boxes in a format the model can understand. Image data (`AImageDTO`) objects offer a convenience function to generate Pascal VOC annotations from their internal data. We can leverage this to quickly generate XML annotations of the form:\n",
- "\n",
- "```xml\n",
- "<annotation>\n",
- " <folder>images</folder>\n",
- " <filename>{image filename}</filename>\n",
- " <path>/content/drive/MyDrive/fathomnet-workshop-tests/images/{image filename}</path>\n",
- " <source>\n",
- " <database>FathomNet</database>\n",
- " </source>\n",
- " <size>\n",
- " <width>{image width}</width>\n",
- " <height>{image height}</height>\n",
- " <depth>3</depth>\n",
- " </size>\n",
- " <segmented>0</segmented>\n",
- " <object>\n",
- " <name>{concept}[ ({altConcept})]</name>\n",
- " <pose>Unspecified</pose>\n",
- " <truncated>0</truncated>\n",
- " <difficult>0</difficult>\n",
- " <occluded>0</occluded>\n",
- " <bndbox>\n",
- " <xmin>{x}</xmin>\n",
- " <xmax>{x + width}</xmax>\n",
- " <ymin>{y}</ymin>\n",
- " <ymax>{y + height}</ymax>\n",
- " </bndbox>\n",
- " </object>\n",
- " ...\n",
- "</annotation>\n",
- "```"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "We will likewise write out all the annotations in the preferred VOC directory structure. \n",
- "\n",
- "At the same time, we'll filter out any bounding boxes besides *G. juliepackardae*."
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "xml_dir = data_dir / 'Annotations'\n", - "xml_dir.mkdir(exist_ok=True, parents=True)\n", - "\n", - "for image, image_path in zip(gersemia_images, image_paths):\n", - " xml_path = xml_dir / image_path.with_suffix('.xml').name\n", - " image.boundingBoxes = list(filter( # filter only Gersemia juliepackardae\n", - " lambda box: box.concept == 'Gersemia juliepackardae', image.boundingBoxes\n", - " ))\n", - " pascal_voc = image.to_pascal_voc(path=str(image_path), pretty_print=True)\n", - " xml_path.write_text(pascal_voc)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This is great to do explicitly since it illustrates all the API tools. But you can also use the `fathomnet-generate` command line tool to make the dataset with a single line of code. Do note, however, that the following command will download **all** available images and annotation of the coral. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# ! fathomnet-generate -c \"Gersemia juliepackardae\" --format coco --img-download '/content/gersemia_voc/JPEGImages' --output '/content/gersemia_voc/Annotations'" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "QjUISR3bsIZA" - }, - "source": [ - "### Build a COCO dataset\n", - "\n", - "Let's take a look at how we can leverage the Python API to download some images and bounding boxes from FathomNet. We're going to set up a training dataset that can be used for model training either in this notebook or in the Dockerized container we shared earlier. \n", - "\n", - "As we saw before, we can use the `fathomnet.api.images` module to search for images by concept. We can use that same functionality to set up a data set for model training. For an example scroll down to the final section of this notebook where we demonstrate how to use it to set up a VOC dataset.\n", - "\n", - "Here we'll use the handy `fathomnet-generate` command line tool to download FathomNet data and automatically organize it according to the [Microsoft Common Objects in COntext](https://cocodataset.org/#home) (COCO) standards. " - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "CYXNIUQEnazh" - }, - "source": [ - "#### COCO Formatting" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "COCO is a large annotated image dataset containing bounding boxes and segmentation masks for 91 categories in 100s of thousands of images. The bounding box annotations for object detection are organized into a standard format to make it easier to work with. Typically, the annotation files are distributed as json serialized nested dictionaries.\n", - "\n", - "```\n", - "{\n", - " \"info\": info,\n", - " \"images\": [image],\n", - " \"categories\": [category]\n", - " \"annotations\": [annotation]\n", - "}\n", - "```\n", - "\n", - "Each field contains inter-related pieces of information for your model training code to use. 
`info` is a dictionary that contains some metadata about the entire dataset:\n",
- "\n",
- "```\n",
- "info = {\n",
- " \"year\": 2023,\n",
- " \"version\": \"0\",\n",
- " \"description\": \"Generated by FathomNet\",\n",
- " \"contributor\": \"FathomNet\",\n",
- " \"url\": \"https://fathomnet.org\",\n",
- " \"date_created\": \"2023/02/23\"\n",
- "}\n",
- "```\n",
- "\n",
- "The `images` field consists of a list of `image` objects that specify the file name, image dimensions, permanent URL, etc. \n",
- "\n",
- "```\n",
- "image = {\n",
- " \"id\": 1,\n",
- " \"width\": 1920,\n",
- " \"height\": 1080,\n",
- " \"file_name\": \"754e6a28-a8eb-4cb3-a0b9-3f2d5daacbae.png\",\n",
- " \"license\": 0,\n",
- " \"flickr_url\": \"https://fathomnet.org/static/m3/staging/Doc%20Ricketts/images/0861/00_12_12_05.png\",\n",
- " \"coco_url\": \"https://fathomnet.org/static/m3/staging/Doc%20Ricketts/images/0861/00_12_12_05.png\",\n",
- " \"date_captured\": \"2016-06-16 00:00:00\"\n",
- "}\n",
- "```\n",
- "\n",
- "The `categories` field is a list of `category` objects organized by numeric IDs.\n",
- "\n",
- "```\n",
- "category = {\n",
- " \"id\": 2,\n",
- " \"name\": \"Actinernus\",\n",
- " \"supercategory\": \"\"\n",
- "}\n",
- "```\n",
- "\n",
- "Finally, `annotations` is a list of `annotation` objects that bring it all together. \n",
- "\n",
- "```\n",
- "annotation = {\n",
- " \"id\": 1,\n",
- " \"image_id\": 1,\n",
- " \"category_id\": 2,\n",
- " \"segmentation\": [],\n",
- " \"area\": 51200.0,\n",
- " \"bbox\": [\n",
- " 200.0,\n",
- " 433.0,\n",
- " 256.0,\n",
- " 200.0\n",
- " ],\n",
- " \"iscrowd\": 0\n",
- "}\n",
- "```\n",
- "\n",
- "Note that the `id` fields are specific to each list of objects. The example `annotation` object tells us that in the image associated with `image_id` 1, there is a bounding box located at position `[200, 433]` with a width of `256` pixels and a height of `200` pixels. The label associated with that bounding box is `category_id` 2, which corresponds to the anemone \"Actinernus\".\n",
- "\n",
- "Whew, that is a mouthful! Fortunately, we can auto-populate all those fields with the `fathomnet-generate` command line tool. First, print out the docs. \n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "! fathomnet-generate "
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "As you can see, there are lots of options built into the tool. Let's start with a simple query to see how it works. We'll execute the command below.\n",
- "\n",
- "The `-c` flag accepts a comma-separated list of concepts to query for in FathomNet. The `--count` flag tells the tool to just report how many bounding boxes are associated with each concept rather than actually building the dataset. "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "! fathomnet-generate -c \"Aegina rosea\" --count"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Great, now we know there are about 29 bounding boxes associated with this concept in FathomNet. This is a reasonable amount of data to download in real time for this workshop. \n",
- "\n",
- "To download the data we'll need to remove the `--count` flag, tell the script we want COCO format, and specify where we want the dataset and images. This should take about five minutes to download all the images and annotations."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "! fathomnet-generate -c \"Aegina rosea\" --format coco --img-download 'demo_dataset/images' --output 'demo_dataset'"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Here the `--format` flag tells the script to build a `coco` dataset. The `--img-download` and `--output` flags specify where all the output should live. When executed, the above line will make a new directory called `demo_dataset`. In that folder will be a COCO formatted dataset called `dataset.json` and an `images` directory containing 29 images with UUID file names.\n",
- "\n",
- "#### Explore the dataset\n",
- "\n",
- "We can use the handy `pycocotools` provided by the COCO dataset maintainers to visualize some of the images and annotations. "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Create a coco dataset object\n",
- "dataset = COCO('demo_dataset/dataset.json')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Show all the category names\n",
- "cats = dataset.loadCats(dataset.getCatIds())\n",
- "nms = [cat['name'] for cat in cats]\n",
- "print('Categories: \\n{}\\n'.format(' \\n'.join(nms)))"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "We can also see the raw category objects by just printing the `cats` object."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "cats"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "The format conforms to the COCO standards. As a sanity check, we can then use pycocotools to see how many annotations are associated with each category. "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "cat_ids = dataset.getCatIds(catNms=nms)\n",
- "for cid in cat_ids:\n",
- " anns = dataset.getAnnIds(catIds=cid)\n",
- " print(f\"{cats[cid-1]['name']} has {len(anns)} annotations\")"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "This is what we expect based on the output of `fathomnet-generate` when we used the `--count` flag. \n",
- "\n",
- "Now let's grab an image and look at the bounding boxes. "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Grab all the image ids associated with Aegina rosea\n",
- "img_ids = dataset.getImgIds(catIds=cat_ids[0])\n",
- "\n",
- "# Select one of them at random\n",
- "img = dataset.loadImgs(img_ids[np.random.randint(0, len(img_ids))])[0]\n",
- "\n",
- "# Get the annotations in the image\n",
- "anns_ids = dataset.getAnnIds(imgIds=img['id'], catIds=cat_ids[0], iscrowd=None)\n",
- "anns = dataset.loadAnns(anns_ids)\n",
- "\n",
- "# ...and display it \n",
- "im = io.imread(f\"demo_dataset/images/{img['file_name']}\")\n",
- "plt.axis('off')\n",
- "plt.imshow(im)\n",
- "dataset.showAnns(anns, draw_bbox=True)"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Data exploration \n",
- "\n",
- "Now we'll pull down a more complicated dataset and visualize what it contains. 
We'll use `fathomnet-generate` to get all the data collected in Monterey Bay in calendar year 2018. Note, the following command will only produce the dataset JSON document, we won't download the actual images to save time. If in the future you want to save the images, append the `--img-download` flag as in the earlier example." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "! fathomnet-generate --output 'demo_coco' --format coco \\\n", - " --max-latitude 37.0538 --min-latitude 36.4458 \\\n", - " --max-longitude -121.7805 --min-longitude -122.5073 \\\n", - " --start 2018-01-01 --end 2018-12-31" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The command will generate a new dataset object in `demo_coco/dataset.json`. Let's see what is in it. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Create a new coco dataset object\n", - "dataset = COCO('demo_coco/dataset.json')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Look at some high level information. \n", - "print(f'Number of images = {len(dataset.getImgIds())}')\n", - "print(f'Number of annotations = {len(dataset.getAnnIds())}')\n", - "print(f'Number of categories = {len(dataset.getCatIds())}')" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "So now we have a more complicated dataset. 154 categories with 1554 annotations. We can check what the most abundant category is." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "all_anns = dataset.loadAnns(dataset.getAnnIds())\n", - "all_anns = pd.DataFrame(all_anns) # Make it into a pandas data frame for easy manipulation\n", - "\n", - "# See what the most abundant category is \n", - "cid = all_anns['category_id'].value_counts().idxmax()\n", - "print(f\"Most abundant category id = {cid}\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "So that tells us what the most abundant category id is. But what organism is that? " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dataset.loadCats(ids=[cid])[0]['name']" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Very cool. But I'm a programmer not a marine biologist. What the heck is that thing?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Get all images that contain at least one annotation with that category id\n", - "img_ids = dataset.getImgIds(catIds=cid)\n", - "\n", - "# Select one of them at random\n", - "img = dataset.loadImgs(img_ids[np.random.randint(0, len(img_ids))])[0]\n", - "\n", - "# Get the annotations in the image\n", - "anns_ids = dataset.getAnnIds(imgIds=img['id'], catIds=cid, iscrowd=None)\n", - "anns = dataset.loadAnns(anns_ids)\n", - "\n", - "# ...and display it\n", - "im = io.imread(img['coco_url']) # this downloads the image\n", - "plt.axis('off')\n", - "plt.imshow(im)\n", - "dataset.showAnns(anns, draw_bbox=True)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "It looks like we got a big ol' coral for our most abundant critter. 
\n",
- "\n",
- "This tells us some useful stuff about the most abundant data, but how can we get a sense of the species distribution in this dataset?"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Add a column to our dataframe that has all the plain language names of the category ids\n",
- "all_anns['name'] = all_anns['category_id'].apply(lambda xx: dataset.loadCats(ids=[xx])[0]['name'])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Plot the distribution of the counts\n",
- "fig = px.histogram(all_anns, x='name', title='Concept counts', labels={'name': 'Concept', 'count': 'Number of annotations'})\n",
- "\n",
- "fig.show()"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "That is pretty illegible. What happens if we just look at the 20 most abundant organisms?"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Plot the 20 most abundant\n",
- "top_anns = all_anns['name'].value_counts().head(20)\n",
- "\n",
- "fig = px.histogram(top_anns, x=top_anns.index, y=top_anns.values, title='Top 20 concepts', labels={'x': 'Concept', 'y': 'Number of annotations'})\n",
- "fig.show()"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Much better. Together these two plots tell us something important; namely, that we are dealing with a long-tailed distribution. That might impact how we do our model training. \n",
- "\n",
- "There are all sorts of other metadata manipulations we can do to explore the structure of our data. This type of exercise is important to ensure that you know what is going into your model before training. That will help you both produce a better model and more efficiently diagnose errors. "
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Model training\n",
- "\n",
- "In the interest of time, we will not train a model in this workshop session. We have set up a [separate notebook](https://github.com/fathomnet/fathomnet-py/blob/detectron2-train-demo/train_demo.ipynb) that illustrates how to get a COCO formatted dataset with `fathomnet-generate` and train a RetinaNet model using `detectron2`. We hope you'll find it helpful!"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {
- "id": "jrTNntfuGRLQ"
- },
- "source": [
- "## Inference with a pre-trained model\n",
- "\n",
- "A big feature of FathomNet is the *Model Zoo*, a repository for users to share their models with the community. For the moment, [we are advising users](https://medium.com/fathomnet/how-to-upload-your-ml-model-to-fathomnet-68b933dd55bd) to upload their models to Zenodo to generate a DOI and then share them on our GitHub page. We have provided a number of our models as a starting point. \n",
- "\n",
- "First install some additional packages. You may see a red ERROR message in this cell -- you can disregard it. **This will take a couple minutes.** Grab a coffee! 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!pip install pyyaml==5.4.1 'git+https://github.com/facebookresearch/detectron2.git'" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "⚠ Detectron doesn't play nice with some installed package versions; you may see a mesasge asking you to restart the runtime. Press that button, or run this cell:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "try:\n", - " import detectron2\n", - "except:\n", - " print('Restarting runtime...')\n", - " exit()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now grab all the packages we need." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import torchvision # Library of datasets, models, and image transforms\n", - "import pickle # Data serialization library\n", - "import json # Data storage (JavaScript Object Notation)\n", - "import matplotlib.pyplot as plt # Plotting utilities\n", - "import torch # Tensor library for manipulating large models and data\n", - "import requests # Manages HTTP requests\n", - "import random # Random number generator\n", - "import numpy as np # Array manipulations\n", - "\n", - "# Import key functions & modules from detectron2\n", - "from detectron2 import model_zoo\n", - "from detectron2.data import Metadata\n", - "from detectron2.structures import BoxMode\n", - "from detectron2.utils.visualizer import Visualizer\n", - "from detectron2.config import get_cfg\n", - "from detectron2.utils.visualizer import ColorMode\n", - "from detectron2.modeling import build_model\n", - "from detectron2.checkpoint import DetectionCheckpointer\n", - "import detectron2.data.transforms as T\n", - "\n", - "# Import from pyplot and PIL for easy plotting\n", - "from matplotlib.pyplot import imshow\n", - "from PIL import Image" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "YhJe6TvnsYc9" - }, - "source": [ - "#### Download a model from the FathomNet model zoo\n", - "For this section of the workshop, we'll download the [MBARI Benthic Supercategory Detector](https://zenodo.org/record/5571043). This Retinanet model was fine tuned with FathomNet data from a version originally trained on COCO images. To train this system, we grouped many of our fine grained classes together into 20 'supercategories' that hopefully encode some generally morphological informatoin about the group. All the training data was drawn from MBARI imagery collected in Monterey Bay. \n", - "\n", - "We will run `wget` to actually do the download. This command will let Colab download resources from a URL. We will start by getting the weights from the repository on Zenodo.\n", - "\n", - "First, let's download the model weights. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "dg41P_T_IFQx" - }, - "outputs": [], - "source": [ - "!wget -nc https://zenodo.org/record/5571043/files/model_final.pth " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "SZB9LkTkOjiX" - }, - "source": [ - "Now we'll grab the model file that declares the structure of the model." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "7lVKx77wO944" - }, - "outputs": [], - "source": [ - "!wget -nc https://zenodo.org/record/5571043/files/fathomnet_config_v2_1280.yaml" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "zRsLNBqlsdEy" - }, - "source": [ - "#### Run inference\n", - "We can actually run images through our network now that we have the model architecture and the weights from training. Before we run anything we will need to load the model into memory and set several parameters that will dictate what we see in the output. \n", - "\n", - "First set the paths so the `detectron2` toolbox will know where to look for your files." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "VnS-jK4MWu2m" - }, - "outputs": [], - "source": [ - "CONFIG_FILE = \"fathomnet_config_v2_1280.yaml\" # training configuration file\n", - "WEIGHT_FILE = \"model_final.pth\" # fathomnet model weights" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "x4jJAAAyb36c" - }, - "source": [ - "Now set Non-Maximal Suppresion (NMS) and Score thresholds. These parameters dictate which of the proposed regions the algorithm displays. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "02DDZCQ-dmyz" - }, - "outputs": [], - "source": [ - "NMS_THRESH = 0.45 # Set an NMS threshold to filter all the boxes proposed by the model\n", - "SCORE_THRESH = 0.3 # Set the model score threshold to suppress low confidence annotations" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pdrEGCt3d5pI" - }, - "source": [ - "You have to explicitly tell the model what the names of the classes are. The system outputs a number, not a label. You can think of this as a look-up table." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_i-EmxtXeEIB" - }, - "outputs": [], - "source": [ - "fathomnet_metadata = Metadata(\n", - " name='fathomnet_val',\n", - " thing_classes=[\n", - " 'Anemone',\n", - " 'Fish',\n", - " 'Eel',\n", - " 'Gastropod',\n", - " 'Sea star',\n", - " 'Feather star',\n", - " 'Sea cucumber',\n", - " 'Urchin',\n", - " 'Glass sponge',\n", - " 'Sea fan',\n", - " 'Soft coral',\n", - " 'Sea pen',\n", - " 'Stony coral',\n", - " 'Ray',\n", - " 'Crab',\n", - " 'Shrimp',\n", - " 'Squat lobster',\n", - " 'Flatfish',\n", - " 'Sea spider',\n", - " 'Worm'\n", - " ]\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "3KA9vCCweOZH" - }, - "source": [ - "With all the parameters and file paths set up, you can now point Detectron to the configurations using the `get_cfg()` function." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "nC50iab9eIne" - }, - "outputs": [], - "source": [ - "cfg = get_cfg()\n", - "cfg.merge_from_file(model_zoo.get_config_file(\"COCO-Detection/retinanet_R_50_FPN_3x.yaml\"))\n", - "cfg.merge_from_file(CONFIG_FILE)\n", - "cfg.MODEL.RETINANET.SCORE_THRESH_TEST = SCORE_THRESH\n", - "cfg.MODEL.WEIGHTS = WEIGHT_FILE " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Akq8aB8ieik3" - }, - "source": [ - "Load in all the model weights and set the thresholds. This actually instantiates the model in your workspace. The `model` object is what will ingest the images and return outputs for us to look at. \n", - "\n", - "⚠ *If this cell returns a* `RuntimeError: No CUDA GPUs are available` *you will need to update your settings. 
Click the Runtime dropdown menu, select \"Change runtime type\" and select GPU in the \"Hardware accelerator\" box. You will then need to rerun the detectron2 install via pip.* "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "VrtudCKeefcM"
- },
- "outputs": [],
- "source": [
- "model = build_model(cfg) # returns a torch.nn.Module\n",
- "checkpointer = DetectionCheckpointer(model)\n",
- "checkpointer.load(cfg.MODEL.WEIGHTS) # This sets the weights to the pre-trained values downloaded from Zenodo\n",
- "model.eval() # Tell detectron that this model will only run inference"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "HELDrDbTInnO"
- },
- "source": [
- "Before putting images through the network, you need to define some preprocessing steps. At training time, you might set up a series of random affine transformations to help guard against overfitting. Since this network is already trained, we just need to resize the images to a standard dimension."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "NTIOGKlQI7nX"
- },
- "outputs": [],
- "source": [
- "aug = T.ResizeShortestEdge(\n",
- " short_edge_length=[cfg.INPUT.MIN_SIZE_TEST], \n",
- " max_size=cfg.INPUT.MAX_SIZE_TEST, \n",
- " sample_style=\"choice\"\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "vbOymMvZJLH2"
- },
- "source": [
- "Finally, we need to set up an extra NMS layer since by default `detectron2` models only do intra-class comparisons between bounding boxes. We need to do another NMS run between classes. "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "wAomTUA2O2s2"
- },
- "outputs": [],
- "source": [
- "post_process_nms = torchvision.ops.nms"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "lnL6nUpqYyzu"
- },
- "source": [
- "We'll need to grab a random (or not so random) image to run through the network."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "JRm3aVQwGbab"
- },
- "outputs": [],
- "source": [
- "from fathomnet.api import boundingboxes, images\n",
- "\n",
- "# Get a list of all concepts\n",
- "all_concepts = boundingboxes.find_concepts()\n",
- "\n",
- "# Pick one at random, or set one yourself, e.g.:\n",
- "# concept = 'Chionoecetes tanneri'\n",
- "concept = all_concepts[random.randrange(len(all_concepts))]\n",
- "\n",
- "# List the images of the concept in FathomNet\n",
- "concept_images = images.find_by_concept(concept)\n",
- "\n",
- "print(f'{len(concept_images)} images of {concept}')"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "yxMyTgddZ_Ro"
- },
- "source": [
- "Finally, you have everything loaded up to run the image through the model."
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "piXtsgAbAwlI" - }, - "outputs": [], - "source": [ - "# Pick a random image\n", - "image = concept_images[random.randrange(len(concept_images))]\n", - "\n", - "# Fetch the image\n", - "im = np.array(Image.open(requests.get(image.url, stream=True).raw))\n", - "\n", - "im_height,im_width,_ = im.shape # Grab the image dimensions\n", - "\n", - "# Use detectron's visualization tool to plot the bounding boxes\n", - "v_inf = Visualizer(\n", - " im,\n", - " metadata=fathomnet_metadata, \n", - " scale=1.0, \n", - " instance_mode=ColorMode.IMAGE\n", - ")\n", - "\n", - "# Transform the image in the desired input shape\n", - "im_transformed = aug.get_transform(im).apply_image(im)\n", - "\n", - "# Actually crank it through the model\n", - "with torch.no_grad():\n", - " im_tensor = torch.as_tensor(im_transformed.astype('float32').transpose(2, 0, 1))\n", - " model_outputs = model([{\n", - " 'image': im_tensor, \n", - " 'height': im_height, \n", - " 'width': im_width\n", - " }])[0]\n", - "\n", - "# Run the second stage NMS to ensure limited interclass overlap\n", - "model_outputs['instances'] = model_outputs['instances'][\n", - " post_process_nms(\n", - " model_outputs['instances'].pred_boxes.tensor, \n", - " model_outputs['instances'].scores, \n", - " NMS_THRESH\n", - " ).to('cpu').tolist()\n", - "]\n", - "\n", - "# Use the visualization tool to plot the bounding boxes on top of the image\n", - "out_inf_raw = v_inf.draw_instance_predictions(model_outputs[\"instances\"].to(\"cpu\"))\n", - "out_pil = Image.fromarray(out_inf_raw.get_image())\n", - "\n", - "# Show it\n", - "display(out_pil)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "id": "L9_G5azl_OXC" - }, - "source": [ - "## That's all, folks!\n", - "\n", - "At this point, you have\n", - "1. Used the FathomNet Python API to pull down and visualize concepts, images, and ancillary data\n", - "2. Downloaded images and bounding boxes locally (all that data is still in the notebook instance, in truth)\n", - "3. Explored the data and visualized the distribution of classes\n", - "4. Run a pre-trained model from the FathomNet model zoo\n", - "\n", - "We hope this notebook has helped you understand the FathomNet Python API. Thanks for attending the workshop! \n", - "\n", - "If you have any feedback or suggestions, please open an issue on the [fathomnet-py issues page](https://github.com/fathomnet/fathomnet-py/issues). We very much appreciate your thoughts." 
- ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "collapsed_sections": [], - "name": "FathomNet Python API Tutorial.ipynb", - "provenance": [] - }, - "kernelspec": { - "display_name": "fathomnet-workshop", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.9" - }, - "vscode": { - "interpreter": { - "hash": "479dcefd6aa122000a9bf2047ad8a14064136050b27ce2aad10422690bbfca90" - } - } - }, - "nbformat": 4, - "nbformat_minor": 0 + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "ifmDW0dirq9D" + }, + "source": [ + "# FathomNet Python API Tutorial\n", + "*So you want to use FathomNet data...*\n", + "\n", + "\"FathomNet" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "QNYBxwg3UsZe" + }, + "source": [ + "## Introduction\n", + "\n", + "> `fathomnet-py` is a client-side API to help scientists, researchers, and developers interact with FathomNet data.\n", + "\n", + "[![tests](https://github.com/fathomnet/fathomnet-py/actions/workflows/tests.yml/badge.svg)](https://github.com/fathomnet/fathomnet-py/actions/workflows/tests.yml)\n", + "[![Documentation Status](https://readthedocs.org/projects/fathomnet-py/badge/?version=latest)](https://fathomnet-py.readthedocs.io/en/latest/?badge=latest)\n", + "\n", + "The [fathomnet-py](https://github.com/fathomnet/fathomnet-py) API offers native Python interaction with the FathomNet REST API, abstracting away the underlying HTTP requests.\n", + "This notebook is designed to walk you through some of the core functionality of the API. \n", + "\n", + "It's split into three parts:\n", + "1. [**API**](#api): API overview and data visualizations\n", + "2. [**Data**](#building-and-exploring-a-dataset-from-fathomnet): Building and exploring a dataset from FathomNet\n", + "3. [**Models**](#inference-with-a-pre-trained-model): Running images from FathomNet through a pre-trained model available on the [FathomNet Model Zoo](https://github.com/fathomnet/models)\n", + "\n", + "This notebook is by no means exhaustive; it serves to show some common \"recipes\" for pulling down and handling FathomNet data in Python. **Full documentation for fathomnet-py is available at [fathomnet-py.readthedocs.io](https://fathomnet-py.readthedocs.io).**\n", + "\n", + "[FathomNet GitHub](https://github.com/fathomnet)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tM6WRbgWWkV1" + }, + "source": [ + "### Installing `fathomnet-py`\n", + "\n", + "To install fathomnet-py, you will need to have Python 3.7 or greater installed first (as of the time of writing, this notebook ships with Python 3.9). Then, from the command-line:\n", + "\n", + "```bash\n", + "pip install fathomnet\n", + "```\n", + "\n", + "This notebook installs fathomnet-py in the [Setup](#setup) section next, along with some relevant packages for data manipulation and visualization." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZkYCmUX6r0su" + }, + "source": [ + "\n", + "## Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-uNeHCauXyvC" + }, + "source": [ + "Note: this notebook assumes you are running from a colab environment. If this is not the case you may have to manually install a few packages that are pre-installed in colab such as numpy and pandas. 
\n", + "\n", + "First, import the auxiliary modules we need for part 1:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets as widgets # Provides embedded widgets\n", + "import ipyleaflet # Provides map widgets\n", + "import requests # Manages HTTP requests\n", + "import plotly.express as px # Generates nice plots\n", + "import random # Generates pseudo-random numbers\n", + "from PIL import Image, ImageFont, ImageDraw # Facilitates image operations\n", + "from io import BytesIO # Interfaces byte data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we'll install a few packages via pip:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4Q--V_Xrr3js" + }, + "outputs": [], + "source": [ + "!pip install -q -U fathomnet ipyleaflet" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Iazt9ae-sDgm" + }, + "source": [ + "\n", + "## The API\n", + "\n", + "Now that we have fathomnet-py installed, let's see what it can do!\n", + "\n", + "This section will show some of the common calls to pull down FathomNet data, and then we'll render some visualizations of the results." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pkLXdOx-F6Gm" + }, + "source": [ + "### Overview\n", + "\n", + "The two main parts of fathomnet-py are the **modules** and the **data classes**." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PlGSfHJkKKsy" + }, + "source": [ + "#### Modules\n", + "\n", + "fathomnet-py offers a variety of modules that encapsulate their relevant API operations. In brief:\n", + "\n", + "- `boundingboxes` --- find & manage bounding boxes\n", + "- `darwincore` --- list owner institutions\n", + "- `images` --- find & manage images\n", + "- `geoimages` --- query for geo-images (geographic info only of images)\n", + "- `imagesetuploads` --- find & manage image set uploads\n", + "- `regions` --- list marine regions\n", + "- `stats` --- compute summary statistics\n", + "- `tags` --- find & manage custom image tags\n", + "- `taxa` --- get taxonomic information via a taxa provider\n", + "- `users` --- manage user accounts & list contributors\n", + "- `firebase` & `xapikey` -- authenticate for write-level operations\n", + "\n", + "*Note: We will repeatedly import some of these modules in the notebook to highlight what's being used in each step. In your code, you only need to import a module once.*" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dSVuXOeAFYcO" + }, + "source": [ + "Each operation (API call) is represented as a function in its given module. For example, to get an image by its universally-unique identifier (UUID), we can import the `fathomnet.api.images` module and call the `find_by_uuid` function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "AqyGxrO-LSxP" + }, + "outputs": [], + "source": [ + "from fathomnet.api import images\n", + "\n", + "example_image = images.find_by_uuid(\"79958ac5-832a-488c-9b48-cce7db346497\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GCkHPRDcFzJn" + }, + "source": [ + "#### Data classes\n", + "\n", + "To facilitate parsing and saving FathomNet data, native Python dataclasses are provided in the `fathomnet.dto` module." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yGwVcnkqGsXE" + }, + "source": [ + "For example, we can see that the returned image from the `find_by_uuid` call above is of type `AImageDTO`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nneeYpzEG0EV" + }, + "outputs": [], + "source": [ + "type(example_image)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "s1pBTzGQIQVL" + }, + "source": [ + "These native data representations make it easier to write Python programs around FathomNet data. We'll print out some of the fields here." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0IzMAU88IpGy" + }, + "outputs": [], + "source": [ + "print(\"Image URL:\", example_image.url)\n", + "\n", + "print(\"Captured at latitude/longitude\", example_image.latitude, example_image.longitude)\n", + "\n", + "print(\"There are\", len(example_image.boundingBoxes), \"bounding boxes:\")\n", + "for box in example_image.boundingBoxes:\n", + " print(\"-\", box.concept, \"has area\", box.width * box.height)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2nLFG4fNHfhL" + }, + "source": [ + "We can convert (serialize/deserialize) any of the FathomNet dataclasses to/from JSON or Python dictionaries. Let's print out the contents of that example image as JSON." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OvT2OyrcHy_K" + }, + "outputs": [], + "source": [ + "print(example_image.to_json(indent=2))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "em4qC6YUKade" + }, + "source": [ + "### Bar chart of concepts with the most bounding boxes\n", + "\n", + "Here we will use a `boundingboxes` operation, called `count_total_by_concept`, to get a quick count of the total number of bounding boxes for every concept in FathomNet. To visualize, we'll make a bar chart of the top `N`.\n", + "\n", + "⚙ Try changing the value of `N` on the right to show more concepts!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jLcJ0cHaPVoC" + }, + "outputs": [], + "source": [ + "from fathomnet.api import boundingboxes\n", + "\n", + "# Make a bar chart of the top N concepts by bounding boxes\n", + "N = 11 # @param {type:\"slider\", min:5, max:20, step:1}\n", + "\n", + "# Get the number of bounding boxes for all concepts\n", + "concept_counts = boundingboxes.count_total_by_concept()\n", + "\n", + "# Sort by number of bounding boxes\n", + "concept_counts.sort(key=lambda cc: cc.count, reverse=True)\n", + "\n", + "# Get the top N concepts and their counts\n", + "concepts, counts = zip(*((cc.concept, cc.count) for cc in concept_counts[:N]))\n", + "\n", + "# Make a bar chart\n", + "fig = px.bar(\n", + " x=concepts,\n", + " y=counts,\n", + " labels={\"x\": \"Concept\", \"y\": \"Bounding box count\"},\n", + " title=f\"Top {N} concepts\",\n", + " text_auto=True,\n", + ")\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nL-_pukeZcXB" + }, + "source": [ + "### Listing images for a concept\n", + "\n", + "Let's say we want to list all of the available images in FathomNet for a given concept. Here, we'll\n", + "1. List all the available concepts (again, using the `boundingboxes` module)\n", + "2. Pick one\n", + "3. 
Get a list of images for that concept using the `images` module" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oS585C0JZnqD" + }, + "source": [ + "First, let's list all the available concepts in a choosable box.\n", + "\n", + "We'll call the `find_concepts` function and put the results in a combo box.\n", + "\n", + "⚙ **Pick a concept after running this cell!**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gOc40XMNaItV" + }, + "outputs": [], + "source": [ + "from fathomnet.api import boundingboxes\n", + "\n", + "# Get a list of all concepts that have at least 1 bounding box\n", + "all_concepts = boundingboxes.find_concepts()\n", + "\n", + "# Print how many there are\n", + "print(\"FathomNet has\", len(concept_counts), \"localized concepts!\")\n", + "\n", + "# Pick one!\n", + "concept_combo = widgets.Combobox(\n", + " options=all_concepts,\n", + " description=\"Pick one:\",\n", + " placeholder=\"Double-click or type here\",\n", + " ensure_option=True,\n", + " disabled=False,\n", + ")\n", + "concept_combo" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CJJzxjPYOZQY" + }, + "source": [ + "With our concept selected (if you didn't put anything, it will default to *Chionoecetes tanneri*), we can call the `images` module `find_by_concept` function to get back a list of all images containing a bounding box for that concept." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0NJoDLJ6ueCx" + }, + "outputs": [], + "source": [ + "from fathomnet.api import images\n", + "\n", + "# Get the selected concept\n", + "selected_concept = concept_combo.value or \"Chionoecetes tanneri\"\n", + "\n", + "# List the images FathomNet for that concept\n", + "concept_images = images.find_by_concept(selected_concept)\n", + "\n", + "# Print the total number\n", + "print(\"Found\", len(concept_images), \"images of\", selected_concept)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4BzezsyGPJ0w" + }, + "source": [ + "This next cell will pick a random image, fetch it by its URL, and display it. \n", + "\n", + "⚙ If you want a different image, just re-run this cell." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xNmguhajsqzV" + }, + "outputs": [], + "source": [ + "# Pick a random image\n", + "random_image = concept_images[random.randrange(len(concept_images))]\n", + "\n", + "# Fetch and show the image\n", + "image_data = requests.get(random_image.url).content\n", + "pil_image = Image.open(BytesIO(image_data))\n", + "display(pil_image)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YUOvebwiqHzM" + }, + "source": [ + "Then, we'll loop over each bounding box listed and render it (drawing a box & label for it) on the image." 
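, + "\n", + "One caveat before running it: the next cell loads a Liberation Sans font from a path that exists on Colab. If you are running somewhere that font file is missing, a safe fallback (a sketch, not part of the original cell) is:\n", + "\n", + "```python\n", + "from PIL import ImageFont\n", + "\n", + "try:\n", + "    font = ImageFont.truetype(\n", + "        \"/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf\", size=18\n", + "    )\n", + "except OSError:\n", + "    font = ImageFont.load_default()  # bundled bitmap font, no file path needed\n", + "```"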
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JTD8_IRhjIjp" + }, + "outputs": [], + "source": [ + "# Concept -> color mapping for bounding boxes\n", + "def color_for_concept(concept: str):\n", + " hash = sum(map(ord, concept)) << 5\n", + " return f\"hsl({hash % 360}, 100%, 85%)\"\n", + "\n", + "\n", + "# Draw the bounding boxes and labels on the image\n", + "draw_image = ImageDraw.Draw(pil_image)\n", + "font = ImageFont.truetype(\n", + " \"/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf\", size=18\n", + ")\n", + "for box in random_image.boundingBoxes:\n", + " color = color_for_concept(box.concept)\n", + " draw_image.rectangle(\n", + " (box.x, box.y, box.x + box.width, box.y + box.height), width=3, outline=color\n", + " )\n", + " draw_image.text((box.x, box.y + box.height), box.concept, fill=color, font=font)\n", + "\n", + "# Show the image with overlay\n", + "display(pil_image)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qKZhbKeXWgHd" + }, + "source": [ + "### Depth histogram\n", + "\n", + "Let's generate a depth histogram; we'll extract the `depthMeters` field from each image (where present) and plot it." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dUhMWpTzv-es" + }, + "outputs": [], + "source": [ + "# Extract the depth (in meters) from each image\n", + "depths = [\n", + " image.depthMeters for image in concept_images if image.depthMeters is not None\n", + "]\n", + "\n", + "# Make a horizontal histogram\n", + "fig = px.histogram(\n", + " y=depths, title=f\"{selected_concept} images by depth\", labels={\"y\": \"depth (m)\"}\n", + ")\n", + "fig[\"layout\"][\"yaxis\"][\"autorange\"] = \"reversed\"\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HrtWydOLWXM0" + }, + "source": [ + "### Geographic heatmap\n", + "\n", + "We can use the `latitude` and `longitude` fields to georeference each image. Here, we're generating a heatmap of the images overlaid on the Esri ocean basemap.\n", + "\n", + "⚙ Zoom and pan around -- although the map is centered on the Monterey Bay, see if you can find where other \"hotspots\" are for your concept." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "AeR_4Ad-wT-t" + }, + "outputs": [], + "source": [ + "# Extract the latitude/longitude from each image\n", + "locations = [\n", + " (image.latitude, image.longitude)\n", + " for image in concept_images\n", + " if image.latitude is not None and image.longitude is not None\n", + "]\n", + "\n", + "# Create a map from the Esri Ocean basemap\n", + "center = (36.807, -121.988) # Monterey Bay\n", + "map = ipyleaflet.Map(\n", + " basemap=ipyleaflet.basemaps.Esri.OceanBasemap, center=center, zoom=10\n", + ")\n", + "map.layout.height = \"800px\"\n", + "\n", + "# Overlay the image locations as a heatmap\n", + "heatmap = ipyleaflet.Heatmap(locations=locations, radius=20, min_opacity=0.5)\n", + "map.add_layer(heatmap)\n", + "\n", + "map" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "p4oqvJXTWq__" + }, + "source": [ + "\n", + "## Building and exploring a dataset from FathomNet\n", + "\n", + "FathomNet was built to support researchers seeking underwater imagery to train and test machine learning models. We'll demonstrate some of that functionality here. We'll build a VOC formatted dataset using the FathomNey Python API. 
Then we'll download a COCO formatted dataset using the `fathomnet-generate` command line tool and explore the contents programmatically.\n", + "\n", + "There are loads of software tools out there to train and run deep learning models. Later in this tutorial we will use [Detectron2](https://github.com/facebookresearch/detectron2), Facebook Research's machine learning library. This is just one of many options. For example, our MBARI colleague [Danelle Cline](https://www.mbari.org/person/danelle-e-cline/) put together a great notebook demonstrating [how to set up FathomNet data to train a YOLOv5 model](https://docs.mbari.org/deepsea-ai/notebooks/fathomnet_train/). \n", + "\n", + "Before getting started we'll need to install some packages in this workspace." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "KA_RszjyCHJ1" + }, + "outputs": [], + "source": [ + "!pip install -q -U fathomnet pycocotools pandas plotly" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cIoypIxuR1W1" + }, + "source": [ + "Then import all the needed libraries into the workspace." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7kAT9rIzD-nN" + }, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt # Plotting utilities\n", + "import requests # Manages HTTP requests\n", + "import random # Random number generator\n", + "import numpy as np # Array manipulations\n", + "import pandas as pd # More array manipulations\n", + "import plotly.express as px # Plotting library\n", + "\n", + "# Import coco dataset tools\n", + "from pycocotools.coco import COCO\n", + "\n", + "# Import from pyplot and PIL for easy plotting\n", + "from PIL import Image\n", + "import skimage.io as io" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "### Manually create a VOC dataset\n", + " \n", + "Let's say we want to train an object detector to detect *Gersemia juliepackardae* only. We don't want too much training data, so let's limit our query to just 10 images of *G. juliepackardae*.\n", + "\n", + "We can find this data by specifying a *constraint object* in fathomnet-py, then calling a generalized image querying function. To do this, we need to grab `GeoImageConstraints` from the `fathomnet.dto` module:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from fathomnet.dto import GeoImageConstraints" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, we can build a constraints object for this query." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gersemia_constraints = GeoImageConstraints(concept=\"Gersemia juliepackardae\", limit=10)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To query for image data according to these constraints, we'll call the `fathomnet.api.images.find` function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from fathomnet.api import images\n", + "\n", + "gersemia_images = images.find(gersemia_constraints)\n", + "print(f\"Gersemia juliepackardae images: {len(gersemia_images)}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In order to get this data ready for training, we still need to do three things:\n", + "1. **Download** the images themselves\n", + "2. 
**Format** the bounding boxes into something the model can understand\n", + "3. **Structure** the directory according to the [prescribed VOC format](https://detectron2.readthedocs.io/en/latest/tutorials/builtin_datasets.html#expected-dataset-structure-for-pascal-voc)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Download the images\n", + "\n", + "No magic here, we just need to download the images (via HTTP) to somewhere the notebook can find them.\n", + "\n", + "*Note: there are more efficient ways to do this.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "from pathlib import Path\n", + "from progressbar import progressbar\n", + "from io import BytesIO\n", + "\n", + "# Create a directory for the images\n", + "data_dir = Path(\"/content/gersemia_voc\")\n", + "image_dir = data_dir / \"JPEGImages\"\n", + "image_dir.mkdir(exist_ok=True, parents=True)\n", + "\n", + "# Download each image, saving each new file path to a list\n", + "image_paths = []\n", + "for image in progressbar(gersemia_images, redirect_stdout=True):\n", + "    # Format our image file name as the image UUID + .jpg\n", + "    image_path = image_dir / f\"{image.uuid}.jpg\"\n", + "    image_paths.append(image_path)\n", + "    if image_path.exists():  # Skip re-downloading images\n", + "        continue\n", + "\n", + "    # Download the image\n", + "    image_raw = requests.get(image.url, stream=True).raw\n", + "    pil_image = Image.open(image_raw)\n", + "\n", + "    # Convert to RGB (ensures consistent colorspace)\n", + "    pil_image = pil_image.convert(\"RGB\")\n", + "\n", + "    # Save the image\n", + "    pil_image.save(image_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Format the bounding boxes\n", + "\n", + "We need to get the bounding boxes in a format the model can understand. Image data (`AImageDTO`) objects offer a convenience function to generate Pascal VOC annotations from their internal data. We can leverage this to quickly generate XML annotations of the form:\n", + "\n", + "```xml\n", + "<annotation>\n", + "    <folder>images</folder>\n", + "    <filename>{image filename}</filename>\n", + "    <path>/content/drive/MyDrive/fathomnet-workshop-tests/images/{image filename}</path>\n", + "    <source>\n", + "        <database>FathomNet</database>\n", + "    </source>\n", + "    <size>\n", + "        <width>{image width}</width>\n", + "        <height>{image height}</height>\n", + "        <depth>3</depth>\n", + "    </size>\n", + "    <segmented>0</segmented>\n", + "    <object>\n", + "        <name>{concept}[ ({altConcept})]</name>\n", + "        <pose>Unspecified</pose>\n", + "        <truncated>0</truncated>\n", + "        <occluded>0</occluded>\n", + "        <difficult>0</difficult>\n", + "        <bndbox>\n", + "            <xmin>{x}</xmin>\n", + "            <xmax>{x + width}</xmax>\n", + "            <ymin>{y}</ymin>\n", + "            <ymax>{y + height}</ymax>\n", + "        </bndbox>\n", + "    </object>\n", + "    ...\n", + "</annotation>\n", + "```" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will likewise write out all the annotations in the preferred VOC directory structure. \n", + "\n", + "At the same time, we'll filter out any bounding boxes besides *G. juliepackardae*."
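, + "\n", + "The prescribed VOC layout also expects an `ImageSets/Main` split file listing the image IDs (one per line, without file extensions). A minimal sketch to create one, which you can run any time after the image download:\n", + "\n", + "```python\n", + "# Write a trainval.txt split file listing every downloaded image ID\n", + "imagesets_dir = data_dir / \"ImageSets\" / \"Main\"\n", + "imagesets_dir.mkdir(exist_ok=True, parents=True)\n", + "(imagesets_dir / \"trainval.txt\").write_text(\"\\n\".join(p.stem for p in image_paths))\n", + "```"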
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "xml_dir = data_dir / \"Annotations\"\n", + "xml_dir.mkdir(exist_ok=True, parents=True)\n", + "\n", + "for image, image_path in zip(gersemia_images, image_paths):\n", + " xml_path = xml_dir / image_path.with_suffix(\".xml\").name\n", + " image.boundingBoxes = list(\n", + " filter( # filter only Gersemia juliepackardae\n", + " lambda box: box.concept == \"Gersemia juliepackardae\", image.boundingBoxes\n", + " )\n", + " )\n", + " pascal_voc = image.to_pascal_voc(path=str(image_path), pretty_print=True)\n", + " xml_path.write_text(pascal_voc)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is great to do explicitly since it illustrates all the API tools. But you can also use the `fathomnet-generate` command line tool to make the dataset with a single line of code. Do note, however, that the following command will download **all** available images and annotation of the coral. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# ! fathomnet-generate -c \"Gersemia juliepackardae\" --format coco --img-download '/content/gersemia_voc/JPEGImages' --output '/content/gersemia_voc/Annotations'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "QjUISR3bsIZA" + }, + "source": [ + "### Build a COCO dataset\n", + "\n", + "Let's take a look at how we can leverage the Python API to download some images and bounding boxes from FathomNet. We're going to set up a training dataset that can be used for model training either in this notebook or in the Dockerized container we shared earlier. \n", + "\n", + "As we saw before, we can use the `fathomnet.api.images` module to search for images by concept. We can use that same functionality to set up a data set for model training. For an example scroll down to the final section of this notebook where we demonstrate how to use it to set up a VOC dataset.\n", + "\n", + "Here we'll use the handy `fathomnet-generate` command line tool to download FathomNet data and automatically organize it according to the [Microsoft Common Objects in COntext](https://cocodataset.org/#home) (COCO) standards. " + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "CYXNIUQEnazh" + }, + "source": [ + "#### COCO Formatting" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "COCO is a large annotated image dataset containing bounding boxes and segmentation masks for 91 categories in 100s of thousands of images. The bounding box annotations for object detection are organized into a standard format to make it easier to work with. Typically, the annotation files are distributed as json serialized nested dictionaries.\n", + "\n", + "```\n", + "{\n", + " \"info\": info,\n", + " \"images\": [image],\n", + " \"categories\": [category]\n", + " \"annotations\": [annotation]\n", + "}\n", + "```\n", + "\n", + "Each field contains inter-related pieces of information for your model training code to use. 
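These fields are easiest to understand with a concrete peek: once a `dataset.json` exists on disk (we generate one below), you can inspect it with nothing but the standard library. A quick sketch, assuming that file path:\n", + "\n", + "```python\n", + "import json\n", + "\n", + "with open(\"demo_dataset/dataset.json\") as f:\n", + "    ds = json.load(f)\n", + "\n", + "print(ds[\"info\"])\n", + "print(len(ds[\"images\"]), \"images,\", len(ds[\"annotations\"]), \"annotations\")\n", + "```\n", + "\n", + "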
`info` is a dictionary that contains some metadata about the entire dataset:\n", + "\n", + "```\n", + "info = {\n", + "    \"year\": 2023,\n", + "    \"version\": \"0\",\n", + "    \"description\": \"Generated by FathomNet\",\n", + "    \"contributor\": \"FathomNet\",\n", + "    \"url\": \"https://fathomnet.org\",\n", + "    \"date_created\": \"2023/02/23\"\n", + "}\n", + "```\n", + "\n", + "The `images` field consists of a list of `image` objects that specify the file name, image dimensions, permanent url, etc. \n", + "\n", + "```\n", + "image = {\n", + "    \"id\": 1,\n", + "    \"width\": 1920,\n", + "    \"height\": 1080,\n", + "    \"file_name\": \"754e6a28-a8eb-4cb3-a0b9-3f2d5daacbae.png\",\n", + "    \"license\": 0,\n", + "    \"flickr_url\": \"https://fathomnet.org/static/m3/staging/Doc%20Ricketts/images/0861/00_12_12_05.png\",\n", + "    \"coco_url\": \"https://fathomnet.org/static/m3/staging/Doc%20Ricketts/images/0861/00_12_12_05.png\",\n", + "    \"date_captured\": \"2016-06-16 00:00:00\"\n", + "}\n", + "```\n", + "\n", + "The `categories` field is a list of `category` objects organized by numeric ids.\n", + "\n", + "```\n", + "category = {\n", + "    \"id\": 2,\n", + "    \"name\": \"Actinernus\",\n", + "    \"supercategory\": \"\"\n", + "}\n", + "```\n", + "\n", + "Finally, `annotations` is a list of `annotation` objects that bring it all together. \n", + "\n", + "```\n", + "annotation = {\n", + "    \"id\": 1,\n", + "    \"image_id\": 1,\n", + "    \"category_id\": 2,\n", + "    \"segmentation\": [],\n", + "    \"area\": 51200.0,\n", + "    \"bbox\": [\n", + "        200.0,\n", + "        433.0,\n", + "        256.0,\n", + "        200.0\n", + "    ],\n", + "    \"iscrowd\": 0\n", + "}\n", + "```\n", + "\n", + "Note that the `id` fields are specific to the list of objects. The example `annotation` object tells us that in the image associated with `image_id` 1, there is a bounding box located at position `[200, 433]` with a width of `256` pixels and a height of `200` pixels. The label associated with that bounding box is `category_id` 2, which corresponds to the anemone \"Actinernus\".\n", + "\n", + "Whew, that is a mouthful! Fortunately, we can auto-populate all those fields with the `fathomnet-generate` command line tool. First, print out the docs. \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! fathomnet-generate " + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As you can see, there are lots of options built into the tool. Let's start with a simple query to see how it works. We'll execute the command below.\n", + "\n", + "The `-c` flag accepts a comma-separated list of concepts to query for in FathomNet. The `--count` flag tells the tool to just report how many bounding boxes are associated with each concept rather than actually building the dataset. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! fathomnet-generate -c \"Aegina rosea\" --count" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Great, now we know there are about 29 bounding boxes associated with this concept in FathomNet. This is a reasonable amount of data to download in real time for this workshop. \n", + "\n", + "To download the data we'll need to remove the `--count` flag, tell the script we want COCO format, and specify where we want the dataset and images. This should take about five minutes to download all the images and annotations."
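, + "\n", + "If you'd rather not wait for the images, the same command also works without the `--img-download` flag; it will then write only the annotation JSON (we use that trick later in this notebook). For example:\n", + "\n", + "```bash\n", + "fathomnet-generate -c \"Aegina rosea\" --format coco --output demo_dataset\n", + "```"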
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! fathomnet-generate -c \"Aegina rosea\" --format coco --img-download 'demo_dataset/images' --output 'demo_dataset'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here the `--format` flag tells the script to build a `coco` dataset. The `--img-download` and `--output` flags specify where all the output should live. When executed, the above line will make a new directory called `demo_dataset`. In the folder will be a COCO formatted dataset called `dataset.json` and a directory of `images` with 29 images with UUID file names.\n", + "\n", + "#### Explore the dataset\n", + "\n", + "We can use the handy `pycocotools` package provided by the COCO dataset maintainers to visualize some of the images and annotations. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create a coco dataset object\n", + "dataset = COCO(\"demo_dataset/dataset.json\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Show all the category names\n", + "cats = dataset.loadCats(dataset.getCatIds())\n", + "nms = [cat[\"name\"] for cat in cats]\n", + "print(\"Categories: \\n{}\\n\".format(\" \\n\".join(nms)))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also see the raw category objects by just printing the `cats` object." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cats" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The format conforms to the COCO standards. As a sanity check, we can then use pycocotools to see how many annotations are associated with each category. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cat_ids = dataset.getCatIds(catNms=nms)\n", + "for cid in cat_ids:\n", + "    anns = dataset.getAnnIds(catIds=cid)\n", + "    print(f\"{cats[cid-1]['name']} has {len(anns)} annotations\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is what we expect based on the output of `fathomnet-generate` when we used the `--count` flag. \n", + "\n", + "Now let's grab an image and look at the bounding box. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Grab all the image ids associated with Aegina rosea\n", + "img_ids = dataset.getImgIds(catIds=cat_ids[0])\n", + "\n", + "# Select one of them at random\n", + "img = dataset.loadImgs(img_ids[np.random.randint(0, len(img_ids))])[0]\n", + "\n", + "# Get the annotations in the image\n", + "anns_ids = dataset.getAnnIds(imgIds=img[\"id\"], catIds=cat_ids[0], iscrowd=None)\n", + "anns = dataset.loadAnns(anns_ids)\n", + "\n", + "# ...and display it\n", + "im = io.imread(f\"demo_dataset/images/{img['file_name']}\")\n", + "plt.axis(\"off\")\n", + "plt.imshow(im)\n", + "dataset.showAnns(anns, draw_bbox=True)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Data exploration \n", + "\n", + "Now we'll pull down a more complicated data set to do some visualization of what is in the whole dataset. 
We'll use `fathomnet-generate` to get all the data collected in Monterey Bay in calendar year 2018. Note, the following command will only produce the dataset JSON document, we won't download the actual images to save time. If in the future you want to save the images, append the `--img-download` flag as in the earlier example." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! fathomnet-generate --output 'demo_coco' --format coco \\\n", + " --max-latitude 37.0538 --min-latitude 36.4458 \\\n", + " --max-longitude -121.7805 --min-longitude -122.5073 \\\n", + " --start 2018-01-01 --end 2018-12-31" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The command will generate a new dataset object in `demo_coco/dataset.json`. Let's see what is in it. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create a new coco dataset object\n", + "dataset = COCO(\"demo_coco/dataset.json\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Look at some high level information.\n", + "print(f\"Number of images = {len(dataset.getImgIds())}\")\n", + "print(f\"Number of annotations = {len(dataset.getAnnIds())}\")\n", + "print(f\"Number of categories = {len(dataset.getCatIds())}\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "So now we have a more complicated dataset. 154 categories with 1554 annotations. We can check what the most abundant category is." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "all_anns = dataset.loadAnns(dataset.getAnnIds())\n", + "all_anns = pd.DataFrame(\n", + " all_anns\n", + ") # Make it into a pandas data frame for easy manipulation\n", + "\n", + "# See what the most abundant category is\n", + "cid = all_anns[\"category_id\"].value_counts().idxmax()\n", + "print(f\"Most abundant category id = {cid}\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "So that tells us what the most abundant category id is. But what organism is that? " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset.loadCats(ids=[cid])[0][\"name\"]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Very cool. But I'm a programmer not a marine biologist. What the heck is that thing?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Get all images that contain at least one annotation with that category id\n", + "img_ids = dataset.getImgIds(catIds=cid)\n", + "\n", + "# Select one of them at random\n", + "img = dataset.loadImgs(img_ids[np.random.randint(0, len(img_ids))])[0]\n", + "\n", + "# Get the annotations in the image\n", + "anns_ids = dataset.getAnnIds(imgIds=img[\"id\"], catIds=cid, iscrowd=None)\n", + "anns = dataset.loadAnns(anns_ids)\n", + "\n", + "# ...and display it\n", + "im = io.imread(img[\"coco_url\"]) # this downloads the image\n", + "plt.axis(\"off\")\n", + "plt.imshow(im)\n", + "dataset.showAnns(anns, draw_bbox=True)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It looks like we got a big ol' coral for our most abundant critter. 
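If you want a closer look at just the localized animal, the COCO `bbox` values (x, y, width, height) can drive a quick crop. Here is a small sketch reusing the `im` and `anns` variables from the cell above:\n", + "\n", + "```python\n", + "from PIL import Image\n", + "\n", + "x, y, w, h = anns[0][\"bbox\"]  # first annotation in this image\n", + "crop = Image.fromarray(im).crop((int(x), int(y), int(x + w), int(y + h)))\n", + "display(crop)\n", + "```\n", + "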
\n", + "\n", + "This tells us some useful stuff about the most abundant data, but how can we get a sense of the species distribution in this dataset?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Add a column to our dataframe that has all the plain language names of the category ids\n", + "all_anns[\"name\"] = all_anns[\"category_id\"].apply(\n", + " lambda xx: dataset.loadCats(ids=[xx])[0][\"name\"]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Plot the distribution of the counts\n", + "fig = px.histogram(\n", + " all_anns,\n", + " x=\"name\",\n", + " title=\"Concept counts\",\n", + " labels={\"name\": \"Concept\", \"count\": \"Number of annotations\"},\n", + ")\n", + "\n", + "fig.show()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "That is pretty illegible. What happens if we just look at the 20 most abundant organisms?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Plot the 20 most abundant\n", + "top_anns = all_anns[\"name\"].value_counts().head(20)\n", + "\n", + "fig = px.histogram(\n", + " top_anns,\n", + " x=top_anns.index,\n", + " y=top_anns.values,\n", + " title=\"Top 20 concepts\",\n", + " labels={\"x\": \"Concept\", \"y\": \"Number of annotations\"},\n", + ")\n", + "fig.show()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Much better. But together these two plots tells us some important things; namely, that we are dealing with a long tailed distribution. That might impact how we do our model training. \n", + "\n", + "There are all sorts of other metadata manipulations we can do to explore the structure of our data. This type of exercise is important to ensure that you know what is going into your model before training. That will help you both produce a better model and more efficently diagnose errors. " + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Model training\n", + "\n", + "In the interest of time, we will not train a model in this workshop session. We have set up a [seperate notebook](https://github.com/fathomnet/fathomnet-py/blob/detectron2-train-demo/train_demo.ipynb) that illustrates how to get COCO formatted dataset with `fathomnet-generate` and train a RetinaNet mode using `detectron2`. We hope you'll find it helpful!" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "jrTNntfuGRLQ" + }, + "source": [ + "### Inference with a pre-trained model\n", + "\n", + "A big feature of FathomNet is the *ModelZoo*, a repository for users to share their models with the community. For the moment, [we are advising users](https://medium.com/fathomnet/how-to-upload-your-ml-model-to-fathomnet-68b933dd55bd) to upload their models on Zenodo to generate a DOI and then share them on our GitHub page. We have provided a number of our models as a starting point. \n", + "\n", + "First install some additional packages. You may see a red ERROR message in this cell -- you can disregard it. **This will take a couple minutes.** Grab a coffee! 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install pyyaml==5.4.1 'git+https://github.com/facebookresearch/detectron2.git'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "⚠ Detectron doesn't play nice with some installed package versions; you may see a mesasge asking you to restart the runtime. Press that button, or run this cell:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " import detectron2 # noqa: F401\n", + "except ImportError:\n", + " print(\"Restarting runtime...\")\n", + " exit()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now grab all the packages we need." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import torchvision # Library of datasets, models, and image transforms\n", + "import matplotlib.pyplot as plt # Plotting utilities\n", + "import torch # Tensor library for manipulating large models and data\n", + "import requests # Manages HTTP requests\n", + "import random # Random number generator\n", + "import numpy as np # Array manipulations\n", + "\n", + "# Import key functions & modules from detectron2\n", + "from detectron2 import model_zoo\n", + "from detectron2.data import Metadata\n", + "from detectron2.utils.visualizer import Visualizer\n", + "from detectron2.config import get_cfg\n", + "from detectron2.utils.visualizer import ColorMode\n", + "from detectron2.modeling import build_model\n", + "from detectron2.checkpoint import DetectionCheckpointer\n", + "import detectron2.data.transforms as T\n", + "\n", + "# Import from pyplot and PIL for easy plotting\n", + "from PIL import Image" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YhJe6TvnsYc9" + }, + "source": [ + "#### Download a model from the FathomNet model zoo\n", + "For this section of the workshop, we'll download the [MBARI Benthic Supercategory Detector](https://zenodo.org/record/5571043). This Retinanet model was fine tuned with FathomNet data from a version originally trained on COCO images. To train this system, we grouped many of our fine grained classes together into 20 'supercategories' that hopefully encode some generally morphological informatoin about the group. All the training data was drawn from MBARI imagery collected in Monterey Bay. \n", + "\n", + "We will run `wget` to actually do the download. This command will let Colab download resources from a URL. We will start by getting the weights from the repository on Zenodo.\n", + "\n", + "First, let's download the model weights. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dg41P_T_IFQx" + }, + "outputs": [], + "source": [ + "!wget -nc https://zenodo.org/record/5571043/files/model_final.pth " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SZB9LkTkOjiX" + }, + "source": [ + "Now we'll grab the model file that declares the structure of the model." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7lVKx77wO944" + }, + "outputs": [], + "source": [ + "!wget -nc https://zenodo.org/record/5571043/files/fathomnet_config_v2_1280.yaml" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zRsLNBqlsdEy" + }, + "source": [ + "#### Run inference\n", + "We can actually run images through our network now that we have the model architecture and the weights from training. Before we run anything we will need to load the model into memory and set several parameters that will dictate what we see in the output. \n", + "\n", + "First set the paths so the `detectron2` toolbox will know where to look for your files." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VnS-jK4MWu2m" + }, + "outputs": [], + "source": [ + "CONFIG_FILE = \"fathomnet_config_v2_1280.yaml\" # training configuration file\n", + "WEIGHT_FILE = \"model_final.pth\" # fathomnet model weights" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x4jJAAAyb36c" + }, + "source": [ + "Now set Non-Maximal Suppresion (NMS) and Score thresholds. These parameters dictate which of the proposed regions the algorithm displays. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "02DDZCQ-dmyz" + }, + "outputs": [], + "source": [ + "NMS_THRESH = 0.45 # Set an NMS threshold to filter all the boxes proposed by the model\n", + "SCORE_THRESH = (\n", + " 0.3 # Set the model score threshold to suppress low confidence annotations\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pdrEGCt3d5pI" + }, + "source": [ + "You have to explicitly tell the model what the names of the classes are. The system outputs a number, not a label. You can think of this as a look-up table." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_i-EmxtXeEIB" + }, + "outputs": [], + "source": [ + "fathomnet_metadata = Metadata(\n", + " name=\"fathomnet_val\",\n", + " thing_classes=[\n", + " \"Anemone\",\n", + " \"Fish\",\n", + " \"Eel\",\n", + " \"Gastropod\",\n", + " \"Sea star\",\n", + " \"Feather star\",\n", + " \"Sea cucumber\",\n", + " \"Urchin\",\n", + " \"Glass sponge\",\n", + " \"Sea fan\",\n", + " \"Soft coral\",\n", + " \"Sea pen\",\n", + " \"Stony coral\",\n", + " \"Ray\",\n", + " \"Crab\",\n", + " \"Shrimp\",\n", + " \"Squat lobster\",\n", + " \"Flatfish\",\n", + " \"Sea spider\",\n", + " \"Worm\",\n", + " ],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3KA9vCCweOZH" + }, + "source": [ + "With all the parameters and file paths set up, you can now point Detectron to the configurations using the `get_cfg()` function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nC50iab9eIne" + }, + "outputs": [], + "source": [ + "cfg = get_cfg()\n", + "cfg.merge_from_file(\n", + " model_zoo.get_config_file(\"COCO-Detection/retinanet_R_50_FPN_3x.yaml\")\n", + ")\n", + "cfg.merge_from_file(CONFIG_FILE)\n", + "cfg.MODEL.RETINANET.SCORE_THRESH_TEST = SCORE_THRESH\n", + "cfg.MODEL.WEIGHTS = WEIGHT_FILE" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Akq8aB8ieik3" + }, + "source": [ + "Load in all the model weights and set the thresholds. This actually instantiates the model in your workspace. The `model` object is what will ingest the images and return outputs for us to look at. 
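Before building the model, it is also worth confirming that PyTorch can see a GPU. A quick check you can run in its own cell:\n", + "\n", + "```python\n", + "import torch\n", + "\n", + "print(torch.cuda.is_available())  # should print True on a GPU runtime\n", + "```\n", + "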
\n", + "\n", + "⚠ *If this cell returns a* `RuntimeError: No CUDA GPUs are available` *you will need to update your settings. Click the Runtime dropdown menu, select \"Change runtime type\" and select GPU in the \"Hardware accelarator\" box. You will then need to rerun the detectron2 install via pip.* " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VrtudCKeefcM" + }, + "outputs": [], + "source": [ + "model = build_model(cfg) # returns a torch.nn.Module\n", + "checkpointer = DetectionCheckpointer(model)\n", + "checkpointer.load(\n", + " cfg.MODEL.WEIGHTS\n", + ") # This sets the weights to the pre-trained values dowloaded from Zenodo\n", + "model.eval() # Tell detectron that this model will only run inference" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HELDrDbTInnO" + }, + "source": [ + "Before putting images through the network, you need to define some preprocessing steps. At training time, you might set up a series of random affine transformations to help guard against overfitting. Since this network is already trained, we just need to resize time images to a standard dimension." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NTIOGKlQI7nX" + }, + "outputs": [], + "source": [ + "aug = T.ResizeShortestEdge(\n", + " short_edge_length=[cfg.INPUT.MIN_SIZE_TEST],\n", + " max_size=cfg.INPUT.MAX_SIZE_TEST,\n", + " sample_style=\"choice\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vbOymMvZJLH2" + }, + "source": [ + "Finally, we need to set up an extra NMS layer since by default `detectron2` models only do intra-class comparisions between bounding boxes. We need to do another NMS run between classes. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "wAomTUA2O2s2" + }, + "outputs": [], + "source": [ + "post_process_nms = torchvision.ops.nms" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lnL6nUpqYyzu" + }, + "source": [ + "We'll need to grab a random (or not so random) image to run through the network." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JRm3aVQwGbab" + }, + "outputs": [], + "source": [ + "from fathomnet.api import boundingboxes, images\n", + "\n", + "# Get a list of all concepts\n", + "all_concepts = boundingboxes.find_concepts()\n", + "\n", + "# Pick one at random, or set one yourself, e.g.:\n", + "# concept = 'Chionoecetes tanneri'\n", + "concept = all_concepts[random.randrange(len(all_concepts))]\n", + "\n", + "# List the images of the concept in FathomNet\n", + "concept_images = images.find_by_concept(concept)\n", + "\n", + "print(f\"{len(concept_images)} images of {concept}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yxMyTgddZ_Ro" + }, + "source": [ + "Finally, you have everything loaded up to run the image through the model." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "piXtsgAbAwlI" + }, + "outputs": [], + "source": [ + "# Pick a random image\n", + "image = concept_images[random.randrange(len(concept_images))]\n", + "\n", + "# Fetch the image\n", + "im = np.array(Image.open(requests.get(image.url, stream=True).raw))\n", + "\n", + "im_height, im_width, _ = im.shape # Grab the image dimensions\n", + "\n", + "# Use detectron's visualization tool to plot the bounding boxes\n", + "v_inf = Visualizer(\n", + " im, metadata=fathomnet_metadata, scale=1.0, instance_mode=ColorMode.IMAGE\n", + ")\n", + "\n", + "# Transform the image in the desired input shape\n", + "im_transformed = aug.get_transform(im).apply_image(im)\n", + "\n", + "# Actually crank it through the model\n", + "with torch.no_grad():\n", + " im_tensor = torch.as_tensor(im_transformed.astype(\"float32\").transpose(2, 0, 1))\n", + " model_outputs = model(\n", + " [{\"image\": im_tensor, \"height\": im_height, \"width\": im_width}]\n", + " )[0]\n", + "\n", + "# Run the second stage NMS to ensure limited interclass overlap\n", + "model_outputs[\"instances\"] = model_outputs[\"instances\"][\n", + " post_process_nms(\n", + " model_outputs[\"instances\"].pred_boxes.tensor,\n", + " model_outputs[\"instances\"].scores,\n", + " NMS_THRESH,\n", + " )\n", + " .to(\"cpu\")\n", + " .tolist()\n", + "]\n", + "\n", + "# Use the visualization tool to plot the bounding boxes on top of the image\n", + "out_inf_raw = v_inf.draw_instance_predictions(model_outputs[\"instances\"].to(\"cpu\"))\n", + "out_pil = Image.fromarray(out_inf_raw.get_image())\n", + "\n", + "# Show it\n", + "display(out_pil)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "L9_G5azl_OXC" + }, + "source": [ + "## That's all, folks!\n", + "\n", + "At this point, you have\n", + "1. Used the FathomNet Python API to pull down and visualize concepts, images, and ancillary data\n", + "2. Downloaded images and bounding boxes locally (all that data is still in the notebook instance, in truth)\n", + "3. Explored the data and visualized the distribution of classes\n", + "4. Run a pre-trained model from the FathomNet model zoo\n", + "\n", + "We hope this notebook has helped you understand the FathomNet Python API. Thanks for attending the workshop! \n", + "\n", + "If you have any feedback or suggestions, please open an issue on the [fathomnet-py issues page](https://github.com/fathomnet/fathomnet-py/issues). We very much appreciate your thoughts." 
+ ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "FathomNet Python API Tutorial.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "fathomnet-workshop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + }, + "vscode": { + "interpreter": { + "hash": "479dcefd6aa122000a9bf2047ad8a14064136050b27ce2aad10422690bbfca90" + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 } diff --git a/src/fathomnet/dto.py b/src/fathomnet/dto.py index 3a1175b..9a7d708 100644 --- a/src/fathomnet/dto.py +++ b/src/fathomnet/dto.py @@ -99,7 +99,7 @@ def to_pascal_voc( E.source(E.database("FathomNet")), E.size(E.width(str(self.width)), E.height(str(self.height)), E.depth("3")), E.segmented("0"), - *objects + *objects, ) return etree.tostring(annotation, pretty_print=pretty_print).decode()