Skip to content

Commit

Permalink
2nd figure for weecology
Browse files Browse the repository at this point in the history
  • Loading branch information
bw4sz committed Oct 14, 2024
1 parent 60e7554 commit 26f139b
Show file tree
Hide file tree
Showing 8 changed files with 557 additions and 74 deletions.
22 changes: 22 additions & 0 deletions data_prep/Araujo_2020.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from deepforest import main
from deepforest.utilities import read_file
from deepforest.preprocess import split_raster
import os
import geopandas as gpd
import pandas as pd

# Prepare the Araujo et al. 2020 crown-delineation dataset for MillionTrees:
# read hand-annotated crown polygons, attach image/label/source metadata,
# tile the orthomosaic into fixed-size crops, and write one annotations.csv
# with absolute image paths.
ROOT_DIR = "/orange/ewhite/DeepForest/Araujo_2020/"
RASTER_NAME = "Orthomosaic_WGS84_UTM20S.tif"

gdf = gpd.read_file(ROOT_DIR + "crown_delineation_shapefile.shp")
# Keep only simple Polygon geometries; other geometry types (e.g.
# MultiPolygon) are dropped before WKT serialization below.
gdf = gdf[gdf.geometry.type == "Polygon"]
gdf["image_path"] = RASTER_NAME
gdf["label"] = "Tree"
gdf["source"] = "Araujo et al. 2020"

# Convert the geospatial annotations into image coordinates for the raster.
df = read_file(gdf, root_dir=ROOT_DIR)
df = df[["geometry", "image_path", "label", "source"]]

# Serialize geometries to WKT text so the frame can travel as plain pandas.
df["polygon"] = df.geometry.apply(lambda geom: geom.wkt)
df.drop(columns=["geometry"], inplace=True)
df = pd.DataFrame(df)

# Tile the large orthomosaic into 1500-px, non-overlapping crops and remap
# each polygon annotation onto its crop.
split_files = split_raster(
    df,
    path_to_raster=ROOT_DIR + RASTER_NAME,
    root_dir=ROOT_DIR,
    base_dir=ROOT_DIR + "crops/",
    patch_size=1500,
    patch_overlap=0,
)

# Store absolute crop paths so the annotation file works from any directory.
split_files["image_path"] = split_files["image_path"].apply(
    lambda p: os.path.join(ROOT_DIR + "crops/", p))
# index=False keeps a stray unnamed index column out of the CSV, matching
# the other data_prep scripts (e.g. NeonBenchmark.py).
split_files.to_csv(ROOT_DIR + "annotations.csv", index=False)
78 changes: 37 additions & 41 deletions data_prep/NeonBenchmark.py
Original file line number Diff line number Diff line change
@@ -1,46 +1,42 @@
import glob
import os
import pandas as pd
import shutil
import geopandas as gpd
from deepforest.utilities import read_file

## Train annotations ##
# Combine the NEON benchmark hand annotations into a single CSV:
# Pascal-VOC style .xml box files plus .shp crown shapefiles, both living
# next to their RGB .tif images under BASE_PATH.
BASE_PATH = "/orange/ewhite/b.weinstein/NeonTreeEvaluation/hand_annotations/"
#convert hand annotations from xml into retinanet format
xmls = glob.glob(BASE_PATH + "*.xml")
annotation_list = []
for xml in xmls:
    #check if it is in the directory: only keep annotations whose matching
    #.tif image (same basename) exists alongside the xml
    image_name = "{}.tif".format(os.path.splitext(os.path.basename(xml))[0])
    if os.path.exists(os.path.join(BASE_PATH, image_name)):
        print(xml)
        annotation = read_file(xml)
        annotation_list.append(annotation)

#Collect hand annotations
annotations = pd.concat(annotation_list, ignore_index=True)

#collect shapefile annotations
shps = glob.glob(BASE_PATH + "*.shp")
shps_tifs = glob.glob(BASE_PATH + "*.tif")  # NOTE(review): appears unused — confirm before removing
shp_results = []
for shp in shps:
    print(shp)
    # Each shapefile shares its basename with the corresponding RGB tif.
    rgb = "{}.tif".format(os.path.splitext(shp)[0])
    gdf = gpd.read_file(shp)
    gdf["label"] = "Tree"
    gdf["image_path"] = os.path.join(BASE_PATH, rgb)
    # Convert geospatial crowns into image-coordinate annotations.
    shp_df = read_file(gdf, root_dir=BASE_PATH)
    shp_df = pd.DataFrame(shp_df)
    shp_results.append(shp_df)

shp_results = pd.concat(shp_results, ignore_index=True)
annotations = pd.concat([annotations, shp_results])

#Ensure column order
annotations["source"] = "Weecology_University_Florida"
annotations["label"] = "Tree"
# Rewrite relative image names as absolute paths under the benchmark image dir.
annotations["image_path"] = annotations.image_path.apply(lambda x: os.path.join("/orange/ewhite/DeepForest/NEON_benchmark/images/", x))

annotations.to_csv("/orange/ewhite/DeepForest/NEON_benchmark/University_of_Florida.csv")
# Combine the pre-cropped NEON benchmark annotation CSVs into a single file
# and report any image overlap with the previously published annotations.

# Define the base path holding per-crop annotation CSVs.
BASE_PATH = "/orange/ewhite/b.weinstein/NeonTreeEvaluation/hand_annotations/crops"

# Load all CSV files in the specified directory.
csv_files = glob.glob(os.path.join(BASE_PATH, "*.csv"))
csv_list = []

for csv_file in csv_files:
    print(csv_file)
    df = read_file(csv_file)
    # Make image paths absolute so the combined CSV works from any directory.
    df["image_path"] = df["image_path"].apply(lambda x: os.path.join(BASE_PATH, x))
    df["source"] = "Weecology_University_Florida"
    df["label"] = "Tree"
    csv_list.append(df)

# Concatenate all per-crop dataframes into one annotation table.
annotations = pd.concat(csv_list, ignore_index=True)

# Save the combined annotations to a CSV file (index=False: no index column).
output_path = "/orange/ewhite/DeepForest/NEON_benchmark/University_of_Florida.csv"
annotations.to_csv(output_path, index=False)

# Load the existing annotations file for a duplication check.
existing_annotations_path = "/orange/ewhite/DeepForest/NEON_benchmark/NeonTreeEvaluation_annotations.csv"
existing_annotations = pd.read_csv(existing_annotations_path)

# Check for overlapping data based on a common column, e.g., 'image_path'.
# Diagnostic only: nothing is filtered out of the saved file.
overlapping_data = pd.merge(annotations, existing_annotations, on='image_path', how='inner')

# Print the overlapping data.
print("Overlapping data:")
print(overlapping_data)
# (A redundant second annotations.to_csv(output_path, index=False) call was
# removed here: `annotations` is unmodified after the first save, so the
# second write rewrote an identical file.)


6 changes: 4 additions & 2 deletions data_prep/collect_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@

TreePoints = [
"/orange/ewhite/DeepForest/TreeFormer/all_images/annotations.csv",
"/orange/ewhite/DeepForest/Ventura_2022/urban-tree-detection-data/images/annotations.csv"]
"/orange/ewhite/DeepForest/Ventura_2022/urban-tree-detection-data/images/annotations.csv",
"/orange/ewhite/MillionTrees/NEON_points/annotations.csv"]

TreePolygons = [
"/orange/ewhite/DeepForest/Jansen_2023/pngs/annotations.csv",
Expand All @@ -31,7 +32,8 @@
"/orange/ewhite/DeepForest/Wagner_Australia/annotations.csv",
"/orange/ewhite/DeepForest/Alejandro_Chile/alejandro/annotations.csv",
"/orange/ewhite/DeepForest/UrbanLondon/annotations.csv",
"/orange/ewhite/DeepForest/OliveTrees_spain/Dataset_RGB/annotations.csv"
"/orange/ewhite/DeepForest/OliveTrees_spain/Dataset_RGB/annotations.csv",
"/orange/ewhite/DeepForest/Araujo_2020/annotations.csv"
]

# Current errors
Expand Down
4 changes: 4 additions & 0 deletions docs/datasets.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ ISPRS Journal of Photogrammetry and Remote Sensing, Volume 206, 2023

**Location** [NEON sites](https://www.neonscience.org/field-sites/explore-field-sites) within the United States

An extension of this published resource was created by the Weecology Lab at the University of Florida.

![sample_image](public/Weecology_University_Florida.png)

### World Resources Institute

NAIP Imagery from across the United States
Expand Down
11 changes: 11 additions & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,17 @@ The MillionTrees seeks to collect a million tree locations to create a global be
:alt: Image Placeholder
:width: 50%

Current Status
--------------

There are 3 datasets available for the MillionTrees benchmark:

* TreeBoxes: A dataset of 282,288 tree crowns from 9 sources.

* TreePolygons: A dataset of 362,751 tree crowns from 8 sources.

* TreePoints: A dataset of 191,614 tree stems from 2 sources.

Contact
-------

Expand Down
Binary file added docs/public/Weecology_University_Florida.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
47 changes: 16 additions & 31 deletions examples/Datasets.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 1,
"metadata": {},
"outputs": [
{
Expand All @@ -21,6 +21,12 @@
}
],
"source": [
"import os\n",
"import sys\n",
"\n",
"if os.path.basename(os.getcwd()) == 'examples':\n",
" sys.path.append(\"../\")\n",
" \n",
"import milliontrees\n",
"from torchvision import transforms\n",
"\n",
Expand Down Expand Up @@ -49,26 +55,9 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"metadata": {},
"outputs": [
{
"ename": "KeyError",
"evalue": "\"None of [Index(['xmin', 'ymin', 'xmax', 'ymax'], dtype='object')] are in the [columns]\"",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[3], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Load the box dataset\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmilliontrees\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m get_dataset\n\u001b[0;32m----> 3\u001b[0m dataset \u001b[38;5;241m=\u001b[39m \u001b[43mget_dataset\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mTreeBoxes\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mroot_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/orange/ewhite/DeepForest/MillionTrees/\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/MillionTrees/milliontrees/get_dataset.py:47\u001b[0m, in \u001b[0;36mget_dataset\u001b[0;34m(dataset, version, unlabeled, **dataset_kwargs)\u001b[0m\n\u001b[1;32m 45\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 46\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmilliontrees\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdatasets\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mTreeBoxes\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m TreeBoxesDataset \u001b[38;5;66;03m# type:ignore\u001b[39;00m\n\u001b[0;32m---> 47\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mTreeBoxesDataset\u001b[49m\u001b[43m(\u001b[49m\u001b[43mversion\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mversion\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mdataset_kwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/MillionTrees/milliontrees/datasets/TreeBoxes.py:87\u001b[0m, in \u001b[0;36mTreeBoxesDataset.__init__\u001b[0;34m(self, version, root_dir, download, split_scheme)\u001b[0m\n\u001b[1;32m 84\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_input_array \u001b[38;5;241m=\u001b[39m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mfilename\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mvalues\n\u001b[1;32m 86\u001b[0m \u001b[38;5;66;03m# Box labels\u001b[39;00m\n\u001b[0;32m---> 87\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_y_array \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mtensor(\u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mxmin\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mymin\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mxmax\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mymax\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m]\u001b[49m\u001b[38;5;241m.\u001b[39mvalues\u001b[38;5;241m.\u001b[39mastype(\u001b[38;5;28mfloat\u001b[39m))\n\u001b[1;32m 89\u001b[0m \u001b[38;5;66;03m# Labels -> just 'Tree'\u001b[39;00m\n\u001b[1;32m 90\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_n_classes \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n",
"File \u001b[0;32m/orange/ewhite/b.weinstein/miniconda3/envs/MillionTrees/lib/python3.10/site-packages/pandas/core/frame.py:3899\u001b[0m, in \u001b[0;36mDataFrame.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3897\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_iterator(key):\n\u001b[1;32m 3898\u001b[0m key \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(key)\n\u001b[0;32m-> 3899\u001b[0m indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_indexer_strict\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcolumns\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m[\u001b[38;5;241m1\u001b[39m]\n\u001b[1;32m 3901\u001b[0m \u001b[38;5;66;03m# take() does not accept boolean indexers\u001b[39;00m\n\u001b[1;32m 3902\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(indexer, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdtype\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mbool\u001b[39m:\n",
"File \u001b[0;32m/orange/ewhite/b.weinstein/miniconda3/envs/MillionTrees/lib/python3.10/site-packages/pandas/core/indexes/base.py:6114\u001b[0m, in \u001b[0;36mIndex._get_indexer_strict\u001b[0;34m(self, key, axis_name)\u001b[0m\n\u001b[1;32m 6111\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 6112\u001b[0m keyarr, indexer, new_indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_reindex_non_unique(keyarr)\n\u001b[0;32m-> 6114\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_raise_if_missing\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkeyarr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindexer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis_name\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6116\u001b[0m keyarr \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtake(indexer)\n\u001b[1;32m 6117\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(key, Index):\n\u001b[1;32m 6118\u001b[0m \u001b[38;5;66;03m# GH 42790 - Preserve name from an Index\u001b[39;00m\n",
"File \u001b[0;32m/orange/ewhite/b.weinstein/miniconda3/envs/MillionTrees/lib/python3.10/site-packages/pandas/core/indexes/base.py:6175\u001b[0m, in \u001b[0;36mIndex._raise_if_missing\u001b[0;34m(self, key, indexer, axis_name)\u001b[0m\n\u001b[1;32m 6173\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m use_interval_msg:\n\u001b[1;32m 6174\u001b[0m key \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(key)\n\u001b[0;32m-> 6175\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNone of [\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mkey\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m] are in the [\u001b[39m\u001b[38;5;132;01m{\u001b[39;00maxis_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m]\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 6177\u001b[0m not_found \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(ensure_index(key)[missing_mask\u001b[38;5;241m.\u001b[39mnonzero()[\u001b[38;5;241m0\u001b[39m]]\u001b[38;5;241m.\u001b[39munique())\n\u001b[1;32m 6178\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mnot_found\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m not in index\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
"\u001b[0;31mKeyError\u001b[0m: \"None of [Index(['xmin', 'ymin', 'xmax', 'ymax'], dtype='object')] are in the [columns]\""
]
}
],
"outputs": [],
"source": [
"# Load the box dataset\n",
"from milliontrees import get_dataset\n",
Expand All @@ -84,23 +73,19 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'dataset' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[2], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m train_data \u001b[38;5;241m=\u001b[39m \u001b[43mdataset\u001b[49m\u001b[38;5;241m.\u001b[39mget_subset(\n\u001b[1;32m 2\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtrain\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 3\u001b[0m transform\u001b[38;5;241m=\u001b[39mtransforms\u001b[38;5;241m.\u001b[39mCompose(\n\u001b[1;32m 4\u001b[0m [transforms\u001b[38;5;241m.\u001b[39mResize((\u001b[38;5;241m448\u001b[39m, \u001b[38;5;241m448\u001b[39m)), transforms\u001b[38;5;241m.\u001b[39mToTensor()]\n\u001b[1;32m 5\u001b[0m ),\n\u001b[1;32m 6\u001b[0m )\n",
"\u001b[0;31mNameError\u001b[0m: name 'dataset' is not defined"
"name": "stdout",
"output_type": "stream",
"text": [
"official\n"
]
}
],
"source": [
"dataset.list_subsets()\n",
"print(dataset.split_scheme)\n",
"train_data = dataset.get_subset(\n",
" \"train\",\n",
" transform=transforms.Compose(\n",
Expand Down Expand Up @@ -133,7 +118,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.1.undefined"
"version": "3.10.13"
}
},
"nbformat": 4,
Expand Down
Loading

0 comments on commit 26f139b

Please sign in to comment.