Skip to content

Commit

Permalink
2nd figure for weecology
Browse files Browse the repository at this point in the history
  • Loading branch information
bw4sz committed Oct 14, 2024
1 parent 60e7554 commit 26f139b
Show file tree
Hide file tree
Showing 8 changed files with 557 additions and 74 deletions.
22 changes: 22 additions & 0 deletions data_prep/Araujo_2020.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from deepforest import main
from deepforest.utilities import read_file
from deepforest.preprocess import split_raster
import os
import geopandas as gpd
import pandas as pd

# Prepare the Araujo et al. 2020 crown-delineation dataset for MillionTrees:
# read hand-annotated crown polygons, attach image/label/source metadata,
# tile the orthomosaic into fixed-size crops, and write one annotations.csv
# with absolute image paths.
ROOT_DIR = "/orange/ewhite/DeepForest/Araujo_2020/"
RASTER_NAME = "Orthomosaic_WGS84_UTM20S.tif"

gdf = gpd.read_file(ROOT_DIR + "crown_delineation_shapefile.shp")
# Keep only simple Polygon geometries; other geometry types (e.g.
# MultiPolygon) are dropped before WKT serialization below.
gdf = gdf[gdf.geometry.type == "Polygon"]
gdf["image_path"] = RASTER_NAME
gdf["label"] = "Tree"
gdf["source"] = "Araujo et al. 2020"

# Convert the geospatial annotations into image coordinates for the raster.
df = read_file(gdf, root_dir=ROOT_DIR)
df = df[["geometry", "image_path", "label", "source"]]

# Serialize geometries to WKT text so the frame can travel as plain pandas.
df["polygon"] = df.geometry.apply(lambda geom: geom.wkt)
df.drop(columns=["geometry"], inplace=True)
df = pd.DataFrame(df)

# Tile the large orthomosaic into 1500-px, non-overlapping crops and remap
# each polygon annotation onto its crop.
split_files = split_raster(
    df,
    path_to_raster=ROOT_DIR + RASTER_NAME,
    root_dir=ROOT_DIR,
    base_dir=ROOT_DIR + "crops/",
    patch_size=1500,
    patch_overlap=0,
)

# Store absolute crop paths so the annotation file works from any directory.
split_files["image_path"] = split_files["image_path"].apply(
    lambda p: os.path.join(ROOT_DIR + "crops/", p))
# index=False keeps a stray unnamed index column out of the CSV, matching
# the other data_prep scripts (e.g. NeonBenchmark.py).
split_files.to_csv(ROOT_DIR + "annotations.csv", index=False)
78 changes: 37 additions & 41 deletions data_prep/NeonBenchmark.py
Original file line number Diff line number Diff line change
@@ -1,46 +1,42 @@
import glob
import os
import pandas as pd
import shutil
import geopandas as gpd
from deepforest.utilities import read_file

## Train annotations ##
# Combine the NEON benchmark hand annotations into a single CSV:
# Pascal-VOC style .xml box files plus .shp crown shapefiles, both living
# next to their RGB .tif images under BASE_PATH.
BASE_PATH = "/orange/ewhite/b.weinstein/NeonTreeEvaluation/hand_annotations/"
#convert hand annotations from xml into retinanet format
xmls = glob.glob(BASE_PATH + "*.xml")
annotation_list = []
for xml in xmls:
    #check if it is in the directory: only keep annotations whose matching
    #.tif image (same basename) exists alongside the xml
    image_name = "{}.tif".format(os.path.splitext(os.path.basename(xml))[0])
    if os.path.exists(os.path.join(BASE_PATH, image_name)):
        print(xml)
        annotation = read_file(xml)
        annotation_list.append(annotation)

#Collect hand annotations
annotations = pd.concat(annotation_list, ignore_index=True)

#collect shapefile annotations
shps = glob.glob(BASE_PATH + "*.shp")
shps_tifs = glob.glob(BASE_PATH + "*.tif")  # NOTE(review): appears unused — confirm before removing
shp_results = []
for shp in shps:
    print(shp)
    # Each shapefile shares its basename with the corresponding RGB tif.
    rgb = "{}.tif".format(os.path.splitext(shp)[0])
    gdf = gpd.read_file(shp)
    gdf["label"] = "Tree"
    gdf["image_path"] = os.path.join(BASE_PATH, rgb)
    # Convert geospatial crowns into image-coordinate annotations.
    shp_df = read_file(gdf, root_dir=BASE_PATH)
    shp_df = pd.DataFrame(shp_df)
    shp_results.append(shp_df)

shp_results = pd.concat(shp_results, ignore_index=True)
annotations = pd.concat([annotations, shp_results])

#Ensure column order
annotations["source"] = "Weecology_University_Florida"
annotations["label"] = "Tree"
# Rewrite relative image names as absolute paths under the benchmark image dir.
annotations["image_path"] = annotations.image_path.apply(lambda x: os.path.join("/orange/ewhite/DeepForest/NEON_benchmark/images/", x))

annotations.to_csv("/orange/ewhite/DeepForest/NEON_benchmark/University_of_Florida.csv")
# Combine the pre-cropped NEON benchmark annotation CSVs into a single file
# and report any image overlap with the previously published annotations.

# Define the base path holding per-crop annotation CSVs.
BASE_PATH = "/orange/ewhite/b.weinstein/NeonTreeEvaluation/hand_annotations/crops"

# Load all CSV files in the specified directory.
csv_files = glob.glob(os.path.join(BASE_PATH, "*.csv"))
csv_list = []

for csv_file in csv_files:
    print(csv_file)
    df = read_file(csv_file)
    # Make image paths absolute so the combined CSV works from any directory.
    df["image_path"] = df["image_path"].apply(lambda x: os.path.join(BASE_PATH, x))
    df["source"] = "Weecology_University_Florida"
    df["label"] = "Tree"
    csv_list.append(df)

# Concatenate all per-crop dataframes into one annotation table.
annotations = pd.concat(csv_list, ignore_index=True)

# Save the combined annotations to a CSV file (index=False: no index column).
output_path = "/orange/ewhite/DeepForest/NEON_benchmark/University_of_Florida.csv"
annotations.to_csv(output_path, index=False)

# Load the existing annotations file for a duplication check.
existing_annotations_path = "/orange/ewhite/DeepForest/NEON_benchmark/NeonTreeEvaluation_annotations.csv"
existing_annotations = pd.read_csv(existing_annotations_path)

# Check for overlapping data based on a common column, e.g., 'image_path'.
# Diagnostic only: nothing is filtered out of the saved file.
overlapping_data = pd.merge(annotations, existing_annotations, on='image_path', how='inner')

# Print the overlapping data.
print("Overlapping data:")
print(overlapping_data)
# (A redundant second annotations.to_csv(output_path, index=False) call was
# removed here: `annotations` is unmodified after the first save, so the
# second write rewrote an identical file.)


6 changes: 4 additions & 2 deletions data_prep/collect_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@

TreePoints = [
"/orange/ewhite/DeepForest/TreeFormer/all_images/annotations.csv",
"/orange/ewhite/DeepForest/Ventura_2022/urban-tree-detection-data/images/annotations.csv"]
"/orange/ewhite/DeepForest/Ventura_2022/urban-tree-detection-data/images/annotations.csv",
"/orange/ewhite/MillionTrees/NEON_points/annotations.csv"]

TreePolygons = [
"/orange/ewhite/DeepForest/Jansen_2023/pngs/annotations.csv",
Expand All @@ -31,7 +32,8 @@
"/orange/ewhite/DeepForest/Wagner_Australia/annotations.csv",
"/orange/ewhite/DeepForest/Alejandro_Chile/alejandro/annotations.csv",
"/orange/ewhite/DeepForest/UrbanLondon/annotations.csv",
"/orange/ewhite/DeepForest/OliveTrees_spain/Dataset_RGB/annotations.csv"
"/orange/ewhite/DeepForest/OliveTrees_spain/Dataset_RGB/annotations.csv",
"/orange/ewhite/DeepForest/Araujo_2020/annotations.csv"
]

# Current errors
Expand Down
4 changes: 4 additions & 0 deletions docs/datasets.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ ISPRS Journal of Photogrammetry and Remote Sensing, Volume 206, 2023

**Location** [NEON sites](https://www.neonscience.org/field-sites/explore-field-sites) within the United States

An extension of this published resource was created by the Weecology Lab at the University of Florida.

![sample_image](public/Weecology_University_Florida.png)

### World Resources Institute

NAIP Imagery from across the United States
Expand Down
11 changes: 11 additions & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,17 @@ The MillionTrees seeks to collect a million tree locations to create a global be
:alt: Image Placeholder
:width: 50%

Current Status
--------------

There are 3 datasets available for the MillionTrees benchmark:

* TreeBoxes: A dataset of 282,288 tree crowns from 9 sources.

* TreePolygons: A dataset of 362,751 tree crowns from 8 sources.

* TreePoints: A dataset of 191,614 tree stems from 2 sources.

Contact
-------

Expand Down
Binary file added docs/public/Weecology_University_Florida.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
47 changes: 16 additions & 31 deletions examples/Datasets.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 1,
"metadata": {},
"outputs": [
{
Expand All @@ -21,6 +21,12 @@
}
],
"source": [
"import os\n",
"import sys\n",
"\n",
"if os.path.basename(os.getcwd()) == 'examples':\n",
" sys.path.append(\"../\")\n",
" \n",
"import milliontrees\n",
"from torchvision import transforms\n",
"\n",
Expand Down Expand Up @@ -49,26 +55,9 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"metadata": {},
"outputs": [
{
"ename": "KeyError",
"evalue": "\"None of [Index(['xmin', 'ymin', 'xmax', 'ymax'], dtype='object')] are in the [columns]\"",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[3], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Load the box dataset\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmilliontrees\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m get_dataset\n\u001b[0;32m----> 3\u001b[0m dataset \u001b[38;5;241m=\u001b[39m \u001b[43mget_dataset\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mTreeBoxes\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mroot_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/orange/ewhite/DeepForest/MillionTrees/\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/MillionTrees/milliontrees/get_dataset.py:47\u001b[0m, in \u001b[0;36mget_dataset\u001b[0;34m(dataset, version, unlabeled, **dataset_kwargs)\u001b[0m\n\u001b[1;32m 45\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 46\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmilliontrees\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdatasets\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mTreeBoxes\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m TreeBoxesDataset \u001b[38;5;66;03m# type:ignore\u001b[39;00m\n\u001b[0;32m---> 47\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mTreeBoxesDataset\u001b[49m\u001b[43m(\u001b[49m\u001b[43mversion\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mversion\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mdataset_kwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/MillionTrees/milliontrees/datasets/TreeBoxes.py:87\u001b[0m, in \u001b[0;36mTreeBoxesDataset.__init__\u001b[0;34m(self, version, root_dir, download, split_scheme)\u001b[0m\n\u001b[1;32m 84\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_input_array \u001b[38;5;241m=\u001b[39m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mfilename\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mvalues\n\u001b[1;32m 86\u001b[0m \u001b[38;5;66;03m# Box labels\u001b[39;00m\n\u001b[0;32m---> 87\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_y_array \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mtensor(\u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mxmin\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mymin\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mxmax\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mymax\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m]\u001b[49m\u001b[38;5;241m.\u001b[39mvalues\u001b[38;5;241m.\u001b[39mastype(\u001b[38;5;28mfloat\u001b[39m))\n\u001b[1;32m 89\u001b[0m \u001b[38;5;66;03m# Labels -> just 'Tree'\u001b[39;00m\n\u001b[1;32m 90\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_n_classes \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n",
"File \u001b[0;32m/orange/ewhite/b.weinstein/miniconda3/envs/MillionTrees/lib/python3.10/site-packages/pandas/core/frame.py:3899\u001b[0m, in \u001b[0;36mDataFrame.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3897\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_iterator(key):\n\u001b[1;32m 3898\u001b[0m key \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(key)\n\u001b[0;32m-> 3899\u001b[0m indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_indexer_strict\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcolumns\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m[\u001b[38;5;241m1\u001b[39m]\n\u001b[1;32m 3901\u001b[0m \u001b[38;5;66;03m# take() does not accept boolean indexers\u001b[39;00m\n\u001b[1;32m 3902\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(indexer, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdtype\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mbool\u001b[39m:\n",
"File \u001b[0;32m/orange/ewhite/b.weinstein/miniconda3/envs/MillionTrees/lib/python3.10/site-packages/pandas/core/indexes/base.py:6114\u001b[0m, in \u001b[0;36mIndex._get_indexer_strict\u001b[0;34m(self, key, axis_name)\u001b[0m\n\u001b[1;32m 6111\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 6112\u001b[0m keyarr, indexer, new_indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_reindex_non_unique(keyarr)\n\u001b[0;32m-> 6114\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_raise_if_missing\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkeyarr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindexer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis_name\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6116\u001b[0m keyarr \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtake(indexer)\n\u001b[1;32m 6117\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(key, Index):\n\u001b[1;32m 6118\u001b[0m \u001b[38;5;66;03m# GH 42790 - Preserve name from an Index\u001b[39;00m\n",
"File \u001b[0;32m/orange/ewhite/b.weinstein/miniconda3/envs/MillionTrees/lib/python3.10/site-packages/pandas/core/indexes/base.py:6175\u001b[0m, in \u001b[0;36mIndex._raise_if_missing\u001b[0;34m(self, key, indexer, axis_name)\u001b[0m\n\u001b[1;32m 6173\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m use_interval_msg:\n\u001b[1;32m 6174\u001b[0m key \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(key)\n\u001b[0;32m-> 6175\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNone of [\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mkey\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m] are in the [\u001b[39m\u001b[38;5;132;01m{\u001b[39;00maxis_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m]\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 6177\u001b[0m not_found \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(ensure_index(key)[missing_mask\u001b[38;5;241m.\u001b[39mnonzero()[\u001b[38;5;241m0\u001b[39m]]\u001b[38;5;241m.\u001b[39munique())\n\u001b[1;32m 6178\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mnot_found\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m not in index\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
"\u001b[0;31mKeyError\u001b[0m: \"None of [Index(['xmin', 'ymin', 'xmax', 'ymax'], dtype='object')] are in the [columns]\""
]
}
],
"outputs": [],
"source": [
"# Load the box dataset\n",
"from milliontrees import get_dataset\n",
Expand All @@ -84,23 +73,19 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'dataset' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[2], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m train_data \u001b[38;5;241m=\u001b[39m \u001b[43mdataset\u001b[49m\u001b[38;5;241m.\u001b[39mget_subset(\n\u001b[1;32m 2\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtrain\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 3\u001b[0m transform\u001b[38;5;241m=\u001b[39mtransforms\u001b[38;5;241m.\u001b[39mCompose(\n\u001b[1;32m 4\u001b[0m [transforms\u001b[38;5;241m.\u001b[39mResize((\u001b[38;5;241m448\u001b[39m, \u001b[38;5;241m448\u001b[39m)), transforms\u001b[38;5;241m.\u001b[39mToTensor()]\n\u001b[1;32m 5\u001b[0m ),\n\u001b[1;32m 6\u001b[0m )\n",
"\u001b[0;31mNameError\u001b[0m: name 'dataset' is not defined"
"name": "stdout",
"output_type": "stream",
"text": [
"official\n"
]
}
],
"source": [
"dataset.list_subsets()\n",
"print(dataset.split_scheme)\n",
"train_data = dataset.get_subset(\n",
" \"train\",\n",
" transform=transforms.Compose(\n",
Expand Down Expand Up @@ -133,7 +118,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.1.undefined"
"version": "3.10.13"
}
},
"nbformat": 4,
Expand Down
Loading

0 comments on commit 26f139b

Please sign in to comment.