Skip to content

Commit a2212f4

Browse files
committed
Added upload and evaluation scripts with simple instructions
1 parent 31e58af commit a2212f4

8 files changed

+1613
-2
lines changed

.gitignore

+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
2+
3+
# dependencies
4+
/node_modules
5+
/.pnp
6+
.pnp.js
7+
8+
# testing
9+
/coverage
10+
11+
# next.js
12+
/.next/
13+
/out/
14+
15+
# production
16+
/build
17+
18+
# misc
19+
.DS_Store
20+
*.pem
21+
db.sqlite3
22+
23+
# debug
24+
npm-debug.log*
25+
yarn-debug.log*
26+
yarn-error.log*
27+
28+
# local env files
29+
.env*.local
30+
/api/gl_config.json
31+
/api/*.json
32+
/api/external
33+
34+
# vercel
35+
.vercel
36+
37+
# typescript
38+
*.tsbuildinfo
39+
next-env.d.ts
40+
41+
# python
42+
__pycache__
43+
44+
# vim
45+
*.swp
46+
47+
.env

README.md

+79-2
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,79 @@
1-
# model-evaluation-tool
2-
A simple tool for evaluating the performance of your Groundlight ML model
1+
# Model Evaluation Tool
2+
A simple tool for evaluating the performance of your Groundlight Binary ML model
3+
4+
## Installation
5+
6+
The dependencies for these scripts can be installed either with poetry (recommended) or with `requirements.txt`.
7+
8+
Using poetry
9+
10+
```bash
11+
poetry install
12+
```
13+
14+
Using `requirements.txt`
15+
```bash
16+
pip install -r requirements.txt
17+
```
18+
19+
## Usage
20+
21+
### Setting Up Your Account
22+
23+
To train an ML model, first create a binary detector on the [Online Dashboard](https://dashboard.groundlight.ai/).
24+
25+
You will also need to create an API Token to start uploading images to the account. You can go [here](https://dashboard.groundlight.ai/reef/my-account/api-tokens) to create one.
26+
27+
After you have created your API token, export it in your terminal as an environment variable:
28+
29+
```bash
30+
export GROUNDLIGHT_API_TOKEN="YOUR_API_TOKEN"
31+
```
32+
33+
### Formatting Dataset
34+
35+
To train or evaluate the ML model with your custom dataset, structure your dataset into the following format:
36+
37+
```bash
38+
└── dataset
39+
├── dataset.csv
40+
└── images
41+
├── 1.jpg
42+
├── 10.jpg
43+
├── 11.jpg
44+
├── 12.jpg
45+
├── 13.jpg
46+
├── 14.jpg
47+
```
48+
49+
The `dataset.csv` file should have two columns: image_name and label (YES/NO), for example:
50+
51+
```bash
52+
1.jpg,YES
53+
11.jpg,NO
54+
12.jpg,YES
55+
13.jpg,YES
56+
14.jpg,NO
57+
```
58+
59+
Each image file listed in `dataset.csv` should be placed inside the `images` folder.
60+
61+
### Training the Detector
62+
63+
To train the ML model for a detector, simply run the script `label-upload.py` with the following arguments:
64+
65+
```bash
66+
poetry run python label-upload.py --detector-id YOUR_DETECTOR_ID --dataset PATH_TO_DATASET_TRAIN_FOLDER
67+
```
68+
69+
Optionally, set the `--delay` argument (seconds to wait between uploads, default `0.1`) to avoid exceeding the throttling limit of your account.
70+
71+
### Evaluate the Detector
72+
73+
To evaluate the ML model performance for a detector, simply run the script `evaluate-accuracy.py` with the following arguments:
74+
75+
```bash
76+
poetry run python evaluate-accuracy.py --detector-id YOUR_DETECTOR_ID --dataset PATH_TO_DATASET_TEST_FOLDER
77+
```
78+
79+
Optionally, set the `--delay` argument (seconds to wait between uploads, default `0.1`) to avoid exceeding the throttling limit of your account.

evaluate-accuracy.py

+106
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
#!/usr/bin/env python3
2+
"""
3+
A script to evaluate the accuracy of a detector on a given dataset.
4+
It will upload the images to the detector and compare the predicted labels with the ground truth labels.
5+
You can specify the delay between uploads.
6+
"""
7+
8+
import argparse
9+
import os
10+
import PIL
11+
import time
12+
import PIL.Image
13+
import pandas as pd
14+
import logging
15+
16+
from groundlight import Groundlight, Detector, BinaryClassificationResult
17+
from tqdm.auto import tqdm
18+
19+
logger = logging.getLogger(__name__)
20+
logging.basicConfig(level=logging.INFO)
21+
22+
23+
def upload_image(gl: Groundlight, detector: Detector, image: PIL) -> BinaryClassificationResult:
    """
    Submit an image to a detector and return the result of the query.

    No label is attached; the image is only sent through `gl.ask_ml`.

    Args:
        gl: The Groundlight object used to submit the image.
        detector: The detector the image is submitted to.
        image: The opened image to submit.
    Returns:
        The `result` attribute of the image query returned by `gl.ask_ml`.
    """

    # Anything whose format is not JPEG is converted to RGB before submission
    already_jpeg = image.format == "JPEG"
    payload = image if already_jpeg else image.convert("RGB")

    image_query = gl.ask_ml(detector=detector, image=payload)
    return image_query.result
42+
43+
44+
def load_dataset(dataset_dir):
    """
    Read `dataset.csv` from the dataset folder as (image_name, label) pairs.

    The file is read without a header row, so the first line is kept as a
    sample. If a file does start with a header line, that line is returned
    like any other row and the caller's "image not found" check skips it.

    Args:
        dataset_dir: The folder containing dataset.csv.
    Returns:
        A list of (image_name, label) tuples taken from the first two columns.
        Values are strings; an empty cell comes back as NaN.
    """
    frame = pd.read_csv(
        os.path.join(dataset_dir, "dataset.csv"),
        header=None,
        names=["image_name", "label"],
        usecols=[0, 1],
        dtype=str,
    )
    return list(frame.itertuples(index=False, name=None))


def compute_metrics(true_positives, true_negatives, false_positives, false_negatives, total_processed):
    """
    Compute accuracy, precision and recall, treating "YES" as the positive class.

    Args:
        true_positives: Images labelled YES and predicted YES.
        true_negatives: Images labelled NO and predicted NO.
        false_positives: Images labelled NO but predicted YES.
        false_negatives: Images labelled YES but predicted NO.
        total_processed: Every image that was submitted, including ones whose
            prediction was neither YES nor NO.
    Returns:
        A tuple (accuracy, precision, recall). A metric whose denominator is
        zero is reported as 0.
    """
    correct = true_positives + true_negatives
    predicted_yes = true_positives + false_positives
    actual_yes = true_positives + false_negatives

    accuracy = correct / total_processed if total_processed > 0 else 0
    # True negatives must not count towards precision or recall
    precision = true_positives / predicted_yes if predicted_yes > 0 else 0
    recall = true_positives / actual_yes if actual_yes > 0 else 0
    return accuracy, precision, recall


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Evaluate the accuracy of a detector on a given dataset.")
    parser.add_argument("--detector-id", type=str, required=True, help="The ID of the detector to evaluate.")
    parser.add_argument("--dataset", type=str, required=True, help="The folder containing the dataset.csv and images folder")
    parser.add_argument("--delay", type=float, required=False, default=0.1, help="The delay between uploads.")
    args = parser.parse_args()

    gl = Groundlight()
    detector = gl.get_detector(args.detector_id)

    # Load the dataset from the CSV file and images from the images folder
    # The CSV file should have two columns: image_name and label (YES/NO)
    dataset = load_dataset(args.dataset)
    images_dir = os.path.join(args.dataset, "images")
    # A set keeps the per-row existence check cheap for large datasets
    images = set(os.listdir(images_dir))

    logger.info(f"Evaluating {len(dataset)} images on detector {detector.name} with delay {args.delay}.")

    # Confusion-matrix counters, with "YES" as the positive class
    true_positives = 0
    true_negatives = 0
    false_positives = 0
    false_negatives = 0
    total_processed = 0
    confidence_sum = 0

    for image_name, label in tqdm(dataset):
        if image_name not in images:
            logger.warning(f"Image {image_name} not found in images folder.")
            continue

        if label not in ("YES", "NO"):
            logger.warning(f"Invalid label {label} for image {image_name}. Skipping.")
            continue

        # The context manager closes the file handle once the upload is done
        with PIL.Image.open(os.path.join(images_dir, image_name)) as image:
            result = upload_image(gl=gl, detector=detector, image=image)

        if result.label == label:
            if label == "YES":
                true_positives += 1
            else:
                true_negatives += 1
        elif result.label == "YES" and label == "NO":
            false_positives += 1
        elif result.label == "NO" and label == "YES":
            false_negatives += 1

        confidence_sum += result.confidence
        total_processed += 1

        time.sleep(args.delay)

    # Calculate the accuracy, precision, and recall
    correct = true_positives + true_negatives
    accuracy, precision, recall = compute_metrics(
        true_positives=true_positives,
        true_negatives=true_negatives,
        false_positives=false_positives,
        false_negatives=false_negatives,
        total_processed=total_processed,
    )
    # Guard the average so an empty or fully skipped dataset does not crash the report
    average_confidence = confidence_sum / total_processed if total_processed > 0 else 0

    logger.info(f"Processed {total_processed} images.")
    logger.info(f"Correct: {correct}/{total_processed}")
    logger.info(f"Average Confidence: {average_confidence:.2f}")
    logger.info(f"False Positives: {false_positives}")
    logger.info(f"False Negatives: {false_negatives}")
    logger.info(f"Accuracy: {accuracy:.2f}")
    logger.info(f"Precision: {precision:.2f}")
    logger.info(f"Recall: {recall:.2f}")

label-upload.py

+72
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
#!/usr/bin/env python3
2+
"""
3+
A script to upload frames with labels to a detector in a controlled manner.
4+
You can specify the delay between uploads.
5+
"""
6+
7+
import argparse
8+
import os
9+
import PIL
10+
import time
11+
import PIL.Image
12+
import pandas as pd
13+
import logging
14+
15+
from groundlight import Groundlight, Detector
16+
from tqdm.auto import tqdm
17+
18+
logger = logging.getLogger(__name__)
19+
logging.basicConfig(level=logging.INFO)
20+
21+
22+
def upload_image(gl: Groundlight, detector: Detector, image: PIL, label: str) -> None:
    """
    Submit an image to a detector and attach a label to the resulting query.

    Args:
        gl: The Groundlight object used to submit the image and the label.
        detector: The detector the image is submitted to.
        image: The opened image to submit.
        label: The label to attach, either "YES" or "NO".
    Raises:
        ValueError: If the label is neither "YES" nor "NO".
    """

    # Anything whose format is not JPEG is converted to RGB before submission
    already_jpeg = image.format == "JPEG"
    payload = image if already_jpeg else image.convert("RGB")

    # Reject bad labels before anything is sent to the service
    if label not in ("YES", "NO"):
        raise ValueError(f"Invalid label: {label}, must be 'YES' or 'NO'.")

    # ask_ml submits the image; add_label then labels the returned image query
    image_query = gl.ask_ml(detector=detector, image=payload)
    gl.add_label(image_query=image_query, label=label)
43+
44+
45+
def load_dataset(dataset_dir):
    """
    Read `dataset.csv` from the dataset folder as (image_name, label) pairs.

    The file is read without a header row, so the first line is kept as a
    sample. If a file does start with a header line, that line is returned
    like any other row and the caller's "image not found" check skips it.

    Args:
        dataset_dir: The folder containing dataset.csv.
    Returns:
        A list of (image_name, label) tuples taken from the first two columns.
        Values are strings; an empty cell comes back as NaN.
    """
    frame = pd.read_csv(
        os.path.join(dataset_dir, "dataset.csv"),
        header=None,
        names=["image_name", "label"],
        usecols=[0, 1],
        dtype=str,
    )
    return list(frame.itertuples(index=False, name=None))


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Upload images with labels to a detector.")
    parser.add_argument("--detector-id", type=str, required=True, help="The ID of the detector to upload to.")
    parser.add_argument("--dataset", type=str, required=True, help="The folder containing the dataset.csv and images folder")
    parser.add_argument("--delay", type=float, required=False, default=0.1, help="The delay between uploads.")
    args = parser.parse_args()

    gl = Groundlight()
    detector = gl.get_detector(args.detector_id)

    # Load the dataset from the CSV file and images from the images folder
    # The CSV file should have two columns: image_name and label (YES/NO)
    dataset = load_dataset(args.dataset)
    images_dir = os.path.join(args.dataset, "images")
    # A set keeps the per-row existence check cheap for large datasets
    images = set(os.listdir(images_dir))

    logger.info(f"Uploading {len(dataset)} images to detector {detector.name} with delay {args.delay}.")

    for image_name, label in tqdm(dataset):
        if image_name not in images:
            logger.warning(f"Image {image_name} not found in images folder.")
            continue

        # Skip bad rows instead of letting upload_image abort a half-finished upload
        if label not in ("YES", "NO"):
            logger.warning(f"Invalid label {label} for image {image_name}. Skipping.")
            continue

        # The context manager closes the file handle once the upload is done
        with PIL.Image.open(os.path.join(images_dir, image_name)) as image:
            upload_image(gl=gl, detector=detector, image=image, label=label)
        time.sleep(args.delay)

    logger.info("Upload complete. Please wait around 10 minutes for the detector to retrain.")

0 commit comments

Comments
 (0)