Skip to content

Commit

Permalink
adding CLI utilities
Browse files Browse the repository at this point in the history
  • Loading branch information
joefutrelle committed Dec 20, 2024
1 parent 868539f commit ac39e1d
Show file tree
Hide file tree
Showing 2 changed files with 129 additions and 0 deletions.
67 changes: 67 additions & 0 deletions score.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@

import time

from ifcb import DataDirectory

from dataloader import IFCB_ASPECT_RATIO

from classifier import load_extract_parallel, load_model, score_distributions


if __name__ == '__main__':
    # Command-line entry point: load a saved model, extract features for the
    # requested point clouds, score them with the model, and write one
    # "pid,anomaly_score" row per result to a CSV file.
    import argparse

    parser = argparse.ArgumentParser(description='Score anomalies in point cloud data')
    parser.add_argument('data_dir', help='Directory containing point cloud data')
    parser.add_argument('--id-file', default=None, help='File containing list of IDs to load')
    parser.add_argument('--n-jobs', type=int, default=-1, help='Number of parallel jobs for load/extraction phase')
    parser.add_argument('--aspect-ratio', type=float, default=IFCB_ASPECT_RATIO, help='Camera frame aspect ratio (width/height)')
    parser.add_argument('--chunk-size', type=int, default=100, help='Number of PIDs to process in each chunk')
    parser.add_argument('--model', default='classifier.pkl', help='Model load path')
    parser.add_argument('--output', default='scores.csv', help='Output CSV file path')

    args = parser.parse_args()

    # Start of the whole run; used for the total-time report at the end.
    beginning = time.time()

    print(f'Loading model from {args.model}')

    classifier = load_model(args.model)

    # The extraction timer also covers building the list of IDs below.
    then = time.time()

    print(f'Extracting features from point clouds in {args.data_dir}')

    if args.id_file is not None:
        # One ID per line. Blank or whitespace-only lines are skipped so
        # that no empty ID is handed to the extraction step.
        with open(args.id_file, 'r') as f:
            pids = [pid for pid in (line.strip() for line in f) if pid]
    else:
        # No ID list given: take the lid of every bin the data directory yields.
        pids = [data_bin.lid for data_bin in DataDirectory(args.data_dir)]

    feature_results = load_extract_parallel(
        pids,
        args.data_dir,
        aspect_ratio=args.aspect_ratio,
        n_jobs=args.n_jobs,
        chunk_size=args.chunk_size,
    )

    elapsed = time.time() - then

    print(f'Extracted features for {len(feature_results)} point clouds in {elapsed:.2f} seconds')

    then = time.time()

    print('Scoring point clouds using classifier')

    results = score_distributions(classifier, feature_results)

    print(results)

    elapsed = time.time() - then

    print(f'Scored {len(results)} point clouds in {elapsed:.2f} seconds')

    print('Saving results ...')

    # Each result is indexed by 'pid' and 'anomaly_score'; scores are
    # written with four decimal places.
    with open(args.output, 'w') as csv_file:
        csv_file.write('pid,anomaly_score\n')
        for scoredict in results:
            csv_file.write(f"{scoredict['pid']},{scoredict['anomaly_score']:.4f}\n")

    # Report the overall run time, mirroring the summary the training script prints.
    elapsed = time.time() - beginning

    print(f'Total load/extract/score time: {elapsed:.2f} seconds')

62 changes: 62 additions & 0 deletions train.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import time

from ifcb import DataDirectory

from classifier import load_extract_parallel, save_model, train_classifier
from dataloader import IFCB_ASPECT_RATIO


if __name__ == "__main__":
    # Command-line entry point: extract features for the requested point
    # clouds, train a classifier on them, and save the trained model.
    import argparse

    parser = argparse.ArgumentParser(description='Train a classifier on point cloud data')
    parser.add_argument('data_dir', help='Directory containing point cloud data')
    parser.add_argument('--id-file', default=None, help='File containing list of IDs to load')
    parser.add_argument('--n-jobs', type=int, default=-1, help='Number of parallel jobs')
    parser.add_argument('--contamination', type=float, default=0.1, help='Expected fraction of anomalous distributions')
    parser.add_argument('--aspect-ratio', type=float, default=IFCB_ASPECT_RATIO, help='Camera frame aspect ratio (width/height)')
    parser.add_argument('--chunk-size', type=int, default=100, help='Number of PIDs to process in each chunk')
    parser.add_argument('--model', default='classifier.pkl', help='Model save/load path')

    args = parser.parse_args()

    # Start of the whole run; used for the total-time report at the end.
    beginning = time.time()

    if args.id_file is not None:
        # One ID per line. Blank or whitespace-only lines are skipped so
        # that no empty ID is handed to the extraction step.
        with open(args.id_file, 'r') as f:
            pids = [pid for pid in (line.strip() for line in f) if pid]
    else:
        # No ID list given: take the lid of every bin the data directory yields.
        pids = [data_bin.lid for data_bin in DataDirectory(args.data_dir)]

    then = time.time()

    print(f'Loading and performing feature extraction on {len(pids)} point clouds')

    # Extract features from point clouds
    feature_results = load_extract_parallel(
        pids,
        args.data_dir,
        aspect_ratio=args.aspect_ratio,
        n_jobs=args.n_jobs,
        chunk_size=args.chunk_size,
    )

    elapsed = time.time() - then

    print(f'Extracted features for {len(feature_results)} point clouds in {elapsed:.2f} seconds')

    then = time.time()

    # Train the classifier
    print('Training classifier')

    classifier = train_classifier(feature_results, contamination=args.contamination, n_jobs=args.n_jobs)

    elapsed = time.time() - then

    print(f'Trained classifier in {elapsed:.2f} seconds')

    # Save the classifier to the path given by --model
    save_model(classifier, args.model)

    # Total covers ID listing, feature extraction, training and saving.
    elapsed = time.time() - beginning

    print(f'Total load/extract/train time: {elapsed:.2f} seconds')

0 comments on commit ac39e1d

Please sign in to comment.