From 542f2850c3df08343bf9aafb44e7117954f5f918 Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 15:14:00 +0100 Subject: [PATCH 01/22] enable export_json (part 1) --- birdvoxdetect/core.py | 64 +++++++++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 26 deletions(-) diff --git a/birdvoxdetect/core.py b/birdvoxdetect/core.py index 6200854..58b1388 100644 --- a/birdvoxdetect/core.py +++ b/birdvoxdetect/core.py @@ -48,6 +48,7 @@ def process_file( export_confidence=False, export_context=False, export_faults=False, + export_json=False, export_logger=False, threshold=50.0, suffix="", @@ -247,6 +248,10 @@ def process_file( if export_context: contexts = [] + # Initialize list of probabilistic predictions. + if export_json: + json_dicts = [] + # Print chunk duration. logger.info("Chunk duration: {} seconds".format(chunk_duration)) logger.info("") @@ -381,12 +386,14 @@ def process_file( n_peaks = len(chunk_timestamps) # Classify species. - chunk_df = pd.DataFrame( - map( - lambda x: classify_species(classifier, chunk_pcen, x, taxonomy), - th_peak_locs, - ) - ) + row = [] + for th_peak_loc in th_peak_locs: + row, json_dict = classify_species( + classifier, chunk_pcen, x, taxonomy) + rows.append(row) + if export_json: + json_dicts.append(json_dict) + chunk_df = pd.DataFrame(rows) # Count flight calls. if n_peaks>0: @@ -403,7 +410,7 @@ def process_file( logger.info("Number of flight calls in current chunk: 0") logger.info("") - # Export timestamps. + # Export checklist. chunk_hhmmss = list(map(seconds_to_hhmmss, chunk_timestamps)) chunk_df["Time (hh:mm:ss)"] = event_hhmmss + chunk_hhmmss chunk_df["Confidence (%)"] = th_peak_confidences @@ -565,12 +572,14 @@ def process_file( n_peaks = len(chunk_timestamps) # Classify species. 
- chunk_df = pd.DataFrame( - map( - lambda x: classify_species(classifier, chunk_pcen, x, taxonomy), - th_peak_locs, - ) - ) + row = [] + for th_peak_loc in th_peak_locs: + row, json_dict = classify_species( + classifier, chunk_pcen, x, taxonomy) + rows.append(row) + if export_json: + json_dicts.append(json_dict) + chunk_df = pd.DataFrame(rows) # Count flight calls. if n_peaks>0: @@ -587,7 +596,7 @@ def process_file( logger.info("Number of flight calls in current chunk: 0") logger.info("") - # Export timestamps. + # Export checklist. chunk_hhmmss = list(map(seconds_to_hhmmss, chunk_timestamps)) chunk_df["Time (hh:mm:ss)"] = event_hhmmss + chunk_hhmmss chunk_df["Confidence (%)"] = th_peak_confidences @@ -753,12 +762,14 @@ def process_file( n_peaks = len(chunk_timestamps) # Classify species. - chunk_df = pd.DataFrame( - map( - lambda x: classify_species(classifier, chunk_pcen, x, taxonomy), - th_peak_locs, - ) - ) + row = [] + for th_peak_loc in th_peak_locs: + row, json_dict = classify_species( + classifier, chunk_pcen, x, taxonomy) + rows.append(row) + if export_json: + json_dicts.append(json_dict) + chunk_df = pd.DataFrame(rows) # Count flight calls. if n_peaks>0: @@ -775,7 +786,7 @@ def process_file( logger.info("Number of flight calls in current chunk: 0") logger.info("") - # Export timestamps. + # Export checklist. chunk_hhmmss = list(map(seconds_to_hhmmss, chunk_timestamps)) chunk_df["Time (hh:mm:ss)"] = event_hhmmss + chunk_hhmmss chunk_df["Confidence (%)"] = th_peak_confidences @@ -944,7 +955,8 @@ def classify_species(classifier, chunk_pcen, th_peak_loc, taxonomy): bvc_prediction = birdvoxclassify.predict(pcen_clip, classifier=classifier) # Format prediction - prediction = birdvoxclassify.format_pred(bvc_prediction, taxonomy=taxonomy) + formatted_prediction = birdvoxclassify.format_pred( + bvc_prediction, taxonomy=taxonomy) # Get prediction levels. 
pred_levels = list(prediction.keys()) @@ -954,13 +966,13 @@ def classify_species(classifier, chunk_pcen, th_peak_loc, taxonomy): for pred_level in pred_levels: # List probabilities prob_dict = { - k: prediction[pred_level][k]["probability"] - for k in prediction[pred_level] + k: formatted_prediction[pred_level][k]["probability"] + for k in formatted_prediction[pred_level] } # Extract class of maximum probability argmax_taxon = max(prob_dict.items(), key=operator.itemgetter(1))[0] - argmax_dict = prediction[pred_level][argmax_taxon] + argmax_dict = formatted_prediction[pred_level][argmax_taxon] if (pred_level=="coarse"): argmax_prediction["Order"] = "other" @@ -975,7 +987,7 @@ def classify_species(classifier, chunk_pcen, th_peak_loc, taxonomy): alias = aliases["species_4letter_code"] argmax_prediction["Species (4-letter code)"] = alias - return argmax_prediction + return argmax_prediction, formatted_prediction def compute_pcen(audio, sr): From f0a8ec4261fe525f919b09980766820455345157 Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 15:29:56 +0100 Subject: [PATCH 02/22] bugfix th_peak_loc --- birdvoxdetect/core.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/birdvoxdetect/core.py b/birdvoxdetect/core.py index 58b1388..04cf8b7 100644 --- a/birdvoxdetect/core.py +++ b/birdvoxdetect/core.py @@ -389,7 +389,7 @@ def process_file( row = [] for th_peak_loc in th_peak_locs: row, json_dict = classify_species( - classifier, chunk_pcen, x, taxonomy) + classifier, chunk_pcen, th_peak_loc, taxonomy) rows.append(row) if export_json: json_dicts.append(json_dict) @@ -575,7 +575,7 @@ def process_file( row = [] for th_peak_loc in th_peak_locs: row, json_dict = classify_species( - classifier, chunk_pcen, x, taxonomy) + classifier, chunk_pcen, th_peak_loc, taxonomy) rows.append(row) if export_json: json_dicts.append(json_dict) @@ -765,7 +765,7 @@ def process_file( row = [] for th_peak_loc in th_peak_locs: row, json_dict = 
classify_species( - classifier, chunk_pcen, x, taxonomy) + classifier, chunk_pcen, th_peak_loc, taxonomy) rows.append(row) if export_json: json_dicts.append(json_dict) From 07edb48690d7090b1490193888a84ab3b1768946 Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 15:31:11 +0100 Subject: [PATCH 03/22] bugfix pred_levels = list(formatted_prediction.keys()) --- birdvoxdetect/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/birdvoxdetect/core.py b/birdvoxdetect/core.py index 04cf8b7..6f8e9bc 100644 --- a/birdvoxdetect/core.py +++ b/birdvoxdetect/core.py @@ -959,7 +959,7 @@ def classify_species(classifier, chunk_pcen, th_peak_loc, taxonomy): bvc_prediction, taxonomy=taxonomy) # Get prediction levels. - pred_levels = list(prediction.keys()) + pred_levels = list(formatted_prediction.keys()) # Loop over taxonomical levels. argmax_prediction = {} From 889c084d361bae4a1915e67a4b84f7bb604b5ced Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 15:32:56 +0100 Subject: [PATCH 04/22] rows = [] --- birdvoxdetect/core.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/birdvoxdetect/core.py b/birdvoxdetect/core.py index 6f8e9bc..9efe952 100644 --- a/birdvoxdetect/core.py +++ b/birdvoxdetect/core.py @@ -386,7 +386,7 @@ def process_file( n_peaks = len(chunk_timestamps) # Classify species. - row = [] + rows = [] for th_peak_loc in th_peak_locs: row, json_dict = classify_species( classifier, chunk_pcen, th_peak_loc, taxonomy) @@ -572,7 +572,7 @@ def process_file( n_peaks = len(chunk_timestamps) # Classify species. - row = [] + rows = [] for th_peak_loc in th_peak_locs: row, json_dict = classify_species( classifier, chunk_pcen, th_peak_loc, taxonomy) @@ -762,7 +762,7 @@ def process_file( n_peaks = len(chunk_timestamps) # Classify species. 
- row = [] + rows = [] for th_peak_loc in th_peak_locs: row, json_dict = classify_species( classifier, chunk_pcen, th_peak_loc, taxonomy) From 524e395d1c96a549df3213d41fd313927fc77699 Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 16:04:24 +0100 Subject: [PATCH 05/22] predict_proba --- birdvoxdetect/core.py | 61 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 53 insertions(+), 8 deletions(-) diff --git a/birdvoxdetect/core.py b/birdvoxdetect/core.py index 9efe952..c363395 100644 --- a/birdvoxdetect/core.py +++ b/birdvoxdetect/core.py @@ -14,6 +14,7 @@ import scipy import scipy.signal import sklearn +import socket import soundfile as sf import traceback import warnings @@ -48,8 +49,8 @@ def process_file( export_confidence=False, export_context=False, export_faults=False, - export_json=False, export_logger=False, + predict_proba=False, threshold=50.0, suffix="", clip_duration=1.0, @@ -234,6 +235,33 @@ def process_file( faultlist_df = pd.DataFrame(columns=faultlist_df_columns) faultlist_df.to_csv(faultlist_path, columns=faultlist_df_columns, index=False) + # Initialize JSON output. + if predict_proba: + json_path = get_output_path( + filepath, suffix + "proba.json", output_dir + ) + # Get MD5 hash. + hash_md5 = hashlib.md5() + with open(file_path, "rb") as fhandle: + for chunk in iter(lambda: fhandle.read(4096), b""): + hash_md5.update(chunk) + json_header = { + "absolute_path": os.path.abspath(filepath), + "audio_duration": librosa.get_duration(filepath), + "birdvoxdetect_threshold": threshold, + "birdvoxactivate_threshold": bva_threshold, + "classifier_name": classifier_name, + "detector_name": detector_name, + "filepath": filepath, + "hostname": socket.gethostname(), + "md5_checksum": hash_md5.hexdigest(), + "versions": { + module.__name__: module.__version__ for module in modules + } + } + with open(json_path, "w") as f: + json.dump({"header": json_header}, f) + # Create directory of output clips. 
if export_clips: clips_dir = get_output_path(filepath, suffix + "clips", output_dir=output_dir) @@ -248,10 +276,6 @@ def process_file( if export_context: contexts = [] - # Initialize list of probabilistic predictions. - if export_json: - json_dicts = [] - # Print chunk duration. logger.info("Chunk duration: {} seconds".format(chunk_duration)) logger.info("") @@ -391,7 +415,7 @@ def process_file( row, json_dict = classify_species( classifier, chunk_pcen, th_peak_loc, taxonomy) rows.append(row) - if export_json: + if predict_proba: json_dicts.append(json_dict) chunk_df = pd.DataFrame(rows) @@ -420,6 +444,13 @@ def process_file( df = df.append(chunk_df) df.to_csv(checklist_path, columns=df_columns, index=False) + # Export probabilities as JSON file. + with open(json_path, "w") as f: + json.dump({ + "header": json_header, + "events": json_dicts + }, f) + # Export clips. if export_clips and len(df)>0: chunk_zip = zip( @@ -577,7 +608,7 @@ def process_file( row, json_dict = classify_species( classifier, chunk_pcen, th_peak_loc, taxonomy) rows.append(row) - if export_json: + if predict_proba: json_dicts.append(json_dict) chunk_df = pd.DataFrame(rows) @@ -606,6 +637,13 @@ def process_file( df = df.append(chunk_df) df.to_csv(checklist_path, columns=df_columns, index=False) + # Export probabilities as JSON file. + with open(json_path, "w") as f: + json.dump({ + "header": json_header, + "events": json_dicts + }, f) + # Export clips. if export_clips and len(df)>0: chunk_zip = zip( @@ -767,7 +805,7 @@ def process_file( row, json_dict = classify_species( classifier, chunk_pcen, th_peak_loc, taxonomy) rows.append(row) - if export_json: + if predict_proba: json_dicts.append(json_dict) chunk_df = pd.DataFrame(rows) @@ -796,6 +834,13 @@ def process_file( df = df.append(chunk_df) df.to_csv(checklist_path, columns=df_columns, index=False) + # Export probabilities as JSON file. 
+ with open(json_path, "w") as f: + json.dump({ + "header": json_header, + "events": json_dicts + }, f) + # Export clips. if export_clips and len(df)>0: chunk_zip = zip( From da949bc902eca071ab518877b95e3aaebcac0e38 Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 16:06:34 +0100 Subject: [PATCH 06/22] import hashlib --- birdvoxdetect/core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/birdvoxdetect/core.py b/birdvoxdetect/core.py index c363395..73c029a 100644 --- a/birdvoxdetect/core.py +++ b/birdvoxdetect/core.py @@ -3,6 +3,7 @@ from contextlib import redirect_stderr import datetime import h5py +import hashlib import joblib import json import librosa From 872f1e525e08011a3eb558cf05648f418db95b24 Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 16:07:02 +0100 Subject: [PATCH 07/22] with open(filepath, "rb") as fhandle --- birdvoxdetect/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/birdvoxdetect/core.py b/birdvoxdetect/core.py index 73c029a..8024198 100644 --- a/birdvoxdetect/core.py +++ b/birdvoxdetect/core.py @@ -243,7 +243,7 @@ def process_file( ) # Get MD5 hash. 
hash_md5 = hashlib.md5() - with open(file_path, "rb") as fhandle: + with open(filepath, "rb") as fhandle: for chunk in iter(lambda: fhandle.read(4096), b""): hash_md5.update(chunk) json_header = { From 341261702cd3324ec9848c3742660ca39d33fd1b Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 16:07:53 +0100 Subject: [PATCH 08/22] librosa.get_duration(filename=filepath) --- birdvoxdetect/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/birdvoxdetect/core.py b/birdvoxdetect/core.py index 8024198..6b5af4f 100644 --- a/birdvoxdetect/core.py +++ b/birdvoxdetect/core.py @@ -248,7 +248,7 @@ def process_file( hash_md5.update(chunk) json_header = { "absolute_path": os.path.abspath(filepath), - "audio_duration": librosa.get_duration(filepath), + "audio_duration": librosa.get_duration(filename=filepath), "birdvoxdetect_threshold": threshold, "birdvoxactivate_threshold": bva_threshold, "classifier_name": classifier_name, From 766e75e2d2980153d1833feae498a80464832532 Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 16:09:15 +0100 Subject: [PATCH 09/22] initialize json_dicts --- birdvoxdetect/core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/birdvoxdetect/core.py b/birdvoxdetect/core.py index 6b5af4f..257e4b3 100644 --- a/birdvoxdetect/core.py +++ b/birdvoxdetect/core.py @@ -262,6 +262,7 @@ def process_file( } with open(json_path, "w") as f: json.dump({"header": json_header}, f) + json_dicts = [] # Create directory of output clips. 
if export_clips: From 12f04e9db9bd5d7fcb69bd2ed6d7b29f39593518 Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 16:15:39 +0100 Subject: [PATCH 10/22] store time and confidence in each JSON event --- birdvoxdetect/core.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/birdvoxdetect/core.py b/birdvoxdetect/core.py index 257e4b3..5422301 100644 --- a/birdvoxdetect/core.py +++ b/birdvoxdetect/core.py @@ -413,11 +413,15 @@ def process_file( # Classify species. rows = [] - for th_peak_loc in th_peak_locs: + for peak_id, th_peak_loc in enumerate(th_peak_locs): row, json_dict = classify_species( classifier, chunk_pcen, th_peak_loc, taxonomy) rows.append(row) if predict_proba: + chunk_timestamp = chunk_timestamps[peak_id] + json_dict["Time (s)"] = chunk_timestamp + json_dict["Time (hh:mm:ss)"] = seconds_to_hhmmss(chunk_timestamp) + json_dict["Confidence (%)"] = th_peak_confidences[peak_id] json_dicts.append(json_dict) chunk_df = pd.DataFrame(rows) @@ -606,11 +610,15 @@ def process_file( # Classify species. rows = [] - for th_peak_loc in th_peak_locs: + for peak_id, th_peak_loc in enumerate(th_peak_locs): row, json_dict = classify_species( classifier, chunk_pcen, th_peak_loc, taxonomy) rows.append(row) if predict_proba: + chunk_timestamp = chunk_timestamps[peak_id] + json_dict["Time (s)"] = chunk_timestamp + json_dict["Time (hh:mm:ss)"] = seconds_to_hhmmss(chunk_timestamp) + json_dict["Confidence (%)"] = th_peak_confidences[peak_id] json_dicts.append(json_dict) chunk_df = pd.DataFrame(rows) @@ -803,11 +811,15 @@ def process_file( # Classify species. 
rows = [] - for th_peak_loc in th_peak_locs: + for peak_id, th_peak_loc in enumerate(th_peak_locs): row, json_dict = classify_species( classifier, chunk_pcen, th_peak_loc, taxonomy) rows.append(row) if predict_proba: + chunk_timestamp = chunk_timestamps[peak_id] + json_dict["Time (s)"] = chunk_timestamp + json_dict["Time (hh:mm:ss)"] = seconds_to_hhmmss(chunk_timestamp) + json_dict["Confidence (%)"] = th_peak_confidences[peak_id] json_dicts.append(json_dict) chunk_df = pd.DataFrame(rows) From 71c48c7752d575a134f13107260934aba5fd0583 Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 16:21:43 +0100 Subject: [PATCH 11/22] convert timestamps to float before json dump --- birdvoxdetect/core.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/birdvoxdetect/core.py b/birdvoxdetect/core.py index 5422301..1653385 100644 --- a/birdvoxdetect/core.py +++ b/birdvoxdetect/core.py @@ -419,9 +419,9 @@ def process_file( rows.append(row) if predict_proba: chunk_timestamp = chunk_timestamps[peak_id] - json_dict["Time (s)"] = chunk_timestamp + json_dict["Time (s)"] = float(chunk_timestamp) json_dict["Time (hh:mm:ss)"] = seconds_to_hhmmss(chunk_timestamp) - json_dict["Confidence (%)"] = th_peak_confidences[peak_id] + json_dict["Confidence (%)"] = float(th_peak_confidences[peak_id]) json_dicts.append(json_dict) chunk_df = pd.DataFrame(rows) @@ -616,9 +616,9 @@ def process_file( rows.append(row) if predict_proba: chunk_timestamp = chunk_timestamps[peak_id] - json_dict["Time (s)"] = chunk_timestamp + json_dict["Time (s)"] = float(chunk_timestamp) json_dict["Time (hh:mm:ss)"] = seconds_to_hhmmss(chunk_timestamp) - json_dict["Confidence (%)"] = th_peak_confidences[peak_id] + json_dict["Confidence (%)"] = float(th_peak_confidences[peak_id]) json_dicts.append(json_dict) chunk_df = pd.DataFrame(rows) @@ -817,9 +817,9 @@ def process_file( rows.append(row) if predict_proba: chunk_timestamp = chunk_timestamps[peak_id] - json_dict["Time (s)"] =
chunk_timestamp + json_dict["Time (s)"] = float(chunk_timestamp) json_dict["Time (hh:mm:ss)"] = seconds_to_hhmmss(chunk_timestamp) - json_dict["Confidence (%)"] = th_peak_confidences[peak_id] + json_dict["Confidence (%)"] = float(th_peak_confidences[peak_id]) json_dicts.append(json_dict) chunk_df = pd.DataFrame(rows) From 32a0da0a985e8a2ba000fca656deba7bf9152ba5 Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 18:34:35 +0100 Subject: [PATCH 12/22] rename json header fields incorporate feedback from justin salamon --- birdvoxdetect/core.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/birdvoxdetect/core.py b/birdvoxdetect/core.py index 1653385..2513118 100644 --- a/birdvoxdetect/core.py +++ b/birdvoxdetect/core.py @@ -246,8 +246,9 @@ def process_file( with open(filepath, "rb") as fhandle: for chunk in iter(lambda: fhandle.read(4096), b""): hash_md5.update(chunk) - json_header = { - "absolute_path": os.path.abspath(filepath), + json_metadata = { + "file_name": os.path.basename(filepath), + "file_path": os.path.abspath(filepath), "audio_duration": librosa.get_duration(filename=filepath), "birdvoxdetect_threshold": threshold, "birdvoxactivate_threshold": bva_threshold, @@ -256,7 +257,7 @@ def process_file( "filepath": filepath, "hostname": socket.gethostname(), "md5_checksum": hash_md5.hexdigest(), - "versions": { + "package_versions": { module.__name__: module.__version__ for module in modules } } From 841164d9286a6df7eab68f44045d13284ca38b77 Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 18:35:05 +0100 Subject: [PATCH 13/22] export taxonomy in json --- birdvoxdetect/core.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/birdvoxdetect/core.py b/birdvoxdetect/core.py index 2513118..5f37e90 100644 --- a/birdvoxdetect/core.py +++ b/birdvoxdetect/core.py @@ -262,7 +262,7 @@ def process_file( } } with open(json_path, "w") as f: - json.dump({"header": 
json_header}, f) + json.dump({"metadata": json_metadata, "taxonomy": taxonomy}, f) json_dicts = [] # Create directory of output clips. @@ -454,8 +454,9 @@ def process_file( # Export probabilities as JSON file. with open(json_path, "w") as f: json.dump({ - "header": json_header, - "events": json_dicts + "events": json_dicts, + "metadata": json_metadata, + "taxonomy": taxonomy }, f) # Export clips. @@ -651,8 +652,9 @@ def process_file( # Export probabilities as JSON file. with open(json_path, "w") as f: json.dump({ - "header": json_header, - "events": json_dicts + "events": json_dicts, + "metadata": json_metadata, + "taxonomy": taxonomy }, f) # Export clips. @@ -852,8 +854,9 @@ def process_file( # Export probabilities as JSON file. with open(json_path, "w") as f: json.dump({ - "header": json_header, - "events": json_dicts + "events": json_dicts, + "metadata": json_metadata, + "taxonomy": taxonomy }, f) # Export clips. From d83303a76f0b4bade5d1aecb45049728b7e67860 Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 18:35:11 +0100 Subject: [PATCH 14/22] export sensor faults in json --- birdvoxdetect/core.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/birdvoxdetect/core.py b/birdvoxdetect/core.py index 5f37e90..66fa057 100644 --- a/birdvoxdetect/core.py +++ b/birdvoxdetect/core.py @@ -853,9 +853,11 @@ def process_file( # Export probabilities as JSON file. 
with open(json_path, "w") as f: + json_faultlist = faultlist_df.to_json(orient="index") json.dump({ "events": json_dicts, "metadata": json_metadata, + "sensor_faults": json.loads(json_faultlist), "taxonomy": taxonomy }, f) From 2dbe3cce39d65d8c681549961946b6e304757e4a Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 18:59:19 +0100 Subject: [PATCH 15/22] md5_checksum -> audio_md5_checksum --- birdvoxdetect/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/birdvoxdetect/core.py b/birdvoxdetect/core.py index 66fa057..152aa3d 100644 --- a/birdvoxdetect/core.py +++ b/birdvoxdetect/core.py @@ -250,13 +250,13 @@ def process_file( "file_name": os.path.basename(filepath), "file_path": os.path.abspath(filepath), "audio_duration": librosa.get_duration(filename=filepath), + "audio_md5_checksum": hash_md5.hexdigest(), "birdvoxdetect_threshold": threshold, "birdvoxactivate_threshold": bva_threshold, "classifier_name": classifier_name, "detector_name": detector_name, "filepath": filepath, "hostname": socket.gethostname(), - "md5_checksum": hash_md5.hexdigest(), "package_versions": { module.__name__: module.__version__ for module in modules } From 46324dce0352fadd83d31064a371c23a2d995f85 Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 19:22:50 +0100 Subject: [PATCH 16/22] bugfix case predict_proba==False json_path referenced before assignment --- birdvoxdetect/core.py | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/birdvoxdetect/core.py b/birdvoxdetect/core.py index 152aa3d..a863892 100644 --- a/birdvoxdetect/core.py +++ b/birdvoxdetect/core.py @@ -452,12 +452,13 @@ def process_file( df.to_csv(checklist_path, columns=df_columns, index=False) # Export probabilities as JSON file. 
- with open(json_path, "w") as f: - json.dump({ - "events": json_dicts, - "metadata": json_metadata, - "taxonomy": taxonomy - }, f) + if predict_proba: + with open(json_path, "w") as f: + json.dump({ + "events": json_dicts, + "metadata": json_metadata, + "taxonomy": taxonomy + }, f) # Export clips. if export_clips and len(df)>0: @@ -650,12 +651,13 @@ def process_file( df.to_csv(checklist_path, columns=df_columns, index=False) # Export probabilities as JSON file. - with open(json_path, "w") as f: - json.dump({ - "events": json_dicts, - "metadata": json_metadata, - "taxonomy": taxonomy - }, f) + if predict_proba: + with open(json_path, "w") as f: + json.dump({ + "events": json_dicts, + "metadata": json_metadata, + "taxonomy": taxonomy + }, f) # Export clips. if export_clips and len(df)>0: @@ -852,14 +854,15 @@ def process_file( df.to_csv(checklist_path, columns=df_columns, index=False) # Export probabilities as JSON file. - with open(json_path, "w") as f: - json_faultlist = faultlist_df.to_json(orient="index") - json.dump({ - "events": json_dicts, - "metadata": json_metadata, - "sensor_faults": json.loads(json_faultlist), - "taxonomy": taxonomy - }, f) + if predict_proba: + with open(json_path, "w") as f: + json_faultlist = faultlist_df.to_json(orient="index") + json.dump({ + "events": json_dicts, + "metadata": json_metadata, + "sensor_faults": json.loads(json_faultlist), + "taxonomy": taxonomy + }, f) # Export clips. 
if export_clips and len(df)>0: From 8cf6ab7dd0750017ee5424f6c07c617cbe5902ee Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 20:09:38 +0100 Subject: [PATCH 17/22] bis --- birdvoxdetect/core.py | 68 +++++++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 31 deletions(-) diff --git a/birdvoxdetect/core.py b/birdvoxdetect/core.py index a863892..2160cc5 100644 --- a/birdvoxdetect/core.py +++ b/birdvoxdetect/core.py @@ -12,11 +12,14 @@ import operator import os import pandas as pd +import platform import scipy import scipy.signal import sklearn import socket import soundfile as sf +import sys +import time import traceback import warnings @@ -224,17 +227,18 @@ def process_file( df.to_csv(checklist_path,index=False) # Initialize fault log as a Pandas DataFrame. + faultlist_path = get_output_path( + filepath, suffix + "faults.csv", output_dir=output_dir + ) + faultlist_df_columns = [ + "Start (hh:mm:ss)", + "Stop (hh:mm:ss)", + "Fault confidence (%)", + ] + faultlist_df = pd.DataFrame(columns=faultlist_df_columns) if export_faults: - faultlist_path = get_output_path( - filepath, suffix + "faults.csv", output_dir=output_dir - ) - faultlist_df_columns = [ - "Start (hh:mm:ss)", - "Stop (hh:mm:ss)", - "Fault confidence (%)", - ] - faultlist_df = pd.DataFrame(columns=faultlist_df_columns) - faultlist_df.to_csv(faultlist_path, columns=faultlist_df_columns, index=False) + faultlist_df.to_csv( + faultlist_path, columns=faultlist_df_columns, index=False) # Initialize JSON output. if predict_proba: @@ -353,15 +357,15 @@ def process_file( has_sensor_fault = False # Add first row to sensor fault log. 
+ faultlist_df = faultlist_df.append( + { + "Start (hh:mm:ss)": seconds_to_hhmmss(0.0), + "Stop (hh:mm:ss)": seconds_to_hhmmss(queue_length * chunk_duration), + "Fault confidence (%)": int(sensor_fault_probability * 100), + }, + ignore_index=True, + ) if export_faults: - faultlist_df = faultlist_df.append( - { - "Start (hh:mm:ss)": seconds_to_hhmmss(0.0), - "Stop (hh:mm:ss)": seconds_to_hhmmss(queue_length * chunk_duration), - "Fault confidence (%)": int(sensor_fault_probability * 100), - }, - ignore_index=True, - ) faultlist_df.to_csv( faultlist_path, columns=faultlist_df_columns, index=False ) @@ -534,23 +538,23 @@ def process_file( )[0][1] # Add row to sensor fault log. - has_sensor_fault = sensor_fault_probability > bva_threshold + faultlist_df = faultlist_df.append( + { + "Start (hh:mm:ss)": seconds_to_hhmmss(chunk_id * chunk_duration), + "Stop (hh:mm:ss)": seconds_to_hhmmss( + (chunk_id + 1) * chunk_duration + ), + "Fault confidence (%)": int(sensor_fault_probability * 100), + }, + ignore_index=True, + ) if export_faults: - faultlist_df = faultlist_df.append( - { - "Start (hh:mm:ss)": seconds_to_hhmmss(chunk_id * chunk_duration), - "Stop (hh:mm:ss)": seconds_to_hhmmss( - (chunk_id + 1) * chunk_duration - ), - "Fault confidence (%)": int(sensor_fault_probability * 100), - }, - ignore_index=True, - ) faultlist_df.to_csv( faultlist_path, columns=faultlist_df_columns, index=False ) # If probability of sensor fault is above threshold, exclude chunk. + has_sensor_fault = (sensor_fault_probability > bva_threshold) if has_sensor_fault: logger.info( "Probability of sensor fault: {:5.2f}%".format( @@ -701,7 +705,7 @@ def process_file( # unstable with files shorter than 30 minutes, which is why we issue a # warning. Also, we do not try to detect sensor faults in files shorter than # 30 minutes. 
- if (n_chunks > 1) and export_faults: + if (n_chunks > 1): faultlist_df = faultlist_df.append( { "Start (hh:mm:ss)": seconds_to_hhmmss(chunk_id * chunk_duration), @@ -710,7 +714,9 @@ def process_file( }, ignore_index=True, ) - faultlist_df.to_csv(faultlist_path, columns=faultlist_df_columns, index=False) + if export_faults: + faultlist_df.to_csv( + faultlist_path, columns=faultlist_df_columns, index=False) if (n_chunks > 1) and has_sensor_fault: logger.info( From 3e9d2b557fe7570821ee53a2bf8c0fb425f05151 Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 20:09:55 +0100 Subject: [PATCH 18/22] record elapsed time --- birdvoxdetect/core.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/birdvoxdetect/core.py b/birdvoxdetect/core.py index 2160cc5..6ec6604 100644 --- a/birdvoxdetect/core.py +++ b/birdvoxdetect/core.py @@ -66,6 +66,8 @@ def process_file( custom_objects=None, bva_threshold=0.5, ): + # Record local time. This will eventually serve to measure elapsed time. + start_time = time.time() # Create output_dir if necessary. 
if output_dir is not None: @@ -863,6 +865,7 @@ def process_file( if predict_proba: with open(json_path, "w") as f: json_faultlist = faultlist_df.to_json(orient="index") + json_metadata["elapsed_time"] = time.time() - start_time json.dump({ "events": json_dicts, "metadata": json_metadata, From 6aa644308249ce3e7f0fafc4b0b837eafbf09fdd Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 20:10:15 +0100 Subject: [PATCH 19/22] add sys and platform specs to json metadata --- birdvoxdetect/core.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/birdvoxdetect/core.py b/birdvoxdetect/core.py index 6ec6604..e5fc261 100644 --- a/birdvoxdetect/core.py +++ b/birdvoxdetect/core.py @@ -261,11 +261,17 @@ def process_file( "birdvoxactivate_threshold": bva_threshold, "classifier_name": classifier_name, "detector_name": detector_name, - "filepath": filepath, "hostname": socket.gethostname(), + "machine_time": datetime.datetime.now().astimezone().isoformat(), "package_versions": { module.__name__: module.__version__ for module in modules - } + }, + "platform_machine": platform.machine(), + "platform_processor": platform.processor(), + "platform_release": platform.release(), + "platform_system": platform.system(), + "platform_version": platform.version(), + "sys_version": sys.version } with open(json_path, "w") as f: json.dump({"metadata": json_metadata, "taxonomy": taxonomy}, f) From a721e9976195823940868fa1d2d74089453d675d Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 20:20:14 +0100 Subject: [PATCH 20/22] unit tests for predict_proba --- tests/test_core.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/test_core.py b/tests/test_core.py index 787cdf8..7e81fbf 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1,5 +1,6 @@ import datetime import h5py +import json import numpy as np import os import pandas as pd @@ -163,6 +164,22 @@ def test_process_file(): ) 
shutil.rmtree(tmpdir) + # export probabilities as JSON file + tmpdir = tempfile.mkdtemp() + process_file( + os.path.join(TEST_AUDIO_DIR, POSITIVE_MD5 + ".wav"), + output_dir=tmpdir, + predict_proba=True, + ) + json_path = os.path.join(tmpdir, POSITIVE_MD5 + "_proba.csv") + assert os.path.exists(json_path) + with open(json_path, "r") as json_file: + json_dict = json.load(json_file) + assert "events" in json_dict.keys() + assert "metadata" in json_dict.keys() + assert "taxonomy" in json_dict.keys() + shutil.rmtree(tmpdir) + # suffix tmpdir = tempfile.mkdtemp() process_file( From 8020e04f774d6975f21fe420a5aa5990cc7d3017 Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 21:38:03 +0100 Subject: [PATCH 21/22] bugfix test --- tests/test_core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_core.py b/tests/test_core.py index 7e81fbf..9419f26 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -171,7 +171,7 @@ def test_process_file(): output_dir=tmpdir, predict_proba=True, ) - json_path = os.path.join(tmpdir, POSITIVE_MD5 + "_proba.csv") + json_path = os.path.join(tmpdir, POSITIVE_MD5 + "_proba.json") assert os.path.exists(json_path) with open(json_path, "r") as json_file: json_dict = json.load(json_file) From 9725abc91c11d51ba156ace280c0794964e10cd1 Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 21:50:16 +0100 Subject: [PATCH 22/22] 96% unit test coverage of CLI --- tests/test_cli.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 860a4bf..41af694 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -183,12 +183,18 @@ def test_main(): def test_script_main(capsys): # Duplicate regression test from test_run just to hit coverage tempdir = tempfile.mkdtemp() - with patch("sys.argv", ["birdvoxdetect", POSITIVE_PATH, "--output-dir", tempdir]): + with patch( + "sys.argv", + [ + "birdvoxdetect", POSITIVE_PATH, + 
"--output-dir", tempdir, + "--suffix", "cli", + "--export-clips"]): import birdvoxdetect.__main__ # Check output file created outfile = os.path.join( - tempdir, "fd79e55d-d3a3-4083-aba1-4f00b545c3d6_checklist.csv" + tempdir, "fd79e55d-d3a3-4083-aba1-4f00b545c3d6_cli_checklist.csv" ) assert os.path.isfile(outfile) shutil.rmtree(tempdir)