From 542f2850c3df08343bf9aafb44e7117954f5f918 Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 15:14:00 +0100 Subject: [PATCH 01/22] enable export_json (part 1) --- birdvoxdetect/core.py | 64 +++++++++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 26 deletions(-) diff --git a/birdvoxdetect/core.py b/birdvoxdetect/core.py index 6200854..58b1388 100644 --- a/birdvoxdetect/core.py +++ b/birdvoxdetect/core.py @@ -48,6 +48,7 @@ def process_file( export_confidence=False, export_context=False, export_faults=False, + export_json=False, export_logger=False, threshold=50.0, suffix="", @@ -247,6 +248,10 @@ def process_file( if export_context: contexts = [] + # Initialize list of probabilistic predictions. + if export_json: + json_dicts = [] + # Print chunk duration. logger.info("Chunk duration: {} seconds".format(chunk_duration)) logger.info("") @@ -381,12 +386,14 @@ def process_file( n_peaks = len(chunk_timestamps) # Classify species. - chunk_df = pd.DataFrame( - map( - lambda x: classify_species(classifier, chunk_pcen, x, taxonomy), - th_peak_locs, - ) - ) + row = [] + for th_peak_loc in th_peak_locs: + row, json_dict = classify_species( + classifier, chunk_pcen, x, taxonomy) + rows.append(row) + if export_json: + json_dicts.append(json_dict) + chunk_df = pd.DataFrame(rows) # Count flight calls. if n_peaks>0: @@ -403,7 +410,7 @@ def process_file( logger.info("Number of flight calls in current chunk: 0") logger.info("") - # Export timestamps. + # Export checklist. chunk_hhmmss = list(map(seconds_to_hhmmss, chunk_timestamps)) chunk_df["Time (hh:mm:ss)"] = event_hhmmss + chunk_hhmmss chunk_df["Confidence (%)"] = th_peak_confidences @@ -565,12 +572,14 @@ def process_file( n_peaks = len(chunk_timestamps) # Classify species. 
- chunk_df = pd.DataFrame( - map( - lambda x: classify_species(classifier, chunk_pcen, x, taxonomy), - th_peak_locs, - ) - ) + row = [] + for th_peak_loc in th_peak_locs: + row, json_dict = classify_species( + classifier, chunk_pcen, x, taxonomy) + rows.append(row) + if export_json: + json_dicts.append(json_dict) + chunk_df = pd.DataFrame(rows) # Count flight calls. if n_peaks>0: @@ -587,7 +596,7 @@ def process_file( logger.info("Number of flight calls in current chunk: 0") logger.info("") - # Export timestamps. + # Export checklist. chunk_hhmmss = list(map(seconds_to_hhmmss, chunk_timestamps)) chunk_df["Time (hh:mm:ss)"] = event_hhmmss + chunk_hhmmss chunk_df["Confidence (%)"] = th_peak_confidences @@ -753,12 +762,14 @@ def process_file( n_peaks = len(chunk_timestamps) # Classify species. - chunk_df = pd.DataFrame( - map( - lambda x: classify_species(classifier, chunk_pcen, x, taxonomy), - th_peak_locs, - ) - ) + row = [] + for th_peak_loc in th_peak_locs: + row, json_dict = classify_species( + classifier, chunk_pcen, x, taxonomy) + rows.append(row) + if export_json: + json_dicts.append(json_dict) + chunk_df = pd.DataFrame(rows) # Count flight calls. if n_peaks>0: @@ -775,7 +786,7 @@ def process_file( logger.info("Number of flight calls in current chunk: 0") logger.info("") - # Export timestamps. + # Export checklist. chunk_hhmmss = list(map(seconds_to_hhmmss, chunk_timestamps)) chunk_df["Time (hh:mm:ss)"] = event_hhmmss + chunk_hhmmss chunk_df["Confidence (%)"] = th_peak_confidences @@ -944,7 +955,8 @@ def classify_species(classifier, chunk_pcen, th_peak_loc, taxonomy): bvc_prediction = birdvoxclassify.predict(pcen_clip, classifier=classifier) # Format prediction - prediction = birdvoxclassify.format_pred(bvc_prediction, taxonomy=taxonomy) + formatted_prediction = birdvoxclassify.format_pred( + bvc_prediction, taxonomy=taxonomy) # Get prediction levels. 
pred_levels = list(prediction.keys()) @@ -954,13 +966,13 @@ def classify_species(classifier, chunk_pcen, th_peak_loc, taxonomy): for pred_level in pred_levels: # List probabilities prob_dict = { - k: prediction[pred_level][k]["probability"] - for k in prediction[pred_level] + k: formatted_prediction[pred_level][k]["probability"] + for k in formatted_prediction[pred_level] } # Extract class of maximum probability argmax_taxon = max(prob_dict.items(), key=operator.itemgetter(1))[0] - argmax_dict = prediction[pred_level][argmax_taxon] + argmax_dict = formatted_prediction[pred_level][argmax_taxon] if (pred_level=="coarse"): argmax_prediction["Order"] = "other" @@ -975,7 +987,7 @@ def classify_species(classifier, chunk_pcen, th_peak_loc, taxonomy): alias = aliases["species_4letter_code"] argmax_prediction["Species (4-letter code)"] = alias - return argmax_prediction + return argmax_prediction, formatted_prediction def compute_pcen(audio, sr): From f0a8ec4261fe525f919b09980766820455345157 Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 15:29:56 +0100 Subject: [PATCH 02/22] bugfix th_peak_loc --- birdvoxdetect/core.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/birdvoxdetect/core.py b/birdvoxdetect/core.py index 58b1388..04cf8b7 100644 --- a/birdvoxdetect/core.py +++ b/birdvoxdetect/core.py @@ -389,7 +389,7 @@ def process_file( row = [] for th_peak_loc in th_peak_locs: row, json_dict = classify_species( - classifier, chunk_pcen, x, taxonomy) + classifier, chunk_pcen, th_peak_loc, taxonomy) rows.append(row) if export_json: json_dicts.append(json_dict) @@ -575,7 +575,7 @@ def process_file( row = [] for th_peak_loc in th_peak_locs: row, json_dict = classify_species( - classifier, chunk_pcen, x, taxonomy) + classifier, chunk_pcen, th_peak_loc, taxonomy) rows.append(row) if export_json: json_dicts.append(json_dict) @@ -765,7 +765,7 @@ def process_file( row = [] for th_peak_loc in th_peak_locs: row, json_dict = 
classify_species( - classifier, chunk_pcen, x, taxonomy) + classifier, chunk_pcen, th_peak_loc, taxonomy) rows.append(row) if export_json: json_dicts.append(json_dict) From 07edb48690d7090b1490193888a84ab3b1768946 Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 15:31:11 +0100 Subject: [PATCH 03/22] bugfix pred_levels = list(formatted_prediction.keys()) --- birdvoxdetect/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/birdvoxdetect/core.py b/birdvoxdetect/core.py index 04cf8b7..6f8e9bc 100644 --- a/birdvoxdetect/core.py +++ b/birdvoxdetect/core.py @@ -959,7 +959,7 @@ def classify_species(classifier, chunk_pcen, th_peak_loc, taxonomy): bvc_prediction, taxonomy=taxonomy) # Get prediction levels. - pred_levels = list(prediction.keys()) + pred_levels = list(formatted_prediction.keys()) # Loop over taxonomical levels. argmax_prediction = {} From 889c084d361bae4a1915e67a4b84f7bb604b5ced Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 15:32:56 +0100 Subject: [PATCH 04/22] rows = [] --- birdvoxdetect/core.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/birdvoxdetect/core.py b/birdvoxdetect/core.py index 6f8e9bc..9efe952 100644 --- a/birdvoxdetect/core.py +++ b/birdvoxdetect/core.py @@ -386,7 +386,7 @@ def process_file( n_peaks = len(chunk_timestamps) # Classify species. - row = [] + rows = [] for th_peak_loc in th_peak_locs: row, json_dict = classify_species( classifier, chunk_pcen, th_peak_loc, taxonomy) @@ -572,7 +572,7 @@ def process_file( n_peaks = len(chunk_timestamps) # Classify species. - row = [] + rows = [] for th_peak_loc in th_peak_locs: row, json_dict = classify_species( classifier, chunk_pcen, th_peak_loc, taxonomy) @@ -762,7 +762,7 @@ def process_file( n_peaks = len(chunk_timestamps) # Classify species. 
- row = [] + rows = [] for th_peak_loc in th_peak_locs: row, json_dict = classify_species( classifier, chunk_pcen, th_peak_loc, taxonomy) From 524e395d1c96a549df3213d41fd313927fc77699 Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 16:04:24 +0100 Subject: [PATCH 05/22] predict_proba --- birdvoxdetect/core.py | 61 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 53 insertions(+), 8 deletions(-) diff --git a/birdvoxdetect/core.py b/birdvoxdetect/core.py index 9efe952..c363395 100644 --- a/birdvoxdetect/core.py +++ b/birdvoxdetect/core.py @@ -14,6 +14,7 @@ import scipy import scipy.signal import sklearn +import socket import soundfile as sf import traceback import warnings @@ -48,8 +49,8 @@ def process_file( export_confidence=False, export_context=False, export_faults=False, - export_json=False, export_logger=False, + predict_proba=False, threshold=50.0, suffix="", clip_duration=1.0, @@ -234,6 +235,33 @@ def process_file( faultlist_df = pd.DataFrame(columns=faultlist_df_columns) faultlist_df.to_csv(faultlist_path, columns=faultlist_df_columns, index=False) + # Initialize JSON output. + if predict_proba: + json_path = get_output_path( + filepath, suffix + "proba.json", output_dir + ) + # Get MD5 hash. + hash_md5 = hashlib.md5() + with open(file_path, "rb") as fhandle: + for chunk in iter(lambda: fhandle.read(4096), b""): + hash_md5.update(chunk) + json_header = { + "absolute_path": os.path.abspath(filepath), + "audio_duration": librosa.get_duration(filepath), + "birdvoxdetect_threshold": threshold, + "birdvoxactivate_threshold": bva_threshold, + "classifier_name": classifier_name, + "detector_name": detector_name, + "filepath": filepath, + "hostname": socket.gethostname(), + "md5_checksum": hash_md5.hexdigest(), + "versions": { + module.__name__: module.__version__ for module in modules + } + } + with open(json_path, "w") as f: + json.dump({"header": json_header}, f) + # Create directory of output clips. 
if export_clips: clips_dir = get_output_path(filepath, suffix + "clips", output_dir=output_dir) @@ -248,10 +276,6 @@ def process_file( if export_context: contexts = [] - # Initialize list of probabilistic predictions. - if export_json: - json_dicts = [] - # Print chunk duration. logger.info("Chunk duration: {} seconds".format(chunk_duration)) logger.info("") @@ -391,7 +415,7 @@ def process_file( row, json_dict = classify_species( classifier, chunk_pcen, th_peak_loc, taxonomy) rows.append(row) - if export_json: + if predict_proba: json_dicts.append(json_dict) chunk_df = pd.DataFrame(rows) @@ -420,6 +444,13 @@ def process_file( df = df.append(chunk_df) df.to_csv(checklist_path, columns=df_columns, index=False) + # Export probabilities as JSON file. + with open(json_path, "w") as f: + json.dump({ + "header": json_header, + "events": json_dicts + }, f) + # Export clips. if export_clips and len(df)>0: chunk_zip = zip( @@ -577,7 +608,7 @@ def process_file( row, json_dict = classify_species( classifier, chunk_pcen, th_peak_loc, taxonomy) rows.append(row) - if export_json: + if predict_proba: json_dicts.append(json_dict) chunk_df = pd.DataFrame(rows) @@ -606,6 +637,13 @@ def process_file( df = df.append(chunk_df) df.to_csv(checklist_path, columns=df_columns, index=False) + # Export probabilities as JSON file. + with open(json_path, "w") as f: + json.dump({ + "header": json_header, + "events": json_dicts + }, f) + # Export clips. if export_clips and len(df)>0: chunk_zip = zip( @@ -767,7 +805,7 @@ def process_file( row, json_dict = classify_species( classifier, chunk_pcen, th_peak_loc, taxonomy) rows.append(row) - if export_json: + if predict_proba: json_dicts.append(json_dict) chunk_df = pd.DataFrame(rows) @@ -796,6 +834,13 @@ def process_file( df = df.append(chunk_df) df.to_csv(checklist_path, columns=df_columns, index=False) + # Export probabilities as JSON file. 
+ with open(json_path, "w") as f: + json.dump({ + "header": json_header, + "events": json_dicts + }, f) + # Export clips. if export_clips and len(df)>0: chunk_zip = zip( From da949bc902eca071ab518877b95e3aaebcac0e38 Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 16:06:34 +0100 Subject: [PATCH 06/22] import hashlib --- birdvoxdetect/core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/birdvoxdetect/core.py b/birdvoxdetect/core.py index c363395..73c029a 100644 --- a/birdvoxdetect/core.py +++ b/birdvoxdetect/core.py @@ -3,6 +3,7 @@ from contextlib import redirect_stderr import datetime import h5py +import hashlib import joblib import json import librosa From 872f1e525e08011a3eb558cf05648f418db95b24 Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 16:07:02 +0100 Subject: [PATCH 07/22] with open(filepath, "rb") as fhandle --- birdvoxdetect/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/birdvoxdetect/core.py b/birdvoxdetect/core.py index 73c029a..8024198 100644 --- a/birdvoxdetect/core.py +++ b/birdvoxdetect/core.py @@ -243,7 +243,7 @@ def process_file( ) # Get MD5 hash. 
hash_md5 = hashlib.md5() - with open(file_path, "rb") as fhandle: + with open(filepath, "rb") as fhandle: for chunk in iter(lambda: fhandle.read(4096), b""): hash_md5.update(chunk) json_header = { From 341261702cd3324ec9848c3742660ca39d33fd1b Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 16:07:53 +0100 Subject: [PATCH 08/22] librosa.get_duration(filename=filepath) --- birdvoxdetect/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/birdvoxdetect/core.py b/birdvoxdetect/core.py index 8024198..6b5af4f 100644 --- a/birdvoxdetect/core.py +++ b/birdvoxdetect/core.py @@ -248,7 +248,7 @@ def process_file( hash_md5.update(chunk) json_header = { "absolute_path": os.path.abspath(filepath), - "audio_duration": librosa.get_duration(filepath), + "audio_duration": librosa.get_duration(filename=filepath), "birdvoxdetect_threshold": threshold, "birdvoxactivate_threshold": bva_threshold, "classifier_name": classifier_name, From 766e75e2d2980153d1833feae498a80464832532 Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 16:09:15 +0100 Subject: [PATCH 09/22] initialize json_dicts --- birdvoxdetect/core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/birdvoxdetect/core.py b/birdvoxdetect/core.py index 6b5af4f..257e4b3 100644 --- a/birdvoxdetect/core.py +++ b/birdvoxdetect/core.py @@ -262,6 +262,7 @@ def process_file( } with open(json_path, "w") as f: json.dump({"header": json_header}, f) + json_dicts = [] # Create directory of output clips. 
if export_clips: From 12f04e9db9bd5d7fcb69bd2ed6d7b29f39593518 Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 16:15:39 +0100 Subject: [PATCH 10/22] store time and confidence in each JSON event --- birdvoxdetect/core.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/birdvoxdetect/core.py b/birdvoxdetect/core.py index 257e4b3..5422301 100644 --- a/birdvoxdetect/core.py +++ b/birdvoxdetect/core.py @@ -413,11 +413,15 @@ def process_file( # Classify species. rows = [] - for th_peak_loc in th_peak_locs: + for peak_id, th_peak_loc in enumerate(th_peak_locs): row, json_dict = classify_species( classifier, chunk_pcen, th_peak_loc, taxonomy) rows.append(row) if predict_proba: + chunk_timestamp = chunk_timestamps[peak_id] + json_dict["Time (s)"] = chunk_timestamp + json_dict["Time (hh:mm:ss)"] = seconds_to_hhmmss(chunk_timestamp) + json_dict["Confidence (%)"] = th_peak_confidences[peak_id] json_dicts.append(json_dict) chunk_df = pd.DataFrame(rows) @@ -606,11 +610,15 @@ def process_file( # Classify species. rows = [] - for th_peak_loc in th_peak_locs: + for peak_id, th_peak_loc in enumerate(th_peak_locs): row, json_dict = classify_species( classifier, chunk_pcen, th_peak_loc, taxonomy) rows.append(row) if predict_proba: + chunk_timestamp = chunk_timestamps[peak_id] + json_dict["Time (s)"] = chunk_timestamp + json_dict["Time (hh:mm:ss)"] = seconds_to_hhmmss(chunk_timestamp) + json_dict["Confidence (%)"] = th_peak_confidences[peak_id] json_dicts.append(json_dict) chunk_df = pd.DataFrame(rows) @@ -803,11 +811,15 @@ def process_file( # Classify species. 
rows = [] - for th_peak_loc in th_peak_locs: + for peak_id, th_peak_loc in enumerate(th_peak_locs): row, json_dict = classify_species( classifier, chunk_pcen, th_peak_loc, taxonomy) rows.append(row) if predict_proba: + chunk_timestamp = chunk_timestamps[peak_id] + json_dict["Time (s)"] = chunk_timestamp + json_dict["Time (hh:mm:ss)"] = seconds_to_hhmmss(chunk_timestamp) + json_dict["Confidence (%)"] = th_peak_confidences[peak_id] json_dicts.append(json_dict) chunk_df = pd.DataFrame(rows) From 71c48c7752d575a134f13107260934aba5fd0583 Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 16:21:43 +0100 Subject: [PATCH 11/22] convert timestamps to float before json dump --- birdvoxdetect/core.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/birdvoxdetect/core.py b/birdvoxdetect/core.py index 5422301..1653385 100644 --- a/birdvoxdetect/core.py +++ b/birdvoxdetect/core.py @@ -419,9 +419,9 @@ def process_file( rows.append(row) if predict_proba: chunk_timestamp = chunk_timestamps[peak_id] - json_dict["Time (s)"] = chunk_timestamp + json_dict["Time (s)"] = float(chunk_timestamp) json_dict["Time (hh:mm:ss)"] = seconds_to_hhmmss(chunk_timestamp) - json_dict["Confidence (%)"] = th_peak_confidences[peak_id] + json_dict["Confidence (%)"] = float(th_peak_confidences[peak_id]) json_dicts.append(json_dict) chunk_df = pd.DataFrame(rows) @@ -616,9 +616,9 @@ def process_file( rows.append(row) if predict_proba: chunk_timestamp = chunk_timestamps[peak_id] - json_dict["Time (s)"] = chunk_timestamp + json_dict["Time (s)"] = float(chunk_timestamp) json_dict["Time (hh:mm:ss)"] = seconds_to_hhmmss(chunk_timestamp) - json_dict["Confidence (%)"] = th_peak_confidences[peak_id] + json_dict["Confidence (%)"] = float(th_peak_confidences[peak_id]) json_dicts.append(json_dict) chunk_df = pd.DataFrame(rows) @@ -817,9 +817,9 @@ def process_file( rows.append(row) if predict_proba: chunk_timestamp = chunk_timestamps[peak_id] - json_dict["Time (s)"] =
chunk_timestamp + json_dict["Time (s)"] = float(chunk_timestamp) json_dict["Time (hh:mm:ss)"] = seconds_to_hhmmss(chunk_timestamp) - json_dict["Confidence (%)"] = th_peak_confidences[peak_id] + json_dict["Confidence (%)"] = float(th_peak_confidences[peak_id]) json_dicts.append(json_dict) chunk_df = pd.DataFrame(rows) From 32a0da0a985e8a2ba000fca656deba7bf9152ba5 Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 18:34:35 +0100 Subject: [PATCH 12/22] rename json header fields incorporate feedback from justin salamon --- birdvoxdetect/core.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/birdvoxdetect/core.py b/birdvoxdetect/core.py index 1653385..2513118 100644 --- a/birdvoxdetect/core.py +++ b/birdvoxdetect/core.py @@ -246,8 +246,9 @@ def process_file( with open(filepath, "rb") as fhandle: for chunk in iter(lambda: fhandle.read(4096), b""): hash_md5.update(chunk) - json_header = { - "absolute_path": os.path.abspath(filepath), + json_metadata = { + "file_name": os.path.basename(filepath), + "file_path": os.path.abspath(filepath), "audio_duration": librosa.get_duration(filename=filepath), "birdvoxdetect_threshold": threshold, "birdvoxactivate_threshold": bva_threshold, @@ -256,7 +257,7 @@ def process_file( "filepath": filepath, "hostname": socket.gethostname(), "md5_checksum": hash_md5.hexdigest(), - "versions": { + "package_versions": { module.__name__: module.__version__ for module in modules } } From 841164d9286a6df7eab68f44045d13284ca38b77 Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 18:35:05 +0100 Subject: [PATCH 13/22] export taxonomy in json --- birdvoxdetect/core.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/birdvoxdetect/core.py b/birdvoxdetect/core.py index 2513118..5f37e90 100644 --- a/birdvoxdetect/core.py +++ b/birdvoxdetect/core.py @@ -262,7 +262,7 @@ def process_file( } } with open(json_path, "w") as f: - json.dump({"header": 
json_header}, f) + json.dump({"metadata": json_metadata, "taxonomy": taxonomy}, f) json_dicts = [] # Create directory of output clips. @@ -454,8 +454,9 @@ def process_file( # Export probabilities as JSON file. with open(json_path, "w") as f: json.dump({ - "header": json_header, - "events": json_dicts + "events": json_dicts, + "metadata": json_metadata, + "taxonomy": taxonomy }, f) # Export clips. @@ -651,8 +652,9 @@ def process_file( # Export probabilities as JSON file. with open(json_path, "w") as f: json.dump({ - "header": json_header, - "events": json_dicts + "events": json_dicts, + "metadata": json_metadata, + "taxonomy": taxonomy }, f) # Export clips. @@ -852,8 +854,9 @@ def process_file( # Export probabilities as JSON file. with open(json_path, "w") as f: json.dump({ - "header": json_header, - "events": json_dicts + "events": json_dicts, + "metadata": json_metadata, + "taxonomy": taxonomy }, f) # Export clips. From d83303a76f0b4bade5d1aecb45049728b7e67860 Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 18:35:11 +0100 Subject: [PATCH 14/22] export sensor faults in json --- birdvoxdetect/core.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/birdvoxdetect/core.py b/birdvoxdetect/core.py index 5f37e90..66fa057 100644 --- a/birdvoxdetect/core.py +++ b/birdvoxdetect/core.py @@ -853,9 +853,11 @@ def process_file( # Export probabilities as JSON file. 
with open(json_path, "w") as f: + json_faultlist = faultlist_df.to_json(orient="index") json.dump({ "events": json_dicts, "metadata": json_metadata, + "sensor_faults": json.loads(json_faultlist), "taxonomy": taxonomy }, f) From 2dbe3cce39d65d8c681549961946b6e304757e4a Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 18:59:19 +0100 Subject: [PATCH 15/22] md5_checksum -> audio_md5_checksum --- birdvoxdetect/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/birdvoxdetect/core.py b/birdvoxdetect/core.py index 66fa057..152aa3d 100644 --- a/birdvoxdetect/core.py +++ b/birdvoxdetect/core.py @@ -250,13 +250,13 @@ def process_file( "file_name": os.path.basename(filepath), "file_path": os.path.abspath(filepath), "audio_duration": librosa.get_duration(filename=filepath), + "audio_md5_checksum": hash_md5.hexdigest(), "birdvoxdetect_threshold": threshold, "birdvoxactivate_threshold": bva_threshold, "classifier_name": classifier_name, "detector_name": detector_name, "filepath": filepath, "hostname": socket.gethostname(), - "md5_checksum": hash_md5.hexdigest(), "package_versions": { module.__name__: module.__version__ for module in modules } From 46324dce0352fadd83d31064a371c23a2d995f85 Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 19:22:50 +0100 Subject: [PATCH 16/22] bugfix case predict_proba==False json_path referenced before assignment --- birdvoxdetect/core.py | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/birdvoxdetect/core.py b/birdvoxdetect/core.py index 152aa3d..a863892 100644 --- a/birdvoxdetect/core.py +++ b/birdvoxdetect/core.py @@ -452,12 +452,13 @@ def process_file( df.to_csv(checklist_path, columns=df_columns, index=False) # Export probabilities as JSON file. 
- with open(json_path, "w") as f: - json.dump({ - "events": json_dicts, - "metadata": json_metadata, - "taxonomy": taxonomy - }, f) + if predict_proba: + with open(json_path, "w") as f: + json.dump({ + "events": json_dicts, + "metadata": json_metadata, + "taxonomy": taxonomy + }, f) # Export clips. if export_clips and len(df)>0: @@ -650,12 +651,13 @@ def process_file( df.to_csv(checklist_path, columns=df_columns, index=False) # Export probabilities as JSON file. - with open(json_path, "w") as f: - json.dump({ - "events": json_dicts, - "metadata": json_metadata, - "taxonomy": taxonomy - }, f) + if predict_proba: + with open(json_path, "w") as f: + json.dump({ + "events": json_dicts, + "metadata": json_metadata, + "taxonomy": taxonomy + }, f) # Export clips. if export_clips and len(df)>0: @@ -852,14 +854,15 @@ def process_file( df.to_csv(checklist_path, columns=df_columns, index=False) # Export probabilities as JSON file. - with open(json_path, "w") as f: - json_faultlist = faultlist_df.to_json(orient="index") - json.dump({ - "events": json_dicts, - "metadata": json_metadata, - "sensor_faults": json.loads(json_faultlist), - "taxonomy": taxonomy - }, f) + if predict_proba: + with open(json_path, "w") as f: + json_faultlist = faultlist_df.to_json(orient="index") + json.dump({ + "events": json_dicts, + "metadata": json_metadata, + "sensor_faults": json.loads(json_faultlist), + "taxonomy": taxonomy + }, f) # Export clips. 
if export_clips and len(df)>0: From 8cf6ab7dd0750017ee5424f6c07c617cbe5902ee Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 20:09:38 +0100 Subject: [PATCH 17/22] bis --- birdvoxdetect/core.py | 68 +++++++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 31 deletions(-) diff --git a/birdvoxdetect/core.py b/birdvoxdetect/core.py index a863892..2160cc5 100644 --- a/birdvoxdetect/core.py +++ b/birdvoxdetect/core.py @@ -12,11 +12,14 @@ import operator import os import pandas as pd +import platform import scipy import scipy.signal import sklearn import socket import soundfile as sf +import sys +import time import traceback import warnings @@ -224,17 +227,18 @@ def process_file( df.to_csv(checklist_path,index=False) # Initialize fault log as a Pandas DataFrame. + faultlist_path = get_output_path( + filepath, suffix + "faults.csv", output_dir=output_dir + ) + faultlist_df_columns = [ + "Start (hh:mm:ss)", + "Stop (hh:mm:ss)", + "Fault confidence (%)", + ] + faultlist_df = pd.DataFrame(columns=faultlist_df_columns) if export_faults: - faultlist_path = get_output_path( - filepath, suffix + "faults.csv", output_dir=output_dir - ) - faultlist_df_columns = [ - "Start (hh:mm:ss)", - "Stop (hh:mm:ss)", - "Fault confidence (%)", - ] - faultlist_df = pd.DataFrame(columns=faultlist_df_columns) - faultlist_df.to_csv(faultlist_path, columns=faultlist_df_columns, index=False) + faultlist_df.to_csv( + faultlist_path, columns=faultlist_df_columns, index=False) # Initialize JSON output. if predict_proba: @@ -353,15 +357,15 @@ def process_file( has_sensor_fault = False # Add first row to sensor fault log. 
+ faultlist_df = faultlist_df.append( + { + "Start (hh:mm:ss)": seconds_to_hhmmss(0.0), + "Stop (hh:mm:ss)": seconds_to_hhmmss(queue_length * chunk_duration), + "Fault confidence (%)": int(sensor_fault_probability * 100), + }, + ignore_index=True, + ) if export_faults: - faultlist_df = faultlist_df.append( - { - "Start (hh:mm:ss)": seconds_to_hhmmss(0.0), - "Stop (hh:mm:ss)": seconds_to_hhmmss(queue_length * chunk_duration), - "Fault confidence (%)": int(sensor_fault_probability * 100), - }, - ignore_index=True, - ) faultlist_df.to_csv( faultlist_path, columns=faultlist_df_columns, index=False ) @@ -534,23 +538,23 @@ def process_file( )[0][1] # Add row to sensor fault log. - has_sensor_fault = sensor_fault_probability > bva_threshold + faultlist_df = faultlist_df.append( + { + "Start (hh:mm:ss)": seconds_to_hhmmss(chunk_id * chunk_duration), + "Stop (hh:mm:ss)": seconds_to_hhmmss( + (chunk_id + 1) * chunk_duration + ), + "Fault confidence (%)": int(sensor_fault_probability * 100), + }, + ignore_index=True, + ) if export_faults: - faultlist_df = faultlist_df.append( - { - "Start (hh:mm:ss)": seconds_to_hhmmss(chunk_id * chunk_duration), - "Stop (hh:mm:ss)": seconds_to_hhmmss( - (chunk_id + 1) * chunk_duration - ), - "Fault confidence (%)": int(sensor_fault_probability * 100), - }, - ignore_index=True, - ) faultlist_df.to_csv( faultlist_path, columns=faultlist_df_columns, index=False ) # If probability of sensor fault is above threshold, exclude chunk. + has_sensor_fault = (sensor_fault_probability > bva_threshold) if has_sensor_fault: logger.info( "Probability of sensor fault: {:5.2f}%".format( @@ -701,7 +705,7 @@ def process_file( # unstable with files shorter than 30 minutes, which is why we issue a # warning. Also, we do not try to detect sensor faults in files shorter than # 30 minutes. 
- if (n_chunks > 1) and export_faults: + if (n_chunks > 1): faultlist_df = faultlist_df.append( { "Start (hh:mm:ss)": seconds_to_hhmmss(chunk_id * chunk_duration), @@ -710,7 +714,9 @@ def process_file( }, ignore_index=True, ) - faultlist_df.to_csv(faultlist_path, columns=faultlist_df_columns, index=False) + if export_faults: + faultlist_df.to_csv( + faultlist_path, columns=faultlist_df_columns, index=False) if (n_chunks > 1) and has_sensor_fault: logger.info( From 3e9d2b557fe7570821ee53a2bf8c0fb425f05151 Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 20:09:55 +0100 Subject: [PATCH 18/22] record elapsed time --- birdvoxdetect/core.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/birdvoxdetect/core.py b/birdvoxdetect/core.py index 2160cc5..6ec6604 100644 --- a/birdvoxdetect/core.py +++ b/birdvoxdetect/core.py @@ -66,6 +66,8 @@ def process_file( custom_objects=None, bva_threshold=0.5, ): + # Record local time. This will eventually serve to measure elapsed time. + start_time = time.time() # Create output_dir if necessary. 
if output_dir is not None: @@ -863,6 +865,7 @@ def process_file( if predict_proba: with open(json_path, "w") as f: json_faultlist = faultlist_df.to_json(orient="index") + json_metadata["elapsed_time"] = time.time() - start_time json.dump({ "events": json_dicts, "metadata": json_metadata, From 6aa644308249ce3e7f0fafc4b0b837eafbf09fdd Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 20:10:15 +0100 Subject: [PATCH 19/22] add sys and platform specs to json metadata --- birdvoxdetect/core.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/birdvoxdetect/core.py b/birdvoxdetect/core.py index 6ec6604..e5fc261 100644 --- a/birdvoxdetect/core.py +++ b/birdvoxdetect/core.py @@ -261,11 +261,17 @@ def process_file( "birdvoxactivate_threshold": bva_threshold, "classifier_name": classifier_name, "detector_name": detector_name, - "filepath": filepath, "hostname": socket.gethostname(), + "machine_time": datetime.datetime.now().astimezone().isoformat(), "package_versions": { module.__name__: module.__version__ for module in modules - } + }, + "platform_machine": platform.machine(), + "platform_processor": platform.processor(), + "platform_release": platform.release(), + "platform_system": platform.system(), + "platform_version": platform.version(), + "sys_version": sys.version } with open(json_path, "w") as f: json.dump({"metadata": json_metadata, "taxonomy": taxonomy}, f) From a721e9976195823940868fa1d2d74089453d675d Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 20:20:14 +0100 Subject: [PATCH 20/22] unit tests for predict_proba --- tests/test_core.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/test_core.py b/tests/test_core.py index 787cdf8..7e81fbf 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1,5 +1,6 @@ import datetime import h5py +import json import numpy as np import os import pandas as pd @@ -163,6 +164,22 @@ def test_process_file(): ) 
shutil.rmtree(tmpdir) + # export probabilities as JSON file + tmpdir = tempfile.mkdtemp() + process_file( + os.path.join(TEST_AUDIO_DIR, POSITIVE_MD5 + ".wav"), + output_dir=tmpdir, + predict_proba=True, + ) + json_path = os.path.join(tmpdir, POSITIVE_MD5 + "_proba.csv") + assert os.path.exists(json_path) + with open(json_path, "r") as json_file: + json_dict = json.load(json_file) + assert "events" in json_dict.keys() + assert "metadata" in json_dict.keys() + assert "taxonomy" in json_dict.keys() + shutil.rmtree(tmpdir) + # suffix tmpdir = tempfile.mkdtemp() process_file( From 8020e04f774d6975f21fe420a5aa5990cc7d3017 Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 21:38:03 +0100 Subject: [PATCH 21/22] bugfix test --- tests/test_core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_core.py b/tests/test_core.py index 7e81fbf..9419f26 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -171,7 +171,7 @@ def test_process_file(): output_dir=tmpdir, predict_proba=True, ) - json_path = os.path.join(tmpdir, POSITIVE_MD5 + "_proba.csv") + json_path = os.path.join(tmpdir, POSITIVE_MD5 + "_proba.json") assert os.path.exists(json_path) with open(json_path, "r") as json_file: json_dict = json.load(json_file) From 9725abc91c11d51ba156ace280c0794964e10cd1 Mon Sep 17 00:00:00 2001 From: Vincent Lostanlen Date: Tue, 24 Nov 2020 21:50:16 +0100 Subject: [PATCH 22/22] 96% unit test coverage of CLI --- tests/test_cli.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 860a4bf..41af694 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -183,12 +183,18 @@ def test_main(): def test_script_main(capsys): # Duplicate regression test from test_run just to hit coverage tempdir = tempfile.mkdtemp() - with patch("sys.argv", ["birdvoxdetect", POSITIVE_PATH, "--output-dir", tempdir]): + with patch( + "sys.argv", + [ + "birdvoxdetect", POSITIVE_PATH, + 
"--output-dir", tempdir, + "--suffix", "cli", + "--export-clips"]): import birdvoxdetect.__main__ # Check output file created outfile = os.path.join( - tempdir, "fd79e55d-d3a3-4083-aba1-4f00b545c3d6_checklist.csv" + tempdir, "fd79e55d-d3a3-4083-aba1-4f00b545c3d6_cli_checklist.csv" ) assert os.path.isfile(outfile) shutil.rmtree(tempdir)