Skip to content

Commit

Permalink
Merge pull request #36 from spacetimeengineer/add_xml_directory_optio…
Browse files Browse the repository at this point in the history
…n_soundtrap

Added an --xml-dir option to pbp-meta-gen
  • Loading branch information
danellecline authored Sep 5, 2024
2 parents 9c6c4ba + ccfa8df commit 59f22ea
Show file tree
Hide file tree
Showing 5 changed files with 115 additions and 7 deletions.
11 changes: 11 additions & 0 deletions pbp/main_meta_generator.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
from datetime import datetime
from pathlib import Path

Expand Down Expand Up @@ -27,6 +28,15 @@ def main():

log_dir = Path(opts.output_dir)
json_dir = Path(opts.json_base_dir)
if opts.xml_dir is None:
if os.name == "nt":
xml_dir_str = str(opts.uri).replace("file:\\\\\\", "")
else:
xml_dir_str = str(opts.uri).replace("file:///", "")

xml_dir = Path(xml_dir_str)
else:
xml_dir = Path(opts.xml_dir)
log_dir.mkdir(exist_ok=True, parents=True)
json_dir.mkdir(exist_ok=True, parents=True)
start = datetime.strptime(opts.start, "%Y%m%d")
Expand Down Expand Up @@ -62,6 +72,7 @@ def main():
prefixes=opts.prefix,
start=start,
end=end,
xml_dir=xml_dir.as_posix(),
)
generator.run()
except KeyboardInterrupt:
Expand Down
9 changes: 9 additions & 0 deletions pbp/main_meta_generator_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,4 +88,13 @@ def parse_arguments():
"7000.20220902.000000.wav",
)

parser.add_argument(
"--xml-dir",
type=str,
metavar="dir",
required=False,
default=None,
help="Specifies the directory where the log.xml files are located. If not specified, the default is the same directory as the audio files.",
)

return parser.parse_args()
57 changes: 57 additions & 0 deletions pbp/meta_gen/gen_abstract.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,60 @@ def log(self):
# abstract run method
def run(self):
pass


class SoundTrapMetadataGeneratorAbstract(object):
    """
    Base class for SoundTrap metadata generators.

    Stores the search configuration (audio location, output directory,
    file prefixes, xml directory and date range) shared by concrete
    generators, which are expected to override :meth:`run`.
    """

    def __init__(
        self,
        log,  # : loguru.Logger,
        audio_loc: str,
        json_base_dir: str,
        prefixes: List[str],
        xml_dir: str,
        start: datetime,
        end: datetime,
        seconds_per_file: float = 0.0,
        **kwargs,
    ):
        """
        Abstract class for capturing sound wav metadata
        :param log:
            The logger instance, exposed through the ``log`` property
        :param audio_loc:
            The local directory or cloud bucket that contains the wav files
        :param json_base_dir:
            The local directory to write the json files to
        :param prefixes:
            The search patterns to match the wav files, e.g. 'MARS'
        :param xml_dir:
            The local directory that contains the log.xml files. It is stored
            exactly as given; no fallback to audio_loc is applied here, so the
            caller must resolve any default before constructing the generator.
        :param start:
            The start date to search for wav files
        :param end:
            The end date to search for wav files
        :param seconds_per_file:
            The number of seconds per file expected in a wav file to check for missing data.
            A value of 0 (the default) is stored as None, meaning no check is done.
        :param kwargs:
            Extra keyword arguments are accepted and ignored.
        :return:
        """
        # Plain attribute assignments: the former try/except that only
        # re-raised the caught exception added nothing and was removed.
        self.audio_loc = audio_loc
        self.json_base_dir = json_base_dir
        self.df = pd.DataFrame()  # starts empty
        self.start = start
        self.end = end
        self.prefixes = prefixes
        self.xml_dir = xml_dir
        self._log = log
        # 0 is the sentinel for "no expected duration supplied"
        self._seconds_per_file = None if seconds_per_file == 0 else seconds_per_file

    @property
    def seconds_per_file(self):
        """Expected seconds per wav file, or None when constructed with 0."""
        return self._seconds_per_file

    @property
    def log(self):
        """The logger passed to the constructor."""
        return self._log

    # abstract run method
    def run(self):
        """No-op placeholder; concrete generators override this."""
        pass
43 changes: 36 additions & 7 deletions pbp/meta_gen/gen_soundtrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
# Filename: meta_gen/gen_soundtrap.py
# Description: Captures SoundTrap metadata either from a local directory of S3 bucket
import urllib
import os
from typing import List

import boto3
Expand All @@ -11,12 +10,13 @@
import datetime
import pandas as pd
import pytz
import os

from datetime import timedelta
from pathlib import Path
from progressbar import progressbar

from pbp.meta_gen.gen_abstract import MetadataGeneratorAbstract
from pbp.meta_gen.gen_abstract import SoundTrapMetadataGeneratorAbstract
from pbp.meta_gen.meta_reader import SoundTrapWavFile
from pbp.meta_gen.json_generator import JsonGenerator
from pbp.meta_gen.utils import (
Expand All @@ -27,7 +27,7 @@
)


class SoundTrapMetadataGenerator(MetadataGeneratorAbstract):
class SoundTrapMetadataGenerator(SoundTrapMetadataGeneratorAbstract):
"""
Captures SoundTrap wav file metadata either from a local directory or S3 bucket.
"""
Expand All @@ -41,8 +41,11 @@ def __init__(
uri: str,
json_base_dir: str,
prefixes: List[str],
xml_dir: str,
start: datetime.datetime = START,
end: datetime.datetime = END,
seconds_per_file: float = 0.0,
**kwargs,
):
"""
:param uri:
Expand All @@ -51,13 +54,19 @@ def __init__(
The local directory to write the json files to
:param prefixes:
The search pattern to match the wav files, e.g. 'MARS'
:param xml_dir
The local directory that contains the log.xml files, defaults to audio_loc if none is specified.
:param start:
The start date to search for wav files
:param end:
The end date to search for wav files check is done.
:return:
"""
super().__init__(log, uri, json_base_dir, prefixes, start, end, 0.0)
self.xml_dir = xml_dir

super().__init__(
log, uri, json_base_dir, prefixes, self.xml_dir, start, end, seconds_per_file
)

def run(self):
try:
Expand All @@ -81,21 +90,34 @@ def run(self):

if scheme == "file":
parsed_uri = urllib.parse.urlparse(self.audio_loc)

if os.name == "nt":
wav_path = Path(parsed_uri.path[3:])
else:
wav_path = Path(parsed_uri.path)

for filename in progressbar(
sorted(wav_path.rglob("*.wav")), prefix="Searching : "
):
wav_path = filename.parent / f"{filename.stem}.wav"
xml_path = filename.parent / f"{filename.stem}.log.xml"
xml_path = Path(self.xml_dir + "/" + f"{filename.stem}.log.xml")
start_dt = get_datetime(wav_path, self.prefixes)

# Must have a start date to be valid and also must have a corresponding xml file
if start_dt and xml_path.exists() and start_dt <= start_dt <= end_dt:
if (
start_dt and xml_path.exists() and start_dt <= start_dt <= end_dt
): # TODO : Saying that a str object can not have an .exists()
wav_files.append(
SoundTrapWavFile(wav_path.as_posix(), xml_path, start_dt)
)
else:
if not xml_path.exists():
self.log.error(
"The path set by --xml-dir :"
+ str(xml_path)
+ " could not be located at the user specified directory."
)

else:
# if the audio_loc is a s3 url, then we need to list the files in buckets that cover the start and end
# dates
Expand Down Expand Up @@ -199,6 +221,7 @@ def run(self):
json_dir = Path("tests/json/soundtrap")
log_dir.mkdir(exist_ok=True, parents=True)
json_dir.mkdir(exist_ok=True, parents=True)
xml_dir = Path("s3://pacific-sound-ch01")

log = create_logger(
log_filename_and_level=(
Expand All @@ -211,6 +234,12 @@ def run(self):
start = datetime.datetime(2023, 7, 18)
end = datetime.datetime(2023, 7, 19)
gen = SoundTrapMetadataGenerator(
log, "s3://pacific-sound-ch01", json_dir.as_posix(), ["7000"], start, end
log,
"s3://pacific-sound-ch01",
json_dir.as_posix(),
["7000"],
xml_dir.as_posix(),
start,
end,
)
gen.run()
2 changes: 2 additions & 0 deletions tests/test_meta_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def test_soundtrap_generator_s3():
uri="s3://pacific-sound-ch01",
json_base_dir=json_dir.as_posix(),
prefixes=["7000"],
xml_dir="s3://pacific-sound-ch01",
start=start,
end=end,
)
Expand Down Expand Up @@ -114,6 +115,7 @@ def test_soundtrap_generator_local():
uri=f"file://{wav_dir.as_posix()}",
json_base_dir=json_dir.as_posix(),
prefixes=["6716"],
xml_dir=f"{wav_dir.as_posix()}",
start=start,
end=end,
)
Expand Down

0 comments on commit 59f22ea

Please sign in to comment.