diff --git a/.env b/.env index 5448bbe..1ddf9b2 100644 --- a/.env +++ b/.env @@ -11,10 +11,6 @@ MODEL_BASE_DIR=./model # make sure to get a valid endpoint from a CODEOWNER S3_ENDPOINT_URL=https://some_url -# default bucket and subdir -S3_BUCKET=x-omg-daan-av -S3_FOLDER_IN_BUCKET=assets - # your AWS credentials for the S3 bucket in question AWS_ACCESS_KEY_ID=your-key-id AWS_SECRET_ACCESS_KEY=your-secret-access-key diff --git a/asr.py b/asr.py index 11e97b8..d2c5941 100644 --- a/asr.py +++ b/asr.py @@ -11,8 +11,6 @@ ) from config import ( s3_endpoint_url, - s3_bucket, - s3_folder_in_bucket, w_word_timestamps, w_device, w_model, @@ -23,7 +21,7 @@ from download import download_uri from whisper import run_asr, WHISPER_JSON_FILE -from s3_util import S3Store +from s3_util import S3Store, parse_s3_uri from transcode import try_transcode from daan_transcript import generate_daan_transcript, DAAN_JSON_FILE @@ -142,30 +140,21 @@ def run(input_uri: str, output_uri: str, model=None) -> bool: return True -# if (S3) output_uri is supplied transfers data to (S3) location -def transfer_asr_output(output_path: str, asset_id: str) -> bool: - logger.info(f"Transferring {output_path} to S3 (asset={asset_id})") - if any( - [ - not x - for x in [ - s3_endpoint_url, - s3_bucket, - s3_folder_in_bucket, - ] - ] - ): +# if S3 output_uri is supplied transfers data to S3 location +def transfer_asr_output(output_path: str, output_uri: str) -> bool: + logger.info(f"Transferring {output_path} to S3 (destination={output_uri})") + if not s3_endpoint_url: logger.warning( - "TRANSFER_ON_COMPLETION configured without all the necessary S3 settings" + "Transfer to S3 configured without an S3_ENDPOINT_URL!" ) return False + s3_bucket, s3_folder_in_bucket = parse_s3_uri(output_uri) + s3 = S3Store(s3_endpoint_url) return s3.transfer_to_s3( s3_bucket, - os.path.join( - s3_folder_in_bucket, asset_id - ), # assets/__ + s3_folder_in_bucket, [ os.path.join(output_path, DAAN_JSON_FILE), os.path.join(output_path, WHISPER_JSON_FILE), diff --git a/config.py b/config.py index 143667c..52af57a 100644 --- a/config.py +++ b/config.py @@ -35,10 +35,8 @@ def assert_tuple(param: str) -> str: data_base_dir = os.environ.get("DATA_BASE_DIR", "") model_base_dir = os.environ.get("MODEL_BASE_DIR", "") -# s3 connection params +# s3 connection param s3_endpoint_url = os.environ.get("S3_ENDPOINT_URL", "") -s3_bucket = os.environ.get("S3_BUCKET", "") -s3_folder_in_bucket = os.environ.get("S3_FOLDER_IN_BUCKET", "") # Whisper params w_word_timestamps = assert_bool("W_WORD_TIMESTAMPS") @@ -70,10 +68,8 @@ def assert_tuple(param: str) -> str: assert model_base_dir not in [".", "/"], "Please enter an absolute, non-root path" assert os.path.exists(model_base_dir), "MODEL_BASE_DIR does not exist" -if s3_bucket or s3_endpoint_url or s3_folder_in_bucket: - assert s3_bucket, "Please enter the S3_BUCKET to use" +if s3_endpoint_url: assert validators.url(s3_endpoint_url), "Please enter a valid S3_ENDPOINT_URL" - assert s3_folder_in_bucket, "Please enter a path within the supplied S3 bucket" assert w_device in ["cuda", "cpu"], "Please use either cuda|cpu for W_DEVICE"