
Commit

Transfer of output should now go to output_uri
greenw0lf committed Oct 3, 2024
1 parent f3c15a1 commit c36eb5c
Showing 3 changed files with 11 additions and 30 deletions.
.env: 4 changes (0 additions, 4 deletions)
@@ -11,10 +11,6 @@ MODEL_BASE_DIR=./model
 # make sure to get a valid endpoint from a CODEOWNER
 S3_ENDPOINT_URL=https://some_url
 
-# default bucket and subdir
-S3_BUCKET=x-omg-daan-av
-S3_FOLDER_IN_BUCKET=assets
-
 # your AWS credentials for the S3 bucket in question
 AWS_ACCESS_KEY_ID=your-key-id
 AWS_SECRET_ACCESS_KEY=your-secret-access-key
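
With S3_BUCKET and S3_FOLDER_IN_BUCKET removed from the environment, the S3 destination is expected to travel entirely in the output_uri argument of run() in asr.py. A minimal sketch of a call under that assumption; the URI values are only illustrative (the bucket and folder names are taken from the removed .env defaults) and are not part of this commit:

from asr import run

# Hypothetical values: the bucket and folder previously set via .env
# are now encoded in the output URI itself.
run(
    input_uri="s3://x-omg-daan-av/assets/some_asset/input.mp3",
    output_uri="s3://x-omg-daan-av/assets/some_asset",
)
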
asr.py: 29 changes (9 additions, 20 deletions)
@@ -11,8 +11,6 @@
 )
 from config import (
     s3_endpoint_url,
-    s3_bucket,
-    s3_folder_in_bucket,
     w_word_timestamps,
     w_device,
     w_model,
@@ -23,7 +21,7 @@
 
 from download import download_uri
 from whisper import run_asr, WHISPER_JSON_FILE
-from s3_util import S3Store
+from s3_util import S3Store, parse_s3_uri
 from transcode import try_transcode
 from daan_transcript import generate_daan_transcript, DAAN_JSON_FILE
 
@@ -142,30 +140,21 @@ def run(input_uri: str, output_uri: str, model=None) -> bool:
     return True
 
 
-# if (S3) output_uri is supplied transfers data to (S3) location
-def transfer_asr_output(output_path: str, asset_id: str) -> bool:
-    logger.info(f"Transferring {output_path} to S3 (asset={asset_id})")
-    if any(
-        [
-            not x
-            for x in [
-                s3_endpoint_url,
-                s3_bucket,
-                s3_folder_in_bucket,
-            ]
-        ]
-    ):
+# if S3 output_uri is supplied transfers data to S3 location
+def transfer_asr_output(output_path: str, output_uri: str) -> bool:
+    logger.info(f"Transferring {output_path} to S3 (destination={output_uri})")
+    if not s3_endpoint_url:
         logger.warning(
-            "TRANSFER_ON_COMPLETION configured without all the necessary S3 settings"
+            "Transfer to S3 configured without an S3_ENDPOINT_URL!"
        )
         return False
 
+    s3_bucket, s3_folder_in_bucket = parse_s3_uri(output_uri)
+
     s3 = S3Store(s3_endpoint_url)
     return s3.transfer_to_s3(
         s3_bucket,
-        os.path.join(
-            s3_folder_in_bucket, asset_id
-        ),  # assets/<program ID>__<carrier ID>
+        s3_folder_in_bucket,
         [
             os.path.join(output_path, DAAN_JSON_FILE),
             os.path.join(output_path, WHISPER_JSON_FILE),
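
For reference, parse_s3_uri is newly imported from s3_util; its body is not shown in this commit, but it is presumably what splits the output URI into the bucket name and the folder within that bucket. A minimal sketch under that assumption (not the actual s3_util implementation):

from urllib.parse import urlparse


def parse_s3_uri(s3_uri: str) -> tuple[str, str]:
    # e.g. "s3://x-omg-daan-av/assets/some_asset" -> ("x-omg-daan-av", "assets/some_asset")
    parsed = urlparse(s3_uri)
    return parsed.netloc, parsed.path.lstrip("/")

Whatever the real implementation looks like, transfer_asr_output now writes DAAN_JSON_FILE and WHISPER_JSON_FILE directly under that folder, so the asset id is expected to already be part of output_uri rather than being appended to the folder as before.
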
config.py: 8 changes (2 additions, 6 deletions)
@@ -35,10 +35,8 @@ def assert_tuple(param: str) -> str:
 data_base_dir = os.environ.get("DATA_BASE_DIR", "")
 model_base_dir = os.environ.get("MODEL_BASE_DIR", "")
 
-# s3 connection params
+# s3 connection param
 s3_endpoint_url = os.environ.get("S3_ENDPOINT_URL", "")
-s3_bucket = os.environ.get("S3_BUCKET", "")
-s3_folder_in_bucket = os.environ.get("S3_FOLDER_IN_BUCKET", "")
 
 # Whisper params
 w_word_timestamps = assert_bool("W_WORD_TIMESTAMPS")
@@ -70,10 +68,8 @@ def assert_tuple(param: str) -> str:
 assert model_base_dir not in [".", "/"], "Please enter an absolute, non-root path"
 assert os.path.exists(model_base_dir), "MODEL_BASE_DIR does not exist"
 
-if s3_bucket or s3_endpoint_url or s3_folder_in_bucket:
-    assert s3_bucket, "Please enter the S3_BUCKET to use"
+if s3_endpoint_url:
     assert validators.url(s3_endpoint_url), "Please enter a valid S3_ENDPOINT_URL"
-    assert s3_folder_in_bucket, "Please enter a path within the supplied S3 bucket"
 
 
 assert w_device in ["cuda", "cpu"], "Please use either cuda|cpu for W_DEVICE"
