From a907f2cad39bbfb9a17c8a4315163e4a9e537487 Mon Sep 17 00:00:00 2001 From: SG Date: Fri, 23 Feb 2024 19:22:32 -0700 Subject: [PATCH] docker updates --- .../monkeyplug-build-push-vosk-ghcr.yml | 1 + .../monkeyplug-build-push-whisper-ghcr.yml | 1 + README.md | 21 +++++++++++- docker/Dockerfile | 2 +- docker/build_docker.sh | 34 ------------------- docker/monkeyplug-docker.sh | 30 +++++++++++----- src/monkeyplug/monkeyplug.py | 1 - 7 files changed, 44 insertions(+), 46 deletions(-) delete mode 100755 docker/build_docker.sh diff --git a/.github/workflows/monkeyplug-build-push-vosk-ghcr.yml b/.github/workflows/monkeyplug-build-push-vosk-ghcr.yml index 5104c04..6960234 100644 --- a/.github/workflows/monkeyplug-build-push-vosk-ghcr.yml +++ b/.github/workflows/monkeyplug-build-push-vosk-ghcr.yml @@ -9,6 +9,7 @@ on: - '**' - '!.github/workflows/monkeyplug-build-push-whisper-ghcr.yml' - '!.github/workflows/publish-to-pypi.yml' + - '!README.md' pull_request: workflow_dispatch: repository_dispatch: diff --git a/.github/workflows/monkeyplug-build-push-whisper-ghcr.yml b/.github/workflows/monkeyplug-build-push-whisper-ghcr.yml index f9b3a43..6937a27 100644 --- a/.github/workflows/monkeyplug-build-push-whisper-ghcr.yml +++ b/.github/workflows/monkeyplug-build-push-whisper-ghcr.yml @@ -9,6 +9,7 @@ on: - '**' - '!.github/workflows/monkeyplug-build-push-vosk-ghcr.yml' - '!.github/workflows/publish-to-pypi.yml' + - '!README.md' pull_request: workflow_dispatch: repository_dispatch: diff --git a/README.md b/README.md index e7b6626..29bde9f 100644 --- a/README.md +++ b/README.md @@ -96,7 +96,26 @@ Whisper Options: ### Docker -Alternately, a [Dockerfile](./docker/Dockerfile) is provided to allow you to run monkeyplug in Docker. You can pull either the `oci.guero.top/monkeyplug:small` or `oci.guero.top/monkeyplug:large` Docker images, or build with [`build_docker.sh`](./docker/build_docker.sh), then run [`monkeyplug-docker.sh`](./docker/monkeyplug-docker.sh) inside the directory where your audio files are located. +Alternately, a [Dockerfile](./docker/Dockerfile) is provided to allow you to run monkeyplug in Docker. You can pull one of the following images: + +* [VOSK](https://alphacephei.com/vosk/models) + - oci.guero.top/monkeyplug:vosk-small + - oci.guero.top/monkeyplug:vosk-large +* [Whisper](https://github.com/openai/whisper?tab=readme-ov-file#available-models-and-languages) + - oci.guero.top/monkeyplug:whisper-tiny.en + - oci.guero.top/monkeyplug:whisper-tiny + - oci.guero.top/monkeyplug:whisper-base.en + - oci.guero.top/monkeyplug:whisper-base + - oci.guero.top/monkeyplug:whisper-small.en + - oci.guero.top/monkeyplug:whisper-small + - oci.guero.top/monkeyplug:whisper-medium.en + - oci.guero.top/monkeyplug:whisper-medium + - oci.guero.top/monkeyplug:whisper-large-v1 + - oci.guero.top/monkeyplug:whisper-large-v2 + - oci.guero.top/monkeyplug:whisper-large-v3 + - oci.guero.top/monkeyplug:whisper-large + +then run [`monkeyplug-docker.sh`](./docker/monkeyplug-docker.sh) inside the directory where your audio files are located. ## Contributing diff --git a/docker/Dockerfile b/docker/Dockerfile index 94a6476..4cab4c7 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -69,5 +69,5 @@ RUN python3 -m pip install --no-cache openai-whisper && \ mkdir -p "$WHISPER_MODEL_DIR" && \ cd "$WHISPER_MODEL_DIR" && \ echo "Downloading Whisper model \"$WHISPER_MODEL_NAME\"..." && \ - curl -fsSL -o ./"$WHISPER_MODEL_NAME" "$(curl -fsSL https://raw.githubusercontent.com/openai/whisper/main/whisper/__init__.py | grep -P "\"$WHISPER_MODEL_NAME\"\s*:\s*\"https://" | cut -d: -f2- | sed 's/^[[:space:]]*"//' | sed 's/",*$//')" && \ + curl -fsSL -o ./"$WHISPER_MODEL_NAME".pt "$(curl -fsSL https://raw.githubusercontent.com/openai/whisper/main/whisper/__init__.py | grep -P "\"$WHISPER_MODEL_NAME\"\s*:\s*\"https://" | cut -d: -f2- | sed 's/^[[:space:]]*"//' | sed 's/",*$//')" && \ echo "Finished" diff --git a/docker/build_docker.sh b/docker/build_docker.sh deleted file mode 100755 index 8bf8fd3..0000000 --- a/docker/build_docker.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env bash - -set -e -set -o pipefail - -ENCODING="utf-8" - -ENGINE="${CONTAINER_ENGINE:-docker}" - -[[ "$(uname -s)" = 'Darwin' ]] && REALPATH=grealpath || REALPATH=realpath -[[ "$(uname -s)" = 'Darwin' ]] && DIRNAME=gdirname || DIRNAME=dirname -if ! (type "$REALPATH" && type "$DIRNAME" && type "$ENGINE") > /dev/null; then - echo "$(basename "${BASH_SOURCE[0]}") requires $ENGINE, $REALPATH and $DIRNAME" - exit 1 -fi -export SCRIPT_PATH="$($DIRNAME $($REALPATH -e "${BASH_SOURCE[0]}"))" - -pushd "$SCRIPT_PATH"/.. >/dev/null 2>&1 - -BUILD_ARGS=() -if [[ -n "$VOSK_MODEL_URL" ]]; then - BUILD_ARGS+=( --build-arg ) - BUILD_ARGS+=( VOSK_MODEL_URL="$VOSK_MODEL_URL" ) -fi -if [[ -n "$WHISPER_MODEL_NAME" ]]; then - BUILD_ARGS+=( --build-arg ) - BUILD_ARGS+=( WHISPER_MODEL_NAME="$WHISPER_MODEL_NAME" ) -fi - -$ENGINE build -f docker/Dockerfile "${BUILD_ARGS[@]}" -t oci.guero.top/monkeyplug . - - - -popd >/dev/null 2>&1 \ No newline at end of file diff --git a/docker/monkeyplug-docker.sh b/docker/monkeyplug-docker.sh index 855cab0..b97ecd8 100755 --- a/docker/monkeyplug-docker.sh +++ b/docker/monkeyplug-docker.sh @@ -1,19 +1,31 @@ #!/usr/bin/env bash -export MONKEYPLUG_IMAGE="${MONKEYPLUG_IMAGE:-oci.guero.top/monkeyplug:small}" -ENGINE="${CONTAINER_ENGINE:-docker}" -if [[ "$ENGINE" == "podman" ]]; then - CONTAINER_PUID=0 - CONTAINER_PGID=0 +MONKEYPLUG_IMAGE="${MONKEYPLUG_IMAGE:-oci.guero.top/monkeyplug:vosk-small}" +CONTAINER_ENGINE="${CONTAINER_ENGINE:-docker}" + +DEVICE_ARGS=() +ENV_ARGS=() +if [[ "$MONKEYPLUG_IMAGE" =~ .*"whisper".* ]]; then + DEVICE_ARGS+=( --gpus ) + DEVICE_ARGS+=( all ) + ENV_ARGS+=( -e ) + ENV_ARGS+=( MONKEYPLUG_MODE=whisper ) + ENV_ARGS+=( -e ) + ENV_ARGS+=( WHISPER_MODEL_NAME=$(echo "$MONKEYPLUG_IMAGE" | sed 's/.*:whisper-//') ) else - CONTAINER_PUID=$(id -u) - CONTAINER_PGID=$(id -g) + ENV_ARGS+=( -e ) + ENV_ARGS+=( MONKEYPLUG_MODE=vosk ) fi +PUID=$([[ "${CONTAINER_ENGINE}" == "podman" ]] && echo 0 || id -u) +PGID=$([[ "${CONTAINER_ENGINE}" == "podman" ]] && echo 0 || id -g) + # run from directory containing audio file -$ENGINE run --rm -t \ - -u $CONTAINER_PUID:$CONTAINER_PGID \ +$CONTAINER_ENGINE run --rm -t \ + "${DEVICE_ARGS[@]}" \ + "${ENV_ARGS[@]}" \ + -u $PUID:$PGID \ -v "$(realpath "${PWD}"):${PWD}" \ -w "${PWD}" \ "$MONKEYPLUG_IMAGE" "$@" diff --git a/src/monkeyplug/monkeyplug.py b/src/monkeyplug/monkeyplug.py index 827745c..607c209 100755 --- a/src/monkeyplug/monkeyplug.py +++ b/src/monkeyplug/monkeyplug.py @@ -326,7 +326,6 @@ def __init__( for line in lines: lineMap = line.split("|") self.swearsMap[scrubword(lineMap[0])] = lineMap[1] if len(lineMap) > 1 else "*****" - mmguero.eprint(self.swearsMap) if self.debug: mmguero.eprint(f'Input: {self.inputFileSpec}')