From 23fa4d7e3890bd080458e43271d463046604c94c Mon Sep 17 00:00:00 2001 From: spicysama Date: Fri, 18 Oct 2024 14:38:18 +0800 Subject: [PATCH] Fix dockerfile for `pyaudio` (#623) * Readmes, deps, api workers * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix speed loss after compiling * revert log * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add dockerfile dep: gcc * Move READMES in subfolder * Fix dockerfile * Fix dockerfile * restore docker setup * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Leng Yue --- .github/workflows/build-docker-image.yml | 2 +- dockerfile | 4 ++++ dockerfile.dev | 4 ++++ tools/msgpack_api.py | 27 +++++++++++++++++++----- 4 files changed, 31 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build-docker-image.yml b/.github/workflows/build-docker-image.yml index 6a2b7eff..28cb7e7d 100644 --- a/.github/workflows/build-docker-image.yml +++ b/.github/workflows/build-docker-image.yml @@ -5,7 +5,7 @@ on: branches: - main tags: - - 'v*' + - "v*" jobs: build: diff --git a/dockerfile b/dockerfile index f22bea82..8cd7834e 100644 --- a/dockerfile +++ b/dockerfile @@ -18,6 +18,10 @@ ARG DEPENDENCIES=" \ libsox-dev \ build-essential \ cmake \ + libasound-dev \ + portaudio19-dev \ + libportaudio2 \ + libportaudiocpp0 \ ffmpeg" RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ diff --git a/dockerfile.dev b/dockerfile.dev index 2d07296e..ac5d18f6 100644 --- a/dockerfile.dev +++ b/dockerfile.dev @@ -17,6 +17,10 @@ ARG TOOLS=" \ openssh-server \ sudo \ protobuf-compiler \ + libasound-dev \ + portaudio19-dev \ + libportaudio2 \ + libportaudiocpp0 \ cmake" RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ diff --git a/tools/msgpack_api.py b/tools/msgpack_api.py index 56fbec30..77e6861e 100644 --- a/tools/msgpack_api.py +++ b/tools/msgpack_api.py @@ -1,8 +1,14 @@ +import os +from argparse import ArgumentParser +from pathlib import Path + import httpx import ormsgpack from tools.commons import ServeReferenceAudio, ServeTTSRequest +api_key = os.environ.get("FISH_API_KEY", "YOUR_API_KEY") + def audio_request(): # priority: ref_id > references @@ -18,6 +24,8 @@ def audio_request(): streaming=True, ) + api_key = os.environ.get("FISH_API_KEY", "YOUR_API_KEY") + with ( httpx.Client() as client, open("hello.wav", "wb") as f, @@ -27,7 +35,7 @@ def audio_request(): "http://127.0.0.1:8080/v1/tts", content=ormsgpack.packb(request, option=ormsgpack.OPT_SERIALIZE_PYDANTIC), headers={ - "authorization": "Bearer YOUR_API_KEY", + "authorization": f"Bearer {api_key}", "content-type": "application/msgpack", }, timeout=None, @@ -36,11 +44,11 @@ def audio_request(): f.write(chunk) -def asr_request(): +def asr_request(audio_path: Path): # Read the audio file with open( - r"D:\PythonProject\fish-speech\.cache\test_audios\prompts\2648200402409733590.wav", + str(audio_path), "rb", ) as audio_file: audio_data = audio_file.read() @@ -57,7 +65,7 @@ def asr_request(): response = client.post( "https://api.fish.audio/v1/asr", headers={ - "Authorization": "Bearer 8eda4aeed2bc4aec9489b3efad003799", + "Authorization": f"Bearer {api_key}", "Content-Type": "application/msgpack", }, content=ormsgpack.packb(request_data), @@ -74,5 +82,14 @@ def asr_request(): print(f"Start time: {segment['start']}, End time: {segment['end']}") +def parse_args(): + parser = ArgumentParser() + parser.add_argument("--audio_path", type=Path, default="audio/ref/trump.mp3") + + return parser.parse_args() + + if __name__ == "__main__": - asr_request() + args = parse_args() + + asr_request(args.audio_path)