Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

whispervq deployment #151

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,6 @@ old_train.py
*.ipynb
__pycache__
.ipynb_checkpoints
outputs
outputs
*.pt
*.model
3 changes: 3 additions & 0 deletions deployments/whispervq/metadata.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
version: 1
name: whispervq
default: fp16
17 changes: 17 additions & 0 deletions deployments/whispervq/model.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
id: whispervq:fp16
model: whispervq:fp16
name: Ichigo WhisperVQ
version: 1

engine: python-engine

extra_params:
device_id: 0
package_dir: ""

port: 3348
script: src/app.py
log_path: whisper.log
log_level: INFO
command:
- python
24 changes: 24 additions & 0 deletions deployments/whispervq/requirements.cuda.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
openai-whisper==20231117
huggingface_hub
IPython
pyarrow
matplotlib
librosa
soundfile
datasets
encodec
boto3
fire
vector_quantize_pytorch
webdataset
whisperspeech
--extra-index-url https://download.pytorch.org/whl/cu121
torch==2.2.0
torchaudio==2.2.0
numpy==1.26.4
fastapi
uvicorn

python-multipart
transformers
psutil
23 changes: 23 additions & 0 deletions deployments/whispervq/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
openai-whisper==20231117
huggingface_hub
IPython
pyarrow
matplotlib
librosa
soundfile
datasets
encodec
boto3
fire
vector_quantize_pytorch
webdataset
whisperspeech
torch==2.2.0
torchaudio==2.2.0
numpy==1.26.4
fastapi
uvicorn

python-multipart
transformers
psutil
62 changes: 62 additions & 0 deletions deployments/whispervq/src/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@

import argparse
import os
from contextlib import asynccontextmanager
from pathlib import Path
from typing import AsyncGenerator, List

import uvicorn
from dotenv import load_dotenv
from fastapi import APIRouter, FastAPI

from common.utility.logger_utility import LoggerUtility
from services.audio.audio_controller import AudioController
from services.audio.implementation.audio_service import AudioService
from services.health.health_controller import HealthController


@asynccontextmanager
async def application_lifecycle(app: FastAPI) -> AsyncGenerator[None, None]:
    """FastAPI lifespan hook that touches the audio service before serving.

    ``AudioService.get_audio_service()`` is called once on startup and its
    result is discarded here (presumably this builds the shared service
    eagerly -- confirm against ``AudioService``). Any failure is logged
    through ``LoggerUtility`` and re-raised so the application does not
    start. Nothing is done on shutdown.

    Args:
        app: The FastAPI application this lifespan is attached to (unused).
    """
    try:
        AudioService.get_audio_service()
    except Exception as e:
        logger = LoggerUtility.get_logger()
        logger.error(f"Error initializing audio service: {e}")
        # Propagate the original exception unchanged.
        raise
    yield


def create_app() -> FastAPI:
    """Build the FastAPI application with every controller registered.

    The controllers are instantiated first, then the application is created
    with ``application_lifecycle`` as its lifespan handler and each
    controller is included as a router.

    Returns:
        The configured ``FastAPI`` instance.
    """
    controllers: List[APIRouter] = [HealthController(), AudioController()]
    application = FastAPI(lifespan=application_lifecycle)
    for controller in controllers:
        application.include_router(controller)
    return application


def parse_argument(argv=None) -> argparse.Namespace:
    """Parse the command-line options of the WhisperVQ application.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads ``sys.argv[1:]``, which is the
            original behaviour.

    Returns:
        An ``argparse.Namespace`` with the attributes ``log_path``,
        ``log_level``, ``port``, ``device_id`` and ``package_dir``.
    """
    parser = argparse.ArgumentParser(description="WhisperVQ Application")
    parser.add_argument(
        "--log_path",
        type=str,
        default="whisper.log",
        help="The log file path",
    )
    # CRITICAL is accepted for parity with LoggerUtility.LogLevel.
    # NOTE(review): TRACE is kept for backward compatibility, but the stdlib
    # logging module has no TRACE level by default -- confirm it is
    # registered before the logger is configured with it.
    parser.add_argument(
        "--log_level",
        type=str,
        default="INFO",
        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL", "TRACE"],
        help="The log level",
    )
    parser.add_argument(
        "--port",
        type=int,
        default=3348,
        help="The port to run the WhisperVQ app on",
    )
    parser.add_argument(
        "--device_id",
        type=str,
        default="0",
        help="The device id to run the WhisperVQ app on",
    )
    parser.add_argument(
        "--package_dir",
        type=str,
        default="",
        help="The package-dir to be extended to sys.path",
    )
    return parser.parse_args(argv)


if __name__ == "__main__":
    # Script entry point: parse the CLI options, set up logging, load the
    # .env file that sits in ./variables next to this script, then serve the
    # app with uvicorn on all interfaces.
    args = parse_argument()
    LoggerUtility.init_logger(__name__, args.log_level, args.log_path)

    source_dir = os.path.dirname(os.path.realpath(__file__))
    env_path = Path(source_dir) / "variables" / ".env"
    load_dotenv(dotenv_path=env_path)

    app: FastAPI = create_app()
    print("Server is running at: 0.0.0.0:", args.port)
    uvicorn.run(app=app, host="0.0.0.0", port=args.port)
18 changes: 18 additions & 0 deletions deployments/whispervq/src/common/abstract/controller_abstract.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from abc import ABC, abstractmethod

from fastapi import APIRouter


class ControllerAbstract(APIRouter, ABC):
    """Abstract base for controllers that are themselves ``APIRouter``s.

    Construction runs two hooks that subclasses must implement, in this
    order: ``_setup_services`` and then ``_setup_routes``.
    """

    def __init__(self, prefix: str):
        """Create the router under ``prefix`` and run both setup hooks.

        Args:
            prefix: Path prefix passed to ``APIRouter``.
        """
        super().__init__(prefix=prefix)
        # Services first, so route handlers can rely on them being set.
        self._setup_services()
        self._setup_routes()

    @abstractmethod
    def _setup_services(self) -> None:
        """Prepare the services the controller's handlers depend on."""

    @abstractmethod
    def _setup_routes(self) -> None:
        """Register the controller's routes on this router."""
8 changes: 8 additions & 0 deletions deployments/whispervq/src/common/constant/fastapi_constant.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
class ContentTypeConstant:
    """String constants for the content types used by the application."""

    # JSON payloads.
    application_json: str = "application/json"
    # Server-sent event streams.
    text_event_stream: str = "text/event-stream"
    # WAV audio payloads.
    audio_wav: str = "audio/wav"

class RestConstant:
    """HTTP method names used when registering routes."""

    post: str = "POST"
    get: str = "GET"
38 changes: 38 additions & 0 deletions deployments/whispervq/src/common/utility/convert_utility.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import base64


class ConvertUtility:
    """Helpers for converting between raw bytes and base64 text."""

    @staticmethod
    def encode_to_base64(byte_data: bytes) -> str:
        """Encode bytes as a base64 string.

        Args:
            byte_data: The raw bytes to encode.

        Returns:
            The base64 representation of ``byte_data`` as text.
        """
        # No I/O happens here, so there is nothing to translate into IOError.
        return base64.b64encode(byte_data).decode('utf-8')

    @staticmethod
    def decode_base64(
        base64_string: str
    ) -> bytes:
        """Decode a base64 string into bytes.

        Args:
            base64_string: Base64 encoded string.

        Returns:
            The decoded bytes.

        Raises:
            ValueError: If the base64 string is invalid (for example, has
                incorrect padding). The underlying ``binascii.Error`` is
                attached as the cause.
        """
        # Imported locally so the block does not depend on the undocumented
        # ``base64.binascii`` attribute.
        import binascii

        try:
            return base64.b64decode(base64_string)
        except binascii.Error as e:
            raise ValueError(f"Invalid base64 string: {e}") from e
13 changes: 13 additions & 0 deletions deployments/whispervq/src/common/utility/generator_utility.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import hashlib
import uuid


class GeneratorUtility:
    """Helpers for generating identifiers."""

    @staticmethod
    def generate_uuid_v4(seed: str = "") -> uuid.UUID:
        """Return a version-4 UUID, random or derived from ``seed``.

        Args:
            seed: When empty (the default) a random UUID is returned.
                Otherwise the UUID is built from the first 16 bytes of the
                SHA-256 digest of the UTF-8 encoded seed, so the same seed
                always gives the same UUID.

        Returns:
            A ``uuid.UUID`` whose version field is 4.
        """
        if seed:
            digest = hashlib.sha256(seed.encode('utf-8')).digest()
            # version=4 overwrites the version/variant bits of the digest.
            return uuid.UUID(bytes=digest[:16], version=4)
        return uuid.uuid4()
63 changes: 63 additions & 0 deletions deployments/whispervq/src/common/utility/logger_utility.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import logging
from enum import Enum
from typing import ClassVar, Optional, Union

from uvicorn.config import LOGGING_CONFIG


class LoggerUtility:
    """
    Holder for the single application logger.

    ``init_logger`` must be called before ``get_logger``; only the first
    call to ``init_logger`` has any effect.
    """
    # None until init_logger has run.
    _logger: ClassVar[Optional[logging.Logger]] = None

    class LogLevel(Enum):
        """
        Log levels, valued with the numeric levels of the logging module.
        """
        DEBUG = logging.DEBUG
        INFO = logging.INFO
        WARNING = logging.WARNING
        ERROR = logging.ERROR
        CRITICAL = logging.CRITICAL

    @staticmethod
    def _resolve_level(log_level: Union[LogLevel, int, str]) -> Union[int, str]:
        """
        Turn a LogLevel member into its numeric value; ints and level-name
        strings are returned unchanged.
        """
        if isinstance(log_level, LoggerUtility.LogLevel):
            return log_level.value
        return log_level

    @staticmethod
    def init_logger(name: str, log_level: Union[LogLevel, int, str] = LogLevel.INFO,
                    log_file: Optional[str] = None) -> None:
        """
        Initialize the application logger and uvicorn's logging config.

        Args:
            name: Name of the logger to create.
            log_level: A ``LogLevel`` member, a numeric level, or a level
                name such as ``"INFO"``.
            log_file: Optional path of a log file. When given, both the
                application logger and uvicorn's default/access handlers
                write to it; when omitted, uvicorn's handlers are left
                untouched.
        """
        if LoggerUtility._logger is not None:
            # Already initialized: the first call wins.
            return

        # Logger.setLevel rejects plain Enum members, so unwrap them first.
        level = LoggerUtility._resolve_level(log_level)

        logger = logging.getLogger(name)
        logger.setLevel(level)
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        if log_file:
            file_handler = logging.FileHandler(log_file)
            file_handler.setFormatter(formatter)
            logger.addHandler(file_handler)
        console_handler = logging.StreamHandler()
        console_handler.setFormatter(formatter)
        logger.addHandler(console_handler)
        LoggerUtility._logger = logger

        if log_file:
            # Only redirect uvicorn to a file when there is one; a
            # FileHandler cannot be built with filename=None.
            LOGGING_CONFIG["handlers"]["default"] = {
                "class": "logging.FileHandler",
                "filename": log_file,
                "formatter": "default"
            }
            LOGGING_CONFIG["handlers"]["access"] = {
                "class": "logging.FileHandler",
                "filename": log_file,
                "formatter": "access"
            }
        LOGGING_CONFIG["loggers"]["uvicorn.error"]["level"] = level
        LOGGING_CONFIG["loggers"]["uvicorn.access"]["level"] = level

    @staticmethod
    def get_logger() -> logging.Logger:
        """
        Return the logger created by init_logger.

        Raises:
            RuntimeError: If init_logger has not been called yet
                (RuntimeError is a subclass of Exception, so existing
                ``except Exception`` callers keep working).
        """
        if LoggerUtility._logger is None:
            raise RuntimeError("Logger is not initialized.")
        return LoggerUtility._logger
23 changes: 23 additions & 0 deletions deployments/whispervq/src/services/audio/audio_controller.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@

from common.abstract.controller_abstract import ControllerAbstract
from common.constant.fastapi_constant import RestConstant
from services.audio.audio_model import AudioModel
from services.audio.implementation.audio_service import AudioService


class AudioController(ControllerAbstract):
    """Router that exposes the audio inference endpoint under ``/inference``."""

    _prefix = "/inference"

    def __init__(self):
        """Build the router with the class prefix; the base class runs the setup hooks."""
        super().__init__(prefix=self._prefix)

    def _setup_services(self):
        """Fetch the audio service that the route handler delegates to."""
        self.audio_service = AudioService.get_audio_service()

    def _setup_routes(self):
        """Register ``inference`` as the POST handler at the prefix root."""
        allowed_methods = [RestConstant.post]
        self.add_api_route("", self.inference, methods=allowed_methods)

    async def inference(self, req: AudioModel.Request) -> AudioModel.Response:
        """Forward the request to the audio service and return its response."""
        response = await self.audio_service.inference(req)
        return response
11 changes: 11 additions & 0 deletions deployments/whispervq/src/services/audio/audio_interface.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@

from abc import ABC, abstractmethod

from services.audio.audio_model import AudioModel


class AudioInterface(ABC):
    """Abstract interface that audio service implementations must satisfy."""

    @abstractmethod
    async def inference(self, req: AudioModel.Request) -> AudioModel.Response:
        """Run inference for ``req`` and return the resulting response."""
35 changes: 35 additions & 0 deletions deployments/whispervq/src/services/audio/audio_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from dataclasses import dataclass
from enum import Enum


class AudioFormat(str, Enum):
    """Audio formats known to the service; members compare equal to their string value."""
    WAV = "wav"  # Supported by both backends
    MP3 = "mp3"  # Supported by ffmpeg
    FLAC = "flac"  # Supported by both
    AAC = "aac"  # Supported by ffmpeg
    OGG = "ogg"  # Supported by ffmpeg
    OPUS = "opus"  # Supported by ffmpeg
    PCM = "pcm"  # Raw PCM data


class AudioModel:
    """Namespace for the audio endpoint's request/response models."""

    # Backend names listed for each format.
    FORMAT_BACKENDS = {
        AudioFormat.WAV: ["soundfile", "ffmpeg"],
        AudioFormat.MP3: ["ffmpeg"],
        AudioFormat.FLAC: ["soundfile", "ffmpeg"],
        AudioFormat.AAC: ["ffmpeg"],
        AudioFormat.OGG: ["ffmpeg"],
        AudioFormat.OPUS: ["ffmpeg"],
        AudioFormat.PCM: ["soundfile"]
    }

    @dataclass
    class Request:
        """Inference request.

        ``data`` carries the audio payload as a string (presumably base64
        encoded -- confirm against the service implementation); ``format``
        names its audio format and defaults to WAV.
        """
        data: str
        # Use the enum member so the default matches the annotation; it
        # still compares equal to the previous default, the string "wav".
        format: AudioFormat = AudioFormat.WAV

    @dataclass
    class Response:
        """Inference response: the produced tokens plus audio metadata."""
        tokens: str
        sample_rate: int
        format: AudioFormat
Loading