Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: init ichigo deployment #154

Open
wants to merge 3 commits into
base: feat/fish-speech-deployment
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions deployments/ichigo/model.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Deployment descriptor for the Ichigo wrapper service.
id: ichigo-wrapper:fp16
model: ichigo-wrapper:fp16
name: Ichigo Wrapper
version: 1

# port, log_path and log_level share their names with options parsed by
# src/app.py (--port, --log_path, --log_level).
port: 22310
script: src/app.py
log_path: ichigo-wrapper.log
log_level: INFO
environment: ""
command:
- python
# Other models this wrapper is declared to depend on.
# NOTE(review): presumably these must be running before the wrapper starts — confirm.
depends:
- ichigo-0.4:8b-gguf-q4-km
- whispervq:fp16-linux-amd64
- fish-speech:fp16-linux-amd64
engine: python-engine
# The keys below share their names with the remaining src/app.py options
# (--device_id, --package_dir, --whisper_port, --ichigo_port,
# --fish_speech_port, --ichigo_model).
# NOTE(review): they appear without indentation in this view; they are
# presumably nested under extra_params in the real file — confirm.
extra_params:
device_id: 0
package_dir: ""
whisper_port: 3348
ichigo_port: 39281
fish_speech_port: 22312
ichigo_model: ichigo-0.4:8b-gguf-q4-km
19 changes: 19 additions & 0 deletions deployments/ichigo/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
fastapi
uvicorn[standard]
pydantic
websockets >= 13.0
numpy
g711
torch
torchaudio
torchvision
# "soundfile" is the current distribution name of the former "PySoundFile"
# project; both ship the same soundfile module, so only the maintained one is
# listed to avoid one install overwriting the other.
soundfile
openai>=0.28
pysilero-vad
aiohttp
pydub
python-multipart
# src/app.py imports dotenv directly, so declare it explicitly instead of
# relying on it arriving transitively.
python-dotenv
scipy
discord
psutil
67 changes: 67 additions & 0 deletions deployments/ichigo/src/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@

import argparse
import os
import sys
from pathlib import Path

from contextlib import asynccontextmanager

from typing import AsyncGenerator, List

import uvicorn
from dotenv import load_dotenv
from fastapi import APIRouter, FastAPI

from common.utility.logger_utility import LoggerUtility
from services.audio.audio_controller import AudioController
from services.audio.implementation.audio_service import AudioService
from services.health.health_controller import HealthController


def create_app() -> FastAPI:
    """Build the FastAPI application and attach every controller router.

    Returns:
        FastAPI: A new application with the health controller and the audio
        controller included, in that order.
    """
    # Controllers are instantiated first, then registered in list order.
    controllers: List[APIRouter] = [HealthController(), AudioController()]
    application = FastAPI()
    for controller in controllers:
        application.include_router(controller)
    return application


def parse_argument(argv: "List[str] | None" = None):
    """Parse the command-line options of the Ichigo-wrapper application.

    Args:
        argv: Optional list of argument strings to parse. When ``None`` (the
            default) argparse reads the process command line, which is the
            behaviour existing callers get.

    Returns:
        argparse.Namespace: Parsed options ``log_path``, ``log_level``,
        ``port``, ``device_id``, ``package_dir``, ``whisper_port``,
        ``ichigo_port``, ``fish_speech_port`` and ``ichigo_model``.
    """
    parser = argparse.ArgumentParser(description="Ichigo-wrapper Application")
    parser.add_argument('--log_path', type=str,
                        default='Ichigo-wrapper.log', help='The log file path')
    # CRITICAL is accepted as well so every standard logging level can be
    # selected; TRACE is kept for backward compatibility with existing callers.
    parser.add_argument('--log_level', type=str, default='INFO',
                        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR',
                                 'CRITICAL', 'TRACE'],
                        help='The log level')
    parser.add_argument('--port', type=int, default=22310,
                        help='The port to run the Ichigo-wrapper app on')
    parser.add_argument('--device_id', type=str, default="0",
                        help='The id of the device used by the Ichigo-wrapper app')
    parser.add_argument('--package_dir', type=str, default="",
                        help='The package-dir to be extended to sys.path')
    parser.add_argument('--whisper_port', type=int, default=3348,
                        help='The port of whisper vq model')
    parser.add_argument('--ichigo_port', type=int, default=39281,
                        help='The port of ichigo model')
    parser.add_argument('--fish_speech_port', type=int, default=22312,
                        help='The port of fish speech model')
    # NOTE(review): model.yml passes "ichigo-0.4:8b-gguf-q4-km" for this
    # option; confirm whether this fallback should match it.
    parser.add_argument('--ichigo_model', type=str, default="ichigo:8b-gguf-q4-km",
                        help='The ichigo model name')
    return parser.parse_args(argv)


if __name__ == "__main__":
    # Script entry point: parse options, set up logging, load environment
    # variables, initialise the audio service, then serve the app with uvicorn.
    args = parse_argument()
    LoggerUtility.init_logger(__name__, args.log_level, args.log_path)

    # The .env file lives in "variables/" next to this script. It is loaded
    # before the audio service is initialised so that any variable it defines
    # is already in the environment at that point.
    env_path = Path(os.path.dirname(os.path.realpath(__file__))
                    ) / "variables" / ".env"
    load_dotenv(dotenv_path=env_path)

    AudioService.initialize(
        args.whisper_port, args.ichigo_port, args.fish_speech_port, args.ichigo_model)
    app: FastAPI = create_app()
    # Single formatted string so host and port are not separated by a space.
    print(f"Server is running at: 0.0.0.0:{args.port}")
    uvicorn.run(app=app, host="0.0.0.0", port=args.port)
18 changes: 18 additions & 0 deletions deployments/ichigo/src/common/abstract/controller_abstract.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from abc import ABC, abstractmethod

from fastapi import APIRouter


class ControllerAbstract(APIRouter, ABC):
    """Abstract APIRouter that configures itself on construction.

    Subclasses provide ``_setup_services`` and ``_setup_routes``; both are
    invoked by ``__init__``, services first and routes second.
    """

    def __init__(self, prefix: str):
        """Create the router under ``prefix`` and run both setup hooks.

        Args:
            prefix: Path prefix forwarded to ``APIRouter``.
        """
        super().__init__(prefix=prefix)
        self._setup_services()
        self._setup_routes()

    @abstractmethod
    def _setup_services(self) -> None:
        """Hook called first during construction; implemented by subclasses."""

    @abstractmethod
    def _setup_routes(self) -> None:
        """Hook called second during construction; implemented by subclasses."""
8 changes: 8 additions & 0 deletions deployments/ichigo/src/common/constant/fastapi_constant.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
class ContentTypeConstant:
    """Content-type strings referenced by name across the service."""

    # JSON payloads.
    application_json: str = "application/json"
    # Event-stream payloads.
    text_event_stream: str = "text/event-stream"
    # WAV audio payloads.
    audio_wav: str = "audio/wav"

class RestConstant:
    """HTTP method names used when registering routes."""

    post: str = "POST"
    get: str = "GET"
5 changes: 5 additions & 0 deletions deployments/ichigo/src/common/constant/tts_constant.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
class TTSConstant:
    """Numeric constants for text-to-speech audio handling."""

    # NOTE(review): units are not visible here; presumably chunk_length is a
    # size per chunk, sample_rate is in Hz and sample_width is in bytes —
    # confirm against the code that consumes these values.
    chunk_length: int = 2048
    sample_rate: int = 44100
    sample_width: int = 2
    channels: int = 1
38 changes: 38 additions & 0 deletions deployments/ichigo/src/common/utility/convert_utility.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import base64


class ConvertUtility:
    """Helpers for converting between raw bytes and base64 text."""

    @staticmethod
    def encode_to_base64(byte_data: bytes) -> str:
        """Encode bytes as a base64 string.

        Args:
            byte_data (bytes): Raw bytes to encode.

        Returns:
            str: The base64 representation of ``byte_data``.
        """
        # Pure in-memory operation: no file is read, so there is no I/O error
        # to translate here.
        return base64.b64encode(byte_data).decode('utf-8')

    @staticmethod
    def decode_base64(
        base64_string: str
    ) -> bytes:
        """Decode a base64 string to bytes.

        Args:
            base64_string (str): Base64 encoded string.

        Returns:
            bytes: Decoded bytes.

        Raises:
            ValueError: If the base64 string is invalid. The original decoding
                error is attached as ``__cause__``.
        """
        try:
            return base64.b64decode(base64_string)
        except ValueError as e:
            # binascii.Error (bad padding/length) is a ValueError subclass, and
            # a non-ASCII input string raises a plain ValueError; both mean the
            # input is not valid base64.
            raise ValueError(f"Invalid base64 string: {e}") from e
13 changes: 13 additions & 0 deletions deployments/ichigo/src/common/utility/generator_utility.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import hashlib
import uuid


class GeneratorUtility:
    """Helpers for generating identifiers."""

    @staticmethod
    def generate_uuid_v4(seed: str = "") -> uuid.UUID:
        """Return a version-4 UUID, random or derived from ``seed``.

        Args:
            seed: When non-empty, the UUID is built from the first 16 bytes of
                the SHA-256 digest of the UTF-8 encoded seed, so the same seed
                always yields the same UUID. When empty, a random UUID is
                returned.

        Returns:
            uuid.UUID: The generated UUID.
        """
        if seed:
            digest: bytes = hashlib.sha256(seed.encode('utf-8')).digest()
            return uuid.UUID(bytes=digest[:16], version=4)
        return uuid.uuid4()
63 changes: 63 additions & 0 deletions deployments/ichigo/src/common/utility/logger_utility.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import logging
from enum import Enum
from typing import ClassVar, Optional

from uvicorn.config import LOGGING_CONFIG


class LoggerUtility:
    """
    Holds a single shared logger and aligns uvicorn's logging config with it.
    """
    # Shared logger; stays None until init_logger() has completed once.
    _logger: ClassVar[Optional[logging.Logger]] = None

    class LogLevel(Enum):
        """
        Supported log levels, mapped to the numeric levels of ``logging``.
        """
        DEBUG = logging.DEBUG
        INFO = logging.INFO
        WARNING = logging.WARNING
        ERROR = logging.ERROR
        CRITICAL = logging.CRITICAL

    @staticmethod
    def _resolve_level(log_level: "LoggerUtility.LogLevel | int | str") -> int:
        """
        Convert a LogLevel member, numeric level or level name to an int.

        Level names are matched case-insensitively. ``TRACE`` is not a level
        of the standard ``logging`` module, so it is mapped to ``DEBUG``.

        Raises:
            ValueError: If a level name is unknown.
            TypeError: If ``log_level`` is not a LogLevel, int or str.
        """
        if isinstance(log_level, LoggerUtility.LogLevel):
            # Plain Enum members are rejected by Logger.setLevel; use the value.
            return log_level.value
        if isinstance(log_level, int):
            return log_level
        if isinstance(log_level, str):
            level_name = log_level.strip().upper()
            if level_name == "TRACE":
                return logging.DEBUG
            # getLevelName returns the int for a known name and a string
            # ("Level X") for an unknown one.
            resolved = logging.getLevelName(level_name)
            if isinstance(resolved, int):
                return resolved
            raise ValueError(f"Unknown log level: {log_level!r}")
        raise TypeError(
            f"Log level must be a LogLevel, int or str, got {type(log_level).__name__}")

    @staticmethod
    def init_logger(name: str, log_level: "LogLevel | int | str" = LogLevel.INFO,
                    log_file: Optional[str] = None) -> None:
        """
        Initialize the shared logger and update uvicorn's LOGGING_CONFIG.

        The shared logger is created only on the first call; later calls leave
        it untouched but still update uvicorn's logging configuration.

        Args:
            name: Name passed to ``logging.getLogger``.
            log_level: A LogLevel member, a numeric level, or a level name.
            log_file: Optional log file path. When given, the shared logger
                also writes to it and uvicorn's "default" and "access"
                handlers are replaced by file handlers on the same path.
                When omitted, uvicorn's handlers are left as they are.
        """
        level = LoggerUtility._resolve_level(log_level)

        if LoggerUtility._logger is None:
            logger = logging.getLogger(name)
            logger.setLevel(level)
            formatter = logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
            if log_file:
                file_handler = logging.FileHandler(log_file)
                file_handler.setFormatter(formatter)
                logger.addHandler(file_handler)
            console_handler = logging.StreamHandler()
            console_handler.setFormatter(formatter)
            logger.addHandler(console_handler)
            # Publish only once fully configured, so a failure above does not
            # leave a half-initialised logger behind.
            LoggerUtility._logger = logger

        if log_file:
            # A FileHandler needs a real filename; without one, keep uvicorn's
            # own handlers instead of configuring a handler that cannot open.
            LOGGING_CONFIG["handlers"]["default"] = {
                "class": "logging.FileHandler",
                "filename": log_file,
                "formatter": "default"
            }
            LOGGING_CONFIG["handlers"]["access"] = {
                "class": "logging.FileHandler",
                "filename": log_file,
                "formatter": "access"
            }
        LOGGING_CONFIG["loggers"]["uvicorn.error"]["level"] = level
        LOGGING_CONFIG["loggers"]["uvicorn.access"]["level"] = level

    @staticmethod
    def get_logger() -> logging.Logger:
        """
        Return the shared logger.

        Raises:
            RuntimeError: If init_logger() has not been called yet.
        """
        if LoggerUtility._logger is None:
            raise RuntimeError("Logger is not initialized.")
        return LoggerUtility._logger
20 changes: 20 additions & 0 deletions deployments/ichigo/src/services/audio/audio_controller.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from common.abstract.controller_abstract import ControllerAbstract
from common.constant.fastapi_constant import RestConstant
from services.audio.audio_model import AudioModel
from services.audio.implementation.audio_service import AudioService

class AudioController(ControllerAbstract):
    """Router exposing the audio inference endpoint under ``/inference``."""

    _prefix = "/inference"

    def __init__(self):
        """Create the router with the class-level prefix."""
        super().__init__(prefix=self._prefix)

    def _setup_services(self):
        """Fetch the audio service that the endpoint delegates to."""
        self.audio_service = AudioService.get_audio_service()

    def _setup_routes(self):
        """Register ``inference`` as a POST handler at the router's prefix."""
        self.add_api_route(
            path="",
            endpoint=self.inference,
            methods=[RestConstant.post],
        )

    async def inference(self, req: AudioModel.AudioCompletionRequest) -> AudioModel.Response:
        """Forward the request to the audio service and return its result."""
        result = await self.audio_service.inference(req)
        return result
11 changes: 11 additions & 0 deletions deployments/ichigo/src/services/audio/audio_interface.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from abc import ABC, abstractmethod

from services.audio.audio_model import AudioModel


class AudioInterface(ABC):
    """Interface that audio service implementations must satisfy."""

    @abstractmethod
    async def inference(self, req: AudioModel.AudioCompletionRequest) -> AudioModel.Response:
        """Handle an audio completion request and return its response.

        Args:
            req: The audio completion request.

        Returns:
            AudioModel.Response: The response produced for ``req``.
        """

47 changes: 47 additions & 0 deletions deployments/ichigo/src/services/audio/audio_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from dataclasses import dataclass, asdict
from enum import Enum
from pydantic import BaseModel
class AudioFormat(str, Enum):
    """Audio container/codec names; members are also plain ``str`` values."""

    # Supported by both backends
    WAV = "wav"
    # Supported by ffmpeg
    MP3 = "mp3"
    # Supported by both
    FLAC = "flac"
    # Supported by ffmpeg
    AAC = "aac"
    # Supported by ffmpeg
    OGG = "ogg"
    # Supported by ffmpeg
    OPUS = "opus"
    # Raw PCM data
    PCM = "pcm"

class WhisperRequest(BaseModel):
    """Audio input payload: the audio data plus its format."""

    # Audio content as a string.
    # NOTE(review): presumably base64-encoded audio — confirm against the caller.
    data: str
    # Default is the enum member rather than the raw string "wav": pydantic
    # does not validate defaults unless asked to, so a string default would
    # leave this field as a plain str when the caller omits it.
    format: AudioFormat = AudioFormat.WAV


class FishSpeechRequest(BaseModel):
    """Text-to-speech request payload with its generation options.

    NOTE(review): presumably sent to the fish-speech service; the meaning of
    each option is defined by that service — confirm there.
    """

    # Text to be synthesised (required).
    text: str
    normalize: bool = True
    # Output audio format name.
    format: str = "wav"
    latency: str = "balanced"
    max_new_tokens: int = 4096
    chunk_length: int = 200
    repetition_penalty: float = 1.5
    streaming: bool = False

class AudioModel:
    """Namespace for the request and response types of the audio endpoint."""

    class AudioCompletionRequest(BaseModel):
        """Request body: chat messages, input audio and sampling options."""

        messages: list[dict[str, str]]
        input_audio: WhisperRequest
        # NOTE(review): this default differs from the "ichigo:8b-gguf-q4-km"
        # fallback in app.py and from "ichigo-0.4:8b-gguf-q4-km" in model.yml;
        # confirm which model id is intended.
        model: str = "ichigo:8b-gguf-q4km"
        stream: bool = True
        temperature: float = 0.7
        top_p: float = 0.9
        max_tokens: int = 2048
        presence_penalty: float = 0.0
        frequency_penalty: float = 0.0
        stop: list[str] = ["<|eot_id|>"]
        output_audio: bool = True

    @dataclass
    class Response:
        """Response body: audio and text results plus the message list."""

        audio: str
        text: str
        messages: list[dict[str, str]]
Loading