forked from Wordcab/wordcab-transcribe
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path.env
151 lines (151 loc) · 9.18 KB
/
.env
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# --------------------------------------- ⚙️ WORDCAB TRANSCRIBE CONFIGURATION ---------------------------------------- #
#
# Do not remove this file or any of the variables below.
# You can only modify the values of the variables to customize the configuration of the API.
#
# ---------------------------------------------- GENERAL CONFIGURATION ----------------------------------------------- #
#
# The name of the project, used for API documentation.
PROJECT_NAME="Wordcab Transcribe"
# The version of the project, used for API documentation.
VERSION="0.5.3"
# The description of the project, used for API documentation.
DESCRIPTION="💬 ASR FastAPI server using faster-whisper and Auto-Tuning Spectral Clustering for diarization."
# This API prefix is used for all endpoints in the API outside of the status and cortex endpoints.
API_PREFIX="/api/v1"
# Debug mode for FastAPI. It allows for hot reloading when code changes in development.
DEBUG=True
#
# ----------------------------------------------- MODELS CONFIGURATION ----------------------------------------------- #
#
# ----------------------------------------------------- WHISPER ------------------------------------------------------ #
#
# The whisper_model parameter is used to control the model used for ASR.
#
# Cloud models:
# The available models are: tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v1, large-v2,
# large-v3, distil-large-v2, and distil-large-v3. Note that the distil models only support "en" as a source_lang.
# You can try different model sizes, but you should expect a trade-off between accuracy and speed.
#
# Local models:
# You can also link a local folder path to use a custom model. If you do so, you should also mount the folder in the
# docker run command as a volume, or include the model directory in your Dockerfile to bake it into the image.
# Note that for the default tensorrt-llm whisper engine, the simplest way to get a converted model is to use
# hatch to start the server locally once. Specify the WHISPER_MODEL and ALIGN_MODEL here, then run
# "hatch run runtime:launch" in your terminal. This will download and convert these models.
# You'll then find the converted models in cloned_wordcab_transcribe_repo/src/wordcab_transcribe/whisper_models
# Then in your Dockerfile, copy the converted models to the /app/src/wordcab_transcribe/whisper_models folder.
# Example for WHISPER_MODEL: COPY cloned_wordcab_transcribe_repo/src/wordcab_transcribe/whisper_models/large-v3 /app/src/wordcab_transcribe/whisper_models/large-v3
# Example for ALIGN_MODEL: COPY cloned_wordcab_transcribe_repo/src/wordcab_transcribe/whisper_models/tiny /app/src/wordcab_transcribe/whisper_models/tiny
WHISPER_MODEL="large-v3"
# You can specify one of two engines, "faster-whisper" or "tensorrt-llm". At the moment, "faster-whisper" is more
# stable, adjustable, and accurate, while "tensorrt-llm" is faster but less accurate and adjustable.
WHISPER_ENGINE="tensorrt-llm"
# The align model is used for aligning timestamps under the "tensorrt-llm" engine. The available options are:
# "tiny", "small", "base", or "medium".
ALIGN_MODEL="tiny"
# The compute_type parameter is used to control the precision of the model. You can choose between:
# "int8", "int8_float16", "int8_bfloat16", "int16", "float16", "bfloat16", "float32" if your
# whisper engine is set to "faster-whisper". If you're using "tensorrt-llm", keep it at "float16".
# The default value is "float16".
COMPUTE_TYPE="float16"
# The extra_languages parameter is used to control the languages that need an extra model to be loaded.
# You can specify multiple languages separated by a comma.
EXTRA_LANGUAGES=
# This is used to control the parallelism of the tokenizers, but should be set to False for now.
TOKENIZERS_PARALLELISM=False
#
# --------------------------------------------------- DIARIZATION ---------------------------------------------------- #
#
# The diarization_backend parameter is used to control the diarization model used. The available options are:
# "longform-diarizer" or "default-diarizer". It's suggested to use "default-diarizer" for better stability.
# The "longform-diarizer" is still being developed.
DIARIZATION_BACKEND="default-diarizer"
# In a MSDD (Multiscale Diarization Decoder) model, the diarization model is trained on multiple window lengths.
# The window_lengths are specified in seconds, and separated by a comma. If not specified, the default value will
# be "1.5, 1.25, 1.0, 0.75, 0.5".
WINDOW_LENGTHS="2.0,1.5,1.0,0.75,0.5"
# The shift_lengths are specified in seconds, and separated by a comma. If not specified, the default value will
# be "0.75, 0.625, 0.5, 0.375, 0.25".
SHIFT_LENGTHS="1.0,0.75,0.625,0.5,0.25"
# The multiscale_weights are float values separated by a comma. If not specified, the default value will be
# "1.0, 1.0, 1.0, 1.0, 1.0".
MULTISCALE_WEIGHTS="1.0,1.0,1.0,1.0,1.0"
#
# --------------------------------------------------- POST-PROCESSING------------------------------------------------- #
#
# This parameter is used to control the punctuation-based alignment. If set to True, the predicted punctuation
# will be used to adjust speaker diarization. The default value is True, but note this comes with a performance
# tradeoff.
ENABLE_PUNCTUATION_BASED_ALIGNMENT=False
#
# ---------------------------------------------- ASR TYPE CONFIGURATION ---------------------------------------------- #
#
# The asr_type parameter is used to control the type of ASR used. The available options are: `async`, `live`,
# `only_transcription`, or `only_diarization`.
# * `async` is the default option. It will process the audio files in batches, and return the results when all the
# files are processed.
# * `live` is the option to use when you want to process a live audio stream. It will process the audio in chunks,
# and return the results as soon as they are available. Live option is still a feature in development.
# * `only_transcription` is used to deploy a single transcription server.
# This option is used when you want to deploy each service in a separate server.
# * `only_diarization` is used to deploy a single diarization server.
# This option is used when you want to deploy each service in a separate server.
# Use `live` only if you need live results, otherwise, use `async`.
ASR_TYPE="async"
#
# --------------------------------------------- ENDPOINTS CONFIGURATION ---------------------------------------------- #
#
# Include the cortex endpoint in the API. This endpoint is used to process audio files from the Cortex API.
# Use this only if you deploy the API using Cortex and Kubernetes.
CORTEX_ENDPOINT=True
#
# ---------------------------------------- API AUTHENTICATION CONFIGURATION ------------------------------------------ #
# The API authentication is used to control the access to the API endpoints.
# It's activated only when the debug mode is set to False.
#
# The username and password are the credentials used to authenticate with the API.
USERNAME="admin"
PASSWORD="admin"
# This openssl_key parameter is used to control the key used to encrypt the access tokens.
# You should absolutely change this value before deploying the API in production.
OPENSSL_KEY="0123456789abcdefghijklmnopqrstuvwyz" # <--- CHANGE THIS VALUE
# This openssl_algorithm parameter is used to control the algorithm used to encrypt the access tokens.
# In most cases, you should not change this value.
OPENSSL_ALGORITHM="HS256"
# The access_token_expire_minutes parameter is used to control the expiration time of the access tokens.
# You can modify it, it's not a critical parameter. Note that this parameter is in minutes.
ACCESS_TOKEN_EXPIRE_MINUTES=30
#
# ---------------------------------------------- CORTEX CONFIGURATION ------------------------------------------------ #
#
# The cortex_api_key parameter is used to control the API key used to authenticate the requests to the cortex endpoint.
WORDCAB_TRANSCRIBE_API_KEY=
#
# ----------------------------------------------- SVIX CONFIGURATION ------------------------------------------------- #
#
# The svix_api_key parameter is used in the cortex implementation to enable webhooks.
SVIX_API_KEY=
# The svix_app_id parameter is used in the cortex implementation to enable webhooks.
SVIX_APP_ID=
#
# ----------------------------------------------- AWS CONFIGURATION ------------------------------------------------- #
#
AWS_ACCESS_KEY_ID=
AWS_SECRET_ACCESS_KEY=
AWS_STORAGE_BUCKET_NAME=
AWS_REGION_NAME=
#
# -------------------------------------------------- REMOTE SERVERS -------------------------------------------------- #
# The remote servers configuration is used to control the number of servers used to process the requests if you don't
# want to group all the services in one server.
#
# The TRANSCRIBE_SERVER_URLS parameter is used to control the URLs of the servers used to process the requests.
# Each URL should be separated by a comma and have this format: "http://host:port".
# e.g. TRANSCRIBE_SERVER_URLS="http://1.2.3.4:8000,http://4.3.2.1:8000"
TRANSCRIBE_SERVER_URLS=
# The DIARIZE_SERVER_URLS parameter is used to control the URLs of the servers used to process the requests.
# Each URL should be separated by a comma and have this format: "http://host:port".
# e.g. DIARIZE_SERVER_URLS="http://1.2.3.4:8000,http://4.3.2.1:8000"
DIARIZE_SERVER_URLS=
#
# -------------------------------------------------------------------------------------------------------------------- #