-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconstants.py
38 lines (32 loc) · 827 Bytes
/
constants.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# Audio front-end settings. Marked FIXED: these are not meant to be tuned.
# NOTE(review): N_FFT and HOP_LENGTH are presumably expressed in samples
# (25 ms window / 10 ms hop at 16 kHz) -- confirm against the feature extractor.
SAMPLE_RATE: int = 16_000
N_FFT: int = 400
HOP_LENGTH: int = 160
N_MELS: int = 80
# Training-loop settings.
BATCH_SIZE: int = 32
NUM_WORKERS: int = 16
MAX_EPOCHS: int = 50
# NOTE(review): presumably a (left, right) attention context for the
# encoder -- the meaning of each element is not visible here; confirm at the
# point of use.
ATTENTION_CONTEXT_SIZE: tuple = (80, 3)
# Filesystem locations. Relative entries start with "./"; what they resolve
# against is decided by the code that opens them.
PRETRAINED_ENCODER_WEIGHT: str = "./weights/small_encoder.pt"
# Placeholder background-noise directories -- replace with real paths.
BG_NOISE_PATH: list = [
    "/path/to/AudioSet",
    "/path/to/musan",
    "/path/to/FSDnoisy18k",
]
# Train and validation currently point at the same sample manifest.
TRAIN_MANIFEST: list = ["./data/sample.jsonl"]
VAL_MANIFEST: list = ["./data/sample.jsonl"]
LOG_DIR: str = "./checkpoints"
# Optimizer / learning-rate scheduler settings.
# NOTE(review): presumably LR is the peak rate reached after WARMUP_STEPS and
# MIN_LR the floor of the decay -- confirm against the scheduler.
TOTAL_STEPS: int = 3_000_000
WARMUP_STEPS: int = 2_000
LR: float = 1e-4
MIN_LR: float = 1e-5
# Tokenizer settings.
VOCAB_SIZE: int = 1024
TOKENIZER_MODEL_PATH: str = "./utils/tokenizer_spe_bpe_v1024_pad/tokenizer.model"
# NOTE(review): numerically equal to VOCAB_SIZE, i.e. presumably the first
# index past the tokenizer vocabulary -- keep in sync if VOCAB_SIZE changes.
RNNT_BLANK: int = 1024
# Mirrors tokenizer.pad_id() for the tokenizer model above.
PAD: int = 1
# Greedy decoding parameters.
# NOTE(review): presumably the cap on symbols emitted per encoder frame --
# confirm against the decoder loop.
MAX_SYMBOLS: int = 3
# Encoder dimensions for the Whisper-small configuration.
# NOTE(review): presumably hidden width, attention-head count and layer count
# respectively -- confirm against the model definition.
N_STATE: int = 768
N_HEAD: int = 12
N_LAYER: int = 12