# config.yaml



## Main configuration section containing the general hyperparameters for training the speech enhancement model
SE_Config:
    # --- Signal / framing parameters ---
    # NOTE(review): SR is presumably the sample rate in Hz - confirm against the loader.
    SR: 16000
    # 2048 * 2
    L_FRAME: 4096
    L_HOP: 512

    # --- Run settings ---
    stage: 1
    batchSize: 2

    # --- Training-set directories ---
    # NOTE(review): unlike the eval directories below, these directory names
    # carry no `_16k` suffix - confirm the training audio matches SR.
    clean_files_dir: "/home/karan/datasets/edinburgh_se_dataset/clean_trainset_wav"
    noisy_files_dir: "/home/karan/datasets/edinburgh_se_dataset/noisy_trainset_wav"

    # --- Evaluation-set directories ---
    clean_files_eval_dir: "/home/karan/datasets/edinburgh_se_dataset/clean_testset_wav_16k"
    noisy_files_eval_dir: "/home/karan/datasets/edinburgh_se_dataset/noisy_testset_wav_16k"

    # --- Tuning-set directories (currently empty strings) ---
    tuning_clean_files_dir: ""
    tuning_noisy_files_dir: ""

    # --- Layer 1 checkpoint paths: one per stream, then `lay1_bestchkpt_path` (empty) ---
    lay1_stream1_bestchkpt_path: "/workspace/coherence_net/checkpoint/Ashwin_SE/layer1/Stream1_epoch250000.pt"
    lay1_stream2_bestchkpt_path: "/workspace/coherence_net/checkpoint/Ashwin_SE/layer1/Stream2_epoch135000.pt"
    lay1_stream3_bestchkpt_path: "/workspace/coherence_net/checkpoint/Ashwin_SE/layer1/Stream3_epoch430000.pt"
    lay1_bestchkpt_path: ""

    # --- Layer 2 checkpoint paths (all currently empty strings) ---
    lay2_stream1_bestchkpt_path: ""
    lay2_stream2_bestchkpt_path: ""
    lay2_stream3_bestchkpt_path: ""
    lay2_bestchkpt_path: ""
    
Coherence_Net_Config:
    NAME: 'Ashwin_SE'

    ## Training settings
    checkpoint_path: "checkpoint/"
    LR: 0.0001
    DECAY_RATE: 0.02
    DECAY_STEP: 215000
    FINAL_STEP: 250001
    CKPT_STEP: 25000

    # 90 percent
    train_split: 0.9

    # Training length per stage, given as a number of epochs.
    stream1_steps: 20
    stream2_steps: 20
    stream3_steps: 20
    stream_integerator_steps: 20

    ## Layer 1 settings
    layer1:
        num_layers: 2
        num_streams_per_layer: 3

        # Number of input channels.
        input_channels: 1
        # Kernel size used by the convolutions.
        kernel_size: 3
        # Embedding dimension before the dcs.
        embedding_dim: 128
        # Number of conv layers in the dcs.
        dcs_num_layers: 3
        # Output embedding of the dcs blocks.
        dcs_output_dim: 10
        # Dilations in the dcs block; the list length should equal dcs_num_layers.
        dcs_dilations: [3, 6, 9]
        # Number of sources to separate.
        num_sources: 2
        # Number of input frequency bins.
        # NOTE(review): 2049 == 4096 / 2 + 1, and SE_Config.L_FRAME is 4096 -
        # presumably these must stay in sync; confirm.
        freq_bins: 2049
        # Time frames in each input.
        time_frames: 64

        # Max-pool layer kernel and stride.
        max_pool_kernel: 2
        max_pool_stride: 2

        # num_streams_per_layer * (embedding_dim + dcs_num_layers * dcs_output_dim)
        # = 3 * (128 + 3 * 10) = 474
        stream_integerator_input_channels: 474
        stream_integ_embedding: 96

        output_channels: 1

    ## Layer 2 settings
    layer2:
        num_layers: 2
        num_streams_per_layer: 3

        # Kernel size used by the convolutions.
        kernel_size: 3
        # Embedding dimension before the dcs.
        embedding_dim: 128
        # Number of conv layers in the dcs.
        dcs_num_layers: 3
        # Output embedding of the dcs blocks.
        dcs_output_dim: 10
        # Dilations in the dcs block; the list length should equal dcs_num_layers.
        dcs_dilations: [3, 6, 9]
        # Number of sources to separate.
        num_sources: 2
        # Number of input frequency bins.
        # NOTE(review): 2049 == 4096 / 2 + 1, and SE_Config.L_FRAME is 4096 -
        # presumably these must stay in sync; confirm.
        freq_bins: 2049
        # Time frames in each input.
        time_frames: 64

        # Max-pool layer kernel and stride.
        max_pool_kernel: 2
        max_pool_stride: 2

        # num_streams_per_layer * (embedding_dim + dcs_num_layers * dcs_output_dim)
        # = 3 * (128 + 3 * 10) = 474
        stream_integerator_input_channels: 474

        # Number of input channels to layer 2; same formula as above (474).
        input_channels: 474
        stream_integ_embedding: 96

        output_channels: 1