-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathadd_aug_data.py
34 lines (29 loc) · 1.16 KB
/
add_aug_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import pandas as pd
import os
import shutil
# Add the extra Hindi recordings to the dataset.
#
# Every entry found in WavFormat/andthis is renamed to hindi<N>.wav and moved
# into WavFormat/.  A metadata row is created for each one by copying
# birthplace, native_language and sex from the row of Data/final_data.csv
# whose `filename` equals the part of the entry's name before the first
# underscore.  The three-language rows, the Hindi rows already present in
# Data/final_data.csv and the new rows are then written, in that order, to
# Data/four_languages.csv.

hindi_start = 35  # number used in the first new 'hindi<N>' file name
start_num = 2943  # speakerid assigned to the first new recording
hindi_new_data = []

three_langs = pd.read_csv('Data/three_languages.csv')
all_langs = pd.read_csv('Data/final_data.csv')

# os.listdir() returns entries in arbitrary order; sort them so the mapping
# from source file to speakerid / hindi<N> name is the same on every run.
hindi_extra = sorted(os.listdir('WavFormat/andthis'))

# Phase 1: build every metadata row and the list of planned moves without
# touching the filesystem, so a missing source row aborts the script before
# any file has been renamed.
pending_moves = []
for file in hindi_extra:
    file_name = file.split('_')[0]
    matches = all_langs[all_langs['filename'] == file_name]
    if matches.empty:
        # The original lookup (.index[0]) failed with an IndexError as well;
        # keep the type but say which file could not be resolved.
        raise IndexError(
            f'no row in Data/final_data.csv has filename {file_name!r} '
            f'(needed for {file!r})'
        )
    source_row = matches.iloc[0]  # first match, as before

    new_file_name = f'hindi{hindi_start}'
    hindi_new_data.append({
        'speakerid': start_num,
        'filename': new_file_name,
        'birthplace': source_row['birthplace'],
        'native_language': source_row['native_language'],
        'sex': source_row['sex'],
    })
    pending_moves.append(
        (f'WavFormat/andthis/{file}', f'WavFormat/{new_file_name}.wav')
    )
    start_num += 1
    hindi_start += 1

# Phase 2: all rows resolved, now rename/move the recordings.
for source_path, target_path in pending_moves:
    shutil.move(source_path, target_path)

# 'Unnamed: 0' is dropped from both inputs before concatenation;
# errors='ignore' keeps the script working if the column is absent.
three_langs = three_langs.drop(columns=['Unnamed: 0'], errors='ignore')
four_langs = three_langs.copy()
hindi_old = all_langs[all_langs['native_language'] == 'hindi'].drop(
    columns=['Unnamed: 0'], errors='ignore'
)
new_hindi = pd.DataFrame(hindi_new_data)

final_data = pd.concat([four_langs, hindi_old, new_hindi], axis=0)
# The index is written too (to_csv default), exactly as the original did.
final_data.to_csv('Data/four_languages.csv')