-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
read rttm output from yunitator, and deliver another rttm file with vcm
- Loading branch information
Showing
12 changed files
with
144 additions
and
30 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
This file was deleted.
Oops, something went wrong.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,41 +1,44 @@ | ||
#!/bin/bash | ||
# runDiarNoisemes.sh | ||
|
||
# run OpenSAT with hard coded models & configs found here and in /vagrant | ||
# assumes Python environment in /home/${user}/ | ||
# usage: runDiarNoisemes.sh <folder containing .wav files to process> | ||
|
||
# Absolute path to this script. /home/user/bin/foo.sh | ||
SCRIPT=$(readlink -f $0) | ||
# Absolute path this script is in. /home/user/bin | ||
BASEDIR=`dirname $SCRIPT` | ||
|
||
|
||
filename=$(basename "$1") | ||
dirname=$(dirname "$1") | ||
extension="${filename##*.}" | ||
basename="${filename%.*}" | ||
|
||
# this is set in user's login .bashrc | ||
export PATH=/home/${USER}/anaconda/bin:$PATH | ||
|
||
# usage introduction | ||
if [ $# -ne 1 ]; then | ||
echo "Usage: runVCM.sh <audiofile>" | ||
exit 1; | ||
fi | ||
|
||
# let's get our bearings: set CWD to path of this script | ||
# locate the directory containing this script and set PATH | ||
export PATH=/home/${USER}/anaconda/bin:$PATH | ||
SCRIPT=$(readlink -f $0) | ||
BASEDIR=`dirname $SCRIPT` | ||
cd $BASEDIR | ||
echo $BASEDIR | ||
|
||
# make output folder for features, below input folder | ||
mkdir -p $dirname/VCMtemp/ | ||
# Check for results from Yunitator; if they are missing, Yunitator must be run first to produce yunitator_rttm_file | ||
audio_file=$1 | ||
bn=$(basename $audio_file) | ||
dn=$(dirname $audio_file) | ||
yunitator_rttm_file=$dn"/yunitator_"${bn//wav/rttm} # yunicator output | ||
if [ ! -e $yunitator_rttm_file ]; then | ||
echo "Error: Cannot find corresponding SAD outputs. Please run yunicatator first!" | ||
exit 1; | ||
fi | ||
vcm_rttm_file=$dn"/vcm_"${bn//wav/rttm} # vcm output | ||
|
||
|
||
# # make output folder for features, below input folder | ||
# KEEPTEMP=false | ||
# if [ $BASH_ARGV == "--keep-temp" ]; then | ||
# KEEPTEMP=true | ||
# fi | ||
# VCMTEMP=$dn/VCMtemp | ||
# mkdir -p $VCMTEMP | ||
|
||
# do vcm recognition | ||
python2 ./vcm_evaluate.py ${audio_file} ${yunitator_rttm_file} ${vcm_rttm_file} | ||
|
||
echo $dirname/VCMtemp | ||
# # simply remove segmented waves and acoustic features | ||
# if ! $KEEPTEMP; then | ||
# rm -rf $VCMTEMP | ||
# fi | ||
|
||
# first features | ||
./extract-htk-vm2.sh $1 | ||
|
||
# # then confidences | ||
python2 evaluate_vcm.py $dirname/VCMtemp/$basename.htk $dirname/VCMtemp/vcm_$basename.rttm |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
import sys, os, os.path | ||
import torch | ||
from torch.autograd import Variable | ||
from Net import NetLing, NetSyll | ||
# Prefer the C-accelerated pickle implementation under either interpreter:
# `_pickle` exists on Python 3, `cPickle` on Python 2.
try:
    import _pickle as pickle
except ImportError:
    import cPickle as pickle
from HTK import HTKFile | ||
import numpy as np | ||
import pandas as pd | ||
import subprocess | ||
import shutil | ||
|
||
|
||
def seg_audio(input_audio, output_audio, onset, duration):
    """Cut one segment out of an audio file with SoX.

    Runs ``sox <input_audio> <output_audio> trim <onset> <duration>``.

    Args:
        input_audio: Path of the recording to cut from.
        output_audio: Path the extracted segment is written to.
        onset: Start position handed to the SoX ``trim`` effect
            (string or number).
        duration: Segment length handed to the SoX ``trim`` effect
            (string or number).

    Raises:
        subprocess.CalledProcessError: If ``sox`` exits with a non-zero status.
        OSError: If the ``sox`` executable cannot be started.
    """
    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters intact. check_call turns a failed run into an exception,
    # so a caller wrapping this call in try/except actually sees the failure.
    cmd_seg = ['sox', input_audio, output_audio, 'trim', str(onset), str(duration)]
    subprocess.check_call(cmd_seg)
|
||
|
||
def extract_feature(audio, feature,
                    config='./config/gemaps/eGeMAPSv01a.conf',
                    opensmile='~/repos/opensmile-2.3.0/bin/linux_x64_standalone_static/SMILExtract'):
    """Run openSMILE's SMILExtract on one audio file.

    Executes ``<opensmile> -C <config> -I <audio> -htkoutput <feature>``.

    Args:
        audio: Path of the audio file to analyse.
        feature: Path passed to ``-htkoutput`` (where the features are written).
        config: openSMILE configuration file. The default is relative to the
            current working directory.
        opensmile: Path of the SMILExtract executable. A leading ``~`` is
            expanded to the user's home directory. The install location is
            machine specific (e.g. ``~/tools/opensmile-2.3.0/...`` on other
            setups), hence the parameter.

    Raises:
        subprocess.CalledProcessError: If SMILExtract exits with a non-zero
            status.
        OSError: If the executable cannot be started.
    """
    # No shell is involved any more, so "~" has to be expanded explicitly.
    cmd = [
        os.path.expanduser(opensmile),
        '-C', os.path.expanduser(config),
        '-I', audio,
        '-htkoutput', feature,
    ]
    # check_call raises on failure instead of silently returning the status.
    subprocess.check_call(cmd)
|
||
|
||
def predict_vcm(model, input, mean_var):
    """Classify one feature file as linguistic or non-linguistic.

    Args:
        model: Callable network; it is applied to the normalised features
            and its output is converted to a NumPy array.
        input: Path of the HTK feature file to classify.
        mean_var: Path of a pickle file holding a mapping with the keys
            ``'mean'`` and ``'var'`` used to normalise the features.

    Returns:
        A ``(label, confidence)`` tuple. ``label`` is ``'NONL'`` or ``'LING'``
        (picked by the arg-max of the model output) and ``confidence`` is the
        maximum value of the model output.

    Raises:
        IOError: If ``mean_var`` does not exist.
    """
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if not os.path.exists(mean_var):
        raise IOError('Normalisation file not found: {}'.format(mean_var))

    # NOTE(security): unpickling can execute arbitrary code; `mean_var` must
    # only ever point at a trusted file.
    with open(mean_var, 'rb') as f:
        mv = pickle.load(f)
    mean, var = mv['mean'], mv['var']

    # Load the input features and normalise them.
    htk_reader = HTKFile()
    htk_reader.load(input)
    # NOTE(review): this divides by 'var' itself, not its square root --
    # presumably it matches how the model was trained; confirm before changing.
    feat = (np.array(htk_reader.data) - mean) / var

    # CPU inference; a separate name avoids rebinding the `input` parameter.
    net_input = Variable(torch.from_numpy(feat.astype('float32')))
    output_ling = model(net_input).data.cpu().numpy()
    # Treated as the posterior probability by the original author; whether it
    # is normalised depends on the network's output layer.
    prediction_confidence = output_ling.max()

    # NOTE(review): arg-max runs over the flattened output, so this assumes
    # the model yields exactly one score per class -- TODO confirm.
    class_names_ling = ['NONL', 'LING']
    prediction_vcm = class_names_ling[np.argmax(output_ling)]

    return prediction_vcm, prediction_confidence
|
||
|
||
def main(audio_file, yun_rttm_file, vcm_rttm_file, mean_var, vcm_model):
    """Run VCM prediction on every 'CHI' segment listed in a Yunitator RTTM.

    For each tab-separated RTTM line whose label field (column 8) contains
    ``'CHI'``, the segment is cut out of ``audio_file``, acoustic features are
    extracted, the model is applied, and one RTTM line with the predicted
    class and confidence is written to ``vcm_rttm_file``. Segment audio and
    feature files are written to a ``VCMtemp`` folder next to ``audio_file``
    and are not removed afterwards.

    Args:
        audio_file: Path of the full recording.
        yun_rttm_file: Path of the Yunitator RTTM file (tab separated).
        vcm_rttm_file: Path of the RTTM file to write (overwritten).
        mean_var: Path of the normalisation pickle given to ``predict_vcm``.
        vcm_model: Model given to ``predict_vcm``.

    Raises:
        ValueError: If a non-blank RTTM line has fewer than 8 fields.
        SystemExit: With status 1 if segmenting, feature extraction or
            prediction fails for a segment.
    """
    # os.path.join keeps the folder relative when audio_file has no directory
    # part (plain concatenation would have produced "/VCMtemp").
    tmpdir = os.path.join(os.path.dirname(audio_file), 'VCMtemp')
    # Create the temporary folder on demand instead of asserting it exists.
    if not os.path.isdir(tmpdir):
        os.makedirs(tmpdir)

    # The input is opened first so a missing Yunitator file does not leave an
    # empty output file behind.
    with open(yun_rttm_file, 'r') as yf, open(vcm_rttm_file, 'w') as vf:
        # Process the segments one by one; only infant vocalisations get VCM.
        for line in yf:
            if not line.strip():
                continue  # tolerate blank lines (e.g. at the end of the file)
            els = line.rstrip('\r\n').split('\t')
            if len(els) < 8:
                raise ValueError('Malformed RTTM line in {}: {!r}'.format(yun_rttm_file, line))
            file_id, onset, dur, speaker = els[1], els[3], els[4], els[7]
            if 'CHI' not in speaker:
                continue

            segment_name = '{}_{}_{}.wav'.format(file_id.replace('.rttm', ''), onset, dur)
            audio_segment = os.path.join(tmpdir, segment_name)
            print(audio_segment)
            # Swap only the extension; a blanket replace('wav', 'htk') would
            # also rewrite directory names that happen to contain "wav".
            feature_file = os.path.splitext(audio_segment)[0] + '.htk'

            ### segment audio file into small subsegments according to the yunitator output
            try:
                seg_audio(audio_file, audio_segment, onset, dur)
            except Exception:
                print("Error: Cannot segment the auido: {}, from: {}, length: {}".format(audio_file, onset, dur))
                sys.exit(1)  # non-zero status so calling scripts can detect the failure

            ### extract acoustic feature
            try:
                extract_feature(audio_segment, feature_file)
            except Exception:
                print("Error: Cannot extract the acoustic features from: {}".format(audio_segment))
                sys.exit(1)

            ### do vcm prediction
            try:
                vcm_prediction, vcm_confidence = predict_vcm(vcm_model, feature_file, mean_var)
            except Exception:
                print("Error: Cannot proceed vcm prediction on: {}".format(audio_segment))
                sys.exit(1)

            ### save prediction into rttm file
            vf.write('SPEAKER\t{}\t1\t{}\t{}\t<NA>\t<NA>\t{}\t{:.2f}\t<NA>\n'.format(
                file_id, onset, dur, vcm_prediction, float(vcm_confidence)))
|
||
|
||
if __name__ == '__main__':
    ### global parameters
    # usage: vcm_evaluate.py <audio_file> <yunitator_rttm_file> [vcm_rttm_file]
    if len(sys.argv) < 3:
        print("Usage: {} <audio_file> <yunitator_rttm_file> [vcm_rttm_file]".format(sys.argv[0]))
        sys.exit(1)
    audio_file = sys.argv[1]  # input audio file (daylong recording)
    yun_rttm_file = sys.argv[2]  # input rttm file, results from yunitator
    # Output rttm: explicit third argument, otherwise derived from the input name.
    if len(sys.argv) >= 4:
        vcm_rttm_file = sys.argv[3]
    else:
        vcm_rttm_file = yun_rttm_file.replace('yunitator', 'vcm')
    # If the input name does not contain "yunitator" the derived path equals
    # the input path, and opening it for writing would wipe the Yunitator
    # results. Refuse instead.
    if os.path.abspath(vcm_rttm_file) == os.path.abspath(yun_rttm_file):
        print("Error: the vcm output file would overwrite the yunitator file: {}".format(yun_rttm_file))
        sys.exit(1)
    # Relative path: resolved against the current working directory.
    mean_var = './ling.eGeMAPS.func_utt.meanvar'

    ### models
    net_ling = NetLing(88, 1024, 2)  # .cuda()
    # map_location keeps every tensor on the CPU regardless of where it was saved.
    net_ling.load_state_dict(torch.load('modelLing.pt', map_location=lambda storage, loc: storage))
    # Inference only: switch layers such as dropout / batch-norm (if NetLing
    # has any) to evaluation behaviour.
    net_ling.eval()
    # net_syll = NetSyll(88, 1024, 2) #.cuda()
    # net_syll.load_state_dict(torch.load('modelSyll.pt', map_location = lambda storage, loc: storage))
    vcm_model = net_ling

    main(audio_file, yun_rttm_file, vcm_rttm_file, mean_var, vcm_model)
|
||
|