Commit d1ad811
Read RTTM output from Yunitator, and deliver another RTTM file with VCM
MilesICL committed Dec 3, 2018
1 parent a813d5f commit d1ad811
Showing 12 changed files with 144 additions and 30 deletions.
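For context: the new vcm_evaluate.py (added below) reads each SPEAKER line of the Yunitator RTTM, and for every CHI segment it cuts the audio, extracts eGeMAPS features, and appends a parallel SPEAKER line carrying the VCM class (NONL or LING) to the output RTTM. Hypothetical tab-separated lines, with illustrative values (field layout inferred from the indices used in vcm_evaluate.py):

input, yunitator_example.rttm (hypothetical):
SPEAKER	example	1	12.34	0.56	<NA>	<NA>	CHI	0.87	<NA>

output, vcm_example.rttm (hypothetical):
SPEAKER	example	1	12.34	0.56	<NA>	<NA>	LING	0.92	<NA>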
Empty file modified HTK.py (mode 100644 → 100755; contents unchanged)
4 changes: 2 additions & 2 deletions Net.py (mode 100644 → 100755)
@@ -15,7 +15,7 @@ def forward(self, x):
         x = F.relu(self.fc1(x))
         x = F.relu(self.fc2(x))
         x = self.fc3(x)
-        return F.log_softmax(x, dim=1)
+        return F.softmax(x, dim=1)
 
 
 class NetSyll(nn.Module):
@@ -29,4 +29,4 @@ def forward(self, x):
         x = F.relu(self.fc1(x))
         x = F.relu(self.fc2(x))
         x = self.fc3(x)
-        return F.log_softmax(x, dim=1)
+        return F.softmax(x, dim=1)
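Both hunks make the networks return softmax probabilities instead of log-probabilities. This matters for the new vcm_evaluate.py below, which reports output_ling.max() as a posterior-probability confidence: softmax and log_softmax share the same argmax, but only softmax values lie in [0, 1]. A minimal sketch (illustrative values, not from the repo):

import torch
import torch.nn.functional as F

logits = torch.tensor([[1.2, -0.3]])      # hypothetical NetLing output for one segment
probs = F.softmax(logits, dim=1)          # tensor([[0.8176, 0.1824]]) -- sums to 1
log_probs = F.log_softmax(logits, dim=1)  # tensor([[-0.2014, -1.7014]]) -- negative log-probs

# the predicted class is identical either way...
assert probs.argmax(dim=1).item() == log_probs.argmax(dim=1).item()
# ...but only the softmax maximum is directly usable as a confidence in [0, 1]
print(probs.max().item())  # ~0.82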
Binary file removed data/VCMtemp/example1_0.htk
1 change: 0 additions & 1 deletion data/VCMtemp/example1_0.rttm_sorted
This file was deleted.
Binary file removed data/example1_0.wav
Binary file removed data/example2_1.wav
Binary file removed data/example3_0.wav
Binary file removed data/example4_1.wav
Empty file modified evaluate_vcm.py (mode 100644 → 100755; contents unchanged)
57 changes: 30 additions & 27 deletions runVCM.sh
@@ -1,41 +1,44 @@
 #!/bin/bash
-# runDiarNoisemes.sh
 
-# run OpenSAT with hard coded models & configs found here and in /vagrant
-# assumes Python environment in /home/${user}/
-# usage: runDiarNoisemes.sh <folder containing .wav files to process>
-
-# Absolute path to this script. /home/user/bin/foo.sh
-SCRIPT=$(readlink -f $0)
-# Absolute path this script is in. /home/user/bin
-BASEDIR=`dirname $SCRIPT`
-
-
-filename=$(basename "$1")
-dirname=$(dirname "$1")
-extension="${filename##*.}"
-basename="${filename%.*}"
-
-# this is set in user's login .bashrc
-export PATH=/home/${USER}/anaconda/bin:$PATH
-
 # usage introduction
 if [ $# -ne 1 ]; then
     echo "Usage: runVCM.sh <audiofile>"
     exit 1;
 fi
 
-# let's get our bearings: set CWD to path of this script
+# directory of scripts; set path
+export PATH=/home/${USER}/anaconda/bin:$PATH
+SCRIPT=$(readlink -f $0)
+BASEDIR=`dirname $SCRIPT`
 cd $BASEDIR
 echo $BASEDIR
 
-# make output folder for features, below input folder
-mkdir -p $dirname/VCMtemp/
+# check for results from Yunitator; if missing, run Yunitator first to obtain yunitator_rttm_file
+audio_file=$1
+bn=$(basename $audio_file)
+dn=$(dirname $audio_file)
+yunitator_rttm_file=$dn"/yunitator_"${bn//wav/rttm}  # Yunitator output
+if [ ! -e $yunitator_rttm_file ]; then
+    echo "Error: Cannot find corresponding SAD outputs. Please run Yunitator first!"
+    exit 1;
+fi
+vcm_rttm_file=$dn"/vcm_"${bn//wav/rttm}  # VCM output
 
+
+# # make output folder for features, below input folder
+# KEEPTEMP=false
+# if [ $BASH_ARGV == "--keep-temp" ]; then
+#     KEEPTEMP=true
+# fi
+# VCMTEMP=$dn/VCMtemp
+# mkdir -p $VCMTEMP
+
+# do vcm recognition
+python2 ./vcm_evaluate.py ${audio_file} ${yunitator_rttm_file} ${vcm_rttm_file}
+
 echo $dirname/VCMtemp
+# # simply remove segmented waves and acoustic features
+# if ! $KEEPTEMP; then
+#     rm -rf $VCMTEMP
+# fi
 
-# first features
-./extract-htk-vm2.sh $1
-
-# # then confidences
-python2 evaluate_vcm.py $dirname/VCMtemp/$basename.htk $dirname/VCMtemp/vcm_$basename.rttm
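With this rework, runVCM.sh takes a single audio file instead of driving feature extraction itself, and delegates segmentation, feature extraction, and prediction to vcm_evaluate.py. A hypothetical invocation (file names illustrative):

./runVCM.sh data/example.wav
(expects data/yunitator_example.rttm to exist; writes data/vcm_example.rttm next to it)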
Empty file modified structNN.py (mode 100644 → 100755; contents unchanged)
112 changes: 112 additions & 0 deletions vcm_evaluate.py (new file)
@@ -0,0 +1,112 @@
import sys, os, os.path
import torch
from torch.autograd import Variable
from Net import NetLing, NetSyll
try:
    import _pickle as pickle  # Python 3
except ImportError:
    import cPickle as pickle  # Python 2
from HTK import HTKFile
import numpy as np
import subprocess


def seg_audio(input_audio, output_audio, onset, duration):
    # cut one segment out of the long recording with sox
    cmd_seg = 'sox {} {} trim {} {}'.format(input_audio, output_audio, onset, duration)
    subprocess.call(cmd_seg, shell=True)


def extract_feature(audio, feature):
    # run openSMILE to extract eGeMAPS features in HTK format
    config = './config/gemaps/eGeMAPSv01a.conf'
    opensmile = '~/repos/opensmile-2.3.0/bin/linux_x64_standalone_static/SMILExtract'
    # opensmile = '~/tools/opensmile-2.3.0/bin/linux_x64_standalone_static/SMILExtract'
    cmd = '{} -C {} -I {} -htkoutput {}'.format(opensmile, config, audio, feature)
    subprocess.call(cmd, shell=True)


def predict_vcm(model, input, mean_var):
    ### read normalisation parameters
    assert os.path.exists(mean_var)
    with open(mean_var, 'rb') as f:
        mv = pickle.load(f)
    m, v = mv['mean'], mv['var']
    std = lambda feat: (feat - m) / v  # normalise with the stored mean/variance

    # load the input features and predict
    htk_reader = HTKFile()
    htk_reader.load(input)
    feat = std(np.array(htk_reader.data))
    input = Variable(torch.from_numpy(feat.astype('float32')))  # .cuda()
    output_ling = model(input).data.cpu().numpy()
    prediction_confidence = output_ling.max()  # posterior probability

    class_names_ling = ['NONL', 'LING']
    cls_ling = np.argmax(output_ling)
    prediction_vcm = class_names_ling[cls_ling]  # predicted class

    return prediction_vcm, prediction_confidence


def main(audio_file, yun_rttm_file, vcm_rttm_file, mean_var, vcm_model):
    ### check that the temporary folder exists
    tmpdir = os.path.dirname(audio_file) + '/VCMtemp'
    assert os.path.exists(tmpdir)

    with open(vcm_rttm_file, 'w+') as vf:
        # process segments one by one; if a segment is an infant vocalisation, run VCM on it
        with open(yun_rttm_file, 'r') as yf:
            for line in yf.readlines():
                els = line.split('\t')
                file, onset, dur, cls, conf = els[1], els[3], els[4], els[7], els[8]
                if 'CHI' in cls:
                    audio_segment = '{}/{}_{}_{}.wav'.format(tmpdir, file.replace('.rttm', ''), onset, dur)
                    print(audio_segment)
                    feature_file = audio_segment.replace('wav', 'htk')

                    ### segment the audio file into small subsegments according to the Yunitator output
                    try:
                        seg_audio(audio_file, audio_segment, onset, dur)
                    except Exception:
                        print("Error: Cannot segment the audio: {}, from: {}, length: {}".format(audio_file, onset, dur))
                        sys.exit(1)

                    ### extract acoustic features
                    try:
                        extract_feature(audio_segment, feature_file)
                    except Exception:
                        print("Error: Cannot extract the acoustic features from: {}".format(audio_segment))
                        sys.exit(1)

                    ### do VCM prediction
                    try:
                        vcm_prediction, vcm_confidence = predict_vcm(vcm_model, feature_file, mean_var)
                    except Exception:
                        print("Error: Cannot perform VCM prediction on: {}".format(audio_segment))
                        sys.exit(1)

                    ### save the prediction into the rttm file
                    line = 'SPEAKER\t{}\t1\t{}\t{}\t<NA>\t<NA>\t{}\t{:.2f}\t<NA>\n'.format(file, onset, dur, vcm_prediction, float(vcm_confidence))
                    vf.write(line)


if __name__ == '__main__':
    ### global parameters
    audio_file = sys.argv[1]     # input audio file (daylong recording)
    yun_rttm_file = sys.argv[2]  # input rttm file, results from Yunitator
    # audio_file = '/data/work2/DiViMe/vcm/data/example.wav'
    # yun_rttm_file = '/data/work2/DiViMe/vcm/data/yunitator_example.rttm'
    vcm_rttm_file = yun_rttm_file.replace('yunitator', 'vcm') if len(sys.argv) < 4 else sys.argv[3]
    mean_var = './ling.eGeMAPS.func_utt.meanvar'

    ### models
    net_ling = NetLing(88, 1024, 2)  # .cuda()
    net_ling.load_state_dict(torch.load('modelLing.pt', map_location=lambda storage, loc: storage))
    # net_syll = NetSyll(88, 1024, 2)  # .cuda()
    # net_syll.load_state_dict(torch.load('modelSyll.pt', map_location=lambda storage, loc: storage))
    vcm_model = net_ling

    main(audio_file, yun_rttm_file, vcm_rttm_file, mean_var, vcm_model)
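For reference, a hypothetical direct invocation matching the argument handling above (paths illustrative): the VCMtemp folder must already exist next to the audio, and the third argument may be omitted, in which case the output name is derived by replacing 'yunitator' with 'vcm'.

python2 vcm_evaluate.py data/example.wav data/yunitator_example.rttm data/vcm_example.rttm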

