๋ชจ๋ธ 1์์ ๊ธฐ๋ฅ 3์ผ๋ก ๋ถ๋ฅ(์นดํ ๊ณ ๋ฆฌ :3) ์ธ ๊ฒฝ์ฐ ํด๋น ๋ชจ๋ธ์์ ์ญ๋ณ ๋ถ๋ฅ ์์ ์ํ
[๋ณ๊ฒฝ์ฌํญ]
- ํผ์ฒ๋ฅผ ์ต๋ํ ๋ง์ด ์์ฑํ์ฌ ๋ฐ์ดํฐ๋ฅผ ๊ฐ๊ณตํจ -> drop out ํจ๊ณผ ๊ทน๋ํ ๋ฐ ๊ณผ์ ํฉ ๋ฐฉ์ง, ์ ํ๋ ํฅ์
- ์ด์ ๊ธฐ๋ฅ ๋ณ ๋ถ๋ฆฌ๋์๋ ๋ชจ๋ธ(ver 2.*) ์ ํตํฉํจ
- ๊ฐ ๋ฐ์ดํฐ ๋ณ 193 ํผ์ฒ ์ถ์ถ
- row ํต์ผ ์ํจ (3~4sec)
- ์ํซ์ธ์ฝ๋ฉ ์ํจ
- ๋ผ๋ฒจ์ 1์ฐจ์ ๋ฐฐ์ด๋ก ๋ณ๊ฒฝ -> ์นดํ ๊ณ ๋ฆฌ ๋ณ int๊ฐ ์ถ๋ ฅํ๊ฒ ํจ
[๊ธฐ์กด๊ณผ ๋์ผ]
- ๊ธฐ๋ฅ 1,2์ ๋ํด ์ฌ์ฉ ๋ฐ์ดํฐ๋ ๋ด์๋ํ๊ต MARL์ URBANSOUND8K DATASET ์ผ๋ถ์ ์ผ์ ์ํ์์ ๋ น์ํ ๋ น์ ํ์ผ(.wav)๋ฅผ ํ์ฉ (2,622๊ฐ, 1.96G)
- ๊ธฐ๋ฅ 3์ ๋ํด ํ์น์ญ ์๋ฆผ์์ ํธ๋ฆฌ๊ฑฐ๋ก ์ ์ฉ (894๊ฐ, 0.71G)
import numpy as np
import pandas as pd
#wav ํ์ผ๋ค์ ํผ์ฒ ์์ฑ
#librosa ์ฌ์ฉ
#์ฌ์ฉ ํน์ฑ์ mfcc, chroma_stft, melspectorgram, spectral_contrast, tonnetz๋ก ์ด193
#๋ฅ๋ฌ๋ ๋ชจ๋ธ๋ง ์ฌ์ฉํ ์์ -> ํผ์ฒ ์ถ์ ์๋ต
import glob
import librosa
# ์ค๋์ค ๋ถ๋ฌ์ค๊ธฐ + ํผ์ณ ์์ฑ
# ํผ์ณ 193๊ฐ
# row ํต์ผ ์์ํด
def extract_feature(file_name):
    """Load one audio file and return its 193 summary features.

    Returns five 1-D arrays — 40 MFCC, 12 chroma, 128 mel, 7 spectral
    contrast and 6 tonnetz coefficients — each averaged over time frames,
    so hstack-ing them yields a fixed 193-length vector per file.
    """
    X, sample_rate = librosa.load(file_name)
    stft = np.abs(librosa.stft(X))
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
    # Pass the signal as a keyword: librosa >= 0.10 made melspectrogram's
    # arguments keyword-only, so the old positional `X` raises TypeError.
    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0)
    return mfccs, chroma, mel, contrast, tonnetz
#๋ฐ์ดํฐ ๊ฐ๊ณต
#ํ๋ ฌ๋ก ๋ณํ
def parse_audio_files(filenames):
    """Extract features and labels for every file in *filenames*.

    Returns (features, labels): features is an (n_files, 193) matrix and
    labels is (n_files, 1), holding the integer category parsed from the
    part of the file name before the first '-'.
    """
    rows = len(filenames)
    features, labels = np.zeros((rows, 193)), np.zeros((rows, 1))
    i = 0
    for fn in filenames:
        try:
            mfccs, chroma, mel, contrast, tonnetz = extract_feature(fn)
            ext_features = np.hstack([mfccs, chroma, mel, contrast, tonnetz])
            y_col = int(fn.split('-')[0])
        except Exception:
            # Narrowed from a bare except: keep the best-effort skip for
            # decode/parse failures, but let KeyboardInterrupt through.
            print("error : "+fn)
        else:
            features[i] = ext_features
            labels[i] = y_col
            print(y_col)
            i += 1
    # NOTE(review): files that errored leave all-zero rows at the tail
    # (i only advances on success) — confirm whether training should
    # instead receive features[:i], labels[:i].
    return features, labels
# Build the training set from every .wav in the working directory and
# cache it. Category ids encoded in the file names:
#   0 : siren
#   1 : approaching car (engine noise)
#   2 : car horn
#   4 : transfer-station announcement
audio_files = list(glob.glob('*.wav'))
print(len(audio_files))
files = audio_files
X, y = parse_audio_files(files)
# Saved archive is data.npz (np.savez appends the extension).
np.savez('data', X=X, y=y)
[๋ณ๊ฒฝ์ฌํญ]
[๊ธฐ์กด๊ณผ ๋์ผ]
- data.npz ๋ถ๋ฌ์ค๊ธฐ
import glob
import librosa
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# ์๋ฆผ, ์ฐจ๋ ์์ง, ์ฐจ๋ ๊ฒฝ์ , ์งํ์ฒ ํธ๋ฆฌ๊ฑฐ ์
# Load the cached feature archive back into training arrays.
# NOTE(review): the markdown above mentions data.npz, but this reads
# model_1.npz — confirm which archive holds the intended features.
sound_data = np.load('model_1.npz')
X_train = sound_data['X']
y_train = sound_data['y']
# The bare `X_train.shape, y_train.shape` expression was duplicated and
# is a no-op outside a notebook cell; print the shapes once instead.
print(X_train.shape, y_train.shape)
[๋ณ๊ฒฝ์ฌํญ]
- lstm ์ฌ์ฉ
- ์ด์ ๋ชจ๋ธ์ ๊ฒฝ์ฐ ํ๋ผ๋ฏธํฐ ์กฐ์ ์ ์ด์ ์ ๋์์ผ๋ ver 3.5์์๋ layer ๊ตฌ์ฑ์ ์ด์ ์ ๋์ด ์งํ
[๊ธฐ์กด๊ณผ ๋์ผ]
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
import numpy as np
import matplotlib.pyplot as plt
import os
from keras import models
from keras import layers
from keras.layers import *
from keras import optimizers
from keras.layers import LSTM
from keras.models import Sequential
from keras.layers import Dense
import keras.backend as K
from keras.callbacks import EarlyStopping
# Build and train a small LSTM: each of the 193 features is fed as one
# timestep of width 1, and a single linear output unit regresses the
# integer class id with MSE loss.
# NOTE(review): treating class ids as a regression target (Dense(1) +
# MSE) is unusual for a 4-category problem — confirm this is intentional
# versus a softmax classifier with categorical cross-entropy.
K.clear_session()
model = Sequential()  # Sequential model
model.add(LSTM(20, input_shape=(193, 1)))  # (timesteps, features) = (193, 1)
model.add(Dense(1))  # single scalar output: the predicted class id
model.compile(loss='mean_squared_error', optimizer='adam')
model.summary()
#X_train = X_train.values
# Reshape flat (n, 193) features into (n, 193, 1) sequences for the LSTM.
X_train = X_train.reshape(X_train.shape[0], 193, 1)
# Stop as soon as training loss fails to improve for one epoch.
early_stop = EarlyStopping(monitor='loss', patience=1, verbose=1)
model.fit(X_train, y_train, epochs=100,
          batch_size=30, verbose=1, callbacks=[early_stop])
[๋ณ๊ฒฝ์ฌํญ]
- pkl, json, pb, tflite๋ก ์ ์ฅ
[๊ธฐ์กด๊ณผ ๋์ผ]
# Persist the trained model in several formats: pkl, json, h5, pb, tflite.
import joblib
joblib.dump(model, 'model/pkl/model_1.pkl')

# JSON holds the architecture only (no weights).
model_1 = model.to_json()
# model = model_from_json(json_string)

# Keras native formats: SavedModel directory and a single .h5 file.
from keras.models import load_model
model.save('model/h5/model_1')
model.save('model/h5/model_1.h5')

# TensorFlow SavedModel (pb). save_format expects the string 'tf' —
# the original passed the tensorflow module object itself.
model = keras.models.load_model('model/h5/model_1', compile=False)
model.save('model/pb/', save_format='tf')

# TFLite conversion; SELECT_TF_OPS is needed for the LSTM kernels.
saved_model_dir = 'model/pb/'
converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS,
                                       tf.lite.OpsSet.SELECT_TF_OPS]
# The original assigned `tfilte_mode` but wrote `ftlite_model` (NameError);
# use one consistent name and close the file with a context manager.
tflite_model = converter.convert()
with open('model/tflite/model_1.tflite', 'wb') as f:
    f.write(tflite_model)
[๋ณ๊ฒฝ์ฌํญ]
- 10์ด๋ก ์ ํ
[๊ธฐ์กด๊ณผ ๋์ผ]
- ๋ธ๋ฃจํฌ์ค ์ด์ดํฐ์ ์ธ๋ถ ๋ง์ดํฌ ์ฌ์ฉ (์ฑ๋ ์กฐ์ )
import librosa
import scipy.signal as signal
import numpy as np
import pandas as pd
import joblib
#ํ๊ฒฝ ํ์ธ
import pyaudio
import wave
# Recording parameters: 10 s of 16 kHz mono, 16-bit samples.
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
CHUNK = 1024
RECORD_SECONDS = 10
WAVE_OUTPUT_FILENAME = "test_file.wav"

audio = pyaudio.PyAudio()
# Start recording. Open the stream with the FORMAT constant so the
# capture format and the saved sample width cannot drift apart (the
# original repeated the literal pyaudio.paInt16 here).
# NOTE(review): input_device_index=1 is hard-coded for one machine's
# external mic — confirm the device index on the target hardware.
stream = audio.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    input_device_index=1,
                    frames_per_buffer=CHUNK)
print("recording...")
frames = []
for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
    data = stream.read(CHUNK)
    frames.append(data)
print("finished recording")

stream.stop_stream()
stream.close()
audio.terminate()

# Dump the captured frames as a standard .wav file.
waveFile = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
waveFile.setnchannels(CHANNELS)
waveFile.setsampwidth(audio.get_sample_size(FORMAT))
waveFile.setframerate(RATE)
waveFile.writeframes(b''.join(frames))
waveFile.close()
[๋ณ๊ฒฝ์ฌํญ]
- ๋ชจ๋ํ ํ ๊ฒ
[๊ธฐ์กด๊ณผ ๋์ผ]
import numpy as np
import pandas as pd
import glob
def extract_feature(file_name):
    """Load one audio file and return its 193 summary features.

    Inference-time copy of the training extractor: returns five 1-D
    arrays (40 MFCC, 12 chroma, 128 mel, 7 contrast, 6 tonnetz), each
    averaged over time frames.
    """
    X, sample_rate = librosa.load(file_name)
    stft = np.abs(librosa.stft(X))
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
    # Keyword-only in librosa >= 0.10: a positional `X` raises TypeError.
    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0)
    return mfccs, chroma, mel, contrast, tonnetz
def parse_audio_files(filenames):
    """Extract the 193-feature row for each recorded file.

    Inference-time variant: returns only the (n_files, 193) feature
    matrix — no labels, since the category is what we are predicting.
    """
    rows = len(filenames)
    features = np.zeros((rows, 193))
    i = 0
    for fn in filenames:
        try:
            mfccs, chroma, mel, contrast, tonnetz = extract_feature(fn)
            ext_features = np.hstack([mfccs, chroma, mel, contrast, tonnetz])
        except Exception:
            # Narrowed from a bare except so Ctrl-C still interrupts.
            print("error : "+fn)
        else:
            features[i] = ext_features
            print("์ฑ๊ณต")
            i += 1
    return features
# Collect the freshly recorded clip and turn it into model input.
audio_files = list(glob.glob(WAVE_OUTPUT_FILENAME))
print(len(audio_files))
files = audio_files
X_test = parse_audio_files(files)
[๋ณ๊ฒฝ์ฌํญ]
[๊ธฐ์กด๊ณผ ๋์ผ]
- 0 : ์ฌ์ด๋ , ๋ฏผ๋ฐฉ์ ๋ฑ ์๋ฆผ์
- 1,2 : ์ฐจ๋ ๊ฒฝ์ , ์์ง์๋ฆฌ
- 3 : ํ์น ํธ๋ฆฌ๊ฑฐ
# Reshape the 193-feature row into the (1, 193, 1) sequence the LSTM expects.
X_test = X_test.reshape(X_test.shape[0], 193, 1)
# NOTE(review): model_1 was saved above but model_2.pkl is loaded here —
# confirm which pickled model this inference step should use.
model = joblib.load('model_2.pkl')
pred = model.predict_proba(X_test)
print(pred)
# The model regresses the class id, so round the scalar prediction to the
# nearest integer category. (The earlier `ans = float(pred)` line was
# dead code — it was immediately overwritten.)
ans = round(float(pred))
# Map the integer category back to a human-readable station name.
de_label = pd.read_csv('de_train_label.csv', engine='python', index_col=None)
# The final string literal was broken across two lines in the source
# (a syntax error); rejoined into one literal.
print("์ด๋ฒ ์ญ์ "+de_label['name'][ans]+"์ญ ์๋๋ค.")