Skip to content

chuckchuck-gojol/model_2

Folders and files

NameName
Last commit message
Last commit date

Latest commit

ย 

History

7 Commits
ย 
ย 
ย 
ย 
ย 
ย 
ย 
ย 
ย 
ย 

Repository files navigation

๋ชจ๋ธ 2 - ๊ธฐ๋Šฅ 3์— ๋Œ€ํ•ด ์ถ”๊ฐ€ ์ฒ˜๋ฆฌ ๊ฐ€๋Šฅํ•œ ๋ชจ๋ธ2

๋ชจ๋ธ 2๋Š” ๋ชจ๋ธ 1์—์„œ ๊ธฐ๋Šฅ 3 ์ง€ํ•˜์ฒ  ํŠธ๋ฆฌ๊ฑฐ ์ธ์‹ ์‹œ ์ž‘๋™๋˜๋Š” ๋ชจ๋ธ์ž„

๋ชจ๋ธ 1์—์„œ ๊ธฐ๋Šฅ 3์œผ๋กœ ๋ถ„๋ฅ˜(์นดํ…Œ๊ณ ๋ฆฌ :3) ์ธ ๊ฒฝ์šฐ ํ•ด๋‹น ๋ชจ๋ธ์—์„œ ์—ญ๋ณ„ ๋ถ„๋ฅ˜ ์ž‘์—… ์ˆ˜ํ–‰

์ตœ์ข… ์ˆ˜์ •์ผ : 20-05-27


1. ๋ฐ์ดํ„ฐ ์ „์ฒ˜๋ฆฌ

1.1 wav ํŒŒ์ผ ๋กœ๋”ฉ ๋ฐ ํ”ผ์ฒ˜ ์ƒ์„ฑ

[๋ณ€๊ฒฝ์‚ฌํ•ญ]

  • ํ”ผ์ฒ˜๋ฅผ ์ตœ๋Œ€ํ•œ ๋งŽ์ด ์ƒ์„ฑํ•˜์—ฌ ๋ฐ์ดํ„ฐ๋ฅผ ๊ฐ€๊ณตํ•จ -> drop out ํšจ๊ณผ ๊ทน๋Œ€ํ™” ๋ฐ ๊ณผ์ ํ•ฉ ๋ฐฉ์ง€, ์ •ํ™•๋„ ํ–ฅ์ƒ
  • ์ด์ „ ๊ธฐ๋Šฅ ๋ณ„ ๋ถ„๋ฆฌ๋˜์—ˆ๋˜ ๋ชจ๋ธ(ver 2.*) ์„ ํ†ตํ•ฉํ•จ
  • ๊ฐ ๋ฐ์ดํ„ฐ ๋ณ„ 193 ํ”ผ์ฒ˜ ์ถ”์ถœ
  • row ํ†ต์ผ ์•ˆํ•จ (3~4sec)
  • ์›ํ•ซ์ธ์ฝ”๋”ฉ ์•ˆํ•จ
  • ๋ผ๋ฒจ์„ 1์ฐจ์› ๋ฐฐ์—ด๋กœ ๋ณ€๊ฒฝ -> ์นดํ…Œ๊ณ ๋ฆฌ ๋ณ„ int๊ฐ’ ์ถœ๋ ฅํ•˜๊ฒŒ ํ•จ

[๊ธฐ์กด๊ณผ ๋™์ผ]

  • ๊ธฐ๋Šฅ 1,2์— ๋Œ€ํ•ด ์‚ฌ์šฉ ๋ฐ์ดํ„ฐ๋Š” ๋‰ด์š•๋Œ€ํ•™๊ต MARL์˜ URBANSOUND8K DATASET ์ผ๋ถ€์™€ ์ผ์ƒ ์ƒํ™œ์—์„œ ๋…น์Œํ•œ ๋…น์Œ ํŒŒ์ผ(.wav)๋ฅผ ํ™œ์šฉ (2,622๊ฐœ, 1.96G)
  • ๊ธฐ๋Šฅ 3์— ๋Œ€ํ•ด ํ™˜์Šน์—ญ ์•Œ๋ฆผ์Œ์„ ํŠธ๋ฆฌ๊ฑฐ๋กœ ์ ์šฉ (894๊ฐœ, 0.71G)
import numpy as np
import pandas as pd
#wav ํŒŒ์ผ๋“ค์˜ ํ”ผ์ฒ˜ ์ƒ์„ฑ
#librosa ์‚ฌ์šฉ
#์‚ฌ์šฉ ํŠน์„ฑ์€ mfcc, chroma_stft, melspectorgram, spectral_contrast, tonnetz๋กœ ์ด193
#๋”ฅ๋Ÿฌ๋‹ ๋ชจ๋ธ๋งŒ ์‚ฌ์šฉํ•  ์˜ˆ์ • -> ํ”ผ์ฒ˜ ์ถ•์†Œ ์ƒ๋žต
import glob
import librosa

# ์˜ค๋””์˜ค ๋ถˆ๋Ÿฌ์˜ค๊ธฐ + ํ”ผ์ณ ์ƒ์„ฑ
# ํ”ผ์ณ 193๊ฐœ
# row ํ†ต์ผ ์•ˆ์‹œํ‚ด
def extract_feature(file_name):
    """Load one audio file and return its five mean-pooled feature groups.

    Returns (mfccs, chroma, mel, contrast, tonnetz): 40 + 12 + 128 + 7 + 6
    vectors (193 values total). Each group is averaged over all time frames
    (mean along axis=0 of the transposed frame matrix), so every file yields
    fixed-size vectors regardless of its duration.
    """
    X, sample_rate = librosa.load(file_name)
    stft = np.abs(librosa.stft(X))
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
    # Bug fix: pass the signal as a keyword -- positional audio input was
    # removed in librosa >= 0.10, where melspectrogram(X, ...) raises TypeError.
    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0)
    return mfccs, chroma, mel, contrast, tonnetz
    
#๋ฐ์ดํ„ฐ ๊ฐ€๊ณต
#ํ–‰๋ ฌ๋กœ ๋ณ€ํ™˜
def parse_audio_files(filenames):
    """Extract features and labels for a list of .wav files.

    Returns:
        features: (n_ok, 193) float array, one row per successfully
            processed file.
        labels: (n_ok, 1) float array of integer category codes parsed
            from the leading "<label>-" prefix of each file name.

    Files that fail feature extraction (or whose name lacks an integer
    prefix) are skipped with an error message. Bug fix: the arrays are
    trimmed to the number of successful files -- the original returned
    zero-filled rows for every failure.
    """
    rows = len(filenames)
    features, labels = np.zeros((rows, 193)), np.zeros((rows, 1))
    i = 0
    for fn in filenames:
        try:
            mfccs, chroma, mel, contrast, tonnetz = extract_feature(fn)
            ext_features = np.hstack([mfccs, chroma, mel, contrast, tonnetz])
            y_col = int(fn.split('-')[0])
        except Exception:  # narrowed from bare except: keep best-effort skip
            print("error : " + fn)
        else:
            features[i] = ext_features
            labels[i] = y_col
            print(y_col)
            i += 1
    return features[:i], labels[:i]

# Category codes carried in the "<label>-" file-name prefix:
#   0: siren / civil-defense alert
#   1: approaching car (engine noise)
#   2: car horn
#   4: transfer-station announcement
#   NOTE(review): section 3.3 calls the subway trigger category 3 --
#   confirm which code is correct.
audio_files = list(glob.glob('*.wav'))
print(len(audio_files))

X, y = parse_audio_files(audio_files)

# Persist the feature matrix and labels as data.npz for the modelling step.
np.savez('data', X=X, y=y)

2. ๋ชจ๋ธ๋ง

2.1 ๋ฐ์ดํ„ฐ ๊ตฌ์„ฑ

[๋ณ€๊ฒฝ์‚ฌํ•ญ]


[๊ธฐ์กด๊ณผ ๋™์ผ]

  • data.npz ๋ถˆ๋Ÿฌ์˜ค๊ธฐ
import glob
import librosa
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load the preprocessed dataset saved by step 1 (np.savez('data', X=X, y=y)).
# Category order: alert, car engine, car horn, subway transfer trigger.
# Bug fix: step 1 writes data.npz, but this cell loaded 'model_1.npz'
# (consistent with the prose above: "data.npz ๋ถˆ๋Ÿฌ์˜ค๊ธฐ").
sound_data = np.load('data.npz')
X_train = sound_data['X']
y_train = sound_data['y']
X_train.shape, y_train.shape  # notebook-style shape check (no-op in a script)

2.2 ๋ชจ๋ธ ํ•™์Šต

[๋ณ€๊ฒฝ์‚ฌํ•ญ]

  • lstm ์‚ฌ์šฉ
  • ์ด์ „ ๋ชจ๋ธ์˜ ๊ฒฝ์šฐ ํŒŒ๋ผ๋ฏธํ„ฐ ์กฐ์ •์— ์ดˆ์ ์„ ๋‘์—ˆ์œผ๋‚˜ ver 3.5์—์„œ๋Š” layer ๊ตฌ์„ฑ์— ์ดˆ์ ์„ ๋‘์–ด ์ง„ํ–‰

[๊ธฐ์กด๊ณผ ๋™์ผ]

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
import numpy as np
import matplotlib.pyplot as plt
import os

from keras import models
from keras import layers
from keras.layers import *
from keras import optimizers

from keras.layers import LSTM
from keras.models import Sequential
from keras.layers import Dense
import keras.backend as K
from keras.callbacks import EarlyStopping

# Build and train a small LSTM that treats the 193 features as a
# length-193 sequence of scalar "timesteps".
K.clear_session()
model = Sequential()
# NOTE(review): the network regresses a single value with MSE and the class
# label is recovered later by rounding the output; a softmax head with
# (sparse_)categorical_crossentropy would be the conventional choice for the
# 4-category problem described above -- confirm this setup is intentional.
model.add(LSTM(20, input_shape=(193, 1))) # (timestep=193, feature=1)
model.add(Dense(1)) # single regression output
model.compile(loss='mean_squared_error', optimizer='adam')

model.summary()

# Add the trailing channel axis the LSTM expects: (samples, 193, 1).
X_train = X_train.reshape(X_train.shape[0], 193, 1)

# NOTE(review): patience=1 on *training* loss (no validation split) stops
# very aggressively -- consider monitoring validation loss instead.
early_stop = EarlyStopping(monitor='loss', patience=1, verbose=1)

model.fit(X_train, y_train, epochs=100,
          batch_size=30, verbose=1, callbacks=[early_stop])

2.3 ๋ชจ๋ธ ์ €์žฅ

[๋ณ€๊ฒฝ์‚ฌํ•ญ]

  • pkl, json, pb, tflite๋กœ ์ €์žฅ

[๊ธฐ์กด๊ณผ ๋™์ผ]

# ๋ชจ๋ธ pkl๋กœ ์ €์žฅํ•˜๊ธฐ
import joblib
joblib.dump(model, 'model/pkl/model_1.pkl')

# ๋ชจ๋ธ json์œผ๋กœ ์ €์žฅํ•˜๊ธฐ
model_1 = model.to_json()
# model = model_from_json(json_string)

# ๋ชจ๋ธ h5๋กœ ์ €์žฅํ•˜๊ธฐ
from keras.models import load_model
model.save('model/h5/model_1')
model.save('model/h5/model_1.h5')

# ๋ชจ๋ธ pb๋กœ ์ €์žฅํ•˜๊ธฐ
model = keras.models.load_model('model/h5/model_1', compile=False)
model.save('model/pb/',save_format=tf)

#๋ชจ๋ธ tflite ๋กœ ์ €์žฅํ•˜๊ธฐ
saved_model_dir='model/pb/'
converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
converter.target_spec.supported_ops=[tf.lite.OpsSet.TFLITE_BUILTINS,
                                     tf.lite.OpsSet.SELECT_TF_OPS]
tfilte_mode=converter.convert()
open('model/tflite/model_1.tflite','wb').write(ftlite_model)

3. ํ…Œ์ŠคํŠธ

3.1 ๋…น์Œ ํŒŒ์ผ ์ƒ์„ฑ

[๋ณ€๊ฒฝ์‚ฌํ•ญ]

  • 10์ดˆ๋กœ ์ œํ•œ

[๊ธฐ์กด๊ณผ ๋™์ผ]

  • ๋ธ”๋ฃจํˆฌ์Šค ์ด์–ดํฐ์˜ ์™ธ๋ถ€ ๋งˆ์ดํฌ ์‚ฌ์šฉ (์ฑ„๋„ ์กฐ์ •)
import librosa
import scipy.signal as signal
import numpy as np
import pandas as pd
import joblib

#ํ™˜๊ฒฝ ํ™•์ธ
import pyaudio
import wave
# Recording parameters: 16 kHz mono, 16-bit PCM, 10-second clip.
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
CHUNK = 1024                      # frames read per buffer
RECORD_SECONDS = 10
WAVE_OUTPUT_FILENAME = "test_file.wav"
audio = pyaudio.PyAudio()

# Open the capture stream on the external (bluetooth earphone) microphone.
# NOTE(review): input_device_index=1 is machine-specific -- verify the
# device index on the target hardware.
stream = audio.open(format=FORMAT,  # consistency fix: was pyaudio.paInt16 inline
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    input_device_index=1,
                    frames_per_buffer=CHUNK)
print("recording...")
frames = []
# RATE/CHUNK buffers per second, for RECORD_SECONDS seconds.
for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
    data = stream.read(CHUNK)
    frames.append(data)
print("finished recording")

stream.stop_stream()
stream.close()
audio.terminate()

# Write the captured frames out as a standard PCM .wav file.
waveFile = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
waveFile.setnchannels(CHANNELS)
waveFile.setsampwidth(audio.get_sample_size(FORMAT))
waveFile.setframerate(RATE)
waveFile.writeframes(b''.join(frames))
waveFile.close()

3.2 ์ „์ฒ˜๋ฆฌ

[๋ณ€๊ฒฝ์‚ฌํ•ญ]

  • ๋ชจ๋“ˆํ™” ํ•  ๊ฒƒ

[๊ธฐ์กด๊ณผ ๋™์ผ]

import numpy as np
import pandas as pd 
import glob

def extract_feature(file_name):
    """Load one audio file and return its five mean-pooled feature groups.

    Inference-time copy of the training-side extractor: returns
    (mfccs, chroma, mel, contrast, tonnetz) -- 40 + 12 + 128 + 7 + 6 = 193
    values total, each group averaged over all time frames so any clip
    length yields fixed-size vectors.
    """
    X, sample_rate = librosa.load(file_name)
    stft = np.abs(librosa.stft(X))
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
    # Bug fix: keyword y= -- positional audio input was removed in librosa >= 0.10.
    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0)
    return mfccs, chroma, mel, contrast, tonnetz

def parse_audio_files(filenames):
    """Build the (n_ok, 193) feature matrix for a list of .wav files.

    Inference-time variant: no labels are parsed from the file names.
    Files that fail feature extraction are skipped with an error message.
    Bug fix: the returned array is trimmed to the successful rows -- the
    original kept a zero-filled row for every failed file.
    """
    rows = len(filenames)
    features = np.zeros((rows, 193))
    i = 0
    for fn in filenames:
        try:
            mfccs, chroma, mel, contrast, tonnetz = extract_feature(fn)
            ext_features = np.hstack([mfccs, chroma, mel, contrast, tonnetz])
        except Exception:  # narrowed from bare except: keep best-effort skip
            print("error : " + fn)
        else:
            features[i] = ext_features
            print("์„ฑ๊ณต")
            i += 1
    return features[:i]

# Collect the freshly recorded clip and run it through preprocessing.
audio_files = glob.glob(WAVE_OUTPUT_FILENAME)
print(len(audio_files))

X_test = parse_audio_files(audio_files)

3.3 ๋ชจ๋ธ ์ ์šฉ

[๋ณ€๊ฒฝ์‚ฌํ•ญ]


[๊ธฐ์กด๊ณผ ๋™์ผ]

  • 0 : ์‚ฌ์ด๋ Œ, ๋ฏผ๋ฐฉ์œ„ ๋“ฑ ์•Œ๋ฆผ์Œ
  • 1,2 : ์ฐจ๋Ÿ‰ ๊ฒฝ์ , ์—”์ง„์†Œ๋ฆฌ
  • 3 : ํ™˜์Šน ํŠธ๋ฆฌ๊ฑฐ
# Reshape to the (samples, timesteps=193, features=1) layout the LSTM expects.
X_test = X_test.reshape(X_test.shape[0], 193, 1)

# NOTE(review): section 2.3 saved 'model/pkl/model_1.pkl' -- confirm which
# pickle this deployment is meant to load.
model = joblib.load('model_2.pkl')

# NOTE(review): Sequential.predict_proba was removed in modern Keras;
# model.predict(X_test) returns the same single-output prediction here.
pred = model.predict_proba(X_test)
print(pred)

# Round the regression output to the nearest integer station code, then map
# it to a station name via the decode table. (Bug fix: removed a dead
# 'ans = float(pred)' that was immediately overwritten by the line below.)
ans = round(float(pred))
de_label = pd.read_csv('de_train_label.csv', engine='python', index_col = None)
print("์ด๋ฒˆ ์—ญ์€ "+de_label['name'][ans]+"์—ญ ์ž…๋‹ˆ๋‹ค.")

About

No description, website, or topics provided.

Resources

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published