簡體   English   中英

WAV文件的特征提取

[英]Feature extraction of wav file

我們正在嘗試從.wav文件中提取功能,並始終出現相同的錯誤。

我們嘗試使用python 3.6.6和3.7.4版本,但錯誤是相同的。

import csv
import glob
import os
import librosa
import numpy as np

if __name__ == '__main__':

def extract_feature(file_name):
    x, sample_rate = librosa.load(file_name)
    stft = np.abs(librosa.stft(x))
    mfccs = np.mean(librosa.feature.mfcc(y=x, sr=sample_rate, n_mfcc=40).T, axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
    mel = np.mean(librosa.feature.melspectrogram(x, sr=sample_rate).T, axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(x),
                                              sr=sample_rate).T, axis=0)
    return mfccs, chroma, mel, contrast, tonnetz


def parse_audio_files(parent_dir, sub_dirs, file_ext="*.wav"):
    full_list = []
    features, labels = np.empty((0, 193)), np.empty(0)
    for label, sub_dir in enumerate(sub_dirs):

        for fn in glob.glob(os.path.join(parent_dir, sub_dir, file_ext)):
            varim = fn.split('/')[2]
            # print(varim)
            try:
                mfccs, chroma, mel, contrast, tonnetz = extract_feature(fn)
            except Exception as e:
                print("Error encountered while parsing file: ", fn)
                continue
            ext_features = np.hstack([mfccs, chroma, mel, contrast, tonnetz])
            features = np.vstack([features, ext_features])
            labels = np.append(labels, fn.split('/')[2])
            # print(var)
            # print(features)
            new_dict = {varim: ext_features}
            print(new_dict)
            full_list.append(new_dict)
            # value = np.array(features, dtype=np.int), np.array(labels, dtype=np.int)
    with open('dog_cat.csv', 'w') as f:
        wr = csv.writer(f)
        wr.writerow(full_list)
    return features, labels


def one_hot_encode(labels):
    n_labels = len(labels)
    n_unique_labels = len(np.unique(labels))
    one_hot_encode = np.zeros((n_labels, n_unique_labels))
    # one_hot_encode[np.arange(n_labels), labels] = 1
    return one_hot_encode


parent_dir = 'cats_dogs'
tr_sub_dirs = ["fold1"]
file_ext1 = "*.wav"

tr_features, tr_labels = parse_audio_files(parent_dir, tr_sub_dirs)

tr_labels = one_hot_encode(tr_labels)

這是我們得到的錯誤

Traceback (most recent call last):
  File "C:/Users/ja/PycharmProjects/catdog/projekt.py", line 61, in 
<module>
    tr_features, tr_labels = parse_audio_files(parent_dir, tr_sub_dirs)
  File "C:/Users/ja/PycharmProjects/catdog/projekt.py", line 27, in 
parse_audio_files
    varim = fn.split('/')[2]
IndexError: list index out of range

我們應該獲得代表.wav文件的數字,因此我們可以將它們分類為貓還是狗。

varim = fn.split('/') 2

fn.split('/')對您不起作用,因為無法進一步拆分,如下所示

在此處輸入圖片說明

在此處輸入圖片說明

如果您運行的目錄結構正確,請進行糾正。

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM