简体   繁体   中英

Error while building list in python

I am trying to build a list in Python. The list contains lists: each inner list consists of various features of an audio signal, such as standard deviation, mean frequency, etc. But when I print the outer list, I get a list of empty lists. Here is my code.

from scipy.io.wavfile import read  # to read wavfiles
import matplotlib.pyplot as plotter
from sklearn.tree import DecisionTreeClassifier as dtc
import numpy as np
import os
import scipy
import math

np.set_printoptions(precision=4)

# Module-level function named __init__; in the visible code it is not nested in
# any class and nothing calls it.
def __init__(self, criterion="gini", splitter="best", max_depth=None, min_samples_split=10, min_samples_leaf=1, min_weight_fraction_leaf=0, max_features=None, random_state=None, max_leaf_nodes=None, min_impurity_split=1e-7, class_weight=None, presort=False):

        # Forwards every argument unchanged to the parent initialiser.
        # NOTE(review): the visible imports bind the classifier as `dtc`, not
        # `DecisionTreeClassifier`, so this name looks unresolved - confirm.
        super(DecisionTreeClassifier, self).__init__(criterion=criterion, splitter=splitter, max_depth=max_depth, min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf, min_weight_fraction_leaf=min_weight_fraction_leaf, max_features=max_features, max_leaf_nodes=max_leaf_nodes, class_weight=class_weight, random_state=random_state, min_impurity_split=min_impurity_split, presort=presort)


# Shared scratch list: build(), find_iqr() and stddev() all append to it.
fList = []  # feature list
# Receives one entry per processed file (appended in build()).
mfList = [] # main feature list
# Emotion names; not referenced anywhere else in the visible code.
labels = ["angry", "angry", "angry", "angry", "angry", "angry", "fear", "fear", "happy", "happy", "happy", "sad", "sad", "sad", "sad", "sad", "sad", "sad", "sad", "sad", "sad", "sad", "sad", "sad", "sad"]
# Target values handed to clf.fit() together with mfList.
label = [1,2,3,4,5,6,7,8,9,10]

def stddev(lst,mf):
    """Append a standard-deviation value for ``lst`` to the global ``fList``.

    Sums the squared differences between each element of ``lst`` and ``mf``,
    divides that sum by ``len(lst) - 1`` and appends the square root of the
    quotient to ``fList``. Nothing is returned.
    """
    sum1 = 0
    len1 = len(lst)-1  # divisor is one less than the number of elements
    for i in range(len(lst)):
        sum1 += pow((lst[i]-mf),2)  # accumulate squared deviation from mf
    sd = np.sqrt(sum1/len1)
    fList.append(sd)  # the result is reported through the shared list

def find_iqr(num,num_array=[],*args):
    """Append quartile-style values of ``num_array`` to the global ``fList``.

    ``num_array`` is sorted in place (the caller's object is modified). Three
    positions are derived from ``num`` as ``int((num + 1) / 4)``,
    ``int((num + 1) / 2)`` and ``int(3 * (num + 1) / 4)`` and used directly as
    indices into the sorted data. Four values are appended, in this order:
    the lower value, the middle value, the upper value, and upper minus lower.
    Extra positional arguments are accepted and ignored. Nothing is returned.
    """
    num_array.sort()

    # Every index is based on the same "count plus one" quantity.
    count_plus_one = int(num) + 1
    lower_idx = int(count_plus_one / 4)
    middle_idx = int(count_plus_one / 2)
    middle = num_array[middle_idx]
    upper_idx = int(3 * count_plus_one / 4)

    lower = num_array[lower_idx]
    fList.append(lower)     # first quantile
    fList.append(middle)    # median
    upper = num_array[upper_idx]
    fList.append(upper)     # third quantile
    fList.append(upper - lower)     # inter quantile range


def build(path1):
    """Read every file in ``path1`` and record per-file features in ``mfList``.

    Each directory entry is loaded with ``read``; several values are appended
    to the global ``fList`` (directly and through ``find_iqr`` and ``stddev``),
    and ``fList`` is then appended to the global ``mfList``. There is no
    return statement, so a call evaluates to ``None``.
    """
    dirlist=os.listdir(path1)
    n=1  # running file counter, used only for the progress message
    mf=0  # initialised once, before the loop; it is not reset per file
    for name in dirlist:
        path=path1+name  # plain concatenation: path1 must end with a separator
        print ("File ",n)
        fs, x = read(path) #fs will have sampling rate and x will have sample #
        #print ("The sampling rate: ",fs)
        #print ("Size: ",x.size)
        #print ("Duration: ",x.size/float(fs),"s")

        # The string literal below holds disabled plotting code; as a bare
        # expression statement it has no effect at run time.
        '''
        plotter.plot(x)
        plotter.show() #x-axis is in samples 
        t = np.arange(x.size)/float(fs) #creating an array with values as time w.r.t samples
        plotter.plot(t)   #plot t w.r.t x
        plotter.show()
        y = x[100:600]
        plotter.plot(y)
        plotter.show()  # showing close-up of samples 
        '''
        j=0     # number of items iterated from x
        med=0  # never reassigned in this function
        for i in x:
            j=j+1
            mf=mf+i  # adds onto whatever mf held before this file
        mf=mf/j  # accumulated sum divided by the item count
        fList.append(np.max(abs(x)))    #amplitude
        fList.append(mf)    #mean frequency
        find_iqr(j,x)  # sorts x in place and appends four values to fList
        # med is still the 0 assigned above; find_iqr's median stays local to it
        fList.append((3*med)-(2*mf))    #mode
        stddev(x,mf)  # appends one more value to fList
        #fftc = np.fft.rfft(x).tolist()
        #mr = 20*scipy.log10(scipy.absolute(x)).tolist()
        #fList.append(fftc) #1D dft
        #fList.append(mr)   #magnitude response
        mfList.append(fList)  # appends the fList object itself, not a copy
        fList[:] = []  # empties that same list object in place
        n=n+1

# Directories scanned by build(); both end with a separator because build()
# joins directory and file name by plain string concatenation.
path1 = '/home/vishnu/Desktop/Trainingsamples/'
path2 = '/home/vishnu/Desktop/TestSamples/'
clf = dtc() # this class is used to make decision tree
build(path1)  # fills the global mfList as a side effect
print(mfList)
clf.fit(mfList,label)
mfList[:] = []  #clear mflist
# build() has no return statement, so tlist is None here; the entries it
# produces are appended to the global mfList instead.
tlist = build(path2)
res = clf.predict(tlist)
print(res)

The following is my output screen:

('File ', 1)
SA1.py:50: RuntimeWarning: invalid value encountered in sqrt
  sd = np.sqrt(sum1/len1)
('File ', 2)
('File ', 3)
('File ', 4)
('File ', 5)
('File ', 6)
('File ', 7)
('File ', 8)
('File ', 9)
('File ', 10)
[[], [], [], [], [], [], [], [], [], []]
Traceback (most recent call last):
  File "SA1.py", line 111, in <module>
    clf.fit(mfList,label)
  File "/home/vishnu/.local/lib/python2.7/site-packages/sklearn/tree/tree.py", line 739, in fit
    X_idx_sorted=X_idx_sorted)
  File "/home/vishnu/.local/lib/python2.7/site-packages/sklearn/tree/tree.py", line 122, in fit
    X = check_array(X, dtype=DTYPE, accept_sparse="csc")
  File "/home/vishnu/.local/lib/python2.7/site-packages/sklearn/utils/validation.py", line 424, in check_array
    context))
ValueError: Found array with 0 feature(s) (shape=(10, 0)) while a minimum of 1 is required.

As one can see, the line print(mfList) prints [[], [], [], [], [], [], [], [], [], []], which is a list of empty lists. Where is my mistake? Please advise.

The problem comes from the fList[:] = [] you call at the end. I did a small example to test it:

l = []
ml = []


def f(x):
    """Fill the shared list ``l`` with 0..x-1, store it in ``ml``, then empty it."""
    l.extend(range(x))
    # ml receives the list object itself, not a snapshot of its contents.
    ml.append(l)
    # Slice assignment clears that same object in place.
    l[:] = []


f(10)
f(5)
print(ml)

This prints ml containing two empty lists:

>>> [[], []]

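If I remove the l[:]=[] and replace it with l = [] (declaring global l at the top of f, because otherwise the assignment would make l a local name and l.append would fail), I get the two lists with their contents inside ml :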

>>> [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [0, 1, 2, 3, 4]]

The fList[:]=[] means "replace the entire contents of fList with the contents of an empty list", i.e. it empties the existing list object in place. You are working with references here: mfList.append(fList) stores a reference to that same list object rather than a copy, so the elements can still be reached (and changed) through fList . So if you empty fList in place, every entry in mfList is emptied as well, because they are all the same object.

You have a mistake on line 103:

fList[:] = []

What happens? You append fList to mfList with mfList.append(fList) . fList is passed by reference, so you append a pointer to the list instead of copying its values. When the statement above runs, it deletes the values that this pointer refers to, so the entries in mfList are empty too.

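Instead, you could use the following snippet (add global fList at the top of build(), because a plain assignment would otherwise make fList local to that function; alternatively, keep the original line and append a copy with mfList.append(fList[:]) ):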

fList = []

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM