I have a list of ndarrays:
list1 = [t1, t2, t3, t4, t5]
Each t consists of:
t1 = np.array([[10,0.1],[30,0.05],[30,0.1],[20,0.1],[10,0.05],[10,0.05],[0,0.5],[20,0.05],[10,0.0]], np.float64)
t2 = np.array([[0,0.05],[0,0.05],[30,0],[10,0.25],[10,0.2],[10,0.25],[20,0.1],[20,0.05],[10,0.05]], np.float64)
...
Now, for every t in the list, I want to average the second-column values that share the same first element:
t1out = [[0,0.5],[10,(0.1+0.05+0.05+0)/4],[20,(0.1+0.05)/2],[30,0.075]]
t2out = [[0,0.05],[10,0.1875],[20,0.075],[30,0]]
....
After generating the t_1 ... t_n, I want to plot the probabilities over the classes for each t, where the first elements represent the classes (0,10,20,30) and the second elements give the probabilities with which these classes occur (0.1,0.7,0.15,0). Something like a histogram or a probability distribution in the form of a bar plot, for example:
plt.bar([classes],[probabilities])
plt.bar([item[0] for item in t1out],[item[1] for item in t1out])
This is how you can calculate that with NumPy:
import numpy as np
def mean_by_class(t, classes=None):
    """Average the second column of ``t`` per distinct value of the first.

    Parameters
    ----------
    t : ndarray of shape (n, 2)
        Column 0 holds the class of each row, column 1 the value to average.
    classes : 1-D array-like, optional
        Classes to report, sorted in ascending order.  Pass it when every
        class must appear in the output even if it does not occur in the
        current ``t``.  Defaults to the distinct values of ``t[:, 0]``.

    Returns
    -------
    ndarray of shape (len(classes), 2)
        One ``[class, mean]`` row per class.  Classes without any row in
        ``t`` get a mean of 0; rows of ``t`` whose class is not listed in
        ``classes`` are ignored.
    """
    labels = t[:, 0]
    if classes is None:
        classes = np.unique(labels)
    else:
        classes = np.asarray(classes)
    # Position each row's class would take inside the sorted ``classes``.
    idx = np.searchsorted(classes, labels)
    # Keep only rows whose class is really listed: the position must be in
    # range and the class found there must match exactly.
    known = idx < len(classes)
    known[known] = classes[idx[known]] == labels[known]
    idx = idx[known]
    out = np.zeros(len(classes), t.dtype)
    counts = np.zeros(len(classes), np.intp)
    # ufunc.at accumulates correctly when the same index occurs repeatedly.
    np.add.at(out, idx, t[known, 1])
    np.add.at(counts, idx, 1)
    # Clip so that classes without samples divide by 1 and stay at 0.
    out /= counts.clip(min=1)
    return np.c_[classes, out]
# Sample input: one [class, probability] pair per row.
t1 = np.array(
    [
        [10, 0.1],
        [30, 0.05],
        [30, 0.1],
        [20, 0.1],
        [10, 0.05],
        [10, 0.05],
        [0, 0.5],
        [20, 0.05],
        [10, 0.0],
    ],
    dtype=np.float64,
)
print(mean_by_class(t1))
# Printed result:
# [[ 0. 0.5 ]
# [10. 0.05 ]
# [20. 0.075]
# [30. 0.075]]
As a side note, it may not be the best choice to store class values, which are integers, in a float array. You could consider using a structured array instead, for example like this:
import numpy as np
def mean_by_class(t, classes=None):
    """Average field ``'p'`` of a structured array per value of ``'class'``.

    Parameters
    ----------
    t : 1-D structured ndarray
        Must provide the fields ``'class'`` and ``'p'``.
    classes : 1-D array-like, optional
        Classes to report, sorted in ascending order.  Pass it when every
        class must appear in the output even if it does not occur in the
        current ``t``.  Defaults to the distinct values of ``t['class']``.

    Returns
    -------
    structured ndarray with the dtype of ``t`` and ``len(classes)`` entries
        ``'class'`` holds the class and ``'p'`` the mean of its values.
        Classes without any entry in ``t`` get a mean of 0; entries of
        ``t`` whose class is not listed in ``classes`` are ignored.
    """
    labels = t['class']
    if classes is None:
        classes = np.unique(labels)
    else:
        classes = np.asarray(classes)
    # Position each entry's class would take inside the sorted ``classes``.
    idx = np.searchsorted(classes, labels)
    # Keep only entries whose class is really listed: the position must be
    # in range and the class found there must match exactly.
    known = idx < len(classes)
    known[known] = classes[idx[known]] == labels[known]
    idx = idx[known]
    out = np.zeros(len(classes), t.dtype)
    out['class'] = classes
    counts = np.zeros(len(classes), np.intp)
    # out['p'] is a view, so the accumulation lands in ``out`` itself;
    # ufunc.at handles indices that occur more than once.
    np.add.at(out['p'], idx, t['p'][known])
    np.add.at(counts, idx, 1)
    # Clip so that classes without samples divide by 1 and stay at 0.
    out['p'] /= counts.clip(min=1)
    return out
# Same sample data, stored with an integer class field and a float
# probability field.
t1 = np.array(
    [
        (10, 0.1),
        (30, 0.05),
        (30, 0.1),
        (20, 0.1),
        (10, 0.05),
        (10, 0.05),
        (0, 0.5),
        (20, 0.05),
        (10, 0.0),
    ],
    dtype=[('class', np.int32), ('p', np.float64)],
)
print(mean_by_class(t1))
# Printed result:
# [( 0, 0.5 ) (10, 0.05 ) (20, 0.075) (30, 0.075)]
Here's one approach using itertools.groupby:
from statistics import mean
from itertools import groupby
def fun(t):
    """Return ``[class, mean]`` pairs for the rows of ``t``.

    ``t`` is an iterable of two-item rows: item 0 is the class, item 1 the
    value to average.  The result holds one ``[class, mean]`` list per
    distinct class, ordered by ascending class; an empty input yields an
    empty list.
    """
    def class_of(row):
        return row[0]

    # groupby only merges adjacent items, so the rows must be sorted by the
    # very same key that is used for grouping.
    ordered = sorted(t, key=class_of)
    return [
        [label, mean(row[1] for row in rows)]
        for label, rows in groupby(ordered, key=class_of)
    ]
fun(t1)
[[0.0, 0.5],
[10.0, 0.05],
[20.0, 0.07500000000000001],
[30.0, 0.07500000000000001]]
And to apply to all arrays:
[fun(t) for t in [t1,t2]]
[[[0.0, 0.5],
[10.0, 0.05],
[20.0, 0.07500000000000001],
[30.0, 0.07500000000000001]],
[[0.0, 0.05], [10.0, 0.1875], [20.0, 0.07500000000000001], [30.0, 0.0]]]
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.