[英]How to calculate the average of several .dat files using python?
我有50-60个.dat文件,每个文件都包含m行n列的数字。 我需要求出所有文件的平均值,并以相同的格式生成一个新文件。 我必须用Python来完成。 谁能帮我解决这个问题?
我写了一些代码……我知道其中有一些类型不兼容的地方,但我想不出替代方案,所以还没有做任何修改。
#! /usr/bin/python
# The asker's first attempt: average every .dat file in the current directory,
# then derive an uncertainty scaled by CC. It does not run as written; the
# NOTE(review) comments mark the places where it breaks.
import os
CC = 1.96  # multiplier applied to the uncertainty on the last line; presumably a 95% z-score -- confirm
average = []
total = []
count = 0
os.chdir("./")  # "./" is the directory we are already in, so this changes nothing
for files in os.listdir("."):
if files.endswith(".dat"):
infile = open(files)  # NOTE(review): this handle is never closed
cur = []
cur = infile.readlines()
for i in xrange(0, len(cur)):
cur[i] = cur[i].split()  # each row becomes a list of strings, not numbers
total += cur  # NOTE(review): list += list appends the rows; it does not add element-wise
count += 1
average = [x/count for x in total]  # NOTE(review): x is a list of strings here, so `/` raises TypeError
#calculate uncertainty
uncert = []
for files in os.listdir("."):
if files.endswith(".dat"):
infile = open(files)  # NOTE(review): this handle is never closed
cur = []
cur = infile.readlines  # NOTE(review): no call parentheses -- this binds the method itself, so len(cur) fails
for i in xrange(0, len(cur)):
cur[i] = cur[i].split()
uncert += (cur - average)**2  # NOTE(review): plain lists support neither `-` nor `**`
uncert = uncert**.5  # NOTE(review): uncert is a list, so `**` raises TypeError
uncert = uncert*CC  # NOTE(review): list * float raises TypeError as well
这是一种在时间和资源上都相当高效的方法:它并行读取所有文件中的值并计算它们的平均值,而每次只从每个文件读取一行——不过它会暂时把第一个.dat文件整个读入内存,以确定每个文件中数字的行数和列数。
你没有说明你的“数字”是整数、浮点数还是别的类型,所以这里把它们按浮点数读取(即使它们不是浮点数,这样做也行得通)。 无论如何,平均值都会以浮点数的形式计算并输出。
更新
我已经修改了原来的答案,根据您的评论,现在还会计算每个行列位置上各值的标准差(sigma;代码中采用了贝塞尔校正,即除以文件数减一)。 它在算出平均值之后立即执行此计算,因此不需要再次读取所有数据。 此外,为了响应评论中的建议,添加了上下文管理器以确保关闭所有输入文件。
请注意,标准差只会打印出来,不会写入输出文件;不过要把它写入同一个文件或另一个单独的文件,应该很容易添加。
from contextlib import contextmanager
from itertools import izip
from glob import iglob
from math import sqrt
from sys import exit
@contextmanager
def multi_file_manager(files, mode='rt'):
    """Open every path in *files* and close them all again on exit.

    Args:
        files: iterable of paths accepted by open().
        mode: mode string handed to each open() call (default 'rt').

    Yields:
        A list of the opened file objects, in the order of *files*.

    Any exception raised by open() or by the body of the ``with`` statement
    propagates unchanged, but only after every file opened so far has been
    closed.
    """
    opened = []
    try:
        # Open one at a time so that a failure part-way through still leaves
        # the earlier handles in `opened` for the finally clause to close.
        for path in files:
            opened.append(open(path, mode))
        yield opened
    finally:
        # Runs on normal exit, on an exception in the with-body (including
        # SystemExit from exit()), and on a failed open().
        for handle in opened:
            handle.close()
# generator function to read, convert, and yield each value from a text file
def read_values(file, datatype=float):
    """Lazily yield every whitespace-separated token of *file* as *datatype*.

    Args:
        file: any iterable of text lines, e.g. an open text file.
        datatype: callable applied to each token (default float).

    Yields:
        The converted tokens in reading order, line by line and left to
        right. Whatever *datatype* raises for a token it cannot convert
        propagates to the caller.
    """
    for text_line in file:
        for token in text_line.split():
            yield datatype(token)
# enumerate multiple equal length iterables simultaneously as (i, n0, n1, ...)
def multi_enumerate(*iterables, **kwds):
    """Enumerate several iterables in lockstep.

    Args:
        *iterables: the iterables to walk in parallel.
        **kwds: only ``start`` is accepted -- the index given to the first
            tuple (default 0).

    Returns:
        A generator of tuples ``(index, item0, item1, ...)``. Iteration is
        driven by izip(), so it stops when the shortest iterable runs out.

    Raises:
        TypeError: if any keyword other than ``start`` is passed, so that a
            misspelled keyword is reported instead of being silently ignored.
    """
    start = kwds.pop('start', 0)
    if kwds:
        raise TypeError(
            'multi_enumerate() got unexpected keyword argument(s): '
            + ', '.join(sorted(kwds)))
    return ((n,) + t for n, t in enumerate(izip(*iterables), start))
# Script body: read all files matching DATA_FILE_PATTERN in parallel, compute
# the mean and sample standard deviation of each cell position across the
# files, print both, and write the means to 'means.txt'.
DATA_FILE_PATTERN = 'data*.dat'
MIN_DATA_FILES = 2  # the n-1 divisor below would be zero with a single file

with multi_file_manager(iglob(DATA_FILE_PATTERN)) as datfiles:
    num_files = len(datfiles)
    if num_files < MIN_DATA_FILES:
        print('Less than {} .dat files were found to process, '
              'terminating.'.format(MIN_DATA_FILES))
        exit(1)

    # determine number of rows and cols from first file; blank lines hold no
    # values, so they must not be counted as rows
    temp = [fields for fields in (line.split() for line in datfiles[0])
            if fields]
    if not temp:
        print('{} contains no values, terminating.'.format(datfiles[0].name))
        exit(1)
    num_rows = len(temp)
    num_cols = len(temp[0])
    datfiles[0].seek(0)  # rewind first file
    del temp  # no longer needed
    print('{} .dat files found, each must have {} rows x {} cols\n'.format(
        num_files, num_rows, num_cols))

    means = []
    std_devs = []
    divisor = float(num_files-1)  # Bessel's correction for sample standard dev
    generators = [read_values(datfile) for datfile in datfiles]
    exhausted = object()  # sentinel returned by next() when a file runs dry
    for _ in xrange(num_rows):  # main processing loop
        for _ in xrange(num_cols):
            # Collect the next cell value from each file. next() gets a
            # default so that a short file is detected here instead of its
            # StopIteration silently shortening the sequence of values.
            values = [next(g, exhausted) for g in generators]
            if any(value is exhausted for value in values):
                print('Not every .dat file has {} rows x {} cols of values, '
                      'terminating.'.format(num_rows, num_cols))
                exit(1)
            mean = float(sum(values)) / num_files
            means.append(mean)
            means_diff_sq = ((value-mean)**2 for value in values)
            std_dev = sqrt(sum(means_diff_sq) / divisor)
            std_devs.append(std_dev)

print('Average and (standard deviation) of values:')
with open('means.txt', 'wt') as averages:
    shown = []  # formatted cells of the row currently being printed
    for i, mean, std_dev in multi_enumerate(means, std_devs):
        shown.append('{:.2f} ({:.2f})'.format(mean, std_dev))
        averages.write('{:.2f}'.format(mean))  # note std dev not written
        if i % num_cols != num_cols-1:  # not last column?
            averages.write(' ')  # delimiter between values on line
        else:
            print(' '.join(shown))  # one space-separated line per row
            del shown[:]
            averages.write('\n')
我不确定是这个过程的哪个环节给您带来了问题,但我会专门回答如何获取所有dat文件平均值的问题。
假设这样的数据结构:
72 12 94 79 76 5 30 98 97 48
79 95 63 74 70 18 92 20 32 50
77 88 60 98 19 17 14 66 80 24
...
获取文件的平均值:
import glob
import itertools
# Collect one average per .dat file found in the current directory.
avgs = []
for datpath in glob.iglob("*.dat"):
    with open(datpath, 'r') as f:
        # Flatten every whitespace-separated field of every line into one
        # stream of strings; split() without arguments already discards the
        # surrounding whitespace and the trailing newline.
        str_nums = itertools.chain.from_iterable(line.split() for line in f)
        # Build a real list: len() is needed below, and the lazy object that
        # map() returns on Python 3 has no length.
        nums = [int(s) for s in str_nums]
    if not nums:
        # A file without numbers has no average; skip it rather than divide
        # by zero.
        continue
    # Divide by a float so that int / int is not floored on Python 2.
    avgs.append(sum(nums) / float(len(nums)))
print(avgs)
它遍历每个.dat文件,读取各行并把它们连接起来,将其中的值转换为int(如果需要,也可以转换为float),然后把平均值追加到列表中。
如果这些文件非常庞大,而您担心读取它们时的内存占用,那么可以更显式地逐行遍历,只保留计数器,就像您最初的示例那样:
# Memory-friendly variant: keep only a running count and total per file
# instead of holding all of a file's numbers at once. Appends to the `avgs`
# list created by the previous snippet.
for datpath in glob.iglob("*.dat"):
    with open(datpath, 'r') as f:
        count = 0
        total = 0
        for line in f:
            nums = [int(i) for i in line.split()]
            count += len(nums)
            total += sum(nums)
    if count:  # a file without numbers has no average to record
        # Divide by a float so that int / int is not floored on Python 2.
        avgs.append(total / float(count))
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.