I'm trying to get my head around the amazing list-processing abilities of Python (and eventually NumPy). I'm converting some C code I wrote to Python.
I have a text data file where the first row is a header, and then every odd row is my input data and every even row is my output data. All data is space-separated. I'm quite chuffed that I managed to read all the data into lists using nested list comprehensions. Amazing stuff.
with open('data.txt', 'r') as f:
    # Read every line of the data file up front as a list of strings.
    lines = list(f)

# The first row is the header: <num_samples> <input_dim> <output_dim>.
# list(map(...)) keeps the result indexable on Python 3 (where map returns
# an iterator), and split() without an argument tolerates repeated spaces
# and the trailing newline, which split(' ') does not.
header = list(map(int, lines[0].split()))
num_samples = header[0]
input_dim = header[1]
output_dim = header[2]
del header

# After the header the rows alternate: input row, output row, input row, ...
inputs = [[float(x) for x in l.split()] for l in lines[1::2]]
outputs = [[float(x) for x in l.split()] for l in lines[2::2]]

# Only `lines` is deleted here: on Python 3 the comprehension variables are
# not bound outside the comprehension, so deleting them raises NameError.
del lines
Then I want to produce a new list where each element is a function of a corresponding input-output pair. I couldn't figure out how to do this with any python specific optimizations. Here it is in C-style python:
# Calculate one [x, pos_y, z] position per input/output pair.
pos_list = []
pos_y = 0
for i in range(num_samples):
    target = inputs[i]
    # Copy the two angles into locals so the rows of `outputs` are left
    # untouched (the adjustment below used to be written back into them).
    pan = outputs[i][0]
    tilt = outputs[i][1]
    # When pan lies outside [-90, 90], shift it by 180 degrees towards zero
    # and negate tilt.
    if pan > 90:
        pan -= 180
        tilt *= -1
    elif pan < -90:
        pan += 180
        tilt *= -1
    tan_pan = math.tan(math.radians(pan))
    tan_tilt = math.tan(math.radians(tilt))
    pos = [0, pos_y, 0]
    pos[2] = tan_tilt * (target[1] - pos[1]) / math.sqrt(tan_pan * tan_pan + 1)
    pos[0] = pos[2] * tan_pan
    pos[0] += target[0]
    pos[2] += target[2]
    pos_list.append(pos)
if pos_list:
    # The loop temporaries only exist when at least one sample was
    # processed; deleting them unconditionally fails for num_samples == 0.
    del target, pan, tilt, tan_pan, tan_tilt, pos
del pos_y
I tried to do it with a comprehension or map, but couldn't figure out how.
One vectorized approach, using boolean indexing (masks):
import numpy as np
def mask_vectorized(inputs, outputs, pos_y):
    """Compute the positions of all samples at once using boolean masks.

    Args:
        inputs: 2-D array-like; columns 0, 1 and 2 of each row are read.
        outputs: 2-D array-like; column 0 (pan) and column 1 (tilt) of each
            row are read, in degrees.  The caller's data is not modified.
        pos_y: value subtracted from column 1 of `inputs` and stored as the
            middle element of every result row.

    Returns:
        A list with one [x, pos_y, z] list per row of `inputs`.
    """
    # Accept plain nested lists as well as arrays.
    inputs = np.asarray(inputs)
    # Work on a copy of the two angle columns so `outputs` stays untouched.
    pantilt_2d = np.asarray(outputs)[:, :2].copy()

    # Masks corresponding to the if / elif conditions of the loop version.
    mask_col0_lt = pantilt_2d[:, 0] < -90
    mask_col0_gt = pantilt_2d[:, 0] > 90

    # Shift out-of-range pan values by 180 degrees towards zero ...
    pantilt_2d[mask_col0_gt, 0] -= 180
    pantilt_2d[mask_col0_lt, 0] += 180
    # ... and negate tilt on exactly those rows.
    pantilt_2d[mask_col0_lt | mask_col0_gt, 1] *= -1

    # Column 0 holds tan(pan), column 1 holds tan(tilt).
    tan_pan_tilt = np.tan(np.radians(pantilt_2d))

    # Vectorized form of "tan_tilt * (target[1] - pos_y) / sqrt(tan_pan^2 + 1)".
    V = (tan_pan_tilt[:, 1] * (inputs[:, 1] - pos_y)) / np.sqrt((tan_pan_tilt[:, 0] ** 2) + 1)

    # Size the result from the data itself rather than from a global
    # num_samples, so the function works on any input without outside state.
    pos_array_vectorized = np.empty((inputs.shape[0], 3))
    pos_array_vectorized[:, 0] = inputs[:, 0] + tan_pan_tilt[:, 0] * V
    pos_array_vectorized[:, 1] = pos_y
    pos_array_vectorized[:, 2] = inputs[:, 2] + V

    # Convert to a list for the final output
    # (keeping it as a numpy array could boost performance further).
    return pos_array_vectorized.tolist()
Runtime tests
In [415]: # Parameters and setup input arrays
...: num_samples = 1000
...: outputs = np.random.randint(-180,180,(num_samples,5))
...: inputs = np.random.rand(num_samples,6)
...: pos_y = 3.4
...:
In [416]: %timeit original(inputs,outputs,pos_y)
100 loops, best of 3: 2.44 ms per loop
In [417]: %timeit mask_vectorized(inputs,outputs,pos_y)
10000 loops, best of 3: 181 µs per loop
Suppose you read your file into a list, like so:
# Use a context manager so the file handle is closed as soon as the lines
# have been read, instead of being left open until garbage collection.
with open('data.txt', 'r') as f:
    lines = f.readlines()
The header is this:
lines[0]
The even lines are:
even = lines[1:][::2]
and the odd lines are:
odd = lines[2:][::2]
Now you can create a list using itertools.izip
from these two lists:
itertools.izip(even, odd)
This is a list-like iterator (you can loop over it, or wrap it in list( ... )
to turn it into a true list), each entry of which is a pair of your input-output data.
If anyone stumbles upon the same question, here are four variations based on Ami's suggestion (functions do1, do1b, do2, do3)
And for those curious, here are the benchmarks (I have ~1000 input-output pairs of data. Maybe with radically more data the benchmarks would vary more)
....
def load_file(filename = 'Sharpy_7.txt'):
    """Read a data file and publish its contents through module globals.

    Sets the globals `file_data` (every line of the file, header included,
    as a list of strings) and `num_samples`, `input_dim`, `output_dim`
    (the first three integers of the header row).

    Args:
        filename: path of the text file to read.

    Raises:
        IndexError: if the file is empty or the header has fewer than
            three fields.
        ValueError: if a header field is not an integer.
    """
    global file_data, num_samples, input_dim, output_dim
    # The with-block closes the file; no explicit close() is needed.
    with open(filename, 'r') as f:
        # get all lines as a list of strings
        file_data = list(f)
    # Build a real list so the header is indexable on Python 3 (where map
    # returns an iterator); split() without an argument tolerates repeated
    # spaces and the trailing newline, which split(' ') does not.
    header = [int(field) for field in file_data[0].split()]
    num_samples = header[0]
    input_dim = header[1]
    output_dim = header[2]
def calc_pos2(d):
    """Return the [x, 0, z] position for one (target, pantilt) pair.

    Single-argument variant intended for use with map() over zipped pairs.

    Args:
        d: pair whose element 0 is the target (elements 0, 1 and 2 are
            read) and whose element 1 is the pan/tilt pair in degrees
            (elements 0 and 1 are read).  Neither sequence is modified.

    Returns:
        A new three-element list [x, 0, z].
    """
    target = d[0]
    # Copy the angles into locals: adjusting them must not write back into
    # the caller's list.
    pan, tilt = d[1][0], d[1][1]
    # When pan lies outside [-90, 90], shift it by 180 degrees towards zero
    # and negate tilt.
    if pan > 90:
        pan -= 180
        tilt *= -1
    elif pan < -90:
        pan += 180
        tilt *= -1
    tan_pan = math.tan(math.radians(pan))
    tan_tilt = math.tan(math.radians(tilt))
    pos = [0, 0, 0]
    pos[2] = tan_tilt * (target[1] - pos[1]) / math.sqrt(tan_pan * tan_pan + 1)
    pos[0] = pos[2] * tan_pan
    pos[0] += target[0]
    pos[2] += target[2]
    return pos
def calc_pos(target, pantilt):
    """Return the [x, 0, z] position for one target / pan-tilt sample.

    Args:
        target: sequence whose elements 0, 1 and 2 are read.
        pantilt: sequence whose elements 0 (pan) and 1 (tilt) are read, in
            degrees.  It is not modified.

    Returns:
        A new three-element list [x, 0, z].
    """
    # Copy the angles into locals: adjusting them must not write back into
    # the caller's list.
    pan, tilt = pantilt[0], pantilt[1]
    # When pan lies outside [-90, 90], shift it by 180 degrees towards zero
    # and negate tilt.
    if pan > 90:
        pan -= 180
        tilt *= -1
    elif pan < -90:
        pan += 180
        tilt *= -1
    tan_pan = math.tan(math.radians(pan))
    tan_tilt = math.tan(math.radians(tilt))
    pos = [0, 0, 0]
    pos[2] = tan_tilt * (target[1] - pos[1]) / math.sqrt(tan_pan * tan_pan + 1)
    pos[0] = pos[2] * tan_pan
    pos[0] += target[0]
    pos[2] += target[2]
    return pos
def calc_stats():
    """Summarize the global pos_list column by column.

    Rebinds three globals: `pos_array` (pos_list converted to an array),
    `pos_avg` (mean along axis 0) and `pos_std` (standard deviation along
    axis 0).
    """
    global pos_array, pos_avg, pos_std
    pos_array = np.asarray(pos_list)
    # Reduce over axis 0, i.e. across samples, giving one value per column.
    pos_avg = pos_array.mean(axis=0)
    pos_std = pos_array.std(axis=0)
# map on itertools.izip
def do3():
    """Benchmark variant: map() of calc_pos2 over izip'ed pairs.

    Parses the global file_data (rows after the header alternate between a
    target row and a pan/tilt row) and rebinds the global pos_list to the
    result of the map() call.
    """
    global pos_list
    # Parse every whitespace-separated row into a list of floats.
    targets = [list(map(float, row.split())) for row in file_data[1::2]]
    pantilts = [list(map(float, row.split())) for row in file_data[2::2]]
    # Pair each target with its pan/tilt row and compute the position.
    paired = itertools.izip(targets, pantilts)
    pos_list = map(calc_pos2, paired)
# list comprehension on itertools.izip
def do2():
    """Benchmark variant: list comprehension over izip'ed pairs.

    Parses the global file_data (rows after the header alternate between a
    target row and a pan/tilt row) and rebinds the global pos_list to a
    list holding calc_pos(target, pantilt) for every pair.
    """
    global pos_list
    # Parse every whitespace-separated row into a list of floats.
    targets = [list(map(float, row.split())) for row in file_data[1::2]]
    pantilts = [list(map(float, row.split())) for row in file_data[2::2]]
    # Unpack each pair directly instead of indexing into it.
    pos_list = [
        calc_pos(target, pantilt)
        for target, pantilt in itertools.izip(targets, pantilts)
    ]
# for loop with function call
def do1b():
    """Benchmark variant: index loop that calls calc_pos for each sample.

    Parses the global file_data (rows after the header alternate between a
    target row and a pan/tilt row), then fills the global pos_list with one
    calc_pos result for each of the first num_samples pairs.
    """
    global pos_list
    # Parse every whitespace-separated row into a list of floats.
    targets = [list(map(float, row.split())) for row in file_data[1::2]]
    pantilts = [list(map(float, row.split())) for row in file_data[2::2]]
    # The sample count comes from the header (global num_samples), not from
    # the number of parsed rows.
    pos_list = []
    for idx in range(num_samples):
        position = calc_pos(targets[idx], pantilts[idx])
        pos_list.append(position)
# for loop with unrolled algorithm
def do1():
    """Benchmark variant: index loop with the position formula inlined.

    Parses the global file_data (rows after the header alternate between a
    target row and a pan/tilt row), then fills the global pos_list with one
    [x, 0, z] list for each of the first num_samples pairs.
    """
    global pos_list
    # Parse every whitespace-separated row into a list of floats.
    targets = [list(map(float, row.split())) for row in file_data[1::2]]
    pantilts = [list(map(float, row.split())) for row in file_data[2::2]]

    pos_list = []
    for idx in range(num_samples):
        pan, tilt = pantilts[idx][0], pantilts[idx][1]
        goal = targets[idx]
        # When pan lies outside [-90, 90], shift it by 180 degrees towards
        # zero and negate tilt.
        if pan > 90 or pan < -90:
            pan += -180 if pan > 90 else 180
            tilt = -tilt
        tan_pan = math.tan(math.radians(pan))
        tan_tilt = math.tan(math.radians(tilt))
        # The middle coordinate is fixed at 0 in this variant, so the height
        # difference is just goal[1].
        depth = tan_tilt * goal[1] / math.sqrt(tan_pan * tan_pan + 1)
        pos_list.append([depth * tan_pan + goal[0], 0, depth + goal[2]])
The technical posts on this site are licensed under CC BY-SA 4.0. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.