On a deep learning tutorial website, I found the following code for reading images.
cv2.resize(cv2.imread(folder + name, 1), (100, 200)).reshape(3, 100, 200)
This code changes the shape of the image from (100,200,3) to (3,100,200). I tried to see how these functions change the shape of the matrix, and I got strange output.
Consider a 2x4 RGB image (d) as below.
d = array([[[ 1, 2, 3],[ 4, 5, 6],[ 7, 8, 9], [10, 11, 12]],[[13, 14, 15],[16, 17, 18], [19, 20, 21],[22, 23, 24]]])
d.shape: (2, 4, 3)
After applying reshape, it shows
d.reshape(3,2,4)
array([[[ 1, 2, 3, 4],
[ 5, 6, 7, 8]],
[[ 9, 10, 11, 12],
[ 13, 14, 15, 16]],
[[ 17, 18, 19, 20],
[ 21, 22, 23, 24]]])
But I do not think this is the proper representation, since we want each layer of the result to hold one colour channel of the image.
Therefore, I think we should convert the image as below.
d.reshape(3,2,4)
array([[[ 1, 4, 7, 10],
[ 13, 16, 19, 22]],#R layer
[[ 2, 5, 8, 11],
[ 14, 17, 20, 23]],#G layer
[[ 3, 6, 9, 12],
[ 15, 18, 21, 24]]])#B layer
Is my understanding wrong? Please help me out if you know the answer.
I have put the entire code below.
from torch import nn
from torch.nn import functional as F
from torch.autograd import Variable
from sklearn.model_selection import train_test_split
import numpy as np
from collections import Counter
import os
import cv2
import torch.optim as optim
import torch.utils.data
def read_labels(file):
    """Load a comma-separated label file into a dictionary.

    Each non-blank line is split on commas; the first field becomes the key
    and the second field (with trailing whitespace removed) becomes the value.
    Any further fields are ignored. Blank lines are skipped, so a trailing
    empty line at the end of the file no longer raises.

    Args:
        file: Path of the text file to read.

    Returns:
        dict mapping the first column of every line to its second column.

    Raises:
        IndexError: If a non-blank line contains no comma.
    """
    dic = {}
    with open(file) as f:
        for row in f:
            if not row.strip():
                # Tolerate empty lines instead of failing on the missing field.
                continue
            fields = row.split(",")
            dic[fields[0]] = fields[1].rstrip()  # rstrip() drops the trailing "\n"
    return dic
# Load every training image as a channel-first array together with its label.
image_names = os.listdir("../train")
label_dic = read_labels("../labels.csv")
labels = []
images = []
for name in image_names:
    # Flag 1 asks OpenCV for a 3-channel image; the result is laid out as
    # (height, width, channel), with channels in OpenCV's B, G, R order.
    img = cv2.imread(os.path.join("../train", name), 1)
    # cv2.resize takes dsize as (width, height), so (200, 100) yields an
    # array of shape (100, 200, 3).
    img = cv2.resize(img, (200, 100))
    # transpose moves the channel axis to the front -> (3, 100, 200).
    # reshape(3, 100, 200) would only reinterpret the flat buffer and mix
    # pixels from different channels and positions.
    images.append(img.transpose(2, 0, 1))
    # Labels are keyed by the file name without its extension.
    labels.append(label_dic[os.path.splitext(name)[0]])
images = np.asarray(images)
# Assign an integer class index to every label.
tmp_labels = labels
uniq_labels = set(tmp_labels)  # distinct labels, duplicates removed
num_breeds = len(uniq_labels)  # number of breeds
# Enumerate the labels in sorted order so the label -> index mapping is the
# same on every run; iterating a set directly gives no ordering guarantee.
uniqu_labels_index = {label: i for i, label in enumerate(sorted(uniq_labels))}
labels_num = np.array([uniqu_labels_index[label] for label in labels])
# Data distribution: 70% of the samples for training, 20% for testing and
# whatever remains for validation.
N = len(images)
N_train = int(N * 0.7)
N_test = int(N * 0.2)

# First carve off the training portion ...
X_train, X_tmp, Y_train, Y_tmp = train_test_split(
    images,
    labels_num,
    train_size=N_train,
)
# ... then divide the remainder into validation and test sets.
X_validation, X_test, Y_validation, Y_test = train_test_split(
    X_tmp,
    Y_tmp,
    test_size=N_test,
)
"""
Model Definition
"""
# CNN Model (2 conv layer)
class CNN(nn.Module):
    """CNN with two convolution blocks followed by a linear layer.

    Each block is Conv2d(kernel 5, padding 2) -> BatchNorm2d -> ReLU ->
    MaxPool2d(2); the first block also applies Dropout2d after the
    convolution. The convolutions keep height and width, and each pooling
    step halves them, so an input of (3, H, W) reaches the linear layer as
    68 * (H // 4) * (W // 4) features.

    Args:
        input_size: (height, width) of the input images. The default
            (100, 200) matches the arrays built by the loading code.
        num_classes: Output size of ``fc2``. When None, the module-level
            ``num_breeds`` is used.
    """

    def __init__(self, input_size=(100, 200), num_classes=None):
        super(CNN, self).__init__()
        if num_classes is None:
            num_classes = num_breeds
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 34, kernel_size=5, padding=2),
            nn.Dropout2d(),
            nn.BatchNorm2d(34),
            nn.ReLU(),
            nn.MaxPool2d(2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(34, 68, kernel_size=5, padding=2),
            nn.BatchNorm2d(68),
            nn.ReLU(),
            nn.MaxPool2d(2))
        height, width = input_size
        # Two MaxPool2d(2) layers shrink each spatial dimension by a factor
        # of 4. A hard-coded 1700 does not match 100x200 inputs
        # (68 * 25 * 50 = 85000), so derive the size instead.
        flat_features = 68 * (height // 4) * (width // 4)
        self.fc1 = nn.Linear(flat_features, 300)
        self.fc2 = nn.Linear(300, num_classes)

    def forward(self, x):
        """Return log-probabilities of shape (batch, 300) for input batch x."""
        out = self.layer1(x)
        out = self.layer2(out)
        # Flatten everything except the batch dimension.
        out = out.view(out.size(0), -1)
        out = self.fc1(out)
        # NOTE(review): fc2 is defined but never applied, so the network emits
        # 300 scores rather than num_classes. Left unchanged here to keep the
        # output shape; confirm whether fc2 should be applied.
        return F.log_softmax(out, dim=1)

    def accuracy(self, outputs, labels):
        """Return the fraction of rows whose arg-max score equals the label.

        Args:
            outputs: Score matrix with one row per sample.
            labels: Integer class index of each sample.
        """
        inference = np.argmax(outputs.data.numpy(), axis=1)
        answers = labels.data.numpy()
        correction = np.equal(inference, answers)
        return np.sum(correction) / float(len(correction))
# NOTE: this rebinds the name CNN from the class to the single model instance.
CNN = CNN()

# ---- Training ----
batch_size = 100
learning_rate = 0.01

# Data Loader (Input Pipeline)
train = torch.utils.data.TensorDataset(torch.from_numpy(X_train), torch.from_numpy(Y_train))
train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)
# The validation and test loaders deliver their whole split as one batch.
val = torch.utils.data.TensorDataset(torch.from_numpy(X_validation), torch.from_numpy(Y_validation))
val_loader = torch.utils.data.DataLoader(val, batch_size=len(X_validation), shuffle=True)
test = torch.utils.data.TensorDataset(torch.from_numpy(X_test), torch.from_numpy(Y_test))
test_loader = torch.utils.data.DataLoader(test, batch_size=len(X_test), shuffle=True)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(CNN.parameters(), lr=learning_rate)

for epoch in range(250):  # loop over the dataset multiple times
    running_loss = 0.0
    # Separate batch names keep the module-level `images` / `labels` intact.
    for batch_images, batch_labels in train_loader:
        batch_images = Variable(batch_images).float()
        batch_labels = Variable(batch_labels).long()
        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = CNN(batch_images)
        loss = criterion(outputs, batch_labels)
        loss.backward()
        optimizer.step()
        # NOTE(review): `loss.data[0]` is the pre-0.4 PyTorch idiom; newer
        # releases expect `loss.item()` - confirm the targeted version.
        running_loss += loss.data[0]
    # Accuracy of the last mini-batch of the epoch (training mode).
    accuracy = CNN.accuracy(outputs, batch_labels)
    print("")
    print("epoch : %s" % epoch)
    # Average over the number of mini-batches actually processed this epoch
    # instead of a fixed 2000.
    print("loss: %s" % (float(running_loss) / len(train_loader)))
    print("accuracy : %s" % accuracy)
    running_loss = 0.0
print('Finished Training')
# Evaluate on the held-out test set.
# Switch to inference mode first: otherwise Dropout2d keeps dropping feature
# maps and BatchNorm2d keeps using batch statistics, which distorts the score.
CNN.eval()
for test_images, test_labels in test_loader:
    test_images = Variable(test_images).float()
    test_labels = Variable(test_labels).long()
    outputs = CNN(test_images)
    # Fraction of test samples whose arg-max prediction matches the label.
    print(CNN.accuracy(outputs, test_labels))
The reshape function is taken from NumPy.
The shape attribute indicates how many elements your array has in each layer. So, in your example:
d = array([
[[ 1, 2, 3],[ 4, 5, 6],[ 7, 8, 9],[10, 11, 12]], #1st layer 1st element (4 lists inside with 3 numbers each)
[[13, 14, 15],[16, 17, 18], [19, 20, 21],[22, 23, 24]] #1st layer 2nd element (4 lists inside with 3 numbers each)
])
The first layer has two lists, the second layer 4 lists and the third has three numbers.
When you call reshape(3,2,4)
you get 3 lists on the first layer, 2 lists on the second layer and four numbers in the third layer keeping the same elements you provided.
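It does not change the order of the elements; it just changes the shape. In your example, if you try to view the modified image with imshow, you will see that the reshape command has messed up the image. To actually move the channel axis to the front, use d.transpose(2, 0, 1) instead, which produces the per-channel layers you expected.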
Try:
# Read the file at folder + name and show it, unmodified, in a window titled 'image'.
image = cv2.imread(folder + name, 1)
cv2.imshow('image',image)
# Presumably waits for a key press before the window is closed - see the OpenCV docs.
cv2.waitKey(0)
cv2.destroyAllWindows()
Then:
# Same as before, but the image is passed through cv2.resize first.
# NOTE: despite the variable name, no reshape is applied here - only the resize.
reshapedimage =cv2.resize(cv2.imread(folder + name, 1), (100, 200))
cv2.imshow('image',reshapedimage)
# Presumably waits for a key press before the window is closed - see the OpenCV docs.
cv2.waitKey(0)
cv2.destroyAllWindows()
You will then be able to see what each command is doing to your image.
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.