[英]Deep learning for image training
我嘗試訓練圖像並預測其中的文本。 但是當一起訓練圖像時,我遇到了陣列錯誤。 但是現在我正在訓練每個字母的圖像,但是卻出現了一些錯誤。 將添加圖像生成器文件,以幫助創建圖像並將其導入到擬合生成器。
錯誤:
Using TensorFlow backend.
WARNING: Logging before flag parsing goes to stderr.
W0826 09:18:45.040408 3428 deprecation_wrapper.py:119] From C:\Users\workspace\test\venv\lib\site-packages\keras\backend\tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.
W0826 09:18:45.056031 3428 deprecation_wrapper.py:119] From C:\Users\workspace\test\venv\lib\site-packages\keras\backend\tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.
W0826 09:18:45.071652 3428 deprecation_wrapper.py:119] From C:\Users\workspace\test\venv\lib\site-packages\keras\backend\tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.
Traceback (most recent call last):
File "C:/Users/workspace/test/killme.py", line 22, in <module>
o2 = Reshape((len(string.ascii_uppercase), ), name="symbol_{}".format(i+1))(o)
File "C:\Users\workspace\test\venv\lib\site-packages\keras\engine\base_layer.py", line 474, in __call__
output_shape = self.compute_output_shape(input_shape)
File "C:\Users\workspace\test\venv\lib\site-packages\keras\layers\core.py", line 398, in compute_output_shape
input_shape[1:], self.target_shape)
File "C:\Users\workspace\test\venv\lib\site-packages\keras\layers\core.py", line 386, in _fix_unknown_dimension
raise ValueError(msg)
ValueError: total size of new array must be unchanged
碼:
from keras.models import Sequential, Input, Model
from keras.layers import Dense, Reshape
from keras.utils import to_categorical
from keras.layers.convolutional import Conv2D  # to add convolutional layers
from keras.layers.convolutional import MaxPooling2D  # to add pooling layers
from keras.layers import Flatten  # to flatten data for fully connected layers
import string
from generators import ImageGenerator, BasicGenerator
from numpy import reshape

# Image geometry: each sample is a height x width single-channel image that
# contains `character_count` uppercase letters of `font_size` pixels each.
height = 20
width = 200
font_size = 20

i1 = Input(shape=(height, width, 1))
character_count = int(width / font_size)

# BUG FIX: applying Dense directly to the 4-D image tensor produces output of
# shape (height, width, 26); its total size (20*200*26) cannot be reshaped to
# (26,), which raised "ValueError: total size of new array must be unchanged".
# Flatten the image into a 1-D feature vector first so each Dense head
# already emits shape (26,) and no Reshape is needed.
features = Flatten()(i1)

outputs = []
for i in range(character_count):
    # One classification head per character position.  softmax (not relu) is
    # the correct activation to pair with categorical_crossentropy; the layer
    # name preserves the original per-symbol output names.
    o = Dense(len(string.ascii_uppercase), activation='softmax',
              name="symbol_{}".format(i + 1))(features)
    outputs.append(o)

string_model = Model(inputs=i1, outputs=outputs)
# NOTE(review): the original `string_model.layers[2].layer.trainable = False`
# was removed -- plain Keras layers have no `.layer` attribute (only wrappers
# such as TimeDistributed do), so that line would raise AttributeError.
generator = ImageGenerator(height, width, font_size, character_count)
string_model.compile(loss="categorical_crossentropy", optimizer="rmsprop",
                     metrics=["categorical_accuracy"])
string_model.summary()
string_model.fit_generator(generator, epochs=10)
您必須先對文本數據進行預處理,然後才能將其送入 Dense 輸出層進行預測。更好的做法是先將文本轉換為詞彙表(vocab)。創建一個 CaptionGenerator 類可以讓這個過程變得簡單,如下所示。
from vgg16 import VGG16
from keras.applications import inception_v3
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import LSTM, Embedding, TimeDistributed, Dense, RepeatVector, Merge, Activation, Flatten
from keras.preprocessing import image, sequence
from keras.callbacks import ModelCheckpoint
import cPickle as pickle
EMBEDDING_DIM = 128
class CaptionGenerator():
    """Build the vocabulary from the Flickr8k training captions and feed
    (image encoding, partial caption) -> next-word batches to a captioning
    model.

    NOTE(review): this is Python 2 code (print statements, ``iter.next()``,
    ``cPickle``) written against a pre-2.0 Keras API (the ``Merge`` layer);
    it will not run unmodified on Python 3 / modern Keras.
    """

    def __init__(self):
        # All of these are populated by variable_initializer() below.
        self.max_cap_len = None
        self.vocab_size = None
        self.index_word = None
        self.word_index = None
        self.total_samples = None
        # Precomputed CNN image encodings keyed by image filename.
        # NOTE(review): pickle.load executes arbitrary code from the file --
        # only load encodings you produced yourself.
        self.encoded_images = pickle.load( open( "encoded_images.p", "rb" ) )
        self.variable_initializer()

    def variable_initializer(self):
        """Scan the training captions once to derive the vocabulary size,
        the word<->index maps, the longest caption length, and the total
        number of (partial caption, next word) training samples."""
        df = pd.read_csv('Flickr8k_text/flickr_8k_train_dataset.txt', delimiter='\t')
        nb_samples = df.shape[0]
        iter = df.iterrows()  # shadows the builtin `iter`; Py2-style .next() below
        caps = []
        for i in range(nb_samples):
            x = iter.next()
            caps.append(x[1][1])  # second column holds the caption text
        # A caption of k words yields k-1 next-word samples.
        self.total_samples=0
        for text in caps:
            self.total_samples+=len(text.split())-1
        print "Total samples : "+str(self.total_samples)
        # Flatten every caption into one list of words, then dedupe.
        words = [txt.split() for txt in caps]
        unique = []
        for word in words:
            unique.extend(word)
        unique = list(set(unique))
        self.vocab_size = len(unique)
        self.word_index = {}
        self.index_word = {}
        for i, word in enumerate(unique):
            self.word_index[word]=i
            self.index_word[i]=word
        # Longest caption (in words) -- used as the padding length.
        max_len = 0
        for caption in caps:
            if(len(caption.split()) > max_len):
                max_len = len(caption.split())
        self.max_cap_len = max_len
        print "Vocabulary size: "+str(self.vocab_size)
        print "Maximum caption length: "+str(self.max_cap_len)
        print "Variables initialization done!"

    def data_generator(self, batch_size = 32):
        """Infinite generator of ``[[images, partial_caps], next_words]``
        batches for ``fit_generator``.

        For each caption, every prefix of i+1 words becomes one input
        sample whose target is word i+2, one-hot encoded over the vocab.
        """
        partial_caps = []
        next_words = []
        images = []
        print "Generating data..."
        gen_count = 0
        df = pd.read_csv('Flickr8k_text/flickr_8k_train_dataset.txt', delimiter='\t')
        nb_samples = df.shape[0]
        iter = df.iterrows()  # shadows the builtin `iter`
        caps = []
        imgs = []
        for i in range(nb_samples):
            x = iter.next()
            caps.append(x[1][1])  # caption text
            imgs.append(x[1][0])  # image filename (key into encoded_images)
        total_count = 0
        while 1:  # loop over the dataset forever, as fit_generator expects
            image_counter = -1
            for text in caps:
                image_counter+=1
                current_image = self.encoded_images[imgs[image_counter]]
                for i in range(len(text.split())-1):
                    total_count+=1
                    # Words 0..i form the partial caption input...
                    partial = [self.word_index[txt] for txt in text.split()[:i+1]]
                    partial_caps.append(partial)
                    # ...and word i+1 is the one-hot target.
                    next = np.zeros(self.vocab_size)  # shadows builtin `next`
                    next[self.word_index[text.split()[i+1]]] = 1
                    next_words.append(next)
                    images.append(current_image)
                    if total_count>=batch_size:
                        next_words = np.asarray(next_words)
                        images = np.asarray(images)
                        # Right-pad the partial captions to a fixed length.
                        partial_caps = sequence.pad_sequences(partial_caps, maxlen=self.max_cap_len, padding='post')
                        total_count = 0
                        gen_count+=1
                        print "yielding count: "+str(gen_count)
                        yield [[images, partial_caps], next_words]
                        # Reset the accumulators for the next batch.
                        partial_caps = []
                        next_words = []
                        images = []

    def load_image(self, path):
        """Load an image from ``path`` resized to 224x224 and return it as
        a numpy array (VGG16 input size)."""
        img = image.load_img(path, target_size=(224,224))
        x = image.img_to_array(img)
        return np.asarray(x)

    def create_model(self, ret_model = False):
        """Build the merged image+language captioning model.

        The image branch projects a 4096-dim CNN encoding to EMBEDDING_DIM
        and repeats it max_cap_len times; the language branch embeds the
        partial caption and runs an LSTM.  Both are concatenated per time
        step (legacy ``Merge`` layer) and a final LSTM predicts the next
        word over the vocabulary.

        If ``ret_model`` is True the model is returned uncompiled;
        otherwise it is compiled with categorical crossentropy first.
        """
        #base_model = VGG16(weights='imagenet', include_top=False, input_shape = (224, 224, 3))
        #base_model.trainable=False
        image_model = Sequential()
        #image_model.add(base_model)
        #image_model.add(Flatten())
        image_model.add(Dense(EMBEDDING_DIM, input_dim = 4096, activation='relu'))
        # Repeat the image embedding so it aligns with every caption time step.
        image_model.add(RepeatVector(self.max_cap_len))
        lang_model = Sequential()
        lang_model.add(Embedding(self.vocab_size, 256, input_length=self.max_cap_len))
        lang_model.add(LSTM(256,return_sequences=True))
        lang_model.add(TimeDistributed(Dense(EMBEDDING_DIM)))
        model = Sequential()
        # Legacy Keras 1.x Merge layer -- removed in Keras 2; would need
        # keras.layers.Concatenate with the functional API today.
        model.add(Merge([image_model, lang_model], mode='concat'))
        model.add(LSTM(1000,return_sequences=False))
        model.add(Dense(self.vocab_size))
        model.add(Activation('softmax'))
        print "Model created!"
        if(ret_model==True):
            return model
        model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
        return model

    def get_word(self,index):
        """Return the vocabulary word stored at ``index``."""
        return self.index_word[index]
關注鏈接以獲取更多信息: https://github.com/arjun-kava/caption_generator
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.