I'm trying to train a model to play a simple car game in which a "car" has to avoid obstacles. There are also some other objects that are OK to collect.
The car can only go left or right.
The issue I'm having is that the car only moves straight to the left or straight to the right (and crashes when it hits the border of the screen), no matter what parameters I use for training.
For each frame, I store the x coordinate of the car and the obstacle's x and y coordinates in the training data.
First, the program plays games with random actions and saves the moves from the games whose score reaches score_requirement. Then it trains the model on them using tflearn with the Adam optimizer.
Lastly, it plays and displays 10 games using the trained model.
What have I done wrong?
Below is the code:
# Learning rate handed to the Adam optimizer in neural_network_model().
LR = 0.001
# Upper bound on the number of frames played in a single game.
goal_steps = 400
# Minimum score a random game must reach for its moves to be kept as
# training data.
score_requirement = 100
# Number of random games played by initial_population().
initial_games = 100
class Game:
    """Minimal gym-style environment for the obstacle-dodging car game.

    The state is the tuple ``(car_x, obstacle_x, obstacle_y)``.  The car only
    moves horizontally; one obstacle falls down the screen and respawns at a
    random x position (a little faster each time) once it leaves the bottom
    edge.  The game ends when the car leaves the screen horizontally or
    overlaps the obstacle.

    Relies on the module-level names ``display_width``, ``display_height``
    and ``carWidth``; ``render`` additionally needs the pygame window, clock
    and drawing helpers defined elsewhere in the file.
    """

    # Number of dodged obstacles at which the car's sideways speed changes,
    # mapped to the new speed.
    _SCALE_BY_DODGED = {30: 6, 50: 8, 70: 10, 90: 12}

    def __init__(self):
        # State is set up per instance (not through class attributes that are
        # evaluated once at class-definition time), so every environment gets
        # its own random obstacle and ``state`` is valid even if ``step`` is
        # called before an explicit ``reset``.
        self.reset()

    def reset(self):
        """Restore the start-of-game state.

        Returns:
            The initial observation ``[car_x, obstacle_x, obstacle_y]`` as a
            NumPy array.
        """
        self.done = False
        self.x = display_width * 0.5
        self.y = display_height * 0.8
        self.x_change = 0
        self.x_change_scale = 5
        self.thing_width = 100
        self.thing_height = 100
        self.thing_startx = random.randrange(0, display_width - self.thing_width)
        self.thing_starty = -100
        self.thing_speed = 7
        self.dodged = 0
        self.steps_beyond_done = None
        self.state = (self.x, self.thing_startx, self.thing_starty)
        return np.array(self.state)

    def step(self, action):
        """Advance the game by one frame.

        Args:
            action: 0 moves the car right, 1 moves it left; any other value
                leaves the car where it is.

        Returns:
            ``(observation, reward, done, info)``.  ``observation`` is a NumPy
            array ``[car_x, obstacle_x, obstacle_y]``; ``reward`` is 1 for
            every frame up to and including the one that ends the game and 0
            afterwards; ``done`` tells whether the game is over; ``info`` is
            always an empty dict.
        """
        x, thing_startx, thing_starty = self.state

        if action == 0:
            x_change = self.x_change_scale
        elif action == 1:
            x_change = -self.x_change_scale
        else:
            # Stay in position.  Without this assignment any action other
            # than 0 or 1 raised UnboundLocalError on the next line.
            x_change = 0

        x += x_change
        thing_starty += self.thing_speed

        # The car moves faster sideways once enough obstacles were dodged.
        new_scale = self._SCALE_BY_DODGED.get(self.dodged)
        if new_scale is not None:
            self.x_change_scale = new_scale

        # Leaving the screen on either side ends the game.
        if x > display_width - carWidth or x < 0:
            self.done = True

        # The obstacle left the bottom edge: respawn it above the screen,
        # count it as dodged and make the next one fall faster.
        if thing_starty > display_height:
            thing_starty = 0 - self.thing_height
            thing_startx = random.randrange(0, display_width - self.thing_width)
            self.dodged += 1
            self.thing_speed += 0.2

        # Vertical overlap first, then horizontal overlap of either car edge
        # with the obstacle; a hit ends the game.
        if self.y < thing_starty + self.thing_height and self.y + 50 > thing_starty:
            left_edge_inside = thing_startx < x < thing_startx + self.thing_width
            right_edge_inside = (
                thing_startx < x + carWidth < thing_startx + self.thing_width
            )
            if left_edge_inside or right_edge_inside:
                self.done = True

        self.state = (x, thing_startx, thing_starty)
        observation = np.array(self.state)

        if not self.done:
            reward = 1
        elif self.steps_beyond_done is None:
            # The game ended on this very frame; it still earns its reward.
            self.steps_beyond_done = 0
            reward = 1
        else:
            # NOTE(review): ``logger`` is not defined in the visible part of
            # the file -- confirm it is imported before relying on this path.
            if self.steps_beyond_done == 0:
                logger.warning("You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.")
            self.steps_beyond_done += 1
            reward = 0

        return observation, reward, self.done, {}

    def render(self):
        """Draw the current frame and cap the loop at 60 frames per second."""
        x, thing_startx, thing_starty = self.state
        gameDisplay.fill(white)
        things(thing_startx, thing_starty, self.thing_width, self.thing_height, block_color)
        car(x, self.y)
        things_dodged(self.dodged)
        pygame.display.update()
        clock.tick(60)
# Create the game environment and put it into its start-of-game state.
env = Game()
env.reset()
# Start pygame and open the game window that all drawing helpers blit onto.
pygame.init()
gameDisplay = pygame.display.set_mode((display_width,display_height))
pygame.display.set_caption('Crashed!!!')
# Clock used by Game.render() to cap the frame rate.
clock = pygame.time.Clock()
# Sprite drawn by car().
carImg = pygame.image.load('car.png')
# Car width used by Game.step() for the border and collision checks
# (presumably the pixel width of car.png -- TODO confirm).
carWidth = 50
# NOTE(review): not read anywhere in the visible part of the file.
pause = True
def things_dodged(count):
    """Draw the score label for ``count`` at the top-left of the window."""
    score_font = pygame.font.SysFont(None, 25)
    label = score_font.render('Score: ' + str(count), True, black)
    top_left = (0, 0)
    gameDisplay.blit(label, top_left)
def things(thingx, thingy, thingw, thingh, color):
    """Draw one obstacle as a rectangle of the given colour on the window.

    ``thingx``/``thingy`` are the rectangle's position, ``thingw``/``thingh``
    its width and height.
    """
    obstacle_rect = [thingx, thingy, thingw, thingh]
    pygame.draw.rect(gameDisplay, color, obstacle_rect)
def car(x, y):
    """Blit the car sprite onto the game window at position ``(x, y)``."""
    position = (x, y)
    gameDisplay.blit(carImg, position)
    # Sprite-free alternative kept from the original for reference:
    #pygame.draw.polygon(gameDisplay, green, ((x-carWidth,y+carWidth),(x+carWidth,y+carWidth),(x,y-carWidth)))
def text_objects(text, font):
    """Render ``text`` with ``font`` and return ``(surface, rect)``.

    ``rect`` is whatever the rendered surface's ``get_rect()`` returns.
    """
    rendered = font.render(text, True, black)
    bounds = rendered.get_rect()
    return rendered, bounds
def message_display(text):
    """Show ``text`` centred in the window, wait two seconds, then call game_loop()."""
    big_font = pygame.font.Font('freesansbold.ttf', 115)
    surface, rect = text_objects(text, big_font)
    centre = ((display_width / 2), (display_height / 2))
    rect.center = centre
    gameDisplay.blit(surface, rect)
    pygame.display.update()
    time.sleep(2)
    # NOTE(review): game_loop() is not defined in the visible part of the
    # file -- confirm it exists before calling this helper.
    game_loop()
def quit_game():
    """Shut pygame down and terminate the program.

    Raises:
        SystemExit: always, to end the interpreter.
    """
    pygame.quit()
    # The ``quit()`` builtin is injected by the ``site`` module for
    # interactive sessions and is missing under ``python -S`` or in frozen
    # builds; raising SystemExit directly works everywhere.
    raise SystemExit
def initial_population():
    """Play random games and collect the moves of the successful ones.

    Plays ``initial_games`` games of at most ``goal_steps`` frames each on
    the module-level ``env``, choosing left/right uniformly at random.  Every
    game whose total score reaches ``score_requirement`` contributes all of
    its ``(observation, action)`` pairs to the training set, which is also
    written to ``saved.npy``.

    Returns:
        A list of ``[observation, one_hot_action]`` pairs, where
        ``one_hot_action`` is ``[1, 0]`` for action 0 and ``[0, 1]`` for
        action 1.  The list is empty when no game reached the threshold.
    """
    # Output-layer encoding of the two possible actions.
    one_hot = {0: [1, 0], 1: [0, 1]}

    training_data = []
    # Scores of the games that met the threshold.
    accepted_scores = []

    for _ in range(initial_games):
        # Start every game from a clean state instead of relying on the
        # caller (or the previous iteration) having reset the environment.
        env.reset()
        score = 0
        game_memory = []
        prev_observation = None

        for _step in range(goal_steps):
            action = random.randrange(0, 2)
            observation, reward, done, _info = env.step(action)
            # The observation returned by step() is the result OF the action,
            # so the action is paired with the observation seen before it.
            # The first frame has no earlier observation and is skipped.
            if prev_observation is not None:
                game_memory.append([prev_observation, action])
            prev_observation = observation
            score += reward
            if done:
                break

        # Only the score is reinforced: every move of a good game is kept,
        # regardless of how that score was reached.
        if score >= score_requirement:
            accepted_scores.append(score)
            training_data.extend(
                [seen, list(one_hot[chosen])] for seen, chosen in game_memory
            )

    # Each row pairs a length-3 observation with a length-2 label, i.e. the
    # data is ragged; recent NumPy versions refuse to build such an array
    # unless dtype=object is requested explicitly.
    np.save('saved.npy', np.array(training_data, dtype=object))

    # mean()/median() raise StatisticsError on an empty sequence, which is
    # exactly what happens when no random game reaches the threshold.
    if accepted_scores:
        print('Average accepted score:', mean(accepted_scores))
        print('Median score for accepted scores:', median(accepted_scores))
    else:
        print('No game reached score_requirement =', score_requirement)
    print(Counter(accepted_scores))

    return training_data
def neural_network_model(input_size):
    """Build the tflearn classifier that picks one of the two actions.

    The network takes inputs of shape ``[None, input_size, 1]``, runs them
    through five fully connected ReLU layers (128, 256, 512, 256 and 128
    units, each followed by ``dropout(..., 0.8)``) and ends in a 2-unit
    softmax layer trained with Adam on categorical cross-entropy.

    Returns:
        A ``tflearn.DNN`` wrapping the network, logging to ``log``.
    """
    hidden_widths = (128, 256, 512, 256, 128)

    net = input_data(shape=[None, input_size, 1], name='input')
    for width in hidden_widths:
        net = fully_connected(net, width, activation='relu')
        net = dropout(net, 0.8)
    net = fully_connected(net, 2, activation='softmax')
    net = regression(
        net,
        optimizer='adam',
        learning_rate=LR,
        loss='categorical_crossentropy',
        name='targets',
    )
    return tflearn.DNN(net, tensorboard_dir='log')
def train_model(training_data, model=False):
    """Fit a network on ``[observation, one_hot_action]`` pairs.

    Args:
        training_data: non-empty list of ``[observation, one_hot_action]``
            pairs as produced by ``initial_population()``.
        model: an existing model to continue training; when falsy, a new one
            is built with ``neural_network_model()``.

    Returns:
        The fitted model.
    """
    observation_len = len(training_data[0][0])
    observations = [sample[0] for sample in training_data]
    # The network's input layer expects a trailing axis of size 1.
    X = np.array(observations).reshape(-1, observation_len, 1)
    y = [sample[1] for sample in training_data]

    if not model:
        model = neural_network_model(input_size=len(X[0]))

    model.fit(
        {'input': X},
        {'targets': y},
        n_epoch=5,
        snapshot_step=1000,
        show_metric=True,
        run_id='openai_learning',
    )
    return model
#some_random_games_first()
# Collect training data from random play, then fit the network on it.
training_data = initial_population()
model = train_model(training_data)

# Let the trained model play 10 rendered games and record how it does.
scores = []
choices = []
for each_game in range(10):
    score = 0
    game_memory = []
    prev_obs = []
    env.reset()
    for _ in range(goal_steps):
        # Process pending window events every frame; without this the OS may
        # flag the pygame window as unresponsive while the games are playing.
        pygame.event.pump()
        env.render()

        if len(prev_obs) == 0:
            # No observation yet on the first frame: act randomly.
            action = random.randrange(0, 2)
        else:
            # Reshape to the (batch, features, 1) layout the network was
            # trained on and take the more probable of the two actions.
            prediction = model.predict(prev_obs.reshape(-1, len(prev_obs), 1))
            action = np.argmax(prediction[0])

        choices.append(action)

        new_observation, reward, done, info = env.step(action)
        prev_obs = new_observation
        game_memory.append([new_observation, action])
        score += reward
        if done:
            break

    scores.append(score)

print('Average Score:', sum(scores) / len(scores))
print('choice 1:{} choice 0:{}'.format(choices.count(1) / len(choices), choices.count(0) / len(choices)))
print(score_requirement)

pygame.quit()
# ``quit()`` only exists when the ``site`` module has injected it; raising
# SystemExit ends the script reliably.
raise SystemExit
From training other AI car models, usually providing the distances to the nearest obstacle or wall in both directions is more immediately useful to the AI than the x and y locations of both the obstacle and the car. In this way, the AI should quickly learn to go left when the distance to an obstacle on the right is very small or vice versa.
There may be some bug preventing the car from turning or accelerating in a different direction, so try setting up a way to control the car manually (with arrow keys or something) to test that everything being sent to the AI model is correct.
I think you have to put this at the very top of the file: import pygame, sys
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.