
Python car game AI with TensorFlow

I'm trying to fit a model to a simple car game so that it drives a "car" that avoids obstacles. There are also some other objects that are OK to collect.

The car can only go left or right.

The issue I'm having is that the car only moves straight to the left or to the right (and crashes when it hits the border of the screen), no matter what parameters I use for training.

For each frame, I store the car's x coordinate and the obstacle's x and y coordinates in the training data.

First it generates random actions and saves the games whose scores are above score_requirement. Then it trains the model on that data using tflearn with the Adam optimizer.

Lastly, it plays 10 games with the trained model and displays them.

What have I done wrong?

Below is the code:

LR = 1e-3
goal_steps = 400
score_requirement = 100
initial_games = 100

class Game:
    done = False
    x = (display_width * 0.5)
    y = (display_height * 0.8)

    x_change = 0
    x_change_scale = 5

    thing_startx = random.randrange(0, display_width-100)
    thing_starty = -100
    thing_speed = 7
    thing_width = 100
    thing_height = 100

    dodged = 0
    state = None
    steps_beyond_done = None

    def reset(self):
        self.done = False

        self.x = (display_width * 0.5)
        self.y = (display_height * 0.8)

        self.x_change = 0
        self.x_change_scale = 5

        self.thing_startx = random.randrange(0, display_width-100)
        self.thing_starty = -100
        self.thing_speed = 7
        self.thing_width = 100
        self.thing_height = 100

        self.dodged = 0
        self.steps_beyond_done = None
        #self.state = np.random.uniform(low=-1, high=1, size=(5,))
        self.state = (self.x, self.thing_startx, self.thing_starty)

    def step(self, action):
        state = self.state
        x, thing_startx, thing_starty = state
        info = ""
        reward = 0

        if action == 0:
            # right movement
            x_change = self.x_change_scale
        elif action == 1:
            # left movement
            x_change = -self.x_change_scale
        else:
            # stay in position
            info = "no change"

        x += x_change
        thing_starty += self.thing_speed

        if self.dodged == 30:
            self.x_change_scale = 6
        if self.dodged == 50:
            self.x_change_scale = 8
        if self.dodged == 70:
            self.x_change_scale = 10
        if self.dodged == 90:
            self.x_change_scale = 12

        if x > display_width-carWidth or x < 0:
            self.done = True
        if thing_starty > display_height:
            thing_starty = 0 - self.thing_height
            thing_startx = random.randrange(0,display_width-100)
            self.dodged += 1
            self.thing_speed += 0.2
            #thing_width += (dodged * 1.2)

        if self.y < thing_starty+self.thing_height and self.y+50 > thing_starty:
            if x > thing_startx and x < thing_startx + self.thing_width or x+carWidth > thing_startx and x+carWidth < thing_startx+self.thing_width:
                # Bonus block
                self.done = True


        self.state = (x, thing_startx, thing_starty)
        observation = np.array(self.state)

        if not self.done:
            reward = 1
        elif self.steps_beyond_done is None:
            # The car just crashed!
            self.steps_beyond_done = 0
            reward = 1
        else:
            if self.steps_beyond_done == 0:
                logger.warning("You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.")
            self.steps_beyond_done += 1
            reward = 0


        return observation, reward, self.done, {}


    def render(self):
        x, thing_startx, thing_starty = self.state

        gameDisplay.fill(white)

        things(thing_startx, thing_starty, self.thing_width, self.thing_height, block_color)


        car(x,self.y)
        things_dodged(self.dodged)

        pygame.display.update()
        clock.tick(60)


env = Game()
env.reset()


pygame.init()

gameDisplay = pygame.display.set_mode((display_width,display_height))
pygame.display.set_caption('Crashed!!!')
clock = pygame.time.Clock()

carImg = pygame.image.load('car.png')
carWidth = 50

pause = True

def things_dodged(count):
    font = pygame.font.SysFont(None, 25)
    text = font.render('Score: '+str(count), True, black)
    gameDisplay.blit(text, (0,0))

def things(thingx, thingy, thingw, thingh, color):
    pygame.draw.rect(gameDisplay, color, [thingx, thingy, thingw, thingh])

def car(x,y):
    gameDisplay.blit(carImg, (x,y))
    #pygame.draw.polygon(gameDisplay, green, ((x-carWidth,y+carWidth),(x+carWidth,y+carWidth),(x,y-carWidth)))

def text_objects(text, font):
    textSurface = font.render(text, True, black)
    return textSurface, textSurface.get_rect()

def message_display(text):
    largeText = pygame.font.Font('freesansbold.ttf',115)
    TextSurf, TextRect = text_objects(text, largeText)
    TextRect.center = ((display_width/2),(display_height/2))
    gameDisplay.blit(TextSurf, TextRect)

    pygame.display.update()

    time.sleep(2)

    game_loop()

def quit_game():
    pygame.quit()
    quit()

def initial_population():
    global env
    # [OBS, MOVES]
    training_data = []
    # all scores:
    scores = []
    # just the scores that met our threshold:
    accepted_scores = []
    # iterate through however many games we want:
    for _ in range(initial_games):
        done = False
        score = 0
        # moves specifically from this environment:
        game_memory = []
        # previous observation that we saw
        prev_observation = []
        # for each frame, up to goal_steps
        for _ in range(goal_steps):
            # RENDERING
            #env.render()

            # choose random action (0 or 1)
            action = random.randrange(0,2)
            # do it!
            observation, reward, done, info = env.step(action)

            # notice that the observation is returned FROM the action
            # so we'll store the previous observation here, pairing
            # the prev observation to the action we'll take.
            if len(prev_observation) > 0 :
                game_memory.append([prev_observation, action])
            prev_observation = observation
            score+=reward

            if done:
                break

        # IF our score is higher than our threshold, we'd like to save
        # every move we made
        # NOTE the reinforcement methodology here. 
        # all we're doing is reinforcing the score, we're not trying 
        # to influence the machine in any way as to HOW that score is 
        # reached.
        if score >= score_requirement:
            accepted_scores.append(score)
            for data in game_memory:
                # convert to one-hot (this is the output layer for our neural network)
                if data[1] == 1:
                    output = [0,1]
                elif data[1] == 0:
                    output = [1,0]

                # saving our training data
                training_data.append([data[0], output])

        # reset env to play again
        env.reset()
        # save overall scores
        scores.append(score)

    # just in case you wanted to reference later
    training_data_save = np.array(training_data)
    np.save('saved.npy',training_data_save)

    # some stats here, to further illustrate the neural network magic!
    print('Average accepted score:',mean(accepted_scores))
    print('Median score for accepted scores:',median(accepted_scores))
    print(Counter(accepted_scores))

    return training_data

def neural_network_model(input_size):

    network = input_data(shape=[None, input_size, 1], name='input')

    network = fully_connected(network, 128, activation='relu')
    network = dropout(network, 0.8)

    network = fully_connected(network, 256, activation='relu')
    network = dropout(network, 0.8)

    network = fully_connected(network, 512, activation='relu')
    network = dropout(network, 0.8)

    network = fully_connected(network, 256, activation='relu')
    network = dropout(network, 0.8)

    network = fully_connected(network, 128, activation='relu')
    network = dropout(network, 0.8)

    network = fully_connected(network, 2, activation='softmax')
    network = regression(network, optimizer='adam', learning_rate=LR, loss='categorical_crossentropy', name='targets')
    model = tflearn.DNN(network, tensorboard_dir='log')

    return model


def train_model(training_data, model=False):

    X = np.array([i[0] for i in training_data]).reshape(-1,len(training_data[0][0]),1)
    y = [i[1] for i in training_data]

    if not model:
        model = neural_network_model(input_size = len(X[0]))

    model.fit({'input': X}, {'targets': y}, n_epoch=5, snapshot_step=1000, show_metric=True, run_id='openai_learning')
    return model

#some_random_games_first()
training_data = initial_population()
model = train_model(training_data)

scores = []
choices = []
for each_game in range(10):
    score = 0
    game_memory = []
    prev_obs = []
    env.reset()
    for _ in range(goal_steps):
        env.render()

        if len(prev_obs)==0:
            action = random.randrange(0,2)
        else:
            action = np.argmax(model.predict(prev_obs.reshape(-1,len(prev_obs),1))[0])

        choices.append(action)

        new_observation, reward, done, info = env.step(action)
        prev_obs = new_observation
        game_memory.append([new_observation, action])
        score+=reward
        if done: break

    scores.append(score)

print('Average Score:',sum(scores)/len(scores))
print('choice 1:{}  choice 0:{}'.format(choices.count(1)/len(choices),choices.count(0)/len(choices)))
print(score_requirement)

pygame.quit()
quit()

From training other AI car models, I've found that providing the distances to the nearest obstacle or wall in each direction is usually more immediately useful to the AI than the x and y positions of the obstacle and the car. That way, the AI should quickly learn to go left when the distance to an obstacle on its right is very small, and vice versa.
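
For example, here is a minimal sketch of such a distance-based observation, reusing the attribute names from the code in the question (the distance_observation helper itself is hypothetical):

import numpy as np

def distance_observation(game):
    # Signed horizontal gap between the obstacle's centre and the car's centre
    obstacle_center = game.thing_startx + game.thing_width / 2
    car_center = game.x + carWidth / 2
    dx_obstacle = obstacle_center - car_center

    # Vertical gap until the obstacle reaches the car
    dy_obstacle = game.y - game.thing_starty

    # Distances from the car to the left and right screen borders
    dist_left = game.x
    dist_right = display_width - (game.x + carWidth)

    return np.array([dx_obstacle, dy_obstacle, dist_left, dist_right])

If you switch to an observation like this, remember to change input_size (and the reshape calls) to match the new length of 4.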

There may also be some bug preventing the car from turning or changing direction, so try setting up a way to control the car manually (with the arrow keys or something similar) to verify that everything being sent to the AI model is correct.
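
Here is a rough sketch of such a manual-control loop, assuming the Game class and render() from the question (note that the posted step() only assigns x_change for actions 0 and 1, so the sketch sticks to those two):

import pygame

env = Game()
env.reset()
done = False
while not done:
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            done = True

    # Map the arrow keys onto the same action codes used in step()
    keys = pygame.key.get_pressed()
    if keys[pygame.K_LEFT]:
        action = 1      # left
    else:
        action = 0      # right (default, since step() expects 0 or 1)

    observation, reward, done, info = env.step(action)
    print(observation, reward, done)   # check what the model would be fed
    env.render()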

I think you have to put this at the very top: import pygame, sys
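
More generally, here is a sketch of the imports the posted code appears to rely on, assuming the standard tflearn layer locations; display_width, display_height and the colour constants (white, black, block_color) would also need to be defined before the Game class:

import sys
import time
import random
import logging
import pygame
import numpy as np
import tflearn
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression
from statistics import mean, median
from collections import Counter

logger = logging.getLogger(__name__)   # used by Game.step()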
