Training an Obstacle-Dodging AI with TensorFlow

import pygame
import random
from pygame.locals import *
import numpy as np
from collections import deque
import tensorflow as tf  # http://blog.topspeedsnail.com/archives/10116
import cv2  # http://blog.topspeedsnail.com/archives/4755

score = 0
BLACK = (0, 0, 0)
WHITE = (255, 255, 255)

SCREEN_SIZE = [320, 400]
BAR_SIZE = [100, 5]
BALL_SIZE = [20, 20]

# Outputs of the neural network (one-hot encoded actions)
MOVE_STAY = [1, 0, 0]
MOVE_LEFT = [0, 1, 0]
MOVE_RIGHT = [0, 0, 1]

class Game(object):
    def __init__(self):
        pygame.init()
        self.clock = pygame.time.Clock()
        self.screen = pygame.display.set_mode(SCREEN_SIZE)
        pygame.display.set_caption('Simple Game')

        self.ball_pos_x = SCREEN_SIZE[0] // 2 - BALL_SIZE[0] // 2
        self.ball_pos_y = SCREEN_SIZE[1] // 2 - BALL_SIZE[1] // 2

        self.ball_dir_x = -1  # -1 = left, 1 = right
        self.ball_dir_y = -1  # -1 = up,   1 = down
        self.ball_pos = pygame.Rect(self.ball_pos_x, self.ball_pos_y, BALL_SIZE[0], BALL_SIZE[1])

        self.bar_pos_x = SCREEN_SIZE[0] // 2 - BAR_SIZE[0] // 2
        self.bar_pos = pygame.Rect(self.bar_pos_x, SCREEN_SIZE[1] - BAR_SIZE[1], BAR_SIZE[0], BAR_SIZE[1])

        self.ball2_pos_x = SCREEN_SIZE[0] // 2 - BALL_SIZE[0] // 2
        self.ball2_pos_y = SCREEN_SIZE[1] // 2 - BALL_SIZE[1] // 2

        self.ball2_dir_x = -1  # -1 = left, 1 = right
        self.ball2_dir_y = -1  # -1 = up,   1 = down
        self.ball2_pos = pygame.Rect(self.ball2_pos_x, self.ball2_pos_y, BALL_SIZE[0], BALL_SIZE[1])

    # action is one of MOVE_STAY, MOVE_LEFT, MOVE_RIGHT.
    # The AI moves the bar left/right; step() returns the screen pixels and the
    # corresponding reward (pixels -> reward -> reinforce moving toward higher reward).
    def step(self, action):
        global score  # module-level hit counter, updated below

        if action == MOVE_LEFT:
            self.bar_pos_x = self.bar_pos_x - 2
        elif action == MOVE_RIGHT:
            self.bar_pos_x = self.bar_pos_x + 2
        else:
            pass
        # Clamp the bar inside the screen.
        if self.bar_pos_x < 0:
            self.bar_pos_x = 0
        if self.bar_pos_x > SCREEN_SIZE[0] - BAR_SIZE[0]:
            self.bar_pos_x = SCREEN_SIZE[0] - BAR_SIZE[0]

        self.screen.fill(BLACK)
        self.bar_pos.left = self.bar_pos_x
        pygame.draw.rect(self.screen, WHITE, self.bar_pos)

        # Move ball 1 by a random step each frame so its motion is less predictable.
        self.ball_pos.left += self.ball_dir_x * random.randint(2, 10)
        self.ball_pos.bottom += self.ball_dir_y * random.randint(2, 10)
        pygame.draw.rect(self.screen, WHITE, self.ball_pos)

        if self.ball_pos.top <= 0 or self.ball_pos.bottom >= (SCREEN_SIZE[1] - BAR_SIZE[1] + 1):
            self.ball_dir_y = self.ball_dir_y * -1
        if self.ball_pos.left <= 0 or self.ball_pos.right >= SCREEN_SIZE[0]:
            self.ball_dir_x = self.ball_dir_x * -1

        # Move ball 2 the same way.
        self.ball2_pos.left += self.ball2_dir_x * random.randint(2, 10)
        self.ball2_pos.bottom += self.ball2_dir_y * random.randint(2, 10)


        pygame.draw.rect(self.screen, WHITE, self.ball2_pos)

        if self.ball2_pos.top <= 0 or self.ball2_pos.bottom >= (SCREEN_SIZE[1] - BAR_SIZE[1] + 1):
            self.ball2_dir_y = self.ball2_dir_y * -1
        if self.ball2_pos.left <= 0 or self.ball2_pos.right >= SCREEN_SIZE[0]:
            self.ball2_dir_x = self.ball2_dir_x * -1

        reward = 0

        # Hit: the bar overlaps either ball at the bottom of the screen.
        if ((self.bar_pos.top <= self.ball_pos.bottom and
                self.bar_pos.left < self.ball_pos.right and self.bar_pos.right > self.ball_pos.left) or
            (self.bar_pos.top <= self.ball2_pos.bottom and
                self.bar_pos.left < self.ball2_pos.right and self.bar_pos.right > self.ball2_pos.left)):
            reward = -10  # penalty for getting hit
            score += 1    # count hits
            print(score)
        elif self.bar_pos.top <= self.ball_pos.bottom and (
                self.bar_pos.left > self.ball_pos.right or self.bar_pos.right < self.ball_pos.left):
            reward = +1  # reward for dodging

        # Grab the game screen pixels.
        screen_image = pygame.surfarray.array3d(pygame.display.get_surface())
        pygame.display.update()
        # Return the reward and the corresponding screen pixels.
        return reward, screen_image
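Before wiring the game to the network, it is worth checking that step() really yields frames and a reward signal. The smoke test below is an illustrative sketch, not part of the original script; it relies only on the Game class above and on pygame.surfarray.array3d returning a (width, height, 3) array:

# Optional smoke test (illustrative): drive the game with random actions and
# confirm the frame shape and the reward signal before any training.
def smoke_test(steps=200):
    game = Game()
    for _ in range(steps):
        reward, frame = game.step(random.choice([MOVE_STAY, MOVE_LEFT, MOVE_RIGHT]))
        # surfarray.array3d is (width, height, 3), i.e. (320, 400, 3) here.
        assert frame.shape == (SCREEN_SIZE[0], SCREEN_SIZE[1], 3)
        if reward != 0:
            print('reward:', reward)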

# Discount factor for future rewards in the Q-learning target below
# (named LEARNING_RATE here, but used as gamma).
LEARNING_RATE = 0.99
# Epsilon-greedy exploration schedule
INITIAL_EPSILON = 1.0
FINAL_EPSILON = 0.05
# Frames over which epsilon is annealed, and frames observed before training starts
EXPLORE = 500000
OBSERVE = 50000
# Maximum size of the experience-replay memory
REPLAY_MEMORY = 500000

BATCH = 100

output = 3  # number of output-layer neurons, one per action - MOVE_STAY:[1, 0, 0]  MOVE_LEFT:[0, 1, 0]  MOVE_RIGHT:[0, 0, 1]
input_image = tf.placeholder("float", [None, 80, 100, 4])  # game pixels: 4 stacked 80x100 frames
action = tf.placeholder("float", [None, output])  # one-hot actions (unused; train_neural_network builds its own action tensor)

# Define the CNN (convolutional neural network). Reference: http://blog.topspeedsnail.com/archives/10451
def convolutional_neural_network(input_image):
    # Small random initialization: all-zero weights would keep every ReLU output
    # at 0 and block gradient flow, so the network could never learn.
    weights = {'w_conv1': tf.Variable(tf.truncated_normal([8, 8, 4, 32], stddev=0.01)),
               'w_conv2': tf.Variable(tf.truncated_normal([4, 4, 32, 64], stddev=0.01)),
               'w_conv3': tf.Variable(tf.truncated_normal([3, 3, 64, 64], stddev=0.01)),
               'w_fc4': tf.Variable(tf.truncated_normal([3456, 784], stddev=0.01)),
               'w_out': tf.Variable(tf.truncated_normal([784, output], stddev=0.01))}

    biases = {'b_conv1': tf.Variable(tf.zeros([32])),
              'b_conv2': tf.Variable(tf.zeros([64])),
              'b_conv3': tf.Variable(tf.zeros([64])),
              'b_fc4': tf.Variable(tf.zeros([784])),
              'b_out': tf.Variable(tf.zeros([output]))}

    conv1 = tf.nn.relu(
        tf.nn.conv2d(input_image, weights['w_conv1'], strides=[1, 4, 4, 1], padding="VALID") + biases['b_conv1'])
    conv2 = tf.nn.relu(
        tf.nn.conv2d(conv1, weights['w_conv2'], strides=[1, 2, 2, 1], padding="VALID") + biases['b_conv2'])
    conv3 = tf.nn.relu(
        tf.nn.conv2d(conv2, weights['w_conv3'], strides=[1, 1, 1, 1], padding="VALID") + biases['b_conv3'])
    conv3_flat = tf.reshape(conv3, [-1, 3456])
    fc4 = tf.nn.relu(tf.matmul(conv3_flat, weights['w_fc4']) + biases['b_fc4'])

    output_layer = tf.matmul(fc4, weights['w_out']) + biases['b_out']
    return output_layer
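The hard-coded 3456 in the reshape follows from the VALID-padding shape arithmetic (out = (in - k) // s + 1 per dimension): 80x100 -> 19x24 -> 8x11 -> 6x9, with 64 channels, and 6 * 9 * 64 = 3456. An illustrative check:

# Illustrative check of the conv output shapes (VALID padding).
def _valid_out(size, kernel, stride):
    return (size - kernel) // stride + 1

_h, _w = 80, 100
_h, _w = _valid_out(_h, 8, 4), _valid_out(_w, 8, 4)  # 19 x 24 (32 channels)
_h, _w = _valid_out(_h, 4, 2), _valid_out(_w, 4, 2)  # 8 x 11 (64 channels)
_h, _w = _valid_out(_h, 3, 1), _valid_out(_w, 3, 1)  # 6 x 9  (64 channels)
assert _h * _w * 64 == 3456  # matches the conv3_flat reshape above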

# Intro to deep reinforcement learning: https://www.nervanasys.com/demystifying-deep-reinforcement-learning/
# Train the neural network
def train_neural_network(input_image):
    predict_action = convolutional_neural_network(input_image)

    argmax = tf.placeholder("float", [None, output])
    gt = tf.placeholder("float", [None])

    # Q-value of the taken action; squared-error loss against the target gt.
    action = tf.reduce_sum(tf.multiply(predict_action, argmax), reduction_indices=1)
    cost = tf.reduce_mean(tf.square(action - gt))
    optimizer = tf.train.AdamOptimizer(1e-6).minimize(cost)

    game = Game()
    D = deque()

    _, image = game.step(MOVE_STAY)
    # Convert to grayscale
    image = cv2.cvtColor(cv2.resize(image, (100, 80)), cv2.COLOR_BGR2GRAY)
    # Binarize
    ret, image = cv2.threshold(image, 1, 255, cv2.THRESH_BINARY)
    # Stack the same frame 4 times to bootstrap the 4-frame state.
    input_image_data = np.stack((image, image, image, image), axis=2)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        saver = tf.train.Saver()

        n = 0
        epsilon = INITIAL_EPSILON
        while True:
            action_t = predict_action.eval(feed_dict={input_image: [input_image_data]})[0]

            # Epsilon-greedy action selection.
            argmax_t = np.zeros([output], dtype=int)
            if random.random() <= epsilon:
                maxIndex = random.randrange(output)
            else:
                maxIndex = np.argmax(action_t)
            argmax_t[maxIndex] = 1
            # Anneal epsilon from INITIAL_EPSILON to FINAL_EPSILON over EXPLORE steps.
            if epsilon > FINAL_EPSILON:
                epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE

            # for event in pygame.event.get():  # macOS needs an event loop, otherwise the window stays blank
            #     if event.type == QUIT:
            #         pygame.quit()
            #         sys.exit()
            reward, image = game.step(list(argmax_t))

            image = cv2.cvtColor(cv2.resize(image, (100, 80)), cv2.COLOR_BGR2GRAY)
            ret, image = cv2.threshold(image, 1, 255, cv2.THRESH_BINARY)
            image = np.reshape(image, (80, 100, 1))
            input_image_data1 = np.append(image, input_image_data[:, :, 0:3], axis=2)
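            # input_image_data1 is the next 4-frame state: the newest binarized
            # frame occupies channel 0 and the oldest channel is dropped, so the
            # network always sees a rolling window over the last four screens.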

            D.append((input_image_data, argmax_t, reward, input_image_data1))

            if len(D) > REPLAY_MEMORY:
                D.popleft()

            if n > OBSERVE:
                minibatch = random.sample(D, BATCH)
                input_image_data_batch = [d[0] for d in minibatch]
                argmax_batch = [d[1] for d in minibatch]
                reward_batch = [d[2] for d in minibatch]
                input_image_data1_batch = [d[3] for d in minibatch]

                gt_batch = []

                out_batch = predict_action.eval(feed_dict={input_image: input_image_data1_batch})

                for i in range(0, len(minibatch)):
                    gt_batch.append(reward_batch[i] + LEARNING_RATE * np.max(out_batch[i]))

                optimizer.run(feed_dict={gt: gt_batch, argmax: argmax_batch, input_image: input_image_data_batch})
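                # Standard Q-learning update: the target is
                #     gt = reward + gamma * max_a' Q(s', a')
                # with LEARNING_RATE acting as the discount factor gamma. The game
                # never reaches a terminal state, so the target needs no terminal
                # branch, unlike a typical episodic DQN.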

            input_image_data = input_image_data1
            n = n + 1

            if n % 10000 == 0:
                saver.save(sess, 'game.cpk', global_step=n)  # save the model

            print(n, "epsilon:", epsilon, " ", "action:", maxIndex, " ", "reward:", reward)

train_neural_network(input_image)
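The checkpoints written every 10000 steps can be reloaded later to watch the trained agent play. The sketch below is illustrative, not part of the original script; run it in a fresh process with the train_neural_network call above disabled, so the rebuilt variables match the checkpoint names. It uses the standard TF 1.x Saver.restore / tf.train.latest_checkpoint pattern with a purely greedy policy:

# Illustrative evaluation sketch: rebuild the graph, restore the newest
# checkpoint from the working directory, and act greedily (no exploration).
def evaluate():
    predict_action = convolutional_neural_network(input_image)
    with tf.Session() as sess:
        tf.train.Saver().restore(sess, tf.train.latest_checkpoint('.'))
        game = Game()
        _, frame = game.step(MOVE_STAY)
        frame = cv2.cvtColor(cv2.resize(frame, (100, 80)), cv2.COLOR_BGR2GRAY)
        _, frame = cv2.threshold(frame, 1, 255, cv2.THRESH_BINARY)
        state = np.stack((frame, frame, frame, frame), axis=2)
        while True:
            q_values = predict_action.eval(feed_dict={input_image: [state]})[0]
            one_hot = np.zeros([output], dtype=int)
            one_hot[np.argmax(q_values)] = 1
            _, frame = game.step(list(one_hot))
            frame = cv2.cvtColor(cv2.resize(frame, (100, 80)), cv2.COLOR_BGR2GRAY)
            _, frame = cv2.threshold(frame, 1, 255, cv2.THRESH_BINARY)
            state = np.append(np.reshape(frame, (80, 100, 1)), state[:, :, 0:3], axis=2)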