@@ -14,9 +14,9 @@
 ACTIONS = 2 # number of valid actions
 GAMMA = 0.99 # decay rate of past observations
 OBSERVE = 100000. # timesteps to observe before training
-EXPLORE = 150000. # frames over which to anneal epsilon
-FINAL_EPSILON = 0.0 # final value of epsilon
-INITIAL_EPSILON = 0.0 # starting value of epsilon
+EXPLORE = 2000000. # frames over which to anneal epsilon
+FINAL_EPSILON = 0.0001 # final value of epsilon
+INITIAL_EPSILON = 0.0001 # starting value of epsilon
 REPLAY_MEMORY = 50000 # number of previous transitions to remember
 BATCH = 32 # size of minibatch
 FRAME_PER_ACTION = 1
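These constants drive the usual linear epsilon-annealing schedule: once the OBSERVE phase ends, epsilon is stepped down from INITIAL_EPSILON to FINAL_EPSILON over EXPLORE frames. A minimal sketch of that step (it lives later in the training loop, outside this diff):

    # anneal epsilon linearly after the observation phase
    if epsilon > FINAL_EPSILON and t > OBSERVE:
        epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE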
@@ -79,7 +79,7 @@ def trainNetwork(s, readout, h_fc1, sess):
     # define the cost function
     a = tf.placeholder("float", [None, ACTIONS])
     y = tf.placeholder("float", [None])
-    readout_action = tf.reduce_sum(tf.mul(readout, a), reduction_indices=1)
+    readout_action = tf.reduce_sum(tf.mul(readout, a), reduction_indices=1)
     cost = tf.reduce_mean(tf.square(y - readout_action))
     train_step = tf.train.AdamOptimizer(1e-6).minimize(cost)

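Because a is a one-hot action placeholder, tf.mul(readout, a) zeroes out every Q-value except the one for the action actually taken, so readout_action is Q(s, a) and the cost is the mean squared error against the Bellman target fed in through y. (In TensorFlow 1.0+ tf.mul was renamed tf.multiply and reduction_indices became axis.) A sketch of how such a target batch is typically assembled from sampled transitions, with illustrative variable names that are not part of this diff:

    # r_batch: sampled rewards, readout_j1_batch: Q-values of the next states
    y_batch = []
    for i in range(len(minibatch)):
        terminal = minibatch[i][4]
        if terminal:
            y_batch.append(r_batch[i])  # episode ended: target is just the reward
        else:
            y_batch.append(r_batch[i] + GAMMA * np.max(readout_j1_batch[i]))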
@@ -99,7 +99,7 @@ def trainNetwork(s, readout, h_fc1, sess):
     x_t, r_0, terminal = game_state.frame_step(do_nothing)
     x_t = cv2.cvtColor(cv2.resize(x_t, (80, 80)), cv2.COLOR_BGR2GRAY)
     ret, x_t = cv2.threshold(x_t, 1, 255, cv2.THRESH_BINARY)
-    s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)
+    s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)

     # saving and loading networks
     saver = tf.train.Saver()
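Each emulator frame is resized to 80x80, converted to grayscale, and thresholded to a binary image, and four copies of the first frame are stacked along the channel axis to form the initial 80x80x4 network input s_t. For illustration only, the same per-frame preprocessing as a hypothetical helper (not part of this diff):

    def preprocess(frame):
        # resize, grayscale, then binarize to {0, 255}
        gray = cv2.cvtColor(cv2.resize(frame, (80, 80)), cv2.COLOR_BGR2GRAY)
        _, binary = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY)
        return binary  # shape (80, 80)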
@@ -111,11 +111,12 @@ def trainNetwork(s, readout, h_fc1, sess):
     else:
         print("Could not find old network weights")

+    # start training
     epsilon = INITIAL_EPSILON
     t = 0
     while "flappy bird" != "angry bird":
         # choose an action epsilon greedily
-        readout_t = readout.eval(feed_dict={s: [s_t]})[0]
+        readout_t = readout.eval(feed_dict={s: [s_t]})[0]
         a_t = np.zeros([ACTIONS])
         action_index = 0
         if t % FRAME_PER_ACTION == 0:
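readout_t holds the network's Q-value estimates for both actions in the current state. The body of the if block is cut off by this hunk; the standard epsilon-greedy rule it implements looks roughly like this:

    if random.random() <= epsilon:
        action_index = random.randrange(ACTIONS)   # explore: random action
    else:
        action_index = np.argmax(readout_t)        # exploit: greedy action
    a_t[action_index] = 1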
@@ -138,7 +139,8 @@ def trainNetwork(s, readout, h_fc1, sess):
         x_t1 = cv2.cvtColor(cv2.resize(x_t1_colored, (80, 80)), cv2.COLOR_BGR2GRAY)
         ret, x_t1 = cv2.threshold(x_t1, 1, 255, cv2.THRESH_BINARY)
         x_t1 = np.reshape(x_t1, (80, 80, 1))
-        s_t1 = np.append(x_t1, s_t[:,:,1:], axis = 2)
+        #s_t1 = np.append(x_t1, s_t[:,:,1:], axis = 2)
+        s_t1 = np.append(x_t1, s_t[:, :, :3], axis=2)

         # store the transition in D
         D.append((s_t, a_t, r_t, s_t1, terminal))
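The new line keeps the latest frame in channel 0 followed by the three most recent channels of s_t (s_t[:, :, :3]) instead of channels 1:, so the stack holds the four newest frames rather than dropping the previous newest one. D then acts as the experience-replay buffer; a sketch of the bookkeeping that typically follows the append, bounded by REPLAY_MEMORY and sampled once the OBSERVE phase is over:

    if len(D) > REPLAY_MEMORY:
        D.popleft()                             # drop the oldest transition
    if t > OBSERVE:
        minibatch = random.sample(D, BATCH)     # sample a training minibatch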