Commit fac4ba7

Fix deep_q_network last 4 images bug

1 parent 944bb47

13 files changed: +16 -12 lines

deep_q_network.py (9 additions, 7 deletions)
@@ -14,9 +14,9 @@
 ACTIONS = 2 # number of valid actions
 GAMMA = 0.99 # decay rate of past observations
 OBSERVE = 100000. # timesteps to observe before training
-EXPLORE = 150000. # frames over which to anneal epsilon
-FINAL_EPSILON = 0.0 # final value of epsilon
-INITIAL_EPSILON = 0.0 # starting value of epsilon
+EXPLORE = 2000000. # frames over which to anneal epsilon
+FINAL_EPSILON = 0.0001 # final value of epsilon
+INITIAL_EPSILON = 0.0001 # starting value of epsilon
 REPLAY_MEMORY = 50000 # number of previous transitions to remember
 BATCH = 32 # size of minibatch
 FRAME_PER_ACTION = 1
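
These constants drive the epsilon-greedy exploration schedule: later in trainNetwork the script linearly anneals epsilon from INITIAL_EPSILON down to FINAL_EPSILON over EXPLORE frames once the OBSERVE phase ends. A minimal sketch of that annealing step, assuming epsilon and t are the loop state (a paraphrase, not the verbatim repo code):

    # Hypothetical loop state; in the repo these live inside trainNetwork.
    OBSERVE = 100000.
    EXPLORE = 2000000.
    INITIAL_EPSILON = 0.0001
    FINAL_EPSILON = 0.0001

    epsilon, t = INITIAL_EPSILON, 200000

    # Linear annealing: one small step per frame after the observation phase.
    if epsilon > FINAL_EPSILON and t > OBSERVE:
        epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE

Note that with both endpoints now 0.0001 the subtraction never fires, so the change effectively replaces the old all-greedy policy (epsilon fixed at 0.0) with a small constant 0.01% random-action rate.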
@@ -79,7 +79,7 @@ def trainNetwork(s, readout, h_fc1, sess):
     # define the cost function
     a = tf.placeholder("float", [None, ACTIONS])
     y = tf.placeholder("float", [None])
-    readout_action = tf.reduce_sum(tf.mul(readout, a), reduction_indices = 1)
+    readout_action = tf.reduce_sum(tf.mul(readout, a), reduction_indices=1)
     cost = tf.reduce_mean(tf.square(y - readout_action))
     train_step = tf.train.AdamOptimizer(1e-6).minimize(cost)
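
Beyond the spacing cleanup, the touched line is the core of the DQN loss: a holds one-hot action vectors, so the elementwise product with readout (the per-action Q-values) followed by a row sum extracts Q(s, a) for the action actually taken, which the next line regresses against the target y. A small numpy illustration with made-up values:

    import numpy as np

    readout = np.array([[1.5, -0.3],   # Q-values per action, sample 1
                        [0.2,  2.0]])  # Q-values per action, sample 2
    a = np.array([[1., 0.],            # one-hot: action 0 taken
                  [0., 1.]])           # one-hot: action 1 taken

    readout_action = np.sum(readout * a, axis=1)
    print(readout_action)  # [1.5 2. ] -- Q(s, a) for the chosen actions

(tf.mul is the pre-1.0 TensorFlow name for what later became tf.multiply.)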

@@ -99,7 +99,7 @@ def trainNetwork(s, readout, h_fc1, sess):
     x_t, r_0, terminal = game_state.frame_step(do_nothing)
     x_t = cv2.cvtColor(cv2.resize(x_t, (80, 80)), cv2.COLOR_BGR2GRAY)
     ret, x_t = cv2.threshold(x_t,1,255,cv2.THRESH_BINARY)
-    s_t = np.stack((x_t, x_t, x_t, x_t), axis = 2)
+    s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)

     # saving and loading networks
     saver = tf.train.Saver()
@@ -111,11 +111,12 @@ def trainNetwork(s, readout, h_fc1, sess):
     else:
         print("Could not find old network weights")

+    # start training
     epsilon = INITIAL_EPSILON
     t = 0
     while "flappy bird" != "angry bird":
         # choose an action epsilon greedily
-        readout_t = readout.eval(feed_dict = {s : [s_t]})[0]
+        readout_t = readout.eval(feed_dict={s : [s_t]})[0]
         a_t = np.zeros([ACTIONS])
         action_index = 0
         if t % FRAME_PER_ACTION == 0:
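
The hunk cuts off just inside the FRAME_PER_ACTION branch; the epsilon-greedy rule it guards takes a random action with probability epsilon and the argmax of readout_t otherwise. A self-contained sketch of that selection logic (the concrete values here are hypothetical, not from the diff):

    import random
    import numpy as np

    ACTIONS = 2
    epsilon = 0.0001
    readout_t = np.array([0.7, 1.2])  # hypothetical Q-values for state s_t

    a_t = np.zeros([ACTIONS])
    if random.random() <= epsilon:
        action_index = random.randrange(ACTIONS)  # explore: random action
    else:
        action_index = int(np.argmax(readout_t))  # exploit: greedy action
    a_t[action_index] = 1
    print(a_t)  # one-hot action vector fed to the game; with this epsilon,
                # almost always [0. 1.]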
@@ -138,7 +139,8 @@ def trainNetwork(s, readout, h_fc1, sess):
         x_t1 = cv2.cvtColor(cv2.resize(x_t1_colored, (80, 80)), cv2.COLOR_BGR2GRAY)
         ret, x_t1 = cv2.threshold(x_t1, 1, 255, cv2.THRESH_BINARY)
         x_t1 = np.reshape(x_t1, (80, 80, 1))
-        s_t1 = np.append(x_t1, s_t[:,:,1:], axis = 2)
+        #s_t1 = np.append(x_t1, s_t[:,:,1:], axis = 2)
+        s_t1 = np.append(x_t1, s_t[:, :, :3], axis=2)

         # store the transition in D
         D.append((s_t, a_t, r_t, s_t1, terminal))
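
This is the bug in the commit title. The newest frame is written to channel 0 of s_t1, so the oldest frame lives at channel 3; the old code kept s_t[:, :, 1:], which drops channel 0 (the most recent history frame) and keeps the stale tail, so after the first step the history channels stay frozen at the initial frame and the network never actually sees the last 4 images. Keeping s_t[:, :, :3] drops the oldest channel instead. A toy numpy demonstration, with 1x1 "frames" standing in for the 80x80 screens:

    import numpy as np

    def step(s_t, new_frame, fixed):
        x_t1 = np.full((1, 1, 1), float(new_frame))
        if fixed:
            return np.append(x_t1, s_t[:, :, :3], axis=2)  # drop oldest channel
        return np.append(x_t1, s_t[:, :, 1:], axis=2)      # buggy: drops newest history

    for fixed in (False, True):
        s_t = np.zeros((1, 1, 4))            # initial state: frame 0 repeated 4x
        for frame in (1, 2, 3):
            s_t = step(s_t, frame, fixed)
        print(fixed, s_t.ravel())
    # False [3. 0. 0. 0.] -- history never advances past the initial frame
    # True  [3. 2. 1. 0.] -- the actual last four frames, newest first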

game/wrapped_flappy_bird.py (1 addition, 1 deletion)
@@ -54,7 +54,7 @@ def __init__(self):
         self.playerMaxVelY = 10 # max vel along Y, max descend speed
         self.playerMinVelY = -8 # min vel along Y, max ascend speed
         self.playerAccY = 1 # players downward accleration
-        self.playerFlapAcc = -7 # players speed on flapping
+        self.playerFlapAcc = -9 # players speed on flapping
         self.playerFlapped = False # True when player flaps

     def frame_step(self, input_actions):

saved_networks/bird-dqn-2880000: two binary files changed (10.3 MB, 63.9 KB), not shown

saved_networks/bird-dqn-2890000: two binary files changed (10.3 MB, 63.9 KB), not shown

saved_networks/bird-dqn-2900000: two binary files changed (10.3 MB, 63.9 KB), not shown

saved_networks/bird-dqn-2910000: two binary files changed (10.3 MB, 63.9 KB), not shown
