
Commit ffeff50

8.14 Fixed a bunch of bugs
The current number of bugs still doesn't look promising..
1 parent ecca039 commit ffeff50

File tree

7 files changed: +120 -80 lines


agent.py

+1-1
@@ -44,7 +44,7 @@ def learn(self, rep, step_t):
         :param step_t:
         :return:
         """
-        s, a, a_log_prob, r, s_, dw, done = rep.to_tensor()
+        s, a, a_log_prob, r, s_, dw, done = rep.get_data()
 
         # Compute the advantage function with GAE
         adv = []
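
The comment in this hunk refers to GAE (Generalized Advantage Estimation). For context, here is a minimal sketch of a typical GAE loop; the critic values vs/vs_ are assumptions (they do not appear in this diff), and gamma/lambda mirror the config filled in under main.py below. This is illustrative, not the actual loop in agent.py.

# Illustrative GAE sketch; vs / vs_ are assumed critic values for s / s_,
# dw marks true terminal states, done marks episode boundaries.
import torch

def compute_gae(r, vs, vs_, dw, done, gamma=0.99, lam=0.95):
    deltas = r + gamma * vs_ * (1.0 - dw) - vs        # TD residuals
    adv, gae = [], 0.0
    for delta, d in zip(reversed(deltas.flatten().tolist()),
                        reversed(done.flatten().tolist())):
        gae = delta + gamma * lam * gae * (1.0 - d)   # reset at episode ends
        adv.insert(0, gae)
    return torch.tensor(adv, dtype=torch.float32)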

bot_div/game.py

+15-13
@@ -14,7 +14,7 @@ def __init__(self, size):
 
 
 class Game(object):
-    def __init__(self, map_upd_func, action_upd_func):
+    def __init__(self, bot_color, map_upd_func, action_upd_func):
         self.driver = None
         self.mp = Map()
         self.players = []
@@ -29,17 +29,19 @@ def __init__(self, map_upd_func, action_upd_func):
         self.useless = []
         self.land_before = []
         self.defending = False
-        self.color = None
+        self.bot_color = bot_color
         self.get_tensor_map = map_upd_func
         self.send_action = action_upd_func
+        self.is_half = False
 
     def get_map_from_env(self):
         """
         get map from environment
         :return:
         """
-        game_map = self.get_tensor_map(self.color)
+        game_map = self.get_tensor_map(self.bot_color)
         map_size = game_map.shape[1]
+        game_map = game_map.long().tolist()
         for i in range(map_size):
             for j in range(map_size):
                 if game_map[1][i][j] == BlockType.city:
@@ -51,15 +53,16 @@ def get_map_from_env(self):
                     self.mp.mp[i][j].type = 'general'
                 elif game_map[1][i][j] == BlockType.mountain or game_map[1][i][j] == BlockType.obstacle:
                     self.mp.mp[i][j].type = 'mountain'
-                elif game_map[1][i][j] == BlockType.road and game_map[2][i][j] == PlayerColor.grey:
+                elif game_map[1][i][j] == BlockType.road and game_map[2][i][j] == PlayerColor.grey\
+                        and game_map[3][i][j] != 0:
                     if game_map[2][i][j] == PlayerColor.grey:
                         self.mp.mp[i][j].type = 'empty'
                     else:
                         self.mp.mp[i][j].type = 'land'
                 else:
                     self.mp.mp[i][j].type = 'unknown'
 
-                if game_map[2][i][j] == self.color:
+                if game_map[2][i][j] == self.bot_color:
                     self.mp.mp[i][j].belong = 1
                 else:
                     self.mp.mp[i][j].belong = 0
@@ -71,12 +74,10 @@ def get_map_from_env(self):
                 else:
                     self.mp.mp[i][j].cost = 1
 
-    def set_color(self, color):
-        self.color = color
-
     def pre(self):  # Preprocess the map
         tmp = self.mp.find_match(lambda a: a.type == 'general' and a.belong == 1)
         if len(tmp) != 1:
+            print("")
             return 1
         self.home_x = tmp[0][0]
         self.home_y = tmp[0][1]
@@ -340,7 +341,7 @@ def flush_movements(self):  # Apply queued movements
         cur_movement = self.movements[0]
         act = [self.cur_x, self.cur_y, self.cur_x + directions[cur_movement][0],
                self.cur_y + directions[cur_movement][1], is_half]
-        self.send_action(self.color, act)
+        self.send_action(self.bot_color, act)
         if self.movements[0] == 'W':
             self.cur_x -= 1
         elif self.movements[0] == 'S':
@@ -350,17 +351,18 @@ def flush_movements(self):  # Apply queued movements
         elif self.movements[0] == 'D':
             self.cur_y += 1
         self.movements.pop(0)
-        self.get_tensor_map()
+        self.get_map_from_env()
         self.update_map()
-        self.get_tensor_map()
+        self.get_map_from_env()
         return
 
     def bot_move(self):  # Main loop, runs once per turn
-        self.get_tensor_map()
+        self.get_map_from_env()
         if not self.is_pre:
             if self.pre() == 1:
                 return
-        if self.movements:
+        if len(self.movements):
+            print("flushed")
             self.flush_movements()
             return
         if [self.cur_x, self.cur_y] not in self.vis:
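
Note on the traceback recorded in main.py below: it ends with KeyError: 'Z' inside flush_movements, meaning a movement key outside the W/A/S/D set reached the directions lookup. A minimal guard, assuming directions is a dict keyed by 'W'/'A'/'S'/'D' (the dict itself is not shown in this diff), could look like this:

# Hypothetical guard: validate queued moves before indexing `directions`,
# which would avoid the KeyError: 'Z' seen in the traceback below.
directions = {'W': (-1, 0), 'S': (1, 0), 'A': (0, -1), 'D': (0, 1)}

def pop_valid_movement(movements):
    """Return the next W/A/S/D move, discarding anything else."""
    while movements and movements[0] not in directions:
        movements.pop(0)                # drop invalid entries such as 'Z'
    return movements.pop(0) if movements else None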

main.py

+16-14
@@ -18,11 +18,11 @@ def main(offline_train=True):
         "action_dim": 5,
         "lr_a": 0.01,
         "lr_c": 0.01,
-        "gamma": None,
-        "lambda": None,
-        "epsilon": None,
-        "k_epochs": None,
-        "entropy_coef": None,
+        "gamma": 0.99,
+        "lambda": 0.95,
+        "epsilon": 0.2,
+        "k_epochs": 10,
+        "entropy_coef": 0.01,
         "autosave_step": 107,
         "device": device
     }
@@ -53,6 +53,8 @@ def main(offline_train=True):
         a, a_log_prob = agent.predict(s)
         s_, r, done, _ = env.step(a)
 
+        env.render("human")
+
         s_ = state_norm(s_).to(device)
         r = reward_scaling(r)
 
@@ -78,17 +80,17 @@ def main(offline_train=True):
 
 """
 Traceback (most recent call last):
-  File "D:/MyFiles/LearningBot/main.py", line 77, in <module>
+  File "D:/MyFiles/LearningBot/main.py", line 79, in <module>
     main()
   File "D:/MyFiles/LearningBot/main.py", line 54, in main
     s_, r, done, _ = env.step(a)
-  File "D:\MyFiles\LearningBot\offsite_env.py", line 86, in step
+  File "D:\MyFiles\LearningBot\offsite_env.py", line 85, in step
     self.execute_actions(action[0].long())
-  File "D:\MyFiles\LearningBot\offsite_env.py", line 137, in execute_actions
-    self.combine((act[0], act[1]), (act[2], act[3]), mov_troop)
-  File "D:\MyFiles\LearningBot\offsite_env.py", line 153, in combine
-    "amount": int(self.map[0][b2[0]][b2[1]]),
-IndexError: index 20 is out of bounds for dimension 0 with size 20
-
-Process finished with exit code 1
+  File "D:\MyFiles\LearningBot\offsite_env.py", line 156, in execute_actions
+    self.internal_bots[cur_color].bot_move()
+  File "D:\MyFiles\LearningBot\bot_div\game.py", line 365, in bot_move
+    self.flush_movements()
+  File "D:\MyFiles\LearningBot\bot_div\game.py", line 341, in flush_movements
+    act = [self.cur_x, self.cur_y, self.cur_x + directions[cur_movement][0],
+KeyError: 'Z'
 """

normalization.py

+11-15
@@ -1,25 +1,26 @@
-import numpy as np
+import copy
+import torch
 
 
 class RunningMeanStd(object):
 
     def __init__(self, shape):
         self.n = 0
-        self.mean = np.zeros(shape)
-        self.S = np.zeros(shape)
-        self.std = np.sqrt(self.S)
+        self.mean = torch.zeros(shape)
+        self.S = torch.zeros(shape)
+        self.std = torch.sqrt(self.S)
 
     def update(self, x):
-        x = np.array(x)
+        x = torch.tensor(x)
         self.n += 1
         if self.n == 1:
             self.mean = x
             self.std = x
         else:
-            old_mean = self.mean.copy()
+            old_mean = copy.copy(self.mean)
             self.mean = old_mean + (x - old_mean) / self.n
             self.S = self.S + (x - old_mean) * (x - self.mean)
-            self.std = np.sqrt(self.S / self.n)
+            self.std = torch.sqrt(self.S / self.n)
 
 
 class Normalization(object):
@@ -40,18 +41,13 @@ def __init__(self, shape, gamma):
         self.shape = shape
         self.gamma = gamma
         self.running_ms = RunningMeanStd(shape=self.shape)
-        self.R = np.zeros(self.shape)
+        self.R = torch.zeros(self.shape)
 
     def __call__(self, x):
-        try:
-            self.R = self.gamma * self.R + x
-        except TypeError:
-            print(self.R)
-            print(self.gamma)
-            exit(1)
+        self.R = self.gamma * self.R + x
         self.running_ms.update(self.R)
         x = x / (self.running_ms.std + 1e-8)
         return x
 
     def reset(self):
-        self.R = np.zeros(self.shape)
+        self.R = torch.zeros(self.shape)
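
RunningMeanStd is the usual Welford-style online mean/variance estimator, and the gamma-weighted class at the bottom is the standard PPO reward-scaling trick: each reward is divided by the running standard deviation of the discounted return R. A small usage sketch under that reading (the class name RewardScaling is an assumption; the actual name is cut off by this diff):

# Hypothetical usage of the reward-scaling class above.
import torch

scaler = RewardScaling(shape=1, gamma=0.99)   # assumed class name
scaler.reset()                                # once per episode
for raw_r in [1.0, 0.0, -0.5]:
    r = scaler(torch.tensor([raw_r]))         # reward / running std of R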
