Commit c46af30

committed

8.15 Fixed another big batch of bugs
Remember: in memory the x and y axes are the reverse of the actual ones!!

1 parent ffeff50 commit c46af30
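
A minimal illustration of the axis warning above (hypothetical shapes, not repo code): with row-major tensors, game_map[c][i][j] reads row i, column j, so the first spatial index moves "vertically" on screen even if the surrounding code calls it x.

    import torch

    # Row-major indexing: the first spatial index walks rows (screen y).
    game_map = torch.zeros(1, 4, 4)
    game_map[0][1][3] = 9            # row 1, column 3
    print(game_map[0][3][1].item())  # 0.0 -- swapping i and j hits a different cell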

File tree

5 files changed: +66 −57 lines changed


agent.py (+4 −2)

@@ -44,15 +44,17 @@ def learn(self, rep, step_t):
         :param step_t:
         :return:
         """
-        s, a, a_log_prob, r, s_, dw, done = rep.get_data()
+        s, a, a_log_prob, r, s_, done = rep.get_data()

         # compute the advantage function with GAE
         adv = []
         gae = 0
+        s = s.to(self.device)
+        s_ = s_.to(self.device)
         with torch.no_grad():  # no gradients needed
             vs = self.v(s)
             vs_ = self.v(s_)
-            deltas = r + self.gamma * (1.0 - dw) * vs_ - vs
+            deltas = r + self.gamma * (1.0 - done) * vs_ - vs
             for delta, d in zip(reversed(deltas.flatten().numpy()), reversed(done.flatten().numpy())):
                 gae = delta + self.gamma * self.lamda * gae * (1.0 - d)
                 adv.insert(0, gae)
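
The hunk's GAE recursion can be checked in isolation. A minimal sketch with made-up values (the gamma/lamda constants here are hypothetical, not the repo's settings):

    import torch

    gamma, lamda = 0.99, 0.95
    r    = torch.tensor([1.0, 0.0, 0.0, 5.0])   # rewards
    vs   = torch.tensor([0.5, 0.4, 0.3, 0.2])   # V(s_t)
    vs_  = torch.tensor([0.4, 0.3, 0.2, 0.0])   # V(s_{t+1})
    done = torch.tensor([0.0, 0.0, 0.0, 1.0])   # episode-end flags

    deltas = r + gamma * (1.0 - done) * vs_ - vs
    adv, gae = [], 0.0
    # walk the trajectory backwards; the done flag resets the accumulator
    for delta, d in zip(reversed(deltas.tolist()), reversed(done.tolist())):
        gae = delta + gamma * lamda * gae * (1.0 - d)
        adv.insert(0, gae)
    print(adv)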

bot_div/game.py (+16 −16)

@@ -42,32 +42,32 @@ def get_map_from_env(self):
         game_map = self.get_tensor_map(self.bot_color)
         map_size = game_map.shape[1]
         game_map = game_map.long().tolist()
-        for i in range(map_size):
-            for j in range(map_size):
-                if game_map[1][i][j] == BlockType.city:
-                    if game_map[2][i][j] == PlayerColor.grey:
+        for i in range(1, map_size + 1):
+            for j in range(1, map_size + 1):
+                if game_map[1][i - 1][j - 1] == BlockType.city:
+                    if game_map[2][i - 1][j - 1] == PlayerColor.grey:
                         self.mp.mp[i][j].type = 'empty-city'
                     else:
                         self.mp.mp[i][j].type = 'city'
-                elif game_map[1][i][j] == BlockType.crown:
+                elif game_map[1][i - 1][j - 1] == BlockType.crown:
                     self.mp.mp[i][j].type = 'general'
-                elif game_map[1][i][j] == BlockType.mountain or game_map[1][i][j] == BlockType.obstacle:
+                elif game_map[1][i - 1][j - 1] == BlockType.mountain or game_map[1][i - 1][j - 1] == BlockType.obstacle:
                     self.mp.mp[i][j].type = 'mountain'
-                elif game_map[1][i][j] == BlockType.road and game_map[2][i][j] == PlayerColor.grey\
-                        and game_map[3][i][j] != 0:
-                    if game_map[2][i][j] == PlayerColor.grey:
+                elif game_map[1][i - 1][j - 1] == BlockType.road and game_map[2][i - 1][j - 1] == PlayerColor.grey\
+                        and game_map[3][i - 1][j - 1] != 0:
+                    if game_map[2][i - 1][j - 1] == PlayerColor.grey:
                         self.mp.mp[i][j].type = 'empty'
                     else:
                         self.mp.mp[i][j].type = 'land'
                 else:
                     self.mp.mp[i][j].type = 'unknown'

-                if game_map[2][i][j] == self.bot_color:
+                if game_map[2][i - 1][j - 1] == self.bot_color:
                     self.mp.mp[i][j].belong = 1
                 else:
                     self.mp.mp[i][j].belong = 0

-                self.mp.mp[i][j].amount = int(game_map[0][i][j])
+                self.mp.mp[i][j].amount = int(game_map[0][i - 1][j - 1])

                 if self.mp.mp[i][j].belong != 1:
                     self.mp.mp[i][j].cost = max(self.mp.mp[i][j].amount, 2)
@@ -77,7 +77,6 @@ def get_map_from_env(self):
     def pre(self):  # preprocess the map
         tmp = self.mp.find_match(lambda a: a.type == 'general' and a.belong == 1)
         if len(tmp) != 1:
-            print("")
             return 1
         self.home_x = tmp[0][0]
         self.home_y = tmp[0][1]
@@ -331,6 +330,7 @@ def flush_movements(self):  # update pending movements
         is_half = 0
         if 'Z' in self.movements:
             is_half = 1
+            self.movements.remove('Z')
         tmp = self.mp.mp[self.home_x][self.home_y].amount
         cur_movement = self.movements[0]
         while isinstance(cur_movement, list) or isinstance(cur_movement, tuple):
@@ -339,9 +339,8 @@ def flush_movements(self):  # update pending movements
             if not self.movements:
                 return
             cur_movement = self.movements[0]
-        act = [self.cur_x, self.cur_y, self.cur_x + directions[cur_movement][0],
-               self.cur_y + directions[cur_movement][1], is_half]
-        self.send_action(self.bot_color, act)
+        x_old = self.cur_x
+        y_old = self.cur_y
         if self.movements[0] == 'W':
             self.cur_x -= 1
         elif self.movements[0] == 'S':
@@ -350,6 +349,8 @@ def flush_movements(self):  # update pending movements
             self.cur_y -= 1
         elif self.movements[0] == 'D':
             self.cur_y += 1
+        act = [x_old, y_old, self.cur_x, self.cur_y, is_half]
+        self.send_action(self.bot_color, act)
         self.movements.pop(0)
         self.get_map_from_env()
         self.update_map()
@@ -362,7 +363,6 @@ def bot_move(self):  # main loop, runs once per turn
         if self.pre() == 1:
             return
         if len(self.movements):
-            print("flushed")
             self.flush_movements()
             return
         if [self.cur_x, self.cur_y] not in self.vis:
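
The coordinate fixes in this file all come down to the same off-by-one: the env tensor game_map is 0-based while self.mp.mp is addressed 1-based (rows/columns 0 apparently reserved as a border; that reading is inferred from this diff). A minimal sketch of the shift, with hypothetical names:

    # Copy a 0-based grid into a 1-based grid, shifting by one on both axes.
    map_size = 3
    tensor_cells = [[f"t{i}{j}" for j in range(map_size)] for i in range(map_size)]
    grid = [[None] * (map_size + 2) for _ in range(map_size + 2)]  # 1-based, padded
    for i in range(1, map_size + 1):
        for j in range(1, map_size + 1):
            grid[i][j] = tensor_cells[i - 1][j - 1]
    print(grid[1][1], grid[map_size][map_size])  # t00 t22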

const.py (+1 −1)

@@ -74,7 +74,7 @@ def __init__(self):
         self.__indexes = {20: self.__20index, 19: self.__19index, 10: self.__10index, 9: self.__9index}

     def _generate(self, size):
-        action = [torch.Tensor([[0, 0, 0, 0, 0]])]
+        action = [torch.Tensor([[-1, -1, -1, -1, -1]])]
         index = []
         for _ in range(size + 1):
             index.append([])
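
The new −1 sentinel pairs with the act[0] >= 0 guards added in offsite_env.py. A small illustration of why a zero sentinel is risky once the decoder subtracts 1 (illustrative tensors, not repo code): Python/torch negative indices wrap around, so an unguarded decoded sentinel would silently address the last row and column.

    import torch

    m = torch.arange(9).reshape(3, 3)
    act = torch.tensor([0, 0, 0, 0, 0]) - 1   # old sentinel after "act -= 1"
    print(m[act[0]][act[1]])                  # tensor(8): wrapped to m[-1][-1]!
    # The new [-1]*5 sentinel decodes to -2 and is rejected explicitly
    # by the "act[0] >= 0" check instead of wrapping.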

main.py (−17)

@@ -77,20 +77,3 @@ def main(offline_train=True):

 if __name__ == '__main__':
     main()
-
-"""
-Traceback (most recent call last):
-  File "D:/MyFiles/LearningBot/main.py", line 79, in <module>
-    main()
-  File "D:/MyFiles/LearningBot/main.py", line 54, in main
-    s_, r, done, _ = env.step(a)
-  File "D:\MyFiles\LearningBot\offsite_env.py", line 85, in step
-    self.execute_actions(action[0].long())
-  File "D:\MyFiles\LearningBot\offsite_env.py", line 156, in execute_actions
-    self.internal_bots[cur_color].bot_move()
-  File "D:\MyFiles\LearningBot\bot_div\game.py", line 365, in bot_move
-    self.flush_movements()
-  File "D:\MyFiles\LearningBot\bot_div\game.py", line 341, in flush_movements
-    act = [self.cur_x, self.cur_y, self.cur_x + directions[cur_movement][0],
-KeyError: 'Z'
-"""

offsite_env.py (+45 −21)

@@ -87,16 +87,24 @@ def step(self, action: torch.Tensor):
         # generate the observation
         obs = self.gen_observation()

-        # compute the reward
+        # the game-over check only needs to run once all actions have executed
         reward = 0
-        # if this is the first turn, return immediately
-        if self.action_history.qsize() < 1:
-            return obs, reward, False, {}
+        w_state = self.win_check()
+        if w_state != 0:
+            reward += 300 if w_state == 2 else -300
+            return obs, reward, True, {}
+
         # compute the reward for the previous step
         _dirx = [0, -1, 0, 1, 1, -1, 1, -1]
         _diry = [-1, 0, 1, 0, 1, -1, -1, 1]
+        # if this is the first turn, return immediately
+        if self.action_history.qsize() < 1:
+            return obs, reward, False, {}
         last_move = self.action_history.queue[-1]
         last_obs = self.obs_history.queue[-1]
+        # if the action is empty
+        if last_move[0] < 0:
+            return obs, reward, False, {}
         # heavy penalty for an invalid move
         if last_obs[2][last_move[0] - 1][last_move[1] - 1] != self._get_colormark(self.learningbot_color):
             reward -= 100
@@ -126,11 +134,6 @@ def step(self, action: torch.Tensor):
         self.obs_history.get()
         self.obs_history.put(self.get_view_of(self.learningbot_color))

-        # the game-over check only needs to run once all actions have executed
-        w_state = self.win_check()
-        if w_state != 0:
-            reward += 300 if w_state == 2 else -300
-            return obs, reward, True, {}
         return obs, reward, False, {}
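
The reorder matters because the first-turn and empty-action early returns used to fire before the terminal check ever ran, so a finished game could go unrewarded. A simplified sketch of the guard order step() now follows (attribute names taken from the diff, everything else assumed):

    def step_order_sketch(env, obs):
        reward = 0
        w_state = env.win_check()
        if w_state != 0:                         # 1) settle wins/losses first
            return obs, reward + (300 if w_state == 2 else -300), True, {}
        if env.action_history.qsize() < 1:       # 2) first turn: nothing to score
            return obs, reward, False, {}
        if env.action_history.queue[-1][0] < 0:  # 3) empty action: skip scoring
            return obs, reward, False, {}
        # 4) only now compute the shaped reward for the previous move
        return obs, reward, False, {}
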
@@ -142,7 +145,7 @@ def render(self, mode="human"):
         if mode == "human":
             print_tensor_map(self.map)
             print(f"round: {self.round}")
-            time.sleep(1)
+            time.sleep(0.65)

@@ -155,22 +158,35 @@ def execute_actions(self, action: torch.Tensor):
             cur_color = self.internal_bots_color[i]
             self.internal_bots[cur_color].bot_move()
             if not self.actions_now[cur_color]:
+                print(f"bot {cur_color} empty move")
                 continue
             cur_action = self.actions_now[cur_color]
-            print(cur_action)
-            f_amount = int(self.map[0][cur_action[0]][cur_action[1]])
-            if cur_action[4] == 1:
-                mov_troop = math.ceil((f_amount + 0.5) / 2) - 1
-            else:
-                mov_troop = f_amount - 1
-            self.combine((cur_action[0], cur_action[1]), (cur_action[2], cur_action[3]), mov_troop)
+            # if the move is out of bounds, skip straight to the next bot
+            skip = False
+            for j in range(4):
+                if not 0 <= cur_action[j] < self.map_size:
+                    skip = True
+                    break
+            if skip:
+                print(f"skipped: {cur_action}")
+                continue
+
+            print(f"internal bot color {cur_color}: {cur_action}")
+            # check whether the action is legal
+            if cur_action[0] >= 0 and self.map[2][cur_action[0]][cur_action[1]] == cur_color:
+                f_amount = int(self.map[0][cur_action[0]][cur_action[1]])
+                if cur_action[4] == 1:
+                    mov_troop = math.ceil((f_amount + 0.5) / 2) - 1
+                else:
+                    mov_troop = f_amount - 1
+                self.combine((cur_action[0], cur_action[1]), (cur_action[2], cur_action[3]), mov_troop)

         # handle the LearningBot action
         act = self.at.i_to_a(self.map_size, int(action))[0].long()
         act -= 1
         act[4] += 1
         # check whether the action is legal
-        if self.map[2][act[0]][act[1]] == self.learningbot_color:
+        if act[0] >= 0 and self.map[2][act[0]][act[1]] == self.learningbot_color:
             f_amount = int(self.map[0][act[0]][act[1]])
             if act[4] == 1:
                 mov_troop = math.ceil((f_amount + 0.5) / 2) - 1
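
The math.ceil((f_amount + 0.5) / 2) - 1 expression implements the "move half" rule while always leaving at least one troop on the source tile. A quick standalone check (plain values, not repo code):

    import math

    for f_amount in (2, 3, 7, 8):
        half = math.ceil((f_amount + 0.5) / 2) - 1
        full = f_amount - 1
        print(f_amount, "-> half:", half, "full:", full)
    # 2 -> half: 1 full: 1
    # 3 -> half: 1 full: 2
    # 7 -> half: 3 full: 6
    # 8 -> half: 4 full: 7
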
@@ -197,6 +213,10 @@ def combine(self, b1: tuple, b2: tuple, cnt):
             "color": int(self.map[2][b2[0]][b2[1]])
         }

+        # terrain special case
+        if t["type"] == BlockType.mountain:
+            return
+
         if t["color"] == f["color"]:
             t["amount"] += cnt
             f["amount"] -= cnt
@@ -213,7 +233,7 @@ def combine(self, b1: tuple, b2: tuple, cnt):
                 if self.map[2][i][j] == BlockType.crown:
                     self.map[2][i][j] = BlockType.city
                 t["color"] = f["color"]
-                t["amount"] = f["amount"]
+                t["amount"] = -t["amount"]

         # write the values back
         self.map[0][b1[0]][b1[1]] = f["amount"]
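
The t["amount"] = -t["amount"] fix can be sanity-checked with plain integers. This assumes, based on the surrounding hunks, that the defender's amount already had cnt subtracted before the capture branch runs:

    # Worked example (illustrative, not repo code).
    f_amount, cnt, t_amount = 10, 9, 4   # attacker tile, troops sent, defender tile
    f_amount -= cnt                      # 1 troop stays on the source tile
    t_amount -= cnt                      # 4 - 9 = -5: the tile falls
    if t_amount < 0:
        t_amount = -t_amount             # 5 surviving attackers occupy the tile,
                                         # not the attacker's old total (the bug)
    print(f_amount, t_amount)            # 1 5
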
@@ -303,9 +323,13 @@ def bot_action_upd(self, color, action):
         :param action: [x1, y1, x2, y2, is_half]
         :return:
         """
-        print(f"internal bot color {color}: {action}")
         if action:
-            self.actions_now[color] = action
+            self.actions_now[color] = [None, None, None, None, None]
+            self.actions_now[color][0] = action[0] - 1
+            self.actions_now[color][1] = action[1] - 1
+            self.actions_now[color][2] = action[2] - 1
+            self.actions_now[color][3] = action[3] - 1
+            self.actions_now[color][4] = action[4]
         else:
             self.actions_now[color] = False
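
The five explicit assignments shift the internal bot's 1-based coordinates onto the env's 0-based map while leaving the is_half flag untouched. An equivalent, more compact form (a sketch, not repo code):

    def to_env_coords(action):
        # bot actions are 1-based [x1, y1, x2, y2, is_half]; the env map is 0-based
        return [c - 1 for c in action[:4]] + [action[4]]

    print(to_env_coords([3, 5, 3, 6, 0]))  # [2, 4, 2, 5, 0]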