8.15 又修复了一大堆 bug（flush_movements 中的 KeyError: 'Z'、地图下标偏移、空动作与越界动作的处理等）

c2d08y · c2d08y · commit c46af3003806 · 2022-08-15T21:05:10.000+08:00
切记：内存里的 x/y 轴和实际的 x/y 轴是相反的！！
diff --git a/agent.py b/agent.py
@@ -44,15 +44,17 @@ def learn(self, rep, step_t):
         :param step_t:
         :return:
         """
-        s, a, a_log_prob, r, s_, dw, done = rep.get_data()
+        s, a, a_log_prob, r, s_, done = rep.get_data()
 
         # 利用GAE计算优势函数
         adv = []
         gae = 0
+        s = s.to(self.device)
+        s_ = s_.to(self.device)
         with torch.no_grad():  # 不需要梯度
             vs = self.v(s)
             vs_ = self.v(s_)
-            deltas = r + self.gamma * (1.0 - dw) * vs_ - vs
+            deltas = r + self.gamma * (1.0 - done) * vs_ - vs
             for delta, d in zip(reversed(deltas.flatten().numpy()), reversed(done.flatten().numpy())):
                 gae = delta + self.gamma * self.lamda * gae * (1.0 - d)
                 adv.insert(0, gae)
diff --git a/bot_div/game.py b/bot_div/game.py
@@ -42,32 +42,32 @@ def get_map_from_env(self):
         game_map = self.get_tensor_map(self.bot_color)
         map_size = game_map.shape[1]
         game_map = game_map.long().tolist()
-        for i in range(map_size):
-            for j in range(map_size):
-                if game_map[1][i][j] == BlockType.city:
-                    if game_map[2][i][j] == PlayerColor.grey:
+        for i in range(1, map_size + 1):
+            for j in range(1, map_size + 1):
+                if game_map[1][i - 1][j - 1] == BlockType.city:
+                    if game_map[2][i - 1][j - 1] == PlayerColor.grey:
                         self.mp.mp[i][j].type = 'empty-city'
                     else:
                         self.mp.mp[i][j].type = 'city'
-                elif game_map[1][i][j] == BlockType.crown:
+                elif game_map[1][i - 1][j - 1] == BlockType.crown:
                     self.mp.mp[i][j].type = 'general'
-                elif game_map[1][i][j] == BlockType.mountain or game_map[1][i][j] == BlockType.obstacle:
+                elif game_map[1][i - 1][j - 1] == BlockType.mountain or game_map[1][i - 1][j - 1] == BlockType.obstacle:
                     self.mp.mp[i][j].type = 'mountain'
-                elif game_map[1][i][j] == BlockType.road and game_map[2][i][j] == PlayerColor.grey\
-                        and game_map[3][i][j] != 0:
-                    if game_map[2][i][j] == PlayerColor.grey:
+                elif game_map[1][i - 1][j - 1] == BlockType.road and game_map[2][i - 1][j - 1] == PlayerColor.grey\
+                        and game_map[3][i - 1][j - 1] != 0:
+                    if game_map[2][i - 1][j - 1] == PlayerColor.grey:
                         self.mp.mp[i][j].type = 'empty'
                     else:
                         self.mp.mp[i][j].type = 'land'
                 else:
                     self.mp.mp[i][j].type = 'unknown'
 
-                if game_map[2][i][j] == self.bot_color:
+                if game_map[2][i - 1][j - 1] == self.bot_color:
                     self.mp.mp[i][j].belong = 1
                 else:
                     self.mp.mp[i][j].belong = 0
 
-                self.mp.mp[i][j].amount = int(game_map[0][i][j])
+                self.mp.mp[i][j].amount = int(game_map[0][i - 1][j - 1])
 
                 if self.mp.mp[i][j].belong != 1:
                     self.mp.mp[i][j].cost = max(self.mp.mp[i][j].amount, 2)
@@ -77,7 +77,6 @@ def get_map_from_env(self):
     def pre(self):  # 预处理地图
         tmp = self.mp.find_match(lambda a: a.type == 'general' and a.belong == 1)
         if len(tmp) != 1:
-            print("")
             return 1
         self.home_x = tmp[0][0]
         self.home_y = tmp[0][1]
@@ -331,6 +330,7 @@ def flush_movements(self):  # 更新移动
         is_half = 0
         if 'Z' in self.movements:
             is_half = 1
+            self.movements.remove('Z')
         tmp = self.mp.mp[self.home_x][self.home_y].amount
         cur_movement = self.movements[0]
         while isinstance(cur_movement, list) or isinstance(cur_movement, tuple):
@@ -339,9 +339,8 @@ def flush_movements(self):  # 更新移动
             if not self.movements:
                 return
             cur_movement = self.movements[0]
-        act = [self.cur_x, self.cur_y, self.cur_x + directions[cur_movement][0],
-               self.cur_y + directions[cur_movement][1], is_half]
-        self.send_action(self.bot_color, act)
+        x_old = self.cur_x
+        y_old = self.cur_y
         if self.movements[0] == 'W':
             self.cur_x -= 1
         elif self.movements[0] == 'S':
@@ -350,6 +349,8 @@ def flush_movements(self):  # 更新移动
             self.cur_y -= 1
         elif self.movements[0] == 'D':
             self.cur_y += 1
+        act = [x_old, y_old, self.cur_x, self.cur_y, is_half]
+        self.send_action(self.bot_color, act)
         self.movements.pop(0)
         self.get_map_from_env()
         self.update_map()
@@ -362,7 +363,6 @@ def bot_move(self):  # 主循环，每回合执行一次
             if self.pre() == 1:
                 return
         if len(self.movements):
-            print("flushed")
             self.flush_movements()
             return
         if [self.cur_x, self.cur_y] not in self.vis:
diff --git a/const.py b/const.py
@@ -74,7 +74,7 @@ def __init__(self):
         self.__indexes = {20: self.__20index, 19: self.__19index, 10: self.__10index, 9: self.__9index}
 
     def _generate(self, size):
-        action = [torch.Tensor([[0, 0, 0, 0, 0]])]
+        action = [torch.Tensor([[-1, -1, -1, -1, -1]])]
         index = []
         for _ in range(size + 1):
             index.append([])
diff --git a/main.py b/main.py
@@ -77,20 +77,3 @@ def main(offline_train=True):
 
 if __name__ == '__main__':
     main()
-
-"""
-Traceback (most recent call last):
-  File "D:/MyFiles/LearningBot/main.py", line 79, in <module>
-    main()
-  File "D:/MyFiles/LearningBot/main.py", line 54, in main
-    s_, r, done, _ = env.step(a)
-  File "D:\MyFiles\LearningBot\offsite_env.py", line 85, in step
-    self.execute_actions(action[0].long())
-  File "D:\MyFiles\LearningBot\offsite_env.py", line 156, in execute_actions
-    self.internal_bots[cur_color].bot_move()
-  File "D:\MyFiles\LearningBot\bot_div\game.py", line 365, in bot_move
-    self.flush_movements()
-  File "D:\MyFiles\LearningBot\bot_div\game.py", line 341, in flush_movements
-    act = [self.cur_x, self.cur_y, self.cur_x + directions[cur_movement][0],
-KeyError: 'Z'
-"""
diff --git a/offsite_env.py b/offsite_env.py
@@ -87,16 +87,24 @@ def step(self, action: torch.Tensor):
         # 生成observation
         obs = self.gen_observation()
 
-        # 计算奖励
+        # 只有动作都执行完才需要检查游戏是否结束
         reward = 0
-        # 如果是第一回合 直接return
-        if self.action_history.qsize() < 1:
-            return obs, reward, False, {}
+        w_state = self.win_check()
+        if w_state != 0:
+            reward += 300 if w_state == 2 else -300
+            return obs, reward, True, {}
+
         # 计算上一步的奖励
         _dirx = [0, -1, 0, 1, 1, -1, 1, -1]
         _diry = [-1, 0, 1, 0, 1, -1, -1, 1]
+        # 如果是第一回合 直接return
+        if self.action_history.qsize() < 1:
+            return obs, reward, False, {}
         last_move = self.action_history.queue[-1]
         last_obs = self.obs_history.queue[-1]
+        # 如果动作为空
+        if last_move[0] < 0:
+            return obs, reward, False, {}
         # 无效移动扣大分
         if last_obs[2][last_move[0] - 1][last_move[1] - 1] != self._get_colormark(self.learningbot_color):
             reward -= 100
@@ -126,11 +134,6 @@ def step(self, action: torch.Tensor):
             self.obs_history.get()
         self.obs_history.put(self.get_view_of(self.learningbot_color))
 
-        # 只有动作都执行完才需要检查游戏是否结束
-        w_state = self.win_check()
-        if w_state != 0:
-            reward += 300 if w_state == 2 else -300
-            return obs, reward, True, {}
         return obs, reward, False, {}
 
     def render(self, mode="human"):
@@ -142,7 +145,7 @@ def render(self, mode="human"):
         if mode == "human":
             print_tensor_map(self.map)
             print(f"round: {self.round}")
-            time.sleep(1)
+            time.sleep(0.65)
 
     def execute_actions(self, action: torch.Tensor):
         """
@@ -155,22 +158,35 @@ def execute_actions(self, action: torch.Tensor):
             cur_color = self.internal_bots_color[i]
             self.internal_bots[cur_color].bot_move()
             if not self.actions_now[cur_color]:
+                print(f"bot {cur_color} empty move")
                 continue
             cur_action = self.actions_now[cur_color]
-            print(cur_action)
-            f_amount = int(self.map[0][cur_action[0]][cur_action[1]])
-            if cur_action[4] == 1:
-                mov_troop = math.ceil((f_amount + 0.5) / 2) - 1
-            else:
-                mov_troop = f_amount - 1
-            self.combine((cur_action[0], cur_action[1]), (cur_action[2], cur_action[3]), mov_troop)
+            # 如果移动超界了 直接下一个
+            skip = False
+            for j in range(4):
+                if not 0 <= cur_action[j] < self.map_size:
+                    skip = True
+                    break
+            if skip:
+                print(f"skipped: {cur_action}")
+                continue
+
+            print(f"internal bot color {cur_color}: {cur_action}")
+            # 检查动作是否合法
+            if cur_action[0] >= 0 and self.map[2][cur_action[0]][cur_action[1]] == cur_color:
+                f_amount = int(self.map[0][cur_action[0]][cur_action[1]])
+                if cur_action[4] == 1:
+                    mov_troop = math.ceil((f_amount + 0.5) / 2) - 1
+                else:
+                    mov_troop = f_amount - 1
+                self.combine((cur_action[0], cur_action[1]), (cur_action[2], cur_action[3]), mov_troop)
 
         # 处理LearningBot动作
         act = self.at.i_to_a(self.map_size, int(action))[0].long()
         act -= 1
         act[4] += 1
         # 检查动作是否合法
-        if self.map[2][act[0]][act[1]] == self.learningbot_color:
+        if act[0] >= 0 and self.map[2][act[0]][act[1]] == self.learningbot_color:
             f_amount = int(self.map[0][act[0]][act[1]])
             if act[4] == 1:
                 mov_troop = math.ceil((f_amount + 0.5) / 2) - 1
@@ -197,6 +213,10 @@ def combine(self, b1: tuple, b2: tuple, cnt):
             "color": int(self.map[2][b2[0]][b2[1]])
         }
 
+        # 地形特判
+        if t["type"] == BlockType.mountain:
+            return
+
         if t["color"] == f["color"]:
             t["amount"] += cnt
             f["amount"] -= cnt
@@ -213,7 +233,7 @@ def combine(self, b1: tuple, b2: tuple, cnt):
                                 if self.map[2][i][j] == BlockType.crown:
                                     self.map[2][i][j] = BlockType.city
                 t["color"] = f["color"]
-                t["amount"] = f["amount"]
+                t["amount"] = -t["amount"]
 
         # 赋值回去
         self.map[0][b1[0]][b1[1]] = f["amount"]
@@ -303,9 +323,13 @@ def bot_action_upd(self, color, action):
         :param action: [x1, y1, x2, y2, is_half]
         :return:
         """
-        print(f"internal bot color {color}: {action}")
         if action:
-            self.actions_now[color] = action
+            self.actions_now[color] = [None, None, None, None, None]
+            self.actions_now[color][0] = action[0] - 1
+            self.actions_now[color][1] = action[1] - 1
+            self.actions_now[color][2] = action[2] - 1
+            self.actions_now[color][3] = action[3] - 1
+            self.actions_now[color][4] = action[4]
         else:
             self.actions_now[color] = False