
Commit ffeff50

8.14 Fixed a bunch of bugs
The current number of bugs still doesn't look promising..
1 parent ecca039 commit ffeff50

File tree

7 files changed: +120 -80 lines


agent.py

+1-1
@@ -44,7 +44,7 @@ def learn(self, rep, step_t):
         :param step_t:
         :return:
         """
-        s, a, a_log_prob, r, s_, dw, done = rep.to_tensor()
+        s, a, a_log_prob, r, s_, dw, done = rep.get_data()
 
         # Compute the advantage function with GAE
         adv = []
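
The comment in this hunk refers to GAE (Generalized Advantage Estimation). For context, here is a minimal sketch of a typical GAE loop; the critic values vs/vs_ are assumptions (they do not appear in this diff), and gamma/lambda mirror the config filled in under main.py below. This is illustrative, not the actual loop in agent.py.

# Illustrative GAE sketch; vs / vs_ are assumed critic values for s / s_,
# dw marks true terminal states, done marks episode boundaries.
import torch

def compute_gae(r, vs, vs_, dw, done, gamma=0.99, lam=0.95):
    deltas = r + gamma * vs_ * (1.0 - dw) - vs        # TD residuals
    adv, gae = [], 0.0
    for delta, d in zip(reversed(deltas.flatten().tolist()),
                        reversed(done.flatten().tolist())):
        gae = delta + gamma * lam * gae * (1.0 - d)   # reset at episode ends
        adv.insert(0, gae)
    return torch.tensor(adv, dtype=torch.float32)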

bot_div/game.py

+15-13
@@ -14,7 +14,7 @@ def __init__(self, size):
 
 
 class Game(object):
-    def __init__(self, map_upd_func, action_upd_func):
+    def __init__(self, bot_color, map_upd_func, action_upd_func):
         self.driver = None
         self.mp = Map()
         self.players = []
@@ -29,17 +29,19 @@ def __init__(self, map_upd_func, action_upd_func):
         self.useless = []
         self.land_before = []
         self.defending = False
-        self.color = None
+        self.bot_color = bot_color
         self.get_tensor_map = map_upd_func
         self.send_action = action_upd_func
+        self.is_half = False
 
     def get_map_from_env(self):
         """
         get map from environment
         :return:
         """
-        game_map = self.get_tensor_map(self.color)
+        game_map = self.get_tensor_map(self.bot_color)
         map_size = game_map.shape[1]
+        game_map = game_map.long().tolist()
         for i in range(map_size):
             for j in range(map_size):
                 if game_map[1][i][j] == BlockType.city:
@@ -51,15 +53,16 @@ def get_map_from_env(self):
                     self.mp.mp[i][j].type = 'general'
                 elif game_map[1][i][j] == BlockType.mountain or game_map[1][i][j] == BlockType.obstacle:
                     self.mp.mp[i][j].type = 'mountain'
-                elif game_map[1][i][j] == BlockType.road and game_map[2][i][j] == PlayerColor.grey:
+                elif game_map[1][i][j] == BlockType.road and game_map[2][i][j] == PlayerColor.grey\
+                        and game_map[3][i][j] != 0:
                     if game_map[2][i][j] == PlayerColor.grey:
                         self.mp.mp[i][j].type = 'empty'
                     else:
                         self.mp.mp[i][j].type = 'land'
                 else:
                     self.mp.mp[i][j].type = 'unknown'
 
-                if game_map[2][i][j] == self.color:
+                if game_map[2][i][j] == self.bot_color:
                     self.mp.mp[i][j].belong = 1
                 else:
                     self.mp.mp[i][j].belong = 0
@@ -71,12 +74,10 @@ def get_map_from_env(self):
                 else:
                     self.mp.mp[i][j].cost = 1
 
-    def set_color(self, color):
-        self.color = color
-
     def pre(self):  # Preprocess the map
         tmp = self.mp.find_match(lambda a: a.type == 'general' and a.belong == 1)
         if len(tmp) != 1:
+            print("")
             return 1
         self.home_x = tmp[0][0]
         self.home_y = tmp[0][1]
@@ -340,7 +341,7 @@ def flush_movements(self):  # Apply queued movements
         cur_movement = self.movements[0]
         act = [self.cur_x, self.cur_y, self.cur_x + directions[cur_movement][0],
                self.cur_y + directions[cur_movement][1], is_half]
-        self.send_action(self.color, act)
+        self.send_action(self.bot_color, act)
         if self.movements[0] == 'W':
             self.cur_x -= 1
         elif self.movements[0] == 'S':
@@ -350,17 +351,18 @@ def flush_movements(self):  # Apply queued movements
         elif self.movements[0] == 'D':
             self.cur_y += 1
         self.movements.pop(0)
-        self.get_tensor_map()
+        self.get_map_from_env()
         self.update_map()
-        self.get_tensor_map()
+        self.get_map_from_env()
         return
 
     def bot_move(self):  # Main loop, runs once per turn
-        self.get_tensor_map()
+        self.get_map_from_env()
         if not self.is_pre:
             if self.pre() == 1:
                 return
-        if self.movements:
+        if len(self.movements):
+            print("flushed")
             self.flush_movements()
             return
         if [self.cur_x, self.cur_y] not in self.vis:
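
Note on the traceback recorded in main.py below: it ends with KeyError: 'Z' inside flush_movements, meaning a movement key outside the W/A/S/D set reached the directions lookup. A minimal guard, assuming directions is a dict keyed by 'W'/'A'/'S'/'D' (the dict itself is not shown in this diff), could look like this:

# Hypothetical guard: validate queued moves before indexing `directions`,
# which would avoid the KeyError: 'Z' seen in the traceback below.
directions = {'W': (-1, 0), 'S': (1, 0), 'A': (0, -1), 'D': (0, 1)}

def pop_valid_movement(movements):
    """Return the next W/A/S/D move, discarding anything else."""
    while movements and movements[0] not in directions:
        movements.pop(0)                # drop invalid entries such as 'Z'
    return movements.pop(0) if movements else None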

main.py

+16-14
@@ -18,11 +18,11 @@ def main(offline_train=True):
         "action_dim": 5,
         "lr_a": 0.01,
         "lr_c": 0.01,
-        "gamma": None,
-        "lambda": None,
-        "epsilon": None,
-        "k_epochs": None,
-        "entropy_coef": None,
+        "gamma": 0.99,
+        "lambda": 0.95,
+        "epsilon": 0.2,
+        "k_epochs": 10,
+        "entropy_coef": 0.01,
         "autosave_step": 107,
         "device": device
     }
@@ -53,6 +53,8 @@ def main(offline_train=True):
         a, a_log_prob = agent.predict(s)
         s_, r, done, _ = env.step(a)
 
+        env.render("human")
+
         s_ = state_norm(s_).to(device)
         r = reward_scaling(r)
 
@@ -78,17 +80,17 @@ def main(offline_train=True):
 
 """
 Traceback (most recent call last):
-  File "D:/MyFiles/LearningBot/main.py", line 77, in <module>
+  File "D:/MyFiles/LearningBot/main.py", line 79, in <module>
     main()
   File "D:/MyFiles/LearningBot/main.py", line 54, in main
     s_, r, done, _ = env.step(a)
-  File "D:\MyFiles\LearningBot\offsite_env.py", line 86, in step
+  File "D:\MyFiles\LearningBot\offsite_env.py", line 85, in step
     self.execute_actions(action[0].long())
-  File "D:\MyFiles\LearningBot\offsite_env.py", line 137, in execute_actions
-    self.combine((act[0], act[1]), (act[2], act[3]), mov_troop)
-  File "D:\MyFiles\LearningBot\offsite_env.py", line 153, in combine
-    "amount": int(self.map[0][b2[0]][b2[1]]),
-IndexError: index 20 is out of bounds for dimension 0 with size 20
-
-Process finished with exit code 1
+  File "D:\MyFiles\LearningBot\offsite_env.py", line 156, in execute_actions
+    self.internal_bots[cur_color].bot_move()
+  File "D:\MyFiles\LearningBot\bot_div\game.py", line 365, in bot_move
+    self.flush_movements()
+  File "D:\MyFiles\LearningBot\bot_div\game.py", line 341, in flush_movements
+    act = [self.cur_x, self.cur_y, self.cur_x + directions[cur_movement][0],
+KeyError: 'Z'
 """

normalization.py

+11-15
@@ -1,25 +1,26 @@
-import numpy as np
+import copy
+import torch
 
 
 class RunningMeanStd(object):
 
     def __init__(self, shape):
         self.n = 0
-        self.mean = np.zeros(shape)
-        self.S = np.zeros(shape)
-        self.std = np.sqrt(self.S)
+        self.mean = torch.zeros(shape)
+        self.S = torch.zeros(shape)
+        self.std = torch.sqrt(self.S)
 
     def update(self, x):
-        x = np.array(x)
+        x = torch.tensor(x)
         self.n += 1
         if self.n == 1:
             self.mean = x
             self.std = x
         else:
-            old_mean = self.mean.copy()
+            old_mean = copy.copy(self.mean)
             self.mean = old_mean + (x - old_mean) / self.n
             self.S = self.S + (x - old_mean) * (x - self.mean)
-            self.std = np.sqrt(self.S / self.n)
+            self.std = torch.sqrt(self.S / self.n)
 
 
 class Normalization(object):
@@ -40,18 +41,13 @@ def __init__(self, shape, gamma):
         self.shape = shape
         self.gamma = gamma
         self.running_ms = RunningMeanStd(shape=self.shape)
-        self.R = np.zeros(self.shape)
+        self.R = torch.zeros(self.shape)
 
     def __call__(self, x):
-        try:
-            self.R = self.gamma * self.R + x
-        except TypeError:
-            print(self.R)
-            print(self.gamma)
-            exit(1)
+        self.R = self.gamma * self.R + x
         self.running_ms.update(self.R)
         x = x / (self.running_ms.std + 1e-8)
         return x
 
     def reset(self):
-        self.R = np.zeros(self.shape)
+        self.R = torch.zeros(self.shape)
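
RunningMeanStd is the usual Welford-style online mean/variance estimator, and the gamma-weighted class at the bottom is the standard PPO reward-scaling trick: each reward is divided by the running standard deviation of the discounted return R. A small usage sketch under that reading (the class name RewardScaling is an assumption; the actual name is cut off by this diff):

# Hypothetical usage of the reward-scaling class above.
import torch

scaler = RewardScaling(shape=1, gamma=0.99)   # assumed class name
scaler.reset()                                # once per episode
for raw_r in [1.0, 0.0, -0.5]:
    r = scaler(torch.tensor([raw_r]))         # reward / running std of R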
