@@ -87,16 +87,24 @@ def step(self, action: torch.Tensor):
87
87
# 生成observation
88
88
obs = self .gen_observation ()
89
89
90
- # 计算奖励
90
+ # 只有动作都执行完才需要检查游戏是否结束
91
91
reward = 0
92
- # 如果是第一回合 直接return
93
- if self .action_history .qsize () < 1 :
94
- return obs , reward , False , {}
92
+ w_state = self .win_check ()
93
+ if w_state != 0 :
94
+ reward += 300 if w_state == 2 else - 300
95
+ return obs , reward , True , {}
96
+
95
97
# 计算上一步的奖励
96
98
_dirx = [0 , - 1 , 0 , 1 , 1 , - 1 , 1 , - 1 ]
97
99
_diry = [- 1 , 0 , 1 , 0 , 1 , - 1 , - 1 , 1 ]
100
+ # 如果是第一回合 直接return
101
+ if self .action_history .qsize () < 1 :
102
+ return obs , reward , False , {}
98
103
last_move = self .action_history .queue [- 1 ]
99
104
last_obs = self .obs_history .queue [- 1 ]
105
+ # 如果动作为空
106
+ if last_move [0 ] < 0 :
107
+ return obs , reward , False , {}
100
108
# 无效移动扣大分
101
109
if last_obs [2 ][last_move [0 ] - 1 ][last_move [1 ] - 1 ] != self ._get_colormark (self .learningbot_color ):
102
110
reward -= 100
@@ -126,11 +134,6 @@ def step(self, action: torch.Tensor):
126
134
self .obs_history .get ()
127
135
self .obs_history .put (self .get_view_of (self .learningbot_color ))
128
136
129
- # 只有动作都执行完才需要检查游戏是否结束
130
- w_state = self .win_check ()
131
- if w_state != 0 :
132
- reward += 300 if w_state == 2 else - 300
133
- return obs , reward , True , {}
134
137
return obs , reward , False , {}
135
138
136
139
def render (self , mode = "human" ):
@@ -142,7 +145,7 @@ def render(self, mode="human"):
142
145
if mode == "human" :
143
146
print_tensor_map (self .map )
144
147
print (f"round: { self .round } " )
145
- time .sleep (1 )
148
+ time .sleep (0.65 )
146
149
147
150
def execute_actions (self , action : torch .Tensor ):
148
151
"""
@@ -155,22 +158,35 @@ def execute_actions(self, action: torch.Tensor):
155
158
cur_color = self .internal_bots_color [i ]
156
159
self .internal_bots [cur_color ].bot_move ()
157
160
if not self .actions_now [cur_color ]:
161
+ print (f"bot { cur_color } empty move" )
158
162
continue
159
163
cur_action = self .actions_now [cur_color ]
160
- print (cur_action )
161
- f_amount = int (self .map [0 ][cur_action [0 ]][cur_action [1 ]])
162
- if cur_action [4 ] == 1 :
163
- mov_troop = math .ceil ((f_amount + 0.5 ) / 2 ) - 1
164
- else :
165
- mov_troop = f_amount - 1
166
- self .combine ((cur_action [0 ], cur_action [1 ]), (cur_action [2 ], cur_action [3 ]), mov_troop )
164
+ # 如果移动超界了 直接下一个
165
+ skip = False
166
+ for j in range (4 ):
167
+ if not 0 <= cur_action [j ] < self .map_size :
168
+ skip = True
169
+ break
170
+ if skip :
171
+ print (f"skipped: { cur_action } " )
172
+ continue
173
+
174
+ print (f"internal bot color { cur_color } : { cur_action } " )
175
+ # 检查动作是否合法
176
+ if cur_action [0 ] >= 0 and self .map [2 ][cur_action [0 ]][cur_action [1 ]] == cur_color :
177
+ f_amount = int (self .map [0 ][cur_action [0 ]][cur_action [1 ]])
178
+ if cur_action [4 ] == 1 :
179
+ mov_troop = math .ceil ((f_amount + 0.5 ) / 2 ) - 1
180
+ else :
181
+ mov_troop = f_amount - 1
182
+ self .combine ((cur_action [0 ], cur_action [1 ]), (cur_action [2 ], cur_action [3 ]), mov_troop )
167
183
168
184
# 处理LearningBot动作
169
185
act = self .at .i_to_a (self .map_size , int (action ))[0 ].long ()
170
186
act -= 1
171
187
act [4 ] += 1
172
188
# 检查动作是否合法
173
- if self .map [2 ][act [0 ]][act [1 ]] == self .learningbot_color :
189
+ if act [ 0 ] >= 0 and self .map [2 ][act [0 ]][act [1 ]] == self .learningbot_color :
174
190
f_amount = int (self .map [0 ][act [0 ]][act [1 ]])
175
191
if act [4 ] == 1 :
176
192
mov_troop = math .ceil ((f_amount + 0.5 ) / 2 ) - 1
@@ -197,6 +213,10 @@ def combine(self, b1: tuple, b2: tuple, cnt):
197
213
"color" : int (self .map [2 ][b2 [0 ]][b2 [1 ]])
198
214
}
199
215
216
+ # 地形特判
217
+ if t ["type" ] == BlockType .mountain :
218
+ return
219
+
200
220
if t ["color" ] == f ["color" ]:
201
221
t ["amount" ] += cnt
202
222
f ["amount" ] -= cnt
@@ -213,7 +233,7 @@ def combine(self, b1: tuple, b2: tuple, cnt):
213
233
if self .map [2 ][i ][j ] == BlockType .crown :
214
234
self .map [2 ][i ][j ] = BlockType .city
215
235
t ["color" ] = f ["color" ]
216
- t ["amount" ] = f ["amount" ]
236
+ t ["amount" ] = - t ["amount" ]
217
237
218
238
# 赋值回去
219
239
self .map [0 ][b1 [0 ]][b1 [1 ]] = f ["amount" ]
@@ -303,9 +323,13 @@ def bot_action_upd(self, color, action):
303
323
:param action: [x1, y1, x2, y2, is_half]
304
324
:return:
305
325
"""
306
- print (f"internal bot color { color } : { action } " )
307
326
if action :
308
- self .actions_now [color ] = action
327
+ self .actions_now [color ] = [None , None , None , None , None ]
328
+ self .actions_now [color ][0 ] = action [0 ] - 1
329
+ self .actions_now [color ][1 ] = action [1 ] - 1
330
+ self .actions_now [color ][2 ] = action [2 ] - 1
331
+ self .actions_now [color ][3 ] = action [3 ] - 1
332
+ self .actions_now [color ][4 ] = action [4 ]
309
333
else :
310
334
self .actions_now [color ] = False
311
335
0 commit comments