Update behavior_policy.py: invert the agent_terminate mask (1 - agent_terminate) for the MPE env in learn()

wuxiyang1996 · web-flow · commit 7342a89a9fd1 · 2025-03-03T02:25:12.000-05:00
diff --git a/nova/behavior_policy.py b/nova/behavior_policy.py
@@ -142,7 +142,11 @@ def learn(self, batch, t_env):
             agent_history = agent_history.reshape(n_thread, num_history, self.max_history_len, max_vehicle_num, obs_dim)
 
             cut_len = (num_history - 1) * self.max_history_len
-            mask_cut = agent_terminate[:, :cut_len, i, 0]
+
+            if self.args.env == "MPE":
+                mask_cut = 1 - agent_terminate[:, :cut_len, i, 0]
+            else:
+                mask_cut = agent_terminate[:, :cut_len, i, 0]
 
             # Mask over given episode
             mask = mask_cut.reshape(n_thread, cut_len, 1, 1).tile(1, 1, max_vehicle_num, obs_dim)