Source code for mate.wrappers.repeated_reward_individual_done

# pylint: disable=missing-module-docstring

from typing import List, Tuple, Union

import gym
import numpy as np

from mate.utils import Team

# pylint: disable-next=cyclic-import
from mate.wrappers.typing import (
    MultiAgentEnvironmentType,
    WrapperMeta,
    assert_multi_agent_environment,
)


class RepeatedRewardIndividualDone(gym.Wrapper, metaclass=WrapperMeta):
    """Repeat the reward field and assign individual done field of step(), which is
    similar to the OpenAI Multi-Agent Particle Environment.
    (Not used in the evaluation script.)
    """

    def __init__(self, env: MultiAgentEnvironmentType, target_done_at_destination=False) -> None:
        """Wrap ``env`` so that ``step()`` returns per-agent reward and done lists.

        Refuses to be stacked on top of another instance of itself.
        """
        assert_multi_agent_environment(env)
        assert not isinstance(
            env, RepeatedRewardIndividualDone
        ), f'You should not use wrapper `{self.__class__}` more than once. Got env = {env}.'

        super().__init__(env)

        self.target_done_at_destination = target_done_at_destination

        # Deferred import: importing at module level would create a cycle.
        # pylint: disable-next=import-outside-toplevel,cyclic-import
        from mate.wrappers.single_team import SingleTeamHelper

        self.single_team = isinstance(env, SingleTeamHelper)

    def step(
        self, action: Union[Tuple[np.ndarray, np.ndarray], np.ndarray]
    ) -> Union[
        Tuple[
            Tuple[np.ndarray, np.ndarray],
            Tuple[List[float], List[float]],
            Tuple[List[bool], List[bool]],
            Tuple[List[dict], List[dict]],
        ],
        Tuple[np.ndarray, List[float], List[bool], List[dict]],
    ]:
        """Step the wrapped environment, then broadcast the scalar/team reward and
        done flags into one entry per agent (per team when both teams are present).
        """
        observation, reward, done, info = self.env.step(action)

        # One done flag per target: either the environment's per-target flags
        # (when targets terminate individually at their destination) or the
        # shared episode flag repeated for every target.
        per_target_done = (
            self.target_dones.tolist()
            if self.target_done_at_destination
            else [done] * self.num_targets
        )

        if not self.single_team:
            # Both teams present: reward/done become (camera-team, target-team) pairs.
            camera_reward, target_reward = reward
            reward = (
                [camera_reward] * self.num_cameras,
                [target_reward] * self.num_targets,
            )
            done = ([done] * self.num_cameras, per_target_done)
        elif self.team is Team.TARGET:
            reward = [reward] * self.num_teammates
            done = per_target_done
        else:
            reward = [reward] * self.num_teammates
            done = [done] * self.num_teammates

        return observation, reward, done, info