Source code for mate.agents.heuristic

"""Built-in heuristic rule-based agents."""

from collections import defaultdict
from functools import lru_cache

import numpy as np

from mate.agents.base import CameraAgentBase
from mate.agents.greedy import GreedyTargetAgent
from mate.constants import MAX_CAMERA_VIEWING_ANGLE
from mate.utils import Vector2D, normalize_angle, polar2cartesian, sin_deg


__all__ = ['HeuristicCameraAgent', 'HeuristicTargetAgent']


class HeuristicCameraAgent(CameraAgentBase):  # pylint: disable=too-many-instance-attributes
    """Heuristic Camera Agent

    Greedily maximizes the heuristic scores as much as possible. All agents send
    their observations to the centralized controller (agent 0). Then the central
    controller sends the goal state (camera pose) to each agent.
    """

    def __init__(self, seed=None):
        """Initialize the agent.
        This function will be called only once on initialization.
        """
        super().__init__(seed=seed)

        self.controller_index = 0
        self.scores = None
        self.state_mesh = None
        self.coord_grid = None
        self.camera_states = None
        self.joint_observation = None
        self.joint_goal_state = None
        self.prev_action = self.DEFAULT_ACTION

    def reset(self, observation):
        """Reset the agent.
        This function will be called immediately after env.reset().
        """
        super().reset(observation)

        results = self.calculate_scores(
            round(float(self.state.max_sight_range), 8),
            round(float(self.state.min_viewing_angle), 8),
        )
        self.state_mesh, self.coord_grid, self.scores = results

        self.camera_states = None
        self.joint_observation = None
        self.joint_goal_state = None
        self.prev_action = self.DEFAULT_ACTION

    def act(self, observation, info=None, deterministic=None):
        """Get the agent action by the observation.
        This function will be called before every env.step().

        Use the goal state sent from the centralized controller if available.
        Otherwise, compute a goal state from the agent's own observation; if no
        target is found, use the previous action or generate a new random action.
        """
        if self.index == self.controller_index:
            goal_state = self.joint_goal_state[self.index]
        else:
            try:
                goal_state = self.last_responses[-1].content
            except IndexError:
                target_states, tracked_bits = self.get_all_opponent_states(self.last_observation)
                target_states = [target_states[t] for t in np.flatnonzero(tracked_bits)]
                if len(target_states) > 0:
                    goal_state = self.get_joint_goal_state([self.state], target_states)[self.index]
                else:
                    goal_state = (None, None)

        if None not in goal_state:
            goal_orientation, goal_viewing_angle = goal_state
            action = np.asarray(
                [
                    normalize_angle(goal_orientation - self.state.orientation),
                    goal_viewing_angle - self.state.viewing_angle,
                ]
            ).clip(min=self.action_space.low, max=self.action_space.high)
        else:
            if self.np_random.binomial(1, 0.1) != 0:
                action = self.action_space.sample()
            else:
                action = self.prev_action

        self.prev_action = action
        return action

    def send_requests(self):
        """Prepare messages to communicate with other agents in the same team.
        This function will be called after observe() but before receive_requests().
        """
        if self.index == self.controller_index:
            return []

        request = self.pack_message(content=self.last_observation, recipient=self.controller_index)
        return [request]

    def receive_requests(self, messages):
        """Receive messages from other agents in the same team.
        This function will be called after send_requests().
        """
        self.last_requests = tuple(messages)

        if self.index != self.controller_index:
            return

        self.joint_observation = {self.controller_index: self.last_observation}
        for message in self.last_requests:
            self.joint_observation[message.sender] = message.content

        self.camera_states = {}
        target_states = {}
        unsensed_targets = set(range(self.num_targets))
        for c, observation in self.joint_observation.items():
            camera_state = self.STATE_CLASS(
                observation[self.observation_slices['self_state']], index=c
            )
            self.camera_states[c] = camera_state
            for t in tuple(unsensed_targets):
                target_state, sensed = self.get_opponent_state(observation, index=t)
                if sensed:
                    target_states[t] = target_state
                    unsensed_targets.remove(t)

        target_states = list(target_states.values())
        self.joint_goal_state = self.get_joint_goal_state(
            list(self.camera_states.values()), target_states
        )

    def send_responses(self):
        """Prepare messages to communicate with other agents in the same team.
        This function will be called after receive_requests().
        """
        if self.index != self.controller_index:
            return []

        responses = []
        for c, goal_state in self.joint_goal_state.items():
            if c == self.index:
                continue
            message = self.pack_message(content=goal_state, recipient=c)
            responses.append(message)
        return responses

    def receive_responses(self, messages):
        """Receive messages from other agents in the same team.
        This function will be called after send_responses() but before act().
        """
        self.last_responses = tuple(messages)

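    # Descriptive note (from the docstrings above): within one environment
    # step, the hooks run in this order for every agent:
    #
    #     observe() -> send_requests() -> receive_requests()
    #               -> send_responses() -> receive_responses() -> act()
    #
    # Followers mail their raw observations to agent 0 in send_requests();
    # agent 0 fuses them and plans in receive_requests(), mails per-camera
    # goal states back in send_responses(), and every follower reads its goal
    # in receive_responses() before acting.
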
    def get_joint_goal_state(self, camera_states, target_states):  # pylint: disable=too-many-locals
        """Greedily track the targets as many as possible."""
        joint_scores = []
        joint_tracked_bits = []
        num_within_range_targets = []
        for camera_state in camera_states:
            within_range_targets = [
                ts
                for ts in target_states
                if (ts - camera_state).norm <= camera_state.max_sight_range
            ]
            num_within_range_targets.append(len(within_range_targets))

            scores = np.zeros(self.scores.shape[-1], dtype=np.float64)
            tracked_bits = np.zeros((self.scores.shape[-1], self.num_targets), dtype=np.bool_)
            for target_state in within_range_targets:
                direction = target_state.location - camera_state.location
                index = np.argmin(np.linalg.norm(direction - self.coord_grid, axis=-1), axis=-1)
                tracked_bits[self.scores[index, :] > 0, target_state.index] = True
                scores += self.scores[index, :]
            joint_scores.append(scores)
            joint_tracked_bits.append(tracked_bits)

        permutations = []
        for _ in range(32):
            permutation = self.np_random.permutation(range(len(camera_states)))
            indices = []
            current_tracked_bits = np.zeros((self.num_targets,), dtype=np.bool_)
            total_scores = 0
            total_cost = 0
            for c in permutation:
                camera_state, scores, tracked_bits = (
                    camera_states[c],
                    joint_scores[c],
                    joint_tracked_bits[c],
                )
                untracked_bits = np.logical_and(tracked_bits, np.logical_not(current_tracked_bits))
                index = np.argmax(scores + untracked_bits.sum(axis=-1))
                state_diff = np.abs(
                    self.state_mesh[index, :2]
                    - np.array([camera_state.orientation, camera_state.viewing_angle])
                )
                cost = (state_diff / self.action_space.high).max()
                current_tracked_bits = np.logical_or(current_tracked_bits, tracked_bits[index])
                total_scores = total_scores + scores[index]
                total_cost += cost
                indices.append(index)
            total_scores += current_tracked_bits.sum()
            permutations.append((total_scores, -total_cost, tuple(permutation), tuple(indices)))

        _, _, best_permutation, best_indices = max(permutations)

        joint_goal_state = defaultdict(lambda: (None, None))
        for c, index in zip(best_permutation, best_indices):
            if num_within_range_targets[c] > 0:
                goal_orientation, goal_viewing_angle, _ = self.state_mesh[index]
                joint_goal_state[camera_states[c].index] = (goal_orientation, goal_viewing_angle)
        return joint_goal_state

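    # Descriptive note: the search above is a randomized greedy assignment.
    # It samples 32 random camera orderings; within each ordering, every
    # camera greedily picks the candidate state that maximizes its own score
    # plus the number of not-yet-covered targets. The ordering with the
    # highest combined score (per-camera scores plus a bonus for the number
    # of covered targets) wins, with ties broken by lower total movement cost.
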
    @staticmethod
    @lru_cache(maxsize=None)
    def calculate_scores(max_sight_range, min_viewing_angle):  # pylint: disable=too-many-locals
        """Calculates the coordinate grid and the weighted scores."""
        state_mesh = np.stack(
            np.meshgrid(
                np.linspace(start=-180.0, stop=+180.0, num=36, endpoint=False),
                np.linspace(
                    start=min_viewing_angle, stop=MAX_CAMERA_VIEWING_ANGLE, num=21, endpoint=True
                ),
            ),
            axis=-1,
        ).reshape(-1, 2)
        sight_ranges = max_sight_range * np.sqrt(min_viewing_angle / state_mesh[..., 1])
        state_mesh = np.hstack([state_mesh, sight_ranges[:, np.newaxis]])

        rho, phi = (
            np.stack(
                np.meshgrid(
                    np.linspace(start=0.0, stop=max_sight_range, num=41, endpoint=True),
                    np.linspace(start=-180.0, stop=+180.0, num=72, endpoint=False),
                ),
                axis=-1,
            )
            .reshape(-1, 2)
            .transpose()
        )
        coord_grid = polar2cartesian(rho, phi).transpose()

        scores = np.zeros((len(coord_grid), len(state_mesh)), dtype=np.float64)
        # pylint: disable-next=invalid-name
        for s, (orientation, viewing_angle, sight_range) in enumerate(state_mesh):
            half_viewing_angle = viewing_angle / 2.0
            if viewing_angle < 180.0:
                dist_max = sight_range / (1.0 + 1.0 / sin_deg(half_viewing_angle))
            else:
                dist_max = sight_range / 2.0

            delta_angle = np.abs(normalize_angle(phi - orientation))
            within_range = np.logical_and(rho <= sight_range, delta_angle <= half_viewing_angle)

            dist2boundary1 = np.minimum(rho, sight_range - rho)
            dist2boundary2 = rho * sin_deg(np.minimum(half_viewing_angle - delta_angle, 90.0))
            dist2boundary = np.maximum(np.minimum(dist2boundary1, dist2boundary2), 0.0)

            # Target at the incenter:      score1 -> 1.0
            # Target at the boundary:      score1 -> 0.0
            # Target in the sector:        score1 -> 0.0 ~ 1.0
            # Target out of the boundary:  score1 -> 0.0
            scores1 = dist2boundary[within_range] / dist_max
            scores2 = 1.0 - rho[within_range] / sight_range
            scores[within_range, s] = scores1 * scores2

        return state_mesh, coord_grid, scores

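# A minimal sketch of the cached grids built by calculate_scores() (the
# argument values below are illustrative, not constants from this module):
#
#     state_mesh, coord_grid, scores = HeuristicCameraAgent.calculate_scores(
#         max_sight_range=1500.0, min_viewing_angle=10.0
#     )
#     state_mesh.shape  # (756, 3):    36 orientations x 21 viewing angles,
#                       #              each with a derived sight range
#     coord_grid.shape  # (2952, 2):   41 radii x 72 angles around the camera
#     scores.shape      # (2952, 756): one score per (grid point, state) pair
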
class HeuristicTargetAgent(GreedyTargetAgent):
    """Heuristic Target Agent

    Arbitrarily runs towards the destination (desired warehouse) with some noise.
    In addition to the greedy target agent, a drift speed is added, which escapes
    away from the center of the cameras' field of view.
    """

    def act(self, observation, info=None, deterministic=None):  # pylint: disable=too-many-locals
        action = super().act(observation, info, deterministic=deterministic)

        camera_states, sensed = self.get_all_opponent_states(observation)

        camera_centers = []
        for c in np.flatnonzero(sensed):
            camera_state = camera_states[c]
            direction = self.state - camera_state
            half_viewing_angle = camera_state.viewing_angle / 2.0
            # Use the absolute angular offset so that the field-of-view check
            # is symmetric about the camera orientation.
            angle_diff = abs(normalize_angle(direction.angle - camera_state.orientation))
            if (
                direction.norm <= 1.2 * camera_state.sight_range
                and angle_diff <= 1.2 * half_viewing_angle
            ):
                center = Vector2D(
                    norm=camera_state.sight_range / (1.0 + sin_deg(min(half_viewing_angle, 90.0))),
                    angle=camera_state.orientation,
                    origin=camera_state.location,
                )
                inner_radius = camera_state.sight_range - center.norm
                camera_centers.append((center, inner_radius))

        if len(camera_centers) > 0:
            center, inner_radius = min(
                camera_centers,
                key=lambda cr: np.linalg.norm(self.state.location - cr[0].endpoint) / cr[1],
            )
            drift = self.state.location - center.endpoint
            drift_size = np.linalg.norm(drift)
            if drift_size > self.state.step_size * self.noise_scale:
                drift *= self.state.step_size * self.noise_scale / drift_size
            if np.dot(action, drift) >= 0.0:
                action = (action + drift).clip(
                    min=self.action_space.low, max=self.action_space.high
                )

        return action
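
# A minimal end-to-end usage sketch. ASSUMPTIONS: ``mate.make``,
# ``mate.MultiCamera``, ``Agent.spawn``, and the ``mate.group_*`` helpers are
# taken from the MATE README rather than from this module -- verify them
# against the package documentation before relying on this snippet.
if __name__ == '__main__':
    import mate

    base_env = mate.make('MultiAgentTracking-v0')
    # Wrap into a camera-team environment; the targets are driven internally
    # by the heuristic target agent defined above.
    env = mate.MultiCamera(base_env, target_agent=HeuristicTargetAgent(seed=0))

    camera_agents = HeuristicCameraAgent(seed=0).spawn(env.num_cameras)

    camera_joint_observation = env.reset()
    mate.group_reset(camera_agents, camera_joint_observation)
    camera_infos = None

    done = False
    while not done:
        # group_step runs one full round for the whole team:
        # observe -> send/receive requests -> send/receive responses -> act.
        camera_joint_action = mate.group_step(
            env, camera_agents, camera_joint_observation, camera_infos
        )
        camera_joint_observation, camera_team_reward, done, camera_infos = env.step(
            camera_joint_action
        )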