Spaces:

tree3po
/

kinet-test

Runtime error

App Files Files Community

tree3po committed on Nov 13, 2024

Commit

581eeac

verified ·

1 Parent(s): ec333dc

Upload 46 files

Browse files

Files changed (46) hide show

kinetix/__init__.py +0 -0
kinetix/assets/circle.png +0 -0
kinetix/assets/edit.png +0 -0
kinetix/assets/fjoint.png +0 -0
kinetix/assets/fjoint2.png +0 -0
kinetix/assets/hand.png +0 -0
kinetix/assets/joint.png +0 -0
kinetix/assets/play.png +0 -0
kinetix/assets/rjoint.png +0 -0
kinetix/assets/rjoint2.png +0 -0
kinetix/assets/rotate.png +0 -0
kinetix/assets/square.png +0 -0
kinetix/assets/thruster.png +0 -0
kinetix/assets/thruster6.png +0 -0
kinetix/assets/triangle.png +0 -0
kinetix/editor.py +0 -0
kinetix/environment/__init__.py +0 -0
kinetix/environment/env.py +829 -0
kinetix/environment/env_state.py +43 -0
kinetix/environment/ued/distributions.py +349 -0
kinetix/environment/ued/mutators.py +1157 -0
kinetix/environment/ued/ued.py +249 -0
kinetix/environment/ued/ued_state.py +53 -0
kinetix/environment/ued/util.py +358 -0
kinetix/environment/utils.py +66 -0
kinetix/environment/wrappers.py +309 -0
kinetix/models/.gitignore +2 -0
kinetix/models/__init__.py +65 -0
kinetix/models/action_spaces.py +58 -0
kinetix/models/actor_critic.py +206 -0
kinetix/models/rel_multi_head.py +546 -0
kinetix/models/transformer_model.py +302 -0
kinetix/pcg/__init__.py +0 -0
kinetix/pcg/pcg.py +97 -0
kinetix/pcg/pcg_state.py +24 -0
kinetix/render/__init__.py +0 -0
kinetix/render/renderer_pixels.py +290 -0
kinetix/render/renderer_symbolic_common.py +190 -0
kinetix/render/renderer_symbolic_entity.py +121 -0
kinetix/render/renderer_symbolic_flat.py +102 -0
kinetix/render/textures.py +43 -0
kinetix/util/__init__.py +0 -0
kinetix/util/config.py +229 -0
kinetix/util/learning.py +565 -0
kinetix/util/saving.py +540 -0
kinetix/util/timing.py +15 -0

kinetix/__init__.py ADDED Viewed

File without changes

kinetix/assets/circle.png ADDED Viewed

kinetix/assets/edit.png ADDED Viewed

kinetix/assets/fjoint.png ADDED Viewed

kinetix/assets/fjoint2.png ADDED Viewed

kinetix/assets/hand.png ADDED Viewed

kinetix/assets/joint.png ADDED Viewed

kinetix/assets/play.png ADDED Viewed

kinetix/assets/rjoint.png ADDED Viewed

kinetix/assets/rjoint2.png ADDED Viewed

kinetix/assets/rotate.png ADDED Viewed

kinetix/assets/square.png ADDED Viewed

kinetix/assets/thruster.png ADDED Viewed

kinetix/assets/thruster6.png ADDED Viewed

kinetix/assets/triangle.png ADDED Viewed

kinetix/editor.py ADDED Viewed

The diff for this file is too large to render. See raw diff

kinetix/environment/__init__.py ADDED Viewed

File without changes

kinetix/environment/env.py ADDED Viewed

	@@ -0,0 +1,829 @@

+import functools
+from functools import partial
+from typing import Any, Dict, Optional, Tuple, Union
+import chex
+import jax
+import jax.numpy as jnp
+import numpy as np
+from chex._src.pytypes import PRNGKey
+from gymnax.environments import environment, spaces
+from gymnax.environments.environment import TEnvParams, TEnvState
+from gymnax.environments.spaces import Space
+from jax import lax
+from jax2d.engine import PhysicsEngine, create_empty_sim, recalculate_mass_and_inertia
+from jax2d.sim_state import CollisionManifold, SimState
+from kinetix.environment.env_state import EnvParams, EnvState, StaticEnvParams
+from kinetix.environment.wrappers import (
+    AutoReplayWrapper,
+    AutoResetWrapper,
+    UnderspecifiedToGymnaxWrapper,
+    DenseRewardWrapper,
+    LogWrapper,
+)
+from kinetix.pcg.pcg import env_state_to_pcg_state, sample_pcg_state
+from kinetix.pcg.pcg_state import PCGState
+from kinetix.render.renderer_symbolic_entity import make_render_entities
+from kinetix.render.renderer_pixels import make_render_pixels, make_render_pixels_rl
+from kinetix.render.renderer_symbolic_flat import make_render_symbolic
+from kinetix.util.saving import load_pcg_state_pickle
+from jaxued.environments import UnderspecifiedEnv
+def create_empty_env(static_env_params):
+    sim_state = create_empty_sim(static_env_params)
+    return EnvState(
+        timestep=0,
+        thruster_bindings=jnp.zeros(static_env_params.num_thrusters, dtype=jnp.int32),
+        motor_bindings=jnp.zeros(static_env_params.num_joints, dtype=jnp.int32),
+        motor_auto=jnp.zeros(static_env_params.num_joints, dtype=bool),
+        polygon_shape_roles=jnp.zeros(static_env_params.num_polygons, dtype=jnp.int32),
+        circle_shape_roles=jnp.zeros(static_env_params.num_circles, dtype=jnp.int32),
+        polygon_highlighted=jnp.zeros(static_env_params.num_polygons, dtype=bool),
+        circle_highlighted=jnp.zeros(static_env_params.num_circles, dtype=bool),
+        polygon_densities=jnp.ones(static_env_params.num_polygons, dtype=jnp.float32),
+        circle_densities=jnp.ones(static_env_params.num_circles, dtype=jnp.float32),
+        **sim_state.__dict__,
+    )
+def index_motor_actions(
+    action: jnp.ndarray,
+    state: EnvState,
+    clip_min=None,
+    clip_max=None,
+):
+    # Expand the motor actions to all joints with the same colour
+    return jnp.clip(action[state.motor_bindings], clip_min, clip_max)
+def index_thruster_actions(
+    action: jnp.ndarray,
+    state: EnvState,
+    clip_min=None,
+    clip_max=None,
+):
+    # Expand the thruster actions to all joints with the same colour
+    return jnp.clip(action[state.thruster_bindings], clip_min, clip_max)
+def convert_continuous_actions(
+    action: jnp.ndarray, state: SimState, static_env_params: StaticEnvParams, params: EnvParams
+):
+    action_motor = action[: static_env_params.num_motor_bindings]
+    action_thruster = action[static_env_params.num_motor_bindings :]
+    action_motor = index_motor_actions(action_motor, state, -1, 1)
+    action_thruster = index_thruster_actions(action_thruster, state, 0, 1)
+    action_motor = jnp.where(state.motor_auto, jnp.ones_like(action_motor), action_motor)
+    action_to_perform = jnp.concatenate([action_motor, action_thruster], axis=0)
+    return action_to_perform
+def convert_discrete_actions(action: int, state: SimState, static_env_params: StaticEnvParams, params: EnvParams):
+    """Turn a single categorical action into per-joint / per-thruster commands.
+    With NJC = ``num_motor_bindings`` and NTC = ``num_thruster_bindings`` the
+    categorical layout is:
+      * ``0 .. NJC*2 - 1``            joint actions (binding ``action // 2``;
+                                      even -> -1, odd -> +1)
+      * ``NJC*2``                     no-op
+      * ``NJC*2 + 1 .. NJC*2 + NTC``  thruster actions (one thruster binding set to 1)
+    Joints flagged in ``state.motor_auto`` are forced to 1 regardless of the
+    action. ``params`` is not read here.
+    Returns the motor commands followed by the thruster commands.
+    """
+    # Interpret the action as a joint action first; non-joint actions are masked out below.
+    which_idx = action // 2
+    which_dir = action % 2
+    # One slot per motor binding followed by one per thruster binding; which_dir in {0, 1} maps to {-1, +1}.
+    actions = (
+        jnp.zeros(static_env_params.num_motor_bindings + static_env_params.num_thruster_bindings)
+        .at[which_idx]
+        .set(which_dir * 2 - 1)
+    )
+    # Zero the whole vector when the action is the no-op or a thruster action (action >= NJC * 2),
+    # so that neither of those controls any joint.
+    actions = actions * (
+        1 - (action >= static_env_params.num_motor_bindings * 2)
+    )
+    # Thruster action: slot NJC + (action - NJC * 2 - 1) is switched on.
+    actions = jax.lax.select(
+        action > static_env_params.num_motor_bindings * 2,
+        actions.at[action - static_env_params.num_motor_bindings * 2 - 1 + static_env_params.num_motor_bindings].set(1),
+        actions,
+    )
+    # Expand per-binding values to per-joint / per-thruster values and clip them.
+    action_motor = index_motor_actions(actions[: static_env_params.num_motor_bindings], state, -1, 1)
+    # Automatic motors always run at 1.
+    action_motor = jnp.where(state.motor_auto, jnp.ones_like(action_motor), action_motor)
+    action_thruster = index_thruster_actions(actions[static_env_params.num_motor_bindings :], state, 0, 1)
+    action_to_perform = jnp.concatenate([action_motor, action_thruster], axis=0)
+    return action_to_perform
+def convert_multi_discrete_actions(
+    action: jnp.ndarray, state: SimState, static_env_params: StaticEnvParams, params: EnvParams
+):
+    # Comes in with each action being in {0,1,2} for joints and {0,1} for thrusters
+    # Convert to [-1., 1.] for joints and [0., 1.] for thrusters
+    def _single_motor_action(act):
+        return jax.lax.switch(
+            act,
+            [lambda: 0.0, lambda: 1.0, lambda: -1.0],
+        )
+    def _single_thruster_act(act):
+        return jax.lax.select(
+            act == 0,
+            0.0,
+            1.0,
+        )
+    action_motor = jax.vmap(_single_motor_action)(action[: static_env_params.num_motor_bindings])
+    action_thruster = jax.vmap(_single_thruster_act)(action[static_env_params.num_motor_bindings :])
+    action_motor = index_motor_actions(action_motor, state, -1, 1)
+    action_thruster = index_thruster_actions(action_thruster, state, 0, 1)
+    action_motor = jnp.where(state.motor_auto, jnp.ones_like(action_motor), action_motor)
+    action_to_perform = jnp.concatenate([action_motor, action_thruster], axis=0)
+    return action_to_perform
+def make_kinetix_env_from_args(
+    obs_type, action_type, reset_type, static_env_params=None, auto_reset_fn=None, dense_reward_scale=1.0
+):
+    if obs_type == "entity":
+        if action_type == "multidiscrete":
+            env = KinetixEntityMultiDiscreteActions(should_do_pcg_reset=True, static_env_params=static_env_params)
+        elif action_type == "discrete":
+            env = KinetixEntityDiscreteActions(should_do_pcg_reset=True, static_env_params=static_env_params)
+        elif action_type == "continuous":
+            env = KinetixEntityContinuousActions(should_do_pcg_reset=True, static_env_params=static_env_params)
+        else:
+            raise ValueError(f"Unknown action type: {action_type}")
+    elif obs_type == "symbolic":
+        if action_type == "multidiscrete":
+            env = KinetixSymbolicMultiDiscreteActions(should_do_pcg_reset=True, static_env_params=static_env_params)
+        elif action_type == "discrete":
+            env = KinetixSymbolicDiscreteActions(should_do_pcg_reset=True, static_env_params=static_env_params)
+        elif action_type == "continuous":
+            env = KinetixSymbolicContinuousActions(should_do_pcg_reset=True, static_env_params=static_env_params)
+        else:
+            raise ValueError(f"Unknown action type: {action_type}")
+    elif obs_type == "pixels":
+        if action_type == "multidiscrete":
+            env = KinetixPixelsMultiDiscreteActions(should_do_pcg_reset=True, static_env_params=static_env_params)
+        elif action_type == "discrete":
+            env = KinetixPixelsDiscreteActions(should_do_pcg_reset=True, static_env_params=static_env_params)
+        elif action_type == "continuous":
+            env = KinetixPixelsContinuousActions(should_do_pcg_reset=True, static_env_params=static_env_params)
+        else:
+            raise ValueError(f"Unknown action type: {action_type}")
+    elif obs_type == "blind":
+        if action_type == "discrete":
+            env = KinetixBlindDiscreteActions(should_do_pcg_reset=True, static_env_params=static_env_params)
+        elif action_type == "continuous":
+            env = KinetixBlindContinuousActions(should_do_pcg_reset=True, static_env_params=static_env_params)
+        else:
+            raise ValueError(f"Unknown action type: {action_type}")
+    else:
+        raise ValueError(f"Unknown observation type: {obs_type}")
+    # Wrap
+    if reset_type == "replay":
+        env = AutoReplayWrapper(env)
+    elif reset_type == "reset":
+        env = AutoResetWrapper(env, sample_level=auto_reset_fn)
+    else:
+        raise ValueError(f"Unknown reset type {reset_type}")
+    env = UnderspecifiedToGymnaxWrapper(env)
+    env = DenseRewardWrapper(env, dense_reward_scale=dense_reward_scale)
+    env = LogWrapper(env)
+    return env
+def make_kinetix_env_from_name(name, static_env_params=None):
+    kwargs = dict(filename_to_use_for_reset=None, should_do_pcg_reset=True, static_env_params=static_env_params)
+    values = {
+        "Kinetix-Pixels-MultiDiscrete-v1": KinetixPixelsMultiDiscreteActions,
+        "Kinetix-Pixels-Discrete-v1": KinetixPixelsDiscreteActions,
+        "Kinetix-Pixels-Continuous-v1": KinetixPixelsContinuousActions,
+        "Kinetix-Symbolic-MultiDiscrete-v1": KinetixSymbolicMultiDiscreteActions,
+        "Kinetix-Symbolic-Discrete-v1": KinetixSymbolicDiscreteActions,
+        "Kinetix-Symbolic-Continuous-v1": KinetixSymbolicContinuousActions,
+        "Kinetix-Blind-Discrete-v1": KinetixBlindDiscreteActions,
+        "Kinetix-Blind-Continuous-v1": KinetixBlindContinuousActions,
+        "Kinetix-Entity-Discrete-v1": KinetixEntityDiscreteActions,
+        "Kinetix-Entity-Continuous-v1": KinetixEntityContinuousActions,
+        "Kinetix-Entity-MultiDiscrete-v1": KinetixEntityMultiDiscreteActions,
+    }
+    return values[name](**kwargs)
+class ObservationSpace:
+    def __init__(self, params: EnvParams, static_env_params: StaticEnvParams):
+        pass
+    def get_obs(self, state: EnvState):
+        raise NotImplementedError()
+    def observation_space(self, params: EnvParams):
+        raise NotImplementedError()
+class PixelObservations(ObservationSpace):
+    def __init__(self, params: EnvParams, static_env_params: StaticEnvParams):
+        self.render_function = make_render_pixels_rl(params, static_env_params)
+        self.static_env_params = static_env_params
+    def get_obs(self, state: EnvState):
+        return self.render_function(state)
+    def observation_space(self, params: EnvParams) -> spaces.Box:
+        return spaces.Box(
+            0.0,
+            1.0,
+            tuple(a // self.static_env_params.downscale for a in self.static_env_params.screen_dim) + (3,),
+            dtype=jnp.float32,
+        )
+class SymbolicObservations(ObservationSpace):
+    def __init__(self, params: EnvParams, static_env_params: StaticEnvParams):
+        self.render_function = make_render_symbolic(params, static_env_params)
+    def get_obs(self, state: EnvState):
+        return self.render_function(state)
+class EntityObservations(ObservationSpace):
+    def __init__(self, params: EnvParams, static_env_params: StaticEnvParams):
+        self.render_function = make_render_entities(params, static_env_params)
+    def get_obs(self, state: EnvState):
+        return self.render_function(state)
+class BlindObservations(ObservationSpace):
+    def __init__(self, params: EnvParams, static_env_params: StaticEnvParams):
+        self.params = params
+    def get_obs(self, state: EnvState):
+        return jax.nn.one_hot(state.timestep, self.params.max_timesteps + 1)
+def get_observation_space_from_name(name: str, params, static_env_params):
+    if "Pixels" in name:
+        return PixelObservations(params, static_env_params)
+    elif "Symbolic" in name:
+        return SymbolicObservations(params, static_env_params)
+    elif "Entity" in name:
+        return EntityObservations(params, static_env_params)
+    if "Blind" in name:
+        return BlindObservations(params, static_env_params)
+    else:
+        raise ValueError(f"Unknown name {name}")
+class ActionType:
+    def __init__(self, params: EnvParams, static_env_params: StaticEnvParams):
+        # This is the processed, unified action space size that is shared with all action types
+        # 1 dim per motor and thruster
+        self.unified_action_space_size = static_env_params.num_motor_bindings + static_env_params.num_thruster_bindings
+    def action_space(self, params: Optional[EnvParams] = None) -> Union[spaces.Discrete, spaces.Box]:
+        raise NotImplementedError()
+    def process_action(self, action: jnp.ndarray, state: EnvState, static_env_params: StaticEnvParams) -> jnp.ndarray:
+        raise NotImplementedError()
+    def noop_action(self) -> jnp.ndarray:
+        raise NotImplementedError()
+    def random_action(self, rng: chex.PRNGKey):
+        raise NotImplementedError()
+class ActionTypeContinuous(ActionType):
+    def __init__(self, params: EnvParams, static_env_params: StaticEnvParams):
+        super().__init__(params, static_env_params)
+        self.params = params
+        self.static_env_params = static_env_params
+    def action_space(self, params: EnvParams | None = None) -> spaces.Discrete | spaces.Box:
+        return spaces.Box(
+            low=jnp.ones(self.unified_action_space_size) * -1.0,
+            high=jnp.ones(self.unified_action_space_size) * 1.0,
+            shape=(self.unified_action_space_size,),
+        )
+    def process_action(self, action: PRNGKey, state: EnvState, static_env_params: StaticEnvParams) -> PRNGKey:
+        return convert_continuous_actions(action, state, static_env_params, self.params)
+    def noop_action(self) -> jnp.ndarray:
+        return jnp.zeros(self.unified_action_space_size, dtype=jnp.float32)
+    def random_action(self, rng: chex.PRNGKey) -> jnp.ndarray:
+        actions = jax.random.uniform(rng, shape=(self.unified_action_space_size,), minval=-1.0, maxval=1.0)
+        # Motors between -1 and 1, thrusters between 0 and 1
+        actions = actions.at[self.static_env_params.num_motor_bindings :].set(
+            jnp.abs(actions[self.static_env_params.num_motor_bindings :])
+        )
+        return actions
+class ActionTypeDiscrete(ActionType):
+    def __init__(self, params: EnvParams, static_env_params: StaticEnvParams):
+        super().__init__(params, static_env_params)
+        self.params = params
+        self.static_env_params = static_env_params
+        self._n_actions = (
+            self.static_env_params.num_motor_bindings * 2 + 1 + self.static_env_params.num_thruster_bindings
+        )
+    def action_space(self, params: Optional[EnvParams] = None) -> spaces.Discrete:
+        return spaces.Discrete(self._n_actions)
+    def process_action(self, action: jnp.ndarray, state: EnvState, static_env_params: StaticEnvParams) -> jnp.ndarray:
+        return convert_discrete_actions(action, state, static_env_params, self.params)
+    def noop_action(self) -> int:
+        return self.static_env_params.num_motor_bindings * 2
+    def random_action(self, rng: chex.PRNGKey):
+        return jax.random.randint(rng, shape=(), minval=0, maxval=self._n_actions)
+class MultiDiscrete(Space):
+    def __init__(self, n, number_of_dims_per_distribution):
+        self.number_of_dims_per_distribution = number_of_dims_per_distribution
+        self.n = n
+        self.shape = (number_of_dims_per_distribution.shape[0],)
+        self.dtype = jnp.int_
+    def sample(self, rng: chex.PRNGKey) -> chex.Array:
+        """Sample random action uniformly from set of categorical choices."""
+        uniform_sample = jax.random.uniform(rng, shape=self.shape) * self.number_of_dims_per_distribution
+        md_dist = jnp.floor(uniform_sample)
+        return md_dist.astype(self.dtype)
+    def contains(self, x) -> jnp.ndarray:
+        """Check whether specific object is within space."""
+        range_cond = jnp.logical_and(x >= 0, (x < self.number_of_dims_per_distribution).all())
+        return range_cond
+class ActionTypeMultiDiscrete(ActionType):
+    def __init__(self, params: EnvParams, static_env_params: StaticEnvParams):
+        super().__init__(params, static_env_params)
+        self.params = params
+        self.static_env_params = static_env_params
+        # This is the action space that will be used internally by an agent
+        # 3 dims per motor (foward, backward, off) and 2 per thruster (on, off)
+        self.n_hot_action_space_size = (
+            self.static_env_params.num_motor_bindings * 3 + self.static_env_params.num_thruster_bindings * 2
+        )
+        def _make_sample_random():
+            minval = jnp.zeros(self.unified_action_space_size, dtype=jnp.int32)
+            maxval = jnp.ones(self.unified_action_space_size, dtype=jnp.int32) * 3
+            maxval = maxval.at[self.static_env_params.num_motor_bindings :].set(2)
+            def random(rng):
+                return jax.random.randint(rng, shape=(self.unified_action_space_size,), minval=minval, maxval=maxval)
+            return random
+        self._random = _make_sample_random
+        self.number_of_dims_per_distribution = jnp.concatenate(
+            [
+                np.ones(self.static_env_params.num_motor_bindings) * 3,
+                np.ones(self.static_env_params.num_thruster_bindings) * 2,
+            ]
+        ).astype(np.int32)
+    def action_space(self, params: Optional[EnvParams] = None) -> MultiDiscrete:
+        return MultiDiscrete(self.n_hot_action_space_size, self.number_of_dims_per_distribution)
+    def process_action(self, action: jnp.ndarray, state: EnvState, static_env_params: StaticEnvParams) -> jnp.ndarray:
+        return convert_multi_discrete_actions(action, state, static_env_params, self.params)
+    def noop_action(self):
+        return jnp.zeros(self.unified_action_space_size, dtype=jnp.int32)
+    def random_action(self, rng: chex.PRNGKey):
+        return self._random()(rng)
+class BasePhysicsEnv(UnderspecifiedEnv):
+    def __init__(
+        self,
+        action_type: ActionType,
+        observation_space: ObservationSpace,
+        static_env_params: StaticEnvParams,
+        target_index: int = 0,
+        filename_to_use_for_reset=None,  # "worlds/games/bipedal_v1",
+        should_do_pcg_reset: bool = False,
+    ):
+        super().__init__()
+        self.target_index = target_index
+        self.static_env_params = static_env_params
+        self.action_type = action_type
+        self._observation_space = observation_space
+        self.physics_engine = PhysicsEngine(self.static_env_params)
+        self.should_do_pcg_reset = should_do_pcg_reset
+        self.filename_to_use_for_reset = filename_to_use_for_reset
+        if self.filename_to_use_for_reset is not None:
+            self.reset_state = load_pcg_state_pickle(filename_to_use_for_reset)
+        else:
+            env_state = create_empty_env(self.static_env_params)
+            self.reset_state = env_state_to_pcg_state(env_state)
+    # Action / Observations
+    def action_space(self, params: Optional[EnvParams] = None) -> Union[spaces.Discrete, spaces.Box]:
+        return self.action_type.action_space(params)
+    def observation_space(self, params: Any):
+        return self._observation_space.observation_space(params)
+    def get_obs(self, state: EnvState):
+        return self._observation_space.get_obs(state)
+    def step_env(self, rng, state, action: jnp.ndarray, params):
+        action_processed = self.action_type.process_action(action, state, self.static_env_params)
+        return self.engine_step(state, action_processed, params)
+    def reset_env(self, rng, params):
+        # Wrap in AutoResetWrapper or AutoReplayWrapper
+        raise NotImplementedError()
+    def reset_env_to_level(self, rng, state: EnvState, params):
+        if isinstance(state, PCGState):
+            return self.reset_env_to_pcg_level(rng, state, params)
+        return self.get_obs(state), state
+    def reset_env_to_pcg_level(self, rng, state: PCGState, params):
+        env_state = sample_pcg_state(rng, state, params, self.static_env_params)
+        return self.get_obs(env_state), env_state
+    @property
+    def default_params(self) -> EnvParams:
+        return EnvParams()
+    @staticmethod
+    def default_static_params() -> StaticEnvParams:
+        return StaticEnvParams()
+    def compute_reward_info(
+        self, state: EnvState, manifolds: tuple[CollisionManifold, CollisionManifold, CollisionManifold]
+    ) -> float:
+        def get_active(manifold: CollisionManifold) -> jnp.ndarray:
+            return manifold.active
+        def dist(a, b):
+            return jnp.linalg.norm(a - b)
+        @jax.vmap
+        def dist_rr(idxa, idxb):
+            return dist(state.polygon.position[idxa], state.polygon.position[idxb])
+        @jax.vmap
+        def dist_cc(idxa, idxb):
+            return dist(state.circle.position[idxa], state.circle.position[idxb])
+        @jax.vmap
+        def dist_cr(idxa, idxb):
+            return dist(state.circle.position[idxa], state.polygon.position[idxb])
+        info = {
+            "GoalR": False,
+        }
+        negative_reward = 0
+        reward = 0
+        distance = 0
+        rs = state.polygon_shape_roles * state.polygon.active
+        cs = state.circle_shape_roles * state.circle.active
+        # Polygon Polygon
+        r1 = rs[self.physics_engine.poly_poly_pairs[:, 0]]
+        r2 = rs[self.physics_engine.poly_poly_pairs[:, 1]]
+        reward += ((r1 * r2 == 2) * get_active(manifolds[0])).sum()
+        negative_reward += ((r1 * r2 == 3) * get_active(manifolds[0])).sum()
+        distance += (
+            (r1 * r2 == 2)
+            * dist_rr(self.physics_engine.poly_poly_pairs[:, 0], self.physics_engine.poly_poly_pairs[:, 1])
+        ).sum()
+        # Circle Polygon
+        c1 = cs[self.physics_engine.circle_poly_pairs[:, 0]]
+        r2 = rs[self.physics_engine.circle_poly_pairs[:, 1]]
+        reward += ((c1 * r2 == 2) * get_active(manifolds[1])).sum()
+        negative_reward += ((c1 * r2 == 3) * get_active(manifolds[1])).sum()
+        t = dist_cr(self.physics_engine.circle_poly_pairs[:, 0], self.physics_engine.circle_poly_pairs[:, 1])
+        distance += ((c1 * r2 == 2) * t).sum()
+        # Circle Circle
+        c1 = cs[self.physics_engine.circle_circle_pairs[:, 0]]
+        c2 = cs[self.physics_engine.circle_circle_pairs[:, 1]]
+        reward += ((c1 * c2 == 2) * get_active(manifolds[2])).sum()
+        negative_reward += ((c1 * c2 == 3) * get_active(manifolds[2])).sum()
+        distance += (
+            (c1 * c2 == 2)
+            * dist_cc(self.physics_engine.circle_circle_pairs[:, 0], self.physics_engine.circle_circle_pairs[:, 1])
+        ).sum()
+        reward = jax.lax.select(
+            negative_reward > 0,
+            -1.0,
+            jax.lax.select(
+                reward > 0,
+                1.0,
+                0.0,
+            ),
+        )
+        info["GoalR"] = reward > 0
+        info["distance"] = distance
+        return reward, info
+    @partial(jax.jit, static_argnums=(0,))
+    def engine_step(self, env_state, action_to_perform, env_params):
+        def _single_step(env_state, unused):
+            env_state, mfolds = self.physics_engine.step(
+                env_state,
+                env_params,
+                action_to_perform,
+            )
+            reward, info = self.compute_reward_info(env_state, mfolds)
+            done = reward != 0
+            info = {"rr_manifolds": None, "cr_manifolds": None} | info
+            return env_state, (reward, done, info)
+        env_state, (rewards, dones, infos) = jax.lax.scan(
+            _single_step, env_state, xs=None, length=self.static_env_params.frame_skip
+        )
+        env_state = env_state.replace(timestep=env_state.timestep + 1)
+        reward = rewards.max()
+        done = dones.sum() > 0 | jax.tree.reduce(
+            jnp.logical_or, jax.tree.map(lambda x: jnp.isnan(x).any(), env_state), False
+        )
+        done |= env_state.timestep >= env_params.max_timesteps
+        info = jax.tree.map(lambda x: x[-1], infos)
+        return (
+            lax.stop_gradient(self.get_obs(env_state)),
+            lax.stop_gradient(env_state),
+            reward,
+            done,
+            info,
+        )
+    @functools.partial(jax.jit, static_argnums=(0,))
+    def step(
+        self,
+        key: chex.PRNGKey,
+        state: TEnvState,
+        action: Union[int, float, chex.Array],
+        params: Optional[TEnvParams] = None,
+    ) -> Tuple[chex.Array, TEnvState, jnp.ndarray, jnp.ndarray, Dict[Any, Any]]:
+        raise NotImplementedError()
+class KinetixPixelsDiscreteActions(BasePhysicsEnv):
+    def __init__(
+        self,
+        static_env_params: StaticEnvParams | None = None,
+        **kwargs,
+    ):
+        params = self.default_params
+        static_env_params = static_env_params or self.default_static_params()
+        super().__init__(
+            action_type=ActionTypeDiscrete(params, static_env_params),
+            observation_space=PixelObservations(params, static_env_params),
+            static_env_params=static_env_params,
+            **kwargs,
+        )
+    @property
+    def name(self) -> str:
+        return "Kinetix-Pixels-Discrete-v1"
+class KinetixPixelsContinuousActions(BasePhysicsEnv):
+    def __init__(
+        self,
+        static_env_params: StaticEnvParams | None = None,
+        **kwargs,
+    ):
+        params = self.default_params
+        static_env_params = static_env_params or self.default_static_params()
+        super().__init__(
+            action_type=ActionTypeContinuous(params, static_env_params),
+            observation_space=PixelObservations(params, static_env_params),
+            static_env_params=static_env_params,
+            **kwargs,
+        )
+    @property
+    def name(self) -> str:
+        return "Kinetix-Pixels-Continuous-v1"
+class KinetixPixelsMultiDiscreteActions(BasePhysicsEnv):
+    def __init__(
+        self,
+        static_env_params: StaticEnvParams | None = None,
+        **kwargs,
+    ):
+        params = self.default_params
+        static_env_params = static_env_params or self.default_static_params()
+        super().__init__(
+            action_type=ActionTypeMultiDiscrete(params, static_env_params),
+            observation_space=PixelObservations(params, static_env_params),
+            static_env_params=static_env_params,
+            **kwargs,
+        )
+    @property
+    def name(self) -> str:
+        return "Kinetix-Pixels-MultiDiscrete-v1"
+class KinetixSymbolicDiscreteActions(BasePhysicsEnv):
+    def __init__(
+        self,
+        static_env_params: StaticEnvParams | None = None,
+        **kwargs,
+    ):
+        params = self.default_params
+        static_env_params = static_env_params or self.default_static_params()
+        super().__init__(
+            action_type=ActionTypeDiscrete(params, static_env_params),
+            observation_space=SymbolicObservations(params, static_env_params),
+            static_env_params=static_env_params,
+            **kwargs,
+        )
+    @property
+    def name(self) -> str:
+        return "Kinetix-Symbolic-Discrete-v1"
+class KinetixSymbolicContinuousActions(BasePhysicsEnv):
+    def __init__(
+        self,
+        static_env_params: StaticEnvParams | None = None,
+        **kwargs,
+    ):
+        params = self.default_params
+        static_env_params = static_env_params or self.default_static_params()
+        super().__init__(
+            action_type=ActionTypeContinuous(params, static_env_params),
+            observation_space=SymbolicObservations(params, static_env_params),
+            static_env_params=static_env_params,
+            **kwargs,
+        )
+    @property
+    def name(self) -> str:
+        return "Kinetix-Symbolic-Continuous-v1"
+class KinetixSymbolicMultiDiscreteActions(BasePhysicsEnv):
+    def __init__(
+        self,
+        static_env_params: StaticEnvParams | None = None,
+        **kwargs,
+    ):
+        params = self.default_params
+        static_env_params = static_env_params or self.default_static_params()
+        super().__init__(
+            action_type=ActionTypeMultiDiscrete(params, static_env_params),
+            observation_space=SymbolicObservations(params, static_env_params),
+            static_env_params=static_env_params,
+            **kwargs,
+        )
+    @property
+    def name(self) -> str:
+        return "Kinetix-Symbolic-MultiDiscrete-v1"
+class KinetixEntityDiscreteActions(BasePhysicsEnv):
+    def __init__(
+        self,
+        static_env_params: StaticEnvParams | None = None,
+        **kwargs,
+    ):
+        params = self.default_params
+        static_env_params = static_env_params or self.default_static_params()
+        super().__init__(
+            action_type=ActionTypeDiscrete(params, static_env_params),
+            observation_space=EntityObservations(params, static_env_params),
+            static_env_params=static_env_params,
+            **kwargs,
+        )
+    @property
+    def name(self) -> str:
+        return "Kinetix-Entity-Discrete-v1"
+class KinetixEntityContinuousActions(BasePhysicsEnv):
+    def __init__(
+        self,
+        static_env_params: StaticEnvParams | None = None,
+        **kwargs,
+    ):
+        params = self.default_params
+        static_env_params = static_env_params or self.default_static_params()
+        super().__init__(
+            action_type=ActionTypeContinuous(params, static_env_params),
+            observation_space=EntityObservations(params, static_env_params),
+            static_env_params=static_env_params,
+            **kwargs,
+        )
+    @property
+    def name(self) -> str:
+        return "Kinetix-Entity-Continuous-v1"
+class KinetixEntityMultiDiscreteActions(BasePhysicsEnv):
+    def __init__(
+        self,
+        static_env_params: StaticEnvParams | None = None,
+        **kwargs,
+    ):
+        params = self.default_params
+        static_env_params = static_env_params or self.default_static_params()
+        super().__init__(
+            action_type=ActionTypeMultiDiscrete(params, static_env_params),
+            observation_space=EntityObservations(params, static_env_params),
+            static_env_params=static_env_params,
+            **kwargs,
+        )
+    @property
+    def name(self) -> str:
+        return "Kinetix-Entity-MultiDiscrete-v1"
+class KinetixBlindDiscreteActions(BasePhysicsEnv):
+    def __init__(
+        self,
+        static_env_params: StaticEnvParams | None = None,
+        **kwargs,
+    ):
+        params = self.default_params
+        static_env_params = static_env_params or self.default_static_params()
+        super().__init__(
+            action_type=ActionTypeDiscrete(params, static_env_params),
+            observation_space=BlindObservations(params, static_env_params),
+            static_env_params=static_env_params,
+            **kwargs,
+        )
+    @property
+    def name(self) -> str:
+        return "Kinetix-Blind-Discrete-v1"
+class KinetixBlindContinuousActions(BasePhysicsEnv):
+    def __init__(
+        self,
+        static_env_params: StaticEnvParams | None = None,
+        **kwargs,
+    ):
+        params = self.default_params
+        static_env_params = static_env_params or self.default_static_params()
+        super().__init__(
+            action_type=ActionTypeContinuous(params, static_env_params),
+            observation_space=BlindObservations(params, static_env_params),
+            static_env_params=static_env_params,
+            **kwargs,
+        )
+    @property
+    def name(self) -> str:
+        return "Kinetix-Blind-Continuous-v1"

kinetix/environment/env_state.py ADDED Viewed

	@@ -0,0 +1,43 @@

+from dataclasses import field
+import jax.numpy as jnp
+from flax import struct
+from jax2d.sim_state import SimState, SimParams, StaticSimParams
+@struct.dataclass
+class EnvState(SimState):
+    """`SimState` extended with the environment-level fields declared below.
+
+    NOTE(review): the per-field comments are inferred from field names and from
+    how the level generators in this commit use them; confirm against env.py.
+    """
+    # presumably one control-binding id per thruster — TODO confirm
+    thruster_bindings: jnp.ndarray
+    # presumably one control-binding id per joint motor — TODO confirm
+    motor_bindings: jnp.ndarray
+    # presumably flags motors that run without agent input — TODO confirm
+    motor_auto: jnp.ndarray
+    # Role id per polygon slot; the level sampler writes the floor's role at index 0.
+    polygon_shape_roles: jnp.ndarray
+    # Role id per circle slot.
+    circle_shape_roles: jnp.ndarray
+    # presumably editor/render highlight flags per polygon — TODO confirm
+    polygon_highlighted: jnp.ndarray
+    # presumably editor/render highlight flags per circle — TODO confirm
+    circle_highlighted: jnp.ndarray
+    # Per-polygon densities; passed to `recalculate_mass_and_inertia` by the mutators.
+    polygon_densities: jnp.ndarray
+    # Per-circle densities; passed to `recalculate_mass_and_inertia` by the mutators.
+    circle_densities: jnp.ndarray
+    # Step counter, defaults to 0.
+    timestep: int = 0
+@struct.dataclass
+class EnvParams(SimParams):
+    """`SimParams` extended with environment-level parameters (defaults shown below)."""
+    # presumably the episode length cap in steps — TODO confirm against env.py
+    max_timesteps: int = 256
+    # Pixels per world unit; the mutators divide `screen_dim` by this to get world-space bounds.
+    pixels_per_unit: int = 100
+    # presumably the multiplier applied to the dense reward term — TODO confirm
+    dense_reward_scale: float = 0.1
+    # Number of distinct shape roles (the level generators index roles 0..3).
+    num_shape_roles: int = 4
+@struct.dataclass
+class StaticEnvParams(StaticSimParams):
+    """`StaticSimParams` extended with environment-level static parameters.
+
+    Instances are passed as `static_argnums` to the jitted level generators in this commit.
+    """
+    # Screen size in pixels, indexed as [0] for x and [1] for y by the mutators.
+    screen_dim: tuple[int, int] = (500, 500)
+    # presumably the pixel-observation downscale factor — TODO confirm against the renderer
+    downscale: int = 4
+    # presumably the number of physics steps per env step — TODO confirm
+    frame_skip: int = 1
+    # Upper bound on shape size; used as `max_shape_size` for the "large shapes" UED params.
+    max_shape_size: int = 2
+    # Number of motor bindings; used as the exclusive upper bound when sampling a joint colour.
+    num_motor_bindings: int = 4
+    # presumably the number of distinct thruster bindings — TODO confirm
+    num_thruster_bindings: int = 2

kinetix/environment/ued/distributions.py ADDED Viewed

	@@ -0,0 +1,349 @@

+from functools import partial
+import math
+import chex
+import jax
+import jax.numpy as jnp
+from flax.serialization import to_state_dict
+from jax2d.engine import (
+    calculate_collision_matrix,
+    calc_inverse_mass_polygon,
+    calc_inverse_mass_circle,
+    calc_inverse_inertia_circle,
+    calc_inverse_inertia_polygon,
+    recalculate_mass_and_inertia,
+    select_shape,
+    PhysicsEngine,
+)
+from jax2d.sim_state import SimState, RigidBody, Joint, Thruster
+from jax2d.maths import rmat
+from kinetix.environment.env_state import EnvParams, EnvState, StaticEnvParams
+from kinetix.environment.ued.mutators import (
+    mutate_add_connected_shape_proper,
+    mutate_add_shape,
+    mutate_add_connected_shape,
+    mutate_add_thruster,
+)
+from kinetix.environment.ued.ued_state import UEDParams
+from kinetix.environment.ued.util import (
+    get_role,
+    sample_dimensions,
+    is_space_for_shape,
+    random_position_on_polygon,
+    random_position_on_circle,
+    are_there_shapes_present,
+    is_space_for_joint,
+)
+from kinetix.environment.utils import permute_state
+from kinetix.util.saving import load_world_state_pickle
+from flax import struct
+from kinetix.environment.env import create_empty_env
+@partial(jax.jit, static_argnums=(1, 3, 5, 6, 7, 8, 9, 10))
+def create_vmapped_filtered_distribution(
+    rng,
+    level_sampler,
+    env_params: EnvParams,
+    static_env_params: StaticEnvParams,
+    ued_params: UEDParams,
+    n_samples: int,
+    env,
+    do_filter_levels: bool,
+    level_filter_sample_ratio: int,
+    env_size_name: str,
+    level_filter_n_steps: int,
+):
+    if do_filter_levels and level_filter_n_steps > 0:
+        sample_ratio = level_filter_sample_ratio
+        n_unfiltered_samples = sample_ratio * n_samples
+        rng, _rng = jax.random.split(rng)
+        _rngs = jax.random.split(_rng, n_unfiltered_samples)
+        # unfiltered_levels = jax.vmap(level_sampler, in_axes=(0, None, None, None, None))(
+        #     _rngs, env_params, static_env_params, ued_params, env_size_name
+        # )
+        unfiltered_levels = jax.vmap(level_sampler, in_axes=(0,))(_rngs)
+        #
+        # No-op filtering
+        def _noop_step(states, rng):
+            rng, _rng = jax.random.split(rng)
+            _rngs = jax.random.split(_rng, n_unfiltered_samples)
+            action = jnp.zeros((n_unfiltered_samples, *env.action_space(env_params).shape), dtype=jnp.int32)
+            obs, states, reward, done, info = jax.vmap(env.step, in_axes=(0, 0, 0, None))(
+                _rngs, states, action, env_params
+            )
+            return states, (done, reward)
+        # Wrap levels
+        rng, _rng = jax.random.split(rng)
+        _rngs = jax.random.split(_rng, n_unfiltered_samples)
+        obsv, unfiltered_levels_wrapped = jax.vmap(env.reset_to_level, in_axes=(0, 0, None))(
+            _rngs, unfiltered_levels, env_params
+        )
+        rng, _rng = jax.random.split(rng)
+        _rngs = jax.random.split(_rng, level_filter_n_steps)
+        _, (done, rewards) = jax.lax.scan(_noop_step, unfiltered_levels_wrapped, xs=_rngs)
+        done_indexes = jnp.argmax(done, axis=0)
+        done_rewards = rewards[done_indexes, jnp.arange(n_unfiltered_samples)]
+        noop_solved_indexes = done_rewards > 0.5
+        p = noop_solved_indexes * 0.001 + (1 - noop_solved_indexes) * 1.0
+        p /= p.sum()
+        rng, _rng = jax.random.split(rng)
+        level_indexes = jax.random.choice(
+            _rng, jnp.arange(n_unfiltered_samples), shape=(n_samples,), replace=False, p=p
+        )
+        levels = jax.tree.map(lambda x: x[level_indexes], unfiltered_levels)
+    else:
+        rng, _rng = jax.random.split(rng)
+        _rngs = jax.random.split(_rng, n_samples)
+        levels = jax.vmap(level_sampler, in_axes=(0,))(_rngs)
+    return levels
+@partial(jax.jit, static_argnums=(1, 3, 4, 5))
+def sample_kinetix_level(
+    rng,
+    engine: PhysicsEngine,
+    env_params: EnvParams,
+    static_env_params: StaticEnvParams,
+    ued_params: UEDParams,
+    env_size_name: str = "l",
+):
+    """Generate one random level by incrementally adding shapes, joints and thrusters.
+
+    Steps, in order: start from `create_empty_env`, sample the floor's role, add two
+    free shapes, force at least one of {motor-connected shape, thruster}, add the
+    remaining shapes and thrusters with `jax.lax.scan`, randomly swap roles 1 and 2,
+    and finally return `permute_state(...)` of the result.
+
+    Args:
+        rng: JAX PRNG key.
+        engine: physics engine; only `calculate_collision_manifolds` is used here (static).
+        env_params: forwarded to the mutators.
+        static_env_params: slot counts and screen settings (static).
+        ued_params: generation probabilities and proposal counts (static).
+        env_size_name: when equal to "s", free shapes are never fixated (static).
+
+    Returns:
+        The generated state after `permute_state`.
+    """
+    rng, _rng = jax.random.split(rng)
+    # One key per independent random decision below (indices 0..11 are all used).
+    _rngs = jax.random.split(_rng, 12)
+    small_force_no_fixate = env_size_name == "s"
+    # Start with empty state
+    state = create_empty_env(static_env_params)
+    # Set the floor
+    prob_of_floor_colour = jnp.array(
+        [
+            ued_params.floor_prob_normal,
+            ued_params.floor_prob_green,
+            ued_params.floor_prob_blue,
+            ued_params.floor_prob_red,
+        ]
+    )
+    # Role ids follow the order of the probabilities above: 0 normal, 1 green, 2 blue, 3 red.
+    floor_colour = jax.random.choice(_rngs[0], jnp.arange(4), p=prob_of_floor_colour)
+    state = state.replace(polygon_shape_roles=state.polygon_shape_roles.at[0].set(floor_colour))
+    # When we add shapes we don't want them to collide with already existing shapes
+    def _choose_proposal_with_least_collisions(proposals, bias=None):
+        # Count active collision manifolds per proposal (polygon-polygon, circle-polygon and
+        # circle-circle, judging by the rr/cr/cc names) and keep the proposal with the lowest
+        # total. `bias`, if given, is added to the counts before taking the argmin.
+        rr, cr, cc = jax.vmap(engine.calculate_collision_manifolds)(proposals)
+        rr_collisions = jnp.sum(jnp.sum(rr.active.astype(jnp.int32), axis=-1), axis=-1)
+        cr_collisions = jnp.sum(cr.active.astype(jnp.int32), axis=-1)
+        cc_collisions = jnp.sum(cc.active.astype(jnp.int32), axis=-1)
+        all_collisions = jnp.concatenate(
+            [rr_collisions[:, None], cr_collisions[:, None], cc_collisions[:, None]], axis=1
+        )
+        num_collisions = jnp.sum(all_collisions, axis=-1)
+        if bias is not None:
+            num_collisions = num_collisions + bias
+        chosen_addition_idx = jnp.argmin(num_collisions)
+        return jax.tree.map(lambda x: x[chosen_addition_idx], proposals)
+    def _add_filtered_shape(rng, state, force_no_fixate=False):
+        # Draw `add_shape_n_proposals` candidate free-shape additions and keep the one
+        # with the fewest collisions.
+        rng, _rng = jax.random.split(rng)
+        _rngs = jax.random.split(_rng, ued_params.add_shape_n_proposals)
+        proposed_additions = jax.vmap(mutate_add_shape, in_axes=(0, None, None, None, None, None))(
+            _rngs,
+            state,
+            env_params,
+            static_env_params,
+            ued_params,
+            jnp.logical_or(force_no_fixate, small_force_no_fixate),
+        )
+        return _choose_proposal_with_least_collisions(proposed_additions)
+    def _add_filtered_connected_shape(rng, state, force_rjoint=False):
+        # Same as above for joint-connected shapes. Proposals whose `valid` flag is False
+        # get `connect_no_visibility_bias` added to their collision count.
+        rng, _rng = jax.random.split(rng)
+        _rngs = jax.random.split(_rng, ued_params.add_shape_n_proposals)
+        proposed_additions, valid = jax.vmap(mutate_add_connected_shape, in_axes=(0, None, None, None, None, None))(
+            _rngs, state, env_params, static_env_params, ued_params, force_rjoint
+        )
+        bias = (jnp.ones(ued_params.add_shape_n_proposals) - 1 * valid) * ued_params.connect_no_visibility_bias
+        return _choose_proposal_with_least_collisions(proposed_additions, bias=bias)
+    # Add green and blue - make sure they're not both fixated
+    # The two calls receive complementary flags, so exactly one of the two shapes is
+    # forbidden from being fixated. A floor with role 2 always forbids it for the first.
+    force_green_no_fixate = (jax.random.uniform(_rngs[1]) < 0.5) | (state.polygon_shape_roles[0] == 2)
+    state = _add_filtered_shape(_rngs[2], state, force_green_no_fixate)
+    state = _add_filtered_shape(_rngs[3], state, ~force_green_no_fixate)
+    # Forced controls
+    # Rows are (force_thruster, force_motor); every row forces at least one of the two.
+    forced_control = jnp.array([[0, 1], [1, 0], [1, 1]])[jax.random.randint(_rngs[4], (), 0, 3)]
+    force_thruster, force_motor = forced_control[0], forced_control[1]
+    # Forced motor
+    # Both branches add one shape; only the first connects it with a forced rjoint.
+    state = jax.lax.cond(
+        force_motor,
+        lambda: _add_filtered_connected_shape(_rngs[5], state, force_rjoint=True),  # force the rjoint
+        lambda: _add_filtered_shape(_rngs[6], state),
+    )
+    # Forced thruster
+    state = jax.lax.cond(
+        force_thruster,
+        lambda: mutate_add_thruster(_rngs[7], state, env_params, static_env_params, ued_params),
+        lambda: state,
+    )
+    # Add rest of shapes
+    # The "- 3" presumably accounts for the three shapes added above — TODO confirm.
+    n_shapes_to_add = (
+        static_env_params.num_polygons + static_env_params.num_circles - 3 - static_env_params.num_static_fixated_polys
+    )
+    def _add_shape(state, rng):
+        # Scan body: add a connected shape, a free shape, or nothing, chosen by the
+        # three `ued_params` chances below.
+        rng, _rng = jax.random.split(rng)
+        _rngs = jax.random.split(_rng, 3)
+        shape_add_type = jax.random.choice(
+            _rngs[0],
+            jnp.arange(3),
+            p=jnp.array(
+                [ued_params.add_connected_shape_chance, ued_params.add_shape_chance, ued_params.add_no_shape_chance]
+            ),
+        )
+        state = jax.lax.switch(
+            shape_add_type,
+            [
+                lambda: _add_filtered_connected_shape(_rngs[1], state),
+                lambda: _add_filtered_shape(_rngs[2], state),
+                lambda: state,
+            ],
+        )
+        return state, None
+    state, _ = jax.lax.scan(_add_shape, state, jax.random.split(_rngs[8], n_shapes_to_add))
+    # Add thrusters
+    # One slot fewer than `num_thrusters`, presumably reserved for the forced thruster — TODO confirm.
+    n_thrusters_to_add = static_env_params.num_thrusters - 1
+    def _add_thruster(state, rng):
+        # Scan body: add a thruster with probability `add_thruster_chance`.
+        rng, _rng = jax.random.split(rng)
+        _rngs = jax.random.split(_rng, 3)
+        state = jax.lax.cond(
+            jax.random.uniform(_rngs[0]) < ued_params.add_thruster_chance,
+            lambda: mutate_add_thruster(_rngs[1], state, env_params, static_env_params, ued_params),
+            lambda: state,
+        )
+        return state, None
+    state, _ = jax.lax.scan(_add_thruster, state, jax.random.split(_rngs[9], n_thrusters_to_add))
+    # Randomly swap green and blue to remove left-right bias
+    def _swap_roles(do_swap_roles, roles):
+        # Maps role 1 -> 2 and 2 -> 1, leaving other roles untouched, when `do_swap_roles` is set.
+        role1 = roles == 1
+        role2 = roles == 2
+        swapped_roles = roles * ~(role1 | role2) + role1.astype(int) * 2 + role2.astype(int) * 1
+        return jax.lax.select(do_swap_roles, swapped_roles, roles)
+    do_swap_roles = jax.random.uniform(_rngs[10], shape=()) < 0.5
+    # Don't want to swap if floor is non-standard
+    do_swap_roles &= state.polygon_shape_roles[0] == 0
+    state = state.replace(
+        polygon_shape_roles=_swap_roles(do_swap_roles, state.polygon_shape_roles),
+        circle_shape_roles=_swap_roles(do_swap_roles, state.circle_shape_roles),
+    )
+    return permute_state(_rngs[11], state, static_env_params)
+@partial(jax.jit, static_argnums=(2, 4, 5))
+def create_random_starting_distribution(
+    rng,
+    env_params: EnvParams,
+    static_env_params: StaticEnvParams,
+    ued_params: UEDParams,
+    env_size_name: str,
+    controllable=True,
+):
+    """Build a small random level: two free shapes plus optional forced controls.
+
+    `ued_params` is first rebuilt with `goal_body_size_factor=2.0`,
+    `thruster_power_multiplier=2.0` and `max_shape_size=0.5`. Each shape addition then
+    uses, with probability 0.05, a "large shapes" variant of those params instead.
+
+    Args:
+        rng: JAX PRNG key.
+        env_params: forwarded to the mutators.
+        static_env_params: static sizes (static argument).
+        ued_params: base generation parameters; overridden as described above.
+        env_size_name: not read by the body (static argument).
+        controllable: when true, force at least one of {rjoint-connected shape, thruster}
+            (static argument).
+
+    Returns:
+        The generated state after `permute_state`.
+
+    NOTE(review): `create_empty_env` is called here with two arguments, while
+    `sample_kinetix_level` in this same file calls it with `static_env_params` only.
+    One of the two call sites is likely wrong; confirm against `kinetix.environment.env`.
+    NOTE(review): `ued_params` is not a static argument here, yet the mutators it is
+    passed to mark it static; confirm this function still traces.
+    """
+    rng, _rng = jax.random.split(rng)
+    # 15 keys are drawn, but only indices 0-4 and 7 are used below.
+    _rngs = jax.random.split(_rng, 15)
+    d = to_state_dict(ued_params)
+    # Rebuild the params with the three overrides taking precedence over the incoming values.
+    ued_params = UEDParams(
+        **(
+            d
+            | dict(
+                goal_body_size_factor=2.0,
+                thruster_power_multiplier=2.0,
+                max_shape_size=0.5,
+            )
+        ),
+    )
+    prob_of_large_shapes = 0.05
+    ued_params_large_shapes = ued_params.replace(
+        max_shape_size=static_env_params.max_shape_size * 1.0, goal_body_size_factor=1.0
+    )
+    state = create_empty_env(env_params, static_env_params)
+    def _get_ued_params(rng):
+        # Leaf-wise select between the large-shape params and the default ones.
+        rng, _rng, _rng2 = jax.random.split(rng, 3)
+        large_shapes = jax.random.uniform(_rng) < prob_of_large_shapes
+        params_to_use = jax.tree.map(
+            lambda x, y: jax.lax.select(large_shapes, x, y), ued_params_large_shapes, ued_params
+        )
+        return params_to_use
+    def _my_add_shape(rng, state):
+        # Add a free shape using freshly drawn (possibly large-shape) params.
+        rng, _rng, _rng2 = jax.random.split(rng, 3)
+        return mutate_add_shape(_rng, state, env_params, static_env_params, _get_ued_params(_rng2))
+    def _my_add_connected_shape(rng, state, **kwargs):
+        # Add a joint-connected shape using freshly drawn (possibly large-shape) params.
+        rng, _rng, _rng2 = jax.random.split(rng, 3)
+        return mutate_add_connected_shape_proper(
+            _rng, state, env_params, static_env_params, _get_ued_params(_rng2), **kwargs
+        )
+    # Add the green thing and blue thing
+    state = _my_add_shape(_rngs[0], state)
+    state = _my_add_shape(_rngs[1], state)
+    if controllable:
+        # Forced controls
+        # Rows are (force_thruster, force_motor); every row forces at least one of the two.
+        forced_control = jnp.array([[0, 1], [1, 0], [1, 1]])[jax.random.randint(_rngs[2], (), 0, 3)]
+        force_thruster, force_motor = forced_control[0], forced_control[1]
+        # Forced motor
+        state = jax.lax.cond(
+            force_motor,
+            lambda: _my_add_connected_shape(_rngs[3], state, force_rjoint=True),  # force the rjoint
+            lambda: state,
+        )
+        # Forced thruster
+        # Uses the base overridden `ued_params`, not the per-call large-shape variant.
+        state = jax.lax.cond(
+            force_thruster,
+            lambda: mutate_add_thruster(_rngs[4], state, env_params, static_env_params, ued_params),
+            lambda: state,
+        )
+    return permute_state(_rngs[7], state, static_env_params)

kinetix/environment/ued/mutators.py ADDED Viewed

	@@ -0,0 +1,1157 @@

+from functools import partial
+import math
+import chex
+import jax
+import jax.numpy as jnp
+from flax.serialization import to_state_dict
+from jax2d.engine import (
+    PhysicsEngine,
+    calculate_collision_matrix,
+    calc_inverse_mass_polygon,
+    calc_inverse_mass_circle,
+    calc_inverse_inertia_circle,
+    calc_inverse_inertia_polygon,
+    recalculate_mass_and_inertia,
+    select_shape,
+)
+from jax2d.sim_state import SimState, RigidBody, Joint, Thruster
+from jax2d.maths import rmat
+from kinetix.environment.env_state import EnvParams, EnvState, StaticEnvParams
+from kinetix.environment.ued.ued_state import UEDParams
+from kinetix.environment.ued.util import (
+    count_roles,
+    is_space_for_joint,
+    make_velocities_zero,
+    sample_dimensions,
+    random_position_on_polygon,
+    random_position_on_circle,
+    get_role,
+    is_space_for_shape,
+    are_there_shapes_present,
+)
+from kinetix.util.saving import load_world_state_pickle
+from flax import struct
+from kinetix.environment.env import create_empty_env
+from kinetix.environment.ued.util import make_do_dummy_step
+@partial(jax.jit, static_argnums=(3, 4))
+def mutate_add_shape(
+    rng,
+    state: EnvState,
+    params: EnvParams,
+    static_env_params: StaticEnvParams,
+    ued_params: UEDParams,
+    force_no_fixate: bool = False,
+):
+    """Add one free (unjointed) polygon or circle to `state`.
+
+    If `is_space_for_shape(state)` is false, `state` is returned unchanged. Otherwise a
+    new rigid body with random size, position and rotation is written into the first
+    inactive polygon or circle slot, its role is recorded, and masses and inertias are
+    recomputed via `recalculate_mass_and_inertia`.
+
+    Args:
+        rng: JAX PRNG key.
+        state: current environment state.
+        params: supplies `pixels_per_unit` for the pixel-to-world conversion.
+        static_env_params: slot counts and screen size (static argument).
+        ued_params: size, placement and fixation probabilities (static argument).
+        force_no_fixate: when true, the new shape is never fixated.
+
+    Returns:
+        The updated state.
+    """
+    def do_dummy(rng, state):
+        return state
+    def do_add(rng, state):
+        rng, _rng = jax.random.split(rng)
+        # One key per independent random decision below (indices 0..8 are all used).
+        _rngs = jax.random.split(_rng, 9)
+        space_for_new_rect = state.polygon.active.astype(int).sum() < static_env_params.num_polygons
+        space_for_new_circle = state.circle.active.astype(int).sum() < static_env_params.num_circles
+        # Choose polygon vs circle, weighted only by which kind still has a free slot.
+        is_rect_p = jnp.array([space_for_new_rect * 1.0, space_for_new_circle * 1.0])
+        is_rect = jax.random.choice(_rngs[0], jnp.array([True, False], dtype=bool), p=is_rect_p)
+        # argmin over the boolean `active` mask gives the first inactive slot.
+        rect_index = jnp.argmin(state.polygon.active)
+        circle_index = jnp.argmin(state.circle.active)
+        shape_role = get_role(_rngs[1], state, static_env_params)
+        # Roles 1 and 2 have their size limit scaled by `goal_body_size_factor`.
+        max_shape_size = (
+            jnp.array([1.0, ued_params.goal_body_size_factor, ued_params.goal_body_size_factor, 1.0])[shape_role]
+            * ued_params.max_shape_size
+        )
+        vertices, half_dimensions, radius = sample_dimensions(
+            _rngs[2],
+            static_env_params,
+            is_rect,
+            ued_params,
+            max_shape_size=max_shape_size,
+        )
+        # Triangles (3 vertices) are only possible when `generate_triangles` is set.
+        n_vertices = jax.lax.select(ued_params.generate_triangles, jax.random.choice(_rngs[3], jnp.array([3, 4])), 4)
+        # Largest extent from the centre (half-dimensions scaled by sqrt(2), or the radius);
+        # used as the margin from the screen edges.
+        largest = jnp.max(jnp.array([half_dimensions[0] * jnp.sqrt(2), half_dimensions[1] * jnp.sqrt(2), radius]))
+        screen_dim_world = (
+            static_env_params.screen_dim[0] / params.pixels_per_unit,
+            static_env_params.screen_dim[1] / params.pixels_per_unit,
+        )
+        min_x = largest
+        max_x = screen_dim_world[0] - largest
+        # NOTE(review): the extra 0.4 presumably keeps shapes clear of the floor — confirm.
+        min_y = largest + 0.4
+        max_y = screen_dim_world[1] - largest
+        def _og_minmax():
+            return min_x, max_x, min_y, max_y
+        def _opposite_minmax():
+            # Role 1 is confined to the left half of the screen, role 2 to the right half.
+            # Roles 0 and 3 keep the full range.
+            return jax.lax.switch(
+                shape_role,
+                [
+                    (lambda: (min_x, max_x, min_y, max_y)),
+                    (lambda: (min_x, max_x - screen_dim_world[0] / 2, min_y, max_y)),
+                    (lambda: (min_x + screen_dim_world[0] / 2, max_x, min_y, max_y)),
+                    (lambda: (min_x, max_x, min_y, max_y)),
+                ],
+            )
+        min_x, max_x, min_y, max_y = jax.lax.cond(
+            jax.random.uniform(_rngs[4], shape=()) < ued_params.goal_body_opposide_side_chance,
+            _opposite_minmax,
+            _og_minmax,
+        )
+        # Uniform position inside [min_x, max_x] x [min_y, max_y].
+        position = jax.random.uniform(_rngs[5], shape=(2,)) * jnp.array(
+            [
+                max_x - min_x,
+                max_y - min_y,
+            ]
+        ) + jnp.array([min_x, min_y])
+        rotation = jax.random.uniform(_rngs[6], shape=()) * 2 * math.pi
+        velocity = jnp.array([0.0, 0.0])
+        angular_velocity = 0.0
+        density = 1.0
+        inverse_mass = jax.lax.select(
+            is_rect,
+            calc_inverse_mass_polygon(vertices, n_vertices, static_env_params, density)[0],
+            calc_inverse_mass_circle(radius, density),
+        )
+        inverse_inertia = jax.lax.select(
+            is_rect,
+            calc_inverse_inertia_polygon(vertices, n_vertices, static_env_params, density),
+            calc_inverse_inertia_circle(radius, density),
+        )
+        # Fixation probability grows linearly with mass and is capped at `fixate_chance_max`.
+        fixate_chance = ued_params.fixate_chance_min + (1.0 / inverse_mass) * ued_params.fixate_chance_scale
+        fixate_chance = jnp.minimum(fixate_chance, ued_params.fixate_chance_max)
+        is_fixated = jax.random.uniform(_rngs[7], shape=()) < fixate_chance
+        is_fixated &= ~force_no_fixate
+        # A fixated body gets zero inverse mass and zero inverse inertia.
+        inverse_mass *= 1 - is_fixated
+        inverse_inertia *= 1 - is_fixated
+        # We want to bias fixated shapes to starting nearer the bottom half of the screen
+        fixate_shape_bottom_bias = (
+            ued_params.fixate_shape_bottom_bias + ued_params.fixate_shape_bottom_bias_special_role * (shape_role != 0)
+        )
+        is_forcing_bottom = jax.random.uniform(_rngs[8]) < fixate_shape_bottom_bias
+        half_screen_height = (static_env_params.screen_dim[1] / params.pixels_per_unit) / 2.0
+        # Fixated shapes sampled in the upper half are moved down by half the screen height.
+        position = jax.lax.select(
+            is_fixated & is_forcing_bottom & (position[1] >= half_screen_height),
+            position.at[1].add(-half_screen_height),
+            position,
+        )
+        # This could be either a rect or a circle
+        new_rigid_body = RigidBody(
+            position=position,
+            velocity=velocity,
+            inverse_mass=inverse_mass,
+            inverse_inertia=inverse_inertia,
+            rotation=rotation,
+            angular_velocity=angular_velocity,
+            radius=radius,
+            active=True,
+            friction=1.0,
+            vertices=vertices,
+            n_vertices=n_vertices,
+            collision_mode=1,
+            restitution=0.0,
+        )
+        # Write the body and its role into the polygon arrays or the circle arrays,
+        # depending on `is_rect`; the other set of arrays is left unchanged.
+        state = state.replace(
+            polygon=jax.tree.map(
+                lambda x, y: jax.lax.select(is_rect, y.at[rect_index].set(x), y), new_rigid_body, state.polygon
+            ),
+            circle=jax.tree.map(
+                lambda x, y: jax.lax.select(jnp.logical_not(is_rect), y.at[circle_index].set(x), y),
+                new_rigid_body,
+                state.circle,
+            ),
+            polygon_shape_roles=jax.lax.select(
+                is_rect,
+                state.polygon_shape_roles.at[rect_index].set(shape_role),
+                state.polygon_shape_roles,
+            ),
+            circle_shape_roles=jax.lax.select(
+                jnp.logical_not(is_rect),
+                state.circle_shape_roles.at[circle_index].set(shape_role),
+                state.circle_shape_roles,
+            ),
+        )
+        return recalculate_mass_and_inertia(state, static_env_params, state.polygon_densities, state.circle_densities)
+    # Only add when a free slot exists; otherwise return the state untouched.
+    return jax.lax.cond(is_space_for_shape(state), do_add, do_dummy, rng, state)
+@partial(jax.jit, static_argnums=(3, 4))
+def mutate_add_connected_shape(
+    rng,
+    state: EnvState,
+    params: EnvParams,
+    static_env_params: StaticEnvParams,
+    ued_params: UEDParams,
+    force_rjoint: bool = False,
+):
+    def do_dummy(rng, state):
+        return state, False
+    def do_add(rng, state):
+        rng, _rng = jax.random.split(rng)
+        _rngs = jax.random.split(_rng, 21)
+        # Select a random index amongst the currently active shapes.
+        p_rect = state.polygon.active.at[: static_env_params.num_static_fixated_polys].set(False)
+        p_circle = state.circle.active
+        p_rect = p_rect.astype(jnp.float32)
+        p_circle = p_circle.astype(jnp.float32)
+        p_rect *= (state.polygon.inverse_mass == 0) * ued_params.connect_to_fixated_prob_coeff + (
+            state.polygon.inverse_mass != 0
+        ) * 1.0
+        p_circle *= (state.circle.inverse_mass == 0) * ued_params.connect_to_fixated_prob_coeff + (
+            state.circle.inverse_mass != 0
+        ) * 1.0
+        # Bias based on number of existing connections
+        rect_connections = jnp.zeros(static_env_params.num_polygons)
+        circle_connections = jnp.zeros(static_env_params.num_circles)
+        rect_connections = rect_connections.at[state.joint.a_index].add(
+            jnp.ones(static_env_params.num_joints)
+            * state.joint.active
+            * (state.joint.a_index < static_env_params.num_polygons)
+        )
+        rect_connections = rect_connections.at[state.joint.b_index].add(
+            jnp.ones(static_env_params.num_joints)
+            * state.joint.active
+            * (state.joint.b_index < static_env_params.num_polygons)
+        )
+        circle_connections = circle_connections.at[state.joint.a_index - static_env_params.num_polygons].add(
+            jnp.ones(static_env_params.num_joints)
+            * state.joint.active
+            * (state.joint.a_index >= static_env_params.num_polygons)
+        )
+        circle_connections = circle_connections.at[state.joint.b_index - static_env_params.num_polygons].add(
+            jnp.ones(static_env_params.num_joints)
+            * state.joint.active
+            * (state.joint.b_index >= static_env_params.num_polygons)
+        )
+        # Rectangles can have up to 2 connections
+        p_rect *= (-rect_connections + 2.0) / 2.0
+        p_rect = jnp.maximum(p_rect, 0.0)
+        # Circles can have 1 connection
+        p_circle *= circle_connections == 0
+        # To sample a target rect/circle, we have to have at least one.
+        target_rect_p = jnp.array(
+            [
+                (state.polygon.active.astype(int).sum() > static_env_params.num_static_fixated_polys) * 1.0,
+                (state.circle.active.astype(int).sum() > 0) * 1.0,
+            ]
+        )
+        # Don't connect to a circle if no connection-free ones exist
+        target_rect_p = target_rect_p.at[1].mul(p_circle.sum() > 0)
+        space_for_new_rect = state.polygon.active.astype(int).sum() < static_env_params.num_polygons
+        space_for_new_circle = state.circle.active.astype(int).sum() < static_env_params.num_circles
+        is_target_rect = jax.random.choice(_rngs[0], jnp.array([True, False], dtype=bool), p=target_rect_p) | (
+            ~space_for_new_rect
+        )
+        is_rect_p = jnp.array([space_for_new_rect * 1.0, space_for_new_circle * 1.0])
+        is_rect = jax.random.choice(_rngs[1], jnp.array([True, False], dtype=bool), p=is_rect_p) | (
+            ~is_target_rect & space_for_new_rect
+        )
+        shape_index = jax.lax.select(
+            is_rect,
+            jnp.argmin(state.polygon.active),
+            jnp.argmin(state.circle.active),
+        )
+        unified_shape_index = shape_index + (~is_rect) * static_env_params.num_polygons
+        vertices, half_dimensions, radius = sample_dimensions(
+            _rngs[2], static_env_params, is_rect, ued_params, max_shape_size=ued_params.max_shape_size
+        )
+        n_vertices = jax.lax.select(ued_params.generate_triangles, jax.random.choice(_rngs[3], jnp.array([3, 4])), 4)
+        rotation = jax.random.uniform(_rngs[4], shape=()) * 2 * math.pi
+        velocity = jnp.array([0.0, 0.0])
+        angular_velocity = 0.0
+        density = 1.0
+        inverse_mass = jax.lax.select(
+            is_rect,
+            calc_inverse_mass_polygon(vertices, n_vertices, static_env_params, density)[0],
+            calc_inverse_mass_circle(radius, density),
+        )
+        inverse_inertia = jax.lax.select(
+            is_rect,
+            calc_inverse_inertia_polygon(vertices, n_vertices, static_env_params, density),
+            calc_inverse_inertia_circle(radius, density),
+        )
+        # Joint
+        current_num_rjoints = (jnp.logical_not(state.joint.is_fixed_joint) * state.joint.active).sum()
+        is_rjoint = jnp.logical_or(
+            jnp.logical_or(jax.random.uniform(_rngs[5]) < 0.5, force_rjoint),
+            current_num_rjoints < ued_params.min_rjoints_bias,
+        )
+        joint_index = jnp.argmin(state.joint.active)
+        local_joint_position_rect = random_position_on_polygon(_rngs[6], vertices, n_vertices, static_env_params)
+        local_joint_position_circle = random_position_on_circle(_rngs[7], radius, on_centre_chance=1.0)
+        local_joint_position = jax.lax.select(is_rect, local_joint_position_rect, local_joint_position_circle)
+        p_rect = jax.lax.select(p_rect.sum() == 0, state.polygon.active.astype(jnp.float32), p_rect)
+        p_circle = jax.lax.select(p_circle.sum() == 0, state.circle.active.astype(jnp.float32), p_circle)
+        target_index = jax.lax.select(
+            is_target_rect,
+            jax.random.choice(
+                _rngs[8],
+                jnp.arange(static_env_params.num_polygons),
+                p=p_rect,
+            ),
+            jax.random.choice(
+                _rngs[9],
+                jnp.arange(static_env_params.num_circles),
+                p=p_circle,
+            ),
+        )
+        unified_target_index = target_index + jnp.logical_not(is_target_rect) * static_env_params.num_polygons
+        target_shape = select_shape(state, unified_target_index, static_env_params)
+        target_joint_position_rect = random_position_on_polygon(
+            _rngs[10], state.polygon.vertices[target_index], state.polygon.n_vertices[target_index], static_env_params
+        )
+        target_joint_position_circle = random_position_on_circle(
+            _rngs[11], state.circle.radius[target_index], on_centre_chance=1.0
+        )
+        target_joint_position = jax.lax.select(is_target_rect, target_joint_position_rect, target_joint_position_circle)
+        # Calculate the world position of the new shape
+        # We know the rotation of the new shape. We also know the position of the current shape, which we want to remain fixed.
+        # Set `position` such that local_joint_position is the same as `target_joint_position`
+        global_joint_pos = target_shape.position + jnp.matmul(rmat(target_shape.rotation), target_joint_position)
+        position = global_joint_pos - jnp.matmul(rmat(rotation), local_joint_position)
+        _, pos_diff = calc_inverse_mass_polygon(vertices, n_vertices, static_env_params, density)
+        position = jax.lax.select(is_rect, position + pos_diff, position)
+        local_joint_position = jax.lax.select(is_rect, local_joint_position - pos_diff, local_joint_position)
+        vertices = jax.lax.select(is_rect, vertices - pos_diff[None], vertices)
+        target_role = jax.lax.select(
+            is_target_rect, state.polygon_shape_roles[target_index], state.circle_shape_roles[target_index]
+        )
+        # We cannot have role 1 and role 2 being connected.
+        p = jnp.array([1.0, 1.0, 1.0, 1.0])
+        # If role is 0, keep all probs at 1, otherwise set the target role's complement to 0 prob
+        # 3 - role turns 1 to 2 and 2 to 1
+        # If the target role is three, we set everything to zero except for the default
+        p = jax.lax.select(
+            target_role == 0,
+            p,
+            jax.lax.select(
+                target_role <= 2,
+                p.at[3 - target_role].set(False).at[3].set(False),
+                (p.at[2].set(False).at[1].set(False)),
+            ),
+        )
+        shape_role = get_role(_rngs[12], state, static_env_params, initial_p=p)
+        # This could be either a rect or a circle
+        new_rigid_body = RigidBody(
+            position=position,
+            velocity=velocity,
+            inverse_mass=inverse_mass,
+            inverse_inertia=inverse_inertia,
+            rotation=rotation,
+            angular_velocity=angular_velocity,
+            radius=radius,
+            active=True,
+            friction=1.0,
+            vertices=vertices,
+            n_vertices=n_vertices,
+            collision_mode=1,
+            restitution=0.0,
+        )
+        # Change the shape indices such that a_index is less than b_index
+        a_index = shape_index + (1 - is_rect) * static_env_params.num_polygons
+        b_index = target_index + (1 - is_target_rect) * static_env_params.num_polygons
+        should_swap = a_index > b_index
+        a_index, b_index, local_joint_position, target_joint_position, shape_a, shape_b = jax.lax.cond(
+            should_swap,
+            lambda x: (x[1], x[0], x[3], x[2], x[5], x[4]),  # pairwise swap
+            lambda x: x,
+            (a_index, b_index, local_joint_position, target_joint_position, new_rigid_body, target_shape),
+        )
+        motor_on = jax.random.uniform(_rngs[13], shape=()) < ued_params.motor_on_chance
+        joint_colour = jax.random.randint(_rngs[14], shape=(), minval=0, maxval=static_env_params.num_motor_bindings)
+        joint_rotation = shape_b.rotation - shape_a.rotation
+        motor_speed = jax.random.uniform(
+            _rngs[15], shape=(), minval=ued_params.motor_min_speed, maxval=ued_params.motor_max_speed
+        )
+        motor_power = jax.random.uniform(
+            _rngs[16], shape=(), minval=ued_params.motor_min_power, maxval=ued_params.motor_max_power
+        )
+        wheel_power = jax.random.uniform(
+            _rngs[20], shape=(), minval=ued_params.motor_min_power, maxval=ued_params.wheel_max_power
+        )
+        # High-powered wheels break the physics engine - this is a temporary fix
+        motor_power = jax.lax.select(is_rect & is_target_rect, motor_power, wheel_power)
+        motor_has_joint_limits = jax.random.uniform(_rngs[17], shape=()) < ued_params.joint_limit_chance
+        motor_has_joint_limits &= is_rect & is_target_rect
+        joint_limit_min = (
+            jax.random.uniform(_rngs[18], shape=(), minval=-ued_params.joint_limit_max, maxval=0.0)
+            * motor_has_joint_limits
+        )
+        joint_limit_max = (
+            jax.random.uniform(_rngs[19], shape=(), minval=0.0, maxval=ued_params.joint_limit_max)
+            * motor_has_joint_limits
+        )
+        rjoint = Joint(
+            a_index=a_index,
+            b_index=b_index,
+            a_relative_pos=local_joint_position,
+            b_relative_pos=target_joint_position,
+            global_position=global_joint_pos,
+            active=True,
+            motor_speed=motor_speed,
+            motor_power=motor_power,
+            motor_on=motor_on,
+            # colour=joint_colour,
+            motor_has_joint_limits=motor_has_joint_limits,
+            min_rotation=joint_limit_min,
+            max_rotation=joint_limit_max,
+            is_fixed_joint=False,
+            rotation=0.0,
+            acc_impulse=jnp.zeros((2,), dtype=jnp.float32),
+            acc_r_impulse=jnp.zeros((), dtype=jnp.float32),
+        )
+        fjoint = Joint(
+            a_index=a_index,
+            b_index=b_index,
+            a_relative_pos=local_joint_position,
+            b_relative_pos=target_joint_position,
+            global_position=global_joint_pos,
+            active=True,
+            rotation=joint_rotation,
+            acc_impulse=jnp.zeros((2,), dtype=jnp.float32),
+            acc_r_impulse=jnp.zeros((), dtype=jnp.float32),
+            is_fixed_joint=True,
+            motor_has_joint_limits=False,
+            min_rotation=0.0,
+            max_rotation=0.0,
+            motor_on=False,
+            motor_power=0.0,
+            motor_speed=0.0,
+        )
+        state = state.replace(
+            polygon=jax.tree.map(
+                lambda x, y: jax.lax.select(is_rect, y.at[shape_index].set(x), y), new_rigid_body, state.polygon
+            ),
+            circle=jax.tree.map(
+                lambda x, y: jax.lax.select(jnp.logical_not(is_rect), y.at[shape_index].set(x), y),
+                new_rigid_body,
+                state.circle,
+            ),
+            joint=jax.tree.map(
+                lambda rj, fj, y: jax.lax.select(is_rjoint, y.at[joint_index].set(rj), y.at[joint_index].set(fj)),
+                rjoint,
+                fjoint,
+                state.joint,
+            ),
+            polygon_shape_roles=jax.lax.select(
+                is_rect,
+                state.polygon_shape_roles.at[shape_index].set(shape_role),
+                state.polygon_shape_roles,
+            ),
+            circle_shape_roles=jax.lax.select(
+                jnp.logical_not(is_rect),
+                state.circle_shape_roles.at[shape_index].set(shape_role),
+                state.circle_shape_roles,
+            ),
+            motor_bindings=state.motor_bindings.at[joint_index].set(joint_colour),
+        )
+        # We need the new collision matrix.
+        state = state.replace(collision_matrix=calculate_collision_matrix(static_env_params, state.joint))
+        state = recalculate_mass_and_inertia(state, static_env_params, state.polygon_densities, state.circle_densities)
+        # Was this a valid addition?
+        # We calculate whether (assuming the possibility of 360 degree rotation around the joint)
+        # both shapes can be visible
+        # This is to remove the common degenerate pattern of connected shapes being fully inside each other
+        def _get_min_rect_dist(r_id, local_pos):
+            rect: RigidBody = jax.tree.map(lambda x: x[r_id], state.polygon)
+            half_width = (jnp.max(rect.vertices[:, 0]) - jnp.min(rect.vertices[:, 0])) / 2.0
+            half_height = (jnp.max(rect.vertices[:, 1]) - jnp.min(rect.vertices[:, 1])) / 2.0
+            dist_x = half_width - jnp.abs(local_pos[0])
+            dist_y = half_height - jnp.abs(local_pos[1])
+            return jnp.minimum(dist_x, dist_y)
+        def _get_max_rect_dist(r_id, local_pos):
+            rect: RigidBody = jax.tree.map(lambda x: x[r_id], state.polygon)
+            half_width = (jnp.max(rect.vertices[:, 0]) - jnp.min(rect.vertices[:, 0])) / 2.0
+            half_height = (jnp.max(rect.vertices[:, 1]) - jnp.min(rect.vertices[:, 1])) / 2.0
+            dist_x = jnp.maximum(
+                jnp.abs(half_width - local_pos[0]),
+                jnp.abs(-half_width - local_pos[0]),
+            )
+            dist_y = jnp.maximum(
+                jnp.abs(half_height - local_pos[1]),
+                jnp.abs(-half_height - local_pos[1]),
+            )
+            return jnp.sqrt(dist_x * dist_x + dist_y * dist_y)
+        def are_both_shapes_showing(idx1, idx2, local_pos1, local_pos2):
+            def _is_small_shape_showing(small_idx, big_idx, small_local_pos, big_local_pos):
+                small_is_poly = small_idx < static_env_params.num_polygons
+                big_is_poly = big_idx < static_env_params.num_polygons
+                # CC
+                cc_result = False
+                # CR
+                cr_r_dist = _get_min_rect_dist(big_idx, big_local_pos)
+                cr_result = (
+                    cr_r_dist + ued_params.connect_visibility_min
+                    < state.circle.radius[small_idx - static_env_params.num_polygons]
+                )
+                # RC
+                rc_r_dist = _get_max_rect_dist(small_idx, small_local_pos)
+                rc_result = (
+                    rc_r_dist
+                    > state.circle.radius[big_idx - static_env_params.num_polygons] + ued_params.connect_visibility_min
+                )
+                # RR
+                rr_small_dist = _get_max_rect_dist(small_idx, small_local_pos)
+                rr_big_dist = _get_min_rect_dist(big_idx, big_local_pos)
+                rr_result = rr_small_dist > rr_big_dist + ued_params.connect_visibility_min
+                # Select
+                return jax.lax.select(
+                    small_is_poly,
+                    jax.lax.select(big_is_poly, rr_result, rc_result),
+                    jax.lax.select(big_is_poly, cr_result, cc_result),
+                )
+            # Are both shapes showing?
+            return _is_small_shape_showing(idx1, idx2, local_pos1, local_pos2) & _is_small_shape_showing(
+                idx2, idx1, local_pos2, local_pos1
+            )
+        valid = are_both_shapes_showing(
+            unified_shape_index, unified_target_index, local_joint_position, target_joint_position
+        )
+        return state, valid
+    # To add a connected shape, we must have both at least one existing shape and space
+    return jax.lax.cond(
+        is_space_for_shape(state) & are_there_shapes_present(state, static_env_params) & is_space_for_joint(state),
+        do_add,
+        do_dummy,
+        rng,
+        state,
+    )
+@partial(jax.jit, static_argnums=(3, 4))
+def mutate_add_connected_shape_proper(
+    rng,
+    state: EnvState,
+    params: EnvParams,
+    static_env_params: StaticEnvParams,
+    ued_params: UEDParams,
+    force_rjoint: bool = False,
+):
+    return mutate_add_connected_shape(rng, state, params, static_env_params, ued_params, force_rjoint=force_rjoint)[0]
+@partial(jax.jit, static_argnums=(3, 4))
+def mutate_remove_shape(
+    rng, state: EnvState, params: EnvParams, static_env_params: StaticEnvParams, ued_params: UEDParams
+):
+    can_remove_mask = (
+        jnp.concatenate([state.polygon.active, state.circle.active])
+        .at[: static_env_params.num_static_fixated_polys]
+        .set(False)
+    )
+    def dummy(rng, state):
+        return state
+    def do_remove(rng, state: EnvState):
+        rng, _rng = jax.random.split(rng)
+        rngs = jax.random.split(_rng, 2)
+        p = can_remove_mask.astype(jnp.float32)
+        index_to_remove = jax.random.choice(rngs[0], jnp.arange(can_remove_mask.shape[0]), p=p)
+        is_rect = index_to_remove < static_env_params.num_polygons
+        state = state.replace(
+            polygon=state.polygon.replace(
+                active=jax.lax.select(
+                    is_rect, state.polygon.active.at[index_to_remove].set(False), state.polygon.active
+                )
+            ),
+            circle=state.circle.replace(
+                active=jax.lax.select(
+                    jnp.logical_not(is_rect),
+                    state.circle.active.at[index_to_remove - static_env_params.num_polygons].set(False),
+                    state.circle.active,
+                )
+            ),
+        )
+        # We need to now remove any joints connected to this shape
+        joints_to_remove = (state.joint.a_index == index_to_remove) | (state.joint.b_index == index_to_remove)
+        thrusters_to_remove = state.thruster.object_index == index_to_remove
+        state = state.replace(
+            joint=state.joint.replace(active=jnp.where(joints_to_remove, False, state.joint.active)),
+            thruster=state.thruster.replace(active=jnp.where(thrusters_to_remove, False, state.thruster.active)),
+        )
+        # Now recalculate collision matrix
+        state = state.replace(collision_matrix=calculate_collision_matrix(static_env_params, state.joint))
+        return state
+    return jax.lax.cond(can_remove_mask.sum() > 0, do_remove, dummy, rng, state)
+@partial(jax.jit, static_argnums=(3, 4))
+def mutate_remove_joint(
+    rng, state: EnvState, params: EnvParams, static_env_params: StaticEnvParams, ued_params: UEDParams
+):
+    can_remove_mask = state.joint.active
+    def dummy(rng, state):
+        return state
+    def do_remove(rng, state):
+        rng, _rng = jax.random.split(rng)
+        rngs = jax.random.split(_rng, 2)
+        p = can_remove_mask.astype(jnp.float32)
+        index_to_remove = jax.random.choice(rngs[0], jnp.arange(can_remove_mask.shape[0]), p=p)
+        state = state.replace(joint=state.joint.replace(active=state.joint.active.at[index_to_remove].set(False)))
+        # Recalculate collision matrix.
+        state = state.replace(collision_matrix=calculate_collision_matrix(static_env_params, state.joint))
+        return state
+    return jax.lax.cond(can_remove_mask.sum() > 0, do_remove, dummy, rng, state)
+@partial(jax.jit, static_argnums=(3, 4))
+def mutate_swap_role(
+    rng, state: EnvState, params: EnvParams, static_env_params: StaticEnvParams, ued_params: UEDParams
+):
+    def _cr(*args):
+        return count_roles(*args, include_static_polys=False)
+    role_counts = jax.vmap(_cr, (None, None, 0))(state, static_env_params, jnp.arange(4))
+    are_there_multiple_roles = (role_counts > 0).sum() > 1
+    def dummy(rng, state):
+        return state
+    def do_swap(rng, state):
+        rng, _rng = jax.random.split(rng)
+        rngs = jax.random.split(_rng, 2)
+        all_roles = jnp.concatenate([state.polygon_shape_roles, state.circle_shape_roles])
+        p = (
+            (jnp.concatenate([state.polygon.active, state.circle.active]))
+            .astype(jnp.float32)
+            .at[: static_env_params.num_static_fixated_polys]
+            .set(0.0)
+        )
+        shape_idx_a = jax.random.choice(
+            rngs[0], jnp.arange(static_env_params.num_polygons + static_env_params.num_circles), p=p
+        )
+        role_a = all_roles[shape_idx_a]
+        p = jnp.where(all_roles == role_a, 0.0, p)
+        shape_idx_b = jax.random.choice(
+            rngs[1], jnp.arange(static_env_params.num_polygons + static_env_params.num_circles), p=p
+        )
+        role_b = all_roles[shape_idx_b]
+        role_a, role_b = role_b, role_a
+        for idx, role in [(shape_idx_a, role_a), (shape_idx_b, role_b)]:
+            is_rect = idx < static_env_params.num_polygons
+            state = state.replace(
+                polygon_shape_roles=jax.lax.select(
+                    is_rect, state.polygon_shape_roles.at[idx].set(role), state.polygon_shape_roles
+                ),
+                circle_shape_roles=jax.lax.select(
+                    jnp.logical_not(is_rect),
+                    state.circle_shape_roles.at[idx - static_env_params.num_polygons].set(role),
+                    state.circle_shape_roles,
+                ),
+            )
+        return state
+    return jax.lax.cond(are_there_multiple_roles, do_swap, dummy, rng, state)
+@partial(jax.jit, static_argnums=(3, 4))
+def mutate_toggle_fixture(
+    rng, state: EnvState, params: EnvParams, static_env_params: StaticEnvParams, ued_params: UEDParams
+):
+    can_toggle_mask = (
+        jnp.concatenate([state.polygon.active, state.circle.active])
+        .at[: static_env_params.num_static_fixated_polys]
+        .set(False)
+    )
+    def dummy(rng, state):
+        return state
+    def do_toggle(rng, state: EnvState):
+        rng, _rng = jax.random.split(rng)
+        rngs = jax.random.split(_rng, 2)
+        p = can_toggle_mask.astype(jnp.float32)
+        index_to_remove = jax.random.choice(rngs[0], jnp.arange(can_toggle_mask.shape[0]), p=p)
+        is_rect = index_to_remove < static_env_params.num_polygons
+        is_current_fixed = (
+            jax.lax.select(
+                is_rect,
+                state.polygon.inverse_inertia[index_to_remove],
+                state.circle.inverse_inertia[index_to_remove - static_env_params.num_polygons],
+            )
+            == 0.0
+        )
+        is_current_fixed = is_current_fixed * 1.0  # if it is fixed, we set it to 1.0 and recalc.
+        # If it is not fixed, this is 0.0, and it makes it fixed.
+        state = state.replace(
+            polygon=state.polygon.replace(
+                inverse_inertia=jax.lax.select(
+                    is_rect,
+                    state.polygon.inverse_inertia.at[index_to_remove].set(is_current_fixed),
+                    state.polygon.inverse_inertia,
+                ),
+                inverse_mass=jax.lax.select(
+                    is_rect,
+                    state.polygon.inverse_mass.at[index_to_remove].set(is_current_fixed),
+                    state.polygon.inverse_mass,
+                ),
+            ),
+            circle=state.circle.replace(
+                inverse_inertia=jax.lax.select(
+                    jnp.logical_not(is_rect),
+                    state.circle.inverse_inertia.at[index_to_remove - static_env_params.num_polygons].set(
+                        is_current_fixed
+                    ),
+                    state.circle.inverse_inertia,
+                ),
+                inverse_mass=jax.lax.select(
+                    jnp.logical_not(is_rect),
+                    state.circle.inverse_mass.at[index_to_remove - static_env_params.num_polygons].set(
+                        is_current_fixed
+                    ),
+                    state.circle.inverse_mass,
+                ),
+            ),
+        )
+        state = recalculate_mass_and_inertia(state, static_env_params, state.polygon_densities, state.circle_densities)
+        return state
+    return jax.lax.cond(can_toggle_mask.sum() > 0, do_toggle, dummy, rng, state)
+@partial(jax.jit, static_argnums=(3, 4))
+def mutate_add_thruster(
+    rng, state: EnvState, params: EnvParams, static_env_params: StaticEnvParams, ued_params: UEDParams
+):
+    is_fixated = jnp.concatenate([state.polygon.inverse_mass == 0, state.circle.inverse_mass == 0])
+    # is_fixated = jnp.zeros_like(is_fixated, dtype=bool)
+    is_active = jnp.concatenate([state.polygon.active, state.circle.active])
+    can_add_mask = is_active & (~is_fixated)
+    can_add_mask = jnp.logical_and(is_active, jnp.logical_not(is_fixated))
+    def dummy(rng, state):
+        return state
+    def do_add(rng, state: EnvState):
+        rng, _rng = jax.random.split(rng)
+        _rngs = jax.random.split(_rng, 10)
+        p = can_add_mask.astype(jnp.float32)
+        shape_index = jax.random.choice(_rngs[0], jnp.arange(can_add_mask.shape[0]), p=p)
+        thruster_idx = jnp.argmin(state.thruster.active)
+        shape = select_shape(state, shape_index, static_env_params)
+        position_to_add_thruster = jax.lax.select(
+            shape_index < static_env_params.num_polygons,
+            random_position_on_polygon(_rngs[1], shape.vertices, shape.n_vertices, static_env_params),
+            random_position_on_circle(_rngs[2], shape.radius, on_centre_chance=0.0),
+        )
+        direction_to_com = ((jax.random.uniform(_rngs[3]) > 0.5) * 2 - 1) * position_to_add_thruster
+        direction_to_com = jax.lax.select(
+            jnp.linalg.norm(direction_to_com) == 0.0, jnp.array([1.0, 0.0]), direction_to_com
+        )
+        thruster_angle = jax.lax.select(
+            jax.random.uniform(_rngs[4]) < ued_params.thruster_align_com_prob,
+            jnp.atan2(direction_to_com[1], direction_to_com[0]),  # test this
+            jax.random.uniform(
+                _rngs[5],
+                (),
+            )
+            * 2
+            * jnp.pi,
+        )
+        thruster_power = jax.random.uniform(_rngs[6]) * 1.5 + 0.5
+        thruster = Thruster(
+            object_index=shape_index,
+            active=True,
+            relative_position=position_to_add_thruster,  # jnp.array([0.0, 0.0]),  # a bit of a hack but reasonable.
+            rotation=thruster_angle,  # jax.random.choice(rngs[1], jnp.arange(4) * jnp.pi / 2),
+            power=1.0
+            / jax.lax.select(shape.inverse_mass == 0, 1.0, shape.inverse_mass)
+            * ued_params.thruster_power_multiplier
+            * thruster_power,
+            global_position=shape.position + jnp.matmul(rmat(shape.rotation), position_to_add_thruster),
+        )
+        thruster_colour = jax.random.randint(
+            _rngs[7], shape=(), minval=0, maxval=static_env_params.num_thruster_bindings
+        )
+        state = state.replace(
+            thruster=jax.tree_map(lambda y, x: y.at[thruster_idx].set(x), state.thruster, thruster),
+            thruster_bindings=state.thruster_bindings.at[thruster_idx].set(thruster_colour),
+        )
+        return state
+    return jax.lax.cond(
+        jnp.logical_and((can_add_mask.sum() > 0), (jnp.logical_not(state.thruster.active).sum() > 0)),
+        do_add,
+        dummy,
+        rng,
+        state,
+    )
+@partial(jax.jit, static_argnums=(3, 4))
+def mutate_change_gravity(
+    rng, state: EnvState, params: EnvParams, static_env_params: StaticEnvParams, ued_params: UEDParams
+):
+    rng, _rng = jax.random.split(rng)
+    rngs = jax.random.split(_rng, 2)
+    new_gravity = jax.lax.select(
+        jax.random.uniform(rngs[0]) < 0.5,
+        jnp.array([0.0, -9.8]),
+        jnp.array([0.0, jax.random.uniform(rngs[1], minval=-9.8, maxval=0)]),
+    )
+    return state.replace(gravity=new_gravity)
+@partial(jax.jit, static_argnums=(3, 4))
+def mutate_remove_thruster(
+    rng, state: EnvState, params: EnvParams, static_env_params: StaticEnvParams, ued_params: UEDParams
+):
+    are_there_thrusters = state.thruster.active
+    def dummy(rng, state):
+        return state
+    def do_remove(rng, state):
+        rng, _rng = jax.random.split(rng)
+        rngs = jax.random.split(_rng, 2)
+        p = are_there_thrusters.astype(jnp.float32)
+        thruster_idx = jax.random.choice(rngs[0], jnp.arange(are_there_thrusters.shape[0]), p=p)
+        return state.replace(thruster=state.thruster.replace(active=state.thruster.active.at[thruster_idx].set(False)))
+    return jax.lax.cond(are_there_thrusters.sum() > 0, do_remove, dummy, rng, state)
+def make_mutate_change_shape_size(params, static_env_params):
+    do_dummy_step = make_do_dummy_step(params, static_env_params)
+    @partial(jax.jit, static_argnums=(3, 4))
+    def mutate_change_shape_size(
+        rng, state: EnvState, params: EnvParams, static_env_params: StaticEnvParams, ued_params: UEDParams
+    ):
+        shape_active = jnp.concatenate(
+            [state.polygon.active.at[: static_env_params.num_static_fixated_polys].set(False), state.circle.active]
+        )
+        def dummy(rng, state):
+            return state
+        def do_change(rng, state):
+            rng, _rng = jax.random.split(rng)
+            rngs = jax.random.split(_rng, 10)
+            p = shape_active.astype(jnp.float32)
+            shape_idx = jax.random.choice(rngs[0], jnp.arange(shape_active.shape[0]), p=p)
+            is_rect = shape_idx < static_env_params.num_polygons
+            vertices, _, radius = sample_dimensions(
+                rngs[1], static_env_params, is_rect, ued_params, max_shape_size=ued_params.max_shape_size
+            )
+            idx_new_top_left = jnp.argmin(vertices[:, 0] * 100 + vertices[:, 1])
+            idx_old_top_left = jnp.argmin(
+                state.polygon.vertices[shape_idx, :, 0] * 100 + state.polygon.vertices[shape_idx, :, 1]
+            )
+            scale_rect = (vertices[idx_new_top_left]) / (state.polygon.vertices[shape_idx, idx_old_top_left])
+            scale_circle = radius / state.circle.radius[shape_idx - static_env_params.num_polygons]
+            vertices = state.polygon.vertices[shape_idx] * scale_rect
+            scale = jax.lax.select(
+                is_rect,
+                scale_rect,
+                jnp.array([scale_circle, scale_circle]),
+            )
+            is_a = ((state.joint.a_index == shape_idx) & state.joint.active)[:, None]
+            is_b = ((state.joint.b_index == shape_idx) & state.joint.active)[:, None]
+            state = state.replace(
+                joint=state.joint.replace(
+                    a_relative_pos=(state.joint.a_relative_pos * scale[None]) * is_a
+                    + (1 - is_a) * state.joint.a_relative_pos,
+                    b_relative_pos=(state.joint.b_relative_pos * scale[None]) * is_b
+                    + (1 - is_b) * state.joint.b_relative_pos,
+                ),
+                polygon=state.polygon.replace(
+                    vertices=jax.lax.select(
+                        is_rect, state.polygon.vertices.at[shape_idx].set(vertices), state.polygon.vertices
+                    ),
+                ),
+                circle=state.circle.replace(
+                    radius=jax.lax.select(
+                        jnp.logical_not(is_rect),
+                        state.circle.radius.at[shape_idx - static_env_params.num_polygons].set(radius),
+                        state.circle.radius,
+                    )
+                ),
+            )
+            def _ss(state, _):
+                return do_dummy_step(state), None
+            state = jax.lax.scan(_ss, state, jnp.arange(5))[0]
+            return recalculate_mass_and_inertia(
+                state, static_env_params, state.polygon_densities, state.circle_densities
+            )
+        return jax.lax.cond(shape_active.sum() > 0, do_change, dummy, rng, state)
+    return mutate_change_shape_size
+@partial(jax.jit, static_argnums=(3, 4))
+def mutate_change_shape_location(
+    rng, state: EnvState, params: EnvParams, static_env_params: StaticEnvParams, ued_params: UEDParams
+):
+    """Translate a randomly chosen shape (and the shapes grouped with it) by a small random offset.
+    The offset is uniform in [-0.5, 0.5) per axis. Every shape whose entry in the
+    chosen shape's `collision_matrix` row is 0 is moved with it, except the first
+    `num_static_fixated_polys` polygons. After the tentative move, the moved group
+    is shifted back by however far the bounding box of all non-static active
+    shapes sticks out of the screen (or below y = 0.4). Mass and inertia are
+    recalculated at the end. Returns the state unchanged if no shape is eligible.
+    `ued_params` is unused.
+    """
+    # Unified (polygons first, then circles) mask of shapes that may be picked.
+    shape_active = jnp.concatenate(
+        [state.polygon.active.at[: static_env_params.num_static_fixated_polys].set(False), state.circle.active]
+    )
+    def dummy(rng, state):
+        return state
+    def do_change(rng, state):
+        rng, _rng = jax.random.split(rng)
+        rngs = jax.random.split(_rng, 10)
+        p = shape_active.astype(jnp.float32)
+        shape_idx = jax.random.choice(rngs[0], jnp.arange(shape_active.shape[0]), p=p)
+        delta_pos = jax.random.uniform(rngs[1], shape=(2,)) - 0.5  # [-0.5, 0.5]
+        positions = jnp.concatenate([state.polygon.position, state.circle.position])
+        # Shapes with a 0 entry in the chosen shape's collision row move along with it.
+        # NOTE(review): presumably these are the shapes joined to it (and itself) - confirm
+        # against calculate_collision_matrix.
+        mask_of_shape_locations_to_change = (
+            (state.collision_matrix[shape_idx] == 0).at[: static_env_params.num_static_fixated_polys].set(False)
+        )
+        # check the new positions, but then maybe revert if any shape becomes out of bounds now.
+        new_positions_tentative = positions * (
+            1 - mask_of_shape_locations_to_change[:, None]
+        ) + mask_of_shape_locations_to_change[:, None] * (positions + delta_pos[None])
+        polys = state.polygon
+        p_pos = new_positions_tentative[: static_env_params.num_polygons]
+        c_pos = new_positions_tentative[static_env_params.num_polygons :]  # state.circle.position
+        rad = state.circle.radius
+        # Only the first n_vertices entries of each polygon's vertex array are considered.
+        rect_vertex_mask = jnp.arange(static_env_params.max_polygon_vertices)[None] < polys.n_vertices[:, None]
+        rect_mask = polys.active.at[: static_env_params.num_static_fixated_polys].set(False)
+        circ_mask = state.circle.active
+        # check if new pos maybe goes out of bounds:
+        # Bounding box over all non-static active polygons and active circles. Polygon
+        # extents use position + local vertex offsets (rotation is not applied here).
+        min_x, max_x, min_y, max_y = (
+            jnp.minimum(
+                jnp.min(
+                    p_pos[:, 0] + jnp.min(polys.vertices[:, :, 0], where=rect_vertex_mask, initial=0, axis=1),
+                    where=rect_mask,
+                    initial=jnp.inf,
+                ),
+                jnp.min(c_pos[:, 0] - rad, where=circ_mask, initial=jnp.inf),
+            ),
+            jnp.maximum(
+                jnp.max(
+                    p_pos[:, 0] + jnp.max(polys.vertices[:, :, 0], where=rect_vertex_mask, initial=0, axis=1),
+                    where=rect_mask,
+                    initial=-jnp.inf,
+                ),
+                jnp.max(c_pos[:, 0] + rad, where=circ_mask, initial=-jnp.inf),
+            ),
+            jnp.minimum(
+                jnp.min(
+                    p_pos[:, 1] + jnp.min(polys.vertices[:, :, 1], where=rect_vertex_mask, initial=0, axis=1),
+                    where=rect_mask,
+                    initial=jnp.inf,
+                ),
+                jnp.min(c_pos[:, 1] - rad, where=circ_mask, initial=jnp.inf),
+            ),
+            jnp.maximum(
+                jnp.max(
+                    p_pos[:, 1] + jnp.max(polys.vertices[:, :, 1], where=rect_vertex_mask, initial=0, axis=1),
+                    where=rect_mask,
+                    initial=-jnp.inf,
+                ),
+                jnp.max(c_pos[:, 1] + rad, where=circ_mask, initial=-jnp.inf),
+            ),
+        )
+        # Non-negative overshoot past each edge; screen size is converted from pixels to world units.
+        how_much_oob_x_left = jnp.maximum(0, 0 - min_x)
+        how_much_oob_x_right = jnp.maximum(0, max_x - static_env_params.screen_dim[0] / params.pixels_per_unit)
+        how_much_oob_y_down = jnp.maximum(0, 0.4 - min_y)  # this is for the floor
+        how_much_oob_y_up = jnp.maximum(0, max_y - static_env_params.screen_dim[1] / params.pixels_per_unit)
+        # correct by out of bounds factor
+        # Only the shapes that were moved are pushed back.
+        positions = (
+            new_positions_tentative
+            + jnp.array(
+                [
+                    how_much_oob_x_left - how_much_oob_x_right,
+                    how_much_oob_y_down - how_much_oob_y_up,
+                ]
+            )[None]
+            * mask_of_shape_locations_to_change[:, None]
+        )
+        # Split the unified position array back into its polygon and circle parts.
+        state = state.replace(
+            polygon=state.polygon.replace(
+                position=positions[: static_env_params.num_polygons],
+            ),
+            circle=state.circle.replace(
+                position=positions[static_env_params.num_polygons :],
+            ),
+        )
+        return recalculate_mass_and_inertia(state, static_env_params, state.polygon_densities, state.circle_densities)
+    return jax.lax.cond(shape_active.sum() > 0, do_change, dummy, rng, state)
+def make_mutate_change_shape_rotation(params, static_env_params):
+    do_dummy_step = make_do_dummy_step(params, static_env_params)
+    @partial(jax.jit, static_argnums=(3, 4))
+    def mutate_change_shape_rotation(
+        rng, state: EnvState, params: EnvParams, static_env_params: StaticEnvParams, ued_params: UEDParams
+    ):
+        shape_active = jnp.concatenate(
+            [state.polygon.active.at[: static_env_params.num_static_fixated_polys].set(False), state.circle.active]
+        )
+        def dummy(rng, state):
+            return state
+        def do_change(rng, state):
+            rng, _rng = jax.random.split(rng)
+            rngs = jax.random.split(_rng, 10)
+            p = shape_active.astype(jnp.float32)
+            shape_idx = jax.random.choice(rngs[0], jnp.arange(shape_active.shape[0]), p=p)
+            is_rect = shape_idx < static_env_params.num_polygons
+            rotation_delta = jax.random.uniform(rngs[1], shape=()) * math.pi / 2
+            has_fixed_joint_a = (state.joint.a_index == shape_idx) & state.joint.is_fixed_joint & state.joint.active
+            has_fixed_joint_b = (state.joint.b_index == shape_idx) & state.joint.is_fixed_joint & state.joint.active
+            state = state.replace(
+                joint=state.joint.replace(
+                    rotation=jax.lax.select(
+                        has_fixed_joint_a,
+                        state.joint.rotation - rotation_delta,
+                        jax.lax.select(
+                            has_fixed_joint_b,
+                            state.joint.rotation + rotation_delta,
+                            state.joint.rotation,
+                        ),
+                    )
+                ),
+                polygon=state.polygon.replace(
+                    rotation=jax.lax.select(
+                        is_rect, state.polygon.rotation.at[shape_idx].add(rotation_delta), state.polygon.rotation
+                    ),
+                ),
+                circle=state.circle.replace(
+                    rotation=jax.lax.select(
+                        jnp.logical_not(is_rect),
+                        state.circle.rotation.at[shape_idx - static_env_params.num_polygons].add(rotation_delta),
+                        state.circle.rotation,
+                    )
+                ),
+            )
+            def _ss(state, _):
+                return do_dummy_step(state), None
+            state = jax.lax.scan(_ss, state, jnp.arange(5))[0]
+            return recalculate_mass_and_inertia(
+                state, static_env_params, state.polygon_densities, state.circle_densities
+            )
+        return jax.lax.cond(shape_active.sum() > 0, do_change, dummy, rng, state)
+    return mutate_change_shape_rotation

kinetix/environment/ued/ued.py ADDED Viewed

	@@ -0,0 +1,249 @@

+from functools import partial
+import math
+import os
+import chex
+import jax
+import jax.numpy as jnp
+from flax.serialization import to_state_dict
+from jax2d.engine import (
+    calculate_collision_matrix,
+    calc_inverse_mass_polygon,
+    calc_inverse_mass_circle,
+    calc_inverse_inertia_circle,
+    calc_inverse_inertia_polygon,
+    recalculate_mass_and_inertia,
+    select_shape,
+    PhysicsEngine,
+)
+from jax2d.sim_state import SimState, RigidBody, Joint, Thruster
+from jax2d.maths import rmat
+from kinetix.environment.env_state import EnvParams, EnvState, StaticEnvParams
+from kinetix.environment.ued.distributions import (
+    create_vmapped_filtered_distribution,
+    sample_kinetix_level,
+)
+from kinetix.environment.ued.mutators import (
+    make_mutate_change_shape_rotation,
+    make_mutate_change_shape_size,
+    mutate_add_connected_shape_proper,
+    mutate_add_shape,
+    mutate_add_connected_shape,
+    mutate_change_shape_location,
+    mutate_remove_joint,
+    mutate_remove_shape,
+    mutate_swap_role,
+    mutate_toggle_fixture,
+    mutate_add_thruster,
+    mutate_remove_thruster,
+    mutate_change_gravity,
+)
+from kinetix.environment.ued.ued_state import UEDParams
+from kinetix.environment.utils import permute_pcg_state
+from kinetix.pcg.pcg import env_state_to_pcg_state, sample_pcg_state
+from kinetix.util.config import generate_ued_params_from_config, generate_params_from_config
+from kinetix.util.saving import get_pcg_state_from_json, load_pcg_state_pickle, load_world_state_pickle, stack_list_of_pytrees, expand_env_state
+from flax import struct
+from kinetix.environment.env import create_empty_env
+from kinetix.util.learning import BASE_DIR, general_eval, get_eval_levels
+def make_mutate_env(static_env_params: StaticEnvParams, params: EnvParams, ued_params: UEDParams):
+    mutate_size = make_mutate_change_shape_size(params, static_env_params)
+    mutate_rot = make_mutate_change_shape_rotation(params, static_env_params)
+    def mutate_level(rng, level: EnvState, n=1):
+        def inner(carry: tuple[chex.PRNGKey, EnvState], _):
+            rng, level = carry
+            rng, _rng, _rng2 = jax.random.split(rng, 3)
+            any_rects_left = jnp.logical_not(level.polygon.active).sum() > 0
+            any_circles_left = jnp.logical_not(level.circle.active).sum() > 0
+            any_joints_left = jnp.logical_not(level.joint.active).sum() > 0
+            any_thrust_left = jnp.logical_not(level.thruster.active).sum() > 0
+            has_any_thursters = level.thruster.active.sum() > 0
+            can_do_add_shape = any_rects_left | any_circles_left
+            can_do_add_joint = can_do_add_shape & any_joints_left
+            all_mutations = [
+                mutate_add_shape,
+                mutate_add_connected_shape_proper,
+                mutate_remove_joint,
+                mutate_remove_shape,
+                mutate_swap_role,
+                mutate_add_thruster,
+                mutate_remove_thruster,
+                mutate_toggle_fixture,
+                mutate_size,
+                mutate_change_shape_location,
+                mutate_rot,
+            ]
+            def mypartial(f):
+                def inner(rng, level):
+                    return f(rng, level, params, static_env_params, ued_params)
+                return inner
+            probs = jnp.array(
+                [
+                    can_do_add_shape * 1.0,
+                    can_do_add_joint * 1.0,
+                    0.0,
+                    0.0,
+                    1.0,
+                    any_thrust_left * 1.0,
+                    has_any_thursters * 1.0,
+                    0.1,
+                    1.0,
+                    1.0,
+                    1.0,
+                ]
+            )
+            all_mutations = [mypartial(i) for i in all_mutations]
+            index = jax.random.choice(_rng, jnp.arange(len(all_mutations)), (), p=probs)
+            level = jax.lax.switch(index, all_mutations, _rng2, level)
+            return (rng, level), None
+        (_, level), _ = jax.lax.scan(inner, (rng, level), None, length=n)
+        return level
+    return mutate_level
+def make_create_eval_env():
+    eval_level1 = load_world_state_pickle("worlds/eval/eval_0610_car1")
+    eval_level2 = load_world_state_pickle("worlds/eval/eval_0610_car2")
+    eval_level3 = load_world_state_pickle("worlds/eval/eval_0628_ball_left")
+    eval_level4 = load_world_state_pickle("worlds/eval/eval_0628_ball_right")
+    eval_level5 = load_world_state_pickle("worlds/eval/eval_0628_hard_car_obstacle")
+    eval_level6 = load_world_state_pickle("worlds/eval/eval_0628_swingup")
+    def _create_eval_env(rng, env_params, static_env_params, index):
+        return jax.lax.switch(
+            index,
+            [
+                lambda: eval_level1,
+                lambda: eval_level2,
+                lambda: eval_level3,
+                lambda: eval_level4,
+                lambda: eval_level5,
+                lambda: eval_level6,
+            ],
+        )
+        return jax.tree.map(lambda x, y: jax.lax.select(index == 0, x, y), eval_level1, eval_level2)
+    return _create_eval_env
+def make_reset_train_function_with_mutations(
+    engine: PhysicsEngine, env_params: EnvParams, static_env_params: StaticEnvParams, config, make_pcg_state=True
+):
+    ued_params = generate_ued_params_from_config(config)
+    def reset(rng):
+        inner = sample_kinetix_level(
+            rng, engine, env_params, static_env_params, ued_params, env_size_name=config["env_size_name"]
+        )
+        if make_pcg_state:
+            return env_state_to_pcg_state(inner)
+        else:
+            return inner
+    return reset
+def make_vmapped_filtered_level_sampler(
+    level_sampler, env_params: EnvParams, static_env_params: StaticEnvParams, config, make_pcg_state, env
+):
+    ued_params = generate_ued_params_from_config(config)
+    def reset(rng, n_samples):
+        inner = create_vmapped_filtered_distribution(
+            rng,
+            level_sampler,
+            env_params,
+            static_env_params,
+            ued_params,
+            n_samples,
+            env,
+            config["filter_levels"],
+            config["level_filter_sample_ratio"],
+            config["env_size_name"],
+            config["level_filter_n_steps"],
+        )
+        if make_pcg_state:
+            return env_state_to_pcg_state(inner)
+        else:
+            return inner
+    return reset
+def make_reset_train_function_with_list_of_levels(config, levels, static_env_params, make_pcg_state=True,
+                                                  is_loading_train_levels=False):
+    assert len(levels) > 0, "Need to provide at least one level to train on"
+    if config["load_train_levels_legacy"]:
+        ls = [get_pcg_state_from_json(os.path.join(BASE_DIR, l + ("" if l.endswith(".json") else ".json"))) for l in levels]
+        v = stack_list_of_pytrees(ls)
+    elif is_loading_train_levels:
+        v = get_eval_levels(levels, static_env_params)
+    else:
+        _, static_env_params = generate_params_from_config(
+            config["eval_env_size_true"] | {"frame_skip": config["frame_skip"]}
+        )
+        v = get_eval_levels(levels, static_env_params)
+    def reset(rng):
+        rng, _rng, _rng2 = jax.random.split(rng, 3)
+        idx = jax.random.randint(_rng, (), 0, len(levels))
+        state_to_return = jax.tree.map(lambda x: x[idx], v)
+        if config["permute_state_during_training"]:
+            state_to_return = permute_pcg_state(rng, state_to_return, static_env_params)
+        if not make_pcg_state:
+            state_to_return = sample_pcg_state(_rng2, state_to_return, params=None, static_params=static_env_params)
+        return state_to_return
+    return reset
+# Flat list of mutation functions imported from kinetix.environment.ued.mutators.
+# NOTE(review): this is not the list used by `make_mutate_env` above, which builds its own
+# (with `mutate_add_connected_shape_proper`, the size/location/rotation mutators and no
+# `mutate_change_gravity`). No use of this constant is visible in this part of the file.
+ALL_MUTATION_FNS = [
+    mutate_add_shape,
+    mutate_add_connected_shape,
+    mutate_remove_joint,
+    mutate_swap_role,
+    mutate_toggle_fixture,
+    mutate_add_thruster,
+    mutate_remove_thruster,
+    mutate_remove_shape,
+    mutate_change_gravity,
+]
+def test_ued():
+    from kinetix.environment.env import create_empty_env
+    env_params = EnvParams()
+    static_env_params = StaticEnvParams()
+    ued_params = UEDParams()
+    rng = jax.random.PRNGKey(0)
+    rng, _rng = jax.random.split(rng)
+    state = create_empty_env(env_params, static_env_params)
+    state = mutate_add_shape(_rng, state, env_params, static_env_params, ued_params)
+    state = mutate_add_connected_shape(_rng, state, env_params, static_env_params, ued_params)
+    state = mutate_remove_shape(_rng, state, env_params, static_env_params, ued_params)
+    state = mutate_remove_joint(_rng, state, env_params, static_env_params, ued_params)
+    state = mutate_swap_role(_rng, state, env_params, static_env_params, ued_params)
+    state = mutate_toggle_fixture(_rng, state, env_params, static_env_params, ued_params)
+    print("Successfully did this")
+# Run the smoke test when this module is executed as a script.
+if __name__ == "__main__":
+    test_ued()

kinetix/environment/ued/ued_state.py ADDED Viewed

	@@ -0,0 +1,53 @@

+import math
+from flax import struct
+# Tunable parameters that are passed as `ued_params` to the level sampling / mutation code.
+# NOTE(review): most per-field meanings below are not visible from this file and would need
+# to be confirmed against the code that reads them; only grounded facts are stated.
+@struct.dataclass
+class UEDParams:
+    max_shape_size: float = 1.0
+    # NOTE(review): "opposide" is presumably a typo for "opposite"; the name is part of the interface.
+    goal_body_opposide_side_chance: float = 0.5
+    goal_body_size_factor: float = 1.0
+    min_rjoints_bias: int = 2
+    # Read by `sample_dimensions`: chance that one randomly chosen half-dimension is multiplied
+    # by `large_rect_dim_scale`.
+    large_rect_dim_chance: float = 0.3
+    large_rect_dim_scale: float = 2.0
+    generate_triangles: bool = False
+    thruster_power_multiplier: float = 2.0
+    thruster_align_com_prob: float = 0.8
+    motor_on_chance: float = 0.8
+    motor_min_speed: float = 0.4
+    motor_max_speed: float = 3.0
+    motor_min_power: float = 1.0
+    motor_max_power: float = 3.0
+    wheel_max_power: float = 1.0
+    joint_limit_chance: float = 0.4
+    joint_limit_max: float = math.pi
+    joint_fixed_chance: float = 0.1
+    fixate_chance_min: float = 0.02
+    fixate_chance_max: float = 1.0
+    fixate_chance_scale: float = 4.0  # Fixation probability scales with size
+    fixate_shape_bottom_bias: float = 0.0
+    fixate_shape_bottom_bias_special_role: float = 0.6
+    # Read by `sample_dimensions`: scales the upper bound of the sampled circle radius.
+    circle_max_size_coeff: float = 0.8
+    connect_to_fixated_prob_coeff: float = 0.05
+    connect_visibility_min: float = 0.05
+    connect_no_visibility_bias: float = 10.0
+    # The three defaults below sum to 1.0.
+    add_shape_chance: float = 0.35
+    add_connected_shape_chance: float = 0.35
+    add_no_shape_chance: float = 0.3
+    add_thruster_chance: float = 0.3
+    add_shape_n_proposals: int = 8
+    # The four floor_prob_* defaults sum to 1.0.
+    floor_prob_normal: float = 0.9
+    floor_prob_green: float = 0.0
+    floor_prob_blue: float = 0.02
+    floor_prob_red: float = 0.08

kinetix/environment/ued/util.py ADDED Viewed

	@@ -0,0 +1,358 @@

+import math
+from functools import partial
+import jax
+import jax.numpy as jnp
+from jax2d.engine import PhysicsEngine, calculate_collision_matrix, recalculate_mass_and_inertia, select_shape
+from jax2d.sim_state import RigidBody, Thruster
+from kinetix.environment.env_state import EnvParams, EnvState, StaticEnvParams
+def sample_dimensions(rng, static_env_params: StaticEnvParams, is_rect: bool, ued_params, max_shape_size=None):
+    if max_shape_size is None:
+        max_shape_size = static_env_params.max_shape_size
+    # Returns (half_dimensions, radius)
+    rng, _rng = jax.random.split(rng)
+    # Don't want overly small shapes
+    min_rect_size = 0.05
+    min_circle_size = 0.1
+    cap_rect = max_shape_size / 2.0 / jnp.sqrt(2.0)
+    cap_circ = max_shape_size / 2.0 * ued_params.circle_max_size_coeff
+    half_dimensions = (
+        jax.lax.select(is_rect, jax.random.uniform(_rng, shape=(2,)), jnp.zeros(2, dtype=jnp.float32))
+        * (cap_rect - min_rect_size)
+        + min_rect_size
+    )
+    rng, _rng, __rng = jax.random.split(rng, 3)
+    dim_scale = (
+        jnp.ones(2)
+        .at[jax.random.randint(_rng, shape=(), minval=0, maxval=2)]
+        .set(
+            jax.lax.select(
+                jax.random.uniform(__rng) < ued_params.large_rect_dim_chance, ued_params.large_rect_dim_scale, 1.0
+            )
+        )
+    )
+    half_dimensions *= dim_scale
+    vertices = jnp.array(
+        [
+            half_dimensions * jnp.array([1, 1]),
+            half_dimensions * jnp.array([1, -1]),
+            half_dimensions * jnp.array([-1, -1]),
+            half_dimensions * jnp.array([-1, 1]),
+        ]
+    )
+    rng, _rng = jax.random.split(rng)
+    radius = (
+        jax.lax.select(is_rect, jnp.zeros((), dtype=jnp.float32), jax.random.uniform(_rng, shape=()))
+        * (cap_circ - min_circle_size)
+        + min_circle_size
+    )
+    return vertices, half_dimensions, radius
+def count_roles(state: EnvState, static_env_params: StaticEnvParams, role: int, include_static_polys=True) -> int:
+    active_to_use = state.polygon.active
+    if not include_static_polys:
+        active_to_use = active_to_use.at[: static_env_params.num_static_fixated_polys].set(False)
+    return ((state.polygon_shape_roles == role) * active_to_use).sum() + (
+        (state.circle_shape_roles == role) * state.circle.active
+    ).sum()
+def random_position_on_triangle(rng, vertices):
+    verts = vertices[:3]
+    rng, _rng, _rng2 = jax.random.split(rng, 3)
+    f1 = jax.random.uniform(_rng)
+    f2 = jax.random.uniform(_rng2)
+    # https://www.reddit.com/r/godot/comments/mqp29g/how_do_i_get_a_random_position_inside_a_collision/
+    return verts[0] + jnp.sqrt(f1) * (-verts[0] + verts[1] + f2 * (verts[2] - verts[1]))
+def random_position_on_rectangle(rng, vertices):
+    verts = vertices[:4]
+    rng, _rng, _rng2 = jax.random.split(rng, 3)
+    f1 = jax.random.uniform(_rng)
+    f2 = jax.random.uniform(_rng2)
+    min_x, max_x = jnp.min(verts[:, 0]), jnp.max(verts[:, 0])
+    min_y, max_y = jnp.min(verts[:, 1]), jnp.max(verts[:, 1])
+    random_x_pos = min_x + f1 * (max_x - min_x)
+    random_y_pos = min_y + f2 * (max_y - min_y)
+    return jnp.array([random_x_pos, random_y_pos])
+def random_position_on_polygon(rng, vertices, n_vertices, static_env_params: StaticEnvParams):
+    assert static_env_params.max_polygon_vertices <= 4, "Only supports up to 4 vertices"
+    return jax.lax.select(
+        n_vertices <= 3, random_position_on_triangle(rng, vertices), random_position_on_rectangle(rng, vertices)
+    )
+def random_position_on_circle(rng, radius, on_centre_chance):
+    rngs = jax.random.split(rng, 3)
+    on_centre = jax.random.uniform(rngs[0]) < on_centre_chance
+    local_joint_position_circle_theta = jax.random.uniform(rngs[1], shape=()) * 2 * math.pi
+    local_joint_position_circle_r = jax.random.uniform(rngs[2], shape=()) * radius
+    local_joint_position_circle = jnp.array(
+        [
+            local_joint_position_circle_r * jnp.cos(local_joint_position_circle_theta),
+            local_joint_position_circle_r * jnp.sin(local_joint_position_circle_theta),
+        ]
+    )
+    return jax.lax.select(on_centre, jnp.array([0.0, 0.0]), local_joint_position_circle)
+def get_role(rng, state: EnvState, static_env_params: StaticEnvParams, initial_p=None) -> int:
+    if initial_p is None:
+        initial_p = jnp.array([1.0, 1.0, 1.0, 1.0])
+    needs_ball = count_roles(state, static_env_params, 1) == 0
+    needs_goal = count_roles(state, static_env_params, 2) == 0
+    needs_lava = count_roles(state, static_env_params, 3) == 0
+    # always put goal/ball first.
+    prob_of_something_else = (needs_ball == 0) & (needs_goal == 0)
+    p = initial_p * jnp.array(
+        [prob_of_something_else, needs_ball, needs_goal, prob_of_something_else * needs_lava / 3]
+    )  # This ensures we cannot more than one ball or goal.
+    return jax.random.choice(rng, jnp.array([0, 1, 2, 3]), p=p)
+def is_space_for_shape(state: EnvState):
+    return jnp.logical_not(jnp.concatenate([state.polygon.active, state.circle.active])).sum() > 0
+def is_space_for_joint(state: EnvState):
+    return jnp.logical_not(state.joint.active).sum() > 0
+def are_there_shapes_present(state: EnvState, static_env_params: StaticEnvParams):
+    m = (
+        jnp.concatenate([state.polygon.active, state.circle.active])
+        .at[: static_env_params.num_static_fixated_polys]
+        .set(False)
+    )
+    return m.sum() > 0
+@partial(jax.jit, static_argnums=(2, 9))
+def add_rigidbody_to_state(
+    state: EnvState,
+    env_params: EnvParams,
+    static_env_params: StaticEnvParams,
+    position: jnp.ndarray,
+    vertices: jnp.ndarray,
+    n_vertices: int,
+    radius: float,
+    shape_role: int,
+    density: float = 1,
+    is_circle: bool = False,
+):
+    new_rigid_body = RigidBody(
+        position=position,
+        velocity=jnp.array([0.0, 0.0]),
+        inverse_mass=1.0,
+        inverse_inertia=1.0,
+        rotation=0.0,
+        angular_velocity=0.0,
+        radius=radius,
+        active=True,
+        friction=1.0,
+        vertices=vertices,
+        n_vertices=n_vertices,
+        collision_mode=1,
+        restitution=0.0,
+    )
+    if is_circle:
+        actives = state.circle.active
+    else:
+        actives = state.polygon.active
+    idx = jnp.argmin(actives)
+    def noop(state):
+        return state
+    def replace(state):
+        add_func = lambda all, new: all.at[idx].set(new)
+        if is_circle:
+            state = state.replace(
+                circle=jax.tree.map(add_func, state.circle, new_rigid_body),
+                circle_densities=state.circle_densities.at[idx].set(density),
+                circle_shape_roles=state.circle_shape_roles.at[idx].set(shape_role),
+            )
+        else:
+            state = state.replace(
+                polygon=jax.tree.map(add_func, state.polygon, new_rigid_body),
+                polygon_densities=state.polygon_densities.at[idx].set(density),
+                polygon_shape_roles=state.polygon_shape_roles.at[idx].set(shape_role),
+            )
+        state = state.replace(
+            collision_matrix=calculate_collision_matrix(static_env_params, state.joint),
+        )
+        state = recalculate_mass_and_inertia(state, static_env_params, state.polygon_densities, state.circle_densities)
+        return state
+    return jax.lax.cond(jnp.logical_not(actives).sum() > 0, replace, noop, state)
+def rectangle_vertices(half_dim):
+    return jnp.array(
+        [
+            half_dim * jnp.array([1, 1]),
+            half_dim * jnp.array([1, -1]),
+            half_dim * jnp.array([-1, -1]),
+            half_dim * jnp.array([-1, 1]),
+        ]
+    )
+# More Manual Control
+@partial(jax.jit, static_argnums=(2,))
+def add_rectangle_to_state(
+    state: EnvState,
+    env_params: EnvParams,
+    static_env_params: StaticEnvParams,
+    position: jnp.ndarray,
+    width: float,
+    height: float,
+    shape_role: int,
+    density: float = 1,
+):
+    return add_rigidbody_to_state(
+        state,
+        env_params,
+        static_env_params,
+        position,
+        rectangle_vertices(jnp.array([width, height]) / 2),
+        4,
+        0.0,
+        shape_role,
+        density,
+        is_circle=False,
+    )
+@partial(jax.jit, static_argnums=(2,))
+def add_circle_to_state(
+    state: EnvState,
+    env_params: EnvParams,
+    static_env_params: StaticEnvParams,
+    position: jnp.ndarray,
+    radius: float,
+    shape_role: int,
+    density: float = 1,
+):
+    return add_rigidbody_to_state(
+        state,
+        env_params,
+        static_env_params,
+        position,
+        jnp.array([0.0, 0.0]),
+        0,
+        radius,
+        shape_role,
+        density,
+        is_circle=True,
+    )
+@partial(jax.jit, static_argnums=(2,))
+def add_thruster_to_object(
+    state: EnvState,
+    env_params: EnvParams,
+    static_env_params: StaticEnvParams,
+    shape_index: int,
+    rotation: float,
+    colour: int,
+    thruster_power_multiplier: float,
+):
+    def dummy(state):
+        return state
+    def do_add(state: EnvState):
+        thruster_idx = jnp.argmin(state.thruster.active)
+        shape = select_shape(state, shape_index, static_env_params)
+        thruster = Thruster(
+            object_index=shape_index,
+            active=True,
+            relative_position=jnp.array([0.0, 0.0]),  # a bit of a hack but reasonable.
+            rotation=rotation,
+            power=1.0 / jax.lax.select(shape.inverse_mass == 0, 1.0, shape.inverse_mass) * thruster_power_multiplier,
+            global_position=select_shape(state, shape_index, static_env_params).position,
+        )
+        state = state.replace(
+            thruster=jax.tree_map(lambda y, x: y.at[thruster_idx].set(x), state.thruster, thruster),
+            thruster_bindings=state.thruster_bindings.at[thruster_idx].set(colour),
+        )
+        return state
+    return jax.lax.cond(
+        (select_shape(state, shape_index, static_env_params).active)
+        & (jnp.logical_not(state.thruster.active).sum() > 0),
+        do_add,
+        dummy,
+        state,
+    )
+def make_velocities_zero(state: EnvState):
+    def inner(state):
+        return state.replace(
+            polygon=state.polygon.replace(
+                angular_velocity=state.polygon.angular_velocity * 0,
+                velocity=state.polygon.velocity * 0,
+            ),
+            circle=state.circle.replace(
+                angular_velocity=state.circle.angular_velocity * 0,
+                velocity=state.circle.velocity * 0,
+            ),
+        )
+    return inner(state)
+def make_do_dummy_step(
+    params: EnvParams, static_sim_params: StaticEnvParams, zero_collisions=True, zero_velocities=True
+):
+    env = PhysicsEngine(static_sim_params)
+    @jax.jit
+    def _step_fn(state):
+        state, _ = env.step(state, params, jnp.zeros((static_sim_params.num_joints + static_sim_params.num_thrusters,)))
+        return state
+    def do_dummy_step(state: EnvState) -> EnvState:
+        rng = jax.random.PRNGKey(0)
+        og_col = state.collision_matrix
+        g = state.gravity
+        state = state.replace(
+            collision_matrix=state.collision_matrix & (not zero_collisions), gravity=state.gravity * 0
+        )
+        state = _step_fn(state)
+        state = state.replace(gravity=g, collision_matrix=og_col)
+        if zero_velocities:
+            state = make_velocities_zero(state)
+        return state
+    return do_dummy_step

kinetix/environment/utils.py ADDED Viewed

	@@ -0,0 +1,66 @@

+import chex
+import jax
+from jax2d.engine import calculate_collision_matrix
+from kinetix.environment.env_state import EnvState, StaticEnvParams
+import jax.numpy as jnp
+from kinetix.pcg.pcg_state import PCGState
+def permute_state(rng: chex.PRNGKey, env_state: EnvState, static_env_params: StaticEnvParams):
+    idxs_circles = jnp.arange(static_env_params.num_circles)
+    idxs_polygons = jnp.arange(static_env_params.num_polygons)
+    idxs_joints = jnp.arange(static_env_params.num_joints)
+    idxs_thrusters = jnp.arange(static_env_params.num_thrusters)
+    rng, *_rngs = jax.random.split(rng, 5)
+    idxs_circles_permuted = jax.random.permutation(_rngs[0], idxs_circles, independent=True)
+    idxs_polygons_permuted = idxs_polygons.at[static_env_params.num_static_fixated_polys :].set(
+        jax.random.permutation(_rngs[1], idxs_polygons[static_env_params.num_static_fixated_polys :], independent=True)
+    )
+    idxs_joints_permuted = jax.random.permutation(_rngs[2], idxs_joints, independent=True)
+    idxs_thrusters_permuted = jax.random.permutation(_rngs[3], idxs_thrusters, independent=True)
+    combined = jnp.concatenate([idxs_polygons_permuted, idxs_circles_permuted + static_env_params.num_polygons])
+    # Change the ordering of the shapes, and also remember to change the indices associated with the joints
+    inverse_permutation = jnp.argsort(combined)
+    env_state = env_state.replace(
+        polygon_shape_roles=env_state.polygon_shape_roles[idxs_polygons_permuted],
+        circle_shape_roles=env_state.circle_shape_roles[idxs_circles_permuted],
+        polygon_highlighted=env_state.polygon_highlighted[idxs_polygons_permuted],
+        circle_highlighted=env_state.circle_highlighted[idxs_circles_permuted],
+        polygon_densities=env_state.polygon_densities[idxs_polygons_permuted],
+        circle_densities=env_state.circle_densities[idxs_circles_permuted],
+        polygon=jax.tree.map(lambda x: x[idxs_polygons_permuted], env_state.polygon),
+        circle=jax.tree.map(lambda x: x[idxs_circles_permuted], env_state.circle),
+        joint=env_state.joint.replace(
+            a_index=inverse_permutation[env_state.joint.a_index],
+            b_index=inverse_permutation[env_state.joint.b_index],
+        ),
+        thruster=env_state.thruster.replace(
+            object_index=inverse_permutation[env_state.thruster.object_index],
+        ),
+    )
+    # And now permute the thrusters and joints
+    env_state = env_state.replace(
+        thruster_bindings=env_state.thruster_bindings[idxs_thrusters_permuted],
+        motor_bindings=env_state.motor_bindings[idxs_joints_permuted],
+        motor_auto=env_state.motor_auto[idxs_joints_permuted],
+        joint=jax.tree.map(lambda x: x[idxs_joints_permuted], env_state.joint),
+        thruster=jax.tree.map(lambda x: x[idxs_thrusters_permuted], env_state.thruster),
+    )
+    # and collision matrix
+    env_state = env_state.replace(collision_matrix=calculate_collision_matrix(static_env_params, env_state.joint))
+    return env_state
+def permute_pcg_state(rng: chex.PRNGKey, pcg_state: PCGState, static_env_params: StaticEnvParams):
+    return pcg_state.replace(
+        env_state=permute_state(rng, pcg_state.env_state, static_env_params),
+        env_state_max=permute_state(rng, pcg_state.env_state_max, static_env_params),
+        env_state_pcg_mask=jax.tree.map(lambda x: jnp.zeros_like(x, dtype=bool), pcg_state.env_state_pcg_mask),
+    )

kinetix/environment/wrappers.py ADDED Viewed

	@@ -0,0 +1,309 @@

+import functools
+from chex._src.pytypes import PRNGKey
+import jax
+import jax.numpy as jnp
+import chex
+from jax.numpy import ndarray
+import numpy as np
+from flax import struct
+from functools import partial
+from typing import Callable, Dict, Optional, Tuple, Union, Any
+from gymnax.environments import spaces, environment
+from kinetix.environment.env_state import EnvParams, EnvState
+from jaxued.environments import UnderspecifiedEnv
+class UnderspecifiedEnvWrapper(UnderspecifiedEnv):
+    """Base class for Gymnax wrappers."""
+    def __init__(self, env):
+        self._env = env
+    # provide proxy access to regular attributes of wrapped object
+    def __getattr__(self, name):
+        return getattr(self._env, name)
+class GymnaxWrapper(object):
+    """Base class for Gymnax wrappers."""
+    def __init__(self, env):
+        self._env = env
+    # provide proxy access to regular attributes of wrapped object
+    def __getattr__(self, name):
+        return getattr(self._env, name)
+# From Here: https://github.com/DramaCow/jaxued/blob/main/src/jaxued/wrappers/autoreset.py
+class AutoResetWrapper(UnderspecifiedEnvWrapper):
+    """
+    This is a wrapper around an `UnderspecifiedEnv`, allowing for the environment to be automatically reset upon completion of an episode. This behaviour is similar to the default Gymnax interface. The user can specify a callable `sample_level` that takes in a PRNGKey and returns a level.
+    Warning:
+        To maintain compliance with UnderspecifiedEnv interface, user can reset to an
+        arbitrary level. This includes levels outside the support of sample_level(). Consequently,
+        the tagged rng is defaulted to jax.random.PRNGKey(0). If your code relies on this, careful
+        attention may be required.
+    """
+    def __init__(self, env: UnderspecifiedEnv, sample_level: Callable[[chex.PRNGKey], EnvState]):
+        self._env = env
+        self.sample_level = sample_level
+    @property
+    def default_params(self) -> EnvParams:
+        return self._env.default_params
+    def reset_env(self, rng, params):
+        rng, rng_sample, rng_reset = jax.random.split(rng, 3)
+        state_to_reset_to = self.sample_level(rng_sample)
+        return self._env.reset_env_to_pcg_level(rng_reset, state_to_reset_to, params)
+    def step_env(
+        self,
+        rng: chex.PRNGKey,
+        state: EnvState,
+        action: Union[int, float],
+        params: EnvParams,
+    ) -> Tuple[chex.ArrayTree, EnvState, float, bool, dict]:
+        rng_reset, rng_step = jax.random.split(rng, 2)
+        obs_st, env_state_st, reward, done, info = self._env.step_env(rng_step, state, action, params)
+        obs_re, env_state_re = self.reset_env(rng_reset, params)
+        env_state = jax.tree_map(lambda x, y: jax.lax.select(done, x, y), env_state_re, env_state_st)
+        obs = jax.tree_map(lambda x, y: jax.lax.select(done, x, y), obs_re, obs_st)
+        return obs, env_state, reward, done, info
+    def reset_env_to_level(self, rng: chex.PRNGKey, level: EnvState, params: EnvParams) -> Tuple[Any, EnvState]:
+        # raise NotImplementedError("This method should not be called directly. Use reset instead.")
+        obs, env_state = self._env.reset_to_level(rng, level, params)
+        return obs, env_state
+    def action_space(self, params: EnvParams) -> Any:
+        return self._env.action_space(params)
+# NOTE(review): a class with the same name `AutoReplayWrapper` is defined again further down
+# in this file (deriving from `UnderspecifiedEnvWrapper`). The later definition rebinds the
+# name at import time, so this first definition is effectively dead code.
+class AutoReplayWrapper(UnderspecifiedEnv):
+    """
+    This wrapper replay the **same** level over and over again by resetting to the same level after each episode.
+    This is useful for training/rolling out multiple times on the same level.
+    """
+    def __init__(self, env: UnderspecifiedEnv):
+        self._env = env
+    @property
+    def default_params(self) -> EnvParams:
+        return self._env.default_params
+    def step_env(
+        self,
+        rng: chex.PRNGKey,
+        state: EnvState,
+        action: Union[int, float],
+        params: EnvParams,
+    ) -> Tuple[chex.ArrayTree, EnvState, float, bool, dict]:
+        rng_reset, rng_step = jax.random.split(rng)
+        # The reset to the stored level is always computed; it is only selected when `done` is set.
+        obs_re, env_state_re = self._env.reset_to_level(rng_reset, state.level, params)
+        obs_st, env_state_st, reward, done, info = self._env.step_env(rng_step, state.env_state, action, params)
+        env_state = jax.tree_map(lambda x, y: jax.lax.select(done, x, y), env_state_re, env_state_st)
+        obs = jax.tree_map(lambda x, y: jax.lax.select(done, x, y), obs_re, obs_st)
+        return obs, state.replace(env_state=env_state), reward, done, info
+    def reset_env_to_level(self, rng: chex.PRNGKey, level: EnvState, params: EnvParams) -> Tuple[Any, EnvState]:
+        obs, env_state = self._env.reset_to_level(rng, level, params)
+        # Keep the level next to the env state so `step_env` can reset to it again.
+        return obs, AutoReplayState(env_state=env_state, level=level)
+    def action_space(self, params: EnvParams) -> Any:
+        return self._env.action_space(params)
+# State carried by AutoReplayWrapper: the wrapped environment's current state together with
+# the level that is replayed whenever an episode finishes.
+@struct.dataclass
+class AutoReplayState:
+    env_state: EnvState  # current state of the wrapped environment
+    level: EnvState  # level passed to `reset_to_level` on every replay
+class AutoReplayWrapper(UnderspecifiedEnvWrapper):
+    """
+    This wrapper replay the **same** level over and over again by resetting to the same level after each episode.
+    This is useful for training/rolling out multiple times on the same level.
+    """
+    def __init__(self, env: UnderspecifiedEnv):
+        self._env = env
+    @property
+    def default_params(self) -> EnvParams:
+        return self._env.default_params
+    def step_env(
+        self,
+        rng: chex.PRNGKey,
+        state: EnvState,
+        action: Union[int, float],
+        params: EnvParams,
+    ) -> Tuple[chex.ArrayTree, EnvState, float, bool, dict]:
+        rng_reset, rng_step = jax.random.split(rng)
+        obs_re, env_state_re = self._env.reset_to_level(rng_reset, state.level, params)
+        obs_st, env_state_st, reward, done, info = self._env.step_env(rng_step, state.env_state, action, params)
+        env_state = jax.tree_map(lambda x, y: jax.lax.select(done, x, y), env_state_re, env_state_st)
+        obs = jax.tree_map(lambda x, y: jax.lax.select(done, x, y), obs_re, obs_st)
+        return obs, state.replace(env_state=env_state), reward, done, info
+    def reset_env_to_level(self, rng: chex.PRNGKey, level: EnvState, params: EnvParams) -> Tuple[Any, EnvState]:
+        obs, env_state = self._env.reset_to_level(rng, level, params)
+        return obs, AutoReplayState(env_state=env_state, level=level)
+    def action_space(self, params: EnvParams) -> Any:
+        return self._env.action_space(params)
+class UnderspecifiedToGymnaxWrapper(environment.Environment):
+    def __init__(self, env):
+        self._env = env
+    # provide proxy access to regular attributes of wrapped object
+    def __getattr__(self, name):
+        return getattr(self._env, name)
+    @property
+    def default_params(self) -> Any:
+        return self._env.default_params
+    def step_env(
+        self, key: jax.Array, state: Any, action: int | float | jax.Array | ndarray | np.bool_ | np.number, params: Any
+    ) -> Tuple[jax.Array | ndarray | np.bool_ | np.number | Any | Dict[Any, Any]]:
+        return self._env.step_env(key, state, action, params)
+    def reset_env(self, key: PRNGKey, params: Any) -> Tuple[PRNGKey | np.ndarray | np.bool_ | np.number | Any]:
+        return self._env.reset_env(key, params)
+    def action_space(self, params: Any):
+        return self._env.action_space(params)
+class BatchEnvWrapper(GymnaxWrapper):
+    """Batches reset and step functions"""
+    def __init__(self, env, num_envs: int):
+        super().__init__(env)
+        self.num_envs = num_envs
+        self.reset_fn = jax.vmap(self._env.reset, in_axes=(0, None))
+        self.reset_to_level_fn = jax.vmap(self._env.reset_to_level, in_axes=(0, 0, None))
+        self.step_fn = jax.vmap(self._env.step, in_axes=(0, 0, 0, None))
+    @partial(jax.jit, static_argnums=(0, 2))
+    def reset(self, rng, params=None):
+        rng, _rng = jax.random.split(rng)
+        rngs = jax.random.split(_rng, self.num_envs)
+        obs, env_state = self.reset_fn(rngs, params)
+        return obs, env_state
+    @partial(jax.jit, static_argnums=(0, 3))
+    def reset_to_level(self, rng, level, params=None):
+        rng, _rng = jax.random.split(rng)
+        rngs = jax.random.split(_rng, self.num_envs)
+        obs, env_state = self.reset_to_level_fn(rngs, level, params)
+        return obs, env_state
+    @partial(jax.jit, static_argnums=(0, 4))
+    def step(self, rng, state, action, params=None):
+        rng, _rng = jax.random.split(rng)
+        rngs = jax.random.split(_rng, self.num_envs)
+        obs, state, reward, done, info = self.step_fn(rngs, state, action, params)
+        return obs, state, reward, done, info
+@struct.dataclass
+class DenseRewardState:
+    """State carried by `DenseRewardWrapper`: the inner state plus the last observed distance."""
+    # State of the wrapped environment.
+    env_state: EnvState
+    # `info["distance"]` from the previous step; -1.0 marks "no previous step" (set on reset), in which case
+    # the wrapper adds no distance-based reward.
+    last_distance: float = -1.0
+class DenseRewardWrapper(GymnaxWrapper):
+    def __init__(self, env, dense_reward_scale: float = 1.0) -> None:
+        super().__init__(env)
+        self.dense_reward_scale = dense_reward_scale
+    def step(self, key, state, action: int, params=None):
+        obs, env_state, reward, done, info = self._env.step_env(key, state.env_state, action, params)
+        delta_dist = (
+            -(info["distance"] - state.last_distance) * params.dense_reward_scale
+        )  # if distance got less, then reward is positive
+        delta_dist = jnp.nan_to_num(delta_dist, nan=0.0, posinf=0.0, neginf=0.0)
+        reward = reward + jax.lax.select(
+            (state.last_distance == -1) | (self.dense_reward_scale == 0.0), 0.0, delta_dist * self.dense_reward_scale
+        )
+        return obs, DenseRewardState(env_state, info["distance"]), reward, done, info
+    def reset(self, rng, params=None):
+        obs, env_state = self._env.reset(rng, params)
+        return obs, DenseRewardState(env_state, -1.0)
+    def reset_to_level(self, rng, level, params=None):
+        obs, env_state = self._env.reset_to_level(rng, level, params)
+        return obs, DenseRewardState(env_state, -1.0)
+@struct.dataclass
+class LogEnvState:
+    """State carried by `LogWrapper`: the inner state plus running and last-finished episode statistics."""
+    # State of the wrapped environment.
+    env_state: Any
+    # Sum of rewards in the episode in progress; zeroed on the step where `done` is set.
+    episode_returns: float
+    # Number of steps in the episode in progress; zeroed on the step where `done` is set.
+    episode_lengths: int
+    # Return of the most recently finished episode; only overwritten on a step where `done` is set.
+    returned_episode_returns: float
+    # Length of the most recently finished episode; only overwritten on a step where `done` is set.
+    returned_episode_lengths: int
+    # Incremented by one on every `step`; starts at 0 after `reset` / `reset_to_level`.
+    timestep: int
+class LogWrapper(GymnaxWrapper):
+    """Log the episode returns and lengths."""
+    def __init__(self, env):
+        super().__init__(env)
+    @partial(jax.jit, static_argnums=(0, 2))
+    def reset(self, key: chex.PRNGKey, params=None):
+        obs, env_state = self._env.reset(key, params)
+        state = LogEnvState(env_state, 0.0, 0, 0.0, 0, 0)
+        return obs, state
+    def reset_to_level(self, key: chex.PRNGKey, level: EnvState, params=None):
+        obs, env_state = self._env.reset_to_level(key, level, params)
+        state = LogEnvState(env_state, 0.0, 0, 0.0, 0, 0)
+        return obs, state
+    @partial(jax.jit, static_argnums=(0, 4))
+    def step(
+        self,
+        key: chex.PRNGKey,
+        state,
+        action: Union[int, float],
+        params=None,
+    ):
+        obs, env_state, reward, done, info = self._env.step(key, state.env_state, action, params)
+        new_episode_return = state.episode_returns + reward
+        new_episode_length = state.episode_lengths + 1
+        state = LogEnvState(
+            env_state=env_state,
+            episode_returns=new_episode_return * (1 - done),
+            episode_lengths=new_episode_length * (1 - done),
+            returned_episode_returns=state.returned_episode_returns * (1 - done) + new_episode_return * done,
+            returned_episode_lengths=state.returned_episode_lengths * (1 - done) + new_episode_length * done,
+            timestep=state.timestep + 1,
+        )
+        info["returned_episode_returns"] = state.returned_episode_returns
+        info["returned_episode_lengths"] = state.returned_episode_lengths
+        info["returned_episode_solved"] = info["GoalR"]
+        info["timestep"] = state.timestep
+        info["returned_episode"] = done
+        return obs, state, reward, done, info

kinetix/models/.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ actor_critic_old.py
2	+ gactor_gritic_old.py

kinetix/models/__init__.py ADDED Viewed

	@@ -0,0 +1,65 @@

+from kinetix.models.actor_critic import (
+    ActorCriticPixelsRNN,
+    ActorCriticSymbolicRNN,
+)
+from kinetix.models.transformer_model import ActorCriticTransformer
+def make_network_from_config(env, env_params, config, network_kws={}):
+    env_name = config["env_name"]
+    if "MultiDiscrete" in env_name:
+        action_mode = "multi_discrete"
+    elif "Discrete" in env_name:
+        action_mode = "discrete"
+    elif "Continuous" in env_name:
+        action_mode = "continuous"
+    elif "Hybrid" in env_name:
+        action_mode = "hybrid"
+    else:
+        raise ValueError(f"Unknown action mode for {env_name}")
+    action_dim = (
+        env.action_space(env_params).shape[0] if action_mode == "continuous" else env.action_space(env_params).n
+    )
+    if "hybrid_action_continuous_dim" not in network_kws:
+        network_kws["hybrid_action_continuous_dim"] = action_dim
+    if "multi_discrete_number_of_dims_per_distribution" not in network_kws:
+        num_joint_bindings = config["static_env_params"]["num_motor_bindings"]
+        num_thruster_bindings = config["static_env_params"]["num_thruster_bindings"]
+        network_kws["multi_discrete_number_of_dims_per_distribution"] = [3 for _ in range(num_joint_bindings)] + [
+            2 for _ in range(num_thruster_bindings)
+        ]
+    network_kws["recurrent"] = config.get("recurrent_model", True)
+    if "Pixels" in env_name:
+        cls_to_use = ActorCriticPixelsRNN
+    elif "Symbolic" in env_name or "Blind" in env_name:
+        cls_to_use = ActorCriticSymbolicRNN
+    if "Entity" in env_name:
+        network = ActorCriticTransformer(
+            action_dim=action_dim,
+            fc_layer_width=config["fc_layer_width"],
+            fc_layer_depth=config["fc_layer_depth"],
+            action_mode=action_mode,
+            num_heads=config["num_heads"],
+            transformer_depth=config["transformer_depth"],
+            transformer_size=config["transformer_size"],
+            transformer_encoder_size=config["transformer_encoder_size"],
+            aggregate_mode=config["aggregate_mode"],
+            full_attention_mask=config["full_attention_mask"],
+            activation=config["activation"],
+            **network_kws,
+        )
+    else:
+        network = cls_to_use(
+            action_dim,
+            fc_layer_width=config["fc_layer_width"],
+            fc_layer_depth=config["fc_layer_depth"],
+            activation=config["activation"],
+            action_mode=action_mode,
+            **network_kws,
+        )
+    return network

kinetix/models/action_spaces.py ADDED Viewed

	@@ -0,0 +1,58 @@

+from typing import Any, Sequence
+from chex import PRNGKey
+import distrax
+from flax import struct
+import jax
+import jax.numpy as jnp
+@struct.dataclass
+class HybridAction:
+    """Action pair produced by `HybridActionDistribution`: one categorical part and one continuous part."""
+    # Sample from the categorical part (`distrax.Categorical`).
+    discrete: int
+    # Sample from the Gaussian part (`distrax.MultivariateNormalDiag`).
+    continuous: jnp.ndarray
+class HybridActionDistribution(distrax.Distribution):
+    def __init__(self, discrete_logits, continuous_mu, continuous_sigma) -> None:
+        self.discrete = distrax.Categorical(logits=discrete_logits)
+        self.continuous = distrax.MultivariateNormalDiag(continuous_mu, continuous_sigma)
+    def _sample_n(self, rng: PRNGKey, n: int) -> Any:
+        rng, _rng, _rng2 = jax.random.split(rng, 3)
+        a = self.discrete._sample_n(_rng, n)
+        b = self.continuous._sample_n(_rng2, n)
+        return HybridAction(a, b)
+    def log_prob(self, value: Any):
+        a = self.discrete.log_prob(value.discrete)
+        b = self.continuous.log_prob(value.continuous)
+        return a + b  # log probs, we add.
+    def entropy(self):
+        return self.discrete.entropy() + self.continuous.entropy()
+    def event_shape(self) -> Sequence[int]:
+        return ()
+class MultiDiscreteActionDistribution(distrax.Distribution):
+    def __init__(self, flat_logits, number_of_dims_per_distribution) -> None:
+        self.distributions = []
+        total_dims = 0
+        for dims in number_of_dims_per_distribution:
+            self.distributions.append(distrax.Categorical(logits=flat_logits[..., total_dims : total_dims + dims]))
+            total_dims += dims
+    def _sample_n(self, key: PRNGKey, n: int) -> Any:
+        rngs = jax.random.split(key, len(self.distributions))
+        samples = [jnp.expand_dims(d._sample_n(rng, n), axis=-1) for rng, d in zip(rngs, self.distributions)]
+        return jnp.concatenate(samples, axis=-1)
+    def log_prob(self, value: Any):
+        return sum(d.log_prob(value[..., i]) for i, d in enumerate(self.distributions))
+    def entropy(self):
+        return sum(d.entropy() for d in self.distributions)
+    def event_shape(self) -> Sequence[int]:
+        return ()

kinetix/models/actor_critic.py ADDED Viewed

	@@ -0,0 +1,206 @@

+import functools
+import jax
+import jax.numpy as jnp
+import flax.linen as nn
+import numpy as np
+from flax.linen.initializers import constant, orthogonal
+from typing import List, Sequence
+import distrax
+from kinetix.models.action_spaces import HybridActionDistribution, MultiDiscreteActionDistribution
+class ScannedRNN(nn.Module):
+    """GRU cell applied step by step along axis 0 of its inputs via `nn.scan`, sharing parameters across steps."""
+    @functools.partial(
+        nn.scan,
+        variable_broadcast="params",
+        in_axes=0,
+        out_axes=0,
+        split_rngs={"params": False},
+    )
+    @nn.compact
+    def __call__(self, carry, x):
+        """Run one GRU step, first re-initialising the state wherever `resets` is set.
+        Args:
+            carry: GRU hidden state from the previous step.
+            x: pair `(ins, resets)`; `ins.shape[0]` is used as the batch size and `resets` is indexed as
+                `resets[:, np.newaxis]`, so it presumably holds one flag per batch entry - TODO confirm.
+        Returns:
+            `(new_rnn_state, y)` as produced by `nn.GRUCell`.
+        """
+        rnn_state = carry
+        ins, resets = x
+        # Replace the carried state with a fresh initial carry for every entry whose reset flag is set.
+        rnn_state = jnp.where(
+            resets[:, np.newaxis],
+            self.initialize_carry(ins.shape[0], 256),
+            rnn_state,
+        )
+        new_rnn_state, y = nn.GRUCell(features=256)(rnn_state, ins)
+        return new_rnn_state, y
+    @staticmethod
+    def initialize_carry(batch_size, hidden_size=256):
+        """Return the initial GRU carry for `batch_size` entries.
+        NOTE(review): the cell is always built with `features=256`; `hidden_size` only enters the shape tuple
+        passed to `cell.initialize_carry`, so a non-default value may not change the carry width - confirm.
+        """
+        # Use a dummy key since the default state init fn is just zeros.
+        cell = nn.GRUCell(features=256)
+        return cell.initialize_carry(jax.random.PRNGKey(0), (batch_size, hidden_size))
+class GeneralActorCriticRNN(nn.Module):
+    """Shared actor-critic head: optional GRU, then separate actor and critic MLPs on top of an embedding."""
+    # NOTE(review): annotated as Sequence[int] but used below as the feature count of `nn.Dense` and as the
+    # single entry of a shape tuple, i.e. like a plain int - confirm and tighten the annotation.
+    action_dim: Sequence[int]
+    # Number of hidden Dense layers in each of the actor and critic branches.
+    fc_layer_depth: int
+    # Width of every hidden Dense layer.
+    fc_layer_width: int
+    action_mode: str  # "discrete", "continuous" or "multi_discrete"; any other value takes the hybrid branch
+    # Size of the continuous part of the action; only read in the hybrid branch.
+    hybrid_action_continuous_dim: int
+    # Passed to `MultiDiscreteActionDistribution`; only read in the multi_discrete branch.
+    multi_discrete_number_of_dims_per_distribution: List[int]
+    # Not supported here: `__call__` raises NotImplementedError when this is True.
+    add_generator_embedding: bool = False
+    generator_embedding_number_of_timesteps: int = 10
+    # When True the embedding is first passed through `ScannedRNN`.
+    recurrent: bool = False
+    # Given an embedding, return the action/values, since this is shared across all models.
+    @nn.compact
+    def __call__(self, hidden, obs, embedding, dones, activation):
+        """Map an embedding to `(hidden, pi, value)`.
+        Args:
+            hidden: recurrent state; returned unchanged when `recurrent` is False.
+            obs: not read by this method.
+            embedding: features fed to the (optional) GRU and then to both MLP branches.
+            dones: reset flags passed to `ScannedRNN` together with the embedding.
+            activation: callable applied after every hidden Dense layer.
+        Returns:
+            The (possibly updated) hidden state, the action distribution and the critic output with its
+            trailing size-1 axis squeezed away.
+        """
+        if self.add_generator_embedding:
+            raise NotImplementedError()
+        if self.recurrent:
+            rnn_in = (embedding, dones)
+            hidden, embedding = ScannedRNN()(hidden, rnn_in)
+        actor_mean = embedding
+        critic = embedding
+        actor_mean_last = embedding
+        # Actor and critic layers are created alternately inside one loop; keep this order, since it
+        # determines the order in which the Dense submodules are constructed.
+        for _ in range(self.fc_layer_depth):
+            actor_mean = nn.Dense(
+                self.fc_layer_width,
+                kernel_init=orthogonal(np.sqrt(2)),
+                bias_init=constant(0.0),
+            )(actor_mean)
+            actor_mean = activation(actor_mean)
+            critic = nn.Dense(
+                self.fc_layer_width,
+                kernel_init=orthogonal(np.sqrt(2)),
+                bias_init=constant(0.0),
+            )(critic)
+            critic = activation(critic)
+        # Last hidden actor features, kept for the extra hybrid heads below.
+        actor_mean_last = actor_mean
+        actor_mean = nn.Dense(self.action_dim, kernel_init=orthogonal(0.01), bias_init=constant(0.0))(actor_mean)
+        if self.action_mode == "discrete":
+            pi = distrax.Categorical(logits=actor_mean)
+        elif self.action_mode == "continuous":
+            # State-independent log standard deviation, learned as a free parameter initialised to zero.
+            actor_logtstd = self.param("log_std", nn.initializers.zeros, (self.action_dim,))
+            pi = distrax.MultivariateNormalDiag(actor_mean, jnp.exp(actor_logtstd))
+        elif self.action_mode == "multi_discrete":
+            pi = MultiDiscreteActionDistribution(actor_mean, self.multi_discrete_number_of_dims_per_distribution)
+        else:
+            # Hybrid: `actor_mean` supplies the discrete logits; mean and (exponentiated) scale of the
+            # continuous part come from two further Dense heads on the last hidden actor features.
+            actor_mean_continuous = nn.Dense(
+                self.hybrid_action_continuous_dim, kernel_init=orthogonal(0.01), bias_init=constant(0.0)
+            )(actor_mean_last)
+            actor_mean_sigma = jnp.exp(
+                nn.Dense(self.hybrid_action_continuous_dim, kernel_init=orthogonal(0.01), bias_init=constant(0.0))(
+                    actor_mean_last
+                )
+            )
+            pi = HybridActionDistribution(actor_mean, actor_mean_continuous, actor_mean_sigma)
+        critic = nn.Dense(1, kernel_init=orthogonal(1.0), bias_init=constant(0.0))(critic)
+        return hidden, pi, jnp.squeeze(critic, axis=-1)
+class ActorCriticPixelsRNN(nn.Module):
+    """Actor-critic for image observations: two conv layers build the embedding for `GeneralActorCriticRNN`."""
+    # The fields below are forwarded unchanged to `GeneralActorCriticRNN`, except `activation`.
+    action_dim: Sequence[int]
+    fc_layer_depth: int
+    fc_layer_width: int
+    action_mode: str
+    hybrid_action_continuous_dim: int
+    multi_discrete_number_of_dims_per_distribution: List[int]
+    # "relu" selects `nn.relu`; any other value selects `nn.tanh`.
+    activation: str
+    add_generator_embedding: bool = False
+    generator_embedding_number_of_timesteps: int = 10
+    recurrent: bool = True
+    @nn.compact
+    def __call__(self, hidden, x, **kwargs):
+        """Encode the observation and return `(hidden, pi, value)` from the shared head.
+        Args:
+            hidden: recurrent state passed through to the shared head.
+            x: pair `(obs, dones)`; `obs` must expose `.image` and `.global_info` (or wrap such an object in
+                `.obs` when `add_generator_embedding` is True).
+            **kwargs: accepted but not used.
+        """
+        if self.activation == "relu":
+            activation = nn.relu
+        else:
+            activation = nn.tanh
+        og_obs, dones = x
+        if self.add_generator_embedding:
+            obs = og_obs.obs
+        else:
+            obs = og_obs
+        image = obs.image
+        global_info = obs.global_info
+        # The conv stack always uses ReLU; the configurable `activation` only applies to the shared head.
+        x = nn.Conv(features=16, kernel_size=(8, 8), strides=(4, 4))(image)
+        x = nn.relu(x)
+        x = nn.Conv(features=32, kernel_size=(4, 4), strides=(2, 2))(x)
+        x = nn.relu(x)
+        # Keep the first two axes and flatten the rest (presumably time and batch lead - TODO confirm),
+        # then append the global features.
+        embedding = x.reshape(x.shape[0], x.shape[1], -1)
+        embedding = jnp.concatenate([embedding, global_info], axis=-1)
+        return GeneralActorCriticRNN(
+            action_dim=self.action_dim,
+            fc_layer_depth=self.fc_layer_depth,
+            fc_layer_width=self.fc_layer_width,
+            action_mode=self.action_mode,
+            hybrid_action_continuous_dim=self.hybrid_action_continuous_dim,
+            multi_discrete_number_of_dims_per_distribution=self.multi_discrete_number_of_dims_per_distribution,
+            add_generator_embedding=self.add_generator_embedding,
+            generator_embedding_number_of_timesteps=self.generator_embedding_number_of_timesteps,
+            recurrent=self.recurrent,
+        )(hidden, og_obs, embedding, dones, activation)
+    @staticmethod
+    def initialize_carry(batch_size, hidden_size=256):
+        """Return the initial recurrent state by delegating to `ScannedRNN.initialize_carry`."""
+        return ScannedRNN.initialize_carry(batch_size, hidden_size)
+class ActorCriticSymbolicRNN(nn.Module):
+    """Actor-critic for array observations: one Dense+ReLU layer builds the embedding for the shared head."""
+    # The fields below are forwarded unchanged to `GeneralActorCriticRNN`, except `activation`.
+    action_dim: Sequence[int]
+    fc_layer_width: int
+    action_mode: str
+    hybrid_action_continuous_dim: int
+    multi_discrete_number_of_dims_per_distribution: List[int]
+    fc_layer_depth: int
+    # "relu" selects `nn.relu`; any other value selects `nn.tanh`.
+    activation: str
+    add_generator_embedding: bool = False
+    generator_embedding_number_of_timesteps: int = 10
+    recurrent: bool = True
+    @nn.compact
+    def __call__(self, hidden, x):
+        """Encode the observation and return `(hidden, pi, value)` from the shared head.
+        Args:
+            hidden: recurrent state passed through to the shared head.
+            x: pair `(obs, dones)`; `obs` is fed straight into a Dense layer (or unwrapped from `.obs` first
+                when `add_generator_embedding` is True).
+        """
+        if self.activation == "relu":
+            activation = nn.relu
+        else:
+            activation = nn.tanh
+        og_obs, dones = x
+        if self.add_generator_embedding:
+            obs = og_obs.obs
+        else:
+            obs = og_obs
+        embedding = nn.Dense(
+            self.fc_layer_width,
+            kernel_init=orthogonal(np.sqrt(2)),
+            bias_init=constant(0.0),
+        )(obs)
+        # The embedding layer always uses ReLU; the configurable `activation` only applies to the shared head.
+        embedding = nn.relu(embedding)
+        return GeneralActorCriticRNN(
+            action_dim=self.action_dim,
+            fc_layer_depth=self.fc_layer_depth,
+            fc_layer_width=self.fc_layer_width,
+            action_mode=self.action_mode,
+            hybrid_action_continuous_dim=self.hybrid_action_continuous_dim,
+            multi_discrete_number_of_dims_per_distribution=self.multi_discrete_number_of_dims_per_distribution,
+            add_generator_embedding=self.add_generator_embedding,
+            generator_embedding_number_of_timesteps=self.generator_embedding_number_of_timesteps,
+            recurrent=self.recurrent,
+        )(hidden, og_obs, embedding, dones, activation)
+    @staticmethod
+    def initialize_carry(batch_size, hidden_size=256):
+        """Return the initial recurrent state by delegating to `ScannedRNN.initialize_carry`."""
+        return ScannedRNN.initialize_carry(batch_size, hidden_size)

kinetix/models/rel_multi_head.py ADDED Viewed

	@@ -0,0 +1,546 @@

+# Copyright 2023 The Flax Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# CODE IS HEAVILY INSPIRED FROM https://github.com/huggingface/transformers/blob/v4.40.1/src/transformers/models/deprecated/transfo_xl/modeling_transfo_xl.py
+# MOST OF THE TIME JUST A CONVERSION IN JAX
+"""Relative Attention HEAVILY INSPIRED FROM https://github.com/huggingface/transformers/blob/v4.40.1/src/transformers/models/deprecated/transfo_xl/modeling_transfo_xl.py
+, flax attention, https://github.com/kimiyoung/transformer-xl/blob/master/pytorch/mem_transformer.py#L143, most of the time just a flax/jax conversion """
+import functools
+from typing import Any, Callable, Optional, Tuple
+from flax.linen.dtypes import promote_dtype
+from flax.linen import initializers
+from flax.linen.linear import default_kernel_init
+from flax.linen.linear import DenseGeneral
+from flax.linen.linear import DotGeneralT
+from flax.linen.linear import PrecisionLike
+from flax.linen.module import compact
+from flax.linen.module import merge_param
+from flax.linen.module import Module
+import jax
+from jax import lax
+from jax import random
+import jax.numpy as jnp
+# Loose aliases used only for annotations in this module.
+PRNGKey = Any
+Shape = Tuple[int, ...]
+Dtype = Any
+Array = Any
+# `jnp.roll` mapped over axis -2 of the array and axis 0 of the shifts (the roll axis argument is shared),
+# with the mapped axis put back at -2: slice i along axis -2 is rolled by shifts[i].
+roll_vmap = jax.vmap(jnp.roll, in_axes=(-2, 0, None), out_axes=-2)
+def _rel_shift(x):
+    zero_pad_shape = x.shape[:-2] + (x.shape[-2], 1)
+    zero_pad = jnp.zeros(zero_pad_shape, dtype=x.dtype)
+    x_padded = jnp.concatenate([zero_pad, x], axis=-1)
+    x_padded_shape = x.shape[:-2] + (x.shape[-1] + 1, x.shape[-2])
+    x_padded = x_padded.reshape(x_padded_shape)
+    # x_padded=jnp.swapaxes(x_padded,0,1)
+    x = jnp.take(x_padded, jnp.arange(1, x_padded.shape[-2]), axis=-2).reshape(x.shape)
+    return x
+def dot_product_attention_weights(
+    query: Array,
+    key: Array,
+    r_pos_embed,
+    r_r_bias,
+    r_w_bias,
+    bias: Optional[Array] = None,
+    mask: Optional[Array] = None,
+    broadcast_dropout: bool = True,
+    dropout_rng: Optional[PRNGKey] = None,
+    dropout_rate: float = 0.0,
+    deterministic: bool = False,
+    dtype: Optional[Dtype] = None,
+    precision: PrecisionLike = None,
+):
+    """Computes dot-product attention weights given query and key.
+    Used by :func:`dot_product_attention`, which is what you'll most likely use.
+    But if you want access to the attention weights for introspection, then
+    you can directly call this function and call einsum yourself.
+    Args:
+        query: queries for calculating attention with shape of
+            `[batch..., q_length, num_heads, qk_depth_per_head]`.
+        key: keys for calculating attention with shape of
+            `[batch..., kv_length, num_heads, qk_depth_per_head]`.
+        bias: bias for the attention weights. This should be broadcastable to the
+            shape `[batch..., num_heads, q_length, kv_length]`.
+            This can be used for incorporating causal masks, padding masks,
+            proximity bias, etc.
+        mask: mask for the attention weights. This should be broadcastable to the
+            shape `[batch..., num_heads, q_length, kv_length]`.
+            This can be used for incorporating causal masks.
+            Attention weights are masked out if their corresponding mask value
+            is `False`.
+        broadcast_dropout: bool: use a broadcasted dropout along batch dims.
+        dropout_rng: JAX PRNGKey: to be used for dropout
+        dropout_rate: dropout rate
+        deterministic: bool, deterministic or not (to apply dropout)
+        dtype: the dtype of the computation (default: infer from inputs and params)
+        precision: numerical precision of the computation see `jax.lax.Precision`
+            for details.
+    Returns:
+        Output of shape `[batch..., num_heads, q_length, kv_length]`.
+    """
+    query, key = promote_dtype(query, key, dtype=dtype)
+    dtype = query.dtype
+    assert query.ndim == key.ndim, "q, k must have same rank."
+    assert query.shape[:-3] == key.shape[:-3], "q, k batch dims must match."
+    assert query.shape[-2] == key.shape[-2], "q, k num_heads must match."
+    assert query.shape[-1] == key.shape[-1], "q, k depths must match."
+    # calculate attention matrix
+    depth = query.shape[-1]
+    # query = query
+    # attn weight shape is (batch..., num_heads, q_length, kv_length)
+    attn_weights = jnp.einsum("...qhd,...khd->...hqk", query + r_w_bias, key, precision=precision)
+    attn_weights_r = jnp.einsum("...qhd,khd->...hqk", query + r_r_bias, r_pos_embed, precision=precision)
+    attn_weights_r = roll_vmap(attn_weights_r, jnp.arange(0, query.shape[-3]) - (query.shape[-3] - 1), -1)
+    # attn_weights_r=_rel_shift(attn_weights_r)
+    attn_weights = attn_weights + attn_weights_r
+    attn_weights = attn_weights / jnp.sqrt(depth).astype(dtype)
+    # apply attention bias: masking, dropout, proximity bias, etc.
+    if bias is not None:
+        attn_weights = attn_weights + bias
+    # apply attention mask
+    if mask is not None:
+        big_neg = jnp.finfo(dtype).min
+        attn_weights = jnp.where(mask, attn_weights, big_neg)
+    # normalize the attention weights
+    attn_weights = jax.nn.softmax(attn_weights).astype(dtype)
+    # apply attention dropout
+    if not deterministic and dropout_rate > 0.0:
+        keep_prob = 1.0 - dropout_rate
+        if broadcast_dropout:
+            # dropout is broadcast across the batch + head dimensions
+            dropout_shape = tuple([1] * (key.ndim - 2)) + attn_weights.shape[-2:]
+            keep = random.bernoulli(dropout_rng, keep_prob, dropout_shape)  # type: ignore
+        else:
+            keep = random.bernoulli(dropout_rng, keep_prob, attn_weights.shape)  # type: ignore
+        multiplier = keep.astype(dtype) / jnp.asarray(keep_prob, dtype=dtype)
+        attn_weights = attn_weights * multiplier
+    return attn_weights
+def dot_product_attention(
+    query: Array,
+    key: Array,
+    value: Array,
+    r_pos_embed,
+    r_r_bias,
+    r_w_bias,
+    bias: Optional[Array] = None,
+    mask: Optional[Array] = None,
+    broadcast_dropout: bool = True,
+    dropout_rng: Optional[PRNGKey] = None,
+    dropout_rate: float = 0.0,
+    deterministic: bool = False,
+    dtype: Optional[Dtype] = None,
+    precision: PrecisionLike = None,
+):
+    """Computes dot-product attention given query, key, and value.
+    This is the core function for applying attention based on
+    https://arxiv.org/abs/1706.03762. It calculates the attention weights given
+    query and key and combines the values using the attention weights.
+    Note: query, key, value needn't have any batch dimensions.
+    Args:
+        query: queries for calculating attention with shape of
+            `[batch..., q_length, num_heads, qk_depth_per_head]`.
+        key: keys for calculating attention with shape of
+            `[batch..., kv_length, num_heads, qk_depth_per_head]`.
+        value: values to be used in attention with shape of
+            `[batch..., kv_length, num_heads, v_depth_per_head]`.
+        bias: bias for the attention weights. This should be broadcastable to the
+            shape `[batch..., num_heads, q_length, kv_length]`.
+            This can be used for incorporating causal masks, padding masks,
+            proximity bias, etc.
+        mask: mask for the attention weights. This should be broadcastable to the
+            shape `[batch..., num_heads, q_length, kv_length]`.
+            This can be used for incorporating causal masks.
+            Attention weights are masked out if their corresponding mask value
+            is `False`.
+        broadcast_dropout: bool: use a broadcasted dropout along batch dims.
+        dropout_rng: JAX PRNGKey: to be used for dropout
+        dropout_rate: dropout rate
+        deterministic: bool, deterministic or not (to apply dropout)
+        dtype: the dtype of the computation (default: infer from inputs)
+        precision: numerical precision of the computation see `jax.lax.Precision`
+            for details.
+    Returns:
+        Output of shape `[batch..., q_length, num_heads, v_depth_per_head]`.
+    """
+    query, key, value = promote_dtype(query, key, value, dtype=dtype)
+    dtype = query.dtype
+    assert key.ndim == query.ndim == value.ndim, "q, k, v must have same rank."
+    assert query.shape[:-3] == key.shape[:-3] == value.shape[:-3], "q, k, v batch dims must match."
+    assert query.shape[-2] == key.shape[-2] == value.shape[-2], "q, k, v num_heads must match."
+    assert key.shape[-3] == value.shape[-3], "k, v lengths must match."
+    # compute attention weights
+    attn_weights = dot_product_attention_weights(
+        query,
+        key,
+        r_pos_embed,
+        r_r_bias,
+        r_w_bias,
+        bias,
+        mask,
+        broadcast_dropout,
+        dropout_rng,
+        dropout_rate,
+        deterministic,
+        dtype,
+        precision,
+    )
+    # return weighted sum over values for each query position
+    return jnp.einsum("...hqk,...khd->...qhd", attn_weights, value, precision=precision)
+class RelMultiHeadDotProductAttention(Module):
+    """Multi-head dot-product attention.
+    Attributes:
+        num_heads: number of attention heads. Features (i.e. inputs_q.shape[-1])
+            should be divisible by the number of heads.
+        dtype: the dtype of the computation (default: infer from inputs and params)
+        param_dtype: the dtype passed to parameter initializers (default: float32)
+        qkv_features: dimension of the key, query, and value.
+        out_features: dimension of the last projection
+        broadcast_dropout: bool: use a broadcasted dropout along batch dims.
+        dropout_rate: dropout rate
+        deterministic: if false, the attention weight is masked randomly using
+            dropout, whereas if true, the attention weights are deterministic.
+        precision: numerical precision of the computation see `jax.lax.Precision`
+            for details.
+        kernel_init: initializer for the kernel of the Dense layers.
+        bias_init: initializer for the bias of the Dense layers.
+        use_bias: bool: whether pointwise QKVO dense transforms use bias.
+        attention_fn: dot_product_attention or compatible function. Accepts query,
+            key, value, and returns output of shape `[bs, dim1, dim2, ..., dimN,,
+            num_heads, value_channels]``
+        decode: whether to prepare and use an autoregressive cache.
+    """
+    num_heads: int
+    dtype: Optional[Dtype] = None
+    param_dtype: Dtype = jnp.float32
+    qkv_features: Optional[int] = None
+    out_features: Optional[int] = None
+    broadcast_dropout: bool = True
+    dropout_rate: float = 0.0
+    deterministic: Optional[bool] = None
+    precision: PrecisionLike = None
+    kernel_init: Callable[[PRNGKey, Shape, Dtype], Array] = default_kernel_init
+    bias_init: Callable[[PRNGKey, Shape, Dtype], Array] = initializers.zeros_init()
+    use_bias: bool = True
+    attention_fn: Callable[..., Array] = dot_product_attention
+    decode: bool = False
+    qkv_dot_general: DotGeneralT = lax.dot_general
+    out_dot_general: DotGeneralT = lax.dot_general
+    @compact
+    def __call__(
+        self,
+        inputs_q: Array,
+        inputs_kv: Array,
+        pos_embed: Array,
+        mask: Optional[Array] = None,
+        deterministic: Optional[bool] = None,
+    ):
+        """Applies multi-head dot product attention with positional terms on the input data.
+        Projects the inputs into multi-headed query, key, and value vectors,
+        projects `pos_embed` into a per-head positional embedding, hands all of
+        them (plus two learned per-head biases) to `attention_fn`, and projects
+        the result to an output vector.
+        Args:
+            inputs_q: input queries of shape
+                `[batch_sizes..., length, features]`.
+            inputs_kv: key/values of shape
+                `[batch_sizes..., length, features]`.
+            pos_embed: positional embedding. Its last axis is projected by a
+                bias-free dense layer to `(num_heads, head_dim)`; no shape check
+                is done here (presumably `[..., length, features]` - TODO confirm
+                against `attention_fn`).
+            mask: attention mask of shape
+                `[batch_sizes..., num_heads, query_length, key/value_length]`.
+                Attention weights are masked out if their corresponding mask value
+                is `False`.
+            deterministic: if false, the attention weight is masked randomly
+                using dropout, whereas if true, the attention weights
+                are deterministic. Only consulted when `dropout_rate > 0`.
+        Returns:
+            output of shape `[batch_sizes..., length, features]`.
+        Raises:
+            ValueError: in decode mode, if the query shape does not match the
+                single-position shape implied by the cached keys.
+        """
+        # Fall back to the query feature size when no explicit sizes are configured.
+        features = self.out_features or inputs_q.shape[-1]
+        qkv_features = self.qkv_features or inputs_q.shape[-1]
+        assert qkv_features % self.num_heads == 0, (
+            f"Memory dimension ({qkv_features}) must be divisible by number of" f" heads ({self.num_heads})."
+        )
+        head_dim = qkv_features // self.num_heads
+        # Shared constructor for the query/key/value projections.
+        dense = functools.partial(
+            DenseGeneral,
+            axis=-1,
+            dtype=self.dtype,
+            param_dtype=self.param_dtype,
+            features=(self.num_heads, head_dim),
+            kernel_init=self.kernel_init,
+            bias_init=self.bias_init,
+            use_bias=self.use_bias,
+            precision=self.precision,
+            dot_general=self.qkv_dot_general,
+        )
+        # project inputs_q to multi-headed q/k/v
+        # dimensions are then [batch..., length, n_heads, n_features_per_head]
+        query, key, value = (
+            dense(name="query")(inputs_q),
+            dense(name="key")(inputs_kv),
+            dense(name="value")(inputs_kv),
+        )
+        # different bc no bias
+        # (the positional projection always uses use_bias=False, regardless of self.use_bias)
+        dense_relpos = functools.partial(
+            DenseGeneral,
+            axis=-1,
+            dtype=self.dtype,
+            param_dtype=self.param_dtype,
+            features=(self.num_heads, head_dim),
+            kernel_init=self.kernel_init,
+            use_bias=False,
+            precision=self.precision,
+            dot_general=self.qkv_dot_general,
+        )
+        r_pos_embed = dense_relpos(name="pos_embed_mat")(pos_embed)
+        # Two learned per-head biases of shape (num_heads, head_dim), created with bias_init and
+        # forwarded to attention_fn. Their exact role is defined by attention_fn (presumably the
+        # Transformer-XL style position/content biases - TODO confirm).
+        r_r_bias = self.param("r_r_bias", self.bias_init, (self.num_heads, head_dim))  # Initialization function
+        r_w_bias = self.param("r_w_bias", self.bias_init, (self.num_heads, head_dim))  # Initialization function
+        # During fast autoregressive decoding, we feed one position at a time,
+        # and cache the keys and values step by step.
+        # NOTE(review): r_pos_embed is computed above from whatever pos_embed the caller passes on
+        # each call; the decode branch below neither caches nor slices it.
+        if self.decode:
+            # detect if we're initializing by absence of existing cache data.
+            is_initialized = self.has_variable("cache", "cached_key")
+            cached_key = self.variable("cache", "cached_key", jnp.zeros, key.shape, key.dtype)
+            cached_value = self.variable("cache", "cached_value", jnp.zeros, value.shape, value.dtype)
+            cache_index = self.variable("cache", "cache_index", lambda: jnp.array(0, dtype=jnp.int32))
+            if is_initialized:
+                (
+                    *batch_dims,
+                    max_length,
+                    num_heads,
+                    depth_per_head,
+                ) = cached_key.value.shape
+                # shape check of cached keys against query input
+                expected_shape = tuple(batch_dims) + (1, num_heads, depth_per_head)
+                if expected_shape != query.shape:
+                    raise ValueError(
+                        "Autoregressive cache shape error, "
+                        "expected query shape %s instead got %s." % (expected_shape, query.shape)
+                    )
+                # update key, value caches with our new 1d spatial slices
+                cur_index = cache_index.value
+                indices = (0,) * len(batch_dims) + (cur_index, 0, 0)
+                key = lax.dynamic_update_slice(cached_key.value, key, indices)
+                value = lax.dynamic_update_slice(cached_value.value, value, indices)
+                cached_key.value = key
+                cached_value.value = value
+                cache_index.value = cache_index.value + 1
+                # causal mask for cached decoder self-attention:
+                # our single query position should only attend to those key
+                # positions that have already been generated and cached,
+                # not the remaining zero elements.
+                mask = combine_masks(
+                    mask,
+                    jnp.broadcast_to(
+                        jnp.arange(max_length) <= cur_index,
+                        tuple(batch_dims) + (1, 1, max_length),
+                    ),
+                )
+        # A dropout RNG is only drawn when dropout is enabled and we are not deterministic.
+        dropout_rng = None
+        if self.dropout_rate > 0.0:  # Require `deterministic` only if using dropout.
+            m_deterministic = merge_param("deterministic", self.deterministic, deterministic)
+            if not m_deterministic:
+                dropout_rng = self.make_rng("dropout")
+        else:
+            m_deterministic = True
+        # apply attention
+        # (positional embedding and both biases are passed positionally after query/key/value)
+        x = self.attention_fn(
+            query,
+            key,
+            value,
+            r_pos_embed,
+            r_r_bias,
+            r_w_bias,
+            mask=mask,
+            dropout_rng=dropout_rng,
+            dropout_rate=self.dropout_rate,
+            broadcast_dropout=self.broadcast_dropout,
+            deterministic=m_deterministic,
+            dtype=self.dtype,
+            precision=self.precision,
+        )  # pytype: disable=wrong-keyword-args
+        # back to the original inputs dimensions
+        # (contracts the trailing (heads, head_dim) axes into `features`)
+        out = DenseGeneral(
+            features=features,
+            axis=(-2, -1),
+            kernel_init=self.kernel_init,
+            bias_init=self.bias_init,
+            use_bias=self.use_bias,
+            dtype=self.dtype,
+            param_dtype=self.param_dtype,
+            precision=self.precision,
+            dot_general=self.out_dot_general,
+            name="out",  # type: ignore[call-arg]
+        )(x)
+        return out
+class SelfAttention(RelMultiHeadDotProductAttention):
+    """Self-attention special case of multi-head dot-product attention."""
+    @compact
+    def __call__(  # type: ignore
+        self,
+        inputs_q: Array,
+        mask: Optional[Array] = None,
+        deterministic: Optional[bool] = None,
+    ):
+        """Applies multi-head dot product self-attention on the input data.
+        Projects the inputs into multi-headed query, key, and value vectors,
+        applies dot-product attention and project the results to an output vector.
+        Args:
+            inputs_q: input queries of shape
+                `[batch_sizes..., length, features]`.
+            mask: attention mask of shape
+                `[batch_sizes..., num_heads, query_length, key/value_length]`.
+                Attention weights are masked out if their corresponding mask value
+                is `False`.
+            deterministic: if false, the attention weight is masked randomly
+                using dropout, whereas if true, the attention weights
+                are deterministic.
+        Returns:
+            output of shape `[batch_sizes..., length, features]`.
+        """
+        return super().__call__(inputs_q, inputs_q, mask, deterministic=deterministic)
+# mask-making utility functions
+def make_attention_mask(
+    query_input: Array,
+    key_input: Array,
+    pairwise_fn: Callable[..., Any] = jnp.multiply,
+    extra_batch_dims: int = 0,
+    dtype: Dtype = jnp.float32,
+):
+    """Mask-making helper for attention weights.
+    In case of 1d inputs (i.e., `[batch..., len_q]`, `[batch..., len_kv]`, the
+    attention weights will be `[batch..., heads, len_q, len_kv]` and this
+    function will produce `[batch..., 1, len_q, len_kv]`.
+    Args:
+        query_input: a batched, flat input of query_length size
+        key_input: a batched, flat input of key_length size
+        pairwise_fn: broadcasting elementwise comparison function
+        extra_batch_dims: number of extra batch dims to add singleton
+            axes for, none by default
+        dtype: mask return dtype
+    Returns:
+        A `[batch..., 1, len_q, len_kv]` shaped mask for 1d attention.
+    """
+    mask = pairwise_fn(jnp.expand_dims(query_input, axis=-1), jnp.expand_dims(key_input, axis=-2))
+    mask = jnp.expand_dims(mask, axis=-3)
+    mask = jnp.expand_dims(mask, axis=tuple(range(extra_batch_dims)))
+    return mask.astype(dtype)
+def make_causal_mask(x: Array, extra_batch_dims: int = 0, dtype: Dtype = jnp.float32) -> Array:
+    """Make a causal mask for self-attention.
+    In case of 1d inputs (i.e., `[batch..., len]`, the self-attention weights
+    will be `[batch..., heads, len, len]` and this function will produce a
+    causal mask of shape `[batch..., 1, len, len]`.
+    Args:
+        x: input array of shape `[batch..., len]`
+        extra_batch_dims: number of batch dims to add singleton axes for,
+            none by default
+        dtype: mask return dtype
+    Returns:
+        A `[batch..., 1, len, len]` shaped causal mask for 1d attention.
+    """
+    idxs = jnp.broadcast_to(jnp.arange(x.shape[-1], dtype=jnp.int32), x.shape)
+    return make_attention_mask(
+        idxs,
+        idxs,
+        jnp.greater_equal,
+        extra_batch_dims=extra_batch_dims,
+        dtype=dtype,
+    )
+def combine_masks(*masks: Optional[Array], dtype: Dtype = jnp.float32) -> Array:
+    """Combine attention masks.
+    Args:
+        *masks: set of attention mask arguments to combine, some can be None.
+        dtype: dtype for the returned mask.
+    Returns:
+        Combined mask, reduced by logical and, returns None if no masks given.
+    """
+    masks_list = [m for m in masks if m is not None]
+    if not masks_list:
+        return None
+    assert all(
+        map(lambda x: x.ndim == masks_list[0].ndim, masks_list)
+    ), f"masks must have same rank: {tuple(map(lambda x: x.ndim, masks_list))}"
+    mask, *other_masks = masks_list
+    for other_mask in other_masks:
+        mask = jnp.logical_and(mask, other_mask)
+    return mask.astype(dtype)

kinetix/models/transformer_model.py ADDED Viewed

	@@ -0,0 +1,302 @@

+import functools
+import jax.numpy as jnp
+import flax.linen as nn
+import numpy as np
+from flax.linen.initializers import constant, orthogonal
+from typing import List, Sequence
+import distrax
+import jax
+from kinetix.models.actor_critic import GeneralActorCriticRNN, ScannedRNN
+from kinetix.render.renderer_symbolic_entity import EntityObservation
+from flax.linen.attention import MultiHeadDotProductAttention
+class Gating(nn.Module):
+    # code taken from https://github.com/dhruvramani/Transformers-RL/blob/master/layers.py
+    d_input: int
+    bg: float = 0.0
+    @nn.compact
+    def __call__(self, x, y):
+        r = jax.nn.sigmoid(nn.Dense(self.d_input, use_bias=False)(y) + nn.Dense(self.d_input, use_bias=False)(x))
+        z = jax.nn.sigmoid(
+            nn.Dense(self.d_input, use_bias=False)(y)
+            + nn.Dense(self.d_input, use_bias=False)(x)
+            - self.param("gating_bias", constant(self.bg), (self.d_input,))
+        )
+        h = jnp.tanh(nn.Dense(self.d_input, use_bias=False)(y) + nn.Dense(self.d_input, use_bias=False)(r * x))
+        g = (1 - z) * x + (z * h)
+        return g
+class transformer_layer(nn.Module):
+    num_heads: int
+    out_features: int
+    qkv_features: int
+    gating: bool = False
+    gating_bias: float = 0.0
+    def setup(self):
+        self.attention1 = MultiHeadDotProductAttention(
+            num_heads=self.num_heads, qkv_features=self.qkv_features, out_features=self.out_features
+        )
+        self.ln1 = nn.LayerNorm()
+        self.dense1 = nn.Dense(self.out_features)
+        self.dense2 = nn.Dense(self.out_features)
+        self.ln2 = nn.LayerNorm()
+        if self.gating:
+            self.gate1 = Gating(self.out_features, self.gating_bias)
+            self.gate2 = Gating(self.out_features, self.gating_bias)
+    def __call__(self, queries: jnp.ndarray, mask: jnp.ndarray):
+        # After reading the paper, this is what I think we should do:
+        # First layernorm, then do attention
+        queries_n = self.ln1(queries)
+        y = self.attention1(queries_n, mask=mask)
+        if self.gating:  # and gate
+            y = self.gate1(queries, jax.nn.relu(y))
+        else:
+            y = queries + y
+        # Dense after norming, crucially no relu.
+        e = self.dense1(self.ln2(y))
+        if self.gating:  # and gate again
+            # This may be the wrong way around
+            e = self.gate2(y, jax.nn.relu(e))
+        else:
+            e = y + e
+        return e
+class Transformer(nn.Module):
+    encoder_size: int
+    num_heads: int
+    qkv_features: int
+    num_layers: int
+    gating: bool = False
+    gating_bias: float = 0.0
+    def setup(self):
+        # self.encoder = nn.Dense(self.encoder_size)
+        # self.positional_encoding = PositionalEncoding(self.encoder_size, max_len=self.max_len)
+        self.tf_layers = [
+            transformer_layer(
+                num_heads=self.num_heads,
+                qkv_features=self.qkv_features,
+                out_features=self.encoder_size,
+                gating=self.gating,
+                gating_bias=self.gating_bias,
+            )
+            for _ in range(self.num_layers)
+        ]
+        self.joint_layers = [nn.Dense(self.encoder_size) for _ in range(self.num_layers)]
+        self.thruster_layers = [nn.Dense(self.encoder_size) for _ in range(self.num_layers)]
+        # self.pos_emb=PositionalEmbedding(self.encoder_size)
+    def __call__(
+        self,
+        shape_embeddings: jnp.ndarray,
+        shape_attention_mask,
+        joint_embeddings,
+        joint_mask,
+        joint_indexes,
+        thruster_embeddings,
+        thruster_mask,
+        thruster_indexes,
+    ):
+        # forward eval so obs is only one timestep
+        # encoded = self.encoder(shape_embeddings)
+        # pos_embed=self.pos_emb(jnp.arange(1+memories.shape[-3],-1,-1))[:1+memories.shape[-3]]
+        for tf_layer, joint_layer, thruster_layer in zip(self.tf_layers, self.joint_layers, self.thruster_layers):
+            # Do attention
+            shape_embeddings = tf_layer(shape_embeddings, shape_attention_mask)
+            # Joints
+            # T, B, 2J, (2SE + JE)
+            @jax.vmap
+            @jax.vmap
+            def do_index2(to_ind, ind):
+                return to_ind[ind]
+            joint_shape_embeddings = jnp.concatenate(
+                [
+                    do_index2(shape_embeddings, joint_indexes[..., 0]),
+                    do_index2(shape_embeddings, joint_indexes[..., 1]),
+                    joint_embeddings,
+                ],
+                axis=-1,
+            )
+            shape_joint_entity_delta = joint_layer(joint_shape_embeddings) * joint_mask[..., None]
+            @jax.vmap
+            @jax.vmap
+            def add2(addee, index, adder):
+                return addee.at[index].add(adder)
+            # Thrusters
+            thruster_shape_embeddings = jnp.concatenate(
+                [
+                    do_index2(shape_embeddings, thruster_indexes),
+                    thruster_embeddings,
+                ],
+                axis=-1,
+            )
+            shape_thruster_entity_delta = thruster_layer(thruster_shape_embeddings) * thruster_mask[..., None]
+            shape_embeddings = add2(shape_embeddings, joint_indexes[..., 0], shape_joint_entity_delta)
+            shape_embeddings = add2(shape_embeddings, thruster_indexes, shape_thruster_entity_delta)
+        return shape_embeddings
+class ActorCriticTransformer(nn.Module):
+    """Actor-critic head on top of an entity transformer.
+    Encodes circles, polygons, joints and thrusters from an entity observation,
+    runs the shape encodings through `Transformer`, reduces them to one scene
+    embedding according to `aggregate_mode`, and hands that embedding to
+    `GeneralActorCriticRNN`.
+    """
+    action_dim: Sequence[int]
+    fc_layer_width: int
+    action_mode: str
+    hybrid_action_continuous_dim: int
+    multi_discrete_number_of_dims_per_distribution: List[int]
+    transformer_size: int
+    transformer_encoder_size: int
+    transformer_depth: int
+    fc_layer_depth: int
+    num_heads: int
+    activation: str
+    aggregate_mode: str  # "dummy" or "mean" or "dummy_and_mean"
+    full_attention_mask: bool  # if true, only mask out inactives, and have everything attend to everything else
+    add_generator_embedding: bool = False
+    generator_embedding_number_of_timesteps: int = 10
+    recurrent: bool = True
+    @nn.compact
+    def __call__(self, hidden, x):
+        """Run the network on `x = (observation, dones)` with recurrent state `hidden`.
+        Returns whatever `GeneralActorCriticRNN` returns for the computed scene embedding.
+        """
+        # Any activation string other than "relu" selects tanh.
+        if self.activation == "relu":
+            activation = nn.relu
+        else:
+            activation = nn.tanh
+        og_obs, dones = x
+        # With a generator embedding the entity observation is nested under `.obs`.
+        if self.add_generator_embedding:
+            obs = og_obs.obs
+        else:
+            obs = og_obs
+        # obs._ is [T, B, N, L]
+        # B - batch size
+        # T - time
+        # N - number of things
+        # L - unembedded entity size
+        obs: EntityObservation
+        def _single_encoder(features, entity_id, concat=True):
+            # Dense + activation encoder. With concat=True the Dense is one unit narrower and a
+            # single extra channel is appended, so the output width is transformer_encoder_size.
+            # assume two entity types
+            num_to_remove = 1 if concat else 0
+            embedding = activation(
+                nn.Dense(
+                    self.transformer_encoder_size - num_to_remove,
+                    kernel_init=orthogonal(np.sqrt(2)),
+                    bias_init=constant(0.0),
+                )(features)
+            )
+            if concat:
+                # NOTE(review): the last axis of this array has size 1, so for entity_id == 1 the
+                # index `entity_id` lies outside it. JAX indexed updates appear to drop out-of-bounds
+                # writes by default, which would leave this channel at 0 for both entity types -
+                # confirm whether a type flag is actually written.
+                id_1h = jnp.zeros((*embedding.shape[:3], 1)).at[:, :, :, entity_id].set(entity_id)
+                return jnp.concatenate([embedding, id_1h], axis=-1)
+            else:
+                return embedding
+        circle_encodings = _single_encoder(obs.circles, 0)
+        polygon_encodings = _single_encoder(obs.polygons, 1)
+        joint_encodings = _single_encoder(obs.joints, -1, False)
+        thruster_encodings = _single_encoder(obs.thrusters, -1, False)
+        # Size of this is something like (T, B, N, K) (time, batch, num_entities, embedding_size)
+        # Polygons come first, then circles, in both the encodings and the mask.
+        # T, B, M, K
+        shape_encodings = jnp.concatenate([polygon_encodings, circle_encodings], axis=2)
+        # T, B, M
+        shape_mask = jnp.concatenate([obs.polygon_mask, obs.circle_mask], axis=2)
+        def mask_out_inactives(flat_active_mask, matrix_attention_mask):
+            # Keep an attention entry only if both of its endpoints are active.
+            matrix_attention_mask = matrix_attention_mask & (flat_active_mask[:, None]) & (flat_active_mask[None, :])
+            return matrix_attention_mask
+        joint_indexes = obs.joint_indexes
+        thruster_indexes = obs.thruster_indexes
+        if self.aggregate_mode == "dummy" or self.aggregate_mode == "dummy_and_mean":
+            # Prepend an always-active dummy entity (all-ones encoding) at index 0.
+            T, B, _, K = circle_encodings.shape
+            dummy = jnp.ones((T, B, 1, K))
+            shape_encodings = jnp.concatenate([dummy, shape_encodings], axis=2)
+            shape_mask = jnp.concatenate(
+                [jnp.ones((T, B, 1), dtype=bool), shape_mask],
+                axis=2,
+            )
+            N = obs.attention_mask.shape[-1]
+            # The dummy row/column stay True; the original mask fills the rest.
+            overall_mask = (
+                jnp.ones((T, B, obs.attention_mask.shape[2], N + 1, N + 1), dtype=bool)
+                .at[:, :, :, 1:, 1:]
+                .set(obs.attention_mask)
+            )
+            overall_mask = jax.vmap(jax.vmap(mask_out_inactives))(shape_mask, overall_mask)
+            # To account for the dummy entity
+            joint_indexes = joint_indexes + 1
+            thruster_indexes = thruster_indexes + 1
+        else:
+            overall_mask = obs.attention_mask
+        if self.full_attention_mask:
+            # Discard the structured mask: everything attends to everything that is active.
+            overall_mask = jnp.ones(overall_mask.shape, dtype=bool)
+            overall_mask = jax.vmap(jax.vmap(mask_out_inactives))(shape_mask, overall_mask)
+        # Now do attention on these
+        # (gating is always enabled here, with a zero gating bias)
+        embedding = Transformer(
+            num_layers=self.transformer_depth,
+            num_heads=self.num_heads,
+            qkv_features=self.transformer_size,
+            encoder_size=self.transformer_encoder_size,
+            gating=True,
+            gating_bias=0.0,
+        )(
+            shape_encodings,
+            jnp.repeat(overall_mask, repeats=self.num_heads // overall_mask.shape[2], axis=2),
+            joint_encodings,
+            obs.joint_mask,
+            joint_indexes,
+            thruster_encodings,
+            obs.thruster_mask,
+            thruster_indexes,
+        )  # add the extra dimension for the heads
+        if self.aggregate_mode == "mean" or self.aggregate_mode == "dummy_and_mean":
+            # Mean over active entities only (includes the dummy entity in "dummy_and_mean").
+            embedding = jnp.mean(embedding, axis=2, where=shape_mask[..., None])
+        else:
+            embedding = embedding[:, :, 0]  # Take the dummy entity as the embedding of the entire scene.
+        return GeneralActorCriticRNN(
+            action_dim=self.action_dim,
+            fc_layer_depth=self.fc_layer_depth,
+            fc_layer_width=self.fc_layer_width,
+            action_mode=self.action_mode,
+            hybrid_action_continuous_dim=self.hybrid_action_continuous_dim,
+            multi_discrete_number_of_dims_per_distribution=self.multi_discrete_number_of_dims_per_distribution,
+            add_generator_embedding=self.add_generator_embedding,
+            generator_embedding_number_of_timesteps=self.generator_embedding_number_of_timesteps,
+            recurrent=self.recurrent,
+        )(hidden, og_obs, embedding, dones, activation)

kinetix/pcg/__init__.py ADDED Viewed

File without changes

kinetix/pcg/pcg.py ADDED Viewed

	@@ -0,0 +1,97 @@

+from functools import partial
+from jax2d.engine import recalculate_mass_and_inertia, recompute_global_joint_positions, select_shape
+from kinetix.environment.env_state import EnvState, StaticEnvParams
+from kinetix.pcg.pcg_state import PCGState
+import jax
+import jax.numpy as jnp
+def _process_tied_together_shapes(pcg_state: PCGState, sampled_state: EnvState, static_params: StaticEnvParams):
+    # Get the matrix of tied together positions. Since we vmap, we only want one entry active for any (i, j, k). Thus, we mask out some of the duplicate ones.
+    tied = jnp.triu(pcg_state.tied_together & jnp.logical_not(jnp.eye(pcg_state.tied_together.shape[0], dtype=bool)))
+    has_anything_in_column = tied.any(axis=0)
+    tied = (
+        tied * jnp.logical_not(has_anything_in_column)[:, None]
+    )  # if there is something in a column, it means a previous one with a lower index has already been processed
+    should_use_delta_positions = tied.any(axis=0)
+    # This is the delta we have moved after sampling
+    delta_positions = jnp.concatenate(
+        [
+            sampled_state.polygon.position - pcg_state.env_state.polygon.position,
+            sampled_state.circle.position - pcg_state.env_state.circle.position,
+        ]
+    )
+    def _get_effect_of_shape_i_on_all_others(item_index, item_row_of_what_is_tied):
+        delta_pos = delta_positions[item_index]
+        return jnp.arange(len(item_row_of_what_is_tied)), delta_pos[None] * item_row_of_what_is_tied[:, None]
+    indices, positions = jax.vmap(_get_effect_of_shape_i_on_all_others, (0, 0))(jnp.arange(tied.shape[0]), tied)
+    indices = indices.flatten()
+    positions = positions.reshape(indices.shape[0], -1)
+    default_positions = jnp.concatenate(
+        [pcg_state.env_state.polygon.position, pcg_state.env_state.circle.position], axis=0
+    )
+    sampled_positions = jnp.concatenate([sampled_state.polygon.position, sampled_state.circle.position], axis=0)
+    updated_positions = default_positions.at[indices].add(positions)
+    # Use the deltas or the sampled positions
+    positions = jnp.where(should_use_delta_positions[:, None], updated_positions, sampled_positions)
+    sampled_state = sampled_state.replace(
+        polygon=sampled_state.polygon.replace(position=positions[: static_params.num_polygons]),
+        circle=sampled_state.circle.replace(position=positions[static_params.num_polygons :]),
+    )
+    return sampled_state
+@partial(jax.jit, static_argnums=(3,))
+def sample_pcg_state(rng, pcg_state: PCGState, params, static_params):
+    def _pcg_fn(rng, main_val, max_val, mask):
+        pcg_val = jax.random.uniform(rng, shape=main_val.shape) * (
+            max_val.astype(float) - main_val.astype(float)
+        ) + main_val.astype(float)
+        if jnp.issubdtype(main_val.dtype, jnp.integer) or jnp.issubdtype(main_val.dtype, jnp.bool_):
+            pcg_val = jnp.round(pcg_val)
+        pcg_val = pcg_val.astype(main_val.dtype)
+        new_val = jax.lax.select(mask.astype(bool), pcg_val, main_val)
+        return new_val
+    def _random_split_like_tree(rng, target):
+        tree_def = jax.tree_structure(target)
+        rngs = jax.random.split(rng, tree_def.num_leaves)
+        return jax.tree_unflatten(tree_def, rngs)
+    rng, _rng = jax.random.split(rng)
+    rng_tree = _random_split_like_tree(_rng, pcg_state.env_state)
+    sampled_state = jax.tree_util.tree_map(
+        _pcg_fn, rng_tree, pcg_state.env_state, pcg_state.env_state_max, pcg_state.env_state_pcg_mask
+    )
+    sampled_state = _process_tied_together_shapes(pcg_state, sampled_state, static_params)
+    sampled_state = recompute_global_joint_positions(sampled_state, static_params)
+    env_state = recalculate_mass_and_inertia(
+        sampled_state, static_params, sampled_state.polygon_densities, sampled_state.circle_densities
+    )
+    return env_state
+def env_state_to_pcg_state(env_state: EnvState):
+    N = env_state.polygon.active.shape[0] + env_state.circle.active.shape[0]
+    pcg_state = PCGState(
+        env_state=env_state,
+        env_state_max=env_state,
+        env_state_pcg_mask=jax.tree_util.tree_map(lambda x: jnp.zeros_like(x, dtype=bool), env_state),
+        tied_together=jnp.zeros((N, N), dtype=bool),
+    )
+    return pcg_state

kinetix/pcg/pcg_state.py ADDED Viewed

	@@ -0,0 +1,24 @@

+from dataclasses import field
+import jax.numpy as jnp
+from flax import struct
+from jax2d.sim_state import SimState, SimParams, StaticSimParams, RigidBody, Joint, Thruster, CollisionManifold
+from kinetix.environment.env_state import EnvState
+@struct.dataclass
+class PCGState:
+    # Primary env state
+    env_state: EnvState
+    # The PCG mask.  If a value is truthy in this, then it is PCG not static
+    env_state_pcg_mask: EnvState
+    # In the case that a value is PCG, the env_state value is the min and this state represents the max
+    env_state_max: EnvState
+    tied_together: jnp.ndarray  # NxN matrix of booleans, where N is the number of shapes
+    def __setstate__(self, state):
+        if "tied_together" not in state:
+            num_shapes = state["env_state"].polygon.active.shape[0] + state["env_state"].circle.active.shape[0]
+            state["tied_together"] = jnp.zeros((num_shapes, num_shapes), dtype=bool)
+        object.__setattr__(self, "__dict__", state)

kinetix/render/__init__.py ADDED Viewed

File without changes

kinetix/render/renderer_pixels.py ADDED Viewed

	@@ -0,0 +1,290 @@

+from functools import partial
+import jax
+import jax.numpy as jnp
+import numpy as np
+from jax2d import joint
+from jax2d.engine import select_shape
+from jax2d.maths import rmat
+from jax2d.sim_state import RigidBody
+from jaxgl.maths import dist_from_line
+from jaxgl.renderer import clear_screen, make_renderer
+from jaxgl.shaders import (
+    fragment_shader_quad,
+    fragment_shader_edged_quad,
+    make_fragment_shader_texture,
+    nearest_neighbour,
+    make_fragment_shader_quad_textured,
+)
+from kinetix.render.textures import (
+    THRUSTER_TEXTURE_16_RGBA,
+    RJOINT_TEXTURE_6_RGBA,
+    FJOINT_TEXTURE_6_RGBA,
+)
+from kinetix.environment.env_state import StaticEnvParams, EnvParams, EnvState
+from flax import struct
+def make_render_pixels(
+    params,
+    static_params: StaticEnvParams,
+):
+    screen_dim = static_params.screen_dim
+    downscale = static_params.downscale
+    joint_tex_size = 6
+    thruster_tex_size = 16
+    FIXATED_COLOUR = jnp.array([80, 80, 80])
+    JOINT_COLOURS = jnp.array(
+        [
+            # [0, 0, 255],
+            [255, 255, 255],  # yellow
+            [255, 255, 0],  # yellow
+            [255, 0, 255],  # purple/magenta
+            [0, 255, 255],  # cyan
+            [255, 153, 51],  # white
+        ]
+    )
+    def colour_thruster_texture(colour):
+        return THRUSTER_TEXTURE_16_RGBA.at[:9, :, :3].mul(colour[None, None, :] / 255.0)
+    coloured_thruster_textures = jax.vmap(colour_thruster_texture)(JOINT_COLOURS)
+    ROLE_COLOURS = jnp.array(
+        [
+            [160.0, 160.0, 160.0],  # None
+            [0.0, 204.0, 0.0],  # Green:    The ball
+            [0.0, 102.0, 204.0],  # Blue:   The goal
+            [255.0, 102.0, 102.0],  # Red:      Death Objects
+        ]
+    )
+    BACKGROUND_COLOUR = jnp.array([255.0, 255.0, 255.0])
+    def _get_colour(shape_role, inverse_inertia):
+        base_colour = ROLE_COLOURS[shape_role]
+        f = (inverse_inertia == 0) * 1
+        is_not_normal = (shape_role != 0) * 1
+        return jnp.array(
+            [
+                base_colour,
+                base_colour,
+                FIXATED_COLOUR,
+                base_colour * 0.5,
+            ]
+        )[2 * f + is_not_normal]
+    # Pixels per unit distance
+    ppud = params.pixels_per_unit // downscale
+    downscaled_screen_dim = (screen_dim[0] // downscale, screen_dim[1] // downscale)
+    full_screen_size = (
+        downscaled_screen_dim[0] + (static_params.max_shape_size * 2 * ppud),
+        downscaled_screen_dim[1] + (static_params.max_shape_size * 2 * ppud),
+    )
+    cleared_screen = clear_screen(full_screen_size, BACKGROUND_COLOUR)
+    def _world_space_to_pixel_space(x):
+        return (x + static_params.max_shape_size) * ppud
+    def fragment_shader_kinetix_circle(position, current_frag, unit_position, uniform):
+        centre, radius, rotation, colour, mask = uniform
+        dist = jnp.sqrt(jnp.square(position - centre).sum())
+        inside = dist <= radius
+        on_edge = dist > radius - 2
+        # TODO - precompute?
+        normal = jnp.array([jnp.sin(rotation), -jnp.cos(rotation)])
+        dist = dist_from_line(position, centre, centre + normal)
+        on_edge |= (dist < 1) & (jnp.dot(normal, position - centre) <= 0)
+        fragment = jax.lax.select(on_edge, jnp.zeros(3), colour)
+        return jax.lax.select(inside & mask, fragment, current_frag)
+    def fragment_shader_kinetix_joint(position, current_frag, unit_position, uniform):
+        texture, colour, mask = uniform
+        tex_coord = (
+            jnp.array(
+                [
+                    joint_tex_size * unit_position[0],
+                    joint_tex_size * unit_position[1],
+                ]
+            )
+            - 0.5
+        )
+        tex_frag = nearest_neighbour(texture, tex_coord)
+        tex_frag = tex_frag.at[3].mul(mask)
+        tex_frag = tex_frag.at[:3].mul(colour / 255.0)
+        tex_frag = (tex_frag[3] * tex_frag[:3]) + ((1.0 - tex_frag[3]) * current_frag)
+        return tex_frag
+    thruster_pixel_size = thruster_tex_size // downscale
+    thruster_pixel_size_diagonal = (thruster_pixel_size * np.sqrt(2)).astype(jnp.int32) + 1
+    def fragment_shader_kinetix_thruster(fragment_position, current_frag, unit_position, uniform):
+        thruster_position, rotation, texture, mask = uniform
+        tex_position = jnp.matmul(rmat(-rotation), (fragment_position - thruster_position)) / thruster_pixel_size + 0.5
+        mask &= (tex_position[0] >= 0) & (tex_position[0] <= 1) & (tex_position[1] >= 0) & (tex_position[1] <= 1)
+        eps = 0.001
+        tex_coord = (
+            jnp.array(
+                [
+                    thruster_tex_size * tex_position[0],
+                    thruster_tex_size * tex_position[1],
+                ]
+            )
+            - 0.5
+            + eps
+        )
+        tex_frag = nearest_neighbour(texture, tex_coord)
+        tex_frag = tex_frag.at[3].mul(mask)
+        tex_frag = (tex_frag[3] * tex_frag[:3]) + ((1.0 - tex_frag[3]) * current_frag)
+        return tex_frag
+    patch_size_1d = static_params.max_shape_size * ppud
+    patch_size = (patch_size_1d, patch_size_1d)
+    circle_renderer = make_renderer(full_screen_size, fragment_shader_kinetix_circle, patch_size, batched=True)
+    quad_renderer = make_renderer(full_screen_size, fragment_shader_edged_quad, patch_size, batched=True)
+    big_quad_renderer = make_renderer(full_screen_size, fragment_shader_edged_quad, downscaled_screen_dim)
+    joint_pixel_size = joint_tex_size // downscale
+    joint_renderer = make_renderer(
+        full_screen_size, fragment_shader_kinetix_joint, (joint_pixel_size, joint_pixel_size), batched=True
+    )
+    thruster_renderer = make_renderer(
+        full_screen_size,
+        fragment_shader_kinetix_thruster,
+        (thruster_pixel_size_diagonal, thruster_pixel_size_diagonal),
+        batched=True,
+    )
+    @jax.jit
+    def render_pixels(state: EnvState):
+        """Rasterise ``state`` into an image and return it with the border cropped off.
+        Layers are drawn in this order: floor (polygon 0), polygons, circles, joints, thrusters.
+        The renderers, ``cleared_screen``, ``ppud`` and the helper functions are closure variables
+        of the enclosing factory. The result is the screen minus ``max_shape_size * ppud`` pixels
+        on every side.
+        """
+        pixels = cleared_screen
+        # Floor
+        # Polygon 0 is drawn with the full-screen quad renderer instead of a per-shape patch.
+        floor_uniform = (
+            _world_space_to_pixel_space(state.polygon.position[0, None, :] + state.polygon.vertices[0]),
+            _get_colour(state.polygon_shape_roles[0], 0),
+            jnp.zeros(3),
+            True,
+        )
+        pixels = big_quad_renderer(pixels, _world_space_to_pixel_space(jnp.zeros(2, dtype=jnp.int32)), floor_uniform)
+        # Rectangles
+        # Each patch is positioned half a max_shape_size before the shape's position, in pixel space.
+        # NOTE(review): presumably this is the patch origin expected by the jaxgl renderer - confirm.
+        rectangle_patch_positions = _world_space_to_pixel_space(
+            state.polygon.position - (static_params.max_shape_size / 2.0)
+        ).astype(jnp.int32)
+        # One rotation matrix per polygon, repeated for every vertex so a double vmap can apply it.
+        rectangle_rmats = jax.vmap(rmat)(state.polygon.rotation)
+        rectangle_rmats = jnp.repeat(rectangle_rmats[:, None, :, :], repeats=static_params.max_polygon_vertices, axis=1)
+        rectangle_vertices_pixel_space = _world_space_to_pixel_space(
+            state.polygon.position[:, None, :] + jax.vmap(jax.vmap(jnp.matmul))(rectangle_rmats, state.polygon.vertices)
+        )
+        rectangle_colours = jax.vmap(_get_colour)(state.polygon_shape_roles, state.polygon.inverse_mass)
+        # Edge colour is all zeros for every polygon.
+        rectangle_edge_colours = jnp.zeros((static_params.num_polygons, 3))
+        rectangle_uniforms = (
+            rectangle_vertices_pixel_space,
+            rectangle_colours,
+            rectangle_edge_colours,
+            state.polygon.active,
+        )
+        pixels = quad_renderer(pixels, rectangle_patch_positions, rectangle_uniforms)
+        # Circles
+        circle_positions_pixel_space = _world_space_to_pixel_space(state.circle.position)
+        circle_radii_pixel_space = state.circle.radius * ppud
+        circle_patch_positions = _world_space_to_pixel_space(
+            state.circle.position - (static_params.max_shape_size / 2.0)
+        ).astype(jnp.int32)
+        circle_colours = jax.vmap(_get_colour)(state.circle_shape_roles, state.circle.inverse_mass)
+        circle_uniforms = (
+            circle_positions_pixel_space,
+            circle_radii_pixel_space,
+            state.circle.rotation,
+            circle_colours,
+            state.circle.active,
+        )
+        pixels = circle_renderer(pixels, circle_patch_positions, circle_uniforms)
+        # Joints
+        # Centre each joint patch on the joint's global position.
+        joint_patch_positions = jnp.round(
+            _world_space_to_pixel_space(state.joint.global_position) - (joint_pixel_size // 2)
+        ).astype(jnp.int32)
+        # Pick the fixed-joint or revolute-joint texture per joint.
+        joint_textures = jax.vmap(jax.lax.select, in_axes=(0, None, None))(
+            state.joint.is_fixed_joint, FJOINT_TEXTURE_6_RGBA, RJOINT_TEXTURE_6_RGBA
+        )
+        # Colour index 0 is used for fixed joints and joints whose motor is off; otherwise binding + 1.
+        joint_colours = JOINT_COLOURS[
+            (state.motor_bindings + 1) * (state.joint.motor_on & (~state.joint.is_fixed_joint))
+        ]
+        joint_uniforms = (joint_textures, joint_colours, state.joint.active)
+        pixels = joint_renderer(pixels, joint_patch_positions, joint_uniforms)
+        # Thrusters
+        thruster_positions = jnp.round(_world_space_to_pixel_space(state.thruster.global_position)).astype(jnp.int32)
+        # The patch side is the texture diagonal (computed in the enclosing scope), so the rotated texture fits.
+        thruster_patch_positions = thruster_positions - (thruster_pixel_size_diagonal // 2)
+        thruster_textures = coloured_thruster_textures[state.thruster_bindings + 1]
+        # On-screen rotation = the thruster's own rotation plus the rotation of the shape it is attached to.
+        thruster_rotations = (
+            state.thruster.rotation
+            + jax.vmap(select_shape, in_axes=(None, 0, None))(
+                state, state.thruster.object_index, static_params
+            ).rotation
+        )
+        thruster_uniforms = (thruster_positions, thruster_rotations, thruster_textures, state.thruster.active)
+        pixels = thruster_renderer(pixels, thruster_patch_positions, thruster_uniforms)
+        # Crop out the sides
+        # NOTE(review): a crop_amount of 0 would make these slices empty - assumes it is always positive.
+        crop_amount = static_params.max_shape_size * ppud
+        return pixels[crop_amount:-crop_amount, crop_amount:-crop_amount]
+    return render_pixels
+@struct.dataclass
+class PixelsObservation:
+    """Pixel observation: a rendered image plus a small vector of global scalars."""
+    # Rendered frame; make_render_pixels_rl fills it with the renderer output divided by 255.
+    image: jnp.ndarray
+    # Global features; make_render_pixels_rl fills it with the single value gravity[1] / 10.
+    global_info: jnp.ndarray
+def make_render_pixels_rl(params, static_params: StaticEnvParams):
+    render_fn = make_render_pixels(params, static_params)
+    def inner(state):
+        pixels = render_fn(state) / 255.0
+        return PixelsObservation(
+            image=pixels,
+            global_info=jnp.array([state.gravity[1] / 10.0]),
+        )
+    return inner

kinetix/render/renderer_symbolic_common.py ADDED Viewed

	@@ -0,0 +1,190 @@

+import jax
+from jax2d.sim_state import RigidBody
+import jax.numpy as jnp
+from kinetix.environment.env_state import EnvParams, EnvState, StaticEnvParams
+def _get_base_shape_features(
+    density: jnp.ndarray, roles: jnp.ndarray, shapes: RigidBody, env_params: EnvParams
+) -> jnp.ndarray:
+    cos = jnp.cos(shapes.rotation)
+    sin = jnp.sin(shapes.rotation)
+    return jnp.concatenate(
+        [
+            shapes.position,
+            shapes.velocity,
+            jnp.expand_dims(shapes.inverse_mass, axis=1),
+            jnp.expand_dims(shapes.inverse_inertia, axis=1),
+            jnp.expand_dims(density, axis=1),
+            jnp.expand_dims(jnp.tanh(shapes.angular_velocity / 10), axis=1),
+            jax.nn.one_hot(roles, env_params.num_shape_roles),
+            jnp.expand_dims(sin, axis=1),
+            jnp.expand_dims(cos, axis=1),
+            jnp.expand_dims(shapes.friction, axis=1),
+            jnp.expand_dims(shapes.restitution, axis=1),
+        ],
+        axis=1,
+    )
+def add_circle_features(
+    base_features: jnp.ndarray, shapes: RigidBody, env_params: EnvParams, static_env_params: StaticEnvParams
+):
+    return jnp.concatenate(
+        [
+            base_features,
+            shapes.radius[:, None],
+            jnp.ones_like(base_features[:, :1]),  # one for circle
+        ],
+        axis=1,
+    )
+def make_circle_features(
+    state: EnvState, env_params: EnvParams, static_env_params: StaticEnvParams
+) -> tuple[jnp.ndarray, jnp.ndarray]:
+    base_features = _get_base_shape_features(state.circle_densities, state.circle_shape_roles, state.circle, env_params)
+    node_features = add_circle_features(base_features, state.circle, env_params, static_env_params)
+    return node_features, state.circle.active
+def add_polygon_features(
+    base_features: jnp.ndarray, shapes: RigidBody, env_params: EnvParams, static_env_params: StaticEnvParams
+):
+    vertices = jnp.where(
+        jnp.arange(static_env_params.max_polygon_vertices)[None, :, None] < shapes.n_vertices[:, None, None],
+        shapes.vertices,
+        jnp.zeros_like(shapes.vertices) - 1,
+    )
+    return jnp.concatenate(
+        [
+            base_features,
+            jnp.zeros_like(base_features[:, :1]),  # zero for polygon
+            vertices.reshape((vertices.shape[0], -1)),
+            jnp.expand_dims((shapes.n_vertices <= 3), axis=1),
+        ],
+        axis=1,
+    )
+def make_polygon_features(
+    state: EnvState, env_params: EnvParams, static_env_params: StaticEnvParams
+) -> tuple[jnp.ndarray, jnp.ndarray]:
+    base_features = _get_base_shape_features(
+        state.polygon_densities, state.polygon_shape_roles, state.polygon, env_params
+    )
+    node_features = add_polygon_features(base_features, state.polygon, env_params, static_env_params)
+    return node_features, state.polygon.active
+def make_unified_shape_features(
+    state: EnvState, env_params: EnvParams, static_env_params: StaticEnvParams
+) -> tuple[jnp.ndarray, jnp.ndarray]:
+    base_p = _get_base_shape_features(state.polygon_densities, state.polygon_shape_roles, state.polygon, env_params)
+    base_c = _get_base_shape_features(state.circle_densities, state.circle_shape_roles, state.circle, env_params)
+    base_p = add_polygon_features(base_p, state.polygon, env_params, static_env_params)
+    base_p = add_circle_features(base_p, state.polygon, env_params, static_env_params)
+    base_c = add_polygon_features(base_c, state.circle, env_params, static_env_params)
+    base_c = add_circle_features(base_c, state.circle, env_params, static_env_params)
+    return jnp.concatenate([base_p, base_c], axis=0), jnp.concatenate(
+        [state.polygon.active, state.circle.active], axis=0
+    )
+def make_joint_features(
+    state: EnvState, env_params: EnvParams, static_env_params: StaticEnvParams
+) -> tuple[jnp.ndarray, jnp.ndarray, jnp.ndarray]:
+    """Build directed joint features: every joint appears twice, once per direction.
+    The first J rows describe the b -> a direction and the last J rows describe a -> b.
+    ``env_params`` is accepted but not used here.
+    """
+    # Returns joint_features, indexes, mask, of shape:
+    # (2 * J, K), (2 * J, 2), (2 * J,)
+    def _create_joint_features(joints):
+        # 2, J, A
+        J = joints.active.shape[0]
+        def _create_1way_joint_features(direction):
+            # direction == True: from a to b; direction == False: from b to a.
+            from_pos = jax.lax.select(direction, joints.a_relative_pos, joints.b_relative_pos)
+            to_pos = jax.lax.select(direction, joints.b_relative_pos, joints.a_relative_pos)
+            rotation_sin, rotation_cos = jnp.sin(joints.rotation), jnp.cos(joints.rotation)
+            # Limit-related features are multiplied by motor_has_joint_limits, so they are 0 when limits are off.
+            rotation_max_sin = jnp.sin(joints.max_rotation) * joints.motor_has_joint_limits
+            rotation_max_cos = jnp.cos(joints.max_rotation) * joints.motor_has_joint_limits
+            rotation_min_sin = jnp.sin(joints.min_rotation) * joints.motor_has_joint_limits
+            rotation_min_cos = jnp.cos(joints.min_rotation) * joints.motor_has_joint_limits
+            rotation_diff_max = (joints.max_rotation - joints.rotation) * joints.motor_has_joint_limits
+            rotation_diff_min = (joints.min_rotation - joints.rotation) * joints.motor_has_joint_limits
+            # Features that apply to every joint, fixed or revolute.
+            base_features = jnp.concatenate(
+                [
+                    (joints.active * 1.0)[:, None],
+                    (joints.is_fixed_joint * 1.0)[:, None],  # J, 1
+                    from_pos,
+                    to_pos,
+                    rotation_sin[:, None],
+                    rotation_cos[:, None],
+                ],
+                axis=1,
+            )
+            # Motor and limit features; the trailing multiplication zeroes the whole group for fixed joints.
+            rjoint_features = (
+                jnp.concatenate(
+                    [
+                        joints.motor_speed[:, None],
+                        joints.motor_power[:, None],
+                        (joints.motor_on * 1.0)[:, None],
+                        (joints.motor_has_joint_limits * 1.0)[:, None],
+                        jax.nn.one_hot(state.motor_bindings, num_classes=static_env_params.num_motor_bindings),
+                        rotation_min_sin[:, None],
+                        rotation_min_cos[:, None],
+                        rotation_max_sin[:, None],
+                        rotation_max_cos[:, None],
+                        rotation_diff_min[:, None],
+                        rotation_diff_max[:, None],
+                    ],
+                    axis=1,
+                )
+                * (1.0 - (joints.is_fixed_joint * 1.0))[:, None]
+            )
+            return jnp.concatenate([base_features, rjoint_features], axis=1)
+        # 2, J, A
+        # Index 0 of the leading axis is direction False (b -> a); index 1 is direction True (a -> b).
+        joint_features = jax.vmap(_create_1way_joint_features)(jnp.array([False, True]))
+        # J, 2
+        # Each row is a (from, to) pair; rows in indexes_from match direction False, rows in indexes_to match True.
+        indexes_from = jnp.concatenate([joints.b_index[:, None], joints.a_index[:, None]], axis=1)
+        indexes_to = jnp.concatenate([joints.a_index[:, None], joints.b_index[:, None]], axis=1)
+        # Inactive joints get the placeholder index pair (0, 0).
+        indexes_from = jnp.where(joints.active[:, None], indexes_from, jnp.zeros_like(indexes_from))
+        indexes_to = jnp.where(joints.active[:, None], indexes_to, jnp.zeros_like(indexes_to))
+        indexes = jnp.concatenate([indexes_from, indexes_to], axis=0)
+        mask = jnp.concatenate([joints.active, joints.active], axis=0)
+        # Flatten (2, J, A) to (2 * J, A); the row order matches ``indexes`` and ``mask``.
+        return joint_features.reshape((2 * J, -1)), indexes, mask
+    return _create_joint_features(state.joint)
+def make_thruster_features(
+    state: EnvState, env_params: EnvParams, static_env_params: StaticEnvParams
+) -> tuple[jnp.ndarray, jnp.ndarray, jnp.ndarray]:
+    # Returns thruster_features, indexes, mask, of shape:
+    # (T, K), (T,), (T,)
+    def _create_thruster_features(thrusters):
+        cos = jnp.cos(thrusters.rotation)
+        sin = jnp.sin(thrusters.rotation)
+        return jnp.concatenate(
+            [
+                (thrusters.active * 1.0)[:, None],
+                (thrusters.relative_position),
+                jax.nn.one_hot(state.thruster_bindings, num_classes=static_env_params.num_thruster_bindings),
+                sin[:, None],
+                cos[:, None],
+                thrusters.power[:, None],
+            ],
+            axis=1,
+        )
+    return _create_thruster_features(state.thruster), state.thruster.object_index, state.thruster.active

kinetix/render/renderer_symbolic_entity.py ADDED Viewed

	@@ -0,0 +1,121 @@

+from cmath import rect
+from functools import partial
+import jax
+import jax.numpy as jnp
+from flax import struct
+from jax2d.engine import get_pairwise_interaction_indices
+from kinetix.environment.env_state import EnvState
+from kinetix.render.renderer_symbolic_common import (
+    make_circle_features,
+    make_joint_features,
+    make_polygon_features,
+    make_thruster_features,
+    make_unified_shape_features,
+)
+@struct.dataclass
+class EntityObservation:
+    """Entity-based observation: per-entity feature matrices, their masks, and connectivity data."""
+    # Feature matrices, one row per entity slot.
+    circles: jnp.ndarray
+    polygons: jnp.ndarray
+    joints: jnp.ndarray
+    thrusters: jnp.ndarray
+    # Boolean "slot is active" masks matching the rows of the matrices above.
+    circle_mask: jnp.ndarray
+    polygon_mask: jnp.ndarray
+    joint_mask: jnp.ndarray
+    thruster_mask: jnp.ndarray
+    # Stack of pairwise shape-to-shape masks, as produced by make_render_entities.
+    attention_mask: jnp.ndarray
+    # collision_mask: jnp.ndarray
+    # Shape indices each joint row / thruster row refers to.
+    joint_indexes: jnp.ndarray
+    thruster_indexes: jnp.ndarray
+def make_render_entities(params, static_params):
+    """Build a function that converts an ``EnvState`` into an ``EntityObservation``."""
+    _, _, _, circle_circle_pairs, circle_rect_pairs, rect_rect_pairs = get_pairwise_interaction_indices(static_params)
+    # Shift circle indices by num_polygons so that pairs index a combined [polygons, circles] ordering.
+    # NOTE(review): assumes column 0 of circle_rect_pairs holds the circle index - confirm against jax2d.
+    circle_rect_pairs = circle_rect_pairs.at[:, 0].add(static_params.num_polygons)
+    circle_circle_pairs = circle_circle_pairs + static_params.num_polygons
+    def render_entities(state: EnvState):
+        # Replace NaNs (and infinities) in every leaf of the state before building features.
+        state = jax.tree_util.tree_map(lambda x: jnp.nan_to_num(x), state)
+        joint_features, joint_indexes, joint_mask = make_joint_features(state, params, static_params)
+        thruster_features, thruster_indexes, thruster_mask = make_thruster_features(state, params, static_params)
+        poly_nodes, poly_mask = make_polygon_features(state, params, static_params)
+        circle_nodes, circle_mask = make_circle_features(state, params, static_params)
+        def _add_grav(nodes):
+            return jnp.concatenate(
+                [nodes, jnp.zeros((nodes.shape[0], 1)) + state.gravity[1] / 10], axis=-1
+            )  # add gravity to each shape's embedding
+        poly_nodes = _add_grav(poly_nodes)
+        circle_nodes = _add_grav(circle_nodes)
+        # Only the polygon and circle masks are concatenated here, so this has length NPoly + NCircle.
+        mask_flat_shapes = jnp.concatenate([poly_mask, circle_mask], axis=0)
+        num_shapes = static_params.num_polygons + static_params.num_circles
+        def make_n_squared_mask(val):
+            # val has shape N of bools.
+            N = val.shape[0]
+            A = jnp.eye(N, N, dtype=bool)  # also have things attend to themselves
+            # Make the shapes fully connected
+            full_mask = A.at[:num_shapes, :num_shapes].set(jnp.ones((num_shapes, num_shapes), dtype=bool))
+            # Mark (from, to) shape pairs that are directly linked by a joint.
+            one_hop_connected = jnp.zeros((N, N), dtype=bool)
+            one_hop_connected = one_hop_connected.at[joint_indexes[:, 0], joint_indexes[:, 1]].set(True)
+            one_hop_connected = one_hop_connected.at[0, 0].set(False)  # invalid joints have indices of (0, 0)
+            # Pairs for which state.collision_matrix is False.
+            multi_hop_connected = jnp.logical_not(state.collision_matrix)
+            # Note: collision_mask is only used below as the shape/dtype template for collision_manifold_mask.
+            collision_mask = state.collision_matrix
+            # where val is false, we want to mask out the row and column.
+            full_mask = full_mask & (val[:, None]) & (val[None, :])
+            collision_mask = collision_mask & (val[:, None]) & (val[None, :])
+            multi_hop_connected = multi_hop_connected & (val[:, None]) & (val[None, :])
+            one_hop_connected = one_hop_connected & (val[:, None]) & (val[None, :])
+            collision_manifold_mask = jnp.zeros_like(collision_mask)
+            def _set(collision_manifold_mask, pairs, active):
+                # Write the per-pair ``active`` flags at the (row, column) positions listed in ``pairs``.
+                return collision_manifold_mask.at[
+                    pairs[:, 0],
+                    pairs[:, 1],
+                ].set(active)
+            # Polygon-polygon manifolds carry two active flags per pair; the pair counts if either is set.
+            collision_manifold_mask = _set(
+                collision_manifold_mask,
+                rect_rect_pairs,
+                jnp.logical_or(state.acc_rr_manifolds.active[..., 0], state.acc_rr_manifolds.active[..., 1]),
+            )
+            collision_manifold_mask = _set(collision_manifold_mask, circle_rect_pairs, state.acc_cr_manifolds.active)
+            collision_manifold_mask = _set(collision_manifold_mask, circle_circle_pairs, state.acc_cc_manifolds.active)
+            collision_manifold_mask = collision_manifold_mask & (val[:, None]) & (val[None, :])
+            # Stack the four masks along a new leading axis: full, multi-hop, one-hop, collision manifold.
+            return jnp.concatenate(
+                [full_mask[None], multi_hop_connected[None], one_hop_connected[None], collision_manifold_mask[None]],
+                axis=0,
+            )
+        mask_n_squared = make_n_squared_mask(mask_flat_shapes)
+        return EntityObservation(
+            circles=circle_nodes,
+            polygons=poly_nodes,
+            joints=joint_features,
+            thrusters=thruster_features,
+            circle_mask=circle_mask,
+            polygon_mask=poly_mask,
+            joint_mask=joint_mask,
+            thruster_mask=thruster_mask,
+            attention_mask=mask_n_squared,
+            joint_indexes=joint_indexes,
+            thruster_indexes=thruster_indexes,
+        )
+    return render_entities

kinetix/render/renderer_symbolic_flat.py ADDED Viewed

	@@ -0,0 +1,102 @@

+from functools import partial
+import jax
+import jax.numpy as jnp
+import numpy as np
+from jax2d import joint
+from jax2d.engine import select_shape
+from jax2d.maths import rmat
+from jax2d.sim_state import RigidBody
+from jaxgl.maths import dist_from_line
+from jaxgl.renderer import clear_screen, make_renderer
+from jaxgl.shaders import (
+    fragment_shader_quad,
+    fragment_shader_edged_quad,
+    make_fragment_shader_texture,
+    nearest_neighbour,
+    make_fragment_shader_quad_textured,
+)
+from kinetix.render.renderer_symbolic_common import (
+    make_circle_features,
+    make_joint_features,
+    make_polygon_features,
+    make_thruster_features,
+)
+from kinetix.environment.env_state import StaticEnvParams, EnvParams, EnvState
+from flax import struct
+def make_render_symbolic(params, static_params: StaticEnvParams):
+    """Build a function that flattens an environment state into a single 1-D observation vector."""
+    def render_symbolic(state):
+        n_polys = static_params.num_polygons
+        nshapes = n_polys + static_params.num_circles
+        polygon_features, polygon_mask = make_polygon_features(state, params, static_params)
+        # Drop polygons 1, 2 and 3 from the observation (the walls and ceiling, per the variable name).
+        mask_to_ignore_walls_ceiling = np.ones(static_params.num_polygons, dtype=bool)
+        mask_to_ignore_walls_ceiling[np.array([1, 2, 3])] = False
+        polygon_features = polygon_features[mask_to_ignore_walls_ceiling]
+        polygon_mask = polygon_mask[mask_to_ignore_walls_ceiling]
+        circle_features, circle_mask = make_circle_features(state, params, static_params)
+        joint_features, joint_idxs, joint_mask = make_joint_features(state, params, static_params)
+        thruster_features, thruster_idxs, thruster_mask = make_thruster_features(state, params, static_params)
+        # make_joint_features returns both directions of every joint; only the first J rows are kept here.
+        two_J = joint_features.shape[0]
+        J = two_J // 2  # for symbolic only have the one
+        joint_features = jnp.concatenate(
+            [
+                joint_features[:J],  # shape (J, K)
+                jax.nn.one_hot(joint_idxs[:J, 0], nshapes),  # shape (J, N)
+                jax.nn.one_hot(joint_idxs[:J, 1], nshapes),  # shape (J, N)
+            ],
+            axis=1,
+        )
+        # Append a one-hot encoding of the shape each thruster is attached to.
+        thruster_features = jnp.concatenate(
+            [
+                thruster_features,
+                jax.nn.one_hot(thruster_idxs, nshapes),
+            ],
+            axis=1,
+        )
+        # Zero the rows of inactive entities, then flatten each group to 1-D.
+        polygon_features = jnp.where(polygon_mask[:, None], polygon_features, 0.0).flatten()
+        circle_features = jnp.where(circle_mask[:, None], circle_features, 0.0).flatten()
+        joint_features = jnp.where(joint_mask[:J, None], joint_features, 0.0).flatten()
+        thruster_features = jnp.where(thruster_mask[:, None], thruster_features, 0.0).flatten()
+        # Currently unused: its only call sites are the commented-out entries in ``obs`` below.
+        def _get_manifold_features(manifold):
+            collision_mask_features = jnp.concatenate(
+                [
+                    manifold.normal,
+                    jnp.expand_dims(manifold.penetration, axis=-1),
+                    manifold.collision_point,
+                    jnp.expand_dims(manifold.acc_impulse_normal, axis=-1),
+                    jnp.expand_dims(manifold.acc_impulse_tangent, axis=-1),
+                ],
+                axis=-1,
+            )
+            return (collision_mask_features * manifold.active[..., None]).flatten()
+        obs = jnp.concatenate(
+            [
+                polygon_features,
+                circle_features,
+                joint_features,
+                thruster_features,
+                jnp.array([state.gravity[1]]) / 10,
+                # _get_manifold_features(state.acc_cc_manifolds),
+                # _get_manifold_features(state.acc_cr_manifolds),
+                # _get_manifold_features(state.acc_rr_manifolds),
+            ],
+            axis=0,
+        )
+        # Bound every entry to [-10, 10], then replace any remaining NaNs.
+        obs = jnp.clip(obs, a_min=-10.0, a_max=10.0)
+        obs = jnp.nan_to_num(obs)
+        return obs
+    return render_symbolic

kinetix/render/textures.py ADDED Viewed

	@@ -0,0 +1,43 @@

+import os
+import pathlib
+from enum import Enum
+import jax.numpy as jnp
+import imageio.v3 as iio
+import numpy as np
+from PIL import Image
+def load_texture(filename, render_size):
+    filename = os.path.join(pathlib.Path(__file__).parent.parent.resolve(), "assets", filename)
+    img = iio.imread(filename)
+    jnp_img = jnp.array(img).astype(jnp.int32)
+    if jnp_img.shape[2] == 4:
+        jnp_img = jnp_img.at[:, :, 3].set(jnp_img[:, :, 3] // 255)
+    img = np.array(jnp_img, dtype=np.uint8)
+    image = Image.fromarray(img)
+    image = image.resize((render_size, render_size), resample=Image.NEAREST)
+    jnp_img = jnp.array(image, dtype=jnp.float32)
+    return jnp_img.transpose((1, 0, 2))
+# Textures are loaded once, at import time; the second argument is the side length in pixels.
+EDIT_TEXTURE_RGBA = load_texture("edit.png", 64)
+PLAY_TEXTURE_RGBA = load_texture("play.png", 64)
+CIRCLE_TEXTURE_RGBA = load_texture("circle.png", 32)
+RECT_TEXTURE_RGBA = load_texture("square.png", 32)
+TRIANGLE_TEXTURE_RGBA = load_texture("triangle.png", 32)
+# Joint textures come in a 6x6 variant (the "_6" names) and a 32x32 variant.
+RJOINT_TEXTURE_6_RGBA = load_texture("rjoint.png", 6)
+RJOINT_TEXTURE_RGBA = load_texture("rjoint2.png", 32)
+FJOINT_TEXTURE_6_RGBA = load_texture("fjoint.png", 6)
+FJOINT_TEXTURE_RGBA = load_texture("fjoint2.png", 32)
+ROTATION_TEXTURE_RGBA = load_texture("rotate.png", 32)
+SELECT_TEXTURE_RGBA = load_texture("hand.png", 32)
+# Thruster textures are rotated by three quarter-turns (jnp.rot90 with k=3) after loading.
+THRUSTER_TEXTURE_RGBA = jnp.rot90(load_texture("thruster6.png", 32), k=3)
+THRUSTER_TEXTURE_16_RGBA = jnp.rot90(load_texture("thruster.png", 16), k=3)

kinetix/util/__init__.py ADDED Viewed

File without changes

kinetix/util/config.py ADDED Viewed

	@@ -0,0 +1,229 @@

+import copy
+import datetime
+import gzip
+import json
+import os
+from hashlib import md5
+import jax
+import jax.numpy as jnp
+import numpy as np
+from numpy import isin
+from kinetix.environment.ued.ued_state import UEDParams
+from omegaconf import OmegaConf
+from pandas import isna
+from typing import List, Tuple
+import wandb
+from kinetix.environment.env_state import EnvParams, StaticEnvParams
+from collections import defaultdict
+from kinetix.util.saving import load_from_json_file
+def get_hash_without_seed(config):
+    old_seed = config["seed"]
+    config["seed"] = 0
+    ans = md5(OmegaConf.to_yaml(config, sort_keys=True).encode()).hexdigest()
+    config["seed"] = old_seed
+    return ans
+def get_date() -> str:
+    return datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
+def generate_params_from_config(config):
+    if config.get("env_size_type", "predefined") == "custom":
+        # must load env params from a file
+        _, static_env_params, env_params = load_from_json_file(os.path.join("worlds", config["custom_path"]))
+        return env_params, static_env_params.replace(
+            frame_skip=config["frame_skip"],
+        )
+    env_params = EnvParams()
+    static_env_params = StaticEnvParams().replace(
+        num_polygons=config["num_polygons"],
+        num_circles=config["num_circles"],
+        num_joints=config["num_joints"],
+        num_thrusters=config["num_thrusters"],
+        frame_skip=config["frame_skip"],
+        num_motor_bindings=config["num_motor_bindings"],
+        num_thruster_bindings=config["num_thruster_bindings"],
+    )
+    return env_params, static_env_params
+def generate_ued_params_from_config(config) -> UEDParams:
+    ans = UEDParams()
+    if config["env_size_name"] == "s":
+        ans = ans.replace(add_shape_n_proposals=1)  # otherwise we get a very weird XLA bug.
+    if "fixate_chance_max" in config:
+        print("Changing fixate chance max to", config["fixate_chance_max"])
+        ans = ans.replace(fixate_chance_max=config["fixate_chance_max"])
+    return ans
+def get_eval_level_groups(eval_levels: List[str]) -> List[Tuple[str, str]]:
+    def get_groups(s):
+        # This is the size group
+        group_one = s.split("/")[0]
+        group_two = s.split("/")[1].split("_")[0]
+        group_two = "".join([i for i in group_two if not i.isdigit()])
+        if group_two == "h":
+            group_two = "handmade"
+        if group_two == "r":
+            group_two = "random"
+        return f"{group_one}_all", f"{group_one}_{group_two}"
+    indices = defaultdict(list)
+    for idx, s in enumerate(eval_levels):
+        groups = get_groups(s)
+        for group in groups:
+            indices[group].append(idx)
+    indices2 = {}
+    for g in indices:
+        indices2[g] = np.array(indices[g])
+    return indices2
+def normalise_config(config, name, editor_config=False):
+    """Flatten the nested config sections into ``config`` and fill in derived run settings.
+    ``config`` is modified in place and also returned. Unless ``editor_config`` is true this also
+    creates the log directory and writes the original (un-flattened) config to ``config.yaml``
+    inside it. ``name`` is the algorithm name used for grouping, run naming and special cases.
+    """
+    # Keep the un-flattened config so it can be saved verbatim below.
+    old_config = copy.deepcopy(config)
+    keys = ["env", "learning", "model", "misc", "eval", "ued", "env_size", "train_levels"]
+    # Move every entry of each section up to the top level; a name collision fails the assert.
+    for k in keys:
+        if k not in config:
+            config[k] = {}
+        small_d = config[k]
+        del config[k]
+        for kk, vv in small_d.items():
+            assert kk not in config, kk
+            config[kk] = vv
+    if not editor_config:
+        config["eval_env_size_true"] = config["eval_env_size"]
+        # Pixel environments with 2048 training envs are reduced to 512.
+        if config["num_train_envs"] == 2048 and "Pixels" in config["env_name"]:
+            config["num_train_envs"] = 512
+        if "SFL" in name and config["env_size_name"] in ["m", "l"]:
+            config["eval_num_attempts"] = 6  # to avoid a very weird XLA bug.
+        # The hash ignores the seed, so runs differing only in seed share a log directory prefix.
+        config["hash"] = get_hash_without_seed(config)
+        config["random_hash"] = np.random.randint(2**31)
+        config["log_save_path"] = f"logs/{config['hash']}/{config['seed']}-{get_date()}"
+        os.makedirs(config["log_save_path"], exist_ok=True)
+        with open(f"{config['log_save_path']}/config.yaml", "w") as f:
+            f.write(OmegaConf.to_yaml(old_config))
+        # "auto" builds the group name from the algorithm name, prefix, env name and env size.
+        if config["group"] == "auto":
+            config["group"] = f"{name}-" + config["group_auto_prefix"] + config["env_name"].replace("Kinetix-", "")
+            config["group"] += "-" + str(config["env_size_name"])
+        # "auto" evaluation levels means: evaluate on the training levels.
+        if config["eval_levels"] == ["auto"] or config["eval_levels"] == "auto":
+            config["eval_levels"] = config["train_levels_list"]
+            print("Using Auto eval levels:", config["eval_levels"])
+        config["num_eval_levels"] = len(config["eval_levels"])
+        # Environment steps consumed per update; doubled when name is "PAIRED".
+        steps = (
+            config["num_steps"]
+            * config.get("outer_rollout_steps", 1)
+            * config["num_train_envs"]
+            * (2 if name == "PAIRED" else 1)
+        )
+        config["num_updates"] = int(config["total_timesteps"]) // steps
+        # Total timesteps in millions (or billions from 1000M upwards) for the run name.
+        nsteps = int(config["total_timesteps"] // 1e6)
+        letter = "M"
+        if nsteps >= 1000:
+            nsteps = nsteps // 1000
+            letter = "B"
+        config["run_name"] = (
+            config["env_name"] + f"-{name}-" + str(nsteps) + letter + "-" + str(config["num_train_envs"])
+        )
+        # Never let the checkpoint interval exceed the number of updates.
+        if config["checkpoint_save_freq"] >= config["num_updates"]:
+            config["checkpoint_save_freq"] = config["num_updates"]
+    return config
+def get_tags(config, name):
+    return [name]
+    tags = [name]
+    if name in ["PLR", "ACCEL", "DR"]:
+        if config["use_accel"]:
+            tags.append("ACCEL")
+        else:
+            tags.append("PLR")
+    return tags
+def init_wandb(config, name) -> wandb.run:
+    run = wandb.init(
+        config=config,
+        project=config["wandb_project"],
+        group=config["group"],
+        name=config["run_name"],
+        entity=config["wandb_entity"],
+        mode=config["wandb_mode"],
+        tags=get_tags(config, name),
+    )
+    wandb.define_metric("timing/num_updates")
+    wandb.define_metric("timing/num_env_steps")
+    wandb.define_metric("*", step_metric="timing/num_env_steps")
+    wandb.define_metric("timing/sps", step_metric="timing/num_env_steps")
+    return run
+def save_data_to_local_file(data_to_save, config):
+    if not config.get("save_local_data", False):
+        return
+    def reverse_in(li, value):
+        for i, v in enumerate(li):
+            if v in value:
+                return True
+        return False
+    clean_data = {k: v for k, v in data_to_save.items() if not reverse_in(["media/", "images/"], k)}
+    def _clean(x):
+        if isinstance(x, jnp.ndarray):
+            return x.tolist()
+        elif isinstance(x, jnp.float32):
+            if jnp.isnan(x):
+                return -float("inf")
+            return round(float(x) * 1000) / 1000
+        elif isinstance(x, jnp.int32):
+            return int(x)
+        return x
+    clean_data = jax.tree_map(lambda x: _clean(x), clean_data)
+    print("Saving this data:", clean_data)
+    with open(f"{config['log_save_path']}/data.jsonl", "a+") as f:
+        f.write(json.dumps(clean_data) + "\n")
+def compress_log_files_after_run(config):
+    fpath = f"{config['log_save_path']}/data.jsonl"
+    with open(fpath, "rb") as f_in, gzip.open(fpath + ".gz", "wb") as f_out:
+        f_out.writelines(f_in)
+def get_video_frequency(config, update_step):
+    frac_through_training = update_step / config["num_updates"]
+    vid_frequency = (
+        config["eval_freq"]
+        * config["video_frequency"]
+        * jax.lax.select(
+            (0.1 <= frac_through_training) & (frac_through_training < 0.3),
+            1,
+            jax.lax.select(
+                (0.3 <= frac_through_training) & (frac_through_training < 0.6),
+                2,
+                4,
+            ),
+        )
+    )
+    return vid_frequency

kinetix/util/learning.py ADDED Viewed

	@@ -0,0 +1,565 @@

+from functools import partial
+import json
+import os
+import re
+import time
+from enum import IntEnum
+from typing import Tuple
+import chex
+import jax
+import jax.numpy as jnp
+import numpy as np
+import optax
+import orbax.checkpoint as ocp
+from flax import core, struct
+from flax.training.train_state import TrainState as BaseTrainState
+import wandb
+from jaxued.environments.underspecified_env import EnvParams, EnvState, Observation, UnderspecifiedEnv
+from jaxued.level_sampler import LevelSampler
+from jaxued.utils import compute_max_returns, max_mc, positive_value_loss
+from kinetix.environment.env import PixelObservations, make_kinetix_env_from_name
+from kinetix.environment.env_state import StaticEnvParams
+from kinetix.environment.utils import permute_pcg_state
+from kinetix.environment.wrappers import (
+    UnderspecifiedToGymnaxWrapper,
+    LogWrapper,
+    DenseRewardWrapper,
+    AutoReplayWrapper,
+)
+from kinetix.models import make_network_from_config
+from kinetix.pcg.pcg import env_state_to_pcg_state
+from kinetix.render.renderer_pixels import make_render_pixels
+from kinetix.models.actor_critic import ScannedRNN
+from kinetix.util.saving import (
+    expand_pcg_state,
+    get_pcg_state_from_json,
+    load_pcg_state_pickle,
+    load_world_state_pickle,
+    stack_list_of_pytrees,
+    import_env_state_from_json,
+    load_from_json_file,
+)
+from flax.training.train_state import TrainState
+# Directory that level names are joined onto in `get_eval_levels` (via os.path.join).
+BASE_DIR = "worlds"
+# Level names in "<group>.<name>" form, without a ".json" suffix.
+# NOTE(review): presumably the default evaluation set; the consumer is not visible here — confirm.
+DEFAULT_EVAL_LEVELS = [
+    "easy.cartpole",
+    "easy.flappy_bird",
+    "easy.unicycle",
+    "easy.car_left",
+    "easy.car_right",
+    "easy.pinball",
+    "easy.swing_up",
+    "easy.thruster",
+]
+def get_eval_levels(eval_levels, static_env_params):
+    should_permute = [".permute" in l for l in eval_levels]
+    eval_levels = [re.sub(r"\.permute\d+", "", l) for l in eval_levels]
+    ls = [get_pcg_state_from_json(os.path.join(BASE_DIR, l + ("" if l.endswith(".json") else ".json"))) for l in eval_levels]
+    ls = [expand_pcg_state(l, static_env_params) for l in ls]
+    new_ls = []
+    rng = jax.random.PRNGKey(0)
+    for sp, l in zip(should_permute, ls):
+        rng, _rng = jax.random.split(rng)
+        if sp:
+            l = permute_pcg_state(_rng, l, static_env_params)
+        new_ls.append(l)
+    return stack_list_of_pytrees(new_ls)
+def evaluate_rnn(  # from jaxued
+    rng: chex.PRNGKey,
+    env: UnderspecifiedEnv,
+    env_params: EnvParams,
+    train_state: TrainState,
+    init_hstate: chex.ArrayTree,
+    init_obs: Observation,
+    init_env_state: EnvState,
+    max_episode_length: int,
+    keep_states=True,
+    return_trajectories=False,
+) -> Tuple[chex.Array, chex.Array, chex.Array]:
+    """This runs the RNN on the environment, given an initial state and observation, and returns (states, rewards, episode_lengths)
+    Args:
+        rng (chex.PRNGKey):
+        env (UnderspecifiedEnv):
+        env_params (EnvParams):
+        train_state (TrainState):
+        init_hstate (chex.ArrayTree): Shape (num_levels, )
+        init_obs (Observation): Shape (num_levels, )
+        init_env_state (EnvState): Shape (num_levels, )
+        max_episode_length (int):
+    Returns:
+        Tuple[chex.Array, chex.Array, chex.Array]: (States, rewards, episode lengths) ((NUM_STEPS, NUM_LEVELS), (NUM_STEPS, NUM_LEVELS), (NUM_LEVELS,)
+    """
+    num_levels = jax.tree_util.tree_flatten(init_obs)[0][0].shape[0]
+    def step(carry, _):
+        rng, hstate, obs, state, done, mask, episode_length = carry
+        rng, rng_action, rng_step = jax.random.split(rng, 3)
+        x = jax.tree.map(lambda x: x[None, ...], (obs, done))
+        hstate, pi, _ = train_state.apply_fn(train_state.params, hstate, x)
+        action = pi.sample(seed=rng_action).squeeze(0)
+        obs, next_state, reward, done, info = jax.vmap(env.step, in_axes=(0, 0, 0, None))(
+            jax.random.split(rng_step, num_levels), state, action, env_params
+        )
+        next_mask = mask & ~done
+        episode_length += mask
+        if keep_states:
+            return (rng, hstate, obs, next_state, done, next_mask, episode_length), (state, reward, done, info)
+        else:
+            return (rng, hstate, obs, next_state, done, next_mask, episode_length), (None, reward, done, info)
+    (_, _, _, _, _, _, episode_lengths), (states, rewards, dones, infos) = jax.lax.scan(
+        step,
+        (
+            rng,
+            init_hstate,
+            init_obs,
+            init_env_state,
+            jnp.zeros(num_levels, dtype=bool),
+            jnp.ones(num_levels, dtype=bool),
+            jnp.zeros(num_levels, dtype=jnp.int32),
+        ),
+        None,
+        length=max_episode_length,
+    )
+    done_idx = jnp.argmax(dones, axis=0)
+    to_return = (states, rewards, done_idx, episode_lengths, infos)
+    if return_trajectories:
+        return to_return, (dones, rewards)
+    return to_return
+def general_eval(
+    rng: chex.PRNGKey,
+    eval_env: UnderspecifiedEnv,
+    env_params: EnvParams,
+    train_state: TrainState,
+    levels: EnvState,
+    num_eval_steps: int,
+    num_levels: int,
+    keep_states=True,
+    return_trajectories=False,
+):
+    """
+    This evaluates the current policy on the set of evaluation levels
+    It returns (states, cum_rewards, episode_lengths), with shapes (num_steps, num_eval_levels, ...), (num_eval_levels,), (num_eval_levels,)
+    """
+    rng, rng_reset = jax.random.split(rng)
+    init_obs, init_env_state = jax.vmap(eval_env.reset_to_level, (0, 0, None))(
+        jax.random.split(rng_reset, num_levels), levels, env_params
+    )
+    init_hstate = ScannedRNN.initialize_carry(num_levels)
+    (states, rewards, done_idx, episode_lengths, infos), (dones, reward) = evaluate_rnn(
+        rng,
+        eval_env,
+        env_params,
+        train_state,
+        init_hstate,
+        init_obs,
+        init_env_state,
+        num_eval_steps,
+        keep_states=keep_states,
+        return_trajectories=True,
+    )
+    mask = jnp.arange(num_eval_steps)[..., None] < episode_lengths
+    cum_rewards = (rewards * mask).sum(axis=0)
+    to_return = (
+        states,
+        cum_rewards,
+        done_idx,
+        episode_lengths,
+        infos,
+    )  # (num_steps, num_eval_levels, ...), (num_eval_levels,), (num_eval_levels,)
+    if return_trajectories:
+        return to_return, (dones, reward)
+    return to_return
+def compute_gae(
+    gamma: float,
+    lambd: float,
+    last_value: chex.Array,
+    values: chex.Array,
+    rewards: chex.Array,
+    dones: chex.Array,
+) -> Tuple[chex.Array, chex.Array]:
+    """This takes in arrays of shape (NUM_STEPS, NUM_ENVS) and returns the advantages and targets.
+    Args:
+        gamma (float):
+        lambd (float):
+        last_value (chex.Array):  Shape (NUM_ENVS)
+        values (chex.Array): Shape (NUM_STEPS, NUM_ENVS)
+        rewards (chex.Array): Shape (NUM_STEPS, NUM_ENVS)
+        dones (chex.Array): Shape (NUM_STEPS, NUM_ENVS)
+    Returns:
+        Tuple[chex.Array, chex.Array]: advantages, targets; each of shape (NUM_STEPS, NUM_ENVS)
+    """
+    def compute_gae_at_timestep(carry, x):
+        gae, next_value = carry
+        value, reward, done = x
+        delta = reward + gamma * next_value * (1 - done) - value
+        gae = delta + gamma * lambd * (1 - done) * gae
+        return (gae, value), gae
+    _, advantages = jax.lax.scan(
+        compute_gae_at_timestep,
+        (jnp.zeros_like(last_value), last_value),
+        (values, rewards, dones),
+        reverse=True,
+        unroll=16,
+    )
+    return advantages, advantages + values
+def sample_trajectories_rnn(
+    rng: chex.PRNGKey,
+    env: UnderspecifiedEnv,
+    env_params: EnvParams,
+    train_state: TrainState,
+    init_hstate: chex.ArrayTree,
+    init_obs: Observation,
+    init_env_state: EnvState,
+    num_envs: int,
+    max_episode_length: int,
+    return_states: bool = False,
+) -> Tuple[
+    Tuple[chex.PRNGKey, TrainState, chex.ArrayTree, Observation, EnvState, chex.Array],
+    Tuple[Observation, chex.Array, chex.Array, chex.Array, chex.Array, chex.Array, dict],
+]:
+    """This samples trajectories from the environment using the agent specified by the `train_state`.
+    Args:
+        rng (chex.PRNGKey): Singleton
+        env (UnderspecifiedEnv):
+        env_params (EnvParams):
+        train_state (TrainState): Singleton
+        init_hstate (chex.ArrayTree): This is the init RNN hidden state, has to have shape (NUM_ENVS, ...)
+        init_obs (Observation): The initial observation, shape (NUM_ENVS, ...)
+        init_env_state (EnvState): The initial env state (NUM_ENVS, ...)
+        num_envs (int): The number of envs that are vmapped over.
+        max_episode_length (int): The maximum episode length, i.e., the number of steps to do the rollouts for.
+    Returns:
+        Tuple[Tuple[chex.PRNGKey, TrainState, chex.ArrayTree, Observation, EnvState, chex.Array], Tuple[Observation, chex.Array, chex.Array, chex.Array, chex.Array, chex.Array, dict]]: (rng, train_state, hstate, last_obs, last_env_state, last_value), traj, where traj is (obs, action, reward, done, log_prob, value, info). The first element in the tuple consists of arrays that have shapes (NUM_ENVS, ...) (except `rng` and and `train_state` which are singleton). The second element in the tuple is of shape (NUM_STEPS, NUM_ENVS, ...), and it contains the trajectory.
+    """
+    def sample_step(carry, _):
+        rng, train_state, hstate, obs, env_state, last_done = carry
+        prev_state = env_state
+        rng, rng_action, rng_step = jax.random.split(rng, 3)
+        x = jax.tree.map(lambda x: x[None, ...], (obs, last_done))
+        hstate, pi, value = train_state.apply_fn(train_state.params, hstate, x)
+        action = pi.sample(seed=rng_action)
+        log_prob = pi.log_prob(action)
+        value, action, log_prob = jax.tree.map(lambda x: x.squeeze(0), (value, action, log_prob))
+        next_obs, env_state, reward, done, info = jax.vmap(env.step, in_axes=(0, 0, 0, None))(
+            jax.random.split(rng_step, num_envs), env_state, action, env_params
+        )
+        carry = (rng, train_state, hstate, next_obs, env_state, done)
+        step = (obs, action, reward, done, log_prob, value, info)
+        if return_states:
+            step += (prev_state,)
+        return carry, step
+    (rng, train_state, hstate, last_obs, last_env_state, last_done), traj = jax.lax.scan(
+        sample_step,
+        (
+            rng,
+            train_state,
+            init_hstate,
+            init_obs,
+            init_env_state,
+            jnp.zeros(num_envs, dtype=bool),
+        ),
+        None,
+        length=max_episode_length,
+    )
+    x = jax.tree.map(lambda x: x[None, ...], (last_obs, last_done))
+    _, _, last_value = train_state.apply_fn(train_state.params, hstate, x)
+    my_obs = traj[0]
+    rew = traj[2]
+    return (rng, train_state, hstate, last_obs, last_env_state, last_value.squeeze(0)), traj
+def update_actor_critic_rnn(
+    rng: chex.PRNGKey,
+    train_state: TrainState,
+    init_hstate: chex.ArrayTree,
+    batch: chex.ArrayTree,
+    num_envs: int,
+    n_steps: int,
+    n_minibatch: int,
+    n_epochs: int,
+    clip_eps: float,
+    entropy_coeff: float,
+    critic_coeff: float,
+    update_grad: bool = True,
+) -> Tuple[Tuple[chex.PRNGKey, TrainState], chex.ArrayTree]:
+    """This function takes in a rollout, and PPO hyperparameters, and updates the train state.
+    Args:
+        rng (chex.PRNGKey):
+        train_state (TrainState):
+        init_hstate (chex.ArrayTree):
+        batch (chex.ArrayTree): obs, actions, dones, log_probs, values, targets, advantages
+        num_envs (int):
+        n_steps (int):
+        n_minibatch (int):
+        n_epochs (int):
+        clip_eps (float):
+        entropy_coeff (float):
+        critic_coeff (float):
+        update_grad (bool, optional): If False, the train state does not actually get updated. Defaults to True.
+    Returns:
+        Tuple[Tuple[chex.PRNGKey, TrainState], chex.ArrayTree]: It returns a new rng, the updated train_state, and the losses. The losses have structure (loss, (l_vf, l_clip, entropy))
+    """
+    # NOTE: n_steps is accepted but not referenced anywhere in this function body.
+    # NOTE: the second element of the result is (losses, grad_norms), stacked over epochs and minibatches.
+    obs, actions, dones, log_probs, values, targets, advantages = batch
+    # Shift the done flags one step later along axis 0, so each row holds the flag of the
+    # previous step; the first row is set to False.
+    last_dones = jnp.roll(dones, 1, axis=0).at[0].set(False)
+    batch = obs, actions, last_dones, log_probs, values, targets, advantages
+    def update_epoch(carry, _):
+        def update_minibatch(train_state, minibatch):
+            init_hstate, obs, actions, last_dones, log_probs, values, targets, advantages = minibatch
+            def loss_fn(params):
+                _, pi, values_pred = train_state.apply_fn(params, init_hstate, (obs, last_dones))
+                log_probs_pred = pi.log_prob(actions)
+                entropy = pi.entropy().mean()
+                ratio = jnp.exp(log_probs_pred - log_probs)
+                # Advantages are normalised within the minibatch.
+                A = (advantages - advantages.mean()) / (advantages.std() + 1e-8)
+                # Clipped surrogate objective, negated so that it is minimised.
+                l_clip = (-jnp.minimum(ratio * A, jnp.clip(ratio, 1 - clip_eps, 1 + clip_eps) * A)).mean()
+                # The value prediction is clipped around the rollout values with the same clip_eps.
+                values_pred_clipped = values + (values_pred - values).clip(-clip_eps, clip_eps)
+                l_vf = 0.5 * jnp.maximum((values_pred - targets) ** 2, (values_pred_clipped - targets) ** 2).mean()
+                loss = l_clip + critic_coeff * l_vf - entropy_coeff * entropy
+                return loss, (l_vf, l_clip, entropy)
+            grad_fn = jax.value_and_grad(loss_fn, has_aux=True)
+            # With has_aux=True, "loss" is the pair (loss, (l_vf, l_clip, entropy)).
+            loss, grads = grad_fn(train_state.params)
+            # update_grad is a Python bool, so this branch is resolved at trace time.
+            if update_grad:
+                train_state = train_state.apply_gradients(grads=grads)
+            # L2 norm of all gradient leaves flattened into a single vector.
+            grad_norm = jnp.linalg.norm(
+                jnp.concatenate(jax.tree_util.tree_map(lambda x: x.flatten(), jax.tree_util.tree_flatten(grads)[0]))
+            )
+            return train_state, (loss, grad_norm)
+        rng, train_state = carry
+        rng, rng_perm = jax.random.split(rng)
+        # A fresh permutation of the env indices is drawn every epoch.
+        permutation = jax.random.permutation(rng_perm, num_envs)
+        # init_hstate is permuted and split along axis 0; the batch arrays are permuted along
+        # axis 1 and reshaped so the minibatch index becomes the leading axis for the scan.
+        minibatches = (
+            jax.tree.map(
+                lambda x: jnp.take(x, permutation, axis=0).reshape(n_minibatch, -1, *x.shape[1:]),
+                init_hstate,
+            ),
+            *jax.tree.map(
+                lambda x: jnp.take(x, permutation, axis=1)
+                .reshape(x.shape[0], n_minibatch, -1, *x.shape[2:])
+                .swapaxes(0, 1),
+                batch,
+            ),
+        )
+        train_state, (losses, grads) = jax.lax.scan(update_minibatch, train_state, minibatches)
+        return (rng, train_state), (losses, grads)
+    return jax.lax.scan(update_epoch, (rng, train_state), None, n_epochs)
+# Positions 0, 2, 8 and 9 (env, config, update_grad, return_states) are static jit arguments.
+# NOTE(review): jit static arguments must be hashable, so `config` is presumably a hashable
+# mapping rather than a plain dict — confirm against the callers.
+@partial(jax.jit, static_argnums=(0, 2, 8, 9))
+def sample_trajectories_and_learn(
+    env: UnderspecifiedEnv,
+    env_params: EnvParams,
+    config: dict,
+    rng: chex.PRNGKey,
+    train_state: TrainState,
+    init_hstate: chex.Array,
+    init_obs: Observation,
+    init_env_state: EnvState,
+    update_grad: bool = True,
+    return_states: bool = False,
+) -> Tuple[
+    Tuple[chex.PRNGKey, TrainState, Observation, EnvState],
+    Tuple[
+        Observation,
+        chex.Array,
+        chex.Array,
+        chex.Array,
+        chex.Array,
+        chex.Array,
+        dict,
+        chex.Array,
+        chex.Array,
+        chex.ArrayTree,
+        chex.Array,
+    ],
+]:
+    """This function loops the following:
+        - rollout for config['num_steps']
+        - learn / update policy
+    And it loops it for config['outer_rollout_steps'].
+    What is returns is a new carry (rng, train_state, init_obs, init_env_state), and concatenated rollouts. The shape of the rollouts are config['num_steps'] * config['outer_rollout_steps']. In other words, the trajectories returned by this function are the same as if we ran rollouts for config['num_steps'] * config['outer_rollout_steps'] steps, but the agent does perform PPO updates in between.
+    Args:
+        env (UnderspecifiedEnv):
+        env_params (EnvParams):
+        config (dict):
+        rng (chex.PRNGKey):
+        train_state (TrainState):
+        init_obs (Observation):
+        init_env_state (EnvState):
+        update_grad (bool, optional): Defaults to True.
+    Returns:
+        Tuple[Tuple[chex.PRNGKey, TrainState, Observation, EnvState], Tuple[Observation, chex.Array, chex.Array, chex.Array, chex.Array, chex.Array, dict, chex.Array, chex.Array, chex.ArrayTree, chex.Array]]: This returns a tuple:
+        (
+            (rng, train_state, init_obs, init_env_state),
+            (obs, actions, rewards, dones, log_probs, values, info, advantages, targets, losses, grads)
+        )
+    """
+    # NOTE: the carry actually returned is (rng, train_state, hstate, last_obs, last_env_state),
+    # i.e. it also contains the RNN hidden state. When return_states is True, the rollout tuple
+    # gains a trailing `states` element.
+    def single_step(carry, _):
+        rng, train_state, init_hstate, init_obs, init_env_state = carry
+        ((rng, train_state, new_hstate, last_obs, last_env_state, last_value), traj,) = sample_trajectories_rnn(
+            rng,
+            env,
+            env_params,
+            train_state,
+            init_hstate,
+            init_obs,
+            init_env_state,
+            config["num_train_envs"],
+            config["num_steps"],
+            return_states=return_states,
+        )
+        # With return_states the trajectory has a trailing states element; split it off so the
+        # seven-way unpacking below still works.
+        if return_states:
+            states = traj[-1]
+            traj = traj[:-1]
+        (obs, actions, rewards, dones, log_probs, values, info) = traj
+        advantages, targets = compute_gae(config["gamma"], config["gae_lambda"], last_value, values, rewards, dones)
+        # Update the policy using trajectories collected from replay levels
+        # The update receives the hidden state from the START of this rollout (init_hstate),
+        # while the carry below continues from new_hstate.
+        (rng, train_state), (losses, grads) = update_actor_critic_rnn(
+            rng,
+            train_state,
+            init_hstate,
+            (obs, actions, dones, log_probs, values, targets, advantages),
+            config["num_train_envs"],
+            config["num_steps"],
+            config["num_minibatches"],
+            config["update_epochs"],
+            config["clip_eps"],
+            config["ent_coef"],
+            config["vf_coef"],
+            update_grad=update_grad,
+        )
+        new_carry = (rng, train_state, new_hstate, last_obs, last_env_state)
+        step = (obs, actions, rewards, dones, log_probs, values, info, advantages, targets, losses, grads)
+        if return_states:
+            step += (states,)
+        return new_carry, step
+    carry = (rng, train_state, init_hstate, init_obs, init_env_state)
+    new_carry, all_rollouts = jax.lax.scan(single_step, carry, None, length=config["outer_rollout_steps"])
+    # scan stacks each output on a new leading axis of length outer_rollout_steps; concatenating
+    # along axis 0 merges that axis with the one after it.
+    all_rollouts = jax.tree_util.tree_map(lambda x: jnp.concatenate(x, axis=0), all_rollouts)
+    return new_carry, all_rollouts
+def no_op_rollout(
+    env: UnderspecifiedEnv,
+    env_params: EnvParams,
+    rng: chex.PRNGKey,
+    init_obs: Observation,
+    init_env_state: EnvState,
+    num_envs: int,
+    max_episode_length: int,
+    do_random=False,
+):
+    noop = jnp.array(env.action_type.noop_action())
+    zero_action = jnp.repeat(noop[None, ...], num_envs, axis=0)
+    SHAPE = zero_action.shape
+    def sample_step(carry, _):
+        rng, obs, env_state, last_done = carry
+        rng, rng_step, _rng = jax.random.split(rng, 3)
+        if do_random:
+            action = jax.vmap(env.action_space(env_params).sample)(jax.random.split(_rng, num_envs))
+        else:
+            action = zero_action
+        next_obs, env_state, reward, done, info = jax.vmap(env.step, in_axes=(0, 0, 0, None))(
+            jax.random.split(rng_step, num_envs), env_state, action, env_params
+        )
+        carry = (rng, next_obs, env_state, done)
+        return carry, (obs, action, reward, done, info)
+    (rng, last_obs, last_env_state, last_done), traj = jax.lax.scan(
+        sample_step,
+        (
+            rng,
+            init_obs,
+            init_env_state,
+            jnp.zeros(num_envs, dtype=bool),
+        ),
+        None,
+        length=max_episode_length,
+    )
+    info = traj[-1]
+    dones = traj[-2]
+    returns_per_env = (info["returned_episode_returns"] * dones).sum(axis=0) / jnp.maximum(1, dones.sum(axis=0))
+    lens_per_env = (info["returned_episode_lengths"] * dones).sum(axis=0) / jnp.maximum(1, dones.sum(axis=0))
+    success_per_env = (info["returned_episode_solved"] * dones).sum(axis=0) / jnp.maximum(1, dones.sum(axis=0))
+    return returns_per_env, lens_per_env, success_per_env
+def no_op_and_random_rollout(
+    env: UnderspecifiedEnv,
+    env_params: EnvParams,
+    rng: chex.PRNGKey,
+    init_obs: Observation,
+    init_env_state: EnvState,
+    num_envs: int,
+    max_episode_length: int,
+):
+    returns_noop, lens_noop, success_noop = no_op_rollout(
+        env, env_params, rng, init_obs, init_env_state, num_envs, max_episode_length, do_random=False
+    )
+    returns_random, lens_random, success_random = no_op_rollout(
+        env, env_params, rng, init_obs, init_env_state, num_envs, max_episode_length, do_random=True
+    )
+    return returns_noop, lens_noop, success_noop, returns_random, lens_random, success_random

kinetix/util/saving.py ADDED Viewed

	@@ -0,0 +1,540 @@

+import json
+import os
+import pickle
+from typing import Any, Dict, Union
+import flax.serialization
+import flax.serialization
+import flax.serialization
+import flax.serialization
+import flax.serialization
+import flax.serialization
+import flax.serialization
+import jax
+import jax.numpy as jnp
+import flax
+import wandb
+from jax2d.engine import (
+    calculate_collision_matrix,
+    get_empty_collision_manifolds,
+    get_pairwise_interaction_indices,
+    recalculate_mass_and_inertia,
+)
+from jax2d.sim_state import RigidBody, SimState
+from kinetix.environment.env_state import EnvState, StaticEnvParams, EnvParams
+from flax.traverse_util import flatten_dict, unflatten_dict
+from safetensors.flax import save_file, load_file
+from kinetix.pcg.pcg import env_state_to_pcg_state
+from kinetix.pcg.pcg_state import PCGState
+import bz2
+def check_if_mass_and_inertia_are_correct(state: SimState, env_params: EnvParams, static_params):
+    new = recalculate_mass_and_inertia(state, static_params, state.polygon_densities, state.circle_densities)
+    def _check(a, b, shape, name):
+        a = jnp.where(shape.active, a, jnp.zeros_like(a))
+        b = jnp.where(shape.active, b, jnp.zeros_like(b))
+        if not jnp.allclose(a, b):
+            idxs = jnp.arange(len(shape.active))[(a != b) & shape.active]
+            new_one = a[idxs]
+            old_one = b[idxs]
+            raise ValueError(
+                f"Error: {name} is not the same after loading. Indexes {idxs} are incorrect. New = {new_one} | Before = {old_one}"
+            )
+    _check(new.polygon.inverse_mass, state.polygon.inverse_mass, state.polygon, "Polygon inverse mass")
+    _check(new.circle.inverse_mass, state.circle.inverse_mass, state.circle, "Circle inverse mass")
+    _check(new.polygon.inverse_inertia, state.polygon.inverse_inertia, state.polygon, "Polygon inverse inertia")
+    _check(new.circle.inverse_inertia, state.circle.inverse_inertia, state.circle, "Circle inverse inertia")
+    return True
+def save_pickle(filename, state):
+    with open(filename, "wb") as f:
+        pickle.dump(state, f)
+def load_pcg_state_pickle(filename):
+    with open(filename, "rb") as f:
+        return pickle.load(f)
+def expand_env_state(env_state: EnvState, static_env_params: StaticEnvParams, ignore_collision_matrix=False):
+    num_rects = len(env_state.polygon.position)
+    num_circles = len(env_state.circle.position)
+    num_joints = len(env_state.joint.a_index)
+    num_thrusters = len(env_state.thruster.object_index)
+    def _add_dummy(num_to_add, obj):
+        return jax.tree_map(
+            lambda current: jnp.concatenate(
+                [current, jnp.zeros((num_to_add, *current.shape[1:]), dtype=current.dtype)], axis=0
+            ),
+            obj,
+        )
+    does_need_to_change = False
+    added_rects = 0
+    if (
+        num_rects > static_env_params.num_polygons
+        or num_circles > static_env_params.num_circles
+        or num_joints > static_env_params.num_joints
+    ):
+        raise Exception(
+            f"The current static_env_params is too small to accommodate the loaded env_state (needs num_rects={num_rects}, num_circles={num_circles}, num_joints={num_joints} but current is {static_env_params.num_polygons}, {static_env_params.num_circles}, {static_env_params.num_joints})."
+        )
+    if num_rects < static_env_params.num_polygons:
+        added_rects = static_env_params.num_polygons - num_rects
+        does_need_to_change = True
+        env_state = env_state.replace(
+            polygon=_add_dummy(added_rects, env_state.polygon),
+            polygon_shape_roles=_add_dummy(added_rects, env_state.polygon_shape_roles),
+            polygon_highlighted=_add_dummy(added_rects, env_state.polygon_highlighted),
+            polygon_densities=_add_dummy(added_rects, env_state.polygon_densities),
+        )
+    if num_circles < static_env_params.num_circles:
+        does_need_to_change = True
+        n_to_add = static_env_params.num_circles - num_circles
+        env_state = env_state.replace(
+            circle=_add_dummy(n_to_add, env_state.circle),
+            circle_shape_roles=_add_dummy(n_to_add, env_state.circle_shape_roles),
+            circle_highlighted=_add_dummy(n_to_add, env_state.circle_highlighted),
+            circle_densities=_add_dummy(n_to_add, env_state.circle_densities),
+        )
+    if num_joints < static_env_params.num_joints:
+        does_need_to_change = True
+        n_to_add = static_env_params.num_joints - num_joints
+        env_state = env_state.replace(
+            joint=_add_dummy(n_to_add, env_state.joint),
+            motor_bindings=_add_dummy(n_to_add, env_state.motor_bindings),
+            motor_auto=_add_dummy(n_to_add, env_state.motor_auto),
+        )
+    if num_thrusters < static_env_params.num_thrusters:
+        does_need_to_change = True
+        n_to_add = static_env_params.num_thrusters - num_thrusters
+        env_state = env_state.replace(
+            thruster=_add_dummy(n_to_add, env_state.thruster),
+            thruster_bindings=_add_dummy(n_to_add, env_state.thruster_bindings),
+        )
+    # This fixes the indices
+    def _modify_index(old_indices):
+        return jnp.where(old_indices >= num_rects, old_indices + added_rects, old_indices)
+    if added_rects > 0:
+        env_state = env_state.replace(
+            joint=env_state.joint.replace(
+                a_index=_modify_index(env_state.joint.a_index),
+                b_index=_modify_index(env_state.joint.b_index),
+            ),
+            thruster=env_state.thruster.replace(
+                object_index=_modify_index(env_state.thruster.object_index),
+            ),
+        )
+    # Double check the collision manifolds are fine
+    if does_need_to_change or 1:
+        # print("Loading but changing the shapes to match the current static params.")
+        acc_rr_manifolds, acc_cr_manifolds, acc_cc_manifolds = get_empty_collision_manifolds(static_env_params)
+        env_state = env_state.replace(
+            collision_matrix=(
+                env_state.collision_matrix
+                if ignore_collision_matrix
+                else calculate_collision_matrix(static_env_params, env_state.joint)
+            ),
+            acc_rr_manifolds=acc_rr_manifolds,
+            acc_cr_manifolds=acc_cr_manifolds,
+            acc_cc_manifolds=acc_cc_manifolds,
+        )
+    return env_state
+def expand_pcg_state(pcg_state: PCGState, static_env_params):
+    new_pcg_state = pcg_state.replace(
+        env_state=expand_env_state(pcg_state.env_state, static_env_params),
+        env_state_max=expand_env_state(pcg_state.env_state_max, static_env_params),
+        env_state_pcg_mask=expand_env_state(
+            pcg_state.env_state_pcg_mask, static_env_params, ignore_collision_matrix=True
+        ),
+    )
+    new_pcg_state = new_pcg_state.replace(
+        env_state_pcg_mask=new_pcg_state.env_state_pcg_mask.replace(
+            collision_matrix=jnp.zeros_like(new_pcg_state.env_state.collision_matrix, dtype=bool),
+        )
+    )
+    num_shapes = new_pcg_state.env_state.polygon.active.shape[0] + new_pcg_state.env_state.circle.active.shape[0]
+    return new_pcg_state.replace(
+        tied_together=jnp.zeros((num_shapes, num_shapes), dtype=bool)
+        .at[
+            : pcg_state.tied_together.shape[0],
+            : pcg_state.tied_together.shape[1],
+        ]
+        .set(pcg_state.tied_together)
+    )
+def load_world_state_pickle(filename, params=None, static_env_params=None):
+    static_params = static_env_params or StaticEnvParams()
+    with open(filename, "rb") as f:
+        state: SimState = pickle.load(f)
+        state = jax.tree.map(lambda x: jnp.nan_to_num(x), state)
+        # Check if the mass and inertia are reasonable.
+        check_if_mass_and_inertia_are_correct(state, params or EnvParams(), static_params)
+    # Now check if the shapes are correct
+    return expand_env_state(state, static_params)
+def stack_list_of_pytrees(list_of_pytrees):
+    v = jax.tree_map(lambda x: jnp.expand_dims(x, 0), list_of_pytrees[0])
+    for l in list_of_pytrees[1:]:
+        v = jax.tree_map(lambda x, y: jnp.concatenate([x, jnp.expand_dims(y, 0)], axis=0), v, l)
+    return v
+def get_pcg_state_from_json(json_filename) -> PCGState:
+    env_state, _, _ = load_from_json_file(json_filename)
+    return env_state_to_pcg_state(env_state)
+def my_load_file(filename):
+    data = bz2.BZ2File(filename, "rb")
+    data = pickle.load(data)
+    return data
+def my_save_file(obj, filename):
+    with bz2.BZ2File(filename, "w") as f:
+        pickle.dump(obj, f)
+def save_params(params: Dict, filename: Union[str, os.PathLike]) -> None:
+    my_save_file(params, filename)
+def load_params(filename: Union[str, os.PathLike], legacy=False) -> Dict:
+    if legacy:
+        filename = filename.replace("full_model.pbz2", "model.safetensors")
+        filename = filename.replace(".pbz2", ".safetensors")
+        return unflatten_dict(load_file(filename), sep=",")
+    return my_load_file(filename)
+def load_params_from_wandb_artifact_path(checkpoint_name, legacy=False):
+    api = wandb.Api()
+    name = api.artifact(checkpoint_name).download()
+    network_params = load_params(name + "/model.pbz2", legacy=legacy)
+    return network_params
+# NOTE: a second `save_params_to_wandb` with the same signature is defined further down in this
+# file (it forwards to `save_dict_to_wandb`). Being later, it rebinds the name, so this version
+# is shadowed once the module has finished importing.
+# NOTE(review): this version writes "model.pbz2", the later one writes "params.pbz2" — confirm
+# which file name the loaders are expected to find.
+def save_params_to_wandb(params, timesteps, config):
+    # Optionally replace the raw timestep count with a rounded "<N>B" (billions) label.
+    if config["checkpoint_human_numbers"]:
+        timesteps = str(round(timesteps / 1e9)) + "B"
+    run_name = config["run_name"] + "-" + str(config["random_hash"]) + "-" + str(timesteps)
+    save_dir = os.path.join(config["save_path"], run_name)
+    os.makedirs(save_dir, exist_ok=True)
+    save_params(params, f"{save_dir}/model.pbz2")
+    # upload this to wandb as an artifact
+    artifact = wandb.Artifact(f"{run_name}-checkpoint", type="checkpoint")
+    artifact.add_file(f"{save_dir}/model.pbz2")
+    artifact.save()
+    print(f"Parameters of model saved in {save_dir}/model.pbz2")
+def load_params_wandb_artifact_path_full_model(checkpoint_name):
+    api = wandb.Api()
+    name = api.artifact(checkpoint_name).download()
+    all_dict = load_params(name + "/full_model.pbz2")
+    return all_dict["params"]
+def load_train_state_from_wandb_artifact_path(train_state, checkpoint_name, load_only_params=False, legacy=False):
+    api = wandb.Api()
+    name = api.artifact(checkpoint_name).download()
+    all_dict = load_params(name + "/full_model.pbz2", legacy=legacy)
+    if legacy:
+        return train_state.replace(params=all_dict)
+    train_state = train_state.replace(params=all_dict["params"])
+    if not load_only_params:
+        train_state = train_state.replace(
+            # step=all_dict["step"],
+            opt_state=all_dict["opt_state"]
+        )
+    return train_state
+def save_params_to_wandb(params, timesteps, config):
+    return save_dict_to_wandb(params, timesteps, config, "params")
+def save_dict_to_wandb(dict, timesteps, config, name):
+    timesteps = str(round(timesteps / 1e9)) + "B"
+    run_name = config["run_name"] + "-" + str(config["random_hash"]) + "-" + str(timesteps)
+    save_dir = os.path.join(config["save_path"], run_name)
+    os.makedirs(save_dir, exist_ok=True)
+    save_params(dict, f"{save_dir}/{name}.pbz2")
+    # upload this to wandb as an artifact
+    artifact = wandb.Artifact(f"{run_name}-checkpoint", type="checkpoint")
+    artifact.add_file(f"{save_dir}/{name}.pbz2")
+    artifact.save()
+    print(f"Parameters of model saved in {save_dir}/{name}.pbz2")
+def save_model_to_wandb(train_state, timesteps, config, is_final=False):
+    dict_to_use = {"step": train_state.step, "params": train_state.params, "opt_state": train_state.opt_state}
+    step = int(train_state.step)
+    if config["economical_saving"]:
+        if step in [2048, 10240, 40960, 81920] or is_final:
+            save_dict_to_wandb(dict_to_use, timesteps, config, "full_model")
+        else:
+            print("Not saving model because step is", step)
+    else:
+        save_dict_to_wandb(dict_to_use, timesteps, config, "full_model")
+def import_env_state_from_json(json_file: dict[str, Any]) -> tuple[EnvState, StaticEnvParams, EnvParams]:
+    from kinetix.environment.env import create_empty_env
+    def normalise(k, v):
+        if k == "screen_dim":
+            return v
+        if type(v) == dict and "0" in v:
+            return jnp.array([normalise(k, v[str(i)]) for i in range(len(v))])
+        return v
+    env_state = json_file["env_state"]
+    env_params = json_file["env_params"]
+    static_env_params = json_file["static_env_params"]
+    env_params_target = EnvParams()
+    static_env_params_target = StaticEnvParams()
+    new_env_params = flax.serialization.from_state_dict(
+        env_params_target, {k: normalise(k, v) for k, v in env_params.items()}
+    )
+    norm_static = {k: normalise(k, v) for k, v in static_env_params.items()}
+    # norm_static["screen_dim"] = tuple(static_env_params_target.screen_dim)
+    norm_static["downscale"] = static_env_params_target.downscale
+    # print(
+    #     static_env_params_target,
+    # )
+    new_static_env_params = flax.serialization.from_state_dict(static_env_params_target, norm_static)
+    new_static_env_params = new_static_env_params.replace(screen_dim=static_env_params_target.screen_dim)
+    env_state_target = create_empty_env(new_static_env_params)
+    def astype(x, all):
+        return jnp.astype(x, all.dtype)
+    def _load_rigidbody(env_state_target, i, is_poly):
+        to_load_from: dict[str, Any] = env_state["circle" if not is_poly else "polygon"][i]
+        role = to_load_from.pop("role")
+        density = to_load_from.pop("density")
+        if "highlighted" in to_load_from:
+            _ = to_load_from.pop("highlighted")
+        new_obj = flax.serialization.from_state_dict(
+            jax.tree.map(lambda x: x[i], env_state_target.circle if not is_poly else env_state_target.polygon),
+            {k: normalise(k, v) for k, v in to_load_from.items()},
+        )
+        if is_poly:
+            env_state_target = env_state_target.replace(
+                polygon_shape_roles=env_state_target.polygon_shape_roles.at[i].set(role),
+                polygon_densities=env_state_target.polygon_densities.at[i].set(density),
+                polygon=jax.tree.map(
+                    lambda all, new: all.at[i].set(astype(new, all)), env_state_target.polygon, new_obj
+                ),
+            )
+        else:
+            env_state_target = env_state_target.replace(
+                circle_shape_roles=env_state_target.circle_shape_roles.at[i].set(role),
+                circle_densities=env_state_target.circle_densities.at[i].set(density),
+                circle=jax.tree.map(lambda all, new: all.at[i].set(astype(new, all)), env_state_target.circle, new_obj),
+            )
+        return env_state_target
+    # Now load the env state:
+    for i in range(new_static_env_params.num_circles):
+        env_state_target = _load_rigidbody(env_state_target, i, False)
+    for i in range(new_static_env_params.num_polygons):
+        env_state_target = _load_rigidbody(env_state_target, i, True)
+    for i in range(new_static_env_params.num_joints):
+        to_load_from = env_state["joint"][i]
+        motor_binding = to_load_from.pop("motor_binding")
+        new_obj = flax.serialization.from_state_dict(
+            jax.tree.map(lambda x: x[i], env_state_target.joint), {k: normalise(k, v) for k, v in to_load_from.items()}
+        )
+        env_state_target = env_state_target.replace(
+            joint=jax.tree.map(lambda all, new: all.at[i].set(astype(new, all)), env_state_target.joint, new_obj),
+            motor_bindings=env_state_target.motor_bindings.at[i].set(motor_binding),
+        )
+    for i in range(new_static_env_params.num_thrusters):
+        to_load_from = env_state["thruster"][i]
+        thruster_binding = to_load_from.pop("thruster_binding")
+        new_obj = flax.serialization.from_state_dict(
+            jax.tree.map(lambda x: x[i], env_state_target.thruster),
+            {k: normalise(k, v) for k, v in to_load_from.items()},
+        )
+        env_state_target = env_state_target.replace(
+            thruster=jax.tree.map(lambda all, new: all.at[i].set(astype(new, all)), env_state_target.thruster, new_obj),
+            thruster_bindings=env_state_target.thruster_bindings.at[i].set(thruster_binding),
+        )
+    env_state_target = env_state_target.replace(
+        collision_matrix=flax.serialization.from_state_dict(
+            env_state_target.collision_matrix, normalise("collision_matrix", env_state["collision_matrix"])
+        )
+    )
+    for i in range(env_state_target.acc_rr_manifolds.active.shape[0]):
+        a = flax.serialization.from_state_dict(
+            jax.tree.map(lambda x: x[i], env_state_target.acc_rr_manifolds),
+            {k: normalise(k, v) for k, v in env_state["acc_rr_manifolds"][i].items()},
+        )
+        b = flax.serialization.from_state_dict(
+            jax.tree.map(lambda x: x[i], env_state_target.acc_rr_manifolds),
+            {k: normalise(k, v) for k, v in env_state["acc_rr_manifolds"][i + 1].items()},
+        )
+        env_state_target = env_state_target.replace(
+            acc_rr_manifolds=jax.tree.map(
+                lambda all, new: all.at[i].set(astype(new, all)), env_state_target.acc_rr_manifolds, a
+            ),
+        )
+        env_state_target.replace(
+            acc_rr_manifolds=jax.tree.map(
+                lambda all, new: all.at[i + 1].set(astype(new, all)), env_state_target.acc_rr_manifolds, b
+            )
+        )
+    for i in range(env_state_target.acc_cr_manifolds.active.shape[0]):
+        a = flax.serialization.from_state_dict(
+            jax.tree.map(lambda x: x[i], env_state_target.acc_cr_manifolds),
+            {k: normalise(k, v) for k, v in env_state["acc_cr_manifolds"][i].items()},
+        )
+        env_state_target = env_state_target.replace(
+            acc_cr_manifolds=jax.tree.map(
+                lambda all, new: all.at[i].set(astype(new, all)), env_state_target.acc_cr_manifolds, a
+            ),
+        )
+    for i in range(env_state_target.acc_cc_manifolds.active.shape[0]):
+        a = flax.serialization.from_state_dict(
+            jax.tree.map(lambda x: x[i], env_state_target.acc_cc_manifolds),
+            {k: normalise(k, v) for k, v in env_state["acc_cc_manifolds"][i].items()},
+        )
+        env_state_target = env_state_target.replace(
+            acc_cc_manifolds=jax.tree.map(
+                lambda all, new: all.at[i].set(astype(new, all)), env_state_target.acc_cc_manifolds, a
+            ),
+        )
+    env_state_target = env_state_target.replace(
+        collision_matrix=calculate_collision_matrix(new_static_env_params, env_state_target.joint)
+    )
+    return (
+        env_state_target,
+        new_static_env_params,
+        new_env_params.replace(max_timesteps=env_params_target.max_timesteps),
+    )
+def export_env_state_to_json(
+    filename: str, env_state: EnvState, static_env_params: StaticEnvParams, env_params: EnvParams
+):
+    json_to_save = {
+        "polygon": [],
+        "circle": [],
+        "joint": [],
+        "thruster": [],
+        "collision_matrix": flax.serialization.to_state_dict(env_state.collision_matrix.tolist()),
+        "acc_rr_manifolds": [],
+        "acc_cr_manifolds": [],
+        "acc_cc_manifolds": [],
+        "gravity": flax.serialization.to_state_dict(env_state.gravity.tolist()),
+    }
+    def _rigidbody_to_json(index: int, is_poly):
+        main_arr = env_state.polygon if is_poly else env_state.circle
+        c = jax.tree.map(lambda x: x[index].tolist(), main_arr)
+        roles = env_state.polygon_shape_roles if is_poly else env_state.circle_shape_roles
+        densities = env_state.polygon_densities if is_poly else env_state.circle_densities
+        highlighted = env_state.polygon_highlighted if is_poly else env_state.circle_highlighted
+        d = flax.serialization.to_state_dict(c)
+        d["role"] = roles[index].tolist()
+        d["density"] = densities[index].tolist()
+        d["highlighted"] = highlighted[index].tolist()
+        return d
+    def _joint_to_json(i):
+        joint = jax.tree.map(lambda x: x[i].tolist(), env_state.joint)
+        d = flax.serialization.to_state_dict(joint)
+        d["motor_binding"] = env_state.motor_bindings[i].tolist()
+        return d
+    def _thruster_to_json(i):
+        thruster = jax.tree.map(lambda x: x[i].tolist(), env_state.thruster)
+        d = flax.serialization.to_state_dict(thruster)
+        d["thruster_binding"] = env_state.thruster_bindings[i].tolist()
+        return d
+    for i in range(static_env_params.num_circles):
+        json_to_save["circle"].append(_rigidbody_to_json(i, False))
+    for i in range(static_env_params.num_polygons):
+        json_to_save["polygon"].append(_rigidbody_to_json(i, True))
+    for i in range(static_env_params.num_joints):
+        json_to_save["joint"].append(_joint_to_json(i))
+    for i in range(static_env_params.num_thrusters):
+        json_to_save["thruster"].append(_thruster_to_json(i))
+    ncc, ncr, nrr, circle_circle_pairs, circle_rect_pairs, rect_rect_pairs = get_pairwise_interaction_indices(
+        static_env_params
+    )
+    for i in range(nrr):
+        a = jax.tree.map(lambda x: x[i, 0].tolist(), env_state.acc_rr_manifolds)
+        b = jax.tree.map(lambda x: x[i, 1].tolist(), env_state.acc_rr_manifolds)
+        json_to_save["acc_rr_manifolds"].append(flax.serialization.to_state_dict(a))
+        json_to_save["acc_rr_manifolds"].append(flax.serialization.to_state_dict(b))
+    for i in range(ncr):
+        a = jax.tree.map(lambda x: x[i].tolist(), env_state.acc_cr_manifolds)
+        json_to_save["acc_cr_manifolds"].append(flax.serialization.to_state_dict(a))
+    for i in range(ncc):
+        a = jax.tree.map(lambda x: x[i].tolist(), env_state.acc_cc_manifolds)
+        json_to_save["acc_cc_manifolds"].append(flax.serialization.to_state_dict(a))
+    to_save = {
+        "env_state": json_to_save,
+        "env_params": flax.serialization.to_state_dict(
+            jax.tree.map(lambda x: x.tolist() if type(x) == jnp.ndarray else x, env_params)
+        ),
+        "static_env_params": flax.serialization.to_state_dict(
+            jax.tree.map(lambda x: x.tolist() if type(x) == jnp.ndarray else x, static_env_params)
+        ),
+    }
+    with open(filename, "w+") as f:
+        json.dump(to_save, f)
+    return to_save
+def load_from_json_file(filename):
+    with open(filename, "r") as f:
+        return import_env_state_from_json(json.load(f))
+# Running this module as a script does nothing; the guard is a placeholder.
+if __name__ == "__main__":
+    pass

kinetix/util/timing.py ADDED Viewed

	@@ -0,0 +1,15 @@

+from timeit import default_timer as tmr
+# Number of time_function calls currently in progress; used to tab-indent the printed timings.
+counter = 0
+def time_function(f, name):
+    global counter
+    t = "\t" * counter
+    # print(f"{t}Starting... {name}")
+    ss = tmr()
+    counter += 1
+    a = f()
+    counter -= 1
+    print(f"{t}{name} took {tmr() - ss} seconds")
+    return a