Upload folder using huggingface_hub

Browse files

Files changed (5) hide show

README.md +38 -0
config.json +48 -0
config.yaml +225 -0
model.safetensors +3 -0
replay.mp4 +0 -0

README.md ADDED Viewed

	@@ -0,0 +1,38 @@

+---
+library_name: lerobot
+tags:
+- model_hub_mixin
+- pytorch_model_hub_mixin
+- robotics
+- dot
+license: apache-2.0
+datasets:
+- lerobot/pusht_keypoints
+pipeline_tag: robotics
+---
+# Model Card for "Decoder-Only Transformer (DOT) Policy" for the PushT keypoints dataset
+Read more about the model and implementation details in the [DOT Policy repository](https://github.com/IliaLarchenko/dot_policy).
+This model is trained using the [LeRobot library](https://huggingface.co/lerobot) and achieves state-of-the-art results for behavior cloning on the PushT keypoints dataset. It achieves an 84.5% success rate (and a 0.964 average max reward), compared with ~78% for the previous state-of-the-art model and the 69% that I managed to reproduce using the VQ-BeT implementation in LeRobot.
+This result is achieved without checkpoint selection. If you are interested in an even better model with a success rate of ~94% (but harder to reproduce, as it requires some parameter tuning and checkpoint selection), please refer to [this model](https://huggingface.co/IliaLarchenko/dot_pusht_keypoints_best).
+You can use this model by installing LeRobot from [this branch](https://github.com/IliaLarchenko/lerobot/tree/dot).
+To train the model:
+```bash
+python lerobot/scripts/train.py policy=dot_pusht_keypoints env=pusht env.gym.obs_type=environment_state_agent_pos
+```
+To evaluate the model:
+```bash
+python lerobot/scripts/eval.py -p IliaLarchenko/dot_pusht_keypoints eval.n_episodes=1000 eval.batch_size=100 seed=1000000
+```
+Model size:
+- Total parameters: 2.1M
+- Trainable parameters: 2.1M

config.json ADDED Viewed

	@@ -0,0 +1,48 @@

+{
+  "alpha": 0.75,
+  "crop_scale": 1.0,
+  "dim_feedforward": 512,
+  "dim_model": 128,
+  "dropout": 0.1,
+  "inference_horizon": 20,
+  "input_normalization_modes": {
+    "observation.environment_state": "min_max",
+    "observation.state": "min_max"
+  },
+  "input_shapes": {
+    "observation.environment_state": [
+      16
+    ],
+    "observation.state": [
+      2
+    ]
+  },
+  "lookback_aug": 5,
+  "lookback_obs_steps": 10,
+  "lora_rank": 20,
+  "merge_lora": true,
+  "n_decoder_layers": 8,
+  "n_heads": 8,
+  "n_obs_steps": 3,
+  "noise_decay": 0.999995,
+  "output_normalization_modes": {
+    "action": "min_max"
+  },
+  "output_shapes": {
+    "action": [
+      2
+    ]
+  },
+  "pre_norm": true,
+  "predict_every_n": 1,
+  "pretrained_backbone_weights": "ResNet18_Weights.IMAGENET1K_V1",
+  "rescale_shape": [
+    96,
+    96
+  ],
+  "return_every_n": 2,
+  "state_noise": 0.01,
+  "train_alpha": 0.9,
+  "train_horizon": 20,
+  "vision_backbone": "resnet18"
+}

config.yaml ADDED Viewed

	@@ -0,0 +1,225 @@

+resume: false
+device: cuda
+use_amp: true
+seed: 100000
+dataset_repo_id: lerobot/pusht_keypoints
+video_backend: pyav
+training:
+  offline_steps: 1000000
+  num_workers: 24
+  batch_size: 24
+  eval_freq: 10000
+  log_freq: 1000
+  save_checkpoint: true
+  save_freq: 50000
+  online_steps: 0
+  online_rollout_n_episodes: 1
+  online_rollout_batch_size: 1
+  online_steps_between_rollouts: 1
+  online_sampling_ratio: 0.5
+  online_env_seed: null
+  online_buffer_capacity: null
+  online_buffer_seed_size: 0
+  do_online_rollout_async: false
+  image_transforms:
+    enable: false
+    max_num_transforms: 3
+    random_order: false
+    brightness:
+      weight: 1
+      min_max:
+      - 0.8
+      - 1.2
+    contrast:
+      weight: 1
+      min_max:
+      - 0.8
+      - 1.2
+    saturation:
+      weight: 1
+      min_max:
+      - 0.5
+      - 1.5
+    hue:
+      weight: 1
+      min_max:
+      - -0.05
+      - 0.05
+    sharpness:
+      weight: 1
+      min_max:
+      - 0.8
+      - 1.2
+  save_model: true
+  grad_clip_norm: 50
+  lr: 0.0001
+  min_lr: 0.0001
+  lr_cycle_steps: 300000
+  weight_decay: 1.0e-05
+  delta_timestamps:
+    observation.environment_state:
+    - -1.5
+    - -1.4
+    - -1.3
+    - -1.2
+    - -1.1
+    - -1.0
+    - -0.9
+    - -0.8
+    - -0.7
+    - -0.6
+    - -0.5
+    - -0.1
+    - 0.0
+    observation.state:
+    - -1.5
+    - -1.4
+    - -1.3
+    - -1.2
+    - -1.1
+    - -1.0
+    - -0.9
+    - -0.8
+    - -0.7
+    - -0.6
+    - -0.5
+    - -0.1
+    - 0.0
+    action:
+    - -1.5
+    - -1.4
+    - -1.3
+    - -1.2
+    - -1.1
+    - -1.0
+    - -0.9
+    - -0.8
+    - -0.7
+    - -0.6
+    - -0.5
+    - -0.1
+    - 0.0
+    - 0.1
+    - 0.2
+    - 0.3
+    - 0.4
+    - 0.5
+    - 0.6
+    - 0.7
+    - 0.8
+    - 0.9
+    - 1.0
+    - 1.1
+    - 1.2
+    - 1.3
+    - 1.4
+    - 1.5
+    - 1.6
+    - 1.7
+    - 1.8
+    - 1.9
+eval:
+  n_episodes: 100
+  batch_size: 100
+  use_async_envs: false
+wandb:
+  enable: true
+  disable_artifact: false
+  project: pusht
+  notes: ''
+fps: 10
+env:
+  name: pusht
+  task: PushT-v0
+  image_size: 96
+  state_dim: 2
+  action_dim: 2
+  fps: ${fps}
+  episode_length: 300
+  gym:
+    obs_type: environment_state_agent_pos
+    render_mode: rgb_array
+    visualization_width: 384
+    visualization_height: 384
+override_dataset_stats:
+  observation.environment_state:
+    min:
+    - 0.0
+    - 0.0
+    - 0.0
+    - 0.0
+    - 0.0
+    - 0.0
+    - 0.0
+    - 0.0
+    - 0.0
+    - 0.0
+    - 0.0
+    - 0.0
+    - 0.0
+    - 0.0
+    - 0.0
+    - 0.0
+    max:
+    - 512.0
+    - 512.0
+    - 512.0
+    - 512.0
+    - 512.0
+    - 512.0
+    - 512.0
+    - 512.0
+    - 512.0
+    - 512.0
+    - 512.0
+    - 512.0
+    - 512.0
+    - 512.0
+    - 512.0
+    - 512.0
+  observation.state:
+    min:
+    - 0.0
+    - 0.0
+    max:
+    - 512.0
+    - 512.0
+  action:
+    min:
+    - 0.0
+    - 0.0
+    max:
+    - 512.0
+    - 512.0
+policy:
+  name: dot
+  n_obs_steps: 3
+  train_horizon: 20
+  inference_horizon: 20
+  lookback_obs_steps: 10
+  lookback_aug: 5
+  input_shapes:
+    observation.environment_state:
+    - 16
+    observation.state:
+    - ${env.state_dim}
+  output_shapes:
+    action:
+    - ${env.action_dim}
+  input_normalization_modes:
+    observation.environment_state: min_max
+    observation.state: min_max
+  output_normalization_modes:
+    action: min_max
+  state_noise: 0.01
+  noise_decay: 0.999995
+  pre_norm: true
+  dim_model: 128
+  n_heads: 8
+  dim_feedforward: 512
+  n_decoder_layers: 8
+  dropout: 0.1
+  alpha: 0.75
+  train_alpha: 0.9
+  predict_every_n: 1
+  return_every_n: 2

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b45aaac6d363fb26f405462dd901b45d1b436b686c163c9b4d2b71085bdc1aa5
+size 8523444

replay.mp4 ADDED Viewed

Binary file (58.5 kB). View file