oier-mees committed on
Commit
a7c8d30
·
verified ·
1 Parent(s): caaf136

Upload 8 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ FuSe_Octo_model/50000/default/checkpoint filter=lfs diff=lfs merge=lfs -text
FuSe_Octo_model/50000/commit_success.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Checkpoint commit was successful to gs://oier-europe-bucket/octo/josh_pod_final_combined_model_tvl_single_rephrase_full_combinations_b1024_20240910_220927/50000
FuSe_Octo_model/50000/default/_METADATA ADDED
The diff for this file is too large to render. See raw diff
 
FuSe_Octo_model/50000/default/checkpoint ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:455d2cc3a7d2a062525cc389a7eb1019f0d497ce0e229040e658b94bca8483a0
3
+ size 811234699
FuSe_Octo_model/50000/default/commit_success.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Checkpoint commit was successful to gs://oier-europe-bucket/octo/josh_pod_final_combined_model_tvl_single_rephrase_full_combinations_b1024_20240910_220927/50000/default
FuSe_Octo_model/config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"dataset_kwargs": {"balance_weights": true, "batch_size": 64, "dataset_kwargs_list": [{"name": "fractal20220817_data", "data_dir": "gs://rail-datasets-europe-west4/oxe/resize_256_256", "image_obs_keys": {"primary": "image", "wrist": null}, "action_normalization_mask": [true, true, true, true, true, true, false], "language_key": "language_instruction", "standardize_fn": {"module": "octo.data.oxe.oxe_standardization_transforms", "name": "rt1_dataset_transform", "args": [], "kwargs": {}}}, {"name": "kuka", "data_dir": "gs://rail-datasets-europe-west4/oxe/resize_256_256", "image_obs_keys": {"primary": "image", "wrist": null}, "action_normalization_mask": [true, true, true, true, true, true, false], "language_key": "language_instruction", "standardize_fn": {"module": "octo.data.oxe.oxe_standardization_transforms", "name": "kuka_dataset_transform", "args": [], "kwargs": {}}}, {"name": "bridge_dataset", "data_dir": "gs://rail-datasets-europe-west4/oxe/resize_256_256", "image_obs_keys": {"primary": "image_0", "wrist": null}, "action_normalization_mask": [true, true, true, true, true, true, false], "language_key": "language_instruction", "standardize_fn": {"module": "octo.data.oxe.oxe_standardization_transforms", "name": "bridge_dataset_transform", "args": [], "kwargs": {}}}, {"name": "taco_play", "data_dir": "gs://rail-datasets-europe-west4/oxe/resize_256_256", "image_obs_keys": {"primary": "rgb_static", "wrist": "rgb_gripper"}, "action_normalization_mask": [true, true, true, true, true, true, false], "language_key": "language_instruction", "standardize_fn": {"module": "octo.data.oxe.oxe_standardization_transforms", "name": "taco_dataset_transform", "args": [], "kwargs": {}}}, {"name": "taco_extra", "data_dir": "gs://rail-datasets-europe-west4/oxe/resize_256_256", "image_obs_keys": {"primary": "rgb_static", "wrist": "rgb_gripper"}, "action_normalization_mask": [true, true, true, true, true, true, false], "language_key": "language_instruction", "standardize_fn": 
{"module": "octo.data.oxe.oxe_standardization_transforms", "name": "taco_dataset_transform", "args": [], "kwargs": {}}}, {"name": "jaco_play", "data_dir": "gs://rail-datasets-europe-west4/oxe/resize_256_256", "image_obs_keys": {"primary": "image", "wrist": "image_wrist"}, "action_normalization_mask": [true, true, true, true, true, true, false], "language_key": "language_instruction", "standardize_fn": {"module": "octo.data.oxe.oxe_standardization_transforms", "name": "jaco_play_dataset_transform", "args": [], "kwargs": {}}}, {"name": "berkeley_cable_routing", "data_dir": "gs://rail-datasets-europe-west4/oxe/resize_256_256", "image_obs_keys": {"primary": "image", "wrist": "wrist45_image"}, "action_normalization_mask": [true, true, true, true, true, true, false], "language_key": "language_instruction", "standardize_fn": {"module": "octo.data.oxe.oxe_standardization_transforms", "name": "berkeley_cable_routing_dataset_transform", "args": [], "kwargs": {}}}, {"name": "roboturk", "data_dir": "gs://rail-datasets-europe-west4/oxe/resize_256_256", "image_obs_keys": {"primary": "front_rgb", "wrist": null}, "action_normalization_mask": [true, true, true, true, true, true, false], "language_key": "language_instruction", "standardize_fn": {"module": "octo.data.oxe.oxe_standardization_transforms", "name": "roboturk_dataset_transform", "args": [], "kwargs": {}}}, {"name": "nyu_door_opening_surprising_effectiveness", "data_dir": "gs://rail-datasets-europe-west4/oxe/resize_256_256", "image_obs_keys": {"primary": null, "wrist": "image"}, "action_normalization_mask": [true, true, true, true, true, true, false], "language_key": "language_instruction", "standardize_fn": {"module": "octo.data.oxe.oxe_standardization_transforms", "name": "nyu_door_opening_dataset_transform", "args": [], "kwargs": {}}}, {"name": "viola", "data_dir": "gs://rail-datasets-europe-west4/oxe/resize_256_256", "image_obs_keys": {"primary": "agentview_rgb", "wrist": "eye_in_hand_rgb"}, 
"action_normalization_mask": [true, true, true, true, true, true, false], "language_key": "language_instruction", "standardize_fn": {"module": "octo.data.oxe.oxe_standardization_transforms", "name": "viola_dataset_transform", "args": [], "kwargs": {}}}, {"name": "berkeley_autolab_ur5", "data_dir": "gs://rail-datasets-europe-west4/oxe/resize_256_256", "image_obs_keys": {"primary": "image", "wrist": "hand_image"}, "action_normalization_mask": [true, true, true, true, true, true, false], "language_key": "language_instruction", "standardize_fn": {"module": "octo.data.oxe.oxe_standardization_transforms", "name": "berkeley_autolab_ur5_dataset_transform", "args": [], "kwargs": {}}}, {"name": "toto", "data_dir": "gs://rail-datasets-europe-west4/oxe/resize_256_256", "image_obs_keys": {"primary": "image", "wrist": null}, "action_normalization_mask": [true, true, true, true, true, true, false], "language_key": "language_instruction", "standardize_fn": {"module": "octo.data.oxe.oxe_standardization_transforms", "name": "toto_dataset_transform", "args": [], "kwargs": {}}}, {"name": "language_table", "data_dir": "gs://rail-datasets-europe-west4/oxe/resize_256_256", "image_obs_keys": {"primary": "rgb", "wrist": null}, "action_normalization_mask": [true, true, true, true, true, true, false], "language_key": "language_instruction", "standardize_fn": {"module": "octo.data.oxe.oxe_standardization_transforms", "name": "language_table_dataset_transform", "args": [], "kwargs": {}}}, {"name": "stanford_hydra_dataset_converted_externally_to_rlds", "data_dir": "gs://rail-datasets-europe-west4/oxe/resize_256_256", "image_obs_keys": {"primary": "image", "wrist": "wrist_image"}, "action_normalization_mask": [true, true, true, true, true, true, false], "language_key": "language_instruction", "standardize_fn": {"module": "octo.data.oxe.oxe_standardization_transforms", "name": "stanford_hydra_dataset_transform", "args": [], "kwargs": {}}}, {"name": 
"austin_buds_dataset_converted_externally_to_rlds", "data_dir": "gs://rail-datasets-europe-west4/oxe/resize_256_256", "image_obs_keys": {"primary": "image", "wrist": "wrist_image"}, "action_normalization_mask": [true, true, true, true, true, true, false], "language_key": "language_instruction", "standardize_fn": {"module": "octo.data.oxe.oxe_standardization_transforms", "name": "austin_buds_dataset_transform", "args": [], "kwargs": {}}}, {"name": "nyu_franka_play_dataset_converted_externally_to_rlds", "data_dir": "gs://rail-datasets-europe-west4/oxe/resize_256_256", "image_obs_keys": {"primary": "image", "wrist": null}, "action_normalization_mask": [true, true, true, true, true, true, false], "language_key": "language_instruction", "standardize_fn": {"module": "octo.data.oxe.oxe_standardization_transforms", "name": "nyu_franka_play_dataset_transform", "args": [], "kwargs": {}}}, {"name": "furniture_bench_dataset_converted_externally_to_rlds", "data_dir": "gs://rail-datasets-europe-west4/oxe/resize_256_256", "image_obs_keys": {"primary": "image", "wrist": "wrist_image"}, "action_normalization_mask": [true, true, true, true, true, true, false], "language_key": "language_instruction", "standardize_fn": {"module": "octo.data.oxe.oxe_standardization_transforms", "name": "furniture_bench_dataset_transform", "args": [], "kwargs": {}}}, {"name": "ucsd_kitchen_dataset_converted_externally_to_rlds", "data_dir": "gs://rail-datasets-europe-west4/oxe/resize_256_256", "image_obs_keys": {"primary": "image", "wrist": null}, "action_normalization_mask": [true, true, true, true, true, true, false], "language_key": "language_instruction", "standardize_fn": {"module": "octo.data.oxe.oxe_standardization_transforms", "name": "ucsd_kitchen_dataset_transform", "args": [], "kwargs": {}}}, {"name": "austin_sailor_dataset_converted_externally_to_rlds", "data_dir": "gs://rail-datasets-europe-west4/oxe/resize_256_256", "image_obs_keys": {"primary": "image", "wrist": "wrist_image"}, 
"action_normalization_mask": [true, true, true, true, true, true, false], "language_key": "language_instruction", "standardize_fn": {"module": "octo.data.oxe.oxe_standardization_transforms", "name": "austin_sailor_dataset_transform", "args": [], "kwargs": {}}}, {"name": "austin_sirius_dataset_converted_externally_to_rlds", "data_dir": "gs://rail-datasets-europe-west4/oxe/resize_256_256", "image_obs_keys": {"primary": "image", "wrist": "wrist_image"}, "action_normalization_mask": [true, true, true, true, true, true, false], "language_key": "language_instruction", "standardize_fn": {"module": "octo.data.oxe.oxe_standardization_transforms", "name": "austin_sirius_dataset_transform", "args": [], "kwargs": {}}}, {"name": "bc_z", "data_dir": "gs://rail-datasets-europe-west4/oxe/resize_256_256", "image_obs_keys": {"primary": "image", "wrist": null}, "action_normalization_mask": [true, true, true, true, true, true, false], "language_key": "language_instruction", "standardize_fn": {"module": "octo.data.oxe.oxe_standardization_transforms", "name": "bc_z_dataset_transform", "args": [], "kwargs": {}}}, {"name": "dlr_edan_shared_control_converted_externally_to_rlds", "data_dir": "gs://rail-datasets-europe-west4/oxe/resize_256_256", "image_obs_keys": {"primary": "image", "wrist": null}, "action_normalization_mask": [true, true, true, true, true, true, false], "language_key": "language_instruction", "standardize_fn": {"module": "octo.data.oxe.oxe_standardization_transforms", "name": "dlr_edan_shared_control_dataset_transform", "args": [], "kwargs": {}}}, {"name": "iamlab_cmu_pickup_insert_converted_externally_to_rlds", "data_dir": "gs://rail-datasets-europe-west4/oxe/resize_256_256", "image_obs_keys": {"primary": "image", "wrist": "wrist_image"}, "action_normalization_mask": [true, true, true, true, true, true, false], "language_key": "language_instruction", "standardize_fn": {"module": "octo.data.oxe.oxe_standardization_transforms", "name": 
"iamlab_pick_insert_dataset_transform", "args": [], "kwargs": {}}}, {"name": "utaustin_mutex", "data_dir": "gs://rail-datasets-europe-west4/oxe/resize_256_256", "image_obs_keys": {"primary": "image", "wrist": "wrist_image"}, "action_normalization_mask": [true, true, true, true, true, true, false], "language_key": "language_instruction", "standardize_fn": {"module": "octo.data.oxe.oxe_standardization_transforms", "name": "utaustin_mutex_dataset_transform", "args": [], "kwargs": {}}}, {"name": "berkeley_fanuc_manipulation", "data_dir": "gs://rail-datasets-europe-west4/oxe/resize_256_256", "image_obs_keys": {"primary": "image", "wrist": "wrist_image"}, "action_normalization_mask": [true, true, true, true, true, true, false], "language_key": "language_instruction", "standardize_fn": {"module": "octo.data.oxe.oxe_standardization_transforms", "name": "berkeley_fanuc_dataset_transform", "args": [], "kwargs": {}}}, {"name": "cmu_stretch", "data_dir": "gs://rail-datasets-europe-west4/oxe/resize_256_256", "image_obs_keys": {"primary": "image", "wrist": null}, "action_normalization_mask": [true, true, true, true, true, true, false], "language_key": "language_instruction", "standardize_fn": {"module": "octo.data.oxe.oxe_standardization_transforms", "name": "cmu_stretch_dataset_transform", "args": [], "kwargs": {}}}], "frame_transform_kwargs": {"image_augment_kwargs": {"primary": {"augment_order": ["random_resized_crop", "random_brightness", "random_contrast", "random_saturation", "random_hue"], "random_brightness": [0.1], "random_contrast": [0.9, 1.1], "random_hue": [0.05], "random_resized_crop": {"ratio": [0.9, 1.1], "scale": [0.8, 1.0]}, "random_saturation": [0.9, 1.1]}, "wrist": {"augment_order": ["random_brightness", "random_contrast", "random_saturation", "random_hue"], "random_brightness": [0.1], "random_contrast": [0.9, 1.1], "random_hue": [0.05], "random_saturation": [0.9, 1.1]}}, "image_dropout_prob": 0.0, "num_parallel_calls": 200, "resize_size": {"primary": [256, 
256], "wrist": [128, 128]}}, "sample_weights": [0.54087122203, 0.8341046294, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 1.0, 2.0, 2.0, 1.0, 0.1, 2.0, 1.0, 3.0, 0.1, 2.0, 1.0, 1.0, 0.2, 1.0, 1.0, 1.0, 2.0, 1.0], "shuffle_buffer_size": 250000, "traj_read_threads": 48, "traj_transform_kwargs": {"action_horizon": 4, "goal_relabeling_strategy": "uniform", "max_action_dim": 7, "subsample_length": 100, "task_augment_kwargs": {"keep_image_prob": 0.5, "pickle_file_path": "gs://rail-datasets-europe-west4/oxe/resize_256_256/paraphrases_oxe.pkl", "rephrase_prob": 0.5}, "task_augment_strategy": "delete_and_rephrase", "window_size": 2}, "traj_transform_threads": 48}, "eval_datasets": ["bridge_dataset"], "eval_interval": 5000, "log_interval": 100, "model": {"heads": {"action": {"args": [], "kwargs": {"action_dim": 7, "action_horizon": 4, "readout_key": "readout_action", "use_map": true}, "module": "octo.model.components.action_heads", "name": "MSEActionHead"}, "clip": {"args": [], "kwargs": {"readout_key": "readout_language", "use_map": true}, "module": "octo.model.components.language_reconstruction_heads", "name": "CLIPContrastiveHead"}, "gen": {"args": [], "kwargs": {"n_lang_tokens": 24}, "module": "octo.model.components.language_reconstruction_heads", "name": "SingleHeadContinuousGenerationHead"}}, "max_horizon": 100, "observation_tokenizers": {"mel_spectro": {"args": [], "kwargs": {"add_channel_dim": true, "encoder": {"args": [], "kwargs": {"use_film": false}, "module": "octo.model.components.vit_encoders", "name": "ResNet26FILM"}, "obs_stack_keys": ["mel_spectro"], "task_stack_keys": []}, "module": "octo.model.components.tokenizers", "name": "ImageTokenizer"}, "primary": {"args": [], "kwargs": {"encoder": {"args": [], "kwargs": {}, "module": "octo.model.components.vit_encoders", "name": "SmallStem16"}, "obs_stack_keys": ["image_primary"], "task_stack_keys": ["image_primary"]}, "module": "octo.model.components.tokenizers", "name": "ImageTokenizer"}, "tvl": {"args": [], "kwargs": 
{"encoder": {"args": [], "kwargs": {"img_size": [224, 224]}, "module": "octo.model.components.tvl_vit", "name": "tvlViT"}, "obs_stack_keys": ["image_digit_left", "image_digit_right"], "task_stack_keys": []}, "module": "octo.model.components.tokenizers", "name": "ImageTokenizerConcatTokens"}, "wrist": {"args": [], "kwargs": {"encoder": {"args": [], "kwargs": {}, "module": "octo.model.components.vit_encoders", "name": "SmallStem16"}, "obs_stack_keys": ["image_wrist"], "task_stack_keys": ["image_wrist"]}, "module": "octo.model.components.tokenizers", "name": "ImageTokenizer"}}, "readouts": {"action": 1, "language": 24}, "repeat_task_tokens": true, "task_tokenizers": {"language": {"args": [], "kwargs": {"encoder": "t5-base", "finetune_encoder": false}, "module": "octo.model.components.tokenizers", "name": "LanguageTokenizer"}}, "token_embedding_size": 384, "transformer_kwargs": {"add_position_embedding": false, "attention_dropout_rate": 0.0, "dropout_rate": 0.0, "mlp_dim": 1536, "num_attention_heads": 6, "num_layers": 12}, "use_correct_attention": true}, "num_steps": 300000.0, "optimizer": {"clip_gradient": 1.0, "frozen_keys": ["*hf_model*"], "learning_rate": {"init_value": 0.0, "name": "rsqrt", "peak_value": 0.0003, "timescale": 10000, "warmup_steps": 2000}, "weight_decay": 0.1}, "prefetch_num_batches": 0, "pretrained_loaders": [{"module": "octo.utils.train_utils", "name": "hf_weights_loader", "args": [], "kwargs": {"hf_model": "t5-base"}}], "resume_path": null, "save_dir": "gs://rail-tpus-homer-v5/log", "save_interval": 10000, "seed": 42, "start_step": null, "text_processor": {"args": [], "kwargs": {"encode_with_model": false, "tokenizer_kwargs": {"max_length": 16, "padding": "max_length", "return_tensors": "np", "truncation": true}, "tokenizer_name": "t5-base"}, "module": "octo.data.utils.text_processing", "name": "HFTokenizer"}, "val_kwargs": {"num_val_batches": 16, "val_shuffle_buffer_size": 1000}, "viz_interval": 1000000, "viz_kwargs": {"eval_batch_size": 128, 
"samples_per_state": 8, "trajs_for_metrics": 100, "trajs_for_viz": 8}, "wandb": {"entity": null, "group": null, "project": "octo"}, "wandb_resume_id": null, "window_size": 2, "max_horizon": 100}
FuSe_Octo_model/dataset_statistics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"action": {"mask": [true, true, true, true, true, true, false], "max": [0.3012024164199829, 0.08334571868181229, 0.27207261323928833, 2.9595746994018555, 1.7334380149841309, 0.5227872729301453, 1.0], "mean": [0.0036537835840135813, 0.0005640245508402586, -0.0020414507016539574, 0.00011840006482088938, 0.00012692870222963393, 0.002870981814339757, 0.6252029538154602], "min": [-0.30079442262649536, -0.07893040776252747, -0.22207710146903992, -3.0137717723846436, -1.6758967638015747, -0.6511449217796326, 0.0], "p01": [-0.02743857353925705, -0.034664224833250046, -0.047646041959524155, -0.1036328598856926, -0.11471883952617645, -0.24426692724227905, 0.0], "p99": [0.04795464500784874, 0.035712338984012604, 0.05198518931865692, 0.09977681189775467, 0.10635631531476974, 0.2508466839790344, 1.0], "std": [0.013129397295415401, 0.011655371636152267, 0.020389817655086517, 0.03639911115169525, 0.039006322622299194, 0.0826837494969368, 0.4840744137763977]}, "num_trajectories": 26866, "num_transitions": 760689}
FuSe_Octo_model/example_batch.msgpack ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da3b9f05d263302c917340ffcb1cfcd1dd3932b0d489c165ce71d56d04dac1da
3
+ size 2432891
FuSe_Octo_model/finetune_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"gen_modes": [["visual"], ["tactile"], ["audio"], ["visual", "tactile"], ["visual", "audio"], ["tactile", "audio"], ["visual", "tactile", "audio"]], "remove_tokenizers": [], "text_processor": {"module": "octo.data.utils.text_processing", "name": "HFTokenizer", "args": [], "kwargs": {"tokenizer_name": "t5-base", "encode_with_model": false, "tokenizer_kwargs": {"max_length": 24, "padding": "max_length", "truncation": true, "return_tensors": "np"}}}, "multi_head": false, "unified_lang": true, "pretrained_loaders": [{"module": "octo.utils.train_utils", "name": "tvl_loader", "args": [], "kwargs": {"restore_path": "gs://oier-europe-bucket/ported_weights/tvl/tvl_vitbgs_params_jax.npz", "verbose": true}}], "modalities": ["cam_primary", "cam_wrist", "vit_left", "vit_right"], "pretrained_path": "hf://rail-berkeley/octo-small-1.5", "batch_size": 32, "shuffle_buffer_size": 10000, "num_steps": 50000, "log_interval": 100, "eval_interval": 500000, "save_interval": 5000, "lang_interval": 10000, "save_dir": "gs://oier-europe-bucket", "seed": 42, "wandb": {"project": "octo", "group": null, "entity": null}, "dataset_kwargs": {"name": "digit_dataset:53.0.0", "data_dir": "gs://oier-europe-bucket", "image_obs_keys": {"primary": "image_0", "wrist": "image_1", "digit_left": "digit_0", "digit_left_background": "digit_0_background", "digit_right": "digit_1", "digit_right_background": "digit_1_background"}, "proprio_obs_key": null, "sensor_obs_keys": {"mel_spectro": "mel_spectro", "mic_mask": "has_mic"}, "language_key": "rephrase_batch_full", "annotation_manager_kwargs": {"force_uniform_overall": true, "reconstruction_loss_keys": ["visual", "tactile", "audio", "visual,tactile", "visual,audio", "tactile,audio", "visual,tactile,audio"], "num_gpt_gen": 20, "rephrase_prefixes": ["rephrased_2", "rephrased_3", "rephrased_4", "rephrased_5", "rephrased_6", "rephrased_7", "rephrased_8", "rephrased_0"], "all_lang_prefixes": ["all_lang_2", "all_lang_3", "all_lang_4", "all_lang_5", "all_lang_6", 
"all_lang_7", "all_lang_8", "all_lang_0"], "lang_info_str": "simple||visual|tactile|audio|visual,tactile|visual,audio|tactile,audio|visual,tactile,audio"}, "action_normalization_mask": [true, true, true, true, true, true, false], "standardize_fn": {"module": "octo.data.oxe.oxe_standardization_transforms", "name": "bridge_dataset_transform", "args": [], "kwargs": {}}, "num_gpt_gen_arg": 20}, "modality": "language_conditioned", "finetuning_mode": "full", "window_size": 2, "optimizer": {"learning_rate": {"name": "cosine", "init_value": 0.0, "peak_value": 0.0003, "warmup_steps": 2000, "decay_steps": 50000, "end_value": 0.0}, "weight_decay": 0.01, "clip_gradient": 1.0, "frozen_keys": ["*hf_model*"], "grad_accumulation_steps": null}, "val_kwargs": {"val_shuffle_buffer_size": 1000, "num_val_batches": 16}, "gen_kwargs": {"val_shuffle_buffer_size": 1000, "num_val_batches": 16}, "viz_kwargs": {"eval_batch_size": 64, "trajs_for_metrics": 100, "trajs_for_viz": 8, "samples_per_state": 8}, "gradcam_kwargs": {"eval_batch_size": 4, "shuffle_buffer_size": 1000, "train": false, "gradcam_kwargs_list": [["obs_primary", {"psuedo_loss_type": "loss"}], ["obs_wrist", {"psuedo_loss_type": "loss"}]]}, "frame_transform_threads": 16, "traj_transform_kwargs": {"window_size": 2, "action_horizon": 4, "goal_relabeling_strategy": null, "task_augment_strategy": "delete_task_conditioning", "task_augment_kwargs": {"keep_image_prob": 0.0}}, "frame_transform_kwargs": {"resize_size": {"primary": [256, 256], "wrist": [128, 128], "digit_left": [224, 224], "digit_right": [224, 224], "digit_left_background": [224, 224], "digit_right_background": [224, 224]}, "image_augment_kwargs": {"primary": {"random_resized_crop": {"scale": [0.8, 1.0], "ratio": [0.9, 1.1]}, "random_brightness": [0.1], "random_contrast": [0.9, 1.1], "random_saturation": [0.9, 1.1], "random_hue": [0.05], "augment_order": ["random_resized_crop", "random_brightness", "random_contrast", "random_saturation", "random_hue"]}, "wrist": 
{"random_brightness": [0.1], "random_contrast": [0.9, 1.1], "random_saturation": [0.9, 1.1], "random_hue": [0.05], "augment_order": ["random_brightness", "random_contrast", "random_saturation", "random_hue"]}}, "background_subtraction_map": {"image_digit_left": "image_digit_left_background", "image_digit_right": "image_digit_right_background"}}, "new_obs_tokenizers": {"mel_spectro": {"module": "octo.model.components.tokenizers", "name": "ImageTokenizer", "args": [], "kwargs": {"obs_stack_keys": ["mel_spectro"], "task_stack_keys": [], "encoder": {"module": "octo.model.components.vit_encoders", "name": "ResNet26FILM", "args": [], "kwargs": {"use_film": false}}, "add_channel_dim": true}}, "tvl": {"module": "octo.model.components.tokenizers", "name": "ImageTokenizerConcatTokens", "args": [], "kwargs": {"obs_stack_keys": ["image_digit_left", "image_digit_right"], "task_stack_keys": [], "encoder": {"module": "octo.model.components.tvl_vit", "name": "tvlViT", "args": [], "kwargs": {"img_size": [224, 224]}}}}}, "update_config": {"model": {"repeat_task_tokens": true, "heads": {"action": {"module": "octo.model.components.action_heads", "name": "MSEActionHead", "args": [], "kwargs": {"readout_key": "readout_action", "use_map": true, "action_horizon": 4, "action_dim": 7}}}, "observation_tokenizers": {"mel_spectro": {"module": "octo.model.components.tokenizers", "name": "ImageTokenizer", "args": [], "kwargs": {"obs_stack_keys": ["mel_spectro"], "task_stack_keys": [], "encoder": {"module": "octo.model.components.vit_encoders", "name": "ResNet26FILM", "args": [], "kwargs": {"use_film": false}}, "add_channel_dim": true}}, "tvl": {"module": "octo.model.components.tokenizers", "name": "ImageTokenizerConcatTokens", "args": [], "kwargs": {"obs_stack_keys": ["image_digit_left", "image_digit_right"], "task_stack_keys": [], "encoder": {"module": "octo.model.components.tvl_vit", "name": "tvlViT", "args": [], "kwargs": {"img_size": [224, 224]}}}}}, "readouts": {"language": 24}}}, 
"reconstruction_loss_weight": 1.0, "lang_head": {"module": "octo.model.components.language_reconstruction_heads", "name": "CLIPContrastiveHead", "args": [], "kwargs": {"readout_key": "readout_language", "use_map": true}}, "gen_head": {"module": "octo.model.components.language_reconstruction_heads", "name": "SingleHeadContinuousGenerationHead", "args": [], "kwargs": {"n_lang_tokens": 24}}, "pop_keys": [["model", "heads", "action", "kwargs", "n_diffusion_samples"], ["model", "heads", "action", "kwargs", "dropout_rate"]]}