Megalino111 committed on
Commit
585db0d
1 Parent(s): 9937ab5

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +59 -2
README.md CHANGED
@@ -30,8 +30,65 @@ TODO: Add your code
30
 
31
 
32
  ```python
33
- from stable_baselines3 import ...
34
- from huggingface_sb3 import load_from_hub
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  ...
37
  ```
 
30
 
31
 
32
  ```python
 
 
33
 
34
+ %%capture
35
+ !apt install python-opengl
36
+ !apt install ffmpeg
37
+ !apt install xvfb
38
+ !pip3 install pyvirtualdisplay
39
+
40
+
41
+ from pyvirtualdisplay import Display
42
+
43
+ virtual_display = Display(visible=0, size=(1400, 900))
44
+ virtual_display.start()
45
+
46
+ !pip install stable-baselines3[extra]
47
+ !pip install gymnasium
48
+ !pip install huggingface_sb3
49
+ !pip install huggingface_hub
50
+ !pip install panda_gym
51
+
52
+ import os
53
+
54
+ import gymnasium as gym
55
+ import panda_gym
56
+ from stable_baselines3 import A2C
57
+ from stable_baselines3.common.evaluation import evaluate_policy
58
+ from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
59
+ from stable_baselines3.common.env_util import make_vec_env
60
+
61
+ env_id = "PandaPickAndPlace-v3"
62
+
63
+ env = gym.make(env_id)
64
+ env = make_vec_env(env_id, n_envs=4)
65
+ env = VecNormalize(env, clip_obs = 10)
66
+ model = A2C("MultiInputPolicy", env, verbose=1)
67
+ model.learn(1_000_000)
68
+
69
+ model.save("a2c-PandaPickAndPlace-v3")
70
+ env.save("vec_normalize.pkl")
71
+
72
+
73
+ from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
74
+
75
+ # Load the saved statistics
76
+ eval_env = DummyVecEnv([lambda: gym.make("PandaPickAndPlace-v3")])
77
+ eval_env = VecNormalize.load("vec_normalize.pkl", eval_env)
78
+
79
+ # We need to override the render_mode
80
+ eval_env.render_mode = "rgb_array"
81
+
82
+ # do not update the normalization statistics at test time
83
+ eval_env.training = False
84
+ # reward normalization is not needed at test time
85
+ eval_env.norm_reward = False
86
+
87
+ # Load the agent
88
+ model = A2C.load("a2c-PandaPickAndPlace-v3")
89
+
90
+ mean_reward, std_reward = evaluate_policy(model, eval_env)
91
+
92
+ print(f"Mean reward = {mean_reward:.2f} +/- {std_reward:.2f}")
93
  ...
94
  ```