# Step-by-step trace of computing discounted returns backwards over one
# episode: G_t = GAMMA * G_{t+1} + r_t, starting from the last time step.
from collections import deque

import numpy as np

# Discount factor applied to the return of the following time step.
GAMMA = 0.95
# Capacity of the returns buffer.
MAX_STEPS = 20

rewards = [1, 1, 1, 1, 1]
n_steps = len(rewards)

# NOTE: the deque is bounded, and appendleft() on a full deque discards the
# rightmost entry. An episode longer than MAX_STEPS would therefore silently
# lose the returns of its latest time steps.
returns = deque(maxlen=MAX_STEPS)

# Iterate from the last step to the first so that the return of step t + 1
# is already available (at the left end of the deque) when step t is handled.
for t in reversed(range(n_steps)):
    print("Step=======", t)
    # Nothing follows the final step, so its future return is 0.
    disc_return_t = returns[0] if returns else 0
    print("return", disc_return_t)
    print("reward", rewards[t])
    # Prepending keeps the deque ordered by time step: index 0 is step t.
    returns.appendleft(GAMMA * disc_return_t + rewards[t])
    print("appended ret", returns)

# Rebind the name to a NumPy array of the per-step returns (step 0 first).
returns = np.array(returns)
print(returns)