14 Lab Chapter 14: Reinforcement Learning

We include our usual imports seen in earlier labs.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from ISLP import load_data

We also collect the new imports needed for this lab.
from scipy.stats import \
    (ttest_1samp,
     ttest_rel,
     ttest_ind,
     t as t_dbn)
from statsmodels.stats.multicomp import \
    pairwise_tukeyhsd
from statsmodels.stats.multitest import \
    multipletests as mult_test

14.1 First examples
We first define a function that computes the discounted return of a sequence of rewards.
def compute_returns(rewards, discount):
    """Given a list of reward signals and the discount rate,
    compute the discounted return

        G_0 = R_0 + γ R_1 + γ^2 R_2 + ... + γ^{T-1} R_{T-1}.

    Args:
        rewards: a list of rewards from an episode trajectory.
        discount: discount factor, must satisfy 0 <= discount <= 1.

    Returns:
        G_0, the discounted return of the trajectory.
    """
    assert 0.0 <= discount <= 1.0
    G_t = 0
    # Accumulate backwards (Horner's rule): each step is a single
    # multiply-add, which is both efficient and easy to implement.
    for t in reversed(range(len(rewards))):
        G_t = rewards[t] + discount * G_t
    return G_t
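
Before calling compute_returns(), it is worth checking that the backward recursion agrees with the definition of G_0 above. The following is a minimal sketch comparing it with a direct forward sum; compute_returns_direct() is a hypothetical helper introduced only for this comparison.

def compute_returns_direct(rewards, discount):
    """Evaluate G_0 directly as sum_t discount**t * rewards[t]."""
    return sum(r * discount ** t for t, r in enumerate(rewards))

rewards_check = np.random.default_rng(0).normal(size=20)
# The two implementations should agree up to floating-point error.
print(np.isclose(compute_returns(rewards_check, 0.9),
                 compute_returns_direct(rewards_check, 0.9)))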

We next call the function on a few example reward sequences; the sequence with the largest discounted return identifies the best action.

discount = 0.9
cases = [
    [-1, -1, -1, 10],
    [-1, 10],
    [-1, 1, -1, -1, 10],
    [+1] * 10000,
]
for i, rewards in enumerate(cases):
    G = compute_returns(rewards, discount)
    print(f'Discounted return for case {i+1} is: {G}')

Discounted return for case 1 is: 4.58
Discounted return for case 2 is: 8.0
Discounted return for case 3 is: 4.922000000000001
Discounted return for case 4 is: 9.999999999999995
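
These values can be checked by hand. For case 1, G_0 = -1 - 0.9 - 0.81 + 0.729 · 10 = 4.58, matching the output above. Case 4 is a truncated geometric series with closed-form sum (1 - γ^T)/(1 - γ), which for γ = 0.9 and T = 10000 is essentially 1/(1 - 0.9) = 10. A minimal sketch of this check follows; T and closed_form are names introduced here, and discount is the value 0.9 set above.

T = 10000
# 0.9**10000 underflows to zero, so the closed form is effectively 10.
closed_form = (1 - discount ** T) / (1 - discount)
print(np.isclose(compute_returns([1] * T, discount), closed_form))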