
Commit ad9f76c

references

1 parent 9a49790

9 files changed: +38 −22 lines

SARSA_demo.ipynb (+28 −21)

Large diffs are not rendered by default.

func_approx_q_learning_demo.ipynb (+2)

@@ -8,6 +8,8 @@
 },
 "outputs": [],
 "source": [
+"# Adapted from: https://github.com/lazyprogrammer/machine_learning_examples/tree/master/rl\n",
+"# Function approximation using Q Learning\n",
 "import numpy as np\n",
 "import matplotlib.pyplot as plt\n",
 "from gridWorldGame import standard_grid, negative_grid,print_values, print_policy\n",

gridWorldGame.py (+1)

@@ -1,3 +1,4 @@
+# Adapted from: https://github.com/lazyprogrammer/machine_learning_examples/tree/master/rl
 import numpy as np

 class Grid: # Environment
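
For context, every notebook touched by this commit imports its environment from this module. Below is a minimal usage sketch, not part of the commit; the attribute and helper names (grid.rewards, grid.actions, and the (values, grid) signatures of print_values/print_policy) follow the upstream lazyprogrammer repo and are assumptions here, since the diff shows only the import line and the Grid class header.

# Hedged sketch: how the demo notebooks typically set up the gridworld.
# Assumes the upstream interface: negative_grid() returns a Grid whose
# .rewards maps state -> reward and .actions maps state -> available moves.
import numpy as np
from gridWorldGame import negative_grid, print_values, print_policy

grid = negative_grid()             # gridworld with a step cost on each move
print("rewards:")
print_values(grid.rewards, grid)   # assumed helper: prints the reward layout

# start from a uniformly random policy over each state's available actions
policy = {s: np.random.choice(grid.actions[s]) for s in grid.actions}
print("initial policy:")
print_policy(policy, grid)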

monte_carlo_demo.ipynb (+1)

@@ -8,6 +8,7 @@
 },
 "outputs": [],
 "source": [
+"# Adapted from: https://github.com/lazyprogrammer/machine_learning_examples/tree/master/rl\n",
 "import numpy as np\n",
 "from gridWorldGame import standard_grid, negative_grid,print_values, print_policy\n",
 "\n",

monte_carlo_epsilon_greedy_demo.ipynb (+1)

@@ -8,6 +8,7 @@
 },
 "outputs": [],
 "source": [
+"# Adapted from: https://github.com/lazyprogrammer/machine_learning_examples/tree/master/rl\n",
 "# the Monte Carlo Epsilon-Greedy method to find the optimal policy and value function\n",
 "import numpy as np\n",
 "import matplotlib.pyplot as plt\n",

monte_carlo_es_demo.ipynb (+1)

@@ -8,6 +8,7 @@
 },
 "outputs": [],
 "source": [
+"# Adapted from: https://github.com/lazyprogrammer/machine_learning_examples/tree/master/rl\n",
 "# the Monte Carlo Exploring-Starts method to find the optimal policy\n",
 "import numpy as np\n",
 "import matplotlib.pyplot as plt\n",

policy_iteration_demo.ipynb (+1)

@@ -16,6 +16,7 @@
 }
 ],
 "source": [
+"# Adapted from: https://github.com/lazyprogrammer/machine_learning_examples/tree/master/rl\n",
 "import numpy as np\n",
 "from gridWorldGame import standard_grid, negative_grid,print_values, print_policy\n",
 "\n",

q_learning_demo.ipynb (+1)

@@ -8,6 +8,7 @@
 },
 "outputs": [],
 "source": [
+"# Adapted from: https://github.com/lazyprogrammer/machine_learning_examples/tree/master/rl\n",
 "# the Q-Learning method to find the optimal policy and value function\n",
 "import numpy as np\n",
 "import matplotlib.pyplot as plt\n",

value_iteration_demo.ipynb (+2 −1)

@@ -16,7 +16,8 @@
 }
 ],
 "source": [
-"# same as policy iteration\n",
+"# Adapted from: https://github.com/lazyprogrammer/machine_learning_examples/tree/master/rl\n",
+"# Value iteration\n",
 "import numpy as np\n",
 "from gridWorldGame import standard_grid, negative_grid,print_values, print_policy\n",
 "\n",
