
Commit ad9f76c

references

1 parent 9a49790

9 files changed: +38 −22 lines

SARSA_demo.ipynb (+28 −21)

Large diffs are not rendered by default.

func_approx_q_learning_demo.ipynb (+2)

@@ -8,6 +8,8 @@
 },
 "outputs": [],
 "source": [
+"# Adapted from: https://github.com/lazyprogrammer/machine_learning_examples/tree/master/rl\n",
+"# Function approximation using Q Learning\n",
 "import numpy as np\n",
 "import matplotlib.pyplot as plt\n",
 "from gridWorldGame import standard_grid, negative_grid,print_values, print_policy\n",

gridWorldGame.py (+1)

@@ -1,3 +1,4 @@
+# Adapted from: https://github.com/lazyprogrammer/machine_learning_examples/tree/master/rl
 import numpy as np

 class Grid: # Environment
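
For context, every notebook touched by this commit imports its environment from this module. Below is a minimal usage sketch, not part of the commit; the attribute and helper names (grid.rewards, grid.actions, and the (values, grid) signatures of print_values/print_policy) follow the upstream lazyprogrammer repo and are assumptions here, since the diff shows only the import line and the Grid class header.

# Hedged sketch: how the demo notebooks typically set up the gridworld.
# Assumes the upstream interface: negative_grid() returns a Grid whose
# .rewards maps state -> reward and .actions maps state -> available moves.
import numpy as np
from gridWorldGame import negative_grid, print_values, print_policy

grid = negative_grid()             # gridworld with a step cost on each move
print("rewards:")
print_values(grid.rewards, grid)   # assumed helper: prints the reward layout

# start from a uniformly random policy over each state's available actions
policy = {s: np.random.choice(grid.actions[s]) for s in grid.actions}
print("initial policy:")
print_policy(policy, grid)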

monte_carlo_demo.ipynb (+1)

@@ -8,6 +8,7 @@
 },
 "outputs": [],
 "source": [
+"# Adapted from: https://github.com/lazyprogrammer/machine_learning_examples/tree/master/rl\n",
 "import numpy as np\n",
 "from gridWorldGame import standard_grid, negative_grid,print_values, print_policy\n",
 "\n",

monte_carlo_epsilon_greedy_demo.ipynb (+1)

@@ -8,6 +8,7 @@
 },
 "outputs": [],
 "source": [
+"# Adapted from: https://github.com/lazyprogrammer/machine_learning_examples/tree/master/rl\n",
 "# the Monte Carlo Epsilon-Greedy method to find the optimal policy and value function\n",
 "import numpy as np\n",
 "import matplotlib.pyplot as plt\n",

monte_carlo_es_demo.ipynb (+1)

@@ -8,6 +8,7 @@
 },
 "outputs": [],
 "source": [
+"# Adapted from: https://github.com/lazyprogrammer/machine_learning_examples/tree/master/rl\n",
 "# the Monte Carlo Exploring-Starts method to find the optimal policy\n",
 "import numpy as np\n",
 "import matplotlib.pyplot as plt\n",

policy_iteration_demo.ipynb (+1)

@@ -16,6 +16,7 @@
 }
 ],
 "source": [
+"# Adapted from: https://github.com/lazyprogrammer/machine_learning_examples/tree/master/rl\n",
 "import numpy as np\n",
 "from gridWorldGame import standard_grid, negative_grid,print_values, print_policy\n",
 "\n",

q_learning_demo.ipynb (+1)

@@ -8,6 +8,7 @@
 },
 "outputs": [],
 "source": [
+"# Adapted from: https://github.com/lazyprogrammer/machine_learning_examples/tree/master/rl\n",
 "# the Q-Learning method to find the optimal policy and value function\n",
 "import numpy as np\n",
 "import matplotlib.pyplot as plt\n",

value_iteration_demo.ipynb (+2 −1)

@@ -16,7 +16,8 @@
 }
 ],
 "source": [
-"# same as policy iteration\n",
+"# Adapted from: https://github.com/lazyprogrammer/machine_learning_examples/tree/master/rl\n",
+"# Value iteration\n",
 "import numpy as np\n",
 "from gridWorldGame import standard_grid, negative_grid,print_values, print_policy\n",
 "\n",
