
Commit 29c28b6

Notes updated 30/10/18 -- Add adaptive noise scaling method and cross-entropy method (Evolution Strategies), a black-box optimization technique (hill climbing)
1 parent 3f7f415 commit 29c28b6

5 files changed: +1025 −9 lines

CartPole_smart_agent.py (+55)
@@ -0,0 +1,55 @@
## This agent was trained using hill climbing with adaptive noise scaling, which comes under POLICY-BASED METHODS.
## The purpose of this script is to show off a smart agent!
import pickle
import gym
import numpy as np

with open('hill_climbing_weight.pickle', 'rb') as f:
    weight = pickle.load(f)


class Policy():
    def __init__(self, s_size=4, a_size=2):
        """
        Here I'm initializing self.w with the trained weights.
        During training, the weights of the network are randomly initialized and
        then optimized with (adaptive) noise scaling.

        Shape: [state_dimension, action_dimension] with a softmax activation at
        the output layer when the action space is discrete;
        [state_dimension, 1 node] with no activation function when the action
        space is continuous.
        """
        # self.w = 1e-4*np.random.rand(s_size, a_size)  # weights for a simple linear policy: state_space x action_space
        self.w = weight

    def forward(self, state):
        """
        Multiply (vectorized) the state by our weights and return the
        corresponding action probabilities.
        """
        x = np.dot(state, self.w)
        # Below is the implementation of the softmax function.
        return np.exp(x) / np.sum(np.exp(x))

    def act(self, state):
        """
        This function decides whether our policy is stochastic or deterministic.
        """
        probs = self.forward(state)
        # option 1: stochastic policy -- sample an action from the probabilities
        # action = np.random.choice(2, p=probs)
        # option 2: deterministic policy -- always take the most probable action
        action = np.argmax(probs)
        return action


policy = Policy()

env = gym.make('CartPole-v0')

for i in range(3):
    state = env.reset()
    while True:
        env.render()
        action = policy.act(state)
        state, reward, done, _ = env.step(action)
        if done:
            break
env.close()
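
The script above only runs the already-trained agent; the training loop that produced hill_climbing_weight.pickle is not part of this file. For reference, here is a minimal sketch of hill climbing with adaptive noise scaling, under the same assumptions the script makes (old gym API where env.reset() returns the state, and a linear softmax policy). The function name hill_climbing and the hyperparameters (gamma, noise_scale, the 195 score used as a stopping point) are illustrative, not taken from this commit's files.

import gym
import numpy as np

def hill_climbing(env, n_episodes=1000, gamma=1.0, noise_scale=1e-2):
    """Perturb the weights each episode; keep a perturbation only if the return improves."""
    best_R = -np.inf
    best_w = 1e-4 * np.random.rand(env.observation_space.shape[0], env.action_space.n)
    w = best_w
    for episode in range(n_episodes):
        state = env.reset()
        rewards = []
        while True:
            probs = np.exp(state @ w) / np.sum(np.exp(state @ w))  # linear softmax policy
            state, reward, done, _ = env.step(np.argmax(probs))
            rewards.append(reward)
            if done:
                break
        R = sum(r * gamma ** t for t, r in enumerate(rewards))  # discounted return
        if R >= best_R:
            # improvement: accept these weights and shrink the noise (exploit)
            best_R, best_w = R, w
            noise_scale = max(1e-3, noise_scale / 2)
        else:
            # no improvement: widen the noise around the best weights so far (explore)
            noise_scale = min(2.0, noise_scale * 2)
        w = best_w + noise_scale * np.random.rand(*best_w.shape)
        if best_R >= 195.0:  # the score CartPole-v0 uses as its solve threshold
            break
    return best_w

env = gym.make('CartPole-v0')
trained_w = hill_climbing(env)
# import pickle
# with open('hill_climbing_weight.pickle', 'wb') as f:
#     pickle.dump(trained_w, f)

The key idea is the asymmetric noise schedule: after an improvement the search radius shrinks (fine-tune around a good point), after a failure it grows (escape a plateau), and every new candidate is a perturbation of the best weights found so far.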
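
The commit message also mentions the cross-entropy method. Below is a minimal sketch under the same assumptions (old gym API, linear softmax policy); evaluate, pop_size, and elite_frac are illustrative names, not from the original notes.

import gym
import numpy as np

def evaluate(env, w, gamma=1.0):
    """Run one episode with a linear softmax policy; return the discounted return."""
    state = env.reset()
    R, discount = 0.0, 1.0
    while True:
        probs = np.exp(state @ w) / np.sum(np.exp(state @ w))
        state, reward, done, _ = env.step(np.argmax(probs))
        R += discount * reward
        discount *= gamma
        if done:
            return R

def cross_entropy_method(env, n_iters=100, pop_size=50, elite_frac=0.2, sigma=0.5):
    """Sample a population of weight matrices, keep the elites, refit mean and std."""
    n_elite = int(pop_size * elite_frac)
    shape = (env.observation_space.shape[0], env.action_space.n)
    mean, std = np.zeros(shape), sigma * np.ones(shape)
    for _ in range(n_iters):
        population = [mean + std * np.random.randn(*shape) for _ in range(pop_size)]
        returns = np.array([evaluate(env, w) for w in population])
        elites = [population[i] for i in returns.argsort()[-n_elite:]]
        mean = np.mean(elites, axis=0)
        std = np.std(elites, axis=0) + 1e-2  # small floor keeps some exploration
    return mean

env = gym.make('CartPole-v0')
trained_w = cross_entropy_method(env)

Unlike hill climbing, which tracks a single candidate, the cross-entropy method samples a whole population of weight matrices each iteration, keeps the top elite_frac by return, and refits the sampling mean and standard deviation to those elites.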
