## This agent was trained using hill climbing with adaptive noise scaling, which comes under POLICY-BASED METHODS.
## The purpose of this script is to show the smart (trained) agent in action!
## A sketch of the hill-climbing training loop is included after the Policy class below, for reference.

import pickle
import gym
import numpy as np

with open('hill_climbing_weight.pickle', 'rb') as f:
    weight = pickle.load(f)
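# 'hill_climbing_weight.pickle' is expected to hold the trained weight matrix from the
# hill-climbing run (for CartPole-v0 with this policy, a numpy array of shape (4, 2))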


class Policy():
    def __init__(self, s_size=4, a_size=2):
        """
        Here self.w is initialized with the trained weights loaded above.
        During training, the weights are initialized randomly and then
        optimized with hill climbing using adaptive noise scaling.

        Shape: [state_dimension, action_dimension] with a softmax activation at the
        output layer when the action space is discrete;
        [state_dimension, 1] with no activation function when the action space is continuous.
        """
        # self.w = 1e-4 * np.random.rand(s_size, a_size)  # weights for a simple linear policy: state_space x action_space
        self.w = weight

    def forward(self, state):
        """
        Here we multiply (vectorized) the state by the weights and get the corresponding output.
        """
        x = np.dot(state, self.w)
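        # for CartPole-v0, state has shape (4,) and self.w has shape (4, 2),
        # so x holds one raw score per action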
        # below is the implementation of the softmax function
        return np.exp(x) / np.sum(np.exp(x))

    def act(self, state):
        """
        This function decides whether we want the policy to be stochastic or deterministic.
        """
        probs = self.forward(state)
        # option 1: stochastic policy -- sample an action from the probabilities
        # action = np.random.choice(2, p=probs)
        # option 2: deterministic policy -- always take the most probable action
        action = np.argmax(probs)
        return action


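# For reference, a minimal sketch of how weights like these could be trained with
# hill climbing and adaptive noise scaling. It is not called by this demo script, and
# the hyperparameters (n_episodes, gamma, the noise bounds) are assumptions here,
# not necessarily the exact values used to produce 'hill_climbing_weight.pickle'.
def train_hill_climbing(env, policy, n_episodes=1000, gamma=1.0, noise_scale=1e-2):
    best_return = -np.inf
    best_w = policy.w.copy()
    for episode in range(n_episodes):
        # collect one episode with the current (perturbed) weights
        rewards = []
        state = env.reset()
        while True:
            action = policy.act(state)
            state, reward, done, _ = env.step(action)
            rewards.append(reward)
            if done:
                break
        # discounted return of the episode
        discounts = [gamma ** t for t in range(len(rewards))]
        episode_return = sum(d * r for d, r in zip(discounts, rewards))
        if episode_return >= best_return:
            # improvement: keep these weights and shrink the noise (exploit)
            best_return = episode_return
            best_w = policy.w.copy()
            noise_scale = max(1e-3, noise_scale / 2)
        else:
            # no improvement: widen the noise around the best weights (explore)
            noise_scale = min(2.0, noise_scale * 2)
        # perturb the best weights found so far with scaled noise
        policy.w = best_w + noise_scale * np.random.rand(*policy.w.shape)
    policy.w = best_w
    return policy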

policy = Policy()

env = gym.make('CartPole-v0')

# watch the trained agent for a few episodes
for i in range(3):
    state = env.reset()
    while True:
        env.render()
        action = policy.act(state)
        state, reward, done, _ = env.step(action)
        if done:
            break
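
# close the rendering window once the demo episodes are finished
env.close()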