class DeterministicPolicy(nn.Module):
    """Deterministic policy network for continuous action spaces.

    Maps an observation to a single action, rescaling the network's
    output to the action space's ``[low, high]`` box via an affine
    transform (``scale * x + mean``).
    """

    def __init__(self, args, obs_space, action_space, device=torch.device("cpu")):
        """Initialize DeterministicPolicy model.

        Args:
            args: (dict) arguments containing relevant model information.
            obs_space: (gym.Space) observation space.
            action_space: (gym.Space) action space.
            device: (torch.device) specifies the device to run on (cpu/gpu).
        """
        super().__init__()
        self.tpdv = dict(dtype=torch.float32, device=device)
        hidden_sizes = args["hidden_sizes"]
        activation_func = args["activation_func"]
        final_activation_func = args["final_activation_func"]

        obs_shape = get_shape_from_obs_space(obs_space)
        if len(obs_shape) == 3:
            # Image-like observation: run a CNN front-end first.
            self.feature_extractor = PlainCNN(obs_shape, hidden_sizes[0], activation_func)
            feature_dim = hidden_sizes[0]
        else:
            self.feature_extractor = None
            feature_dim = obs_shape[0]

        act_dim = action_space.shape[0]
        pi_sizes = [feature_dim] + list(hidden_sizes) + [act_dim]
        self.pi = PlainMLP(pi_sizes, activation_func, final_activation_func)

        # Affine map from the head's output range onto [low, high].
        low = torch.tensor(action_space.low).to(**self.tpdv)
        high = torch.tensor(action_space.high).to(**self.tpdv)
        self.scale = (high - low) / 2
        self.mean = (high + low) / 2
        self.to(device)

    def forward(self, obs):
        """Return actions scaled to the action-space limits."""
        features = obs if self.feature_extractor is None else self.feature_extractor(obs)
        raw_action = self.pi(features)
        return self.scale * raw_action + self.mean