[spam][crazy][personal] an early sunday

Sun Nov 14 02:21:25 PST 2021

10:18

now i'm at https://stable-baselines3.readthedocs.io/en/master/guide/examples.html#id5

here is my vim buffer

import gym
import numpy as np
import time

class Env(gym.Env):
    """Toy real-time environment: push a 1-D position toward a goal value.

    The observation is ``[position, wall_clock_time]`` and the goal is
    ``[target_position, deadline]``, where the deadline is one second
    after ``reset()``.  Each ``step`` moves the position by
    ``action[0] * elapsed_wall_clock_seconds``.  An episode ends when the
    position hits the target exactly or the deadline has passed.
    """
    # spaces: gym.spaces.Box(low, high, shape=None, dtype=np.float32)
    #         gym.spaces.Discrete(n)
    def __init__(self):
        super().__init__()
        self.action_space = gym.spaces.Box(-np.inf, np.inf, (2,))
        # float64 on purpose: slot 1 holds time.time() (~1.6e9).  In
        # float32 adjacent values at that magnitude are ~128 s apart, so
        # "now + 1 second" would round back to "now" and every episode
        # would be done on its first step.
        self.observation_space = gym.spaces.Box(
            -np.inf, np.inf, (2,), dtype=np.float64)
        #reward_range = None

    def seed(self, seed=None):
        """Seed both spaces and return ``seed`` unchanged.

        The observation space gets a seed derived from ``seed`` so the two
        spaces do not share a random stream.  With ``seed=None`` both
        spaces are seeded with ``None``.
        """
        self.action_space.seed(seed)
        # A sampled action is a float array and is not a usable seed, so
        # derive an integer seed for the observation space instead.
        obs_seed = None if seed is None else int(seed) + 1
        self.observation_space.seed(obs_seed)
        return seed

    def reset(self):
        """Sample a new start position and goal; return the initial state."""
        self.state = self.observation_space.sample()
        self.goal = self.observation_space.sample()
        # Slot 1 is overwritten with wall-clock time; only slot 0 keeps
        # its sampled value.
        self.state[1] = time.time()
        self.goal[1] = self.state[1] + 1
        return self.state

    def step(self, action):
        """Advance the position using the first component of ``action``.

        Returns ``(state, reward, done, info)``.  ``state`` is the same
        array object that is mutated in place on every call.
        """
        nexttime = time.time()
        change = nexttime - self.state[1]
        # The action space is a 2-vector but only one scalar rate can be
        # applied to the position; use component 0.  A bare scalar action
        # is accepted as well.  Component 1 is currently unused.
        rate = float(np.asarray(action, dtype=np.float64).reshape(-1)[0])
        self.state[0] += rate * change
        self.state[1] = nexttime
        distance = self.state[0] - self.goal[0]
        # NOTE(review): this is +inf when distance is exactly 0.
        reward = -np.log(np.abs(distance))
        done = bool(distance == 0 or self.state[1] >= self.goal[1])
        return (self.state, reward, done, {})

    def render(self, mode='human'):
        """Print the goal and the current state."""
        print(self.goal, self.state)

import stable_baselines3 as sb3
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.utils import set_random_seed

there's an issue where the action space is a vector including
frownedness, but is treated as a scalar in the step function.  this
would be resolved by indexing the action to get a time coefficient out
of the vector i guess.

PPO is right there.  the docs say to use a vector of parallel
environments, which means just passing the environment class to a
function.

it's 10:21 and i'm thinking of doing something else.