import sys, os
# This code creates a virtual display to draw game images on.
# It will have no effect if your machine has a monitor.
if not os.environ.get("DISPLAY"):
!bash ../xvfb start
os.environ['DISPLAY'] = ':1'
bash: ../xvfb: No such file or directory
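If the helper script is missing (as the error above shows), a common alternative is the pyvirtualdisplay package. The snippet below is only a sketch and assumes that Xvfb and pyvirtualdisplay are installed on the machine (e.g. apt-get install xvfb, pip install pyvirtualdisplay).
# Fallback: start a virtual display from Python
# (assumes Xvfb and the pyvirtualdisplay package are installed)
if not os.environ.get("DISPLAY"):
    from pyvirtualdisplay import Display
    virtual_display = Display(visible=False, size=(1400, 900))
    virtual_display.start()  # sets a DISPLAY variable for this process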
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
OpenAI Gym --> Farama Gymnasium
We're going to spend the next several sessions learning algorithms that solve decision processes. For that we need some interesting decision problems to test our algorithms on.
That's where Gymnasium comes into play. It's a Python library that wraps many classical decision problems, including robot control, video games and board games.
OpenAI's Gym library has been replaced by Gymnasium, which preserves all the functionality and stays compatible with the latest version of Gym.
Announcement: https://farama.org/Announcing-The-Farama-Foundation
Github: https://github.com/Farama-Foundation/Gymnasium
Documentation: https://gymnasium.farama.org/
So here's how it works:
!pip install gymnasium
Collecting gymnasium
  Downloading gymnasium-1.0.0-py3-none-any.whl.metadata (9.5 kB)
Requirement already satisfied: numpy>=1.21.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium) (1.26.4)
Requirement already satisfied: cloudpickle>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium) (3.1.0)
Requirement already satisfied: typing-extensions>=4.3.0 in /usr/local/lib/python3.10/dist-packages (from gymnasium) (4.12.2)
Collecting farama-notifications>=0.0.1 (from gymnasium)
  Downloading Farama_Notifications-0.0.4-py3-none-any.whl.metadata (558 bytes)
Downloading gymnasium-1.0.0-py3-none-any.whl (958 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 958.1/958.1 kB 12.5 MB/s eta 0:00:00
Downloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)
Installing collected packages: farama-notifications, gymnasium
Successfully installed farama-notifications-0.0.4 gymnasium-1.0.0
import gymnasium as gym
env = gym.make("MountainCar-v0", render_mode="rgb_array")
env.reset()
plt.imshow(env.render())
print("Observation space:", env.observation_space)
print("Action space:", env.action_space)
Observation space: Box([-1.2 -0.07], [0.6 0.07], (2,), float32)
Action space: Discrete(3)
Note: if you're running this on your local machine, you'll see a window pop up with the image above. Don't close it, just alt-tab away.
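You can poke at these spaces directly: a Box exposes its per-dimension bounds and a Discrete space exposes the number of actions, and both can produce random samples. The snippet below is just an illustrative sketch using standard Gymnasium space attributes (low, high, n, sample()).
# Explore the spaces: Box has per-dimension bounds, Discrete has a number of actions.
print("lowest observation:", env.observation_space.low)
print("highest observation:", env.observation_space.high)
print("number of actions:", env.action_space.n)
print("a random valid action:", env.action_space.sample())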
Gymnasium interface
The three main methods of an environment are:

reset(): reset the environment to its initial state, return the first observation and a dict with auxiliary info
render(): show the current environment state (a more colorful version :) )
step(a): commit action a and return (new_observation, reward, terminated, truncated, info)
    new_observation: an observation right after committing the action a
    reward: a number representing your reward for committing the action a
    terminated: True if the MDP has just finished, False if still in progress
    truncated: True if the number of steps elapsed >= max episode steps
    info: some auxiliary stuff about what just happened. For now, ignore it.
The difference between terminated and truncated, and how each flag should be used, is explained in more detail in the Gymnasium documentation.
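Putting it together, a typical episode loop keeps calling step() until either flag becomes True. Here is a minimal sketch that uses random actions purely for illustration (MountainCar-v0 is registered with a 200-step time limit, so a random policy will almost always end with truncated=True rather than terminated=True):
# Sketch: run one episode with random actions until it terminates or is truncated.
demo_env = gym.make("MountainCar-v0")
obs, info = demo_env.reset(seed=0)
terminated = truncated = False
while not (terminated or truncated):
    action = demo_env.action_space.sample()  # a real agent would choose an action here
    obs, reward, terminated, truncated, info = demo_env.step(action)
print("terminated:", terminated, "| truncated:", truncated)
demo_env.close()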
# Set seed to reproduce initial state in stochastic environment
obs0, info = env.reset(seed=0)
print("initial observation code:", obs0)
obs0, info = env.reset(seed=1)
print("initial observation code:", obs0)
# Note: in MountainCar, observation is just two numbers: car position and velocity
initial observation code: [-0.47260767 0. ]
initial observation code: [-0.49763566 0. ]
print("taking action 2 (right)")
new_obs, reward, terminated, truncated, _ = env.step(2)
print("new observation code:", new_obs)
print("reward:", reward)
print("is game over?:", terminated)
print("is game truncated due to time limit?:", truncated)
# Note: as you can see, the car has moved to the right slightly (by about 0.0008)
taking action 2 (right)
new observation code: [-0.4968302 0.00080547]
reward: -1.0
is game over?: False
is game truncated due to time limit?: False
Play with it
Below is the code that drives the car to the right. However, if you simply use the default policy, the car will not reach the flag at the far right due to gravity.
Your task is to fix it. Find a strategy that reaches the flag.
You are not required to build any sophisticated algorithms for now, and you definitely don't need to know any reinforcement learning for this. Feel free to hard-code :)
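To convince yourself that simply holding the gas to the right is not enough, here is a quick sketch (the seed and the step budget are arbitrary choices) that tracks the furthest position reached when action 2 is taken on every step; the flag sits at position 0.5:
# Sketch: always pushing right never reaches the flag (position 0.5) due to gravity.
check_env = gym.make("MountainCar-v0")
obs, _ = check_env.reset(seed=0)
best_position = obs[0]
for _ in range(200):  # the environment's default step limit
    obs, reward, terminated, truncated, _ = check_env.step(2)  # always push right
    best_position = max(best_position, obs[0])
    if terminated or truncated:
        break
print("furthest position with the always-right policy:", best_position)
check_env.close()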
from IPython import display
# Create env manually to set time limit. Please don't change this.
TIME_LIMIT = 250
env = gym.wrappers.TimeLimit(
gym.make("MountainCar-v0", render_mode="rgb_array"),
max_episode_steps=TIME_LIMIT + 1,
)
actions = {"left": 0, "stop": 1, "right": 2}
def policy(obs, t):
    # Accelerate in the direction of the current velocity to build up momentum:
    # push left while rolling left, push right while rolling right.
    position, velocity = obs
    if velocity < 0:
        return actions["left"]
    else:
        return actions["right"]
plt.figure(figsize=(4, 3))
display.clear_output(wait=True)
obs, _ = env.reset()
for t in range(TIME_LIMIT):
plt.gca().clear()
action = policy(obs, t) # Call your policy
obs, reward, terminated, truncated, _ = env.step(
action
) # Pass the action chosen by the policy to the environment
# We don't do anything with reward here because MountainCar is a very simple environment,
# and reward is a constant -1. Therefore, your goal is to end the episode as quickly as possible.
# Draw game image on display.
plt.imshow(env.render())
display.display(plt.gcf())
display.clear_output(wait=True)
if terminated or truncated:
print("Well done!")
break
else:
print("Time limit exceeded. Try again.")
display.clear_output(wait=True)
assert obs[0] > 0.47
print("You solved it!")