AssertionError：算法只支持<class 'gym.spaces.box.box'>作为动作空间，但提供了 Box(-1.0, 1.0, (3,), float32)</class>

Question

So basically I tried converting this custom gym environment from https://github.com/Gor-Ren/gym-jsbsim to use farama foundation's gymnasium api. This is my repo whih I am working on: https://github.com/sryu1/jsbgym When I try training the environment with gym-jsbsim, it works, but with the gymnasium environment, I get the error that is in the title... I feel like Lines 234 to 242 in tasks.py are somewhere that causes the problem.所以基本上我尝试将这个自定义健身房环境从https://github.com/Gor-Ren/gym-jsbsim转换为使用 farama 基金会的体育馆 api。这是我正在处理的 repo： https://github.com/ sryu1/jsbgym当我尝试使用 gym-jsbsim 训练环境时，它有效，但在体育馆环境中，我得到标题中的错误...我觉得 tasks.py 中的第 234 到 242 行是导致的地方问题。 the same error appears for all algorithms... If anyone could tell me what I did wrong, it'd be very appreciated.所有算法都会出现相同的错误...如果有人能告诉我我做错了什么，将不胜感激。 I tried training the custom environment with stable baselines 3 all algos that support Box and they all get the same error.我尝试使用稳定的基线训练自定义环境 3 所有支持 Box 的算法，它们都得到相同的错误。

This is my ipynb in raw format, if you just open a new ipynb file with notepad then past this in then open again, it should work.这是我的原始格式的 ipynb，如果你只是用记事本打开一个新的 ipynb 文件，然后通过它然后再次打开，它应该可以工作。

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "7e5a3876",
   "metadata": {},
   "outputs": [],
   "source": [
    "import jsbsim\n",
    "import gymnasium as gym\n",
    "import jsbgym\n",
    "import os\n",
    "from stable_baselines3 import DDPG\n",
    "from stable_baselines3.common.callbacks import BaseCallback\n",
    "from stable_baselines3.common.monitor import Monitor\n",
    "from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "c78d0a36",
   "metadata": {},
   "outputs": [],
   "source": [
    "env = gym.make('JSBSim-TurnHeadingControlTask-Cessna172P-Shaping.STANDARD-NoFG-v0')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "17ea6f3e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([ 5.00000000e+03, -1.52383093e-16,  1.16583160e-16,  2.02536000e+02,\n",
       "        -4.26325641e-14, -7.10542736e-15,  0.00000000e+00,  0.00000000e+00,\n",
       "         0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,\n",
       "         0.00000000e+00, -3.72529030e-09, -9.68166768e-15, -1.60633375e+02,\n",
       "         2.99000000e+02]),\n",
       " {})"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "env.reset()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "60a7ecab",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total Reward for episode 1 is 24.43895374007404\n",
      "Total Reward for episode 2 is 17.88229242588352\n",
      "Total Reward for episode 3 is 20.844080298653026\n",
      "Total Reward for episode 4 is 23.09412403738447\n",
      "Total Reward for episode 5 is 22.540357474496297\n"
     ]
    }
   ],
   "source": [
    "for episode in range(1, 6):\n",
    "    obs = env.reset()\n",
    "    done = False\n",
    "    total_reward = 0\n",
    "    while not done:\n",
    "        obs, reward, done, _, info = env.step(env.action_space.sample())\n",
    "        total_reward += reward\n",
    "    print(\"Total Reward for episode {} is {}\".format(episode, total_reward))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "d819a741",
   "metadata": {},
   "outputs": [],
   "source": [
    "class TrainAndLoggingCallback(BaseCallback):\n",
    "    def __init__(self, check_freq, save_path, verbose=1):\n",
    "        super(TrainAndLoggingCallback, self).__init__(verbose)\n",
    "        self.check_freq = check_freq\n",
    "        self.save_path = save_path\n",
    "\n",
    "    def _init_callback(self):\n",
    "        if self.save_path is not None:\n",
    "            os.makedirs(self.save_path, exist_ok=True)\n",
    "\n",
    "    def _on_step(self):\n",
    "        if self.n_calls % self.check_freq == 0:\n",
    "            model_path = os.path.join(\n",
    "                self.save_path, \"best_model_{}\".format(self.n_calls)\n",
    "            )\n",
    "            self.model.save(model_path)\n",
    "\n",
    "        return True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "a0f962d6",
   "metadata": {},
   "outputs": [],
   "source": [
    "CHECKPOINT_DIR = \"./train/\"\n",
    "LOG_DIR = \"./logs/\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "d2ab0033",
   "metadata": {},
   "outputs": [],
   "source": [
    "callback = TrainAndLoggingCallback(check_freq=1000000, save_path=CHECKPOINT_DIR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "ea1fd388",
   "metadata": {},
   "outputs": [
    {
     "ename": "AssertionError",
     "evalue": "The algorithm only supports <class 'gym.spaces.box.Box'> as action spaces but Box(-1.0, 1.0, (3,), float64) was provided",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mAssertionError\u001b[0m                            Traceback (most recent call last)",
      "\u001b[1;32m~\\AppData\\Local\\Temp\\ipykernel_17260\\3532872291.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mmodel\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mDDPG\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"MlpPolicy\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0menv\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtensorboard_log\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mLOG_DIR\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;32mc:\\Users\\Noah Ryu\\AppData\\Local\\Programs\\Python\\Python37\\lib\\site-packages\\stable_baselines3\\ddpg\\ddpg.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, policy, env, learning_rate, buffer_size, learning_starts, batch_size, tau, gamma, train_freq, gradient_steps, action_noise, replay_buffer_class, replay_buffer_kwargs, optimize_memory_usage, tensorboard_log, policy_kwargs, verbose, seed, device, _init_setup_model)\u001b[0m\n\u001b[0;32m    103\u001b[0m             \u001b[0mtarget_noise_clip\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m0.0\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    104\u001b[0m             \u001b[0mtarget_policy_noise\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m0.1\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 105\u001b[1;33m             \u001b[0m_init_setup_model\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    106\u001b[0m         )\n\u001b[0;32m    107\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mc:\\Users\\Noah Ryu\\AppData\\Local\\Programs\\Python\\Python37\\lib\\site-packages\\stable_baselines3\\td3\\td3.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, policy, env, learning_rate, buffer_size, learning_starts, batch_size, tau, gamma, train_freq, gradient_steps, action_noise, replay_buffer_class, replay_buffer_kwargs, optimize_memory_usage, policy_delay, target_policy_noise, target_noise_clip, tensorboard_log, policy_kwargs, verbose, seed, device, _init_setup_model)\u001b[0m\n\u001b[0;32m    118\u001b[0m             \u001b[0moptimize_memory_usage\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0moptimize_memory_usage\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    119\u001b[0m             \u001b[0msupported_action_spaces\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mspaces\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mBox\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 120\u001b[1;33m             \u001b[0msupport_multi_env\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    121\u001b[0m         )\n\u001b[0;32m    122\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mc:\\Users\\Noah Ryu\\AppData\\Local\\Programs\\Python\\Python37\\lib\\site-packages\\stable_baselines3\\common\\off_policy_algorithm.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, policy, env, learning_rate, buffer_size, learning_starts, batch_size, tau, gamma, train_freq, gradient_steps, action_noise, replay_buffer_class, replay_buffer_kwargs, optimize_memory_usage, policy_kwargs, tensorboard_log, verbose, device, support_multi_env, monitor_wrapper, seed, use_sde, sde_sample_freq, use_sde_at_warmup, sde_support, supported_action_spaces)\u001b[0m\n\u001b[0;32m    117\u001b[0m             \u001b[0muse_sde\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0muse_sde\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    118\u001b[0m             \u001b[0msde_sample_freq\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0msde_sample_freq\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 119\u001b[1;33m             \u001b[0msupported_action_spaces\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0msupported_action_spaces\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    120\u001b[0m         )\n\u001b[0;32m    121\u001b[0m         \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbuffer_size\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mbuffer_size\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mc:\\Users\\Noah Ryu\\AppData\\Local\\Programs\\Python\\Python37\\lib\\site-packages\\stable_baselines3\\common\\base_class.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, policy, env, learning_rate, policy_kwargs, tensorboard_log, verbose, device, support_multi_env, monitor_wrapper, seed, use_sde, sde_sample_freq, supported_action_spaces)\u001b[0m\n\u001b[0;32m    171\u001b[0m             \u001b[1;32mif\u001b[0m \u001b[0msupported_action_spaces\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    172\u001b[0m                 assert isinstance(self.action_space, supported_action_spaces), (\n\u001b[1;32m--> 173\u001b[1;33m                     \u001b[1;34mf\"The algorithm only supports {supported_action_spaces} as action spaces \"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    174\u001b[0m                     \u001b[1;34mf\"but {self.action_space} was provided\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    175\u001b[0m                 )\n",
      "\u001b[1;31mAssertionError\u001b[0m: The algorithm only supports <class 'gym.spaces.box.Box'> as action spaces but Box(-1.0, 1.0, (3,), float64) was provided"
     ]
    }
   ],
   "source": [
    "model = DDPG(\"MlpPolicy\", env, tensorboard_log=LOG_DIR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "b80a3ed4",
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'model' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[1;32m~\\AppData\\Local\\Temp\\ipykernel_17260\\1190813584.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlearn\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtotal_timesteps\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m10000000\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcallback\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mcallback\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      2\u001b[0m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msave\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"JSBSim_10000000_steps\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mNameError\u001b[0m: name 'model' is not defined"
     ]
    }
   ],
   "source": [
    "model.learn(total_timesteps=10000000, callback=callback)\n",
    "model.save(\"JSBSim_10000000_steps\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "72842db2",
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'model' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[1;32m~\\AppData\\Local\\Temp\\ipykernel_17260\\4222837208.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m      4\u001b[0m     \u001b[0mtotal_reward\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      5\u001b[0m     \u001b[1;32mwhile\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mdone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 6\u001b[1;33m         \u001b[0maction\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0m_\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      7\u001b[0m         \u001b[0mobs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mreward\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minfo\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0maction\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      8\u001b[0m         \u001b[0mtotal_reward\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[0mreward\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mNameError\u001b[0m: name 'model' is not defined"
     ]
    }
   ],
   "source": [
    "for episode in range(5):\n",
    "    obs = env.reset()\n",
    "    done = False\n",
    "    total_reward = 0\n",
    "    while not done:\n",
    "        action, _ = model.predict(obs)\n",
    "        obs, reward, done, info = env.step(int(action))\n",
    "        total_reward += reward\n",
    "    print(\"Total Reward for episode {} is {}\".format(episode, total_reward))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f0daa1ab",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.9"
  },
  "vscode": {
   "interpreter": {
    "hash": "fc676d0716d313b34d9b58671be5ff89ed5ca710c84a0894db60f3144580aba8"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}

Answer 1

Stable Baselines 3 , at least up to 1.7.0 , depends on gym , not on its newer gymnasium equivalent. Stable Baselines 3 ，至少达到1.7.0 ，取决于gym ，而不是其较新的gymnasium等价物。

Although import gymnasium as gym should do the trick within your own code, some of the Stable Baselines3 code still performs imports such as (see td3.py for instance):虽然import gymnasium as gym应该可以在您自己的代码中完成，但一些 Stable Baselines3 代码仍然执行导入，例如（参见td3.py ）：

from gym import spaces

and uses the gym spaces to validate your gymnasium environment's action space.并使用gym空间来验证您的gymnasium环境的活动空间。

You can check yourself by running type(env.action_space) , you'll see that it returns gymnasium.spaces.box.Box instead of gym.spaces.box.Box .您可以通过运行type(env.action_space)来检查自己，您会看到它返回gymnasium.spaces.box.Box而不是gym.spaces.box.Box 。

You can read the comments on this PR to have more details on the potential future solutions.您可以阅读有关此 PR的评论，以了解有关未来潜在解决方案的更多详细信息。

AssertionError：算法只支持<class 'gym.spaces.box.box'>作为动作空间，但提供了 Box(-1.0, 1.0, (3,), float32)</class>

问题描述

1 个解决方案

解决方案1
0 2023-02-02 04:39:59

AssertionError：算法只支持<class 'gym.spaces.box.box'>作为动作空间，但提供了 Box(-1.0, 1.0, (3,), float32)</class>

问题描述

1 个解决方案

解决方案1 0 2023-02-02 04:39:59

解决方案1
0 2023-02-02 04:39:59