Răsfoiți Sursa

Refactored sandbox config and added fast boot (#2455)

* Refactored sandbox config and added fastboot

* added tests

* fixed tests

* fixed tests

* intimate user about breaking change

* remove default config from eval

* check for lowercase env

* add test

* Revert Migration

* migrate old sandbox configs

* resolve merge conflict

* revert migration 2

* Revert "remove default config from eval"

This reverts commit de57c588dbf29a3327798ce68976e2d2277b8bb1.

* change type to box_type

* fix var name

* linted

* lint

* lint comments

* fix tests

* fix tests

* fix typo

* fix box_type, remove fast_boot

* add tests for sandbox config

* fix test

* update eval docs

* small removal comments

* adapt toml template

* old fields shouldn't be in the app dataclass

* fix old keys in app config

* clean up exec box

---------

Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
மனோஜ்குமார் பழனிச்சாமி 1 an în urmă
părinte
comite
143f38d25a

+ 1 - 1
.github/workflows/ghcr.yml

@@ -156,7 +156,7 @@ jobs:
 
 
       - name: Load sandbox image and run integration tests
       - name: Load sandbox image and run integration tests
         env:
         env:
-          SANDBOX_TYPE: ${{ matrix.sandbox }}
+          SANDBOX_BOX_TYPE: ${{ matrix.sandbox }}
         run: |
         run: |
           # Load the Docker image and capture the output
           # Load the Docker image and capture the output
           output=$(docker load -i /tmp/sandbox_image_amd64.tar)
           output=$(docker load -i /tmp/sandbox_image_amd64.tar)

+ 1 - 1
.github/workflows/review-pr.yml

@@ -55,7 +55,7 @@ jobs:
       env:
       env:
         LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
         LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
         OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
         OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-        SANDBOX_TYPE: ssh
+        SANDBOX_BOX_TYPE: ssh
       run: |
       run: |
         # Append path to launch poetry
         # Append path to launch poetry
         export PATH="/github/home/.local/bin:$PATH"
         export PATH="/github/home/.local/bin:$PATH"

+ 1 - 1
.github/workflows/solve-issue.yml

@@ -50,7 +50,7 @@ jobs:
         ISSUE_BODY: ${{ github.event.issue.body }}
         ISSUE_BODY: ${{ github.event.issue.body }}
         LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
         LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
         OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
         OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-        SANDBOX_TYPE: ssh
+        SANDBOX_BOX_TYPE: ssh
       run: |
       run: |
         # Append path to launch poetry
         # Append path to launch poetry
         export PATH="/github/home/.local/bin:$PATH"
         export PATH="/github/home/.local/bin:$PATH"

+ 1 - 1
agenthub/micro/commit_writer/README.md

@@ -3,7 +3,7 @@
 CommitWriterAgent can help write git commit message. Example:
 CommitWriterAgent can help write git commit message. Example:
 
 
 ```bash
 ```bash
-WORKSPACE_MOUNT_PATH="`PWD`" SANDBOX_TYPE="ssh" \
+WORKSPACE_MOUNT_PATH="`PWD`" SANDBOX_BOX_TYPE="ssh" \
   poetry run python opendevin/core/main.py -t "dummy task" -c CommitWriterAgent -d ./
   poetry run python opendevin/core/main.py -t "dummy task" -c CommitWriterAgent -d ./
 ```
 ```
 
 

+ 6 - 6
agenthub/monologue_agent/utils/prompts.py

@@ -1,13 +1,13 @@
 from opendevin.core.config import config
 from opendevin.core.config import config
 from opendevin.core.utils import json
 from opendevin.core.utils import json
-from opendevin.events.observation import (
-    CmdOutputObservation,
-)
 from opendevin.events.action import (
 from opendevin.events.action import (
     Action,
     Action,
 )
 )
-
+from opendevin.events.observation import (
+    CmdOutputObservation,
+)
 from opendevin.events.serialization.action import action_from_dict
 from opendevin.events.serialization.action import action_from_dict
+
 ACTION_PROMPT = """
 ACTION_PROMPT = """
 You're a thoughtful robot. Your main task is this:
 You're a thoughtful robot. Your main task is this:
 %(task)s
 %(task)s
@@ -206,7 +206,7 @@ def get_request_action_prompt(
         'background_commands': bg_commands_message,
         'background_commands': bg_commands_message,
         'hint': hint,
         'hint': hint,
         'user': user,
         'user': user,
-        'timeout': config.sandbox_timeout,
+        'timeout': config.sandbox.timeout,
         'WORKSPACE_MOUNT_PATH_IN_SANDBOX': config.workspace_mount_path_in_sandbox,
         'WORKSPACE_MOUNT_PATH_IN_SANDBOX': config.workspace_mount_path_in_sandbox,
     }
     }
 
 
@@ -242,4 +242,4 @@ def parse_summary_response(response: str) -> list[dict]:
     - list[dict]: The list of summaries output by the model
     - list[dict]: The list of summaries output by the model
     """
     """
     parsed = json.loads(response)
     parsed = json.loads(response)
-    return parsed['new_monologue']
+    return parsed['new_monologue']

+ 17 - 13
config.template.toml

@@ -19,9 +19,6 @@ workspace_base = "./workspace"
 # Cache directory path
 # Cache directory path
 #cache_dir = "/tmp/cache"
 #cache_dir = "/tmp/cache"
 
 
-# Container image to use for the sandbox
-#sandbox_container_image = "ghcr.io/opendevin/sandbox:main"
-
 # Debugging enabled
 # Debugging enabled
 #debug = false
 #debug = false
 
 
@@ -79,15 +76,6 @@ persist_sandbox = false
 # SSH port for the sandbox
 # SSH port for the sandbox
 #ssh_port = 63710
 #ssh_port = 63710
 
 
-# Sandbox timeout in seconds
-#sandbox_timeout = 120
-
-# Sandbox type (ssh, exec, e2b, local)
-#sandbox_type = "ssh"
-
-# Sandbox user ID
-#sandbox_user_id = 1000
-
 # Use host network
 # Use host network
 #use_host_network = false
 #use_host_network = false
 
 
@@ -174,7 +162,23 @@ model = "gpt-4o"
 # Name of the agent
 # Name of the agent
 #name = "CodeActAgent"
 #name = "CodeActAgent"
 
 
+#################################### Sandbox ###################################
+# Configuration for the sandbox
+##############################################################################
+[sandbox]
+# Sandbox timeout in seconds
+#timeout = 120
+
+# Sandbox type (ssh, e2b, local)
+#box_type = "ssh"
+
+# Sandbox user ID
+#user_id = 1000
+
+# Container image to use for the sandbox
+#container_image = "ghcr.io/opendevin/sandbox:main"
+
 #################################### Eval ####################################
 #################################### Eval ####################################
 # Configuration for the evaluation, please refer to the specific evaluation
 # Configuration for the evaluation, please refer to the specific evaluation
 # plugin for the available options
 # plugin for the available options
-##############################################################################
+##############################################################################

+ 4 - 2
evaluation/TUTORIAL.md

@@ -31,8 +31,6 @@ workspace_base = "/path/to/your/workspace"
 workspace_mount_path = "/path/to/your/workspace"
 workspace_mount_path = "/path/to/your/workspace"
 # ==========================
 # ==========================
 
 
-sandbox_type = "ssh"
-sandbox_timeout = 120
 ssh_hostname = "localhost"
 ssh_hostname = "localhost"
 
 
 # SWEBench eval specific - but you can tweak it to your needs
 # SWEBench eval specific - but you can tweak it to your needs
@@ -41,6 +39,10 @@ run_as_devin = false
 # linting python after editing helps LLM fix indentations
 # linting python after editing helps LLM fix indentations
 enable_auto_lint = true
 enable_auto_lint = true
 
 
+[sandbox]
+box_type = "ssh"
+timeout = 120
+
 [llm]
 [llm]
 # IMPORTANT: add your API key here, and set the model to the one you want to evaluate
 # IMPORTANT: add your API key here, and set the model to the one you want to evaluate
 model = "gpt-4o-2024-05-13"
 model = "gpt-4o-2024-05-13"

+ 4 - 2
evaluation/agent_bench/README.md

@@ -18,8 +18,6 @@ cache_dir = "/path/to/cache"
 workspace_base = "/path/to/workspace"
 workspace_base = "/path/to/workspace"
 workspace_mount_path = "/path/to/workspace"
 workspace_mount_path = "/path/to/workspace"
 
 
-sandbox_type = "ssh"
-sandbox_timeout = 120
 ssh_hostname = "localhost"
 ssh_hostname = "localhost"
 
 
 use_host_network = false
 use_host_network = false
@@ -27,6 +25,10 @@ use_host_network = false
 run_as_devin = true
 run_as_devin = true
 enable_auto_lint = true
 enable_auto_lint = true
 
 
+[sandbox]
+box_type = "ssh"
+timeout = 120
+
 [eval_gpt35_turbo]
 [eval_gpt35_turbo]
 model = "gpt-3.5-turbo"
 model = "gpt-3.5-turbo"
 api_key = "sk-123"
 api_key = "sk-123"

+ 4 - 2
evaluation/miniwob/README.md

@@ -16,9 +16,11 @@ Add the following configurations:
 [core]
 [core]
 max_iterations = 100
 max_iterations = 100
 cache_dir = "/tmp/cache"
 cache_dir = "/tmp/cache"
-sandbox_type = "ssh"
 ssh_hostname = "localhost"
 ssh_hostname = "localhost"
-sandbox_timeout = 120
+
+[sandbox]
+box_type = "ssh"
+timeout = 120
 
 
 # TODO: Change these to the model you want to evaluate
 # TODO: Change these to the model you want to evaluate
 [eval_gpt4_1106_preview]
 [eval_gpt4_1106_preview]

+ 4 - 2
evaluation/swe_bench/README.md

@@ -44,9 +44,11 @@ Add the following configurations:
 [core]
 [core]
 max_iterations = 100
 max_iterations = 100
 cache_dir = "/tmp/cache"
 cache_dir = "/tmp/cache"
-sandbox_type = "ssh"
 ssh_hostname = "localhost"
 ssh_hostname = "localhost"
-sandbox_timeout = 120
+
+[sandbox]
+box_type = "ssh"
+timeout = 120
 
 
 # SWEBench eval specific
 # SWEBench eval specific
 use_host_network = false
 use_host_network = false

+ 4 - 2
evaluation/webarena/README.md

@@ -16,9 +16,11 @@ Add the following configurations:
 [core]
 [core]
 max_iterations = 100
 max_iterations = 100
 cache_dir = "/tmp/cache"
 cache_dir = "/tmp/cache"
-sandbox_type = "ssh"
 ssh_hostname = "localhost"
 ssh_hostname = "localhost"
-sandbox_timeout = 120
+
+[sandbox]
+box_type = "ssh"
+timeout = 120
 
 
 # TODO: Change these to the model you want to evaluate
 # TODO: Change these to the model you want to evaluate
 [eval_gpt4_1106_preview]
 [eval_gpt4_1106_preview]

+ 78 - 15
opendevin/core/config.py

@@ -125,6 +125,51 @@ class AgentConfig(metaclass=Singleton):
         return result
         return result
 
 
 
 
+@dataclass
+class SandboxConfig(metaclass=Singleton):
+    """
+    Configuration for the sandbox.
+
+    Attributes:
+        box_type: The type of sandbox to use. Options are: ssh, e2b, local.
+        container_image: The container image to use for the sandbox.
+        user_id: The user ID for the sandbox.
+        timeout: The timeout for the sandbox, in seconds.
+
+    """
+
+    box_type: str = 'ssh'
+    container_image: str = 'ghcr.io/opendevin/sandbox' + (
+        f':{os.getenv("OPEN_DEVIN_BUILD_VERSION")}'
+        if os.getenv('OPEN_DEVIN_BUILD_VERSION')
+        else ':main'
+    )
+    user_id: int = os.getuid() if hasattr(os, 'getuid') else 1000
+    timeout: int = 120
+
+    def defaults_to_dict(self) -> dict:
+        """
+        Serialize fields to a dict for the frontend, including each field's type hint, default, and whether it is optional.
+        """
+        dict = {}
+        for f in fields(self):
+            dict[f.name] = get_field_info(f)
+        return dict
+
+    def __str__(self):
+        attr_str = []
+        for f in fields(self):
+            attr_name = f.name
+            attr_value = getattr(self, f.name)
+
+            attr_str.append(f'{attr_name}={repr(attr_value)}')
+
+        return f"SandboxConfig({', '.join(attr_str)})"
+
+    def __repr__(self):
+        return self.__str__()
+
+
 class UndefinedString(str, Enum):
 class UndefinedString(str, Enum):
     UNDEFINED = 'UNDEFINED'
     UNDEFINED = 'UNDEFINED'
 
 
@@ -137,6 +182,7 @@ class AppConfig(metaclass=Singleton):
     Attributes:
     Attributes:
         llm: The LLM configuration.
         llm: The LLM configuration.
         agent: The agent configuration.
         agent: The agent configuration.
+        sandbox: The sandbox configuration.
         runtime: The runtime environment.
         runtime: The runtime environment.
         file_store: The file store to use.
         file_store: The file store to use.
         file_store_path: The path to the file store.
         file_store_path: The path to the file store.
@@ -145,17 +191,14 @@ class AppConfig(metaclass=Singleton):
         workspace_mount_path_in_sandbox: The path to mount the workspace in the sandbox. Defaults to /workspace.
         workspace_mount_path_in_sandbox: The path to mount the workspace in the sandbox. Defaults to /workspace.
         workspace_mount_rewrite: The path to rewrite the workspace mount path to.
         workspace_mount_rewrite: The path to rewrite the workspace mount path to.
         cache_dir: The path to the cache directory. Defaults to /tmp/cache.
         cache_dir: The path to the cache directory. Defaults to /tmp/cache.
-        sandbox_container_image: The container image to use for the sandbox.
         run_as_devin: Whether to run as devin.
         run_as_devin: Whether to run as devin.
         max_iterations: The maximum number of iterations.
         max_iterations: The maximum number of iterations.
         max_budget_per_task: The maximum budget allowed per task, beyond which the agent will stop.
         max_budget_per_task: The maximum budget allowed per task, beyond which the agent will stop.
         e2b_api_key: The E2B API key.
         e2b_api_key: The E2B API key.
-        sandbox_type: The type of sandbox to use. Options are: ssh, exec, e2b, local.
         use_host_network: Whether to use the host network.
         use_host_network: Whether to use the host network.
         ssh_hostname: The SSH hostname.
         ssh_hostname: The SSH hostname.
         disable_color: Whether to disable color. For terminals that don't support color.
         disable_color: Whether to disable color. For terminals that don't support color.
-        sandbox_user_id: The user ID for the sandbox.
-        sandbox_timeout: The timeout for the sandbox.
+        initialize_plugins: Whether to initialize plugins.
         debug: Whether to enable debugging.
         debug: Whether to enable debugging.
         enable_auto_lint: Whether to enable auto linting. This is False by default, for regular runs of the app. For evaluation, please set this to True.
         enable_auto_lint: Whether to enable auto linting. This is False by default, for regular runs of the app. For evaluation, please set this to True.
         enable_cli_session: Whether to enable saving and restoring the session when run from CLI.
         enable_cli_session: Whether to enable saving and restoring the session when run from CLI.
@@ -166,6 +209,7 @@ class AppConfig(metaclass=Singleton):
 
 
     llm: LLMConfig = field(default_factory=LLMConfig)
     llm: LLMConfig = field(default_factory=LLMConfig)
     agent: AgentConfig = field(default_factory=AgentConfig)
     agent: AgentConfig = field(default_factory=AgentConfig)
+    sandbox: SandboxConfig = field(default_factory=SandboxConfig)
     runtime: str = 'server'
     runtime: str = 'server'
     file_store: str = 'memory'
     file_store: str = 'memory'
     file_store_path: str = '/tmp/file_store'
     file_store_path: str = '/tmp/file_store'
@@ -176,21 +220,13 @@ class AppConfig(metaclass=Singleton):
     workspace_mount_path_in_sandbox: str = '/workspace'
     workspace_mount_path_in_sandbox: str = '/workspace'
     workspace_mount_rewrite: str | None = None
     workspace_mount_rewrite: str | None = None
     cache_dir: str = '/tmp/cache'
     cache_dir: str = '/tmp/cache'
-    sandbox_container_image: str = 'ghcr.io/opendevin/sandbox' + (
-        f':{os.getenv("OPEN_DEVIN_BUILD_VERSION")}'
-        if os.getenv('OPEN_DEVIN_BUILD_VERSION')
-        else ':main'
-    )
     run_as_devin: bool = True
     run_as_devin: bool = True
     max_iterations: int = 100
     max_iterations: int = 100
     max_budget_per_task: float | None = None
     max_budget_per_task: float | None = None
     e2b_api_key: str = ''
     e2b_api_key: str = ''
-    sandbox_type: str = 'ssh'  # Can be 'ssh', 'exec', or 'e2b'
     use_host_network: bool = False
     use_host_network: bool = False
     ssh_hostname: str = 'localhost'
     ssh_hostname: str = 'localhost'
     disable_color: bool = False
     disable_color: bool = False
-    sandbox_user_id: int = os.getuid() if hasattr(os, 'getuid') else 1000
-    sandbox_timeout: int = 120
     initialize_plugins: bool = True
     initialize_plugins: bool = True
     persist_sandbox: bool = False
     persist_sandbox: bool = False
     ssh_port: int = 63710
     ssh_port: int = 63710
@@ -287,7 +323,7 @@ def get_field_info(f):
 
 
 def load_from_env(cfg: AppConfig, env_or_toml_dict: dict | MutableMapping[str, str]):
 def load_from_env(cfg: AppConfig, env_or_toml_dict: dict | MutableMapping[str, str]):
     """Reads the env-style vars and sets config attributes based on env vars or a config.toml dict.
     """Reads the env-style vars and sets config attributes based on env vars or a config.toml dict.
-    Compatibility with vars like LLM_BASE_URL, AGENT_MEMORY_ENABLED and others.
+    Compatibility with vars like LLM_BASE_URL, AGENT_MEMORY_ENABLED, SANDBOX_TIMEOUT and others.
 
 
     Args:
     Args:
         cfg: The AppConfig object to set attributes on.
         cfg: The AppConfig object to set attributes on.
@@ -335,6 +371,9 @@ def load_from_env(cfg: AppConfig, env_or_toml_dict: dict | MutableMapping[str, s
                         f'Error setting env var {env_var_name}={value}: check that the value is of the right type'
                         f'Error setting env var {env_var_name}={value}: check that the value is of the right type'
                     )
                     )
 
 
+    if 'SANDBOX_TYPE' in env_or_toml_dict:
+        logger.error('SANDBOX_TYPE is deprecated. Please use SANDBOX_BOX_TYPE instead.')
+        env_or_toml_dict['SANDBOX_BOX_TYPE'] = env_or_toml_dict.pop('SANDBOX_TYPE')
     # Start processing from the root of the config object
     # Start processing from the root of the config object
     set_attr_from_env(cfg)
     set_attr_from_env(cfg)
 
 
@@ -380,8 +419,32 @@ def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'):
         if 'agent' in toml_config:
         if 'agent' in toml_config:
             agent_config = AgentConfig(**toml_config['agent'])
             agent_config = AgentConfig(**toml_config['agent'])
 
 
+        # set sandbox config from the toml file
+        sandbox_config = config.sandbox
+
+        # migrate old sandbox configs from [core] section to sandbox config
+        keys_to_migrate = [key for key in core_config if key.startswith('sandbox_')]
+        for key in keys_to_migrate:
+            new_key = key.replace('sandbox_', '')
+            if new_key == 'type':
+                new_key = 'box_type'
+            if new_key in sandbox_config.__annotations__:
+                # read the key in sandbox and remove it from core
+                setattr(sandbox_config, new_key, core_config.pop(key))
+            else:
+                logger.warning(f'Unknown sandbox config: {key}')
+
+        # the new style values override the old style values
+        if 'sandbox' in toml_config:
+            sandbox_config = SandboxConfig(**toml_config['sandbox'])
+
         # update the config object with the new values
         # update the config object with the new values
-        AppConfig(llm=llm_config, agent=agent_config, **core_config)
+        AppConfig(
+            llm=llm_config,
+            agent=agent_config,
+            sandbox=sandbox_config,
+            **core_config,
+        )
     except (TypeError, KeyError) as e:
     except (TypeError, KeyError) as e:
         logger.warning(
         logger.warning(
             f'Cannot parse config from toml, toml values have not been applied.\nError: {e}',
             f'Cannot parse config from toml, toml values have not been applied.\nError: {e}',
@@ -400,7 +463,7 @@ def finalize_config(cfg: AppConfig):
     cfg.workspace_base = os.path.abspath(cfg.workspace_base)
     cfg.workspace_base = os.path.abspath(cfg.workspace_base)
 
 
     # In local there is no sandbox, the workspace will have the same pwd as the host
     # In local there is no sandbox, the workspace will have the same pwd as the host
-    if cfg.sandbox_type == 'local':
+    if cfg.sandbox.box_type == 'local':
         cfg.workspace_mount_path_in_sandbox = cfg.workspace_mount_path
         cfg.workspace_mount_path_in_sandbox = cfg.workspace_mount_path
 
 
     if cfg.workspace_mount_rewrite:  # and not config.workspace_mount_path:
     if cfg.workspace_mount_rewrite:  # and not config.workspace_mount_path:

+ 1 - 1
opendevin/core/schema/config.py

@@ -34,7 +34,7 @@ class ConfigType(str, Enum):
     MAX_ITERATIONS = 'MAX_ITERATIONS'
     MAX_ITERATIONS = 'MAX_ITERATIONS'
     AGENT = 'AGENT'
     AGENT = 'AGENT'
     E2B_API_KEY = 'E2B_API_KEY'
     E2B_API_KEY = 'E2B_API_KEY'
-    SANDBOX_TYPE = 'SANDBOX_TYPE'
+    SANDBOX_BOX_TYPE = 'SANDBOX_BOX_TYPE'
     SANDBOX_USER_ID = 'SANDBOX_USER_ID'
     SANDBOX_USER_ID = 'SANDBOX_USER_ID'
     SANDBOX_TIMEOUT = 'SANDBOX_TIMEOUT'
     SANDBOX_TIMEOUT = 'SANDBOX_TIMEOUT'
     USE_HOST_NETWORK = 'USE_HOST_NETWORK'
     USE_HOST_NETWORK = 'USE_HOST_NETWORK'

+ 1 - 1
opendevin/runtime/docker/local_box.py

@@ -26,7 +26,7 @@ from opendevin.runtime.sandbox import Sandbox
 
 
 
 
 class LocalBox(Sandbox):
 class LocalBox(Sandbox):
-    def __init__(self, timeout: int = config.sandbox_timeout):
+    def __init__(self, timeout: int = config.sandbox.timeout):
         os.makedirs(config.workspace_base, exist_ok=True)
         os.makedirs(config.workspace_base, exist_ok=True)
         self.timeout = timeout
         self.timeout = timeout
         self.background_commands: dict[int, Process] = {}
         self.background_commands: dict[int, Process] = {}

+ 3 - 7
opendevin/runtime/docker/ssh_box.py

@@ -209,7 +209,7 @@ class DockerSSHBox(Sandbox):
     def __init__(
     def __init__(
         self,
         self,
         container_image: str | None = None,
         container_image: str | None = None,
-        timeout: int = config.sandbox_timeout,
+        timeout: int = config.sandbox.timeout,
         sid: str | None = None,
         sid: str | None = None,
     ):
     ):
         logger.info(
         logger.info(
@@ -235,7 +235,7 @@ class DockerSSHBox(Sandbox):
             self.instance_id = (sid or '') + str(uuid.uuid4())
             self.instance_id = (sid or '') + str(uuid.uuid4())
 
 
         self.timeout = timeout
         self.timeout = timeout
-        self.container_image = container_image or config.sandbox_container_image
+        self.container_image = container_image or config.sandbox.container_image
         self.container_image = get_od_sandbox_image(
         self.container_image = get_od_sandbox_image(
             self.container_image, self.docker_client
             self.container_image, self.docker_client
         )
         )
@@ -661,11 +661,7 @@ class DockerSSHBox(Sandbox):
 
 
     @property
     @property
     def user_id(self):
     def user_id(self):
-        return config.sandbox_user_id
-
-    @property
-    def sandbox_user_id(self):
-        return config.sandbox_user_id
+        return config.sandbox.user_id
 
 
     @property
     @property
     def run_as_devin(self):
     def run_as_devin(self):

+ 1 - 1
opendevin/runtime/e2b/sandbox.py

@@ -24,7 +24,7 @@ class E2BBox(Sandbox):
     def __init__(
     def __init__(
         self,
         self,
         template: str = 'open-devin',
         template: str = 'open-devin',
-        timeout: int = config.sandbox_timeout,
+        timeout: int = config.sandbox.timeout,
     ):
     ):
         self.sandbox = E2BSandbox(
         self.sandbox = E2BSandbox(
             api_key=config.e2b_api_key,
             api_key=config.e2b_api_key,

+ 6 - 6
opendevin/runtime/runtime.py

@@ -37,15 +37,15 @@ from opendevin.runtime.tools import RuntimeTool
 from opendevin.storage import FileStore, InMemoryFileStore
 from opendevin.storage import FileStore, InMemoryFileStore
 
 
 
 
-def create_sandbox(sid: str = 'default', sandbox_type: str = 'ssh') -> Sandbox:
-    if sandbox_type == 'local':
+def create_sandbox(sid: str = 'default', box_type: str = 'ssh') -> Sandbox:
+    if box_type == 'local':
         return LocalBox()
         return LocalBox()
-    elif sandbox_type == 'ssh':
+    elif box_type == 'ssh':
         return DockerSSHBox(sid=sid)
         return DockerSSHBox(sid=sid)
-    elif sandbox_type == 'e2b':
+    elif box_type == 'e2b':
         return E2BBox()
         return E2BBox()
     else:
     else:
-        raise ValueError(f'Invalid sandbox type: {sandbox_type}')
+        raise ValueError(f'Invalid sandbox type: {box_type}')
 
 
 
 
 class Runtime:
 class Runtime:
@@ -67,7 +67,7 @@ class Runtime:
     ):
     ):
         self.sid = sid
         self.sid = sid
         if sandbox is None:
         if sandbox is None:
-            self.sandbox = create_sandbox(sid, config.sandbox_type)
+            self.sandbox = create_sandbox(sid, config.sandbox.box_type)
             self._is_external_sandbox = False
             self._is_external_sandbox = False
         else:
         else:
             self.sandbox = sandbox
             self.sandbox = sandbox

+ 4 - 4
tests/integration/regenerate.sh

@@ -25,7 +25,7 @@ echo "WORKSPACE_MOUNT_PATH_IN_SANDBOX: $WORKSPACE_MOUNT_PATH_IN_SANDBOX"
 mkdir -p $WORKSPACE_BASE
 mkdir -p $WORKSPACE_BASE
 
 
 # use environmental variable if exists, otherwise use "ssh"
 # use environmental variable if exists, otherwise use "ssh"
-SANDBOX_TYPE="${SANDBOX_TYPE:-ssh}"
+SANDBOX_BOX_TYPE="${SANDBOX_BOX_TYPE:-${SANDBOX_TYPE:-ssh}}"
 # TODO: we should also test PERSIST_SANDBOX = true, once it's fixed
 # TODO: we should also test PERSIST_SANDBOX = true, once it's fixed
 PERSIST_SANDBOX=false
 PERSIST_SANDBOX=false
 MAX_ITERATIONS=10
 MAX_ITERATIONS=10
@@ -64,7 +64,7 @@ run_test() {
     pytest_cmd+=" --cov=agenthub --cov=opendevin --cov-report=xml --cov-append"
     pytest_cmd+=" --cov=agenthub --cov=opendevin --cov-report=xml --cov-append"
   fi
   fi
 
 
-  SANDBOX_TYPE=$SANDBOX_TYPE \
+  SANDBOX_BOX_TYPE=$SANDBOX_BOX_TYPE \
     PERSIST_SANDBOX=$PERSIST_SANDBOX \
     PERSIST_SANDBOX=$PERSIST_SANDBOX \
     WORKSPACE_BASE=$WORKSPACE_BASE \
     WORKSPACE_BASE=$WORKSPACE_BASE \
     WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH \
     WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH \
@@ -133,7 +133,7 @@ trap cleanup EXIT
 regenerate_without_llm() {
 regenerate_without_llm() {
   # set -x to print the command being executed
   # set -x to print the command being executed
   set -x
   set -x
-  SANDBOX_TYPE=$SANDBOX_TYPE \
+  SANDBOX_BOX_TYPE=$SANDBOX_BOX_TYPE \
     PERSIST_SANDBOX=$PERSIST_SANDBOX \
     PERSIST_SANDBOX=$PERSIST_SANDBOX \
     WORKSPACE_BASE=$WORKSPACE_BASE \
     WORKSPACE_BASE=$WORKSPACE_BASE \
     WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH \
     WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH \
@@ -162,7 +162,7 @@ regenerate_with_llm() {
   set -x
   set -x
   echo -e "/exit\n" | \
   echo -e "/exit\n" | \
     DEBUG=true \
     DEBUG=true \
-    SANDBOX_TYPE=$SANDBOX_TYPE \
+    SANDBOX_BOX_TYPE=$SANDBOX_BOX_TYPE \
     PERSIST_SANDBOX=$PERSIST_SANDBOX \
     PERSIST_SANDBOX=$PERSIST_SANDBOX \
     WORKSPACE_BASE=$WORKSPACE_BASE \
     WORKSPACE_BASE=$WORKSPACE_BASE \
     WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH AGENT=$agent \
     WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH AGENT=$agent \

+ 7 - 7
tests/integration/test_agent.py

@@ -32,7 +32,7 @@ print(f'workspace_mount_path_in_sandbox: {workspace_mount_path_in_sandbox}')
 )
 )
 @pytest.mark.skipif(
 @pytest.mark.skipif(
     (os.getenv('AGENT') == 'CodeActAgent' or os.getenv('AGENT') == 'CodeActSWEAgent')
     (os.getenv('AGENT') == 'CodeActAgent' or os.getenv('AGENT') == 'CodeActSWEAgent')
-    and os.getenv('SANDBOX_TYPE', '').lower() != 'ssh',
+    and os.getenv('SANDBOX_BOX_TYPE', '').lower() != 'ssh',
     reason='CodeActAgent/CodeActSWEAgent only supports ssh sandbox which is stateful',
     reason='CodeActAgent/CodeActSWEAgent only supports ssh sandbox which is stateful',
 )
 )
 @pytest.mark.skipif(
 @pytest.mark.skipif(
@@ -71,7 +71,7 @@ def test_write_simple_script():
 )
 )
 @pytest.mark.skipif(
 @pytest.mark.skipif(
     (os.getenv('AGENT') == 'CodeActAgent' or os.getenv('AGENT') == 'CodeActSWEAgent')
     (os.getenv('AGENT') == 'CodeActAgent' or os.getenv('AGENT') == 'CodeActSWEAgent')
-    and os.getenv('SANDBOX_TYPE', '').lower() != 'ssh',
+    and os.getenv('SANDBOX_BOX_TYPE', '').lower() != 'ssh',
     reason='CodeActAgent/CodeActSWEAgent only supports ssh sandbox which is stateful',
     reason='CodeActAgent/CodeActSWEAgent only supports ssh sandbox which is stateful',
 )
 )
 @pytest.mark.skipif(
 @pytest.mark.skipif(
@@ -79,7 +79,7 @@ def test_write_simple_script():
     reason='We only keep basic tests for MonologueAgent and PlannerAgent',
     reason='We only keep basic tests for MonologueAgent and PlannerAgent',
 )
 )
 @pytest.mark.skipif(
 @pytest.mark.skipif(
-    os.getenv('SANDBOX_TYPE') == 'local',
+    os.getenv('SANDBOX_BOX_TYPE') == 'local',
     reason='local sandbox shows environment-dependent absolute path for pwd command',
     reason='local sandbox shows environment-dependent absolute path for pwd command',
 )
 )
 def test_edits():
 def test_edits():
@@ -120,7 +120,7 @@ Enjoy!
     reason='currently only CodeActAgent and CodeActSWEAgent have IPython (Jupyter) execution by default',
     reason='currently only CodeActAgent and CodeActSWEAgent have IPython (Jupyter) execution by default',
 )
 )
 @pytest.mark.skipif(
 @pytest.mark.skipif(
-    os.getenv('SANDBOX_TYPE') != 'ssh',
+    os.getenv('SANDBOX_BOX_TYPE') != 'ssh',
     reason='Currently, only ssh sandbox supports stateful tasks',
     reason='Currently, only ssh sandbox supports stateful tasks',
 )
 )
 def test_ipython():
 def test_ipython():
@@ -154,7 +154,7 @@ def test_ipython():
     reason='Currently, only ManagerAgent supports task rejection',
     reason='Currently, only ManagerAgent supports task rejection',
 )
 )
 @pytest.mark.skipif(
 @pytest.mark.skipif(
-    os.getenv('SANDBOX_TYPE') == 'local',
+    os.getenv('SANDBOX_BOX_TYPE') == 'local',
     reason='FIXME: local sandbox does not capture stderr',
     reason='FIXME: local sandbox does not capture stderr',
 )
 )
 def test_simple_task_rejection():
 def test_simple_task_rejection():
@@ -177,7 +177,7 @@ def test_simple_task_rejection():
     reason='currently only CodeActAgent and CodeActSWEAgent have IPython (Jupyter) execution by default',
     reason='currently only CodeActAgent and CodeActSWEAgent have IPython (Jupyter) execution by default',
 )
 )
 @pytest.mark.skipif(
 @pytest.mark.skipif(
-    os.getenv('SANDBOX_TYPE') != 'ssh',
+    os.getenv('SANDBOX_BOX_TYPE') != 'ssh',
     reason='Currently, only ssh sandbox supports stateful tasks',
     reason='Currently, only ssh sandbox supports stateful tasks',
 )
 )
 def test_ipython_module():
 def test_ipython_module():
@@ -213,7 +213,7 @@ def test_ipython_module():
 )
 )
 @pytest.mark.skipif(
 @pytest.mark.skipif(
     (os.getenv('AGENT') == 'CodeActAgent' or os.getenv('AGENT') == 'CodeActSWEAgent')
     (os.getenv('AGENT') == 'CodeActAgent' or os.getenv('AGENT') == 'CodeActSWEAgent')
-    and os.getenv('SANDBOX_TYPE', '').lower() != 'ssh',
+    and os.getenv('SANDBOX_BOX_TYPE', '').lower() != 'ssh',
     reason='CodeActAgent/CodeActSWEAgent only supports ssh sandbox which is stateful',
     reason='CodeActAgent/CodeActSWEAgent only supports ssh sandbox which is stateful',
 )
 )
 def test_browse_internet(http_server):
 def test_browse_internet(http_server):

+ 171 - 8
tests/unit/test_config.py

@@ -51,6 +51,8 @@ def test_compat_env_to_config(monkeypatch, setup_env):
     monkeypatch.setenv('AGENT_MEMORY_MAX_THREADS', '4')
     monkeypatch.setenv('AGENT_MEMORY_MAX_THREADS', '4')
     monkeypatch.setenv('AGENT_MEMORY_ENABLED', 'True')
     monkeypatch.setenv('AGENT_MEMORY_ENABLED', 'True')
     monkeypatch.setenv('AGENT', 'CodeActAgent')
     monkeypatch.setenv('AGENT', 'CodeActAgent')
+    monkeypatch.setenv('SANDBOX_TYPE', 'local')
+    monkeypatch.setenv('SANDBOX_TIMEOUT', '10')
 
 
     config = AppConfig()
     config = AppConfig()
     load_from_env(config, os.environ)
     load_from_env(config, os.environ)
@@ -62,6 +64,10 @@ def test_compat_env_to_config(monkeypatch, setup_env):
     assert isinstance(config.agent, AgentConfig)
     assert isinstance(config.agent, AgentConfig)
     assert isinstance(config.agent.memory_max_threads, int)
     assert isinstance(config.agent.memory_max_threads, int)
     assert config.agent.memory_max_threads == 4
     assert config.agent.memory_max_threads == 4
+    assert config.agent.memory_enabled is True
+    assert config.agent.name == 'CodeActAgent'
+    assert config.sandbox.box_type == 'local'
+    assert config.sandbox.timeout == 10
 
 
 
 
 def test_load_from_old_style_env(monkeypatch, default_config):
 def test_load_from_old_style_env(monkeypatch, default_config):
@@ -70,6 +76,7 @@ def test_load_from_old_style_env(monkeypatch, default_config):
     monkeypatch.setenv('AGENT_MEMORY_ENABLED', 'True')
     monkeypatch.setenv('AGENT_MEMORY_ENABLED', 'True')
     monkeypatch.setenv('AGENT_NAME', 'PlannerAgent')
     monkeypatch.setenv('AGENT_NAME', 'PlannerAgent')
     monkeypatch.setenv('WORKSPACE_BASE', '/opt/files/workspace')
     monkeypatch.setenv('WORKSPACE_BASE', '/opt/files/workspace')
+    monkeypatch.setenv('SANDBOX_CONTAINER_IMAGE', 'custom_image')
 
 
     load_from_env(default_config, os.environ)
     load_from_env(default_config, os.environ)
 
 
@@ -83,12 +90,14 @@ def test_load_from_old_style_env(monkeypatch, default_config):
     assert (
     assert (
         default_config.workspace_mount_path_in_sandbox is not UndefinedString.UNDEFINED
         default_config.workspace_mount_path_in_sandbox is not UndefinedString.UNDEFINED
     )
     )
+    assert default_config.sandbox.container_image == 'custom_image'
 
 
 
 
 def test_load_from_new_style_toml(default_config, temp_toml_file):
 def test_load_from_new_style_toml(default_config, temp_toml_file):
     # Test loading configuration from a new-style TOML file
     # Test loading configuration from a new-style TOML file
     with open(temp_toml_file, 'w', encoding='utf-8') as toml_file:
     with open(temp_toml_file, 'w', encoding='utf-8') as toml_file:
-        toml_file.write("""
+        toml_file.write(
+            """
 [llm]
 [llm]
 model = "test-model"
 model = "test-model"
 api_key = "toml-api-key"
 api_key = "toml-api-key"
@@ -97,9 +106,14 @@ api_key = "toml-api-key"
 name = "TestAgent"
 name = "TestAgent"
 memory_enabled = true
 memory_enabled = true
 
 
+[sandbox]
+timeout = 1
+
 [core]
 [core]
 workspace_base = "/opt/files2/workspace"
 workspace_base = "/opt/files2/workspace"
-""")
+sandbox_type = "local"
+"""
+        )
 
 
     load_from_toml(default_config, temp_toml_file)
     load_from_toml(default_config, temp_toml_file)
 
 
@@ -108,6 +122,11 @@ workspace_base = "/opt/files2/workspace"
     assert default_config.agent.name == 'TestAgent'
     assert default_config.agent.name == 'TestAgent'
     assert default_config.agent.memory_enabled is True
     assert default_config.agent.memory_enabled is True
     assert default_config.workspace_base == '/opt/files2/workspace'
     assert default_config.workspace_base == '/opt/files2/workspace'
+    assert default_config.sandbox.box_type == 'local'
+    assert default_config.sandbox.timeout == 1
+
+    # default config doesn't have a field sandbox_type
+    assert not hasattr(default_config, 'sandbox_type')
 
 
     # before finalize_config, workspace_mount_path is UndefinedString.UNDEFINED if it was not set
     # before finalize_config, workspace_mount_path is UndefinedString.UNDEFINED if it was not set
     assert default_config.workspace_mount_path is UndefinedString.UNDEFINED
     assert default_config.workspace_mount_path is UndefinedString.UNDEFINED
@@ -123,8 +142,56 @@ workspace_base = "/opt/files2/workspace"
     assert default_config.workspace_mount_path == '/opt/files2/workspace'
     assert default_config.workspace_mount_path == '/opt/files2/workspace'
 
 
 
 
-def test_env_overrides_toml(monkeypatch, default_config, temp_toml_file):
-    # Test that environment variables override TOML values using monkeypatch
+def test_compat_load_sandbox_from_toml(default_config, temp_toml_file):
+    # test loading configuration from a new-style TOML file
+    # uses a toml file with sandbox_vars instead of a sandbox section
+    with open(temp_toml_file, 'w', encoding='utf-8') as toml_file:
+        toml_file.write(
+            """
+[llm]
+model = "test-model"
+
+[agent]
+name = "TestAgent"
+memory_enabled = true
+
+[core]
+workspace_base = "/opt/files2/workspace"
+sandbox_type = "local"
+sandbox_timeout = 500
+sandbox_container_image = "node:14"
+sandbox_user_id = 1001
+"""
+        )
+
+    load_from_toml(default_config, temp_toml_file)
+
+    assert default_config.llm.model == 'test-model'
+    assert default_config.agent.name == 'TestAgent'
+    assert default_config.agent.memory_enabled is True
+    assert default_config.workspace_base == '/opt/files2/workspace'
+    assert default_config.sandbox.box_type == 'local'
+    assert default_config.sandbox.timeout == 500
+    assert default_config.sandbox.container_image == 'node:14'
+    assert default_config.sandbox.user_id == 1001
+    assert default_config.workspace_mount_path_in_sandbox == '/workspace'
+
+    finalize_config(default_config)
+
+    # app config doesn't have fields sandbox_*
+    assert not hasattr(default_config, 'sandbox_type')
+    assert not hasattr(default_config, 'sandbox_timeout')
+    assert not hasattr(default_config, 'sandbox_container_image')
+    assert not hasattr(default_config, 'sandbox_user_id')
+
+    # after finalize_config, workspace_mount_path is set to the absolute path of workspace_base
+    # if it was undefined
+    assert default_config.workspace_mount_path == '/opt/files2/workspace'
+
+
+def test_env_overrides_compat_toml(monkeypatch, default_config, temp_toml_file):
+    # test that environment variables override TOML values using monkeypatch
+    # uses a toml file with sandbox_vars instead of a sandbox section
     with open(temp_toml_file, 'w', encoding='utf-8') as toml_file:
     with open(temp_toml_file, 'w', encoding='utf-8') as toml_file:
         toml_file.write("""
         toml_file.write("""
 [llm]
 [llm]
@@ -135,11 +202,15 @@ api_key = "toml-api-key"
 workspace_base = "/opt/files3/workspace"
 workspace_base = "/opt/files3/workspace"
 sandbox_type = "local"
 sandbox_type = "local"
 disable_color = true
 disable_color = true
+sandbox_timeout = 500
+sandbox_user_id = 1001
 """)
 """)
 
 
     monkeypatch.setenv('LLM_API_KEY', 'env-api-key')
     monkeypatch.setenv('LLM_API_KEY', 'env-api-key')
     monkeypatch.setenv('WORKSPACE_BASE', 'UNDEFINED')
     monkeypatch.setenv('WORKSPACE_BASE', 'UNDEFINED')
-    monkeypatch.setenv('SANDBOX_TYPE', 'ssh')
+    monkeypatch.setenv('SANDBOX_TYPE', 'e2b')
+    monkeypatch.setenv('SANDBOX_TIMEOUT', '1000')
+    monkeypatch.setenv('SANDBOX_USER_ID', '1002')
 
 
     load_from_toml(default_config, temp_toml_file)
     load_from_toml(default_config, temp_toml_file)
 
 
@@ -160,20 +231,106 @@ disable_color = true
     assert default_config.workspace_mount_path is UndefinedString.UNDEFINED
     assert default_config.workspace_mount_path is UndefinedString.UNDEFINED
     assert default_config.workspace_mount_path == 'UNDEFINED'
     assert default_config.workspace_mount_path == 'UNDEFINED'
 
 
-    assert default_config.sandbox_type == 'ssh'
+    assert default_config.sandbox.box_type == 'e2b'
     assert default_config.disable_color is True
     assert default_config.disable_color is True
+    assert default_config.sandbox.timeout == 1000
+    assert default_config.sandbox.user_id == 1002
 
 
     finalize_config(default_config)
     finalize_config(default_config)
     # after finalize_config, workspace_mount_path is set to absolute path of workspace_base if it was undefined
     # after finalize_config, workspace_mount_path is set to absolute path of workspace_base if it was undefined
     assert default_config.workspace_mount_path == os.getcwd() + '/UNDEFINED'
     assert default_config.workspace_mount_path == os.getcwd() + '/UNDEFINED'
 
 
 
 
+def test_env_overrides_sandbox_toml(monkeypatch, default_config, temp_toml_file):
+    # test that environment variables override TOML values using monkeypatch
+    # uses a toml file with a sandbox section
+    with open(temp_toml_file, 'w', encoding='utf-8') as toml_file:
+        toml_file.write("""
+[llm]
+model = "test-model"
+api_key = "toml-api-key"
+
+[core]
+workspace_base = "/opt/files3/workspace"
+
+[sandbox]
+box_type = "e2b"
+timeout = 500
+user_id = 1001
+""")
+
+    monkeypatch.setenv('LLM_API_KEY', 'env-api-key')
+    monkeypatch.setenv('WORKSPACE_BASE', 'UNDEFINED')
+    monkeypatch.setenv('SANDBOX_TYPE', 'local')
+    monkeypatch.setenv('SANDBOX_TIMEOUT', '1000')
+    monkeypatch.setenv('SANDBOX_USER_ID', '1002')
+
+    load_from_toml(default_config, temp_toml_file)
+
+    # before finalize_config, workspace_mount_path is UndefinedString.UNDEFINED if it was not set
+    assert default_config.workspace_mount_path is UndefinedString.UNDEFINED
+
+    # before load_from_env, values are set to the values from the toml file
+    assert default_config.llm.api_key == 'toml-api-key'
+    assert default_config.sandbox.box_type == 'e2b'
+    assert default_config.sandbox.timeout == 500
+    assert default_config.sandbox.user_id == 1001
+
+    load_from_env(default_config, os.environ)
+
+    # values from env override values from toml
+    assert os.environ.get('LLM_MODEL') is None
+    assert default_config.llm.model == 'test-model'
+    assert default_config.llm.api_key == 'env-api-key'
+
+    assert default_config.sandbox.box_type == 'local'
+    assert default_config.sandbox.timeout == 1000
+    assert default_config.sandbox.user_id == 1002
+
+    finalize_config(default_config)
+    # after finalize_config, workspace_mount_path is set to absolute path of workspace_base if it was undefined
+    assert default_config.workspace_mount_path == os.getcwd() + '/UNDEFINED'
+
+
+def test_sandbox_config_from_toml(default_config, temp_toml_file):
+    # Test loading configuration from a new-style TOML file
+    with open(temp_toml_file, 'w', encoding='utf-8') as toml_file:
+        toml_file.write(
+            """
+[core]
+workspace_base = "/opt/files/workspace"
+
+[llm]
+model = "test-model"
+
+[sandbox]
+box_type = "local"
+timeout = 1
+container_image = "custom_image"
+user_id = 1001
+"""
+        )
+
+    load_from_toml(default_config, temp_toml_file)
+    load_from_env(default_config, os.environ)
+    finalize_config(default_config)
+
+    assert default_config.llm.model == 'test-model'
+    assert default_config.sandbox.box_type == 'local'
+    assert default_config.sandbox.timeout == 1
+    assert default_config.sandbox.container_image == 'custom_image'
+    assert default_config.sandbox.user_id == 1001
+
+
 def test_defaults_dict_after_updates(default_config):
 def test_defaults_dict_after_updates(default_config):
     # Test that `defaults_dict` retains initial values after updates.
     # Test that `defaults_dict` retains initial values after updates.
     initial_defaults = default_config.defaults_dict
     initial_defaults = default_config.defaults_dict
     assert (
     assert (
         initial_defaults['workspace_mount_path']['default'] is UndefinedString.UNDEFINED
         initial_defaults['workspace_mount_path']['default'] is UndefinedString.UNDEFINED
     )
     )
+    assert initial_defaults['llm']['api_key']['default'] is None
+    assert initial_defaults['agent']['name']['default'] == 'CodeActAgent'
+
     updated_config = AppConfig()
     updated_config = AppConfig()
     updated_config.llm.api_key = 'updated-api-key'
     updated_config.llm.api_key = 'updated-api-key'
     updated_config.agent.name = 'MonologueAgent'
     updated_config.agent.name = 'MonologueAgent'
@@ -185,6 +342,12 @@ def test_defaults_dict_after_updates(default_config):
         defaults_after_updates['workspace_mount_path']['default']
         defaults_after_updates['workspace_mount_path']['default']
         is UndefinedString.UNDEFINED
         is UndefinedString.UNDEFINED
     )
     )
+    assert defaults_after_updates['sandbox']['box_type']['default'] == 'ssh'
+    assert defaults_after_updates['sandbox']['timeout']['default'] == 120
+    assert (
+        defaults_after_updates['sandbox']['container_image']['default']
+        == 'ghcr.io/opendevin/sandbox:main'
+    )
     assert defaults_after_updates == initial_defaults
     assert defaults_after_updates == initial_defaults
 
 
 
 
@@ -210,7 +373,7 @@ def test_invalid_toml_format(monkeypatch, temp_toml_file, default_config):
 def test_finalize_config(default_config):
 def test_finalize_config(default_config):
     # Test finalize config
     # Test finalize config
     assert default_config.workspace_mount_path is UndefinedString.UNDEFINED
     assert default_config.workspace_mount_path is UndefinedString.UNDEFINED
-    default_config.sandbox_type = 'local'
+    default_config.sandbox.box_type = 'local'
     finalize_config(default_config)
     finalize_config(default_config)
 
 
     assert (
     assert (
@@ -233,7 +396,7 @@ def test_workspace_mount_path_default(default_config):
 
 
 def test_workspace_mount_path_in_sandbox_local(default_config):
 def test_workspace_mount_path_in_sandbox_local(default_config):
     assert default_config.workspace_mount_path_in_sandbox == '/workspace'
     assert default_config.workspace_mount_path_in_sandbox == '/workspace'
-    default_config.sandbox_type = 'local'
+    default_config.sandbox.box_type = 'local'
     finalize_config(default_config)
     finalize_config(default_config)
     assert (
     assert (
         default_config.workspace_mount_path_in_sandbox
         default_config.workspace_mount_path_in_sandbox

+ 1 - 1
tests/unit/test_ipython.py

@@ -79,7 +79,7 @@ def test_sandbox_jupyter_plugin_backticks(temp_dir):
     with patch.object(config, 'workspace_base', new=temp_dir), patch.object(
     with patch.object(config, 'workspace_base', new=temp_dir), patch.object(
         config, 'workspace_mount_path', new=temp_dir
         config, 'workspace_mount_path', new=temp_dir
     ), patch.object(config, 'run_as_devin', new='true'), patch.object(
     ), patch.object(config, 'run_as_devin', new='true'), patch.object(
-        config, 'sandbox_type', new='ssh'
+        config.sandbox, 'box_type', new='ssh'
     ):
     ):
         box = DockerSSHBox()
         box = DockerSSHBox()
         box.init_plugins([JupyterRequirement])
         box.init_plugins([JupyterRequirement])

+ 12 - 12
tests/unit/test_sandbox.py

@@ -92,7 +92,7 @@ def test_ssh_box_run_as_devin(temp_dir):
     with patch.object(config, 'workspace_base', new=temp_dir), patch.object(
     with patch.object(config, 'workspace_base', new=temp_dir), patch.object(
         config, 'workspace_mount_path', new=temp_dir
         config, 'workspace_mount_path', new=temp_dir
     ), patch.object(config, 'run_as_devin', new='true'), patch.object(
     ), patch.object(config, 'run_as_devin', new='true'), patch.object(
-        config, 'sandbox_type', new='ssh'
+        config.sandbox, 'box_type', new='ssh'
     ):
     ):
         for box in [
         for box in [
             DockerSSHBox()
             DockerSSHBox()
@@ -134,7 +134,7 @@ def test_ssh_box_multi_line_cmd_run_as_devin(temp_dir):
     with patch.object(config, 'workspace_base', new=temp_dir), patch.object(
     with patch.object(config, 'workspace_base', new=temp_dir), patch.object(
         config, 'workspace_mount_path', new=temp_dir
         config, 'workspace_mount_path', new=temp_dir
     ), patch.object(config, 'run_as_devin', new='true'), patch.object(
     ), patch.object(config, 'run_as_devin', new='true'), patch.object(
-        config, 'sandbox_type', new='ssh'
+        config.sandbox, 'box_type', new='ssh'
     ):
     ):
         box = DockerSSHBox()
         box = DockerSSHBox()
         exit_code, output = box.execute('pwd && ls -l')
         exit_code, output = box.execute('pwd && ls -l')
@@ -152,7 +152,7 @@ def test_ssh_box_stateful_cmd_run_as_devin(temp_dir):
     with patch.object(config, 'workspace_base', new=temp_dir), patch.object(
     with patch.object(config, 'workspace_base', new=temp_dir), patch.object(
         config, 'workspace_mount_path', new=temp_dir
         config, 'workspace_mount_path', new=temp_dir
     ), patch.object(config, 'run_as_devin', new='true'), patch.object(
     ), patch.object(config, 'run_as_devin', new='true'), patch.object(
-        config, 'sandbox_type', new='ssh'
+        config.sandbox, 'box_type', new='ssh'
     ):
     ):
         box = DockerSSHBox()
         box = DockerSSHBox()
         exit_code, output = box.execute('mkdir test')
         exit_code, output = box.execute('mkdir test')
@@ -178,7 +178,7 @@ def test_ssh_box_failed_cmd_run_as_devin(temp_dir):
     with patch.object(config, 'workspace_base', new=temp_dir), patch.object(
     with patch.object(config, 'workspace_base', new=temp_dir), patch.object(
         config, 'workspace_mount_path', new=temp_dir
         config, 'workspace_mount_path', new=temp_dir
     ), patch.object(config, 'run_as_devin', new='true'), patch.object(
     ), patch.object(config, 'run_as_devin', new='true'), patch.object(
-        config, 'sandbox_type', new='ssh'
+        config.sandbox, 'box_type', new='ssh'
     ):
     ):
         box = DockerSSHBox()
         box = DockerSSHBox()
         exit_code, output = box.execute('non_existing_command')
         exit_code, output = box.execute('non_existing_command')
@@ -193,7 +193,7 @@ def test_single_multiline_command(temp_dir):
     with patch.object(config, 'workspace_base', new=temp_dir), patch.object(
     with patch.object(config, 'workspace_base', new=temp_dir), patch.object(
         config, 'workspace_mount_path', new=temp_dir
         config, 'workspace_mount_path', new=temp_dir
     ), patch.object(config, 'run_as_devin', new='true'), patch.object(
     ), patch.object(config, 'run_as_devin', new='true'), patch.object(
-        config, 'sandbox_type', new='ssh'
+        config.sandbox, 'box_type', new='ssh'
     ):
     ):
         box = DockerSSHBox()
         box = DockerSSHBox()
         exit_code, output = box.execute('echo \\\n -e "foo"')
         exit_code, output = box.execute('echo \\\n -e "foo"')
@@ -209,7 +209,7 @@ def test_multiline_echo(temp_dir):
     with patch.object(config, 'workspace_base', new=temp_dir), patch.object(
     with patch.object(config, 'workspace_base', new=temp_dir), patch.object(
         config, 'workspace_mount_path', new=temp_dir
         config, 'workspace_mount_path', new=temp_dir
     ), patch.object(config, 'run_as_devin', new='true'), patch.object(
     ), patch.object(config, 'run_as_devin', new='true'), patch.object(
-        config, 'sandbox_type', new='ssh'
+        config.sandbox, 'box_type', new='ssh'
     ):
     ):
         box = DockerSSHBox()
         box = DockerSSHBox()
         exit_code, output = box.execute('echo -e "hello\nworld"')
         exit_code, output = box.execute('echo -e "hello\nworld"')
@@ -226,7 +226,7 @@ def test_sandbox_whitespace(temp_dir):
     with patch.object(config, 'workspace_base', new=temp_dir), patch.object(
     with patch.object(config, 'workspace_base', new=temp_dir), patch.object(
         config, 'workspace_mount_path', new=temp_dir
         config, 'workspace_mount_path', new=temp_dir
     ), patch.object(config, 'run_as_devin', new='true'), patch.object(
     ), patch.object(config, 'run_as_devin', new='true'), patch.object(
-        config, 'sandbox_type', new='ssh'
+        config.sandbox, 'box_type', new='ssh'
     ):
     ):
         box = DockerSSHBox()
         box = DockerSSHBox()
         exit_code, output = box.execute('echo -e "\\n\\n\\n"')
         exit_code, output = box.execute('echo -e "\\n\\n\\n"')
@@ -242,7 +242,7 @@ def test_sandbox_jupyter_plugin(temp_dir):
     with patch.object(config, 'workspace_base', new=temp_dir), patch.object(
     with patch.object(config, 'workspace_base', new=temp_dir), patch.object(
         config, 'workspace_mount_path', new=temp_dir
         config, 'workspace_mount_path', new=temp_dir
     ), patch.object(config, 'run_as_devin', new='true'), patch.object(
     ), patch.object(config, 'run_as_devin', new='true'), patch.object(
-        config, 'sandbox_type', new='ssh'
+        config.sandbox, 'box_type', new='ssh'
     ):
     ):
         box = DockerSSHBox()
         box = DockerSSHBox()
         box.init_plugins([JupyterRequirement])
         box.init_plugins([JupyterRequirement])
@@ -335,8 +335,8 @@ def test_sandbox_jupyter_agentskills_fileop_pwd(temp_dir):
     # get a temporary directory
     # get a temporary directory
     with patch.object(config, 'workspace_base', new=temp_dir), patch.object(
     with patch.object(config, 'workspace_base', new=temp_dir), patch.object(
         config, 'workspace_mount_path', new=temp_dir
         config, 'workspace_mount_path', new=temp_dir
-    ), patch.object(config, 'run_as_devin', new=True), patch.object(
-        config, 'sandbox_type', new='ssh'
+    ), patch.object(config, 'run_as_devin', new='true'), patch.object(
+        config.sandbox, 'box_type', new='ssh'
     ), patch.object(config, 'enable_auto_lint', new=True):
     ), patch.object(config, 'enable_auto_lint', new=True):
         assert config.enable_auto_lint
         assert config.enable_auto_lint
         box = DockerSSHBox()
         box = DockerSSHBox()
@@ -353,9 +353,9 @@ def test_agnostic_sandbox_jupyter_agentskills_fileop_pwd(temp_dir):
         with patch.object(config, 'workspace_base', new=temp_dir), patch.object(
         with patch.object(config, 'workspace_base', new=temp_dir), patch.object(
             config, 'workspace_mount_path', new=temp_dir
             config, 'workspace_mount_path', new=temp_dir
         ), patch.object(config, 'run_as_devin', new=True), patch.object(
         ), patch.object(config, 'run_as_devin', new=True), patch.object(
-            config, 'sandbox_type', new='ssh'
+            config.sandbox, 'box_type', new='ssh'
         ), patch.object(
         ), patch.object(
-            config, 'sandbox_container_image', new=base_sandbox_image
+            config.sandbox, 'container_image', new=base_sandbox_image
         ), patch.object(config, 'enable_auto_lint', new=False):
         ), patch.object(config, 'enable_auto_lint', new=False):
             assert not config.enable_auto_lint
             assert not config.enable_auto_lint
             box = DockerSSHBox()
             box = DockerSSHBox()