Browse Source Code

Remove global config from sandbox (#2961)

* Some changes

* Fixed errors

* Remove duplicate initialize_plugins

* Fix some tests

* Fix tests
Graham Neubig, 1 year ago
parent
commit
257698e89b

+ 4 - 6
opendevin/core/config.py

@@ -139,6 +139,10 @@ class SandboxConfig(metaclass=Singleton):
     )
     user_id: int = os.getuid() if hasattr(os, 'getuid') else 1000
     timeout: int = 120
+    enable_auto_lint: bool = (
+        False  # once enabled, OpenDevin would lint files after editing
+    )
+    initialize_plugins: bool = True
 
     def defaults_to_dict(self) -> dict:
         """Serialize fields to a dict for the frontend, including type hints, defaults, and whether it's optional."""
@@ -189,9 +193,7 @@ class AppConfig(metaclass=Singleton):
         use_host_network: Whether to use the host network.
         ssh_hostname: The SSH hostname.
         disable_color: Whether to disable color. For terminals that don't support color.
-        initialize_plugins: Whether to initialize plugins.
         debug: Whether to enable debugging.
-        enable_auto_lint: Whether to enable auto linting. This is False by default, for regular runs of the app. For evaluation, please set this to True.
         enable_cli_session: Whether to enable saving and restoring the session when run from CLI.
         file_uploads_max_file_size_mb: Maximum file size for uploads in megabytes. 0 means no limit.
         file_uploads_restrict_file_types: Whether to restrict file types for file uploads. Defaults to False.
@@ -220,15 +222,11 @@ class AppConfig(metaclass=Singleton):
     use_host_network: bool = False
     ssh_hostname: str = 'localhost'
     disable_color: bool = False
-    initialize_plugins: bool = True
     persist_sandbox: bool = False
     ssh_port: int = 63710
     ssh_password: str | None = None
     jwt_secret: str = uuid.uuid4().hex
     debug: bool = False
-    enable_auto_lint: bool = (
-        False  # once enabled, OpenDevin would lint files after editing
-    )
     enable_cli_session: bool = False
     file_uploads_max_file_size_mb: int = 0
     file_uploads_restrict_file_types: bool = False

+ 17 - 13
opendevin/runtime/docker/local_box.py

@@ -3,7 +3,7 @@ import os
 import subprocess
 import sys
 
-from opendevin.core.config import config
+from opendevin.core.config import SandboxConfig
 from opendevin.core.logger import opendevin_logger as logger
 from opendevin.core.schema import CancellableStream
 from opendevin.runtime.sandbox import Sandbox
@@ -25,24 +25,28 @@ from opendevin.runtime.sandbox import Sandbox
 
 
 class LocalBox(Sandbox):
-    def __init__(self, timeout: int = config.sandbox.timeout):
-        os.makedirs(config.workspace_base, exist_ok=True)
-        self.timeout = timeout
+    def __init__(
+        self,
+        config: SandboxConfig,
+        workspace_base: str,
+    ):
+        self.config = config
+        os.makedirs(workspace_base, exist_ok=True)
+        self.workspace_base = workspace_base
         atexit.register(self.cleanup)
-        super().__init__()
+        super().__init__(config)
 
     def execute(
         self, cmd: str, stream: bool = False, timeout: int | None = None
     ) -> tuple[int, str | CancellableStream]:
-        timeout = timeout if timeout is not None else self.timeout
         try:
             completed_process = subprocess.run(
                 cmd,
                 shell=True,
                 text=True,
                 capture_output=True,
-                timeout=timeout,
-                cwd=config.workspace_base,
+                timeout=self.config.timeout,
+                cwd=self.workspace_base,
                 env=self._env,
             )
             return completed_process.returncode, completed_process.stdout.strip()
@@ -55,7 +59,7 @@ class LocalBox(Sandbox):
             f'mkdir -p {sandbox_dest}',
             shell=True,
             text=True,
-            cwd=config.workspace_base,
+            cwd=self.workspace_base,
             env=self._env,
         )
         if res.returncode != 0:
@@ -66,7 +70,7 @@ class LocalBox(Sandbox):
                 f'cp -r {host_src} {sandbox_dest}',
                 shell=True,
                 text=True,
-                cwd=config.workspace_base,
+                cwd=self.workspace_base,
                 env=self._env,
             )
             if res.returncode != 0:
@@ -78,7 +82,7 @@ class LocalBox(Sandbox):
                 f'cp {host_src} {sandbox_dest}',
                 shell=True,
                 text=True,
-                cwd=config.workspace_base,
+                cwd=self.workspace_base,
                 env=self._env,
             )
             if res.returncode != 0:
@@ -93,11 +97,11 @@ class LocalBox(Sandbox):
         self.close()
 
     def get_working_directory(self):
-        return config.workspace_base
+        return self.workspace_base
 
 
 if __name__ == '__main__':
-    local_box = LocalBox()
+    local_box = LocalBox(SandboxConfig(), '/tmp/opendevin')
     sys.stdout.flush()
     try:
         while True:

+ 55 - 54
opendevin/runtime/docker/ssh_box.py

@@ -12,11 +12,12 @@ import docker
 from pexpect import exceptions, pxssh
 from tenacity import retry, stop_after_attempt, wait_fixed
 
-from opendevin.core.config import config
+from opendevin.core.config import SandboxConfig
 from opendevin.core.const.guide_url import TROUBLESHOOTING_URL
 from opendevin.core.logger import opendevin_logger as logger
 from opendevin.core.schema import CancellableStream
 from opendevin.runtime.plugins import AgentSkillsRequirement, JupyterRequirement
+from opendevin.runtime.plugins.requirement import PluginRequirement
 from opendevin.runtime.sandbox import Sandbox
 from opendevin.runtime.utils import find_available_tcp_port
 from opendevin.runtime.utils.image_agnostic import get_od_sandbox_image
@@ -199,16 +200,30 @@ class DockerSSHBox(Sandbox):
 
     _ssh_password: str
     _ssh_port: int
-    ssh: pxssh.pxssh
+    ssh: pxssh.pxssh | None = None
 
     def __init__(
         self,
-        container_image: str | None = None,
-        timeout: int = config.sandbox.timeout,
+        config: SandboxConfig,
+        persist_sandbox: bool,
+        workspace_mount_path: str,
+        sandbox_workspace_dir: str,
+        cache_dir: str,
+        use_host_network: bool,
+        run_as_devin: bool,
+        ssh_hostname: str = 'host.docker.internal',
+        ssh_password: str | None = None,
+        ssh_port: int = 22,
         sid: str | None = None,
     ):
+        self.config = config
+        self.workspace_mount_path = workspace_mount_path
+        self.sandbox_workspace_dir = sandbox_workspace_dir
+        self.cache_dir = cache_dir
+        self.use_host_network = use_host_network
+        self.run_as_devin = run_as_devin
         logger.info(
-            f'SSHBox is running as {"opendevin" if self.run_as_devin else "root"} user with USER_ID={self.user_id} in the sandbox'
+            f'SSHBox is running as {"opendevin" if self.run_as_devin else "root"} user with USER_ID={config.user_id} in the sandbox'
         )
         # Initialize docker client. Throws an exception if Docker is not reachable.
         try:
@@ -220,7 +235,7 @@ class DockerSSHBox(Sandbox):
             )
             raise ex
 
-        if config.persist_sandbox:
+        if persist_sandbox:
             if not self.run_as_devin:
                 raise Exception(
                     'Persistent sandbox is currently designed for opendevin user only. Please set run_as_devin=True in your config.toml'
@@ -229,21 +244,19 @@ class DockerSSHBox(Sandbox):
         else:
             self.instance_id = (sid or '') + str(uuid.uuid4())
 
-        self.timeout = timeout
-        self.container_image = container_image or config.sandbox.container_image
         self.container_image = get_od_sandbox_image(
-            self.container_image, self.docker_client
+            config.container_image, self.docker_client
         )
         self.container_name = self.container_name_prefix + self.instance_id
 
         # set up random user password
-        if config.persist_sandbox:
-            if not config.ssh_password:
-                raise Exception(
-                    'Please add ssh_password to your config.toml or add -e SSH_PASSWORD to your docker run command'
-                )
-            self._ssh_password = config.ssh_password
-            self._ssh_port = config.ssh_port
+        self.persist_sandbox = persist_sandbox
+        self.ssh_hostname = ssh_hostname
+        if persist_sandbox:
+            if not ssh_password:
+                raise ValueError('ssh_password is required for persistent sandbox')
+            self._ssh_password = ssh_password
+            self._ssh_port = ssh_port
         else:
             self._ssh_password = str(uuid.uuid4())
             self._ssh_port = find_available_tcp_port()
@@ -253,7 +266,7 @@ class DockerSSHBox(Sandbox):
         except docker.errors.NotFound:
             self.is_initial_session = True
             logger.info('Detected initial session.')
-        if not config.persist_sandbox or self.is_initial_session:
+        if not persist_sandbox or self.is_initial_session:
             logger.info('Creating new Docker container')
             n_tries = 5
             while n_tries > 0:
@@ -285,10 +298,7 @@ class DockerSSHBox(Sandbox):
         self.execute('git config --global user.name "OpenDevin"')
         self.execute('git config --global user.email "opendevin@all-hands.dev"')
         atexit.register(self.close)
-        super().__init__()
-
-    def add_to_env(self, key: str, value: str):
-        super().add_to_env(key, value)
+        super().__init__(config)
 
     def setup_user(self):
         # Make users sudoers passwordless
@@ -325,7 +335,7 @@ class DockerSSHBox(Sandbox):
                 [
                     '/bin/bash',
                     '-c',
-                    f'useradd -rm -d /home/opendevin -s /bin/bash -g root -G sudo -u {self.user_id} opendevin',
+                    f'useradd -rm -d /home/opendevin -s /bin/bash -g root -G sudo -u {self.config.user_id} opendevin',
                 ],
                 workdir=self.sandbox_workspace_dir,
                 environment=self._env,
@@ -408,13 +418,13 @@ class DockerSSHBox(Sandbox):
         try:
             self.ssh = pxssh.pxssh(
                 echo=False,
-                timeout=self.timeout,
+                timeout=self.config.timeout,
                 encoding='utf-8',
                 codec_errors='replace',
             )
             hostname = self.ssh_hostname
             username = 'opendevin' if self.run_as_devin else 'root'
-            if config.persist_sandbox:
+            if self.persist_sandbox:
                 password_msg = 'using your SSH password'
             else:
                 password_msg = f"using the password '{self._ssh_password}'"
@@ -434,6 +444,7 @@ class DockerSSHBox(Sandbox):
 
     def start_ssh_session(self):
         self.__ssh_login()
+        assert self.ssh is not None
 
         # Fix: https://github.com/pexpect/pexpect/issues/669
         self.ssh.sendline("bind 'set enable-bracketed-paste off'")
@@ -454,6 +465,7 @@ class DockerSSHBox(Sandbox):
         prev_output: str = '',
         ignore_last_output: bool = False,
     ) -> tuple[int, str]:
+        assert self.ssh is not None
         logger.exception(
             f'Command "{cmd}" timed out, killing process...', exc_info=False
         )
@@ -471,7 +483,8 @@ class DockerSSHBox(Sandbox):
     def execute(
         self, cmd: str, stream: bool = False, timeout: int | None = None
     ) -> tuple[int, str | CancellableStream]:
-        timeout = timeout or self.timeout
+        assert self.ssh is not None
+        timeout = timeout or self.config.timeout
         commands = split_bash_commands(cmd)
         if len(commands) > 1:
             all_output = ''
@@ -486,7 +499,7 @@ class DockerSSHBox(Sandbox):
 
         self.ssh.sendline(cmd)
         if stream:
-            return 0, SSHExecCancellableStream(self.ssh, cmd, self.timeout)
+            return 0, SSHExecCancellableStream(self.ssh, cmd, self.config.timeout)
         success = self.ssh.prompt(timeout=timeout)
         if not success:
             return self._send_interrupt(cmd)
@@ -580,7 +593,7 @@ class DockerSSHBox(Sandbox):
             while container.status != 'running':
                 time.sleep(1)
                 elapsed += 1
-                if elapsed > self.timeout:
+                if elapsed > self.config.timeout:
                     break
                 container = self.docker_client.containers.get(self.container_name)
         except Exception:
@@ -597,7 +610,7 @@ class DockerSSHBox(Sandbox):
             while container.status != 'exited':
                 time.sleep(1)
                 elapsed += 1
-                if elapsed > self.timeout:
+                if elapsed > self.config.timeout:
                     break
                 container = self.docker_client.containers.get(self.container_name)
         except docker.errors.NotFound:
@@ -609,26 +622,6 @@ class DockerSSHBox(Sandbox):
             raise Exception('Failed to get working directory')
         return str(result).strip()
 
-    @property
-    def user_id(self):
-        return config.sandbox.user_id
-
-    @property
-    def run_as_devin(self):
-        return config.run_as_devin
-
-    @property
-    def sandbox_workspace_dir(self):
-        return config.workspace_mount_path_in_sandbox
-
-    @property
-    def ssh_hostname(self):
-        return config.ssh_hostname
-
-    @property
-    def use_host_network(self):
-        return config.use_host_network
-
     def is_container_running(self):
         try:
             container = self.docker_client.containers.get(self.container_name)
@@ -641,11 +634,11 @@ class DockerSSHBox(Sandbox):
 
     @property
     def volumes(self):
-        mount_dir = config.workspace_mount_path
+        mount_dir = self.workspace_mount_path
         return {
             mount_dir: {'bind': self.sandbox_workspace_dir, 'mode': 'rw'},
             # mount cache directory to /home/opendevin/.cache for pip cache reuse
-            config.cache_dir: {
+            self.cache_dir: {
                 'bind': (
                     '/home/opendevin/.cache' if self.run_as_devin else '/root/.cache'
                 ),
@@ -706,7 +699,7 @@ class DockerSSHBox(Sandbox):
             logger.info(
                 f'waiting for container to start: {elapsed}, container status: {self.container.status}'
             )
-            if elapsed > self.timeout:
+            if elapsed > self.config.timeout:
                 break
         if self.container.status != 'running':
             raise Exception('Failed to start container')
@@ -717,7 +710,7 @@ class DockerSSHBox(Sandbox):
         for container in containers:
             try:
                 if container.name.startswith(self.container_name):
-                    if config.persist_sandbox:
+                    if self.persist_sandbox:
                         container.stop()
                     else:
                         # only remove the container we created
@@ -731,7 +724,15 @@ class DockerSSHBox(Sandbox):
 
 if __name__ == '__main__':
     try:
-        ssh_box = DockerSSHBox()
+        ssh_box = DockerSSHBox(
+            config=SandboxConfig(),
+            run_as_devin=False,
+            workspace_mount_path='/path/to/workspace',
+            cache_dir='/path/to/cache',
+            sandbox_workspace_dir='/sandbox',
+            use_host_network=False,
+            persist_sandbox=False,
+        )
     except Exception as e:
         logger.exception('Failed to start Docker container: %s', e)
         sys.exit(1)
@@ -741,7 +742,7 @@ if __name__ == '__main__':
     )
 
     # Initialize required plugins
-    plugins = [AgentSkillsRequirement(), JupyterRequirement()]
+    plugins: list[PluginRequirement] = [AgentSkillsRequirement(), JupyterRequirement()]
     ssh_box.init_plugins(plugins)
     logger.info(
         '--- AgentSkills COMMAND DOCUMENTATION ---\n'

+ 6 - 6
opendevin/runtime/e2b/sandbox.py

@@ -7,7 +7,7 @@ from e2b.sandbox.exception import (
     TimeoutException,
 )
 
-from opendevin.core.config import config
+from opendevin.core.config import SandboxConfig
 from opendevin.core.logger import opendevin_logger as logger
 from opendevin.core.schema import CancellableStream
 from opendevin.runtime.sandbox import Sandbox
@@ -19,20 +19,20 @@ class E2BBox(Sandbox):
 
     def __init__(
         self,
+        config: SandboxConfig,
+        e2b_api_key: str,
         template: str = 'open-devin',
-        timeout: int = config.sandbox.timeout,
     ):
+        super().__init__(config)
         self.sandbox = E2BSandbox(
-            api_key=config.e2b_api_key,
+            api_key=e2b_api_key,
             template=template,
             # It's possible to stream stdout and stderr from sandbox and from each process
             on_stderr=lambda x: logger.info(f'E2B sandbox stderr: {x}'),
             on_stdout=lambda x: logger.info(f'E2B sandbox stdout: {x}'),
             cwd=self._cwd,  # Default workdir inside sandbox
         )
-        self.timeout = timeout
         logger.info(f'Started E2B sandbox with ID "{self.sandbox.id}"')
-        super().__init__()
 
     @property
     def filesystem(self):
@@ -64,7 +64,7 @@ class E2BBox(Sandbox):
     def execute(
         self, cmd: str, stream: bool = False, timeout: int | None = None
     ) -> tuple[int, str | CancellableStream]:
-        timeout = timeout if timeout is not None else self.timeout
+        timeout = timeout if timeout is not None else self.config.timeout
         process = self.sandbox.process.start(cmd, env_vars=self._env)
         try:
             process_output = process.wait(timeout=timeout)

+ 4 - 2
opendevin/runtime/sandbox.py

@@ -1,8 +1,9 @@
+import copy
 import json
 import os
 from abc import ABC, abstractmethod
 
-from opendevin.core.config import config
+from opendevin.core.config import SandboxConfig
 from opendevin.core.schema import CancellableStream
 from opendevin.runtime.plugins.mixin import PluginMixin
 
@@ -11,7 +12,8 @@ class Sandbox(ABC, PluginMixin):
     _env: dict[str, str] = {}
     is_initial_session: bool = True
 
-    def __init__(self, **kwargs):
+    def __init__(self, config: SandboxConfig):
+        self.config = copy.deepcopy(config)
         for key in os.environ:
             if key.startswith('SANDBOX_ENV_'):
                 sandbox_key = key.removeprefix('SANDBOX_ENV_')

+ 18 - 3
opendevin/runtime/server/runtime.py

@@ -36,11 +36,26 @@ from .files import read_file, write_file
 
 def create_sandbox(sid: str = 'default', box_type: str = 'ssh') -> Sandbox:
     if box_type == 'local':
-        return LocalBox()
+        return LocalBox(config=config.sandbox, workspace_base=config.workspace_base)
     elif box_type == 'ssh':
-        return DockerSSHBox(sid=sid)
+        return DockerSSHBox(
+            config=config.sandbox,
+            persist_sandbox=config.persist_sandbox,
+            workspace_mount_path=config.workspace_mount_path,
+            sandbox_workspace_dir=config.workspace_mount_path_in_sandbox,
+            cache_dir=config.cache_dir,
+            use_host_network=config.use_host_network,
+            run_as_devin=config.run_as_devin,
+            ssh_hostname=config.ssh_hostname,
+            ssh_password=config.ssh_password,
+            ssh_port=config.ssh_port,
+            sid=sid,
+        )
     elif box_type == 'e2b':
-        return E2BBox()
+        return E2BBox(
+            config=config.sandbox,
+            e2b_api_key=config.e2b_api_key,
+        )
     else:
         raise ValueError(f'Invalid sandbox type: {box_type}')
 

+ 12 - 1
tests/unit/test_ipython.py

@@ -81,7 +81,18 @@ def test_sandbox_jupyter_plugin_backticks(temp_dir):
     ), patch.object(config, 'run_as_devin', new='true'), patch.object(
         config.sandbox, 'box_type', new='ssh'
     ):
-        box = DockerSSHBox()
+        box = DockerSSHBox(
+            config=config.sandbox,
+            persist_sandbox=config.persist_sandbox,
+            workspace_mount_path=config.workspace_mount_path,
+            sandbox_workspace_dir=config.workspace_mount_path_in_sandbox,
+            cache_dir=config.cache_dir,
+            use_host_network=config.use_host_network,
+            run_as_devin=config.run_as_devin,
+            ssh_hostname=config.ssh_hostname,
+            ssh_password=config.ssh_password,
+            ssh_port=config.ssh_port,
+        )
         box.init_plugins([JupyterRequirement])
         test_code = "print('Hello, `World`!')"
         expected_write_command = (

+ 123 - 161
tests/unit/test_sandbox.py

@@ -5,12 +5,27 @@ from unittest.mock import patch
 
 import pytest
 
-from opendevin.core.config import config
+from opendevin.core.config import AppConfig, config
 from opendevin.runtime.docker.local_box import LocalBox
 from opendevin.runtime.docker.ssh_box import DockerSSHBox, split_bash_commands
 from opendevin.runtime.plugins import AgentSkillsRequirement, JupyterRequirement
 
 
+def create_docker_box_from_app_config(config: AppConfig, path: str) -> DockerSSHBox:
+    return DockerSSHBox(
+        config=config.sandbox,
+        persist_sandbox=config.persist_sandbox,
+        workspace_mount_path=path,
+        sandbox_workspace_dir=config.workspace_mount_path_in_sandbox,
+        cache_dir=config.cache_dir,
+        use_host_network=config.use_host_network,
+        run_as_devin=True,
+        ssh_hostname=config.ssh_hostname,
+        ssh_password=config.ssh_password,
+        ssh_port=config.ssh_port,
+    )
+
+
 @pytest.fixture
 def temp_dir(monkeypatch):
     # get a temporary directory
@@ -21,14 +36,18 @@ def temp_dir(monkeypatch):
 
 def test_env_vars(temp_dir):
     os.environ['SANDBOX_ENV_FOOBAR'] = 'BAZ'
-    for box_class in [DockerSSHBox, LocalBox]:
-        box = box_class()
-        box.add_to_env('QUUX', 'abc"def')
+    for box in [
+        create_docker_box_from_app_config(config, temp_dir),
+        LocalBox(config.sandbox, temp_dir),
+    ]:
+        box.add_to_env(key='QUUX', value='abc"def')
         assert box._env['FOOBAR'] == 'BAZ'
         assert box._env['QUUX'] == 'abc"def'
         exit_code, output = box.execute('echo $FOOBAR $QUUX')
         assert exit_code == 0, 'The exit code should be 0.'
-        assert output.strip() == 'BAZ abc"def', f'Output: {output} for {box_class}'
+        assert (
+            output.strip() == 'BAZ abc"def'
+        ), f'Output: {output} for {box.__class__.__name__}'
 
 
 def test_split_commands():
@@ -89,170 +108,121 @@ EOF
 
 def test_ssh_box_run_as_devin(temp_dir):
     # get a temporary directory
-    with patch.object(config, 'workspace_base', new=temp_dir), patch.object(
-        config, 'workspace_mount_path', new=temp_dir
-    ), patch.object(config, 'run_as_devin', new='true'), patch.object(
-        config.sandbox, 'box_type', new='ssh'
-    ):
-        for box in [
-            DockerSSHBox()
-        ]:  # FIXME: permission error on mkdir test for exec box
-            exit_code, output = box.execute('ls -l')
-            assert exit_code == 0, (
-                'The exit code should be 0 for ' + box.__class__.__name__
-            )
-            assert output.strip() == 'total 0'
-
-            assert config.workspace_base == temp_dir
-            exit_code, output = box.execute('ls -l')
-            assert exit_code == 0, 'The exit code should be 0.'
-            assert output.strip() == 'total 0'
-
-            exit_code, output = box.execute('mkdir test')
-            assert exit_code == 0, 'The exit code should be 0.'
-            assert output.strip() == ''
-
-            exit_code, output = box.execute('ls -l')
-            assert exit_code == 0, 'The exit code should be 0.'
-            assert (
-                'opendevin' in output
-            ), "The output should contain username 'opendevin'"
-            assert 'test' in output, 'The output should contain the test directory'
-
-            exit_code, output = box.execute('touch test/foo.txt')
-            assert exit_code == 0, 'The exit code should be 0.'
-            assert output.strip() == ''
-
-            exit_code, output = box.execute('ls -l test')
-            assert exit_code == 0, 'The exit code should be 0.'
-            assert 'foo.txt' in output, 'The output should contain the foo.txt file'
-            box.close()
-
-
-def test_ssh_box_multi_line_cmd_run_as_devin(temp_dir):
-    # get a temporary directory
-    with patch.object(config, 'workspace_base', new=temp_dir), patch.object(
-        config, 'workspace_mount_path', new=temp_dir
-    ), patch.object(config, 'run_as_devin', new='true'), patch.object(
-        config.sandbox, 'box_type', new='ssh'
-    ):
-        box = DockerSSHBox()
-        exit_code, output = box.execute('pwd && ls -l')
+    for box in [
+        create_docker_box_from_app_config(config, temp_dir)
+    ]:  # FIXME: permission error on mkdir test for exec box
+        exit_code, output = box.execute('ls -l')
         assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__
-        expected_lines = ['/workspace', 'total 0']
-        line_sep = '\r\n' if isinstance(box, DockerSSHBox) else '\n'
-        assert output == line_sep.join(expected_lines), (
-            'The output should be the same as the input for ' + box.__class__.__name__
-        )
-        box.close()
+        assert output.strip() == 'total 0'
 
+        assert box.workspace_mount_path == temp_dir
+        exit_code, output = box.execute('ls -l')
+        assert exit_code == 0, 'The exit code should be 0.'
+        assert output.strip() == 'total 0'
 
-def test_ssh_box_stateful_cmd_run_as_devin(temp_dir):
-    # get a temporary directory
-    with patch.object(config, 'workspace_base', new=temp_dir), patch.object(
-        config, 'workspace_mount_path', new=temp_dir
-    ), patch.object(config, 'run_as_devin', new='true'), patch.object(
-        config.sandbox, 'box_type', new='ssh'
-    ):
-        box = DockerSSHBox()
         exit_code, output = box.execute('mkdir test')
         assert exit_code == 0, 'The exit code should be 0.'
         assert output.strip() == ''
 
-        exit_code, output = box.execute('cd test')
-        assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__
-        assert output.strip() == '', (
-            'The output should be empty for ' + box.__class__.__name__
-        )
+        exit_code, output = box.execute('ls -l')
+        assert exit_code == 0, 'The exit code should be 0.'
+        assert 'opendevin' in output, "The output should contain username 'opendevin'"
+        assert 'test' in output, 'The output should contain the test directory'
 
-        exit_code, output = box.execute('pwd')
-        assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__
-        assert output.strip() == '/workspace/test', (
-            'The output should be /workspace for ' + box.__class__.__name__
-        )
+        exit_code, output = box.execute('touch test/foo.txt')
+        assert exit_code == 0, 'The exit code should be 0.'
+        assert output.strip() == ''
+
+        exit_code, output = box.execute('ls -l test')
+        assert exit_code == 0, 'The exit code should be 0.'
+        assert 'foo.txt' in output, 'The output should contain the foo.txt file'
         box.close()
 
 
+def test_ssh_box_multi_line_cmd_run_as_devin(temp_dir):
+    box = create_docker_box_from_app_config(config, temp_dir)
+    exit_code, output = box.execute('pwd && ls -l')
+    assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__
+    expected_lines = ['/workspace', 'total 0']
+    line_sep = '\r\n' if isinstance(box, DockerSSHBox) else '\n'
+    assert output == line_sep.join(expected_lines), (
+        'The output should be the same as the input for ' + box.__class__.__name__
+    )
+    box.close()
+
+
+def test_ssh_box_stateful_cmd_run_as_devin(temp_dir):
+    box = create_docker_box_from_app_config(config, temp_dir)
+    exit_code, output = box.execute('mkdir test')
+    assert exit_code == 0, 'The exit code should be 0.'
+    assert output.strip() == ''
+
+    exit_code, output = box.execute('cd test')
+    assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__
+    assert output.strip() == '', (
+        'The output should be empty for ' + box.__class__.__name__
+    )
+
+    exit_code, output = box.execute('pwd')
+    assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__
+    assert output.strip() == '/workspace/test', (
+        'The output should be /workspace for ' + box.__class__.__name__
+    )
+    box.close()
+
+
 def test_ssh_box_failed_cmd_run_as_devin(temp_dir):
-    # get a temporary directory
-    with patch.object(config, 'workspace_base', new=temp_dir), patch.object(
-        config, 'workspace_mount_path', new=temp_dir
-    ), patch.object(config, 'run_as_devin', new='true'), patch.object(
-        config.sandbox, 'box_type', new='ssh'
-    ):
-        box = DockerSSHBox()
-        exit_code, output = box.execute('non_existing_command')
-        assert exit_code != 0, (
-            'The exit code should not be 0 for a failed command for '
-            + box.__class__.__name__
-        )
-        box.close()
+    box = create_docker_box_from_app_config(config, temp_dir)
+    exit_code, output = box.execute('non_existing_command')
+    assert exit_code != 0, (
+        'The exit code should not be 0 for a failed command for '
+        + box.__class__.__name__
+    )
+    box.close()
 
 
 def test_single_multiline_command(temp_dir):
-    with patch.object(config, 'workspace_base', new=temp_dir), patch.object(
-        config, 'workspace_mount_path', new=temp_dir
-    ), patch.object(config, 'run_as_devin', new='true'), patch.object(
-        config.sandbox, 'box_type', new='ssh'
-    ):
-        box = DockerSSHBox()
-        exit_code, output = box.execute('echo \\\n -e "foo"')
-        assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__
-        # FIXME: why is there a `>` in the output? Probably PS2?
-        assert output == '> foo', (
-            'The output should be the same as the input for ' + box.__class__.__name__
-        )
-        box.close()
+    box = create_docker_box_from_app_config(config, temp_dir)
+    exit_code, output = box.execute('echo \\\n -e "foo"')
+    assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__
+    # FIXME: why is there a `>` in the output? Probably PS2?
+    assert output == '> foo', (
+        'The output should be the same as the input for ' + box.__class__.__name__
+    )
+    box.close()
 
 
 def test_multiline_echo(temp_dir):
-    with patch.object(config, 'workspace_base', new=temp_dir), patch.object(
-        config, 'workspace_mount_path', new=temp_dir
-    ), patch.object(config, 'run_as_devin', new='true'), patch.object(
-        config.sandbox, 'box_type', new='ssh'
-    ):
-        box = DockerSSHBox()
-        exit_code, output = box.execute('echo -e "hello\nworld"')
-        assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__
-        # FIXME: why is there a `>` in the output?
-        assert output == '> hello\r\nworld', (
-            'The output should be the same as the input for ' + box.__class__.__name__
-        )
-        box.close()
+    box = create_docker_box_from_app_config(config, temp_dir)
+    exit_code, output = box.execute('echo -e "hello\nworld"')
+    assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__
+    # FIXME: why is there a `>` in the output?
+    assert output == '> hello\r\nworld', (
+        'The output should be the same as the input for ' + box.__class__.__name__
+    )
+    box.close()
 
 
 def test_sandbox_whitespace(temp_dir):
-    # get a temporary directory
-    with patch.object(config, 'workspace_base', new=temp_dir), patch.object(
-        config, 'workspace_mount_path', new=temp_dir
-    ), patch.object(config, 'run_as_devin', new='true'), patch.object(
-        config.sandbox, 'box_type', new='ssh'
-    ):
-        box = DockerSSHBox()
-        exit_code, output = box.execute('echo -e "\\n\\n\\n"')
-        assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__
-        assert output == '\r\n\r\n\r\n', (
-            'The output should be the same as the input for ' + box.__class__.__name__
-        )
-        box.close()
+    box = create_docker_box_from_app_config(config, temp_dir)
+    exit_code, output = box.execute('echo -e "\\n\\n\\n"')
+    assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__
+    assert output == '\r\n\r\n\r\n', (
+        'The output should be the same as the input for ' + box.__class__.__name__
+    )
+    box.close()
 
 
 def test_sandbox_jupyter_plugin(temp_dir):
-    # get a temporary directory
-    with patch.object(config, 'workspace_base', new=temp_dir), patch.object(
-        config, 'workspace_mount_path', new=temp_dir
-    ), patch.object(config, 'run_as_devin', new='true'), patch.object(
-        config.sandbox, 'box_type', new='ssh'
-    ):
-        box = DockerSSHBox()
-        box.init_plugins([JupyterRequirement])
-        exit_code, output = box.execute('echo "print(1)" | execute_cli')
-        print(output)
-        assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__
-        assert output == '1\r\n', (
-            'The output should be the same as the input for ' + box.__class__.__name__
-        )
-        box.close()
+    box = create_docker_box_from_app_config(config, temp_dir)
+    box.init_plugins([JupyterRequirement])
+    exit_code, output = box.execute('echo "print(1)" | execute_cli')
+    print(output)
+    assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__
+    assert output == '1\r\n', (
+        'The output should be the same as the input for ' + box.__class__.__name__
+    )
+    box.close()
 
 
 def _test_sandbox_jupyter_agentskills_fileop_pwd_impl(box):
@@ -287,7 +257,7 @@ def _test_sandbox_jupyter_agentskills_fileop_pwd_impl(box):
         '[File hello.py created.]\r\n'
     ).strip().split('\r\n')
 
-    if config.enable_auto_lint:
+    if config.sandbox.enable_auto_lint:
         # edit file, but make a mistake in indentation
         exit_code, output = box.execute(
             'echo "insert_content_at_line(\'hello.py\', 1, \'  print(\\"hello world\\")\')" | execute_cli'
@@ -341,13 +311,9 @@ DO NOT re-run the same failed edit command. Running it again will lead to the sa
 
 def test_sandbox_jupyter_agentskills_fileop_pwd(temp_dir):
     # get a temporary directory
-    with patch.object(config, 'workspace_base', new=temp_dir), patch.object(
-        config, 'workspace_mount_path', new=temp_dir
-    ), patch.object(config, 'run_as_devin', new='true'), patch.object(
-        config.sandbox, 'box_type', new='ssh'
-    ), patch.object(config, 'enable_auto_lint', new=True):
-        assert config.enable_auto_lint
-        box = DockerSSHBox()
+    with patch.object(config.sandbox, 'enable_auto_lint', new=True):
+        assert config.sandbox.enable_auto_lint
+        box = create_docker_box_from_app_config(config, temp_dir)
         _test_sandbox_jupyter_agentskills_fileop_pwd_impl(box)
 
 
@@ -358,13 +324,9 @@ def test_sandbox_jupyter_agentskills_fileop_pwd(temp_dir):
 def test_agnostic_sandbox_jupyter_agentskills_fileop_pwd(temp_dir):
     for base_sandbox_image in ['ubuntu:22.04', 'debian:11']:
         # get a temporary directory
-        with patch.object(config, 'workspace_base', new=temp_dir), patch.object(
-            config, 'workspace_mount_path', new=temp_dir
-        ), patch.object(config, 'run_as_devin', new=True), patch.object(
-            config.sandbox, 'box_type', new='ssh'
-        ), patch.object(
+        with patch.object(
             config.sandbox, 'container_image', new=base_sandbox_image
-        ), patch.object(config, 'enable_auto_lint', new=False):
-            assert not config.enable_auto_lint
-            box = DockerSSHBox()
+        ), patch.object(config.sandbox, 'enable_auto_lint', new=False):
+            assert not config.sandbox.enable_auto_lint
+            box = create_docker_box_from_app_config(config, temp_dir)
             _test_sandbox_jupyter_agentskills_fileop_pwd_impl(box)