Jelajahi Sumber

test_runtime: run tests per runtime, not alternating (#3103)

tobitege 1 tahun lalu
induk
melakukan
d0217b84ef

+ 4 - 0
opendevin/runtime/docker/ssh_box.py

@@ -201,6 +201,7 @@ class DockerSSHBox(Sandbox):
         except Exception as e:
             self.close()
             raise e
+        time.sleep(1)
 
         # make sure /tmp always exists
         self.execute('mkdir -p /tmp')
@@ -353,12 +354,15 @@ class DockerSSHBox(Sandbox):
             raise e
 
     def start_ssh_session(self):
+        time.sleep(1)
         self.__ssh_login()
         assert self.ssh is not None
 
         # Fix: https://github.com/pexpect/pexpect/issues/669
         self.ssh.sendline("bind 'set enable-bracketed-paste off'")
         self.ssh.prompt()
+        time.sleep(1)
+
         # cd to workspace
         self.ssh.sendline(f'cd {self.sandbox_workspace_dir}')
         self.ssh.prompt()

+ 5 - 0
opendevin/runtime/server/runtime.py

@@ -77,6 +77,11 @@ class ServerRuntime(Runtime):
             self._is_external_sandbox = True
         self.browser: BrowserEnv | None = None
 
+    async def ainit(self, env_vars: dict[str, str] | None = None):
+        # MUST call super().ainit() to initialize both default env vars
+        # AND the ones in env vars!
+        await super().ainit(env_vars)
+
     async def close(self):
         if hasattr(self, '_is_external_sandbox') and not self._is_external_sandbox:
             self.sandbox.close()

+ 104 - 94
tests/unit/test_runtime.py

@@ -1,8 +1,10 @@
 """Test the EventStreamRuntime, which connects to the RuntimeClient running in the sandbox."""
 
+import asyncio
 import os
 import pathlib
 import tempfile
+import time
 from unittest.mock import patch
 
 import pytest
@@ -21,6 +23,13 @@ from opendevin.runtime.plugins import AgentSkillsRequirement, JupyterRequirement
 from opendevin.runtime.server.runtime import ServerRuntime
 
 
+@pytest.fixture(autouse=True)
+def print_method_name(request):
+    print('\n########################################################################')
+    print(f'Running test: {request.node.name}')
+    print('########################################################################')
+
+
 @pytest.fixture
 def temp_dir(monkeypatch):
     # get a temporary directory
@@ -29,9 +38,19 @@ def temp_dir(monkeypatch):
         yield temp_dir
 
 
-async def _load_runtime(box_class, event_stream, plugins, sid):
+# This assures that all tests run together for each runtime, not alternating between them,
+# which caused them to fail previously.
+@pytest.fixture(scope='module', params=[EventStreamRuntime, ServerRuntime])
+def box_class(request):
+    time.sleep(1)
+    return request.param
+
+
+async def _load_runtime(box_class, event_stream):
+    sid = 'test'
+    plugins = [JupyterRequirement(), AgentSkillsRequirement()]
     sandbox_config = SandboxConfig(
-        use_host_network=False,
+        use_host_network=True,
     )
     container_image = sandbox_config.container_image
     # NOTE: we will use the default container image specified in the config.sandbox
@@ -39,7 +58,7 @@ async def _load_runtime(box_class, event_stream, plugins, sid):
     if 'od_runtime' not in container_image:
         container_image = 'ubuntu:22.04'
         logger.warning(
-            f'`sandbox_config.container_image` is not an od_runtime image. Will use `{container_image}` as the container image for testing.'
+            f'`{sandbox_config.container_image}` is not an od_runtime image. Will use `{container_image}` as the container image for testing.'
         )
     if box_class == EventStreamRuntime:
         runtime = EventStreamRuntime(
@@ -65,142 +84,130 @@ async def _load_runtime(box_class, event_stream, plugins, sid):
         )
     else:
         raise ValueError(f'Invalid box class: {box_class}')
+    await asyncio.sleep(1)
     return runtime
 
 
-RUNTIME_TO_TEST = [EventStreamRuntime, ServerRuntime]
-
-
 @pytest.mark.asyncio
-async def test_env_vars_os_environ():
+async def test_env_vars_os_environ(box_class):
     with patch.dict(os.environ, {'SANDBOX_ENV_FOOBAR': 'BAZ'}):
-        plugins = [JupyterRequirement(), AgentSkillsRequirement()]
-        sid = 'test'
         cli_session = 'main_test'
 
-        for box_class in RUNTIME_TO_TEST:
-            event_stream = EventStream(cli_session)
-            runtime = await _load_runtime(box_class, event_stream, plugins, sid)
-
-            obs: CmdOutputObservation = await runtime.run_action(
-                CmdRunAction(command='env')
-            )
-            print(obs)
-
-            obs: CmdOutputObservation = await runtime.run_action(
-                CmdRunAction(command='echo $FOOBAR')
-            )
-            print(obs)
-            assert obs.exit_code == 0, 'The exit code should be 0.'
-            assert (
-                obs.content.strip().split('\n\r')[0].strip() == 'BAZ'
-            ), f'Output: [{obs.content}] for {box_class}'
-
-            await runtime.close()
-
-
-@pytest.mark.asyncio
-async def test_env_vars_runtime_add_env_vars():
-    plugins = [JupyterRequirement(), AgentSkillsRequirement()]
-    sid = 'test'
-    cli_session = 'main_test'
-
-    for box_class in RUNTIME_TO_TEST:
         event_stream = EventStream(cli_session)
-        runtime = await _load_runtime(box_class, event_stream, plugins, sid)
-        await runtime.add_env_vars({'QUUX': 'abc"def'})
+        runtime = await _load_runtime(box_class, event_stream)
+
+        obs: CmdOutputObservation = await runtime.run_action(
+            CmdRunAction(command='env')
+        )
+        print(obs)
 
         obs: CmdOutputObservation = await runtime.run_action(
-            CmdRunAction(command='echo $QUUX')
+            CmdRunAction(command='echo $FOOBAR')
         )
         print(obs)
         assert obs.exit_code == 0, 'The exit code should be 0.'
         assert (
-            obs.content.strip().split('\r\n')[0].strip() == 'abc"def'
+            obs.content.strip().split('\n\r')[0].strip() == 'BAZ'
         ), f'Output: [{obs.content}] for {box_class}'
+
         await runtime.close()
+        await asyncio.sleep(1)
 
 
 @pytest.mark.asyncio
-async def test_env_vars_runtime_add_empty_dict():
-    plugins = [JupyterRequirement(), AgentSkillsRequirement()]
-    sid = 'test'
+async def test_env_vars_runtime_add_env_vars(box_class):
     cli_session = 'main_test'
 
-    for box_class in RUNTIME_TO_TEST:
-        event_stream = EventStream(cli_session)
-        runtime = await _load_runtime(box_class, event_stream, plugins, sid)
+    event_stream = EventStream(cli_session)
+    runtime = await _load_runtime(box_class, event_stream)
+    await runtime.add_env_vars({'QUUX': 'abc"def'})
 
-        prev_obs = await runtime.run_action(CmdRunAction(command='env'))
-        assert prev_obs.exit_code == 0, 'The exit code should be 0.'
-        print(prev_obs)
+    obs: CmdOutputObservation = await runtime.run_action(
+        CmdRunAction(command='echo $QUUX')
+    )
+    print(obs)
+    assert obs.exit_code == 0, 'The exit code should be 0.'
+    assert (
+        obs.content.strip().split('\r\n')[0].strip() == 'abc"def'
+    ), f'Output: [{obs.content}] for {box_class}'
 
-        await runtime.add_env_vars({})
+    await runtime.close()
+    await asyncio.sleep(1)
 
-        obs = await runtime.run_action(CmdRunAction(command='env'))
-        assert obs.exit_code == 0, 'The exit code should be 0.'
-        print(obs)
-        assert (
-            obs.content == prev_obs.content
-        ), 'The env var content should be the same after adding an empty dict.'
 
-        await runtime.close()
+@pytest.mark.asyncio
+async def test_env_vars_runtime_add_empty_dict(box_class):
+    cli_session = 'main_test'
+
+    event_stream = EventStream(cli_session)
+    runtime = await _load_runtime(box_class, event_stream)
+
+    prev_obs = await runtime.run_action(CmdRunAction(command='env'))
+    assert prev_obs.exit_code == 0, 'The exit code should be 0.'
+    print(prev_obs)
+
+    await runtime.add_env_vars({})
+
+    obs = await runtime.run_action(CmdRunAction(command='env'))
+    assert obs.exit_code == 0, 'The exit code should be 0.'
+    print(obs)
+    assert (
+        obs.content == prev_obs.content
+    ), 'The env var content should be the same after adding an empty dict.'
+
+    await runtime.close()
+    await asyncio.sleep(1)
 
 
 @pytest.mark.asyncio
-async def test_env_vars_runtime_add_multiple_env_vars():
-    plugins = [JupyterRequirement(), AgentSkillsRequirement()]
-    sid = 'test'
+async def test_env_vars_runtime_add_multiple_env_vars(box_class):
+    cli_session = 'main_test'
+
+    event_stream = EventStream(cli_session)
+    runtime = await _load_runtime(box_class, event_stream)
+    await runtime.add_env_vars({'QUUX': 'abc"def', 'FOOBAR': 'xyz'})
+
+    obs: CmdOutputObservation = await runtime.run_action(
+        CmdRunAction(command='echo $QUUX $FOOBAR')
+    )
+    print(obs)
+    assert obs.exit_code == 0, 'The exit code should be 0.'
+    assert (
+        obs.content.strip().split('\r\n')[0].strip() == 'abc"def xyz'
+    ), f'Output: [{obs.content}] for {box_class}'
+
+    await runtime.close()
+    await asyncio.sleep(1)
+
+
+@pytest.mark.asyncio
+async def test_env_vars_runtime_add_env_vars_overwrite(box_class):
     cli_session = 'main_test'
 
-    for box_class in RUNTIME_TO_TEST:
+    with patch.dict(os.environ, {'SANDBOX_ENV_FOOBAR': 'BAZ'}):
         event_stream = EventStream(cli_session)
-        runtime = await _load_runtime(box_class, event_stream, plugins, sid)
-        await runtime.add_env_vars({'QUUX': 'abc"def', 'FOOBAR': 'xyz'})
+        runtime = await _load_runtime(box_class, event_stream)
+        await runtime.add_env_vars({'FOOBAR': 'xyz'})
 
         obs: CmdOutputObservation = await runtime.run_action(
-            CmdRunAction(command='echo $QUUX $FOOBAR')
+            CmdRunAction(command='echo $FOOBAR')
         )
         print(obs)
         assert obs.exit_code == 0, 'The exit code should be 0.'
         assert (
-            obs.content.strip().split('\r\n')[0].strip() == 'abc"def xyz'
+            obs.content.strip().split('\r\n')[0].strip() == 'xyz'
         ), f'Output: [{obs.content}] for {box_class}'
-        await runtime.close()
-
 
-@pytest.mark.asyncio
-async def test_env_vars_runtime_add_env_vars_overwrite():
-    plugins = [JupyterRequirement(), AgentSkillsRequirement()]
-    sid = 'test'
-    cli_session = 'main_test'
-
-    for box_class in RUNTIME_TO_TEST:
-        with patch.dict(os.environ, {'SANDBOX_ENV_FOOBAR': 'BAZ'}):
-            event_stream = EventStream(cli_session)
-            runtime = await _load_runtime(box_class, event_stream, plugins, sid)
-            await runtime.add_env_vars({'FOOBAR': 'xyz'})
-
-            obs: CmdOutputObservation = await runtime.run_action(
-                CmdRunAction(command='echo $FOOBAR')
-            )
-            print(obs)
-            assert obs.exit_code == 0, 'The exit code should be 0.'
-            assert (
-                obs.content.strip().split('\r\n')[0].strip() == 'xyz'
-            ), f'Output: [{obs.content}] for {box_class}'
-            await runtime.close()
+        await runtime.close()
+        await asyncio.sleep(1)
 
 
 @pytest.mark.asyncio
-async def test_bash_command_pexcept(temp_dir):
-    plugins = [JupyterRequirement(), AgentSkillsRequirement()]
-    sid = 'test'
+async def test_bash_command_pexcept(temp_dir, box_class):
     cli_session = 'main_test'
 
-    box_class = EventStreamRuntime
     event_stream = EventStream(cli_session)
-    runtime = await _load_runtime(box_class, event_stream, plugins, sid)
+    runtime = await _load_runtime(box_class, event_stream)
 
     # We set env var PS1="\u@\h:\w $"
     # and construct the PEXCEPT prompt base on it.
@@ -224,3 +231,6 @@ async def test_bash_command_pexcept(temp_dir):
         obs, CmdOutputObservation
     ), 'The observation should be a CmdOutputObservation.'
     assert obs.exit_code == 0, 'The exit code should be 0.'
+
+    await runtime.close()
+    await asyncio.sleep(1)