Kaynağa Gözat

Reduce runtime tests duration by running them across CPUs (#3779)

* Reduce runtime tests duration by running them across CPUs

* fix hardcoded image name

* test two cpus

* Test folder change

* Up the CPU to 4 again to test

* Change to 3 CPUs

* Down to 2

* Add param to remove all openhands containers

* Add comment

* Add reruns just in case

* Fix ordering of if
mamoodi 1 yıl önce
ebeveyn
işleme
f3b2085f9b

+ 10 - 2
.github/workflows/ghcr_runtime.yml

@@ -107,9 +107,10 @@ jobs:
   # Run unit tests with the EventStream runtime Docker images
   test_runtime:
     name: Test Runtime
-    runs-on: ubuntu-latest
     needs: [ghcr_build_runtime]
+    runs-on: ubuntu-latest
     strategy:
+      fail-fast: false
       matrix:
         base_image: ['nikolaik']
     steps:
@@ -153,6 +154,13 @@ jobs:
         run: make install-python-dependencies
       - name: Run runtime tests
         run: |
+          # We install pytest-xdist in order to run tests across CPUs. However, tests start to fail when we run
+          # then across more than 2 CPUs for some reason
+          poetry run pip install pytest-xdist
+
+          # Install to be able to retry on failures for flaky tests
+          poetry run pip install pytest-rerunfailures
+
           image_name=ghcr.io/${{ github.repository_owner }}/runtime:${{ github.sha }}-${{ matrix.base_image }}
           image_name=$(echo $image_name | tr '[:upper:]' '[:lower:]')
 
@@ -160,7 +168,7 @@ jobs:
           SANDBOX_USER_ID=$(id -u) \
           SANDBOX_BASE_CONTAINER_IMAGE=$image_name \
           TEST_IN_CI=true \
-          poetry run pytest --cov=agenthub --cov=openhands --cov-report=xml -s ./tests/runtime
+          poetry run pytest -n 2 --reruns 2 --cov=agenthub --cov=openhands --cov-report=xml -s ./tests/runtime
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v4
         env:

+ 17 - 2
openhands/runtime/client/runtime.py

@@ -305,7 +305,15 @@ class EventStreamRuntime(Runtime):
     def sandbox_workspace_dir(self):
         return self.config.workspace_mount_path_in_sandbox
 
-    def close(self, close_client: bool = True):
+    def close(self, close_client: bool = True, rm_all_containers: bool = True):
+        """
+        Closes the EventStreamRuntime and associated objects
+
+        Parameters:
+        - close_client (bool): Whether to close the DockerClient
+        - rm_all_containers (bool): Whether to remove all containers with the 'openhands-sandbox-' prefix
+        """
+
         if self.log_buffer:
             self.log_buffer.close()
 
@@ -315,7 +323,13 @@ class EventStreamRuntime(Runtime):
         containers = self.docker_client.containers.list(all=True)
         for container in containers:
             try:
-                if container.name.startswith(self.container_name_prefix):
+                # If the app doesn't shut down properly, it can leave runtime containers on the system. This ensures
+                # that all 'openhands-sandbox-' containers are removed as well.
+                if rm_all_containers and container.name.startswith(
+                    self.container_name_prefix
+                ):
+                    container.remove(force=True)
+                elif container.name == self.container_name:
                     logs = container.logs(tail=1000).decode('utf-8')
                     logger.debug(
                         f'==== Container logs ====\n{logs}\n==== End of container logs ===='
@@ -323,6 +337,7 @@ class EventStreamRuntime(Runtime):
                     container.remove(force=True)
             except docker.errors.NotFound:
                 pass
+
         if close_client:
             self.docker_client.close()
 

+ 16 - 2
tests/runtime/conftest.py

@@ -1,4 +1,5 @@
 import os
+import random
 import time
 
 import pytest
@@ -23,7 +24,21 @@ def print_method_name(request):
 
 @pytest.fixture
 def temp_dir(tmp_path_factory: TempPathFactory) -> str:
-    return str(tmp_path_factory.mktemp('test_runtime'))
+    """
+    Creates a unique temporary directory
+
+    Parameters:
+    - tmp_path_factory (TempPathFactory): A TempPathFactory class
+
+    Returns:
+    - str: The temporary directory path that was created
+    """
+
+    unique_suffix = random.randint(10000, 99999)
+    temp_directory = tmp_path_factory.mktemp(
+        f'test_runtime_{unique_suffix}', numbered=False
+    )
+    return str(temp_directory)
 
 
 TEST_RUNTIME = os.getenv('TEST_RUNTIME', 'eventstream')
@@ -93,7 +108,6 @@ def base_container_image(request):
 def runtime(temp_dir, box_class, run_as_openhands):
     runtime = _load_runtime(temp_dir, box_class, run_as_openhands)
     yield runtime
-    runtime.close()
     time.sleep(1)
 
 

+ 19 - 21
tests/runtime/test_bash.py

@@ -42,7 +42,7 @@ def test_bash_command_pexcept(temp_dir, box_class, run_as_openhands):
     ), 'The observation should be a CmdOutputObservation.'
     assert obs.exit_code == 0, 'The exit code should be 0.'
 
-    runtime.close()
+    runtime.close(rm_all_containers=False)
     time.sleep(1)
 
 
@@ -56,7 +56,7 @@ def test_single_multiline_command(temp_dir, box_class):
     assert obs.exit_code == 0, 'The exit code should be 0.'
     assert 'foo' in obs.content
 
-    runtime.close()
+    runtime.close(rm_all_containers=False)
     time.sleep(1)
 
 
@@ -70,7 +70,7 @@ def test_multiline_echo(temp_dir, box_class):
     assert obs.exit_code == 0, 'The exit code should be 0.'
     assert 'hello\r\nworld' in obs.content
 
-    runtime.close()
+    runtime.close(rm_all_containers=False)
     time.sleep(1)
 
 
@@ -85,7 +85,7 @@ def test_runtime_whitespace(temp_dir, box_class):
     assert obs.exit_code == 0, 'The exit code should be 0.'
     assert '\r\n\r\n\r\n' in obs.content
 
-    runtime.close()
+    runtime.close(rm_all_containers=False)
     time.sleep(1)
 
 
@@ -137,7 +137,7 @@ world "
     assert 'hello\r\nworld\r\nare\r\nyou\r\n\r\nthere?' in obs.content
     assert 'hello\r\nworld "\r\n' in obs.content
 
-    runtime.close()
+    runtime.close(rm_all_containers=False)
     time.sleep(1)
 
 
@@ -153,7 +153,7 @@ def test_no_ps2_in_output(temp_dir, box_class, run_as_openhands):
     assert 'hello\r\nworld' in obs.content
     assert '>' not in obs.content
 
-    runtime.close()
+    runtime.close(rm_all_containers=False)
     time.sleep(1)
 
 
@@ -196,7 +196,7 @@ echo "success"
     assert obs.exit_code == 0, 'The exit code should be 0.'
     assert 'success' in obs.content
 
-    runtime.close()
+    runtime.close(rm_all_containers=False)
     time.sleep(1)
 
 
@@ -255,7 +255,7 @@ def test_cmd_run(temp_dir, box_class, run_as_openhands):
     assert isinstance(obs, CmdOutputObservation)
     assert obs.exit_code == 0
 
-    runtime.close()
+    runtime.close(rm_all_containers=False)
     time.sleep(1)
 
 
@@ -273,7 +273,7 @@ def test_run_as_user_correct_home_dir(temp_dir, box_class, run_as_openhands):
     else:
         assert '/root' in obs.content
 
-    runtime.close()
+    runtime.close(rm_all_containers=False)
     time.sleep(1)
 
 
@@ -289,7 +289,7 @@ def test_multi_cmd_run_in_single_line(temp_dir, box_class):
     assert '/workspace' in obs.content
     assert 'total 0' in obs.content
 
-    runtime.close()
+    runtime.close(rm_all_containers=False)
     time.sleep(1)
 
 
@@ -318,7 +318,7 @@ def test_stateful_cmd(temp_dir, box_class):
     assert obs.exit_code == 0, 'The exit code should be 0.'
     assert '/workspace/test' in obs.content
 
-    runtime.close()
+    runtime.close(rm_all_containers=False)
     time.sleep(1)
 
 
@@ -332,7 +332,7 @@ def test_failed_cmd(temp_dir, box_class):
     assert isinstance(obs, CmdOutputObservation)
     assert obs.exit_code != 0, 'The exit code should not be 0 for a failed command.'
 
-    runtime.close()
+    runtime.close(rm_all_containers=False)
     time.sleep(1)
 
 
@@ -365,7 +365,7 @@ def test_copy_single_file(temp_dir, box_class):
     assert obs.exit_code == 0
     assert 'Hello, World!' in obs.content
 
-    runtime.close()
+    runtime.close(rm_all_containers=False)
     time.sleep(1)
 
 
@@ -414,7 +414,7 @@ def test_copy_directory_recursively(temp_dir, box_class):
     assert obs.exit_code == 0
     assert 'File 1 content' in obs.content
 
-    runtime.close()
+    runtime.close(rm_all_containers=False)
     time.sleep(1)
 
 
@@ -435,7 +435,7 @@ def test_copy_to_non_existent_directory(temp_dir, box_class):
     assert obs.exit_code == 0
     assert 'Hello, World!' in obs.content
 
-    runtime.close()
+    runtime.close(rm_all_containers=False)
     time.sleep(1)
 
 
@@ -470,7 +470,7 @@ def test_overwrite_existing_file(temp_dir, box_class):
     assert obs.exit_code == 0
     assert 'Hello, World!' in obs.content
 
-    runtime.close()
+    runtime.close(rm_all_containers=False)
     time.sleep(1)
 
 
@@ -490,7 +490,7 @@ def test_copy_non_existent_file(temp_dir, box_class):
     assert isinstance(obs, CmdOutputObservation)
     assert obs.exit_code != 0  # File should not exist
 
-    runtime.close()
+    runtime.close(rm_all_containers=False)
     time.sleep(1)
 
 
@@ -517,7 +517,7 @@ def test_keep_prompt(box_class, temp_dir):
     assert obs.exit_code == 0
     assert 'root@' not in obs.content
 
-    runtime.close()
+    runtime.close(rm_all_containers=False)
     time.sleep(1)
 
 
@@ -595,7 +595,5 @@ def test_git_operation(box_class):
     assert isinstance(obs, CmdOutputObservation)
     assert obs.exit_code == 0
 
-    runtime.close()
-
-    runtime.close()
+    runtime.close(rm_all_containers=False)
     time.sleep(1)

+ 2 - 2
tests/runtime/test_browsing.py

@@ -66,7 +66,7 @@ def test_simple_browse(temp_dir, box_class, run_as_openhands):
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
     assert obs.exit_code == 0
 
-    runtime.close()
+    runtime.close(rm_all_containers=False)
     time.sleep(1)
 
 
@@ -111,5 +111,5 @@ def test_browsergym_eval_env(box_class, temp_dir):
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
     assert json.loads(obs.content) == [0.0]
 
-    runtime.close()
+    runtime.close(rm_all_containers=False)
     time.sleep(1)

+ 2 - 2
tests/runtime/test_env_vars.py

@@ -30,7 +30,7 @@ def test_env_vars_os_environ(temp_dir, box_class, run_as_openhands):
             obs.content.strip().split('\n\r')[0].strip() == 'BAZ'
         ), f'Output: [{obs.content}] for {box_class}'
 
-        runtime.close()
+        runtime.close(rm_all_containers=False)
         time.sleep(1)
 
 
@@ -66,5 +66,5 @@ def test_env_vars_runtime_operations(temp_dir, box_class):
         and obs.content.strip().split('\r\n')[0].strip() == 'new_value'
     )
 
-    runtime.close()
+    runtime.close(rm_all_containers=False)
     time.sleep(1)

+ 3 - 3
tests/runtime/test_images.py

@@ -45,7 +45,7 @@ def test_bash_python_version(temp_dir, box_class, base_container_image):
     assert obs.exit_code == 0
     assert 'pip' in obs.content  # Check that pip is available
 
-    runtime.close()
+    runtime.close(rm_all_containers=False)
     time.sleep(1)
 
 
@@ -68,7 +68,7 @@ def test_nodejs_22_version(temp_dir, box_class, base_container_image):
     assert obs.exit_code == 0
     assert 'v22' in obs.content  # Check for specific version
 
-    runtime.close()
+    runtime.close(rm_all_containers=False)
     time.sleep(1)
 
 
@@ -90,5 +90,5 @@ def test_go_version(temp_dir, box_class, base_container_image):
     assert obs.exit_code == 0
     assert 'go1.23' in obs.content  # Check for specific version
 
-    runtime.close()
+    runtime.close(rm_all_containers=False)
     time.sleep(1)

+ 7 - 7
tests/runtime/test_ipython.py

@@ -90,7 +90,7 @@ def test_simple_cmd_ipython_and_fileop(temp_dir, box_class, run_as_openhands):
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
     assert obs.exit_code == 0
 
-    runtime.close()
+    runtime.close(rm_all_containers=False)
     time.sleep(1)
 
 
@@ -164,7 +164,7 @@ def test_ipython_multi_user(temp_dir, box_class, run_as_openhands):
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
     assert obs.exit_code == 0
 
-    runtime.close()
+    runtime.close(rm_all_containers=False)
     time.sleep(1)
 
 
@@ -188,7 +188,7 @@ def test_ipython_simple(temp_dir, box_class):
         ).strip()
     )
 
-    runtime.close()
+    runtime.close(rm_all_containers=False)
     time.sleep(1)
 
 
@@ -309,7 +309,7 @@ DO NOT re-run the same failed edit command. Running it again will lead to the sa
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
     assert obs.exit_code == 0
 
-    runtime.close()
+    runtime.close(rm_all_containers=False)
     time.sleep(1)
 
 
@@ -323,7 +323,7 @@ def test_ipython_agentskills_fileop_pwd(
     )
     _test_ipython_agentskills_fileop_pwd_impl(runtime, enable_auto_lint)
 
-    runtime.close()
+    runtime.close(rm_all_containers=False)
     time.sleep(1)
 
 
@@ -392,7 +392,7 @@ def test_ipython_agentskills_fileop_pwd_with_userdir(temp_dir, box_class):
         '[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]'
     ).strip().split('\n')
 
-    runtime.close()
+    runtime.close(rm_all_containers=False)
     time.sleep(1)
 
 
@@ -428,5 +428,5 @@ def test_ipython_package_install(temp_dir, box_class, run_as_openhands):
         '[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]'
     )
 
-    runtime.close()
+    runtime.close(rm_all_containers=False)
     time.sleep(1)