Răsfoiți Sursa

fix: improve remote runtime reliability on large-scale evaluation (#4869)

Xingyao Wang 1 an în urmă
părinte
comite
a07e8272da

+ 1 - 0
evaluation/swe_bench/eval_infer.py

@@ -83,6 +83,7 @@ def get_config(instance: pd.Series) -> AppConfig:
             timeout=1800,
             timeout=1800,
             api_key=os.environ.get('ALLHANDS_API_KEY', None),
             api_key=os.environ.get('ALLHANDS_API_KEY', None),
             remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
             remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
+            remote_runtime_init_timeout=1800,
         ),
         ),
         # do not mount workspace
         # do not mount workspace
         workspace_base=None,
         workspace_base=None,

+ 1 - 0
evaluation/swe_bench/run_infer.py

@@ -146,6 +146,7 @@ def get_config(
             api_key=os.environ.get('ALLHANDS_API_KEY', None),
             api_key=os.environ.get('ALLHANDS_API_KEY', None),
             remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
             remote_runtime_api_url=os.environ.get('SANDBOX_REMOTE_RUNTIME_API_URL'),
             keep_remote_runtime_alive=False,
             keep_remote_runtime_alive=False,
+            remote_runtime_init_timeout=1800,
         ),
         ),
         # do not mount workspace
         # do not mount workspace
         workspace_base=None,
         workspace_base=None,

+ 3 - 1
openhands/core/config/sandbox_config.py

@@ -14,7 +14,8 @@ class SandboxConfig:
         base_container_image: The base container image from which to build the runtime image.
         base_container_image: The base container image from which to build the runtime image.
         runtime_container_image: The runtime container image to use.
         runtime_container_image: The runtime container image to use.
         user_id: The user ID for the sandbox.
         user_id: The user ID for the sandbox.
-        timeout: The timeout for the sandbox.
+        timeout: The default timeout for sandbox action execution.
+        remote_runtime_init_timeout: The timeout, in seconds, for the remote runtime to start.
         enable_auto_lint: Whether to enable auto-lint.
         enable_auto_lint: Whether to enable auto-lint.
         use_host_network: Whether to use the host network.
         use_host_network: Whether to use the host network.
         initialize_plugins: Whether to initialize plugins.
         initialize_plugins: Whether to initialize plugins.
@@ -41,6 +42,7 @@ class SandboxConfig:
     runtime_container_image: str | None = None
     runtime_container_image: str | None = None
     user_id: int = os.getuid() if hasattr(os, 'getuid') else 1000
     user_id: int = os.getuid() if hasattr(os, 'getuid') else 1000
     timeout: int = 120
     timeout: int = 120
+    remote_runtime_init_timeout: int = 180
     enable_auto_lint: bool = (
     enable_auto_lint: bool = (
         False  # once enabled, OpenHands would lint files after editing
         False  # once enabled, OpenHands would lint files after editing
     )
     )

+ 13 - 7
openhands/runtime/impl/remote/remote_runtime.py

@@ -1,7 +1,7 @@
 import os
 import os
-from pathlib import Path
 import tempfile
 import tempfile
 import threading
 import threading
+from pathlib import Path
 from typing import Callable, Optional
 from typing import Callable, Optional
 from zipfile import ZipFile
 from zipfile import ZipFile
 
 
@@ -260,13 +260,19 @@ class RemoteRuntime(Runtime):
                 {'X-Session-API-Key': start_response['session_api_key']}
                 {'X-Session-API-Key': start_response['session_api_key']}
             )
             )
 
 
-    @tenacity.retry(
-        stop=tenacity.stop_after_delay(180) | stop_if_should_exit(),
-        reraise=True,
-        retry=tenacity.retry_if_exception_type(RuntimeNotReadyError),
-        wait=tenacity.wait_fixed(2),
-    )
     def _wait_until_alive(self):
     def _wait_until_alive(self):
+        retry_decorator = tenacity.retry(
+            stop=tenacity.stop_after_delay(
+                self.config.sandbox.remote_runtime_init_timeout
+            )
+            | stop_if_should_exit(),
+            reraise=True,
+            retry=tenacity.retry_if_exception_type(RuntimeNotReadyError),
+            wait=tenacity.wait_fixed(2),
+        )
+        return retry_decorator(self._wait_until_alive_impl)()
+
+    def _wait_until_alive_impl(self):
         self.log('debug', f'Waiting for runtime to be alive at url: {self.runtime_url}')
         self.log('debug', f'Waiting for runtime to be alive at url: {self.runtime_url}')
         runtime_info_response = self._send_request(
         runtime_info_response = self._send_request(
             'GET',
             'GET',