Эх сурвалжийг харах

(feat) runtime: on startup, _wait_until_alive also waits for the runtime client to finish initializing (#3612)

* runtime: in _wait_until_alive, first wait for the client to initialize

* fix typo in runtime log entry
tobitege 1 жил өмнө
parent
commit
1fddc77247

+ 29 - 1
openhands/runtime/client/runtime.py

@@ -115,7 +115,7 @@ class EventStreamRuntime(Runtime):
         self.instance_id = (
             sid + '_' + str(uuid.uuid4()) if sid is not None else str(uuid.uuid4())
         )
-        # TODO: We can switch to aiodocker when `get_od_sandbox_image` is updated to use aiodocker
+        # TODO: We can switch to aiodocker when `build_sandbox_image` is updated to use aiodocker
         self.docker_client: docker.DockerClient = self._init_docker_client()
         self.base_container_image = self.config.sandbox.base_container_image
         self.runtime_container_image = self.config.sandbox.runtime_container_image
@@ -129,6 +129,7 @@ class EventStreamRuntime(Runtime):
 
         # Buffer for container logs
         self.log_buffer: LogBuffer | None = None
+        self.startup_done = False
 
     async def ainit(self, env_vars: dict[str, str] | None = None):
         if self.config.sandbox.runtime_extra_deps:
@@ -254,12 +255,15 @@ class EventStreamRuntime(Runtime):
         wait=tenacity.wait_exponential(multiplier=2, min=10, max=60),
     )
     async def _wait_until_alive(self):
+        init_msg = 'Runtime client initialized.'
         logger.debug('Getting container logs...')
 
         # Print and clear the log buffer
         assert (
             self.log_buffer is not None
         ), 'Log buffer is expected to be initialized when container is started'
+
+        # Always process logs, regardless of startup_done status
         logs = self.log_buffer.get_and_clear()
         if logs:
             formatted_logs = '\n'.join([f'    |{log}' for log in logs])
@@ -272,6 +276,30 @@ class EventStreamRuntime(Runtime):
                 + '\n'
                 + '-' * 90
             )
+            # Check for initialization message even if startup_done is True
+            if any(init_msg in log for log in logs):
+                self.startup_done = True
+
+        if not self.startup_done:
+            attempts = 0
+            while not self.startup_done and attempts < 10:
+                attempts += 1
+                await asyncio.sleep(1)
+                logs = self.log_buffer.get_and_clear()
+                if logs:
+                    formatted_logs = '\n'.join([f'    |{log}' for log in logs])
+                    logger.info(
+                        '\n'
+                        + '-' * 30
+                        + 'Container logs:'
+                        + '-' * 30
+                        + f'\n{formatted_logs}'
+                        + '\n'
+                        + '-' * 90
+                    )
+                    if any(init_msg in log for log in logs):
+                        self.startup_done = True
+                        break
 
         async with aiohttp.ClientSession() as session:
             async with session.get(f'{self.api_url}/alive') as response:

+ 1 - 1
openhands/runtime/utils/runtime_build.py

@@ -187,7 +187,7 @@ def get_runtime_image_repo_and_tag(base_image: str) -> tuple[str, str]:
 
     if RUNTIME_IMAGE_REPO in base_image:
         logger.info(
-            f'The provided image [{base_image}] is a already a valid runtime image.\n'
+            f'The provided image [{base_image}] is already a valid runtime image.\n'
             f'Will try to reuse it as is.'
         )