Просмотр исходного кода

(eval) EOF fixes for SWE-Bench evaluation (#3623)

* add error handling for client eof

* remove root check

* remove set -e

* echo USER to fix for swebench infer

* fix entry timeout

* add timeout; fix runtime close
Xingyao Wang 1 год назад
Родитель
Commit
98081b9b1b

+ 9 - 0
evaluation/swe_bench/run_infer.py

@@ -141,6 +141,12 @@ async def initialize_runtime(
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
     assert obs.exit_code == 0
 
+    action = CmdRunAction(command="""export USER=$(whoami); echo USER=${USER} """)
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = await runtime.run_action(action)
+    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+    assert obs.exit_code == 0
+
     if USE_INSTANCE_IMAGE:
         # inject the init script
         script_dir = os.path.dirname(__file__)
@@ -192,6 +198,7 @@ async def initialize_runtime(
         assert obs.exit_code == 0
     else:
         action = CmdRunAction(command='source /swe_util/swe_entry.sh')
+        action.timeout = 1800
         logger.info(action, extra={'msg_type': 'ACTION'})
         obs = await runtime.run_action(action)
         logger.info(obs, extra={'msg_type': 'OBSERVATION'})
@@ -323,6 +330,8 @@ async def process_instance(
     logger.info(
         f'Got git diff for instance {instance.instance_id}:\n--------\n{git_patch}\n--------'
     )
+
+    await runtime.close()
     # ==========================================
 
     # ======= Attempt to evaluate the agent's edits =======

+ 0 - 8
evaluation/swe_bench/scripts/setup/instance_swe_entry.sh

@@ -1,13 +1,5 @@
 #!/bin/bash
 
-# set -e
-
-# assert user name is `root`
-if [ "$USER" != "root" ]; then
-    echo "Error: This script is intended to be run by the 'root' user only." >&2
-    exit 1
-fi
-
 source ~/.bashrc
 
 SWEUTIL_DIR=/swe_util

+ 3 - 0
openhands/runtime/client/client.py

@@ -209,6 +209,9 @@ class RuntimeClient:
 
     def _get_bash_prompt_and_update_pwd(self):
         ps1 = self.shell.after
+        if ps1 == pexpect.EOF:
+            logger.error(f'Bash shell EOF! {self.shell.after=}, {self.shell.before=}')
+            raise RuntimeError('Bash shell EOF')
 
         # begin at the last occurrence of '[PEXPECT_BEGIN]'.
         # In multi-line bash commands, the prompt will be repeated