Просмотр исходного кода

(test) Fix regressions in tests (#3579)

* fix conftest.py option (#3573)

* try to fix fixture base_container_image in runtime conftest

* fix integration test mock files due to #3548

* fix test_ipython.py integration test
tobitege 1 год назад
Родитель
Сommit
f1882ba886
23 измененных файлов с 46 добавлено и 41 удалено
  1. 1 1
      tests/integration/mock/eventstream_runtime/CodeActAgent/test_ipython/prompt_002.log
  2. 1 1
      tests/integration/mock/eventstream_runtime/CodeActAgent/test_ipython_module/prompt_002.log
  3. 2 2
      tests/integration/mock/eventstream_runtime/CodeActAgent/test_ipython_module/prompt_003.log
  4. 3 3
      tests/integration/mock/eventstream_runtime/CodeActAgent/test_ipython_module/prompt_004.log
  5. 1 1
      tests/integration/mock/eventstream_runtime/CodeActSWEAgent/test_ipython/prompt_002.log
  6. 1 1
      tests/integration/mock/eventstream_runtime/CodeActSWEAgent/test_ipython_module/prompt_002.log
  7. 2 2
      tests/integration/mock/eventstream_runtime/CodeActSWEAgent/test_ipython_module/prompt_003.log
  8. 3 3
      tests/integration/mock/eventstream_runtime/CodeActSWEAgent/test_ipython_module/prompt_004.log
  9. 1 1
      tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_002.log
  10. 1 1
      tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_003.log
  11. 1 1
      tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_002.log
  12. 1 1
      tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_005.log
  13. 1 1
      tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_006.log
  14. 1 1
      tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_009.log
  15. 1 1
      tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_003.log
  16. 1 1
      tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_004.log
  17. 1 1
      tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_005.log
  18. 1 1
      tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_006.log
  19. 1 1
      tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_007.log
  20. 1 1
      tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_010.log
  21. 1 1
      tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_011.log
  22. 8 3
      tests/runtime/conftest.py
  23. 11 11
      tests/runtime/test_ipython.py

+ 1 - 1
tests/integration/mock/eventstream_runtime/CodeActAgent/test_ipython/prompt_002.log

@@ -411,6 +411,6 @@ with open('/workspace/test.txt', 'w') as file:
 OBSERVATION:
 OBSERVATION:
 [Code executed successfully with no output]
 [Code executed successfully with no output]
 [Jupyter current working directory: /workspace]
 [Jupyter current working directory: /workspace]
-[Jupyter Python interpreter: /openhands/poetry/openhands-5O4_aCHf-py3.11/bin/python]
+[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]
 
 
 ENVIRONMENT REMINDER: You have 13 turns left to complete the task. When finished reply with <finish></finish>.
 ENVIRONMENT REMINDER: You have 13 turns left to complete the task. When finished reply with <finish></finish>.

+ 1 - 1
tests/integration/mock/eventstream_runtime/CodeActAgent/test_ipython_module/prompt_002.log

@@ -425,6 +425,6 @@ Successfully installed pymsgbox-1.0.9
 [notice] To update, run: pip install --upgrade pip
 [notice] To update, run: pip install --upgrade pip
 Note: you may need to restart the kernel to use updated packages.
 Note: you may need to restart the kernel to use updated packages.
 [Jupyter current working directory: /workspace]
 [Jupyter current working directory: /workspace]
-[Jupyter Python interpreter: /openhands/poetry/openhands-5O4_aCHf-py3.11/bin/python]
+[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]
 
 
 ENVIRONMENT REMINDER: You have 13 turns left to complete the task. When finished reply with <finish></finish>.
 ENVIRONMENT REMINDER: You have 13 turns left to complete the task. When finished reply with <finish></finish>.

+ 2 - 2
tests/integration/mock/eventstream_runtime/CodeActAgent/test_ipython_module/prompt_003.log

@@ -425,7 +425,7 @@ Successfully installed pymsgbox-1.0.9
 [notice] To update, run: pip install --upgrade pip
 [notice] To update, run: pip install --upgrade pip
 Note: you may need to restart the kernel to use updated packages.
 Note: you may need to restart the kernel to use updated packages.
 [Jupyter current working directory: /workspace]
 [Jupyter current working directory: /workspace]
-[Jupyter Python interpreter: /openhands/poetry/openhands-5O4_aCHf-py3.11/bin/python]
+[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]
 
 
 ----------
 ----------
 
 
@@ -444,6 +444,6 @@ with open('/workspace/test.txt', 'w') as file:
 OBSERVATION:
 OBSERVATION:
 [Code executed successfully with no output]
 [Code executed successfully with no output]
 [Jupyter current working directory: /workspace]
 [Jupyter current working directory: /workspace]
-[Jupyter Python interpreter: /openhands/poetry/openhands-5O4_aCHf-py3.11/bin/python]
+[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]
 
 
 ENVIRONMENT REMINDER: You have 12 turns left to complete the task. When finished reply with <finish></finish>.
 ENVIRONMENT REMINDER: You have 12 turns left to complete the task. When finished reply with <finish></finish>.

+ 3 - 3
tests/integration/mock/eventstream_runtime/CodeActAgent/test_ipython_module/prompt_004.log

@@ -425,7 +425,7 @@ Successfully installed pymsgbox-1.0.9
 [notice] To update, run: pip install --upgrade pip
 [notice] To update, run: pip install --upgrade pip
 Note: you may need to restart the kernel to use updated packages.
 Note: you may need to restart the kernel to use updated packages.
 [Jupyter current working directory: /workspace]
 [Jupyter current working directory: /workspace]
-[Jupyter Python interpreter: /openhands/poetry/openhands-5O4_aCHf-py3.11/bin/python]
+[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]
 
 
 ----------
 ----------
 
 
@@ -444,7 +444,7 @@ with open('/workspace/test.txt', 'w') as file:
 OBSERVATION:
 OBSERVATION:
 [Code executed successfully with no output]
 [Code executed successfully with no output]
 [Jupyter current working directory: /workspace]
 [Jupyter current working directory: /workspace]
-[Jupyter Python interpreter: /openhands/poetry/openhands-5O4_aCHf-py3.11/bin/python]
+[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]
 
 
 ----------
 ----------
 
 
@@ -458,7 +458,7 @@ cat /workspace/test.txt
 OBSERVATION:
 OBSERVATION:
 pymsgbox version: 1.0.9
 pymsgbox version: 1.0.9
 
 
-[Python Interpreter: /openhands/poetry/openhands-5O4_aCHf-py3.11/bin/python]
+[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]
 openhands@docker-desktop:/workspace $
 openhands@docker-desktop:/workspace $
 [Command -1 finished with exit code 0]
 [Command -1 finished with exit code 0]
 
 

+ 1 - 1
tests/integration/mock/eventstream_runtime/CodeActSWEAgent/test_ipython/prompt_002.log

@@ -604,6 +604,6 @@ with open('/workspace/test.txt', 'w') as file:
 OBSERVATION:
 OBSERVATION:
 [Code executed successfully with no output]
 [Code executed successfully with no output]
 [Jupyter current working directory: /workspace]
 [Jupyter current working directory: /workspace]
-[Jupyter Python interpreter: /openhands/poetry/openhands-5O4_aCHf-py3.11/bin/python]
+[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]
 
 
 ENVIRONMENT REMINDER: You have 13 turns left to complete the task. When finished reply with <finish></finish>.
 ENVIRONMENT REMINDER: You have 13 turns left to complete the task. When finished reply with <finish></finish>.

+ 1 - 1
tests/integration/mock/eventstream_runtime/CodeActSWEAgent/test_ipython_module/prompt_002.log

@@ -618,6 +618,6 @@ Successfully installed pymsgbox-1.0.9
 [notice] To update, run: pip install --upgrade pip
 [notice] To update, run: pip install --upgrade pip
 Note: you may need to restart the kernel to use updated packages.
 Note: you may need to restart the kernel to use updated packages.
 [Jupyter current working directory: /workspace]
 [Jupyter current working directory: /workspace]
-[Jupyter Python interpreter: /openhands/poetry/openhands-5O4_aCHf-py3.11/bin/python]
+[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]
 
 
 ENVIRONMENT REMINDER: You have 13 turns left to complete the task. When finished reply with <finish></finish>.
 ENVIRONMENT REMINDER: You have 13 turns left to complete the task. When finished reply with <finish></finish>.

+ 2 - 2
tests/integration/mock/eventstream_runtime/CodeActSWEAgent/test_ipython_module/prompt_003.log

@@ -618,7 +618,7 @@ Successfully installed pymsgbox-1.0.9
 [notice] To update, run: pip install --upgrade pip
 [notice] To update, run: pip install --upgrade pip
 Note: you may need to restart the kernel to use updated packages.
 Note: you may need to restart the kernel to use updated packages.
 [Jupyter current working directory: /workspace]
 [Jupyter current working directory: /workspace]
-[Jupyter Python interpreter: /openhands/poetry/openhands-5O4_aCHf-py3.11/bin/python]
+[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]
 
 
 ----------
 ----------
 
 
@@ -639,6 +639,6 @@ version_info
 OBSERVATION:
 OBSERVATION:
 '1.0.9'
 '1.0.9'
 [Jupyter current working directory: /workspace]
 [Jupyter current working directory: /workspace]
-[Jupyter Python interpreter: /openhands/poetry/openhands-5O4_aCHf-py3.11/bin/python]
+[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]
 
 
 ENVIRONMENT REMINDER: You have 12 turns left to complete the task. When finished reply with <finish></finish>.
 ENVIRONMENT REMINDER: You have 12 turns left to complete the task. When finished reply with <finish></finish>.

+ 3 - 3
tests/integration/mock/eventstream_runtime/CodeActSWEAgent/test_ipython_module/prompt_004.log

@@ -618,7 +618,7 @@ Successfully installed pymsgbox-1.0.9
 [notice] To update, run: pip install --upgrade pip
 [notice] To update, run: pip install --upgrade pip
 Note: you may need to restart the kernel to use updated packages.
 Note: you may need to restart the kernel to use updated packages.
 [Jupyter current working directory: /workspace]
 [Jupyter current working directory: /workspace]
-[Jupyter Python interpreter: /openhands/poetry/openhands-5O4_aCHf-py3.11/bin/python]
+[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]
 
 
 ----------
 ----------
 
 
@@ -639,7 +639,7 @@ version_info
 OBSERVATION:
 OBSERVATION:
 '1.0.9'
 '1.0.9'
 [Jupyter current working directory: /workspace]
 [Jupyter current working directory: /workspace]
-[Jupyter Python interpreter: /openhands/poetry/openhands-5O4_aCHf-py3.11/bin/python]
+[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]
 
 
 ----------
 ----------
 
 
@@ -653,7 +653,7 @@ cat /workspace/test.txt
 OBSERVATION:
 OBSERVATION:
 pymsgbox version: 1.0.9
 pymsgbox version: 1.0.9
 
 
-[Python Interpreter: /openhands/poetry/openhands-5O4_aCHf-py3.11/bin/python]
+[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]
 openhands@docker-desktop:/workspace $
 openhands@docker-desktop:/workspace $
 [Command -1 finished with exit code 0]
 [Command -1 finished with exit code 0]
 
 

+ 1 - 1
tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_002.log

@@ -41,7 +41,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 They are time-ordered, with your most recent action at the bottom.
 
 
-[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "bad.txt\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}]
+[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "bad.txt\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}]
 
 
 ## Format
 ## Format
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:

+ 1 - 1
tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_003.log

@@ -41,7 +41,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 They are time-ordered, with your most recent action at the bottom.
 
 
-[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "bad.txt\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}, {"source": "agent", "action": "read", "args": {"path": "bad.txt", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n", "extras": {"path": "/workspace/bad.txt"}}]
+[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "bad.txt\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}, {"source": "agent", "action": "read", "args": {"path": "bad.txt", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n", "extras": {"path": "/workspace/bad.txt"}}]
 
 
 ## Format
 ## Format
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:

+ 1 - 1
tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_002.log

@@ -41,7 +41,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 They are time-ordered, with your most recent action at the bottom.
 
 
-[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "\r\n[Python Interpreter: /openhands/poetry/openhands-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}]
+[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}]
 
 
 ## Format
 ## Format
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:

+ 1 - 1
tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_005.log

@@ -41,7 +41,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 They are time-ordered, with your most recent action at the bottom.
 
 
-[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\n\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "/workspace/hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "chmod +x hello.sh", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "\r\n[Python Interpreter: /openhands/poetry/openhands-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "chmod +x hello.sh", "exit_code": 0}}]
+[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\n\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "/workspace/hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "chmod +x hello.sh", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "chmod +x hello.sh", "exit_code": 0}}]
 
 
 ## Format
 ## Format
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:

+ 1 - 1
tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_006.log

@@ -41,7 +41,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 They are time-ordered, with your most recent action at the bottom.
 
 
-[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\n\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "/workspace/hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "chmod +x hello.sh", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "\r\n[Python Interpreter: /openhands/poetry/openhands-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "chmod +x hello.sh", "exit_code": 0}}, {"source": "agent", "action": "run", "args": {"command": "./hello.sh", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "hello\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "./hello.sh", "exit_code": 0}}]
+[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\n\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "/workspace/hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "chmod +x hello.sh", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "chmod +x hello.sh", "exit_code": 0}}, {"source": "agent", "action": "run", "args": {"command": "./hello.sh", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "hello\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "./hello.sh", "exit_code": 0}}]
 
 
 ## Format
 ## Format
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:

+ 1 - 1
tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_009.log

@@ -39,7 +39,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 They are time-ordered, with your most recent action at the bottom.
 
 
-[{"source": "agent", "action": "read", "args": {"path": "hello.sh", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "#!/bin/bash\n\necho 'hello'\n", "extras": {"path": "/workspace/hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "bash hello.sh", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "hello\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "bash hello.sh", "exit_code": 0}}]
+[{"source": "agent", "action": "read", "args": {"path": "hello.sh", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "#!/bin/bash\n\necho 'hello'\n", "extras": {"path": "/workspace/hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "bash hello.sh", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "hello\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "bash hello.sh", "exit_code": 0}}]
 
 
 ## Format
 ## Format
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:

+ 1 - 1
tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_003.log

@@ -28,7 +28,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 They are time-ordered, with your most recent action at the bottom.
 
 
-[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}]
+[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}]
 
 
 If the last item in the history is an error, you should try to fix it.
 If the last item in the history is an error, you should try to fix it.
 
 

+ 1 - 1
tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_004.log

@@ -28,7 +28,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 They are time-ordered, with your most recent action at the bottom.
 
 
-[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}]
+[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}]
 
 
 If the last item in the history is an error, you should try to fix it.
 If the last item in the history is an error, you should try to fix it.
 
 

+ 1 - 1
tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_005.log

@@ -28,7 +28,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 They are time-ordered, with your most recent action at the bottom.
 
 
-[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}]
+[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}]
 
 
 If the last item in the history is an error, you should try to fix it.
 If the last item in the history is an error, you should try to fix it.
 
 

+ 1 - 1
tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_006.log

@@ -28,7 +28,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 They are time-ordered, with your most recent action at the bottom.
 
 
-[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}]
+[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}]
 
 
 If the last item in the history is an error, you should try to fix it.
 If the last item in the history is an error, you should try to fix it.
 
 

+ 1 - 1
tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_007.log

@@ -28,7 +28,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 They are time-ordered, with your most recent action at the bottom.
 
 
-[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}]
+[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}]
 
 
 If the last item in the history is an error, you should try to fix it.
 If the last item in the history is an error, you should try to fix it.
 
 

+ 1 - 1
tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_010.log

@@ -205,7 +205,7 @@ ten actions--more happened before that.
   {
   {
     "source": "agent",
     "source": "agent",
     "observation": "run",
     "observation": "run",
-    "content": "hello\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ",
+    "content": "hello\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ",
     "extras": {
     "extras": {
       "command_id": -1,
       "command_id": -1,
       "command": "bash hello.sh",
       "command": "bash hello.sh",

+ 1 - 1
tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_011.log

@@ -204,7 +204,7 @@ ten actions--more happened before that.
   {
   {
     "source": "agent",
     "source": "agent",
     "observation": "run",
     "observation": "run",
-    "content": "hello\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ",
+    "content": "hello\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ",
     "extras": {
     "extras": {
       "command_id": -1,
       "command_id": -1,
       "command": "bash hello.sh",
       "command": "bash hello.sh",

+ 8 - 3
tests/runtime/conftest.py

@@ -63,14 +63,19 @@ def enable_auto_lint(request):
 @pytest.fixture(scope='module', params=None)
 @pytest.fixture(scope='module', params=None)
 def base_container_image(request):
 def base_container_image(request):
     time.sleep(1)
     time.sleep(1)
-    env_image = os.environ.get('BASE_CONTAINER_IMAGE')
+    env_image = os.environ.get('SANDBOX_BASE_CONTAINER_IMAGE')
     if env_image:
     if env_image:
         request.param = env_image
         request.param = env_image
     else:
     else:
         if not hasattr(request, 'param'):  # prevent runtime AttributeError
         if not hasattr(request, 'param'):  # prevent runtime AttributeError
             request.param = None
             request.param = None
-        if request.param is None:
-            request.param = request.config.getoption('--container-image')
+        if request.param is None and hasattr(request.config, 'sandbox'):
+            try:
+                request.param = request.config.sandbox.getoption(
+                    '--base_container_image'
+                )
+            except ValueError:
+                request.param = None
         if request.param is None:
         if request.param is None:
             request.param = pytest.param(
             request.param = pytest.param(
                 'nikolaik/python-nodejs:python3.11-nodejs22',
                 'nikolaik/python-nodejs:python3.11-nodejs22',

+ 11 - 11
tests/runtime/test_ipython.py

@@ -51,7 +51,7 @@ async def test_simple_cmd_ipython_and_fileop(temp_dir, box_class, run_as_openhan
     assert obs.content.strip() == (
     assert obs.content.strip() == (
         'Hello, `World`!\n'
         'Hello, `World`!\n'
         '[Jupyter current working directory: /workspace]\n'
         '[Jupyter current working directory: /workspace]\n'
-        '[Jupyter Python interpreter: /openhands/poetry/openhands-5O4_aCHf-py3.11/bin/python]'
+        '[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]'
     )
     )
 
 
     # Test read file (file should not exist)
     # Test read file (file should not exist)
@@ -126,7 +126,7 @@ async def test_ipython_multi_user(temp_dir, box_class, run_as_openhands):
         == (
         == (
             '/workspace\n'
             '/workspace\n'
             '[Jupyter current working directory: /workspace]\n'
             '[Jupyter current working directory: /workspace]\n'
-            '[Jupyter Python interpreter: /openhands/poetry/openhands-5O4_aCHf-py3.11/bin/python]'
+            '[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]'
         ).strip()
         ).strip()
     )
     )
 
 
@@ -142,7 +142,7 @@ async def test_ipython_multi_user(temp_dir, box_class, run_as_openhands):
         == (
         == (
             '[Code executed successfully with no output]\n'
             '[Code executed successfully with no output]\n'
             '[Jupyter current working directory: /workspace]\n'
             '[Jupyter current working directory: /workspace]\n'
-            '[Jupyter Python interpreter: /openhands/poetry/openhands-5O4_aCHf-py3.11/bin/python]'
+            '[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]'
         ).strip()
         ).strip()
     )
     )
 
 
@@ -188,7 +188,7 @@ async def test_ipython_simple(temp_dir, box_class):
         == (
         == (
             '1\n'
             '1\n'
             '[Jupyter current working directory: /workspace]\n'
             '[Jupyter current working directory: /workspace]\n'
-            '[Jupyter Python interpreter: /openhands/poetry/openhands-5O4_aCHf-py3.11/bin/python]'
+            '[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]'
         ).strip()
         ).strip()
     )
     )
 
 
@@ -225,7 +225,7 @@ async def _test_ipython_agentskills_fileop_pwd_impl(
         '(this is the end of the file)\n'
         '(this is the end of the file)\n'
         '[File hello.py created.]\n'
         '[File hello.py created.]\n'
         '[Jupyter current working directory: /workspace]\n'
         '[Jupyter current working directory: /workspace]\n'
-        '[Jupyter Python interpreter: /openhands/poetry/openhands-5O4_aCHf-py3.11/bin/python]'
+        '[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]'
     ).strip().split('\n')
     ).strip().split('\n')
 
 
     action = CmdRunAction(command='cd test')
     action = CmdRunAction(command='cd test')
@@ -249,7 +249,7 @@ async def _test_ipython_agentskills_fileop_pwd_impl(
         '(this is the end of the file)\n'
         '(this is the end of the file)\n'
         '[File hello.py created.]\n'
         '[File hello.py created.]\n'
         '[Jupyter current working directory: /workspace/test]\n'
         '[Jupyter current working directory: /workspace/test]\n'
-        '[Jupyter Python interpreter: /openhands/poetry/openhands-5O4_aCHf-py3.11/bin/python]'
+        '[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]'
     ).strip().split('\n')
     ).strip().split('\n')
 
 
     if enable_auto_lint:
     if enable_auto_lint:
@@ -283,7 +283,7 @@ Your changes have NOT been applied. Please fix your edit command and try again.
 You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.
 You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.
 DO NOT re-run the same failed edit command. Running it again will lead to the same error.
 DO NOT re-run the same failed edit command. Running it again will lead to the same error.
 [Jupyter current working directory: /workspace/test]
 [Jupyter current working directory: /workspace/test]
-[Jupyter Python interpreter: /openhands/poetry/openhands-5O4_aCHf-py3.11/bin/python]
+[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]
 """
 """
         ).strip().split('\n')
         ).strip().split('\n')
 
 
@@ -303,7 +303,7 @@ DO NOT re-run the same failed edit command. Running it again will lead to the sa
 (this is the end of the file)
 (this is the end of the file)
 [File updated (edited at line 1). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]
 [File updated (edited at line 1). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]
 [Jupyter current working directory: /workspace/test]
 [Jupyter current working directory: /workspace/test]
-[Jupyter Python interpreter: /openhands/poetry/openhands-5O4_aCHf-py3.11/bin/python]
+[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]
 """
 """
     ).strip().split('\n')
     ).strip().split('\n')
 
 
@@ -371,7 +371,7 @@ async def test_ipython_agentskills_fileop_pwd_with_userdir(temp_dir, box_class):
         '(this is the end of the file)\n'
         '(this is the end of the file)\n'
         '[File hello.py created.]\n'
         '[File hello.py created.]\n'
         '[Jupyter current working directory: /root]\n'
         '[Jupyter current working directory: /root]\n'
-        '[Jupyter Python interpreter: /openhands/poetry/openhands-5O4_aCHf-py3.11/bin/python]'
+        '[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]'
     ).strip().split('\n')
     ).strip().split('\n')
 
 
     action = CmdRunAction(command='cd test')
     action = CmdRunAction(command='cd test')
@@ -395,7 +395,7 @@ async def test_ipython_agentskills_fileop_pwd_with_userdir(temp_dir, box_class):
         '(this is the end of the file)\n'
         '(this is the end of the file)\n'
         '[File hello.py created.]\n'
         '[File hello.py created.]\n'
         '[Jupyter current working directory: /root/test]\n'
         '[Jupyter current working directory: /root/test]\n'
-        '[Jupyter Python interpreter: /openhands/poetry/openhands-5O4_aCHf-py3.11/bin/python]'
+        '[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]'
     ).strip().split('\n')
     ).strip().split('\n')
 
 
     await runtime.close()
     await runtime.close()
@@ -432,7 +432,7 @@ async def test_ipython_package_install(temp_dir, box_class, run_as_openhands):
     assert obs.content.strip() == (
     assert obs.content.strip() == (
         '[Code executed successfully with no output]\n'
         '[Code executed successfully with no output]\n'
         '[Jupyter current working directory: /workspace]\n'
         '[Jupyter current working directory: /workspace]\n'
-        '[Jupyter Python interpreter: /openhands/poetry/openhands-5O4_aCHf-py3.11/bin/python]'
+        '[Jupyter Python interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]'
     )
     )
 
 
     await runtime.close()
     await runtime.close()