浏览代码

[Agent Action] Support the ability to specify whether to keep prompt for CmdRun (#3218)

* support the ability to specify whether to keep prompt

* fix action serialization

* fix jupyter pwd with ~ as user name

* add test for keep_prompt;
add missing await close for some tests

* update integration tests for eventstream runtime

* fix integration tests for server runtime
Xingyao Wang 1 年之前
父节点
当前提交
62ce183c2d
共有 28 个文件被更改,包括 165 次插入29 次删除
  1. 8 0
      opendevin/events/action/commands.py
  2. 18 9
      opendevin/runtime/client/client.py
  3. 1 1
      tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_002.log
  4. 1 1
      tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_003.log
  5. 1 1
      tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_002.log
  6. 1 1
      tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_005.log
  7. 1 1
      tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_006.log
  8. 1 1
      tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_009.log
  9. 1 1
      tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_003.log
  10. 1 1
      tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_004.log
  11. 1 1
      tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_005.log
  12. 1 1
      tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_006.log
  13. 1 1
      tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_007.log
  14. 1 0
      tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_010.log
  15. 1 0
      tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_011.log
  16. 1 1
      tests/integration/mock/server_runtime/DelegatorAgent/test_edits/prompt_002.log
  17. 1 1
      tests/integration/mock/server_runtime/DelegatorAgent/test_edits/prompt_003.log
  18. 1 1
      tests/integration/mock/server_runtime/DelegatorAgent/test_write_simple_script/prompt_002.log
  19. 1 1
      tests/integration/mock/server_runtime/DelegatorAgent/test_write_simple_script/prompt_005.log
  20. 1 1
      tests/integration/mock/server_runtime/DelegatorAgent/test_write_simple_script/prompt_006.log
  21. 1 1
      tests/integration/mock/server_runtime/DelegatorAgent/test_write_simple_script/prompt_009.log
  22. 1 1
      tests/integration/mock/server_runtime/ManagerAgent/test_simple_task_rejection/prompt_003.log
  23. 1 1
      tests/integration/mock/server_runtime/ManagerAgent/test_simple_task_rejection/prompt_004.log
  24. 1 1
      tests/integration/mock/server_runtime/ManagerAgent/test_simple_task_rejection/prompt_005.log
  25. 1 0
      tests/integration/mock/server_runtime/PlannerAgent/test_write_simple_script/prompt_010.log
  26. 1 0
      tests/integration/mock/server_runtime/PlannerAgent/test_write_simple_script/prompt_011.log
  27. 1 0
      tests/unit/test_action_serialization.py
  28. 114 0
      tests/unit/test_runtime.py

+ 8 - 0
opendevin/events/action/commands.py

@@ -10,6 +10,14 @@ from .action import Action, ActionConfirmationStatus
 class CmdRunAction(Action):
     command: str
     thought: str = ''
+    keep_prompt: bool = True
+    # if True, the command prompt will be kept in the command output observation
+    # Example of command output:
+    # root@sandbox:~# ls
+    # file1.txt
+    # file2.txt
+    # root@sandbox:~# <-- this is the command prompt
+
     action: str = ActionType.RUN
     runnable: ClassVar[bool] = True
     is_confirmed: ActionConfirmationStatus = ActionConfirmationStatus.CONFIRMED

+ 18 - 9
opendevin/runtime/client/client.py

@@ -170,8 +170,7 @@ class RuntimeClient:
             matched is not None
         ), f'Failed to parse bash prompt: {ps1}. This should not happen.'
         username, hostname, working_dir = matched.groups()
-        self._prev_pwd = self.pwd
-        self.pwd = working_dir
+        self.pwd = os.path.expanduser(working_dir)
 
         # re-assemble the prompt
         prompt = f'{username}@{hostname}:{working_dir} '
@@ -192,8 +191,10 @@ class RuntimeClient:
         self.shell.expect(self.__bash_expect_regex, timeout=timeout)
 
         output = self.shell.before
+
+        bash_prompt = self._get_bash_prompt_and_update_pwd()
         if keep_prompt:
-            output += '\r\n' + self._get_bash_prompt_and_update_pwd()
+            output += '\r\n' + bash_prompt
         logger.debug(f'Command output: {output}')
 
         # Get exit code
@@ -218,7 +219,11 @@ class RuntimeClient:
             commands = split_bash_commands(action.command)
             all_output = ''
             for command in commands:
-                output, exit_code = self._execute_bash(command, timeout=action.timeout)
+                output, exit_code = self._execute_bash(
+                    command,
+                    timeout=action.timeout,
+                    keep_prompt=action.keep_prompt,
+                )
                 if all_output:
                     # previous output already exists with prompt "user@hostname:working_dir #""
                     # we need to add the command to the previous output,
@@ -240,15 +245,19 @@ class RuntimeClient:
     async def run_ipython(self, action: IPythonRunCellAction) -> Observation:
         if 'jupyter' in self.plugins:
             _jupyter_plugin: JupyterPlugin = self.plugins['jupyter']  # type: ignore
-
             # This is used to make AgentSkills in Jupyter aware of the
             # current working directory in Bash
-            if not hasattr(self, '_prev_pwd') or self.pwd != self._prev_pwd:
-                reset_jupyter_pwd_code = (
-                    f'import os; os.environ["JUPYTER_PWD"] = "{self.pwd}"\n\n'
+            if self.pwd != getattr(self, '_jupyter_pwd', None):
+                logger.debug(
+                    f"{self.pwd} != {getattr(self, '_jupyter_pwd', None)} -> reset Jupyter PWD"
                 )
+                reset_jupyter_pwd_code = f'import os; os.environ["JUPYTER_PWD"] = os.path.abspath("{self.pwd}")'
                 _aux_action = IPythonRunCellAction(code=reset_jupyter_pwd_code)
-                _ = await _jupyter_plugin.run(_aux_action)
+                _reset_obs = await _jupyter_plugin.run(_aux_action)
+                logger.debug(
+                    f'Changed working directory in IPython to: {self.pwd}. Output: {_reset_obs}'
+                )
+                self._jupyter_pwd = self.pwd
 
             obs: IPythonRunCellObservation = await _jupyter_plugin.run(action)
             return obs

+ 1 - 1
tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_002.log

@@ -41,7 +41,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 
-[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "bad.txt\r\n\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}]
+[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "bad.txt\r\n\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}]
 
 ## Format
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:

+ 1 - 1
tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_003.log

@@ -41,7 +41,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 
-[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "bad.txt\r\n\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}, {"source": "agent", "action": "read", "args": {"path": "bad.txt", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n", "extras": {"path": "/workspace/bad.txt"}}]
+[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "bad.txt\r\n\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}, {"source": "agent", "action": "read", "args": {"path": "bad.txt", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n", "extras": {"path": "/workspace/bad.txt"}}]
 
 ## Format
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:

+ 1 - 1
tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_002.log

@@ -41,7 +41,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 
-[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}]
+[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}]
 
 ## Format
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:

+ 1 - 1
tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_005.log

@@ -41,7 +41,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 
-[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\n\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "/workspace/hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "chmod +x hello.sh", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "chmod +x hello.sh", "exit_code": 0}}]
+[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\n\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "/workspace/hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "chmod +x hello.sh", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "chmod +x hello.sh", "exit_code": 0}}]
 
 ## Format
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:

+ 1 - 1
tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_006.log

@@ -41,7 +41,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 
-[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\n\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "/workspace/hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "chmod +x hello.sh", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "chmod +x hello.sh", "exit_code": 0}}, {"source": "agent", "action": "run", "args": {"command": "./hello.sh", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "hello\r\n\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "./hello.sh", "exit_code": 0}}]
+[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\n\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "/workspace/hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "chmod +x hello.sh", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "chmod +x hello.sh", "exit_code": 0}}, {"source": "agent", "action": "run", "args": {"command": "./hello.sh", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "hello\r\n\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "./hello.sh", "exit_code": 0}}]
 
 ## Format
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:

+ 1 - 1
tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_009.log

@@ -39,7 +39,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 
-[{"source": "agent", "action": "read", "args": {"path": "hello.sh", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "#!/bin/bash\n\necho 'hello'\n", "extras": {"path": "/workspace/hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "bash hello.sh", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "hello\r\n\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "bash hello.sh", "exit_code": 0}}]
+[{"source": "agent", "action": "read", "args": {"path": "hello.sh", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "#!/bin/bash\n\necho 'hello'\n", "extras": {"path": "/workspace/hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "bash hello.sh", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "hello\r\n\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "bash hello.sh", "exit_code": 0}}]
 
 ## Format
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:

+ 1 - 1
tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_003.log

@@ -28,7 +28,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 
-[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}]
+[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}]
 
 If the last item in the history is an error, you should try to fix it.
 

+ 1 - 1
tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_004.log

@@ -28,7 +28,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 
-[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}]
+[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}]
 
 If the last item in the history is an error, you should try to fix it.
 

+ 1 - 1
tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_005.log

@@ -28,7 +28,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 
-[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}]
+[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}]
 
 If the last item in the history is an error, you should try to fix it.
 

+ 1 - 1
tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_006.log

@@ -28,7 +28,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 
-[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}]
+[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}]
 
 If the last item in the history is an error, you should try to fix it.
 

+ 1 - 1
tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_007.log

@@ -28,7 +28,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 
-[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}]
+[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}]
 
 If the last item in the history is an error, you should try to fix it.
 

+ 1 - 0
tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_010.log

@@ -198,6 +198,7 @@ ten actions--more happened before that.
     "args": {
       "command": "bash hello.sh",
       "thought": "",
+      "keep_prompt": true,
       "is_confirmed": "confirmed"
     }
   },

+ 1 - 0
tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_011.log

@@ -197,6 +197,7 @@ ten actions--more happened before that.
     "args": {
       "command": "bash hello.sh",
       "thought": "",
+      "keep_prompt": true,
       "is_confirmed": "confirmed"
     }
   },

+ 1 - 1
tests/integration/mock/server_runtime/DelegatorAgent/test_edits/prompt_002.log

@@ -41,7 +41,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 
-[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "bad.txt", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}]
+[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "bad.txt", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}]
 
 ## Format
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:

+ 1 - 1
tests/integration/mock/server_runtime/DelegatorAgent/test_edits/prompt_003.log

@@ -41,7 +41,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 
-[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "bad.txt", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}, {"source": "agent", "action": "read", "args": {"path": "bad.txt", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n", "extras": {"path": "bad.txt"}}]
+[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "bad.txt", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}, {"source": "agent", "action": "read", "args": {"path": "bad.txt", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n", "extras": {"path": "bad.txt"}}]
 
 ## Format
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:

+ 1 - 1
tests/integration/mock/server_runtime/DelegatorAgent/test_write_simple_script/prompt_002.log

@@ -41,7 +41,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 
-[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}]
+[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}]
 
 ## Format
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:

+ 1 - 1
tests/integration/mock/server_runtime/DelegatorAgent/test_write_simple_script/prompt_005.log

@@ -41,7 +41,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 
-[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\n\n# Print hello\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "chmod +x hello.sh", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "", "extras": {"command_id": -1, "command": "chmod +x hello.sh", "exit_code": 0}}]
+[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\n\n# Print hello\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "chmod +x hello.sh", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "", "extras": {"command_id": -1, "command": "chmod +x hello.sh", "exit_code": 0}}]
 
 ## Format
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:

+ 1 - 1
tests/integration/mock/server_runtime/DelegatorAgent/test_write_simple_script/prompt_006.log

@@ -41,7 +41,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 
-[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\n\n# Print hello\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "chmod +x hello.sh", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "", "extras": {"command_id": -1, "command": "chmod +x hello.sh", "exit_code": 0}}, {"source": "agent", "action": "run", "args": {"command": "./hello.sh", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "hello", "extras": {"command_id": -1, "command": "./hello.sh", "exit_code": 0}}]
+[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\n\n# Print hello\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "chmod +x hello.sh", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "", "extras": {"command_id": -1, "command": "chmod +x hello.sh", "exit_code": 0}}, {"source": "agent", "action": "run", "args": {"command": "./hello.sh", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "hello", "extras": {"command_id": -1, "command": "./hello.sh", "exit_code": 0}}]
 
 ## Format
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:

+ 1 - 1
tests/integration/mock/server_runtime/DelegatorAgent/test_write_simple_script/prompt_009.log

@@ -39,7 +39,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 
-[{"source": "agent", "action": "read", "args": {"path": "hello.sh", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "#!/bin/bash\n\n# Print hello\necho 'hello'\n", "extras": {"path": "hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "bash hello.sh", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "hello", "extras": {"command_id": -1, "command": "bash hello.sh", "exit_code": 0}}]
+[{"source": "agent", "action": "read", "args": {"path": "hello.sh", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "#!/bin/bash\n\n# Print hello\necho 'hello'\n", "extras": {"path": "hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "bash hello.sh", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "hello", "extras": {"command_id": -1, "command": "bash hello.sh", "exit_code": 0}}]
 
 ## Format
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:

+ 1 - 1
tests/integration/mock/server_runtime/ManagerAgent/test_simple_task_rejection/prompt_003.log

@@ -28,7 +28,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 
-[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}]
+[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}]
 
 If the last item in the history is an error, you should try to fix it.
 

+ 1 - 1
tests/integration/mock/server_runtime/ManagerAgent/test_simple_task_rejection/prompt_004.log

@@ -28,7 +28,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 
-[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}]
+[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}]
 
 If the last item in the history is an error, you should try to fix it.
 

+ 1 - 1
tests/integration/mock/server_runtime/ManagerAgent/test_simple_task_rejection/prompt_005.log

@@ -28,7 +28,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 
-[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}]
+[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}]
 
 If the last item in the history is an error, you should try to fix it.
 

+ 1 - 0
tests/integration/mock/server_runtime/PlannerAgent/test_write_simple_script/prompt_010.log

@@ -198,6 +198,7 @@ ten actions--more happened before that.
     "args": {
       "command": "bash hello.sh",
       "thought": "",
+      "keep_prompt": true,
       "is_confirmed": "confirmed"
     }
   },

+ 1 - 0
tests/integration/mock/server_runtime/PlannerAgent/test_write_simple_script/prompt_011.log

@@ -197,6 +197,7 @@ ten actions--more happened before that.
     "args": {
       "command": "bash hello.sh",
       "thought": "",
+      "keep_prompt": true,
       "is_confirmed": "confirmed"
     }
   },

+ 1 - 0
tests/unit/test_action_serialization.py

@@ -86,6 +86,7 @@ def test_cmd_run_action_serialization_deserialization():
         'args': {
             'command': 'echo "Hello world"',
             'thought': '',
+            'keep_prompt': True,
             'is_confirmed': ActionConfirmationStatus.CONFIRMED,
         },
     }

+ 114 - 0
tests/unit/test_runtime.py

@@ -844,6 +844,9 @@ async def test_ipython_simple(temp_dir, box_class):
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
     assert obs.content.strip() == '1'
 
+    await runtime.close()
+    await asyncio.sleep(1)
+
 
 async def _test_ipython_agentskills_fileop_pwd_impl(
     runtime: ServerRuntime | EventStreamRuntime, enable_auto_lint: bool
@@ -954,6 +957,9 @@ DO NOT re-run the same failed edit command. Running it again will lead to the sa
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
     assert obs.exit_code == 0
 
+    await runtime.close()
+    await asyncio.sleep(1)
+
 
 @pytest.mark.asyncio
 async def test_ipython_agentskills_fileop_pwd(temp_dir, box_class, enable_auto_lint):
@@ -967,6 +973,72 @@ async def test_ipython_agentskills_fileop_pwd(temp_dir, box_class, enable_auto_l
     await asyncio.sleep(1)
 
 
+@pytest.mark.asyncio
+async def test_ipython_agentskills_fileop_pwd_with_userdir(temp_dir, box_class):
+    """Make sure that cd in bash also update the current working directory in ipython.
+
+    Handle special case where the pwd is provided as "~", which should be expanded using os.path.expanduser
+    on the client side.
+    """
+
+    runtime = await _load_runtime(
+        temp_dir,
+        box_class,
+        run_as_devin=False,
+    )
+
+    action = CmdRunAction(command='cd ~')
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = await runtime.run_action(action)
+    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+    assert obs.exit_code == 0
+
+    action = CmdRunAction(command='mkdir test && ls -la')
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = await runtime.run_action(action)
+    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+    assert isinstance(obs, CmdOutputObservation)
+    assert obs.exit_code == 0
+
+    action = IPythonRunCellAction(code="create_file('hello.py')")
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = await runtime.run_action(action)
+    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+    assert isinstance(obs, IPythonRunCellObservation)
+    assert obs.content.replace('\r\n', '\n').strip().split('\n') == (
+        '[File: /root/hello.py (1 lines total)]\n'
+        '(this is the beginning of the file)\n'
+        '1|\n'
+        '(this is the end of the file)\n'
+        '[File hello.py created.]\n'
+    ).strip().split('\n')
+
+    action = CmdRunAction(command='cd test')
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = await runtime.run_action(action)
+    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+    assert isinstance(obs, CmdOutputObservation)
+    assert obs.exit_code == 0
+
+    # This should create a file in the current working directory
+    # i.e., /workspace/test/hello.py instead of /workspace/hello.py
+    action = IPythonRunCellAction(code="create_file('hello.py')")
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = await runtime.run_action(action)
+    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+    assert isinstance(obs, IPythonRunCellObservation)
+    assert obs.content.replace('\r\n', '\n').strip().split('\n') == (
+        '[File: /root/test/hello.py (1 lines total)]\n'
+        '(this is the beginning of the file)\n'
+        '1|\n'
+        '(this is the end of the file)\n'
+        '[File hello.py created.]\n'
+    ).strip().split('\n')
+
+    await runtime.close()
+    await asyncio.sleep(1)
+
+
 @pytest.mark.skipif(
     TEST_RUNTIME.lower() == 'eventstream',
     reason='Skip this if we want to test EventStreamRuntime',
@@ -1082,6 +1154,9 @@ async def test_copy_single_file(temp_dir, box_class):
     assert obs.exit_code == 0
     assert 'Hello, World!' in obs.content
 
+    await runtime.close()
+    await asyncio.sleep(1)
+
 
 def _create_test_dir_with_files(host_temp_dir):
     os.mkdir(os.path.join(host_temp_dir, 'test_dir'))
@@ -1129,6 +1204,9 @@ async def test_copy_directory_recursively(temp_dir, box_class):
     assert obs.exit_code == 0
     assert 'File 1 content' in obs.content
 
+    await runtime.close()
+    await asyncio.sleep(1)
+
 
 @pytest.mark.asyncio
 async def test_copy_to_non_existent_directory(temp_dir, box_class):
@@ -1148,6 +1226,9 @@ async def test_copy_to_non_existent_directory(temp_dir, box_class):
     assert obs.exit_code == 0
     assert 'Hello, World!' in obs.content
 
+    await runtime.close()
+    await asyncio.sleep(1)
+
 
 @pytest.mark.asyncio
 async def test_overwrite_existing_file(temp_dir, box_class):
@@ -1183,6 +1264,9 @@ async def test_overwrite_existing_file(temp_dir, box_class):
     assert obs.exit_code == 0
     assert 'Hello, World!' in obs.content
 
+    await runtime.close()
+    await asyncio.sleep(1)
+
 
 @pytest.mark.asyncio
 async def test_copy_non_existent_file(temp_dir, box_class):
@@ -1200,3 +1284,33 @@ async def test_copy_non_existent_file(temp_dir, box_class):
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
     assert isinstance(obs, CmdOutputObservation)
     assert obs.exit_code != 0  # File should not exist
+
+    await runtime.close()
+    await asyncio.sleep(1)
+
+
+@pytest.mark.asyncio
+async def test_keep_prompt(temp_dir):
+    # only EventStreamRuntime supports keep_prompt
+    runtime = await _load_runtime(
+        temp_dir, box_class=EventStreamRuntime, run_as_devin=False
+    )
+
+    action = CmdRunAction(command='touch /workspace/test_file.txt')
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = await runtime.run_action(action)
+    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+    assert isinstance(obs, CmdOutputObservation)
+    assert obs.exit_code == 0
+    assert 'root@' in obs.content
+
+    action = CmdRunAction(command='cat /workspace/test_file.txt', keep_prompt=False)
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = await runtime.run_action(action)
+    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+    assert isinstance(obs, CmdOutputObservation)
+    assert obs.exit_code == 0
+    assert 'root@' not in obs.content
+
+    await runtime.close()
+    await asyncio.sleep(1)