Просмотр исходного кода

[Agent Action] Support the ability to specify whether to keep prompt for CmdRun (#3218)

* support the ability to specify whether to keep prompt

* fix action serialization

* fix jupyter pwd with ~ as user name

* add test for keep_prompt;
add missing await close for some tests

* update integration tests for eventstream runtime

* fix integration tests for server runtime
Xingyao Wang 1 год назад
Родитель
Commit
62ce183c2d
28 измененных файлов: 165 добавлений и 29 удалений
  1. 8 0
      opendevin/events/action/commands.py
  2. 18 9
      opendevin/runtime/client/client.py
  3. 1 1
      tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_002.log
  4. 1 1
      tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_003.log
  5. 1 1
      tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_002.log
  6. 1 1
      tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_005.log
  7. 1 1
      tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_006.log
  8. 1 1
      tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_009.log
  9. 1 1
      tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_003.log
  10. 1 1
      tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_004.log
  11. 1 1
      tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_005.log
  12. 1 1
      tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_006.log
  13. 1 1
      tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_007.log
  14. 1 0
      tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_010.log
  15. 1 0
      tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_011.log
  16. 1 1
      tests/integration/mock/server_runtime/DelegatorAgent/test_edits/prompt_002.log
  17. 1 1
      tests/integration/mock/server_runtime/DelegatorAgent/test_edits/prompt_003.log
  18. 1 1
      tests/integration/mock/server_runtime/DelegatorAgent/test_write_simple_script/prompt_002.log
  19. 1 1
      tests/integration/mock/server_runtime/DelegatorAgent/test_write_simple_script/prompt_005.log
  20. 1 1
      tests/integration/mock/server_runtime/DelegatorAgent/test_write_simple_script/prompt_006.log
  21. 1 1
      tests/integration/mock/server_runtime/DelegatorAgent/test_write_simple_script/prompt_009.log
  22. 1 1
      tests/integration/mock/server_runtime/ManagerAgent/test_simple_task_rejection/prompt_003.log
  23. 1 1
      tests/integration/mock/server_runtime/ManagerAgent/test_simple_task_rejection/prompt_004.log
  24. 1 1
      tests/integration/mock/server_runtime/ManagerAgent/test_simple_task_rejection/prompt_005.log
  25. 1 0
      tests/integration/mock/server_runtime/PlannerAgent/test_write_simple_script/prompt_010.log
  26. 1 0
      tests/integration/mock/server_runtime/PlannerAgent/test_write_simple_script/prompt_011.log
  27. 1 0
      tests/unit/test_action_serialization.py
  28. 114 0
      tests/unit/test_runtime.py

+ 8 - 0
opendevin/events/action/commands.py

@@ -10,6 +10,14 @@ from .action import Action, ActionConfirmationStatus
 class CmdRunAction(Action):
 class CmdRunAction(Action):
     command: str
     command: str
     thought: str = ''
     thought: str = ''
+    keep_prompt: bool = True
+    # if True, the command prompt will be kept in the command output observation
+    # Example of command output:
+    # root@sandbox:~# ls
+    # file1.txt
+    # file2.txt
+    # root@sandbox:~# <-- this is the command prompt
+
     action: str = ActionType.RUN
     action: str = ActionType.RUN
     runnable: ClassVar[bool] = True
     runnable: ClassVar[bool] = True
     is_confirmed: ActionConfirmationStatus = ActionConfirmationStatus.CONFIRMED
     is_confirmed: ActionConfirmationStatus = ActionConfirmationStatus.CONFIRMED

+ 18 - 9
opendevin/runtime/client/client.py

@@ -170,8 +170,7 @@ class RuntimeClient:
             matched is not None
             matched is not None
         ), f'Failed to parse bash prompt: {ps1}. This should not happen.'
         ), f'Failed to parse bash prompt: {ps1}. This should not happen.'
         username, hostname, working_dir = matched.groups()
         username, hostname, working_dir = matched.groups()
-        self._prev_pwd = self.pwd
-        self.pwd = working_dir
+        self.pwd = os.path.expanduser(working_dir)
 
 
         # re-assemble the prompt
         # re-assemble the prompt
         prompt = f'{username}@{hostname}:{working_dir} '
         prompt = f'{username}@{hostname}:{working_dir} '
@@ -192,8 +191,10 @@ class RuntimeClient:
         self.shell.expect(self.__bash_expect_regex, timeout=timeout)
         self.shell.expect(self.__bash_expect_regex, timeout=timeout)
 
 
         output = self.shell.before
         output = self.shell.before
+
+        bash_prompt = self._get_bash_prompt_and_update_pwd()
         if keep_prompt:
         if keep_prompt:
-            output += '\r\n' + self._get_bash_prompt_and_update_pwd()
+            output += '\r\n' + bash_prompt
         logger.debug(f'Command output: {output}')
         logger.debug(f'Command output: {output}')
 
 
         # Get exit code
         # Get exit code
@@ -218,7 +219,11 @@ class RuntimeClient:
             commands = split_bash_commands(action.command)
             commands = split_bash_commands(action.command)
             all_output = ''
             all_output = ''
             for command in commands:
             for command in commands:
-                output, exit_code = self._execute_bash(command, timeout=action.timeout)
+                output, exit_code = self._execute_bash(
+                    command,
+                    timeout=action.timeout,
+                    keep_prompt=action.keep_prompt,
+                )
                 if all_output:
                 if all_output:
                     # previous output already exists with prompt "user@hostname:working_dir #""
                     # previous output already exists with prompt "user@hostname:working_dir #""
                     # we need to add the command to the previous output,
                     # we need to add the command to the previous output,
@@ -240,15 +245,19 @@ class RuntimeClient:
     async def run_ipython(self, action: IPythonRunCellAction) -> Observation:
     async def run_ipython(self, action: IPythonRunCellAction) -> Observation:
         if 'jupyter' in self.plugins:
         if 'jupyter' in self.plugins:
             _jupyter_plugin: JupyterPlugin = self.plugins['jupyter']  # type: ignore
             _jupyter_plugin: JupyterPlugin = self.plugins['jupyter']  # type: ignore
-
             # This is used to make AgentSkills in Jupyter aware of the
             # This is used to make AgentSkills in Jupyter aware of the
             # current working directory in Bash
             # current working directory in Bash
-            if not hasattr(self, '_prev_pwd') or self.pwd != self._prev_pwd:
-                reset_jupyter_pwd_code = (
-                    f'import os; os.environ["JUPYTER_PWD"] = "{self.pwd}"\n\n'
+            if self.pwd != getattr(self, '_jupyter_pwd', None):
+                logger.debug(
+                    f"{self.pwd} != {getattr(self, '_jupyter_pwd', None)} -> reset Jupyter PWD"
                 )
                 )
+                reset_jupyter_pwd_code = f'import os; os.environ["JUPYTER_PWD"] = os.path.abspath("{self.pwd}")'
                 _aux_action = IPythonRunCellAction(code=reset_jupyter_pwd_code)
                 _aux_action = IPythonRunCellAction(code=reset_jupyter_pwd_code)
-                _ = await _jupyter_plugin.run(_aux_action)
+                _reset_obs = await _jupyter_plugin.run(_aux_action)
+                logger.debug(
+                    f'Changed working directory in IPython to: {self.pwd}. Output: {_reset_obs}'
+                )
+                self._jupyter_pwd = self.pwd
 
 
             obs: IPythonRunCellObservation = await _jupyter_plugin.run(action)
             obs: IPythonRunCellObservation = await _jupyter_plugin.run(action)
             return obs
             return obs

+ 1 - 1
tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_002.log

@@ -41,7 +41,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 They are time-ordered, with your most recent action at the bottom.
 
 
-[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "bad.txt\r\n\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}]
+[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "bad.txt\r\n\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}]
 
 
 ## Format
 ## Format
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:

+ 1 - 1
tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_003.log

@@ -41,7 +41,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 They are time-ordered, with your most recent action at the bottom.
 
 
-[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "bad.txt\r\n\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}, {"source": "agent", "action": "read", "args": {"path": "bad.txt", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n", "extras": {"path": "/workspace/bad.txt"}}]
+[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "bad.txt\r\n\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}, {"source": "agent", "action": "read", "args": {"path": "bad.txt", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n", "extras": {"path": "/workspace/bad.txt"}}]
 
 
 ## Format
 ## Format
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:

+ 1 - 1
tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_002.log

@@ -41,7 +41,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 They are time-ordered, with your most recent action at the bottom.
 
 
-[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}]
+[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}]
 
 
 ## Format
 ## Format
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:

+ 1 - 1
tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_005.log

@@ -41,7 +41,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 They are time-ordered, with your most recent action at the bottom.
 
 
-[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\n\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "/workspace/hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "chmod +x hello.sh", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "chmod +x hello.sh", "exit_code": 0}}]
+[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\n\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "/workspace/hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "chmod +x hello.sh", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "chmod +x hello.sh", "exit_code": 0}}]
 
 
 ## Format
 ## Format
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:

+ 1 - 1
tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_006.log

@@ -41,7 +41,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 They are time-ordered, with your most recent action at the bottom.
 
 
-[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\n\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "/workspace/hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "chmod +x hello.sh", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "chmod +x hello.sh", "exit_code": 0}}, {"source": "agent", "action": "run", "args": {"command": "./hello.sh", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "hello\r\n\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "./hello.sh", "exit_code": 0}}]
+[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\n\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "/workspace/hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "chmod +x hello.sh", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "chmod +x hello.sh", "exit_code": 0}}, {"source": "agent", "action": "run", "args": {"command": "./hello.sh", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "hello\r\n\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "./hello.sh", "exit_code": 0}}]
 
 
 ## Format
 ## Format
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:

+ 1 - 1
tests/integration/mock/eventstream_runtime/DelegatorAgent/test_write_simple_script/prompt_009.log

@@ -39,7 +39,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 They are time-ordered, with your most recent action at the bottom.
 
 
-[{"source": "agent", "action": "read", "args": {"path": "hello.sh", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "#!/bin/bash\n\necho 'hello'\n", "extras": {"path": "/workspace/hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "bash hello.sh", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "hello\r\n\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "bash hello.sh", "exit_code": 0}}]
+[{"source": "agent", "action": "read", "args": {"path": "hello.sh", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "#!/bin/bash\n\necho 'hello'\n", "extras": {"path": "/workspace/hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "bash hello.sh", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "hello\r\n\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "bash hello.sh", "exit_code": 0}}]
 
 
 ## Format
 ## Format
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:

+ 1 - 1
tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_003.log

@@ -28,7 +28,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 They are time-ordered, with your most recent action at the bottom.
 
 
-[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}]
+[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}]
 
 
 If the last item in the history is an error, you should try to fix it.
 If the last item in the history is an error, you should try to fix it.
 
 

+ 1 - 1
tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_004.log

@@ -28,7 +28,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 They are time-ordered, with your most recent action at the bottom.
 
 
-[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}]
+[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}]
 
 
 If the last item in the history is an error, you should try to fix it.
 If the last item in the history is an error, you should try to fix it.
 
 

+ 1 - 1
tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_005.log

@@ -28,7 +28,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 They are time-ordered, with your most recent action at the bottom.
 
 
-[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}]
+[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}]
 
 
 If the last item in the history is an error, you should try to fix it.
 If the last item in the history is an error, you should try to fix it.
 
 

+ 1 - 1
tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_006.log

@@ -28,7 +28,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 They are time-ordered, with your most recent action at the bottom.
 
 
-[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}]
+[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}]
 
 
 If the last item in the history is an error, you should try to fix it.
 If the last item in the history is an error, you should try to fix it.
 
 

+ 1 - 1
tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_007.log

@@ -28,7 +28,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 They are time-ordered, with your most recent action at the bottom.
 
 
-[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}]
+[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\nopendevin@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}]
 
 
 If the last item in the history is an error, you should try to fix it.
 If the last item in the history is an error, you should try to fix it.
 
 

+ 1 - 0
tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_010.log

@@ -198,6 +198,7 @@ ten actions--more happened before that.
     "args": {
     "args": {
       "command": "bash hello.sh",
       "command": "bash hello.sh",
       "thought": "",
       "thought": "",
+      "keep_prompt": true,
       "is_confirmed": "confirmed"
       "is_confirmed": "confirmed"
     }
     }
   },
   },

+ 1 - 0
tests/integration/mock/eventstream_runtime/PlannerAgent/test_write_simple_script/prompt_011.log

@@ -197,6 +197,7 @@ ten actions--more happened before that.
     "args": {
     "args": {
       "command": "bash hello.sh",
       "command": "bash hello.sh",
       "thought": "",
       "thought": "",
+      "keep_prompt": true,
       "is_confirmed": "confirmed"
       "is_confirmed": "confirmed"
     }
     }
   },
   },

+ 1 - 1
tests/integration/mock/server_runtime/DelegatorAgent/test_edits/prompt_002.log

@@ -41,7 +41,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 They are time-ordered, with your most recent action at the bottom.
 
 
-[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "bad.txt", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}]
+[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "bad.txt", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}]
 
 
 ## Format
 ## Format
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:

+ 1 - 1
tests/integration/mock/server_runtime/DelegatorAgent/test_edits/prompt_003.log

@@ -41,7 +41,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 They are time-ordered, with your most recent action at the bottom.
 
 
-[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "bad.txt", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}, {"source": "agent", "action": "read", "args": {"path": "bad.txt", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n", "extras": {"path": "bad.txt"}}]
+[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "bad.txt", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}, {"source": "agent", "action": "read", "args": {"path": "bad.txt", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n", "extras": {"path": "bad.txt"}}]
 
 
 ## Format
 ## Format
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:

+ 1 - 1
tests/integration/mock/server_runtime/DelegatorAgent/test_write_simple_script/prompt_002.log

@@ -41,7 +41,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 They are time-ordered, with your most recent action at the bottom.
 
 
-[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}]
+[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}]
 
 
 ## Format
 ## Format
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:

+ 1 - 1
tests/integration/mock/server_runtime/DelegatorAgent/test_write_simple_script/prompt_005.log

@@ -41,7 +41,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 They are time-ordered, with your most recent action at the bottom.
 
 
-[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\n\n# Print hello\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "chmod +x hello.sh", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "", "extras": {"command_id": -1, "command": "chmod +x hello.sh", "exit_code": 0}}]
+[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\n\n# Print hello\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "chmod +x hello.sh", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "", "extras": {"command_id": -1, "command": "chmod +x hello.sh", "exit_code": 0}}]
 
 
 ## Format
 ## Format
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:

+ 1 - 1
tests/integration/mock/server_runtime/DelegatorAgent/test_write_simple_script/prompt_006.log

@@ -41,7 +41,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 They are time-ordered, with your most recent action at the bottom.
 
 
-[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\n\n# Print hello\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "chmod +x hello.sh", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "", "extras": {"command_id": -1, "command": "chmod +x hello.sh", "exit_code": 0}}, {"source": "agent", "action": "run", "args": {"command": "./hello.sh", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "hello", "extras": {"command_id": -1, "command": "./hello.sh", "exit_code": 0}}]
+[{"source": "agent", "action": "write", "args": {"path": "hello.sh", "content": "#!/bin/bash\n\n# Print hello\necho 'hello'", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "write", "content": "", "extras": {"path": "hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "chmod +x hello.sh", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "", "extras": {"command_id": -1, "command": "chmod +x hello.sh", "exit_code": 0}}, {"source": "agent", "action": "run", "args": {"command": "./hello.sh", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "hello", "extras": {"command_id": -1, "command": "./hello.sh", "exit_code": 0}}]
 
 
 ## Format
 ## Format
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:

+ 1 - 1
tests/integration/mock/server_runtime/DelegatorAgent/test_write_simple_script/prompt_009.log

@@ -39,7 +39,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 They are time-ordered, with your most recent action at the bottom.
 
 
-[{"source": "agent", "action": "read", "args": {"path": "hello.sh", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "#!/bin/bash\n\n# Print hello\necho 'hello'\n", "extras": {"path": "hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "bash hello.sh", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "hello", "extras": {"command_id": -1, "command": "bash hello.sh", "exit_code": 0}}]
+[{"source": "agent", "action": "read", "args": {"path": "hello.sh", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "#!/bin/bash\n\n# Print hello\necho 'hello'\n", "extras": {"path": "hello.sh"}}, {"source": "agent", "action": "run", "args": {"command": "bash hello.sh", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "hello", "extras": {"command_id": -1, "command": "bash hello.sh", "exit_code": 0}}]
 
 
 ## Format
 ## Format
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:
 Your response MUST be in JSON format. It must be an object, and it must contain two fields:

+ 1 - 1
tests/integration/mock/server_runtime/ManagerAgent/test_simple_task_rejection/prompt_003.log

@@ -28,7 +28,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 They are time-ordered, with your most recent action at the bottom.
 
 
-[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}]
+[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}]
 
 
 If the last item in the history is an error, you should try to fix it.
 If the last item in the history is an error, you should try to fix it.
 
 

+ 1 - 1
tests/integration/mock/server_runtime/ManagerAgent/test_simple_task_rejection/prompt_004.log

@@ -28,7 +28,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 They are time-ordered, with your most recent action at the bottom.
 
 
-[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}]
+[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}]
 
 
 If the last item in the history is an error, you should try to fix it.
 If the last item in the history is an error, you should try to fix it.
 
 

+ 1 - 1
tests/integration/mock/server_runtime/ManagerAgent/test_simple_task_rejection/prompt_005.log

@@ -28,7 +28,7 @@ as well as observations you've made. This only includes the MOST RECENT
 actions and observations--more may have happened before that.
 actions and observations--more may have happened before that.
 They are time-ordered, with your most recent action at the bottom.
 They are time-ordered, with your most recent action at the bottom.
 
 
-[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}]
+[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}, {"source": "agent", "observation": "error", "content": "action={'action': 'reject', 'args': {'reason': 'Not a valid git repository.'}} has the wrong arguments", "extras": {}}]
 
 
 If the last item in the history is an error, you should try to fix it.
 If the last item in the history is an error, you should try to fix it.
 
 

+ 1 - 0
tests/integration/mock/server_runtime/PlannerAgent/test_write_simple_script/prompt_010.log

@@ -198,6 +198,7 @@ ten actions--more happened before that.
     "args": {
     "args": {
       "command": "bash hello.sh",
       "command": "bash hello.sh",
       "thought": "",
       "thought": "",
+      "keep_prompt": true,
       "is_confirmed": "confirmed"
       "is_confirmed": "confirmed"
     }
     }
   },
   },

+ 1 - 0
tests/integration/mock/server_runtime/PlannerAgent/test_write_simple_script/prompt_011.log

@@ -197,6 +197,7 @@ ten actions--more happened before that.
     "args": {
     "args": {
       "command": "bash hello.sh",
       "command": "bash hello.sh",
       "thought": "",
       "thought": "",
+      "keep_prompt": true,
       "is_confirmed": "confirmed"
       "is_confirmed": "confirmed"
     }
     }
   },
   },

+ 1 - 0
tests/unit/test_action_serialization.py

@@ -86,6 +86,7 @@ def test_cmd_run_action_serialization_deserialization():
         'args': {
         'args': {
             'command': 'echo "Hello world"',
             'command': 'echo "Hello world"',
             'thought': '',
             'thought': '',
+            'keep_prompt': True,
             'is_confirmed': ActionConfirmationStatus.CONFIRMED,
             'is_confirmed': ActionConfirmationStatus.CONFIRMED,
         },
         },
     }
     }

+ 114 - 0
tests/unit/test_runtime.py

@@ -844,6 +844,9 @@ async def test_ipython_simple(temp_dir, box_class):
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
     assert obs.content.strip() == '1'
     assert obs.content.strip() == '1'
 
 
+    await runtime.close()
+    await asyncio.sleep(1)
+
 
 
 async def _test_ipython_agentskills_fileop_pwd_impl(
 async def _test_ipython_agentskills_fileop_pwd_impl(
     runtime: ServerRuntime | EventStreamRuntime, enable_auto_lint: bool
     runtime: ServerRuntime | EventStreamRuntime, enable_auto_lint: bool
@@ -954,6 +957,9 @@ DO NOT re-run the same failed edit command. Running it again will lead to the sa
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
     assert obs.exit_code == 0
     assert obs.exit_code == 0
 
 
+    await runtime.close()
+    await asyncio.sleep(1)
+
 
 
 @pytest.mark.asyncio
 @pytest.mark.asyncio
 async def test_ipython_agentskills_fileop_pwd(temp_dir, box_class, enable_auto_lint):
 async def test_ipython_agentskills_fileop_pwd(temp_dir, box_class, enable_auto_lint):
@@ -967,6 +973,72 @@ async def test_ipython_agentskills_fileop_pwd(temp_dir, box_class, enable_auto_l
     await asyncio.sleep(1)
     await asyncio.sleep(1)
 
 
 
 
+@pytest.mark.asyncio
+async def test_ipython_agentskills_fileop_pwd_with_userdir(temp_dir, box_class):
+    """Make sure that cd in bash also updates the current working directory in ipython.
+
+    Handle special case where the pwd is provided as "~", which should be expanded using os.path.expanduser
+    on the client side.
+    """
+
+    runtime = await _load_runtime(
+        temp_dir,
+        box_class,
+        run_as_devin=False,
+    )
+
+    action = CmdRunAction(command='cd ~')
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = await runtime.run_action(action)
+    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+    assert obs.exit_code == 0
+
+    action = CmdRunAction(command='mkdir test && ls -la')
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = await runtime.run_action(action)
+    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+    assert isinstance(obs, CmdOutputObservation)
+    assert obs.exit_code == 0
+
+    action = IPythonRunCellAction(code="create_file('hello.py')")
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = await runtime.run_action(action)
+    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+    assert isinstance(obs, IPythonRunCellObservation)
+    assert obs.content.replace('\r\n', '\n').strip().split('\n') == (
+        '[File: /root/hello.py (1 lines total)]\n'
+        '(this is the beginning of the file)\n'
+        '1|\n'
+        '(this is the end of the file)\n'
+        '[File hello.py created.]\n'
+    ).strip().split('\n')
+
+    action = CmdRunAction(command='cd test')
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = await runtime.run_action(action)
+    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+    assert isinstance(obs, CmdOutputObservation)
+    assert obs.exit_code == 0
+
+    # This should create a file in the current working directory
+    # i.e., /root/test/hello.py instead of /root/hello.py
+    action = IPythonRunCellAction(code="create_file('hello.py')")
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = await runtime.run_action(action)
+    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+    assert isinstance(obs, IPythonRunCellObservation)
+    assert obs.content.replace('\r\n', '\n').strip().split('\n') == (
+        '[File: /root/test/hello.py (1 lines total)]\n'
+        '(this is the beginning of the file)\n'
+        '1|\n'
+        '(this is the end of the file)\n'
+        '[File hello.py created.]\n'
+    ).strip().split('\n')
+
+    await runtime.close()
+    await asyncio.sleep(1)
+
+
 @pytest.mark.skipif(
 @pytest.mark.skipif(
     TEST_RUNTIME.lower() == 'eventstream',
     TEST_RUNTIME.lower() == 'eventstream',
     reason='Skip this if we want to test EventStreamRuntime',
     reason='Skip this if we want to test EventStreamRuntime',
@@ -1082,6 +1154,9 @@ async def test_copy_single_file(temp_dir, box_class):
     assert obs.exit_code == 0
     assert obs.exit_code == 0
     assert 'Hello, World!' in obs.content
     assert 'Hello, World!' in obs.content
 
 
+    await runtime.close()
+    await asyncio.sleep(1)
+
 
 
 def _create_test_dir_with_files(host_temp_dir):
 def _create_test_dir_with_files(host_temp_dir):
     os.mkdir(os.path.join(host_temp_dir, 'test_dir'))
     os.mkdir(os.path.join(host_temp_dir, 'test_dir'))
@@ -1129,6 +1204,9 @@ async def test_copy_directory_recursively(temp_dir, box_class):
     assert obs.exit_code == 0
     assert obs.exit_code == 0
     assert 'File 1 content' in obs.content
     assert 'File 1 content' in obs.content
 
 
+    await runtime.close()
+    await asyncio.sleep(1)
+
 
 
 @pytest.mark.asyncio
 @pytest.mark.asyncio
 async def test_copy_to_non_existent_directory(temp_dir, box_class):
 async def test_copy_to_non_existent_directory(temp_dir, box_class):
@@ -1148,6 +1226,9 @@ async def test_copy_to_non_existent_directory(temp_dir, box_class):
     assert obs.exit_code == 0
     assert obs.exit_code == 0
     assert 'Hello, World!' in obs.content
     assert 'Hello, World!' in obs.content
 
 
+    await runtime.close()
+    await asyncio.sleep(1)
+
 
 
 @pytest.mark.asyncio
 @pytest.mark.asyncio
 async def test_overwrite_existing_file(temp_dir, box_class):
 async def test_overwrite_existing_file(temp_dir, box_class):
@@ -1183,6 +1264,9 @@ async def test_overwrite_existing_file(temp_dir, box_class):
     assert obs.exit_code == 0
     assert obs.exit_code == 0
     assert 'Hello, World!' in obs.content
     assert 'Hello, World!' in obs.content
 
 
+    await runtime.close()
+    await asyncio.sleep(1)
+
 
 
 @pytest.mark.asyncio
 @pytest.mark.asyncio
 async def test_copy_non_existent_file(temp_dir, box_class):
 async def test_copy_non_existent_file(temp_dir, box_class):
@@ -1200,3 +1284,33 @@ async def test_copy_non_existent_file(temp_dir, box_class):
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
     logger.info(obs, extra={'msg_type': 'OBSERVATION'})
     assert isinstance(obs, CmdOutputObservation)
     assert isinstance(obs, CmdOutputObservation)
     assert obs.exit_code != 0  # File should not exist
     assert obs.exit_code != 0  # File should not exist
+
+    await runtime.close()
+    await asyncio.sleep(1)
+
+
+@pytest.mark.asyncio
+async def test_keep_prompt(temp_dir):
+    # only EventStreamRuntime supports keep_prompt
+    runtime = await _load_runtime(
+        temp_dir, box_class=EventStreamRuntime, run_as_devin=False
+    )
+
+    action = CmdRunAction(command='touch /workspace/test_file.txt')
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = await runtime.run_action(action)
+    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+    assert isinstance(obs, CmdOutputObservation)
+    assert obs.exit_code == 0
+    assert 'root@' in obs.content
+
+    action = CmdRunAction(command='cat /workspace/test_file.txt', keep_prompt=False)
+    logger.info(action, extra={'msg_type': 'ACTION'})
+    obs = await runtime.run_action(action)
+    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+    assert isinstance(obs, CmdOutputObservation)
+    assert obs.exit_code == 0
+    assert 'root@' not in obs.content
+
+    await runtime.close()
+    await asyncio.sleep(1)