Преглед изворног кода

Revert "fix(backend) changes to improve Command-R+ behavior, plus file i/o er…" (#1405)

This reverts commit 44aea95ddee766dd25d20aa6eeab8759d383966f.
Robert Brennan пре 1 годину
родитељ
комит
9c9aee29f0

+ 1 - 1
agenthub/monologue_agent/agent.py

@@ -77,7 +77,7 @@ INITIAL_THOUGHTS = [
     "I'll need a strategy. And as I make progress, I'll need to keep refining that strategy. I'll need to set goals, and break them into sub-goals.",
     "I'll need a strategy. And as I make progress, I'll need to keep refining that strategy. I'll need to set goals, and break them into sub-goals.",
     'In between actions, I must always take some time to think, strategize, and set new goals. I should never take two actions in a row.',
     'In between actions, I must always take some time to think, strategize, and set new goals. I should never take two actions in a row.',
     "OK so my task is to $TASK. I haven't made any progress yet. Where should I start?",
     "OK so my task is to $TASK. I haven't made any progress yet. Where should I start?",
-    'It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself.',
+    "It seems like there might be an existing project here. I should probably start by running `ls` to see what's here.",
 ]
 ]
 
 
 
 

+ 5 - 17
agenthub/monologue_agent/utils/prompts.py

@@ -28,8 +28,8 @@ This is your internal monologue, in JSON format:
 
 
 
 
 Your most recent thought is at the bottom of that monologue. Continue your train of thought.
 Your most recent thought is at the bottom of that monologue. Continue your train of thought.
-What is your next single thought or action? Your response must be in JSON format.
-It must be a single object, and it must contain two fields:
+What is your next thought or action? Your response must be in JSON format.
+It must be an object, and it must contain two fields:
 * `action`, which is one of the actions below
 * `action`, which is one of the actions below
 * `args`, which is a map of key-value pairs, specifying the arguments for that action
 * `args`, which is a map of key-value pairs, specifying the arguments for that action
 
 
@@ -59,15 +59,11 @@ You should never act twice in a row without thinking. But if your last several
 actions are all "think" actions, you should consider taking a different action.
 actions are all "think" actions, you should consider taking a different action.
 
 
 Notes:
 Notes:
-* you are logged in as %(user)s, but sudo will always work without a password.
-* all non-background commands will be forcibly stopped if they remain running for over %(timeout)s seconds.
-* your environment is Debian Linux. You can install software with `sudo apt-get`, but remember to use -y.
+* your environment is Debian Linux. You can install software with `apt`
+* your working directory will not change, even if you run `cd`. All commands will be run in the `%(WORKSPACE_MOUNT_PATH_IN_SANDBOX)s` directory.
 * don't run interactive commands, or commands that don't return (e.g. `node server.js`). You may run commands in the background (e.g. `node server.js &`)
 * don't run interactive commands, or commands that don't return (e.g. `node server.js`). You may run commands in the background (e.g. `node server.js &`)
-* don't run interactive text editors (e.g. `nano` or 'vim'), instead use the 'write' or 'read' action.
-* don't run gui applications (e.g. software IDEs (like vs code or codium), web browsers (like firefox or chromium), or other complex software packages). Use non-interactive cli applications, or special actions instead.
-* whenever an action fails, always `think` about why it may have happened before acting again.
 
 
-What is your next single thought or action? Again, you must reply with JSON, and only with JSON. You must respond with exactly one 'action' object.
+What is your next thought or action? Again, you must reply with JSON, and only with JSON.
 
 
 %(hint)s
 %(hint)s
 """
 """
@@ -146,15 +142,11 @@ def get_request_action_prompt(
             )
             )
         bg_commands_message += '\nYou can end any process by sending a `kill` action with the numerical `id` above.'
         bg_commands_message += '\nYou can end any process by sending a `kill` action with the numerical `id` above.'
 
 
-    user = 'opendevin' if config.get(ConfigType.RUN_AS_DEVIN) else 'root'
-
     return ACTION_PROMPT % {
     return ACTION_PROMPT % {
         'task': task,
         'task': task,
         'monologue': json.dumps(thoughts, indent=2),
         'monologue': json.dumps(thoughts, indent=2),
         'background_commands': bg_commands_message,
         'background_commands': bg_commands_message,
         'hint': hint,
         'hint': hint,
-        'user': user,
-        'timeout': config.get(ConfigType.SANDBOX_TIMEOUT),
         'WORKSPACE_MOUNT_PATH_IN_SANDBOX': config.get(ConfigType.WORKSPACE_MOUNT_PATH_IN_SANDBOX),
         'WORKSPACE_MOUNT_PATH_IN_SANDBOX': config.get(ConfigType.WORKSPACE_MOUNT_PATH_IN_SANDBOX),
     }
     }
 
 
@@ -189,10 +181,6 @@ def parse_action_response(response: str) -> Action:
         raise LLMOutputError(
         raise LLMOutputError(
             'Invalid JSON, the response must be well-formed JSON as specified in the prompt.'
             'Invalid JSON, the response must be well-formed JSON as specified in the prompt.'
         )
         )
-    except TypeError:
-        raise LLMOutputError(
-            'Invalid JSON, the response must be well-formed JSON as specified in the prompt.'
-        )
     if 'content' in action_dict:
     if 'content' in action_dict:
         # The LLM gets confused here. Might as well be robust
         # The LLM gets confused here. Might as well be robust
         action_dict['contents'] = action_dict.pop('content')
         action_dict['contents'] = action_dict.pop('content')

+ 5 - 11
opendevin/action/fileop.py

@@ -20,16 +20,10 @@ from .base import ExecutableAction
 SANDBOX_PATH_PREFIX = '/workspace/'
 SANDBOX_PATH_PREFIX = '/workspace/'
 
 
 
 
-def resolve_path(file_path, working_directory):
-    path_in_sandbox = Path(file_path)
-
-    # Apply working directory
-    if not path_in_sandbox.is_absolute():
-        path_in_sandbox = Path(working_directory) / path_in_sandbox
-
+def resolve_path(file_path):
     # Sanitize the path with respect to the root of the full sandbox
     # Sanitize the path with respect to the root of the full sandbox
-    # (deny any .. path traversal to parent directories of the sandbox)
-    abs_path_in_sandbox = path_in_sandbox.resolve()
+    # (deny any .. path traversal to parent directories of this)
+    abs_path_in_sandbox = (Path(SANDBOX_PATH_PREFIX) / Path(file_path)).resolve()
 
 
     # If the path is outside the workspace, deny it
     # If the path is outside the workspace, deny it
     if not abs_path_in_sandbox.is_relative_to(SANDBOX_PATH_PREFIX):
     if not abs_path_in_sandbox.is_relative_to(SANDBOX_PATH_PREFIX):
@@ -77,7 +71,7 @@ class FileReadAction(ExecutableAction):
             code_view = ''.join(read_lines)
             code_view = ''.join(read_lines)
         else:
         else:
             try:
             try:
-                whole_path = resolve_path(self.path, controller.action_manager.sandbox.get_working_directory())
+                whole_path = resolve_path(self.path)
                 self.start = max(self.start, 0)
                 self.start = max(self.start, 0)
                 try:
                 try:
                     with open(whole_path, 'r', encoding='utf-8') as file:
                     with open(whole_path, 'r', encoding='utf-8') as file:
@@ -129,7 +123,7 @@ class FileWriteAction(ExecutableAction):
                 return AgentErrorObservation(f'File not found: {self.path}')
                 return AgentErrorObservation(f'File not found: {self.path}')
         else:
         else:
             try:
             try:
-                whole_path = resolve_path(self.path, controller.action_manager.sandbox.get_working_directory())
+                whole_path = resolve_path(self.path)
                 mode = 'w' if not os.path.exists(whole_path) else 'r+'
                 mode = 'w' if not os.path.exists(whole_path) else 'r+'
                 try:
                 try:
                     with open(whole_path, mode, encoding='utf-8') as file:
                     with open(whole_path, mode, encoding='utf-8') as file:

+ 0 - 3
opendevin/config.py

@@ -36,8 +36,6 @@ DEFAULT_CONFIG: dict = {
     ConfigType.LLM_RETRY_MIN_WAIT: 3,
     ConfigType.LLM_RETRY_MIN_WAIT: 3,
     ConfigType.LLM_RETRY_MAX_WAIT: 60,
     ConfigType.LLM_RETRY_MAX_WAIT: 60,
     ConfigType.MAX_ITERATIONS: 100,
     ConfigType.MAX_ITERATIONS: 100,
-    ConfigType.LLM_TIMEOUT: None,
-    ConfigType.LLM_MAX_RETURN_TOKENS: None,
     ConfigType.AGENT_MEMORY_MAX_THREADS: 2,
     ConfigType.AGENT_MEMORY_MAX_THREADS: 2,
     ConfigType.AGENT_MEMORY_ENABLED: False,
     ConfigType.AGENT_MEMORY_ENABLED: False,
     # GPT-4 pricing is $10 per 1M input tokens. Since tokenization happens on LLM side,
     # GPT-4 pricing is $10 per 1M input tokens. Since tokenization happens on LLM side,
@@ -50,7 +48,6 @@ DEFAULT_CONFIG: dict = {
     ConfigType.USE_HOST_NETWORK: 'false',
     ConfigType.USE_HOST_NETWORK: 'false',
     ConfigType.SSH_HOSTNAME: 'localhost',
     ConfigType.SSH_HOSTNAME: 'localhost',
     ConfigType.DISABLE_COLOR: 'false',
     ConfigType.DISABLE_COLOR: 'false',
-    ConfigType.SANDBOX_TIMEOUT: 120
 }
 }
 
 
 config_str = ''
 config_str = ''

+ 3 - 9
opendevin/controller/action_manager.py

@@ -26,21 +26,15 @@ class ActionManager:
         if sandbox_type == 'exec':
         if sandbox_type == 'exec':
             self.sandbox = DockerExecBox(
             self.sandbox = DockerExecBox(
                 sid=(sid or 'default'),
                 sid=(sid or 'default'),
-                timeout=config.get(ConfigType.SANDBOX_TIMEOUT)
             )
             )
         elif sandbox_type == 'local':
         elif sandbox_type == 'local':
-            self.sandbox = LocalBox(
-                timeout=config.get(ConfigType.SANDBOX_TIMEOUT)
-            )
+            self.sandbox = LocalBox()
         elif sandbox_type == 'ssh':
         elif sandbox_type == 'ssh':
             self.sandbox = DockerSSHBox(
             self.sandbox = DockerSSHBox(
-                sid=(sid or 'default'),
-                timeout=config.get(ConfigType.SANDBOX_TIMEOUT)
+                sid=(sid or 'default')
             )
             )
         elif sandbox_type == 'e2b':
         elif sandbox_type == 'e2b':
-            self.sandbox = E2BBox(
-                timeout=config.get(ConfigType.SANDBOX_TIMEOUT)
-            )
+            self.sandbox = E2BBox()
         else:
         else:
             raise ValueError(f'Invalid sandbox type: {sandbox_type}')
             raise ValueError(f'Invalid sandbox type: {sandbox_type}')
 
 

+ 3 - 10
opendevin/llm/llm.py

@@ -4,9 +4,10 @@ from litellm.exceptions import APIConnectionError, RateLimitError, ServiceUnavai
 from functools import partial
 from functools import partial
 
 
 from opendevin import config
 from opendevin import config
+from opendevin.schema.config import ConfigType
 from opendevin.logger import llm_prompt_logger, llm_response_logger
 from opendevin.logger import llm_prompt_logger, llm_response_logger
 from opendevin.logger import opendevin_logger as logger
 from opendevin.logger import opendevin_logger as logger
-from opendevin.schema import ConfigType
+
 
 
 DEFAULT_API_KEY = config.get(ConfigType.LLM_API_KEY)
 DEFAULT_API_KEY = config.get(ConfigType.LLM_API_KEY)
 DEFAULT_BASE_URL = config.get(ConfigType.LLM_BASE_URL)
 DEFAULT_BASE_URL = config.get(ConfigType.LLM_BASE_URL)
@@ -15,8 +16,6 @@ DEFAULT_API_VERSION = config.get(ConfigType.LLM_API_VERSION)
 LLM_NUM_RETRIES = config.get(ConfigType.LLM_NUM_RETRIES)
 LLM_NUM_RETRIES = config.get(ConfigType.LLM_NUM_RETRIES)
 LLM_RETRY_MIN_WAIT = config.get(ConfigType.LLM_RETRY_MIN_WAIT)
 LLM_RETRY_MIN_WAIT = config.get(ConfigType.LLM_RETRY_MIN_WAIT)
 LLM_RETRY_MAX_WAIT = config.get(ConfigType.LLM_RETRY_MAX_WAIT)
 LLM_RETRY_MAX_WAIT = config.get(ConfigType.LLM_RETRY_MAX_WAIT)
-LLM_TIMEOUT = config.get(ConfigType.LLM_TIMEOUT)
-LLM_MAX_RETURN_TOKENS = config.get(ConfigType.LLM_MAX_RETURN_TOKENS)
 
 
 
 
 class LLM:
 class LLM:
@@ -32,8 +31,6 @@ class LLM:
                  num_retries=LLM_NUM_RETRIES,
                  num_retries=LLM_NUM_RETRIES,
                  retry_min_wait=LLM_RETRY_MIN_WAIT,
                  retry_min_wait=LLM_RETRY_MIN_WAIT,
                  retry_max_wait=LLM_RETRY_MAX_WAIT,
                  retry_max_wait=LLM_RETRY_MAX_WAIT,
-                 llm_timeout=LLM_TIMEOUT,
-                 llm_max_return_tokens=LLM_MAX_RETURN_TOKENS
                  ):
                  ):
         """
         """
         Args:
         Args:
@@ -44,8 +41,6 @@ class LLM:
             num_retries (int, optional): The number of retries for API calls. Defaults to LLM_NUM_RETRIES.
             num_retries (int, optional): The number of retries for API calls. Defaults to LLM_NUM_RETRIES.
             retry_min_wait (int, optional): The minimum time to wait between retries in seconds. Defaults to LLM_RETRY_MIN_TIME.
             retry_min_wait (int, optional): The minimum time to wait between retries in seconds. Defaults to LLM_RETRY_MIN_TIME.
             retry_max_wait (int, optional): The maximum time to wait between retries in seconds. Defaults to LLM_RETRY_MAX_TIME.
             retry_max_wait (int, optional): The maximum time to wait between retries in seconds. Defaults to LLM_RETRY_MAX_TIME.
-            llm_timeout (int, optional): The maximum time to wait for a response in seconds. Defaults to LLM_TIMEOUT.
-            llm_max_return_tokens (int, optional): The maximum number of tokens to return. Defaults to LLM_MAX_RETURN_TOKENS.
 
 
         Attributes:
         Attributes:
             model_name (str): The name of the language model.
             model_name (str): The name of the language model.
@@ -59,11 +54,9 @@ class LLM:
         self.api_key = api_key
         self.api_key = api_key
         self.base_url = base_url
         self.base_url = base_url
         self.api_version = api_version
         self.api_version = api_version
-        self.llm_timeout = llm_timeout
-        self.llm_max_return_tokens = llm_max_return_tokens
 
 
         self._completion = partial(
         self._completion = partial(
-            litellm_completion, model=self.model_name, api_key=self.api_key, base_url=self.base_url, api_version=self.api_version, max_tokens=self.llm_max_return_tokens, timeout=self.llm_timeout)
+            litellm_completion, model=self.model_name, api_key=self.api_key, base_url=self.base_url, api_version=self.api_version)
 
 
         completion_unwrapped = self._completion
         completion_unwrapped = self._completion
 
 

+ 0 - 3
opendevin/sandbox/docker/exec_box.py

@@ -268,9 +268,6 @@ class DockerExecBox(Sandbox):
             except docker.errors.NotFound:
             except docker.errors.NotFound:
                 pass
                 pass
 
 
-    def get_working_directory(self):
-        return SANDBOX_WORKSPACE_DIR
-
 
 
 if __name__ == '__main__':
 if __name__ == '__main__':
     try:
     try:

+ 0 - 3
opendevin/sandbox/docker/local_box.py

@@ -96,6 +96,3 @@ class LocalBox(Sandbox):
 
 
     def cleanup(self):
     def cleanup(self):
         self.close()
         self.close()
-
-    def get_working_directory(self):
-        return config.get(ConfigType.WORKSPACE_BASE)

+ 39 - 9
opendevin/sandbox/docker/ssh_box.py

@@ -169,7 +169,7 @@ class DockerSSHBox(Sandbox):
 
 
     def start_ssh_session(self):
     def start_ssh_session(self):
         # start ssh session at the background
         # start ssh session at the background
-        self.ssh = pxssh.pxssh(echo=False)
+        self.ssh = pxssh.pxssh()
         hostname = SSH_HOSTNAME
         hostname = SSH_HOSTNAME
         if RUN_AS_DEVIN:
         if RUN_AS_DEVIN:
             username = 'opendevin'
             username = 'opendevin'
@@ -211,14 +211,49 @@ class DockerSSHBox(Sandbox):
             # send a SIGINT to the process
             # send a SIGINT to the process
             self.ssh.sendintr()
             self.ssh.sendintr()
             self.ssh.prompt()
             self.ssh.prompt()
-            command_output = self.ssh.before.decode('utf-8').strip()
+            command_output = self.ssh.before.decode(
+                'utf-8').lstrip(cmd).strip()
             return -1, f'Command: "{cmd}" timed out. Sending SIGINT to the process: {command_output}'
             return -1, f'Command: "{cmd}" timed out. Sending SIGINT to the process: {command_output}'
         command_output = self.ssh.before.decode('utf-8').strip()
         command_output = self.ssh.before.decode('utf-8').strip()
 
 
+        # NOTE: there's some weird behavior with the prompt (it may come AFTER the command output)
+        # so we need to check if the command is in the output
+        n_tries = 5
+        while not command_output.startswith(cmd) and n_tries > 0:
+            self.ssh.prompt()
+            command_output = self.ssh.before.decode('utf-8').strip()
+            time.sleep(0.5)
+            n_tries -= 1
+        if n_tries == 0 and not command_output.startswith(cmd):
+            raise Exception(
+                f'Something went wrong with the SSH sanbox, cannot get output for command [{cmd}] after 5 retries'
+            )
+        logger.debug(f'Command output GOT SO FAR: {command_output}')
+        # once out, make sure that we have *every* output, we while loop until we get an empty output
+        while True:
+            logger.debug('WAITING FOR .prompt()')
+            self.ssh.sendline('\n')
+            timeout_not_reached = self.ssh.prompt(timeout=1)
+            if not timeout_not_reached:
+                logger.debug('TIMEOUT REACHED')
+                break
+            logger.debug('WAITING FOR .before')
+            output = self.ssh.before.decode('utf-8').strip()
+            logger.debug(f'WAITING FOR END OF command output ({bool(output)}): {output}')
+            if output == '':
+                break
+            command_output += output
+        command_output = command_output.lstrip(cmd).strip()
+
         # get the exit code
         # get the exit code
         self.ssh.sendline('echo $?')
         self.ssh.sendline('echo $?')
-        self.ssh.prompt(timeout=10)
-        exit_code = int(self.ssh.before.decode('utf-8').strip())
+        self.ssh.prompt()
+        exit_code = self.ssh.before.decode('utf-8')
+        while not exit_code.startswith('echo $?'):
+            self.ssh.prompt()
+            exit_code = self.ssh.before.decode('utf-8')
+            logger.debug(f'WAITING FOR exit code: {exit_code}')
+        exit_code = int(exit_code.lstrip('echo $?').strip())
         return exit_code, command_output
         return exit_code, command_output
 
 
     def copy_to(self, host_src: str, sandbox_dest: str, recursive: bool = False):
     def copy_to(self, host_src: str, sandbox_dest: str, recursive: bool = False):
@@ -302,11 +337,6 @@ class DockerSSHBox(Sandbox):
         except docker.errors.NotFound:
         except docker.errors.NotFound:
             pass
             pass
 
 
-    def get_working_directory(self):
-        self.ssh.sendline('pwd')
-        self.ssh.prompt(timeout=10)
-        return self.ssh.before.decode('utf-8').strip()
-
     def is_container_running(self):
     def is_container_running(self):
         try:
         try:
             container = self.docker_client.containers.get(self.container_name)
             container = self.docker_client.containers.get(self.container_name)

+ 0 - 3
opendevin/sandbox/e2b/sandbox.py

@@ -124,6 +124,3 @@ class E2BBox(Sandbox):
 
 
     def close(self):
     def close(self):
         self.sandbox.close()
         self.sandbox.close()
-
-    def get_working_directory(self):
-        return self.sandbox.cwd

+ 0 - 4
opendevin/sandbox/sandbox.py

@@ -32,7 +32,3 @@ class Sandbox(ABC, PluginMixin):
     @abstractmethod
     @abstractmethod
     def copy_to(self, host_src: str, sandbox_dest: str, recursive: bool = False):
     def copy_to(self, host_src: str, sandbox_dest: str, recursive: bool = False):
         pass
         pass
-
-    @abstractmethod
-    def get_working_directory(self):
-        pass

+ 0 - 3
opendevin/schema/config.py

@@ -2,8 +2,6 @@ from enum import Enum
 
 
 
 
 class ConfigType(str, Enum):
 class ConfigType(str, Enum):
-    LLM_MAX_RETURN_TOKENS = 'LLM_MAX_RETURN_TOKENS'
-    LLM_TIMEOUT = 'LLM_TIMEOUT'
     LLM_API_KEY = 'LLM_API_KEY'
     LLM_API_KEY = 'LLM_API_KEY'
     LLM_BASE_URL = 'LLM_BASE_URL'
     LLM_BASE_URL = 'LLM_BASE_URL'
     WORKSPACE_BASE = 'WORKSPACE_BASE'
     WORKSPACE_BASE = 'WORKSPACE_BASE'
@@ -28,7 +26,6 @@ class ConfigType(str, Enum):
     E2B_API_KEY = 'E2B_API_KEY'
     E2B_API_KEY = 'E2B_API_KEY'
     SANDBOX_TYPE = 'SANDBOX_TYPE'
     SANDBOX_TYPE = 'SANDBOX_TYPE'
     SANDBOX_USER_ID = 'SANDBOX_USER_ID'
     SANDBOX_USER_ID = 'SANDBOX_USER_ID'
-    SANDBOX_TIMEOUT = 'SANDBOX_TIMEOUT'
     USE_HOST_NETWORK = 'USE_HOST_NETWORK'
     USE_HOST_NETWORK = 'USE_HOST_NETWORK'
     SSH_HOSTNAME = 'SSH_HOSTNAME'
     SSH_HOSTNAME = 'SSH_HOSTNAME'
     DISABLE_COLOR = 'DISABLE_COLOR'
     DISABLE_COLOR = 'DISABLE_COLOR'

+ 6 - 10
tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_001.log

@@ -281,15 +281,15 @@ This is your internal monologue, in JSON format:
   {
   {
     "action": "think",
     "action": "think",
     "args": {
     "args": {
-      "thought": "It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself."
+      "thought": "It seems like there might be an existing project here. I should probably start by running `ls` to see what's here."
     }
     }
   }
   }
 ]
 ]
 
 
 
 
 Your most recent thought is at the bottom of that monologue. Continue your train of thought.
 Your most recent thought is at the bottom of that monologue. Continue your train of thought.
-What is your next single thought or action? Your response must be in JSON format.
-It must be a single object, and it must contain two fields:
+What is your next thought or action? Your response must be in JSON format.
+It must be an object, and it must contain two fields:
 * `action`, which is one of the actions below
 * `action`, which is one of the actions below
 * `args`, which is a map of key-value pairs, specifying the arguments for that action
 * `args`, which is a map of key-value pairs, specifying the arguments for that action
 
 
@@ -319,14 +319,10 @@ You should never act twice in a row without thinking. But if your last several
 actions are all "think" actions, you should consider taking a different action.
 actions are all "think" actions, you should consider taking a different action.
 
 
 Notes:
 Notes:
-* you are logged in as opendevin, but sudo will always work without a password.
-* all non-background commands will be forcibly stopped if they remain running for over 120 seconds.
-* your environment is Debian Linux. You can install software with `sudo apt-get`, but remember to use -y.
+* your environment is Debian Linux. You can install software with `apt`
+* your working directory will not change, even if you run `cd`. All commands will be run in the `/workspace` directory.
 * don't run interactive commands, or commands that don't return (e.g. `node server.js`). You may run commands in the background (e.g. `node server.js &`)
 * don't run interactive commands, or commands that don't return (e.g. `node server.js`). You may run commands in the background (e.g. `node server.js &`)
-* don't run interactive text editors (e.g. `nano` or 'vim'), instead use the 'write' or 'read' action.
-* don't run gui applications (e.g. software IDEs (like vs code or codium), web browsers (like firefox or chromium), or other complex software packages). Use non-interactive cli applications, or special actions instead.
-* whenever an action fails, always `think` about why it may have happened before acting again.
 
 
-What is your next single thought or action? Again, you must reply with JSON, and only with JSON. You must respond with exactly one 'action' object.
+What is your next thought or action? Again, you must reply with JSON, and only with JSON.
 
 
 You've been thinking a lot lately. Maybe it's time to take action?
 You've been thinking a lot lately. Maybe it's time to take action?

+ 6 - 10
tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_002.log

@@ -281,7 +281,7 @@ This is your internal monologue, in JSON format:
   {
   {
     "action": "think",
     "action": "think",
     "args": {
     "args": {
-      "thought": "It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself."
+      "thought": "It seems like there might be an existing project here. I should probably start by running `ls` to see what's here."
     }
     }
   },
   },
   {
   {
@@ -304,8 +304,8 @@ This is your internal monologue, in JSON format:
 
 
 
 
 Your most recent thought is at the bottom of that monologue. Continue your train of thought.
 Your most recent thought is at the bottom of that monologue. Continue your train of thought.
-What is your next single thought or action? Your response must be in JSON format.
-It must be a single object, and it must contain two fields:
+What is your next thought or action? Your response must be in JSON format.
+It must be an object, and it must contain two fields:
 * `action`, which is one of the actions below
 * `action`, which is one of the actions below
 * `args`, which is a map of key-value pairs, specifying the arguments for that action
 * `args`, which is a map of key-value pairs, specifying the arguments for that action
 
 
@@ -335,12 +335,8 @@ You should never act twice in a row without thinking. But if your last several
 actions are all "think" actions, you should consider taking a different action.
 actions are all "think" actions, you should consider taking a different action.
 
 
 Notes:
 Notes:
-* you are logged in as opendevin, but sudo will always work without a password.
-* all non-background commands will be forcibly stopped if they remain running for over 120 seconds.
-* your environment is Debian Linux. You can install software with `sudo apt-get`, but remember to use -y.
+* your environment is Debian Linux. You can install software with `apt`
+* your working directory will not change, even if you run `cd`. All commands will be run in the `/workspace` directory.
 * don't run interactive commands, or commands that don't return (e.g. `node server.js`). You may run commands in the background (e.g. `node server.js &`)
 * don't run interactive commands, or commands that don't return (e.g. `node server.js`). You may run commands in the background (e.g. `node server.js &`)
-* don't run interactive text editors (e.g. `nano` or 'vim'), instead use the 'write' or 'read' action.
-* don't run gui applications (e.g. software IDEs (like vs code or codium), web browsers (like firefox or chromium), or other complex software packages). Use non-interactive cli applications, or special actions instead.
-* whenever an action fails, always `think` about why it may have happened before acting again.
 
 
-What is your next single thought or action? Again, you must reply with JSON, and only with JSON. You must respond with exactly one 'action' object.
+What is your next thought or action? Again, you must reply with JSON, and only with JSON.

+ 6 - 10
tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_003.log

@@ -281,7 +281,7 @@ This is your internal monologue, in JSON format:
   {
   {
     "action": "think",
     "action": "think",
     "args": {
     "args": {
-      "thought": "It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself."
+      "thought": "It seems like there might be an existing project here. I should probably start by running `ls` to see what's here."
     }
     }
   },
   },
   {
   {
@@ -321,8 +321,8 @@ This is your internal monologue, in JSON format:
 
 
 
 
 Your most recent thought is at the bottom of that monologue. Continue your train of thought.
 Your most recent thought is at the bottom of that monologue. Continue your train of thought.
-What is your next single thought or action? Your response must be in JSON format.
-It must be a single object, and it must contain two fields:
+What is your next thought or action? Your response must be in JSON format.
+It must be an object, and it must contain two fields:
 * `action`, which is one of the actions below
 * `action`, which is one of the actions below
 * `args`, which is a map of key-value pairs, specifying the arguments for that action
 * `args`, which is a map of key-value pairs, specifying the arguments for that action
 
 
@@ -352,12 +352,8 @@ You should never act twice in a row without thinking. But if your last several
 actions are all "think" actions, you should consider taking a different action.
 actions are all "think" actions, you should consider taking a different action.
 
 
 Notes:
 Notes:
-* you are logged in as opendevin, but sudo will always work without a password.
-* all non-background commands will be forcibly stopped if they remain running for over 120 seconds.
-* your environment is Debian Linux. You can install software with `sudo apt-get`, but remember to use -y.
+* your environment is Debian Linux. You can install software with `apt`
+* your working directory will not change, even if you run `cd`. All commands will be run in the `/workspace` directory.
 * don't run interactive commands, or commands that don't return (e.g. `node server.js`). You may run commands in the background (e.g. `node server.js &`)
 * don't run interactive commands, or commands that don't return (e.g. `node server.js`). You may run commands in the background (e.g. `node server.js &`)
-* don't run interactive text editors (e.g. `nano` or 'vim'), instead use the 'write' or 'read' action.
-* don't run gui applications (e.g. software IDEs (like vs code or codium), web browsers (like firefox or chromium), or other complex software packages). Use non-interactive cli applications, or special actions instead.
-* whenever an action fails, always `think` about why it may have happened before acting again.
 
 
-What is your next single thought or action? Again, you must reply with JSON, and only with JSON. You must respond with exactly one 'action' object.
+What is your next thought or action? Again, you must reply with JSON, and only with JSON.

+ 6 - 10
tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_004.log

@@ -281,7 +281,7 @@ This is your internal monologue, in JSON format:
   {
   {
     "action": "think",
     "action": "think",
     "args": {
     "args": {
-      "thought": "It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself."
+      "thought": "It seems like there might be an existing project here. I should probably start by running `ls` to see what's here."
     }
     }
   },
   },
   {
   {
@@ -332,8 +332,8 @@ This is your internal monologue, in JSON format:
 
 
 
 
 Your most recent thought is at the bottom of that monologue. Continue your train of thought.
 Your most recent thought is at the bottom of that monologue. Continue your train of thought.
-What is your next single thought or action? Your response must be in JSON format.
-It must be a single object, and it must contain two fields:
+What is your next thought or action? Your response must be in JSON format.
+It must be an object, and it must contain two fields:
 * `action`, which is one of the actions below
 * `action`, which is one of the actions below
 * `args`, which is a map of key-value pairs, specifying the arguments for that action
 * `args`, which is a map of key-value pairs, specifying the arguments for that action
 
 
@@ -363,12 +363,8 @@ You should never act twice in a row without thinking. But if your last several
 actions are all "think" actions, you should consider taking a different action.
 
 Notes:
-* you are logged in as opendevin, but sudo will always work without a password.
-* all non-background commands will be forcibly stopped if they remain running for over 120 seconds.
-* your environment is Debian Linux. You can install software with `sudo apt-get`, but remember to use -y.
+* your environment is Debian Linux. You can install software with `apt`
+* your working directory will not change, even if you run `cd`. All commands will be run in the `/workspace` directory.
 * don't run interactive commands, or commands that don't return (e.g. `node server.js`). You may run commands in the background (e.g. `node server.js &`)
-* don't run interactive text editors (e.g. `nano` or 'vim'), instead use the 'write' or 'read' action.
-* don't run gui applications (e.g. software IDEs (like vs code or codium), web browsers (like firefox or chromium), or other complex software packages). Use non-interactive cli applications, or special actions instead.
-* whenever an action fails, always `think` about why it may have happened before acting again.
 
-What is your next single thought or action? Again, you must reply with JSON, and only with JSON. You must respond with exactly one 'action' object.
+What is your next thought or action? Again, you must reply with JSON, and only with JSON.

+ 6 - 10
tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_005.log

@@ -281,7 +281,7 @@ This is your internal monologue, in JSON format:
   {
     "action": "think",
     "args": {
-      "thought": "It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself."
+      "thought": "It seems like there might be an existing project here. I should probably start by running `ls` to see what's here."
     }
   },
   {
@@ -348,8 +348,8 @@ This is your internal monologue, in JSON format:
 
 
 Your most recent thought is at the bottom of that monologue. Continue your train of thought.
-What is your next single thought or action? Your response must be in JSON format.
-It must be a single object, and it must contain two fields:
+What is your next thought or action? Your response must be in JSON format.
+It must be an object, and it must contain two fields:
 * `action`, which is one of the actions below
 * `args`, which is a map of key-value pairs, specifying the arguments for that action
 
@@ -379,12 +379,8 @@ You should never act twice in a row without thinking. But if your last several
 actions are all "think" actions, you should consider taking a different action.
 
 Notes:
-* you are logged in as opendevin, but sudo will always work without a password.
-* all non-background commands will be forcibly stopped if they remain running for over 120 seconds.
-* your environment is Debian Linux. You can install software with `sudo apt-get`, but remember to use -y.
+* your environment is Debian Linux. You can install software with `apt`
+* your working directory will not change, even if you run `cd`. All commands will be run in the `/workspace` directory.
 * don't run interactive commands, or commands that don't return (e.g. `node server.js`). You may run commands in the background (e.g. `node server.js &`)
-* don't run interactive text editors (e.g. `nano` or 'vim'), instead use the 'write' or 'read' action.
-* don't run gui applications (e.g. software IDEs (like vs code or codium), web browsers (like firefox or chromium), or other complex software packages). Use non-interactive cli applications, or special actions instead.
-* whenever an action fails, always `think` about why it may have happened before acting again.
 
-What is your next single thought or action? Again, you must reply with JSON, and only with JSON. You must respond with exactly one 'action' object.
+What is your next thought or action? Again, you must reply with JSON, and only with JSON.

+ 6 - 10
tests/integration/mock/MonologueAgent/test_write_simple_script/prompt_006.log

@@ -281,7 +281,7 @@ This is your internal monologue, in JSON format:
   {
     "action": "think",
     "args": {
-      "thought": "It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself."
+      "thought": "It seems like there might be an existing project here. I should probably start by running `ls` to see what's here."
     }
   },
   {
@@ -364,8 +364,8 @@ This is your internal monologue, in JSON format:
 
 
 Your most recent thought is at the bottom of that monologue. Continue your train of thought.
-What is your next single thought or action? Your response must be in JSON format.
-It must be a single object, and it must contain two fields:
+What is your next thought or action? Your response must be in JSON format.
+It must be an object, and it must contain two fields:
 * `action`, which is one of the actions below
 * `args`, which is a map of key-value pairs, specifying the arguments for that action
 
@@ -395,12 +395,8 @@ You should never act twice in a row without thinking. But if your last several
 actions are all "think" actions, you should consider taking a different action.
 
 Notes:
-* you are logged in as opendevin, but sudo will always work without a password.
-* all non-background commands will be forcibly stopped if they remain running for over 120 seconds.
-* your environment is Debian Linux. You can install software with `sudo apt-get`, but remember to use -y.
+* your environment is Debian Linux. You can install software with `apt`
+* your working directory will not change, even if you run `cd`. All commands will be run in the `/workspace` directory.
 * don't run interactive commands, or commands that don't return (e.g. `node server.js`). You may run commands in the background (e.g. `node server.js &`)
-* don't run interactive text editors (e.g. `nano` or 'vim'), instead use the 'write' or 'read' action.
-* don't run gui applications (e.g. software IDEs (like vs code or codium), web browsers (like firefox or chromium), or other complex software packages). Use non-interactive cli applications, or special actions instead.
-* whenever an action fails, always `think` about why it may have happened before acting again.
 
-What is your next single thought or action? Again, you must reply with JSON, and only with JSON. You must respond with exactly one 'action' object.
+What is your next thought or action? Again, you must reply with JSON, and only with JSON.

+ 13 - 15
tests/test_fileops.py

@@ -1,26 +1,24 @@
-from opendevin import config
-from opendevin.schema import ConfigType
-from opendevin.action import fileop
 from pathlib import Path
+
 import pytest
 
+from opendevin import config
+from opendevin.schema.config import ConfigType
+from opendevin.action import fileop
 
 
 def test_resolve_path():
-    assert fileop.resolve_path('test.txt', '/workspace') == Path(config.get(ConfigType.WORKSPACE_BASE)) / 'test.txt'
-    assert fileop.resolve_path('subdir/test.txt', '/workspace') == \
+    assert fileop.resolve_path('test.txt') == Path(config.get(ConfigType.WORKSPACE_BASE)) / 'test.txt'
+    assert fileop.resolve_path('subdir/test.txt') == Path(config.get(ConfigType.WORKSPACE_BASE)) / 'subdir' / 'test.txt'
+    assert fileop.resolve_path(Path(fileop.SANDBOX_PATH_PREFIX) / 'test.txt') == \
+        Path(config.get(ConfigType.WORKSPACE_BASE)) / 'test.txt'
+    assert fileop.resolve_path(Path(fileop.SANDBOX_PATH_PREFIX) / 'subdir' / 'test.txt') == \
         Path(config.get(ConfigType.WORKSPACE_BASE)) / 'subdir' / 'test.txt'
-    assert fileop.resolve_path(Path(fileop.SANDBOX_PATH_PREFIX) / 'test.txt', '/workspace') == \
+    assert fileop.resolve_path(Path(fileop.SANDBOX_PATH_PREFIX) / 'subdir' / '..' / 'test.txt') == \
         Path(config.get(ConfigType.WORKSPACE_BASE)) / 'test.txt'
-    assert fileop.resolve_path(Path(fileop.SANDBOX_PATH_PREFIX) / 'subdir' / 'test.txt',
-                               '/workspace') == Path(config.get(ConfigType.WORKSPACE_BASE)) / 'subdir' / 'test.txt'
-    assert fileop.resolve_path(Path(fileop.SANDBOX_PATH_PREFIX) / 'subdir' / '..' / 'test.txt',
-                               '/workspace') == Path(config.get(ConfigType.WORKSPACE_BASE)) / 'test.txt'
     with pytest.raises(PermissionError):
-        fileop.resolve_path(Path(fileop.SANDBOX_PATH_PREFIX) / '..' / 'test.txt', '/workspace')
+        fileop.resolve_path(Path(fileop.SANDBOX_PATH_PREFIX) / '..' / 'test.txt')
     with pytest.raises(PermissionError):
-        fileop.resolve_path(Path('..') / 'test.txt', '/workspace')
+        fileop.resolve_path(Path('..') / 'test.txt')
     with pytest.raises(PermissionError):
-        fileop.resolve_path(Path('/') / 'test.txt', '/workspace')
-    assert fileop.resolve_path('test.txt', '/workspace/test') == \
-        Path(config.get(ConfigType.WORKSPACE_BASE)) / 'test' / 'test.txt'
+        fileop.resolve_path(Path('/') / 'test.txt')