1 rok pred · 8bfae8413e
--- a/.github/workflows/run-integration-tests.yml
+++ b/.github/workflows/run-integration-tests.yml
@@ -57,6 +57,7 @@ jobs:
 
				           AGENT: ${{ matrix.agent }}
			
 
				           MAX_ITERATIONS: 10
			
 
				           LLM_EMBEDDING_MODEL: ${{ matrix.embedding-model }}
			
 
				+          REMIND_ITERATIONS: ${{ (matrix.agent == 'CodeActAgent') && 'true' || 'false' }}
			
 
				         run: |
			
 
				           rm -rf workspace
			
 
				           mkdir workspace
			
--- a/opendevin/controller/agent_controller.py
+++ b/opendevin/controller/agent_controller.py
@@ -33,7 +33,7 @@ from opendevin.events.observation import (
 
				     Observation,
			
 
				 )
			
 
				 from opendevin.events.stream import EventSource, EventStream, EventStreamSubscriber
			
 
				-from opendevin.runtime import DockerSSHBox
			
 
				+from opendevin.runtime import DockerSSHBox, Sandbox
			
 
				 from opendevin.runtime.runtime import Runtime
			
 
				 from opendevin.runtime.server.runtime import ServerRuntime
			
 
				 
			
@@ -60,6 +60,8 @@ class AgentController:
 
				         sid: str = 'default',
			
 
				         max_iterations: int = MAX_ITERATIONS,
			
 
				         max_chars: int = MAX_CHARS,
			
 
				+        sandbox: Optional[Sandbox] = None,
			
 
				+        remind_iterations: bool = config.remind_iterations,
			
 
				     ):
			
 
				         """Initializes a new instance of the AgentController class.
			
 
				 
			
@@ -68,6 +70,8 @@ class AgentController:
 
				             sid: The session ID of the agent.
			
 
				             max_iterations: The maximum number of iterations the agent can run.
			
 
				             max_chars: The maximum number of characters the agent can output.
			
 
				+            sandbox: An optional initialized sandbox to run the agent in. If not provided, a default sandbox will be created based on config.
			
 
				+            remind_iterations: A boolean value indicating whether to remind the agent its remaining budget of interaction.
			
 
				         """
			
 
				         self.id = sid
			
 
				         self.agent = agent
			
@@ -76,8 +80,15 @@ class AgentController:
 
				             EventStreamSubscriber.AGENT_CONTROLLER, self.on_event
			
 
				         )
			
 
				         self.max_iterations = max_iterations
			
 
				-        self.runtime = ServerRuntime(self.id)
			
 
				+
			
 
				+        self.remind_iterations = remind_iterations
			
 
				+        if self.remind_iterations:
			
 
				+            logger.info(
			
 
				+                'Iteration reminder is ENABLED: agent will be reminded of remaining turns.'
			
 
				+            )
			
 
				+        self.runtime = ServerRuntime(sandbox=sandbox, sid=self.id)
			
 
				         self.max_chars = max_chars
			
 
				+
			
 
				         # Initialize agent-required plugins for sandbox (if any)
			
 
				         self.runtime.init_sandbox_plugins(agent.sandbox_plugins)
			
 
				 
			
@@ -187,7 +198,9 @@ class AgentController:
 
				         self.agent.reset()
			
 
				 
			
 
				     async def set_agent_state_to(self, new_state: AgentState):
			
 
				-        logger.info(f'Setting agent({type(self.agent).__name__}) state from {self._agent_state} to {new_state}')
			
 
				+        logger.info(
			
 
				+            f'Setting agent({type(self.agent).__name__}) state from {self._agent_state} to {new_state}'
			
 
				+        )
			
 
				         if new_state == self._agent_state:
			
 
				             return
			
 
				 
			
@@ -201,7 +214,11 @@ class AgentController:
 
				             self._cur_step += 1
			
 
				             if self.agent_task is not None:
			
 
				                 self.agent_task.cancel()
			
 
				-        elif new_state == AgentState.STOPPED or new_state == AgentState.ERROR or new_state == AgentState.FINISHED:
			
 
				+        elif (
			
 
				+            new_state == AgentState.STOPPED
			
 
				+            or new_state == AgentState.ERROR
			
 
				+            or new_state == AgentState.FINISHED
			
 
				+        ):
			
 
				             await self.reset_task()
			
 
				 
			
 
				         await self.event_stream.add_event(
			
@@ -225,6 +242,17 @@ class AgentController:
 
				         task = action.inputs.get('task') or ''
			
 
				         await self.delegate.setup_task(task, action.inputs)
			
 
				 
			
 
				+    def add_iteration_reminder_when_needed(self, i: int, obs: Observation):
			
 
				+        """Add iteration reminder to the observation if needed.
			
 
				+
			
 
				+        Args:
			
 
				+            i: The current iteration number (0-indexed).
			
 
				+            obs: The observation to add the reminder to.
			
 
				+        """
			
 
				+        if self.remind_iterations:
			
 
				+            obs.content += f'\n\nENVIRONMENT REMINDER: You have {self.max_iterations - i - 1} turns left to complete the task.'
			
 
				+        return obs
			
 
				+
			
 
				     async def step(self, i: int) -> bool:
			
 
				         if self.state is None:
			
 
				             raise ValueError('No task to run')
			
@@ -265,6 +293,7 @@ class AgentController:
 
				         if isinstance(action, AgentFinishAction):
			
 
				             self.state.outputs = action.outputs  # type: ignore[attr-defined]
			
 
				             logger.info(action, extra={'msg_type': 'INFO'})
			
 
				+            await self.add_history(action, NullObservation(''))
			
 
				             return True
			
 
				         elif isinstance(action, MessageAction) and action.wait_for_response:
			
 
				             # FIXME: remove this once history is managed outside the agent controller
			
@@ -280,6 +309,7 @@ class AgentController:
 
				         elif not isinstance(observation, ErrorObservation):
			
 
				             observation = await self.runtime.run_action(action)
			
 
				 
			
 
				+        observation = self.add_iteration_reminder_when_needed(i, observation)
			
 
				         if not isinstance(observation, NullObservation):
			
 
				             logger.info(observation, extra={'msg_type': 'OBSERVATION'})
			
 
				         await self.add_history(action, observation)
			
--- a/opendevin/core/config.py
+++ b/opendevin/core/config.py
@@ -82,6 +82,7 @@ class AppConfig(metaclass=Singleton):
 
				     )
			
 
				     run_as_devin: bool = True
			
 
				     max_iterations: int = 100
			
 
				+    remind_iterations: bool = False
			
 
				     e2b_api_key: str = ''
			
 
				     sandbox_type: str = 'ssh'  # Can be 'ssh', 'exec', or 'e2b'
			
 
				     use_host_network: bool = False
			
--- a/opendevin/core/schema/config.py
+++ b/opendevin/core/schema/config.py
@@ -31,6 +31,7 @@ class ConfigType(str, Enum):
 
				     AGENT_MEMORY_MAX_THREADS = 'AGENT_MEMORY_MAX_THREADS'
			
 
				     AGENT_MEMORY_ENABLED = 'AGENT_MEMORY_ENABLED'
			
 
				     MAX_ITERATIONS = 'MAX_ITERATIONS'
			
 
				+    REMIND_ITERATIONS = 'REMIND_ITERATIONS'
			
 
				     MAX_CHARS = 'MAX_CHARS'
			
 
				     AGENT = 'AGENT'
			
 
				     E2B_API_KEY = 'E2B_API_KEY'
			
--- a/opendevin/runtime/runtime.py
+++ b/opendevin/runtime/runtime.py
@@ -51,14 +51,17 @@ class Runtime:
 
				     """
			
 
				 
			
 
				     sid: str
			
 
				-    sandbox: Sandbox
			
 
				 
			
 
				     def __init__(
			
 
				         self,
			
 
				+        sandbox: Sandbox | None = None,
			
 
				         sid: str = 'default',
			
 
				     ):
			
 
				         self.sid = sid
			
 
				-        self.sandbox = create_sandbox(sid, config.sandbox_type)
			
 
				+        if sandbox is None:
			
 
				+            self.sandbox = create_sandbox(sid, config.sandbox_type)
			
 
				+        else:
			
 
				+            self.sandbox = sandbox
			
 
				         self.browser = BrowserEnv()
			
 
				 
			
 
				     def init_sandbox_plugins(self, plugins: list[PluginRequirement]) -> None:
			
--- a/tests/integration/README.md
+++ b/tests/integration/README.md
@@ -20,11 +20,10 @@ not possible with benchmarks.
 
				 
			
 
				 Known limitations:
			
 
				 1. To avoid the potential impact of non-determinism, we remove all special
			
 
				-characters and numbers (often used as PIDs) when doing the comparison. If two
			
 
				-prompts for the same task only differ in non-alpha characters, a wrong mock
			
 
				-response might be picked up.
			
 
				-2. It is required that the agent itself doesn't do anything non-deterministic,
			
 
				-including but not limited to using randomly generated numbers.
			
 
				+characters when doing the comparison. If two prompts for the same task only
			
 
				+differ in non-alphanumeric characters, a wrong mock response might be picked up.
			
 
				+2. It is required that everything has to be deterministic. For example, agent
			
 
				+must not use randomly generated numbers.
			
 
				 
			
 
				 The folder is organised as follows:
			
 
				 
			
--- a/tests/integration/conftest.py
+++ b/tests/integration/conftest.py
@@ -11,7 +11,7 @@ workspace_path = os.getenv('WORKSPACE_BASE')
 
				 
			
 
				 
			
 
				 def filter_out_symbols(input):
			
 
				-    return ' '.join([char for char in input if char.isalpha()])
			
 
				+    return ' '.join([char for char in input if char.isalnum()])
			
 
				 
			
 
				 
			
 
				 def get_log_id(prompt_log_name):
			
@@ -100,7 +100,7 @@ def patch_completion(monkeypatch, request):
 
				         monkeypatch.setattr('sys.stdin', user_responses)
			
 
				 
			
 
				 
			
 
				-def clean_up():
			
 
				+def set_up():
			
 
				     assert workspace_path is not None
			
 
				     if os.path.exists(workspace_path):
			
 
				         for file in os.listdir(workspace_path):
			
@@ -109,7 +109,7 @@ def clean_up():
 
				 
			
 
				 @pytest.fixture(autouse=True)
			
 
				 def resource_setup():
			
 
				-    clean_up()
			
 
				+    set_up()
			
 
				     if not os.path.exists(workspace_path):
			
 
				         os.makedirs(workspace_path)
			
 
				     # Yield to test execution
			
--- a/tests/integration/mock/CodeActAgent/test_edits/prompt_002.log
+++ b/tests/integration/mock/CodeActAgent/test_edits/prompt_002.log
@@ -229,9 +229,10 @@ open bad.txt
 
				 ----------
			
 
				 
			
 
				 OBSERVATION:
			
 
				-[File: /workspace/bad.txt (4 lines total)]

			
 
				-1:This is a stupid typoo.

			
 
				-2:Really?

			
 
				-3:No mor typos!

			
 
				+[File: /workspace/bad.txt (4 lines total)]
			
 
				+1:This is a stupid typoo.
			
 
				+2:Really?
			
 
				+3:No mor typos!
			
 
				 4:Enjoy!
			
 
				+ENVIRONMENT REMINDER: You have 9 turns left to complete the task.
			
 
				 [Command -1 finished with exit code 0]]
			
--- a/tests/integration/mock/CodeActAgent/test_edits/prompt_003.log
+++ b/tests/integration/mock/CodeActAgent/test_edits/prompt_003.log
@@ -229,11 +229,12 @@ open bad.txt
 
				 ----------
			
 
				 
			
 
				 OBSERVATION:
			
 
				-[File: /workspace/bad.txt (4 lines total)]

			
 
				-1:This is a stupid typoo.

			
 
				-2:Really?

			
 
				-3:No mor typos!

			
 
				+[File: /workspace/bad.txt (4 lines total)]
			
 
				+1:This is a stupid typoo.
			
 
				+2:Really?
			
 
				+3:No mor typos!
			
 
				 4:Enjoy!
			
 
				+ENVIRONMENT REMINDER: You have 9 turns left to complete the task.
			
 
				 [Command -1 finished with exit code 0]]
			
 
				 
			
 
				 ----------
			
@@ -257,22 +258,23 @@ end_of_edit
 
				 ----------
			
 
				 
			
 
				 OBSERVATION:
			
 
				-> This is a stupid typo.

			
 
				-> EOF

			
 
				-[File: /workspace/bad.txt (4 lines total)]

			
 
				-1:This is a stupid typo.

			
 
				-2:Really?

			
 
				-3:No mor typos!

			
 
				-4:Enjoy!

			
 
				-File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.edit 3:3 <<EOF

			
 
				-> No more typos!

			
 
				-> EOF

			
 
				-

			
 
				-

			
 
				-[File: /workspace/bad.txt (4 lines total)]

			
 
				-1:This is a stupid typo.

			
 
				-2:Really?

			
 
				-3:No more typos!

			
 
				-4:Enjoy!

			
 
				+> This is a stupid typo.
			
 
				+> EOF
			
 
				+[File: /workspace/bad.txt (4 lines total)]
			
 
				+1:This is a stupid typo.
			
 
				+2:Really?
			
 
				+3:No mor typos!
			
 
				+4:Enjoy!
			
 
				+File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.edit 3:3 <<EOF
			
 
				+> No more typos!
			
 
				+> EOF
			
 
				+
			
 
				+
			
 
				+[File: /workspace/bad.txt (4 lines total)]
			
 
				+1:This is a stupid typo.
			
 
				+2:Really?
			
 
				+3:No more typos!
			
 
				+4:Enjoy!
			
 
				 File updated. Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.
			
 
				+ENVIRONMENT REMINDER: You have 8 turns left to complete the task.
			
 
				 [Command -1 finished with exit code 0]]
			
--- a/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_002.log
+++ b/tests/integration/mock/CodeActAgent/test_write_simple_script/prompt_002.log
@@ -230,4 +230,5 @@ echo "echo hello" > hello.sh
 
				 
			
 
				 OBSERVATION:
			
 
				 
			
 
				+ENVIRONMENT REMINDER: You have 9 turns left to complete the task.
			
 
				 [Command -1 finished with exit code 0]]
			
--- a/tests/integration/regenerate.sh
+++ b/tests/integration/regenerate.sh
@@ -4,20 +4,26 @@ set -eo pipefail
 
				 WORKSPACE_MOUNT_PATH=$(pwd)/_test_workspace
			
 
				 WORKSPACE_BASE=$(pwd)/_test_workspace
			
 
				 SANDBOX_TYPE="ssh"
			
 
				+MAX_ITERATIONS=10
			
 
				 
			
 
				 # FIXME: SWEAgent hangs, so it goes last
			
 
				 agents=("MonologueAgent" "CodeActAgent" "PlannerAgent" "SWEAgent")
			
 
				+# only enable iteration reminder for CodeActAgent in tests
			
 
				+remind_iterations_config=(false true false false)
			
 
				 tasks=("Fix typos in bad.txt." "Write a shell script 'hello.sh' that prints 'hello'.")
			
 
				 test_names=("test_edits" "test_write_simple_script")
			
 
				 
			
 
				 num_of_tests=${#tasks[@]}
			
 
				+num_of_agents=${#agents[@]}
			
 
				 
			
 
				 rm -rf logs
			
 
				 rm -rf _test_workspace
			
 
				 for ((i = 0; i < num_of_tests; i++)); do
			
 
				   task=${tasks[i]}
			
 
				   test_name=${test_names[i]}
			
 
				-  for agent in "${agents[@]}"; do
			
 
				+  for ((j = 0; j < num_of_agents; j++)); do
			
 
				+    agent=${agents[j]}
			
 
				+    remind_iterations=${remind_iterations_config[j]}
			
 
				     echo -e "\n\n\n\n========Running $test_name for $agent========\n\n\n\n"
			
 
				     rm -rf $WORKSPACE_BASE
			
 
				     mkdir $WORKSPACE_BASE
			
@@ -28,7 +34,8 @@ for ((i = 0; i < num_of_tests; i++)); do
 
				       # Temporarily disable 'exit on error'
			
 
				       set +e
			
 
				     fi
			
 
				-    SANDBOX_TYPE=$SANDBOX_TYPE WORKSPACE_BASE=$WORKSPACE_BASE MAX_ITERATIONS=10 \
			
 
				+    SANDBOX_TYPE=$SANDBOX_TYPE WORKSPACE_BASE=$WORKSPACE_BASE \
			
 
				+      MAX_ITERATIONS=$MAX_ITERATIONS REMIND_ITERATIONS=$remind_iterations \
			
 
				       WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH AGENT=$agent \
			
 
				       poetry run pytest -s ./tests/integration/test_agent.py::$test_name
			
 
				     TEST_STATUS=$?
			
@@ -43,10 +50,10 @@ for ((i = 0; i < num_of_tests; i++)); do
 
				       rm -rf logs
			
 
				       rm -rf tests/integration/mock/$agent/$test_name/*
			
 
				       echo -e "/exit\n" | SANDBOX_TYPE=$SANDBOX_TYPE WORKSPACE_BASE=$WORKSPACE_BASE \
			
 
				-        DEBUG=true \
			
 
				+        DEBUG=true REMIND_ITERATIONS=$remind_iterations \
			
 
				         WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH AGENT=$agent \
			
 
				         poetry run python ./opendevin/core/main.py \
			
 
				-        -i 10 \
			
 
				+        -i $MAX_ITERATIONS \
			
 
				         -t "$task Do not ask me for confirmation at any point." \
			
 
				         -c $agent
			
 
				 
			
--- a/tests/integration/test_agent.py
+++ b/tests/integration/test_agent.py
@@ -54,7 +54,6 @@ def test_edits():
 
				         dest_file = os.path.join(workspace_base, file)
			
 
				         if os.path.exists(dest_file):
			
 
				             os.remove(dest_file)
			
 
				-        print('source = ', os.path.join(source_dir, file), ' dest = ', dest_file)
			
 
				         shutil.copy(os.path.join(source_dir, file), dest_file)
			
 
				 
			
 
				     # Execute the task