Просмотр исходного кода

Support returning final task states for evaluation (#1755)

* support returning states at the end of controller

* remove return None

* fix issue of overriding final state

* return the final state on close

* merge AgentState with State

* fix integration test

* add ChangeAgentStateAction to history in an attempt to fix the integration test

* add back set agent state

* update tests

* update tests

* directly return get state

* add back the missing .close()

* Update typo in opendevin/core/main.py

---------

Co-authored-by: Boxuan Li <liboxuan@connect.hku.hk>
Xingyao Wang 1 год назад
Родитель
Commit
d1fd277ad4

+ 5 - 6
opendevin/controller/agent_controller.py

@@ -45,7 +45,6 @@ class AgentController:
     state: State
     agent_task: Optional[asyncio.Task] = None
     delegate: 'AgentController | None' = None
-    _agent_state: AgentState = AgentState.LOADING
     _pending_action: Action | None = None
 
     def __init__(
@@ -147,22 +146,22 @@ class AgentController:
 
     async def set_agent_state_to(self, new_state: AgentState):
         logger.info(
-            f'Setting agent({type(self.agent).__name__}) state from {self._agent_state} to {new_state}'
+            f'Setting agent({type(self.agent).__name__}) state from {self.state.agent_state} to {new_state}'
         )
-        if new_state == self._agent_state:
+        if new_state == self.state.agent_state:
             return
 
-        self._agent_state = new_state
+        self.state.agent_state = new_state
         if new_state == AgentState.STOPPED or new_state == AgentState.ERROR:
             self.reset_task()
 
         await self.event_stream.add_event(
-            AgentStateChangedObservation('', self._agent_state), EventSource.AGENT
+            AgentStateChangedObservation('', self.state.agent_state), EventSource.AGENT
         )
 
     def get_agent_state(self):
         """Returns the current state of the agent task."""
-        return self._agent_state
+        return self.state.agent_state
 
     async def start_delegate(self, action: AgentDelegateAction):
         AgentCls: Type[Agent] = Agent.get_cls(action.agent)

+ 3 - 0
opendevin/controller/state/state.py

@@ -1,6 +1,7 @@
 from dataclasses import dataclass, field
 
 from opendevin.controller.state.task import RootTask
+from opendevin.core.schema import AgentState
 from opendevin.events.action import (
     Action,
     MessageAction,
@@ -23,6 +24,8 @@ class State:
     updated_info: list[tuple[Action, Observation]] = field(default_factory=list)
     inputs: dict = field(default_factory=dict)
     outputs: dict = field(default_factory=dict)
+    error: str | None = None
+    agent_state: AgentState = AgentState.LOADING
 
     def get_current_user_intent(self):
         # TODO: this is used to understand the user's main goal, but it's possible

+ 17 - 14
opendevin/core/main.py

@@ -1,10 +1,11 @@
 import asyncio
 import sys
-from typing import Type
+from typing import Callable, Optional, Type
 
 import agenthub  # noqa F401 (we import this to get the agents registered)
 from opendevin.controller import AgentController
 from opendevin.controller.agent import Agent
+from opendevin.controller.state.state import State
 from opendevin.core.config import args, get_llm_config_arg
 from opendevin.core.schema import AgentState
 from opendevin.events.action import ChangeAgentStateAction, MessageAction
@@ -26,17 +27,18 @@ def read_task_from_stdin() -> str:
     return sys.stdin.read()
 
 
-async def main(task_str: str = '', exit_on_message: bool = False) -> AgentState:
-    """
-    Main coroutine to run the agent controller with task input flexibility.
+async def main(
+    task_str: str = '',
+    exit_on_message: bool = False,
+    fake_user_response_fn: Optional[Callable[[Optional[State]], str]] = None,
+) -> Optional[State]:
+    """Main coroutine to run the agent controller with task input flexibility.
     It's only used when you launch opendevin backend directly via cmdline.
 
     Args:
-        task_str: task string (optional)
+        task_str: The task to run.
         exit_on_message: quit if agent asks for a message from user (optional)
-
-    Returns:
-        The final agent state right before shutdown
+        fake_user_response_fn: An optional function that receives the current state (could be None) and returns a fake user response.
     """
 
     # Determine the task source
@@ -94,10 +96,13 @@ async def main(task_str: str = '', exit_on_message: bool = False) -> AgentState:
     async def on_event(event: Event):
         if isinstance(event, AgentStateChangedObservation):
             if event.agent_state == AgentState.AWAITING_USER_INPUT:
-                action = MessageAction(content='/exit')
-                if not exit_on_message:
+                if exit_on_message:
+                    message = '/exit'
+                elif fake_user_response_fn is None:
                     message = input('Request user input >> ')
-                    action = MessageAction(content=message)
+                else:
+                    message = fake_user_response_fn(controller.get_state())
+                action = MessageAction(content=message)
                 await event_stream.add_event(action, EventSource.USER)
 
     event_stream.subscribe(EventStreamSubscriber.MAIN, on_event)
@@ -109,10 +114,8 @@ async def main(task_str: str = '', exit_on_message: bool = False) -> AgentState:
     ]:
         await asyncio.sleep(1)  # Give back control for a tick, so the agent can run
 
-    # retrieve the final state before we close the controller and agent
-    final_agent_state = controller.get_agent_state()
     await controller.close()
-    return final_agent_state
+    return controller.get_state()
 
 
 if __name__ == '__main__':

+ 7 - 6
tests/integration/test_agent.py

@@ -5,6 +5,7 @@ import subprocess
 
 import pytest
 
+from opendevin.controller.state.state import State
 from opendevin.core.main import main
 from opendevin.core.schema import AgentState
 
@@ -18,8 +19,8 @@ workspace_base = os.getenv('WORKSPACE_BASE')
 )
 def test_write_simple_script():
     task = "Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point."
-    final_agent_state = asyncio.run(main(task, exit_on_message=True))
-    assert final_agent_state == AgentState.FINISHED
+    final_state: State = asyncio.run(main(task, exit_on_message=True))
+    assert final_state.agent_state == AgentState.STOPPED
 
     # Verify the script file exists
     script_path = os.path.join(workspace_base, 'hello.sh')
@@ -59,8 +60,8 @@ def test_edits():
 
     # Execute the task
     task = 'Fix typos in bad.txt. Do not ask me for confirmation at any point.'
-    final_agent_state = asyncio.run(main(task, exit_on_message=True))
-    assert final_agent_state == AgentState.FINISHED
+    final_state: State = asyncio.run(main(task, exit_on_message=True))
+    assert final_state.agent_state == AgentState.STOPPED
 
     # Verify bad.txt has been fixed
     text = """This is a stupid typo.
@@ -84,8 +85,8 @@ Enjoy!
 def test_ipython():
     # Execute the task
     task = "Use Jupyter IPython to write a text file containing 'hello world' to '/workspace/test.txt'. Do not ask me for confirmation at any point."
-    final_agent_state = asyncio.run(main(task, exit_on_message=True))
-    assert final_agent_state == AgentState.FINISHED
+    final_state: State = asyncio.run(main(task, exit_on_message=True))
+    assert final_state.agent_state == AgentState.STOPPED
 
     # Verify the file exists
     file_path = os.path.join(workspace_base, 'test.txt')