Jelajahi Sumber

Support returning final task states for evaluation (#1755)

* support returning states at the end of controller

* remove return None

* fix issue of overriding final state

* return the final state on close

* merge AgentState with State

* fix integration test

* add ChangeAgentStateAction to history in an attempt to fix the integration test

* add back set agent state

* update tests

* update tests

* directly return get state

* add back the missing .close()

* Update typo in opendevin/core/main.py

---------

Co-authored-by: Boxuan Li <liboxuan@connect.hku.hk>
Xingyao Wang 1 tahun lalu
induk
melakukan
d1fd277ad4

+ 5 - 6
opendevin/controller/agent_controller.py

@@ -45,7 +45,6 @@ class AgentController:
     state: State
     state: State
     agent_task: Optional[asyncio.Task] = None
     agent_task: Optional[asyncio.Task] = None
     delegate: 'AgentController | None' = None
     delegate: 'AgentController | None' = None
-    _agent_state: AgentState = AgentState.LOADING
     _pending_action: Action | None = None
     _pending_action: Action | None = None
 
 
     def __init__(
     def __init__(
@@ -147,22 +146,22 @@ class AgentController:
 
 
     async def set_agent_state_to(self, new_state: AgentState):
     async def set_agent_state_to(self, new_state: AgentState):
         logger.info(
         logger.info(
-            f'Setting agent({type(self.agent).__name__}) state from {self._agent_state} to {new_state}'
+            f'Setting agent({type(self.agent).__name__}) state from {self.state.agent_state} to {new_state}'
         )
         )
-        if new_state == self._agent_state:
+        if new_state == self.state.agent_state:
             return
             return
 
 
-        self._agent_state = new_state
+        self.state.agent_state = new_state
         if new_state == AgentState.STOPPED or new_state == AgentState.ERROR:
         if new_state == AgentState.STOPPED or new_state == AgentState.ERROR:
             self.reset_task()
             self.reset_task()
 
 
         await self.event_stream.add_event(
         await self.event_stream.add_event(
-            AgentStateChangedObservation('', self._agent_state), EventSource.AGENT
+            AgentStateChangedObservation('', self.state.agent_state), EventSource.AGENT
         )
         )
 
 
     def get_agent_state(self):
     def get_agent_state(self):
         """Returns the current state of the agent task."""
         """Returns the current state of the agent task."""
-        return self._agent_state
+        return self.state.agent_state
 
 
     async def start_delegate(self, action: AgentDelegateAction):
     async def start_delegate(self, action: AgentDelegateAction):
         AgentCls: Type[Agent] = Agent.get_cls(action.agent)
         AgentCls: Type[Agent] = Agent.get_cls(action.agent)

+ 3 - 0
opendevin/controller/state/state.py

@@ -1,6 +1,7 @@
 from dataclasses import dataclass, field
 from dataclasses import dataclass, field
 
 
 from opendevin.controller.state.task import RootTask
 from opendevin.controller.state.task import RootTask
+from opendevin.core.schema import AgentState
 from opendevin.events.action import (
 from opendevin.events.action import (
     Action,
     Action,
     MessageAction,
     MessageAction,
@@ -23,6 +24,8 @@ class State:
     updated_info: list[tuple[Action, Observation]] = field(default_factory=list)
     updated_info: list[tuple[Action, Observation]] = field(default_factory=list)
     inputs: dict = field(default_factory=dict)
     inputs: dict = field(default_factory=dict)
     outputs: dict = field(default_factory=dict)
     outputs: dict = field(default_factory=dict)
+    error: str | None = None
+    agent_state: AgentState = AgentState.LOADING
 
 
     def get_current_user_intent(self):
     def get_current_user_intent(self):
         # TODO: this is used to understand the user's main goal, but it's possible
         # TODO: this is used to understand the user's main goal, but it's possible

+ 17 - 14
opendevin/core/main.py

@@ -1,10 +1,11 @@
 import asyncio
 import asyncio
 import sys
 import sys
-from typing import Type
+from typing import Callable, Optional, Type
 
 
 import agenthub  # noqa F401 (we import this to get the agents registered)
 import agenthub  # noqa F401 (we import this to get the agents registered)
 from opendevin.controller import AgentController
 from opendevin.controller import AgentController
 from opendevin.controller.agent import Agent
 from opendevin.controller.agent import Agent
+from opendevin.controller.state.state import State
 from opendevin.core.config import args, get_llm_config_arg
 from opendevin.core.config import args, get_llm_config_arg
 from opendevin.core.schema import AgentState
 from opendevin.core.schema import AgentState
 from opendevin.events.action import ChangeAgentStateAction, MessageAction
 from opendevin.events.action import ChangeAgentStateAction, MessageAction
@@ -26,17 +27,18 @@ def read_task_from_stdin() -> str:
     return sys.stdin.read()
     return sys.stdin.read()
 
 
 
 
-async def main(task_str: str = '', exit_on_message: bool = False) -> AgentState:
-    """
-    Main coroutine to run the agent controller with task input flexibility.
+async def main(
+    task_str: str = '',
+    exit_on_message: bool = False,
+    fake_user_response_fn: Optional[Callable[[Optional[State]], str]] = None,
+) -> Optional[State]:
+    """Main coroutine to run the agent controller with task input flexibility.
     It's only used when you launch opendevin backend directly via cmdline.
     It's only used when you launch opendevin backend directly via cmdline.
 
 
     Args:
     Args:
-        task_str: task string (optional)
+        task_str: The task to run.
         exit_on_message: quit if agent asks for a message from user (optional)
         exit_on_message: quit if agent asks for a message from user (optional)
-
-    Returns:
-        The final agent state right before shutdown
+        fake_user_response_fn: An optional function that receives the current state (could be None) and returns a fake user response.
     """
     """
 
 
     # Determine the task source
     # Determine the task source
@@ -94,10 +96,13 @@ async def main(task_str: str = '', exit_on_message: bool = False) -> AgentState:
     async def on_event(event: Event):
     async def on_event(event: Event):
         if isinstance(event, AgentStateChangedObservation):
         if isinstance(event, AgentStateChangedObservation):
             if event.agent_state == AgentState.AWAITING_USER_INPUT:
             if event.agent_state == AgentState.AWAITING_USER_INPUT:
-                action = MessageAction(content='/exit')
-                if not exit_on_message:
+                if exit_on_message:
+                    message = '/exit'
+                elif fake_user_response_fn is None:
                     message = input('Request user input >> ')
                     message = input('Request user input >> ')
-                    action = MessageAction(content=message)
+                else:
+                    message = fake_user_response_fn(controller.get_state())
+                action = MessageAction(content=message)
                 await event_stream.add_event(action, EventSource.USER)
                 await event_stream.add_event(action, EventSource.USER)
 
 
     event_stream.subscribe(EventStreamSubscriber.MAIN, on_event)
     event_stream.subscribe(EventStreamSubscriber.MAIN, on_event)
@@ -109,10 +114,8 @@ async def main(task_str: str = '', exit_on_message: bool = False) -> AgentState:
     ]:
     ]:
         await asyncio.sleep(1)  # Give back control for a tick, so the agent can run
         await asyncio.sleep(1)  # Give back control for a tick, so the agent can run
 
 
-    # retrieve the final state before we close the controller and agent
-    final_agent_state = controller.get_agent_state()
     await controller.close()
     await controller.close()
-    return final_agent_state
+    return controller.get_state()
 
 
 
 
 if __name__ == '__main__':
 if __name__ == '__main__':

+ 7 - 6
tests/integration/test_agent.py

@@ -5,6 +5,7 @@ import subprocess
 
 
 import pytest
 import pytest
 
 
+from opendevin.controller.state.state import State
 from opendevin.core.main import main
 from opendevin.core.main import main
 from opendevin.core.schema import AgentState
 from opendevin.core.schema import AgentState
 
 
@@ -18,8 +19,8 @@ workspace_base = os.getenv('WORKSPACE_BASE')
 )
 )
 def test_write_simple_script():
 def test_write_simple_script():
     task = "Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point."
     task = "Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point."
-    final_agent_state = asyncio.run(main(task, exit_on_message=True))
-    assert final_agent_state == AgentState.FINISHED
+    final_state: State = asyncio.run(main(task, exit_on_message=True))
+    assert final_state.agent_state == AgentState.STOPPED
 
 
     # Verify the script file exists
     # Verify the script file exists
     script_path = os.path.join(workspace_base, 'hello.sh')
     script_path = os.path.join(workspace_base, 'hello.sh')
@@ -59,8 +60,8 @@ def test_edits():
 
 
     # Execute the task
     # Execute the task
     task = 'Fix typos in bad.txt. Do not ask me for confirmation at any point.'
     task = 'Fix typos in bad.txt. Do not ask me for confirmation at any point.'
-    final_agent_state = asyncio.run(main(task, exit_on_message=True))
-    assert final_agent_state == AgentState.FINISHED
+    final_state: State = asyncio.run(main(task, exit_on_message=True))
+    assert final_state.agent_state == AgentState.STOPPED
 
 
     # Verify bad.txt has been fixed
     # Verify bad.txt has been fixed
     text = """This is a stupid typo.
     text = """This is a stupid typo.
@@ -84,8 +85,8 @@ Enjoy!
 def test_ipython():
 def test_ipython():
     # Execute the task
     # Execute the task
     task = "Use Jupyter IPython to write a text file containing 'hello world' to '/workspace/test.txt'. Do not ask me for confirmation at any point."
     task = "Use Jupyter IPython to write a text file containing 'hello world' to '/workspace/test.txt'. Do not ask me for confirmation at any point."
-    final_agent_state = asyncio.run(main(task, exit_on_message=True))
-    assert final_agent_state == AgentState.FINISHED
+    final_state: State = asyncio.run(main(task, exit_on_message=True))
+    assert final_state.agent_state == AgentState.STOPPED
 
 
     # Verify the file exists
     # Verify the file exists
     file_path = os.path.join(workspace_base, 'test.txt')
     file_path = os.path.join(workspace_base, 'test.txt')