Source search

Refactor MonologueAgent, PlannerAgent add response parser (#2400)

* refactor monologue

* refactor planner_agent

* fix bug

* add back code

* add back code
Yufan Song, 1 year ago
parent
commit
0c92144220

+ 6 - 5
agenthub/monologue_agent/agent.py

@@ -1,4 +1,5 @@
 import agenthub.monologue_agent.utils.prompts as prompts
+from agenthub.monologue_agent.response_parser import MonologueResponseParser
 from agenthub.monologue_agent.utils.prompts import INITIAL_THOUGHTS
 from opendevin.controller.agent import Agent
 from opendevin.controller.state.state import State
@@ -48,6 +49,7 @@ class MonologueAgent(Agent):
     memory: 'LongTermMemory | None'
     memory_condenser: MemoryCondenser
     runtime_tools: list[RuntimeTool] = [RuntimeTool.BROWSER]
+    response_parser = MonologueResponseParser()
 
     def __init__(self, llm: LLM):
         """
@@ -181,13 +183,12 @@ class MonologueAgent(Agent):
         # format all as a single message, a monologue
         resp = self.llm.do_completion(messages=messages)
 
-        # get the next action from the response
-        action_resp = resp['choices'][0]['message']['content']
-
         # keep track of max_chars fallback option
-        state.num_of_chars += len(prompt) + len(action_resp)
+        state.num_of_chars += len(prompt) + len(
+            resp['choices'][0]['message']['content']
+        )
 
-        action = prompts.parse_action_response(action_resp)
+        action = self.response_parser.parse(resp)
         self.latest_action = action
         return action
 

+ 40 - 0
agenthub/monologue_agent/response_parser.py

@@ -0,0 +1,40 @@
+from opendevin.controller.action_parser import ResponseParser
+from opendevin.core.utils import json
+from opendevin.events.action import (
+    Action,
+)
+from opendevin.events.serialization.action import action_from_dict
+
+
+class MonologueResponseParser(ResponseParser):
+    def __init__(
+        self,
+    ):
+        pass
+
+    def parse(self, response: str) -> Action:
+        action_str = self.parse_response(response)
+        return self.parse_action(action_str)
+
+    def parse_response(self, response) -> str:
+        # get the next action from the response
+        return response['choices'][0]['message']['content']
+
+    def parse_action(self, action_str: str) -> Action:
+        """
+        Parses a string to find an action within it
+
+        Parameters:
+        - response (str): The string to be parsed
+
+        Returns:
+        - Action: The action that was found in the response string
+        """
+        # attempt to load the JSON dict from the response
+        action_dict = json.loads(action_str)
+
+        if 'content' in action_dict:
+            # The LLM gets confused here. Might as well be robust
+            action_dict['contents'] = action_dict.pop('content')
+
+        return action_from_dict(action_dict)

+ 5 - 5
agenthub/monologue_agent/utils/prompts.py

@@ -1,13 +1,13 @@
 from opendevin.core.config import config
 from opendevin.core.utils import json
-from opendevin.events.action import (
-    Action,
-)
 from opendevin.events.observation import (
     CmdOutputObservation,
 )
-from opendevin.events.serialization.action import action_from_dict
+from opendevin.events.action import (
+    Action,
+)
 
+from opendevin.events.serialization.action import action_from_dict
 ACTION_PROMPT = """
 You're a thoughtful robot. Your main task is this:
 %(task)s
@@ -242,4 +242,4 @@ def parse_summary_response(response: str) -> list[dict]:
     - list[dict]: The list of summaries output by the model
     """
     parsed = json.loads(response)
-    return parsed['new_monologue']
+    return parsed['new_monologue']

+ 7 - 5
agenthub/planner_agent/agent.py

@@ -1,10 +1,11 @@
+from agenthub.monologue_agent.response_parser import MonologueResponseParser
 from opendevin.controller.agent import Agent
 from opendevin.controller.state.state import State
 from opendevin.events.action import Action, AgentFinishAction
 from opendevin.llm.llm import LLM
 from opendevin.runtime.tools import RuntimeTool
 
-from .prompt import get_prompt, parse_response
+from .prompt import get_prompt
 
 
 class PlannerAgent(Agent):
@@ -14,6 +15,7 @@ class PlannerAgent(Agent):
     The agent is given its previous action-observation pairs, current task, and hint based on last action taken at every step.
     """
     runtime_tools: list[RuntimeTool] = [RuntimeTool.BROWSER]
+    response_parser = MonologueResponseParser()
 
     def __init__(self, llm: LLM):
         """
@@ -46,10 +48,10 @@ class PlannerAgent(Agent):
         prompt = get_prompt(state)
         messages = [{'content': prompt, 'role': 'user'}]
         resp = self.llm.do_completion(messages=messages)
-        action_resp = resp['choices'][0]['message']['content']
-        state.num_of_chars += len(prompt) + len(action_resp)
-        action = parse_response(action_resp)
-        return action
+        state.num_of_chars += len(prompt) + len(
+            resp['choices'][0]['message']['content']
+        )
+        return self.response_parser.parse(resp)
 
     def search_memory(self, query: str) -> list[str]:
         return []

+ 0 - 2
agenthub/planner_agent/prompt.py

@@ -169,10 +169,8 @@ def get_prompt(state: State) -> str:
 def parse_response(response: str) -> Action:
     """
     Parses the model output to find a valid action to take
-
     Parameters:
     - response (str): A response from the model that potentially contains an Action.
-
     Returns:
     - Action: A valid next action to perform from model output
     """