Source search

Refactor MonologueAgent, PlannerAgent add response parser (#2400)

* refactor monologue

* refactor planner_agent

* fix bug

* add back code

* add back code
Yufan Song, 1 year ago
parent
commit
0c92144220

+ 6 - 5
agenthub/monologue_agent/agent.py

@@ -1,4 +1,5 @@
 import agenthub.monologue_agent.utils.prompts as prompts
+from agenthub.monologue_agent.response_parser import MonologueResponseParser
 from agenthub.monologue_agent.utils.prompts import INITIAL_THOUGHTS
 from opendevin.controller.agent import Agent
 from opendevin.controller.state.state import State
@@ -48,6 +49,7 @@ class MonologueAgent(Agent):
     memory: 'LongTermMemory | None'
     memory_condenser: MemoryCondenser
     runtime_tools: list[RuntimeTool] = [RuntimeTool.BROWSER]
+    response_parser = MonologueResponseParser()
 
     def __init__(self, llm: LLM):
         """
@@ -181,13 +183,12 @@ class MonologueAgent(Agent):
         # format all as a single message, a monologue
         resp = self.llm.do_completion(messages=messages)
 
-        # get the next action from the response
-        action_resp = resp['choices'][0]['message']['content']
-
         # keep track of max_chars fallback option
-        state.num_of_chars += len(prompt) + len(action_resp)
+        state.num_of_chars += len(prompt) + len(
+            resp['choices'][0]['message']['content']
+        )
 
-        action = prompts.parse_action_response(action_resp)
+        action = self.response_parser.parse(resp)
         self.latest_action = action
         return action
 

+ 40 - 0
agenthub/monologue_agent/response_parser.py

@@ -0,0 +1,40 @@
+from opendevin.controller.action_parser import ResponseParser
+from opendevin.core.utils import json
+from opendevin.events.action import (
+    Action,
+)
+from opendevin.events.serialization.action import action_from_dict
+
+
+class MonologueResponseParser(ResponseParser):
+    def __init__(
+        self,
+    ):
+        pass
+
+    def parse(self, response: str) -> Action:
+        action_str = self.parse_response(response)
+        return self.parse_action(action_str)
+
+    def parse_response(self, response) -> str:
+        # get the next action from the response
+        return response['choices'][0]['message']['content']
+
+    def parse_action(self, action_str: str) -> Action:
+        """
+        Parses a string to find an action within it
+
+        Parameters:
+        - response (str): The string to be parsed
+
+        Returns:
+        - Action: The action that was found in the response string
+        """
+        # attempt to load the JSON dict from the response
+        action_dict = json.loads(action_str)
+
+        if 'content' in action_dict:
+            # The LLM gets confused here. Might as well be robust
+            action_dict['contents'] = action_dict.pop('content')
+
+        return action_from_dict(action_dict)

+ 5 - 5
agenthub/monologue_agent/utils/prompts.py

@@ -1,13 +1,13 @@
 from opendevin.core.config import config
 from opendevin.core.utils import json
-from opendevin.events.action import (
-    Action,
-)
 from opendevin.events.observation import (
     CmdOutputObservation,
 )
-from opendevin.events.serialization.action import action_from_dict
+from opendevin.events.action import (
+    Action,
+)
 
+from opendevin.events.serialization.action import action_from_dict
 ACTION_PROMPT = """
 You're a thoughtful robot. Your main task is this:
 %(task)s
@@ -242,4 +242,4 @@ def parse_summary_response(response: str) -> list[dict]:
     - list[dict]: The list of summaries output by the model
     """
     parsed = json.loads(response)
-    return parsed['new_monologue']
+    return parsed['new_monologue']

+ 7 - 5
agenthub/planner_agent/agent.py

@@ -1,10 +1,11 @@
+from agenthub.monologue_agent.response_parser import MonologueResponseParser
 from opendevin.controller.agent import Agent
 from opendevin.controller.state.state import State
 from opendevin.events.action import Action, AgentFinishAction
 from opendevin.llm.llm import LLM
 from opendevin.runtime.tools import RuntimeTool
 
-from .prompt import get_prompt, parse_response
+from .prompt import get_prompt
 
 
 class PlannerAgent(Agent):
@@ -14,6 +15,7 @@ class PlannerAgent(Agent):
     The agent is given its previous action-observation pairs, current task, and hint based on last action taken at every step.
     """
     runtime_tools: list[RuntimeTool] = [RuntimeTool.BROWSER]
+    response_parser = MonologueResponseParser()
 
     def __init__(self, llm: LLM):
         """
@@ -46,10 +48,10 @@ class PlannerAgent(Agent):
         prompt = get_prompt(state)
         messages = [{'content': prompt, 'role': 'user'}]
         resp = self.llm.do_completion(messages=messages)
-        action_resp = resp['choices'][0]['message']['content']
-        state.num_of_chars += len(prompt) + len(action_resp)
-        action = parse_response(action_resp)
-        return action
+        state.num_of_chars += len(prompt) + len(
+            resp['choices'][0]['message']['content']
+        )
+        return self.response_parser.parse(resp)
 
     def search_memory(self, query: str) -> list[str]:
         return []

+ 0 - 2
agenthub/planner_agent/prompt.py

@@ -169,10 +169,8 @@ def get_prompt(state: State) -> str:
 def parse_response(response: str) -> Action:
     """
     Parses the model output to find a valid action to take
-
     Parameters:
     - response (str): A response from the model that potentially contains an Action.
-
     Returns:
     - Action: A valid next action to perform from model output
     """