1 år sedan · 7f8c324d3a
--- a/agenthub/codeact_swe_agent/action_parser.py
+++ b/agenthub/codeact_swe_agent/action_parser.py
@@ -0,0 +1,114 @@
 
				+import re
			
 
				+
			
 
				+from opendevin.controller.action_parser import ActionParser
			
 
				+from opendevin.events.action import (
			
 
				+    Action,
			
 
				+    AgentFinishAction,
			
 
				+    CmdRunAction,
			
 
				+    IPythonRunCellAction,
			
 
				+    MessageAction,
			
 
				+)
			
 
				+
			
 
				+
			
 
				+class CodeActSWEActionParserFinish(ActionParser):
			
 
				+    """
			
 
				+    Parser action:
			
 
				+        - AgentFinishAction() - end the interaction
			
 
				+    """
			
 
				+
			
 
				+    def __init__(
			
 
				+        self,
			
 
				+    ):
			
 
				+        self.finish_command = None
			
 
				+
			
 
				+    def check_condition(self, action_str: str) -> bool:
			
 
				+        self.finish_command = re.search(r'<finish>.*</finish>', action_str, re.DOTALL)
			
 
				+        return self.finish_command is not None
			
 
				+
			
 
				+    def parse(self, action_str: str) -> Action:
			
 
				+        assert (
			
 
				+            self.finish_command is not None
			
 
				+        ), 'self.finish_command should not be None when parse is called'
			
 
				+        thought = action_str.replace(self.finish_command.group(0), '').strip()
			
 
				+        return AgentFinishAction(thought=thought)
			
 
				+
			
 
				+
			
 
				+class CodeActSWEActionParserCmdRun(ActionParser):
			
 
				+    """
			
 
				+    Parser action:
			
 
				+        - CmdRunAction(command) - bash command to run
			
 
				+        - AgentFinishAction() - end the interaction
			
 
				+    """
			
 
				+
			
 
				+    def __init__(
			
 
				+        self,
			
 
				+    ):
			
 
				+        self.bash_command = None
			
 
				+
			
 
				+    def check_condition(self, action_str: str) -> bool:
			
 
				+        self.bash_command = re.search(
			
 
				+            r'<execute_bash>(.*?)</execute_bash>', action_str, re.DOTALL
			
 
				+        )
			
 
				+        return self.bash_command is not None
			
 
				+
			
 
				+    def parse(self, action_str: str) -> Action:
			
 
				+        assert (
			
 
				+            self.bash_command is not None
			
 
				+        ), 'self.bash_command should not be None when parse is called'
			
 
				+        thought = action_str.replace(self.bash_command.group(0), '').strip()
			
 
				+        # a command was found
			
 
				+        command_group = self.bash_command.group(1).strip()
			
 
				+        if command_group.strip() == 'exit':
			
 
				+            return AgentFinishAction()
			
 
				+        return CmdRunAction(command=command_group, thought=thought)
			
 
				+
			
 
				+
			
 
				+class CodeActSWEActionParserIPythonRunCell(ActionParser):
			
 
				+    """
			
 
				+    Parser action:
			
 
				+        - IPythonRunCellAction(code) - IPython code to run
			
 
				+    """
			
 
				+
			
 
				+    def __init__(
			
 
				+        self,
			
 
				+    ):
			
 
				+        self.python_code = None
			
 
				+        self.jupyter_kernel_init_code: str = 'from agentskills import *'
			
 
				+
			
 
				+    def check_condition(self, action_str: str) -> bool:
			
 
				+        self.python_code = re.search(
			
 
				+            r'<execute_ipython>(.*?)</execute_ipython>', action_str, re.DOTALL
			
 
				+        )
			
 
				+        return self.python_code is not None
			
 
				+
			
 
				+    def parse(self, action_str: str) -> Action:
			
 
				+        assert (
			
 
				+            self.python_code is not None
			
 
				+        ), 'self.python_code should not be None when parse is called'
			
 
				+        code_group = self.python_code.group(1).strip()
			
 
				+        thought = action_str.replace(self.python_code.group(0), '').strip()
			
 
				+        return IPythonRunCellAction(
			
 
				+            code=code_group,
			
 
				+            thought=thought,
			
 
				+            kernel_init_code=self.jupyter_kernel_init_code,
			
 
				+        )
			
 
				+
			
 
				+
			
 
				+class CodeActSWEActionParserMessage(ActionParser):
			
 
				+    """
			
 
				+    Parser action:
			
 
				+        - MessageAction(content) - Message action to run (e.g. ask for clarification)
			
 
				+    """
			
 
				+
			
 
				+    def __init__(
			
 
				+        self,
			
 
				+    ):
			
 
				+        pass
			
 
				+
			
 
				+    def check_condition(self, action_str: str) -> bool:
			
 
				+        # We assume the LLM is GOOD enough that when it returns pure natural language
			
 
				+        # it wants to talk to the user
			
 
				+        return True
			
 
				+
			
 
				+    def parse(self, action_str: str) -> Action:
			
 
				+        return MessageAction(content=action_str, wait_for_response=True)
			
--- a/agenthub/codeact_swe_agent/codeact_swe_agent.py
+++ b/agenthub/codeact_swe_agent/codeact_swe_agent.py
@@ -1,11 +1,10 @@
 
				-import re
			
 
				-
			
 
				 from agenthub.codeact_swe_agent.prompt import (
			
 
				     COMMAND_DOCS,
			
 
				     MINIMAL_SYSTEM_PREFIX,
			
 
				     SWE_EXAMPLE,
			
 
				     SYSTEM_SUFFIX,
			
 
				 )
			
 
				+from agenthub.codeact_swe_agent.response_parser import CodeActSWEResponseParser
			
 
				 from opendevin.controller.agent import Agent
			
 
				 from opendevin.controller.state.state import State
			
 
				 from opendevin.events.action import (
			
@@ -30,14 +29,6 @@ from opendevin.runtime.plugins import (
 
				 from opendevin.runtime.tools import RuntimeTool
			
 
				 
			
 
				 
			
 
				-def parse_response(response) -> str:
			
 
				-    action = response.choices[0].message.content
			
 
				-    for lang in ['bash', 'ipython', 'browse']:
			
 
				-        if f'<execute_{lang}>' in action and f'</execute_{lang}>' not in action:
			
 
				-            action += f'</execute_{lang}>'
			
 
				-    return action
			
 
				-
			
 
				-
			
 
				 def action_to_str(action: Action) -> str:
			
 
				     if isinstance(action, CmdRunAction):
			
 
				         return f'{action.thought}\n<execute_bash>\n{action.command}\n</execute_bash>'
			
@@ -129,11 +120,12 @@ class CodeActSWEAgent(Agent):
 
				         JupyterRequirement(),
			
 
				     ]
			
 
				     runtime_tools: list[RuntimeTool] = [RuntimeTool.BROWSER]
			
 
				-    jupyter_kernel_init_code: str = 'from agentskills import *'
			
 
				 
			
 
				     system_message: str = get_system_message()
			
 
				     in_context_example: str = f"Here is an example of how you can interact with the environment for task solving:\n{get_in_context_example()}\n\nNOW, LET'S START!"
			
 
				 
			
 
				+    response_parser = CodeActSWEResponseParser()
			
 
				+
			
 
				     def __init__(
			
 
				         self,
			
 
				         llm: LLM,
			
@@ -199,50 +191,11 @@ class CodeActSWEAgent(Agent):
 
				             ],
			
 
				             temperature=0.0,
			
 
				         )
			
 
				-
			
 
				-        action_str: str = parse_response(response)
			
 
				         state.num_of_chars += sum(
			
 
				             len(message['content']) for message in messages
			
 
				-        ) + len(action_str)
			
 
				-
			
 
				-        if finish_command := re.search(r'<finish>.*</finish>', action_str, re.DOTALL):
			
 
				-            thought = action_str.replace(finish_command.group(0), '').strip()
			
 
				-            return AgentFinishAction(thought=thought)
			
 
				-        if bash_command := re.search(
			
 
				-            r'<execute_bash>(.*?)</execute_bash>', action_str, re.DOTALL
			
 
				-        ):
			
 
				-            # remove the command from the action string to get thought
			
 
				-            thought = action_str.replace(bash_command.group(0), '').strip()
			
 
				-            # a command was found
			
 
				-            command_group = bash_command.group(1).strip()
			
 
				-
			
 
				-            if command_group.strip() == 'exit':
			
 
				-                return AgentFinishAction()
			
 
				-            return CmdRunAction(command=command_group, thought=thought)
			
 
				-        elif python_code := re.search(
			
 
				-            r'<execute_ipython>(.*?)</execute_ipython>', action_str, re.DOTALL
			
 
				-        ):
			
 
				-            # a code block was found
			
 
				-            code_group = python_code.group(1).strip()
			
 
				-            thought = action_str.replace(python_code.group(0), '').strip()
			
 
				-            return IPythonRunCellAction(
			
 
				-                code=code_group,
			
 
				-                thought=thought,
			
 
				-                kernel_init_code=self.jupyter_kernel_init_code,
			
 
				-            )
			
 
				-        elif browse_command := re.search(
			
 
				-            r'<execute_browse>(.*)</execute_browse>', action_str, re.DOTALL
			
 
				-        ):
			
 
				-            # BrowserGym actions was found
			
 
				-            browse_actions = browse_command.group(1).strip()
			
 
				-            thought = action_str.replace(browse_command.group(0), '').strip()
			
 
				-            return BrowseInteractiveAction(
			
 
				-                browser_actions=browse_actions, thought=thought
			
 
				-            )
			
 
				-        else:
			
 
				-            # We assume the LLM is GOOD enough that when it returns pure natural language
			
 
				-            # it want to talk to the user
			
 
				-            return MessageAction(content=action_str, wait_for_response=True)
			
 
				+        ) + len(response.choices[0].message.content)
			
 
				+
			
 
				+        return self.response_parser.parse(response)
			
 
				 
			
 
				     def search_memory(self, query: str) -> list[str]:
			
 
				         raise NotImplementedError('Implement this abstract method')
			
--- a/agenthub/codeact_swe_agent/response_parser.py
+++ b/agenthub/codeact_swe_agent/response_parser.py
@@ -0,0 +1,46 @@
 
				+from agenthub.codeact_swe_agent.action_parser import (
			
 
				+    CodeActSWEActionParserCmdRun,
			
 
				+    CodeActSWEActionParserFinish,
			
 
				+    CodeActSWEActionParserIPythonRunCell,
			
 
				+    CodeActSWEActionParserMessage,
			
 
				+)
			
 
				+from opendevin.controller.action_parser import ResponseParser
			
 
				+from opendevin.events.action import Action
			
 
				+
			
 
				+
			
 
				+class CodeActSWEResponseParser(ResponseParser):
			
 
				+    """
			
 
				+    Parser action:
			
 
				+        - CmdRunAction(command) - bash command to run
			
 
				+        - IPythonRunCellAction(code) - IPython code to run
			
 
				+        - MessageAction(content) - Message action to run (e.g. ask for clarification)
			
 
				+        - AgentFinishAction() - end the interaction
			
 
				+    """
			
 
				+
			
 
				+    def __init__(
			
 
				+        self,
			
 
				+    ):
			
 
				+        # Need pay attention to the item order in self.action_parsers
			
 
				+        self.action_parsers = [
			
 
				+            CodeActSWEActionParserFinish(),
			
 
				+            CodeActSWEActionParserCmdRun(),
			
 
				+            CodeActSWEActionParserIPythonRunCell(),
			
 
				+        ]
			
 
				+        self.default_parser = CodeActSWEActionParserMessage()
			
 
				+
			
 
				+    def parse(self, response: str) -> Action:
			
 
				+        action_str = self.parse_response(response)
			
 
				+        return self.parse_action(action_str)
			
 
				+
			
 
				+    def parse_response(self, response) -> str:
			
 
				+        action = response.choices[0].message.content
			
 
				+        for lang in ['bash', 'ipython']:
			
 
				+            if f'<execute_{lang}>' in action and f'</execute_{lang}>' not in action:
			
 
				+                action += f'</execute_{lang}>'
			
 
				+        return action
			
 
				+
			
 
				+    def parse_action(self, action_str: str) -> Action:
			
 
				+        for action_parser in self.action_parsers:
			
 
				+            if action_parser.check_condition(action_str):
			
 
				+                return action_parser.parse(action_str)
			
 
				+        return self.default_parser.parse(action_str)