action_parser.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183
  1. import re
  2. from opendevin.controller.action_parser import ActionParser, ResponseParser
  3. from opendevin.events.action import (
  4. Action,
  5. AgentDelegateAction,
  6. AgentFinishAction,
  7. CmdRunAction,
  8. IPythonRunCellAction,
  9. MessageAction,
  10. )
  11. class CodeActResponseParser(ResponseParser):
  12. """
  13. Parser action:
  14. - CmdRunAction(command) - bash command to run
  15. - IPythonRunCellAction(code) - IPython code to run
  16. - AgentDelegateAction(agent, inputs) - delegate action for (sub)task
  17. - MessageAction(content) - Message action to run (e.g. ask for clarification)
  18. - AgentFinishAction() - end the interaction
  19. """
  20. def __init__(self):
  21. # Need pay attention to the item order in self.action_parsers
  22. super().__init__()
  23. self.action_parsers = [
  24. CodeActActionParserFinish(),
  25. CodeActActionParserCmdRun(),
  26. CodeActActionParserIPythonRunCell(),
  27. CodeActActionParserAgentDelegate(),
  28. ]
  29. self.default_parser = CodeActActionParserMessage()
  30. def parse(self, response) -> Action:
  31. action_str = self.parse_response(response)
  32. return self.parse_action(action_str)
  33. def parse_response(self, response) -> str:
  34. action = response.choices[0].message.content
  35. if action is None:
  36. return ''
  37. for lang in ['bash', 'ipython', 'browse']:
  38. if f'<execute_{lang}>' in action and f'</execute_{lang}>' not in action:
  39. action += f'</execute_{lang}>'
  40. return action
  41. def parse_action(self, action_str: str) -> Action:
  42. for action_parser in self.action_parsers:
  43. if action_parser.check_condition(action_str):
  44. return action_parser.parse(action_str)
  45. return self.default_parser.parse(action_str)
  46. class CodeActActionParserFinish(ActionParser):
  47. """
  48. Parser action:
  49. - AgentFinishAction() - end the interaction
  50. """
  51. def __init__(
  52. self,
  53. ):
  54. self.finish_command = None
  55. def check_condition(self, action_str: str) -> bool:
  56. self.finish_command = re.search(r'<finish>.*</finish>', action_str, re.DOTALL)
  57. return self.finish_command is not None
  58. def parse(self, action_str: str) -> Action:
  59. assert (
  60. self.finish_command is not None
  61. ), 'self.finish_command should not be None when parse is called'
  62. thought = action_str.replace(self.finish_command.group(0), '').strip()
  63. return AgentFinishAction(thought=thought)
  64. class CodeActActionParserCmdRun(ActionParser):
  65. """
  66. Parser action:
  67. - CmdRunAction(command) - bash command to run
  68. - AgentFinishAction() - end the interaction
  69. """
  70. def __init__(
  71. self,
  72. ):
  73. self.bash_command = None
  74. def check_condition(self, action_str: str) -> bool:
  75. self.bash_command = re.search(
  76. r'<execute_bash>(.*?)</execute_bash>', action_str, re.DOTALL
  77. )
  78. return self.bash_command is not None
  79. def parse(self, action_str: str) -> Action:
  80. assert (
  81. self.bash_command is not None
  82. ), 'self.bash_command should not be None when parse is called'
  83. thought = action_str.replace(self.bash_command.group(0), '').strip()
  84. # a command was found
  85. command_group = self.bash_command.group(1).strip()
  86. if command_group.strip() == 'exit':
  87. return AgentFinishAction()
  88. return CmdRunAction(command=command_group, thought=thought)
  89. class CodeActActionParserIPythonRunCell(ActionParser):
  90. """
  91. Parser action:
  92. - IPythonRunCellAction(code) - IPython code to run
  93. """
  94. def __init__(
  95. self,
  96. ):
  97. self.python_code = None
  98. self.jupyter_kernel_init_code: str = 'from agentskills import *'
  99. def check_condition(self, action_str: str) -> bool:
  100. self.python_code = re.search(
  101. r'<execute_ipython>(.*?)</execute_ipython>', action_str, re.DOTALL
  102. )
  103. return self.python_code is not None
  104. def parse(self, action_str: str) -> Action:
  105. assert (
  106. self.python_code is not None
  107. ), 'self.python_code should not be None when parse is called'
  108. code_group = self.python_code.group(1).strip()
  109. thought = action_str.replace(self.python_code.group(0), '').strip()
  110. return IPythonRunCellAction(
  111. code=code_group,
  112. thought=thought,
  113. kernel_init_code=self.jupyter_kernel_init_code,
  114. )
  115. class CodeActActionParserAgentDelegate(ActionParser):
  116. """
  117. Parser action:
  118. - AgentDelegateAction(agent, inputs) - delegate action for (sub)task
  119. """
  120. def __init__(
  121. self,
  122. ):
  123. self.agent_delegate = None
  124. def check_condition(self, action_str: str) -> bool:
  125. self.agent_delegate = re.search(
  126. r'<execute_browse>(.*)</execute_browse>', action_str, re.DOTALL
  127. )
  128. return self.agent_delegate is not None
  129. def parse(self, action_str: str) -> Action:
  130. assert (
  131. self.agent_delegate is not None
  132. ), 'self.agent_delegate should not be None when parse is called'
  133. thought = action_str.replace(self.agent_delegate.group(0), '').strip()
  134. browse_actions = self.agent_delegate.group(1).strip()
  135. task = f'{thought}. I should start with: {browse_actions}'
  136. return AgentDelegateAction(agent='BrowsingAgent', inputs={'task': task})
  137. class CodeActActionParserMessage(ActionParser):
  138. """
  139. Parser action:
  140. - MessageAction(content) - Message action to run (e.g. ask for clarification)
  141. """
  142. def __init__(
  143. self,
  144. ):
  145. pass
  146. def check_condition(self, action_str: str) -> bool:
  147. # We assume the LLM is GOOD enough that when it returns pure natural language
  148. # it wants to talk to the user
  149. return True
  150. def parse(self, action_str: str) -> Action:
  151. return MessageAction(content=action_str, wait_for_response=True)