action_parser.py 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177
  1. import re
  2. from openhands.controller.action_parser import ActionParser, ResponseParser
  3. from openhands.events.action import (
  4. Action,
  5. AgentDelegateAction,
  6. AgentFinishAction,
  7. CmdRunAction,
  8. IPythonRunCellAction,
  9. MessageAction,
  10. )
  11. class CodeActResponseParser(ResponseParser):
  12. """Parser action:
  13. - CmdRunAction(command) - bash command to run
  14. - IPythonRunCellAction(code) - IPython code to run
  15. - AgentDelegateAction(agent, inputs) - delegate action for (sub)task
  16. - MessageAction(content) - Message action to run (e.g. ask for clarification)
  17. - AgentFinishAction() - end the interaction
  18. """
  19. def __init__(self):
  20. # Need pay attention to the item order in self.action_parsers
  21. super().__init__()
  22. self.action_parsers = [
  23. CodeActActionParserFinish(),
  24. CodeActActionParserCmdRun(),
  25. CodeActActionParserIPythonRunCell(),
  26. CodeActActionParserAgentDelegate(),
  27. ]
  28. self.default_parser = CodeActActionParserMessage()
  29. def parse(self, response) -> Action:
  30. action_str = self.parse_response(response)
  31. return self.parse_action(action_str)
  32. def parse_response(self, response) -> str:
  33. action = response.choices[0].message.content
  34. if action is None:
  35. return ''
  36. for lang in ['bash', 'ipython', 'browse']:
  37. if f'<execute_{lang}>' in action and f'</execute_{lang}>' not in action:
  38. action += f'</execute_{lang}>'
  39. return action
  40. def parse_action(self, action_str: str) -> Action:
  41. for action_parser in self.action_parsers:
  42. if action_parser.check_condition(action_str):
  43. return action_parser.parse(action_str)
  44. return self.default_parser.parse(action_str)
  45. class CodeActActionParserFinish(ActionParser):
  46. """Parser action:
  47. - AgentFinishAction() - end the interaction
  48. """
  49. def __init__(
  50. self,
  51. ):
  52. self.finish_command = None
  53. def check_condition(self, action_str: str) -> bool:
  54. self.finish_command = re.search(r'<finish>.*</finish>', action_str, re.DOTALL)
  55. return self.finish_command is not None
  56. def parse(self, action_str: str) -> Action:
  57. assert (
  58. self.finish_command is not None
  59. ), 'self.finish_command should not be None when parse is called'
  60. thought = action_str.replace(self.finish_command.group(0), '').strip()
  61. return AgentFinishAction(thought=thought)
  62. class CodeActActionParserCmdRun(ActionParser):
  63. """Parser action:
  64. - CmdRunAction(command) - bash command to run
  65. - AgentFinishAction() - end the interaction
  66. """
  67. def __init__(
  68. self,
  69. ):
  70. self.bash_command = None
  71. def check_condition(self, action_str: str) -> bool:
  72. self.bash_command = re.search(
  73. r'<execute_bash>(.*?)</execute_bash>', action_str, re.DOTALL
  74. )
  75. return self.bash_command is not None
  76. def parse(self, action_str: str) -> Action:
  77. assert (
  78. self.bash_command is not None
  79. ), 'self.bash_command should not be None when parse is called'
  80. thought = action_str.replace(self.bash_command.group(0), '').strip()
  81. # a command was found
  82. command_group = self.bash_command.group(1).strip()
  83. if command_group.strip() == 'exit':
  84. return AgentFinishAction(thought=thought)
  85. return CmdRunAction(command=command_group, thought=thought)
  86. class CodeActActionParserIPythonRunCell(ActionParser):
  87. """Parser action:
  88. - IPythonRunCellAction(code) - IPython code to run
  89. """
  90. def __init__(
  91. self,
  92. ):
  93. self.python_code = None
  94. self.jupyter_kernel_init_code: str = 'from agentskills import *'
  95. def check_condition(self, action_str: str) -> bool:
  96. self.python_code = re.search(
  97. r'<execute_ipython>(.*?)</execute_ipython>', action_str, re.DOTALL
  98. )
  99. return self.python_code is not None
  100. def parse(self, action_str: str) -> Action:
  101. assert (
  102. self.python_code is not None
  103. ), 'self.python_code should not be None when parse is called'
  104. code_group = self.python_code.group(1).strip()
  105. thought = action_str.replace(self.python_code.group(0), '').strip()
  106. return IPythonRunCellAction(
  107. code=code_group,
  108. thought=thought,
  109. kernel_init_code=self.jupyter_kernel_init_code,
  110. )
  111. class CodeActActionParserAgentDelegate(ActionParser):
  112. """Parser action:
  113. - AgentDelegateAction(agent, inputs) - delegate action for (sub)task
  114. """
  115. def __init__(
  116. self,
  117. ):
  118. self.agent_delegate = None
  119. def check_condition(self, action_str: str) -> bool:
  120. self.agent_delegate = re.search(
  121. r'<execute_browse>(.*)</execute_browse>', action_str, re.DOTALL
  122. )
  123. return self.agent_delegate is not None
  124. def parse(self, action_str: str) -> Action:
  125. assert (
  126. self.agent_delegate is not None
  127. ), 'self.agent_delegate should not be None when parse is called'
  128. thought = action_str.replace(self.agent_delegate.group(0), '').strip()
  129. browse_actions = self.agent_delegate.group(1).strip()
  130. task = f'{thought}. I should start with: {browse_actions}'
  131. return AgentDelegateAction(agent='BrowsingAgent', inputs={'task': task})
  132. class CodeActActionParserMessage(ActionParser):
  133. """Parser action:
  134. - MessageAction(content) - Message action to run (e.g. ask for clarification)
  135. """
  136. def __init__(
  137. self,
  138. ):
  139. pass
  140. def check_condition(self, action_str: str) -> bool:
  141. # We assume the LLM is GOOD enough that when it returns pure natural language
  142. # it wants to talk to the user
  143. return True
  144. def parse(self, action_str: str) -> Action:
  145. return MessageAction(content=action_str, wait_for_response=True)