action_parser.py 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181
  1. import re
  2. from openhands.controller.action_parser import ActionParser, ResponseParser
  3. from openhands.events.action import (
  4. Action,
  5. AgentDelegateAction,
  6. AgentFinishAction,
  7. CmdRunAction,
  8. IPythonRunCellAction,
  9. MessageAction,
  10. )
  11. class CodeActResponseParser(ResponseParser):
  12. """Parser action:
  13. - CmdRunAction(command) - bash command to run
  14. - IPythonRunCellAction(code) - IPython code to run
  15. - AgentDelegateAction(agent, inputs) - delegate action for (sub)task
  16. - MessageAction(content) - Message action to run (e.g. ask for clarification)
  17. - AgentFinishAction() - end the interaction
  18. """
  19. def __init__(self):
  20. # Need pay attention to the item order in self.action_parsers
  21. super().__init__()
  22. self.action_parsers = [
  23. CodeActActionParserFinish(),
  24. CodeActActionParserCmdRun(),
  25. CodeActActionParserIPythonRunCell(),
  26. CodeActActionParserAgentDelegate(),
  27. ]
  28. self.default_parser = CodeActActionParserMessage()
  29. def parse(self, response) -> Action:
  30. action_str = self.parse_response(response)
  31. return self.parse_action(action_str)
  32. def parse_response(self, response) -> str:
  33. action = response.choices[0].message.content
  34. if action is None:
  35. return ''
  36. for lang in ['bash', 'ipython', 'browse']:
  37. # special handling for DeepSeek: it has stop-word bug and returns </execute_ipython instead of </execute_ipython>
  38. if f'</execute_{lang}' in action and f'</execute_{lang}>' not in action:
  39. action = action.replace(f'</execute_{lang}', f'</execute_{lang}>')
  40. if f'<execute_{lang}>' in action and f'</execute_{lang}>' not in action:
  41. action += f'</execute_{lang}>'
  42. return action
  43. def parse_action(self, action_str: str) -> Action:
  44. for action_parser in self.action_parsers:
  45. if action_parser.check_condition(action_str):
  46. return action_parser.parse(action_str)
  47. return self.default_parser.parse(action_str)
  48. class CodeActActionParserFinish(ActionParser):
  49. """Parser action:
  50. - AgentFinishAction() - end the interaction
  51. """
  52. def __init__(
  53. self,
  54. ):
  55. self.finish_command = None
  56. def check_condition(self, action_str: str) -> bool:
  57. self.finish_command = re.search(r'<finish>.*</finish>', action_str, re.DOTALL)
  58. return self.finish_command is not None
  59. def parse(self, action_str: str) -> Action:
  60. assert (
  61. self.finish_command is not None
  62. ), 'self.finish_command should not be None when parse is called'
  63. thought = action_str.replace(self.finish_command.group(0), '').strip()
  64. return AgentFinishAction(thought=thought)
  65. class CodeActActionParserCmdRun(ActionParser):
  66. """Parser action:
  67. - CmdRunAction(command) - bash command to run
  68. - AgentFinishAction() - end the interaction
  69. """
  70. def __init__(
  71. self,
  72. ):
  73. self.bash_command = None
  74. def check_condition(self, action_str: str) -> bool:
  75. self.bash_command = re.search(
  76. r'<execute_bash>(.*?)</execute_bash>', action_str, re.DOTALL
  77. )
  78. return self.bash_command is not None
  79. def parse(self, action_str: str) -> Action:
  80. assert (
  81. self.bash_command is not None
  82. ), 'self.bash_command should not be None when parse is called'
  83. thought = action_str.replace(self.bash_command.group(0), '').strip()
  84. # a command was found
  85. command_group = self.bash_command.group(1).strip()
  86. if command_group.strip() == 'exit':
  87. return AgentFinishAction(thought=thought)
  88. return CmdRunAction(command=command_group, thought=thought)
  89. class CodeActActionParserIPythonRunCell(ActionParser):
  90. """Parser action:
  91. - IPythonRunCellAction(code) - IPython code to run
  92. """
  93. def __init__(
  94. self,
  95. ):
  96. self.python_code = None
  97. self.jupyter_kernel_init_code: str = 'from agentskills import *'
  98. def check_condition(self, action_str: str) -> bool:
  99. self.python_code = re.search(
  100. r'<execute_ipython>(.*?)</execute_ipython>', action_str, re.DOTALL
  101. )
  102. return self.python_code is not None
  103. def parse(self, action_str: str) -> Action:
  104. assert (
  105. self.python_code is not None
  106. ), 'self.python_code should not be None when parse is called'
  107. code_group = self.python_code.group(1).strip()
  108. thought = action_str.replace(self.python_code.group(0), '').strip()
  109. return IPythonRunCellAction(
  110. code=code_group,
  111. thought=thought,
  112. kernel_init_code=self.jupyter_kernel_init_code,
  113. )
  114. class CodeActActionParserAgentDelegate(ActionParser):
  115. """Parser action:
  116. - AgentDelegateAction(agent, inputs) - delegate action for (sub)task
  117. """
  118. def __init__(
  119. self,
  120. ):
  121. self.agent_delegate = None
  122. def check_condition(self, action_str: str) -> bool:
  123. self.agent_delegate = re.search(
  124. r'<execute_browse>(.*)</execute_browse>', action_str, re.DOTALL
  125. )
  126. return self.agent_delegate is not None
  127. def parse(self, action_str: str) -> Action:
  128. assert (
  129. self.agent_delegate is not None
  130. ), 'self.agent_delegate should not be None when parse is called'
  131. thought = action_str.replace(self.agent_delegate.group(0), '').strip()
  132. browse_actions = self.agent_delegate.group(1).strip()
  133. task = f'{thought}. I should start with: {browse_actions}'
  134. return AgentDelegateAction(agent='BrowsingAgent', inputs={'task': task})
  135. class CodeActActionParserMessage(ActionParser):
  136. """Parser action:
  137. - MessageAction(content) - Message action to run (e.g. ask for clarification)
  138. """
  139. def __init__(
  140. self,
  141. ):
  142. pass
  143. def check_condition(self, action_str: str) -> bool:
  144. # We assume the LLM is GOOD enough that when it returns pure natural language
  145. # it wants to talk to the user
  146. return True
  147. def parse(self, action_str: str) -> Action:
  148. return MessageAction(content=action_str, wait_for_response=True)