action_parser.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304
  1. import re
  2. from openhands.controller.action_parser import (
  3. ActionParser,
  4. ResponseParser,
  5. )
  6. from openhands.core.exceptions import LLMMalformedActionError
  7. from openhands.core.logger import openhands_logger as logger
  8. from openhands.events.action import (
  9. Action,
  10. AgentDelegateAction,
  11. AgentFinishAction,
  12. CmdRunAction,
  13. FileEditAction,
  14. IPythonRunCellAction,
  15. MessageAction,
  16. )
  class CodeActResponseParser(ResponseParser):
      """Turn a raw LLM completion into one CodeAct action.

      Actions this parser can produce:
          - CmdRunAction(command) - bash command to run
          - FileEditAction(path, content) - edit a file
          - IPythonRunCellAction(code) - IPython code to run
          - AgentDelegateAction(agent, inputs) - delegate action for (sub)task
          - MessageAction(content) - Message action to run (e.g. ask for clarification)
          - AgentFinishAction() - end the interaction
      """

      def __init__(self):
          super().__init__()
          # Order is significant: parse_action() uses the first parser whose
          # check_condition() accepts the text.
          self.action_parsers = [
              CodeActActionParserFinish(),
              CodeActActionParserFileEdit(),
              CodeActActionParserCmdRun(),
              CodeActActionParserIPythonRunCell(),
              CodeActActionParserAgentDelegate(),
          ]
          # Used when none of the parsers above accepts the text.
          self.default_parser = CodeActActionParserMessage()

      def parse(self, response) -> Action:
          """Extract the completion text from `response` and parse it into an Action."""
          return self.parse_action(self.parse_response(response))

      def parse_response(self, response) -> str:
          """Return the first choice's message content with its tags repaired.

          Returns '' when the content is None. Otherwise, for each of the
          execute_bash / execute_ipython / execute_browse / file_edit tags:
            1. a closing tag that lost its final '>' gets it back (the original
               comments attribute this to a DeepSeek stop-word bug);
            2. if an opening tag is present but no closing tag is, the closing
               tag is appended at the end of the text.
          """
          text = response.choices[0].message.content
          if text is None:
              return ''

          # (marker that opens the tag, full closing tag), in processing order.
          tag_pairs = [
              (f'<execute_{lang}>', f'</execute_{lang}>')
              for lang in ('bash', 'ipython', 'browse')
          ]
          tag_pairs.append(('<file_edit', '</file_edit>'))

          for opener, closer in tag_pairs:
              cut_closer = closer[:-1]  # the closing tag without its '>'
              if closer not in text and cut_closer in text:
                  text = text.replace(cut_closer, closer)
              if closer not in text and opener in text:
                  text += closer
          return text

      def parse_action(self, action_str: str) -> Action:
          """Parse `action_str` with the first accepting parser, else the default one."""
          chosen = next(
              (
                  candidate
                  for candidate in self.action_parsers
                  if candidate.check_condition(action_str)
              ),
              self.default_parser,
          )
          return chosen.parse(action_str)

      def action_to_str(self, action: Action) -> str:
          """Render `action` back into the tagged text form.

          Returns '' for any action type not handled below, and for an
          AgentFinishAction whose source is not 'agent'.
          """
          if isinstance(action, CmdRunAction):
              return f'{action.thought}\n<execute_bash>\n{action.command}\n</execute_bash>'
          if isinstance(action, IPythonRunCellAction):
              return f'{action.thought}\n<execute_ipython>\n{action.code}\n</execute_ipython>'
          if isinstance(action, AgentDelegateAction):
              return f'{action.thought}\n<execute_browse>\n{action.inputs["task"]}\n</execute_browse>'
          if isinstance(action, FileEditAction):
              return f'{action.thought}\n<file_edit path={action.path}>\n{action.content}\n</file_edit>'
          if isinstance(action, MessageAction):
              return action.content
          if isinstance(action, AgentFinishAction) and action.source == 'agent':
              return action.thought
          return ''
  class CodeActActionParserFinish(ActionParser):
      """Parser action:
      - AgentFinishAction() - end the interaction

      check_condition() stores its regex match on the instance and parse()
      reads it back, so the two must be called in that order on the same text.
      """

      def __init__(self):
          # Match object from the latest check_condition() call, or None.
          self.finish_command = None

      def check_condition(self, action_str: str) -> bool:
          """Return True when `action_str` contains a <finish>...</finish> span."""
          # Greedy `.*` with DOTALL: the span runs from the first <finish> to the
          # last </finish>, across newlines.
          found = re.search(r'<finish>.*</finish>', action_str, flags=re.DOTALL)
          self.finish_command = found
          return found is not None

      def parse(self, action_str: str) -> Action:
          """Build an AgentFinishAction whose thought is the text outside the finish span."""
          assert (
              self.finish_command is not None
          ), 'self.finish_command should not be None when parse is called'
          finish_span = self.finish_command.group(0)
          remainder = action_str.replace(finish_span, '')
          return AgentFinishAction(thought=remainder.strip())
  class CodeActActionParserCmdRun(ActionParser):
      """Parser action:
      - CmdRunAction(command) - bash command to run
      - AgentFinishAction() - end the interaction (when the command is `exit`)

      check_condition() stores its regex match on the instance and parse()
      reads it back, so the two must be called in that order on the same text.
      """

      def __init__(self):
          # Match object from the latest check_condition() call, or None.
          self.bash_command = None

      def check_condition(self, action_str: str) -> bool:
          """Return True when `action_str` contains an <execute_bash> block."""
          # Lazy `(.*?)`: only the first block is captured.
          found = re.search(
              r'<execute_bash>(.*?)</execute_bash>', action_str, flags=re.DOTALL
          )
          self.bash_command = found
          return found is not None

      def parse(self, action_str: str) -> Action:
          """Build the action for the matched bash block.

          The thought is the text outside the matched block. A command that is
          exactly `exit` after stripping ends the interaction instead of running.
          """
          assert (
              self.bash_command is not None
          ), 'self.bash_command should not be None when parse is called'
          whole_block = self.bash_command.group(0)
          remainder = action_str.replace(whole_block, '').strip()
          command = self.bash_command.group(1).strip()
          if command == 'exit':
              return AgentFinishAction(thought=remainder)
          return CmdRunAction(command=command, thought=remainder)
  class CodeActActionParserIPythonRunCell(ActionParser):
      """Parser action:
      - IPythonRunCellAction(code) - IPython code to run

      check_condition() stores its regex match on the instance and parse()
      reads it back, so the two must be called in that order on the same text.
      """

      def __init__(self):
          # Match object from the latest check_condition() call, or None.
          self.python_code = None
          # Passed to every IPythonRunCellAction as `kernel_init_code`.
          self.jupyter_kernel_init_code: str = 'from agentskills import *'

      def check_condition(self, action_str: str) -> bool:
          """Return True when `action_str` contains an <execute_ipython> block."""
          # Lazy `(.*?)`: only the first block is captured.
          found = re.search(
              r'<execute_ipython>(.*?)</execute_ipython>', action_str, flags=re.DOTALL
          )
          self.python_code = found
          return found is not None

      def parse(self, action_str: str) -> Action:
          """Build an IPythonRunCellAction from the matched block.

          The code is the stripped block content; the thought is the text
          outside the matched block.
          """
          assert (
              self.python_code is not None
          ), 'self.python_code should not be None when parse is called'
          whole_block = self.python_code.group(0)
          cell_source = self.python_code.group(1).strip()
          remainder = action_str.replace(whole_block, '').strip()
          return IPythonRunCellAction(
              code=cell_source,
              thought=remainder,
              kernel_init_code=self.jupyter_kernel_init_code,
          )
  class CodeActActionParserAgentDelegate(ActionParser):
      """Parser action:
      - AgentDelegateAction(agent, inputs) - delegate action for (sub)task

      check_condition() stores its regex match on the instance and parse()
      reads it back, so the two must be called in that order on the same text.
      """

      def __init__(self):
          # Match object from the latest check_condition() call, or None.
          self.agent_delegate = None

      def check_condition(self, action_str: str) -> bool:
          """Return True when `action_str` contains an <execute_browse> block."""
          # Greedy `(.*)` with DOTALL: the capture runs up to the last closing tag.
          found = re.search(
              r'<execute_browse>(.*)</execute_browse>', action_str, flags=re.DOTALL
          )
          self.agent_delegate = found
          return found is not None

      def parse(self, action_str: str) -> Action:
          """Build an AgentDelegateAction that sends the browse task to 'BrowsingAgent'.

          The task is the stripped block content. The thought is the text outside
          the block (if any) followed by an "I should start with: ..." line.
          """
          assert (
              self.agent_delegate is not None
          ), 'self.agent_delegate should not be None when parse is called'
          whole_block = self.agent_delegate.group(0)
          remainder = action_str.replace(whole_block, '').strip()
          task = self.agent_delegate.group(1).strip()
          if remainder:
              thought = f'{remainder}\nI should start with: {task}'
          else:
              thought = f'I should start with: {task}'
          return AgentDelegateAction(
              agent='BrowsingAgent', thought=thought, inputs={'task': task}
          )
  class CodeActActionParserMessage(ActionParser):
      """Parser action:
      - MessageAction(content) - Message action to run (e.g. ask for clarification)

      This parser accepts any text unconditionally.
      """

      def __init__(self):
          # Stateless: there is nothing to initialise.
          pass

      def check_condition(self, action_str: str) -> bool:
          """Always return True.

          We assume the LLM is GOOD enough that when it returns pure natural
          language it wants to talk to the user.
          """
          return True

      def parse(self, action_str: str) -> Action:
          """Wrap the whole text in a MessageAction that waits for a response."""
          return MessageAction(content=action_str, wait_for_response=True)
  class CodeActActionParserFileEdit(ActionParser):
      """Parser action:
      - FileEditAction(path, content) - edit a file

      Expected form (quotes around attribute values are optional, and `start`
      and `end` may be omitted):

          <file_edit path="[path]" start=[start_line] end=[end_line]>
          [content_to_edit]
          </file_edit>

      check_condition() stores its regex match on the instance and parse()
      reads it back, so the two must be called in that order on the same text.
      """

      def __init__(self):
          # Match object from the latest check_condition() call, or None.
          self.file_edit_match: re.Match | None = None

      @staticmethod
      def _line_number(attr_name: str, raw_value: str) -> int:
          """Convert the `start`/`end` attribute text to an int.

          Raises:
              LLMMalformedActionError: if `raw_value` is not a valid integer.
                  The original ValueError is chained as the cause.
          """
          try:
              return int(raw_value)
          except ValueError as err:
              raise LLMMalformedActionError(
                  f'FileEditAction detected but `{attr_name}` is not a valid integer: {raw_value}'
              ) from err

      def check_condition(self, action_str: str) -> bool:
          """Report whether `action_str` holds a well-formed <file_edit> block.

          Returns:
              False when the text contains no '<file_edit' at all, True when a
              valid block was matched.

          Raises:
              LLMMalformedActionError: when '<file_edit' is present but the block
                  does not match the expected format, the path is empty or only
                  whitespace, or `start`/`end` is not an integer.
          """
          # Drop any match left over from a previous call so parse() can never
          # act on stale state.
          self.file_edit_match = None
          if '<file_edit' not in action_str:
              return False

          # Groups: 2 = path, 4 = start, 6 = end, 7 = content. start and end are
          # optional; groups 1/3/5 capture the (optional) quote character.
          self.file_edit_match = re.search(
              r'<file_edit\s+path=(["\']?)(.*?)\1(?:\s+start=(["\']?)(.*?)\3)?(?:\s+end=(["\']?)(.*?)\5)?\s*>(.*?)</file_edit>',
              action_str,
              re.DOTALL,
          )
          if self.file_edit_match is None:
              logger.error(
                  f'FileEditAction detected but the format is incorrect. Unable to match for <file_edit> in:\n{"-" * 80}\n{action_str}\n{"-" * 80}'
              )
              raise LLMMalformedActionError(
                  'FileEditAction detected but the format is incorrect. Usage:\n'
                  '<file_edit path="[path]" start=[start_line] end=[end_line]>\n'
                  '[content_to_edit]\n'
                  '</file_edit>\n'
              )

          path = self.file_edit_match.group(2)
          # parse() strips the path, so a whitespace-only path must be rejected
          # here too; otherwise it would become an empty path.
          if not path or not path.strip():
              raise LLMMalformedActionError(
                  'FileEditAction detected but no `path` specified. You should specify the path of the file to edit.'
              )

          for attr_name, group_index in (('start', 4), ('end', 6)):
              raw_value = self.file_edit_match.group(group_index)
              if raw_value:
                  self._line_number(attr_name, raw_value)
          return True

      def parse(self, action_str: str) -> Action:
          """Build a FileEditAction from the block matched by check_condition().

          The path is stripped, the content is taken verbatim, and the thought is
          the text outside the matched block. `start` and `end` are set on the
          action only when the corresponding attribute was given.
          """
          assert (
              self.file_edit_match is not None
          ), 'self.file_edit_match should not be None when parse is called'
          match = self.file_edit_match

          file_path = match.group(2).strip()
          start_raw = match.group(4)
          end_raw = match.group(6)
          start_line = self._line_number('start', start_raw) if start_raw else None
          end_line = self._line_number('end', end_raw) if end_raw else None
          content = match.group(7)
          thought = action_str.replace(match.group(0), '').strip()

          action = FileEditAction(path=file_path, content=content, thought=thought)
          if start_line is not None:
              action.start = start_line
          if end_line is not None:
              action.end = end_line
          return action