response_parser.py 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123
  1. import ast
  2. import re
  3. from openhands.controller.action_parser import ActionParser, ResponseParser
  4. from openhands.core.logger import openhands_logger as logger
  5. from openhands.events.action import (
  6. Action,
  7. BrowseInteractiveAction,
  8. )
  9. class BrowsingResponseParser(ResponseParser):
  10. def __init__(self):
  11. # Need to pay attention to the item order in self.action_parsers
  12. super().__init__()
  13. self.action_parsers = [BrowsingActionParserMessage()]
  14. self.default_parser = BrowsingActionParserBrowseInteractive()
  15. def parse(self, response: str) -> Action:
  16. action_str = self.parse_response(response)
  17. return self.parse_action(action_str)
  18. def parse_response(self, response) -> str:
  19. action_str = response['choices'][0]['message']['content']
  20. if action_str is None:
  21. return ''
  22. action_str = action_str.strip()
  23. # Ensure action_str ends with ')```'
  24. if action_str:
  25. if not action_str.endswith('```'):
  26. if action_str.endswith(')'):
  27. action_str += '```' # prevent duplicate ending paranthesis, e.g. send_msg_to_user('Done'))
  28. else:
  29. action_str += ')```' # expected format
  30. logger.debug(action_str)
  31. return action_str
  32. def parse_action(self, action_str: str) -> Action:
  33. for action_parser in self.action_parsers:
  34. if action_parser.check_condition(action_str):
  35. return action_parser.parse(action_str)
  36. return self.default_parser.parse(action_str)
  37. class BrowsingActionParserMessage(ActionParser):
  38. """Parser action:
  39. - BrowseInteractiveAction(browser_actions) - unexpected response format, message back to user
  40. """
  41. def __init__(
  42. self,
  43. ):
  44. pass
  45. def check_condition(self, action_str: str) -> bool:
  46. return '```' not in action_str
  47. def parse(self, action_str: str) -> Action:
  48. msg = f'send_msg_to_user("""{action_str}""")'
  49. return BrowseInteractiveAction(
  50. browser_actions=msg,
  51. thought=action_str,
  52. browsergym_send_msg_to_user=action_str,
  53. )
  54. class BrowsingActionParserBrowseInteractive(ActionParser):
  55. """Parser action:
  56. - BrowseInteractiveAction(browser_actions) - handle send message to user function call in BrowserGym
  57. """
  58. def __init__(
  59. self,
  60. ):
  61. pass
  62. def check_condition(self, action_str: str) -> bool:
  63. return True
  64. def parse(self, action_str: str) -> Action:
  65. # parse the action string into browser_actions and thought
  66. # the LLM can return only one string, or both
  67. # when both are returned, it looks like this:
  68. ### Based on the current state of the page and the goal of finding out the president of the USA, the next action should involve searching for information related to the president.
  69. ### To achieve this, we can navigate to a reliable source such as a search engine or a specific website that provides information about the current president of the USA.
  70. ### Here is an example of a valid action to achieve this:
  71. ### ```
  72. ### goto('https://www.whitehouse.gov/about-the-white-house/presidents/'
  73. # in practice, BrowsingResponseParser.parse_response also added )``` to the end of the string
  74. # when the LLM returns only one string, it looks like this:
  75. ### goto('https://www.whitehouse.gov/about-the-white-house/presidents/')
  76. # and parse_response added )``` to the end of the string
  77. parts = action_str.split('```')
  78. browser_actions = (
  79. parts[1].strip() if parts[1].strip() != '' else parts[0].strip()
  80. )
  81. thought = parts[0].strip() if parts[1].strip() != '' else ''
  82. # if the LLM wants to talk to the user, we extract the message
  83. msg_content = ''
  84. for sub_action in browser_actions.split('\n'):
  85. if 'send_msg_to_user(' in sub_action:
  86. try:
  87. tree = ast.parse(sub_action)
  88. args = tree.body[0].value.args # type: ignore
  89. msg_content = args[0].value
  90. except SyntaxError:
  91. logger.error(f'Error parsing action: {sub_action}')
  92. # the syntax was not correct, but we can still try to get the message
  93. # e.g. send_msg_to_user("Hello, world!") or send_msg_to_user('Hello, world!'
  94. match = re.search(r'send_msg_to_user\((["\'])(.*?)\1\)', sub_action)
  95. if match:
  96. msg_content = match.group(2)
  97. else:
  98. msg_content = ''
  99. return BrowseInteractiveAction(
  100. browser_actions=browser_actions,
  101. thought=thought,
  102. browsergym_send_msg_to_user=msg_content,
  103. )