browse.py 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104
  1. from dataclasses import dataclass, field
  2. from browsergym.utils.obs import flatten_axtree_to_str
  3. from openhands.core.schema import ActionType, ObservationType
  4. from openhands.events.observation.observation import Observation
  @dataclass
  class BrowserOutputObservation(Observation):
      """Observation holding the state reported by the browser after an action.

      Besides the fields declared here, the class relies on ``self.content``,
      which is not declared in this class (presumably inherited from
      ``Observation`` -- confirm against the base class).
      """

      url: str
      trigger_by_action: str
      screenshot: str = field(repr=False)  # left out of the generated repr
      error: bool = False
      observation: str = ObservationType.BROWSE
      # do not include in the memory
      open_pages_urls: list = field(default_factory=list)
      active_page_index: int = -1
      # The three dict fields below are left out of the generated repr.
      dom_object: dict = field(default_factory=dict, repr=False)
      axtree_object: dict = field(default_factory=dict, repr=False)
      extra_element_properties: dict = field(default_factory=dict, repr=False)
      last_browser_action: str = ''
      last_browser_action_error: str = ''
      focused_element_bid: str = ''

      @property
      def message(self) -> str:
          """Return a one-line summary naming the visited URL."""
          prefix = 'Visited '
          return prefix + self.url

      def __str__(self) -> str:
          """Return a multi-line dump of the browser state plus the agent text.

          The trailing section comes from ``get_agent_obs_text``, so this
          method raises ``ValueError`` whenever that method does.
          """
          header_lines = [
              '**BrowserOutputObservation**\n',
              f'URL: {self.url}\n',
              f'Error: {self.error}\n',
              f'Open pages: {self.open_pages_urls}\n',
              f'Active page index: {self.active_page_index}\n',
              f'Last browser action: {self.last_browser_action}\n',
              f'Last browser action error: {self.last_browser_action_error}\n',
              f'Focused element bid: {self.focused_element_bid}\n',
          ]
          summary = ''.join(header_lines)
          summary += '--- Agent Observation ---\n'
          return summary + self.get_agent_obs_text()

      def get_agent_obs_text(self) -> str:
          """Build the concise text that is shown to the agent.

          Returns:
              For ``ActionType.BROWSE_INTERACTIVE``: the current URL, the
              focused element bid, either the last action error or a success
              note, and the accessibility tree (or a note that rendering it
              failed). For ``ActionType.BROWSE``: the current URL, an optional
              error section, and ``self.content`` wrapped in begin/end markers.

          Raises:
              ValueError: if ``trigger_by_action`` is neither of the two
                  action types above.
          """
          trigger = self.trigger_by_action

          if trigger == ActionType.BROWSE_INTERACTIVE:
              pieces = [
                  f'[Current URL: {self.url}]\n',
                  f'[Focused element bid: {self.focused_element_bid}]\n\n',
              ]
              if self.error:
                  pieces.append(
                      '================ BEGIN error message ===============\n'
                      'The following error occurred when executing the last action:\n'
                      f'{self.last_browser_action_error}\n'
                      '================ END error message ===============\n'
                  )
              else:
                  pieces.append('[Action executed successfully.]\n')
              try:
                  # The tree is deliberately not restricted to visible nodes so
                  # the agent sees the full content of the web page.
                  # FIXME: handle the case when the web page is too large
                  tree_text = self.get_axtree_str(filter_visible_only=False)
                  pieces.append(
                      '============== BEGIN accessibility tree ==============\n'
                      f'{tree_text}\n'
                      '============== END accessibility tree ==============\n'
                  )
              except Exception as exc:
                  # Best effort: report the failure inline instead of raising.
                  pieces.append(
                      f'\n[Error encountered when processing the accessibility tree: {exc}]'
                  )
              return ''.join(pieces)

          if trigger == ActionType.BROWSE:
              pieces = [f'[Current URL: {self.url}]\n']
              if self.error:
                  pieces.append(
                      '================ BEGIN error message ===============\n'
                      'The following error occurred when trying to visit the URL:\n'
                      f'{self.last_browser_action_error}\n'
                      '================ END error message ===============\n'
                  )
              pieces.append('============== BEGIN webpage content ==============\n')
              pieces.append(self.content)
              pieces.append('\n============== END webpage content ==============\n')
              return ''.join(pieces)

          raise ValueError(f'Invalid trigger_by_action: {self.trigger_by_action}')

      def get_axtree_str(self, filter_visible_only: bool = False) -> str:
          """Flatten ``axtree_object`` into text via ``flatten_axtree_to_str``.

          Args:
              filter_visible_only: forwarded unchanged to
                  ``flatten_axtree_to_str`` under the same keyword.

          Returns:
              The flattened text. The same string is also stored on
              ``self._axtree_str``.
          """
          flattened = flatten_axtree_to_str(
              self.axtree_object,
              extra_properties=self.extra_element_properties,
              with_clickable=True,
              skip_generic=False,
              filter_visible_only=filter_visible_only,
          )
          self._axtree_str = flattened
          return flattened