utils.py 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960
  1. import os
  2. from openhands.core.exceptions import BrowserUnavailableException
  3. from openhands.core.schema import ActionType
  4. from openhands.events.action import BrowseInteractiveAction, BrowseURLAction
  5. from openhands.events.observation import BrowserOutputObservation
  6. from openhands.runtime.browser.browser_env import BrowserEnv
  7. async def browse(
  8. action: BrowseURLAction | BrowseInteractiveAction, browser: BrowserEnv | None
  9. ) -> BrowserOutputObservation:
  10. if browser is None:
  11. raise BrowserUnavailableException()
  12. if isinstance(action, BrowseURLAction):
  13. # legacy BrowseURLAction
  14. asked_url = action.url
  15. if not asked_url.startswith('http'):
  16. asked_url = os.path.abspath(os.curdir) + action.url
  17. action_str = f'goto("{asked_url}")'
  18. elif isinstance(action, BrowseInteractiveAction):
  19. # new BrowseInteractiveAction, supports full featured BrowserGym actions
  20. # action in BrowserGym: see https://github.com/ServiceNow/BrowserGym/blob/main/core/src/browsergym/core/action/functions.py
  21. action_str = action.browser_actions
  22. else:
  23. raise ValueError(f'Invalid action type: {action.action}')
  24. try:
  25. # obs provided by BrowserGym: see https://github.com/ServiceNow/BrowserGym/blob/main/core/src/browsergym/core/env.py#L396
  26. obs = browser.step(action_str)
  27. return BrowserOutputObservation(
  28. content=obs['text_content'], # text content of the page
  29. url=obs.get('url', ''), # URL of the page
  30. screenshot=obs.get('screenshot', None), # base64-encoded screenshot, png
  31. open_pages_urls=obs.get('open_pages_urls', []), # list of open pages
  32. active_page_index=obs.get(
  33. 'active_page_index', -1
  34. ), # index of the active page
  35. dom_object=obs.get('dom_object', {}), # DOM object
  36. axtree_object=obs.get('axtree_object', {}), # accessibility tree object
  37. extra_element_properties=obs.get('extra_element_properties', {}),
  38. focused_element_bid=obs.get(
  39. 'focused_element_bid', None
  40. ), # focused element bid
  41. last_browser_action=obs.get(
  42. 'last_action', ''
  43. ), # last browser env action performed
  44. last_browser_action_error=obs.get('last_action_error', ''),
  45. error=True if obs.get('last_action_error', '') else False, # error flag
  46. )
  47. except Exception as e:
  48. return BrowserOutputObservation(
  49. content=str(e),
  50. screenshot='',
  51. error=True,
  52. last_browser_action_error=str(e),
  53. url=asked_url if action.action == ActionType.BROWSE else '',
  54. )