| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960 |
- import os
- from openhands.core.exceptions import BrowserUnavailableException
- from openhands.core.schema import ActionType
- from openhands.events.action import BrowseInteractiveAction, BrowseURLAction
- from openhands.events.observation import BrowserOutputObservation
- from openhands.runtime.browser.browser_env import BrowserEnv
async def browse(
    action: BrowseURLAction | BrowseInteractiveAction, browser: BrowserEnv | None
) -> BrowserOutputObservation:
    """Run a browse action in the browser environment and report the result.

    Args:
        action: Either a legacy ``BrowseURLAction``, which is turned into a
            ``goto("<url>")`` browser action, or a ``BrowseInteractiveAction``,
            whose ``browser_actions`` string is passed to the browser as-is.
        browser: The browser environment to step, or ``None`` when no browser
            is available.

    Returns:
        A ``BrowserOutputObservation`` built from the observation returned by
        ``browser.step``. If stepping the browser or reading the observation
        raises, an observation with ``error=True`` carrying the exception text
        is returned instead of propagating the exception.

    Raises:
        BrowserUnavailableException: If ``browser`` is ``None``.
        ValueError: If ``action`` is neither of the two supported action types.
    """
    if browser is None:
        raise BrowserUnavailableException()

    if isinstance(action, BrowseURLAction):
        # legacy BrowseURLAction
        asked_url = action.url
        if not asked_url.startswith('http'):
            # Anything that does not start with 'http' is treated as a path
            # appended directly to the current working directory.
            asked_url = os.path.abspath(os.curdir) + action.url
        # The URL is embedded in a double-quoted string literal inside the
        # action string, so backslashes and double quotes must be escaped;
        # otherwise such a URL would terminate the literal early or be
        # misread as an escape sequence. URLs without those characters yield
        # exactly the same action string as before.
        quoted_url = asked_url.replace('\\', '\\\\').replace('"', '\\"')
        action_str = f'goto("{quoted_url}")'
    elif isinstance(action, BrowseInteractiveAction):
        # new BrowseInteractiveAction, supports full featured BrowserGym actions
        # action in BrowserGym: see https://github.com/ServiceNow/BrowserGym/blob/main/core/src/browsergym/core/action/functions.py
        action_str = action.browser_actions
    else:
        raise ValueError(f'Invalid action type: {action.action}')

    try:
        # obs provided by BrowserGym: see https://github.com/ServiceNow/BrowserGym/blob/main/core/src/browsergym/core/env.py#L396
        obs = browser.step(action_str)
        last_action_error = obs.get('last_action_error', '')
        return BrowserOutputObservation(
            # 'text_content' is the only required key; if it is missing the
            # KeyError is reported through the error observation below.
            content=obs['text_content'],  # text content of the page
            url=obs.get('url', ''),  # URL of the page
            screenshot=obs.get('screenshot', None),  # base64-encoded screenshot, png
            open_pages_urls=obs.get('open_pages_urls', []),  # list of open pages
            active_page_index=obs.get(
                'active_page_index', -1
            ),  # index of the active page
            dom_object=obs.get('dom_object', {}),  # DOM object
            axtree_object=obs.get('axtree_object', {}),  # accessibility tree object
            extra_element_properties=obs.get('extra_element_properties', {}),
            focused_element_bid=obs.get(
                'focused_element_bid', None
            ),  # focused element bid
            last_browser_action=obs.get(
                'last_action', ''
            ),  # last browser env action performed
            last_browser_action_error=last_action_error,
            error=bool(last_action_error),  # error flag
        )
    except Exception as e:
        # Best-effort: any failure while stepping the browser is surfaced to
        # the caller as an error observation rather than an exception.
        return BrowserOutputObservation(
            content=str(e),
            screenshot='',
            error=True,
            last_browser_action_error=str(e),
            # asked_url is only bound on the BrowseURLAction path.
            url=asked_url if action.action == ActionType.BROWSE else '',
        )
|