test_browsing.py 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
  1. """Browsing-related tests for the EventStreamRuntime, which connects to the RuntimeClient running in the sandbox."""
  2. import asyncio
  3. import json
  4. import pytest
  5. from conftest import _load_runtime
  6. from openhands.core.logger import openhands_logger as logger
  7. from openhands.events.action import (
  8. BrowseInteractiveAction,
  9. BrowseURLAction,
  10. CmdRunAction,
  11. )
  12. from openhands.events.observation import (
  13. BrowserOutputObservation,
  14. CmdOutputObservation,
  15. )
  16. from openhands.runtime.client.runtime import EventStreamRuntime
  17. # ============================================================================================================================
  18. # Browsing tests
  19. # ============================================================================================================================
# Command prefix used to invoke Python in commands sent to the runtime;
# test_simple_browse appends `-m http.server 8000` to it.
# NOTE(review): presumably this is the mamba `base` environment bundled in the
# runtime image — confirm against the image build.
PY3_FOR_TESTING = '/openhands/miniforge3/bin/mamba run -n base python3'
  21. @pytest.mark.asyncio
  22. async def test_simple_browse(temp_dir, box_class, run_as_openhands):
  23. runtime = await _load_runtime(temp_dir, box_class, run_as_openhands)
  24. # Test browse
  25. action_cmd = CmdRunAction(
  26. command=f'{PY3_FOR_TESTING} -m http.server 8000 > server.log 2>&1 &'
  27. )
  28. logger.info(action_cmd, extra={'msg_type': 'ACTION'})
  29. obs = await runtime.run_action(action_cmd)
  30. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  31. assert isinstance(obs, CmdOutputObservation)
  32. assert obs.exit_code == 0
  33. assert '[1]' in obs.content
  34. action_cmd = CmdRunAction(command='sleep 5 && cat server.log')
  35. logger.info(action_cmd, extra={'msg_type': 'ACTION'})
  36. obs = await runtime.run_action(action_cmd)
  37. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  38. assert obs.exit_code == 0
  39. action_browse = BrowseURLAction(url='http://localhost:8000')
  40. logger.info(action_browse, extra={'msg_type': 'ACTION'})
  41. obs = await runtime.run_action(action_browse)
  42. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  43. assert isinstance(obs, BrowserOutputObservation)
  44. assert 'http://localhost:8000' in obs.url
  45. assert not obs.error
  46. assert obs.open_pages_urls == ['http://localhost:8000/']
  47. assert obs.active_page_index == 0
  48. assert obs.last_browser_action == 'goto("http://localhost:8000")'
  49. assert obs.last_browser_action_error == ''
  50. assert 'Directory listing for /' in obs.content
  51. assert 'server.log' in obs.content
  52. # clean up
  53. action = CmdRunAction(command='rm -rf server.log')
  54. logger.info(action, extra={'msg_type': 'ACTION'})
  55. obs = await runtime.run_action(action)
  56. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  57. assert obs.exit_code == 0
  58. await runtime.close()
  59. await asyncio.sleep(1)
  60. @pytest.mark.asyncio
  61. async def test_browsergym_eval_env(temp_dir):
  62. runtime = await _load_runtime(
  63. temp_dir,
  64. # only supported in event stream runtime
  65. box_class=EventStreamRuntime,
  66. run_as_openhands=False, # need root permission to access file
  67. container_image='xingyaoww/od-eval-miniwob:v1.0',
  68. browsergym_eval_env='browsergym/miniwob.choose-list',
  69. )
  70. from openhands.runtime.browser.browser_env import (
  71. BROWSER_EVAL_GET_GOAL_ACTION,
  72. BROWSER_EVAL_GET_REWARDS_ACTION,
  73. )
  74. # Test browse
  75. action = BrowseInteractiveAction(browser_actions=BROWSER_EVAL_GET_GOAL_ACTION)
  76. logger.info(action, extra={'msg_type': 'ACTION'})
  77. obs = await runtime.run_action(action)
  78. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  79. assert isinstance(obs, BrowserOutputObservation)
  80. assert not obs.error
  81. assert 'Select' in obs.content
  82. assert 'from the list and click Submit' in obs.content
  83. # Make sure the browser can produce observation in eva[l
  84. action = BrowseInteractiveAction(browser_actions='noop()')
  85. logger.info(action, extra={'msg_type': 'ACTION'})
  86. obs = await runtime.run_action(action)
  87. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  88. assert (
  89. obs.url.strip()
  90. == 'file:///miniwob-plusplus/miniwob/html/miniwob/choose-list.html'
  91. )
  92. # Make sure the rewards are working
  93. action = BrowseInteractiveAction(browser_actions=BROWSER_EVAL_GET_REWARDS_ACTION)
  94. logger.info(action, extra={'msg_type': 'ACTION'})
  95. obs = await runtime.run_action(action)
  96. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  97. assert json.loads(obs.content) == [0.0]
  98. await runtime.close()
  99. await asyncio.sleep(1)