test_browsing.py 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113
  1. """Browsing-related tests for the EventStreamRuntime, which connects to the ActionExecutor running in the sandbox."""
  2. import json
  3. from conftest import _close_test_runtime, _load_runtime
  4. from openhands.core.logger import openhands_logger as logger
  5. from openhands.events.action import (
  6. BrowseInteractiveAction,
  7. BrowseURLAction,
  8. CmdRunAction,
  9. )
  10. from openhands.events.observation import (
  11. BrowserOutputObservation,
  12. CmdOutputObservation,
  13. )
  14. # ============================================================================================================================
  15. # Browsing tests
  16. # ============================================================================================================================
  # Command prefix used by the tests below to invoke python3 through micromamba's
  # `openhands` environment (see its use with `-m http.server` in test_simple_browse).
  PY3_FOR_TESTING = '/openhands/micromamba/bin/micromamba run -n openhands python3'
  def test_simple_browse(temp_dir, runtime_cls, run_as_openhands):
      """Browse a directory listing served by an HTTP server inside the runtime.

      Starts ``python3 -m http.server 8000`` in the background, opens
      ``http://localhost:8000`` with a ``BrowseURLAction`` and checks the
      fields of the resulting ``BrowserOutputObservation``.

      Args:
          temp_dir: Forwarded to ``_load_runtime`` (presumably a pytest fixture).
          runtime_cls: Forwarded to ``_load_runtime``; the runtime class under test.
          run_as_openhands: Forwarded to ``_load_runtime``.
      """
      runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
      try:
          # Launch the HTTP server in the background; its output goes to server.log.
          action_cmd = CmdRunAction(
              command=f'{PY3_FOR_TESTING} -m http.server 8000 > server.log 2>&1 &'
          )
          logger.info(action_cmd, extra={'msg_type': 'ACTION'})
          obs = runtime.run_action(action_cmd)
          logger.info(obs, extra={'msg_type': 'OBSERVATION'})

          assert isinstance(obs, CmdOutputObservation)
          assert obs.exit_code == 0
          # Presumably the shell's job-control line for the backgrounded process.
          assert '[1]' in obs.content

          # Give the server a few seconds to come up, then dump its log.
          action_cmd = CmdRunAction(command='sleep 3 && cat server.log')
          logger.info(action_cmd, extra={'msg_type': 'ACTION'})
          obs = runtime.run_action(action_cmd)
          logger.info(obs, extra={'msg_type': 'OBSERVATION'})
          assert isinstance(obs, CmdOutputObservation)
          assert obs.exit_code == 0

          # Test browse
          action_browse = BrowseURLAction(url='http://localhost:8000')
          logger.info(action_browse, extra={'msg_type': 'ACTION'})
          obs = runtime.run_action(action_browse)
          logger.info(obs, extra={'msg_type': 'OBSERVATION'})

          assert isinstance(obs, BrowserOutputObservation)
          assert 'http://localhost:8000' in obs.url
          assert not obs.error
          assert obs.open_pages_urls == ['http://localhost:8000/']
          assert obs.active_page_index == 0
          assert obs.last_browser_action == 'goto("http://localhost:8000")'
          assert obs.last_browser_action_error == ''
          assert 'Directory listing for /' in obs.content
          assert 'server.log' in obs.content

          # clean up
          action = CmdRunAction(command='rm -rf server.log')
          logger.info(action, extra={'msg_type': 'ACTION'})
          obs = runtime.run_action(action)
          logger.info(obs, extra={'msg_type': 'OBSERVATION'})
          assert isinstance(obs, CmdOutputObservation)
          assert obs.exit_code == 0
      finally:
          # Close the runtime even when an assertion above fails, so a failing
          # test does not leave the runtime running.
          _close_test_runtime(runtime)
  def test_browsergym_eval_env(runtime_cls, temp_dir):
      """Exercise the browser in BrowserGym evaluation mode (miniwob choose-list).

      Loads a runtime configured with
      ``browsergym_eval_env='browsergym/miniwob.choose-list'`` and checks that
      the goal can be fetched, that a ``noop()`` yields an observation on the
      expected task page, and that the rewards action returns ``[0.0]``.

      Args:
          runtime_cls: Runtime class under test, forwarded to ``_load_runtime``.
          temp_dir: Forwarded to ``_load_runtime`` (presumably a pytest fixture).
      """
      runtime = _load_runtime(
          temp_dir,
          runtime_cls=runtime_cls,
          run_as_openhands=False,  # need root permission to access file
          base_container_image='xingyaoww/od-eval-miniwob:v1.0',
          browsergym_eval_env='browsergym/miniwob.choose-list',
          force_rebuild_runtime=True,
      )
      try:
          # Kept as a function-local import, as in the original test.
          from openhands.runtime.browser.browser_env import (
              BROWSER_EVAL_GET_GOAL_ACTION,
              BROWSER_EVAL_GET_REWARDS_ACTION,
          )

          # Test browse
          action = BrowseInteractiveAction(browser_actions=BROWSER_EVAL_GET_GOAL_ACTION)
          logger.info(action, extra={'msg_type': 'ACTION'})
          obs = runtime.run_action(action)
          logger.info(obs, extra={'msg_type': 'OBSERVATION'})

          assert isinstance(obs, BrowserOutputObservation)
          assert not obs.error
          assert 'Select' in obs.content
          assert 'from the list and click Submit' in obs.content

          # Make sure the browser can produce observation in eval
          action = BrowseInteractiveAction(browser_actions='noop()')
          logger.info(action, extra={'msg_type': 'ACTION'})
          obs = runtime.run_action(action)
          logger.info(obs, extra={'msg_type': 'OBSERVATION'})
          # Check the type first so a non-browser observation fails with a clear
          # assertion instead of an AttributeError on `.url`.
          assert isinstance(obs, BrowserOutputObservation)
          assert (
              obs.url.strip()
              == 'file:///miniwob-plusplus/miniwob/html/miniwob/choose-list.html'
          )

          # Make sure the rewards are working
          action = BrowseInteractiveAction(
              browser_actions=BROWSER_EVAL_GET_REWARDS_ACTION
          )
          logger.info(action, extra={'msg_type': 'ACTION'})
          obs = runtime.run_action(action)
          logger.info(obs, extra={'msg_type': 'OBSERVATION'})
          assert isinstance(obs, BrowserOutputObservation)
          # The rewards action's content is JSON; no reward has been earned yet.
          assert json.loads(obs.content) == [0.0]
      finally:
          # Close the runtime even when an assertion above fails, so a failing
          # test does not leave the runtime running.
          _close_test_runtime(runtime)