t05_simple_browsing.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146
  1. import os
  2. import tempfile
  3. from evaluation.integration_tests.tests.base import BaseIntegrationTest, TestResult
  4. from evaluation.utils.shared import assert_and_raise
  5. from openhands.events.action import AgentFinishAction, CmdRunAction, MessageAction
  6. from openhands.events.event import Event
  7. from openhands.events.observation import AgentDelegateObservation
  8. from openhands.runtime.base import Runtime
# Static page used by the browsing test below. Test.initialize_runtime writes
# this text to index.html. The answer is not in the initial markup: the
# #result div starts empty and is only filled in with
# 'The answer is OpenHands is all you need!' by the click handler on
# #showButton, so the page has to be interacted with to reveal it.
HTML_FILE = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>The Ultimate Answer</title>
<style>
body {
display: flex;
justify-content: center;
align-items: center;
height: 100vh;
margin: 0;
background: linear-gradient(to right, #1e3c72, #2a5298);
color: #fff;
font-family: 'Arial', sans-serif;
text-align: center;
}
.container {
text-align: center;
padding: 20px;
background: rgba(255, 255, 255, 0.1);
border-radius: 10px;
box-shadow: 0 0 10px rgba(0, 0, 0, 0.2);
}
h1 {
font-size: 36px;
margin-bottom: 20px;
}
p {
font-size: 18px;
margin-bottom: 30px;
}
#showButton {
padding: 10px 20px;
font-size: 16px;
color: #1e3c72;
background: #fff;
border: none;
border-radius: 5px;
cursor: pointer;
transition: background 0.3s ease;
}
#showButton:hover {
background: #f0f0f0;
}
#result {
margin-top: 20px;
font-size: 24px;
}
</style>
</head>
<body>
<div class="container">
<h1>The Ultimate Answer</h1>
<p>Click the button to reveal the answer to life, the universe, and everything.</p>
<button id="showButton">Click me</button>
<div id="result"></div>
</div>
<script>
document.getElementById('showButton').addEventListener('click', function() {
document.getElementById('result').innerText = 'The answer is OpenHands is all you need!';
});
</script>
</body>
</html>
"""
  77. class Test(BaseIntegrationTest):
  78. INSTRUCTION = 'Browse localhost:8000, and tell me the ultimate answer to life.'
  79. @classmethod
  80. def initialize_runtime(cls, runtime: Runtime) -> None:
  81. action = CmdRunAction(command='mkdir -p /workspace', keep_prompt=False)
  82. obs = runtime.run_action(action)
  83. assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')
  84. action = CmdRunAction(command='mkdir -p /tmp/server', keep_prompt=False)
  85. obs = runtime.run_action(action)
  86. assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')
  87. # create a file with a typo in /workspace/bad.txt
  88. with tempfile.TemporaryDirectory() as temp_dir:
  89. temp_file_path = os.path.join(temp_dir, 'index.html')
  90. with open(temp_file_path, 'w') as f:
  91. f.write(HTML_FILE)
  92. # Copy the file to the desired location
  93. runtime.copy_to(temp_file_path, '/tmp/server')
  94. # create README.md
  95. action = CmdRunAction(
  96. command='cd /tmp/server && nohup python3 -m http.server 8000 &',
  97. keep_prompt=False,
  98. )
  99. obs = runtime.run_action(action)
  100. @classmethod
  101. def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
  102. from openhands.core.logger import openhands_logger as logger
  103. # check if the "The answer is OpenHands is all you need!" is in any message
  104. message_actions = [
  105. event
  106. for event in histories
  107. if isinstance(
  108. event, (MessageAction, AgentFinishAction, AgentDelegateObservation)
  109. )
  110. ]
  111. logger.debug(f'Total message-like events: {len(message_actions)}')
  112. for event in message_actions:
  113. try:
  114. if isinstance(event, AgentDelegateObservation):
  115. content = event.content
  116. elif isinstance(event, AgentFinishAction):
  117. content = event.outputs.get('content', '')
  118. elif isinstance(event, MessageAction):
  119. content = event.content
  120. else:
  121. logger.warning(f'Unexpected event type: {type(event)}')
  122. continue
  123. if 'OpenHands is all you need!' in content:
  124. return TestResult(success=True)
  125. except Exception as e:
  126. logger.error(f'Error processing event: {e}')
  127. logger.debug(
  128. f'Total messages: {len(message_actions)}. Messages: {message_actions}'
  129. )
  130. return TestResult(
  131. success=False,
  132. reason=f'The answer is not found in any message. Total messages: {len(message_actions)}.',
  133. )