t01_fix_simple_typo.py 1.6 KB

123456789101112131415161718192021222324252627282930313233343536373839
  1. import os
  2. import tempfile
  3. from evaluation.integration_tests.tests.base import BaseIntegrationTest, TestResult
  4. from openhands.events.action import CmdRunAction
  5. from openhands.events.event import Event
  6. from openhands.runtime.base import Runtime
  class Test(BaseIntegrationTest):
      """Integration test: typos seeded in ``/workspace/bad.txt`` must be fixed.

      ``initialize_runtime`` plants a file containing two misspellings and
      ``verify_result`` reads it back and compares it with the corrected text.
      """

      INSTRUCTION = 'Fix typos in bad.txt.'

      @classmethod
      def initialize_runtime(cls, runtime: Runtime) -> None:
          """Write the typo-ridden ``bad.txt`` locally and copy it to ``/workspace``.

          Args:
              runtime: Runtime that receives the file through ``copy_to``.
          """
          seeded_text = 'This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!'
          with tempfile.TemporaryDirectory() as scratch_dir:
              local_path = os.path.join(scratch_dir, 'bad.txt')
              with open(local_path, 'w') as handle:
                  handle.write(seeded_text)

              # Copy before leaving the outer ``with``: the scratch directory and
              # its contents are removed on exit.
              runtime.copy_to(local_path, '/workspace')

      @classmethod
      def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
          """Report whether ``/workspace/bad.txt`` now holds the corrected text.

          Args:
              runtime: Runtime used to ``cat`` the file.
              histories: Recorded events; not consulted by this check.

          Returns:
              A successful ``TestResult`` when the file content (stripped, with
              CRLF normalised to LF) equals the expected text; otherwise a failed
              ``TestResult`` whose ``reason`` carries the command output.
          """
          expected_text = 'This is a stupid typo.\nReally?\nNo more typos!\nEnjoy!'

          observation = runtime.run_action(
              CmdRunAction(command='cat /workspace/bad.txt', keep_prompt=False)
          )
          # A non-zero exit status means the ``cat`` command itself failed.
          if observation.exit_code != 0:
              return TestResult(
                  success=False, reason=f'Failed to run command: {observation.content}'
              )

          # Compare on stripped, LF-normalised output so trailing whitespace and
          # CRLF line endings in the command output do not cause a mismatch.
          actual_text = observation.content.strip().replace('\r\n', '\n')
          if actual_text != expected_text:
              return TestResult(
                  success=False, reason=f'File not fixed: {observation.content}'
              )
          return TestResult(success=True)