t02_add_bash_hello.py 1.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940
  1. from evaluation.integration_tests.tests.base import BaseIntegrationTest, TestResult
  2. from evaluation.utils.shared import assert_and_raise
  3. from openhands.events.action import CmdRunAction
  4. from openhands.events.event import Event
  5. from openhands.runtime.base import Runtime
  6. class Test(BaseIntegrationTest):
  7. INSTRUCTION = "Write a shell script '/workspace/hello.sh' that prints 'hello'."
  8. @classmethod
  9. def initialize_runtime(cls, runtime: Runtime) -> None:
  10. action = CmdRunAction(command='mkdir -p /workspace', keep_prompt=False)
  11. obs = runtime.run_action(action)
  12. assert_and_raise(obs.exit_code == 0, f'Failed to run command: {obs.content}')
  13. @classmethod
  14. def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
  15. # check if the file /workspace/hello.sh exists
  16. action = CmdRunAction(command='cat /workspace/hello.sh', keep_prompt=False)
  17. obs = runtime.run_action(action)
  18. if obs.exit_code != 0:
  19. return TestResult(
  20. success=False,
  21. reason=f'Failed to cat /workspace/hello.sh: {obs.content}.',
  22. )
  23. # execute the script
  24. action = CmdRunAction(command='bash /workspace/hello.sh', keep_prompt=False)
  25. obs = runtime.run_action(action)
  26. if obs.exit_code != 0:
  27. return TestResult(
  28. success=False,
  29. reason=f'Failed to execute /workspace/hello.sh: {obs.content}.',
  30. )
  31. if obs.content.strip() != 'hello':
  32. return TestResult(
  33. success=False, reason=f'Script did not print "hello": {obs.content}.'
  34. )
  35. return TestResult(success=True)