t06_github_pr_browsing.py 2.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556
  1. from evaluation.integration_tests.tests.base import BaseIntegrationTest, TestResult
  2. from openhands.events.action import AgentFinishAction, MessageAction
  3. from openhands.events.event import Event
  4. from openhands.events.observation import AgentDelegateObservation
  5. from openhands.runtime.base import Runtime
  6. class Test(BaseIntegrationTest):
  7. INSTRUCTION = 'Look at https://github.com/All-Hands-AI/OpenHands/pull/8, and tell me what is happening there and what did @asadm suggest.'
  8. @classmethod
  9. def initialize_runtime(cls, runtime: Runtime) -> None:
  10. pass
  11. @classmethod
  12. def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
  13. from openhands.core.logger import openhands_logger as logger
  14. # check if the license information is in any message
  15. message_actions = [
  16. event
  17. for event in histories
  18. if isinstance(
  19. event, (MessageAction, AgentFinishAction, AgentDelegateObservation)
  20. )
  21. ]
  22. logger.info(f'Total message-like events: {len(message_actions)}')
  23. for event in message_actions:
  24. try:
  25. if isinstance(event, AgentDelegateObservation):
  26. content = event.content
  27. elif isinstance(event, AgentFinishAction):
  28. content = event.outputs.get('content', '')
  29. elif isinstance(event, MessageAction):
  30. content = event.content
  31. else:
  32. logger.warning(f'Unexpected event type: {type(event)}')
  33. continue
  34. if (
  35. 'non-commercial' in content
  36. or 'MIT' in content
  37. or 'Apache 2.0' in content
  38. ):
  39. return TestResult(success=True)
  40. except Exception as e:
  41. logger.error(f'Error processing event: {e}')
  42. logger.debug(
  43. f'Total messages: {len(message_actions)}. Messages: {message_actions}'
  44. )
  45. return TestResult(
  46. success=False,
  47. reason=f'The answer is not found in any message. Total messages: {len(message_actions)}.',
  48. )