t06_github_pr_browsing.py 2.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
  1. from evaluation.integration_tests.tests.base import BaseIntegrationTest, TestResult
  2. from openhands.events.action import AgentFinishAction, MessageAction
  3. from openhands.events.event import Event
  4. from openhands.events.observation import AgentDelegateObservation
  5. from openhands.runtime.base import Runtime
  6. class Test(BaseIntegrationTest):
  7. INSTRUCTION = 'Look at https://github.com/All-Hands-AI/OpenHands/pull/8, and tell me what is happening there and what did @asadm suggest.'
  8. @classmethod
  9. def initialize_runtime(cls, runtime: Runtime) -> None:
  10. pass
  11. @classmethod
  12. def verify_result(cls, runtime: Runtime, histories: list[Event]) -> TestResult:
  13. from openhands.core.logger import openhands_logger as logger
  14. # check if the license information is in any message
  15. message_actions = [
  16. event
  17. for event in histories
  18. if isinstance(
  19. event, (MessageAction, AgentFinishAction, AgentDelegateObservation)
  20. )
  21. ]
  22. logger.info(f'Total message-like events: {len(message_actions)}')
  23. for event in message_actions:
  24. try:
  25. if isinstance(event, AgentDelegateObservation):
  26. content = event.content
  27. elif isinstance(event, AgentFinishAction):
  28. content = event.outputs.get('content', '')
  29. if event.thought:
  30. content += f'\n\n{event.thought}'
  31. elif isinstance(event, MessageAction):
  32. content = event.content
  33. else:
  34. logger.warning(f'Unexpected event type: {type(event)}')
  35. continue
  36. if (
  37. 'non-commercial' in content
  38. or 'MIT' in content
  39. or 'Apache 2.0' in content
  40. ):
  41. return TestResult(success=True)
  42. except Exception as e:
  43. logger.error(f'Error processing event: {e}')
  44. logger.debug(
  45. f'Total messages: {len(message_actions)}. Messages: {message_actions}'
  46. )
  47. return TestResult(
  48. success=False,
  49. reason=f'The answer is not found in any message. Total messages: {len(message_actions)}.',
  50. )