# integration-runner.yml
#
# Runs the integration-test evaluation against two LLM configurations (Haiku
# and DeepSeek), archives the evaluation outputs as a workflow artifact, and
# posts both reports as a comment on the triggering PR (or on a fallback
# issue when the run was not triggered by a pull request).
---
name: Run Integration Tests

on:
  pull_request:
    types: [labeled]
  workflow_dispatch:
    inputs:
      reason:
        description: 'Reason for manual trigger'
        required: true
        default: ''
  schedule:
    - cron: '30 22 * * *'  # Runs at 10:30pm UTC every day

env:
  N_PROCESSES: 10  # Global configuration for number of parallel processes for evaluation

jobs:
  run-integration-tests:
    # Run only for the 'integration-test' label, a manual dispatch, or the nightly schedule.
    if: github.event.label.name == 'integration-test' || github.event_name == 'workflow_dispatch' || github.event_name == 'schedule'
    runs-on: ubuntu-latest
    permissions:
      contents: "read"
      id-token: "write"
      pull-requests: "write"
      issues: "write"
    strategy:
      matrix:
        python-version: ["3.12"]
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Install poetry via pipx
        run: pipx install poetry

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: "poetry"

      - name: Comment on PR if 'integration-test' label is present
        if: github.event_name == 'pull_request' && github.event.label.name == 'integration-test'
        uses: KeisukeYamashita/create-comment@v1
        with:
          unique: false
          comment: |
            Hi! I started running the integration tests on your PR. You will receive a comment with the results shortly.

      - name: Install Python dependencies using Poetry
        run: poetry install --without evaluation,llama-index

      # Writes a fresh config.toml whose [llm.eval] section points at the Haiku model.
      - name: Configure config.toml for testing with Haiku
        env:
          LLM_MODEL: "litellm_proxy/claude-3-5-haiku-20241022"
          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
          LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
        run: |
          echo "[llm.eval]" > config.toml
          echo "model = \"$LLM_MODEL\"" >> config.toml
          echo "api_key = \"$LLM_API_KEY\"" >> config.toml
          echo "base_url = \"$LLM_BASE_URL\"" >> config.toml
          echo "temperature = 0.0" >> config.toml

      - name: Build environment
        run: make build

      # Runs the evaluation, then exports the first report.md found as the
      # multi-line environment variable INTEGRATION_TEST_REPORT_HAIKU.
      - name: Run integration test evaluation for Haiku
        env:
          SANDBOX_FORCE_REBUILD_RUNTIME: True
        run: |
          poetry run ./evaluation/integration_tests/scripts/run_infer.sh llm.eval HEAD CodeActAgent '' "$N_PROCESSES" '' 'haiku_run'

          # get integration tests report
          REPORT_FILE_HAIKU=$(find evaluation/evaluation_outputs/outputs/integration_tests/CodeActAgent/*haiku*_maxiter_10_N* -name "report.md" -type f | head -n 1)
          echo "REPORT_FILE: $REPORT_FILE_HAIKU"

          # Random delimiter so a line in the report can never terminate the value early.
          DELIMITER="EOF_$(dd if=/dev/urandom bs=15 count=1 status=none | base64)"
          {
            echo "INTEGRATION_TEST_REPORT_HAIKU<<$DELIMITER"
            if [[ -n "$REPORT_FILE_HAIKU" && -f "$REPORT_FILE_HAIKU" ]]; then
              cat "$REPORT_FILE_HAIKU"
            else
              # Without this guard an empty path would make `cat` read from stdin.
              echo "No report.md was found for the Haiku run."
            fi
            echo
            echo "$DELIMITER"
          } >> "$GITHUB_ENV"

      - name: Wait a little bit
        run: sleep 10

      # Overwrites config.toml so the same [llm.eval] section now points at DeepSeek.
      - name: Configure config.toml for testing with DeepSeek
        env:
          LLM_MODEL: "litellm_proxy/deepseek-chat"
          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
          LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
        run: |
          echo "[llm.eval]" > config.toml
          echo "model = \"$LLM_MODEL\"" >> config.toml
          echo "api_key = \"$LLM_API_KEY\"" >> config.toml
          echo "base_url = \"$LLM_BASE_URL\"" >> config.toml
          echo "temperature = 0.0" >> config.toml

      # Runs the evaluation, then exports the first report.md found as the
      # multi-line environment variable INTEGRATION_TEST_REPORT_DEEPSEEK.
      - name: Run integration test evaluation for DeepSeek
        env:
          SANDBOX_FORCE_REBUILD_RUNTIME: True
        run: |
          poetry run ./evaluation/integration_tests/scripts/run_infer.sh llm.eval HEAD CodeActAgent '' "$N_PROCESSES" '' 'deepseek_run'

          # get integration tests report
          REPORT_FILE_DEEPSEEK=$(find evaluation/evaluation_outputs/outputs/integration_tests/CodeActAgent/deepseek*_maxiter_10_N* -name "report.md" -type f | head -n 1)
          echo "REPORT_FILE: $REPORT_FILE_DEEPSEEK"

          # Random delimiter so a line in the report can never terminate the value early.
          DELIMITER="EOF_$(dd if=/dev/urandom bs=15 count=1 status=none | base64)"
          {
            echo "INTEGRATION_TEST_REPORT_DEEPSEEK<<$DELIMITER"
            if [[ -n "$REPORT_FILE_DEEPSEEK" && -f "$REPORT_FILE_DEEPSEEK" ]]; then
              cat "$REPORT_FILE_DEEPSEEK"
            else
              # Without this guard an empty path would make `cat` read from stdin.
              echo "No report.md was found for the DeepSeek run."
            fi
            echo
            echo "$DELIMITER"
          } >> "$GITHUB_ENV"

      - name: Create archive of evaluation outputs
        run: |
          TIMESTAMP=$(date +'%y-%m-%d-%H-%M')
          cd evaluation/evaluation_outputs/outputs  # Change to the outputs directory
          tar -czvf "../../../integration_tests_${TIMESTAMP}.tar.gz" integration_tests/CodeActAgent/*  # Only include the actual result directories

      - name: Upload evaluation results as artifact
        uses: actions/upload-artifact@v4
        id: upload_results_artifact
        with:
          name: integration-test-outputs-${{ github.run_id }}-${{ github.run_attempt }}
          path: integration_tests_*.tar.gz

      - name: Get artifact URLs
        env:
          # Passed through env instead of being expanded into the script text.
          ARTIFACT_URL: ${{ steps.upload_results_artifact.outputs.artifact-url }}
        run: |
          echo "ARTIFACT_URL=$ARTIFACT_URL" >> "$GITHUB_ENV"

      - name: Set timestamp and trigger reason
        env:
          # Context values are handed over as environment variables so that the
          # free-text dispatch reason is treated as data, never as shell code.
          EVENT_NAME: ${{ github.event_name }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
          MANUAL_REASON: ${{ github.event.inputs.reason }}
        run: |
          echo "TIMESTAMP=$(date +'%Y-%m-%d-%H-%M')" >> "$GITHUB_ENV"
          if [[ "$EVENT_NAME" == "pull_request" ]]; then
            echo "TRIGGER_REASON=pr-$PR_NUMBER" >> "$GITHUB_ENV"
          elif [[ "$EVENT_NAME" == "workflow_dispatch" ]]; then
            # Collapse line breaks so the reason cannot add extra entries to $GITHUB_ENV.
            echo "TRIGGER_REASON=manual-$(printf '%s' "$MANUAL_REASON" | tr '\r\n' '  ')" >> "$GITHUB_ENV"
          else
            echo "TRIGGER_REASON=nightly-scheduled" >> "$GITHUB_ENV"
          fi

      - name: Comment with results and artifact link
        id: create_comment
        uses: KeisukeYamashita/create-comment@v1
        with:
          # if triggered by PR, use PR number, otherwise (manual or scheduled run) use 5318 as fallback issue number
          number: ${{ github.event_name == 'pull_request' && github.event.pull_request.number || 5318 }}
          unique: false
          comment: |
            Trigger by: ${{ github.event_name == 'pull_request' && format('Pull Request (integration-test label on PR #{0})', github.event.pull_request.number) || (github.event_name == 'workflow_dispatch' && format('Manual Trigger: {0}', github.event.inputs.reason)) || 'Nightly Scheduled Run' }}
            Commit: ${{ github.sha }}
            **Integration Tests Report (Haiku)**
            Haiku LLM Test Results:
            ${{ env.INTEGRATION_TEST_REPORT_HAIKU }}
            ---
            **Integration Tests Report (DeepSeek)**
            DeepSeek LLM Test Results:
            ${{ env.INTEGRATION_TEST_REPORT_DEEPSEEK }}
            ---
            Download testing outputs (includes both Haiku and DeepSeek results): [Download](${{ steps.upload_results_artifact.outputs.artifact-url }})