# Workflow: runs the integration test evaluation against two LLMs (Haiku and
# DeepSeek), archives the evaluation outputs as a build artifact, and posts the
# two reports plus the artifact link as a comment on the triggering PR (or on a
# fixed fallback issue when there is no PR).
name: Run Integration Tests

# Triggers: a label being added to a PR, a manual dispatch (with a mandatory
# reason), or the nightly schedule. The job-level `if` below narrows the
# label trigger to the 'integration-test' label only.
on:
  pull_request:
    types: [labeled]
  workflow_dispatch:
    inputs:
      reason:
        description: 'Reason for manual trigger'
        required: true
        default: ''
  schedule:
    - cron: '30 22 * * *'  # Runs at 10:30pm UTC every day

env:
  N_PROCESSES: 10  # Global configuration for number of parallel processes for evaluation

jobs:
  run-integration-tests:
    if: github.event.label.name == 'integration-test' || github.event_name == 'workflow_dispatch' || github.event_name == 'schedule'
    runs-on: ubuntu-latest
    permissions:
      contents: "read"
      id-token: "write"
      pull-requests: "write"
      issues: "write"
    strategy:
      matrix:
        python-version: ["3.12"]
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # Poetry is installed before setup-python so that `cache: "poetry"` can find it.
      - name: Install poetry via pipx
        run: pipx install poetry

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: "poetry"

      # Early acknowledgement on the PR; only posted for the label trigger.
      - name: Comment on PR if 'integration-test' label is present
        if: github.event_name == 'pull_request' && github.event.label.name == 'integration-test'
        uses: KeisukeYamashita/create-comment@v1
        with:
          unique: false
          comment: |
            Hi! I started running the integration tests on your PR. You will receive a comment with the results shortly.

      - name: Install Python dependencies using Poetry
        run: poetry install --without evaluation,llama-index

      # Writes a fresh config.toml whose [llm.eval] section points at the Haiku model.
      # Secrets are passed through `env` and expanded by the shell, not by the
      # workflow expression engine.
      - name: Configure config.toml for testing with Haiku
        env:
          LLM_MODEL: "litellm_proxy/claude-3-5-haiku-20241022"
          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
          LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
        run: |
          echo "[llm.eval]" > config.toml
          echo "model = \"$LLM_MODEL\"" >> config.toml
          echo "api_key = \"$LLM_API_KEY\"" >> config.toml
          echo "base_url = \"$LLM_BASE_URL\"" >> config.toml
          echo "temperature = 0.0" >> config.toml

      - name: Build environment
        run: make build

      # Runs the evaluation, then exports the first report.md found under the
      # Haiku output directory as the multi-line env var
      # INTEGRATION_TEST_REPORT_HAIKU for the final comment step.
      - name: Run integration test evaluation for Haiku
        env:
          SANDBOX_FORCE_REBUILD_RUNTIME: True
        run: |
          poetry run ./evaluation/integration_tests/scripts/run_infer.sh llm.eval HEAD CodeActAgent '' $N_PROCESSES '' 'haiku_run'

          # get integration tests report
          REPORT_FILE_HAIKU=$(find evaluation/evaluation_outputs/outputs/integration_tests/CodeActAgent/*haiku*_maxiter_10_N* -name "report.md" -type f | head -n 1)
          echo "REPORT_FILE: $REPORT_FILE_HAIKU"
          echo "INTEGRATION_TEST_REPORT_HAIKU<<EOF" >> "$GITHUB_ENV"
          if [[ -f "$REPORT_FILE_HAIKU" ]]; then
            cat "$REPORT_FILE_HAIKU" >> "$GITHUB_ENV"
          else
            # No report was found: say so explicitly rather than running `cat`
            # with no file argument (which would read from stdin).
            echo "No report.md found for the Haiku run." >> "$GITHUB_ENV"
          fi
          # Blank line so the closing delimiter always starts on its own line,
          # even if the report does not end with a newline.
          echo >> "$GITHUB_ENV"
          echo "EOF" >> "$GITHUB_ENV"

      - name: Wait a little bit
        run: sleep 10

      # Overwrites config.toml so the same [llm.eval] section now points at DeepSeek.
      - name: Configure config.toml for testing with DeepSeek
        env:
          LLM_MODEL: "litellm_proxy/deepseek-chat"
          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
          LLM_BASE_URL: ${{ secrets.LLM_BASE_URL }}
        run: |
          echo "[llm.eval]" > config.toml
          echo "model = \"$LLM_MODEL\"" >> config.toml
          echo "api_key = \"$LLM_API_KEY\"" >> config.toml
          echo "base_url = \"$LLM_BASE_URL\"" >> config.toml
          echo "temperature = 0.0" >> config.toml

      # Same flow as the Haiku run; exports INTEGRATION_TEST_REPORT_DEEPSEEK.
      - name: Run integration test evaluation for DeepSeek
        env:
          SANDBOX_FORCE_REBUILD_RUNTIME: True
        run: |
          poetry run ./evaluation/integration_tests/scripts/run_infer.sh llm.eval HEAD CodeActAgent '' $N_PROCESSES '' 'deepseek_run'

          # get integration tests report
          REPORT_FILE_DEEPSEEK=$(find evaluation/evaluation_outputs/outputs/integration_tests/CodeActAgent/deepseek*_maxiter_10_N* -name "report.md" -type f | head -n 1)
          echo "REPORT_FILE: $REPORT_FILE_DEEPSEEK"
          echo "INTEGRATION_TEST_REPORT_DEEPSEEK<<EOF" >> "$GITHUB_ENV"
          if [[ -f "$REPORT_FILE_DEEPSEEK" ]]; then
            cat "$REPORT_FILE_DEEPSEEK" >> "$GITHUB_ENV"
          else
            # No report was found: say so explicitly rather than running `cat`
            # with no file argument (which would read from stdin).
            echo "No report.md found for the DeepSeek run." >> "$GITHUB_ENV"
          fi
          # Blank line so the closing delimiter always starts on its own line,
          # even if the report does not end with a newline.
          echo >> "$GITHUB_ENV"
          echo "EOF" >> "$GITHUB_ENV"

      # The archive is written to the repository root (three levels up from the
      # outputs directory) so the upload step's glob can find it.
      - name: Create archive of evaluation outputs
        run: |
          TIMESTAMP=$(date +'%y-%m-%d-%H-%M')
          cd evaluation/evaluation_outputs/outputs  # Change to the outputs directory
          tar -czvf ../../../integration_tests_${TIMESTAMP}.tar.gz integration_tests/CodeActAgent/*  # Only include the actual result directories

      - name: Upload evaluation results as artifact
        uses: actions/upload-artifact@v4
        id: upload_results_artifact
        with:
          name: integration-test-outputs-${{ github.run_id }}-${{ github.run_attempt }}
          path: integration_tests_*.tar.gz

      - name: Get artifact URLs
        run: |
          echo "ARTIFACT_URL=${{ steps.upload_results_artifact.outputs.artifact-url }}" >> "$GITHUB_ENV"

      # Event data is handed to the script through `env` instead of being
      # expanded inline with ${{ }}: the manual-trigger reason is free-form user
      # input, and inline expansion would let it be interpreted as shell code.
      - name: Set timestamp and trigger reason
        env:
          EVENT_NAME: ${{ github.event_name }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
          DISPATCH_REASON: ${{ github.event.inputs.reason }}
        run: |
          echo "TIMESTAMP=$(date +'%Y-%m-%d-%H-%M')" >> "$GITHUB_ENV"
          if [[ "$EVENT_NAME" == "pull_request" ]]; then
            echo "TRIGGER_REASON=pr-$PR_NUMBER" >> "$GITHUB_ENV"
          elif [[ "$EVENT_NAME" == "workflow_dispatch" ]]; then
            # Collapse any line breaks so the reason cannot add extra entries
            # to the environment file.
            REASON_ONE_LINE="${DISPATCH_REASON//[$'\r\n']/ }"
            echo "TRIGGER_REASON=manual-$REASON_ONE_LINE" >> "$GITHUB_ENV"
          else
            echo "TRIGGER_REASON=nightly-scheduled" >> "$GITHUB_ENV"
          fi

      - name: Comment with results and artifact link
        id: create_comment
        uses: KeisukeYamashita/create-comment@v1
        with:
          # If triggered by a PR, comment on that PR; otherwise (manual or
          # scheduled runs) fall back to issue number 5318.
          number: ${{ github.event_name == 'pull_request' && github.event.pull_request.number || 5318 }}
          unique: false
          comment: |
            Trigger by: ${{ github.event_name == 'pull_request' && format('Pull Request (integration-test label on PR #{0})', github.event.pull_request.number) || (github.event_name == 'workflow_dispatch' && format('Manual Trigger: {0}', github.event.inputs.reason)) || 'Nightly Scheduled Run' }}
            Commit: ${{ github.sha }}
            **Integration Tests Report (Haiku)**
            Haiku LLM Test Results:
            ${{ env.INTEGRATION_TEST_REPORT_HAIKU }}
            ---
            **Integration Tests Report (DeepSeek)**
            DeepSeek LLM Test Results:
            ${{ env.INTEGRATION_TEST_REPORT_DEEPSEEK }}
            ---
            Download testing outputs (includes both Haiku and DeepSeek results): [Download](${{ steps.upload_results_artifact.outputs.artifact-url }})