Jelajahi Sumber

[runtime hash] fix runtime hash mismatch between the `app` image and "development mode" (#4039)

Xingyao Wang 1 tahun lalu
induk
melakukan
34f3b61536

+ 114 - 2
.github/workflows/ghcr_runtime.yml → .github/workflows/ghcr-build.yml

@@ -25,7 +25,69 @@ on:
         required: true
         default: ''
 
+env:
+  BASE_IMAGE_FOR_HASH_EQUIVALENCE_TEST: nikolaik/python-nodejs:python3.11-nodejs22
+
 jobs:
+  # Builds the OpenHands Docker images
+  ghcr_build_app:
+    name: Build App Image
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+    outputs:
+      hash_from_app_image: ${{ steps.get_hash_in_app_image.outputs.hash_from_app_image }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Free Disk Space (Ubuntu)
+        uses: jlumbroso/free-disk-space@main
+        with:
+          # this might remove tools that are actually needed,
+          # if set to "true" but frees about 6 GB
+          tool-cache: true
+          # all of these default to true, but feel free to set to
+          # "false" if necessary for your workflow
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          docker-images: false
+          swap-storage: true
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+      - name: Login to GHCR
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.repository_owner }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Set up Docker Buildx
+        id: buildx
+        uses: docker/setup-buildx-action@v3
+      - name: Build and push app image
+        if: "!github.event.pull_request.head.repo.fork"
+        run: |
+          ./containers/build.sh openhands ${{ github.repository_owner }} --push
+      - name: Build app image
+        if: "github.event.pull_request.head.repo.fork"
+        run: |
+          ./containers/build.sh openhands image ${{ github.repository_owner }}
+      - name: Get hash in App Image
+        id: get_hash_in_app_image
+        run: |
+          # Lowercase the repository owner
+          export REPO_OWNER=${{ github.repository_owner }}
+          REPO_OWNER=$(echo $REPO_OWNER | tr '[:upper:]' '[:lower:]')
+          # Run the build script in the app image
+          docker run -e SANDBOX_USER_ID=0 -v /var/run/docker.sock:/var/run/docker.sock ghcr.io/${REPO_OWNER}/openhands:${{ github.sha }} /bin/bash -c "mkdir -p containers/runtime; python3 openhands/runtime/utils/runtime_build.py --base_image ${{ env.BASE_IMAGE_FOR_HASH_EQUIVALENCE_TEST }} --build_folder containers/runtime --force_rebuild" 2>&1 | tee docker-outputs.txt
+          # Get the hash from the build script
+          hash_from_app_image=$(cat docker-outputs.txt | grep "Hash for docker build directory" | awk -F "): " '{print $2}' | uniq | head -n1)
+          echo "hash_from_app_image=$hash_from_app_image" >> $GITHUB_OUTPUT
+          echo "Hash from app image: $hash_from_app_image"
+
+
   # Builds the runtime Docker images
   ghcr_build_runtime:
     name: Build Image
@@ -104,6 +166,56 @@ jobs:
           name: runtime-${{ matrix.base_image.tag }}
           path: /tmp/runtime-${{ matrix.base_image.tag }}.tar
 
+  verify_hash_equivalence_in_runtime_and_app:
+    name: Verify Hash Equivalence in Runtime and Docker images
+    runs-on: ubuntu-latest
+    needs: [ghcr_build_runtime, ghcr_build_app]
+    strategy:
+      fail-fast: false
+      matrix:
+        base_image: ['nikolaik']
+    steps:
+      - uses: actions/checkout@v4
+      - name: Cache Poetry dependencies
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.cache/pypoetry
+            ~/.virtualenvs
+          key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
+          restore-keys: |
+            ${{ runner.os }}-poetry-
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+      - name: Install poetry via pipx
+        run: pipx install poetry
+      - name: Install Python dependencies using Poetry
+        run: make install-python-dependencies
+      - name: Get hash in App Image
+        run: |
+          echo "Hash from app image: ${{ needs.ghcr_build_app.outputs.hash_from_app_image }}"
+          echo "hash_from_app_image=${{ needs.ghcr_build_app.outputs.hash_from_app_image }}" >> $GITHUB_ENV
+
+      - name: Get hash using code (development mode)
+        run: |
+          mkdir -p containers/runtime
+          poetry run python3 openhands/runtime/utils/runtime_build.py --base_image ${{ env.BASE_IMAGE_FOR_HASH_EQUIVALENCE_TEST }} --build_folder containers/runtime --force_rebuild > output.txt 2>&1
+          hash_from_code=$(cat output.txt | grep "Hash for docker build directory" | awk -F "): " '{print $2}' | uniq | head -n1)
+          echo "hash_from_code=$hash_from_code" >> $GITHUB_ENV
+
+      - name: Compare hashes
+        run: |
+          echo "Hash from App Image: ${{ env.hash_from_app_image }}"
+          echo "Hash from Code: ${{ env.hash_from_code }}"
+          if [ "${{ env.hash_from_app_image }}" = "${{ env.hash_from_code }}" ]; then
+            echo "Hashes match!"
+          else
+            echo "Hashes do not match!"
+            exit 1
+          fi
+
   # Run unit tests with the EventStream runtime Docker images as root
   test_runtime_root:
     name: RT Unit Tests (Root)
@@ -341,7 +453,7 @@ jobs:
     name: All Runtime Tests Passed
     if: ${{ !cancelled() && !contains(needs.*.result, 'failure') && !contains(needs.*.result, 'cancelled') }}
     runs-on: ubuntu-latest
-    needs: [test_runtime_root, test_runtime_oh, runtime_integration_tests_on_linux]
+    needs: [test_runtime_root, test_runtime_oh, runtime_integration_tests_on_linux, verify_hash_equivalence_in_runtime_and_app]
     steps:
       - name: All tests passed
         run: echo "All runtime tests have passed successfully!"
@@ -350,7 +462,7 @@ jobs:
     name: All Runtime Tests Passed
     if: ${{ cancelled() || contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') }}
     runs-on: ubuntu-latest
-    needs: [test_runtime_root, test_runtime_oh, runtime_integration_tests_on_linux]
+    needs: [test_runtime_root, test_runtime_oh, runtime_integration_tests_on_linux, verify_hash_equivalence_in_runtime_and_app]
     steps:
       - name: Some tests failed
         run: |

+ 0 - 65
.github/workflows/ghcr_app.yml

@@ -1,65 +0,0 @@
-# Workflow that builds, tests and then pushes the app docker images to the ghcr.io repository
-name: Build and Publish App Image
-
-# Always run on "main"
-# Always run on tags
-# Always run on PRs
-# Can also be triggered manually
-on:
-  push:
-    branches:
-      - main
-    tags:
-      - '*'
-  pull_request:
-  workflow_dispatch:
-    inputs:
-      reason:
-        description: 'Reason for manual trigger'
-        required: true
-        default: ''
-
-jobs:
-  # Builds the OpenHands Docker images
-  ghcr_build:
-    name: Build App Image
-    runs-on: ubuntu-latest
-    permissions:
-      contents: read
-      packages: write
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-      - name: Free Disk Space (Ubuntu)
-        uses: jlumbroso/free-disk-space@main
-        with:
-          # this might remove tools that are actually needed,
-          # if set to "true" but frees about 6 GB
-          tool-cache: true
-          # all of these default to true, but feel free to set to
-          # "false" if necessary for your workflow
-          android: true
-          dotnet: true
-          haskell: true
-          large-packages: true
-          docker-images: false
-          swap-storage: true
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3
-      - name: Login to GHCR
-        uses: docker/login-action@v3
-        with:
-          registry: ghcr.io
-          username: ${{ github.repository_owner }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-      - name: Set up Docker Buildx
-        id: buildx
-        uses: docker/setup-buildx-action@v3
-      - name: Build and push app image
-        if: "!github.event.pull_request.head.repo.fork"
-        run: |
-          ./containers/build.sh openhands ${{ github.repository_owner }} --push
-      - name: Build app image
-        if: "github.event.pull_request.head.repo.fork"
-        run: |
-          ./containers/build.sh openhands image ${{ github.repository_owner }}

+ 5 - 4
containers/app/Dockerfile

@@ -70,10 +70,11 @@ RUN playwright install --with-deps chromium
 COPY --chown=openhands:app --chmod=770 ./openhands ./openhands
 COPY --chown=openhands:app --chmod=777 ./openhands/runtime/plugins ./openhands/runtime/plugins
 COPY --chown=openhands:app --chmod=770 ./agenthub ./agenthub
-COPY --chown=openhands:app --chmod=770 ./pyproject.toml ./pyproject.toml
-COPY --chown=openhands:app --chmod=770 ./poetry.lock ./poetry.lock
-COPY --chown=openhands:app --chmod=770 ./README.md ./README.md
-COPY --chown=openhands:app --chmod=770 ./MANIFEST.in ./MANIFEST.in
+COPY --chown=openhands:app ./pyproject.toml ./pyproject.toml
+COPY --chown=openhands:app ./poetry.lock ./poetry.lock
+COPY --chown=openhands:app ./README.md ./README.md
+COPY --chown=openhands:app ./MANIFEST.in ./MANIFEST.in
+COPY --chown=openhands:app ./LICENSE ./LICENSE
 
 # This is run as "openhands" user, and will create __pycache__ with openhands:openhands ownership
 RUN python openhands/core/download.py # No-op to download assets

+ 13 - 0
openhands/__init__.py

@@ -1,3 +1,6 @@
+import os
+
+
 def get_version():
     try:
         from importlib.metadata import PackageNotFoundError, version
@@ -19,6 +22,16 @@ def get_version():
     except ImportError:
         pass
 
+    # Try getting the version from pyproject.toml
+    try:
+        root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+        with open(os.path.join(root_dir, 'pyproject.toml'), 'r') as f:
+            for line in f:
+                if line.startswith('version ='):
+                    return line.split('=')[1].strip().strip('"')
+    except FileNotFoundError:
+        pass
+
     return 'unknown'
 
 

+ 12 - 3
openhands/runtime/utils/runtime_build.py

@@ -141,14 +141,23 @@ def prep_docker_build_folder(
         file.write(dockerfile_content)
 
     # Get the MD5 hash of the dir_path directory
-    dist_hash = dirhash(dir_path, 'md5')
+    dir_hash = dirhash(
+        dir_path,
+        'md5',
+        ignore=[
+            '.*/',  # hidden directories
+            '__pycache__/',
+            '*.pyc',
+        ],
+    )
+    hash = f'v{oh_version}_{dir_hash}'
     logger.info(
         f'Input base image: {base_image}\n'
         f'Skip init: {skip_init}\n'
         f'Extra deps: {extra_deps}\n'
-        f'Hash for docker build directory [{dir_path}] (contents: {os.listdir(dir_path)}): {dist_hash}\n'
+        f'Hash for docker build directory [{dir_path}] (contents: {os.listdir(dir_path)}): {hash}\n'
     )
-    return dist_hash
+    return hash
 
 
 def get_runtime_image_repo_and_tag(base_image: str) -> tuple[str, str]:

+ 1 - 1
poetry.lock

@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
 
 [[package]]
 name = "aenum"