Эх сурвалжийг харах

[Arch] Shrink runtime image size (#3051)

* test_runtime_client.py to test _execute_bash()

* runtime_build and runtime tweaks

* fix in docker script

* revert bash changes

* use sandbox_config.update_source_code to control source code update

* add od_version to the sandbox tag

* add doc instruction for update source code

* do not remove whole poetry folder;
add mamba clean

* add missing newlines

---------

Co-authored-by: tobitege <tobitege@gmx.de>
Xingyao Wang 1 жил өмнө
parent
commit
ce8a11a62f

+ 3 - 0
.gitignore

@@ -210,6 +210,7 @@ cache
 
 # configuration
 config.toml
+config.toml_
 config.toml.bak
 
 containers/agnostic_sandbox
@@ -217,3 +218,5 @@ containers/agnostic_sandbox
 # swe-bench-eval
 image_build_logs
 run_instance_logs
+
+od_runtime_*.tar

+ 3 - 0
opendevin/core/config.py

@@ -142,6 +142,8 @@ class SandboxConfig(metaclass=Singleton):
         enable_auto_lint: Whether to enable auto-lint.
         use_host_network: Whether to use the host network.
         initialize_plugins: Whether to initialize plugins.
+        update_source_code: Whether to update the source code in the EventStreamRuntime.
+            Used for development of EventStreamRuntime.
     """
 
     box_type: str = 'ssh'
@@ -157,6 +159,7 @@ class SandboxConfig(metaclass=Singleton):
     )
     use_host_network: bool = False
     initialize_plugins: bool = True
+    update_source_code: bool = False
 
     def defaults_to_dict(self) -> dict:
         """Serialize fields to a dict for the frontend, including type hints, defaults, and whether it's optional."""

+ 11 - 0
opendevin/runtime/client/client.py

@@ -1,3 +1,14 @@
+"""
+This is the main file for the runtime client.
+It is responsible for executing actions received from OpenDevin backend and producing observations.
+
+NOTE: this will be executed inside the docker sandbox.
+
+If you already have a pre-built Docker image but have changed the code in this file OR its dependencies, you need to rebuild the Docker image to update the source code.
+
+You should add SANDBOX_UPDATE_SOURCE_CODE=True to any `python XXX.py` command you run to update the source code.
+"""
+
 import argparse
 import asyncio
 import os

+ 3 - 2
opendevin/runtime/client/runtime.py

@@ -81,14 +81,15 @@ class EventStreamRuntime(Runtime):
             # NOTE: You need to set DEBUG=true to update the source code
             # inside the container. This is useful when you want to test/debug the
             # latest code in the runtime docker container.
-            update_source_code=config.debug,
+            update_source_code=self.sandbox_config.update_source_code,
         )
         self.container = await self._init_container(
             self.sandbox_workspace_dir,
             mount_dir=config.workspace_mount_path,
             plugins=self.plugins,
         )
-        # Initialize the env vars
+        # MUST call super().ainit() to initialize both default env vars
+        # AND the ones in env vars!
         await super().ainit(env_vars)
 
     @staticmethod

+ 3 - 2
opendevin/runtime/runtime.py

@@ -74,8 +74,9 @@ class Runtime:
 
         This method should be called after the runtime's constructor.
         """
-        logger.debug(f'Adding default env vars: {self.DEFAULT_ENV_VARS}')
-        await self.add_env_vars(self.DEFAULT_ENV_VARS)
+        if self.DEFAULT_ENV_VARS:
+            logger.debug(f'Adding default env vars: {self.DEFAULT_ENV_VARS}')
+            await self.add_env_vars(self.DEFAULT_ENV_VARS)
         if env_vars is not None:
             logger.debug(f'Adding provided env vars: {env_vars}')
             await self.add_env_vars(env_vars)

+ 106 - 34
opendevin/runtime/utils/runtime_build.py

@@ -3,14 +3,23 @@ import os
 import shutil
 import subprocess
 import tempfile
-from importlib.metadata import version
 
 import docker
+import toml
 
 import opendevin
 from opendevin.core.logger import opendevin_logger as logger
 
 
+def _get_package_version():
+    """Read the version from pyproject.toml as the other one may be outdated."""
+    project_root = os.path.dirname(os.path.dirname(os.path.abspath(opendevin.__file__)))
+    pyproject_path = os.path.join(project_root, 'pyproject.toml')
+    with open(pyproject_path, 'r') as f:
+        pyproject_data = toml.load(f)
+    return pyproject_data['tool']['poetry']['version']
+
+
 def _create_project_source_dist():
     """Create a source distribution of the project. Return the path to the tarball."""
     # Copy the project directory to the container
@@ -24,8 +33,10 @@ def _create_project_source_dist():
         logger.error(f'Build failed: {result}')
         raise Exception(f'Build failed: {result}')
 
+    # Fetch the correct version from pyproject.toml
+    package_version = _get_package_version()
     tarball_path = os.path.join(
-        project_root, 'dist', f'opendevin-{version("opendevin")}.tar.gz'
+        project_root, 'dist', f'opendevin-{package_version}.tar.gz'
     )
     if not os.path.exists(tarball_path):
         logger.error(f'Source distribution not found at {tarball_path}')
@@ -60,44 +71,64 @@ def _generate_dockerfile(
     if skip_init:
         dockerfile_content = f'FROM {base_image}\n'
     else:
+        # Ubuntu 22.x has libgl1-mesa-glx, but 24.x and above have libgl1!
+        if 'ubuntu' in base_image and (
+            base_image.endswith(':latest') or base_image.endswith(':24.04')
+        ):
+            LIBGL_MESA = 'libgl1'
+        else:
+            LIBGL_MESA = 'libgl1-mesa-glx'
+
         dockerfile_content = (
             f'FROM {base_image}\n'
-            # FIXME: make this more generic / cross-platform
-            # Install necessary packages
-            # libgl1-mesa-glx is extra dependency for OpenCV
-            'RUN apt-get update && apt-get install -y wget sudo libgl1-mesa-glx\n'
-            'RUN apt-get clean && rm -rf /var/lib/apt/lists/*\n'  # Clean up the apt cache to reduce image size
-            'RUN mkdir -p /opendevin && mkdir -p /opendevin/logs && chmod 777 /opendevin/logs\n'
-            'RUN echo "" > /opendevin/bash.bashrc\n'
-            'RUN if [ ! -d /opendevin/miniforge3 ]; then \\\n'
-            '        wget --progress=bar:force -O Miniforge3.sh "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh" && \\\n'
-            '        bash Miniforge3.sh -b -p /opendevin/miniforge3 && \\\n'
-            '        rm Miniforge3.sh && \\\n'
-            '        chmod -R g+w /opendevin/miniforge3 && \\\n'
-            '        bash -c ". /opendevin/miniforge3/etc/profile.d/conda.sh && conda config --set changeps1 False && conda config --append channels conda-forge"; \\\n'
-            '    fi\n'
+            # Install necessary packages and clean up in one layer
+            f'RUN apt-get update && apt-get install -y wget sudo apt-utils {LIBGL_MESA} libasound2-plugins && \\\n'
+            f'    apt-get clean && rm -rf /var/lib/apt/lists/*\n'
+            # Create necessary directories
+            f'RUN mkdir -p /opendevin && mkdir -p /opendevin/logs && chmod 777 /opendevin/logs && \\\n'
+            f'    echo "" > /opendevin/bash.bashrc\n'
+            # Install Miniforge3
+            f'RUN if [ ! -d /opendevin/miniforge3 ]; then \\\n'
+            f'        wget --progress=bar:force -O Miniforge3.sh "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh" && \\\n'
+            f'        bash Miniforge3.sh -b -p /opendevin/miniforge3 && \\\n'
+            f'        rm Miniforge3.sh && \\\n'
+            f'        chmod -R g+w /opendevin/miniforge3 && \\\n'
+            f'        bash -c ". /opendevin/miniforge3/etc/profile.d/conda.sh && conda config --set changeps1 False && conda config --append channels conda-forge"; \\\n'
+            f'    fi\n'
             'RUN /opendevin/miniforge3/bin/mamba install python=3.11 -y\n'
             'RUN /opendevin/miniforge3/bin/mamba install conda-forge::poetry -y\n'
         )
 
     # Copy the project directory to the container
     dockerfile_content += 'COPY project.tar.gz /opendevin\n'
-    # remove /opendevin/code if it exists
+    # Remove /opendevin/code if it exists
     dockerfile_content += (
         'RUN if [ -d /opendevin/code ]; then rm -rf /opendevin/code; fi\n'
     )
-    # unzip the tarball to /opendevin/code
+    # Unzip the tarball to /opendevin/code
     dockerfile_content += (
         'RUN cd /opendevin && tar -xzvf project.tar.gz && rm project.tar.gz\n'
     )
     dockerfile_content += f'RUN mv /opendevin/{source_code_dirname} /opendevin/code\n'
-    # install (or update) the dependencies
+
+    # ALTERNATIVE, but maybe not complete? (toml error!)
     dockerfile_content += (
         'RUN cd /opendevin/code && '
         '/opendevin/miniforge3/bin/mamba run -n base poetry env use python3.11 && '
-        '/opendevin/miniforge3/bin/mamba run -n base poetry install\n'
-        # for browser (update if needed)
-        'RUN apt-get update && cd /opendevin/code && /opendevin/miniforge3/bin/mamba run -n base poetry run playwright install --with-deps chromium\n'
+        '/opendevin/miniforge3/bin/mamba run -n base poetry install --no-interaction --no-root\n'
+        'RUN /opendevin/miniforge3/bin/mamba run -n base poetry cache clear --all . && \\\n'
+        'apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* &&\\\n'
+        '/opendevin/miniforge3/bin/mamba clean --all\n'
+    )
+
+    # For browser (update if needed)
+    dockerfile_content += (
+        'RUN apt-get update && \\\n'
+        '    cd /opendevin/code && \\\n'
+        '    /opendevin/miniforge3/bin/mamba run -n base poetry run pip install playwright && \\\n'
+        '    /opendevin/miniforge3/bin/mamba run -n base poetry run playwright install --with-deps chromium && \\\n'
+        '    apt-get clean && \\\n'
+        '    rm -rf /var/lib/apt/lists/*\n'
     )
     return dockerfile_content
 
@@ -176,14 +207,17 @@ def _get_new_image_name(base_image: str, dev_mode: bool = False) -> str:
             base_image = base_image + ':latest'
         [repo, tag] = base_image.split(':')
         repo = repo.replace('/', '___')
-        return f'{prefix}:{repo}_tag_{tag}'
+
+        od_version = _get_package_version()
+        return f'{prefix}:od_v{od_version}_image_{repo}_tag_{tag}'
 
 
 def _check_image_exists(image_name: str, docker_client: docker.DockerClient) -> bool:
     images = docker_client.images.list()
-    for image in images:
-        if image_name in image.tags:
-            return True
+    if images:
+        for image in images:
+            if image_name in image.tags:
+                return True
     return False
 
 
@@ -191,31 +225,44 @@ def build_runtime_image(
     base_image: str,
     docker_client: docker.DockerClient,
     update_source_code: bool = False,
+    save_to_local_store: bool = False,  # New parameter to control saving to local store
 ) -> str:
     """Build the runtime image for the OpenDevin runtime.
 
     This is only used for **eventstream runtime**.
     """
     new_image_name = _get_new_image_name(base_image)
+    logger.info(f'New image name: {new_image_name}')
+
+    # Ensure new_image_name contains a colon
+    if ':' not in new_image_name:
+        raise ValueError(
+            f'Invalid image name: {new_image_name}. Expected format "repository:tag".'
+        )
 
     # Try to pull the new image from the registry
     try:
         docker_client.images.pull(new_image_name)
-    except Exception as e:
-        logger.info(f'Error pulling image {new_image_name}, building it from scratch')
-        logger.info(f'Non-fatal error: {e}')
+    except Exception:
+        logger.info(f'Cannot pull image {new_image_name} directly')
 
     # Detect if the sandbox image is built
     image_exists = _check_image_exists(new_image_name, docker_client)
+    if image_exists:
+        logger.info(f'Image {new_image_name} exists')
+    else:
+        logger.info(f'Image {new_image_name} does not exist')
 
     skip_init = False
     if image_exists and not update_source_code:
         # If (1) Image exists & we are not updating the source code, we can reuse the existing production image
+        logger.info('No image build done (not updating source code)')
         return new_image_name
     elif image_exists and update_source_code:
         # If (2) Image exists & we plan to update the source code (in dev mode), we need to rebuild the image
         # and give it a special name
         # e.g., od_runtime:ubuntu_tag_latest -> od_runtime_dev:ubuntu_tag_latest
+        logger.info('Image exists, but updating source code requested')
         base_image = new_image_name
         new_image_name = _get_new_image_name(base_image, dev_mode=True)
 
@@ -223,22 +270,47 @@ def build_runtime_image(
     else:
         # If (3) Image does not exist, we need to build it from scratch
         # e.g., ubuntu:latest -> od_runtime:ubuntu_tag_latest
-        skip_init = False  # since we need to build the image from scratch
-
-    logger.info(f'Building image [{new_image_name}] from scratch')
+        # This snippet would allow loading the image from an archive:
+        # tar_path = f'{new_image_name.replace(":", "_")}.tar'
+        # if os.path.exists(tar_path):
+        #     logger.info(f'Loading image from {tar_path}')
+        #     load_command = ['docker', 'load', '-i', tar_path]
+        #     subprocess.run(load_command, check=True)
+        #     logger.info(f'Image {new_image_name} loaded from {tar_path}')
+        #     return new_image_name
+        skip_init = False
+
+    if not skip_init:
+        logger.info(f'Building image [{new_image_name}] from scratch')
 
     _build_sandbox_image(base_image, new_image_name, docker_client, skip_init=skip_init)
+
+    # Only for development: allows saving the image as an archive:
+    if not image_exists and save_to_local_store:
+        tar_path = f'{new_image_name.replace(":", "_")}.tar'
+        save_command = ['docker', 'save', '-o', tar_path, new_image_name]
+        subprocess.run(save_command, check=True)
+        logger.info(f'Image saved to {tar_path}')
+
+        load_command = ['docker', 'load', '-i', tar_path]
+        subprocess.run(load_command, check=True)
+        logger.info(f'Image {new_image_name} loaded back into Docker from {tar_path}')
+
     return new_image_name
 
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
-    parser.add_argument('--base_image', type=str, default='ubuntu:latest')
+    parser.add_argument('--base_image', type=str, default='ubuntu:22.04')
     parser.add_argument('--update_source_code', type=bool, default=False)
+    parser.add_argument('--save_to_local_store', type=bool, default=False)
     args = parser.parse_args()
 
     client = docker.from_env()
     image_name = build_runtime_image(
-        args.base_image, client, update_source_code=args.update_source_code
+        args.base_image,
+        client,
+        update_source_code=args.update_source_code,
+        save_to_local_store=args.save_to_local_store,
     )
     print(f'\nBUILT Image: {image_name}\n')