docker.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341
  1. import datetime
  2. import os
  3. import subprocess
  4. import time
  5. import docker
  6. from openhands import __version__ as oh_version
  7. from openhands.core.logger import RollingLogger
  8. from openhands.core.logger import openhands_logger as logger
  9. from openhands.runtime.builder.base import RuntimeBuilder
  10. from openhands.utils.term_color import TermColor, colorize
  11. class DockerRuntimeBuilder(RuntimeBuilder):
  12. def __init__(self, docker_client: docker.DockerClient):
  13. self.docker_client = docker_client
  14. version_info = self.docker_client.version()
  15. server_version = version_info.get('Version', '').replace('-', '.')
  16. if tuple(map(int, server_version.split('.')[:2])) < (18, 9):
  17. raise RuntimeError('Docker server version must be >= 18.09 to use BuildKit')
  18. self.rolling_logger = RollingLogger(max_lines=10)
  19. def build(
  20. self,
  21. path: str,
  22. tags: list[str],
  23. platform: str | None = None,
  24. extra_build_args: list[str] | None = None,
  25. use_local_cache: bool = False,
  26. ) -> str:
  27. """Builds a Docker image using BuildKit and handles the build logs appropriately.
  28. Args:
  29. path (str): The path to the Docker build context.
  30. tags (list[str]): A list of image tags to apply to the built image.
  31. platform (str, optional): The target platform for the build. Defaults to None.
  32. use_local_cache (bool, optional): Whether to use and update the local build cache. Defaults to True.
  33. extra_build_args (list[str], optional): Additional arguments to pass to the Docker build command. Defaults to None.
  34. Returns:
  35. str: The name of the built Docker image.
  36. Raises:
  37. RuntimeError: If the Docker server version is incompatible or if the build process fails.
  38. Note:
  39. This method uses Docker BuildKit for improved build performance and caching capabilities.
  40. If `use_local_cache` is True, it will attempt to use and update the build cache in a local directory.
  41. The `extra_build_args` parameter allows for passing additional Docker build arguments as needed.
  42. """
  43. self.docker_client = docker.from_env()
  44. version_info = self.docker_client.version()
  45. server_version = version_info.get('Version', '').replace('-', '.')
  46. if tuple(map(int, server_version.split('.'))) < (18, 9):
  47. raise RuntimeError('Docker server version must be >= 18.09 to use BuildKit')
  48. target_image_hash_name = tags[0]
  49. target_image_repo, target_image_source_tag = target_image_hash_name.split(':')
  50. target_image_tag = tags[1].split(':')[1] if len(tags) > 1 else None
  51. buildx_cmd = [
  52. 'docker',
  53. 'buildx',
  54. 'build',
  55. '--progress=plain',
  56. f'--build-arg=OPENHANDS_RUNTIME_VERSION={oh_version}',
  57. f'--build-arg=OPENHANDS_RUNTIME_BUILD_TIME={datetime.datetime.now().isoformat()}',
  58. f'--tag={target_image_hash_name}',
  59. '--load',
  60. ]
  61. # Include the platform argument only if platform is specified
  62. if platform:
  63. buildx_cmd.append(f'--platform={platform}')
  64. cache_dir = '/tmp/.buildx-cache'
  65. if use_local_cache and self._is_cache_usable(cache_dir):
  66. buildx_cmd.extend(
  67. [
  68. f'--cache-from=type=local,src={cache_dir}',
  69. f'--cache-to=type=local,dest={cache_dir},mode=max',
  70. ]
  71. )
  72. if extra_build_args:
  73. buildx_cmd.extend(extra_build_args)
  74. buildx_cmd.append(path) # must be last!
  75. self.rolling_logger.start(
  76. '================ DOCKER BUILD STARTED ================'
  77. )
  78. try:
  79. process = subprocess.Popen(
  80. buildx_cmd,
  81. stdout=subprocess.PIPE,
  82. stderr=subprocess.STDOUT,
  83. universal_newlines=True,
  84. bufsize=1,
  85. )
  86. if process.stdout:
  87. for line in iter(process.stdout.readline, ''):
  88. line = line.strip()
  89. if line:
  90. self._output_logs(line)
  91. return_code = process.wait()
  92. if return_code != 0:
  93. raise subprocess.CalledProcessError(
  94. return_code,
  95. process.args,
  96. output=process.stdout.read() if process.stdout else None,
  97. stderr=process.stderr.read() if process.stderr else None,
  98. )
  99. except subprocess.CalledProcessError as e:
  100. logger.error(f'Image build failed:\n{e}')
  101. logger.error(f'Command output:\n{e.output}')
  102. raise
  103. except subprocess.TimeoutExpired:
  104. logger.error('Image build timed out')
  105. raise
  106. except FileNotFoundError as e:
  107. logger.error(f'Python executable not found: {e}')
  108. raise
  109. except PermissionError as e:
  110. logger.error(
  111. f'Permission denied when trying to execute the build command:\n{e}'
  112. )
  113. raise
  114. except Exception as e:
  115. logger.error(f'An unexpected error occurred during the build process: {e}')
  116. raise
  117. logger.info(f'Image [{target_image_hash_name}] build finished.')
  118. if target_image_tag:
  119. image = self.docker_client.images.get(target_image_hash_name)
  120. image.tag(target_image_repo, target_image_tag)
  121. logger.info(
  122. f'Re-tagged image [{target_image_hash_name}] with more generic tag [{target_image_tag}]'
  123. )
  124. # Check if the image is built successfully
  125. image = self.docker_client.images.get(target_image_hash_name)
  126. if image is None:
  127. raise RuntimeError(
  128. f'Build failed: Image {target_image_hash_name} not found'
  129. )
  130. tags_str = (
  131. f'{target_image_source_tag}, {target_image_tag}'
  132. if target_image_tag
  133. else target_image_source_tag
  134. )
  135. logger.info(
  136. f'Image {target_image_repo} with tags [{tags_str}] built successfully'
  137. )
  138. return target_image_hash_name
  139. def image_exists(self, image_name: str, pull_from_repo: bool = True) -> bool:
  140. """Check if the image exists in the registry (try to pull it first) or in the local store.
  141. Args:
  142. image_name (str): The Docker image to check (<image repo>:<image tag>)
  143. pull_from_repo (bool): Whether to pull from the remote repo if the image not present locally
  144. Returns:
  145. bool: Whether the Docker image exists in the registry or in the local store
  146. """
  147. if not image_name:
  148. logger.error(f'Invalid image name: `{image_name}`')
  149. return False
  150. try:
  151. logger.debug(f'Checking, if image exists locally:\n{image_name}')
  152. self.docker_client.images.get(image_name)
  153. logger.debug('Image found locally.')
  154. return True
  155. except docker.errors.ImageNotFound:
  156. if not pull_from_repo:
  157. logger.debug(
  158. f'Image {image_name} {colorize("not found", TermColor.WARNING)} locally'
  159. )
  160. return False
  161. try:
  162. logger.debug(
  163. 'Image not found locally. Trying to pull it, please wait...'
  164. )
  165. layers: dict[str, dict[str, str]] = {}
  166. previous_layer_count = 0
  167. if ':' in image_name:
  168. image_repo, image_tag = image_name.split(':', 1)
  169. else:
  170. image_repo = image_name
  171. image_tag = None
  172. for line in self.docker_client.api.pull(
  173. image_repo, tag=image_tag, stream=True, decode=True
  174. ):
  175. self._output_build_progress(line, layers, previous_layer_count)
  176. previous_layer_count = len(layers)
  177. logger.debug('Image pulled')
  178. return True
  179. except docker.errors.ImageNotFound:
  180. logger.debug('Could not find image locally or in registry.')
  181. return False
  182. except Exception as e:
  183. msg = f'Image {colorize("could not be pulled", TermColor.ERROR)}: '
  184. ex_msg = str(e)
  185. if 'Not Found' in ex_msg:
  186. msg += 'image not found in registry.'
  187. else:
  188. msg += f'{ex_msg}'
  189. logger.debug(msg)
  190. return False
  191. def _output_logs(self, new_line: str) -> None:
  192. if not self.rolling_logger.is_enabled():
  193. logger.debug(new_line)
  194. else:
  195. self.rolling_logger.add_line(new_line)
  196. def _output_build_progress(
  197. self, current_line: dict, layers: dict, previous_layer_count: int
  198. ) -> None:
  199. if 'id' in current_line and 'progressDetail' in current_line:
  200. layer_id = current_line['id']
  201. if layer_id not in layers:
  202. layers[layer_id] = {'status': '', 'progress': '', 'last_logged': 0}
  203. if 'status' in current_line:
  204. layers[layer_id]['status'] = current_line['status']
  205. if 'progress' in current_line:
  206. layers[layer_id]['progress'] = current_line['progress']
  207. if 'progressDetail' in current_line:
  208. progress_detail = current_line['progressDetail']
  209. if 'total' in progress_detail and 'current' in progress_detail:
  210. total = progress_detail['total']
  211. current = progress_detail['current']
  212. percentage = min(
  213. (current / total) * 100, 100
  214. ) # Ensure it doesn't exceed 100%
  215. else:
  216. percentage = (
  217. 100 if layers[layer_id]['status'] == 'Download complete' else 0
  218. )
  219. if self.rolling_logger.is_enabled():
  220. self.rolling_logger.move_back(previous_layer_count)
  221. for lid, layer_data in sorted(layers.items()):
  222. self.rolling_logger.replace_current_line()
  223. status = layer_data['status']
  224. progress = layer_data['progress']
  225. if status == 'Download complete':
  226. self.rolling_logger.write_immediately(
  227. f'Layer {lid}: Download complete'
  228. )
  229. elif status == 'Already exists':
  230. self.rolling_logger.write_immediately(
  231. f'Layer {lid}: Already exists'
  232. )
  233. else:
  234. self.rolling_logger.write_immediately(
  235. f'Layer {lid}: {progress} {status}'
  236. )
  237. elif percentage != 0 and (
  238. percentage - layers[layer_id]['last_logged'] >= 10 or percentage == 100
  239. ):
  240. logger.debug(
  241. f'Layer {layer_id}: {layers[layer_id]["progress"]} {layers[layer_id]["status"]}'
  242. )
  243. layers[layer_id]['last_logged'] = percentage
  244. elif 'status' in current_line:
  245. logger.debug(current_line['status'])
  246. def _prune_old_cache_files(self, cache_dir: str, max_age_days: int = 7) -> None:
  247. """Prune cache files older than the specified number of days.
  248. Args:
  249. cache_dir (str): The path to the cache directory.
  250. max_age_days (int): The maximum age of cache files in days.
  251. """
  252. try:
  253. current_time = time.time()
  254. max_age_seconds = max_age_days * 24 * 60 * 60
  255. for root, _, files in os.walk(cache_dir):
  256. for file in files:
  257. file_path = os.path.join(root, file)
  258. try:
  259. file_age = current_time - os.path.getmtime(file_path)
  260. if file_age > max_age_seconds:
  261. os.remove(file_path)
  262. logger.debug(f'Removed old cache file: {file_path}')
  263. except Exception as e:
  264. logger.warning(f'Error processing cache file {file_path}: {e}')
  265. except Exception as e:
  266. logger.warning(f'Error during build cache pruning: {e}')
  267. def _is_cache_usable(self, cache_dir: str) -> bool:
  268. """Check if the cache directory is usable (exists and is writable).
  269. Args:
  270. cache_dir (str): The path to the cache directory.
  271. Returns:
  272. bool: True if the cache directory is usable, False otherwise.
  273. """
  274. if not os.path.exists(cache_dir):
  275. try:
  276. os.makedirs(cache_dir, exist_ok=True)
  277. logger.debug(f'Created cache directory: {cache_dir}')
  278. except OSError as e:
  279. logger.debug(f'Failed to create cache directory {cache_dir}: {e}')
  280. return False
  281. if not os.access(cache_dir, os.W_OK):
  282. logger.warning(
  283. f'Cache directory {cache_dir} is not writable. Caches will not be used for Docker builds.'
  284. )
  285. return False
  286. self._prune_old_cache_files(cache_dir)
  287. logger.debug(f'Cache directory {cache_dir} is usable')
  288. return True