docker.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340
  1. import datetime
  2. import os
  3. import subprocess
  4. import time
  5. import docker
  6. from openhands import __version__ as oh_version
  7. from openhands.core.logger import RollingLogger
  8. from openhands.core.logger import openhands_logger as logger
  9. from openhands.runtime.builder.base import RuntimeBuilder
  10. class DockerRuntimeBuilder(RuntimeBuilder):
  11. def __init__(self, docker_client: docker.DockerClient):
  12. self.docker_client = docker_client
  13. version_info = self.docker_client.version()
  14. server_version = version_info.get('Version', '').replace('-', '.')
  15. if tuple(map(int, server_version.split('.')[:2])) < (18, 9):
  16. raise RuntimeError('Docker server version must be >= 18.09 to use BuildKit')
  17. self.rolling_logger = RollingLogger(max_lines=10)
  18. def build(
  19. self,
  20. path: str,
  21. tags: list[str],
  22. platform: str | None = None,
  23. use_local_cache: bool = False,
  24. extra_build_args: list[str] | None = None,
  25. ) -> str:
  26. """Builds a Docker image using BuildKit and handles the build logs appropriately.
  27. Args:
  28. path (str): The path to the Docker build context.
  29. tags (list[str]): A list of image tags to apply to the built image.
  30. platform (str, optional): The target platform for the build. Defaults to None.
  31. use_local_cache (bool, optional): Whether to use and update the local build cache. Defaults to True.
  32. extra_build_args (list[str], optional): Additional arguments to pass to the Docker build command. Defaults to None.
  33. Returns:
  34. str: The name of the built Docker image.
  35. Raises:
  36. RuntimeError: If the Docker server version is incompatible or if the build process fails.
  37. Note:
  38. This method uses Docker BuildKit for improved build performance and caching capabilities.
  39. If `use_local_cache` is True, it will attempt to use and update the build cache in a local directory.
  40. The `extra_build_args` parameter allows for passing additional Docker build arguments as needed.
  41. """
  42. self.docker_client = docker.from_env()
  43. version_info = self.docker_client.version()
  44. server_version = version_info.get('Version', '').replace('-', '.')
  45. if tuple(map(int, server_version.split('.'))) < (18, 9):
  46. raise RuntimeError('Docker server version must be >= 18.09 to use BuildKit')
  47. target_image_hash_name = tags[0]
  48. target_image_repo, target_image_source_tag = target_image_hash_name.split(':')
  49. target_image_tag = tags[1].split(':')[1] if len(tags) > 1 else None
  50. buildx_cmd = [
  51. 'docker',
  52. 'buildx',
  53. 'build',
  54. '--progress=plain',
  55. f'--build-arg=OPENHANDS_RUNTIME_VERSION={oh_version}',
  56. f'--build-arg=OPENHANDS_RUNTIME_BUILD_TIME={datetime.datetime.now().isoformat()}',
  57. f'--tag={target_image_hash_name}',
  58. '--load',
  59. ]
  60. # Include the platform argument only if platform is specified
  61. if platform:
  62. buildx_cmd.append(f'--platform={platform}')
  63. cache_dir = '/tmp/.buildx-cache'
  64. if use_local_cache and self._is_cache_usable(cache_dir):
  65. buildx_cmd.extend(
  66. [
  67. f'--cache-from=type=local,src={cache_dir}',
  68. f'--cache-to=type=local,dest={cache_dir},mode=max',
  69. ]
  70. )
  71. if extra_build_args:
  72. buildx_cmd.extend(extra_build_args)
  73. buildx_cmd.append(path) # must be last!
  74. self.rolling_logger.start(
  75. '================ DOCKER BUILD STARTED ================'
  76. )
  77. try:
  78. process = subprocess.Popen(
  79. buildx_cmd,
  80. stdout=subprocess.PIPE,
  81. stderr=subprocess.STDOUT,
  82. universal_newlines=True,
  83. bufsize=1,
  84. )
  85. if process.stdout:
  86. for line in iter(process.stdout.readline, ''):
  87. line = line.strip()
  88. if line:
  89. self._output_logs(line)
  90. return_code = process.wait()
  91. if return_code != 0:
  92. raise subprocess.CalledProcessError(
  93. return_code,
  94. process.args,
  95. output=process.stdout.read() if process.stdout else None,
  96. stderr=process.stderr.read() if process.stderr else None,
  97. )
  98. except subprocess.CalledProcessError as e:
  99. logger.error(f'Image build failed:\n{e}')
  100. logger.error(f'Command output:\n{e.output}')
  101. raise
  102. except subprocess.TimeoutExpired:
  103. logger.error('Image build timed out')
  104. raise
  105. except FileNotFoundError as e:
  106. logger.error(f'Python executable not found: {e}')
  107. raise
  108. except PermissionError as e:
  109. logger.error(
  110. f'Permission denied when trying to execute the build command:\n{e}'
  111. )
  112. raise
  113. except Exception as e:
  114. logger.error(f'An unexpected error occurred during the build process: {e}')
  115. raise
  116. logger.info(f'Image [{target_image_hash_name}] build finished.')
  117. if target_image_tag:
  118. image = self.docker_client.images.get(target_image_hash_name)
  119. image.tag(target_image_repo, target_image_tag)
  120. logger.info(
  121. f'Re-tagged image [{target_image_hash_name}] with more generic tag [{target_image_tag}]'
  122. )
  123. # Check if the image is built successfully
  124. image = self.docker_client.images.get(target_image_hash_name)
  125. if image is None:
  126. raise RuntimeError(
  127. f'Build failed: Image {target_image_hash_name} not found'
  128. )
  129. tags_str = (
  130. f'{target_image_source_tag}, {target_image_tag}'
  131. if target_image_tag
  132. else target_image_source_tag
  133. )
  134. logger.info(
  135. f'Image {target_image_repo} with tags [{tags_str}] built successfully'
  136. )
  137. return target_image_hash_name
  138. def image_exists(self, image_name: str, pull_from_repo: bool = True) -> bool:
  139. """Check if the image exists in the registry (try to pull it first) or in the local store.
  140. Args:
  141. image_name (str): The Docker image to check (<image repo>:<image tag>)
  142. pull_from_repo (bool): Whether to pull from the remote repo if the image not present locally
  143. Returns:
  144. bool: Whether the Docker image exists in the registry or in the local store
  145. """
  146. if not image_name:
  147. logger.error(f'Invalid image name: `{image_name}`')
  148. return False
  149. try:
  150. logger.debug(f'Checking, if image exists locally:\n{image_name}')
  151. self.docker_client.images.get(image_name)
  152. logger.debug('Image found locally.')
  153. return True
  154. except docker.errors.ImageNotFound:
  155. if not pull_from_repo:
  156. logger.debug(f'Image {image_name} not found locally')
  157. return False
  158. try:
  159. logger.debug(
  160. 'Image not found locally. Trying to pull it, please wait...'
  161. )
  162. layers: dict[str, dict[str, str]] = {}
  163. previous_layer_count = 0
  164. if ':' in image_name:
  165. image_repo, image_tag = image_name.split(':', 1)
  166. else:
  167. image_repo = image_name
  168. image_tag = None
  169. for line in self.docker_client.api.pull(
  170. image_repo, tag=image_tag, stream=True, decode=True
  171. ):
  172. self._output_build_progress(line, layers, previous_layer_count)
  173. previous_layer_count = len(layers)
  174. logger.debug('Image pulled')
  175. return True
  176. except docker.errors.ImageNotFound:
  177. logger.debug('Could not find image locally or in registry.')
  178. return False
  179. except Exception as e:
  180. msg = 'Image could not be pulled: '
  181. ex_msg = str(e)
  182. if 'Not Found' in ex_msg:
  183. msg += 'image not found in registry.'
  184. else:
  185. msg += f'{ex_msg}'
  186. logger.debug(msg)
  187. return False
  188. def _output_logs(self, new_line: str) -> None:
  189. if not self.rolling_logger.is_enabled():
  190. logger.debug(new_line)
  191. else:
  192. self.rolling_logger.add_line(new_line)
  193. def _output_build_progress(
  194. self, current_line: dict, layers: dict, previous_layer_count: int
  195. ) -> None:
  196. if 'id' in current_line and 'progressDetail' in current_line:
  197. layer_id = current_line['id']
  198. if layer_id not in layers:
  199. layers[layer_id] = {'status': '', 'progress': '', 'last_logged': 0}
  200. if 'status' in current_line:
  201. layers[layer_id]['status'] = current_line['status']
  202. if 'progress' in current_line:
  203. layers[layer_id]['progress'] = current_line['progress']
  204. if 'progressDetail' in current_line:
  205. progress_detail = current_line['progressDetail']
  206. if 'total' in progress_detail and 'current' in progress_detail:
  207. total = progress_detail['total']
  208. current = progress_detail['current']
  209. percentage = min(
  210. (current / total) * 100, 100
  211. ) # Ensure it doesn't exceed 100%
  212. else:
  213. percentage = (
  214. 100 if layers[layer_id]['status'] == 'Download complete' else 0
  215. )
  216. if self.rolling_logger.is_enabled():
  217. self.rolling_logger.move_back(previous_layer_count)
  218. for lid, layer_data in sorted(layers.items()):
  219. self.rolling_logger.replace_current_line()
  220. status = layer_data['status']
  221. progress = layer_data['progress']
  222. if status == 'Download complete':
  223. self.rolling_logger.write_immediately(
  224. f'Layer {lid}: Download complete'
  225. )
  226. elif status == 'Already exists':
  227. self.rolling_logger.write_immediately(
  228. f'Layer {lid}: Already exists'
  229. )
  230. else:
  231. self.rolling_logger.write_immediately(
  232. f'Layer {lid}: {progress} {status}'
  233. )
  234. elif percentage != 0 and (
  235. percentage - layers[layer_id]['last_logged'] >= 10 or percentage == 100
  236. ):
  237. logger.debug(
  238. f'Layer {layer_id}: {layers[layer_id]["progress"]} {layers[layer_id]["status"]}'
  239. )
  240. layers[layer_id]['last_logged'] = percentage
  241. elif 'status' in current_line:
  242. logger.debug(current_line['status'])
  243. def _prune_old_cache_files(self, cache_dir: str, max_age_days: int = 7) -> None:
  244. """
  245. Prune cache files older than the specified number of days.
  246. Args:
  247. cache_dir (str): The path to the cache directory.
  248. max_age_days (int): The maximum age of cache files in days.
  249. """
  250. try:
  251. current_time = time.time()
  252. max_age_seconds = max_age_days * 24 * 60 * 60
  253. for root, _, files in os.walk(cache_dir):
  254. for file in files:
  255. file_path = os.path.join(root, file)
  256. try:
  257. file_age = current_time - os.path.getmtime(file_path)
  258. if file_age > max_age_seconds:
  259. os.remove(file_path)
  260. logger.debug(f'Removed old cache file: {file_path}')
  261. except Exception as e:
  262. logger.warning(f'Error processing cache file {file_path}: {e}')
  263. except Exception as e:
  264. logger.warning(f'Error during build cache pruning: {e}')
  265. def _is_cache_usable(self, cache_dir: str) -> bool:
  266. """
  267. Check if the cache directory is usable (exists and is writable).
  268. Args:
  269. cache_dir (str): The path to the cache directory.
  270. Returns:
  271. bool: True if the cache directory is usable, False otherwise.
  272. """
  273. if not os.path.exists(cache_dir):
  274. try:
  275. os.makedirs(cache_dir, exist_ok=True)
  276. logger.debug(f'Created cache directory: {cache_dir}')
  277. except OSError as e:
  278. logger.debug(f'Failed to create cache directory {cache_dir}: {e}')
  279. return False
  280. if not os.access(cache_dir, os.W_OK):
  281. logger.warning(
  282. f'Cache directory {cache_dir} is not writable. Caches will not be used for Docker builds.'
  283. )
  284. return False
  285. self._prune_old_cache_files(cache_dir)
  286. logger.debug(f'Cache directory {cache_dir} is usable')
  287. return True