modal_runtime.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286
  1. import os
  2. import tempfile
  3. import threading
  4. from pathlib import Path
  5. from typing import Callable, Generator
  6. import modal
  7. import requests
  8. import tenacity
  9. from openhands.core.config import AppConfig
  10. from openhands.core.logger import openhands_logger as logger
  11. from openhands.events import EventStream
  12. from openhands.runtime.impl.eventstream.eventstream_runtime import (
  13. EventStreamRuntime,
  14. LogBuffer,
  15. )
  16. from openhands.runtime.plugins import PluginRequirement
  17. from openhands.runtime.utils.command import get_remote_startup_command
  18. from openhands.runtime.utils.runtime_build import (
  19. BuildFromImageType,
  20. prep_build_folder,
  21. )
  22. from openhands.utils.async_utils import call_sync_from_async
# Process-local registry populated by ModalRuntime: maps a session id (sid) to
# the object id of the Modal sandbox created for that session, so a later
# runtime with attach_to_existing=True can look the sandbox up again.
# FIXME: this will not work in HA mode. We need a better way to track IDs
# (the dict lives only in this process's memory, so other processes or a
# restarted process will not see these entries).
MODAL_RUNTIME_IDS: dict[str, str] = {}
  25. # Modal's log generator returns strings, but the upstream LogBuffer expects bytes.
  26. def bytes_shim(string_generator) -> Generator[bytes, None, None]:
  27. for line in string_generator:
  28. yield line.encode('utf-8')
  29. class ModalLogBuffer(LogBuffer):
  30. """Synchronous buffer for Modal sandbox logs.
  31. This class provides a thread-safe way to collect, store, and retrieve logs
  32. from a Modal sandbox. It uses a list to store log lines and provides methods
  33. for appending, retrieving, and clearing logs.
  34. """
  35. def __init__(self, sandbox: modal.Sandbox):
  36. self.client_ready = False
  37. self.init_msg = 'Runtime client initialized.'
  38. self.buffer: list[str] = []
  39. self.lock = threading.Lock()
  40. self._stop_event = threading.Event()
  41. self.log_generator = bytes_shim(sandbox.stderr)
  42. self.log_stream_thread = threading.Thread(target=self.stream_logs)
  43. self.log_stream_thread.daemon = True
  44. self.log_stream_thread.start()
  45. class ModalRuntime(EventStreamRuntime):
  46. """This runtime will subscribe the event stream.
  47. When receive an event, it will send the event to runtime-client which run inside the Modal sandbox environment.
  48. Args:
  49. config (AppConfig): The application configuration.
  50. event_stream (EventStream): The event stream to subscribe to.
  51. sid (str, optional): The session ID. Defaults to 'default'.
  52. plugins (list[PluginRequirement] | None, optional): List of plugin requirements. Defaults to None.
  53. env_vars (dict[str, str] | None, optional): Environment variables to set. Defaults to None.
  54. """
  55. container_name_prefix = 'openhands-sandbox-'
  56. sandbox: modal.Sandbox | None
  57. def __init__(
  58. self,
  59. config: AppConfig,
  60. event_stream: EventStream,
  61. sid: str = 'default',
  62. plugins: list[PluginRequirement] | None = None,
  63. env_vars: dict[str, str] | None = None,
  64. status_message_callback: Callable | None = None,
  65. attach_to_existing: bool = False,
  66. ):
  67. assert config.modal_api_token_id, 'Modal API token id is required'
  68. assert config.modal_api_token_secret, 'Modal API token secret is required'
  69. self.config = config
  70. self.sandbox = None
  71. self.modal_client = modal.Client.from_credentials(
  72. config.modal_api_token_id, config.modal_api_token_secret
  73. )
  74. self.app = modal.App.lookup(
  75. 'openhands', create_if_missing=True, client=self.modal_client
  76. )
  77. # workspace_base cannot be used because we can't bind mount into a sandbox.
  78. if self.config.workspace_base is not None:
  79. logger.warning(
  80. 'Setting workspace_base is not supported in the modal runtime.'
  81. )
  82. # This value is arbitrary as it's private to the container
  83. self.container_port = 3000
  84. self.session = requests.Session()
  85. self.status_message_callback = status_message_callback
  86. self.base_container_image_id = self.config.sandbox.base_container_image
  87. self.runtime_container_image_id = self.config.sandbox.runtime_container_image
  88. self.action_semaphore = threading.Semaphore(1) # Ensure one action at a time
  89. # Buffer for container logs
  90. self.log_buffer: LogBuffer | None = None
  91. if self.config.sandbox.runtime_extra_deps:
  92. logger.debug(
  93. f'Installing extra user-provided dependencies in the runtime image: {self.config.sandbox.runtime_extra_deps}'
  94. )
  95. self.init_base_runtime(
  96. config,
  97. event_stream,
  98. sid,
  99. plugins,
  100. env_vars,
  101. status_message_callback,
  102. attach_to_existing,
  103. )
  104. async def connect(self):
  105. self.send_status_message('STATUS$STARTING_RUNTIME')
  106. logger.info(f'ModalRuntime `{self.sid}`')
  107. self.image = self._get_image_definition(
  108. self.base_container_image_id,
  109. self.runtime_container_image_id,
  110. self.config.sandbox.runtime_extra_deps,
  111. )
  112. if self.attach_to_existing:
  113. if self.sid in MODAL_RUNTIME_IDS:
  114. sandbox_id = MODAL_RUNTIME_IDS[self.sid]
  115. logger.info(f'Attaching to existing Modal sandbox: {sandbox_id}')
  116. self.sandbox = modal.Sandbox.from_id(
  117. sandbox_id, client=self.modal_client
  118. )
  119. else:
  120. self.send_status_message('STATUS$PREPARING_CONTAINER')
  121. await call_sync_from_async(
  122. self._init_sandbox,
  123. sandbox_workspace_dir=self.config.workspace_mount_path_in_sandbox,
  124. plugins=self.plugins,
  125. )
  126. self.send_status_message('STATUS$CONTAINER_STARTED')
  127. self.log_buffer = ModalLogBuffer(self.sandbox)
  128. if self.sandbox is None:
  129. raise Exception('Sandbox not initialized')
  130. tunnel = self.sandbox.tunnels()[self.container_port]
  131. self.api_url = tunnel.url
  132. logger.info(f'Container started. Server url: {self.api_url}')
  133. if not self.attach_to_existing:
  134. logger.info('Waiting for client to become ready...')
  135. self.send_status_message('STATUS$WAITING_FOR_CLIENT')
  136. self._wait_until_alive()
  137. self.setup_initial_env()
  138. if not self.attach_to_existing:
  139. self.send_status_message(' ')
  140. def _get_image_definition(
  141. self,
  142. base_container_image_id: str | None,
  143. runtime_container_image_id: str | None,
  144. runtime_extra_deps: str | None,
  145. ) -> modal.Image:
  146. if runtime_container_image_id:
  147. base_runtime_image = modal.Image.from_registry(runtime_container_image_id)
  148. elif base_container_image_id:
  149. build_folder = tempfile.mkdtemp()
  150. prep_build_folder(
  151. build_folder=Path(build_folder),
  152. base_image=base_container_image_id,
  153. build_from=BuildFromImageType.SCRATCH,
  154. extra_deps=runtime_extra_deps,
  155. )
  156. base_runtime_image = modal.Image.from_dockerfile(
  157. path=os.path.join(build_folder, 'Dockerfile'),
  158. context_mount=modal.Mount.from_local_dir(
  159. local_path=build_folder,
  160. remote_path='.', # to current WORKDIR
  161. ),
  162. )
  163. else:
  164. raise ValueError(
  165. 'Neither runtime container image nor base container image is set'
  166. )
  167. return base_runtime_image.run_commands(
  168. """
  169. # Disable bracketed paste
  170. # https://github.com/pexpect/pexpect/issues/669
  171. echo "set enable-bracketed-paste off" >> /etc/inputrc && \\
  172. echo 'export INPUTRC=/etc/inputrc' >> /etc/bash.bashrc
  173. """.strip()
  174. )
  175. @tenacity.retry(
  176. stop=tenacity.stop_after_attempt(5),
  177. wait=tenacity.wait_exponential(multiplier=1, min=4, max=60),
  178. )
  179. def _init_sandbox(
  180. self,
  181. sandbox_workspace_dir: str,
  182. plugins: list[PluginRequirement] | None = None,
  183. ):
  184. try:
  185. logger.info('Preparing to start container...')
  186. plugin_args = []
  187. if plugins is not None and len(plugins) > 0:
  188. plugin_args.append('--plugins')
  189. plugin_args.extend([plugin.name for plugin in plugins])
  190. # Combine environment variables
  191. environment: dict[str, str | None] = {
  192. 'port': str(self.container_port),
  193. 'PYTHONUNBUFFERED': '1',
  194. }
  195. if self.config.debug:
  196. environment['DEBUG'] = 'true'
  197. browsergym_args = []
  198. if self.config.sandbox.browsergym_eval_env is not None:
  199. browsergym_args = [
  200. '-browsergym-eval-env',
  201. self.config.sandbox.browsergym_eval_env,
  202. ]
  203. env_secret = modal.Secret.from_dict(environment)
  204. logger.debug(f'Sandbox workspace: {sandbox_workspace_dir}')
  205. sandbox_start_cmd = get_remote_startup_command(
  206. self.container_port,
  207. sandbox_workspace_dir,
  208. 'openhands' if self.config.run_as_openhands else 'root',
  209. self.config.sandbox.user_id,
  210. plugin_args,
  211. browsergym_args,
  212. )
  213. logger.debug(f'Starting container with command: {sandbox_start_cmd}')
  214. self.sandbox = modal.Sandbox.create(
  215. *sandbox_start_cmd,
  216. secrets=[env_secret],
  217. workdir='/openhands/code',
  218. encrypted_ports=[self.container_port],
  219. image=self.image,
  220. app=self.app,
  221. client=self.modal_client,
  222. timeout=60 * 60,
  223. )
  224. MODAL_RUNTIME_IDS[self.sid] = self.sandbox.object_id
  225. logger.info('Container started')
  226. except Exception as e:
  227. logger.error(f'Error: Instance {self.sid} FAILED to start container!\n')
  228. logger.exception(e)
  229. self.close()
  230. raise e
  231. def close(self):
  232. """Closes the ModalRuntime and associated objects."""
  233. # if self.temp_dir_handler:
  234. # self.temp_dir_handler.__exit__(None, None, None)
  235. if self.log_buffer:
  236. self.log_buffer.close()
  237. if self.session:
  238. self.session.close()
  239. if not self.attach_to_existing and self.sandbox:
  240. self.sandbox.terminate()