# sandbox.py
  1. import os
  2. import sys
  3. import uuid
  4. import time
  5. import select
  6. import docker
  7. from typing import Tuple, Dict, List
  8. from collections import namedtuple
  9. import atexit
  10. InputType = namedtuple("InputType", ["content"])
  11. OutputType = namedtuple("OutputType", ["content"])
  12. DIRECTORY_REWRITE = os.getenv(
  13. "DIRECTORY_REWRITE", ""
  14. ) # helpful for docker-in-docker scenarios
  15. CONTAINER_IMAGE = os.getenv("SANDBOX_CONTAINER_IMAGE", "opendevin/sandbox:v0.1")
  16. # FIXME: On some containers, the devin user doesn't have enough permission, e.g. to install packages
  17. # How do we make this more flexible?
  18. RUN_AS_DEVIN = os.getenv("RUN_AS_DEVIN", "true").lower() != "false"
  19. USER_ID = 1000
  20. if os.getenv("SANDBOX_USER_ID") is not None:
  21. USER_ID = int(os.getenv("SANDBOX_USER_ID", ""))
  22. elif hasattr(os, "getuid"):
  23. USER_ID = os.getuid()
  24. class BackgroundCommand:
  25. def __init__(self, id: int, command: str, result):
  26. self.id = id
  27. self.command = command
  28. self.result = result
  29. def parse_docker_exec_output(self, logs: bytes) -> Tuple[bytes, bytes]:
  30. res = b""
  31. tail = b""
  32. i = 0
  33. while i < len(logs):
  34. prefix = logs[i : i + 8]
  35. if len(prefix) < 8:
  36. msg_type = prefix[0:1]
  37. if msg_type in [b"\x00", b"\x01", b"\x02", b"\x03"]:
  38. tail = prefix
  39. break
  40. msg_type = prefix[0:1]
  41. padding = prefix[1:4]
  42. if (
  43. msg_type in [b"\x00", b"\x01", b"\x02", b"\x03"]
  44. and padding == b"\x00\x00\x00"
  45. ):
  46. msg_length = int.from_bytes(prefix[4:8]) # , byteorder='big'
  47. res += logs[i + 8 : i + 8 + msg_length]
  48. i += 8 + msg_length
  49. else:
  50. res += logs[i : i + 1]
  51. i += 1
  52. return res, tail
  53. def read_logs(self) -> str:
  54. # TODO: get an exit code if process is exited
  55. logs = b""
  56. last_remains = b""
  57. while True:
  58. ready_to_read, _, _ = select.select([self.result.output], [], [], 0.1) # type: ignore[has-type]
  59. if ready_to_read:
  60. data = self.result.output.read(4096) # type: ignore[has-type]
  61. if not data:
  62. break
  63. chunk, last_remains = self.parse_docker_exec_output(last_remains + data)
  64. logs += chunk
  65. else:
  66. break
  67. return (logs + last_remains).decode("utf-8")
  68. def kill(self):
  69. # FIXME: this doesn't actually kill the process!
  70. self.result.output.close()
  71. class DockerInteractive:
  72. closed = False
  73. cur_background_id = 0
  74. background_commands: Dict[int, BackgroundCommand] = {}
  75. def __init__(
  76. self,
  77. workspace_dir: str | None = None,
  78. container_image: str | None = None,
  79. timeout: int = 120,
  80. id: str | None = None,
  81. ):
  82. if id is not None:
  83. self.instance_id = id
  84. else:
  85. self.instance_id = str(uuid.uuid4())
  86. if workspace_dir is not None:
  87. assert os.path.exists(
  88. workspace_dir
  89. ), f"Directory {workspace_dir} does not exist."
  90. # expand to absolute path
  91. self.workspace_dir = os.path.abspath(workspace_dir)
  92. else:
  93. self.workspace_dir = os.getcwd()
  94. print(f"workspace unspecified, using current directory: {workspace_dir}")
  95. if DIRECTORY_REWRITE != "":
  96. parts = DIRECTORY_REWRITE.split(":")
  97. self.workspace_dir = self.workspace_dir.replace(parts[0], parts[1])
  98. print("Rewriting workspace directory to:", self.workspace_dir)
  99. # TODO: this timeout is actually essential - need a better way to set it
  100. # if it is too short, the container may still waiting for previous
  101. # command to finish (e.g. apt-get update)
  102. # if it is too long, the user may have to wait for a unnecessary long time
  103. self.timeout: int = timeout
  104. if container_image is None:
  105. self.container_image = CONTAINER_IMAGE
  106. else:
  107. self.container_image = container_image
  108. self.container_name = f"sandbox-{self.instance_id}"
  109. self.restart_docker_container()
  110. if RUN_AS_DEVIN:
  111. self.setup_devin_user()
  112. atexit.register(self.cleanup)
  113. def setup_devin_user(self):
  114. exit_code, logs = self.container.exec_run(
  115. [
  116. "/bin/bash",
  117. "-c",
  118. f'useradd --shell /bin/bash -u {USER_ID} -o -c "" -m devin',
  119. ],
  120. workdir="/workspace",
  121. )
  122. def get_exec_cmd(self, cmd: str) -> List[str]:
  123. if RUN_AS_DEVIN:
  124. return ["su", "devin", "-c", cmd]
  125. else:
  126. return ["/bin/bash", "-c", cmd]
  127. def read_logs(self, id) -> str:
  128. if id not in self.background_commands:
  129. raise ValueError("Invalid background command id")
  130. bg_cmd = self.background_commands[id]
  131. return bg_cmd.read_logs()
  132. def execute(self, cmd: str) -> Tuple[int, str]:
  133. # TODO: each execute is not stateful! We need to keep track of the current working directory
  134. exit_code, logs = self.container.exec_run(
  135. self.get_exec_cmd(cmd), workdir="/workspace"
  136. )
  137. return exit_code, logs.decode("utf-8")
  138. def execute_in_background(self, cmd: str) -> BackgroundCommand:
  139. result = self.container.exec_run(
  140. self.get_exec_cmd(cmd), socket=True, workdir="/workspace"
  141. )
  142. result.output._sock.setblocking(0)
  143. bg_cmd = BackgroundCommand(self.cur_background_id, cmd, result)
  144. self.background_commands[bg_cmd.id] = bg_cmd
  145. self.cur_background_id += 1
  146. return bg_cmd
  147. def kill_background(self, id: int) -> BackgroundCommand:
  148. if id not in self.background_commands:
  149. raise ValueError("Invalid background command id")
  150. bg_cmd = self.background_commands[id]
  151. bg_cmd.kill()
  152. self.background_commands.pop(id)
  153. return bg_cmd
  154. def close(self):
  155. self.stop_docker_container()
  156. self.closed = True
  157. def stop_docker_container(self):
  158. docker_client = docker.from_env()
  159. try:
  160. container = docker_client.containers.get(self.container_name)
  161. container.stop()
  162. container.remove()
  163. elapsed = 0
  164. while container.status != "exited":
  165. time.sleep(1)
  166. elapsed += 1
  167. if elapsed > self.timeout:
  168. break
  169. container = docker_client.containers.get(self.container_name)
  170. except docker.errors.NotFound:
  171. pass
  172. def restart_docker_container(self):
  173. self.stop_docker_container()
  174. docker_client = docker.from_env()
  175. try:
  176. self.container = docker_client.containers.run(
  177. self.container_image,
  178. command="tail -f /dev/null",
  179. network_mode="host",
  180. working_dir="/workspace",
  181. name=self.container_name,
  182. detach=True,
  183. volumes={self.workspace_dir: {"bind": "/workspace", "mode": "rw"}},
  184. )
  185. except Exception as e:
  186. print(f"Failed to start container: {e}")
  187. raise e
  188. # wait for container to be ready
  189. elapsed = 0
  190. while self.container.status != "running":
  191. if self.container.status == "exited":
  192. print("container exited")
  193. print("container logs:")
  194. print(self.container.logs())
  195. break
  196. time.sleep(1)
  197. elapsed += 1
  198. self.container = docker_client.containers.get(self.container_name)
  199. if elapsed > self.timeout:
  200. break
  201. if self.container.status != "running":
  202. raise Exception("Failed to start container")
  203. # clean up the container, cannot do it in __del__ because the python interpreter is already shutting down
  204. def cleanup(self):
  205. if self.closed:
  206. return
  207. self.container.remove(force=True)
  208. if __name__ == "__main__":
  209. import argparse
  210. parser = argparse.ArgumentParser(description="Interactive Docker container")
  211. parser.add_argument(
  212. "-d",
  213. "--directory",
  214. type=str,
  215. default=None,
  216. help="The directory to mount as the workspace in the Docker container.",
  217. )
  218. args = parser.parse_args()
  219. docker_interactive = DockerInteractive(
  220. workspace_dir=args.directory,
  221. )
  222. print("Interactive Docker container started. Type 'exit' or use Ctrl+C to exit.")
  223. bg_cmd = docker_interactive.execute_in_background(
  224. "while true; do echo 'dot ' && sleep 1; done"
  225. )
  226. sys.stdout.flush()
  227. try:
  228. while True:
  229. try:
  230. user_input = input(">>> ")
  231. except EOFError:
  232. print("\nExiting...")
  233. break
  234. if user_input.lower() == "exit":
  235. print("Exiting...")
  236. break
  237. if user_input.lower() == "kill":
  238. docker_interactive.kill_background(bg_cmd.id)
  239. print("Background process killed")
  240. continue
  241. exit_code, output = docker_interactive.execute(user_input)
  242. print("exit code:", exit_code)
  243. print(output + "\n", end="")
  244. if bg_cmd.id in docker_interactive.background_commands:
  245. logs = docker_interactive.read_logs(bg_cmd.id)
  246. print("background logs:", logs, "\n")
  247. sys.stdout.flush()
  248. except KeyboardInterrupt:
  249. print("\nExiting...")
  250. docker_interactive.close()