ssh_box.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442
  1. import atexit
  2. import os
  3. import platform
  4. import sys
  5. import time
  6. import uuid
  7. import asyncio
  8. from collections import namedtuple
  9. from typing import Dict, List, Tuple, Union
  10. import docker
  11. from pexpect import pxssh
  12. from opendevin import config
  13. from opendevin.logger import opendevin_logger as logger
  14. from opendevin.sandbox.sandbox import Sandbox, BackgroundCommand
  15. from opendevin.schema import ConfigType
  16. from opendevin.utils import find_available_tcp_port
  17. from opendevin.exceptions import SandboxInvalidBackgroundCommandError
  18. from opendevin.sandbox.jupyter_kernel import JupyterKernel
  # Single-field records wrapping one piece of sandbox input / output.
  InputType = namedtuple('InputType', ['content'])
  OutputType = namedtuple('OutputType', ['content'])

  # Directory inside the container used as the working directory.
  SANDBOX_WORKSPACE_DIR = '/workspace'

  CONTAINER_IMAGE = config.get(ConfigType.SANDBOX_CONTAINER_IMAGE)
  SSH_HOSTNAME = config.get(ConfigType.SSH_HOSTNAME)

  # An explicit USE_HOST_NETWORK setting wins (anything except 'false',
  # case-insensitively, enables it); when unset, host networking is enabled
  # on Linux only.
  USE_HOST_NETWORK = (
      config.get(ConfigType.USE_HOST_NETWORK).lower() != 'false'
      if config.get(ConfigType.USE_HOST_NETWORK) is not None
      else platform.system() == 'Linux'
  )

  # FIXME: On some containers, the devin user doesn't have enough permission, e.g. to install packages
  # How do we make this more flexible?
  RUN_AS_DEVIN = config.get('RUN_AS_DEVIN').lower() != 'false'

  # uid for the sandbox user: the configured SANDBOX_USER_ID if it is set and
  # non-empty, otherwise the current process uid where the platform provides
  # os.getuid, otherwise 1000.
  SANDBOX_USER_ID = config.get('SANDBOX_USER_ID')
  if SANDBOX_USER_ID:
      USER_ID = int(SANDBOX_USER_ID)
  elif hasattr(os, 'getuid'):
      USER_ID = os.getuid()
  else:
      USER_ID = 1000
  class DockerSSHBox(Sandbox):
      """Sandbox that runs commands in a Docker container reached over SSH.

      Construction (re)creates a container running ``sshd``, provisions a
      login user with a random password, and opens a ``pxssh`` session that
      ``execute`` reuses for every foreground command. Background commands are
      started through ``docker exec`` instead and tracked per instance.
      """

      instance_id: str
      container_image: str
      container_name_prefix = 'opendevin-sandbox-'
      container_name: str
      container: docker.models.containers.Container
      docker_client: docker.DockerClient

      _ssh_password: str
      _ssh_port: int

      cur_background_id = 0
      # Created per instance in __init__; a class-level dict would be shared
      # by every box and mix up their background command ids.
      background_commands: Dict[int, BackgroundCommand]

      def __init__(
          self,
          container_image: str | None = None,
          timeout: int = 120,
          sid: str | None = None,
      ):
          """Start the sandbox container and connect to it.

          Args:
              container_image: Image to run; defaults to ``CONTAINER_IMAGE``.
              timeout: Seconds to wait for a foreground command's prompt and
                  for container start/stop polling.
              sid: Identifier used in the container name; a random UUID is
                  generated when omitted.

          Raises:
              Exception: If Docker is unreachable, the container does not
                  start, or user provisioning fails.
          """
          # Initialize docker client. Throws an exception if Docker is not reachable.
          try:
              self.docker_client = docker.from_env()
          except Exception:
              logger.exception(
                  'Please check Docker is running using `docker ps`.', exc_info=False)
              raise

          self.instance_id = sid if sid is not None else str(uuid.uuid4())

          # TODO: this timeout is actually essential - need a better way to set it
          # if it is too short, the container may still waiting for previous
          # command to finish (e.g. apt-get update)
          # if it is too long, the user may have to wait for a unnecessary long time
          self.timeout = timeout
          self.container_image = CONTAINER_IMAGE if container_image is None else container_image
          self.container_name = self.container_name_prefix + self.instance_id

          self.background_commands = {}

          # set up random user password
          self._ssh_password = str(uuid.uuid4())
          self._ssh_port = find_available_tcp_port()

          # Register cleanup before the container exists so that a failure in
          # any of the setup steps below does not leave the container behind.
          atexit.register(self.close)

          # always restart the container, cuz the initial be regarded as a new session
          self.restart_docker_container()
          self.setup_user()
          self.start_ssh_session()

      def _exec_checked(self, shell_cmd: str, failure: str) -> None:
          """Run ``shell_cmd`` via bash in the container; raise on non-zero exit.

          The raised message is ``'<failure>: <logs>'``.
          """
          exit_code, logs = self.container.exec_run(
              ['/bin/bash', '-c', shell_cmd],
              workdir=SANDBOX_WORKSPACE_DIR,
          )
          if exit_code != 0:
              raise Exception(f'{failure}: {logs}')

      def setup_user(self):
          """Provision the account that the SSH session will log in as.

          Grants passwordless sudo to the ``sudo`` group, removes any existing
          ``opendevin`` user, then either recreates it with uid ``USER_ID`` and
          the random SSH password (``RUN_AS_DEVIN``) or sets that password on
          ``root``. Finally writes the hostname file.

          Raises:
              Exception: If any of the checked provisioning commands fails.
          """
          # Make users sudoers passwordless
          # TODO(sandbox): add this line in the Dockerfile for next minor version of docker image
          self._exec_checked(
              r"echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers",
              'Failed to make all users passwordless sudoers in sandbox',
          )

          # Check if the opendevin user exists; `id -u` exits 0 only if it does.
          exit_code, _ = self.container.exec_run(
              ['/bin/bash', '-c', 'id -u opendevin'],
              workdir=SANDBOX_WORKSPACE_DIR,
          )
          if exit_code == 0:
              # User exists, delete it
              self._exec_checked(
                  'userdel -r opendevin',
                  'Failed to remove opendevin user in sandbox',
              )

          if RUN_AS_DEVIN:
              # Create the opendevin user
              self._exec_checked(
                  f'useradd -rm -d /home/opendevin -s /bin/bash -g root -G sudo -u {USER_ID} opendevin',
                  'Failed to create opendevin user in sandbox',
              )
              self._exec_checked(
                  f"echo 'opendevin:{self._ssh_password}' | chpasswd",
                  'Failed to set password in sandbox',
              )
          else:
              # change password for root
              self._exec_checked(
                  f"echo 'root:{self._ssh_password}' | chpasswd",
                  'Failed to set password for root in sandbox',
              )

          # The result of this command is not checked.
          self.container.exec_run(
              ['/bin/bash', '-c', "echo 'opendevin-sandbox' > /etc/hostname"],
              workdir=SANDBOX_WORKSPACE_DIR,
          )

      def start_ssh_session(self):
          """Open the pxssh session used by ``execute`` and cd into the workspace."""
          # start ssh session at the background
          self.ssh = pxssh.pxssh()
          hostname = SSH_HOSTNAME
          username = 'opendevin' if RUN_AS_DEVIN else 'root'
          logger.info(
              f"Connecting to {username}@{hostname} via ssh. If you encounter any issues, you can try `ssh -v -p {self._ssh_port} {username}@{hostname}` with the password '{self._ssh_password}' and report the issue on GitHub."
          )
          self.ssh.login(hostname, username, self._ssh_password,
                         port=self._ssh_port)

          # Fix: https://github.com/pexpect/pexpect/issues/669
          self.ssh.sendline("bind 'set enable-bracketed-paste off'")
          self.ssh.prompt()
          # cd to workspace
          self.ssh.sendline(f'cd {SANDBOX_WORKSPACE_DIR}')
          self.ssh.prompt()

      def get_exec_cmd(self, cmd: str) -> List[str]:
          """Return the argv that runs ``cmd`` as the sandbox user via docker exec."""
          if RUN_AS_DEVIN:
              return ['su', 'opendevin', '-c', cmd]
          else:
              return ['/bin/bash', '-c', cmd]

      def read_logs(self, id) -> str:
          """Return the logs of background command ``id``.

          Raises:
              SandboxInvalidBackgroundCommandError: If ``id`` is not tracked.
          """
          if id not in self.background_commands:
              raise SandboxInvalidBackgroundCommandError()
          bg_cmd = self.background_commands[id]
          return bg_cmd.read_logs()

      @staticmethod
      def _strip_echo(raw: bytes, sent: str) -> str:
          """Decode session output and drop the echoed command from its start.

          ``str.lstrip(sent)`` would treat ``sent`` as a set of characters
          rather than a prefix, so the prefix is removed explicitly.
          """
          return raw.decode('utf-8').removeprefix(sent).strip()

      def execute(self, cmd: str) -> Tuple[int, str]:
          """Run ``cmd`` in the SSH session and wait for the prompt.

          Returns:
              ``(exit_code, output)``. If the prompt does not return within
              ``self.timeout`` seconds, SIGINT is sent and ``-1`` is returned
              together with a timeout message containing the output so far.
          """
          # use self.ssh
          self.ssh.sendline(cmd)
          success = self.ssh.prompt(timeout=self.timeout)
          if not success:
              logger.exception(
                  'Command timed out, killing process...', exc_info=False)
              # send a SIGINT to the process
              self.ssh.sendintr()
              self.ssh.prompt()
              command_output = self._strip_echo(self.ssh.before, cmd)
              return -1, f'Command: "{cmd}" timed out. Sending SIGINT to the process: {command_output}'
          command_output = self._strip_echo(self.ssh.before, cmd)

          # get the exit code
          self.ssh.sendline('echo $?')
          self.ssh.prompt()
          # remove the echo $? itself
          exit_code = int(self._strip_echo(self.ssh.before, 'echo $?'))
          return exit_code, command_output

      def execute_in_background(self, cmd: str) -> BackgroundCommand:
          """Start ``cmd`` via docker exec and register it as a background command."""
          result = self.container.exec_run(
              self.get_exec_cmd(cmd), socket=True, workdir=SANDBOX_WORKSPACE_DIR
          )
          # Put the output socket in non-blocking mode.
          result.output._sock.setblocking(0)
          pid = self.get_pid(cmd)
          bg_cmd = BackgroundCommand(self.cur_background_id, cmd, result, pid)
          self.background_commands[bg_cmd.id] = bg_cmd
          self.cur_background_id += 1
          return bg_cmd

      def get_pid(self, cmd: str) -> str | None:
          """Return the pid (as text) of the first ``ps aux`` row containing ``cmd``.

          The row is matched against the space-joined ``get_exec_cmd(cmd)``;
          ``None`` is returned when no row matches.
          """
          exec_result = self.container.exec_run('ps aux')
          processes = exec_result.output.decode('utf-8').splitlines()
          cmd = ' '.join(self.get_exec_cmd(cmd))

          for process in processes:
              if cmd in process:
                  pid = process.split()[1]  # second column is the pid
                  return pid
          return None

      def kill_background(self, id: int) -> BackgroundCommand:
          """Kill background command ``id``, close its output, and stop tracking it.

          Raises:
              SandboxInvalidBackgroundCommandError: If ``id`` is not tracked.
          """
          if id not in self.background_commands:
              raise SandboxInvalidBackgroundCommandError()
          bg_cmd = self.background_commands[id]
          if bg_cmd.pid is not None:
              self.container.exec_run(
                  f'kill -9 {bg_cmd.pid}', workdir=SANDBOX_WORKSPACE_DIR)
          bg_cmd.result.output.close()
          self.background_commands.pop(id)
          return bg_cmd

      def stop_docker_container(self):
          """Stop and remove this box's container, then wait until it is gone.

          Does nothing if no container with this name exists. Waiting is
          bounded by ``self.timeout`` one-second polls.
          """
          try:
              container = self.docker_client.containers.get(self.container_name)
              container.stop()
              container.remove()
              elapsed = 0
              while elapsed <= self.timeout:
                  # Raises NotFound (handled below) once the container is gone.
                  self.docker_client.containers.get(self.container_name)
                  time.sleep(1)
                  elapsed += 1
          except docker.errors.NotFound:
              pass

      def is_container_running(self):
          """Return True, and cache the container, if it exists and is running."""
          try:
              container = self.docker_client.containers.get(self.container_name)
              if container.status == 'running':
                  self.container = container
                  return True
              return False
          except docker.errors.NotFound:
              return False

      def _get_port_mapping(self):
          """Return the ports to publish when host networking is not used."""
          return {
              f'{self._ssh_port}/tcp': self._ssh_port
          }

      def restart_docker_container(self):
          """Remove any existing container with this name and start a fresh one.

          The container runs ``sshd`` on ``self._ssh_port`` with the configured
          workspace path mounted read-write at ``SANDBOX_WORKSPACE_DIR``.

          Raises:
              docker.errors.DockerException: If stopping the old container fails.
              Exception: If the new container cannot be started or does not
                  reach the ``running`` state.
          """
          try:
              self.stop_docker_container()
              logger.info('Container stopped')
          except docker.errors.DockerException:
              logger.exception('Failed to stop container', exc_info=False)
              raise

          try:
              network_kwargs: Dict[str, Union[str, Dict[str, int]]] = {}
              if USE_HOST_NETWORK:
                  network_kwargs['network_mode'] = 'host'
              else:
                  # FIXME: This is a temporary workaround for Mac OS
                  network_kwargs['ports'] = self._get_port_mapping()
                  logger.warning(
                      ('Using port forwarding. '
                       'Server started by OpenDevin will not be accessible from the host machine at the moment. '
                       'See https://github.com/OpenDevin/OpenDevin/issues/897 for more information.'
                       )
                  )

              mount_dir = config.get('WORKSPACE_MOUNT_PATH')
              logger.info('Mounting workspace directory: %s', mount_dir)
              # start the container
              self.container = self.docker_client.containers.run(
                  self.container_image,
                  # allow root login
                  command=f"/usr/sbin/sshd -D -p {self._ssh_port} -o 'PermitRootLogin=yes'",
                  **network_kwargs,
                  working_dir=SANDBOX_WORKSPACE_DIR,
                  name=self.container_name,
                  hostname='opendevin_sandbox',
                  detach=True,
                  volumes={
                      mount_dir: {
                          'bind': SANDBOX_WORKSPACE_DIR,
                          'mode': 'rw'
                      },
                  },
              )
              logger.info('Container started')
          except Exception:
              logger.exception('Failed to start container', exc_info=False)
              raise

          # wait for container to be ready
          elapsed = 0
          while self.container.status != 'running':
              if self.container.status == 'exited':
                  logger.info('container exited')
                  logger.info('container logs:')
                  logger.info(self.container.logs())
                  break
              time.sleep(1)
              elapsed += 1
              # Re-fetch the container to observe its current status.
              self.container = self.docker_client.containers.get(
                  self.container_name)
              logger.info(
                  f'waiting for container to start: {elapsed}, container status: {self.container.status}')
              if elapsed > self.timeout:
                  break
          if self.container.status != 'running':
              raise Exception('Failed to start container')

      # clean up the container, cannot do it in __del__ because the python interpreter is already shutting down
      def close(self):
          """Force-remove this box's container if it still exists.

          Only the container named ``self.container_name`` is removed, so
          other sandboxes sharing the name prefix are left running.
          """
          try:
              container = self.docker_client.containers.get(self.container_name)
              container.remove(force=True)
          except docker.errors.NotFound:
              pass

      def execute_python(self, code: str) -> str:
          """Not supported by this box; always raises ``NotImplementedError``."""
          raise NotImplementedError('execute_python is not supported in DockerSSHBox. Please use DockerSSHJupyterBox.')
  class DockerSSHJupyterBox(DockerSSHBox):
      """DockerSSHBox that also runs a Jupyter Kernel Gateway for Python code.

      The gateway listens inside the container on ``self._jupyter_port`` and
      the kernel client connects to ``SSH_HOSTNAME`` on that same port. Using
      one port number on both sides keeps the address valid with host
      networking (where no port mapping is applied) as well as with published
      ports, the same way the SSH port is handled.
      """

      _jupyter_port: int

      def __init__(
          self,
          container_image: str | None = None,
          timeout: int = 120,
          sid: str | None = None,
      ):
          """Start the sandbox, then the Jupyter Kernel Gateway inside it.

          Arguments are forwarded unchanged to ``DockerSSHBox.__init__``.
          """
          # Chosen before the base constructor runs because the container is
          # started there and the port mapping needs this value.
          self._jupyter_port = find_available_tcp_port()
          super().__init__(container_image, timeout, sid)
          self.setup_jupyter()

      def _get_port_mapping(self):
          """Return the SSH and Jupyter ports to publish, each mapped to itself."""
          return {
              f'{self._ssh_port}/tcp': self._ssh_port,
              f'{self._jupyter_port}/tcp': self._jupyter_port,
          }

      def setup_jupyter(self):
          """Launch the kernel gateway in the background and initialize the client."""
          # Setup Jupyter
          # NOTE: the attribute name keeps its historical spelling ("jupyer")
          # because code outside this class reads it under that name.
          self.jupyer_background_cmd = self.execute_in_background(
              f'jupyter kernelgateway --KernelGatewayApp.ip=0.0.0.0 --KernelGatewayApp.port={self._jupyter_port}'
          )
          self.jupyter_kernel = JupyterKernel(
              url_suffix=f'{SSH_HOSTNAME}:{self._jupyter_port}',
              convid=self.instance_id,
          )
          logger.info(f'Jupyter Kernel Gateway started at {SSH_HOSTNAME}:{self._jupyter_port}: {self.jupyer_background_cmd.read_logs()}')

          # initialize the kernel
          logger.info('Initializing Jupyter Kernel Gateway...')
          time.sleep(1)  # wait for the kernel to start
          loop = asyncio.get_event_loop()
          loop.run_until_complete(self.jupyter_kernel.initialize())
          logger.info('Jupyter Kernel Gateway initialized')

      def execute_python(self, code: str) -> str:
          """Run ``code`` on the Jupyter kernel and return its result.

          Blocks on the current thread's event loop until execution finishes.
          """
          loop = asyncio.get_event_loop()
          return loop.run_until_complete(self.jupyter_kernel.execute(code))
  if __name__ == '__main__':
      # Manual smoke test: start a Jupyter-enabled sandbox and run a small REPL.
      #   exit / EOF  -> leave the loop
      #   kill        -> kill the demo background command
      #   py:<code>   -> run <code> on the Jupyter kernel
      #   anything else is executed as a shell command over SSH
      try:
          ssh_box = DockerSSHJupyterBox()
      except Exception as e:
          logger.exception('Failed to start Docker container: %s', e)
          sys.exit(1)

      logger.info(
          "Interactive Docker container started. Type 'exit' or use Ctrl+C to exit.")

      # Background command that emits output every five seconds, so that
      # read_logs / kill_background can be exercised from the prompt.
      bg_cmd = ssh_box.execute_in_background(
          "while true; do echo 'dot ' && sleep 5; done"
      )

      sys.stdout.flush()
      try:
          while True:
              try:
                  line = input('>>> ')
              except EOFError:
                  # End of input is handled exactly like typing 'exit'.
                  line = 'exit'

              keyword = line.lower()
              if keyword == 'exit':
                  logger.info('Exiting...')
                  break
              if keyword == 'kill':
                  ssh_box.kill_background(bg_cmd.id)
                  logger.info('Background process killed')
                  continue

              if line.startswith('py:'):
                  result = ssh_box.execute_python(line[3:])
                  logger.info(result)
              else:
                  print('JUPYTER LOG:', ssh_box.jupyer_background_cmd.read_logs())
                  status, result = ssh_box.execute(line)
                  logger.info('exit code: %d', status)
                  logger.info(result)
                  # The background command is no longer tracked once killed.
                  if bg_cmd.id in ssh_box.background_commands:
                      bg_logs = ssh_box.read_logs(bg_cmd.id)
                      logger.info('background logs: %s', bg_logs)
              sys.stdout.flush()
      except KeyboardInterrupt:
          logger.info('Exiting...')
      ssh_box.close()