# ssh_box.py
import atexit
import io
import os
import platform
import sys
import tarfile
import time
import uuid
from collections import namedtuple
from glob import glob
from typing import Dict, List, Tuple, Union

import docker
from pexpect import pxssh

from opendevin import config
from opendevin.logger import opendevin_logger as logger
from opendevin.sandbox.sandbox import Sandbox
from opendevin.sandbox.process import Process
from opendevin.sandbox.docker.process import DockerProcess
from opendevin.sandbox.plugins.jupyter import JupyterRequirement
from opendevin.schema import ConfigType
from opendevin.utils import find_available_tcp_port
from opendevin.exceptions import SandboxInvalidBackgroundCommandError
  22. InputType = namedtuple('InputType', ['content'])
  23. OutputType = namedtuple('OutputType', ['content'])
  24. SANDBOX_WORKSPACE_DIR = config.get(ConfigType.WORKSPACE_MOUNT_PATH_IN_SANDBOX)
  25. CONTAINER_IMAGE = config.get(ConfigType.SANDBOX_CONTAINER_IMAGE)
  26. SSH_HOSTNAME = config.get(ConfigType.SSH_HOSTNAME)
  27. USE_HOST_NETWORK = platform.system() == 'Linux'
  28. if config.get(ConfigType.USE_HOST_NETWORK) is not None:
  29. USE_HOST_NETWORK = config.get(
  30. ConfigType.USE_HOST_NETWORK).lower() != 'false'
  31. # FIXME: On some containers, the devin user doesn't have enough permission, e.g. to install packages
  32. # How do we make this more flexible?
  33. RUN_AS_DEVIN = config.get('RUN_AS_DEVIN').lower() != 'false'
  34. USER_ID = 1000
  35. if SANDBOX_USER_ID := config.get('SANDBOX_USER_ID'):
  36. USER_ID = int(SANDBOX_USER_ID)
  37. elif hasattr(os, 'getuid'):
  38. USER_ID = os.getuid()
class DockerSSHBox(Sandbox):
    """Sandbox that runs commands inside a Docker container.

    The container is started with an sshd listening on a dedicated port;
    foreground commands go through a `pxssh` session, while background
    commands and set-up steps use the Docker exec API.
    """

    # Identifier of this sandbox (caller-supplied `sid` or a random UUID).
    instance_id: str
    # Image the container is created from.
    container_image: str
    # Every sandbox container name starts with this prefix; `close` uses it
    # to find containers to remove.
    container_name_prefix = 'opendevin-sandbox-'
    # `container_name_prefix` followed by `instance_id`.
    container_name: str
    # Handle of the running container and the Docker client that owns it.
    container: docker.models.containers.Container
    docker_client: docker.DockerClient
    # Random password and TCP port used for the SSH login.
    _ssh_password: str
    _ssh_port: int
    # Counter handed to each new background process as its id.
    cur_background_id = 0
    # Registry of live background processes, keyed by the process's `.pid`.
    # NOTE: this dict is created once on the class, so it is shared by every
    # instance that does not rebind the attribute.
    background_commands: Dict[int, Process] = {}
  50. def __init__(
  51. self,
  52. container_image: str | None = None,
  53. timeout: int = 120,
  54. sid: str | None = None,
  55. ):
  56. # Initialize docker client. Throws an exception if Docker is not reachable.
  57. try:
  58. self.docker_client = docker.from_env()
  59. except Exception as ex:
  60. logger.exception(
  61. 'Please check Docker is running using `docker ps`.', exc_info=False)
  62. raise ex
  63. self.instance_id = sid if sid is not None else str(uuid.uuid4())
  64. # TODO: this timeout is actually essential - need a better way to set it
  65. # if it is too short, the container may still waiting for previous
  66. # command to finish (e.g. apt-get update)
  67. # if it is too long, the user may have to wait for a unnecessary long time
  68. self.timeout = timeout
  69. self.container_image = CONTAINER_IMAGE if container_image is None else container_image
  70. self.container_name = self.container_name_prefix + self.instance_id
  71. # set up random user password
  72. self._ssh_password = str(uuid.uuid4())
  73. self._ssh_port = find_available_tcp_port()
  74. # always restart the container, cuz the initial be regarded as a new session
  75. self.restart_docker_container()
  76. self.setup_user()
  77. self.start_ssh_session()
  78. atexit.register(self.close)
  79. def setup_user(self):
  80. # Make users sudoers passwordless
  81. # TODO(sandbox): add this line in the Dockerfile for next minor version of docker image
  82. exit_code, logs = self.container.exec_run(
  83. ['/bin/bash', '-c',
  84. r"echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers"],
  85. workdir=SANDBOX_WORKSPACE_DIR,
  86. )
  87. if exit_code != 0:
  88. raise Exception(
  89. f'Failed to make all users passwordless sudoers in sandbox: {logs}')
  90. # Check if the opendevin user exists
  91. exit_code, logs = self.container.exec_run(
  92. ['/bin/bash', '-c', 'id -u opendevin'],
  93. workdir=SANDBOX_WORKSPACE_DIR,
  94. )
  95. if exit_code == 0:
  96. # User exists, delete it
  97. exit_code, logs = self.container.exec_run(
  98. ['/bin/bash', '-c', 'userdel -r opendevin'],
  99. workdir=SANDBOX_WORKSPACE_DIR,
  100. )
  101. if exit_code != 0:
  102. raise Exception(
  103. f'Failed to remove opendevin user in sandbox: {logs}')
  104. if RUN_AS_DEVIN:
  105. # Create the opendevin user
  106. exit_code, logs = self.container.exec_run(
  107. ['/bin/bash', '-c',
  108. f'useradd -rm -d /home/opendevin -s /bin/bash -g root -G sudo -u {USER_ID} opendevin'],
  109. workdir=SANDBOX_WORKSPACE_DIR,
  110. )
  111. if exit_code != 0:
  112. raise Exception(
  113. f'Failed to create opendevin user in sandbox: {logs}')
  114. exit_code, logs = self.container.exec_run(
  115. ['/bin/bash', '-c',
  116. f"echo 'opendevin:{self._ssh_password}' | chpasswd"],
  117. workdir=SANDBOX_WORKSPACE_DIR,
  118. )
  119. if exit_code != 0:
  120. raise Exception(f'Failed to set password in sandbox: {logs}')
  121. # chown the home directory
  122. exit_code, logs = self.container.exec_run(
  123. ['/bin/bash', '-c', 'chown opendevin:root /home/opendevin'],
  124. workdir=SANDBOX_WORKSPACE_DIR,
  125. )
  126. if exit_code != 0:
  127. raise Exception(
  128. f'Failed to chown home directory for opendevin in sandbox: {logs}')
  129. exit_code, logs = self.container.exec_run(
  130. ['/bin/bash', '-c', f'chown opendevin:root {SANDBOX_WORKSPACE_DIR}'],
  131. workdir=SANDBOX_WORKSPACE_DIR,
  132. )
  133. if exit_code != 0:
  134. raise Exception(
  135. f'Failed to chown workspace directory for opendevin in sandbox: {logs}')
  136. else:
  137. exit_code, logs = self.container.exec_run(
  138. # change password for root
  139. ['/bin/bash', '-c',
  140. f"echo 'root:{self._ssh_password}' | chpasswd"],
  141. workdir=SANDBOX_WORKSPACE_DIR,
  142. )
  143. if exit_code != 0:
  144. raise Exception(
  145. f'Failed to set password for root in sandbox: {logs}')
  146. exit_code, logs = self.container.exec_run(
  147. ['/bin/bash', '-c', "echo 'opendevin-sandbox' > /etc/hostname"],
  148. workdir=SANDBOX_WORKSPACE_DIR,
  149. )
  150. def start_ssh_session(self):
  151. # start ssh session at the background
  152. self.ssh = pxssh.pxssh()
  153. hostname = SSH_HOSTNAME
  154. if RUN_AS_DEVIN:
  155. username = 'opendevin'
  156. else:
  157. username = 'root'
  158. logger.info(
  159. f"Connecting to {username}@{hostname} via ssh. If you encounter any issues, you can try `ssh -v -p {self._ssh_port} {username}@{hostname}` with the password '{self._ssh_password}' and report the issue on GitHub."
  160. )
  161. self.ssh.login(hostname, username, self._ssh_password,
  162. port=self._ssh_port)
  163. # Fix: https://github.com/pexpect/pexpect/issues/669
  164. self.ssh.sendline("bind 'set enable-bracketed-paste off'")
  165. self.ssh.prompt()
  166. # cd to workspace
  167. self.ssh.sendline(f'cd {SANDBOX_WORKSPACE_DIR}')
  168. self.ssh.prompt()
  169. def get_exec_cmd(self, cmd: str) -> List[str]:
  170. if RUN_AS_DEVIN:
  171. return ['su', 'opendevin', '-c', cmd]
  172. else:
  173. return ['/bin/bash', '-c', cmd]
  174. def read_logs(self, id) -> str:
  175. if id not in self.background_commands:
  176. raise SandboxInvalidBackgroundCommandError()
  177. bg_cmd = self.background_commands[id]
  178. return bg_cmd.read_logs()
  179. def execute(self, cmd: str) -> Tuple[int, str]:
  180. cmd = cmd.strip()
  181. # use self.ssh
  182. self.ssh.sendline(cmd)
  183. success = self.ssh.prompt(timeout=self.timeout)
  184. if not success:
  185. logger.exception(
  186. 'Command timed out, killing process...', exc_info=False)
  187. # send a SIGINT to the process
  188. self.ssh.sendintr()
  189. self.ssh.prompt()
  190. command_output = self.ssh.before.decode(
  191. 'utf-8').lstrip(cmd).strip()
  192. return -1, f'Command: "{cmd}" timed out. Sending SIGINT to the process: {command_output}'
  193. command_output = self.ssh.before.decode('utf-8').strip()
  194. # NOTE: there's some weird behavior with the prompt (it may come AFTER the command output)
  195. # so we need to check if the command is in the output
  196. n_tries = 5
  197. while not command_output.startswith(cmd) and n_tries > 0:
  198. self.ssh.prompt()
  199. command_output = self.ssh.before.decode('utf-8').strip()
  200. time.sleep(0.5)
  201. n_tries -= 1
  202. if n_tries == 0 and not command_output.startswith(cmd):
  203. raise Exception(
  204. f'Something went wrong with the SSH sanbox, cannot get output for command [{cmd}] after 5 retries'
  205. )
  206. logger.debug(f'Command output GOT SO FAR: {command_output}')
  207. # once out, make sure that we have *every* output, we while loop until we get an empty output
  208. while True:
  209. logger.debug('WAITING FOR .prompt()')
  210. self.ssh.sendline('\n')
  211. timeout_not_reached = self.ssh.prompt(timeout=1)
  212. if not timeout_not_reached:
  213. logger.debug('TIMEOUT REACHED')
  214. break
  215. logger.debug('WAITING FOR .before')
  216. output = self.ssh.before.decode('utf-8').strip()
  217. logger.debug(f'WAITING FOR END OF command output ({bool(output)}): {output}')
  218. if output == '':
  219. break
  220. command_output += output
  221. command_output = command_output.lstrip(cmd).strip()
  222. # get the exit code
  223. self.ssh.sendline('echo $?')
  224. self.ssh.prompt()
  225. exit_code = self.ssh.before.decode('utf-8')
  226. while not exit_code.startswith('echo $?'):
  227. self.ssh.prompt()
  228. exit_code = self.ssh.before.decode('utf-8')
  229. logger.debug(f'WAITING FOR exit code: {exit_code}')
  230. exit_code = int(exit_code.lstrip('echo $?').strip())
  231. return exit_code, command_output
  232. def copy_to(self, host_src: str, sandbox_dest: str, recursive: bool = False):
  233. # mkdir -p sandbox_dest if it doesn't exist
  234. exit_code, logs = self.container.exec_run(
  235. ['/bin/bash', '-c', f'mkdir -p {sandbox_dest}'],
  236. workdir=SANDBOX_WORKSPACE_DIR,
  237. )
  238. if exit_code != 0:
  239. raise Exception(
  240. f'Failed to create directory {sandbox_dest} in sandbox: {logs}')
  241. if recursive:
  242. assert os.path.isdir(host_src), 'Source must be a directory when recursive is True'
  243. files = glob(host_src + '/**/*', recursive=True)
  244. srcname = os.path.basename(host_src)
  245. tar_filename = os.path.join(os.path.dirname(host_src), srcname + '.tar')
  246. with tarfile.open(tar_filename, mode='w') as tar:
  247. for file in files:
  248. tar.add(file, arcname=os.path.relpath(file, os.path.dirname(host_src)))
  249. else:
  250. assert os.path.isfile(host_src), 'Source must be a file when recursive is False'
  251. srcname = os.path.basename(host_src)
  252. tar_filename = os.path.join(os.path.dirname(host_src), srcname + '.tar')
  253. with tarfile.open(tar_filename, mode='w') as tar:
  254. tar.add(host_src, arcname=srcname)
  255. with open(tar_filename, 'rb') as f:
  256. data = f.read()
  257. self.container.put_archive(os.path.dirname(sandbox_dest), data)
  258. os.remove(tar_filename)
  259. def execute_in_background(self, cmd: str) -> Process:
  260. result = self.container.exec_run(
  261. self.get_exec_cmd(cmd), socket=True, workdir=SANDBOX_WORKSPACE_DIR
  262. )
  263. result.output._sock.setblocking(0)
  264. pid = self.get_pid(cmd)
  265. bg_cmd = DockerProcess(self.cur_background_id, cmd, result, pid)
  266. self.background_commands[bg_cmd.pid] = bg_cmd
  267. self.cur_background_id += 1
  268. return bg_cmd
  269. def get_pid(self, cmd):
  270. exec_result = self.container.exec_run('ps aux')
  271. processes = exec_result.output.decode('utf-8').splitlines()
  272. cmd = ' '.join(self.get_exec_cmd(cmd))
  273. for process in processes:
  274. if cmd in process:
  275. pid = process.split()[1] # second column is the pid
  276. return pid
  277. return None
  278. def kill_background(self, id: int) -> Process:
  279. if id not in self.background_commands:
  280. raise SandboxInvalidBackgroundCommandError()
  281. bg_cmd = self.background_commands[id]
  282. if bg_cmd.pid is not None:
  283. self.container.exec_run(
  284. f'kill -9 {bg_cmd.pid}', workdir=SANDBOX_WORKSPACE_DIR)
  285. assert isinstance(bg_cmd, DockerProcess)
  286. bg_cmd.result.output.close()
  287. self.background_commands.pop(id)
  288. return bg_cmd
  289. def stop_docker_container(self):
  290. try:
  291. container = self.docker_client.containers.get(self.container_name)
  292. container.stop()
  293. container.remove()
  294. elapsed = 0
  295. while container.status != 'exited':
  296. time.sleep(1)
  297. elapsed += 1
  298. if elapsed > self.timeout:
  299. break
  300. container = self.docker_client.containers.get(
  301. self.container_name)
  302. except docker.errors.NotFound:
  303. pass
  304. def is_container_running(self):
  305. try:
  306. container = self.docker_client.containers.get(self.container_name)
  307. if container.status == 'running':
  308. self.container = container
  309. return True
  310. return False
  311. except docker.errors.NotFound:
  312. return False
  313. def restart_docker_container(self):
  314. try:
  315. self.stop_docker_container()
  316. logger.info('Container stopped')
  317. except docker.errors.DockerException as ex:
  318. logger.exception('Failed to stop container', exc_info=False)
  319. raise ex
  320. try:
  321. network_kwargs: Dict[str, Union[str, Dict[str, int]]] = {}
  322. if USE_HOST_NETWORK:
  323. network_kwargs['network_mode'] = 'host'
  324. else:
  325. # FIXME: This is a temporary workaround for Mac OS
  326. network_kwargs['ports'] = {f'{self._ssh_port}/tcp': self._ssh_port}
  327. logger.warning(
  328. ('Using port forwarding for Mac OS. '
  329. 'Server started by OpenDevin will not be accessible from the host machine at the moment. '
  330. 'See https://github.com/OpenDevin/OpenDevin/issues/897 for more information.'
  331. )
  332. )
  333. mount_dir = config.get('WORKSPACE_MOUNT_PATH')
  334. print('Mounting workspace directory: ', mount_dir)
  335. # start the container
  336. self.container = self.docker_client.containers.run(
  337. self.container_image,
  338. # allow root login
  339. command=f"/usr/sbin/sshd -D -p {self._ssh_port} -o 'PermitRootLogin=yes'",
  340. **network_kwargs,
  341. working_dir=SANDBOX_WORKSPACE_DIR,
  342. name=self.container_name,
  343. hostname='opendevin_sandbox',
  344. detach=True,
  345. volumes={
  346. mount_dir: {
  347. 'bind': SANDBOX_WORKSPACE_DIR,
  348. 'mode': 'rw'
  349. },
  350. # mount cache directory to /home/opendevin/.cache for pip cache reuse
  351. config.get('CACHE_DIR'): {
  352. 'bind': '/home/opendevin/.cache' if RUN_AS_DEVIN else '/root/.cache',
  353. 'mode': 'rw'
  354. },
  355. },
  356. )
  357. logger.info('Container started')
  358. except Exception as ex:
  359. logger.exception('Failed to start container', exc_info=False)
  360. raise ex
  361. # wait for container to be ready
  362. elapsed = 0
  363. while self.container.status != 'running':
  364. if self.container.status == 'exited':
  365. logger.info('container exited')
  366. logger.info('container logs:')
  367. logger.info(self.container.logs())
  368. break
  369. time.sleep(1)
  370. elapsed += 1
  371. self.container = self.docker_client.containers.get(
  372. self.container_name)
  373. logger.info(
  374. f'waiting for container to start: {elapsed}, container status: {self.container.status}')
  375. if elapsed > self.timeout:
  376. break
  377. if self.container.status != 'running':
  378. raise Exception('Failed to start container')
  379. # clean up the container, cannot do it in __del__ because the python interpreter is already shutting down
  380. def close(self):
  381. containers = self.docker_client.containers.list(all=True)
  382. for container in containers:
  383. try:
  384. if container.name.startswith(self.container_name_prefix):
  385. container.remove(force=True)
  386. except docker.errors.NotFound:
  387. pass
  388. if __name__ == '__main__':
  389. try:
  390. ssh_box = DockerSSHBox()
  391. except Exception as e:
  392. logger.exception('Failed to start Docker container: %s', e)
  393. sys.exit(1)
  394. logger.info(
  395. "Interactive Docker container started. Type 'exit' or use Ctrl+C to exit.")
  396. # Initialize required plugins
  397. ssh_box.init_plugins([JupyterRequirement()])
  398. bg_cmd = ssh_box.execute_in_background(
  399. "while true; do echo 'dot ' && sleep 10; done"
  400. )
  401. sys.stdout.flush()
  402. try:
  403. while True:
  404. try:
  405. user_input = input('>>> ')
  406. except EOFError:
  407. logger.info('Exiting...')
  408. break
  409. if user_input.lower() == 'exit':
  410. logger.info('Exiting...')
  411. break
  412. if user_input.lower() == 'kill':
  413. ssh_box.kill_background(bg_cmd.pid)
  414. logger.info('Background process killed')
  415. continue
  416. exit_code, output = ssh_box.execute(user_input)
  417. logger.info('exit code: %d', exit_code)
  418. logger.info(output)
  419. if bg_cmd.pid in ssh_box.background_commands:
  420. logs = ssh_box.read_logs(bg_cmd.pid)
  421. logger.info('background logs: %s', logs)
  422. sys.stdout.flush()
  423. except KeyboardInterrupt:
  424. logger.info('Exiting...')
  425. ssh_box.close()