ssh_box.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478
  1. import atexit
  2. import os
  3. import sys
  4. import time
  5. import uuid
  6. import tarfile
  7. from glob import glob
  8. from collections import namedtuple
  9. from typing import Dict, List, Tuple, Union
  10. import docker
  11. from pexpect import pxssh
  12. from opendevin import config
  13. from opendevin.logger import opendevin_logger as logger
  14. from opendevin.sandbox.sandbox import Sandbox
  15. from opendevin.sandbox.process import Process
  16. from opendevin.sandbox.docker.process import DockerProcess
  17. from opendevin.sandbox.plugins import JupyterRequirement, SWEAgentCommandsRequirement
  18. from opendevin.schema import ConfigType
  19. from opendevin.utils import find_available_tcp_port
  20. from opendevin.exceptions import SandboxInvalidBackgroundCommandError
  21. InputType = namedtuple('InputType', ['content'])
  22. OutputType = namedtuple('OutputType', ['content'])
  23. SANDBOX_WORKSPACE_DIR = config.get(ConfigType.WORKSPACE_MOUNT_PATH_IN_SANDBOX)
  24. CONTAINER_IMAGE = config.get(ConfigType.SANDBOX_CONTAINER_IMAGE)
  25. SSH_HOSTNAME = config.get(ConfigType.SSH_HOSTNAME)
  26. USE_HOST_NETWORK = config.get(ConfigType.USE_HOST_NETWORK)
  27. # FIXME: On some containers, the devin user doesn't have enough permission, e.g. to install packages
  28. # How do we make this more flexible?
  29. RUN_AS_DEVIN = config.get('RUN_AS_DEVIN').lower() != 'false'
  30. USER_ID = 1000
  31. if SANDBOX_USER_ID := config.get('SANDBOX_USER_ID'):
  32. USER_ID = int(SANDBOX_USER_ID)
  33. elif hasattr(os, 'getuid'):
  34. USER_ID = os.getuid()
  35. class DockerSSHBox(Sandbox):
  36. instance_id: str
  37. container_image: str
  38. container_name_prefix = 'opendevin-sandbox-'
  39. container_name: str
  40. container: docker.models.containers.Container
  41. docker_client: docker.DockerClient
  42. _ssh_password: str
  43. _ssh_port: int
  44. cur_background_id = 0
  45. background_commands: Dict[int, Process] = {}
  46. def __init__(
  47. self,
  48. container_image: str | None = None,
  49. timeout: int = 120,
  50. sid: str | None = None,
  51. ):
  52. # Initialize docker client. Throws an exception if Docker is not reachable.
  53. try:
  54. self.docker_client = docker.from_env()
  55. except Exception as ex:
  56. logger.exception(
  57. 'Please check Docker is running using `docker ps`.', exc_info=False)
  58. raise ex
  59. self.instance_id = sid if sid is not None else str(uuid.uuid4())
  60. # TODO: this timeout is actually essential - need a better way to set it
  61. # if it is too short, the container may still waiting for previous
  62. # command to finish (e.g. apt-get update)
  63. # if it is too long, the user may have to wait for a unnecessary long time
  64. self.timeout = timeout
  65. self.container_image = CONTAINER_IMAGE if container_image is None else container_image
  66. self.container_name = self.container_name_prefix + self.instance_id
  67. # set up random user password
  68. self._ssh_password = str(uuid.uuid4())
  69. self._ssh_port = find_available_tcp_port()
  70. # always restart the container, cuz the initial be regarded as a new session
  71. self.restart_docker_container()
  72. self.setup_user()
  73. self.start_ssh_session()
  74. atexit.register(self.close)
  75. def setup_user(self):
  76. # Make users sudoers passwordless
  77. # TODO(sandbox): add this line in the Dockerfile for next minor version of docker image
  78. exit_code, logs = self.container.exec_run(
  79. ['/bin/bash', '-c',
  80. r"echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers"],
  81. workdir=SANDBOX_WORKSPACE_DIR,
  82. )
  83. if exit_code != 0:
  84. raise Exception(
  85. f'Failed to make all users passwordless sudoers in sandbox: {logs}')
  86. # Check if the opendevin user exists
  87. exit_code, logs = self.container.exec_run(
  88. ['/bin/bash', '-c', 'id -u opendevin'],
  89. workdir=SANDBOX_WORKSPACE_DIR,
  90. )
  91. if exit_code == 0:
  92. # User exists, delete it
  93. exit_code, logs = self.container.exec_run(
  94. ['/bin/bash', '-c', 'userdel -r opendevin'],
  95. workdir=SANDBOX_WORKSPACE_DIR,
  96. )
  97. if exit_code != 0:
  98. raise Exception(
  99. f'Failed to remove opendevin user in sandbox: {logs}')
  100. if RUN_AS_DEVIN:
  101. # Create the opendevin user
  102. exit_code, logs = self.container.exec_run(
  103. ['/bin/bash', '-c',
  104. f'useradd -rm -d /home/opendevin -s /bin/bash -g root -G sudo -u {USER_ID} opendevin'],
  105. workdir=SANDBOX_WORKSPACE_DIR,
  106. )
  107. if exit_code != 0:
  108. raise Exception(
  109. f'Failed to create opendevin user in sandbox: {logs}')
  110. exit_code, logs = self.container.exec_run(
  111. ['/bin/bash', '-c',
  112. f"echo 'opendevin:{self._ssh_password}' | chpasswd"],
  113. workdir=SANDBOX_WORKSPACE_DIR,
  114. )
  115. if exit_code != 0:
  116. raise Exception(f'Failed to set password in sandbox: {logs}')
  117. # chown the home directory
  118. exit_code, logs = self.container.exec_run(
  119. ['/bin/bash', '-c', 'chown opendevin:root /home/opendevin'],
  120. workdir=SANDBOX_WORKSPACE_DIR,
  121. )
  122. if exit_code != 0:
  123. raise Exception(
  124. f'Failed to chown home directory for opendevin in sandbox: {logs}')
  125. exit_code, logs = self.container.exec_run(
  126. ['/bin/bash', '-c', f'chown opendevin:root {SANDBOX_WORKSPACE_DIR}'],
  127. workdir=SANDBOX_WORKSPACE_DIR,
  128. )
  129. if exit_code != 0:
  130. raise Exception(
  131. f'Failed to chown workspace directory for opendevin in sandbox: {logs}')
  132. else:
  133. exit_code, logs = self.container.exec_run(
  134. # change password for root
  135. ['/bin/bash', '-c',
  136. f"echo 'root:{self._ssh_password}' | chpasswd"],
  137. workdir=SANDBOX_WORKSPACE_DIR,
  138. )
  139. if exit_code != 0:
  140. raise Exception(
  141. f'Failed to set password for root in sandbox: {logs}')
  142. exit_code, logs = self.container.exec_run(
  143. ['/bin/bash', '-c', "echo 'opendevin-sandbox' > /etc/hostname"],
  144. workdir=SANDBOX_WORKSPACE_DIR,
  145. )
  146. def start_ssh_session(self):
  147. # start ssh session at the background
  148. self.ssh = pxssh.pxssh()
  149. hostname = SSH_HOSTNAME
  150. if RUN_AS_DEVIN:
  151. username = 'opendevin'
  152. else:
  153. username = 'root'
  154. logger.info(
  155. f"Connecting to {username}@{hostname} via ssh. If you encounter any issues, you can try `ssh -v -p {self._ssh_port} {username}@{hostname}` with the password '{self._ssh_password}' and report the issue on GitHub."
  156. )
  157. self.ssh.login(hostname, username, self._ssh_password,
  158. port=self._ssh_port)
  159. # Fix: https://github.com/pexpect/pexpect/issues/669
  160. self.ssh.sendline("bind 'set enable-bracketed-paste off'")
  161. self.ssh.prompt()
  162. # cd to workspace
  163. self.ssh.sendline(f'cd {SANDBOX_WORKSPACE_DIR}')
  164. self.ssh.prompt()
  165. def get_exec_cmd(self, cmd: str) -> List[str]:
  166. if RUN_AS_DEVIN:
  167. return ['su', 'opendevin', '-c', cmd]
  168. else:
  169. return ['/bin/bash', '-c', cmd]
  170. def read_logs(self, id) -> str:
  171. if id not in self.background_commands:
  172. raise SandboxInvalidBackgroundCommandError()
  173. bg_cmd = self.background_commands[id]
  174. return bg_cmd.read_logs()
  175. def execute(self, cmd: str) -> Tuple[int, str]:
  176. cmd = cmd.strip()
  177. # use self.ssh
  178. self.ssh.sendline(cmd)
  179. success = self.ssh.prompt(timeout=self.timeout)
  180. if not success:
  181. logger.exception(
  182. 'Command timed out, killing process...', exc_info=False)
  183. # send a SIGINT to the process
  184. self.ssh.sendintr()
  185. self.ssh.prompt()
  186. command_output = self.ssh.before.decode(
  187. 'utf-8').lstrip(cmd).strip()
  188. return -1, f'Command: "{cmd}" timed out. Sending SIGINT to the process: {command_output}'
  189. command_output = self.ssh.before.decode('utf-8').strip()
  190. # NOTE: there's some weird behavior with the prompt (it may come AFTER the command output)
  191. # so we need to check if the command is in the output
  192. n_tries = 5
  193. while not command_output.startswith(cmd) and n_tries > 0:
  194. self.ssh.prompt()
  195. command_output = self.ssh.before.decode('utf-8').strip()
  196. time.sleep(0.5)
  197. n_tries -= 1
  198. if n_tries == 0 and not command_output.startswith(cmd):
  199. raise Exception(
  200. f'Something went wrong with the SSH sanbox, cannot get output for command [{cmd}] after 5 retries'
  201. )
  202. logger.debug(f'Command output GOT SO FAR: {command_output}')
  203. # once out, make sure that we have *every* output, we while loop until we get an empty output
  204. while True:
  205. logger.debug('WAITING FOR .prompt()')
  206. self.ssh.sendline('\n')
  207. timeout_not_reached = self.ssh.prompt(timeout=1)
  208. if not timeout_not_reached:
  209. logger.debug('TIMEOUT REACHED')
  210. break
  211. logger.debug('WAITING FOR .before')
  212. output = self.ssh.before.decode('utf-8').strip()
  213. logger.debug(f'WAITING FOR END OF command output ({bool(output)}): {output}')
  214. if output == '':
  215. break
  216. command_output += output
  217. command_output = command_output.lstrip(cmd).strip()
  218. # get the exit code
  219. self.ssh.sendline('echo $?')
  220. self.ssh.prompt()
  221. exit_code = self.ssh.before.decode('utf-8')
  222. while not exit_code.startswith('echo $?'):
  223. self.ssh.prompt()
  224. exit_code = self.ssh.before.decode('utf-8')
  225. logger.debug(f'WAITING FOR exit code: {exit_code}')
  226. exit_code = int(exit_code.lstrip('echo $?').strip())
  227. return exit_code, command_output
  228. def copy_to(self, host_src: str, sandbox_dest: str, recursive: bool = False):
  229. # mkdir -p sandbox_dest if it doesn't exist
  230. exit_code, logs = self.container.exec_run(
  231. ['/bin/bash', '-c', f'mkdir -p {sandbox_dest}'],
  232. workdir=SANDBOX_WORKSPACE_DIR,
  233. )
  234. if exit_code != 0:
  235. raise Exception(
  236. f'Failed to create directory {sandbox_dest} in sandbox: {logs}')
  237. if recursive:
  238. assert os.path.isdir(host_src), 'Source must be a directory when recursive is True'
  239. files = glob(host_src + '/**/*', recursive=True)
  240. srcname = os.path.basename(host_src)
  241. tar_filename = os.path.join(os.path.dirname(host_src), srcname + '.tar')
  242. with tarfile.open(tar_filename, mode='w') as tar:
  243. for file in files:
  244. tar.add(file, arcname=os.path.relpath(file, os.path.dirname(host_src)))
  245. else:
  246. assert os.path.isfile(host_src), 'Source must be a file when recursive is False'
  247. srcname = os.path.basename(host_src)
  248. tar_filename = os.path.join(os.path.dirname(host_src), srcname + '.tar')
  249. with tarfile.open(tar_filename, mode='w') as tar:
  250. tar.add(host_src, arcname=srcname)
  251. with open(tar_filename, 'rb') as f:
  252. data = f.read()
  253. self.container.put_archive(os.path.dirname(sandbox_dest), data)
  254. os.remove(tar_filename)
  255. def execute_in_background(self, cmd: str) -> Process:
  256. result = self.container.exec_run(
  257. self.get_exec_cmd(cmd), socket=True, workdir=SANDBOX_WORKSPACE_DIR
  258. )
  259. result.output._sock.setblocking(0)
  260. pid = self.get_pid(cmd)
  261. bg_cmd = DockerProcess(self.cur_background_id, cmd, result, pid)
  262. self.background_commands[bg_cmd.pid] = bg_cmd
  263. self.cur_background_id += 1
  264. return bg_cmd
  265. def get_pid(self, cmd):
  266. exec_result = self.container.exec_run('ps aux')
  267. processes = exec_result.output.decode('utf-8').splitlines()
  268. cmd = ' '.join(self.get_exec_cmd(cmd))
  269. for process in processes:
  270. if cmd in process:
  271. pid = process.split()[1] # second column is the pid
  272. return pid
  273. return None
  274. def kill_background(self, id: int) -> Process:
  275. if id not in self.background_commands:
  276. raise SandboxInvalidBackgroundCommandError()
  277. bg_cmd = self.background_commands[id]
  278. if bg_cmd.pid is not None:
  279. self.container.exec_run(
  280. f'kill -9 {bg_cmd.pid}', workdir=SANDBOX_WORKSPACE_DIR)
  281. assert isinstance(bg_cmd, DockerProcess)
  282. bg_cmd.result.output.close()
  283. self.background_commands.pop(id)
  284. return bg_cmd
  285. def stop_docker_container(self):
  286. try:
  287. container = self.docker_client.containers.get(self.container_name)
  288. container.stop()
  289. container.remove()
  290. elapsed = 0
  291. while container.status != 'exited':
  292. time.sleep(1)
  293. elapsed += 1
  294. if elapsed > self.timeout:
  295. break
  296. container = self.docker_client.containers.get(
  297. self.container_name)
  298. except docker.errors.NotFound:
  299. pass
  300. def is_container_running(self):
  301. try:
  302. container = self.docker_client.containers.get(self.container_name)
  303. if container.status == 'running':
  304. self.container = container
  305. return True
  306. return False
  307. except docker.errors.NotFound:
  308. return False
  309. def restart_docker_container(self):
  310. try:
  311. self.stop_docker_container()
  312. logger.info('Container stopped')
  313. except docker.errors.DockerException as ex:
  314. logger.exception('Failed to stop container', exc_info=False)
  315. raise ex
  316. try:
  317. network_kwargs: Dict[str, Union[str, Dict[str, int]]] = {}
  318. if USE_HOST_NETWORK:
  319. network_kwargs['network_mode'] = 'host'
  320. else:
  321. # FIXME: This is a temporary workaround for Mac OS
  322. network_kwargs['ports'] = {f'{self._ssh_port}/tcp': self._ssh_port}
  323. logger.warning(
  324. ('Using port forwarding for Mac OS. '
  325. 'Server started by OpenDevin will not be accessible from the host machine at the moment. '
  326. 'See https://github.com/OpenDevin/OpenDevin/issues/897 for more information.'
  327. )
  328. )
  329. mount_dir = config.get(ConfigType.WORKSPACE_MOUNT_PATH)
  330. logger.info(f'Mounting workspace directory: {mount_dir}')
  331. # start the container
  332. self.container = self.docker_client.containers.run(
  333. self.container_image,
  334. # allow root login
  335. command=f"/usr/sbin/sshd -D -p {self._ssh_port} -o 'PermitRootLogin=yes'",
  336. **network_kwargs,
  337. working_dir=SANDBOX_WORKSPACE_DIR,
  338. name=self.container_name,
  339. hostname='opendevin_sandbox',
  340. detach=True,
  341. volumes={
  342. mount_dir: {
  343. 'bind': SANDBOX_WORKSPACE_DIR,
  344. 'mode': 'rw'
  345. },
  346. # mount cache directory to /home/opendevin/.cache for pip cache reuse
  347. config.get('CACHE_DIR'): {
  348. 'bind': '/home/opendevin/.cache' if RUN_AS_DEVIN else '/root/.cache',
  349. 'mode': 'rw'
  350. },
  351. },
  352. )
  353. logger.info('Container started')
  354. except Exception as ex:
  355. logger.exception('Failed to start container', exc_info=False)
  356. raise ex
  357. # wait for container to be ready
  358. elapsed = 0
  359. while self.container.status != 'running':
  360. if self.container.status == 'exited':
  361. logger.info('container exited')
  362. logger.info('container logs:')
  363. logger.info(self.container.logs())
  364. break
  365. time.sleep(1)
  366. elapsed += 1
  367. self.container = self.docker_client.containers.get(
  368. self.container_name)
  369. logger.info(
  370. f'waiting for container to start: {elapsed}, container status: {self.container.status}')
  371. if elapsed > self.timeout:
  372. break
  373. if self.container.status != 'running':
  374. raise Exception('Failed to start container')
  375. # clean up the container, cannot do it in __del__ because the python interpreter is already shutting down
  376. def close(self):
  377. containers = self.docker_client.containers.list(all=True)
  378. for container in containers:
  379. try:
  380. if container.name.startswith(self.container_name_prefix):
  381. container.remove(force=True)
  382. except docker.errors.NotFound:
  383. pass
  384. if __name__ == '__main__':
  385. try:
  386. ssh_box = DockerSSHBox()
  387. except Exception as e:
  388. logger.exception('Failed to start Docker container: %s', e)
  389. sys.exit(1)
  390. logger.info(
  391. "Interactive Docker container started. Type 'exit' or use Ctrl+C to exit.")
  392. # Initialize required plugins
  393. ssh_box.init_plugins([JupyterRequirement(), SWEAgentCommandsRequirement()])
  394. logger.info(
  395. '--- SWE-AGENT COMMAND DOCUMENTATION ---\n'
  396. f'{SWEAgentCommandsRequirement().documentation}\n'
  397. '---'
  398. )
  399. bg_cmd = ssh_box.execute_in_background(
  400. "while true; do echo 'dot ' && sleep 10; done"
  401. )
  402. sys.stdout.flush()
  403. try:
  404. while True:
  405. try:
  406. user_input = input('>>> ')
  407. except EOFError:
  408. logger.info('Exiting...')
  409. break
  410. if user_input.lower() == 'exit':
  411. logger.info('Exiting...')
  412. break
  413. if user_input.lower() == 'kill':
  414. ssh_box.kill_background(bg_cmd.pid)
  415. logger.info('Background process killed')
  416. continue
  417. exit_code, output = ssh_box.execute(user_input)
  418. logger.info('exit code: %d', exit_code)
  419. logger.info(output)
  420. if bg_cmd.pid in ssh_box.background_commands:
  421. logs = ssh_box.read_logs(bg_cmd.pid)
  422. logger.info('background logs: %s', logs)
  423. sys.stdout.flush()
  424. except KeyboardInterrupt:
  425. logger.info('Exiting...')
  426. ssh_box.close()