# ssh_box.py
import atexit
import io
import json
import os
import shlex
import sys
import tarfile
import time
import uuid
from collections import namedtuple
from glob import glob

import docker
from pexpect import pxssh

from opendevin.const.guide_url import TROUBLESHOOTING_URL
from opendevin.core.config import config
from opendevin.core.exceptions import SandboxInvalidBackgroundCommandError
from opendevin.core.logger import opendevin_logger as logger
from opendevin.runtime.docker.process import DockerProcess, Process
from opendevin.runtime.plugins import (
    JupyterRequirement,
    SWEAgentCommandsRequirement,
)
from opendevin.runtime.sandbox import Sandbox
from opendevin.runtime.utils import find_available_tcp_port
  23. # FIXME: these are not used, can we remove them?
  24. InputType = namedtuple('InputType', ['content'])
  25. OutputType = namedtuple('OutputType', ['content'])
class DockerSSHBox(Sandbox):
    """Sandbox that runs commands in a Docker container reached over SSH."""

    # Session id (if any) concatenated with a random UUID; suffix of the container name.
    instance_id: str
    # Image the sandbox container is started from.
    container_image: str
    # Shared prefix of every sandbox container name; `close()` matches on it.
    container_name_prefix = 'opendevin-sandbox-'
    # `container_name_prefix + instance_id`.
    container_name: str
    # Handle of the running sandbox container.
    container: docker.models.containers.Container
    # Client created with `docker.from_env()`.
    docker_client: docker.DockerClient
    # Random UUID string used as the password of the SSH login user.
    _ssh_password: str
    # TCP port that sshd inside the container listens on.
    _ssh_port: int
    # Id handed to the next `DockerProcess`; incremented per background command.
    cur_background_id = 0
    # Registry of background commands, keyed by the process's `pid`.
    # NOTE: declared at class level, so the dict is shared by every instance
    # that does not rebind the attribute.
    background_commands: dict[int, Process] = {}
  37. def __init__(
  38. self,
  39. container_image: str | None = None,
  40. timeout: int = 120,
  41. sid: str | None = None,
  42. ):
  43. logger.info(
  44. f'SSHBox is running as {"opendevin" if self.run_as_devin else "root"} user with USER_ID={self.user_id} in the sandbox'
  45. )
  46. # Initialize docker client. Throws an exception if Docker is not reachable.
  47. try:
  48. self.docker_client = docker.from_env()
  49. except Exception as ex:
  50. logger.exception(
  51. f'Error creating controller. Please check Docker is running and visit `{TROUBLESHOOTING_URL}` for more debugging information.',
  52. exc_info=False,
  53. )
  54. raise ex
  55. self.instance_id = (
  56. sid + str(uuid.uuid4()) if sid is not None else str(uuid.uuid4())
  57. )
  58. # TODO: this timeout is actually essential - need a better way to set it
  59. # if it is too short, the container may still waiting for previous
  60. # command to finish (e.g. apt-get update)
  61. # if it is too long, the user may have to wait for a unnecessary long time
  62. self.timeout = timeout
  63. self.container_image = (
  64. config.sandbox_container_image
  65. if container_image is None
  66. else container_image
  67. )
  68. self.container_name = self.container_name_prefix + self.instance_id
  69. # set up random user password
  70. self._ssh_password = str(uuid.uuid4())
  71. self._ssh_port = find_available_tcp_port()
  72. # always restart the container, cuz the initial be regarded as a new session
  73. self.restart_docker_container()
  74. self.setup_user()
  75. self.start_ssh_session()
  76. atexit.register(self.close)
  77. super().__init__()
  78. def add_to_env(self, key: str, value: str):
  79. super().add_to_env(key, value)
  80. # Note: json.dumps gives us nice escaping for free
  81. self.execute(f'export {key}={json.dumps(value)}')
  82. def setup_user(self):
  83. # Make users sudoers passwordless
  84. # TODO(sandbox): add this line in the Dockerfile for next minor version of docker image
  85. exit_code, logs = self.container.exec_run(
  86. ['/bin/bash', '-c', r"echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers"],
  87. workdir=self.sandbox_workspace_dir,
  88. environment=self._env,
  89. )
  90. if exit_code != 0:
  91. raise Exception(
  92. f'Failed to make all users passwordless sudoers in sandbox: {logs}'
  93. )
  94. # Check if the opendevin user exists
  95. exit_code, logs = self.container.exec_run(
  96. ['/bin/bash', '-c', 'id -u opendevin'],
  97. workdir=self.sandbox_workspace_dir,
  98. environment=self._env,
  99. )
  100. if exit_code == 0:
  101. # User exists, delete it
  102. exit_code, logs = self.container.exec_run(
  103. ['/bin/bash', '-c', 'userdel -r opendevin'],
  104. workdir=self.sandbox_workspace_dir,
  105. environment=self._env,
  106. )
  107. if exit_code != 0:
  108. raise Exception(f'Failed to remove opendevin user in sandbox: {logs}')
  109. if self.run_as_devin:
  110. # Create the opendevin user
  111. exit_code, logs = self.container.exec_run(
  112. [
  113. '/bin/bash',
  114. '-c',
  115. f'useradd -rm -d /home/opendevin -s /bin/bash -g root -G sudo -u {self.user_id} opendevin',
  116. ],
  117. workdir=self.sandbox_workspace_dir,
  118. environment=self._env,
  119. )
  120. if exit_code != 0:
  121. raise Exception(f'Failed to create opendevin user in sandbox: {logs}')
  122. exit_code, logs = self.container.exec_run(
  123. [
  124. '/bin/bash',
  125. '-c',
  126. f"echo 'opendevin:{self._ssh_password}' | chpasswd",
  127. ],
  128. workdir=self.sandbox_workspace_dir,
  129. environment=self._env,
  130. )
  131. if exit_code != 0:
  132. raise Exception(f'Failed to set password in sandbox: {logs}')
  133. # chown the home directory
  134. exit_code, logs = self.container.exec_run(
  135. ['/bin/bash', '-c', 'chown opendevin:root /home/opendevin'],
  136. workdir=self.sandbox_workspace_dir,
  137. environment=self._env,
  138. )
  139. if exit_code != 0:
  140. raise Exception(
  141. f'Failed to chown home directory for opendevin in sandbox: {logs}'
  142. )
  143. exit_code, logs = self.container.exec_run(
  144. [
  145. '/bin/bash',
  146. '-c',
  147. f'chown opendevin:root {self.sandbox_workspace_dir}',
  148. ],
  149. workdir=self.sandbox_workspace_dir,
  150. environment=self._env,
  151. )
  152. if exit_code != 0:
  153. # This is not a fatal error, just a warning
  154. logger.warning(
  155. f'Failed to chown workspace directory for opendevin in sandbox: {logs}. But this should be fine if the {self.sandbox_workspace_dir=} is mounted by the app docker container.'
  156. )
  157. else:
  158. exit_code, logs = self.container.exec_run(
  159. # change password for root
  160. ['/bin/bash', '-c', f"echo 'root:{self._ssh_password}' | chpasswd"],
  161. workdir=self.sandbox_workspace_dir,
  162. environment=self._env,
  163. )
  164. if exit_code != 0:
  165. raise Exception(f'Failed to set password for root in sandbox: {logs}')
  166. exit_code, logs = self.container.exec_run(
  167. ['/bin/bash', '-c', "echo 'opendevin-sandbox' > /etc/hostname"],
  168. workdir=self.sandbox_workspace_dir,
  169. environment=self._env,
  170. )
  171. def start_ssh_session(self):
  172. # start ssh session at the background
  173. self.ssh = pxssh.pxssh()
  174. hostname = self.ssh_hostname
  175. if self.run_as_devin:
  176. username = 'opendevin'
  177. else:
  178. username = 'root'
  179. logger.info(
  180. f'Connecting to {username}@{hostname} via ssh. '
  181. f"If you encounter any issues, you can try `ssh -v -p {self._ssh_port} {username}@{hostname}` with the password '{self._ssh_password}' and report the issue on GitHub. "
  182. f"If you started OpenDevin with `docker run`, you should try `ssh -v -p {self._ssh_port} {username}@localhost` with the password '{self._ssh_password} on the host machine (where you started the container)."
  183. )
  184. self.ssh.login(hostname, username, self._ssh_password, port=self._ssh_port)
  185. # Fix: https://github.com/pexpect/pexpect/issues/669
  186. self.ssh.sendline("bind 'set enable-bracketed-paste off'")
  187. self.ssh.prompt()
  188. # cd to workspace
  189. self.ssh.sendline(f'cd {self.sandbox_workspace_dir}')
  190. self.ssh.prompt()
  191. def get_exec_cmd(self, cmd: str) -> list[str]:
  192. if self.run_as_devin:
  193. return ['su', 'opendevin', '-c', cmd]
  194. else:
  195. return ['/bin/bash', '-c', cmd]
  196. def read_logs(self, id) -> str:
  197. if id not in self.background_commands:
  198. raise SandboxInvalidBackgroundCommandError()
  199. bg_cmd = self.background_commands[id]
  200. return bg_cmd.read_logs()
  201. def execute(self, cmd: str) -> tuple[int, str]:
  202. cmd = cmd.strip()
  203. # use self.ssh
  204. self.ssh.sendline(cmd)
  205. success = self.ssh.prompt(timeout=self.timeout)
  206. if not success:
  207. logger.exception('Command timed out, killing process...', exc_info=False)
  208. # send a SIGINT to the process
  209. self.ssh.sendintr()
  210. self.ssh.prompt()
  211. command_output = self.ssh.before.decode('utf-8').lstrip(cmd).strip()
  212. return (
  213. -1,
  214. f'Command: "{cmd}" timed out. Sending SIGINT to the process: {command_output}',
  215. )
  216. command_output = self.ssh.before.decode('utf-8').strip()
  217. # once out, make sure that we have *every* output, we while loop until we get an empty output
  218. while True:
  219. logger.debug('WAITING FOR .prompt()')
  220. self.ssh.sendline('\n')
  221. timeout_not_reached = self.ssh.prompt(timeout=1)
  222. if not timeout_not_reached:
  223. logger.debug('TIMEOUT REACHED')
  224. break
  225. logger.debug('WAITING FOR .before')
  226. output = self.ssh.before.decode('utf-8').strip()
  227. logger.debug(
  228. f'WAITING FOR END OF command output ({bool(output)}): {output}'
  229. )
  230. if output == '':
  231. break
  232. command_output += output
  233. command_output = command_output.lstrip(cmd).strip()
  234. # get the exit code
  235. self.ssh.sendline('echo $?')
  236. self.ssh.prompt()
  237. exit_code = self.ssh.before.decode('utf-8')
  238. while not exit_code.startswith('echo $?'):
  239. self.ssh.prompt()
  240. exit_code = self.ssh.before.decode('utf-8')
  241. logger.debug(f'WAITING FOR exit code: {exit_code}')
  242. exit_code = int(exit_code.lstrip('echo $?').strip())
  243. return exit_code, command_output
  244. def copy_to(self, host_src: str, sandbox_dest: str, recursive: bool = False):
  245. # mkdir -p sandbox_dest if it doesn't exist
  246. exit_code, logs = self.container.exec_run(
  247. ['/bin/bash', '-c', f'mkdir -p {sandbox_dest}'],
  248. workdir=self.sandbox_workspace_dir,
  249. environment=self._env,
  250. )
  251. if exit_code != 0:
  252. raise Exception(
  253. f'Failed to create directory {sandbox_dest} in sandbox: {logs}'
  254. )
  255. if recursive:
  256. assert os.path.isdir(
  257. host_src
  258. ), 'Source must be a directory when recursive is True'
  259. files = glob(host_src + '/**/*', recursive=True)
  260. srcname = os.path.basename(host_src)
  261. tar_filename = os.path.join(os.path.dirname(host_src), srcname + '.tar')
  262. with tarfile.open(tar_filename, mode='w') as tar:
  263. for file in files:
  264. tar.add(
  265. file, arcname=os.path.relpath(file, os.path.dirname(host_src))
  266. )
  267. else:
  268. assert os.path.isfile(
  269. host_src
  270. ), 'Source must be a file when recursive is False'
  271. srcname = os.path.basename(host_src)
  272. tar_filename = os.path.join(os.path.dirname(host_src), srcname + '.tar')
  273. with tarfile.open(tar_filename, mode='w') as tar:
  274. tar.add(host_src, arcname=srcname)
  275. with open(tar_filename, 'rb') as f:
  276. data = f.read()
  277. self.container.put_archive(os.path.dirname(sandbox_dest), data)
  278. os.remove(tar_filename)
  279. def execute_in_background(self, cmd: str) -> Process:
  280. result = self.container.exec_run(
  281. self.get_exec_cmd(cmd),
  282. socket=True,
  283. workdir=self.sandbox_workspace_dir,
  284. environment=self._env,
  285. )
  286. result.output._sock.setblocking(0)
  287. pid = self.get_pid(cmd)
  288. bg_cmd = DockerProcess(self.cur_background_id, cmd, result, pid)
  289. self.background_commands[bg_cmd.pid] = bg_cmd
  290. self.cur_background_id += 1
  291. return bg_cmd
  292. def get_pid(self, cmd):
  293. exec_result = self.container.exec_run('ps aux', environment=self._env)
  294. processes = exec_result.output.decode('utf-8').splitlines()
  295. cmd = ' '.join(self.get_exec_cmd(cmd))
  296. for process in processes:
  297. if cmd in process:
  298. pid = process.split()[1] # second column is the pid
  299. return pid
  300. return None
  301. def kill_background(self, id: int) -> Process:
  302. if id not in self.background_commands:
  303. raise SandboxInvalidBackgroundCommandError()
  304. bg_cmd = self.background_commands[id]
  305. if bg_cmd.pid is not None:
  306. self.container.exec_run(
  307. f'kill -9 {bg_cmd.pid}',
  308. workdir=self.sandbox_workspace_dir,
  309. environment=self._env,
  310. )
  311. assert isinstance(bg_cmd, DockerProcess)
  312. bg_cmd.result.output.close()
  313. self.background_commands.pop(id)
  314. return bg_cmd
  315. def stop_docker_container(self):
  316. try:
  317. container = self.docker_client.containers.get(self.container_name)
  318. container.stop()
  319. container.remove()
  320. elapsed = 0
  321. while container.status != 'exited':
  322. time.sleep(1)
  323. elapsed += 1
  324. if elapsed > self.timeout:
  325. break
  326. container = self.docker_client.containers.get(self.container_name)
  327. except docker.errors.NotFound:
  328. pass
  329. def get_working_directory(self):
  330. exit_code, result = self.execute('pwd')
  331. if exit_code != 0:
  332. raise Exception('Failed to get working directory')
  333. return result.strip()
    @property
    def user_id(self):
        """Return ``config.sandbox_user_id``; used as the uid of the ``opendevin`` user."""
        return config.sandbox_user_id
    @property
    def sandbox_user_id(self):
        """Return ``config.sandbox_user_id`` (same value as `user_id`)."""
        return config.sandbox_user_id
    @property
    def run_as_devin(self):
        """Return ``config.run_as_devin``: use the ``opendevin`` user when truthy, else ``root``."""
        return config.run_as_devin
    @property
    def sandbox_workspace_dir(self):
        """Return ``config.workspace_mount_path_in_sandbox``, the workspace path inside the container."""
        return config.workspace_mount_path_in_sandbox
    @property
    def ssh_hostname(self):
        """Return ``config.ssh_hostname``, the host the SSH session connects to."""
        return config.ssh_hostname
    @property
    def use_host_network(self):
        """Return ``config.use_host_network``: host networking when truthy, port forwarding otherwise."""
        return config.use_host_network
  352. def is_container_running(self):
  353. try:
  354. container = self.docker_client.containers.get(self.container_name)
  355. if container.status == 'running':
  356. self.container = container
  357. return True
  358. return False
  359. except docker.errors.NotFound:
  360. return False
  361. def restart_docker_container(self):
  362. try:
  363. self.stop_docker_container()
  364. logger.info('Container stopped')
  365. except docker.errors.DockerException as ex:
  366. logger.exception('Failed to stop container', exc_info=False)
  367. raise ex
  368. try:
  369. network_kwargs: dict[str, str | dict[str, int]] = {}
  370. if self.use_host_network:
  371. network_kwargs['network_mode'] = 'host'
  372. else:
  373. # FIXME: This is a temporary workaround for Mac OS
  374. network_kwargs['ports'] = {f'{self._ssh_port}/tcp': self._ssh_port}
  375. logger.warning(
  376. (
  377. 'Using port forwarding for Mac OS. '
  378. 'Server started by OpenDevin will not be accessible from the host machine at the moment. '
  379. 'See https://github.com/OpenDevin/OpenDevin/issues/897 for more information.'
  380. )
  381. )
  382. mount_dir = config.workspace_mount_path
  383. logger.info(f'Mounting workspace directory: {mount_dir}')
  384. # start the container
  385. self.container = self.docker_client.containers.run(
  386. self.container_image,
  387. # allow root login
  388. command=f"/usr/sbin/sshd -D -p {self._ssh_port} -o 'PermitRootLogin=yes'",
  389. **network_kwargs,
  390. working_dir=self.sandbox_workspace_dir,
  391. name=self.container_name,
  392. detach=True,
  393. volumes={
  394. mount_dir: {'bind': self.sandbox_workspace_dir, 'mode': 'rw'},
  395. # mount cache directory to /home/opendevin/.cache for pip cache reuse
  396. config.cache_dir: {
  397. 'bind': (
  398. '/home/opendevin/.cache'
  399. if self.run_as_devin
  400. else '/root/.cache'
  401. ),
  402. 'mode': 'rw',
  403. },
  404. },
  405. )
  406. logger.info('Container started')
  407. except Exception as ex:
  408. logger.exception('Failed to start container', exc_info=False)
  409. raise ex
  410. # wait for container to be ready
  411. elapsed = 0
  412. while self.container.status != 'running':
  413. if self.container.status == 'exited':
  414. logger.info('container exited')
  415. logger.info('container logs:')
  416. logger.info(self.container.logs())
  417. break
  418. time.sleep(1)
  419. elapsed += 1
  420. self.container = self.docker_client.containers.get(self.container_name)
  421. logger.info(
  422. f'waiting for container to start: {elapsed}, container status: {self.container.status}'
  423. )
  424. if elapsed > self.timeout:
  425. break
  426. if self.container.status != 'running':
  427. raise Exception('Failed to start container')
  428. # clean up the container, cannot do it in __del__ because the python interpreter is already shutting down
  429. def close(self):
  430. containers = self.docker_client.containers.list(all=True)
  431. for container in containers:
  432. try:
  433. if container.name.startswith(self.container_name_prefix):
  434. container.remove(force=True)
  435. except docker.errors.NotFound:
  436. pass
  437. if __name__ == '__main__':
  438. try:
  439. ssh_box = DockerSSHBox()
  440. except Exception as e:
  441. logger.exception('Failed to start Docker container: %s', e)
  442. sys.exit(1)
  443. logger.info(
  444. "Interactive Docker container started. Type 'exit' or use Ctrl+C to exit."
  445. )
  446. # Initialize required plugins
  447. ssh_box.init_plugins([JupyterRequirement(), SWEAgentCommandsRequirement()])
  448. logger.info(
  449. '--- SWE-AGENT COMMAND DOCUMENTATION ---\n'
  450. f'{SWEAgentCommandsRequirement().documentation}\n'
  451. '---'
  452. )
  453. bg_cmd = ssh_box.execute_in_background(
  454. "while true; do echo 'dot ' && sleep 10; done"
  455. )
  456. sys.stdout.flush()
  457. try:
  458. while True:
  459. try:
  460. user_input = input('>>> ')
  461. except EOFError:
  462. logger.info('Exiting...')
  463. break
  464. if user_input.lower() == 'exit':
  465. logger.info('Exiting...')
  466. break
  467. if user_input.lower() == 'kill':
  468. ssh_box.kill_background(bg_cmd.pid)
  469. logger.info('Background process killed')
  470. continue
  471. exit_code, output = ssh_box.execute(user_input)
  472. logger.info('exit code: %d', exit_code)
  473. logger.info(output)
  474. if bg_cmd.pid in ssh_box.background_commands:
  475. logs = ssh_box.read_logs(bg_cmd.pid)
  476. logger.info('background logs: %s', logs)
  477. sys.stdout.flush()
  478. except KeyboardInterrupt:
  479. logger.info('Exiting...')
  480. ssh_box.close()