# swe_env_box.py (12 KB)
  1. import sys
  2. import uuid
  3. from opendevin.core.config import config
  4. from opendevin.core.logger import opendevin_logger as logger
  5. from opendevin.runtime.docker.ssh_box import DockerSSHBox
  6. from opendevin.runtime.plugins import JupyterRequirement, SWEAgentCommandsRequirement
  # Docker image that `SWEBenchSSHBox.get_box_for_instance` starts its sandbox from.
  SWE_BENCH_CONTAINER_IMAGE = 'ghcr.io/opendevin/eval-swe-bench:full-v1.0'
  class SWEBenchSSHBox(DockerSSHBox):
      """SSH sandbox that runs inside a SWE-bench evaluation container.

      On construction the box backs up ``~/.bashrc``, appends SWE-bench
      specific exports and a ``git`` alias to a fresh one, and sources
      ``/swe_util/swe_entry.sh``.
      """

      def __init__(
          self,
          container_image: str,
          timeout: int = 120,
          sid: str | None = None,
          swe_instance_id: str | None = None,
          swe_instance: dict | None = None,
          skip_workspace_mount: bool = True,
      ):
          """Start the container and prepare its shell environment.

          Args:
              container_image: Docker image to start; must not be None.
              timeout: Forwarded to ``DockerSSHBox.__init__``.
              sid: Accepted for signature compatibility but ignored; the
                  session id is always rebuilt from ``swe_instance_id`` plus a
                  random UUID.
              swe_instance_id: Instance identifier; required. Exported inside
                  the container as ``SWE_INSTANCE_ID``.
              swe_instance: Instance record. ``get_diff_patch`` reads its
                  ``'base_commit'`` entry.
              skip_workspace_mount: When True, ``volumes`` leaves out the
                  workspace bind mount.

          Raises:
              ValueError: If ``swe_instance_id`` is None.
              AssertionError: If ``container_image`` is None or one of the
                  setup commands exits with a non-zero status.
          """
          if swe_instance_id is None:
              raise ValueError('swe_instance_id must be provided!')
          self.swe_instance_id = swe_instance_id
          self.swe_instance = swe_instance
          # Assigned before super().__init__() — presumably the base constructor
          # reads the `volumes` property, which needs this flag; TODO confirm.
          self.skip_workspace_mount = skip_workspace_mount

          assert (
              container_image is not None
          ), 'container_image is required for SWEBenchSSHBox!'

          # NOTE(review): a comment here used to say "Need to run as root to use
          # SWEBench container", but nothing in this constructor selects the
          # user — confirm where that is configured.
          # The caller-supplied `sid` is overwritten on purpose-or-accident here;
          # kept as-is so existing session naming does not change.
          sid = f'swe_bench_{swe_instance_id}' + str(uuid.uuid4())
          super().__init__(container_image, timeout, sid)

          exit_code, output = self.execute('mv ~/.bashrc ~/.bashrc.bak')
          assert exit_code == 0, f'Failed to backup ~/.bashrc: {output}'

          exit_code, output = self.execute(
              f"echo 'export SWE_INSTANCE_ID={self.swe_instance_id}' >> ~/.bashrc && echo 'export PIP_CACHE_DIR=~/.cache/pip' >> ~/.bashrc && echo \"alias git='git --no-pager'\" >> ~/.bashrc"
          )
          assert exit_code == 0, f'Failed to set SWE_INSTANCE_ID in ~/.bashrc: {output}'

          logger.info('Sourcing swe_entry.sh to set up environment variables')
          # larger timeout for SWEBench init to account for long-running
          # installations (e.g., require compilation)
          exit_code, output = self.execute('source /swe_util/swe_entry.sh', timeout=600)
          logger.info('exit code: %d', exit_code)
          logger.info(output)
          assert exit_code == 0, f'Failed to source swe_entry.sh: {output}'
          logger.info('Sourced swe_entry.sh successfully')

      @property
      def volumes(self):
          """Return the parent's volume mapping, optionally without the workspace.

          When ``skip_workspace_mount`` is set, every entry whose ``'bind'``
          equals ``self.sandbox_workspace_dir`` is filtered out.
          """
          mounts = super().volumes
          if not self.skip_workspace_mount:
              return mounts
          return {
              k: v
              for k, v in mounts.items()
              if v['bind'] != self.sandbox_workspace_dir
          }

      @classmethod
      def get_box_for_instance(
          cls,
          instance,
          workspace_dir_name=None,
          n_tries=5,
          skip_workspace_mount: bool = True,
          workspace_mount_path: str | None = None,
      ) -> 'SWEBenchSSHBox':
          """Create a sandbox for ``instance`` and prepare its repository.

          Args:
              instance: Mapping with at least ``'repo'``, ``'version'`` and
                  ``'instance_id'`` entries.
              workspace_dir_name: Directory under ``/workspace`` to ``cd`` into.
                  Defaults to ``"<repo>__<version>"`` with ``/`` replaced by
                  ``__``.
              n_tries: Currently unused; kept so existing callers keep working.
              skip_workspace_mount: Forwarded to the constructor.
              workspace_mount_path: Written to ``config.workspace_base`` and
                  ``config.workspace_mount_path`` unconditionally (including
                  when it is None).

          Returns:
              The started sandbox, positioned in the repository directory with
              the working tree reset and all git remotes removed.

          Raises:
              SystemExit: With status 1 if any repository setup command fails.
                  The sandbox is closed first so the container is not leaked.
          """
          if workspace_dir_name is None:
              workspace_dir_name = f"{instance['repo']}__{instance['version']}".replace(
                  '/', '__'
              )
          config.workspace_base = workspace_mount_path
          config.workspace_mount_path = workspace_mount_path
          sandbox = cls(
              container_image=SWE_BENCH_CONTAINER_IMAGE,
              swe_instance_id=instance['instance_id'],
              swe_instance=instance,
              skip_workspace_mount=skip_workspace_mount,
          )
          logger.info(f"SSH box started for instance {instance['instance_id']}.")

          # (command, message logged on failure), executed in order.
          setup_steps = (
              # cd to the repo
              (f'cd /workspace/{workspace_dir_name}', 'Failed to cd to the repo'),
              # remove all future commits & remote following Devin
              # https://www.cognition-labs.com/post/swe-bench-technical-report
              ('git reset --hard', 'Failed to reset the repo'),
              (
                  'for remote_name in $(git remote); do git remote remove "${remote_name}"; done',
                  'Failed to remove remote',
              ),
          )
          try:
              for command, failure_message in setup_steps:
                  exit_code, output = sandbox.execute(command)
                  if exit_code != 0:
                      logger.error(f'{failure_message}: {output}')
                      sys.exit(1)
          except BaseException:
              # Covers SystemExit from above as well as errors raised by
              # execute(): release the container before propagating.
              sandbox.close()
              raise
          return sandbox

      def get_diff_patch(self):
          """Return the staged diff of the repo against the instance's base commit.

          Stages every change with ``git add --all`` and then runs
          ``git diff --no-color --cached <base_commit>``.

          Returns:
              The diff output, or ``''`` if no base commit is available or
              either git command exits with a non-zero status.
          """
          # `swe_instance` defaults to None in the constructor; without this
          # guard the lookup below would raise instead of reporting a failure.
          base_commit = (self.swe_instance or {}).get('base_commit')
          if not base_commit:
              logger.error('Failed to get git diff: no base_commit in swe_instance')
              return ''

          # add everything to the index
          exit_code, _ = self.execute('git add --all')
          if exit_code != 0:
              logger.error('Failed to add everything to the index')
              return ''

          # get the git diff
          exit_code, git_patch = self.execute(
              f'git diff --no-color --cached {base_commit}'
          )
          if exit_code != 0:
              logger.error('Failed to get git diff')
              return ''
          return git_patch
  if __name__ == '__main__':
      # Manual smoke test: start a sandbox for one Django instance, check that
      # the target tests fail before and pass after the gold patch, then drop
      # into an interactive shell loop.
      EXAMPLE_INSTANCE = {
          'repo': 'django/django',
          'instance_id': 'django__django-11099',
          'base_commit': 'd26b2424437dabeeca94d7900b37d2df4410da0c',
          'patch': "diff --git a/django/contrib/auth/validators.py b/django/contrib/auth/validators.py\n--- a/django/contrib/auth/validators.py\n+++ b/django/contrib/auth/validators.py\n@@ -7,7 +7,7 @@\n \n @deconstructible\n class ASCIIUsernameValidator(validators.RegexValidator):\n- regex = r'^[\\w.@+-]+$'\n+ regex = r'^[\\w.@+-]+\\Z'\n message = _(\n 'Enter a valid username. This value may contain only English letters, '\n 'numbers, and @/./+/-/_ characters.'\n@@ -17,7 +17,7 @@ class ASCIIUsernameValidator(validators.RegexValidator):\n \n @deconstructible\n class UnicodeUsernameValidator(validators.RegexValidator):\n- regex = r'^[\\w.@+-]+$'\n+ regex = r'^[\\w.@+-]+\\Z'\n message = _(\n 'Enter a valid username. This value may contain only letters, '\n 'numbers, and @/./+/-/_ characters.'\n",
          'test_patch': "diff --git a/tests/auth_tests/test_validators.py b/tests/auth_tests/test_validators.py\n--- a/tests/auth_tests/test_validators.py\n+++ b/tests/auth_tests/test_validators.py\n@@ -237,7 +237,7 @@ def test_unicode_validator(self):\n invalid_usernames = [\n \"o'connell\", \"عبد ال\",\n \"zerowidth\\u200Bspace\", \"nonbreaking\\u00A0space\",\n- \"en\\u2013dash\",\n+ \"en\\u2013dash\", 'trailingnewline\\u000A',\n ]\n v = validators.UnicodeUsernameValidator()\n for valid in valid_usernames:\n@@ -250,7 +250,7 @@ def test_unicode_validator(self):\n \n def test_ascii_validator(self):\n valid_usernames = ['glenn', 'GLEnN', 'jean-marc']\n- invalid_usernames = [\"o'connell\", 'Éric', 'jean marc', \"أحمد\"]\n+ invalid_usernames = [\"o'connell\", 'Éric', 'jean marc', \"أحمد\", 'trailingnewline\\n']\n v = validators.ASCIIUsernameValidator()\n for valid in valid_usernames:\n with self.subTest(valid=valid):\n",
          'problem_statement': "UsernameValidator allows trailing newline in usernames\nDescription\n\t\nASCIIUsernameValidator and UnicodeUsernameValidator use the regex \nr'^[\\w.@+-]+$'\nThe intent is to only allow alphanumeric characters as well as ., @, +, and -. However, a little known quirk of Python regexes is that $ will also match a trailing newline. Therefore, the user name validators will accept usernames which end with a newline. You can avoid this behavior by instead using \\A and \\Z to terminate regexes. For example, the validator regex could be changed to\nr'\\A[\\w.@+-]+\\Z'\nin order to reject usernames that end with a newline.\nI am not sure how to officially post a patch, but the required change is trivial - using the regex above in the two validators in contrib.auth.validators.\n",
          'hints_text': '',
          'created_at': '2019-03-20T03:46:18Z',
          'version': '3.0',
          'FAIL_TO_PASS': '["test_ascii_validator (auth_tests.test_validators.UsernameValidatorsTests)", "test_unicode_validator (auth_tests.test_validators.UsernameValidatorsTests)", "test_help_text (auth_tests.test_validators.UserAttributeSimilarityValidatorTest)"]',
          'PASS_TO_PASS': '["test_help_text (auth_tests.test_validators.MinimumLengthValidatorTest)", "test_validate (auth_tests.test_validators.MinimumLengthValidatorTest)", "test_help_text (auth_tests.test_validators.NumericPasswordValidatorTest)", "test_validate (auth_tests.test_validators.NumericPasswordValidatorTest)", "test_validate (auth_tests.test_validators.UserAttributeSimilarityValidatorTest)", "test_validate_property (auth_tests.test_validators.UserAttributeSimilarityValidatorTest)", "test_empty_password_validator_help_text_html (auth_tests.test_validators.PasswordValidationTest)", "test_get_default_password_validators (auth_tests.test_validators.PasswordValidationTest)", "test_get_password_validators_custom (auth_tests.test_validators.PasswordValidationTest)", "test_password_changed (auth_tests.test_validators.PasswordValidationTest)", "test_password_changed_with_custom_validator (auth_tests.test_validators.PasswordValidationTest)", "test_password_validators_help_text_html (auth_tests.test_validators.PasswordValidationTest)", "test_password_validators_help_text_html_escaping (auth_tests.test_validators.PasswordValidationTest)", "test_password_validators_help_texts (auth_tests.test_validators.PasswordValidationTest)", "test_validate_password (auth_tests.test_validators.PasswordValidationTest)", "test_help_text (auth_tests.test_validators.CommonPasswordValidatorTest)", "test_validate (auth_tests.test_validators.CommonPasswordValidatorTest)", "test_validate_custom_list (auth_tests.test_validators.CommonPasswordValidatorTest)", "test_validate_django_supplied_file (auth_tests.test_validators.CommonPasswordValidatorTest)"]',
          'environment_setup_commit': '419a78300f7cd27611196e1e464d50fd0385ff27',
      }

      sandbox = SWEBenchSSHBox.get_box_for_instance(instance=EXAMPLE_INSTANCE)
      # Everything after the sandbox exists runs under try/finally so that a
      # failed assertion or unexpected error still closes the sandbox.
      try:
          # in actual eval, this will be initialized by the controller
          sandbox.init_plugins([JupyterRequirement(), SWEAgentCommandsRequirement()])

          # PRE TEST
          exit_code, output = sandbox.execute('cd $REPO_PATH')
          assert exit_code == 0, 'Failed to cd $REPO_PATH'
          logger.info(f'cd $REPO_PATH: {output}')

          # apply test patch
          exit_code, output = sandbox.execute('git apply $SWE_TASK_DIR/test.patch')
          assert exit_code == 0, 'Failed to apply test patch'
          logger.info(f'git apply $SWE_TASK_DIR/test.patch: {output}')

          # TEST
          exit_code, output = sandbox.execute(
              './tests/runtests.py --verbosity 2 auth_tests.test_validators'
          )
          assert exit_code == 1, 'Expected exit code 1 (since this is a FAIL_TO_PASS)'
          logger.info(f'$TEST_CMD:\n{output}')

          # apply gold patch
          exit_code, output = sandbox.execute('git apply $SWE_TASK_DIR/gold.patch')
          logger.info('exit code: %d', exit_code)
          logger.info(f'git apply $SWE_TASK_DIR/gold.patch: {output}')

          # TEST
          exit_code, output = sandbox.execute(
              './tests/runtests.py --verbosity 2 auth_tests.test_validators'
          )
          assert exit_code == 0, 'Expected exit code 0 (since we applied the gold patch)'
          logger.info(f'$TEST_CMD:\n{output}')

          # Reset the repo
          exit_code, output = sandbox.execute('git reset --hard')
          assert exit_code == 0, 'Failed to reset the repo'
          logger.info(f'git reset --hard: {output}')

          bg_cmd = sandbox.execute_in_background(
              "while true; do echo 'dot ' && sleep 10; done"
          )

          sys.stdout.flush()
          # Interactive loop: 'exit' or EOF leaves, 'kill' stops the background
          # command, anything else is executed inside the sandbox.
          try:
              while True:
                  try:
                      user_input = input('>>> ')
                  except EOFError:
                      logger.info('Exiting...')
                      break
                  if user_input.lower() == 'exit':
                      logger.info('Exiting...')
                      break
                  if user_input.lower() == 'kill':
                      sandbox.kill_background(bg_cmd.pid)
                      logger.info('Background process killed')
                      continue
                  exit_code, output = sandbox.execute(user_input)
                  logger.info('exit code: %d', exit_code)
                  logger.info(output)
                  if bg_cmd.pid in sandbox.background_commands:
                      logs = sandbox.read_logs(bg_cmd.pid)
                      logger.info('background logs: %s', logs)
                  sys.stdout.flush()
          except KeyboardInterrupt:
              logger.info('Exiting...')
      finally:
          sandbox.close()