|
|
@@ -1,12 +1,14 @@
|
|
|
import sys
|
|
|
import uuid
|
|
|
|
|
|
+from datasets import load_dataset
|
|
|
+
|
|
|
from opendevin.core.config import config
|
|
|
from opendevin.core.logger import opendevin_logger as logger
|
|
|
from opendevin.runtime.docker.ssh_box import DockerSSHBox
|
|
|
from opendevin.runtime.plugins import JupyterRequirement, SWEAgentCommandsRequirement
|
|
|
|
|
|
-SWE_BENCH_CONTAINER_IMAGE = 'ghcr.io/opendevin/eval-swe-bench:full-v1.0'
|
|
|
+SWE_BENCH_CONTAINER_IMAGE = 'ghcr.io/opendevin/eval-swe-bench:full-v1.1'
|
|
|
|
|
|
|
|
|
class SWEBenchSSHBox(DockerSSHBox):
|
|
|
@@ -123,20 +125,15 @@ class SWEBenchSSHBox(DockerSSHBox):
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
- EXAMPLE_INSTANCE = {
|
|
|
- 'repo': 'django/django',
|
|
|
- 'instance_id': 'django__django-11099',
|
|
|
- 'base_commit': 'd26b2424437dabeeca94d7900b37d2df4410da0c',
|
|
|
- 'patch': "diff --git a/django/contrib/auth/validators.py b/django/contrib/auth/validators.py\n--- a/django/contrib/auth/validators.py\n+++ b/django/contrib/auth/validators.py\n@@ -7,7 +7,7 @@\n \n @deconstructible\n class ASCIIUsernameValidator(validators.RegexValidator):\n- regex = r'^[\\w.@+-]+$'\n+ regex = r'^[\\w.@+-]+\\Z'\n message = _(\n 'Enter a valid username. This value may contain only English letters, '\n 'numbers, and @/./+/-/_ characters.'\n@@ -17,7 +17,7 @@ class ASCIIUsernameValidator(validators.RegexValidator):\n \n @deconstructible\n class UnicodeUsernameValidator(validators.RegexValidator):\n- regex = r'^[\\w.@+-]+$'\n+ regex = r'^[\\w.@+-]+\\Z'\n message = _(\n 'Enter a valid username. This value may contain only letters, '\n 'numbers, and @/./+/-/_ characters.'\n",
|
|
|
- 'test_patch': "diff --git a/tests/auth_tests/test_validators.py b/tests/auth_tests/test_validators.py\n--- a/tests/auth_tests/test_validators.py\n+++ b/tests/auth_tests/test_validators.py\n@@ -237,7 +237,7 @@ def test_unicode_validator(self):\n invalid_usernames = [\n \"o'connell\", \"عبد ال\",\n \"zerowidth\\u200Bspace\", \"nonbreaking\\u00A0space\",\n- \"en\\u2013dash\",\n+ \"en\\u2013dash\", 'trailingnewline\\u000A',\n ]\n v = validators.UnicodeUsernameValidator()\n for valid in valid_usernames:\n@@ -250,7 +250,7 @@ def test_unicode_validator(self):\n \n def test_ascii_validator(self):\n valid_usernames = ['glenn', 'GLEnN', 'jean-marc']\n- invalid_usernames = [\"o'connell\", 'Éric', 'jean marc', \"أحمد\"]\n+ invalid_usernames = [\"o'connell\", 'Éric', 'jean marc', \"أحمد\", 'trailingnewline\\n']\n v = validators.ASCIIUsernameValidator()\n for valid in valid_usernames:\n with self.subTest(valid=valid):\n",
|
|
|
- 'problem_statement': "UsernameValidator allows trailing newline in usernames\nDescription\n\t\nASCIIUsernameValidator and UnicodeUsernameValidator use the regex \nr'^[\\w.@+-]+$'\nThe intent is to only allow alphanumeric characters as well as ., @, +, and -. However, a little known quirk of Python regexes is that $ will also match a trailing newline. Therefore, the user name validators will accept usernames which end with a newline. You can avoid this behavior by instead using \\A and \\Z to terminate regexes. For example, the validator regex could be changed to\nr'\\A[\\w.@+-]+\\Z'\nin order to reject usernames that end with a newline.\nI am not sure how to officially post a patch, but the required change is trivial - using the regex above in the two validators in contrib.auth.validators.\n",
|
|
|
- 'hints_text': '',
|
|
|
- 'created_at': '2019-03-20T03:46:18Z',
|
|
|
- 'version': '3.0',
|
|
|
- 'FAIL_TO_PASS': '["test_ascii_validator (auth_tests.test_validators.UsernameValidatorsTests)", "test_unicode_validator (auth_tests.test_validators.UsernameValidatorsTests)", "test_help_text (auth_tests.test_validators.UserAttributeSimilarityValidatorTest)"]',
|
|
|
- 'PASS_TO_PASS': '["test_help_text (auth_tests.test_validators.MinimumLengthValidatorTest)", "test_validate (auth_tests.test_validators.MinimumLengthValidatorTest)", "test_help_text (auth_tests.test_validators.NumericPasswordValidatorTest)", "test_validate (auth_tests.test_validators.NumericPasswordValidatorTest)", "test_validate (auth_tests.test_validators.UserAttributeSimilarityValidatorTest)", "test_validate_property (auth_tests.test_validators.UserAttributeSimilarityValidatorTest)", "test_empty_password_validator_help_text_html (auth_tests.test_validators.PasswordValidationTest)", "test_get_default_password_validators (auth_tests.test_validators.PasswordValidationTest)", "test_get_password_validators_custom (auth_tests.test_validators.PasswordValidationTest)", "test_password_changed (auth_tests.test_validators.PasswordValidationTest)", "test_password_changed_with_custom_validator (auth_tests.test_validators.PasswordValidationTest)", "test_password_validators_help_text_html (auth_tests.test_validators.PasswordValidationTest)", "test_password_validators_help_text_html_escaping (auth_tests.test_validators.PasswordValidationTest)", "test_password_validators_help_texts (auth_tests.test_validators.PasswordValidationTest)", "test_validate_password (auth_tests.test_validators.PasswordValidationTest)", "test_help_text (auth_tests.test_validators.CommonPasswordValidatorTest)", "test_validate (auth_tests.test_validators.CommonPasswordValidatorTest)", "test_validate_custom_list (auth_tests.test_validators.CommonPasswordValidatorTest)", "test_validate_django_supplied_file (auth_tests.test_validators.CommonPasswordValidatorTest)"]',
|
|
|
- 'environment_setup_commit': '419a78300f7cd27611196e1e464d50fd0385ff27',
|
|
|
- }
|
|
|
+ # NOTE: It is preferable to load datasets from huggingface datasets and perform post-processing
|
|
|
+ # so we don't need to manage file uploading to OpenDevin's repo
|
|
|
+ dataset = load_dataset('princeton-nlp/SWE-bench_Lite')
|
|
|
+ swe_bench_tests = dataset['test'].to_pandas()
|
|
|
+
|
|
|
+ # INSTANCE_ID = 'django__django-11099'
|
|
|
+ INSTANCE_ID = 'astropy__astropy-12907'
|
|
|
+ swe_bench_tests = swe_bench_tests[swe_bench_tests['instance_id'] == INSTANCE_ID]
|
|
|
+ EXAMPLE_INSTANCE = swe_bench_tests.iloc[0].to_dict()
|
|
|
|
|
|
sandbox = SWEBenchSSHBox.get_box_for_instance(instance=EXAMPLE_INSTANCE)
|
|
|
|
|
|
@@ -154,9 +151,7 @@ if __name__ == '__main__':
|
|
|
logger.info(f'git apply $SWE_TASK_DIR/test.patch: {output}')
|
|
|
|
|
|
# TEST
|
|
|
- exit_code, output = sandbox.execute(
|
|
|
- './tests/runtests.py --verbosity 2 auth_tests.test_validators'
|
|
|
- )
|
|
|
+ exit_code, output = sandbox.execute('$TEST_CMD')
|
|
|
assert exit_code == 1, 'Expected exit code 1 (since this is a FAIL_TO_PASS)'
|
|
|
logger.info(f'$TEST_CMD:\n{output}')
|
|
|
|
|
|
@@ -166,9 +161,7 @@ if __name__ == '__main__':
|
|
|
logger.info(f'git apply $SWE_TASK_DIR/gold.patch: {output}')
|
|
|
|
|
|
# TEST
|
|
|
- exit_code, output = sandbox.execute(
|
|
|
- './tests/runtests.py --verbosity 2 auth_tests.test_validators'
|
|
|
- )
|
|
|
+ exit_code, output = sandbox.execute('$TEST_CMD')
|
|
|
assert exit_code == 0, 'Expected exit code 0 (since we applied the gold patch)'
|
|
|
logger.info(f'$TEST_CMD:\n{output}')
|
|
|
|