فهرست منبع

[Eval] Make SWE-Bench run_infer.sh default to running SWE-Bench Lite (#3704)

* feat: add SWE-bench fullset support

* fix instance image list

* update eval script and documentation

* increase timeout for remote runtime

* add push script

* handle the case when ret push is a generator

* update pbar

* set SWE-Bench default to run SWE-Bench lite
Xingyao Wang 1 سال پیش
والد
کامیت
d8a87d7ccb
2 فایلهای تغییر یافته به همراه 12 افزوده شده و 4 حذف شده
  1. 11 3
      evaluation/swe_bench/scripts/run_infer.sh
  2. 1 1
      openhands/runtime/remote/runtime.py

+ 11 - 3
evaluation/swe_bench/scripts/run_infer.sh

@@ -33,9 +33,15 @@ if [ -z "$USE_INSTANCE_IMAGE" ]; then
   USE_INSTANCE_IMAGE=true
 fi
 
-if [ -z "$SUBSET" ]; then
-  echo "SUBSET not specified, use default lite-test"
-  SUBSET="lite-test"
+
+if [ -z "$DATASET" ]; then
+  echo "DATASET not specified, use default princeton-nlp/SWE-bench_Lite"
+  DATASET="princeton-nlp/SWE-bench_Lite"
+fi
+
+if [ -z "$SPLIT" ]; then
+  echo "SPLIT not specified, use default test"
+  SPLIT="test"
 fi
 
 export USE_INSTANCE_IMAGE=$USE_INSTANCE_IMAGE
@@ -46,6 +52,8 @@ get_agent_version
 echo "AGENT: $AGENT"
 echo "AGENT_VERSION: $AGENT_VERSION"
 echo "MODEL_CONFIG: $MODEL_CONFIG"
+echo "DATASET: $DATASET"
+echo "SPLIT: $SPLIT"
 
 # Default to NOT use Hint
 if [ -z "$USE_HINT_TEXT" ]; then

+ 1 - 1
openhands/runtime/remote/runtime.py

@@ -195,7 +195,7 @@ class RemoteRuntime(Runtime):
             retry_exceptions = DEFAULT_RETRY_EXCEPTIONS
 
         @retry(
-            stop=stop_after_attempt(10),
+            stop=stop_after_attempt(30),
             wait=wait_exponential(multiplier=1, min=4, max=60),
             retry=retry_if_exception_type(tuple(retry_exceptions)),
             reraise=True,