Browse Source

Add NUM_WORKERS variable to run_infer.sh scripts for configurable worker settings (#2597)

* Add NUM_WORKERS variable to run_infer.sh scripts for configurable worker settings

* Update evaluation/webarena/scripts/run_infer.sh

---------

Co-authored-by: OpenDevin <opendevin@all-hands.dev>
Graham Neubig 1 năm trước cách đây
mục cha
commit
cab7a288ca

+ 6 - 1
evaluation/EDA/scripts/run_infer.sh

@@ -8,7 +8,12 @@ COMMIT_HASH=$2
 AGENT=$3
 DATASET=$4
 EVAL_LIMIT=$5
+NUM_WORKERS=$6
 
+if [ -z "$NUM_WORKERS" ]; then
+  NUM_WORKERS=1
+  echo "Number of workers not specified, use default $NUM_WORKERS"
+fi
 checkout_eval_branch
 
 if [ -z "$AGENT" ]; then
@@ -46,7 +51,7 @@ COMMAND="poetry run python evaluation/EDA/run_infer.py \
   --max-iterations 20 \
   --OPENAI_API_KEY $OPENAI_API_KEY \
   --max-chars 10000000 \
-  --eval-num-workers 1 \
+  --eval-num-workers $NUM_WORKERS \
   --eval-note ${AGENT_VERSION}_${DATASET}"
 
 if [ -n "$EVAL_LIMIT" ]; then

+ 6 - 1
evaluation/agent_bench/scripts/run_infer.sh

@@ -7,7 +7,12 @@ MODEL_CONFIG=$1
 COMMIT_HASH=$2
 AGENT=$3
 EVAL_LIMIT=$4
+NUM_WORKERS=$5
 
+if [ -z "$NUM_WORKERS" ]; then
+  NUM_WORKERS=1
+  echo "Number of workers not specified, use default $NUM_WORKERS"
+fi
 checkout_eval_branch
 
 if [ -z "$AGENT" ]; then
@@ -26,7 +31,7 @@ COMMAND="export PYTHONPATH=evaluation/agent_bench:\$PYTHONPATH && poetry run pyt
   --llm-config $MODEL_CONFIG \
   --max-iterations 30 \
   --max-chars 10000000 \
-  --eval-num-workers 5 \
+  --eval-num-workers $NUM_WORKERS \
   --eval-note $AGENT_VERSION"
 
 if [ -n "$EVAL_LIMIT" ]; then

+ 6 - 1
evaluation/biocoder/scripts/run_infer.sh

@@ -8,7 +8,12 @@ COMMIT_HASH=$2
 AGENT=$3
 EVAL_LIMIT=$4
 DATASET="biocoder"
+NUM_WORKERS=$5
 
+if [ -z "$NUM_WORKERS" ]; then
+  NUM_WORKERS=1
+  echo "Number of workers not specified, use default $NUM_WORKERS"
+fi
 checkout_eval_branch
 
 if [ -z "$AGENT" ]; then
@@ -28,7 +33,7 @@ COMMAND="poetry run python evaluation/biocoder/run_infer.py \
   --llm-config $MODEL_CONFIG \
   --max-iterations 10 \
   --max-chars 10000000 \
-  --eval-num-workers 1 \
+  --eval-num-workers $NUM_WORKERS \
   --eval-note ${AGENT_VERSION}_${DATASET}"
 
 if [ -n "$EVAL_LIMIT" ]; then

+ 6 - 1
evaluation/bird/scripts/run_infer.sh

@@ -7,7 +7,12 @@ MODEL_CONFIG=$1
 COMMIT_HASH=$2
 AGENT=$3
 EVAL_LIMIT=$4
+NUM_WORKERS=$5
 
+if [ -z "$NUM_WORKERS" ]; then
+  NUM_WORKERS=1
+  echo "Number of workers not specified, use default $NUM_WORKERS"
+fi
 checkout_eval_branch
 
 if [ -z "$AGENT" ]; then
@@ -26,7 +31,7 @@ COMMAND="poetry run python evaluation/bird/run_infer.py \
   --llm-config $MODEL_CONFIG \
   --max-iterations 5 \
   --max-chars 10000000 \
-  --eval-num-workers 1 \
+  --eval-num-workers $NUM_WORKERS \
   --eval-note $AGENT_VERSION" \
 
 if [ -n "$EVAL_LIMIT" ]; then

+ 6 - 1
evaluation/gaia/scripts/run_infer.sh

@@ -8,7 +8,12 @@ COMMIT_HASH=$2
 AGENT=$3
 EVAL_LIMIT=$4
 LEVELS=$5
+NUM_WORKERS=$6
 
+if [ -z "$NUM_WORKERS" ]; then
+  NUM_WORKERS=1
+  echo "Number of workers not specified, use default $NUM_WORKERS"
+fi
 checkout_eval_branch
 
 if [ -z "$AGENT" ]; then
@@ -37,7 +42,7 @@ COMMAND="poetry run python ./evaluation/gaia/run_infer.py \
   --level $LEVELS \
   --data-split validation \
   --max-chars 10000000 \
-  --eval-num-workers 1 \
+  --eval-num-workers $NUM_WORKERS \
   --eval-note ${AGENT_VERSION}_${LEVELS}"
 
 if [ -n "$EVAL_LIMIT" ]; then

+ 6 - 1
evaluation/gorilla/scripts/run_infer.sh

@@ -8,7 +8,12 @@ COMMIT_HASH=$2
 AGENT=$3
 EVAL_LIMIT=$4
 HUBS=$5
+NUM_WORKERS=$6
 
+if [ -z "$NUM_WORKERS" ]; then
+  NUM_WORKERS=1
+  echo "Number of workers not specified, use default $NUM_WORKERS"
+fi
 checkout_eval_branch
 
 if [ -z "$AGENT" ]; then
@@ -35,7 +40,7 @@ COMMAND="poetry run python evaluation/gorilla/run_infer.py \
   --hubs $HUBS \
   --data-split validation \
   --max-chars 10000000 \
-  --eval-num-workers 1 \
+  --eval-num-workers $NUM_WORKERS \
   --eval-note ${AGENT_VERSION}_${LEVELS}"
 
 if [ -n "$EVAL_LIMIT" ]; then

+ 6 - 1
evaluation/gpqa/scripts/run_infer.sh

@@ -8,7 +8,12 @@ COMMIT_HASH=$2
 EVAL_LIMIT=$3
 DATA_SPLIT=$4
 AGENT=$5
+NUM_WORKERS=$6
 
+if [ -z "$NUM_WORKERS" ]; then
+  NUM_WORKERS=1
+  echo "Number of workers not specified, use default $NUM_WORKERS"
+fi
 checkout_eval_branch
 
 if [ -z "$AGENT" ]; then
@@ -33,7 +38,7 @@ COMMAND="poetry run python evaluation/gpqa/run_infer.py \
   --llm-config $MODEL_CONFIG \
   --max-iterations 10 \
   --max-chars 10000000 \
-  --eval-num-workers 1 \
+  --eval-num-workers $NUM_WORKERS \
   --data-split $DATA_SPLIT \
   --eval-note $AGENT_VERSION"
 

+ 6 - 1
evaluation/humanevalfix/scripts/run_infer.sh

@@ -7,7 +7,12 @@ MODEL_CONFIG=$1
 COMMIT_HASH=$2
 AGENT=$3
 EVAL_LIMIT=$4
+NUM_WORKERS=$5
 
+if [ -z "$NUM_WORKERS" ]; then
+  NUM_WORKERS=1
+  echo "Number of workers not specified, use default $NUM_WORKERS"
+fi
 echo "
 ################################################################################
                                   !!!WARNING!!!
@@ -64,7 +69,7 @@ COMMAND="poetry run python evaluation/humanevalfix/run_infer.py \
   --llm-config $MODEL_CONFIG \
   --max-iterations 10 \
   --max-chars 10000000 \
-  --eval-num-workers 1 \
+  --eval-num-workers $NUM_WORKERS \
   --eval-note $AGENT_VERSION"
 
 if [ -n "$EVAL_LIMIT" ]; then

+ 6 - 1
evaluation/logic_reasoning/scripts/run_infer.sh

@@ -8,7 +8,12 @@ MODEL_CONFIG=$2
 COMMIT_HASH=$3
 EVAL_LIMIT=$4
 AGENT=$5
+NUM_WORKERS=$6
 
+if [ -z "$NUM_WORKERS" ]; then
+  NUM_WORKERS=1
+  echo "Number of workers not specified, use default $NUM_WORKERS"
+fi
 # ################################################################################
 
 checkout_eval_branch
@@ -30,7 +35,7 @@ COMMAND="poetry run python evaluation/logic_reasoning/run_infer.py \
   --dataset $DATASET \
   --max-iterations 10 \
   --max-chars 10000000 \
-  --eval-num-workers 1 \
+  --eval-num-workers $NUM_WORKERS \
   --eval-note $AGENT_VERSION"
 
 if [ -n "$EVAL_LIMIT" ]; then

+ 6 - 1
evaluation/miniwob/scripts/run_infer.sh

@@ -16,7 +16,12 @@ COMMIT_HASH=$2
 AGENT=$3
 NOTE=$4
 EVAL_LIMIT=$5
+NUM_WORKERS=$6
 
+if [ -z "$NUM_WORKERS" ]; then
+  NUM_WORKERS=1
+  echo "Number of workers not specified, use default $NUM_WORKERS"
+fi
 checkout_eval_branch
 
 if [ -z "$AGENT" ]; then
@@ -37,7 +42,7 @@ COMMAND="poetry run python evaluation/miniwob/run_infer.py \
   --llm-config $MODEL_CONFIG \
   --max-iterations 10 \
   --max-chars 10000000 \
-  --eval-note $EVAL_NOTE"
+  --eval-num-workers $NUM_WORKERS \
 
 if [ -n "$EVAL_LIMIT" ]; then
   echo "EVAL_LIMIT: $EVAL_LIMIT"

+ 6 - 1
evaluation/mint/scripts/run_infer.sh

@@ -7,7 +7,12 @@ MODEL_CONFIG=$1
 COMMIT_HASH=$2
 SUBSET=$3
 EVAL_LIMIT=$4
+NUM_WORKERS=$5
 
+if [ -z "$NUM_WORKERS" ]; then
+  NUM_WORKERS=1
+  echo "Number of workers not specified, use default $NUM_WORKERS"
+fi
 checkout_eval_branch
 
 # Only 'CodeActAgent' is supported for MINT now
@@ -24,7 +29,7 @@ COMMAND="poetry run python ./evaluation/mint/run_infer.py \
     --llm-config $MODEL_CONFIG \
     --max-iterations 5 \
     --max-propose-solution 2 \
-    --eval-note $AGENT_VERSION"
+    --eval-num-workers $NUM_WORKERS \
 
 if [ -n "$SUBSET" ]; then
   echo "SUBSET: $SUBSET"

+ 6 - 1
evaluation/ml_bench/scripts/run_infer.sh

@@ -8,7 +8,12 @@ COMMIT_HASH=$2
 SPLIT=$3
 AGENT=$4
 EVAL_LIMIT=$5
+NUM_WORKERS=$6
 
+if [ -z "$NUM_WORKERS" ]; then
+  NUM_WORKERS=1
+  echo "Number of workers not specified, use default $NUM_WORKERS"
+fi
 checkout_eval_branch
 
 if [ -z "$MODEL_CONFIG" ]; then
@@ -31,7 +36,7 @@ COMMAND="poetry run python evaluation/ml_bench/run_infer.py \
   --agent-cls $AGENT \
   --llm-config $MODEL_CONFIG \
   --max-iterations 10 \
-  --eval-num-workers 4 \
+  --eval-num-workers $NUM_WORKERS \
   --eval-note $AGENT_VERSION"
 
 if [ -n "$EVAL_LIMIT" ]; then

+ 6 - 1
evaluation/swe_bench/scripts/run_infer.sh

@@ -8,7 +8,12 @@ COMMIT_HASH=$2
 AGENT=$3
 EVAL_LIMIT=$4
 MAX_ITER=$5
+NUM_WORKERS=$6
 
+if [ -z "$NUM_WORKERS" ]; then
+  NUM_WORKERS=1
+  echo "Number of workers not specified, use default $NUM_WORKERS"
+fi
 checkout_eval_branch
 
 if [ -z "$AGENT" ]; then
@@ -45,7 +50,7 @@ COMMAND="poetry run python evaluation/swe_bench/run_infer.py \
   --llm-config $MODEL_CONFIG \
   --max-iterations $MAX_ITER \
   --max-chars 10000000 \
-  --eval-num-workers 8 \
+  --eval-num-workers $NUM_WORKERS \
   --eval-note $EVAL_NOTE"
 
 if [ -n "$EVAL_LIMIT" ]; then

+ 6 - 1
evaluation/toolqa/scripts/run_infer.sh

@@ -10,7 +10,12 @@ EVAL_LIMIT=$4
 DATASET=$5
 HARDNESS=$6
 WOLFRAM_APPID=$7
+NUM_WORKERS=$8
 
+if [ -z "$NUM_WORKERS" ]; then
+  NUM_WORKERS=1
+  echo "Number of workers not specified, use default $NUM_WORKERS"
+fi
 checkout_eval_branch
 
 if [ -z "$AGENT" ]; then
@@ -51,7 +56,7 @@ COMMAND="poetry run python evaluation/toolqa/run_infer.py \
   --wolfram_alpha_appid $WOLFRAM_APPID\
   --data-split validation \
   --max-chars 10000000 \
-  --eval-num-workers 1 \
+  --eval-num-workers $NUM_WORKERS \
   --eval-note ${AGENT_VERSION}_${LEVELS}"
 
 if [ -n "$EVAL_LIMIT" ]; then

+ 6 - 0
evaluation/webarena/scripts/run_infer.sh

@@ -14,7 +14,12 @@ MODEL_CONFIG=$1
 COMMIT_HASH=$2
 AGENT=$3
 EVAL_LIMIT=$4
+NUM_WORKERS=$5
 
+if [ -z "$NUM_WORKERS" ]; then
+  NUM_WORKERS=1
+  echo "Number of workers not specified, use default $NUM_WORKERS"
+fi
 checkout_eval_branch
 
 if [ -z "$AGENT" ]; then
@@ -35,6 +40,7 @@ COMMAND="poetry run python evaluation/webarena/run_infer.py \
   --llm-config $MODEL_CONFIG \
   --max-iterations 15 \
   --max-chars 10000000 \
+  --eval-num-workers $NUM_WORKERS \
   --eval-note $EVAL_NOTE"
 
 if [ -n "$EVAL_LIMIT" ]; then