#!/bin/bash
#
# eval_infer_remote.sh
#
# Runs evaluation/swe_bench/eval_infer.py (via `poetry run python`) on a
# jsonl input file.
#
# Usage:
#   eval_infer_remote.sh INPUT_FILE [NUM_WORKERS] [DATASET] [SPLIT]
#
# Arguments:
#   INPUT_FILE   path to a jsonl file (required)
#   NUM_WORKERS  forwarded as --eval-num-workers (default: 1)
#   DATASET      forwarded as --dataset
#                (default: princeton-nlp/SWE-bench_Lite)
#   SPLIT        forwarded as --split (default: test)
#
# Environment:
#   EVAL_LIMIT   when set and non-empty, forwarded as --eval-n-limit
#
# Exit status:
#   1 if INPUT_FILE is missing; otherwise the status of the python command.
set -euo pipefail

# Read positionals with an empty fallback so that `set -u` does not abort on
# omitted arguments; the defaults are applied (and announced) below.
INPUT_FILE=${1:-}
NUM_WORKERS=${2:-}
DATASET=${3:-}
SPLIT=${4:-}

if [[ -z "$INPUT_FILE" ]]; then
  echo "INPUT_FILE not specified (should be a path to a jsonl file)" >&2
  exit 1
fi

if [[ -z "$DATASET" ]]; then
  echo "DATASET not specified, use default princeton-nlp/SWE-bench_Lite"
  DATASET="princeton-nlp/SWE-bench_Lite"
fi

if [[ -z "$SPLIT" ]]; then
  echo "SPLIT not specified, use default test"
  SPLIT="test"
fi

if [[ -z "$NUM_WORKERS" ]]; then
  echo "NUM_WORKERS not specified, use default 1"
  NUM_WORKERS=1
fi

echo "... Evaluating on $INPUT_FILE ..."

# Build the command as an argv array instead of a string fed to `eval`:
# each value stays a single argument even if it contains spaces, globs or
# shell metacharacters, and nothing in the arguments is re-parsed as code.
cmd=(
  poetry run python evaluation/swe_bench/eval_infer.py
  --eval-num-workers "$NUM_WORKERS"
  --input-file "$INPUT_FILE"
  --dataset "$DATASET"
  --split "$SPLIT"
)

if [[ -n "${EVAL_LIMIT:-}" ]]; then
  echo "EVAL_LIMIT: $EVAL_LIMIT"
  cmd+=(--eval-n-limit "$EVAL_LIMIT")
fi

# Run the command
"${cmd[@]}"