run_infer.sh 866 B

123456789101112131415161718192021222324252627282930313233343536373839404142
  1. #!/bin/bash
  2. set -eo pipefail
  3. source "evaluation/utils/version_control.sh"
  4. DATASET=$1
  5. MODEL_CONFIG=$2
  6. COMMIT_HASH=$3
  7. EVAL_LIMIT=$4
  8. AGENT=$5
  9. # ################################################################################
  10. checkout_eval_branch
  11. if [ -z "$AGENT" ]; then
  12. echo "Agent not specified, use default CodeActAgent"
  13. AGENT="CodeActAgent"
  14. fi
  15. get_agent_version
  16. echo "AGENT: $AGENT"
  17. echo "AGENT_VERSION: $AGENT_VERSION"
  18. echo "MODEL_CONFIG: $MODEL_CONFIG"
  19. COMMAND="poetry run python evaluation/logic_reasoning/run_infer.py \
  20. --agent-cls $AGENT \
  21. --llm-config $MODEL_CONFIG \
  22. --dataset $DATASET \
  23. --max-iterations 10 \
  24. --max-chars 10000000 \
  25. --eval-num-workers 1 \
  26. --eval-note $AGENT_VERSION"
  27. if [ -n "$EVAL_LIMIT" ]; then
  28. echo "EVAL_LIMIT: $EVAL_LIMIT"
  29. COMMAND="$COMMAND --eval-n-limit $EVAL_LIMIT"
  30. fi
  31. # Run the command
  32. eval $COMMAND