فهرست منبع

Minor SWE-Bench inference config tweak (#2381)

* save infer logs to infer_logs

* set max budget for swebench eval
Xingyao Wang 1 سال پیش
والد
کامیت
11a2d1682d
2 فایلهای تغییر یافته به همراه 2 افزوده شده و 1 حذف شده
  1. 1 0
      evaluation/swe_bench/README.md
  2. 1 1
      evaluation/swe_bench/run_infer.py

+ 1 - 0
evaluation/swe_bench/README.md

@@ -51,6 +51,7 @@ sandbox_timeout = 120
 use_host_network = false
 run_as_devin = false
 enable_auto_lint = true
+max_budget_per_task = 4 # 4 USD
 
 # TODO: Change these to the model you want to evaluate
 [eval_gpt4_1106_preview]

+ 1 - 1
evaluation/swe_bench/run_infer.py

@@ -209,7 +209,7 @@ def process_instance(
     if reset_logger:
         # Set up logger
         log_file = os.path.join(
-            eval_output_dir, 'logs', f'instance_{instance.instance_id}.log'
+            eval_output_dir, 'infer_logs', f'instance_{instance.instance_id}.log'
         )
         # Remove all existing handlers from logger
         for handler in logger.handlers[:]: