1 rok temu · 11a2d1682d
--- a/evaluation/swe_bench/README.md
+++ b/evaluation/swe_bench/README.md
@@ -51,6 +51,7 @@ sandbox_timeout = 120
 use_host_network = false
 run_as_devin = false
 enable_auto_lint = true
+max_budget_per_task = 4 # 4 USD
 
 # TODO: Change these to the model you want to evaluate
 [eval_gpt4_1106_preview]
--- a/evaluation/swe_bench/run_infer.py
+++ b/evaluation/swe_bench/run_infer.py
@@ -209,7 +209,7 @@ def process_instance(
     if reset_logger:
         # Set up logger
         log_file = os.path.join(
-            eval_output_dir, 'logs', f'instance_{instance.instance_id}.log'
+            eval_output_dir, 'infer_logs', f'instance_{instance.instance_id}.log'
         )
         # Remove all existing handlers from logger
         for handler in logger.handlers[:]: