Pārlūkot izejas kodu

logname fix; improve test calling instruction (#3666)

tobitege 1 gadu atpakaļ
vecāks
revīzija
dbb671a8a5
2 mainīti faili ar 5 papildinājumiem un 3 dzēšanām
  1. + 3 - 2
      evaluation/aider_bench/run_infer.py
  2. + 2 - 1
      evaluation/utils/shared.py

+ 3 - 2
evaluation/aider_bench/run_infer.py

@@ -177,9 +177,10 @@ def process_instance(
         signature_file=f'{instance.instance_name}.py',
     )
     if USE_UNIT_TESTS:
+        print(f'\nInstruction to run test_file: {instance.instance_name}_test.py\n')
         instruction += (
-            f'Use the test_file: {instance.instance_name}_test.py, to verify '
-            'the correctness of your solution. DO NOT EDIT the test file.\n\n'
+            f'Use `python -m unittest {instance.instance_name}_test.py` to run the test_file '
+            'and verify the correctness of your solution. DO NOT EDIT the test file.\n\n'
         )
 
     instruction += (

+ 2 - 1
evaluation/utils/shared.py

@@ -139,13 +139,14 @@ def make_metadata(
     details: dict[str, Any] | None = None,
 ) -> EvalMetadata:
     model_name = llm_config.model.split('/')[-1]
+    model_path = model_name.replace(':', '_')
     eval_note = f'_N_{eval_note}' if eval_note else ''
 
     eval_output_path = os.path.join(
         eval_output_dir,
         dataset_name,
         agent_class,
-        f'{model_name}_maxiter_{max_iterations}{eval_note}',
+        f'{model_path}_maxiter_{max_iterations}{eval_note}',
     )
 
     pathlib.Path(eval_output_path).mkdir(parents=True, exist_ok=True)