1 سال پیش · 8975fcd714
--- a/evaluation/benchmarks/swe_bench/scripts/eval/summarize_outputs.py
+++ b/evaluation/benchmarks/swe_bench/scripts/eval/summarize_outputs.py
@@ -3,11 +3,11 @@ import argparse
 
				 import glob
			
 
				 import json
			
 
				 import os
			
 
				+import random
			
 
				 from collections import Counter
			
 
				 
			
 
				-import pandas as pd
			
 
				-import random
			
 
				 import numpy as np
			
 
				+import pandas as pd
			
 
				 
			
 
				 from openhands.events.serialization import event_from_dict
			
 
				 from openhands.events.utils import get_pairs_from_events
			
@@ -20,12 +20,11 @@ ERROR_KEYWORDS = [
 
				 ]
			
 
				 
			
 
				 
			
 
				-def get_bootstrap_accuracy_error_bars(values: float | int | bool, num_samples: int = 1000, p_value=0.05) -> tuple[float, float]:
			
 
				+def get_bootstrap_accuracy_error_bars(
			
 
				+    values: float | int | bool, num_samples: int = 1000, p_value=0.05
			
 
				+) -> tuple[float, float]:
			
 
				     sorted_vals = np.sort(
			
 
				-        [
			
 
				-            np.mean(random.sample(values, len(values) // 2))
			
 
				-            for _ in range(num_samples)
			
 
				-        ]
			
 
				+        [np.mean(random.sample(values, len(values) // 2)) for _ in range(num_samples)]
			
 
				     )
			
 
				     bottom_idx = int(num_samples * p_value / 2)
			
 
				     top_idx = int(num_samples * (1.0 - p_value / 2))
			
@@ -118,7 +117,9 @@ def process_file(file_path):
 
				         'resolved': {
			
 
				             'count': num_resolved,
			
 
				             'percentage': (num_resolved / num_lines * 100) if num_lines > 0 else 0,
			
 
				-            'ci': tuple(x * 100 for x in get_bootstrap_accuracy_error_bars(resolved_arr)),
			
 
				+            'ci': tuple(
			
 
				+                x * 100 for x in get_bootstrap_accuracy_error_bars(resolved_arr)
			
 
				+            ),
			
 
				         },
			
 
				         'empty_patches': {
			
 
				             'count': num_empty_patch,
			
--- a/frontend/src/i18n/translation.json
+++ b/frontend/src/i18n/translation.json
@@ -2009,7 +2009,7 @@
 
				     "en": "Running a bash command"
			
 
				   },
			
 
				   "ACTION_MESSAGE$RUN_IPYTHON": {
			
 
				-    "en": "Running a Jupyter command"
			
 
				+    "en": "Running a Python command"
			
 
				   },
			
 
				   "ACTION_MESSAGE$READ": {
			
 
				     "en": "Reading the contents of a file"
			
@@ -2027,7 +2027,7 @@
 
				     "en": "Ran a bash command"
			
 
				   },
			
 
				   "OBSERVATION_MESSAGE$RUN_IPYTHON": {
			
 
				-    "en": "Ran a Jupyter command"
			
 
				+    "en": "Ran a Python command"
			
 
				   },
			
 
				   "OBSERVATION_MESSAGE$READ": {
			
 
				     "en": "Read the contents of a file"