Revert "Fix issue #5609: Use litellm's modify_params with default True" (#5631)

Engel Nyst 11 months ago
parent
commit
b295f5775c

+ 0 - 5
config.template.toml

@@ -154,11 +154,6 @@ model = "gpt-4o"
 # Drop any unmapped (unsupported) params without causing an exception
 #drop_params = false
 
-# Allow litellm to modify parameters to make them compatible with providers
-# for example by inserting a default message (like 'continue') when a message is empty
-# and the provider's API would give an error otherwise
-#modify_params = true
-
 # Using the prompt caching feature if provided by the LLM and supported
 #caching_prompt = true
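
With this revert, modify_params is no longer exposed in the config template, so litellm falls back to its own default. As a hedged sketch (not part of this commit), the same behavior could still be toggled through litellm's module-level switch:

    import litellm

    # litellm may rewrite requests for provider compatibility, e.g. by
    # inserting a default message when the message list would otherwise
    # be empty and the provider's API would error
    litellm.modify_params = True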
 

+ 1 - 1
evaluation/benchmarks/EDA/run_infer.py

@@ -201,7 +201,7 @@ if __name__ == '__main__':
 
     llm_config = None
     if args.llm_config:
-        llm_config = get_llm_config_arg(args.llm_config, evaluation=True)
+        llm_config = get_llm_config_arg(args.llm_config)
     if llm_config is None:
         raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
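
The same one-line change repeats across every benchmark entry point below. As a hedged sketch (the real argument parser lives in the shared evaluation utilities), the call pattern after the revert looks like:

    import argparse

    from openhands.core.config.utils import get_llm_config_arg

    parser = argparse.ArgumentParser()
    parser.add_argument('--llm_config', type=str, default=None)
    args = parser.parse_args()

    llm_config = None
    if args.llm_config:
        # the evaluation=True keyword no longer exists after this revert
        llm_config = get_llm_config_arg(args.llm_config)
    if llm_config is None:
        raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')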
 

+ 1 - 1
evaluation/benchmarks/agent_bench/run_infer.py

@@ -306,7 +306,7 @@ if __name__ == '__main__':
 
     llm_config = None
     if args.llm_config:
-        llm_config = get_llm_config_arg(args.llm_config, evaluation=True)
+        llm_config = get_llm_config_arg(args.llm_config)
 
     if llm_config is None:
         raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

+ 1 - 1
evaluation/benchmarks/aider_bench/run_infer.py

@@ -278,7 +278,7 @@ if __name__ == '__main__':
 
     llm_config = None
     if args.llm_config:
-        llm_config = get_llm_config_arg(args.llm_config, evaluation=True)
+        llm_config = get_llm_config_arg(args.llm_config)
 
     if llm_config is None:
         raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

+ 1 - 1
evaluation/benchmarks/biocoder/run_infer.py

@@ -327,7 +327,7 @@ if __name__ == '__main__':
 
     llm_config = None
     if args.llm_config:
-        llm_config = get_llm_config_arg(args.llm_config, evaluation=True)
+        llm_config = get_llm_config_arg(args.llm_config)
 
     if llm_config is None:
         raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

+ 1 - 1
evaluation/benchmarks/bird/run_infer.py

@@ -455,7 +455,7 @@ if __name__ == '__main__':
 
     llm_config = None
     if args.llm_config:
-        llm_config = get_llm_config_arg(args.llm_config, evaluation=True)
+        llm_config = get_llm_config_arg(args.llm_config)
     if llm_config is None:
         raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
 

+ 1 - 1
evaluation/benchmarks/browsing_delegation/run_infer.py

@@ -141,7 +141,7 @@ if __name__ == '__main__':
 
     llm_config = None
     if args.llm_config:
-        llm_config = get_llm_config_arg(args.llm_config, evaluation=True)
+        llm_config = get_llm_config_arg(args.llm_config)
 
     if llm_config is None:
         raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')

+ 1 - 1
evaluation/benchmarks/commit0_bench/run_infer.py

@@ -570,7 +570,7 @@ if __name__ == '__main__':
 
     llm_config = None
     if args.llm_config:
-        llm_config = get_llm_config_arg(args.llm_config, evaluation=True)
+        llm_config = get_llm_config_arg(args.llm_config)
         llm_config.log_completions = True
 
     if llm_config is None:

+ 1 - 1
evaluation/benchmarks/discoverybench/run_infer.py

@@ -465,7 +465,7 @@ if __name__ == '__main__':
 
     llm_config = None
     if args.llm_config:
-        llm_config = get_llm_config_arg(args.llm_config, evaluation=True)
+        llm_config = get_llm_config_arg(args.llm_config)
     if llm_config is None:
         raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
 

+ 1 - 1
evaluation/benchmarks/gaia/run_infer.py

@@ -237,7 +237,7 @@ if __name__ == '__main__':
 
     llm_config = None
     if args.llm_config:
-        llm_config = get_llm_config_arg(args.llm_config, evaluation=True)
+        llm_config = get_llm_config_arg(args.llm_config)
     if llm_config is None:
         raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
 

+ 1 - 1
evaluation/benchmarks/gorilla/run_infer.py

@@ -145,7 +145,7 @@ if __name__ == '__main__':
 
     llm_config = None
     if args.llm_config:
-        llm_config = get_llm_config_arg(args.llm_config, evaluation=True)
+        llm_config = get_llm_config_arg(args.llm_config)
     if llm_config is None:
         raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
 

+ 1 - 1
evaluation/benchmarks/gpqa/run_infer.py

@@ -325,7 +325,7 @@ if __name__ == '__main__':
 
     llm_config = None
     if args.llm_config:
-        llm_config = get_llm_config_arg(args.llm_config, evaluation=True)
+        llm_config = get_llm_config_arg(args.llm_config)
     if llm_config is None:
         raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
 

+ 1 - 1
evaluation/benchmarks/humanevalfix/run_infer.py

@@ -284,7 +284,7 @@ if __name__ == '__main__':
 
     llm_config = None
     if args.llm_config:
-        llm_config = get_llm_config_arg(args.llm_config, evaluation=True)
+        llm_config = get_llm_config_arg(args.llm_config)
     if llm_config is None:
         raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
 

+ 1 - 1
evaluation/benchmarks/logic_reasoning/run_infer.py

@@ -287,7 +287,7 @@ if __name__ == '__main__':
 
     llm_config = None
     if args.llm_config:
-        llm_config = get_llm_config_arg(args.llm_config, evaluation=True)
+        llm_config = get_llm_config_arg(args.llm_config)
     if llm_config is None:
         raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
 

+ 1 - 1
evaluation/benchmarks/miniwob/run_infer.py

@@ -230,7 +230,7 @@ if __name__ == '__main__':
 
     llm_config = None
     if args.llm_config:
-        llm_config = get_llm_config_arg(args.llm_config, evaluation=True)
+        llm_config = get_llm_config_arg(args.llm_config)
     if llm_config is None:
         raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
 

+ 1 - 1
evaluation/benchmarks/mint/run_infer.py

@@ -278,7 +278,7 @@ if __name__ == '__main__':
 
     llm_config = None
     if args.llm_config:
-        llm_config = get_llm_config_arg(args.llm_config, evaluation=True)
+        llm_config = get_llm_config_arg(args.llm_config)
     if llm_config is None:
         raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
 

+ 1 - 1
evaluation/benchmarks/ml_bench/run_infer.py

@@ -291,7 +291,7 @@ if __name__ == '__main__':
 
     llm_config = None
     if args.llm_config:
-        llm_config = get_llm_config_arg(args.llm_config, evaluation=True)
+        llm_config = get_llm_config_arg(args.llm_config)
     if llm_config is None:
         raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
 

+ 1 - 1
evaluation/benchmarks/scienceagentbench/run_infer.py

@@ -271,7 +271,7 @@ if __name__ == '__main__':
 
     llm_config = None
     if args.llm_config:
-        llm_config = get_llm_config_arg(args.llm_config, evaluation=True)
+        llm_config = get_llm_config_arg(args.llm_config)
     if llm_config is None:
         raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
 

+ 3 - 2
evaluation/benchmarks/swe_bench/run_infer.py

@@ -9,6 +9,7 @@ import toml
 from datasets import load_dataset
 
 import openhands.agenthub
+
 from evaluation.utils.shared import (
     EvalException,
     EvalMetadata,
@@ -75,7 +76,7 @@ def get_instruction(instance: pd.Series, metadata: EvalMetadata):
         '4. Rerun your reproduce script and confirm that the error is fixed!\n'
         '5. Think about edgecases and make sure your fix handles them as well\n'
         "Your thinking should be thorough and so it's fine if it's very long.\n"
-    )
+        )
 
     if RUN_WITH_BROWSING:
         instruction += (
@@ -488,7 +489,7 @@ if __name__ == '__main__':
 
     llm_config = None
     if args.llm_config:
-        llm_config = get_llm_config_arg(args.llm_config, evaluation=True)
+        llm_config = get_llm_config_arg(args.llm_config)
         llm_config.log_completions = True
 
     if llm_config is None:

+ 1 - 1
evaluation/benchmarks/toolqa/run_infer.py

@@ -180,7 +180,7 @@ if __name__ == '__main__':
 
     llm_config = None
     if args.llm_config:
-        llm_config = get_llm_config_arg(args.llm_config, evaluation=True)
+        llm_config = get_llm_config_arg(args.llm_config)
     if llm_config is None:
         raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
 

+ 1 - 1
evaluation/benchmarks/webarena/run_infer.py

@@ -211,7 +211,7 @@ if __name__ == '__main__':
 
     llm_config = None
     if args.llm_config:
-        llm_config = get_llm_config_arg(args.llm_config, evaluation=True)
+        llm_config = get_llm_config_arg(args.llm_config)
     if llm_config is None:
         raise ValueError(f'Could not find LLM config: --llm_config {args.llm_config}')
 

+ 0 - 2
openhands/core/config/llm_config.py

@@ -44,7 +44,6 @@ class LLMConfig:
         log_completions_folder: The folder to log LLM completions to. Required if log_completions is True.
         draft_editor: A more efficient LLM to use for file editing. Introduced in [PR 3985](https://github.com/All-Hands-AI/OpenHands/pull/3985).
         custom_tokenizer: A custom tokenizer to use for token counting.
-        modify_params: Allow litellm to modify parameters to make them compatible with the provider. For example, insert default messages when empty. Defaults to True.
     """
 
     model: str = 'claude-3-5-sonnet-20241022'
@@ -80,7 +79,6 @@ class LLMConfig:
     log_completions_folder: str = os.path.join(LOG_DIR, 'completions')
     draft_editor: Optional['LLMConfig'] = None
     custom_tokenizer: str | None = None
-    modify_params: bool = True
 
     def defaults_to_dict(self) -> dict:
         """Serialize fields to a dict for the frontend, including type hints, defaults, and whether it's optional."""

+ 3 - 7
openhands/core/config/utils.py

@@ -243,9 +243,9 @@ def finalize_config(cfg: AppConfig):
         )
 
 
-# Utility function for command line -l (--llm-config) argument
+# Utility function for command line --group argument
 def get_llm_config_arg(
-    llm_config_arg: str, toml_file: str = 'config.toml', evaluation: bool = False
+    llm_config_arg: str, toml_file: str = 'config.toml'
 ) -> LLMConfig | None:
     """Get a group of llm settings from the config file.
 
@@ -268,7 +268,6 @@ def get_llm_config_arg(
     Args:
         llm_config_arg: The group of llm settings to get from the config.toml file.
         toml_file: Path to the configuration file to read from. Defaults to 'config.toml'.
-        evaluation: If True, sets modify_params=False for evaluation purposes. Defaults to False.
 
     Returns:
         LLMConfig: The LLMConfig object with the settings from the config file.
@@ -297,10 +296,7 @@ def get_llm_config_arg(
 
     # update the llm config with the specified section
     if 'llm' in toml_config and llm_config_arg in toml_config['llm']:
-        config = LLMConfig.from_dict(toml_config['llm'][llm_config_arg])
-        if evaluation:
-            config.modify_params = False
-        return config
+        return LLMConfig.from_dict(toml_config['llm'][llm_config_arg])
     logger.openhands_logger.debug(f'Loading from toml failed for {llm_config_arg}')
     return None
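
The function's contract after the revert: it resolves an [llm.<group>] section from the TOML file and returns None when the group is missing, with no evaluation-specific post-processing. A hedged usage sketch (the group name 'eval' is hypothetical):

    from openhands.core.config.utils import get_llm_config_arg

    cfg = get_llm_config_arg('eval', toml_file='config.toml')
    if cfg is None:
        # no [llm.eval] section was found in config.toml
        raise ValueError('Could not find LLM config: --llm_config eval')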
 

+ 0 - 1
openhands/llm/llm.py

@@ -142,7 +142,6 @@ class LLM(RetryMixin, DebugMixin):
             temperature=self.config.temperature,
             top_p=self.config.top_p,
             drop_params=self.config.drop_params,
-            modify_params=self.config.modify_params,
         )
 
         self._completion_unwrapped = self._completion
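
With modify_params dropped from the partial above, only the remaining parameters reach litellm. A rough equivalent of the resulting call, sketched against litellm's public completion API (model and message are placeholders):

    from litellm import completion

    response = completion(
        model='gpt-4o',
        messages=[{'role': 'user', 'content': 'hello'}],
        temperature=0.0,
        top_p=1.0,
        drop_params=True,  # still forwarded: silently drop unsupported params
    )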

+ 0 - 2
pyproject.toml

@@ -100,7 +100,6 @@ reportlab = "*"
 [tool.coverage.run]
 concurrency = ["gevent"]
 
-
 [tool.poetry.group.runtime.dependencies]
 jupyterlab = "*"
 notebook = "*"
@@ -131,7 +130,6 @@ ignore = ["D1"]
 [tool.ruff.lint.pydocstyle]
 convention = "google"
 
-
 [tool.poetry.group.evaluation.dependencies]
 streamlit = "*"
 whatthepatch = "*"