Vision and prompt caching fixes (#4014)

Engel Nyst, 1 year ago
parent commit e582806004

+ 0 - 5
agenthub/codeact_agent/codeact_agent.py

@@ -204,11 +204,6 @@ class CodeActAgent(Agent):
             ],
         }
 
-        if self.llm.is_caching_prompt_active():
-            params['extra_headers'] = {
-                'anthropic-beta': 'prompt-caching-2024-07-31',
-            }
-
         response = self.llm.completion(**params)
 
         return self.action_parser.parse(response)
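
The Anthropic beta header is no longer set by the agent; the LLM completion wrapper injects it when is_caching_prompt_active() returns True (see the llm.py hunk further down). An illustrative caller-side sketch, assuming an LLM instance named llm and a prepared list of message dicts:

    # no provider-specific headers at the call site anymore; when caching is active,
    # the wrapper itself adds extra_headers={'anthropic-beta': 'prompt-caching-2024-07-31'}
    response = llm.completion(messages=messages)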

+ 2 - 2
config.template.toml

@@ -146,8 +146,8 @@ model = "gpt-4o"
 # Drop any unmapped (unsupported) params without causing an exception
 #drop_params = false
 
-# Using the prompt caching feature provided by the LLM
-#caching_prompt = false
+# Using the prompt caching feature if provided by the LLM and supported
+#caching_prompt = true
 
 # Base URL for the OLLAMA API
 #ollama_base_url = ""

+ 15 - 8
openhands/core/config/llm_config.py

@@ -1,3 +1,4 @@
+import os
 from dataclasses import dataclass, fields
 
 from openhands.core.config.config_utils import get_field_info
@@ -36,7 +37,7 @@ class LLMConfig:
         ollama_base_url: The base URL for the OLLAMA API.
         drop_params: Drop any unmapped (unsupported) params without causing an exception.
         disable_vision: If model is vision capable, this option allows to disable image processing (useful for cost reduction).
-        caching_prompt: Using the prompt caching feature provided by the LLM.
+        caching_prompt: Use the prompt caching feature if provided by the LLM and supported by the provider.
         log_completions: Whether to log LLM completions to the state.
     """
 
@@ -68,7 +69,7 @@ class LLMConfig:
     ollama_base_url: str | None = None
     drop_params: bool = True
     disable_vision: bool | None = None
-    caching_prompt: bool = False
+    caching_prompt: bool = True
     log_completions: bool = False
 
     def defaults_to_dict(self) -> dict:
@@ -78,6 +79,18 @@ class LLMConfig:
             result[f.name] = get_field_info(f)
         return result
 
+    def __post_init__(self):
+        """
+        Post-initialization hook to assign OpenRouter-related variables to environment variables.
+        This ensures that these values are accessible to litellm at runtime.
+        """
+
+        # Assign OpenRouter-specific variables to environment variables
+        if self.openrouter_site_url:
+            os.environ['OR_SITE_URL'] = self.openrouter_site_url
+        if self.openrouter_app_name:
+            os.environ['OR_APP_NAME'] = self.openrouter_app_name
+
     def __str__(self):
         attr_str = []
         for f in fields(self):
@@ -101,9 +114,3 @@ class LLMConfig:
             if k in LLM_SENSITIVE_FIELDS:
                 ret[k] = '******' if v else None
         return ret
-
-    def set_missing_attributes(self):
-        """Set any missing attributes to their default values."""
-        for field_name, field_obj in self.__dataclass_fields__.items():
-            if not hasattr(self, field_name):
-                setattr(self, field_name, field_obj.default)
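
The new __post_init__ mirrors the OpenRouter settings into environment variables so litellm can read them at call time, replacing the assignment that previously lived in LLM.__init__. A minimal sketch, assuming the remaining LLMConfig fields keep their defaults and using hypothetical values:

    import os

    from openhands.core.config import LLMConfig

    config = LLMConfig(
        openrouter_site_url='https://example.com',    # hypothetical value
        openrouter_app_name='MyOpenHandsApp',         # hypothetical value
    )
    # __post_init__ exports both values for litellm
    assert os.environ['OR_SITE_URL'] == 'https://example.com'
    assert os.environ['OR_APP_NAME'] == 'MyOpenHandsApp'
    # caching_prompt now defaults to True; actual use still depends on model support
    assert config.caching_prompt is True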

+ 14 - 13
openhands/core/message.py

@@ -50,6 +50,8 @@ class ImageContent(Content):
 class Message(BaseModel):
     role: Literal['user', 'system', 'assistant']
     content: list[TextContent | ImageContent] = Field(default=list)
+    cache_enabled: bool = False
+    vision_enabled: bool = False
 
     @property
     def contains_image(self) -> bool:
@@ -58,23 +60,22 @@ class Message(BaseModel):
     @model_serializer
     def serialize_model(self) -> dict:
         content: list[dict] | str
-        if self.role == 'system':
-            # For system role, concatenate all text content into a single string
-            content = '\n'.join(
-                item.text for item in self.content if isinstance(item, TextContent)
-            )
-        elif self.role == 'assistant' and not self.contains_image:
-            # For assistant role without vision, concatenate all text content into a single string
-            content = '\n'.join(
-                item.text for item in self.content if isinstance(item, TextContent)
-            )
-        else:
-            # For user role or assistant role with vision enabled, serialize each content item
+        # two kinds of serializer:
+        # 1. vision serializer: when prompt caching or vision is enabled
+        # 2. single text serializer: for other cases
+        # remove this when liteLLM or providers support this format translation
+        if self.cache_enabled or self.vision_enabled:
+            # when prompt caching or vision is enabled, use vision serializer
             content = []
             for item in self.content:
                 if isinstance(item, TextContent):
                     content.append(item.model_dump())
                 elif isinstance(item, ImageContent):
                     content.extend(item.model_dump())
-
+        else:
+            # for other cases, concatenate all text content
+            # into a single string per message
+            content = '\n'.join(
+                item.text for item in self.content if isinstance(item, TextContent)
+            )
         return {'content': content, 'role': self.role}
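
With the new flags, the same Message serializes in two shapes: a list of content parts when caching or vision is enabled, and a single concatenated string otherwise. A short sketch (the exact shape of each list item comes from TextContent.model_dump()):

    from openhands.core.message import Message, TextContent

    msg = Message(role='user', content=[TextContent(text='Hello'), TextContent(text='World')])

    # default: neither flag is set, so text parts are joined into one string
    assert msg.model_dump() == {'content': 'Hello\nWorld', 'role': 'user'}

    # with vision (or caching) enabled, content stays a list of serialized items
    msg.vision_enabled = True
    assert isinstance(msg.model_dump()['content'], list)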

+ 5 - 0
openhands/llm/__init__.py

@@ -0,0 +1,5 @@
+from openhands.llm.async_llm import AsyncLLM
+from openhands.llm.llm import LLM
+from openhands.llm.streaming_llm import StreamingLLM
+
+__all__ = ['LLM', 'AsyncLLM', 'StreamingLLM']
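
The new package initializer re-exports the three client classes, so callers can import them from one place:

    # all three clients are now importable from the package root
    from openhands.llm import LLM, AsyncLLM, StreamingLLM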

+ 21 - 11
openhands/llm/async_llm.py

@@ -1,11 +1,12 @@
 import asyncio
 from functools import partial
+from typing import Any
 
 from litellm import completion as litellm_acompletion
 
-from openhands.core.exceptions import LLMResponseError, UserCancelledError
+from openhands.core.exceptions import UserCancelledError
 from openhands.core.logger import openhands_logger as logger
-from openhands.llm.llm import LLM
+from openhands.llm.llm import LLM, LLM_RETRY_EXCEPTIONS
 from openhands.runtime.utils.shutdown_listener import should_continue
 
 
@@ -33,19 +34,31 @@ class AsyncLLM(LLM):
 
         @self.retry_decorator(
             num_retries=self.config.num_retries,
-            retry_exceptions=self.retry_exceptions,
+            retry_exceptions=LLM_RETRY_EXCEPTIONS,
             retry_min_wait=self.config.retry_min_wait,
             retry_max_wait=self.config.retry_max_wait,
             retry_multiplier=self.config.retry_multiplier,
         )
         async def async_completion_wrapper(*args, **kwargs):
             """Wrapper for the litellm acompletion function."""
-            # some callers might just send the messages directly
-            if 'messages' in kwargs:
+            messages: list[dict[str, Any]] | dict[str, Any] = []
+
+            # some callers might send the model and messages directly
+            # litellm allows positional args, like completion(model, messages, **kwargs)
+            # see llm.py for more details
+            if len(args) > 1:
+                messages = args[1] if len(args) > 1 else args[0]
+                kwargs['messages'] = messages
+
+                # remove the first args, they're sent in kwargs
+                args = args[2:]
+            elif 'messages' in kwargs:
                 messages = kwargs['messages']
-            else:
-                messages = args[1] if len(args) > 1 else []
 
+            # ensure we work with a list of messages
+            messages = messages if isinstance(messages, list) else [messages]
+
+            # if we have no messages, something went very wrong
             if not messages:
                 raise ValueError(
                     'The messages list is empty. At least one message is required.'
@@ -101,7 +114,4 @@ class AsyncLLM(LLM):
     @property
     def async_completion(self):
         """Decorator for the async litellm acompletion function."""
-        try:
-            return self._async_completion
-        except Exception as e:
-            raise LLMResponseError(e)
+        return self._async_completion
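
The async wrapper now accepts both calling styles, keyword messages= and the litellm-style positional (model, messages), and normalizes them into kwargs['messages'], ignoring any positional model in favor of the configured one. A hedged usage sketch, assuming LLMConfig and AsyncLLM can be constructed with just a model name and valid provider credentials:

    import asyncio

    from openhands.core.config import LLMConfig
    from openhands.llm import AsyncLLM

    async def main():
        llm = AsyncLLM(config=LLMConfig(model='gpt-4o'))  # hypothetical config
        # keyword style
        await llm.async_completion(messages=[{'role': 'user', 'content': 'Hi'}])
        # positional style: the first arg (a model name) is ignored, config.model is used
        await llm.async_completion('some-other-model', [{'role': 'user', 'content': 'Hi'}])

    # asyncio.run(main())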

+ 6 - 8
openhands/llm/debug_mixin.py

@@ -1,3 +1,5 @@
+from typing import Any
+
 from openhands.core.logger import llm_prompt_logger, llm_response_logger
 from openhands.core.logger import openhands_logger as logger
 
@@ -5,7 +7,7 @@ MESSAGE_SEPARATOR = '\n\n----------\n\n'
 
 
 class DebugMixin:
-    def log_prompt(self, messages):
+    def log_prompt(self, messages: list[dict[str, Any]] | dict[str, Any]):
         if not messages:
             logger.debug('No completion messages!')
             return
@@ -20,11 +22,11 @@ class DebugMixin:
         else:
             logger.debug('No completion messages!')
 
-    def log_response(self, message_back):
+    def log_response(self, message_back: str):
         if message_back:
             llm_response_logger.debug(message_back)
 
-    def _format_message_content(self, message):
+    def _format_message_content(self, message: dict[str, Any]):
         content = message['content']
         if isinstance(content, list):
             return '\n'.join(
@@ -32,7 +34,7 @@ class DebugMixin:
             )
         return str(content)
 
-    def _format_content_element(self, element):
+    def _format_content_element(self, element: dict[str, Any]):
         if isinstance(element, dict):
             if 'text' in element:
                 return element['text']
@@ -44,10 +46,6 @@ class DebugMixin:
                 return element['image_url']['url']
         return str(element)
 
-    def _log_stats(self, stats):
-        if stats:
-            logger.info(stats)
-
     # This method should be implemented in the class that uses DebugMixin
     def vision_is_active(self):
         raise NotImplementedError
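
The debug helpers now carry type hints for the two content shapes the serializer can produce. A small sketch of what the formatting yields for each (calling the private helpers directly, for illustration only):

    from openhands.llm.debug_mixin import DebugMixin

    mixin = DebugMixin()
    # string content passes through unchanged
    assert mixin._format_message_content({'content': 'plain text'}) == 'plain text'
    # list-of-parts content is flattened to its text
    assert mixin._format_message_content({'content': [{'type': 'text', 'text': 'hi'}]}) == 'hi'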

+ 71 - 42
openhands/llm/llm.py

@@ -1,5 +1,4 @@
 import copy
-import os
 import time
 import warnings
 from functools import partial
@@ -10,16 +9,16 @@ from openhands.core.config import LLMConfig
 with warnings.catch_warnings():
     warnings.simplefilter('ignore')
     import litellm
+from litellm import ModelInfo
 from litellm import completion as litellm_completion
 from litellm import completion_cost as litellm_completion_cost
 from litellm.exceptions import (
     APIConnectionError,
-    ContentPolicyViolationError,
     InternalServerError,
-    OpenAIError,
     RateLimitError,
+    ServiceUnavailableError,
 )
-from litellm.types.utils import CostPerToken
+from litellm.types.utils import CostPerToken, ModelResponse, Usage
 
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.message import Message
@@ -29,9 +28,23 @@ from openhands.llm.retry_mixin import RetryMixin
 
 __all__ = ['LLM']
 
-cache_prompting_supported_models = [
+# tuple of exceptions to retry on
+LLM_RETRY_EXCEPTIONS: tuple[type[Exception], ...] = (
+    APIConnectionError,
+    InternalServerError,
+    RateLimitError,
+    ServiceUnavailableError,
+)
+
+# cache prompt supporting models
+# remove this when gemini and deepseek are supported
+CACHE_PROMPT_SUPPORTED_MODELS = [
     'claude-3-5-sonnet-20240620',
     'claude-3-haiku-20240307',
+    'claude-3-opus-20240229',
+    'anthropic/claude-3-opus-20240229',
+    'anthropic/claude-3-haiku-20240307',
+    'anthropic/claude-3-5-sonnet-20240620',
 ]
 
 
@@ -55,23 +68,17 @@ class LLM(RetryMixin, DebugMixin):
             config: The LLM configuration.
             metrics: The metrics to use.
         """
-        self.metrics = metrics if metrics is not None else Metrics()
-        self.cost_metric_supported = True
-        self.config = copy.deepcopy(config)
-
-        os.environ['OR_SITE_URL'] = self.config.openrouter_site_url
-        os.environ['OR_APP_NAME'] = self.config.openrouter_app_name
+        self.metrics: Metrics = metrics if metrics is not None else Metrics()
+        self.cost_metric_supported: bool = True
+        self.config: LLMConfig = copy.deepcopy(config)
 
         # list of LLM completions (for logging purposes). Each completion is a dict with the following keys:
         # - 'messages': list of messages
         # - 'response': response from the LLM
         self.llm_completions: list[dict[str, Any]] = []
 
-        # Set up config attributes with default values to prevent AttributeError
-        LLMConfig.set_missing_attributes(self.config)
-
         # litellm actually uses base Exception here for unknown model
-        self.model_info = None
+        self.model_info: ModelInfo | None = None
         try:
             if self.config.model.startswith('openrouter'):
                 self.model_info = litellm.get_model_info(self.config.model)
@@ -83,15 +90,6 @@ class LLM(RetryMixin, DebugMixin):
         except Exception as e:
             logger.warning(f'Could not get model info for {config.model}:\n{e}')
 
-        # Tuple of exceptions to retry on
-        self.retry_exceptions = (
-            APIConnectionError,
-            ContentPolicyViolationError,
-            InternalServerError,
-            OpenAIError,
-            RateLimitError,
-        )
-
         # Set the max tokens in an LM-specific way if not set
         if self.config.max_input_tokens is None:
             if (
@@ -135,23 +133,39 @@ class LLM(RetryMixin, DebugMixin):
 
         if self.vision_is_active():
             logger.debug('LLM: model has vision enabled')
+        if self.is_caching_prompt_active():
+            logger.debug('LLM: caching prompt enabled')
 
         completion_unwrapped = self._completion
 
         @self.retry_decorator(
             num_retries=self.config.num_retries,
-            retry_exceptions=self.retry_exceptions,
+            retry_exceptions=LLM_RETRY_EXCEPTIONS,
             retry_min_wait=self.config.retry_min_wait,
             retry_max_wait=self.config.retry_max_wait,
             retry_multiplier=self.config.retry_multiplier,
         )
         def wrapper(*args, **kwargs):
             """Wrapper for the litellm completion function. Logs the input and output of the completion function."""
-            # some callers might just send the messages directly
-            if 'messages' in kwargs:
+            messages: list[dict[str, Any]] | dict[str, Any] = []
+
+            # some callers might send the model and messages directly
+            # litellm allows positional args, like completion(model, messages, **kwargs)
+            if len(args) > 1:
+                # ignore the first argument if it's provided (it would be the model)
+                # design wise: we don't allow overriding the configured values
+                # implementation wise: the partial function set the model as a kwarg already
+                # as well as other kwargs
+                messages = args[1] if len(args) > 1 else args[0]
+                kwargs['messages'] = messages
+
+                # remove the first args, they're sent in kwargs
+                args = args[2:]
+            elif 'messages' in kwargs:
                 messages = kwargs['messages']
-            else:
-                messages = args[1] if len(args) > 1 else []
+
+            # ensure we work with a list of messages
+            messages = messages if isinstance(messages, list) else [messages]
 
             # if we have no messages, something went very wrong
             if not messages:
@@ -169,7 +183,8 @@ class LLM(RetryMixin, DebugMixin):
                         'anthropic-beta': 'prompt-caching-2024-07-31',
                     }
 
-            resp = completion_unwrapped(*args, **kwargs)
+            # we don't support streaming here, thus we get a ModelResponse
+            resp: ModelResponse = completion_unwrapped(*args, **kwargs)
 
             # log for evals or other scripts that need the raw completion
             if self.config.log_completions:
@@ -182,7 +197,7 @@ class LLM(RetryMixin, DebugMixin):
                     }
                 )
 
-            message_back = resp['choices'][0]['message']['content']
+            message_back: str = resp['choices'][0]['message']['content']
 
             # log the LLM response
             self.log_response(message_back)
@@ -211,22 +226,29 @@ class LLM(RetryMixin, DebugMixin):
         Returns:
             bool: True if model is vision capable. If model is not supported by litellm, it will return False.
         """
-        try:
-            return litellm.supports_vision(self.config.model)
-        except Exception:
-            return False
+        # litellm.supports_vision currently returns False for 'openai/gpt-...' or 'anthropic/claude-...' (with prefixes)
+        # but model_info will have the correct value for some reason.
+        # we can go with it, but we will need to keep an eye if model_info is correct for Vertex or other providers
+        # remove when litellm is updated to fix https://github.com/BerriAI/litellm/issues/5608
+        return litellm.supports_vision(self.config.model) or (
+            self.model_info is not None
+            and self.model_info.get('supports_vision', False)
+        )
 
     def is_caching_prompt_active(self) -> bool:
-        """Check if prompt caching is enabled and supported for current model.
+        """Check if prompt caching is supported and enabled for current model.
 
         Returns:
-            boolean: True if prompt caching is active for the given model.
+            boolean: True if prompt caching is supported and enabled for the given model.
         """
-        return self.config.caching_prompt is True and any(
-            model in self.config.model for model in cache_prompting_supported_models
+        return (
+            self.config.caching_prompt is True
+            and self.model_info is not None
+            and self.model_info.get('supports_prompt_caching', False)
+            and self.config.model in CACHE_PROMPT_SUPPORTED_MODELS
         )
 
-    def _post_completion(self, response) -> None:
+    def _post_completion(self, response: ModelResponse) -> None:
         """Post-process the completion response.
 
         Logs the cost and usage stats of the completion call.
@@ -244,7 +266,7 @@ class LLM(RetryMixin, DebugMixin):
                 self.metrics.accumulated_cost,
             )
 
-        usage = response.get('usage')
+        usage: Usage | None = response.get('usage')
 
         if usage:
             # keep track of the input and output tokens
@@ -366,5 +388,12 @@ class LLM(RetryMixin, DebugMixin):
 
     def format_messages_for_llm(self, messages: Message | list[Message]) -> list[dict]:
         if isinstance(messages, Message):
-            return [messages.model_dump()]
+            messages = [messages]
+
+        # set flags to know how to serialize the messages
+        for message in messages:
+            message.cache_enabled = self.is_caching_prompt_active()
+            message.vision_enabled = self.vision_is_active()
+
+        # let pydantic handle the serialization
         return [message.model_dump() for message in messages]
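
format_messages_for_llm now stamps each Message with the caching and vision flags before pydantic serializes it, so the wire format always matches what is_caching_prompt_active() and vision_is_active() report. A hedged sketch, assuming LLM can be constructed offline with only a model name:

    from openhands.core.config import LLMConfig
    from openhands.core.message import Message, TextContent
    from openhands.llm import LLM

    llm = LLM(config=LLMConfig(model='gpt-4o'))  # hypothetical config
    msgs = [Message(role='user', content=[TextContent(text='Hi')])]
    llm.format_messages_for_llm(msgs)

    # the flags were set on the Message objects themselves before serialization
    assert msgs[0].cache_enabled == llm.is_caching_prompt_active()
    assert msgs[0].vision_enabled == llm.vision_is_active()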

+ 21 - 11
openhands/llm/streaming_llm.py

@@ -1,9 +1,10 @@
 import asyncio
 from functools import partial
+from typing import Any
 
-from openhands.core.exceptions import LLMResponseError, UserCancelledError
+from openhands.core.exceptions import UserCancelledError
 from openhands.core.logger import openhands_logger as logger
-from openhands.llm.async_llm import AsyncLLM
+from openhands.llm.async_llm import LLM_RETRY_EXCEPTIONS, AsyncLLM
 
 
 class StreamingLLM(AsyncLLM):
@@ -31,18 +32,30 @@ class StreamingLLM(AsyncLLM):
 
         @self.retry_decorator(
             num_retries=self.config.num_retries,
-            retry_exceptions=self.retry_exceptions,
+            retry_exceptions=LLM_RETRY_EXCEPTIONS,
             retry_min_wait=self.config.retry_min_wait,
             retry_max_wait=self.config.retry_max_wait,
             retry_multiplier=self.config.retry_multiplier,
         )
         async def async_streaming_completion_wrapper(*args, **kwargs):
-            # some callers might just send the messages directly
-            if 'messages' in kwargs:
+            messages: list[dict[str, Any]] | dict[str, Any] = []
+
+            # some callers might send the model and messages directly
+            # litellm allows positional args, like completion(model, messages, **kwargs)
+            # see llm.py for more details
+            if len(args) > 1:
+                messages = args[1] if len(args) > 1 else args[0]
+                kwargs['messages'] = messages
+
+                # remove the first args, they're sent in kwargs
+                args = args[2:]
+            elif 'messages' in kwargs:
                 messages = kwargs['messages']
-            else:
-                messages = args[1] if len(args) > 1 else []
 
+            # ensure we work with a list of messages
+            messages = messages if isinstance(messages, list) else [messages]
+
+            # if we have no messages, something went very wrong
             if not messages:
                 raise ValueError(
                     'The messages list is empty. At least one message is required.'
@@ -90,7 +103,4 @@ class StreamingLLM(AsyncLLM):
     @property
     def async_streaming_completion(self):
         """Decorator for the async litellm acompletion function with streaming."""
-        try:
-            return self._async_streaming_completion
-        except Exception as e:
-            raise LLMResponseError(e)
+        return self._async_streaming_completion

+ 40 - 6
tests/unit/test_llm.py

@@ -3,10 +3,9 @@ from unittest.mock import MagicMock, patch
 import pytest
 from litellm.exceptions import (
     APIConnectionError,
-    ContentPolicyViolationError,
     InternalServerError,
-    OpenAIError,
     RateLimitError,
+    ServiceUnavailableError,
 )
 
 from openhands.core.config import LLMConfig
@@ -139,16 +138,15 @@ def test_completion_with_mocked_logger(
             2,
         ),
         (
-            ContentPolicyViolationError,
-            {'model': 'test_model', 'llm_provider': 'test_provider'},
+            InternalServerError,
+            {'llm_provider': 'test_provider', 'model': 'test_model'},
             2,
         ),
         (
-            InternalServerError,
+            ServiceUnavailableError,
             {'llm_provider': 'test_provider', 'model': 'test_model'},
             2,
         ),
-        (OpenAIError, {}, 2),
         (RateLimitError, {'llm_provider': 'test_provider', 'model': 'test_model'}, 2),
     ],
 )
@@ -298,3 +296,39 @@ def test_completion_with_litellm_mock(mock_litellm_completion, default_config):
     assert call_args['model'] == default_config.model
     assert call_args['messages'] == [{'role': 'user', 'content': 'Hello!'}]
     assert not call_args['stream']
+
+
+@patch('openhands.llm.llm.litellm_completion')
+def test_completion_with_two_positional_args(mock_litellm_completion, default_config):
+    mock_response = {
+        'choices': [{'message': {'content': 'Response to positional args.'}}]
+    }
+    mock_litellm_completion.return_value = mock_response
+
+    test_llm = LLM(config=default_config)
+    response = test_llm.completion(
+        'some-model-to-be-ignored',
+        [{'role': 'user', 'content': 'Hello from positional args!'}],
+        stream=False,
+    )
+
+    # Assertions
+    assert (
+        response['choices'][0]['message']['content'] == 'Response to positional args.'
+    )
+    mock_litellm_completion.assert_called_once()
+
+    # Check if the correct arguments were passed to litellm_completion
+    call_args, call_kwargs = mock_litellm_completion.call_args
+    assert (
+        call_kwargs['model'] == default_config.model
+    )  # Should use the model from config, not the first arg
+    assert call_kwargs['messages'] == [
+        {'role': 'user', 'content': 'Hello from positional args!'}
+    ]
+    assert not call_kwargs['stream']
+
+    # Ensure the first positional argument (model) was ignored
+    assert (
+        len(call_args) == 0
+    )  # No positional args should be passed to litellm_completion here

+ 58 - 6
tests/unit/test_message_serialization.py

@@ -1,7 +1,7 @@
 from openhands.core.message import ImageContent, Message, TextContent
 
 
-def test_message_serialization():
+def test_message_with_vision_enabled():
     text_content1 = TextContent(text='This is a text message')
     image_content1 = ImageContent(
         image_urls=['http://example.com/image1.png', 'http://example.com/image2.png']
@@ -11,11 +11,12 @@ def test_message_serialization():
         image_urls=['http://example.com/image3.png', 'http://example.com/image4.png']
     )
 
-    message = Message(
+    message: Message = Message(
         role='user',
         content=[text_content1, image_content1, text_content2, image_content2],
+        vision_enabled=True,
     )
-    serialized_message = message.serialize_model()
+    serialized_message: dict = message.serialize_model()
 
     expected_serialized_message = {
         'role': 'user',
@@ -45,12 +46,14 @@ def test_message_serialization():
     assert message.contains_image is True
 
 
-def test_message_with_only_text_content():
+def test_message_with_only_text_content_and_vision_enabled():
     text_content1 = TextContent(text='This is a text message')
     text_content2 = TextContent(text='This is another text message')
 
-    message = Message(role='user', content=[text_content1, text_content2])
-    serialized_message = message.serialize_model()
+    message: Message = Message(
+        role='user', content=[text_content1, text_content2], vision_enabled=True
+    )
+    serialized_message: dict = message.serialize_model()
 
     expected_serialized_message = {
         'role': 'user',
@@ -62,3 +65,52 @@ def test_message_with_only_text_content():
 
     assert serialized_message == expected_serialized_message
     assert message.contains_image is False
+
+
+def test_message_with_only_text_content_and_vision_disabled():
+    text_content1 = TextContent(text='This is a text message')
+    text_content2 = TextContent(text='This is another text message')
+
+    message: Message = Message(
+        role='user', content=[text_content1, text_content2], vision_enabled=False
+    )
+    serialized_message: dict = message.serialize_model()
+
+    expected_serialized_message = {
+        'role': 'user',
+        'content': 'This is a text message\nThis is another text message',
+    }
+
+    assert serialized_message == expected_serialized_message
+    assert message.contains_image is False
+
+
+def test_message_with_mixed_content_and_vision_disabled():
+    # Create a message with both text and image content
+    text_content1 = TextContent(text='This is a text message')
+    image_content1 = ImageContent(
+        image_urls=['http://example.com/image1.png', 'http://example.com/image2.png']
+    )
+    text_content2 = TextContent(text='This is another text message')
+    image_content2 = ImageContent(
+        image_urls=['http://example.com/image3.png', 'http://example.com/image4.png']
+    )
+
+    # Initialize Message with vision disabled
+    message: Message = Message(
+        role='user',
+        content=[text_content1, image_content1, text_content2, image_content2],
+        vision_enabled=False,
+    )
+    serialized_message: dict = message.serialize_model()
+
+    # Expected serialization ignores images and concatenates text
+    expected_serialized_message = {
+        'role': 'user',
+        'content': 'This is a text message\nThis is another text message',
+    }
+
+    # Assert serialized message matches expectation
+    assert serialized_message == expected_serialized_message
+    # Assert that images exist in the original message
+    assert message.contains_image