1 year ago · 8c32ef2234
--- a/openhands/llm/async_llm.py
+++ b/openhands/llm/async_llm.py
@@ -2,7 +2,7 @@ import asyncio
 
				 from functools import partial
			
 
				 from typing import Any
			
 
				 
			
 
				-from litellm import completion as litellm_acompletion
			
 
				+from litellm import acompletion as litellm_acompletion
			
 
				 
			
 
				 from openhands.core.exceptions import UserCancelledError
			
 
				 from openhands.core.logger import openhands_logger as logger
			
@@ -40,7 +40,7 @@ class AsyncLLM(LLM):
 
				             retry_multiplier=self.config.retry_multiplier,
			
 
				         )
			
 
				         async def async_completion_wrapper(*args, **kwargs):
			
 
				-            """Wrapper for the litellm acompletion function."""
			
 
				+            """Wrapper for the litellm acompletion function that adds logging and cost tracking."""
			
 
				             messages: list[dict[str, Any]] | dict[str, Any] = []
			
 
				 
			
 
				             # some callers might send the model and messages directly
			
@@ -84,6 +84,8 @@ class AsyncLLM(LLM):
 
				 
			
 
				                 message_back = resp['choices'][0]['message']['content']
			
 
				                 self.log_response(message_back)
			
 
				+
			
 
				+                # log costs and tokens used
			
 
				                 self._post_completion(resp)
			
 
				 
			
 
				                 # We do not support streaming in this method, thus return resp
			
--- a/openhands/llm/llm.py
+++ b/openhands/llm/llm.py
@@ -32,6 +32,8 @@ __all__ = ['LLM']
 
				 # tuple of exceptions to retry on
			
 
				 LLM_RETRY_EXCEPTIONS: tuple[type[Exception], ...] = (
			
 
				     APIConnectionError,
			
 
				+    # FIXME: APIError is useful on 502 from a proxy for example,
			
 
				+    # but it also retries on other errors that are permanent
			
 
				     APIError,
			
 
				     InternalServerError,
			
 
				     RateLimitError,