@@ -21,6 +21,7 @@ from litellm.exceptions import (
 )
 from litellm.types.utils import CostPerToken, ModelResponse, Usage
+from openhands.core.exceptions import CloudFlareBlockageError
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.message import Message
 from openhands.core.metrics import Metrics
@@ -187,29 +188,36 @@ class LLM(RetryMixin, DebugMixin):
                         'anthropic-beta': 'prompt-caching-2024-07-31',
                     }

-            # we don't support streaming here, thus we get a ModelResponse
-            resp: ModelResponse = completion_unwrapped(*args, **kwargs)
-
-            # log for evals or other scripts that need the raw completion
-            if self.config.log_completions:
-                self.llm_completions.append(
-                    {
-                        'messages': messages,
-                        'response': resp,
-                        'timestamp': time.time(),
-                        'cost': self._completion_cost(resp),
-                    }
-                )
-
-            message_back: str = resp['choices'][0]['message']['content']
-
-            # log the LLM response
-            self.log_response(message_back)
-
-            # post-process the response
-            self._post_completion(resp)
-
-            return resp
+            try:
+                # we don't support streaming here, thus we get a ModelResponse
+                resp: ModelResponse = completion_unwrapped(*args, **kwargs)
+
+                # log for evals or other scripts that need the raw completion
+                if self.config.log_completions:
+                    self.llm_completions.append(
+                        {
+                            'messages': messages,
+                            'response': resp,
+                            'timestamp': time.time(),
+                            'cost': self._completion_cost(resp),
+                        }
+                    )
+
+                message_back: str = resp['choices'][0]['message']['content']
+
+                # log the LLM response
+                self.log_response(message_back)
+
+                # post-process the response
+                self._post_completion(resp)
+
+                return resp
+            except APIError as e:
+                if 'Attention Required! | Cloudflare' in str(e):
+                    raise CloudFlareBlockageError(
+                        'Request blocked by CloudFlare'
+                    ) from e
+                raise

         self._completion = wrapper
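
For context, a minimal sketch of how the new exception might be consumed downstream. This is illustrative only and not part of the diff: it assumes CloudFlareBlockageError is a plain Exception subclass exported by openhands.core.exceptions, and that LLM.completion exposes the wrapped function assigned to self._completion above; the helper name call_llm_safely is hypothetical.

```python
from openhands.core.exceptions import CloudFlareBlockageError


def call_llm_safely(llm, messages):
    """Hypothetical caller: turn a CloudFlare blockage into a clear, user-facing error."""
    try:
        # llm.completion is assumed to invoke the wrapper defined in the diff above
        return llm.completion(messages=messages)
    except CloudFlareBlockageError as e:
        # The request never reached the model: CloudFlare rejected it at the edge,
        # so an immediate retry is unlikely to succeed.
        raise RuntimeError(
            'The LLM provider blocked this request (CloudFlare). '
            'Check the API base URL, network, or provider status before retrying.'
        ) from e
```

Converting the generic APIError into a dedicated exception lets callers branch on the blockage case without string-matching the provider's HTML error page themselves.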