@@ -1,6 +1,9 @@
+import copy
 import warnings
 from functools import partial
 
+from opendevin.core.config import LLMConfig
+
 with warnings.catch_warnings():
     warnings.simplefilter('ignore')
     import litellm
@@ -21,7 +24,6 @@ from tenacity import (
     wait_random_exponential,
 )
 
-from opendevin.core.config import config
 from opendevin.core.logger import llm_prompt_logger, llm_response_logger
 from opendevin.core.logger import opendevin_logger as logger
 from opendevin.core.metrics import Metrics
@@ -35,155 +37,71 @@ class LLM:
     """The LLM class represents a Language Model instance.
 
     Attributes:
-        model_name (str): The name of the language model.
-        api_key (str): The API key for accessing the language model.
-        base_url (str): The base URL for the language model API.
-        api_version (str): The version of the API to use.
-        max_input_tokens (int): The maximum number of tokens to send to the LLM per task.
-        max_output_tokens (int): The maximum number of tokens to receive from the LLM per task.
-        llm_timeout (int): The maximum time to wait for a response in seconds.
-        custom_llm_provider (str): A custom LLM provider.
+        config: an LLMConfig object specifying the configuration of the LLM.
     """
 
     def __init__(
         self,
-        model=None,
-        api_key=None,
-        base_url=None,
-        api_version=None,
-        num_retries=None,
-        retry_min_wait=None,
-        retry_max_wait=None,
-        llm_timeout=None,
-        llm_temperature=None,
-        llm_top_p=None,
-        custom_llm_provider=None,
-        max_input_tokens=None,
-        max_output_tokens=None,
-        llm_config=None,
-        metrics=None,
-        cost_metric_supported=True,
-        input_cost_per_token=None,
-        output_cost_per_token=None,
+        config: LLMConfig,
+        metrics: Metrics | None = None,
     ):
"""Initializes the LLM. If LLMConfig is passed, its values will be the fallback.
|
|
|
|
|
|
Passing simple parameters always overrides config.
|
|
|
|
|
|
Args:
|
|
|
- model (str, optional): The name of the language model. Defaults to LLM_MODEL.
|
|
|
- api_key (str, optional): The API key for accessing the language model. Defaults to LLM_API_KEY.
|
|
|
- base_url (str, optional): The base URL for the language model API. Defaults to LLM_BASE_URL. Not necessary for OpenAI.
|
|
|
- api_version (str, optional): The version of the API to use. Defaults to LLM_API_VERSION. Not necessary for OpenAI.
|
|
|
- num_retries (int, optional): The number of retries for API calls. Defaults to LLM_NUM_RETRIES.
|
|
|
- retry_min_wait (int, optional): The minimum time to wait between retries in seconds. Defaults to LLM_RETRY_MIN_TIME.
|
|
|
- retry_max_wait (int, optional): The maximum time to wait between retries in seconds. Defaults to LLM_RETRY_MAX_TIME.
|
|
|
- max_input_tokens (int, optional): The maximum number of tokens to send to the LLM per task. Defaults to LLM_MAX_INPUT_TOKENS.
|
|
|
- max_output_tokens (int, optional): The maximum number of tokens to receive from the LLM per task. Defaults to LLM_MAX_OUTPUT_TOKENS.
|
|
|
- custom_llm_provider (str, optional): A custom LLM provider. Defaults to LLM_CUSTOM_LLM_PROVIDER.
|
|
|
- llm_timeout (int, optional): The maximum time to wait for a response in seconds. Defaults to LLM_TIMEOUT.
|
|
|
- llm_temperature (float, optional): The temperature for LLM sampling. Defaults to LLM_TEMPERATURE.
|
|
|
- metrics (Metrics, optional): The metrics object to use. Defaults to None.
|
|
|
- cost_metric_supported (bool, optional): Whether the cost metric is supported. Defaults to True.
|
|
|
- input_cost_per_token (float, optional): The cost per input token.
|
|
|
- output_cost_per_token (float, optional): The cost per output token.
|
|
|
+ config: The LLM configuration
|
|
|
"""
|
|
|
-        if llm_config is None:
-            llm_config = config.get_llm_config()
-        model = model if model is not None else llm_config.model
-        api_key = api_key if api_key is not None else llm_config.api_key
-        base_url = base_url if base_url is not None else llm_config.base_url
-        api_version = api_version if api_version is not None else llm_config.api_version
-        num_retries = num_retries if num_retries is not None else llm_config.num_retries
-        retry_min_wait = (
-            retry_min_wait if retry_min_wait is not None else llm_config.retry_min_wait
-        )
-        retry_max_wait = (
-            retry_max_wait if retry_max_wait is not None else llm_config.retry_max_wait
-        )
-        llm_timeout = llm_timeout if llm_timeout is not None else llm_config.timeout
-        llm_temperature = (
-            llm_temperature if llm_temperature is not None else llm_config.temperature
-        )
-        llm_top_p = llm_top_p if llm_top_p is not None else llm_config.top_p
-        custom_llm_provider = (
-            custom_llm_provider
-            if custom_llm_provider is not None
-            else llm_config.custom_llm_provider
-        )
-        max_input_tokens = (
-            max_input_tokens
-            if max_input_tokens is not None
-            else llm_config.max_input_tokens
-        )
-        max_output_tokens = (
-            max_output_tokens
-            if max_output_tokens is not None
-            else llm_config.max_output_tokens
-        )
-        input_cost_per_token = (
-            input_cost_per_token
-            if input_cost_per_token is not None
-            else llm_config.input_cost_per_token
-        )
-        output_cost_per_token = (
-            output_cost_per_token
-            if output_cost_per_token is not None
-            else llm_config.output_cost_per_token
-        )
-        metrics = metrics if metrics is not None else Metrics()
-
-        logger.info(f'Initializing LLM with model: {model}')
-        self.model_name = model
-        self.api_key = api_key
-        self.base_url = base_url
-        self.api_version = api_version
-        self.max_input_tokens = max_input_tokens
-        self.max_output_tokens = max_output_tokens
-        self.input_cost_per_token = input_cost_per_token
-        self.output_cost_per_token = output_cost_per_token
-        self.llm_timeout = llm_timeout
-        self.custom_llm_provider = custom_llm_provider
-        self.metrics = metrics
-        self.cost_metric_supported = cost_metric_supported
+
+        self.config = copy.deepcopy(config)
+        self.metrics = metrics if metrics is not None else Metrics()
+        self.cost_metric_supported = True
 
         # litellm actually uses base Exception here for unknown model
         self.model_info = None
         try:
-            if not self.model_name.startswith('openrouter'):
-                self.model_info = litellm.get_model_info(self.model_name.split(':')[0])
+            if not config.model.startswith('openrouter'):
+                self.model_info = litellm.get_model_info(config.model.split(':')[0])
             else:
-                self.model_info = litellm.get_model_info(self.model_name)
+                self.model_info = litellm.get_model_info(config.model)
         # noinspection PyBroadException
         except Exception:
-            logger.warning(f'Could not get model info for {self.model_name}')
-
-        if self.max_input_tokens is None:
-            if self.model_info is not None and 'max_input_tokens' in self.model_info:
-                self.max_input_tokens = self.model_info['max_input_tokens']
+            logger.warning(f'Could not get model info for {config.model}')
+
+        # Set the max tokens in an LM-specific way if not set
+        if config.max_input_tokens is None:
+            if (
+                self.model_info is not None
+                and 'max_input_tokens' in self.model_info
+                and isinstance(self.model_info['max_input_tokens'], int)
+            ):
+                self.config.max_input_tokens = self.model_info['max_input_tokens']
             else:
                 # Max input tokens for gpt3.5, so this is a safe fallback for any potentially viable model
-                self.max_input_tokens = 4096
-
-        if self.max_output_tokens is None:
-            if self.model_info is not None and 'max_output_tokens' in self.model_info:
-                self.max_output_tokens = self.model_info['max_output_tokens']
+                self.config.max_input_tokens = 4096
+
+        if config.max_output_tokens is None:
+            if (
+                self.model_info is not None
+                and 'max_output_tokens' in self.model_info
+                and isinstance(self.model_info['max_output_tokens'], int)
+            ):
+                self.config.max_output_tokens = self.model_info['max_output_tokens']
             else:
-                # Enough tokens for most output actions, and not too many for a bad llm to get carried away responding
-                # with thousands of unwanted tokens
-                self.max_output_tokens = 1024
+                # Safe fallback for any potentially viable model: enough for most actions, without letting a bad LLM ramble on
+                self.config.max_output_tokens = 1024
 
         self._completion = partial(
             litellm_completion,
-            model=self.model_name,
-            api_key=self.api_key,
-            base_url=self.base_url,
-            api_version=self.api_version,
-            custom_llm_provider=custom_llm_provider,
-            max_tokens=self.max_output_tokens,
-            timeout=self.llm_timeout,
-            temperature=llm_temperature,
-            top_p=llm_top_p,
+            model=self.config.model,
+            api_key=self.config.api_key,
+            base_url=self.config.base_url,
+            api_version=self.config.api_version,
+            custom_llm_provider=self.config.custom_llm_provider,
+            max_tokens=self.config.max_output_tokens,
+            timeout=self.config.timeout,
+            temperature=self.config.temperature,
+            top_p=self.config.top_p,
         )
 
         completion_unwrapped = self._completion
@@ -197,8 +115,10 @@ class LLM:
 
         @retry(
             reraise=True,
-            stop=stop_after_attempt(num_retries),
-            wait=wait_random_exponential(min=retry_min_wait, max=retry_max_wait),
+            stop=stop_after_attempt(config.num_retries),
+            wait=wait_random_exponential(
+                min=config.retry_min_wait, max=config.retry_max_wait
+            ),
             retry=retry_if_exception_type(
                 (
                     RateLimitError,
@@ -267,7 +187,7 @@ class LLM:
         Returns:
             int: The number of tokens.
         """
-        return litellm.token_counter(model=self.model_name, messages=messages)
+        return litellm.token_counter(model=self.config.model, messages=messages)
 
     def is_local(self):
         """Determines if the system is using a locally running LLM.
@@ -275,12 +195,12 @@ class LLM:
         Returns:
             boolean: True if executing a local model.
         """
-        if self.base_url is not None:
+        if self.config.base_url is not None:
             for substring in ['localhost', '127.0.0.1' '0.0.0.0']:
-                if substring in self.base_url:
+                if substring in self.config.base_url:
                     return True
-        elif self.model_name is not None:
-            if self.model_name.startswith('ollama'):
+        elif self.config.model is not None:
+            if self.config.model.startswith('ollama'):
                 return True
         return False
 
@@ -299,12 +219,12 @@ class LLM:
 
         extra_kwargs = {}
         if (
-            self.input_cost_per_token is not None
-            and self.output_cost_per_token is not None
+            self.config.input_cost_per_token is not None
+            and self.config.output_cost_per_token is not None
         ):
             cost_per_token = CostPerToken(
-                input_cost_per_token=self.input_cost_per_token,
-                output_cost_per_token=self.output_cost_per_token,
+                input_cost_per_token=self.config.input_cost_per_token,
+                output_cost_per_token=self.config.output_cost_per_token,
             )
             logger.info(f'Using custom cost per token: {cost_per_token}')
             extra_kwargs['custom_cost_per_token'] = cost_per_token
@@ -322,11 +242,11 @@ class LLM:
         return 0.0
 
     def __str__(self):
-        if self.api_version:
-            return f'LLM(model={self.model_name}, api_version={self.api_version}, base_url={self.base_url})'
-        elif self.base_url:
-            return f'LLM(model={self.model_name}, base_url={self.base_url})'
-        return f'LLM(model={self.model_name})'
+        if self.config.api_version:
+            return f'LLM(model={self.config.model}, api_version={self.config.api_version}, base_url={self.config.base_url})'
+        elif self.config.base_url:
+            return f'LLM(model={self.config.model}, base_url={self.config.base_url})'
+        return f'LLM(model={self.config.model})'
 
     def __repr__(self):
         return str(self)
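
Usage after this refactor, as a minimal sketch (not part of the patch): build an LLMConfig and pass it to LLM, optionally with a Metrics instance. The keyword-argument construction of LLMConfig, the defaults of its other fields, and the opendevin.llm.llm import path are assumptions; the field names themselves (model, api_key, max_output_tokens, ...) are the ones referenced in the diff above.

# Minimal usage sketch under the assumptions stated above; values are placeholders.
from opendevin.core.config import LLMConfig
from opendevin.core.metrics import Metrics
from opendevin.llm.llm import LLM  # assumed module path of the patched file

config = LLMConfig(
    model='gpt-3.5-turbo',  # placeholder model name
    api_key='...',          # placeholder API key
    max_output_tokens=1024,
)

llm = LLM(config, metrics=Metrics())

# The constructor stores a deep copy, so mutating the original config afterwards
# does not change the LLM instance.
config.model = 'another-model'
print(llm.config.model)  # still 'gpt-3.5-turbo'
print(llm)               # e.g. LLM(model=gpt-3.5-turbo) when base_url/api_version are unset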