
Tweak connect exceptions (#1120)

* Clean up manual sleep

* Add default retries and document them.

* Add docstrings to llm

* Add exponential backoff for rate limiting errors

* Get embeddings for the action and its own content, not the user message

* Add a few bad exceptions to stop loop

* Stop loop when the step has no action

* Add action with content, no message, to history

* Make retry settings customizable

* Fix condense to stop the loop for the same reasons as completion

* Add 500-504 exceptions to retries

* Document the retry variables

* Add retries and limits for embeddings. Replaces llama-index hard-coded decorator.

* Rename to retry_min_wait and retry_max_wait
Engel Nyst 1 year ago
parent
commit
464bf7ee23

+ 5 - 0
README.md

@@ -177,6 +177,11 @@ can only be as powerful as the models driving it--fortunately folks on our team
 are actively working on building better open source models!
 
 
+**Note on API retries and rate limits:**
+Some LLMs have rate limits and may require retries. OpenDevin will automatically retry requests if it receives a 429 error or an API connection error.
+You can set the LLM_NUM_RETRIES, LLM_RETRY_MIN_WAIT and LLM_RETRY_MAX_WAIT environment variables to control the number of retries and the wait between them.
+By default, LLM_NUM_RETRIES is 5, and LLM_RETRY_MIN_WAIT and LLM_RETRY_MAX_WAIT are 3 and 60 seconds respectively.
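For illustration, the three variables above can be set like any other environment variable before starting OpenDevin; the values below are arbitrary examples, shown in Python for consistency with the rest of the patch:

```python
import os

# Illustrative values only; the defaults are 5 retries with waits between 3 and 60 seconds.
os.environ['LLM_NUM_RETRIES'] = '3'
os.environ['LLM_RETRY_MIN_WAIT'] = '5'    # seconds
os.environ['LLM_RETRY_MAX_WAIT'] = '30'   # seconds
```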
+
 ## ⭐️ Research Strategy
 
 Achieving full replication of production-grade applications with LLMs is a complex endeavor. Our strategy involves:

+ 36 - 0
agenthub/monologue_agent/utils/memory.py

@@ -1,3 +1,4 @@
+import llama_index.embeddings.openai.base as llama_openai
 from threading import Thread
 
 import chromadb
@@ -5,11 +6,46 @@ from llama_index.core import Document
 from llama_index.core.retrievers import VectorIndexRetriever
 from llama_index.core import VectorStoreIndex
 from llama_index.vector_stores.chroma import ChromaVectorStore
+from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_random_exponential
+from openai._exceptions import APIConnectionError, RateLimitError, InternalServerError
 
 from opendevin import config
 from opendevin.logger import opendevin_logger as logger
 from . import json
 
+num_retries = config.get('LLM_NUM_RETRIES')
+retry_min_wait = config.get('LLM_RETRY_MIN_WAIT')
+retry_max_wait = config.get('LLM_RETRY_MAX_WAIT')
+
+# llama-index includes a retry decorator around the openai get_embeddings() function,
+# initialized with hard-coded wait times and exception types.
+# This non-customizable behavior causes issues when it retries faster than a provider's rate limit allows,
+# so this block attempts to unwrap it and replace it with our own decorator, letting users set their own limits.
+
+if hasattr(llama_openai.get_embeddings, '__wrapped__'):
+    original_get_embeddings = llama_openai.get_embeddings.__wrapped__
+else:
+    logger.warning('Cannot set custom retry limits.')  # warn
+    num_retries = 1
+    original_get_embeddings = llama_openai.get_embeddings
+
+
+def attempt_on_error(retry_state):
+    logger.error(f'{retry_state.outcome.exception()}. Attempt #{retry_state.attempt_number} | You can customize these settings in the configuration.', exc_info=False)
+    return True
+
+
+@retry(reraise=True,
+       stop=stop_after_attempt(num_retries),
+       wait=wait_random_exponential(min=retry_min_wait, max=retry_max_wait),
+       retry=retry_if_exception_type((RateLimitError, APIConnectionError, InternalServerError)),
+       after=attempt_on_error)
+def wrapper_get_embeddings(*args, **kwargs):
+    return original_get_embeddings(*args, **kwargs)
+
+
+llama_openai.get_embeddings = wrapper_get_embeddings
+
 embedding_strategy = config.get('LLM_EMBEDDING_MODEL')
 
 # TODO: More embeddings: https://docs.llamaindex.ai/en/stable/examples/embeddings/OpenAI/
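The unwrapping trick above works because tenacity decorates functions with functools.wraps, which exposes the undecorated callable as `__wrapped__`. A minimal standalone sketch (not repository code) of the same pattern:

```python
from tenacity import retry, stop_after_attempt, wait_fixed


@retry(stop=stop_after_attempt(3), wait=wait_fixed(1))
def fetch():
    """Stand-in for a remote call wrapped in a hard-coded retry policy."""
    raise ConnectionError('transient failure')


# functools.wraps (applied by tenacity) exposes the original function here;
# memory.py relies on exactly this to strip llama-index's built-in policy and
# re-wrap get_embeddings with user-configurable limits.
original_fetch = fetch.__wrapped__
assert original_fetch.__name__ == 'fetch'
```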

+ 7 - 5
agenthub/monologue_agent/utils/monologue.py

@@ -1,9 +1,9 @@
-import traceback
 
 from opendevin.llm.llm import LLM
 from opendevin.exceptions import AgentEventTypeError
 import agenthub.monologue_agent.utils.json as json
 import agenthub.monologue_agent.utils.prompts as prompts
+from opendevin.logger import opendevin_logger as logger
 
 
 class Monologue:
@@ -53,7 +53,7 @@ class Monologue:
             try:
                 total_length += len(json.dumps(t))
             except TypeError as e:
-                print(f'Error serializing thought: {e}')
+                logger.error('Error serializing thought: %s', str(e), exc_info=False)
         return total_length
 
     def condense(self, llm: LLM):
@@ -64,7 +64,7 @@ class Monologue:
         - llm (LLM): llm to be used for summarization
 
         Raises:
-        - RunTimeError: When the condensing process fails for any reason
+        - Exception: re-raises the same exception it got from the llm or from processing the response
         """
 
         try:
@@ -74,5 +74,7 @@ class Monologue:
             summary_resp = resp['choices'][0]['message']['content']
             self.thoughts = prompts.parse_summary_response(summary_resp)
         except Exception as e:
-            traceback.print_exc()
-            raise RuntimeError(f'Error condensing thoughts: {e}')
+            logger.error('Error condensing thoughts: %s', str(e), exc_info=False)
+
+            # TODO: If the llm fails with ContextWindowExceededError, we can try to condense the monologue chunk by chunk
+            raise

+ 18 - 2
opendevin/config.py

@@ -5,6 +5,9 @@ import toml
 from dotenv import load_dotenv
 
 from opendevin.schema import ConfigType
+import logging
+
+logger = logging.getLogger(__name__)
 
 load_dotenv()
 
@@ -21,8 +24,9 @@ DEFAULT_CONFIG: dict = {
     ConfigType.LLM_EMBEDDING_MODEL: 'local',
     ConfigType.LLM_EMBEDDING_DEPLOYMENT_NAME: None,
     ConfigType.LLM_API_VERSION: None,
-    ConfigType.LLM_NUM_RETRIES: 1,
-    ConfigType.LLM_COOLDOWN_TIME: 1,
+    ConfigType.LLM_NUM_RETRIES: 5,
+    ConfigType.LLM_RETRY_MIN_WAIT: 3,
+    ConfigType.LLM_RETRY_MAX_WAIT: 60,
     ConfigType.MAX_ITERATIONS: 100,
     # GPT-4 pricing is $10 per 1M input tokens. Since tokenization happens on LLM side,
     # we cannot easily count number of tokens, but we can count characters.
@@ -41,6 +45,16 @@ if os.path.exists('config.toml'):
     with open('config.toml', 'rb') as f:
         config_str = f.read().decode('utf-8')
 
+
+def int_value(value, default, config_key):
+    # FIXME use a library
+    try:
+        return int(value)
+    except ValueError:
+        logger.warning(f'Invalid value for {config_key}: {value} not applied. Using default value {default}')
+        return default
+
+
 tomlConfig = toml.loads(config_str)
 config = DEFAULT_CONFIG.copy()
 for k, v in config.items():
@@ -48,6 +62,8 @@ for k, v in config.items():
         config[k] = os.environ[k]
     elif k in tomlConfig:
         config[k] = tomlConfig[k]
+    if k in [ConfigType.LLM_NUM_RETRIES, ConfigType.LLM_RETRY_MIN_WAIT, ConfigType.LLM_RETRY_MAX_WAIT]:
+        config[k] = int_value(config[k], v, config_key=k)
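A minimal sketch (not the repository's code) of the precedence this loop applies to the three retry settings: environment variables override config.toml, which overrides the defaults, and invalid values fall back to the default as int_value() does:

```python
import os

DEFAULTS = {'LLM_NUM_RETRIES': 5, 'LLM_RETRY_MIN_WAIT': 3, 'LLM_RETRY_MAX_WAIT': 60}
toml_config = {'LLM_RETRY_MAX_WAIT': '90'}   # stand-in for values parsed from config.toml

config = {}
for key, default in DEFAULTS.items():
    # environment variable wins over config.toml, which wins over the default
    raw = os.environ.get(key, toml_config.get(key, default))
    try:
        config[key] = int(raw)
    except (TypeError, ValueError):
        # mirrors int_value(): an invalid value is ignored and the default kept
        config[key] = default

print(config)
```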
 
 
 def get_parser():

+ 8 - 12
opendevin/controller/agent_controller.py

@@ -1,10 +1,9 @@
 import asyncio
-import time
 import traceback
 from typing import Callable, List
 
-from litellm.exceptions import APIConnectionError
-from openai import AuthenticationError
+from openai import AuthenticationError, APIConnectionError
+from litellm import ContextWindowExceededError
 
 from opendevin import config
 from opendevin.action import (
@@ -170,26 +169,23 @@ class AgentController:
         observation: Observation = NullObservation('')
         try:
             action = self.agent.step(self.state)
-            logger.info(action, extra={'msg_type': 'ACTION'})
             if action is None:
                 raise AgentNoActionError()
+            logger.info(action, extra={'msg_type': 'ACTION'})
         except Exception as e:
             observation = AgentErrorObservation(str(e))
             logger.error(e)
             logger.debug(traceback.format_exc())
 
-            if isinstance(e, APIConnectionError):
-                time.sleep(3)
-
             # raise specific exceptions that need to be handled outside
-            # note: we are using AuthenticationError class from openai rather than
-            # litellm because:
+            # note: we are using classes from openai rather than litellm because:
             # 1) litellm.exceptions.AuthenticationError is a subclass of openai.AuthenticationError
-            # 2) embeddings call, initiated by llama-index, has no wrapper for authentication
-            #    errors. This means we have to catch individual authentication errors
+            # 2) the embeddings call, initiated by llama-index, has no wrapper for errors.
+            #    This means we have to catch individual authentication errors
             #    from different providers, and OpenAI is one of these.
-            if isinstance(e, (AuthenticationError, AgentNoActionError)):
+            if isinstance(e, (AuthenticationError, ContextWindowExceededError, APIConnectionError)):
                 raise
+
         self.update_state_after_step()
 
         await self._run_callbacks(action)
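Point 1 of the comment above can be checked directly; a small hedged sketch, assuming a litellm version where its exception class derives from openai's:

```python
from litellm.exceptions import AuthenticationError as LiteLLMAuthenticationError
from openai import AuthenticationError

# Because the litellm class subclasses the openai one, catching
# openai.AuthenticationError in the controller also covers the litellm variant.
print(issubclass(LiteLLMAuthenticationError, AuthenticationError))  # expected: True
```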

+ 37 - 14
opendevin/llm/llm.py

@@ -1,30 +1,55 @@
 
 from litellm import completion as litellm_completion
-from tenacity import retry, retry_if_exception_type, stop_after_attempt
-from litellm.exceptions import APIConnectionError, RateLimitError
+from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_random_exponential
+from litellm.exceptions import APIConnectionError, RateLimitError, ServiceUnavailableError
 from functools import partial
 
 from opendevin import config
-from opendevin.logger import llm_prompt_logger, llm_response_logger, opendevin_logger
+from opendevin.logger import llm_prompt_logger, llm_response_logger
+from opendevin.logger import opendevin_logger as logger
+
 
 DEFAULT_API_KEY = config.get('LLM_API_KEY')
 DEFAULT_BASE_URL = config.get('LLM_BASE_URL')
 DEFAULT_MODEL_NAME = config.get('LLM_MODEL')
-DEFAULT_LLM_NUM_RETRIES = config.get('LLM_NUM_RETRIES')
-DEFAULT_LLM_COOLDOWN_TIME = config.get('LLM_COOLDOWN_TIME')
 DEFAULT_API_VERSION = config.get('LLM_API_VERSION')
+LLM_NUM_RETRIES = config.get('LLM_NUM_RETRIES')
+LLM_RETRY_MIN_WAIT = config.get('LLM_RETRY_MIN_WAIT')
+LLM_RETRY_MAX_WAIT = config.get('LLM_RETRY_MAX_WAIT')
 
 
 class LLM:
+    """
+    The LLM class represents a Language Model instance.
+    """
+
     def __init__(self,
                  model=DEFAULT_MODEL_NAME,
                  api_key=DEFAULT_API_KEY,
                  base_url=DEFAULT_BASE_URL,
-                 num_retries=DEFAULT_LLM_NUM_RETRIES,
-                 cooldown_time=DEFAULT_LLM_COOLDOWN_TIME,
                  api_version=DEFAULT_API_VERSION,
+                 num_retries=LLM_NUM_RETRIES,
+                 retry_min_wait=LLM_RETRY_MIN_WAIT,
+                 retry_max_wait=LLM_RETRY_MAX_WAIT,
                  ):
-        opendevin_logger.info(f'Initializing LLM with model: {model}')
+        """
+        Args:
+            model (str, optional): The name of the language model. Defaults to LLM_MODEL.
+            api_key (str, optional): The API key for accessing the language model. Defaults to LLM_API_KEY.
+            base_url (str, optional): The base URL for the language model API. Defaults to LLM_BASE_URL. Not necessary for OpenAI.
+            api_version (str, optional): The version of the API to use. Defaults to LLM_API_VERSION. Not necessary for OpenAI.
+            num_retries (int, optional): The number of retries for API calls. Defaults to LLM_NUM_RETRIES.
+            retry_min_wait (int, optional): The minimum time to wait between retries in seconds. Defaults to LLM_RETRY_MIN_WAIT.
+            retry_max_wait (int, optional): The maximum time to wait between retries in seconds. Defaults to LLM_RETRY_MAX_WAIT.
+
+        Attributes:
+            model_name (str): The name of the language model.
+            api_key (str): The API key for accessing the language model.
+            base_url (str): The base URL for the language model API.
+            api_version (str): The version of the API to use.
+            completion (function): A decorator for the litellm completion function.
+        """
+        logger.info(f'Initializing LLM with model: {model}')
         self.model_name = model
         self.api_key = api_key
         self.base_url = base_url
@@ -35,15 +60,13 @@ class LLM:
 
         completion_unwrapped = self._completion
 
-        def my_wait(retry_state):
-            seconds = (retry_state.attempt_number) * cooldown_time
-            opendevin_logger.warning(f'LLM error: {retry_state.outcome.exception()}')
-            opendevin_logger.info(f'Attempt #{retry_state.attempt_number} | Sleeping for {seconds}s')
-            return seconds
+        def attempt_on_error(retry_state):
+            logger.error(f'{retry_state.outcome.exception()}. Attempt #{retry_state.attempt_number} | You can customize these settings in the configuration.', exc_info=False)
+            return True
 
         @retry(reraise=True,
                stop=stop_after_attempt(num_retries),
-               wait=my_wait, retry=retry_if_exception_type((APIConnectionError, RateLimitError)))
+               wait=wait_random_exponential(min=retry_min_wait, max=retry_max_wait),
+               retry=retry_if_exception_type((RateLimitError, APIConnectionError, ServiceUnavailableError)),
+               after=attempt_on_error)
         def wrapper(*args, **kwargs):
             if 'messages' in kwargs:
                 messages = kwargs['messages']
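A hypothetical usage example of the constructor documented above, overriding the retry settings per instance; the shape of the completion call and response follows the usage in monologue.py:

```python
from opendevin.llm.llm import LLM

# Per-instance overrides; unset arguments fall back to the LLM_* config values.
llm = LLM(model='gpt-4', num_retries=3, retry_min_wait=5, retry_max_wait=30)

# The wrapped completion retries on RateLimitError, APIConnectionError and
# ServiceUnavailableError with random exponential backoff between 5 and 30 seconds.
resp = llm.completion(messages=[{'role': 'user', 'content': 'Hello'}])
print(resp['choices'][0]['message']['content'])
```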

+ 2 - 1
opendevin/schema/config.py

@@ -15,7 +15,8 @@ class ConfigType(str, Enum):
     LLM_EMBEDDING_DEPLOYMENT_NAME = 'LLM_EMBEDDING_DEPLOYMENT_NAME'
     LLM_API_VERSION = 'LLM_API_VERSION'
     LLM_NUM_RETRIES = 'LLM_NUM_RETRIES'
-    LLM_COOLDOWN_TIME = 'LLM_COOLDOWN_TIME'
+    LLM_RETRY_MIN_WAIT = 'LLM_RETRY_MIN_WAIT'
+    LLM_RETRY_MAX_WAIT = 'LLM_RETRY_MAX_WAIT'
     MAX_ITERATIONS = 'MAX_ITERATIONS'
     MAX_CHARS = 'MAX_CHARS'
     AGENT = 'AGENT'