memory.py

import threading

import chromadb
import llama_index.embeddings.openai.base as llama_openai
from llama_index.core import Document, VectorStoreIndex
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.vector_stores.chroma import ChromaVectorStore
from openai._exceptions import APIConnectionError, InternalServerError, RateLimitError
from tenacity import (
    retry,
    retry_if_exception_type,
    stop_after_attempt,
    wait_random_exponential,
)

from opendevin.core.config import config
from opendevin.core.logger import opendevin_logger as logger
from opendevin.core.utils import json

num_retries = config.llm.num_retries
retry_min_wait = config.llm.retry_min_wait
retry_max_wait = config.llm.retry_max_wait

# llama-index includes a retry decorator around the openai.get_embeddings() function
# it is initialized with hard-coded values and error types
# this non-customizable behavior causes issues when it retries faster than providers' rate limits allow
# this block attempts to banish it and replace it with our decorator, to allow users to set their own limits
if hasattr(llama_openai.get_embeddings, '__wrapped__'):
    original_get_embeddings = llama_openai.get_embeddings.__wrapped__
else:
    logger.warning('Cannot set custom retry limits.')
    num_retries = 1
    original_get_embeddings = llama_openai.get_embeddings


def attempt_on_error(retry_state):
    """Log the exception from each failed attempt before the next retry."""
    logger.error(
        f'{retry_state.outcome.exception()}. Attempt #{retry_state.attempt_number} | You can customize these settings in the configuration.',
        exc_info=False,
    )
    return True


@retry(
    reraise=True,
    stop=stop_after_attempt(num_retries),
    wait=wait_random_exponential(min=retry_min_wait, max=retry_max_wait),
    retry=retry_if_exception_type(
        (RateLimitError, APIConnectionError, InternalServerError)
    ),
    after=attempt_on_error,
)
def wrapper_get_embeddings(*args, **kwargs):
    return original_get_embeddings(*args, **kwargs)


llama_openai.get_embeddings = wrapper_get_embeddings
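
# Illustration (hypothetical numbers, not taken from the configuration): with
# num_retries=5, retry_min_wait=3 and retry_max_wait=60, a RateLimitError
# raised inside llama-index's embedding call now waits a random-exponential
# 3-60s between attempts and re-raises after the fifth failure, instead of
# following the library's hard-coded schedule.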


class EmbeddingsLoader:
    """Loader for embedding model initialization."""

    @staticmethod
    def get_embedding_model(strategy: str):
        supported_ollama_embed_models = [
            'llama2',
            'mxbai-embed-large',
            'nomic-embed-text',
            'all-minilm',
            'stable-code',
        ]
        if strategy in supported_ollama_embed_models:
            from llama_index.embeddings.ollama import OllamaEmbedding

            return OllamaEmbedding(
                model_name=strategy,
                base_url=config.llm.embedding_base_url,
                ollama_additional_kwargs={'mirostat': 0},
            )
        elif strategy == 'openai':
            from llama_index.embeddings.openai import OpenAIEmbedding

            return OpenAIEmbedding(
                model='text-embedding-ada-002',
                api_key=config.llm.api_key,
            )
        elif strategy == 'azureopenai':
            from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding

            return AzureOpenAIEmbedding(
                model='text-embedding-ada-002',
                deployment_name=config.llm.embedding_deployment_name,
                api_key=config.llm.api_key,
                azure_endpoint=config.llm.base_url,
                api_version=config.llm.api_version,
            )
        elif (strategy is not None) and (strategy.lower() == 'none'):
            # TODO: this works but is not elegant enough. The incentive is that
            # when the monologue agent is not used, there is no reason to
            # initialize an embedding model.
            return None
        else:
            from llama_index.embeddings.huggingface import HuggingFaceEmbedding

            return HuggingFaceEmbedding(model_name='BAAI/bge-small-en-v1.5')
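

# Usage sketch (illustrative, not part of the original module): any strategy
# that matches none of the branches above falls back to the local HuggingFace
# model, so no API key is required:
#
#     embed_model = EmbeddingsLoader.get_embedding_model('local')
#     # -> HuggingFaceEmbedding(model_name='BAAI/bge-small-en-v1.5')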

sema = threading.Semaphore(value=config.agent.memory_max_threads)


class LongTermMemory:
    """
    Handles storing information for the agent to access later, using chromadb.
    """

    def __init__(self):
        """
        Initialize the chromadb and set up ChromaVectorStore for later use.
        """
        db = chromadb.Client(chromadb.Settings(anonymized_telemetry=False))
        self.collection = db.get_or_create_collection(name='memories')
        vector_store = ChromaVectorStore(chroma_collection=self.collection)
        embedding_strategy = config.llm.embedding_model
        embed_model = EmbeddingsLoader.get_embedding_model(embedding_strategy)
        self.index = VectorStoreIndex.from_vector_store(vector_store, embed_model)
        self.thought_idx = 0
        self._add_threads = []

    def add_event(self, event: dict):
        """
        Adds a new event to the long term memory with a unique id.

        Parameters:
        - event (dict): The new event to be added to memory
        """
        event_type = ''
        event_id = ''
        if 'action' in event:
            event_type = 'action'
            event_id = event['action']
        elif 'observation' in event:
            event_type = 'observation'
            event_id = event['observation']
        doc = Document(
            text=json.dumps(event),
            doc_id=str(self.thought_idx),
            extra_info={
                'type': event_type,
                'id': event_id,
                'idx': self.thought_idx,
            },
        )
        self.thought_idx += 1
        logger.debug('Adding %s event to memory: %d', event_type, self.thought_idx)
        thread = threading.Thread(target=self._add_doc, args=(doc,))
        self._add_threads.append(thread)
        thread.start()  # We add the doc concurrently so we don't have to wait ~500ms for the insert

    def _add_doc(self, doc):
        with sema:
            self.index.insert(doc)

    def search(self, query: str, k: int = 10):
        """
        Searches through the current memory using VectorIndexRetriever.

        Parameters:
        - query (str): A query to match search results to
        - k (int): Number of top results to return

        Returns:
        - list[str]: list of top k results found in current memory
        """
        retriever = VectorIndexRetriever(
            index=self.index,
            similarity_top_k=k,
        )
        results = retriever.retrieve(query)
        return [r.get_text() for r in results]
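

if __name__ == '__main__':
    # Minimal smoke test (an illustrative sketch, not part of the original
    # module). It assumes config.llm.embedding_model selects a backend that
    # is reachable without credentials, e.g. the local HuggingFace fallback.
    memory = LongTermMemory()
    memory.add_event({'action': 'run', 'args': {'command': 'ls'}})
    memory.add_event({'observation': 'run', 'content': 'memory.py'})
    # add_event() inserts on background threads; join them before querying.
    for thread in memory._add_threads:
        thread.join()
    print(memory.search('list files', k=1))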