
remove openai key assertion, enable alternate embedding models (#231)

* remove openai key assertion

* support different embedding models

* add todo

* add local embeddings

* Make lint happy (#232)

* Include Azure AI embedding model (#239)

* Include Azure AI embedding model

* updated requirements

---------

Co-authored-by: Rohit Rushil <rohit.rushil@honeywell.com>

* Update agenthub/langchains_agent/utils/memory.py

* Update agenthub/langchains_agent/utils/memory.py

* add base url

* add docs

* Update requirements.txt

* default to local embeddings

* Update llm.py

* fix fn

---------

Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
Co-authored-by: RoHitRushil <43521824+RohitX0X@users.noreply.github.com>
Co-authored-by: Rohit Rushil <rohit.rushil@honeywell.com>
Robert Brennan 1 year ago
commit 4304aceff3
5 changed files with 57 additions and 15 deletions
  1. README.md (+12 -2)
  2. agenthub/codeact_agent/__init__.py (+0 -7)
  3. agenthub/langchains_agent/utils/memory.py (+36 -3)
  4. opendevin/llm/llm.py (+6 -3)
  5. requirements.txt (+3 -0)

+ 12 - 2
README.md

@@ -54,9 +54,19 @@ export LLM_API_KEY="your-api-key"
 export LLM_MODEL="claude-3-opus-20240229"
 ```
 
-### Running on the Command Line
-You can also run OpenDevin from your command line:
+You can also set the base URL for local/custom models:
+```bash
+export LLM_BASE_URL="https://localhost:3000"
+```
+
+And you can customize which embeddings are used for the vector database storage:
+```bash
+export LLM_EMBEDDING_MODEL="llama2" # can be "llama2", "openai", "azureopenai", or "local"
 ```
+
+### Running on the Command Line
+You can run OpenDevin from your command line:
+```bash
 PYTHONPATH=`pwd` python opendevin/main.py -d ./workspace/ -i 100 -t "Write a bash script that prints 'hello world'"
 ```
 

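For context, a minimal sketch (not part of the commit) of how the two new variables are read on the Python side, matching the defaults used in the diffs below:

```python
import os

# LLM_BASE_URL is optional; None lets the provider default apply.
base_url = os.getenv("LLM_BASE_URL")

# LLM_EMBEDDING_MODEL defaults to "local"; in the code below, any
# unrecognized value also falls back to local HuggingFace embeddings.
embedding_strategy = os.getenv("LLM_EMBEDDING_MODEL", "local")
```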
+ 0 - 7
agenthub/codeact_agent/__init__.py

@@ -1,4 +1,3 @@
-import os
 import re
 from typing import List, Mapping
 
@@ -19,12 +18,6 @@ from opendevin.observation import (
 
 from opendevin.llm.llm import LLM
 
-assert (
-    "OPENAI_API_KEY" in os.environ
-), "Please set the OPENAI_API_KEY environment variable."
-
-
-
 SYSTEM_MESSAGE = """You are a helpful assistant. You will be provided access (as root) to a bash shell to complete user-provided tasks.
 You will be able to execute commands in the bash shell, interact with the file system, install packages, and receive the output of your commands.
 

+ 36 - 3
agenthub/langchains_agent/utils/memory.py

@@ -1,18 +1,51 @@
-from . import json
+import os
 
 import chromadb
-
 from llama_index.core import Document
 from llama_index.core.retrievers import VectorIndexRetriever
 from llama_index.core import VectorStoreIndex
 from llama_index.vector_stores.chroma import ChromaVectorStore
 
+from . import json
+
+embedding_strategy = os.getenv("LLM_EMBEDDING_MODEL", "local")
+
+# TODO: More embeddings: https://docs.llamaindex.ai/en/stable/examples/embeddings/OpenAI/
+# There's probably a more programmatic way to do this.
+if embedding_strategy == "llama2":
+    from llama_index.embeddings.ollama import OllamaEmbedding
+    embed_model = OllamaEmbedding(
+        model_name="llama2",
+        base_url=os.getenv("LLM_BASE_URL", "http://localhost:8000"),
+        ollama_additional_kwargs={"mirostat": 0},
+    )
+elif embedding_strategy == "openai":
+    from llama_index.embeddings.openai import OpenAIEmbedding
+    embed_model = OpenAIEmbedding(
+        base_url=os.getenv("LLM_BASE_URL"),
+    )
+elif embedding_strategy == "azureopenai":
+    from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding  # Need to instruct to set these env variables in documentation
+    embed_model = AzureOpenAIEmbedding(
+        model="text-embedding-ada-002",
+        deployment_name=os.getenv("LLM_DEPLOYMENT_NAME"),
+        api_key=os.getenv("LLM_API_KEY"),
+        azure_endpoint=os.getenv("LLM_BASE_URL"),
+        api_version=os.getenv("LLM_API_VERSION"),
+    )
+else:
+    from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+    embed_model = HuggingFaceEmbedding(
+        model_name="BAAI/bge-small-en-v1.5"
+    )
+
+
 class LongTermMemory:
     def __init__(self):
         db = chromadb.Client()
         self.collection = db.get_or_create_collection(name="memories")
         vector_store = ChromaVectorStore(chroma_collection=self.collection)
-        self.index = VectorStoreIndex.from_vector_store(vector_store)
+        self.index = VectorStoreIndex.from_vector_store(vector_store, embed_model=embed_model)
         self.thought_idx = 0
 
     def add_event(self, event):

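The TODO in this hunk notes that the if/elif chain could be more programmatic. One possible refactor, sketched below under the same imports and defaults as the diff, is a registry of factory callables; the function and registry names are illustrative, not part of the commit:

```python
import os

# Illustrative refactor of the dispatch above: map each strategy name to a
# zero-argument factory, so adding a backend is a one-line registry entry.
def _ollama():
    from llama_index.embeddings.ollama import OllamaEmbedding
    return OllamaEmbedding(
        model_name="llama2",
        base_url=os.getenv("LLM_BASE_URL", "http://localhost:8000"),
        ollama_additional_kwargs={"mirostat": 0},
    )

def _local():
    from llama_index.embeddings.huggingface import HuggingFaceEmbedding
    return HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# openai/azureopenai factories omitted for brevity; they would follow the same shape.
EMBEDDING_FACTORIES = {"llama2": _ollama, "local": _local}

def get_embed_model():
    strategy = os.getenv("LLM_EMBEDDING_MODEL", "local")
    # Unknown values fall back to local embeddings, as in the original chain.
    return EMBEDDING_FACTORIES.get(strategy, _local)()
```

Lazy imports inside each factory preserve the diff's behavior of only importing the backend that is actually selected.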
+ 6 - 3
opendevin/llm/llm.py

@@ -6,16 +6,19 @@ from functools import partial
 
 DEFAULT_MODEL = os.getenv("LLM_MODEL", "gpt-4-0125-preview")
 DEFAULT_API_KEY = os.getenv("LLM_API_KEY")
+DEFAULT_BASE_URL = os.getenv("LLM_BASE_URL")
 PROMPT_DEBUG_DIR = os.getenv("PROMPT_DEBUG_DIR", "")
 
 class LLM:
-    def __init__(self, model=DEFAULT_MODEL, api_key=DEFAULT_API_KEY, debug_dir=PROMPT_DEBUG_DIR):
+    def __init__(self, model=DEFAULT_MODEL, api_key=DEFAULT_API_KEY, base_url=DEFAULT_BASE_URL, debug_dir=PROMPT_DEBUG_DIR):
         self.model = model if model else DEFAULT_MODEL
         self.api_key = api_key if api_key else DEFAULT_API_KEY
-        self._debug_dir = debug_dir
+        self.base_url = base_url if base_url else DEFAULT_BASE_URL
+        self._debug_dir = debug_dir if debug_dir else PROMPT_DEBUG_DIR
         self._debug_idx = 0
         self._debug_id = uuid.uuid4().hex
-        self._completion = partial(litellm_completion, model=self.model, api_key=self.api_key)
+
+        self._completion = partial(litellm_completion, model=self.model, api_key=self.api_key, base_url=self.base_url)
 
         if self._debug_dir:
             print(f"Logging prompts to {self._debug_dir}/{self._debug_id}")

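With base_url threaded through to litellm, pointing the agent at a local OpenAI-compatible server becomes a constructor argument. A hedged usage sketch: the model name and URL are example values, and the partial is called directly here since the diff does not show the class's public completion helper:

```python
from opendevin.llm.llm import LLM

# Example values only; any OpenAI-compatible endpoint should work here.
llm = LLM(model="ollama/llama2", base_url="http://localhost:11434")

# The partial built in __init__ already carries model, api_key, and base_url,
# so the caller only supplies messages.
response = llm._completion(
    messages=[{"role": "user", "content": "Say hello."}]
)
print(response.choices[0].message.content)
```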
+ 3 - 0
requirements.txt

@@ -16,3 +16,6 @@ langchain-community
 llama-index
 llama-index-vector-stores-chroma
 chromadb
+llama-index-embeddings-huggingface
+llama-index-embeddings-azure-openai
+llama-index-embeddings-ollama