remove openai key assertion, enable alternate embedding models (#231)

* remove openai key assertion

* support different embedding models

* add todo

* add local embeddings

* Make lint happy (#232)

* Include Azure AI embedding model (#239)

* Include Azure AI embedding model

* updated requirements

---------

Co-authored-by: Rohit Rushil <rohit.rushil@honeywell.com>

* Update agenthub/langchains_agent/utils/memory.py

* Update agenthub/langchains_agent/utils/memory.py

* add base url

* add docs

* Update requirements.txt

* default to local embeddings

* Update llm.py

* fix fn

---------

Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
Co-authored-by: RoHitRushil <43521824+RohitX0X@users.noreply.github.com>
Co-authored-by: Rohit Rushil <rohit.rushil@honeywell.com>
Robert Brennan committed 4304aceff3 2 years ago

README.md (+12 -2)

@@ -54,9 +54,19 @@ export LLM_API_KEY="your-api-key"
 export LLM_MODEL="claude-3-opus-20240229"
 ```
 
-### Running on the Command Line
-You can also run OpenDevin from your command line:
+You can also set the base URL for local/custom models:
+```bash
+export LLM_BASE_URL="https://localhost:3000"
+```
+
+And you can customize which embeddings are used for the vector database storage:
+```bash
+export LLM_EMBEDDING_MODEL="llama2" # can be "llama2", "openai", "azureopenai", or "local"
 ```
+
+### Running on the Command Line
+You can run OpenDevin from your command line:
+```bash
 PYTHONPATH=`pwd` python opendevin/main.py -d ./workspace/ -i 100 -t "Write a bash script that prints 'hello world'"
 ```
 
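Taken together, the new variables let both completions and embeddings target a self-hosted endpoint. A hedged sketch of a local Ollama setup (the model string follows litellm's provider/model convention, and the host and port here are illustrative, not values this diff mandates):

```bash
# Illustrative values only; adjust to wherever your server listens.
export LLM_MODEL="ollama/llama2"              # litellm-style model name (assumption)
export LLM_BASE_URL="http://localhost:11434"
export LLM_EMBEDDING_MODEL="llama2"           # selects OllamaEmbedding in memory.py

# For LLM_EMBEDDING_MODEL="azureopenai", memory.py additionally reads:
export LLM_DEPLOYMENT_NAME="your-deployment"  # placeholder value
export LLM_API_VERSION="2023-05-15"           # example version string (assumption)
```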

agenthub/codeact_agent/__init__.py (+0 -7)

@@ -1,4 +1,3 @@
-import os
 import re
 from typing import List, Mapping
 
@@ -19,12 +18,6 @@ from opendevin.observation import (
 
 from opendevin.llm.llm import LLM
 
-assert (
-    "OPENAI_API_KEY" in os.environ
-), "Please set the OPENAI_API_KEY environment variable."
-
-
-
 SYSTEM_MESSAGE = """You are a helpful assistant. You will be provided access (as root) to a bash shell to complete user-provided tasks.
 You will be able to execute commands in the bash shell, interact with the file system, install packages, and receive the output of your commands.
 

agenthub/langchains_agent/utils/memory.py (+36 -3)

@@ -1,18 +1,51 @@
-from . import json
+import os
 
 import chromadb
-
 from llama_index.core import Document
 from llama_index.core.retrievers import VectorIndexRetriever
 from llama_index.core import VectorStoreIndex
 from llama_index.vector_stores.chroma import ChromaVectorStore
 
+from . import json
+
+embedding_strategy = os.getenv("LLM_EMBEDDING_MODEL", "local")
+
+# TODO: More embeddings: https://docs.llamaindex.ai/en/stable/examples/embeddings/OpenAI/
+# There's probably a more programmatic way to do this.
+if embedding_strategy == "llama2":
+    from llama_index.embeddings.ollama import OllamaEmbedding
+    embed_model = OllamaEmbedding(
+        model_name="llama2",
+        base_url=os.getenv("LLM_BASE_URL", "http://localhost:8000"),
+        ollama_additional_kwargs={"mirostat": 0},
+    )
+elif embedding_strategy == "openai":
+    from llama_index.embeddings.openai import OpenAIEmbedding
+    embed_model = OpenAIEmbedding(
+        base_url=os.getenv("LLM_BASE_URL"),
+    )
+elif embedding_strategy == "azureopenai":
+    from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding  # Need to instruct to set these env variables in documentation
+    embed_model = AzureOpenAIEmbedding(
+        model="text-embedding-ada-002",
+        deployment_name=os.getenv("LLM_DEPLOYMENT_NAME"),
+        api_key=os.getenv("LLM_API_KEY"),
+        azure_endpoint=os.getenv("LLM_BASE_URL"),
+        api_version=os.getenv("LLM_API_VERSION"),
+    )
+else:
+    from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+    embed_model = HuggingFaceEmbedding(
+        model_name="BAAI/bge-small-en-v1.5"
+    )
+
+
 class LongTermMemory:
     def __init__(self):
         db = chromadb.Client()
         self.collection = db.get_or_create_collection(name="memories")
         vector_store = ChromaVectorStore(chroma_collection=self.collection)
-        self.index = VectorStoreIndex.from_vector_store(vector_store)
+        self.index = VectorStoreIndex.from_vector_store(vector_store, embed_model=embed_model)
         self.thought_idx = 0
 
     def add_event(self, event):

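The in-code TODO concedes there is "probably a more programmatic way to do this." A minimal sketch of one option, assuming nothing beyond what the diff shows: a dispatch table of factory functions that keeps the lazy imports and the local fallback. The helper names are hypothetical, the Azure branch is omitted for brevity, and each constructor call simply mirrors the corresponding branch above.

```python
import os

def _ollama_embedding():
    from llama_index.embeddings.ollama import OllamaEmbedding
    # Mirrors the "llama2" branch above.
    return OllamaEmbedding(
        model_name="llama2",
        base_url=os.getenv("LLM_BASE_URL", "http://localhost:8000"),
        ollama_additional_kwargs={"mirostat": 0},
    )

def _openai_embedding():
    from llama_index.embeddings.openai import OpenAIEmbedding
    # Mirrors the "openai" branch above.
    return OpenAIEmbedding(base_url=os.getenv("LLM_BASE_URL"))

def _local_embedding():
    from llama_index.embeddings.huggingface import HuggingFaceEmbedding
    # Mirrors the fallback branch above.
    return HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Hypothetical registry: new backends become one entry each. Unknown
# strategies fall back to local, matching the if/elif/else chain's behavior.
_FACTORIES = {"llama2": _ollama_embedding, "openai": _openai_embedding}

def make_embed_model(strategy=os.getenv("LLM_EMBEDDING_MODEL", "local")):
    return _FACTORIES.get(strategy, _local_embedding)()
```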
opendevin/llm/llm.py (+6 -3)

@@ -6,16 +6,19 @@ from functools import partial
 
 DEFAULT_MODEL = os.getenv("LLM_MODEL", "gpt-4-0125-preview")
 DEFAULT_API_KEY = os.getenv("LLM_API_KEY")
+DEFAULT_BASE_URL = os.getenv("LLM_BASE_URL")
 PROMPT_DEBUG_DIR = os.getenv("PROMPT_DEBUG_DIR", "")
 
 class LLM:
-    def __init__(self, model=DEFAULT_MODEL, api_key=DEFAULT_API_KEY, debug_dir=PROMPT_DEBUG_DIR):
+    def __init__(self, model=DEFAULT_MODEL, api_key=DEFAULT_API_KEY, base_url=DEFAULT_BASE_URL, debug_dir=PROMPT_DEBUG_DIR):
         self.model = model if model else DEFAULT_MODEL
         self.api_key = api_key if api_key else DEFAULT_API_KEY
-        self._debug_dir = debug_dir
+        self.base_url = base_url if base_url else DEFAULT_BASE_URL
+        self._debug_dir = debug_dir if debug_dir else PROMPT_DEBUG_DIR
         self._debug_idx = 0
         self._debug_id = uuid.uuid4().hex
-        self._completion = partial(litellm_completion, model=self.model, api_key=self.api_key)
+
+        self._completion = partial(litellm_completion, model=self.model, api_key=self.api_key, base_url=self.base_url)
 
         if self._debug_dir:
             print(f"Logging prompts to {self._debug_dir}/{self._debug_id}")
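With base_url threaded into the litellm partial, a caller can point one LLM instance at a local server without touching the environment. A minimal usage sketch: the `ollama/llama2` model string follows litellm's naming convention, and the `completion` accessor is assumed from context, since only `_completion` appears in this hunk.

```python
from opendevin.llm.llm import LLM

# Omitted arguments fall back to the LLM_MODEL / LLM_API_KEY / LLM_BASE_URL env vars.
llm = LLM(model="ollama/llama2", base_url="http://localhost:11434")  # illustrative values
resp = llm.completion(messages=[{"role": "user", "content": "Say hello."}])
print(resp["choices"][0]["message"]["content"])  # OpenAI-style response shape
```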

requirements.txt (+3 -0)

@@ -16,3 +16,6 @@ langchain-community
 llama-index
 llama-index-vector-stores-chroma
 chromadb
+llama-index-embeddings-huggingface
+llama-index-embeddings-azure-openai
+llama-index-embeddings-ollama
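Because memory.py imports each backend lazily, only the extra for the selected strategy is needed at runtime, though requirements.txt pulls in all three. For example:

```bash
pip install -r requirements.txt            # installs every embedding backend
pip install llama-index-embeddings-ollama  # or, illustratively, just one of them
```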