Replace environment variables with configuration file (#339)

* Replace environment variables with configuration file

* Add config.toml to .gitignore

* Remove unused os imports

* Update README.md

* Update README.md

* Update README.md

* Fix merge conflict

* Fallback to environment variables

* Use template file for config.toml

* Update config.toml.template

* Update config.toml.template

---------

Co-authored-by: Robert Brennan <accounts@rbren.io>
Jim Su, 1 year ago
parent commit b1b96df8a8
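
The change replaces scattered `os.getenv` calls with a shared config module that reads `config.toml` first and falls back to environment variables. A minimal sketch of the caller-side migration, using names taken from the diffs below:

```python
# Before: each module read its settings straight from the environment.
import os

model = os.getenv("LLM_MODEL", "gpt-4-0125-preview")

# After: the same lookup goes through the shared config module, which
# consults config.toml first and falls back to the environment.
from opendevin import config

model = config.get_or_default("LLM_MODEL", "gpt-4-0125-preview")
```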

+ 3 - 0
.gitignore

@@ -192,3 +192,6 @@ yarn-error.log*
 .envrc
 /workspace
 /debug
+
+# configuration
+config.toml

+ 20 - 13
README.md

@@ -26,51 +26,58 @@ First, make sure Docker is running:
 ```bash
 docker ps # this should exit successfully
 ```
+
 Then pull our latest image [here](https://github.com/opendevin/OpenDevin/pkgs/container/sandbox)
 ```bash
 docker pull ghcr.io/opendevin/sandbox:v0.1
 ```
+
+Then copy `config.toml.template` to `config.toml` and add an API key there.
+(See below for how to use different models.)
+```toml
+OPENAI_API_KEY="..."
+WORKSPACE_DIR="..."
+```
+
+Next, start the backend.
 We manage python packages and the virtual environment with `pipenv`.
-Make sure python >= 3.10.
+Make sure you have python >= 3.10.
 ```bash
 python -m pip install pipenv
 pipenv install -v
 pipenv shell
 
-export OPENAI_API_KEY="..."
-export WORKSPACE_DIR="/path/to/your/project"
 python -m pip install -r requirements.txt
 uvicorn opendevin.server.listen:app --port 3000
 ```
 
-Then in a second terminal:
+Then, in a second terminal, start the frontend:
 ```bash
 cd frontend
 npm install
 npm start
 ```
-The virtual environment is now activated and you should see `(OpenDevin)` in front of your cmdline prompt.
 
 ### Picking a Model
 We use LiteLLM, so you can run OpenDevin with any foundation model, including OpenAI, Claude, and Gemini.
 LiteLLM has a [full list of providers](https://docs.litellm.ai/docs/providers).
 
-To change the model, set the `LLM_MODEL` and `LLM_API_KEY` environment variables.
+To change the model, set the `LLM_MODEL` and `LLM_API_KEY` in `config.toml`.
 
 For example, to run Claude:
-```bash
-export LLM_API_KEY="your-api-key"
-export LLM_MODEL="claude-3-opus-20240229"
+```toml
+LLM_API_KEY="your-api-key"
+LLM_MODEL="claude-3-opus-20240229"
 ```
 
 You can also set the base URL for local/custom models:
-```bash
-export LLM_BASE_URL="https://localhost:3000"
+```toml
+LLM_BASE_URL="https://localhost:3000"
 ```
 
 And you can customize which embeddings are used for the vector database storage:
-```bash
-export LLM_EMBEDDING_MODEL="llama2" # can be "llama2", "openai", "azureopenai", or "local"
+```toml
+LLM_EMBEDDING_MODEL="llama2" # can be "llama2", "openai", "azureopenai", or "local"
 ```
 
 ### Running the app

+ 8 - 9
agenthub/langchains_agent/utils/memory.py

@@ -1,14 +1,13 @@
-import os
-
 import chromadb
 from llama_index.core import Document
 from llama_index.core.retrievers import VectorIndexRetriever
 from llama_index.core import VectorStoreIndex
 from llama_index.vector_stores.chroma import ChromaVectorStore
 
+from opendevin import config
 from . import json
 
-embedding_strategy = os.getenv("LLM_EMBEDDING_MODEL", "local")
+embedding_strategy = config.get_or_default("LLM_EMBEDDING_MODEL", "local")
 
 # TODO: More embeddings: https://docs.llamaindex.ai/en/stable/examples/embeddings/OpenAI/
 # There's probably a more programmatic way to do this.
@@ -16,22 +15,22 @@ if embedding_strategy == "llama2":
     from llama_index.embeddings.ollama import OllamaEmbedding
     embed_model = OllamaEmbedding(
         model_name="llama2",
-        base_url=os.getenv("LLM_BASE_URL", "http://localhost:8000"),
+        base_url=config.get_or_default("LLM_BASE_URL", "http://localhost:8000"),
         ollama_additional_kwargs={"mirostat": 0},
     )
 elif embedding_strategy == "openai":
     from llama_index.embeddings.openai import OpenAIEmbedding
     embed_model = OpenAIEmbedding(
-        base_url=os.getenv("LLM_BASE_URL"),
+        base_url=config.get_or_error("LLM_BASE_URL"),
     )
 elif embedding_strategy == "azureopenai":
     from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding  # Need to instruct to set these env variables in documentation
     embed_model = AzureOpenAIEmbedding(
         model="text-embedding-ada-002",
-        deployment_name=os.getenv("LLM_DEPLOYMENT_NAME"),
-        api_key=os.getenv("LLM_API_KEY"),
-        azure_endpoint=os.getenv("LLM_BASE_URL"),
-        api_version=os.getenv("LLM_API_VERSION"),
+        deployment_name=config.get_or_error("LLM_DEPLOYMENT_NAME"),
+        api_key=config.get_or_error("LLM_API_KEY"),
+        azure_endpoint=config.get_or_error("LLM_BASE_URL"),
+        api_version=config.get_or_error("LLM_API_VERSION"),
     )
 else:
     from llama_index.embeddings.huggingface import HuggingFaceEmbedding

+ 3 - 3
agenthub/langchains_agent/utils/prompts.py

@@ -1,12 +1,12 @@
-import os
-
 from typing import List, Dict, Type
 
 from langchain_core.pydantic_v1 import BaseModel
 from langchain.prompts import PromptTemplate
 from langchain_core.output_parsers import JsonOutputParser
 
-if os.getenv("DEBUG"):
+from opendevin import config
+
+if config.get_or_default("DEBUG", False):
     from langchain.globals import set_debug
     set_debug(True)
 

+ 4 - 0
config.toml.template

@@ -0,0 +1,4 @@
+# This is a template. Run `cp config.toml.template config.toml` to use it.
+
+OPENAI_API_KEY="<YOUR OPENAI API KEY>"
+WORKSPACE_DIR="./workspace"

+ 3 - 2
evaluation/regression/run_tests.py

@@ -1,12 +1,13 @@
-import os
 import sys
 import pytest
 
+from opendevin import config
+
 if __name__ == '__main__':
     args = ['-v', 'evaluation/regression/cases']
     for arg in sys.argv[1:]:
         if arg.startswith('--OPENAI_API_KEY='):
-            os.environ['OPENAI_API_KEY'] = arg.split('=')[1]
+            config.config['OPENAI_API_KEY'] = arg.split('=')[1]
         elif arg.startswith('--model='):
             args.append(f"-o model={arg.split('=')[1]}")
     pytest.main(args)

+ 36 - 0
opendevin/config.py

@@ -0,0 +1,36 @@
+import os
+import tomllib
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+with open("config.toml", "rb") as f:
+    config = tomllib.load(f)
+
+def _get(key: str, default):
+    value = config.get(key, default)
+    if not value:
+        value = os.environ.get(key, default)
+    return value
+
+def get_or_error(key: str):
+    """
+    Get a key from the config, or raise an error if it doesn't exist.
+    """
+    value = get_or_none(key)
+    if not value:
+        raise KeyError(f"Please set '{key}' in `config.toml` or `.env`.")
+    return value
+
+def get_or_default(key: str, default):
+    """
+    Get a key from the config, or return a default value if it doesn't exist.
+    """
+    return _get(key, default)
+
+def get_or_none(key: str):
+    """
+    Get a key from the config, or return None if it doesn't exist.
+    """
+    return _get(key, None)
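
A minimal usage sketch of these helpers (the keys and values here are illustrative, not taken from a real config). Note that `_get` uses a falsy check, so an empty string in `config.toml` also falls through to the environment:

```python
from opendevin import config

# Present in config.toml (or the environment): that value wins;
# absent from both: the default is returned.
workspace = config.get_or_default("WORKSPACE_DIR", "./workspace")

# Returns None if the key is absent from both sources.
api_key = config.get_or_none("LLM_API_KEY")

# Raises KeyError("Please set 'LLM_MODEL' in `config.toml` or `.env`.")
# if the key is absent from both sources.
model = config.get_or_error("LLM_MODEL")
```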

+ 6 - 4
opendevin/llm/llm.py

@@ -4,10 +4,12 @@ import uuid
 from litellm import completion as litellm_completion
 from functools import partial
 
-DEFAULT_MODEL = os.getenv("LLM_MODEL", "gpt-4-0125-preview")
-DEFAULT_API_KEY = os.getenv("LLM_API_KEY")
-DEFAULT_BASE_URL = os.getenv("LLM_BASE_URL")
-PROMPT_DEBUG_DIR = os.getenv("PROMPT_DEBUG_DIR", "")
+from opendevin import config
+
+DEFAULT_MODEL = config.get_or_default("LLM_MODEL", "gpt-4-0125-preview")
+DEFAULT_API_KEY = config.get_or_none("LLM_API_KEY")
+DEFAULT_BASE_URL = config.get_or_none("LLM_BASE_URL")
+PROMPT_DEBUG_DIR = config.get_or_default("PROMPT_DEBUG_DIR", "")
 
 class LLM:
     def __init__(self, model=DEFAULT_MODEL, api_key=DEFAULT_API_KEY, base_url=DEFAULT_BASE_URL, debug_dir=PROMPT_DEBUG_DIR):

+ 2 - 2
opendevin/main.py

@@ -1,10 +1,10 @@
-import os
 import asyncio
 import argparse
 
 from typing import Type
 
 import agenthub # noqa F401 (we import this to get the agents registered)
+from opendevin import config
 from opendevin.agent import Agent
 from opendevin.controller import AgentController
 from opendevin.llm.llm import LLM
@@ -35,7 +35,7 @@ if __name__ == "__main__":
     parser.add_argument(
         "-m",
         "--model-name",
-        default=os.getenv("LLM_MODEL") or "gpt-4-0125-preview",
+        default=config.get_or_default("LLM_MODEL", "gpt-4-0125-preview"),
         type=str,
         help="The (litellm) model name to use",
     )

+ 7 - 5
opendevin/sandbox/sandbox.py

@@ -10,19 +10,21 @@ from typing import Dict, List, Tuple
 import docker
 import concurrent.futures
 
+from opendevin import config
+
 InputType = namedtuple("InputType", ["content"])
 OutputType = namedtuple("OutputType", ["content"])
 
-DIRECTORY_REWRITE = os.getenv(
+DIRECTORY_REWRITE = config.get_or_default(
     "DIRECTORY_REWRITE", ""
 )  # helpful for docker-in-docker scenarios
-CONTAINER_IMAGE = os.getenv("SANDBOX_CONTAINER_IMAGE", "ghcr.io/opendevin/sandbox:v0.1")
+CONTAINER_IMAGE = config.get_or_default("SANDBOX_CONTAINER_IMAGE", "ghcr.io/opendevin/sandbox:v0.1")
 # FIXME: On some containers, the devin user doesn't have enough permission, e.g. to install packages
 # How do we make this more flexible?
-RUN_AS_DEVIN = os.getenv("RUN_AS_DEVIN", "true").lower() != "false"
+RUN_AS_DEVIN = config.get_or_default("RUN_AS_DEVIN", "true").lower() != "false"
 USER_ID = 1000
-if os.getenv("SANDBOX_USER_ID") is not None:
-    USER_ID = int(os.getenv("SANDBOX_USER_ID", ""))
+if config.get_or_none("SANDBOX_USER_ID") is not None:
+    USER_ID = int(config.get_or_default("SANDBOX_USER_ID", ""))
 elif hasattr(os, "getuid"):
     USER_ID = os.getuid()
 

+ 3 - 2
opendevin/server/session.py

@@ -4,6 +4,7 @@ from typing import Optional
 
 from fastapi import WebSocketDisconnect
 
+from opendevin import config
 from opendevin.action import (
     Action,
     NullAction,
@@ -14,8 +15,8 @@ from opendevin.controller import AgentController
 from opendevin.llm.llm import LLM
 from opendevin.observation import Observation, UserMessageObservation
 
-DEFAULT_WORKSPACE_DIR = os.getenv("WORKSPACE_DIR", os.path.join(os.getcwd(), "workspace"))
-LLM_MODEL = os.getenv("LLM_MODEL", "gpt-4-0125-preview")
+DEFAULT_WORKSPACE_DIR = config.get_or_default("WORKSPACE_DIR", os.path.join(os.getcwd(), "workspace"))
+LLM_MODEL = config.get_or_default("LLM_MODEL", "gpt-4-0125-preview")
 
 class Session:
     def __init__(self, websocket):