
Added local ollama models (#2433)

* added local ollama models

* add ollama_base_url config

* Update listen.py

* add docs

* Update opendevin/server/listen.py

Co-authored-by: Graham Neubig <neubig@gmail.com>

* lint

---------

Co-authored-by: Graham Neubig <neubig@gmail.com>
மனோஜ்குமார் பழனிச்சாமி, 1 year ago
commit 688bd2a8fc
3 changed files with 19 additions and 1 deletion
  1. docs/modules/usage/llms/localLLMs.md (+3 -0)
  2. opendevin/core/config.py (+2 -0)
  3. opendevin/server/listen.py (+14 -1)

docs/modules/usage/llms/localLLMs.md (+3 -0)

@@ -35,8 +35,11 @@ But when running `docker run`, you'll need to add a few more arguments:
 --add-host host.docker.internal:host-gateway \
 -e LLM_API_KEY="ollama" \
 -e LLM_BASE_URL="http://host.docker.internal:11434" \
+-e LLM_OLLAMA_BASE_URL="http://host.docker.internal:11434" \
 ```
 
+`LLM_OLLAMA_BASE_URL` is optional. If set, it is used to list the locally installed Ollama models in the UI.
+
 For example:
 
 ```bash

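As a quick sanity check (not part of this change), the URL passed via `LLM_OLLAMA_BASE_URL` can be probed directly with Ollama's `/api/tags` endpoint, which returns the installed models. The snippet below is an illustrative sketch only and assumes the `host.docker.internal` setup described in the docs above:

```python
import requests

# Hypothetical smoke test: confirm the Ollama server behind
# LLM_OLLAMA_BASE_URL is reachable and list its installed models.
base_url = 'http://host.docker.internal:11434'  # value passed as LLM_OLLAMA_BASE_URL

resp = requests.get(base_url.rstrip('/') + '/api/tags', timeout=3)
resp.raise_for_status()
print([model['name'] for model in resp.json()['models']])
```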
opendevin/core/config.py (+2 -0)

@@ -47,6 +47,7 @@ class LLMConfig(metaclass=Singleton):
         max_output_tokens: The maximum number of output tokens. This is sent to the LLM.
         input_cost_per_token: The cost per input token. This will be available in logs for the user to check.
         output_cost_per_token: The cost per output token. This will be available in logs for the user to check.
+        ollama_base_url: The base URL for the OLLAMA API.
     """
 
     model: str = 'gpt-4o'
@@ -71,6 +72,7 @@ class LLMConfig(metaclass=Singleton):
     max_output_tokens: int | None = None
     input_cost_per_token: float | None = None
     output_cost_per_token: float | None = None
+    ollama_base_url: str | None = None
 
     def defaults_to_dict(self) -> dict:
         """

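For context, the new `ollama_base_url` field is expected to be populated from the `LLM_OLLAMA_BASE_URL` environment variable shown in the docs change. A minimal sketch of that `LLM_<FIELD>` naming convention (not the project's actual config loader) could look like:

```python
import os
from dataclasses import dataclass

@dataclass
class LLMConfig:
    model: str = 'gpt-4o'
    base_url: str | None = None
    ollama_base_url: str | None = None  # new field added by this commit

def load_llm_config_from_env() -> LLMConfig:
    """Map LLM_* environment variables onto LLMConfig fields (illustrative only)."""
    cfg = LLMConfig()
    for field_name in ('model', 'base_url', 'ollama_base_url'):
        value = os.environ.get('LLM_' + field_name.upper())
        if value:
            setattr(cfg, field_name, value)
    return cfg

# e.g. LLM_OLLAMA_BASE_URL="http://host.docker.internal:11434"
# -> load_llm_config_from_env().ollama_base_url
```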
opendevin/server/listen.py (+14 -1)

@@ -3,6 +3,7 @@ import re
 import uuid
 import warnings
 
+import requests
 from pathspec import PathSpec
 from pathspec.patterns import GitWildMatchPattern
 
@@ -190,7 +191,7 @@ async def attach_session(request: Request, call_next):
 async def websocket_endpoint(websocket: WebSocket):
     """
     WebSocket endpoint for receiving events from the client (i.e., the browser).
-    Once connected, you can send various actions:
+    Once connected, the client can send various actions:
     - Initialize the agent:
     session management, and event streaming.
         ```json
@@ -311,6 +312,18 @@ async def get_litellm_models():
     )
     bedrock_model_list = bedrock.list_foundation_models()
     model_list = litellm_model_list_without_bedrock + bedrock_model_list
+    ollama_base_url = config.llm.ollama_base_url
+    if config.llm.model.startswith('ollama'):
+        if not ollama_base_url:
+            ollama_base_url = config.llm.base_url
+    if ollama_base_url:
+        ollama_url = ollama_base_url.strip('/') + '/api/tags'
+        try:
+            ollama_models_list = requests.get(ollama_url, timeout=3).json()['models']
+            for model in ollama_models_list:
+                model_list.append('ollama/' + model['name'])
+        except requests.exceptions.RequestException as e:
+            logger.error(f'Error getting OLLAMA models: {e}', exc_info=True)
 
     return list(sorted(set(model_list)))
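Taken on its own, the logic added above amounts to a small step in the model listing: resolve the Ollama base URL (falling back to `config.llm.base_url` when an `ollama/*` model is configured), call Ollama's `/api/tags` endpoint, and expose each installed model under the `ollama/` prefix that litellm expects. Below is a standalone, hedged sketch of just the Ollama half of that list; the helper name `ollama_model_ids` is hypothetical and not part of the commit:

```python
import requests

def ollama_model_ids(ollama_base_url: str) -> list[str]:
    """Return installed Ollama models prefixed with 'ollama/' (illustrative sketch)."""
    url = ollama_base_url.rstrip('/') + '/api/tags'
    try:
        models = requests.get(url, timeout=3).json()['models']
        return ['ollama/' + model['name'] for model in models]
    except requests.exceptions.RequestException:
        # On any connection or timeout error, contribute no Ollama models.
        return []

# Example (output depends on what is installed locally):
# ollama_model_ids('http://host.docker.internal:11434')
# -> ['ollama/llama3:latest', 'ollama/codellama:7b']
```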