Sfoglia il codice sorgente

Add voyage ai embeddings (#5547)

Engel Nyst 11 mesi fa
parent
commit
ac5190c283
ha cambiato 4 file con 43 aggiunte e 1 eliminazione
  1. 1 0
      openhands/memory/memory.py
  2. 6 0
      openhands/utils/embeddings.py
  3. 34 1
      poetry.lock
  4. 2 0
      pyproject.toml

+ 1 - 0
openhands/memory/memory.py

@@ -51,6 +51,7 @@ class LongTermMemory:
         self.embed_model = EmbeddingsLoader.get_embedding_model(
             embedding_strategy, llm_config
         )
+        logger.debug(f'Using embedding model: {self.embed_model}')
 
         # instantiate the index
         self.index = VectorStoreIndex.from_vector_store(vector_store, self.embed_model)

+ 6 - 0
openhands/utils/embeddings.py

@@ -102,6 +102,12 @@ class EmbeddingsLoader:
                 azure_endpoint=llm_config.base_url,
                 api_version=llm_config.api_version,
             )
+        elif strategy == 'voyage':
+            from llama_index.embeddings.voyageai import VoyageEmbedding
+
+            return VoyageEmbedding(
+                model_name='voyage-code-3',
+            )
         elif (strategy is not None) and (strategy.lower() == 'none'):
             # TODO: this works but is not elegant enough. The incentive is when
             # an agent using embeddings is not used, there is no reason we need to

+ 34 - 1
poetry.lock

@@ -3936,6 +3936,21 @@ files = [
 llama-index-core = ">=0.12.0,<0.13.0"
 openai = ">=1.1.0"
 
+[[package]]
+name = "llama-index-embeddings-voyageai"
+version = "0.3.1"
+description = "llama-index embeddings voyageai integration"
+optional = false
+python-versions = "<4.0,>=3.9"
+files = [
+    {file = "llama_index_embeddings_voyageai-0.3.1-py3-none-any.whl", hash = "sha256:f0e0b327ab21669a2b0501f207a6862f7a0b0a115bff15b6ceac712273a6fa03"},
+    {file = "llama_index_embeddings_voyageai-0.3.1.tar.gz", hash = "sha256:cfbc0a0697bda39c18398418628596c6ae8c668a0306d504a4fc16100fcd7d57"},
+]
+
+[package.dependencies]
+llama-index-core = ">=0.12.0,<0.13.0"
+voyageai = {version = ">=0.2.1,<0.4.0", markers = "python_version >= \"3.9\" and python_version < \"3.13\""}
+
 [[package]]
 name = "llama-index-indices-managed-llama-cloud"
 version = "0.6.2"
@@ -9360,6 +9375,24 @@ platformdirs = ">=3.9.1,<5"
 docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"]
 test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"]
 
+[[package]]
+name = "voyageai"
+version = "0.2.4"
+description = ""
+optional = false
+python-versions = "<4.0.0,>=3.7.1"
+files = [
+    {file = "voyageai-0.2.4-py3-none-any.whl", hash = "sha256:e3070e5c78dec89adae43231334b4637aa88933dad99b1c33d3219fdfc94dfa4"},
+    {file = "voyageai-0.2.4.tar.gz", hash = "sha256:b9911d8629e8a4e363291c133482fead49a3536afdf1e735f3ab3aaccd8d250d"},
+]
+
+[package.dependencies]
+aiohttp = ">=3.5,<4.0"
+aiolimiter = ">=1.1.0,<2.0.0"
+numpy = ">=1.11"
+requests = ">=2.20,<3.0"
+tenacity = ">=8.0.1"
+
 [[package]]
 name = "watchdog"
 version = "6.0.0"
@@ -10061,4 +10094,4 @@ testing = ["coverage[toml]", "zope.event", "zope.testing"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.12"
-content-hash = "039581f859df4446dc9491bf39913a54f53c5d71e9bad86ff71ddd1d1682f9af"
+content-hash = "7334dd947fe93756227b5fc8f86303852c5e9aaf8787cc35b0ce23fc1540df67"

+ 2 - 0
pyproject.toml

@@ -76,6 +76,8 @@ llama-index-embeddings-huggingface = "*"
 torch = "2.5.1"
 llama-index-embeddings-azure-openai = "*"
 llama-index-embeddings-ollama = "*"
+voyageai = "*"
+llama-index-embeddings-voyageai = "*"
 
 [tool.poetry.group.dev.dependencies]
 ruff = "0.8.0"