# embeddings.py
import importlib.util
import os

from joblib import Parallel, delayed

from openhands.core.config import LLMConfig

try:
    # check if those we need later are available using importlib, so a missing
    # optional dependency produces a clear, actionable error message instead of
    # a raw import failure deep inside llama_index
    if importlib.util.find_spec('chromadb') is None:
        raise ImportError(
            'chromadb is not available. Please install it using poetry install --with llama-index'
        )
    if (
        importlib.util.find_spec(
            'llama_index.core.indices.vector_store.retrievers.retriever'
        )
        is None
        or importlib.util.find_spec('llama_index.core.indices.vector_store.base')
        is None
    ):
        raise ImportError(
            'llama_index is not available. Please install it using poetry install --with llama-index'
        )

    from llama_index.core import Document, VectorStoreIndex
    from llama_index.core.base.embeddings.base import BaseEmbedding
    from llama_index.core.ingestion import IngestionPipeline
    from llama_index.core.schema import TextNode

    # flag consumed by check_llama_index() to gate the memory features
    LLAMA_INDEX_AVAILABLE = True
except ImportError:
    LLAMA_INDEX_AVAILABLE = False
# Define supported embedding models
# Ollama model names accepted as a `strategy` by
# EmbeddingsLoader.get_embedding_model(); any strategy found in this list is
# routed to OllamaEmbedding instead of the other providers.
SUPPORTED_OLLAMA_EMBED_MODELS = [
    'llama2',
    'mxbai-embed-large',
    'nomic-embed-text',
    'all-minilm',
    'stable-code',
    'bge-m3',
    'bge-large',
    'paraphrase-multilingual',
    'snowflake-arctic-embed',
]
  41. def check_llama_index():
  42. """Utility function to check the availability of llama_index.
  43. Raises:
  44. ImportError: If llama_index is not available.
  45. """
  46. if not LLAMA_INDEX_AVAILABLE:
  47. raise ImportError(
  48. 'llama_index and its dependencies are not installed. '
  49. 'To use memory features, please run: poetry install --with llama-index.'
  50. )
  51. class EmbeddingsLoader:
  52. """Loader for embedding model initialization."""
  53. @staticmethod
  54. def get_embedding_model(strategy: str, llm_config: LLMConfig) -> 'BaseEmbedding':
  55. """Initialize and return the appropriate embedding model based on the strategy.
  56. Parameters:
  57. - strategy: The embedding strategy to use.
  58. - llm_config: Configuration for the LLM.
  59. Returns:
  60. - An instance of the selected embedding model or None.
  61. """
  62. if strategy in SUPPORTED_OLLAMA_EMBED_MODELS:
  63. from llama_index.embeddings.ollama import OllamaEmbedding
  64. return OllamaEmbedding(
  65. model_name=strategy,
  66. base_url=llm_config.embedding_base_url,
  67. ollama_additional_kwargs={'mirostat': 0},
  68. )
  69. elif strategy == 'openai':
  70. from llama_index.embeddings.openai import OpenAIEmbedding
  71. return OpenAIEmbedding(
  72. model='text-embedding-ada-002',
  73. api_key=llm_config.api_key,
  74. )
  75. elif strategy == 'azureopenai':
  76. from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
  77. return AzureOpenAIEmbedding(
  78. model='text-embedding-ada-002',
  79. deployment_name=llm_config.embedding_deployment_name,
  80. api_key=llm_config.api_key,
  81. azure_endpoint=llm_config.base_url,
  82. api_version=llm_config.api_version,
  83. )
  84. elif (strategy is not None) and (strategy.lower() == 'none'):
  85. # TODO: this works but is not elegant enough. The incentive is when
  86. # an agent using embeddings is not used, there is no reason we need to
  87. # initialize an embedding model
  88. return None
  89. else:
  90. from llama_index.embeddings.huggingface import HuggingFaceEmbedding
  91. # initialize the local embedding model
  92. local_embed_model = HuggingFaceEmbedding(
  93. model_name='BAAI/bge-small-en-v1.5'
  94. )
  95. # for local embeddings, we need torch
  96. import torch
  97. # choose the best device
  98. # first determine what is available: CUDA, MPS, or CPU
  99. if torch.cuda.is_available():
  100. device = 'cuda'
  101. elif torch.backends.mps.is_available() and torch.backends.mps.is_built():
  102. device = 'mps'
  103. else:
  104. device = 'cpu'
  105. os.environ['CUDA_VISIBLE_DEVICES'] = ''
  106. os.environ['PYTORCH_FORCE_CPU'] = (
  107. '1' # try to force CPU to avoid errors
  108. )
  109. # override CUDA availability
  110. torch.cuda.is_available = lambda: False
  111. # disable MPS to avoid errors
  112. if device != 'mps' and hasattr(torch.backends, 'mps'):
  113. torch.backends.mps.is_available = lambda: False
  114. torch.backends.mps.is_built = False
  115. # the device being used
  116. print(f'Using device for embeddings: {device}')
  117. return local_embed_model
# --------------------------------------------------------------------------
# Utility functions to run pipelines, split out for profiling
# --------------------------------------------------------------------------
  121. def run_pipeline(
  122. embed_model: 'BaseEmbedding', documents: list['Document'], num_workers: int
  123. ) -> list['TextNode']:
  124. """Run a pipeline embedding documents."""
  125. # set up a pipeline with the transformations to make
  126. pipeline = IngestionPipeline(
  127. transformations=[
  128. embed_model,
  129. ],
  130. )
  131. # run the pipeline with num_workers
  132. nodes = pipeline.run(
  133. documents=documents, show_progress=True, num_workers=num_workers
  134. )
  135. return nodes
  136. def insert_batch_docs(
  137. index: 'VectorStoreIndex', documents: list['Document'], num_workers: int
  138. ) -> list['TextNode']:
  139. """Run the document indexing in parallel."""
  140. results = Parallel(n_jobs=num_workers, backend='threading')(
  141. delayed(index.insert)(doc) for doc in documents
  142. )
  143. return results