Browse Source

feat: file explorer: better sorting; .gitignore support; file upload config (#2621)

* feat: file explorer: better sorting; .gitignore support; file upload config

* resolved poetry

* move config settings (no extra file); updated uploading of files; fix exception on refresh of removed folder

* removed console cmds; fix in a toast

* attempt fix of upload toasts

* fix new options' assignments in listen.py
tobitege 1 year ago
parent
commit
7d31057904

+ 83 - 29
frontend/src/components/file-explorer/FileExplorer.tsx

@@ -107,15 +107,60 @@ function FileExplorer() {
       return;
     }
     dispatch(setRefreshID(Math.random()));
-    setFiles(await listFiles("/"));
+    try {
+      const fileList = await listFiles("/");
+      setFiles(fileList);
+      if (fileList.length === 0) {
+        toast.info(t(I18nKey.EXPLORER$EMPTY_WORKSPACE_MESSAGE));
+      }
+    } catch (error) {
+      toast.error("refresh-error", t(I18nKey.EXPLORER$REFRESH_ERROR_MESSAGE));
+    }
   };
 
   const uploadFileData = async (toAdd: FileList) => {
     try {
-      await uploadFiles(toAdd);
+      const result = await uploadFiles(toAdd);
+
+      if (result.error) {
+        // Handle error response
+        toast.error(
+          `upload-error-${new Date().getTime()}`,
+          result.error || t(I18nKey.EXPLORER$UPLOAD_ERROR_MESSAGE),
+        );
+        return;
+      }
+
+      const uploadedCount = result.uploadedFiles.length;
+      const skippedCount = result.skippedFiles.length;
+
+      if (uploadedCount > 0) {
+        toast.success(
+          `upload-success-${new Date().getTime()}`,
+          t(I18nKey.EXPLORER$UPLOAD_SUCCESS_MESSAGE, {
+            count: uploadedCount,
+          }),
+        );
+      }
+
+      if (skippedCount > 0) {
+        const message = t(I18nKey.EXPLORER$UPLOAD_PARTIAL_SUCCESS_MESSAGE, {
+          count: skippedCount,
+        });
+        toast.info(message);
+      }
+
+      if (uploadedCount === 0 && skippedCount === 0) {
+        toast.info(t(I18nKey.EXPLORER$NO_FILES_UPLOADED_MESSAGE));
+      }
+
       await refreshWorkspace();
     } catch (error) {
-      toast.error("ws", t(I18nKey.EXPLORER$UPLOAD_ERROR_MESSAGE));
+      // Handle unexpected errors (network issues, etc.)
+      toast.error(
+        `upload-error-${new Date().getTime()}`,
+        t(I18nKey.EXPLORER$UPLOAD_ERROR_MESSAGE),
+      );
     }
   };
 
@@ -148,13 +193,16 @@ function FileExplorer() {
   }
 
   return (
-    <div className="relative">
+    <div className="relative h-full">
       {isDragging && (
         <div
           data-testid="dropzone"
           onDrop={(event) => {
             event.preventDefault();
-            uploadFileData(event.dataTransfer.files);
+            const { files: droppedFiles } = event.dataTransfer;
+            if (droppedFiles.length > 0) {
+              uploadFileData(droppedFiles);
+            }
           }}
           onDragOver={(event) => event.preventDefault()}
           className="z-10 absolute flex flex-col justify-center items-center bg-black top-0 bottom-0 left-0 right-0 opacity-65"
@@ -167,32 +215,37 @@ function FileExplorer() {
       )}
       <div
         className={twMerge(
-          "bg-neutral-800 h-full border-r-1 border-r-neutral-600 flex flex-col transition-all ease-soft-spring overflow-auto",
+          "bg-neutral-800 h-full border-r-1 border-r-neutral-600 flex flex-col transition-all ease-soft-spring",
           isHidden ? "min-w-[48px]" : "min-w-[228px]",
         )}
       >
-        <div className="flex flex-col p-2 relative">
-          <div
-            className={twMerge(
-              "flex items-center mt-2 mb-1",
-              isHidden ? "justify-center" : "justify-between",
-            )}
-          >
-            {!isHidden && (
-              <div className="ml-1 text-neutral-300 font-bold text-sm">
-                {t(I18nKey.EXPLORER$LABEL_WORKSPACE)}
-              </div>
-            )}
-            <ExplorerActions
-              isHidden={isHidden}
-              toggleHidden={() => setIsHidden((prev) => !prev)}
-              onRefresh={refreshWorkspace}
-              onUpload={selectFileInput}
-            />
+        <div className="flex flex-col relative h-full">
+          <div className="sticky top-0 bg-neutral-800 z-10">
+            <div
+              className={twMerge(
+                "flex items-center mt-2 mb-1 p-2",
+                isHidden ? "justify-center" : "justify-between",
+              )}
+            >
+              {!isHidden && (
+                <div className="ml-1 text-neutral-300 font-bold text-sm">
+                  <div className="ml-1 text-neutral-300 font-bold text-sm">
+                    {t(I18nKey.EXPLORER$LABEL_WORKSPACE)}
+                  </div>
+                </div>
+              )}
+              <ExplorerActions
+                isHidden={isHidden}
+                toggleHidden={() => setIsHidden((prev) => !prev)}
+                onRefresh={refreshWorkspace}
+                onUpload={selectFileInput}
+              />
+            </div>
           </div>
-
-          <div style={{ display: isHidden ? "none" : "block" }}>
-            <ExplorerTree files={files} defaultOpen />
+          <div className="overflow-auto flex-grow">
+            <div style={{ display: isHidden ? "none" : "block" }}>
+              <ExplorerTree files={files} defaultOpen />
+            </div>
           </div>
         </div>
         <input
@@ -202,8 +255,9 @@ function FileExplorer() {
           ref={fileInputRef}
           style={{ display: "none" }}
           onChange={(event) => {
-            if (event.target.files) {
-              uploadFileData(event.target.files);
+            const { files: selectedFiles } = event.target;
+            if (selectedFiles && selectedFiles.length > 0) {
+              uploadFileData(selectedFiles);
             }
           }}
         />

+ 30 - 0
frontend/src/i18n/translation.json

@@ -308,6 +308,36 @@
     "zh-CN": "工作区",
     "de": "Arbeitsbereich"
   },
+  "EXPLORER$EMPTY_WORKSPACE_MESSAGE": {
+    "en": "No files in workspace",
+    "zh-CN": "工作区没有文件",
+    "de": "Keine Dateien im Arbeitsbereich"
+  },
+  "EXPLORER$REFRESH_ERROR_MESSAGE": {
+    "en": "Error refreshing workspace",
+    "zh-CN": "工作区刷新错误",
+    "de": "Fehler beim Aktualisieren des Arbeitsbereichs"
+  },
+  "EXPLORER$UPLOAD_SUCCESS_MESSAGE": {
+    "en": "Successfully uploaded {{count}} file(s)",
+    "zh-CN": "成功上传 {{count}} 个文件",
+    "de": "Erfolgreich {{count}} Datei(en) hochgeladen"
+  },
+  "EXPLORER$NO_FILES_UPLOADED_MESSAGE": {
+    "en": "No files were uploaded",
+    "zh-CN": "没有文件上传",
+    "de": "Keine Dateien wurden hochgeladen"
+  },
+  "EXPLORER$UPLOAD_PARTIAL_SUCCESS_MESSAGE": {
+    "en": "{{count}} file(s) were skipped during upload",
+    "zh-CN": "{{count}} 个文件在上传过程中被跳过",
+    "de": "{{count}} Datei(en) wurden während des Hochladens übersprungen"
+  },
+  "EXPLORER$UPLOAD_UNEXPECTED_RESPONSE_MESSAGE": {
+    "en": "Unexpected response structure from server",
+    "zh-CN": "服务器响应结构不符合预期",
+    "de": "Unerwartetes Antwortformat vom Server"
+  },
   "LOAD_SESSION$MODAL_TITLE": {
     "en": "Return to existing session?",
     "de": "Zurück zu vorhandener Sitzung?",

+ 57 - 6
frontend/src/services/fileService.ts

@@ -1,23 +1,74 @@
 import { request } from "./api";
 
+/**
+ * Requests `/api/select-file` for the given file and returns the `code`
+ * field of the response.
+ *
+ * @param file Path of the file; it is URI-encoded before being placed in the query string.
+ * @returns The `code` value of the response, cast to a string.
+ */
 export async function selectFile(file: string): Promise<string> {
-  const data = await request(`/api/select-file?file=${file}`);
+  // Encode the path so characters such as `&`, `#` or spaces cannot alter the query string.
+  const encodedFile = encodeURIComponent(file);
+  const data = await request(`/api/select-file?file=${encodedFile}`);
   return data.code as string;
 }
 
-export async function uploadFiles(files: FileList) {
+/** A file that was not stored, together with the reason it was rejected. */
+interface SkippedFile {
+  name: string;
+  reason: string;
+}
+
+/** Outcome of an upload attempt, combining client-side and server-side results. */
+interface UploadResult {
+  message: string;
+  uploadedFiles: string[];
+  skippedFiles: SkippedFile[];
+  error?: string;
+}
+
+/**
+ * Uploads the given files to `/api/upload-files`.
+ *
+ * Files whose name contains `..`, `/` or `\` are rejected before the request
+ * is made and reported in `skippedFiles`. If no file passes that check, no
+ * request is sent at all.
+ *
+ * @param files Files selected or dropped by the user.
+ * @returns The names the server stored, every skipped file (client-side
+ *   rejections first, then the server's), and the server's `error` text when
+ *   the response carries one.
+ * @throws Error when the server response lacks `message`, `uploaded_files`
+ *   or `skipped_files`. Failures raised by `request` itself propagate unchanged.
+ */
+export async function uploadFiles(files: FileList): Promise<UploadResult> {
   const formData = new FormData();
+  const skippedFiles: SkippedFile[] = [];
+
+  let uploadedCount = 0;
+
   for (let i = 0; i < files.length; i += 1) {
-    formData.append("files", files[i]);
+    const file = files[i];
+
+    if (
+      file.name.includes("..") ||
+      file.name.includes("/") ||
+      file.name.includes("\\")
+    ) {
+      skippedFiles.push({
+        name: file.name,
+        reason: "Invalid file name",
+      });
+    } else {
+      formData.append("files", file);
+      uploadedCount += 1;
+    }
   }
 
-  await request("/api/upload-files", {
+  // Nothing passed the client-side check: the endpoint needs at least one
+  // file, so report the skipped files directly instead of sending a request
+  // that can only fail.
+  if (uploadedCount === 0) {
+    return {
+      message: "No files were uploaded",
+      uploadedFiles: [],
+      skippedFiles,
+    };
+  }
+
+  // Add skippedFilesCount to formData
+  formData.append("skippedFilesCount", skippedFiles.length.toString());
+
+  // Add uploadedFilesCount to formData
+  formData.append("uploadedFilesCount", uploadedCount.toString());
+
+  const response = await request("/api/upload-files", {
     method: "POST",
     body: formData,
   });
+
+  if (
+    typeof response.message !== "string" ||
+    !Array.isArray(response.uploaded_files) ||
+    !Array.isArray(response.skipped_files)
+  ) {
+    throw new Error("Unexpected response structure from server");
+  }
+
+  return {
+    message: response.message,
+    uploadedFiles: response.uploaded_files,
+    skippedFiles: [...skippedFiles, ...response.skipped_files],
+    // Forward the server's `error` (set when it stored none of the files) so
+    // that callers checking `result.error` can actually see it.
+    ...(typeof response.error === "string" ? { error: response.error } : {}),
+  };
 }
 
+/**
+ * Lists the entries of a workspace directory via `/api/list-files`.
+ *
+ * This is a best-effort call: any failure of the request is swallowed and an
+ * empty list is returned, so callers cannot tell a failed request from an
+ * empty directory.
+ *
+ * @param path Directory to list; URI-encoded before being sent. Defaults to "/".
+ * @returns The response cast to a string array, or `[]` when the request throws.
+ */
 export async function listFiles(path: string = "/"): Promise<string[]> {
-  const data = await request(`/api/list-files?path=${path}`);
-  return data as string[];
+  try {
+    const encodedPath = encodeURIComponent(path);
+    const data = await request(`/api/list-files?path=${encodedPath}`);
+    return data as string[];
+  } catch (error) {
+    // Deliberately ignored: an unreadable or missing directory is shown as empty.
+    return [];
+  }
 }

+ 6 - 0
opendevin/core/config.py

@@ -156,6 +156,9 @@ class AppConfig(metaclass=Singleton):
         sandbox_timeout: The timeout for the sandbox.
         debug: Whether to enable debugging.
         enable_auto_lint: Whether to enable auto linting. This is False by default, for regular runs of the app. For evaluation, please set this to True.
+        file_uploads_max_file_size_mb: Maximum file size for uploads in megabytes. 0 means no limit.
+        file_uploads_restrict_file_types: Whether to restrict file types for file uploads. Defaults to False.
+        file_uploads_allowed_extensions: List of allowed file extensions for uploads. ['.*'] means all extensions are allowed.
     """
 
     llm: LLMConfig = field(default_factory=LLMConfig)
@@ -194,6 +197,9 @@ class AppConfig(metaclass=Singleton):
     enable_auto_lint: bool = (
         False  # once enabled, OpenDevin would lint files after editing
     )
+    file_uploads_max_file_size_mb: int = 0
+    file_uploads_restrict_file_types: bool = False
+    file_uploads_allowed_extensions: list[str] = field(default_factory=lambda: ['.*'])
 
     defaults_dict: ClassVar[dict] = {}
 

+ 3 - 0
opendevin/core/schema/config.py

@@ -42,3 +42,6 @@ class ConfigType(str, Enum):
     SSH_HOSTNAME = 'SSH_HOSTNAME'
     DISABLE_COLOR = 'DISABLE_COLOR'
     DEBUG = 'DEBUG'
+    FILE_UPLOADS_MAX_FILE_SIZE_MB = 'FILE_UPLOADS_MAX_FILE_SIZE_MB'
+    FILE_UPLOADS_RESTRICT_FILE_TYPES = 'FILE_UPLOADS_RESTRICT_FILE_TYPES'
+    FILE_UPLOADS_ALLOWED_EXTENSIONS = 'FILE_UPLOADS_ALLOWED_EXTENSIONS'

+ 240 - 46
opendevin/server/listen.py

@@ -1,12 +1,16 @@
+import os
+import re
 import uuid
 import warnings
 
+from pathspec import PathSpec
+from pathspec.patterns import GitWildMatchPattern
+
 from opendevin.server.data_models.feedback import FeedbackDataModel, store_feedback
 
 with warnings.catch_warnings():
     warnings.simplefilter('ignore')
     import litellm
-from pathlib import Path
 
 from fastapi import FastAPI, Request, Response, UploadFile, WebSocket, status
 from fastapi.middleware.cors import CORSMiddleware
@@ -19,7 +23,10 @@ from opendevin.controller.agent import Agent
 from opendevin.core.config import config
 from opendevin.core.logger import opendevin_logger as logger
 from opendevin.events.action import ChangeAgentStateAction, NullAction
-from opendevin.events.observation import AgentStateChangedObservation, NullObservation
+from opendevin.events.observation import (
+    AgentStateChangedObservation,
+    NullObservation,
+)
 from opendevin.events.serialization import event_to_dict
 from opendevin.llm import bedrock
 from opendevin.server.auth import get_sid_from_token, sign_token
@@ -37,6 +44,96 @@ app.add_middleware(
 security_scheme = HTTPBearer()
 
 
+def load_file_upload_config() -> tuple[int, bool, list[str]]:
+    """
+    Load and validate the file upload settings from the global config object.
+
+    The following settings are read:
+    - config.file_uploads_max_file_size_mb
+    - config.file_uploads_restrict_file_types
+    - config.file_uploads_allowed_extensions
+
+    Invalid values are replaced by permissive defaults and a warning is logged,
+    so a malformed setting never raises from this function.
+
+    Returns:
+        tuple: A tuple containing:
+            - max_file_size_mb (int): Maximum file size in MB, capped at 1024. 0 means no limit.
+            - restrict_file_types (bool): Whether file type restrictions are enabled.
+            - allowed_extensions (list[str]): Lowercase extensions with a leading dot.
+              ['.*'] means every extension is allowed.
+    """
+    # Retrieve values from config
+    max_file_size_mb = config.file_uploads_max_file_size_mb
+    restrict_file_types = config.file_uploads_restrict_file_types
+    allowed_extensions = config.file_uploads_allowed_extensions
+
+    # Sanity check for max_file_size_mb
+    MAX_ALLOWED_SIZE = 1024  # Maximum allowed file size 1 GB
+    if not isinstance(max_file_size_mb, int) or max_file_size_mb < 0:
+        logger.warning(
+            f'Invalid max_file_size_mb: {max_file_size_mb}. Setting to 0 (no limit).'
+        )
+        max_file_size_mb = 0
+    elif max_file_size_mb > MAX_ALLOWED_SIZE:
+        logger.warning(
+            f'max_file_size_mb exceeds maximum allowed size. Capping at {MAX_ALLOWED_SIZE}MB.'
+        )
+        max_file_size_mb = MAX_ALLOWED_SIZE
+
+    # Sanity check for allowed_extensions
+    if not isinstance(allowed_extensions, (list, set)) or not allowed_extensions:
+        logger.warning(
+            f'Invalid allowed_extensions: {allowed_extensions}. Setting to [".*"].'
+        )
+        allowed_extensions = ['.*']
+    else:
+        # Normalize every entry to a lowercase extension with a leading dot.
+        # Entries that are not strings are dropped instead of raising.
+        normalized = []
+        for ext in allowed_extensions:
+            if not isinstance(ext, str):
+                logger.warning(f'Ignoring non-string file extension: {ext!r}')
+                continue
+            ext = ext.strip().lower()
+            normalized.append(ext if ext.startswith('.') else f'.{ext}')
+
+        if not normalized:
+            # Nothing usable was configured; treat it like an invalid setting.
+            logger.warning(
+                f'Invalid allowed_extensions: {allowed_extensions}. Setting to [".*"].'
+            )
+            normalized = ['.*']
+        allowed_extensions = normalized
+
+    # If restrictions are disabled, allow all
+    if not restrict_file_types:
+        allowed_extensions = ['.*']
+
+    logger.info(
+        f'File upload config: max_size={max_file_size_mb}MB, '
+        f'restrict_types={restrict_file_types}, '
+        f'allowed_extensions={allowed_extensions}'
+    )
+
+    return max_file_size_mb, restrict_file_types, allowed_extensions
+
+
+# Load configuration
+MAX_FILE_SIZE_MB, RESTRICT_FILE_TYPES, ALLOWED_EXTENSIONS = load_file_upload_config()
+
+
+def is_extension_allowed(filename):
+    """
+    Decide whether a file may be uploaded, judging by its extension.
+
+    When type restrictions are switched off every file is accepted. Otherwise
+    the file is accepted if the allowed list contains the '.*' wildcard, if its
+    lowercase extension is in the allowed list, or if it has no extension and
+    the allowed list contains '.'.
+
+    Args:
+        filename (str): The name of the file to check.
+
+    Returns:
+        bool: True if the file extension is allowed, False otherwise.
+    """
+    if not RESTRICT_FILE_TYPES:
+        return True
+
+    # Extension including the leading dot, or '' when the name has none
+    suffix = os.path.splitext(filename)[1].lower()
+
+    if '.*' in ALLOWED_EXTENSIONS:
+        return True
+    if any(suffix == allowed.lower() for allowed in ALLOWED_EXTENSIONS):
+        return True
+    return suffix == '' and '.' in ALLOWED_EXTENSIONS
+
+
 @app.middleware('http')
 async def attach_session(request: Request, call_next):
     if request.url.path.startswith('/api/options/') or not request.url.path.startswith(
@@ -225,48 +322,85 @@ def list_files(request: Request, path: str = '/'):
             content={'error': 'Runtime not yet initialized'},
         )
 
-    exclude_list = (
-        '.git',
-        '.DS_Store',
-        '.svn',
-        '.hg',
-        '.idea',
-        '.vscode',
-        '.settings',
-        '.pytest_cache',
-        '__pycache__',
-        'node_modules',
-        'vendor',
-        'build',
-        'dist',
-        'bin',
-        'logs',
-        'log',
-        'tmp',
-        'temp',
-        'coverage',
-        'venv',
-        'env',
-    )
-
     try:
-        entries = request.state.session.agent_session.runtime.file_store.list(path)
+        # Get the full path of the requested directory
+        full_path = (
+            request.state.session.agent_session.runtime.file_store.get_full_path(path)
+        )
 
-        # Filter entries, excluding special folders
-        if entries:
-            return [
-                entry
-                for entry in entries
-                if Path(entry).parts and Path(entry).parts[-1] not in exclude_list
+        # Check if the directory exists
+        if not os.path.exists(full_path) or not os.path.isdir(full_path):
+            return []
+
+        # Check if .gitignore exists
+        gitignore_path = os.path.join(full_path, '.gitignore')
+        if os.path.exists(gitignore_path):
+            # Use PathSpec to parse .gitignore
+            with open(gitignore_path, 'r') as f:
+                spec = PathSpec.from_lines(GitWildMatchPattern, f.readlines())
+        else:
+            # Fallback to default exclude list if .gitignore doesn't exist
+            default_exclude = [
+                '.git',
+                '.DS_Store',
+                '.svn',
+                '.hg',
+                '.idea',
+                '.vscode',
+                '.settings',
+                '.pytest_cache',
+                '__pycache__',
+                'node_modules',
+                'vendor',
+                'build',
+                'dist',
+                'bin',
+                'logs',
+                'log',
+                'tmp',
+                'temp',
+                'coverage',
+                'venv',
+                'env',
             ]
-        return []
+            spec = PathSpec.from_lines(GitWildMatchPattern, default_exclude)
+
+        entries = request.state.session.agent_session.runtime.file_store.list(path)
+
+        # Filter entries using PathSpec
+        filtered_entries = [
+            entry
+            for entry in entries
+            if not spec.match_file(os.path.relpath(entry, full_path))
+        ]
+
+        # Separate directories and files
+        directories = []
+        files = []
+        for entry in filtered_entries:
+            # Remove leading slash and any parent directory components
+            entry_relative = entry.lstrip('/').split('/')[-1]
+
+            # Construct the full path by joining the base path with the relative entry path
+            full_entry_path = os.path.join(full_path, entry_relative)
+            if os.path.exists(full_entry_path):
+                is_dir = os.path.isdir(full_entry_path)
+                if is_dir:
+                    directories.append(entry)
+                else:
+                    files.append(entry)
+
+        # Sort directories and files separately
+        directories.sort(key=str.lower)
+        files.sort(key=str.lower)
+
+        # Combine sorted directories and files
+        sorted_entries = directories + files
+        return sorted_entries
+
     except Exception as e:
-        logger.error(f'Error refreshing files: {e}', exc_info=False)
-        error_msg = f'Error refreshing files: {e}'
-        return JSONResponse(
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            content={'error': error_msg},
-        )
+        logger.error(f'Error listing files: {e}', exc_info=True)
+        return []
 
 
 @app.get('/api/select-file')
@@ -291,6 +425,22 @@ def select_file(file: str, request: Request):
     return {'code': content}
 
 
+def sanitize_filename(filename):
+    """
+    Sanitize the filename to prevent directory traversal
+
+    Directory components are removed, every character other than word
+    characters, '-', '_' and '.' is dropped, and the result is limited to 255
+    characters while keeping the extension where possible.
+
+    Args:
+        filename (str | None): File name as supplied by the client.
+
+    Returns:
+        str: A non-empty name without path separators. 'unnamed_file' is
+        returned when nothing usable remains (empty name, '.', '..', ...).
+    """
+    fallback = 'unnamed_file'
+    max_length = 255
+
+    # Remove any directory components; a missing name is treated as empty
+    # (assumes the upload object may carry no filename - TODO confirm)
+    filename = os.path.basename(filename or '')
+    # Remove any non-alphanumeric characters except for .-_
+    filename = re.sub(r'[^\w\-_\.]', '', filename)
+
+    # '', '.', '..' and similar would resolve to a directory, not a file
+    if not filename.strip('.'):
+        return fallback
+
+    # Limit the filename length
+    if len(filename) > max_length:
+        name, ext = os.path.splitext(filename)
+        # Never use a negative slice bound when the extension alone is too long
+        keep = max(max_length - len(ext), 0)
+        filename = (name[:keep] + ext)[:max_length]
+    return filename
+
+
 @app.post('/api/upload-files')
 async def upload_file(request: Request, files: list[UploadFile]):
     """
@@ -302,24 +452,68 @@ async def upload_file(request: Request, files: list[UploadFile]):
     ```
     """
     try:
+        uploaded_files = []
+        skipped_files = []
         for file in files:
+            safe_filename = sanitize_filename(file.filename)
             file_contents = await file.read()
+
+            if (
+                MAX_FILE_SIZE_MB > 0
+                and len(file_contents) > MAX_FILE_SIZE_MB * 1024 * 1024
+            ):
+                skipped_files.append(
+                    {
+                        'name': safe_filename,
+                        'reason': f'Exceeds maximum size limit of {MAX_FILE_SIZE_MB}MB',
+                    }
+                )
+                continue
+
+            if not is_extension_allowed(safe_filename):
+                skipped_files.append(
+                    {'name': safe_filename, 'reason': 'File type not allowed'}
+                )
+                continue
+
             request.state.session.agent_session.runtime.file_store.write(
-                file.filename, file_contents
+                safe_filename, file_contents
             )
+            uploaded_files.append(safe_filename)
+
+        response_content = {
+            'message': 'File upload process completed',
+            'uploaded_files': uploaded_files,
+            'skipped_files': skipped_files,
+        }
+
+        if not uploaded_files and skipped_files:
+            return JSONResponse(
+                status_code=status.HTTP_400_BAD_REQUEST,
+                content={
+                    **response_content,
+                    'error': 'No files were uploaded successfully',
+                },
+            )
+
+        return JSONResponse(status_code=status.HTTP_200_OK, content=response_content)
+
     except Exception as e:
-        logger.error(f'Error saving files: {e}', exc_info=True)
+        logger.error(f'Error during file upload: {e}', exc_info=True)
         return JSONResponse(
             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            content={'error': f'Error saving file:s {e}'},
+            content={
+                'error': f'Error during file upload: {str(e)}',
+                'uploaded_files': [],
+                'skipped_files': [],
+            },
         )
-    return {'message': 'Files uploaded successfully', 'file_count': len(files)}
 
 
 @app.post('/api/submit-feedback')
 async def submit_feedback(request: Request, feedback: FeedbackDataModel):
     """
-    Upload files to the workspace.
+    Upload feedback data to the feedback site.
 
     To upload files:
     ```sh
@@ -327,7 +521,7 @@ async def submit_feedback(request: Request, feedback: FeedbackDataModel):
     ```
     """
     # Assuming the storage service is already configured in the backend
-    # and there is a function  to handle the storage.
+    # and there is a function to handle the storage.
     try:
         feedback_data = store_feedback(feedback)
         return JSONResponse(status_code=200, content=feedback_data)

+ 3 - 2
opendevin/storage/local.py

@@ -15,10 +15,11 @@ class LocalFileStore(FileStore):
             path = path[1:]
         return os.path.join(self.root, path)
 
-    def write(self, path: str, contents: str) -> None:
+    def write(self, path: str, contents: str | bytes) -> None:
+        """
+        Write `contents` to `path` inside this store, creating missing parent
+        directories first.
+
+        Args:
+            path: Location of the file; resolved through `get_full_path`.
+            contents: A `str` is written in text mode, anything else in binary mode.
+        """
         full_path = self.get_full_path(path)
         os.makedirs(os.path.dirname(full_path), exist_ok=True)
-        with open(full_path, 'w') as f:
+        # Pick the open mode from the payload type: text for str, binary otherwise
+        mode = 'w' if isinstance(contents, str) else 'wb'
+        with open(full_path, mode) as f:
             f.write(contents)
 
     def read(self, path: str) -> str:

+ 13 - 3
poetry.lock

@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
 
 [[package]]
 name = "aenum"
@@ -4114,7 +4114,6 @@ description = "Nvidia JIT LTO Library"
 optional = false
 python-versions = ">=3"
 files = [
-    {file = "nvidia_nvjitlink_cu12-12.5.40-py3-none-manylinux2014_aarch64.whl", hash = "sha256:004186d5ea6a57758fd6d57052a123c73a4815adf365eb8dd6a85c9eaa7535ff"},
     {file = "nvidia_nvjitlink_cu12-12.5.40-py3-none-manylinux2014_x86_64.whl", hash = "sha256:d9714f27c1d0f0895cd8915c07a87a1d0029a0aa36acaf9156952ec2a8a12189"},
     {file = "nvidia_nvjitlink_cu12-12.5.40-py3-none-win_amd64.whl", hash = "sha256:c3401dc8543b52d3a8158007a0c1ab4e9c768fcbd24153a48c86972102197ddd"},
 ]
@@ -4544,6 +4543,17 @@ sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-d
 test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"]
 xml = ["lxml (>=4.9.2)"]
 
+[[package]]
+name = "pathspec"
+version = "0.12.1"
+description = "Utility library for gitignore style pattern matching of file paths."
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"},
+    {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"},
+]
+
 [[package]]
 name = "pexpect"
 version = "4.9.0"
@@ -7737,4 +7747,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.11"
-content-hash = "dba3c8c3812d657e413a57e3bd87ad6f80adadc08857948ff1fd6e1c62692ca7"
+content-hash = "d30ba49e7737bdacfb1c08a821ab1d41f97e00c19b691bec504e6eae301ee0e7"

+ 1 - 0
pyproject.toml

@@ -34,6 +34,7 @@ gevent = "^24.2.1"
 pyarrow = "16.1.0" # transitive dependency, pinned here to avoid conflicts
 tenacity = "^8.4.2"
 zope-interface = "6.4.post2"
+pathspec = "^0.12.1"
 
 [tool.poetry.group.llama-index.dependencies]
 llama-index = "*"