Browse Source

Add Google Sheets integration for GitHub user verification (#4671)

Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Graham Neubig <neubig@gmail.com>
Robert Brennan 1 năm trước
mục cha
commit
b27fabe504
4 tập tin đã thay đổi với 172 bổ sung và 31 xóa
  1. 81 29
      openhands/server/github.py
  2. 68 0
      openhands/server/sheets_client.py
  3. 20 2
      poetry.lock
  4. 3 0
      pyproject.toml

+ 81 - 29
openhands/server/github.py

@@ -3,37 +3,93 @@ import os
 import httpx
 
 from openhands.core.logger import openhands_logger as logger
+from openhands.server.sheets_client import GoogleSheetsClient
 
 # GitHub client credentials taken from the environment; an unset variable
 # becomes an empty string and surrounding whitespace is stripped.
 GITHUB_CLIENT_ID = os.getenv('GITHUB_CLIENT_ID', '').strip()
 GITHUB_CLIENT_SECRET = os.getenv('GITHUB_CLIENT_SECRET', '').strip()
-GITHUB_USER_LIST = None
 
 
-def load_github_user_list():
-    global GITHUB_USER_LIST
-    waitlist = os.getenv('GITHUB_USER_LIST_FILE')
-    if waitlist:
-        with open(waitlist, 'r') as f:
-            GITHUB_USER_LIST = [line.strip() for line in f if line.strip()]
+class UserVerifier:
+    def __init__(self) -> None:
+        logger.info('Initializing UserVerifier')
+        self.file_users: list[str] | None = None
+        self.sheets_client: GoogleSheetsClient | None = None
+        self.spreadsheet_id: str | None = None
+
+        # Initialize from environment variables
+        self._init_file_users()
+        self._init_sheets_client()
+
+    def _init_file_users(self) -> None:
+        """Load users from text file if configured"""
+        waitlist = os.getenv('GITHUB_USER_LIST_FILE')
+        if not waitlist:
+            logger.info('GITHUB_USER_LIST_FILE not configured')
+            return
+
+        if not os.path.exists(waitlist):
+            logger.error(f'User list file not found: {waitlist}')
+            raise FileNotFoundError(f'User list file not found: {waitlist}')
+
+        try:
+            with open(waitlist, 'r') as f:
+                self.file_users = [line.strip() for line in f if line.strip()]
+            logger.info(
+                f'Successfully loaded {len(self.file_users)} users from {waitlist}'
+            )
+        except Exception as e:
+            logger.error(f'Error reading user list file {waitlist}: {str(e)}')
+
+    def _init_sheets_client(self) -> None:
+        """Initialize Google Sheets client if configured"""
+        sheet_id = os.getenv('GITHUB_USERS_SHEET_ID')
+
+        if not sheet_id:
+            logger.info('GITHUB_USERS_SHEET_ID not configured')
+            return
+
+        logger.info('Initializing Google Sheets integration')
+        self.sheets_client = GoogleSheetsClient()
+        self.spreadsheet_id = sheet_id
+
+    def is_user_allowed(self, username: str) -> bool:
+        """Check if user is allowed based on file and/or sheet configuration"""
+        if not self.file_users and not self.sheets_client:
+            logger.debug('No verification sources configured - allowing all users')
+            return True
+        logger.info(f'Checking if GitHub user {username} is allowed')
+
+        if self.file_users:
+            if username in self.file_users:
+                logger.info(f'User {username} found in text file allowlist')
+                return True
+            logger.debug(f'User {username} not found in text file allowlist')
+
+        if self.sheets_client and self.spreadsheet_id:
+            sheet_users = self.sheets_client.get_usernames(self.spreadsheet_id)
+            if username in sheet_users:
+                logger.info(f'User {username} found in Google Sheets allowlist')
+                return True
+            logger.debug(f'User {username} not found in Google Sheets allowlist')
+
+        logger.info(f'User {username} not found in any allowlist')
+        return False
 
 
-load_github_user_list()
+# Global instance of user verifier.
+# Built at import time: the constructor reads GITHUB_USER_LIST_FILE and
+# GITHUB_USERS_SHEET_ID, and raises FileNotFoundError if the configured
+# user list file does not exist.
+user_verifier = UserVerifier()
 
 
 async def authenticate_github_user(auth_token) -> bool:
     logger.info('Checking GitHub token')
-    if not GITHUB_USER_LIST:
-        return True
 
     if not auth_token:
         logger.warning('No GitHub token provided')
         return False
 
-    login, error = await get_github_user(auth_token)
-    if error:
-        logger.warning(f'Invalid GitHub token: {error}')
-        return False
-    if login not in GITHUB_USER_LIST:
+    login = await get_github_user(auth_token)
+
+    if not user_verifier.is_user_allowed(login):
         logger.warning(f'GitHub user {login} not in allow list')
         return False
 
@@ -41,7 +97,7 @@ async def authenticate_github_user(auth_token) -> bool:
     return True
 
 
-async def get_github_user(token: str) -> tuple[str | None, str | None]:
+async def get_github_user(token: str) -> str:
     """Get GitHub user info from token.
 
     Args:
@@ -52,21 +108,17 @@ async def get_github_user(token: str) -> tuple[str | None, str | None]:
         If successful, error_message is None
         If failed, login is None and error_message contains the error
     """
+    logger.info('Fetching GitHub user info from token')
     headers = {
         'Accept': 'application/vnd.github+json',
         'Authorization': f'Bearer {token}',
         'X-GitHub-Api-Version': '2022-11-28',
     }
-    try:
-        async with httpx.AsyncClient() as client:
-            response = await client.get('https://api.github.com/user', headers=headers)
-            if response.status_code == 200:
-                user_data = response.json()
-                return user_data.get('login'), None
-            else:
-                return (
-                    None,
-                    f'GitHub API error: {response.status_code} - {response.text}',
-                )
-    except Exception as e:
-        return None, f'Error connecting to GitHub: {str(e)}'
+    async with httpx.AsyncClient() as client:
+        logger.debug('Making request to GitHub API')
+        response = await client.get('https://api.github.com/user', headers=headers)
+        response.raise_for_status()
+        user_data = response.json()
+        login = user_data.get('login')
+        logger.info(f'Successfully retrieved GitHub user: {login}')
+        return login

+ 68 - 0
openhands/server/sheets_client.py

@@ -0,0 +1,68 @@
+from typing import List
+
+from google.auth import default
+from googleapiclient.discovery import build
+from googleapiclient.errors import HttpError
+
+from openhands.core.logger import openhands_logger as logger
+
+
+class GoogleSheetsClient:
+    def __init__(self):
+        """Initialize Google Sheets client using workload identity.
+        Uses application default credentials which supports workload identity when running in GCP.
+        """
+        logger.info('Initializing Google Sheets client with workload identity')
+        try:
+            credentials, project = default(
+                scopes=['https://www.googleapis.com/auth/spreadsheets.readonly']
+            )
+            logger.info(f'Successfully obtained credentials for project: {project}')
+            self.service = build('sheets', 'v4', credentials=credentials)
+            logger.info('Successfully initialized Google Sheets API service')
+        except Exception as e:
+            logger.error(f'Failed to initialize Google Sheets client: {str(e)}')
+            self.service = None
+
+    def get_usernames(self, spreadsheet_id: str, range_name: str = 'A:A') -> List[str]:
+        """Get list of usernames from specified Google Sheet.
+
+        Args:
+            spreadsheet_id: The ID of the Google Sheet
+            range_name: The A1 notation of the range to fetch
+
+        Returns:
+            List of usernames from the sheet
+        """
+        if not self.service:
+            logger.error('Google Sheets service not initialized')
+            return []
+
+        try:
+            logger.info(
+                f'Fetching usernames from sheet {spreadsheet_id}, range {range_name}'
+            )
+            result = (
+                self.service.spreadsheets()
+                .values()
+                .get(spreadsheetId=spreadsheet_id, range=range_name)
+                .execute()
+            )
+
+            values = result.get('values', [])
+            usernames = [
+                str(cell[0]).strip() for cell in values if cell and cell[0].strip()
+            ]
+            logger.info(
+                f'Successfully fetched {len(usernames)} usernames from Google Sheet'
+            )
+            return usernames
+
+        except HttpError as err:
+            logger.error(f'Error accessing Google Sheet {spreadsheet_id}: {err}')
+            return []
+        except Exception as e:
+            logger.error(
+                f'Unexpected error accessing Google Sheet {spreadsheet_id}: {str(e)}'
+            )
+            return []

+ 20 - 2
poetry.lock

@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand.
 
 [[package]]
 name = "aenum"
@@ -2319,6 +2319,24 @@ files = [
 google-auth = "*"
 httplib2 = ">=0.19.0"
 
+[[package]]
+name = "google-auth-oauthlib"
+version = "1.2.1"
+description = "Google Authentication Library"
+optional = false
+python-versions = ">=3.6"
+files = [
+    {file = "google_auth_oauthlib-1.2.1-py2.py3-none-any.whl", hash = "sha256:2d58a27262d55aa1b87678c3ba7142a080098cbc2024f903c62355deb235d91f"},
+    {file = "google_auth_oauthlib-1.2.1.tar.gz", hash = "sha256:afd0cad092a2eaa53cd8e8298557d6de1034c6cb4a740500b5357b648af97263"},
+]
+
+[package.dependencies]
+google-auth = ">=2.15.0"
+requests-oauthlib = ">=0.7.0"
+
+[package.extras]
+tool = ["click (>=6.0.0)"]
+
 [[package]]
 name = "google-cloud-aiplatform"
 version = "1.70.0"
@@ -10109,4 +10127,4 @@ testing = ["coverage[toml]", "zope.event", "zope.testing"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.12"
-content-hash = "2b268ef696ace0d8170276407dbdeb414134477839ebe4b7ecf29b1a1fe2cef3"
+content-hash = "2a4f90bb5c7f7d82160f57d71af7e81c7acef69426d0e1e46e1da09972a6215f"

+ 3 - 0
pyproject.toml

@@ -16,6 +16,9 @@ datasets = "*"
 pandas = "*"
 litellm = "^1.51.1"
 google-generativeai = "*" # To use litellm with Gemini Pro API
+google-api-python-client = "*" # For Google Sheets API
+google-auth-httplib2 = "*" # For Google Sheets authentication
+google-auth-oauthlib = "*" # For Google Sheets OAuth
 termcolor = "*"
 seaborn = "*"
 docker = "*"