Skip to content

Commit

Permalink
Add Google Sheets integration for GitHub user verification (#4671)
Browse files Browse the repository at this point in the history
Co-authored-by: openhands <[email protected]>
Co-authored-by: Graham Neubig <[email protected]>
  • Loading branch information
3 people authored Nov 1, 2024
1 parent adf7ab5 commit b27fabe
Show file tree
Hide file tree
Showing 4 changed files with 172 additions and 31 deletions.
110 changes: 81 additions & 29 deletions openhands/server/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,45 +3,101 @@
import httpx

from openhands.core.logger import openhands_logger as logger
from openhands.server.sheets_client import GoogleSheetsClient

# GitHub client ID and secret, read from the environment with surrounding
# whitespace stripped; each falls back to an empty string when unset.
GITHUB_CLIENT_ID = os.getenv('GITHUB_CLIENT_ID', '').strip()
GITHUB_CLIENT_SECRET = os.getenv('GITHUB_CLIENT_SECRET', '').strip()
# Entries read from the file named by GITHUB_USER_LIST_FILE; stays None until
# load_github_user_list() finds that variable set.
GITHUB_USER_LIST = None


def load_github_user_list():
    """Populate the module-level ``GITHUB_USER_LIST`` from a text file.

    The file path is taken from the ``GITHUB_USER_LIST_FILE`` environment
    variable. Every non-blank line becomes one entry, with surrounding
    whitespace removed. When the variable is unset or empty the global is
    left untouched.

    Raises:
        OSError: If the configured file cannot be opened or read.
    """
    global GITHUB_USER_LIST
    waitlist = os.getenv('GITHUB_USER_LIST_FILE')
    if waitlist:
        # Explicit encoding so the parsed list does not depend on the host locale.
        with open(waitlist, 'r', encoding='utf-8') as f:
            GITHUB_USER_LIST = [line.strip() for line in f if line.strip()]
class UserVerifier:
    """Decide whether a GitHub username is on a configured allowlist.

    Two optional sources are read from the environment at construction time:

    * ``GITHUB_USER_LIST_FILE`` - path to a text file with one username per line.
    * ``GITHUB_USERS_SHEET_ID`` - ID of a Google Sheet queried for usernames.

    When neither source yields a configuration, every user is allowed.
    """

    def __init__(self) -> None:
        logger.info('Initializing UserVerifier')
        self.file_users: list[str] | None = None
        self.sheets_client: GoogleSheetsClient | None = None
        self.spreadsheet_id: str | None = None

        # Initialize from environment variables
        self._init_file_users()
        self._init_sheets_client()

    def _init_file_users(self) -> None:
        """Load users from text file if configured.

        Raises:
            FileNotFoundError: If ``GITHUB_USER_LIST_FILE`` names a path that
                does not exist.
            Exception: Any error raised while reading the file is logged and
                re-raised.
        """
        waitlist = os.getenv('GITHUB_USER_LIST_FILE')
        if not waitlist:
            logger.info('GITHUB_USER_LIST_FILE not configured')
            return

        if not os.path.exists(waitlist):
            logger.error(f'User list file not found: {waitlist}')
            raise FileNotFoundError(f'User list file not found: {waitlist}')

        try:
            with open(waitlist, 'r') as f:
                self.file_users = [line.strip() for line in f if line.strip()]
        except Exception as e:
            logger.error(f'Error reading user list file {waitlist}: {str(e)}')
            # Swallowing the error would leave file_users as None, which
            # is_user_allowed() treats as "no allowlist" and lets everyone in.
            # Fail fast instead, exactly like the missing-file case above.
            raise
        logger.info(
            f'Successfully loaded {len(self.file_users)} users from {waitlist}'
        )

    def _init_sheets_client(self) -> None:
        """Initialize Google Sheets client if configured."""
        sheet_id = os.getenv('GITHUB_USERS_SHEET_ID')

        if not sheet_id:
            logger.info('GITHUB_USERS_SHEET_ID not configured')
            return

        logger.info('Initializing Google Sheets integration')
        self.sheets_client = GoogleSheetsClient()
        self.spreadsheet_id = sheet_id

    def is_user_allowed(self, username: str) -> bool:
        """Check if user is allowed based on file and/or sheet configuration.

        Args:
            username: GitHub login to look up.

        Returns:
            True when no source is configured, or when ``username`` appears in
            the file list or in the sheet; False otherwise.
        """
        if not self.file_users and not self.sheets_client:
            logger.debug('No verification sources configured - allowing all users')
            return True
        logger.info(f'Checking if GitHub user {username} is allowed')

        # The in-memory file list is checked first, so the sheet is only
        # fetched when the file list did not already match.
        if self.file_users:
            if username in self.file_users:
                logger.info(f'User {username} found in text file allowlist')
                return True
            logger.debug(f'User {username} not found in text file allowlist')

        if self.sheets_client and self.spreadsheet_id:
            # The sheet is fetched on every check rather than cached here.
            sheet_users = self.sheets_client.get_usernames(self.spreadsheet_id)
            if username in sheet_users:
                logger.info(f'User {username} found in Google Sheets allowlist')
                return True
            logger.debug(f'User {username} not found in Google Sheets allowlist')

        logger.info(f'User {username} not found in any allowlist')
        return False


# Populate GITHUB_USER_LIST at import time; does nothing unless
# GITHUB_USER_LIST_FILE is set.
load_github_user_list()
# Global instance of user verifier. It is built at import time and reads its
# configuration from the environment, so importing this module raises
# FileNotFoundError when GITHUB_USER_LIST_FILE points to a missing file.
user_verifier = UserVerifier()


async def authenticate_github_user(auth_token) -> bool:
    """Return whether the holder of ``auth_token`` may use the server.

    Args:
        auth_token: GitHub access token supplied by the client; may be empty.

    Returns:
        True when no allowlist source is configured, or when the token
        resolves to a GitHub login accepted by ``user_verifier``. False when
        the token is missing, cannot be resolved, or the login is not allowed.
    """
    logger.info('Checking GitHub token')

    # With no allowlist source configured every user is allowed, so a token
    # must not be demanded; this check has to come before the token check.
    if not user_verifier.file_users and not user_verifier.sheets_client:
        return True

    if not auth_token:
        logger.warning('No GitHub token provided')
        return False

    try:
        login = await get_github_user(auth_token)
    except httpx.HTTPError as error:
        # get_github_user raises on non-2xx responses and transport failures;
        # treat both as a failed authentication rather than crashing.
        logger.warning(f'Invalid GitHub token: {error}')
        return False

    if not user_verifier.is_user_allowed(login):
        logger.warning(f'GitHub user {login} not in allow list')
        return False

    logger.info(f'GitHub user {login} authenticated')
    return True


async def get_github_user(token: str) -> str:
    """Get GitHub user info from token.

    Args:
        token: GitHub access token, sent as a Bearer credential.

    Returns:
        The ``login`` field of the ``https://api.github.com/user`` response.

    Raises:
        httpx.HTTPStatusError: If GitHub answers with an error status
            (raised by ``response.raise_for_status()``).
        httpx.RequestError: If the request itself fails.
    """
    logger.info('Fetching GitHub user info from token')
    headers = {
        'Accept': 'application/vnd.github+json',
        'Authorization': f'Bearer {token}',
        'X-GitHub-Api-Version': '2022-11-28',
    }
    async with httpx.AsyncClient() as client:
        logger.debug('Making request to GitHub API')
        response = await client.get('https://api.github.com/user', headers=headers)
        response.raise_for_status()
        user_data = response.json()
        login = user_data.get('login')
        logger.info(f'Successfully retrieved GitHub user: {login}')
        return login
68 changes: 68 additions & 0 deletions openhands/server/sheets_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from typing import List

from google.auth import default
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

from openhands.core.logger import openhands_logger as logger


class GoogleSheetsClient:
    """Thin read-only wrapper around the Google Sheets v4 API."""

    def __init__(self):
        """Initialize Google Sheets client using workload identity.

        Uses application default credentials which supports workload identity
        when running in GCP. If credentials or the API service cannot be
        obtained the error is logged and ``self.service`` is set to ``None``;
        ``get_usernames`` then returns an empty list.
        """
        logger.info('Initializing Google Sheets client with workload identity')
        try:
            credentials, project = default(
                scopes=['https://www.googleapis.com/auth/spreadsheets.readonly']
            )
            logger.info(f'Successfully obtained credentials for project: {project}')
            self.service = build('sheets', 'v4', credentials=credentials)
            logger.info('Successfully initialized Google Sheets API service')
        except Exception as e:
            logger.error(f'Failed to initialize Google Sheets client: {str(e)}')
            self.service = None

    def get_usernames(self, spreadsheet_id: str, range_name: str = 'A:A') -> List[str]:
        """Get list of usernames from specified Google Sheet.

        Args:
            spreadsheet_id: The ID of the Google Sheet
            range_name: The A1 notation of the range to fetch

        Returns:
            List of usernames taken from the first cell of each returned row,
            stripped of surrounding whitespace, with blank entries dropped.
            An empty list is returned when the service is not initialized or
            the request fails.
        """
        if not self.service:
            logger.error('Google Sheets service not initialized')
            return []

        try:
            logger.info(
                f'Fetching usernames from sheet {spreadsheet_id}, range {range_name}'
            )
            result = (
                self.service.spreadsheets()
                .values()
                .get(spreadsheetId=spreadsheet_id, range=range_name)
                .execute()
            )

            values = result.get('values', [])
            # Coerce to str before stripping in BOTH the filter and the result:
            # a non-string cell would otherwise raise AttributeError, be caught
            # below, and turn the whole allowlist into an empty list.
            stripped = (str(row[0]).strip() for row in values if row)
            usernames = [name for name in stripped if name]
            logger.info(
                f'Successfully fetched {len(usernames)} usernames from Google Sheet'
            )
            return usernames

        except HttpError as err:
            logger.error(f'Error accessing Google Sheet {spreadsheet_id}: {err}')
            return []
        except Exception as e:
            logger.error(
                f'Unexpected error accessing Google Sheet {spreadsheet_id}: {str(e)}'
            )
            return []
22 changes: 20 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ datasets = "*"
pandas = "*"
litellm = "^1.51.1"
google-generativeai = "*" # To use litellm with Gemini Pro API
google-api-python-client = "*" # For Google Sheets API
google-auth-httplib2 = "*" # For Google Sheets authentication
google-auth-oauthlib = "*" # For Google Sheets OAuth
termcolor = "*"
seaborn = "*"
docker = "*"
Expand Down

0 comments on commit b27fabe

Please sign in to comment.