diff --git a/backend/ee/onyx/db/connector_credential_pair.py b/backend/ee/onyx/db/connector_credential_pair.py index 0ba1a6f2b6b..0ffbcd48002 100644 --- a/backend/ee/onyx/db/connector_credential_pair.py +++ b/backend/ee/onyx/db/connector_credential_pair.py @@ -4,6 +4,7 @@ from onyx.configs.constants import DocumentSource from onyx.db.connector_credential_pair import get_connector_credential_pair from onyx.db.enums import AccessType +from onyx.db.enums import ConnectorCredentialPairStatus from onyx.db.models import Connector from onyx.db.models import ConnectorCredentialPair from onyx.db.models import UserGroup__ConnectorCredentialPair @@ -36,6 +37,7 @@ def get_cc_pairs_by_source( db_session: Session, source_type: DocumentSource, only_sync: bool, + only_valid: bool, ) -> list[ConnectorCredentialPair]: """ Get all cc_pairs for a given source type (and optionally only sync) @@ -51,6 +53,11 @@ def get_cc_pairs_by_source( if only_sync: query = query.filter(ConnectorCredentialPair.access_type == AccessType.SYNC) + if only_valid: + query = query.filter( + ConnectorCredentialPair.status == ConnectorCredentialPairStatus.ACTIVE + ) + cc_pairs = query.all() return cc_pairs diff --git a/backend/ee/onyx/external_permissions/google_drive/doc_sync.py b/backend/ee/onyx/external_permissions/google_drive/doc_sync.py index 60ec0d3ecbc..8eaa7813f3b 100644 --- a/backend/ee/onyx/external_permissions/google_drive/doc_sync.py +++ b/backend/ee/onyx/external_permissions/google_drive/doc_sync.py @@ -62,12 +62,14 @@ def _fetch_permissions_for_permission_ids( user_email=(owner_email or google_drive_connector.primary_admin_email), ) + # We continue on 404 or 403 because the document may not exist or the user may not have access to it fetched_permissions = execute_paginated_retrieval( retrieval_function=drive_service.permissions().list, list_key="permissions", fileId=doc_id, fields="permissions(id, emailAddress, type, domain)", supportsAllDrives=True, + continue_on_404_or_403=True, ) permissions_for_doc_id = [] @@ -104,7 +106,13 @@ def _get_permissions_from_slim_doc( user_emails: set[str] = set() group_emails: set[str] = set() public = False + skipped_permissions = 0 + for permission in permissions_list: + if not permission: + skipped_permissions += 1 + continue + permission_type = permission["type"] if permission_type == "user": user_emails.add(permission["emailAddress"]) @@ -121,6 +129,9 @@ def _get_permissions_from_slim_doc( elif permission_type == "anyone": public = True + logger.info( + f"Skipped {skipped_permissions} permissions for document {slim_doc.id}– {len(permissions_list)} permissions fetched" + ) drive_id = permission_info.get("drive_id") group_ids = group_emails | ({drive_id} if drive_id is not None else set()) diff --git a/backend/onyx/background/celery/tasks/doc_permission_syncing/tasks.py b/backend/onyx/background/celery/tasks/doc_permission_syncing/tasks.py index d1264f1fcb5..cd91c359863 100644 --- a/backend/onyx/background/celery/tasks/doc_permission_syncing/tasks.py +++ b/backend/onyx/background/celery/tasks/doc_permission_syncing/tasks.py @@ -42,8 +42,10 @@ from onyx.configs.constants import OnyxRedisConstants from onyx.configs.constants import OnyxRedisLocks from onyx.configs.constants import OnyxRedisSignals +from onyx.connectors.factory import validate_ccpair_for_user from onyx.db.connector import mark_cc_pair_as_permissions_synced from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id +from onyx.db.connector_credential_pair import update_connector_credential_pair from onyx.db.document import upsert_document_by_connector_credential_pair from onyx.db.engine import get_session_with_current_tenant from onyx.db.enums import AccessType @@ -193,12 +195,19 @@ def check_for_doc_permissions_sync(self: Task, *, tenant_id: str) -> bool | None monitor_ccpair_permissions_taskset( tenant_id, key_bytes, r, db_session ) + task_logger.info(f"check_for_doc_permissions_sync finished: tenant={tenant_id}") except SoftTimeLimitExceeded: task_logger.info( "Soft time limit exceeded, task is being terminated gracefully." ) - except Exception: - task_logger.exception(f"Unexpected exception: tenant={tenant_id}") + except Exception as e: + error_msg = str(e).replace("\n", " ") + task_logger.warning( + f"Unexpected check_for_doc_permissions_sync exception: tenant={tenant_id} {error_msg}" + ) + task_logger.exception( + f"Unexpected check_for_doc_permissions_sync exception: tenant={tenant_id}" + ) finally: if lock_beat.owned(): lock_beat.release() @@ -282,13 +291,19 @@ def try_creating_permissions_sync_task( redis_connector.permissions.set_fence(payload) payload_id = payload.id - except Exception: - task_logger.exception(f"Unexpected exception: cc_pair={cc_pair_id}") + except Exception as e: + error_msg = str(e).replace("\n", " ") + task_logger.warning( + f"Unexpected try_creating_permissions_sync_task exception: cc_pair={cc_pair_id} {error_msg}" + ) return None finally: if lock.owned(): lock.release() + task_logger.info( + f"try_creating_permissions_sync_task finished: cc_pair={cc_pair_id} payload_id={payload_id}" + ) return payload_id @@ -388,6 +403,30 @@ def connector_permission_sync_generator_task( f"No connector credential pair found for id: {cc_pair_id}" ) + try: + created = validate_ccpair_for_user( + cc_pair.connector.id, + cc_pair.credential.id, + db_session, + tenant_id, + enforce_creation=False, + ) + if not created: + task_logger.warning( + f"Unable to create connector credential pair for id: {cc_pair_id}" + ) + except Exception: + task_logger.exception( + f"validate_ccpair_permissions_sync exceptioned: cc_pair={cc_pair_id}" + ) + update_connector_credential_pair( + db_session=db_session, + connector_id=cc_pair.connector.id, + credential_id=cc_pair.credential.id, + status=ConnectorCredentialPairStatus.INVALID, + ) + raise + source_type = cc_pair.connector.source doc_sync_func = DOC_PERMISSIONS_FUNC_MAP.get(source_type) @@ -439,6 +478,10 @@ def connector_permission_sync_generator_task( redis_connector.permissions.generator_complete = tasks_generated except Exception as e: + error_msg = str(e).replace("\n", " ") + task_logger.warning( + f"Permission sync exceptioned: cc_pair={cc_pair_id} payload_id={payload_id} {error_msg}" + ) task_logger.exception( f"Permission sync exceptioned: cc_pair={cc_pair_id} payload_id={payload_id}" ) @@ -512,13 +555,20 @@ def update_external_document_permissions_task( f"elapsed={elapsed:.2f}" ) - except Exception: + except Exception as e: + error_msg = str(e).replace("\n", " ") + task_logger.warning( + f"Exception in update_external_document_permissions_task: connector_id={connector_id} doc_id={doc_id} {error_msg}" + ) task_logger.exception( f"Exception in update_external_document_permissions_task: " f"connector_id={connector_id} doc_id={doc_id}" ) return False + task_logger.info( + f"update_external_document_permissions_task finished: connector_id={connector_id} doc_id={doc_id}" + ) return True diff --git a/backend/onyx/background/celery/tasks/external_group_syncing/tasks.py b/backend/onyx/background/celery/tasks/external_group_syncing/tasks.py index 50bdb5f32e4..0d46197e761 100644 --- a/backend/onyx/background/celery/tasks/external_group_syncing/tasks.py +++ b/backend/onyx/background/celery/tasks/external_group_syncing/tasks.py @@ -37,8 +37,11 @@ from onyx.configs.constants import OnyxRedisConstants from onyx.configs.constants import OnyxRedisLocks from onyx.configs.constants import OnyxRedisSignals +from onyx.connectors.exceptions import ConnectorValidationError +from onyx.connectors.factory import validate_ccpair_for_user from onyx.db.connector import mark_cc_pair_as_external_group_synced from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id +from onyx.db.connector_credential_pair import update_connector_credential_pair from onyx.db.engine import get_session_with_current_tenant from onyx.db.enums import AccessType from onyx.db.enums import ConnectorCredentialPairStatus @@ -148,7 +151,7 @@ def check_for_external_group_sync(self: Task, *, tenant_id: str | None) -> bool for source in GROUP_PERMISSIONS_IS_CC_PAIR_AGNOSTIC: # These are ordered by cc_pair id so the first one is the one we want cc_pairs_to_dedupe = get_cc_pairs_by_source( - db_session, source, only_sync=True + db_session, source, only_sync=True, only_valid=True ) # We only want to sync one cc_pair per source type # in GROUP_PERMISSIONS_IS_CC_PAIR_AGNOSTIC so we dedupe here @@ -195,12 +198,17 @@ def check_for_external_group_sync(self: Task, *, tenant_id: str | None) -> bool task_logger.info( "Soft time limit exceeded, task is being terminated gracefully." ) - except Exception: + except Exception as e: + error_msg = str(e).replace("\n", " ") + task_logger.warning( + f"Unexpected check_for_external_group_sync exception: tenant={tenant_id} {error_msg}" + ) task_logger.exception(f"Unexpected exception: tenant={tenant_id}") finally: if lock_beat.owned(): lock_beat.release() + task_logger.info(f"check_for_external_group_sync finished: tenant={tenant_id}") return True @@ -267,12 +275,19 @@ def try_creating_external_group_sync_task( redis_connector.external_group_sync.set_fence(payload) payload_id = payload.id - except Exception: + except Exception as e: + error_msg = str(e).replace("\n", " ") + task_logger.warning( + f"Unexpected try_creating_external_group_sync_task exception: cc_pair={cc_pair_id} {error_msg}" + ) task_logger.exception( f"Unexpected exception while trying to create external group sync task: cc_pair={cc_pair_id}" ) return None + task_logger.info( + f"try_creating_external_group_sync_task finished: cc_pair={cc_pair_id} payload_id={payload_id}" + ) return payload_id @@ -361,12 +376,37 @@ def connector_external_group_sync_generator_task( cc_pair = get_connector_credential_pair_from_id( db_session=db_session, cc_pair_id=cc_pair_id, + eager_load_credential=True, ) if cc_pair is None: raise ValueError( f"No connector credential pair found for id: {cc_pair_id}" ) + try: + created = validate_ccpair_for_user( + cc_pair.connector.id, + cc_pair.credential.id, + db_session, + tenant_id, + enforce_creation=False, + ) + if not created: + task_logger.warning( + f"Unable to create connector credential pair for id: {cc_pair_id}" + ) + except Exception: + task_logger.exception( + f"validate_ccpair_permissions_sync exceptioned: cc_pair={cc_pair_id}" + ) + update_connector_credential_pair( + db_session=db_session, + connector_id=cc_pair.connector.id, + credential_id=cc_pair.credential.id, + status=ConnectorCredentialPairStatus.INVALID, + ) + raise + source_type = cc_pair.connector.source ext_group_sync_func = GROUP_PERMISSIONS_FUNC_MAP.get(source_type) @@ -378,8 +418,18 @@ def connector_external_group_sync_generator_task( logger.info( f"Syncing external groups for {source_type} for cc_pair: {cc_pair_id}" ) - - external_user_groups: list[ExternalUserGroup] = ext_group_sync_func(cc_pair) + external_user_groups: list[ExternalUserGroup] = [] + try: + external_user_groups = ext_group_sync_func(cc_pair) + except ConnectorValidationError as e: + msg = f"Error syncing external groups for {source_type} for cc_pair: {cc_pair_id} {e}" + update_connector_credential_pair( + db_session=db_session, + connector_id=cc_pair.connector.id, + credential_id=cc_pair.credential.id, + status=ConnectorCredentialPairStatus.INVALID, + ) + raise e logger.info( f"Syncing {len(external_user_groups)} external user groups for {source_type}" @@ -405,6 +455,14 @@ def connector_external_group_sync_generator_task( sync_status=SyncStatus.SUCCESS, ) except Exception as e: + error_msg = str(e).replace("\n", " ") + task_logger.warning( + f"External group sync exceptioned: cc_pair={cc_pair_id} payload_id={payload.id} {error_msg}" + ) + task_logger.exception( + f"External group sync exceptioned: cc_pair={cc_pair_id} payload_id={payload.id}" + ) + msg = f"External group sync exceptioned: cc_pair={cc_pair_id} payload_id={payload.id}" task_logger.exception(msg) emit_background_error(msg + f"\n\n{e}", cc_pair_id=cc_pair_id) diff --git a/backend/onyx/background/celery/tasks/indexing/tasks.py b/backend/onyx/background/celery/tasks/indexing/tasks.py index eb4284ff1eb..3e313a3f094 100644 --- a/backend/onyx/background/celery/tasks/indexing/tasks.py +++ b/backend/onyx/background/celery/tasks/indexing/tasks.py @@ -48,7 +48,7 @@ from onyx.configs.constants import OnyxRedisConstants from onyx.configs.constants import OnyxRedisLocks from onyx.configs.constants import OnyxRedisSignals -from onyx.connectors.interfaces import ConnectorValidationError +from onyx.connectors.exceptions import ConnectorValidationError from onyx.db.connector import mark_ccpair_with_indexing_trigger from onyx.db.connector_credential_pair import fetch_connector_credential_pairs from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id @@ -77,6 +77,7 @@ from shared_configs.configs import INDEXING_MODEL_SERVER_PORT from shared_configs.configs import MULTI_TENANT from shared_configs.configs import SENTRY_DSN +from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR logger = setup_logger() @@ -617,6 +618,12 @@ def connector_indexing_task( This will cause the primary worker to abort the indexing attempt and clean up. """ + tenant_id_from_context = CURRENT_TENANT_ID_CONTEXTVAR.get() + logger.info(f"connector_indexing_task with tenant_id={tenant_id_from_context}") + logger.info( + f"connector_indexing_task with args={index_attempt_id}, {cc_pair_id}, {search_settings_id}, {is_ee}, {tenant_id}" + ) + # Since connector_indexing_proxy_task spawns a new process using this function as # the entrypoint, we init Sentry here. if SENTRY_DSN: @@ -924,6 +931,7 @@ def connector_indexing_proxy_task( task_logger.error("self.request.id is None!") client = SimpleJobClient() + task_logger.info(f"submitting connector_indexing_task with tenant_id={tenant_id}") job = client.submit( connector_indexing_task, @@ -1070,6 +1078,7 @@ def connector_indexing_proxy_task( if not index_attempt.is_finished(): continue + except Exception: # if the DB exceptioned, just restart the check. # polling the index attempt status doesn't need to be strongly consistent @@ -1079,6 +1088,7 @@ def connector_indexing_proxy_task( ) ) continue + except Exception as e: result.status = IndexingWatchdogTerminalStatus.WATCHDOG_EXCEPTIONED if isinstance(e, ConnectorValidationError): diff --git a/backend/onyx/background/celery/tasks/pruning/tasks.py b/backend/onyx/background/celery/tasks/pruning/tasks.py index 2cb6e26baeb..3a3c32562b3 100644 --- a/backend/onyx/background/celery/tasks/pruning/tasks.py +++ b/backend/onyx/background/celery/tasks/pruning/tasks.py @@ -194,12 +194,14 @@ def check_for_pruning(self: Task, *, tenant_id: str | None) -> bool | None: task_logger.info( "Soft time limit exceeded, task is being terminated gracefully." ) - except Exception: + except Exception as e: + error_msg = str(e).replace("\n", " ") + task_logger.warning(f"Unexpected pruning check exception: {error_msg}") task_logger.exception("Unexpected exception during pruning check") finally: if lock_beat.owned(): lock_beat.release() - + task_logger.info(f"check_for_pruning finished: tenant={tenant_id}") return True @@ -301,13 +303,19 @@ def try_creating_prune_generator_task( redis_connector.prune.set_fence(payload) payload_id = payload.id - except Exception: + except Exception as e: + error_msg = str(e).replace("\n", " ") + task_logger.warning( + f"Unexpected try_creating_prune_generator_task exception: cc_pair={cc_pair.id} {error_msg}" + ) task_logger.exception(f"Unexpected exception: cc_pair={cc_pair.id}") return None finally: if lock.owned(): lock.release() - + task_logger.info( + f"try_creating_prune_generator_task finished: cc_pair={cc_pair.id} payload_id={payload_id}" + ) return payload_id diff --git a/backend/onyx/background/celery/tasks/shared/tasks.py b/backend/onyx/background/celery/tasks/shared/tasks.py index ec531fbc3f2..64da1e514a6 100644 --- a/backend/onyx/background/celery/tasks/shared/tasks.py +++ b/backend/onyx/background/celery/tasks/shared/tasks.py @@ -192,6 +192,8 @@ def document_by_cc_pair_cleanup_task( ) return False + error_msg = str(e).replace("\n", " ") + task_logger.warning(f"Unexpected exception: doc={document_id} {error_msg}") task_logger.exception(f"Unexpected exception: doc={document_id}") if self.request.retries < DOCUMENT_BY_CC_PAIR_CLEANUP_MAX_RETRIES: @@ -219,6 +221,7 @@ def document_by_cc_pair_cleanup_task( mark_document_as_modified(document_id, db_session) return False + task_logger.info(f"document_by_cc_pair_cleanup_task finished: doc={document_id}") return True diff --git a/backend/onyx/background/indexing/job_client.py b/backend/onyx/background/indexing/job_client.py index cb3b62c1842..1a51bf8e474 100644 --- a/backend/onyx/background/indexing/job_client.py +++ b/backend/onyx/background/indexing/job_client.py @@ -17,6 +17,9 @@ from onyx.configs.constants import POSTGRES_CELERY_WORKER_INDEXING_CHILD_APP_NAME from onyx.db.engine import SqlEngine from onyx.utils.logger import setup_logger +from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA +from shared_configs.configs import TENANT_ID_PREFIX +from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR logger = setup_logger() @@ -54,6 +57,15 @@ def _initializer( kwargs = {} logger.info("Initializing spawned worker child process.") + # 1. Get tenant_id from args or fallback to default + tenant_id = POSTGRES_DEFAULT_SCHEMA + for arg in reversed(args): + if arg.startswith(TENANT_ID_PREFIX): + tenant_id = arg + break + + # 2. Set the tenant context before running anything + token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id) # Reset the engine in the child process SqlEngine.reset_engine() @@ -81,6 +93,8 @@ def _initializer( queue.put(error_msg) # Send the exception to the parent process sys.exit(255) # use 255 to indicate a generic exception + finally: + CURRENT_TENANT_ID_CONTEXTVAR.reset(token) def _run_in_process( diff --git a/backend/onyx/background/indexing/run_indexing.py b/backend/onyx/background/indexing/run_indexing.py index 41823754a94..4b07e83d200 100644 --- a/backend/onyx/background/indexing/run_indexing.py +++ b/backend/onyx/background/indexing/run_indexing.py @@ -20,8 +20,8 @@ from onyx.configs.constants import DocumentSource from onyx.configs.constants import MilestoneRecordType from onyx.connectors.connector_runner import ConnectorRunner +from onyx.connectors.exceptions import ConnectorValidationError from onyx.connectors.factory import instantiate_connector -from onyx.connectors.interfaces import ConnectorValidationError from onyx.connectors.models import ConnectorCheckpoint from onyx.connectors.models import ConnectorFailure from onyx.connectors.models import Document diff --git a/backend/onyx/connectors/blob/connector.py b/backend/onyx/connectors/blob/connector.py index 674f1f82b7f..90de5752d6a 100644 --- a/backend/onyx/connectors/blob/connector.py +++ b/backend/onyx/connectors/blob/connector.py @@ -7,11 +7,18 @@ import boto3 # type: ignore from botocore.client import Config # type: ignore +from botocore.exceptions import ClientError +from botocore.exceptions import NoCredentialsError +from botocore.exceptions import PartialCredentialsError from mypy_boto3_s3 import S3Client # type: ignore from onyx.configs.app_configs import INDEX_BATCH_SIZE from onyx.configs.constants import BlobType from onyx.configs.constants import DocumentSource +from onyx.connectors.exceptions import ConnectorValidationError +from onyx.connectors.exceptions import CredentialExpiredError +from onyx.connectors.exceptions import InsufficientPermissionsError +from onyx.connectors.exceptions import UnexpectedError from onyx.connectors.interfaces import GenerateDocumentsOutput from onyx.connectors.interfaces import LoadConnector from onyx.connectors.interfaces import PollConnector @@ -240,6 +247,73 @@ def poll_source( return None + def validate_connector_settings(self) -> None: + if self.s3_client is None: + raise ConnectorMissingCredentialError( + "Blob storage credentials not loaded." + ) + + if not self.bucket_name: + raise ConnectorValidationError( + "No bucket name was provided in connector settings." + ) + + try: + # We only fetch one object/page as a light-weight validation step. + # This ensures we trigger typical S3 permission checks (ListObjectsV2, etc.). + self.s3_client.list_objects_v2( + Bucket=self.bucket_name, Prefix=self.prefix, MaxKeys=1 + ) + + except NoCredentialsError: + raise ConnectorMissingCredentialError( + "No valid blob storage credentials found or provided to boto3." + ) + except PartialCredentialsError: + raise ConnectorMissingCredentialError( + "Partial or incomplete blob storage credentials provided to boto3." + ) + except ClientError as e: + error_code = e.response["Error"].get("Code", "") + status_code = e.response["ResponseMetadata"].get("HTTPStatusCode") + + # Most common S3 error cases + if error_code in [ + "AccessDenied", + "InvalidAccessKeyId", + "SignatureDoesNotMatch", + ]: + if status_code == 403 or error_code == "AccessDenied": + raise InsufficientPermissionsError( + f"Insufficient permissions to list objects in bucket '{self.bucket_name}'. " + "Please check your bucket policy and/or IAM policy." + ) + if status_code == 401 or error_code == "SignatureDoesNotMatch": + raise CredentialExpiredError( + "Provided blob storage credentials appear invalid or expired." + ) + + raise CredentialExpiredError( + f"Credential issue encountered ({error_code})." + ) + + if error_code == "NoSuchBucket" or status_code == 404: + raise ConnectorValidationError( + f"Bucket '{self.bucket_name}' does not exist or cannot be found." + ) + + raise ConnectorValidationError( + f"Unexpected S3 client error (code={error_code}, status={status_code}): {e}" + ) + + except Exception as e: + # Catch-all for anything not captured by the above + # Since we are unsure of the error and it may not disable the connector, + # raise an unexpected error (does not disable connector) + raise UnexpectedError( + f"Unexpected error during blob storage settings validation: {e}" + ) + if __name__ == "__main__": credentials_dict = { diff --git a/backend/onyx/connectors/bookstack/connector.py b/backend/onyx/connectors/bookstack/connector.py index 290c9b79f77..c49d7d469af 100644 --- a/backend/onyx/connectors/bookstack/connector.py +++ b/backend/onyx/connectors/bookstack/connector.py @@ -9,10 +9,10 @@ from onyx.connectors.bookstack.client import BookStackApiClient from onyx.connectors.bookstack.client import BookStackClientRequestFailedError from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc -from onyx.connectors.interfaces import ConnectorValidationError -from onyx.connectors.interfaces import CredentialExpiredError +from onyx.connectors.exceptions import ConnectorValidationError +from onyx.connectors.exceptions import CredentialExpiredError +from onyx.connectors.exceptions import InsufficientPermissionsError from onyx.connectors.interfaces import GenerateDocumentsOutput -from onyx.connectors.interfaces import InsufficientPermissionsError from onyx.connectors.interfaces import LoadConnector from onyx.connectors.interfaces import PollConnector from onyx.connectors.interfaces import SecondsSinceUnixEpoch diff --git a/backend/onyx/connectors/confluence/connector.py b/backend/onyx/connectors/confluence/connector.py index 88110098057..3188538b12a 100644 --- a/backend/onyx/connectors/confluence/connector.py +++ b/backend/onyx/connectors/confluence/connector.py @@ -4,6 +4,8 @@ from typing import Any from urllib.parse import quote +from requests.exceptions import HTTPError + from onyx.configs.app_configs import CONFLUENCE_CONNECTOR_LABELS_TO_SKIP from onyx.configs.app_configs import CONFLUENCE_TIMEZONE_OFFSET from onyx.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE @@ -16,6 +18,10 @@ from onyx.connectors.confluence.utils import datetime_from_string from onyx.connectors.confluence.utils import extract_text_from_confluence_html from onyx.connectors.confluence.utils import validate_attachment_filetype +from onyx.connectors.exceptions import ConnectorValidationError +from onyx.connectors.exceptions import CredentialExpiredError +from onyx.connectors.exceptions import InsufficientPermissionsError +from onyx.connectors.exceptions import UnexpectedError from onyx.connectors.interfaces import GenerateDocumentsOutput from onyx.connectors.interfaces import GenerateSlimDocumentOutput from onyx.connectors.interfaces import LoadConnector @@ -30,6 +36,8 @@ from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface from onyx.utils.logger import setup_logger +# from onyx.connectors.exceptions import ConnectorValidationError + logger = setup_logger() # Potential Improvements @@ -397,3 +405,34 @@ def retrieve_all_slim_documents( callback.progress("retrieve_all_slim_documents", 1) yield doc_metadata_list + + def validate_connector_settings(self) -> None: + if self._confluence_client is None: + raise ConnectorMissingCredentialError("Confluence credentials not loaded.") + + try: + spaces = self._confluence_client.get_all_spaces(limit=1) + except HTTPError as e: + status_code = e.response.status_code if e.response else None + if status_code == 401: + raise CredentialExpiredError( + "Invalid or expired Confluence credentials (HTTP 401)." + ) + elif status_code == 403: + raise InsufficientPermissionsError( + "Insufficient permissions to access Confluence resources (HTTP 403)." + ) + else: + raise UnexpectedError( + f"Unexpected Confluence error (status={status_code}): {e}" + ) + except Exception as e: + raise UnexpectedError( + f"Unexpected error while validating Confluence settings: {e}" + ) + + if not spaces or not spaces.get("results"): + raise ConnectorValidationError( + "No Confluence spaces found. Either your credentials lack permissions, or " + "there truly are no spaces in this Confluence instance." + ) diff --git a/backend/onyx/connectors/confluence/onyx_confluence.py b/backend/onyx/connectors/confluence/onyx_confluence.py index 728bf206520..8f36534724a 100644 --- a/backend/onyx/connectors/confluence/onyx_confluence.py +++ b/backend/onyx/connectors/confluence/onyx_confluence.py @@ -11,6 +11,7 @@ from pydantic import BaseModel from requests import HTTPError +from onyx.connectors.exceptions import ConnectorValidationError from onyx.utils.logger import setup_logger logger = setup_logger() @@ -508,11 +509,15 @@ def build_confluence_client( is_cloud: bool, wiki_base: str, ) -> OnyxConfluence: - _validate_connector_configuration( - credentials=credentials, - is_cloud=is_cloud, - wiki_base=wiki_base, - ) + try: + _validate_connector_configuration( + credentials=credentials, + is_cloud=is_cloud, + wiki_base=wiki_base, + ) + except Exception as e: + raise ConnectorValidationError(str(e)) + return OnyxConfluence( api_version="cloud" if is_cloud else "latest", # Remove trailing slash from wiki_base if present diff --git a/backend/onyx/connectors/dropbox/connector.py b/backend/onyx/connectors/dropbox/connector.py index b26be1477cb..49cade07e1b 100644 --- a/backend/onyx/connectors/dropbox/connector.py +++ b/backend/onyx/connectors/dropbox/connector.py @@ -10,10 +10,10 @@ from onyx.configs.app_configs import INDEX_BATCH_SIZE from onyx.configs.constants import DocumentSource -from onyx.connectors.interfaces import ConnectorValidationError -from onyx.connectors.interfaces import CredentialInvalidError +from onyx.connectors.exceptions import ConnectorValidationError +from onyx.connectors.exceptions import CredentialInvalidError +from onyx.connectors.exceptions import InsufficientPermissionsError from onyx.connectors.interfaces import GenerateDocumentsOutput -from onyx.connectors.interfaces import InsufficientPermissionsError from onyx.connectors.interfaces import LoadConnector from onyx.connectors.interfaces import PollConnector from onyx.connectors.interfaces import SecondsSinceUnixEpoch diff --git a/backend/onyx/connectors/exceptions.py b/backend/onyx/connectors/exceptions.py new file mode 100644 index 00000000000..93239cbf7f8 --- /dev/null +++ b/backend/onyx/connectors/exceptions.py @@ -0,0 +1,49 @@ +class ValidationError(Exception): + """General exception for validation errors.""" + + def __init__(self, message: str): + self.message = message + super().__init__(self.message) + + +class ConnectorValidationError(ValidationError): + """General exception for connector validation errors.""" + + def __init__(self, message: str): + self.message = message + super().__init__(self.message) + + +class UnexpectedError(ValidationError): + """Raised when an unexpected error occurs during connector validation. + + Unexpected errors don't necessarily mean the credential is invalid, + but rather that there was an error during the validation process + or we encountered a currently unhandled error case. + """ + + def __init__(self, message: str = "Unexpected error during connector validation"): + super().__init__(message) + + +class CredentialInvalidError(ConnectorValidationError): + """Raised when a connector's credential is invalid.""" + + def __init__(self, message: str = "Credential is invalid"): + super().__init__(message) + + +class CredentialExpiredError(ConnectorValidationError): + """Raised when a connector's credential is expired.""" + + def __init__(self, message: str = "Credential has expired"): + super().__init__(message) + + +class InsufficientPermissionsError(ConnectorValidationError): + """Raised when the credential does not have sufficient API permissions.""" + + def __init__( + self, message: str = "Insufficient permissions for the requested operation" + ): + super().__init__(message) diff --git a/backend/onyx/connectors/factory.py b/backend/onyx/connectors/factory.py index b4f497f65dd..bc77aebb425 100644 --- a/backend/onyx/connectors/factory.py +++ b/backend/onyx/connectors/factory.py @@ -17,6 +17,7 @@ from onyx.connectors.document360.connector import Document360Connector from onyx.connectors.dropbox.connector import DropboxConnector from onyx.connectors.egnyte.connector import EgnyteConnector +from onyx.connectors.exceptions import ConnectorValidationError from onyx.connectors.file.connector import LocalFileConnector from onyx.connectors.fireflies.connector import FirefliesConnector from onyx.connectors.freshdesk.connector import FreshdeskConnector @@ -31,7 +32,6 @@ from onyx.connectors.hubspot.connector import HubSpotConnector from onyx.connectors.interfaces import BaseConnector from onyx.connectors.interfaces import CheckpointConnector -from onyx.connectors.interfaces import ConnectorValidationError from onyx.connectors.interfaces import EventConnector from onyx.connectors.interfaces import LoadConnector from onyx.connectors.interfaces import PollConnector @@ -55,9 +55,8 @@ from onyx.connectors.zulip.connector import ZulipConnector from onyx.db.connector import fetch_connector_by_id from onyx.db.credentials import backend_update_credential_json -from onyx.db.credentials import fetch_credential_by_id_for_user +from onyx.db.credentials import fetch_credential_by_id from onyx.db.models import Credential -from onyx.db.models import User class ConnectorMissingException(Exception): @@ -184,23 +183,21 @@ def validate_ccpair_for_user( connector_id: int, credential_id: int, db_session: Session, - user: User | None, tenant_id: str | None, -) -> None: + enforce_creation: bool = True, +) -> bool: # Validate the connector settings connector = fetch_connector_by_id(connector_id, db_session) - credential = fetch_credential_by_id_for_user( + credential = fetch_credential_by_id( credential_id, - user, db_session, - get_editable=False, ) if not connector: raise ValueError("Connector not found") if connector.source == DocumentSource.INGESTION_API: - return + return False if not credential: raise ValueError("Credential not found") @@ -214,7 +211,13 @@ def validate_ccpair_for_user( credential=credential, tenant_id=tenant_id, ) + except ConnectorValidationError as e: + raise e except Exception as e: - raise ConnectorValidationError(str(e)) + if enforce_creation: + raise ConnectorValidationError(str(e)) + else: + return False runnable_connector.validate_connector_settings() + return True diff --git a/backend/onyx/connectors/github/connector.py b/backend/onyx/connectors/github/connector.py index 437651e97c1..531fc36a16d 100644 --- a/backend/onyx/connectors/github/connector.py +++ b/backend/onyx/connectors/github/connector.py @@ -17,14 +17,14 @@ from onyx.configs.app_configs import GITHUB_CONNECTOR_BASE_URL from onyx.configs.app_configs import INDEX_BATCH_SIZE from onyx.configs.constants import DocumentSource -from onyx.connectors.interfaces import ConnectorValidationError -from onyx.connectors.interfaces import CredentialExpiredError +from onyx.connectors.exceptions import ConnectorValidationError +from onyx.connectors.exceptions import CredentialExpiredError +from onyx.connectors.exceptions import InsufficientPermissionsError +from onyx.connectors.exceptions import UnexpectedError from onyx.connectors.interfaces import GenerateDocumentsOutput -from onyx.connectors.interfaces import InsufficientPermissionsError from onyx.connectors.interfaces import LoadConnector from onyx.connectors.interfaces import PollConnector from onyx.connectors.interfaces import SecondsSinceUnixEpoch -from onyx.connectors.interfaces import UnexpectedError from onyx.connectors.models import ConnectorMissingCredentialError from onyx.connectors.models import Document from onyx.connectors.models import Section diff --git a/backend/onyx/connectors/gmail/connector.py b/backend/onyx/connectors/gmail/connector.py index f76bf1a64b2..177264afd76 100644 --- a/backend/onyx/connectors/gmail/connector.py +++ b/backend/onyx/connectors/gmail/connector.py @@ -305,6 +305,7 @@ def _fetch_threads( userId=user_email, fields=THREAD_FIELDS, id=thread["id"], + continue_on_404_or_403=True, ) # full_threads is an iterator containing a single thread # so we need to convert it to a list and grab the first element @@ -336,6 +337,7 @@ def _fetch_slim_threads( userId=user_email, fields=THREAD_LIST_FIELDS, q=query, + continue_on_404_or_403=True, ): doc_batch.append( SlimDocument( diff --git a/backend/onyx/connectors/google_drive/connector.py b/backend/onyx/connectors/google_drive/connector.py index 1287a896076..29093d0ecaf 100644 --- a/backend/onyx/connectors/google_drive/connector.py +++ b/backend/onyx/connectors/google_drive/connector.py @@ -13,6 +13,9 @@ from onyx.configs.app_configs import INDEX_BATCH_SIZE from onyx.configs.app_configs import MAX_FILE_SIZE_BYTES from onyx.configs.constants import DocumentSource +from onyx.connectors.exceptions import ConnectorValidationError +from onyx.connectors.exceptions import CredentialExpiredError +from onyx.connectors.exceptions import InsufficientPermissionsError from onyx.connectors.google_drive.doc_conversion import build_slim_document from onyx.connectors.google_drive.doc_conversion import ( convert_drive_item_to_document, @@ -42,6 +45,7 @@ from onyx.connectors.interfaces import PollConnector from onyx.connectors.interfaces import SecondsSinceUnixEpoch from onyx.connectors.interfaces import SlimConnector +from onyx.connectors.models import ConnectorMissingCredentialError from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface from onyx.utils.logger import setup_logger from onyx.utils.retry_wrapper import retry_builder @@ -137,7 +141,7 @@ def __init__( "Please visit the docs for help with the new setup: " f"{SCOPE_DOC_URL}" ) - raise ValueError( + raise ConnectorValidationError( "Google Drive connector received old input parameters. " "Please visit the docs for help with the new setup: " f"{SCOPE_DOC_URL}" @@ -151,7 +155,7 @@ def __init__( and not my_drive_emails and not shared_drive_urls ): - raise ValueError( + raise ConnectorValidationError( "Nothing to index. Please specify at least one of the following: " "include_shared_drives, include_my_drives, include_files_shared_with_me, " "shared_folder_urls, or my_drive_emails" @@ -220,7 +224,15 @@ def creds(self) -> OAuthCredentials | ServiceAccountCredentials: return self._creds def load_credentials(self, credentials: dict[str, Any]) -> dict[str, str] | None: - self._primary_admin_email = credentials[DB_CREDENTIALS_PRIMARY_ADMIN_KEY] + try: + self._primary_admin_email = credentials[DB_CREDENTIALS_PRIMARY_ADMIN_KEY] + except KeyError: + logger.debug(f"Credentials: {credentials}") # TODO: remove + raise ValueError( + "Primary admin email missing, " + "should not call this property " + "before calling load_credentials" + ) self._creds, new_creds_dict = get_google_creds( credentials=credentials, @@ -602,3 +614,70 @@ def retrieve_all_slim_documents( if MISSING_SCOPES_ERROR_STR in str(e): raise PermissionError(ONYX_SCOPE_INSTRUCTIONS) from e raise e + + def validate_connector_settings(self) -> None: + """ + Validate that we can connect to Google Drive (and optionally Admin APIs) with the provided credentials. + Attempts a small listing of files to confirm scope and access. + + Raises: + ConnectorMissingCredentialError: If no credentials are loaded. + CredentialExpiredError: If credentials are invalid/expired (HTTP 401). + InsufficientPermissionsError: If we lack the Drive scope or are otherwise denied (HTTP 403). + ConnectorValidationError: Any other unexpected errors (e.g. missing domain, no files). + """ + + if self._creds is None: + raise ConnectorMissingCredentialError( + "Google Drive credentials not loaded." + ) + + if self._primary_admin_email is None: + raise ConnectorValidationError( + "Primary admin email not found in credentials. " + "Ensure DB_CREDENTIALS_PRIMARY_ADMIN_KEY is set." + ) + + try: + # Try a minimal file listing to confirm we have scope and valid credentials + drive_service = get_drive_service(self._creds, self._primary_admin_email) + response = ( + drive_service.files().list(pageSize=1, fields="files(id)").execute() + ) + + # If listing returns no files, that's OK for validation + # but we've at least confirmed we have the necessary scopes and can connect. + # If you *require* at least 1 file in Drive, you could handle that here. + _ = response.get("files", []) + + if isinstance(self._creds, ServiceAccountCredentials): + retry_builder()(get_root_folder_id)(drive_service) + + except HttpError as e: + status_code = e.resp.status if e.resp else None + if status_code == 401: + raise CredentialExpiredError( + "Invalid or expired Google Drive credentials (401)." + ) + elif status_code == 403: + # Could mean missing scopes or the account lacks permission + raise InsufficientPermissionsError( + "Google Drive app lacks required permissions (403). " + "Please ensure the necessary scopes are granted and Drive " + "apps are enabled." + ) + else: + raise ConnectorValidationError( + f"Unexpected Google Drive error (status={status_code}): {e}" + ) + + except Exception as e: + # Check for scope-related hints from the error message + if MISSING_SCOPES_ERROR_STR in str(e): + raise InsufficientPermissionsError( + "Google Drive credentials are missing required scopes. " + f"{ONYX_SCOPE_INSTRUCTIONS}" + ) + raise ConnectorValidationError( + f"Unexpected error during Google Drive validation: {e}" + ) diff --git a/backend/onyx/connectors/hubspot/connector.py b/backend/onyx/connectors/hubspot/connector.py index d769795598d..2e47f4c1d51 100644 --- a/backend/onyx/connectors/hubspot/connector.py +++ b/backend/onyx/connectors/hubspot/connector.py @@ -87,16 +87,18 @@ def _process_tickets( contact = api_client.crm.contacts.basic_api.get_by_id( contact_id=contact.id ) - associated_emails.append(contact.properties["email"]) + email = contact.properties.get("email") + if email is not None: + associated_emails.append(email) if notes: for note in notes.results: note = api_client.crm.objects.notes.basic_api.get_by_id( note_id=note.id, properties=["content", "hs_body_preview"] ) - if note.properties["hs_body_preview"] is None: - continue - associated_notes.append(note.properties["hs_body_preview"]) + preview = note.properties.get("hs_body_preview") + if preview is not None: + associated_notes.append(preview) associated_emails_str = " ,".join(associated_emails) associated_notes_str = " ".join(associated_notes) diff --git a/backend/onyx/connectors/interfaces.py b/backend/onyx/connectors/interfaces.py index e49065a9bd8..8516d08a382 100644 --- a/backend/onyx/connectors/interfaces.py +++ b/backend/onyx/connectors/interfaces.py @@ -146,46 +146,3 @@ def load_from_checkpoint( ``` """ raise NotImplementedError - - -class ConnectorValidationError(Exception): - """General exception for connector validation errors.""" - - def __init__(self, message: str): - self.message = message - super().__init__(self.message) - - -class UnexpectedError(Exception): - """Raised when an unexpected error occurs during connector validation. - - Unexpected errors don't necessarily mean the credential is invalid, - but rather that there was an error during the validation process - or we encountered a currently unhandled error case. - """ - - def __init__(self, message: str = "Unexpected error during connector validation"): - super().__init__(message) - - -class CredentialInvalidError(ConnectorValidationError): - """Raised when a connector's credential is invalid.""" - - def __init__(self, message: str = "Credential is invalid"): - super().__init__(message) - - -class CredentialExpiredError(ConnectorValidationError): - """Raised when a connector's credential is expired.""" - - def __init__(self, message: str = "Credential has expired"): - super().__init__(message) - - -class InsufficientPermissionsError(ConnectorValidationError): - """Raised when the credential does not have sufficient API permissions.""" - - def __init__( - self, message: str = "Insufficient permissions for the requested operation" - ): - super().__init__(message) diff --git a/backend/onyx/connectors/notion/connector.py b/backend/onyx/connectors/notion/connector.py index 70f1f001a97..70b5d216cd5 100644 --- a/backend/onyx/connectors/notion/connector.py +++ b/backend/onyx/connectors/notion/connector.py @@ -16,10 +16,11 @@ from onyx.connectors.cross_connector_utils.rate_limit_wrapper import ( rl_requests, ) -from onyx.connectors.interfaces import ConnectorValidationError -from onyx.connectors.interfaces import CredentialExpiredError +from onyx.connectors.exceptions import ConnectorValidationError +from onyx.connectors.exceptions import CredentialExpiredError +from onyx.connectors.exceptions import InsufficientPermissionsError +from onyx.connectors.exceptions import UnexpectedError from onyx.connectors.interfaces import GenerateDocumentsOutput -from onyx.connectors.interfaces import InsufficientPermissionsError from onyx.connectors.interfaces import LoadConnector from onyx.connectors.interfaces import PollConnector from onyx.connectors.interfaces import SecondsSinceUnixEpoch @@ -670,12 +671,12 @@ def validate_connector_settings(self) -> None: "Please try again later." ) else: - raise Exception( + raise UnexpectedError( f"Unexpected Notion HTTP error (status={status_code}): {http_err}" ) from http_err except Exception as exc: - raise Exception( + raise UnexpectedError( f"Unexpected error during Notion settings validation: {exc}" ) diff --git a/backend/onyx/connectors/onyx_jira/connector.py b/backend/onyx/connectors/onyx_jira/connector.py index 98c56808738..89eec0b165c 100644 --- a/backend/onyx/connectors/onyx_jira/connector.py +++ b/backend/onyx/connectors/onyx_jira/connector.py @@ -12,11 +12,11 @@ from onyx.configs.app_configs import JIRA_CONNECTOR_MAX_TICKET_SIZE from onyx.configs.constants import DocumentSource from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc -from onyx.connectors.interfaces import ConnectorValidationError -from onyx.connectors.interfaces import CredentialExpiredError +from onyx.connectors.exceptions import ConnectorValidationError +from onyx.connectors.exceptions import CredentialExpiredError +from onyx.connectors.exceptions import InsufficientPermissionsError from onyx.connectors.interfaces import GenerateDocumentsOutput from onyx.connectors.interfaces import GenerateSlimDocumentOutput -from onyx.connectors.interfaces import InsufficientPermissionsError from onyx.connectors.interfaces import LoadConnector from onyx.connectors.interfaces import PollConnector from onyx.connectors.interfaces import SecondsSinceUnixEpoch diff --git a/backend/onyx/connectors/slack/connector.py b/backend/onyx/connectors/slack/connector.py index 940a2a728d6..b74c5d871b0 100644 --- a/backend/onyx/connectors/slack/connector.py +++ b/backend/onyx/connectors/slack/connector.py @@ -18,6 +18,10 @@ from onyx.configs.app_configs import ENABLE_EXPENSIVE_EXPERT_CALLS from onyx.configs.app_configs import INDEX_BATCH_SIZE from onyx.configs.constants import DocumentSource +from onyx.connectors.exceptions import ConnectorValidationError +from onyx.connectors.exceptions import CredentialExpiredError +from onyx.connectors.exceptions import InsufficientPermissionsError +from onyx.connectors.exceptions import UnexpectedError from onyx.connectors.interfaces import CheckpointConnector from onyx.connectors.interfaces import CheckpointOutput from onyx.connectors.interfaces import GenerateSlimDocumentOutput @@ -82,14 +86,14 @@ def get_channels( get_public: bool = True, get_private: bool = True, ) -> list[ChannelType]: - """Get all channels in the workspace""" + """Get all channels in the workspace.""" channels: list[dict[str, Any]] = [] channel_types = [] if get_public: channel_types.append("public_channel") if get_private: channel_types.append("private_channel") - # try getting private channels as well at first + # Try fetching both public and private channels first: try: channels = _collect_paginated_channels( client=client, @@ -97,19 +101,18 @@ def get_channels( channel_types=channel_types, ) except SlackApiError as e: - logger.info(f"Unable to fetch private channels due to - {e}") - logger.info("trying again without private channels") + logger.info(f"Unable to fetch private channels due to: {e}") + logger.info("Trying again without private channels.") if get_public: channel_types = ["public_channel"] else: - logger.warning("No channels to fetch") + logger.warning("No channels to fetch.") return [] channels = _collect_paginated_channels( client=client, exclude_archived=exclude_archived, channel_types=channel_types, ) - return channels @@ -666,6 +669,92 @@ def load_from_checkpoint( ) return checkpoint + def validate_connector_settings(self) -> None: + """ + 1. Verifies the bot token is valid for the workspace (via auth_test). + 2. Ensures the bot has enough scope to list channels. + 3. Checks that every channel specified in self.channels exists. + """ + if self.client is None: + raise ConnectorMissingCredentialError("Slack credentials not loaded.") + + try: + # 1) Validate connection to workspace + auth_response = self.client.auth_test() + if not auth_response.get("ok", False): + error_msg = auth_response.get( + "error", "Unknown error from Slack auth_test" + ) + raise ConnectorValidationError(f"Failed Slack auth_test: {error_msg}") + + # 2) Minimal test to confirm listing channels works + test_resp = self.client.conversations_list( + limit=1, types=["public_channel"] + ) + if not test_resp.get("ok", False): + error_msg = test_resp.get("error", "Unknown error from Slack") + if error_msg == "invalid_auth": + raise ConnectorValidationError( + f"Invalid Slack bot token ({error_msg})." + ) + elif error_msg == "not_authed": + raise CredentialExpiredError( + f"Invalid or expired Slack bot token ({error_msg})." + ) + raise UnexpectedError(f"Slack API returned a failure: {error_msg}") + + # 3) If channels are specified, verify each is accessible + if self.channels: + accessible_channels = get_channels( + client=self.client, + exclude_archived=True, + get_public=True, + get_private=True, + ) + # For quick lookups by name or ID, build a map: + accessible_channel_names = {ch["name"] for ch in accessible_channels} + accessible_channel_ids = {ch["id"] for ch in accessible_channels} + + for user_channel in self.channels: + # If your connector expects channel "names" (e.g., "general"), + # verify user_channel is in channel names. Otherwise, if you + # expect channel "ids" (e.g., "C12345"), check accessible_channel_ids. + if ( + user_channel not in accessible_channel_names + and user_channel not in accessible_channel_ids + ): + raise ConnectorValidationError( + f"Channel '{user_channel}' not found or inaccessible in this workspace." + ) + + except SlackApiError as e: + slack_error = e.response.get("error", "") + if slack_error == "missing_scope": + # The needed scope is typically "channels:read" or "groups:read" + # for reading channels. The error may contain more detail. + raise InsufficientPermissionsError( + "Slack bot token lacks the necessary scope to list/access channels. " + "Please ensure your Slack app has 'channels:read' (and/or 'groups:read' for private channels)." + ) + elif slack_error == "invalid_auth": + raise CredentialExpiredError( + f"Invalid Slack bot token ({slack_error})." + ) + elif slack_error == "not_authed": + raise CredentialExpiredError( + f"Invalid or expired Slack bot token ({slack_error})." + ) + else: + raise UnexpectedError( + f"Unexpected Slack error '{slack_error}' during settings validation." + ) + except ConnectorValidationError as e: + raise e + except Exception as e: + raise UnexpectedError( + f"Unexpected error during Slack settings validation: {e}" + ) + if __name__ == "__main__": import os diff --git a/backend/onyx/connectors/teams/connector.py b/backend/onyx/connectors/teams/connector.py index 50559e6fc6a..02c8543410f 100644 --- a/backend/onyx/connectors/teams/connector.py +++ b/backend/onyx/connectors/teams/connector.py @@ -5,6 +5,7 @@ import msal # type: ignore from office365.graph_client import GraphClient # type: ignore +from office365.runtime.client_request_exception import ClientRequestException # type: ignore from office365.teams.channels.channel import Channel # type: ignore from office365.teams.chats.messages.message import ChatMessage # type: ignore from office365.teams.team import Team # type: ignore @@ -12,6 +13,10 @@ from onyx.configs.app_configs import INDEX_BATCH_SIZE from onyx.configs.constants import DocumentSource from onyx.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc +from onyx.connectors.exceptions import ConnectorValidationError +from onyx.connectors.exceptions import CredentialExpiredError +from onyx.connectors.exceptions import InsufficientPermissionsError +from onyx.connectors.exceptions import UnexpectedError from onyx.connectors.interfaces import GenerateDocumentsOutput from onyx.connectors.interfaces import LoadConnector from onyx.connectors.interfaces import PollConnector @@ -279,6 +284,64 @@ def poll_source( end_datetime = datetime.fromtimestamp(end, timezone.utc) return self._fetch_from_teams(start=start_datetime, end=end_datetime) + def validate_connector_settings(self) -> None: + """ + Validate that we can connect to Microsoft Teams with the provided MSAL/Graph credentials + and that we can see at least one Team. If the user has specified a list of Teams by name, + confirm at least one of them is found. + + Raises: + ConnectorMissingCredentialError: If the Graph client is not yet set (missing credentials). + CredentialExpiredError: If credentials appear invalid/expired (e.g. 401 Unauthorized). + InsufficientPermissionsError: If the app lacks required permissions to read Teams. + ConnectorValidationError: If no Teams are found, or if requested Teams are not found. + """ + if self.graph_client is None: + raise ConnectorMissingCredentialError("Teams credentials not loaded.") + + try: + # Minimal call to confirm we can retrieve Teams + found_teams = self._get_all_teams() + + # You may optionally catch the Graph/Office365 request exception if available: + except ClientRequestException as e: + status_code = e.response.status_code + if status_code == 401: + raise CredentialExpiredError( + "Invalid or expired Microsoft Teams credentials (401 Unauthorized)." + ) + elif status_code == 403: + raise InsufficientPermissionsError( + "Your app lacks sufficient permissions to read Teams (403 Forbidden)." + ) + else: + raise UnexpectedError(f"Unexpected error retrieving teams: {e}") + + except Exception as e: + error_str = str(e).lower() + if ( + "unauthorized" in error_str + or "401" in error_str + or "invalid_grant" in error_str + ): + raise CredentialExpiredError( + "Invalid or expired Microsoft Teams credentials." + ) + elif "forbidden" in error_str or "403" in error_str: + raise InsufficientPermissionsError( + "App lacks required permissions to read from Microsoft Teams." + ) + raise ConnectorValidationError( + f"Unexpected error during Teams validation: {e}" + ) + + # If we get this far, the Graph call succeeded. Check for presence of Teams: + if not found_teams: + raise ConnectorValidationError( + "No Teams found for the given credentials. " + "Either there are no Teams in this tenant, or your app does not have permission to view them." + ) + if __name__ == "__main__": connector = TeamsConnector(teams=os.environ["TEAMS"].split(",")) diff --git a/backend/onyx/connectors/web/connector.py b/backend/onyx/connectors/web/connector.py index 0d9f546bc8c..9bd6e2073af 100644 --- a/backend/onyx/connectors/web/connector.py +++ b/backend/onyx/connectors/web/connector.py @@ -25,12 +25,12 @@ from onyx.configs.app_configs import WEB_CONNECTOR_OAUTH_TOKEN_URL from onyx.configs.app_configs import WEB_CONNECTOR_VALIDATE_URLS from onyx.configs.constants import DocumentSource -from onyx.connectors.interfaces import ConnectorValidationError -from onyx.connectors.interfaces import CredentialExpiredError +from onyx.connectors.exceptions import ConnectorValidationError +from onyx.connectors.exceptions import CredentialExpiredError +from onyx.connectors.exceptions import InsufficientPermissionsError +from onyx.connectors.exceptions import UnexpectedError from onyx.connectors.interfaces import GenerateDocumentsOutput -from onyx.connectors.interfaces import InsufficientPermissionsError from onyx.connectors.interfaces import LoadConnector -from onyx.connectors.interfaces import UnexpectedError from onyx.connectors.models import Document from onyx.connectors.models import Section from onyx.file_processing.extract_file_text import read_pdf_file @@ -440,7 +440,10 @@ def validate_connector_settings(self) -> None: "No URL configured. Please provide at least one valid URL." ) - if self.web_connector_type == WEB_CONNECTOR_VALID_SETTINGS.SITEMAP.value: + if ( + self.web_connector_type == WEB_CONNECTOR_VALID_SETTINGS.SITEMAP.value + or self.web_connector_type == WEB_CONNECTOR_VALID_SETTINGS.RECURSIVE.value + ): return None # We'll just test the first URL for connectivity and correctness diff --git a/backend/onyx/db/connector_credential_pair.py b/backend/onyx/db/connector_credential_pair.py index 712e81894a0..e42f7c6b00d 100644 --- a/backend/onyx/db/connector_credential_pair.py +++ b/backend/onyx/db/connector_credential_pair.py @@ -194,9 +194,14 @@ def get_connector_credential_pair_from_id_for_user( def get_connector_credential_pair_from_id( db_session: Session, cc_pair_id: int, + eager_load_credential: bool = False, ) -> ConnectorCredentialPair | None: stmt = select(ConnectorCredentialPair).distinct() stmt = stmt.where(ConnectorCredentialPair.id == cc_pair_id) + + if eager_load_credential: + stmt = stmt.options(joinedload(ConnectorCredentialPair.credential)) + result = db_session.execute(stmt) return result.scalar_one_or_none() @@ -396,8 +401,8 @@ def add_credential_to_connector( # If we are in the seeding flow, we shouldn't need to check if the credential belongs to the user if seeding_flow: credential = fetch_credential_by_id( - db_session=db_session, credential_id=credential_id, + db_session=db_session, ) else: credential = fetch_credential_by_id_for_user( diff --git a/backend/onyx/db/credentials.py b/backend/onyx/db/credentials.py index 1e4c7673cce..40edbcef3d4 100644 --- a/backend/onyx/db/credentials.py +++ b/backend/onyx/db/credentials.py @@ -169,8 +169,8 @@ def fetch_credential_by_id_for_user( def fetch_credential_by_id( - db_session: Session, credential_id: int, + db_session: Session, ) -> Credential | None: stmt = select(Credential).distinct() stmt = stmt.where(Credential.id == credential_id) @@ -422,8 +422,8 @@ def create_initial_public_credential(db_session: Session) -> None: "There must exist an empty public credential for data connectors that do not require additional Auth." ) first_credential = fetch_credential_by_id( - db_session=db_session, credential_id=PUBLIC_CREDENTIAL_ID, + db_session=db_session, ) if first_credential is not None: diff --git a/backend/onyx/db/milestone.py b/backend/onyx/db/milestone.py index 4d38d367578..caa0af8c3c5 100644 --- a/backend/onyx/db/milestone.py +++ b/backend/onyx/db/milestone.py @@ -30,6 +30,9 @@ def create_milestone( def create_milestone_if_not_exists( user: User | None, event_type: MilestoneRecordType, db_session: Session ) -> tuple[Milestone, bool]: + # print(f"Setting search path to {CURRENT_TENANT_ID_CONTEXTVAR.get()}") + # Set the search path using the connection directly + # db_session.connection().execute(f"SET search_path TO {CURRENT_TENANT_ID_CONTEXTVAR.get()}") # Check if it exists milestone = db_session.execute( select(Milestone).where(Milestone.event_type == event_type) diff --git a/backend/onyx/server/documents/cc_pair.py b/backend/onyx/server/documents/cc_pair.py index b383bc3dd2d..3744341b2c0 100644 --- a/backend/onyx/server/documents/cc_pair.py +++ b/backend/onyx/server/documents/cc_pair.py @@ -25,8 +25,8 @@ from onyx.background.indexing.models import IndexAttemptErrorPydantic from onyx.configs.constants import OnyxCeleryPriority from onyx.configs.constants import OnyxCeleryTask +from onyx.connectors.exceptions import ValidationError from onyx.connectors.factory import validate_ccpair_for_user -from onyx.connectors.interfaces import ConnectorValidationError from onyx.db.connector import delete_connector from onyx.db.connector_credential_pair import add_credential_to_connector from onyx.db.connector_credential_pair import ( @@ -620,9 +620,7 @@ def associate_credential_to_connector( ) try: - validate_ccpair_for_user( - connector_id, credential_id, db_session, user, tenant_id - ) + validate_ccpair_for_user(connector_id, credential_id, db_session, tenant_id) response = add_credential_to_connector( db_session=db_session, @@ -649,7 +647,7 @@ def associate_credential_to_connector( return response - except ConnectorValidationError as e: + except ValidationError as e: # If validation fails, delete the connector and commit the changes # Ensures we don't leave invalid connectors in the database # NOTE: consensus is that it makes sense to unify connector and ccpair creation flows @@ -660,7 +658,6 @@ def associate_credential_to_connector( raise HTTPException( status_code=400, detail="Connector validation error: " + str(e) ) - except IntegrityError as e: logger.error(f"IntegrityError: {e}") raise HTTPException(status_code=400, detail="Name must be unique") diff --git a/backend/onyx/server/documents/connector.py b/backend/onyx/server/documents/connector.py index 7abc8819189..edd82971c0d 100644 --- a/backend/onyx/server/documents/connector.py +++ b/backend/onyx/server/documents/connector.py @@ -28,6 +28,7 @@ from onyx.configs.constants import MilestoneRecordType from onyx.configs.constants import OnyxCeleryPriority from onyx.configs.constants import OnyxCeleryTask +from onyx.connectors.exceptions import ConnectorValidationError from onyx.connectors.factory import validate_ccpair_for_user from onyx.connectors.google_utils.google_auth import ( get_google_oauth_creds, @@ -62,7 +63,6 @@ from onyx.connectors.google_utils.shared_constants import ( GoogleOAuthAuthenticationMethod, ) -from onyx.connectors.interfaces import ConnectorValidationError from onyx.db.connector import create_connector from onyx.db.connector import delete_connector from onyx.db.connector import fetch_connector_by_id @@ -854,7 +854,6 @@ def create_connector_with_mock_credential( connector_id=connector_id, credential_id=credential_id, db_session=db_session, - user=user, tenant_id=tenant_id, ) response = add_credential_to_connector( diff --git a/backend/onyx/server/documents/credential.py b/backend/onyx/server/documents/credential.py index 54769632d95..6ef58768218 100644 --- a/backend/onyx/server/documents/credential.py +++ b/backend/onyx/server/documents/credential.py @@ -106,7 +106,6 @@ def swap_credentials_for_connector( credential_swap_req.connector_id, credential_swap_req.new_credential_id, db_session, - user, tenant_id, ) diff --git a/web/src/app/admin/connector/[ccPairId]/ModifyStatusButtonCluster.tsx b/web/src/app/admin/connector/[ccPairId]/ModifyStatusButtonCluster.tsx index b5b4e7ecbf2..e78605ee526 100644 --- a/web/src/app/admin/connector/[ccPairId]/ModifyStatusButtonCluster.tsx +++ b/web/src/app/admin/connector/[ccPairId]/ModifyStatusButtonCluster.tsx @@ -8,6 +8,7 @@ import { buildCCPairInfoUrl } from "./lib"; import { setCCPairStatus } from "@/lib/ccPair"; import { useState } from "react"; import { LoadingAnimation } from "@/components/Loading"; +import { ConfirmEntityModal } from "@/components/modals/ConfirmEntityModal"; export function ModifyStatusButtonCluster({ ccPair, @@ -16,11 +17,24 @@ export function ModifyStatusButtonCluster({ }) { const { popup, setPopup } = usePopup(); const [isUpdating, setIsUpdating] = useState(false); + const [showConfirmModal, setShowConfirmModal] = useState(false); const handleStatusChange = async ( newStatus: ConnectorCredentialPairStatus ) => { if (isUpdating) return; // Prevent double-clicks or multiple requests + + if ( + ccPair.status === ConnectorCredentialPairStatus.INVALID && + newStatus === ConnectorCredentialPairStatus.ACTIVE + ) { + setShowConfirmModal(true); + } else { + await updateStatus(newStatus); + } + }; + + const updateStatus = async (newStatus: ConnectorCredentialPairStatus) => { setIsUpdating(true); try { @@ -39,12 +53,14 @@ export function ModifyStatusButtonCluster({ // Compute the button text based on current state and backend status const buttonText = - ccPair.status === ConnectorCredentialPairStatus.PAUSED + ccPair.status === ConnectorCredentialPairStatus.PAUSED || + ccPair.status === ConnectorCredentialPairStatus.INVALID ? "Re-Enable" : "Pause"; const tooltip = - ccPair.status === ConnectorCredentialPairStatus.PAUSED + ccPair.status === ConnectorCredentialPairStatus.PAUSED || + ccPair.status === ConnectorCredentialPairStatus.INVALID ? "Click to start indexing again!" : "When paused, the connector's documents will still be visible. However, no new documents will be indexed."; @@ -54,14 +70,16 @@ export function ModifyStatusButtonCluster({