diff --git a/backend/ee/onyx/db/connector_credential_pair.py b/backend/ee/onyx/db/connector_credential_pair.py index 0ba1a6f2b6b..0ffbcd48002 100644 --- a/backend/ee/onyx/db/connector_credential_pair.py +++ b/backend/ee/onyx/db/connector_credential_pair.py @@ -4,6 +4,7 @@ from onyx.configs.constants import DocumentSource from onyx.db.connector_credential_pair import get_connector_credential_pair from onyx.db.enums import AccessType +from onyx.db.enums import ConnectorCredentialPairStatus from onyx.db.models import Connector from onyx.db.models import ConnectorCredentialPair from onyx.db.models import UserGroup__ConnectorCredentialPair @@ -36,6 +37,7 @@ def get_cc_pairs_by_source( db_session: Session, source_type: DocumentSource, only_sync: bool, + only_valid: bool, ) -> list[ConnectorCredentialPair]: """ Get all cc_pairs for a given source type (and optionally only sync) @@ -51,6 +53,11 @@ def get_cc_pairs_by_source( if only_sync: query = query.filter(ConnectorCredentialPair.access_type == AccessType.SYNC) + if only_valid: + query = query.filter( + ConnectorCredentialPair.status == ConnectorCredentialPairStatus.ACTIVE + ) + cc_pairs = query.all() return cc_pairs diff --git a/backend/ee/onyx/external_permissions/google_drive/doc_sync.py b/backend/ee/onyx/external_permissions/google_drive/doc_sync.py index 60ec0d3ecbc..8eaa7813f3b 100644 --- a/backend/ee/onyx/external_permissions/google_drive/doc_sync.py +++ b/backend/ee/onyx/external_permissions/google_drive/doc_sync.py @@ -62,12 +62,14 @@ def _fetch_permissions_for_permission_ids( user_email=(owner_email or google_drive_connector.primary_admin_email), ) + # We continue on 404 or 403 because the document may not exist or the user may not have access to it fetched_permissions = execute_paginated_retrieval( retrieval_function=drive_service.permissions().list, list_key="permissions", fileId=doc_id, fields="permissions(id, emailAddress, type, domain)", supportsAllDrives=True, + continue_on_404_or_403=True, ) permissions_for_doc_id = [] @@ -104,7 +106,13 @@ def _get_permissions_from_slim_doc( user_emails: set[str] = set() group_emails: set[str] = set() public = False + skipped_permissions = 0 + for permission in permissions_list: + if not permission: + skipped_permissions += 1 + continue + permission_type = permission["type"] if permission_type == "user": user_emails.add(permission["emailAddress"]) @@ -121,6 +129,9 @@ def _get_permissions_from_slim_doc( elif permission_type == "anyone": public = True + logger.info( + f"Skipped {skipped_permissions} permissions for document {slim_doc.id}– {len(permissions_list)} permissions fetched" + ) drive_id = permission_info.get("drive_id") group_ids = group_emails | ({drive_id} if drive_id is not None else set()) diff --git a/backend/onyx/background/celery/tasks/external_group_syncing/tasks.py b/backend/onyx/background/celery/tasks/external_group_syncing/tasks.py index bca2afb9ae3..4170ec4e179 100644 --- a/backend/onyx/background/celery/tasks/external_group_syncing/tasks.py +++ b/backend/onyx/background/celery/tasks/external_group_syncing/tasks.py @@ -37,8 +37,10 @@ from onyx.configs.constants import OnyxRedisConstants from onyx.configs.constants import OnyxRedisLocks from onyx.configs.constants import OnyxRedisSignals +from onyx.connectors.factory import validate_ccpair_for_user from onyx.db.connector import mark_cc_pair_as_external_group_synced from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id +from onyx.db.connector_credential_pair import update_connector_credential_pair from onyx.db.engine import get_session_with_current_tenant from onyx.db.enums import AccessType from onyx.db.enums import ConnectorCredentialPairStatus @@ -148,7 +150,7 @@ def check_for_external_group_sync(self: Task, *, tenant_id: str | None) -> bool for source in GROUP_PERMISSIONS_IS_CC_PAIR_AGNOSTIC: # These are ordered by cc_pair id so the first one is the one we want cc_pairs_to_dedupe = get_cc_pairs_by_source( - db_session, source, only_sync=True + db_session, source, only_sync=True, only_valid=True ) # We only want to sync one cc_pair per source type # in GROUP_PERMISSIONS_IS_CC_PAIR_AGNOSTIC so we dedupe here @@ -380,6 +382,30 @@ def connector_external_group_sync_generator_task( f"No connector credential pair found for id: {cc_pair_id}" ) + try: + created = validate_ccpair_for_user( + cc_pair.connector.id, + cc_pair.credential.id, + db_session, + tenant_id, + enforce_creation=False, + ) + if not created: + task_logger.warning( + f"Unable to create connector credential pair for id: {cc_pair_id}" + ) + except Exception: + task_logger.exception( + f"validate_ccpair_permissions_sync exceptioned: cc_pair={cc_pair_id}" + ) + update_connector_credential_pair( + db_session=db_session, + connector_id=cc_pair.connector.id, + credential_id=cc_pair.credential.id, + status=ConnectorCredentialPairStatus.INVALID, + ) + raise + source_type = cc_pair.connector.source ext_group_sync_func = GROUP_PERMISSIONS_FUNC_MAP.get(source_type) diff --git a/backend/onyx/connectors/google_drive/connector.py b/backend/onyx/connectors/google_drive/connector.py index 5cdb297d4e2..36fe3c56e9d 100644 --- a/backend/onyx/connectors/google_drive/connector.py +++ b/backend/onyx/connectors/google_drive/connector.py @@ -626,6 +626,7 @@ def validate_connector_settings(self) -> None: InsufficientPermissionsError: If we lack the Drive scope or are otherwise denied (HTTP 403). ConnectorValidationError: Any other unexpected errors (e.g. missing domain, no files). """ + if self._creds is None: raise ConnectorMissingCredentialError( "Google Drive credentials not loaded." @@ -649,6 +650,9 @@ def validate_connector_settings(self) -> None: # If you *require* at least 1 file in Drive, you could handle that here. _ = response.get("files", []) + if isinstance(self._creds, ServiceAccountCredentials): + retry_builder()(get_root_folder_id)(drive_service) + except HttpError as e: status_code = e.resp.status if e.resp else None if status_code == 401: @@ -659,7 +663,8 @@ def validate_connector_settings(self) -> None: # Could mean missing scopes or the account lacks permission raise InsufficientPermissionsError( "Google Drive app lacks required permissions (403). " - "Please ensure the necessary scopes are granted." + "Please ensure the necessary scopes are granted and Drive " + "apps are enabled." ) else: raise ConnectorValidationError( diff --git a/backend/onyx/connectors/slack/connector.py b/backend/onyx/connectors/slack/connector.py index 1f64dfdfb39..16ffc3a219a 100644 --- a/backend/onyx/connectors/slack/connector.py +++ b/backend/onyx/connectors/slack/connector.py @@ -86,14 +86,14 @@ def get_channels( get_public: bool = True, get_private: bool = True, ) -> list[ChannelType]: - """Get all channels in the workspace""" + """Get all channels in the workspace.""" channels: list[dict[str, Any]] = [] channel_types = [] if get_public: channel_types.append("public_channel") if get_private: channel_types.append("private_channel") - # try getting private channels as well at first + # Try fetching both public and private channels first: try: channels = _collect_paginated_channels( client=client, @@ -101,19 +101,18 @@ def get_channels( channel_types=channel_types, ) except SlackApiError as e: - logger.info(f"Unable to fetch private channels due to - {e}") - logger.info("trying again without private channels") + logger.info(f"Unable to fetch private channels due to: {e}") + logger.info("Trying again without private channels.") if get_public: channel_types = ["public_channel"] else: - logger.warning("No channels to fetch") + logger.warning("No channels to fetch.") return [] channels = _collect_paginated_channels( client=client, exclude_archived=exclude_archived, channel_types=channel_types, ) - return channels @@ -671,19 +670,32 @@ def load_from_checkpoint( return checkpoint def validate_connector_settings(self) -> None: + """ + 1. Verifies the bot token is valid for the workspace (via auth_test). + 2. Ensures the bot has enough scope to list channels. + 3. Checks that every channel specified in self.channels exists. + """ if self.client is None: raise ConnectorMissingCredentialError("Slack credentials not loaded.") try: - # Minimal API call to confirm we can list channels - # We set limit=1 for a lightweight check - response = self.client.conversations_list(limit=1, types=["public_channel"]) - # Just ensure Slack responded "ok: True" - if not response.get("ok", False): - error_msg = response.get("error", "Unknown error from Slack") + # 1) Validate connection to workspace + auth_response = self.client.auth_test() + if not auth_response.get("ok", False): + error_msg = auth_response.get( + "error", "Unknown error from Slack auth_test" + ) + raise ConnectorValidationError(f"Failed Slack auth_test: {error_msg}") + + # 2) Minimal test to confirm listing channels works + test_resp = self.client.conversations_list( + limit=1, types=["public_channel"] + ) + if not test_resp.get("ok", False): + error_msg = test_resp.get("error", "Unknown error from Slack") if error_msg == "invalid_auth": raise ConnectorValidationError( - f"Invalid or expired Slack bot token ({error_msg})." + f"Invalid Slack bot token ({error_msg})." ) elif error_msg == "not_authed": raise CredentialExpiredError( @@ -691,31 +703,54 @@ def validate_connector_settings(self) -> None: ) raise UnexpectedError(f"Slack API returned a failure: {error_msg}") + # 3) If channels are specified, verify each is accessible + if self.channels: + accessible_channels = get_channels( + client=self.client, + exclude_archived=True, + get_public=True, + get_private=True, + ) + # For quick lookups by name or ID, build a map: + accessible_channel_names = {ch["name"] for ch in accessible_channels} + accessible_channel_ids = {ch["id"] for ch in accessible_channels} + + for user_channel in self.channels: + # If your connector expects channel "names" (e.g., "general"), + # verify user_channel is in channel names. Otherwise, if you + # expect channel "ids" (e.g., "C12345"), check accessible_channel_ids. + if ( + user_channel not in accessible_channel_names + and user_channel not in accessible_channel_ids + ): + raise ConnectorValidationError( + f"Channel '{user_channel}' not found or inaccessible in this workspace." + ) + except SlackApiError as e: slack_error = e.response.get("error", "") if slack_error == "missing_scope": # The needed scope is typically "channels:read" or "groups:read" - # for viewing channels. The error message might also contain the - # specific scope needed vs. provided. + # for reading channels. The error may contain more detail. raise InsufficientPermissionsError( - "Slack bot token lacks the necessary scope to list channels. " - "Please ensure your Slack app has 'channels:read' (or 'groups:read' for private channels) enabled." + "Slack bot token lacks the necessary scope to list/access channels. " + "Please ensure your Slack app has 'channels:read' (and/or 'groups:read' for private channels)." ) elif slack_error == "invalid_auth": raise CredentialExpiredError( - f"Invalid or expired Slack bot token ({slack_error})." + f"Invalid Slack bot token ({slack_error})." ) elif slack_error == "not_authed": raise CredentialExpiredError( f"Invalid or expired Slack bot token ({slack_error})." ) else: - # Generic Slack error raise UnexpectedError( f"Unexpected Slack error '{slack_error}' during settings validation." ) + except ConnectorValidationError as e: + raise e except Exception as e: - # Catch-all for unexpected exceptions raise UnexpectedError( f"Unexpected error during Slack settings validation: {e}" ) diff --git a/web/src/app/admin/connector/[ccPairId]/ModifyStatusButtonCluster.tsx b/web/src/app/admin/connector/[ccPairId]/ModifyStatusButtonCluster.tsx index b5b4e7ecbf2..e78605ee526 100644 --- a/web/src/app/admin/connector/[ccPairId]/ModifyStatusButtonCluster.tsx +++ b/web/src/app/admin/connector/[ccPairId]/ModifyStatusButtonCluster.tsx @@ -8,6 +8,7 @@ import { buildCCPairInfoUrl } from "./lib"; import { setCCPairStatus } from "@/lib/ccPair"; import { useState } from "react"; import { LoadingAnimation } from "@/components/Loading"; +import { ConfirmEntityModal } from "@/components/modals/ConfirmEntityModal"; export function ModifyStatusButtonCluster({ ccPair, @@ -16,11 +17,24 @@ export function ModifyStatusButtonCluster({ }) { const { popup, setPopup } = usePopup(); const [isUpdating, setIsUpdating] = useState(false); + const [showConfirmModal, setShowConfirmModal] = useState(false); const handleStatusChange = async ( newStatus: ConnectorCredentialPairStatus ) => { if (isUpdating) return; // Prevent double-clicks or multiple requests + + if ( + ccPair.status === ConnectorCredentialPairStatus.INVALID && + newStatus === ConnectorCredentialPairStatus.ACTIVE + ) { + setShowConfirmModal(true); + } else { + await updateStatus(newStatus); + } + }; + + const updateStatus = async (newStatus: ConnectorCredentialPairStatus) => { setIsUpdating(true); try { @@ -39,12 +53,14 @@ export function ModifyStatusButtonCluster({ // Compute the button text based on current state and backend status const buttonText = - ccPair.status === ConnectorCredentialPairStatus.PAUSED + ccPair.status === ConnectorCredentialPairStatus.PAUSED || + ccPair.status === ConnectorCredentialPairStatus.INVALID ? "Re-Enable" : "Pause"; const tooltip = - ccPair.status === ConnectorCredentialPairStatus.PAUSED + ccPair.status === ConnectorCredentialPairStatus.PAUSED || + ccPair.status === ConnectorCredentialPairStatus.INVALID ? "Click to start indexing again!" : "When paused, the connector's documents will still be visible. However, no new documents will be indexed."; @@ -54,14 +70,16 @@ export function ModifyStatusButtonCluster({