Skip to content

Refactor gdrive_downloader.py - Google auth session for private doc… #611

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 15 additions & 13 deletions daras_ai_v2/asr.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
from __future__ import annotations

import multiprocessing
import os.path
import tempfile
import threading
import typing
from enum import Enum
from functools import lru_cache

import gooey_gui as gui
import requests
Expand Down Expand Up @@ -42,6 +46,7 @@

if typing.TYPE_CHECKING:
import google.cloud.speech_v2
from google.auth.transport.requests import AuthorizedSession

TRANSLATE_BATCH_SIZE = 8

Expand Down Expand Up @@ -932,9 +937,6 @@ def _translate_text(
return result.strip()


_session = None


def _MinT_translate_one_text(
text: str, source_language: str, target_language: str
) -> str:
Expand All @@ -953,20 +955,20 @@ def _MinT_translate_one_text(
return tanslation.get("translation", text)


def get_google_auth_session():
global _session
_session_lock = threading.Lock()

if _session is None:

@lru_cache
def get_google_auth_session(*scopes: str) -> tuple[AuthorizedSession, str]:
if not scopes:
scopes = ("https://www.googleapis.com/auth/cloud-platform",)
with _session_lock:
import google.auth
from google.auth.transport.requests import AuthorizedSession

creds, project = google.auth.default(
scopes=["https://www.googleapis.com/auth/cloud-platform"]
)
# takes care of refreshing the token and adding it to request headers
_session = AuthorizedSession(credentials=creds), project

return _session
creds, project = google.auth.default(scopes=scopes)
# AuthorizedSession takes care of refreshing the token and adding it to request headers
return AuthorizedSession(credentials=creds), project


def run_asr(
Expand Down
7 changes: 4 additions & 3 deletions daras_ai_v2/gdrive_downloader.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import io
import typing
from furl import furl
import requests

from daras_ai_v2.exceptions import UserError
from daras_ai_v2.functional import flatmap_parallel
Expand Down Expand Up @@ -74,6 +73,7 @@ def gdrive_download(
) -> tuple[bytes, str]:
from googleapiclient import discovery
from googleapiclient.http import MediaIoBaseDownload
from daras_ai_v2.asr import get_google_auth_session

if export_links is None:
export_links = {}
Expand All @@ -87,9 +87,10 @@ def gdrive_download(
# export google docs to appropriate type
export_mime_type = DOCS_EXPORT_MIMETYPES.get(mime_type, mime_type)
if f_url_export := export_links.get(export_mime_type, None):
r = requests.get(f_url_export)
file_bytes = r.content
session, _ = get_google_auth_session("https://www.googleapis.com/auth/drive.readonly")
r = session.get(f_url_export)
raise_for_status(r, is_user_url=True)
file_bytes = r.content
return file_bytes, export_mime_type

request = service.files().get_media(
Expand Down
13 changes: 11 additions & 2 deletions daras_ai_v2/vector_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,8 +381,17 @@ def doc_url_to_file_metadata(f_url: str) -> FileMetadata:
except HttpError as e:
if e.status_code == 404:
raise UserError(
f"Could not download the google doc at {f_url} "
f"Please make sure to make the document public for viewing."
# language=HTML
f"""\
<p>This knowledge base Google Doc is not accessible: <a href="{f_url}" target="_blank">{f_url}</a></p>
<p>To address this:</p>
<ul>
<li>Please make sure this Google Doc exists and is publicly viewable, or</li>
<li>
Share the Doc or its parent folder with <a href="mailto:[email protected]" target="_blank">[email protected]</a> as an authorized viewer and drop us an email.
</li>
</ul>
"""
) from e
else:
raise
Expand Down
Loading