Skip to content

Commit 2c1e08a

Browse files
committed
Refactor gdrive_downloader.py - Google auth session for private documents
- Used double-checked locking to ensure that multiple threads don't create redundant auth sessions simultaneously.
1 parent 0645f9e commit 2c1e08a

File tree

3 files changed

+38
-15
lines changed

3 files changed

+38
-15
lines changed

daras_ai_v2/asr.py

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import multiprocessing
22
import os.path
33
import tempfile
4+
import threading
45
import typing
56
from enum import Enum
67

@@ -932,9 +933,6 @@ def _translate_text(
932933
return result.strip()
933934

934935

935-
_session = None
936-
937-
938936
def _MinT_translate_one_text(
939937
text: str, source_language: str, target_language: str
940938
) -> str:
@@ -953,18 +951,27 @@ def _MinT_translate_one_text(
953951
return tanslation.get("translation", text)
954952

955953

956-
def get_google_auth_session():
954+
_session = None
955+
_session_lock = threading.Lock()
956+
957+
958+
def get_google_auth_session(scopes: typing.Optional[list[str]] = None):
957959
global _session
958960

959961
if _session is None:
960-
import google.auth
961-
from google.auth.transport.requests import AuthorizedSession
962+
with _session_lock:
963+
if _session is None:
964+
import google.auth
965+
from google.auth.transport.requests import AuthorizedSession
962966

963-
creds, project = google.auth.default(
964-
scopes=["https://www.googleapis.com/auth/cloud-platform"]
965-
)
966-
# takes care of refreshing the token and adding it to request headers
967-
_session = AuthorizedSession(credentials=creds), project
967+
if not scopes:
968+
scopes = ["https://www.googleapis.com/auth/cloud-platform"]
969+
970+
creds, project = google.auth.default(
971+
scopes=scopes,
972+
)
973+
# takes care of refreshing the token and adding it to request headers
974+
_session = AuthorizedSession(credentials=creds), project
968975

969976
return _session
970977

daras_ai_v2/gdrive_downloader.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import io
22
import typing
33
from furl import furl
4-
import requests
54

65
from daras_ai_v2.exceptions import UserError
76
from daras_ai_v2.functional import flatmap_parallel
@@ -74,6 +73,7 @@ def gdrive_download(
7473
) -> tuple[bytes, str]:
7574
from googleapiclient import discovery
7675
from googleapiclient.http import MediaIoBaseDownload
76+
from daras_ai_v2.asr import get_google_auth_session
7777

7878
if export_links is None:
7979
export_links = {}
@@ -87,7 +87,9 @@ def gdrive_download(
8787
# export google docs to appropriate type
8888
export_mime_type = DOCS_EXPORT_MIMETYPES.get(mime_type, mime_type)
8989
if f_url_export := export_links.get(export_mime_type, None):
90-
r = requests.get(f_url_export)
90+
drive_scopes = ["https://www.googleapis.com/auth/drive.readonly"]
91+
session, _ = get_google_auth_session(drive_scopes)
92+
r = session.get(f_url_export)
9193
file_bytes = r.content
9294
raise_for_status(r, is_user_url=True)
9395
return file_bytes, export_mime_type

daras_ai_v2/vector_search.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -377,9 +377,23 @@ def doc_url_to_file_metadata(f_url: str) -> FileMetadata:
377377
meta = gdrive_metadata(url_to_gdrive_file_id(f))
378378
except HttpError as e:
379379
if e.status_code == 404:
380+
from google.oauth2.service_account import Credentials
381+
382+
service_account_client_email = Credentials.from_service_account_file(
383+
settings.service_account_key_path
384+
).service_account_email
385+
380386
raise UserError(
381-
f"Could not download the google doc at {f_url} "
382-
f"Please make sure to make the document public for viewing."
387+
# language=HTML
388+
f"""<p>This knowledge base Google Doc is not accessible: <a href="{f_url}" target="_blank">{f_url}</a></p>
389+
<p>To address this:</p>
390+
<ul>
391+
<li>Please make the Google Doc publicly viewable, or</li>
392+
<li>Share the Doc or its parent folder with <br>
393+
<a href="mailto:{service_account_client_email}">{service_account_client_email}</a>
394+
as an authorized viewer.
395+
</li>
396+
</ul>"""
383397
) from e
384398
else:
385399
raise

0 commit comments

Comments
 (0)