Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Truncate snapshot filename for long URLs #687

Merged
merged 1 commit into from
Apr 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions bookmarks/services/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,9 @@ def create_html_snapshot(bookmark: Bookmark):
asset.save()


MAX_SNAPSHOT_FILENAME_LENGTH = 192


def _generate_snapshot_filename(asset: BookmarkAsset) -> str:
def sanitize_char(char):
if char.isalnum() or char in ("-", "_", "."):
Expand All @@ -249,6 +252,13 @@ def sanitize_char(char):
formatted_datetime = asset.date_created.strftime("%Y-%m-%d_%H%M%S")
sanitized_url = "".join(sanitize_char(char) for char in asset.bookmark.url)

# Calculate the length of the non-URL parts of the filename
non_url_length = len(f"{asset.asset_type}{formatted_datetime}__.html.gz")
# Calculate the maximum length for the URL part
max_url_length = MAX_SNAPSHOT_FILENAME_LENGTH - non_url_length
# Truncate the URL if necessary
sanitized_url = sanitized_url[:max_url_length]

return f"{asset.asset_type}_{formatted_datetime}_{sanitized_url}.html.gz"


Expand Down
15 changes: 15 additions & 0 deletions bookmarks/tests/test_bookmarks_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -556,6 +556,21 @@ def test_create_html_snapshot_should_update_file_info(self):
self.assertEqual(asset.file, expected_filename)
self.assertTrue(asset.gzip)

@override_settings(LD_ENABLE_SNAPSHOTS=True)
def test_create_html_snapshot_truncate_filename(self):
# Create a bookmark with a very long URL
long_url = "http://" + "a" * 300 + ".com"
bookmark = self.setup_bookmark(url=long_url)

tasks.create_html_snapshot(bookmark)
BookmarkAsset.objects.get(bookmark=bookmark)

# Run periodic task to process the snapshot
tasks._schedule_html_snapshots_task()

asset = BookmarkAsset.objects.get(bookmark=bookmark)
self.assertEqual(len(asset.file), 192)

@override_settings(LD_ENABLE_SNAPSHOTS=True)
def test_create_html_snapshot_should_handle_error(self):
bookmark = self.setup_bookmark(url="https://example.com")
Expand Down