Skip to content

Commit

Permalink
Proxito: remove redirect for README.html files (#11443)
Browse files Browse the repository at this point in the history
* Proxito: remove redirect for `README.html` files

Fully removal after deprecation and browndates.

Reference:
* #9993
* #11348
* https://about.readthedocs.com/blog/2024/05/readme-html-deprecated/

* Simplify the logic for tryfiles on El Proxito

* Add missing case

* Remove README from the docstring
  • Loading branch information
humitos authored Jul 11, 2024
1 parent 2058090 commit e0a45ef
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 167 deletions.
3 changes: 1 addition & 2 deletions readthedocs/projects/tasks/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,8 +203,7 @@ def _create_imported_files_and_search_index(

# Create the imported file only if it's a top-level 404 file,
# or if it's an index file. We don't need to keep track of all files.
# TODO: delete README.html from this list after deprecation.
tryfiles = ["index.html", "README.html"]
tryfiles = ["index.html"]
if relpath == "404.html" or filename in tryfiles:
html_files_to_save.append(html_file)

Expand Down
91 changes: 0 additions & 91 deletions readthedocs/proxito/tests/test_full.py
Original file line number Diff line number Diff line change
Expand Up @@ -994,32 +994,6 @@ def test_versioned_no_slash(self):
"/en/latest/",
)

@mock.patch.object(BuildMediaFileSystemStorageTest, "open")
def test_directory_indexes_readme_serving(self, storage_open):
self.project.versions.update(active=True, built=True)

get(
HTMLFile,
project=self.project,
version=self.version,
path="readme-exists/README.html",
name="README.html",
)

# Confirm we've serving from storage for the `index-exists/index.html` file
response = self.client.get(
reverse(
"proxito_404_handler",
kwargs={"proxito_path": "/en/latest/readme-exists"},
),
headers={"host": "project.readthedocs.io"},
)
self.assertEqual(response.status_code, 302)
self.assertEqual(
response["location"],
"/en/latest/readme-exists/README.html",
)

def test_directory_indexes_get_args(self):
self.project.versions.update(active=True, built=True)
get(
Expand Down Expand Up @@ -1075,71 +1049,6 @@ def test_404_storage_serves_custom_404_sphinx(self, storage_open):
self.assertEqual(response.status_code, 404)
storage_open.assert_called_once_with("html/project/fancy-version/404.html")

def test_redirects_to_correct_index_ending_with_slash(self):
"""When the path ends with a slash, we try README.html as index."""
self.project.versions.update(active=True, built=True)
version = fixture.get(
Version,
slug="fancy-version",
privacy_level=constants.PUBLIC,
active=True,
built=True,
project=self.project,
documentation_type=SPHINX,
)

get(
HTMLFile,
project=self.project,
version=version,
path="not-found/README.html",
name="README.html",
)
response = self.client.get(
reverse(
"proxito_404_handler",
kwargs={"proxito_path": "/en/fancy-version/not-found/"},
),
headers={"host": "project.readthedocs.io"},
)
self.assertEqual(response.status_code, 302)
self.assertEqual(
response["location"], "/en/fancy-version/not-found/README.html"
)

def test_redirects_to_correct_index_ending_without_slash(self):
"""When the path doesn't end with a slash, we try both, index.html and README.html."""
self.project.versions.update(active=True, built=True)
version = fixture.get(
Version,
slug="fancy-version",
privacy_level=constants.PUBLIC,
active=True,
built=True,
project=self.project,
documentation_type=SPHINX,
)

get(
HTMLFile,
project=self.project,
version=version,
path="not-found/README.html",
name="README.html",
)

response = self.client.get(
reverse(
"proxito_404_handler",
kwargs={"proxito_path": "/en/fancy-version/not-found"},
),
headers={"host": "project.readthedocs.io"},
)
self.assertEqual(response.status_code, 302)
self.assertEqual(
response["location"], "/en/fancy-version/not-found/README.html"
)

@mock.patch.object(BuildMediaFileSystemStorageTest, "open")
def test_404_index_redirect_skips_not_built_versions(self, storage_open):
self.version.built = False
Expand Down
72 changes: 24 additions & 48 deletions readthedocs/proxito/views/serve.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
ServeRedirectMixin,
StorageFileNotFound,
)
from readthedocs.proxito.views.utils import allow_readme_html_as_index
from readthedocs.redirects.exceptions import InfiniteRedirectException
from readthedocs.storage import build_media_storage

Expand Down Expand Up @@ -389,7 +388,6 @@ def get(self, request, proxito_path):
This does a couple of things:
* Handles directory indexing for URLs that don't end in a slash
* Handles directory indexing for README.html (for now)
* Check for user redirects
* Record the broken link for analytics
* Handles custom 404 serving
Expand Down Expand Up @@ -491,7 +489,7 @@ def get(self, request, proxito_path):

# Check and perform redirects on 404 handler for non-external domains only.
# NOTE: This redirect check must be done after trying files like
# ``index.html`` and ``README.html`` to emulate the behavior we had when
# ``index.html`` to emulate the behavior we had when
# serving directly from NGINX without passing through Python.
if not unresolved_domain.is_from_external_domain:
try:
Expand Down Expand Up @@ -637,58 +635,36 @@ def _get_custom_404_page(self, request, project, version=None):

def _get_index_file_redirect(self, request, project, version, filename, full_path):
"""
Check if a file is a directory and redirect to its index/README file.
Check if a file is a directory and redirect to its index file.
For example:
- /en/latest/foo -> /en/latest/foo/index.html
- /en/latest/foo -> /en/latest/foo/README.html
- /en/latest/foo/ -> /en/latest/foo/README.html
"""
# If the path ends with `/`, we already tried to serve
# the `/index.html` file.
if full_path.endswith("/"):
return None

if allow_readme_html_as_index():
tryfiles = ["index.html", "README.html"]
# If the path ends with `/`, we already tried to serve
# the `/index.html` file, so we only need to test for
# the `/README.html` file.
if full_path.endswith("/"):
tryfiles = ["README.html"]
else:
tryfiles = ["index.html"]

tryfiles = [
(filename.rstrip("/") + f"/{tryfile}").lstrip("/") for tryfile in tryfiles
]
available_index_files = list(
HTMLFile.objects.filter(version=version, path__in=tryfiles).values_list(
"path", flat=True
)
)

for tryfile in tryfiles:
if tryfile not in available_index_files:
continue

log.info("Redirecting to index file.", tryfile=tryfile)
# Use urlparse so that we maintain GET args in our redirect
parts = urlparse(full_path)
if allow_readme_html_as_index() and tryfile.endswith("README.html"):
new_path = parts.path.rstrip("/") + "/README.html"
else:
new_path = parts.path.rstrip("/") + "/"

# `full_path` doesn't include query params.`
query = urlparse(request.get_full_path()).query
redirect_url = parts._replace(
path=new_path,
query=query,
).geturl()

# TODO: decide if we need to check for infinite redirect here
# (from URL == to URL)
return HttpResponseRedirect(redirect_url)
tryfile = (filename.rstrip("/") + "/index.html").lstrip("/")
if not HTMLFile.objects.filter(version=version, path=tryfile).exists():
return None

return None
log.info("Redirecting to index file.", tryfile=tryfile)
# Use urlparse so that we maintain GET args in our redirect
parts = urlparse(full_path)
new_path = parts.path.rstrip("/") + "/"

# `full_path` doesn't include query params.`
query = urlparse(request.get_full_path()).query
redirect_url = parts._replace(
path=new_path,
query=query,
).geturl()

# TODO: decide if we need to check for infinite redirect here
# (from URL == to URL)
return HttpResponseRedirect(redirect_url)


class ServeError404(SettingsOverrideObject):
Expand Down
26 changes: 0 additions & 26 deletions readthedocs/proxito/views/utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@
import datetime

import pytz
import structlog
from django.conf import settings
from django.http import HttpResponse
from django.shortcuts import render

Expand Down Expand Up @@ -60,25 +56,3 @@ def proxito_404_page_handler(
)
r.status_code = http_status
return r


def allow_readme_html_as_index():
if not settings.RTD_ENFORCE_BROWNOUTS_FOR_DEPRECATIONS:
return True

tzinfo = pytz.timezone("America/Los_Angeles")
now = datetime.datetime.now(tz=tzinfo)

# Brownout dates as published in https://about.readthedocs.com/blog/2024/05/readme-html-deprecated/
# fmt: off
return not any([
# 12 hours browndate
datetime.datetime(2024, 6, 10, 0, 0, 0, tzinfo=tzinfo) < now < datetime.datetime(2024, 6, 10, 12, 0, 0, tzinfo=tzinfo),
# 24 hours browndate
datetime.datetime(2024, 6, 17, 0, 0, 0, tzinfo=tzinfo) < now < datetime.datetime(2024, 6, 18, 0, 0, 0, tzinfo=tzinfo),
# 48 hours browndate
datetime.datetime(2024, 6, 24, 0, 0, 0, tzinfo=tzinfo) < now < datetime.datetime(2024, 6, 26, 0, 0, 0, tzinfo=tzinfo),
# Deprecated after July 1st
datetime.datetime(2024, 7, 1, 0, 0, 0, tzinfo=tzinfo) < now,
])
# fmt: on

0 comments on commit e0a45ef

Please sign in to comment.