Skip to content

Commit

Permalink
Proxito: always check 404/index.hmtml (#9983)
Browse files Browse the repository at this point in the history
* Proxito: always check `404/index.hmtml`

With the introduction of `build.commands` we cannot use
`version.documentation_type` anymore since those versions will be `generic` and
we can't skip checking for this file location.

Note this commit may add and extra call to S3 API for all the 404 pages where
our regular Maze will be shown. However, it removes 2 databsae calls from all
the 404 requests.

We could only add this extra check on S3 for
`version.documentation_type='generic'`, but that would make the code a little
more complex and we won't be removing these 2 db queries.

Reference: readthedocs/sphinx-notfound-page#215 (comment)

* Docs: mention `404/index.html` in hosting docs

* Proxito: get the `Version` from inside `register_page_view`

* Proxit: get the version after checking for bot score

* Proxit: call `register_page_view` just once

* Log: index file (`index.html` or `README.html`)

I'm interested in `README.html` since we should probably deprecate that "feature"

* Tests: add extra checks to make tests pass

* Lint
  • Loading branch information
humitos authored Feb 7, 2023
1 parent e85ee7d commit 889e9d1
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 51 deletions.
8 changes: 4 additions & 4 deletions docs/user/hosting.rst
Original file line number Diff line number Diff line change
Expand Up @@ -74,18 +74,18 @@ Custom Not Found (404) Pages
----------------------------

If you want your project to use a custom page for not found pages instead of the "Maze Found" default,
you can put a ``404.html`` at the top level of your project's HTML output.
you can put a ``404.html`` or ``404/index.html`` in your project's HTML output.

When a 404 is returned,
Read the Docs checks if there is a ``404.html`` in the root of your project's output
Read the Docs checks if there is a ``404.html`` or ``404/index.html`` in your project's output
corresponding to the *current* version
and uses it if it exists.
Otherwise, it tries to fall back to the ``404.html`` page
Otherwise, it tries to fall back to the ``404.html`` or ``404/index.html`` page
corresponding to the *default* version of the project.

We recommend the `sphinx-notfound-page`_ extension,
which Read the Docs maintains.
It automatically creates a ``404.html`` page for your documentation,
It automatically creates a the 404 page for your documentation,
matching the theme of your project.
See its documentation_ for how to install and customize it.

Expand Down
33 changes: 20 additions & 13 deletions readthedocs/proxito/tests/test_full.py
Original file line number Diff line number Diff line change
Expand Up @@ -819,8 +819,10 @@ def test_404_all_paths_checked_sphinx(self, storage_exists, storage_open):
)
storage_exists.assert_has_calls(
[
mock.call('html/project/fancy-version/404.html'),
mock.call('html/project/latest/404.html'),
mock.call("html/project/fancy-version/404.html"),
mock.call("html/project/fancy-version/404/index.html"),
mock.call("html/project/latest/404.html"),
mock.call("html/project/latest/404/index.html"),
]
)

Expand Down Expand Up @@ -848,8 +850,10 @@ def test_404_all_paths_checked_sphinx_single_html(self, storage_exists, storage_
)
storage_exists.assert_has_calls(
[
mock.call('html/project/fancy-version/404.html'),
mock.call('html/project/latest/404.html'),
mock.call("html/project/fancy-version/404.html"),
mock.call("html/project/fancy-version/404/index.html"),
mock.call("html/project/latest/404.html"),
mock.call("html/project/latest/404/index.html"),
]
)

Expand Down Expand Up @@ -908,10 +912,12 @@ def test_404_all_paths_checked_mkdocs(self,storage_exists):
)
storage_exists.assert_has_calls(
[
mock.call('html/project/fancy-version/not-found/index.html'),
mock.call('html/project/fancy-version/not-found/README.html'),
mock.call('html/project/fancy-version/404.html'),
mock.call('html/project/latest/404.html')
mock.call("html/project/fancy-version/not-found/index.html"),
mock.call("html/project/fancy-version/not-found/README.html"),
mock.call("html/project/fancy-version/404.html"),
mock.call("html/project/fancy-version/404/index.html"),
mock.call("html/project/latest/404.html"),
mock.call("html/project/latest/404/index.html"),
]
)

Expand All @@ -938,11 +944,12 @@ def test_404_all_paths_checked_default_version_different_doc_type(self, storage_
)
storage_exists.assert_has_calls(
[
mock.call('html/project/fancy-version/not-found/index.html'),
mock.call('html/project/fancy-version/not-found/README.html'),
mock.call('html/project/fancy-version/404.html'),
mock.call('html/project/latest/404.html'),
mock.call('html/project/latest/404/index.html'),
mock.call("html/project/fancy-version/not-found/index.html"),
mock.call("html/project/fancy-version/not-found/README.html"),
mock.call("html/project/fancy-version/404.html"),
mock.call("html/project/fancy-version/404/index.html"),
mock.call("html/project/latest/404.html"),
mock.call("html/project/latest/404/index.html"),
]
)

Expand Down
54 changes: 20 additions & 34 deletions readthedocs/proxito/views/serve.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
from readthedocs.core.resolver import resolve_path
from readthedocs.core.utils.extend import SettingsOverrideObject
from readthedocs.projects import constants
from readthedocs.projects.constants import SPHINX_HTMLDIR
from readthedocs.projects.models import Feature
from readthedocs.projects.templatetags.projects_tags import sort_version_aware
from readthedocs.redirects.exceptions import InfiniteRedirectException
Expand Down Expand Up @@ -306,7 +305,7 @@ def get(self, request, proxito_path, template_name='404.html'):
)
log.debug("Trying index filename.")
if build_media_storage.exists(storage_filename_path):
log.info("Redirecting to index file.")
log.info("Redirecting to index file.", tryfile=tryfile)
# Use urlparse so that we maintain GET args in our redirect
parts = urlparse(proxito_path)
if tryfile == "README.html":
Expand Down Expand Up @@ -344,27 +343,30 @@ def get(self, request, proxito_path, template_name='404.html'):
# Continue with our normal 404 handling in this case
pass

# If that doesn't work, attempt to serve the 404 of the current version (version_slug)
# Secondly, try to serve the 404 page for the default version
version = Version.objects.filter(
project=final_project, slug=version_slug
).first()

# If there are no redirect,
# try to serve the custom 404 of the current version (version_slug)
# Then, try to serve the custom 404 page for the default version
# (project.get_default_version())
version = (
Version.objects.filter(project=final_project, slug=version_slug)
.only("documentation_type")
.first()
)
versions = []
if version:
versions.append((version.slug, version.documentation_type))
versions.append(version_slug)
default_version_slug = final_project.get_default_version()
if default_version_slug != version_slug:
default_version_doc_type = (
Version.objects.filter(project=final_project, slug=default_version_slug)
.values_list('documentation_type', flat=True)
.first()
)
versions.append((default_version_slug, default_version_doc_type))
versions.append(default_version_slug)

for version_slug_404, doc_type_404 in versions:
# Register 404 pages into our database for user's analytics
self._register_broken_link(
project=final_project,
version=version,
path=filename,
full_path=proxito_path,
)

for version_slug_404 in versions:
if not self.allowed_user(request, final_project, version_slug_404):
continue

Expand All @@ -374,11 +376,7 @@ def get(self, request, proxito_path, template_name='404.html'):
include_file=False,
version_type=self.version_type,
)
tryfiles = ['404.html']
# SPHINX_HTMLDIR is the only builder
# that could output a 404/index.html file.
if doc_type_404 == SPHINX_HTMLDIR:
tryfiles.append('404/index.html')
tryfiles = ["404.html", "404/index.html"]
for tryfile in tryfiles:
storage_filename_path = build_media_storage.join(storage_root_path, tryfile)
if build_media_storage.exists(storage_filename_path):
Expand All @@ -389,20 +387,8 @@ def get(self, request, proxito_path, template_name='404.html'):
)
resp = HttpResponse(build_media_storage.open(storage_filename_path).read())
resp.status_code = 404
self._register_broken_link(
project=final_project,
version=version,
path=filename,
full_path=proxito_path,
)
return resp

self._register_broken_link(
project=final_project,
version=version,
path=filename,
full_path=proxito_path,
)
raise Http404('No custom 404 page found.')

def _register_broken_link(self, project, version, path, full_path):
Expand Down

0 comments on commit 889e9d1

Please sign in to comment.