Skip to content

Commit

Permalink
Redirects: fix infinite loop detection (#11038)
Browse files Browse the repository at this point in the history
An additional check was added, we now check if the to_url
is a subpath.


---------

Co-authored-by: Manuel Kaufmann <humitos@gmail.com>
  • Loading branch information
stsewd and humitos authored Jan 17, 2024
1 parent 4a759ee commit e358382
Show file tree
Hide file tree
Showing 2 changed files with 105 additions and 14 deletions.
73 changes: 73 additions & 0 deletions readthedocs/proxito/tests/test_old_redirects.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

from readthedocs.builds.constants import EXTERNAL
from readthedocs.builds.models import Version
from readthedocs.projects.constants import SINGLE_VERSION_WITHOUT_TRANSLATIONS
from readthedocs.projects.models import Feature
from readthedocs.redirects.constants import (
CLEAN_URL_TO_HTML_REDIRECT,
Expand Down Expand Up @@ -407,6 +408,78 @@ def test_page_redirect_avoid_infinite_redirect(self):
headers={"host": "project.dev.readthedocs.io"},
)

def test_exact_redirect_to_parent_path(self):
self.project.versioning_scheme = SINGLE_VERSION_WITHOUT_TRANSLATIONS
self.project.save()
fixture.get(
Redirect,
project=self.project,
redirect_type=EXACT_REDIRECT,
from_url="/en/latest/*",
to_url="/:splat",
)
r = self.client.get(
"/en/latest/dir/redirect.html",
headers={"host": "project.dev.readthedocs.io"},
)
self.assertEqual(r.status_code, 302)
self.assertEqual(
r["Location"],
"http://project.dev.readthedocs.io/dir/redirect.html",
)

fixture.get(
Redirect,
project=self.project,
redirect_type=EXACT_REDIRECT,
from_url="/one/two/*",
to_url="/one/:splat",
)
r = self.client.get(
"/one/two/three/redirect.html",
headers={"host": "project.dev.readthedocs.io"},
)
self.assertEqual(r.status_code, 302)
self.assertEqual(
r["Location"],
"http://project.dev.readthedocs.io/one/three/redirect.html",
)

def test_page_redirect_to_parent_path(self):
fixture.get(
Redirect,
project=self.project,
redirect_type=PAGE_REDIRECT,
from_url="/guides/*",
to_url="/:splat",
)
r = self.client.get(
"/en/latest/guides/redirect.html",
headers={"host": "project.dev.readthedocs.io"},
)
self.assertEqual(r.status_code, 302)
self.assertEqual(
r["Location"],
"http://project.dev.readthedocs.io/en/latest/redirect.html",
)

fixture.get(
Redirect,
project=self.project,
redirect_type=PAGE_REDIRECT,
from_url="/one/two/*",
to_url="/one/:splat",
)
r = self.client.get(
"/en/latest/one/two/three/redirect.html",
headers={"host": "project.dev.readthedocs.io"},
)
self.assertEqual(r.status_code, 302)
self.assertEqual(
r["Location"],
"http://project.dev.readthedocs.io/en/latest/one/three/redirect.html",
)

def test_redirect_root(self):
Redirect.objects.create(
project=self.project,
Expand Down
46 changes: 32 additions & 14 deletions readthedocs/redirects/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,26 +260,44 @@ def get_redirect_path(self, filename, path=None, language=None, version_slug=Non

def _redirect_with_wildcard(self, current_path):
if self.from_url.endswith("*"):
# Detect infinite redirects of the form:
# /dir/* -> /dir/subdir/:splat
# For example:
# /dir/test.html will redirect to /dir/subdir/test.html,
# and if file doesn't exist, it will redirect to
# /dir/subdir/subdir/test.html and then to /dir/subdir/subdir/test.html and so on.
if ":splat" in self.to_url:
to_url_without_splat = self.to_url.split(":splat", maxsplit=1)[0]
if current_path.startswith(to_url_without_splat):
log.debug(
"Infinite redirect loop detected",
redirect=self,
)
return None
if self._will_cause_infinite_redirect(current_path):
log.debug(
"Infinite redirect loop detected",
redirect=self,
)
return None

splat = current_path[len(self.from_url_without_rest) :]
to_url = self.to_url.replace(":splat", splat)
return to_url
return self.to_url

def _will_cause_infinite_redirect(self, current_path):
"""
Check if this redirect will cause an infinite redirect for the given path.
We detect infinite redirects of the form:
/dir/* -> /dir/subdir/:splat
For example, /dir/test.html will redirect to /dir/subdir/test.html,
and if the file doesn't exist, it will redirect to
/dir/subdir/subdir/test.html and then to /dir/subdir/subdir/subdir/test.html and so on.
We do this by checking if we will redirect to a subdirectory of the current path,
and if the current path already starts with the path we will redirect to.
"""
if self.from_url.endswith("*") and ":splat" in self.to_url:
to_url_without_splat = self.to_url.split(":splat", maxsplit=1)[0]

redirects_to_subpath = to_url_without_splat.startswith(
self.from_url_without_rest
)
if redirects_to_subpath and current_path.startswith(to_url_without_splat):
return True

return False

def redirect_page(self, filename, path, language=None, version_slug=None):
log.debug("Redirecting...", redirect=self)
to_url = self._redirect_with_wildcard(current_path=filename)
Expand Down

0 comments on commit e358382

Please sign in to comment.