Redirects: fix infinite loop detection (#11038)

An additional check was added, we now check if the to_url is a subpath. --------- Co-authored-by: Manuel Kaufmann <humitos@gmail.com>
readthedocs · Jan 17, 2024 · e358382 · e358382
1 parent 4a759ee
commit e358382
Show file tree

Hide file tree

Showing 2 changed files with 105 additions and 14 deletions.
diff --git a/readthedocs/proxito/tests/test_old_redirects.py b/readthedocs/proxito/tests/test_old_redirects.py
@@ -15,6 +15,7 @@
 
 from readthedocs.builds.constants import EXTERNAL
 from readthedocs.builds.models import Version
+from readthedocs.projects.constants import SINGLE_VERSION_WITHOUT_TRANSLATIONS
 from readthedocs.projects.models import Feature
 from readthedocs.redirects.constants import (
     CLEAN_URL_TO_HTML_REDIRECT,
@@ -407,6 +408,78 @@ def test_page_redirect_avoid_infinite_redirect(self):
                 headers={"host": "project.dev.readthedocs.io"},
             )
 
+    def test_exact_redirect_to_parent_path(self):
+        self.project.versioning_scheme = SINGLE_VERSION_WITHOUT_TRANSLATIONS
+        self.project.save()
+        fixture.get(
+            Redirect,
+            project=self.project,
+            redirect_type=EXACT_REDIRECT,
+            from_url="/en/latest/*",
+            to_url="/:splat",
+        )
+        r = self.client.get(
+            "/en/latest/dir/redirect.html",
+            headers={"host": "project.dev.readthedocs.io"},
+        )
+        self.assertEqual(r.status_code, 302)
+        self.assertEqual(
+            r["Location"],
+            "http://project.dev.readthedocs.io/dir/redirect.html",
+        )
+
+        fixture.get(
+            Redirect,
+            project=self.project,
+            redirect_type=EXACT_REDIRECT,
+            from_url="/one/two/*",
+            to_url="/one/:splat",
+        )
+        r = self.client.get(
+            "/one/two/three/redirect.html",
+            headers={"host": "project.dev.readthedocs.io"},
+        )
+        self.assertEqual(r.status_code, 302)
+        self.assertEqual(
+            r["Location"],
+            "http://project.dev.readthedocs.io/one/three/redirect.html",
+        )
+
+    def test_page_redirect_to_parent_path(self):
+        fixture.get(
+            Redirect,
+            project=self.project,
+            redirect_type=PAGE_REDIRECT,
+            from_url="/guides/*",
+            to_url="/:splat",
+        )
+        r = self.client.get(
+            "/en/latest/guides/redirect.html",
+            headers={"host": "project.dev.readthedocs.io"},
+        )
+        self.assertEqual(r.status_code, 302)
+        self.assertEqual(
+            r["Location"],
+            "http://project.dev.readthedocs.io/en/latest/redirect.html",
+        )
+
+        fixture.get(
+            Redirect,
+            project=self.project,
+            redirect_type=PAGE_REDIRECT,
+            from_url="/one/two/*",
+            to_url="/one/:splat",
+        )
+        r = self.client.get(
+            "/en/latest/one/two/three/redirect.html",
+            headers={"host": "project.dev.readthedocs.io"},
+        )
+        self.assertEqual(r.status_code, 302)
+        self.assertEqual(
+            r["Location"],
+            "http://project.dev.readthedocs.io/en/latest/one/three/redirect.html",
+        )
+
     def test_redirect_root(self):
         Redirect.objects.create(
             project=self.project,

diff --git a/readthedocs/redirects/models.py b/readthedocs/redirects/models.py
@@ -260,26 +260,44 @@ def get_redirect_path(self, filename, path=None, language=None, version_slug=Non
 
     def _redirect_with_wildcard(self, current_path):
         if self.from_url.endswith("*"):
-            # Detect infinite redirects of the form:
-            # /dir/* -> /dir/subdir/:splat
-            # For example:
-            # /dir/test.html will redirect to /dir/subdir/test.html,
-            # and if file doesn't exist, it will redirect to
-            # /dir/subdir/subdir/test.html and then to /dir/subdir/subdir/test.html and so on.
-            if ":splat" in self.to_url:
-                to_url_without_splat = self.to_url.split(":splat", maxsplit=1)[0]
-                if current_path.startswith(to_url_without_splat):
-                    log.debug(
-                        "Infinite redirect loop detected",
-                        redirect=self,
-                    )
-                    return None
+            if self._will_cause_infinite_redirect(current_path):
+                log.debug(
+                    "Infinite redirect loop detected",
+                    redirect=self,
+                )
+                return None
 
             splat = current_path[len(self.from_url_without_rest) :]
             to_url = self.to_url.replace(":splat", splat)
             return to_url
         return self.to_url
 
+    def _will_cause_infinite_redirect(self, current_path):
+        """
+        Check if this redirect will cause an infinite redirect for the given path.
+
+        We detect infinite redirects of the form:
+
+          /dir/* -> /dir/subdir/:splat
+
+        For example, /dir/test.html will redirect to /dir/subdir/test.html,
+        and if the file doesn't exist, it will redirect to
+        /dir/subdir/subdir/test.html and then to /dir/subdir/subdir/subdir/test.html and so on.
+
+        We do this by checking if we will redirect to a subdirectory of the current path,
+        and if the current path already starts with the path we will redirect to.
+        """
+        if self.from_url.endswith("*") and ":splat" in self.to_url:
+            to_url_without_splat = self.to_url.split(":splat", maxsplit=1)[0]
+
+            redirects_to_subpath = to_url_without_splat.startswith(
+                self.from_url_without_rest
+            )
+            if redirects_to_subpath and current_path.startswith(to_url_without_splat):
+                return True
+
+        return False
+
     def redirect_page(self, filename, path, language=None, version_slug=None):
         log.debug("Redirecting...", redirect=self)
         to_url = self._redirect_with_wildcard(current_path=filename)