diff --git a/common/djangoapps/student/role_helpers.py b/common/djangoapps/student/role_helpers.py index 132595f5a271..8a12bfa0ac90 100644 --- a/common/djangoapps/student/role_helpers.py +++ b/common/djangoapps/student/role_helpers.py @@ -1,7 +1,9 @@ """ Helpers for student roles """ +from __future__ import annotations +from django.contrib.auth import get_user_model from openedx.core.djangoapps.django_comment_common.models import ( FORUM_ROLE_ADMINISTRATOR, @@ -12,15 +14,20 @@ ) from openedx.core.lib.cache_utils import request_cached from common.djangoapps.student.roles import ( + CourseAccessRole, CourseBetaTesterRole, CourseInstructorRole, CourseStaffRole, GlobalStaff, OrgInstructorRole, - OrgStaffRole + OrgStaffRole, + RoleCache, ) +User = get_user_model() + + @request_cached() def has_staff_roles(user, course_key): """ @@ -40,3 +47,32 @@ def has_staff_roles(user, course_key): is_org_instructor, is_global_staff, has_forum_role]): return True return False + + +@request_cached() +def get_role_cache(user: User) -> RoleCache: + """ + Returns a populated RoleCache for the given user. + + The returned RoleCache is also cached on the provided `user` to improve performance on future roles checks. + + :param user: User + :return: All roles for all courses that this user has access to. + """ + # pylint: disable=protected-access + if not hasattr(user, '_roles'): + user._roles = RoleCache(user) + return user._roles + + +@request_cached() +def get_course_roles(user: User) -> list[CourseAccessRole]: + """ + Returns a list of all course-level roles that this user has. + + :param user: User + :return: All roles for all courses that this user has access to. + """ + # pylint: disable=protected-access + role_cache = get_role_cache(user) + return list(role_cache._roles) diff --git a/common/djangoapps/student/tests/test_roles.py b/common/djangoapps/student/tests/test_roles.py index dff86047d2ff..9037eb902f61 100644 --- a/common/djangoapps/student/tests/test_roles.py +++ b/common/djangoapps/student/tests/test_roles.py @@ -8,6 +8,7 @@ from opaque_keys.edx.keys import CourseKey from common.djangoapps.student.roles import ( + CourseAccessRole, CourseBetaTesterRole, CourseInstructorRole, CourseRole, @@ -23,6 +24,7 @@ OrgStaffRole, RoleCache ) +from common.djangoapps.student.role_helpers import get_course_roles, has_staff_roles from common.djangoapps.student.tests.factories import AnonymousUserFactory, InstructorFactory, StaffFactory, UserFactory @@ -48,6 +50,32 @@ def test_global_staff(self): assert not GlobalStaff().has_user(self.course_instructor) assert GlobalStaff().has_user(self.global_staff) + def test_has_staff_roles(self): + assert has_staff_roles(self.global_staff, self.course_key) + assert has_staff_roles(self.course_staff, self.course_key) + assert has_staff_roles(self.course_instructor, self.course_key) + assert not has_staff_roles(self.student, self.course_key) + + def test_get_course_roles(self): + assert not list(get_course_roles(self.student)) + assert not list(get_course_roles(self.global_staff)) + assert list(get_course_roles(self.course_staff)) == [ + CourseAccessRole( + user=self.course_staff, + course_id=self.course_key, + org=self.course_key.org, + role=CourseStaffRole.ROLE, + ) + ] + assert list(get_course_roles(self.course_instructor)) == [ + CourseAccessRole( + user=self.course_instructor, + course_id=self.course_key, + org=self.course_key.org, + role=CourseInstructorRole.ROLE, + ) + ] + def test_group_name_case_sensitive(self): uppercase_course_id = "ORG/COURSE/NAME" lowercase_course_id = uppercase_course_id.lower() diff --git a/openedx/core/djangoapps/content/search/documents.py b/openedx/core/djangoapps/content/search/documents.py index 1c7824d04b90..d32956c688d0 100644 --- a/openedx/core/djangoapps/content/search/documents.py +++ b/openedx/core/djangoapps/content/search/documents.py @@ -8,6 +8,7 @@ from django.utils.text import slugify from opaque_keys.edx.keys import UsageKey, LearningContextKey +from openedx.core.djangoapps.content.search.models import SearchAccess from openedx.core.djangoapps.content_libraries import api as lib_api from openedx.core.djangoapps.content_tagging import api as tagging_api from openedx.core.djangoapps.xblock import api as xblock_api @@ -29,6 +30,7 @@ class Fields: block_type = "block_type" context_key = "context_key" org = "org" + access_id = "access_id" # .models.SearchAccess.id # breadcrumbs: an array of {"display_name": "..."} entries. First one is the name of the course/library itself. # After that is the name of any parent Section/Subsection/Unit/etc. # It's a list of dictionaries because for now we just include the name of each but in future we may add their IDs. @@ -78,6 +80,14 @@ def _meili_id_from_opaque_key(usage_key: UsageKey) -> str: return slugify(str(usage_key)) + "-" + suffix +def _meili_access_id_from_context_key(context_key: LearningContextKey) -> int: + """ + Retrieve the numeric access id for the given course/library context. + """ + access, _ = SearchAccess.objects.get_or_create(context_key=context_key) + return access.id + + def _fields_from_block(block) -> dict: """ Given an XBlock instance, call its index_dictionary() method to load any @@ -96,6 +106,7 @@ class implementation returns only: # This is called context_key so it's the same for courses and libraries Fields.context_key: str(block.usage_key.context_key), # same as lib_key Fields.org: str(block.usage_key.context_key.org), + Fields.access_id: _meili_access_id_from_context_key(block.usage_key.context_key), Fields.breadcrumbs: [] } # Get the breadcrumbs (course, section, subsection, etc.): diff --git a/openedx/core/djangoapps/content/search/handlers.py b/openedx/core/djangoapps/content/search/handlers.py index e69de29bb2d1..9184b2b2e273 100644 --- a/openedx/core/djangoapps/content/search/handlers.py +++ b/openedx/core/djangoapps/content/search/handlers.py @@ -0,0 +1,23 @@ +""" +Signal/event handlers for content search +""" +from django.db.models.signals import post_delete +from django.dispatch import receiver +from openedx_events.content_authoring.data import ContentLibraryData +from openedx_events.content_authoring.signals import CONTENT_LIBRARY_DELETED + +from openedx.core.djangoapps.content.course_overviews.models import CourseOverview +from openedx.core.djangoapps.content.search.models import SearchAccess + + +# Using post_delete here because there is no COURSE_DELETED event defined. +@receiver(post_delete, sender=CourseOverview) +def delete_course_search_access(sender, instance, **kwargs): # pylint: disable=unused-argument + """Deletes the SearchAccess instance for deleted CourseOverview""" + SearchAccess.objects.filter(context_key=instance.id).delete() + + +@receiver(CONTENT_LIBRARY_DELETED) +def delete_library_search_access(content_library: ContentLibraryData, **kwargs): + """Deletes the SearchAccess instance for deleted content libraries""" + SearchAccess.objects.filter(context_key=content_library.library_key).delete() diff --git a/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py b/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py index 0f52eea51d4d..8d39ef3746eb 100644 --- a/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py +++ b/openedx/core/djangoapps/content/search/management/commands/reindex_studio.py @@ -84,6 +84,7 @@ def handle(self, *args, **options): Fields.org, Fields.tags, Fields.type, + Fields.access_id, ]) # Mark which attributes are used for keyword search, in order of importance: client.index(temp_index_name).update_searchable_attributes([ diff --git a/openedx/core/djangoapps/content/search/migrations/0001_initial.py b/openedx/core/djangoapps/content/search/migrations/0001_initial.py new file mode 100644 index 000000000000..fc332a0d30e5 --- /dev/null +++ b/openedx/core/djangoapps/content/search/migrations/0001_initial.py @@ -0,0 +1,25 @@ +# Generated by Django 4.2.10 on 2024-04-02 04:55 + +from django.db import migrations, models +from opaque_keys.edx.django.models import LearningContextKeyField +from opaque_keys.edx.locator import LibraryLocatorV2 + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ('course_overviews', '0001_initial'), + ('content_libraries', '0001_initial'), + ] + + operations = [ + migrations.CreateModel( + name='SearchAccess', + fields=[ + ('id', models.BigAutoField(help_text='Numeric ID for each Course / Library context. This ID will generally require fewer bits than the full LearningContextKey, allowing more courses and libraries to be represented in content search filters.', primary_key=True, serialize=False)), + ('context_key', LearningContextKeyField(max_length=255, unique=True)), + ], + ), + ] diff --git a/openedx/core/djangoapps/content/search/migrations/__init__.py b/openedx/core/djangoapps/content/search/migrations/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/openedx/core/djangoapps/content/search/models.py b/openedx/core/djangoapps/content/search/models.py new file mode 100644 index 000000000000..711c493ff895 --- /dev/null +++ b/openedx/core/djangoapps/content/search/models.py @@ -0,0 +1,67 @@ +"""Database models for content search""" + +from __future__ import annotations + +from django.db import models +from django.utils.translation import gettext_lazy as _ +from opaque_keys.edx.django.models import LearningContextKeyField +from rest_framework.request import Request + +from common.djangoapps.student.role_helpers import get_course_roles +from common.djangoapps.student.roles import CourseInstructorRole, CourseStaffRole +from openedx.core.djangoapps.content_libraries.api import get_libraries_for_user + + +class SearchAccess(models.Model): + """ + Stores a numeric ID for each ContextKey. + + We use this shorter ID instead of the full ContextKey when determining a user's access to search-indexed course and + library content because: + + a) in some deployments, users may be granted access to more than 1_000 individual courses, and + b) the search filter request is stored in the JWT, which is limited to 8Kib. + """ + id = models.BigAutoField( + primary_key=True, + help_text=_( + "Numeric ID for each Course / Library context. This ID will generally require fewer bits than the full " + "LearningContextKey, allowing more courses and libraries to be represented in content search filters." + ), + ) + context_key = LearningContextKeyField( + max_length=255, unique=True, null=False, + ) + + +def get_access_ids_for_request(request: Request, omit_orgs: list[str] = None) -> list[int]: + """ + Returns a list of SearchAccess.id values for courses and content libraries that the requesting user has been + individually grated access to. + + Omits any courses/libraries with orgs in the `omit_orgs` list. + """ + omit_orgs = omit_orgs or [] + + course_roles = get_course_roles(request.user) + course_clause = models.Q(context_key__in=[ + role.course_id + for role in course_roles + if ( + role.role in [CourseInstructorRole.ROLE, CourseStaffRole.ROLE] + and role.org not in omit_orgs + ) + ]) + + libraries = get_libraries_for_user(user=request.user) + library_clause = models.Q(context_key__in=[ + lib.library_key for lib in libraries + if lib.library_key.org not in omit_orgs + ]) + + # Sort by descending access ID to simulate prioritizing the "most recently created context keys". + return list( + SearchAccess.objects.filter( + course_clause | library_clause + ).order_by('-id').values_list("id", flat=True) + ) diff --git a/openedx/core/djangoapps/content/search/tests/test_documents.py b/openedx/core/djangoapps/content/search/tests/test_documents.py index 9bbc33f4cca3..914cae880a41 100644 --- a/openedx/core/djangoapps/content/search/tests/test_documents.py +++ b/openedx/core/djangoapps/content/search/tests/test_documents.py @@ -8,7 +8,14 @@ from xmodule.modulestore.tests.django_utils import SharedModuleStoreTestCase from xmodule.modulestore.tests.factories import BlockFactory, ToyCourseFactory -from ..documents import searchable_doc_for_course_block +try: + # This import errors in the lms because content.search is not an installed app there. + from ..documents import searchable_doc_for_course_block + from ..models import SearchAccess +except RuntimeError: + searchable_doc_for_course_block = lambda x: x + SearchAccess = {} + STUDIO_SEARCH_ENDPOINT_URL = "/api/content_search/v2/studio/" @@ -26,6 +33,7 @@ def setUpClass(cls): cls.store = modulestore() cls.toy_course = ToyCourseFactory.create() # See xmodule/modulestore/tests/sample_courses.py cls.toy_course_key = cls.toy_course.id + # Get references to some blocks in the toy course cls.html_block_key = cls.toy_course_key.make_usage_key("html", "toyjumpto") # Create a problem in library @@ -55,6 +63,16 @@ def setUpClass(cls): tagging_api.tag_object(str(cls.html_block_key), cls.subject_tags, tags=["Chinese", "Jump Links"]) tagging_api.tag_object(str(cls.html_block_key), cls.difficulty_tags, tags=["Normal"]) + @property + def toy_course_access_id(self): + """ + Returns the SearchAccess.id created for the toy course. + + This SearchAccess object is created when documents are added to the search index, so this method must be called + after this step, or risk a DoesNotExist error. + """ + return SearchAccess.objects.get(context_key=self.toy_course_key).id + def test_problem_block(self): """ Test how a problem block gets represented in the search index @@ -71,6 +89,7 @@ def test_problem_block(self): "block_id": "Test_Problem", "context_key": "course-v1:edX+toy+2012_Fall", "org": "edX", + "access_id": self.toy_course_access_id, "display_name": "Test Problem", "breadcrumbs": [ {"display_name": "Toy Course"}, @@ -105,6 +124,7 @@ def test_html_block(self): "block_id": "toyjumpto", "context_key": "course-v1:edX+toy+2012_Fall", "org": "edX", + "access_id": self.toy_course_access_id, "display_name": "Text", "breadcrumbs": [ {"display_name": "Toy Course"}, @@ -139,6 +159,7 @@ def test_video_block_untagged(self): "block_id": "Welcome", "context_key": "course-v1:edX+toy+2012_Fall", "org": "edX", + "access_id": self.toy_course_access_id, "display_name": "Welcome", "breadcrumbs": [ {"display_name": "Toy Course"}, diff --git a/openedx/core/djangoapps/content/search/tests/test_models.py b/openedx/core/djangoapps/content/search/tests/test_models.py new file mode 100644 index 000000000000..ef42a1c04879 --- /dev/null +++ b/openedx/core/djangoapps/content/search/tests/test_models.py @@ -0,0 +1,247 @@ +"""Content search model tests""" +from __future__ import annotations + +import ddt +from django.test import RequestFactory +from django.utils.crypto import get_random_string +from organizations.models import Organization + +from common.djangoapps.student.auth import update_org_role +from common.djangoapps.student.roles import CourseInstructorRole, CourseStaffRole, OrgInstructorRole, OrgStaffRole +from common.djangoapps.student.tests.factories import UserFactory +from openedx.core.djangoapps.content.course_overviews.tests.factories import CourseOverviewFactory +from openedx.core.djangoapps.content_libraries import api as library_api +from openedx.core.djangolib.testing.utils import skip_unless_cms +from xmodule.modulestore.tests.django_utils import SharedModuleStoreTestCase +from xmodule.modulestore.tests.factories import CourseFactory + +try: + # This import errors in the lms because content.search is not an installed app there. + from openedx.core.djangoapps.content.search.models import SearchAccess, get_access_ids_for_request +except RuntimeError: + SearchAccess = {} + get_access_ids_for_request = lambda request: [] + + +class StudioSearchTestMixin: + """ + Sets up user, org, course, library, and access for studio search tests. + """ + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.global_staff = UserFactory( + username='staff', email='staff@example.com', is_staff=True, password='staff_pass' + ) + cls.student = UserFactory.create( + username='student', email='student@example.com', is_staff=False, password='student_pass' + ) + cls.course_staff = UserFactory.create( + username='course_staff', email='course_staff@example.com', is_staff=False, password='course_staff_pass' + ) + cls.course_instructor = UserFactory.create( + username='course_instr', email='course_instr@example.com', is_staff=False, password='course_instr_pass' + ) + cls.org_staff = UserFactory.create( + username='org_staff', email='org_staff@example.com', is_staff=False, password='org_staff_pass' + ) + cls.org_instructor = UserFactory.create( + username='org_instr', email='org_instr@example.com', is_staff=False, password='org_instr_pass' + ) + + def setUp(self): + """ + Add users, orgs, courses, and libraries. + """ + super().setUp() + + self.course_user_keys = [] + self.staff_user_keys = [] + + # Create a few courses that global_staff, course_staff and course_instructor can access + for num in range(3): + course_location = self.store.make_course_key('Org', 'CreatedCourse' + str(num), 'Run') + self.last_course = self._create_course(course_location) + CourseStaffRole(course_location).add_users(self.course_staff) + CourseInstructorRole(course_location).add_users(self.course_instructor) + self.course_user_keys.append(course_location) + + # Create a few courses that only global_staff can access + for num in range(3): + course_location = self.store.make_course_key('Org', 'StaffCourse' + str(num), 'Run') + self._create_course(course_location) + + # Create orgs to test library access + self.org1, _ = Organization.objects.get_or_create( + short_name='org1', + defaults={'name': "Org One"}, + ) + self.org2, _ = Organization.objects.get_or_create( + short_name='org2', + defaults={'name': "Org Two"}, + ) + update_org_role(caller=self.global_staff, role=OrgStaffRole, user=self.org_staff, orgs=['org1']) + update_org_role(caller=self.global_staff, role=OrgInstructorRole, user=self.org_instructor, orgs=['org1']) + + # Create a few libraries that global_staff, course_staff and course_instructor can access + for num in range(2): + self.last_library = self._create_library(self.org1, num) + library_api.set_library_user_permissions( + self.last_library.key, + self.course_staff, + library_api.AccessLevel.READ_LEVEL, + ) + library_api.set_library_user_permissions( + self.last_library.key, + self.course_instructor, + library_api.AccessLevel.READ_LEVEL, + ) + self.course_user_keys.append(self.last_library.key) + self.staff_user_keys.append(self.last_library.key) + + # Create a few libraries in org2, which only global_staff can access. + for num in range(2): + library = self._create_library(self.org2, num) + self.staff_user_keys.append(library.key) + + def _create_course(self, course_location): + """ + Create dummy course and overview. + """ + CourseFactory.create( + org=course_location.org, + number=course_location.course, + run=course_location.run + ) + course = CourseOverviewFactory.create(id=course_location, org=course_location.org) + return course + + def _create_library(self, org, num): + """ + Create dummy library. + """ + slug = get_random_string(4) + library = library_api.create_library( + org=org, + slug=slug, + title=f"Dummy Library {num}", + ) + return library + + +@ddt.ddt +@skip_unless_cms +class StudioSearchAccessTest(StudioSearchTestMixin, SharedModuleStoreTestCase): + """ + Tests the SearchAccess model, handlers, and helper functions. + """ + + def _create_course(self, course_location): + """ + Creates a SearchAccess object for each new course. + + Usually these are created when documents are indexed, but in these model tests, we need to create them manually. + """ + course = super()._create_course(course_location) + SearchAccess.objects.create(context_key=course.id) + return course + + def _create_library(self, org, num): + """ + Creates a SearchAccess object for each new library. + + Usually these are created when documents are indexed, but in these model tests, we need to create them manually. + """ + library = super()._create_library(org, num) + SearchAccess.objects.create(context_key=library.key) + return library + + def _check_access_ids(self, access_ids, expected_keys): + """ + Checks the returned list of access_ids to ensure: + + * no duplicates + * sorted descending order (i.e. most recently-created first) + * expected keys match access_ids + """ + assert len(set(access_ids)) == len(access_ids) + + sorted_access_ids = access_ids + sorted_access_ids.sort(reverse=True) + assert access_ids == sorted_access_ids + + access_keys = SearchAccess.objects.filter( + id__in=access_ids + ).only('context_key').values_list('context_key', flat=True) + assert set(access_keys) == set(expected_keys) + + def test_course_staff_get_access_ids_for_request(self): + """Course staff can access the courses and libraries in org1.""" + request = RequestFactory().get('/course') + request.user = self.course_staff + + access_ids = get_access_ids_for_request(request) + self._check_access_ids(access_ids, self.course_user_keys) + + def test_course_instructor_get_access_ids_for_request(self): + """Course instructor can access the courses and libraries in org1.""" + request = RequestFactory().get('/course') + request.user = self.course_instructor + + access_ids = get_access_ids_for_request(request) + self._check_access_ids(access_ids, self.course_user_keys) + + @ddt.data( + 'org_staff', + 'org_instructor', + ) + def test_org_get_access_ids_for_request(self, user_attr): + """ + Org staff & instructors can see all courses and libraries in their org. + But if they don't have any individual access granted, then no access_ids will be returned. + """ + request = RequestFactory().get('/course') + request.user = getattr(self, user_attr) + + access_ids = get_access_ids_for_request(request) + self._check_access_ids(access_ids, []) + + def test_staff_get_access_ids_for_request(self): + """ + Global staff can see all courses and libraries, but they only have individual access granted for libraries. + """ + request = RequestFactory().get('/course') + request.user = self.global_staff + + access_ids = get_access_ids_for_request(request) + self._check_access_ids(access_ids, self.staff_user_keys) + + def test_get_access_ids_for_request_omit_orgs(self): + """ + Omit the org1 library keys from the returned list. + """ + request = RequestFactory().get('/course') + request.user = self.global_staff + + access_ids = get_access_ids_for_request(request, omit_orgs=['org1']) + self._check_access_ids(access_ids, self.staff_user_keys[-2:]) + + def test_delete_removes_access_ids_for_request(self): + """Removing courses and library should remove their associated access_ids.""" + remaining_keys = self.staff_user_keys + remaining_keys.remove(self.last_library.key) + self.last_course.delete() + library_api.delete_library(self.last_library.key) + + request = RequestFactory().get('/course') + request.user = self.global_staff + + access_ids = get_access_ids_for_request(request) + self._check_access_ids(access_ids, remaining_keys) + + def test_no_access_ids_for_request(self): + """Users without special access cannot see any courses or libraries.""" + request = RequestFactory().get('/course') + request.user = self.student + access_ids = get_access_ids_for_request(request) + assert not access_ids diff --git a/openedx/core/djangoapps/content/search/tests/test_views.py b/openedx/core/djangoapps/content/search/tests/test_views.py index e117cf1b969a..853f5b5149a6 100644 --- a/openedx/core/djangoapps/content/search/tests/test_views.py +++ b/openedx/core/djangoapps/content/search/tests/test_views.py @@ -1,37 +1,65 @@ """ Tests for the Studio content search REST API. """ -from django.test import override_settings -from rest_framework.test import APITestCase, APIClient +import functools from unittest import mock -from common.djangoapps.student.tests.factories import UserFactory +import ddt +from django.test import override_settings +from rest_framework.test import APIClient + +from common.djangoapps.student.auth import update_org_role +from common.djangoapps.student.roles import OrgStaffRole from openedx.core.djangolib.testing.utils import skip_unless_cms +from xmodule.modulestore.tests.django_utils import SharedModuleStoreTestCase + +from .test_models import StudioSearchTestMixin + +try: + # This import errors in the lms because content.search is not an installed app there. + from openedx.core.djangoapps.content.search.models import SearchAccess +except RuntimeError: + SearchAccess = {} + STUDIO_SEARCH_ENDPOINT_URL = "/api/content_search/v2/studio/" +MOCK_API_KEY_UID = "3203d764-370f-4e99-a917-d47ab7f29739" -@skip_unless_cms -class StudioSearchViewTest(APITestCase): +def mock_meilisearch(enabled=True): """ - General tests for the Studio search REST API. + Decorator that mocks the required parts of content.search.views to simulate a running Meilisearch instance. """ + def decorator(func): + """ + Overrides settings and patches to enable view tests. + """ + @functools.wraps(func) + def wrapper(*args, **kwargs): + with override_settings( + MEILISEARCH_ENABLED=enabled, + MEILISEARCH_PUBLIC_URL="http://meilisearch.url", + ): + with mock.patch( + 'openedx.core.djangoapps.content.search.views._get_meili_api_key_uid', + return_value=MOCK_API_KEY_UID, + ): + return func(*args, **kwargs) + return wrapper + return decorator - @classmethod - def setUpClass(cls): - super().setUpClass() - cls.staff = UserFactory.create( - username='staff', email='staff@example.com', is_staff=True, password='staff_pass' - ) - cls.student = UserFactory.create( - username='student', email='student@example.com', is_staff=False, password='student_pass' - ) +@ddt.ddt +@skip_unless_cms +class StudioSearchViewTest(StudioSearchTestMixin, SharedModuleStoreTestCase): + """ + General tests for the Studio search REST API. + """ def setUp(self): super().setUp() self.client = APIClient() - @override_settings(MEILISEARCH_ENABLED=False) + @mock_meilisearch(enabled=False) def test_studio_search_unathenticated_disabled(self): """ Whether or not Meilisearch is enabled, the API endpoint requires authentication. @@ -39,7 +67,7 @@ def test_studio_search_unathenticated_disabled(self): result = self.client.get(STUDIO_SEARCH_ENDPOINT_URL) assert result.status_code == 401 - @override_settings(MEILISEARCH_ENABLED=True) + @mock_meilisearch(enabled=True) def test_studio_search_unathenticated_enabled(self): """ Whether or not Meilisearch is enabled, the API endpoint requires authentication. @@ -47,7 +75,7 @@ def test_studio_search_unathenticated_enabled(self): result = self.client.get(STUDIO_SEARCH_ENDPOINT_URL) assert result.status_code == 401 - @override_settings(MEILISEARCH_ENABLED=False) + @mock_meilisearch(enabled=False) def test_studio_search_disabled(self): """ When Meilisearch is disabled, the Studio search endpoint gives a 404 @@ -56,27 +84,175 @@ def test_studio_search_disabled(self): result = self.client.get(STUDIO_SEARCH_ENDPOINT_URL) assert result.status_code == 404 - @override_settings(MEILISEARCH_ENABLED=True) - def test_studio_search_student_forbidden(self): + @mock_meilisearch(enabled=True) + def test_studio_search_enabled(self): """ - Until we implement fine-grained permissions, only global staff can use - the Studio search endpoint. + We've implement fine-grained permissions on the meilisearch content, + so any logged-in user can get a restricted API key for Meilisearch using the REST API. """ self.client.login(username='student', password='student_pass') result = self.client.get(STUDIO_SEARCH_ENDPOINT_URL) - assert result.status_code == 403 + assert result.status_code == 200 + assert result.data["index_name"] == "studio_content" + assert result.data["url"] == "http://meilisearch.url" + assert result.data["api_key"] and isinstance(result.data["api_key"], str) + + def _mock_generate_tenant_token(self, mock_search_client): + """ + Return a mocked meilisearch.Client.generate_tenant_token method. + """ + mock_generate_tenant_token = mock.Mock(return_value='restricted_api_key') + mock_search_client.return_value = mock.Mock( + generate_tenant_token=mock_generate_tenant_token, + ) + return mock_generate_tenant_token + + @mock_meilisearch(enabled=True) + @mock.patch('openedx.core.djangoapps.content.search.views.meilisearch.Client') + def test_studio_search_student_no_access(self, mock_search_client): + """ + Users without access to any courses or libraries will have all documents filtered out. + """ + self.client.login(username='student', password='student_pass') + mock_generate_tenant_token = self._mock_generate_tenant_token(mock_search_client) + result = self.client.get(STUDIO_SEARCH_ENDPOINT_URL) + assert result.status_code == 200 + mock_generate_tenant_token.assert_called_once_with( + api_key_uid=MOCK_API_KEY_UID, + search_rules={ + "studio_content": { + "filter": "org IN [] OR access_id IN []", + } + }, + expires_at=mock.ANY, + ) - @override_settings(MEILISEARCH_ENABLED=True) - @mock.patch('openedx.core.djangoapps.content.search.views._get_meili_api_key_uid') - def test_studio_search_staff(self, mock_get_api_key_uid): + @mock_meilisearch(enabled=True) + @mock.patch('openedx.core.djangoapps.content.search.views.meilisearch.Client') + def test_studio_search_staff(self, mock_search_client): """ - Global staff can get a restricted API key for Meilisearch using the REST - API. + Users with global staff access can search any document. """ self.client.login(username='staff', password='staff_pass') - mock_get_api_key_uid.return_value = "3203d764-370f-4e99-a917-d47ab7f29739" + mock_generate_tenant_token = self._mock_generate_tenant_token(mock_search_client) result = self.client.get(STUDIO_SEARCH_ENDPOINT_URL) assert result.status_code == 200 - assert result.data["index_name"] == "studio_content" - assert result.data["url"].startswith("http") - assert result.data["api_key"] and isinstance(result.data["api_key"], str) + mock_generate_tenant_token.assert_called_once_with( + api_key_uid=MOCK_API_KEY_UID, + search_rules={ + "studio_content": {} + }, + expires_at=mock.ANY, + ) + + @mock_meilisearch(enabled=True) + @mock.patch('openedx.core.djangoapps.content.search.views.meilisearch.Client') + def test_studio_search_course_staff_access(self, mock_search_client): + """ + Users with staff or instructor access to a course or library will be limited to these courses/libraries. + """ + self.client.login(username='course_staff', password='course_staff_pass') + mock_generate_tenant_token = self._mock_generate_tenant_token(mock_search_client) + result = self.client.get(STUDIO_SEARCH_ENDPOINT_URL) + assert result.status_code == 200 + + expected_access_ids = list(SearchAccess.objects.filter( + context_key__in=self.course_user_keys, + ).only('id').values_list('id', flat=True)) + expected_access_ids.sort(reverse=True) + + mock_generate_tenant_token.assert_called_once_with( + api_key_uid=MOCK_API_KEY_UID, + search_rules={ + "studio_content": { + "filter": f"org IN [] OR access_id IN {expected_access_ids}", + } + }, + expires_at=mock.ANY, + ) + + @ddt.data( + 'org_staff', + 'org_instr', + ) + @mock_meilisearch(enabled=True) + @mock.patch('openedx.core.djangoapps.content.search.views.meilisearch.Client') + def test_studio_search_org_access(self, username, mock_search_client): + """ + Users with org access to any courses or libraries will use the org filter. + """ + self.client.login(username=username, password=f'{username}_pass') + mock_generate_tenant_token = self._mock_generate_tenant_token(mock_search_client) + result = self.client.get(STUDIO_SEARCH_ENDPOINT_URL) + assert result.status_code == 200 + mock_generate_tenant_token.assert_called_once_with( + api_key_uid=MOCK_API_KEY_UID, + search_rules={ + "studio_content": { + "filter": "org IN ['org1'] OR access_id IN []", + } + }, + expires_at=mock.ANY, + ) + + @mock_meilisearch(enabled=True) + @mock.patch('openedx.core.djangoapps.content.search.views.meilisearch.Client') + def test_studio_search_omit_orgs(self, mock_search_client): + """ + Grant org access to our staff user to ensure that org's access_ids are omitted from the search filter. + """ + update_org_role(caller=self.global_staff, role=OrgStaffRole, user=self.course_staff, orgs=['org1']) + self.client.login(username='course_staff', password='course_staff_pass') + mock_generate_tenant_token = self._mock_generate_tenant_token(mock_search_client) + result = self.client.get(STUDIO_SEARCH_ENDPOINT_URL) + assert result.status_code == 200 + + expected_access_ids = list(SearchAccess.objects.filter( + context_key__in=[key for key in self.course_user_keys if key.org != 'org1'], + ).only('id').values_list('id', flat=True)) + expected_access_ids.sort(reverse=True) + + mock_generate_tenant_token.assert_called_once_with( + api_key_uid=MOCK_API_KEY_UID, + search_rules={ + "studio_content": { + "filter": f"org IN ['org1'] OR access_id IN {expected_access_ids}", + } + }, + expires_at=mock.ANY, + ) + + @mock_meilisearch(enabled=True) + @mock.patch('openedx.core.djangoapps.content.search.views._get_user_orgs') + @mock.patch('openedx.core.djangoapps.content.search.views.get_access_ids_for_request') + @mock.patch('openedx.core.djangoapps.content.search.views.meilisearch.Client') + def test_studio_search_limits(self, mock_search_client, mock_get_access_ids, mock_get_user_orgs): + """ + Users with access to many courses/libraries or orgs will only be able to search content + from the most recent 1_000 courses/libraries and orgs. + """ + self.client.login(username='student', password='student_pass') + mock_generate_tenant_token = self._mock_generate_tenant_token(mock_search_client) + + mock_get_access_ids.return_value = list(range(2000)) + expected_access_ids = list(range(1000)) + + mock_get_user_orgs.return_value = [ + f"studio-search-org{x}" for x in range(2000) + ] + expected_user_orgs = [ + f"studio-search-org{x}" for x in range(1000) + ] + + result = self.client.get(STUDIO_SEARCH_ENDPOINT_URL) + assert result.status_code == 200 + mock_get_access_ids.assert_called_once() + mock_generate_tenant_token.assert_called_once_with( + api_key_uid=MOCK_API_KEY_UID, + search_rules={ + "studio_content": { + "filter": f"org IN {expected_user_orgs} OR access_id IN {expected_access_ids}", + } + }, + expires_at=mock.ANY, + ) diff --git a/openedx/core/djangoapps/content/search/views.py b/openedx/core/djangoapps/content/search/views.py index 69b686a3434f..5b29a8db1b94 100644 --- a/openedx/core/djangoapps/content/search/views.py +++ b/openedx/core/djangoapps/content/search/views.py @@ -1,22 +1,29 @@ """ REST API for content search """ +from __future__ import annotations + from datetime import datetime, timedelta, timezone import logging from django.conf import settings from django.contrib.auth import get_user_model import meilisearch -from rest_framework.exceptions import NotFound, PermissionDenied +from rest_framework.exceptions import NotFound +from rest_framework.request import Request from rest_framework.response import Response from rest_framework.views import APIView +from common.djangoapps.student.role_helpers import get_course_roles from common.djangoapps.student.roles import GlobalStaff from openedx.core.lib.api.view_utils import view_auth_classes from openedx.core.djangoapps.content.search.documents import STUDIO_INDEX_NAME +from openedx.core.djangoapps.content.search.models import get_access_ids_for_request User = get_user_model() log = logging.getLogger(__name__) +MAX_ACCESS_IDS_IN_FILTER = 1_000 +MAX_ORGS_IN_FILTER = 1_000 def _get_meili_api_key_uid(): @@ -29,6 +36,38 @@ def _get_meili_api_key_uid(): return _get_meili_api_key_uid.uid +def _get_meili_access_filter(request: Request) -> dict: + """ + Return meilisearch filter based on the requesting user's permissions. + """ + # Global staff can see anything, so no filters required. + if GlobalStaff().has_user(request.user): + return {} + + # Everyone else is limited to their org staff roles... + user_orgs = _get_user_orgs(request)[:MAX_ORGS_IN_FILTER] + + # ...or the N most recent courses and libraries they can access. + access_ids = get_access_ids_for_request(request, omit_orgs=user_orgs)[:MAX_ACCESS_IDS_IN_FILTER] + return { + "filter": f"org IN {user_orgs} OR access_id IN {access_ids}", + } + + +def _get_user_orgs(request: Request) -> list[str]: + """ + Get the org.short_names for the organizations that the requesting user has OrgStaffRole or OrgInstructorRole. + + Note: org-level roles have course_id=None to distinguish them from course-level roles. + """ + course_roles = get_course_roles(request.user) + return list(set( + role.org + for role in course_roles + if role.course_id is None and role.role in ['staff', 'instructor'] + )) + + @view_auth_classes(is_authenticated=True) class StudioSearchView(APIView): """ @@ -41,21 +80,13 @@ def get(self, request): """ if not settings.MEILISEARCH_ENABLED: raise NotFound("Meilisearch features are not enabled.") - if not GlobalStaff().has_user(request.user): - # Until we enforce permissions properly (see below), this endpoint is restricted to global staff, - # because it lets you search data from any course/library. - raise PermissionDenied("For the moment, use of this search preview is restricted to global staff.") client = meilisearch.Client(settings.MEILISEARCH_URL, settings.MEILISEARCH_API_KEY) index_name = settings.MEILISEARCH_INDEX_PREFIX + STUDIO_INDEX_NAME # Create an API key that only allows the user to search content that they have permission to view: expires_at = datetime.now(tz=timezone.utc) + timedelta(days=7) search_rules = { - index_name: { - # TODO: Apply filters here based on the user's permissions, so they can only search for content - # that they have permission to view. Example: - # 'filter': 'org = BradenX' - } + index_name: _get_meili_access_filter(request), } # Note: the following is just generating a JWT. It doesn't actually make an API call to Meilisearch. restricted_api_key = client.generate_tenant_token( diff --git a/openedx/core/djangoapps/content_tagging/rules.py b/openedx/core/djangoapps/content_tagging/rules.py index 863a5baaa1b1..265194860159 100644 --- a/openedx/core/djangoapps/content_tagging/rules.py +++ b/openedx/core/djangoapps/content_tagging/rules.py @@ -10,6 +10,7 @@ from organizations.models import Organization from common.djangoapps.student.auth import has_studio_read_access, has_studio_write_access +from common.djangoapps.student.role_helpers import get_course_roles, get_role_cache from common.djangoapps.student.roles import ( CourseInstructorRole, CourseStaffRole, @@ -79,17 +80,13 @@ def user_has_role_ignore_course_id(user, role_name, org_name) -> bool: """ Returns True if the given user has the given role for the given org, OR for any courses in this org. """ - # We use the user's RolesCache here to avoid re-querying. - # This cache gets populated the first time the user's permissions are checked (i.e when - # _get_content_creator_orgs is called). - - # pylint: disable=protected-access - roles_cache = user._roles - assert roles_cache + # We use the user's RoleCache here to avoid re-querying. + roles_cache = get_role_cache(user) + course_roles = get_course_roles(user) return any( access_role.role in roles_cache.get_roles(role_name) and access_role.org == org_name - for access_role in roles_cache._roles + for access_role in course_roles ) return [ diff --git a/openedx/core/djangoapps/enrollments/data.py b/openedx/core/djangoapps/enrollments/data.py index 3a242ecb34bb..9986830a3491 100644 --- a/openedx/core/djangoapps/enrollments/data.py +++ b/openedx/core/djangoapps/enrollments/data.py @@ -28,7 +28,7 @@ EnrollmentClosedError, NonExistentCourseError ) -from common.djangoapps.student.roles import RoleCache +from common.djangoapps.student.role_helpers import get_course_roles log = logging.getLogger(__name__) @@ -347,16 +347,12 @@ def get_course_enrollment_info(course_id, include_expired=False): def get_user_roles(username): """ - Returns a list of all roles that this user has. + Returns a set of all roles that this user has. :param username: The id of the selected user. :return: All roles for all courses that this user has. """ - # pylint: disable=protected-access user = _get_user(username) - if not hasattr(user, '_roles'): - user._roles = RoleCache(user) - role_cache = user._roles - return role_cache._roles + return set(get_course_roles(user)) def serialize_enrollments(enrollments):