Skip to content

Commit

Permalink
Merge pull request #541 from NASA-IMPACT/538-support-urlindexincluded
Browse files Browse the repository at this point in the history
Support URL includes
  • Loading branch information
code-geek authored Nov 30, 2023
2 parents e8b198e + 1c1484d commit 493fc38
Show file tree
Hide file tree
Showing 11 changed files with 304 additions and 12 deletions.
11 changes: 8 additions & 3 deletions config_generation/db_to_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,14 +293,19 @@ def add_url_include(self, url_pattern: str) -> None:
includes a url or url pattern, such as
- https://webb.nasa.gov/content/forEducators/realworld*
- https://webb.nasa.gov/content/features/index.html
- *.rtf
I'm not sure if exclusion rules override includes or if includes override
exclusion rules.
"""

xml_root = self.xml_tree.getroot()
ET.SubElement(
xml_root, "UrlIndexIncluded"
).text = url_pattern # this adds an indexing rule (doesn't overwrite)

for url_index_included in xml_root.findall("UrlIndexIncluded"):
if url_index_included.text == url_pattern:
return # stop the function if the url pattern already exists

# add the url pattern if it doesn't already exist
ET.SubElement(xml_root, "UrlIndexIncluded").text = url_pattern

def _find_treeroot_field(self):
treeroot = self.xml_tree.find("TreeRoot")
Expand Down
3 changes: 2 additions & 1 deletion sde_collections/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from .models.candidate_url import CandidateURL
from .models.collection import Collection
from .models.pattern import TitlePattern
from .models.pattern import IncludePattern, TitlePattern
from .tasks import import_candidate_urls_from_api


Expand Down Expand Up @@ -245,3 +245,4 @@ class TitlePatternAdmin(admin.ModelAdmin):

admin.site.register(CandidateURL, CandidateURLAdmin)
admin.site.register(TitlePattern, TitlePatternAdmin)
admin.site.register(IncludePattern)
64 changes: 64 additions & 0 deletions sde_collections/migrations/0039_includepattern.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Generated by Django 4.2.6 on 2023-11-30 16:26

from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
    # Schema migration that creates the table backing the IncludePattern model.

    # Must run after the 0038 merge migration of the same app.
    dependencies = [
        ("sde_collections", "0038_merge_20231126_1152"),
    ]

    operations = [
        migrations.CreateModel(
            name="IncludePattern",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    # NOTE(review): no max_length is given; Django 4.2 appears to
                    # allow an unbounded CharField only on PostgreSQL — confirm the
                    # target database backend.
                    "match_pattern",
                    models.CharField(
                        help_text="This pattern is compared against the URL of all the documents in the collection and matching documents will be returned",
                        verbose_name="Pattern",
                    ),
                ),
                (
                    # 1 = pattern for a single URL, 2 = pattern covering several URLs.
                    "match_pattern_type",
                    models.IntegerField(
                        choices=[
                            (1, "Individual URL Pattern"),
                            (2, "Multi-URL Pattern"),
                        ],
                        default=1,
                    ),
                ),
                (
                    # Candidate URLs linked to this pattern; no explicit ``through``
                    # model is declared, so Django creates the join table itself.
                    "candidate_urls",
                    models.ManyToManyField(
                        related_name="%(class)s_urls", to="sde_collections.candidateurl"
                    ),
                ),
                (
                    # Owning collection; deleting the collection deletes its patterns.
                    "collection",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="%(class)s",
                        related_query_name="%(class)ss",
                        to="sde_collections.collection",
                    ),
                ),
            ],
            options={
                "verbose_name": "Include Pattern",
                "verbose_name_plural": "Include Patterns",
                # A given pattern string may appear only once per collection.
                "unique_together": {("collection", "match_pattern")},
            },
        ),
    ]
11 changes: 11 additions & 0 deletions sde_collections/models/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,12 @@ def _process_exclude_list(self):
pattern._process_match_pattern() for pattern in self.excludepattern.all()
]

def _process_include_list(self):
    """Collect the processed match pattern of each include pattern.

    Walks the ``includepattern`` related manager of this collection and
    returns, in iteration order, whatever each pattern's
    ``_process_match_pattern()`` produces.
    """
    processed_patterns = []
    for include_pattern in self.includepattern.all():
        processed_patterns.append(include_pattern._process_match_pattern())
    return processed_patterns

def _process_title_list(self):
"""Process the title list"""
title_rules = []
Expand Down Expand Up @@ -218,6 +224,7 @@ def update_config_xml(self, original_config_string):
editor = XmlEditor(original_config_string)

URL_EXCLUDES = self._process_exclude_list()
URL_INCLUDES = self._process_include_list()
TITLE_RULES = self._process_title_list()
DOCUMENT_TYPE_RULES = self._process_document_type_list()

Expand All @@ -227,6 +234,8 @@ def update_config_xml(self, original_config_string):

for url in URL_EXCLUDES:
editor.add_url_exclude(url)
for url in URL_INCLUDES:
editor.add_url_include(url)
for title_rule in TITLE_RULES:
editor.add_title_mapping(**title_rule)
for rule in DOCUMENT_TYPE_RULES:
Expand Down Expand Up @@ -385,6 +394,8 @@ def apply_all_patterns(self) -> None:
"""Apply all the patterns."""
for pattern in self.excludepattern.all():
pattern.apply()
for pattern in self.includepattern.all():
pattern.apply()
for pattern in self.titlepattern.all():
pattern.apply()
for pattern in self.documenttypepattern.all():
Expand Down
27 changes: 26 additions & 1 deletion sde_collections/models/pattern.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def apply(self) -> None:
)

def unapply(self) -> None:
"Unapplies automatically by deleting excludpattern through objects in a cascade"
"Unapplies automatically by deleting include pattern through objects in a cascade"
return

class Meta:
Expand All @@ -115,6 +115,31 @@ class Meta:
unique_together = ("collection", "match_pattern")


class IncludePattern(BaseMatchPattern):
    """Match pattern that links the candidate URLs it matches to itself.

    The links live in the many-to-many ``candidate_urls`` relation; the
    pattern string and owning collection come from ``BaseMatchPattern``.
    """

    def apply(self) -> None:
        """Link every candidate URL matched by this pattern to the pattern.

        One through-table row is created per matched URL. The call is safe to
        repeat: rows that already exist are skipped instead of raising an
        ``IntegrityError`` on the through table's uniqueness constraint, so
        re-applying all patterns of a collection does not fail.
        """
        through_model = self.candidate_urls.through
        candidate_url_ids = self.matched_urls().values_list("id", flat=True)
        through_model.objects.bulk_create(
            objs=[
                through_model(
                    candidateurl_id=candidate_url_id, includepattern_id=self.id
                )
                for candidate_url_id in candidate_url_ids
            ],
            # Existing (pattern, URL) links are left untouched on re-application.
            ignore_conflicts=True,
        )

    def unapply(self) -> None:
        """Do nothing: through rows are removed by cascade when the pattern is deleted."""
        return

    class Meta:
        """Meta definition for IncludePattern."""

        verbose_name = "Include Pattern"
        verbose_name_plural = "Include Patterns"
        # A given pattern string may appear only once per collection.
        unique_together = ("collection", "match_pattern")


class TitlePattern(BaseMatchPattern):
title_pattern = models.CharField(
"Title Pattern",
Expand Down
13 changes: 12 additions & 1 deletion sde_collections/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@
from .models.candidate_url import CandidateURL
from .models.collection import Collection
from .models.collection_choice_fields import DocumentTypes
from .models.pattern import DocumentTypePattern, ExcludePattern, TitlePattern
from .models.pattern import (
DocumentTypePattern,
ExcludePattern,
IncludePattern,
TitlePattern,
)


class CollectionSerializer(serializers.ModelSerializer):
Expand Down Expand Up @@ -116,6 +121,12 @@ class Meta:
fields = BasePatternSerializer.Meta.fields + ("reason",)


class IncludePatternSerializer(BasePatternSerializer, serializers.ModelSerializer):
    """Serializer for ``IncludePattern`` built on ``BasePatternSerializer``."""

    class Meta:
        model = IncludePattern
        # Reuses the base serializer's field list as-is; no pattern-specific
        # fields are appended here.
        fields = BasePatternSerializer.Meta.fields


class TitlePatternSerializer(BasePatternSerializer, serializers.ModelSerializer):
class Meta:
model = TitlePattern
Expand Down
9 changes: 9 additions & 0 deletions sde_collections/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,15 @@ def test_create_exclude_pattern(self):
self.assertCountEqual(response, "hey")


class CreateIncludePatternTestCase(TestCase):
    """Placeholder test for creating an include pattern through the API."""

    def test_create_include_pattern(self):
        # NOTE(review): APIRequestFactory.post() presumably only builds a request
        # object and never dispatches it to a view, so ``response`` below would
        # not be an HTTP response — confirm and consider using a test client.
        # NOTE(review): the path does not match the "include-patterns" route
        # registered on the router, and the payload carries "title" while the
        # view's create() reads "match_pattern" and "collection" — TODO align.
        factory = APIRequestFactory()
        response = factory.post(
            "/api/create-include-pattern", {"title": "new idea"}, format="json"
        )
        # NOTE(review): comparing against the literal "hey" looks like a stub
        # assertion; replace with a check on status code / created object.
        self.assertCountEqual(response, "hey")


class ImportCandidateURLsTestCase(TestCase):
def test_import_all_candidate_urls_from_api(self):
import_candidate_urls_from_api()
Expand Down
1 change: 1 addition & 0 deletions sde_collections/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
router.register(r"collections-read", views.CollectionReadViewSet)
router.register(r"candidate-urls", views.CandidateURLViewSet)
router.register(r"exclude-patterns", views.ExcludePatternViewSet)
router.register(r"include-patterns", views.IncludePatternViewSet)
router.register(r"title-patterns", views.TitlePatternViewSet)
router.register(r"document-type-patterns", views.DocumentTypePatternViewSet)

Expand Down
28 changes: 27 additions & 1 deletion sde_collections/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,20 @@
DocumentTypes,
WorkflowStatusChoices,
)
from .models.pattern import DocumentTypePattern, ExcludePattern, TitlePattern
from .models.pattern import (
DocumentTypePattern,
ExcludePattern,
IncludePattern,
TitlePattern,
)
from .serializers import (
CandidateURLBulkCreateSerializer,
CandidateURLSerializer,
CollectionReadSerializer,
CollectionSerializer,
DocumentTypePatternSerializer,
ExcludePatternSerializer,
IncludePatternSerializer,
TitlePatternSerializer,
)
from .tasks import push_to_github_task
Expand Down Expand Up @@ -257,6 +263,26 @@ def create(self, request, *args, **kwargs):
return super().create(request, *args, **kwargs)


class IncludePatternViewSet(CollectionFilterMixin, viewsets.ModelViewSet):
    """API endpoints for include patterns.

    ``create`` acts as a toggle: posting a pattern that already exists for
    the collection deletes it, otherwise a new pattern is created.
    """

    queryset = IncludePattern.objects.all()
    serializer_class = IncludePatternSerializer

    def get_queryset(self):
        """Return the mixin-filtered queryset ordered by pattern string."""
        return super().get_queryset().order_by("match_pattern")

    def create(self, request, *args, **kwargs):
        """Delete the pattern if it already exists, otherwise create it.

        Returns an empty 200 response after a deletion; otherwise returns
        whatever the standard ``ModelViewSet.create`` produces, including its
        validation errors for a missing or unknown collection.
        """
        # NOTE(review): request.POST presumably carries only form-encoded
        # bodies; JSON clients would skip the toggle branch — confirm the
        # front end posts form data.
        match_pattern = request.POST.get("match_pattern")
        collection_id = request.POST.get("collection")

        # Filter on the foreign-key column directly: no extra Collection query,
        # and an unknown collection id yields an empty result instead of an
        # uncaught Collection.DoesNotExist (HTTP 500).
        try:
            deleted_count, _ = IncludePattern.objects.filter(
                collection_id=collection_id,
                match_pattern=match_pattern,
            ).delete()
        except (TypeError, ValueError):
            # Malformed collection id: let serializer validation report it.
            deleted_count = 0

        if deleted_count:
            return Response(status=status.HTTP_200_OK)
        return super().create(request, *args, **kwargs)


class TitlePatternViewSet(CollectionFilterMixin, viewsets.ModelViewSet):
queryset = TitlePattern.objects.all()
serializer_class = TitlePatternSerializer
Expand Down
Loading

0 comments on commit 493fc38

Please sign in to comment.