def interpret_title_pattern(url, scraped_title, title_pattern):
    """Build a generated title by expanding ``{title}`` in a title pattern.

    Every occurrence of the literal placeholder ``{title}`` in
    ``title_pattern`` is replaced with ``scraped_title``. A pattern that does
    not contain the placeholder is returned unchanged.

    Args:
        url: URL the title belongs to. Not used by the current
            implementation; kept so the call signature stays stable.
        scraped_title: Title text to substitute for the placeholder. ``None``
            is treated as an empty string so that a missing title does not
            raise ``TypeError`` inside ``str.replace``.
        title_pattern: Pattern text, optionally containing ``{title}``.

    Returns:
        ``title_pattern`` with each ``{title}`` replaced by the scraped title,
        or ``title_pattern`` itself when it has no placeholder.
    """
    placeholder = "{title}"

    # Patterns without the placeholder are literal titles: return them as-is
    # without looking at scraped_title at all.
    if placeholder not in title_pattern:
        return title_pattern

    # str.replace only accepts str arguments; substitute "" for a missing title.
    replacement = "" if scraped_title is None else scraped_title
    return title_pattern.replace(placeholder, replacement)