From 107296e4b67ea77e9d00124afd68e5fc4658aff0 Mon Sep 17 00:00:00 2001
From: David Fischer
Date: Tue, 3 Dec 2024 16:09:11 -0800
Subject: [PATCH] Domain aggregation

Add a DomainImpression model (one row per advertisement, domain and date),
its migration and admin, and a daily_update_domains task that fills it from
Offer rows. update_previous_day_reports now runs the new task as well.

In the AdBaseAdmin field lists, "domain" replaces the "page_url" link,
"url" is listed directly, and search uses "domain" instead of "url".
---
 adserver/admin.py                             | 37 ++++++++++----
 .../0101_domainimpression_aggregation.py      | 34 +++++++++++++
 adserver/models.py                            | 24 +++++++++
 adserver/tasks.py                             | 51 +++++++++++++++++++
 adserver/tests/test_tasks.py                  | 23 +++++++++
 5 files changed, 158 insertions(+), 11 deletions(-)
 create mode 100644 adserver/migrations/0101_domainimpression_aggregation.py

diff --git a/adserver/admin.py b/adserver/admin.py
index da3e7076..5bdd516c 100644
--- a/adserver/admin.py
+++ b/adserver/admin.py
@@ -27,6 +27,7 @@
 from .models import Campaign
 from .models import Click
 from .models import CountryRegion
+from .models import DomainImpression
 from .models import Flight
 from .models import GeoImpression
 from .models import Keyword
@@ -880,6 +881,28 @@ class UpliftImpressionAdmin(ImpressionsAdmin):
     pass
 
 
+@admin.register(DomainImpression)
+class DomainImpressionAdmin(ImpressionsAdmin):
+    date_hierarchy = "date"
+    readonly_fields = (
+        "date",
+        "domain",
+        "advertisement",
+        "views",
+        "clicks",
+        "offers",
+        "decisions",
+        "click_to_offer_rate",  # not a DB column; presumably computed - confirm
+        "view_to_offer_rate",  # same as above
+        "modified",
+        "created",
+    )
+    list_display = readonly_fields  # Same columns as the read-only form
+    list_filter = ("advertisement__flight__campaign__advertiser",)
+    list_select_related = ("advertisement",)  # Join the ad in the list query
+    search_fields = ("advertisement__slug", "advertisement__name", "domain")
+
+
 @admin.register(RotationImpression)
 class RotationImpressionAdmin(ImpressionsAdmin):
     date_hierarchy = "date"
@@ -910,7 +933,7 @@ class AdBaseAdmin(RemoveDeleteMixin, admin.ModelAdmin):
         "date",
         "advertisement",
         "publisher",
-        "page_url",
+        "domain",
         "keywords",
         "country",
         "browser_family",
@@ -922,6 +945,7 @@ class AdBaseAdmin(RemoveDeleteMixin, admin.ModelAdmin):
         "ip",
         "div_id",
         "ad_type_slug",
+        "url",
         "client_id",
         "modified",
         "created",
@@ -938,7 +962,7 @@ class AdBaseAdmin(RemoveDeleteMixin, admin.ModelAdmin):
     paginator = EstimatedCountPaginator
     search_fields = (
         "advertisement__name",
-        "url",
+        "domain",
         "ip",
         "country",
         "user_agent",
@@ -946,15 +970,6 @@ class AdBaseAdmin(RemoveDeleteMixin, admin.ModelAdmin):
     )
     show_full_result_count = False
 
-    def page_url(self, instance):
-        if instance.url:
-            return format_html(
-                '{}',
-                instance.url,
-                instance.url,
-            )
-        return None
-
     def has_add_permission(self, request):
         """Clicks and views cannot be added through the admin."""
         return False
diff --git a/adserver/migrations/0101_domainimpression_aggregation.py b/adserver/migrations/0101_domainimpression_aggregation.py
new file mode 100644
index 00000000..5f91de16
--- /dev/null
+++ b/adserver/migrations/0101_domainimpression_aggregation.py
@@ -0,0 +1,34 @@
+# Generated by Django 5.0.9 on 2024-12-03 23:42
+
+import django.db.models.deletion
+import django_extensions.db.fields
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('adserver', '0100_add_offer_domain'),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='DomainImpression',
+            fields=[
+                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('created', django_extensions.db.fields.CreationDateTimeField(auto_now_add=True, verbose_name='created')),
+                ('modified', django_extensions.db.fields.ModificationDateTimeField(auto_now=True, verbose_name='modified')),
+                ('date', models.DateField(db_index=True, verbose_name='Date')),
+                ('decisions', models.PositiveIntegerField(default=0, help_text="The number of times the Ad Decision API was called. The server might not respond with an ad if there isn't inventory.", verbose_name='Decisions')),
+                ('offers', models.PositiveIntegerField(default=0, help_text='The number of times an ad was proposed by the ad server. The client may not load the ad (a view) for a variety of reasons ', verbose_name='Offers')),
+                ('views', models.PositiveIntegerField(default=0, help_text='Number of times the ad was legitimately viewed', verbose_name='Views')),
+                ('clicks', models.PositiveIntegerField(default=0, help_text='Number of times the ad was legitimately clicked', verbose_name='Clicks')),
+                ('domain', models.CharField(max_length=1000, verbose_name='Domain')),
+                ('advertisement', models.ForeignKey(null=True, on_delete=django.db.models.deletion.PROTECT, related_name='domain_impressions', to='adserver.advertisement')),
+            ],
+            options={
+                'ordering': ('-date',),
+                'unique_together': {('advertisement', 'date', 'domain')},
+            },
+        ),
+    ]
diff --git a/adserver/models.py b/adserver/models.py
index 6009e53a..f19770c2 100644
--- a/adserver/models.py
+++ b/adserver/models.py
@@ -2498,6 +2498,30 @@ def __str__(self):
         return "Uplift of %s on %s" % (self.advertisement, self.date)
 
 
+class DomainImpression(BaseImpression):
+    """
+    Daily index of impression counts per advertisement and domain.
+
+    One row per advertisement/date/domain (enforced by ``unique_together``).
+    """
+
+    domain = models.CharField(_("Domain"), max_length=1000)
+    advertisement = models.ForeignKey(
+        Advertisement,
+        related_name="domain_impressions",
+        on_delete=models.PROTECT,
+        null=True,  # NULL allowed; presumably decisions with no ad offered - confirm
+    )
+
+    class Meta:
+        ordering = ("-date",)
+        unique_together = ("advertisement", "date", "domain")
+
+    def __str__(self):
+        """Return a label naming the domain, the advertisement and the date."""
+        return "Domain %s of %s on %s" % (self.domain, self.advertisement, self.date)
+
+
 class RotationImpression(BaseImpression):
     """
     Create an index of ads that were rotated.
diff --git a/adserver/tasks.py b/adserver/tasks.py
index 1e60afd5..a6b211d9 100644
--- a/adserver/tasks.py
+++ b/adserver/tasks.py
@@ -29,6 +29,7 @@
 from .models import Advertisement
 from .models import Advertiser
 from .models import AdvertiserImpression
+from .models import DomainImpression
 from .models import Flight
 from .models import GeoImpression
 from .models import KeywordImpression
@@ -509,6 +510,55 @@ def daily_update_uplift(day=None):
         )
 
 
+@app.task()
+def daily_update_domains(day=None):
+    """
+    Aggregate Offers for one day into DomainImpression rows (per ad and domain).
+
+    :arg day: An optional datetime object representing a day
+    """
+    start_date, end_date = get_day(day)
+
+    log.info("Updating domains for %s-%s", start_date, end_date)
+
+    queryset = Offer.objects.using(settings.REPLICA_SLUG).filter(
+        date__gte=start_date,
+        date__lt=end_date,  # Things at UTC midnight should count towards tomorrow
+    )
+
+    for values in (
+        queryset.values("advertisement", "domain")  # GROUP BY ad and domain
+        .annotate(
+            total_decisions=Count("publisher"),  # Non-null publishers in the group
+            total_offers=Count("domain", filter=Q(advertisement__isnull=False)),
+            total_views=Count("domain", filter=Q(viewed=True)),
+            total_clicks=Count("domain", filter=Q(clicked=True)),
+        )
+        .exclude(domain__isnull=True)  # Offers with no domain are skipped
+        .order_by("-total_decisions")
+        .values(
+            "advertisement",
+            "domain",
+            "total_decisions",
+            "total_offers",
+            "total_views",
+            "total_clicks",
+        )
+        .iterator()
+    ):
+        impression, _ = DomainImpression.objects.using("default").get_or_create(
+            advertisement_id=values["advertisement"],
+            domain=values["domain"],
+            date=start_date,
+        )  # Counts are overwritten below, so a re-run does not double count
+        DomainImpression.objects.using("default").filter(pk=impression.pk).update(
+            decisions=values["total_decisions"],
+            offers=values["total_offers"],
+            views=values["total_views"],
+            clicks=values["total_clicks"],
+        )
+
+
 @app.task()
 def daily_update_rotations(day=None):
     """
@@ -698,6 +748,7 @@ def update_previous_day_reports(day=None):
     daily_update_publishers(start_date)  # Important: after daily_update_impressions
     daily_update_keywords(start_date)
     daily_update_uplift(start_date)
+    daily_update_domains(start_date)
     daily_update_rotations(start_date)
     daily_update_regiontopic(start_date)
 
diff --git a/adserver/tests/test_tasks.py b/adserver/tests/test_tasks.py
index cacfb2c2..d15fe66a 100644
--- a/adserver/tests/test_tasks.py
+++ b/adserver/tests/test_tasks.py
@@ -10,6 +10,7 @@
 from ..constants import HOUSE_CAMPAIGN
 from ..models import AdImpression
 from ..models import AdvertiserImpression
+from ..models import DomainImpression
 from ..models import Flight
 from ..models import GeoImpression
 from ..models import KeywordImpression
@@ -23,6 +24,7 @@
 from ..tasks import calculate_ad_ctrs
 from ..tasks import calculate_publisher_ctrs
 from ..tasks import daily_update_advertisers
+from ..tasks import daily_update_domains
 from ..tasks import daily_update_geos
 from ..tasks import daily_update_impressions
 from ..tasks import daily_update_keywords
@@ -464,6 +466,7 @@ def setUp(self):
             keywords=["backend"],
             div_id="id_1",
             ad_type_slug=self.text_ad_type.slug,
+            domain="example.com",
         )
         get(
             Offer,
@@ -475,6 +478,7 @@ def setUp(self):
             keywords=["backend"],
             div_id="id_1",
             ad_type_slug=self.text_ad_type.slug,
+            domain="example.com",
         )
         get(
             Offer,
@@ -488,6 +492,7 @@ def setUp(self):
             keywords=["backend"],
             div_id="id_1",
             ad_type_slug=self.text_ad_type.slug,
+            domain="example.com",
         )
         get(
             Offer,
@@ -499,6 +504,7 @@ def setUp(self):
             keywords=["backend"],
             div_id="id_2",
             ad_type_slug=self.text_ad_type.slug,
+            domain="sub.example.com",
         )
         get(
             Offer,
@@ -511,6 +517,7 @@ def setUp(self):
             keywords=["security"],
             div_id="id_2",
             ad_type_slug=self.text_ad_type.slug,
+            domain="example.com",
         )
         get(
             Offer,
@@ -523,6 +530,7 @@ def setUp(self):
             keywords=["security"],
             div_id="id_2",
             ad_type_slug=self.text_ad_type.slug,
+            domain="sub.example.com",
         )
 
     def test_daily_update_impressions(self):
@@ -763,6 +771,21 @@ def test_daily_update_uplift(self):
         self.assertEqual(uplift2.views, 2)
         self.assertEqual(uplift2.clicks, 0)
 
+    def test_daily_update_domains(self):
+        daily_update_domains()  # day=None: the task's default day
+
+        imp1 = DomainImpression.objects.filter(advertisement=self.ad1, domain="example.com").first()
+        self.assertIsNotNone(imp1)  # A row must exist for ad1 on example.com
+        self.assertEqual(imp1.offers, 3)
+        self.assertEqual(imp1.views, 2)
+        self.assertEqual(imp1.clicks, 1)
+
+        imp2 = DomainImpression.objects.filter(advertisement=self.ad2, domain="sub.example.com").first()
+        self.assertIsNotNone(imp2)  # A row must exist for ad2 on sub.example.com
+        self.assertEqual(imp2.offers, 1)
+        self.assertEqual(imp2.views, 1)
+        self.assertEqual(imp2.clicks, 0)
+
     def test_daily_update_placements(self):
         # Ad1/id_1 - offered/decision=3, views=2, clicks=1
         # Ad1/id_2 - offered/decisions=1, views=1, clicks=0