diff --git a/checks/migrations/0019_hall_of_fame_plus_triggers.py b/checks/migrations/0019_hall_of_fame_plus_triggers.py
new file mode 100644
index 000000000..86972f8c5
--- /dev/null
+++ b/checks/migrations/0019_hall_of_fame_plus_triggers.py
@@ -0,0 +1,179 @@
+# Partly generated by Django 4.2.20 on 2025-05-25 15:24, combined with manual RunSQL
+
+from django.db import migrations, models
+from django.db.models import Case, F, Q, Value, When
+from django.db.models.functions import Greatest
+from django.db.models.lookups import GreaterThan
+import django.db.models.deletion
+import pgtrigger.compiler
+import pgtrigger.migrations
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("checks", "0018_domaintesttls_caa_records"),
+    ]
+
+    operations = [
+        # Note: db_index is False on the ForeignKeys to prevent extra indexes that are not needed.
+        # In Django an AutoField must be the primary key; since "domain" is the primary key here,
+        # the "id" column is instead made an identity column via the manual RunSQL operation below.
+        # see https://github.com/django/django/blob/787f3130f751283140fe2be8188eb5299552232d/django/db/models/fields/__init__.py#L2801
+        migrations.CreateModel(
+            name="Fame",
+            fields=[
+                ("id", models.IntegerField(serialize=False, verbose_name="ID")),
+                ("domain", models.CharField(max_length=255, primary_key=True, serialize=False)),
+                (
+                    "site_report",
+                    models.ForeignKey(
+                        db_index=False,
+                        null=True,
+                        on_delete=django.db.models.deletion.CASCADE,
+                        to="checks.domaintestreport",
+                    ),
+                ),
+                ("site_report_timestamp", models.DateTimeField(null=True)),
+                (
+                    "mail_report",
+                    models.ForeignKey(
+                        db_index=False,
+                        null=True,
+                        on_delete=django.db.models.deletion.CASCADE,
+                        to="checks.mailtestreport",
+                    ),
+                ),
+                ("mail_report_timestamp", models.DateTimeField(null=True)),
+            ],
+        ),
+        migrations.AddIndex(
+            model_name="fame",
+            index=models.Index(
+                condition=models.Q(site_report_id__isnull=False),
+                fields=["-site_report_timestamp", "domain", "site_report_id"],
+                name="checks_fame_sites_idx",
+            ),
+        ),
+        migrations.AddIndex(
+            model_name="fame",
+            index=models.Index(
+                condition=models.Q(mail_report_id__isnull=False),
+                fields=["-mail_report_timestamp", "domain", "mail_report_id"],
+                name="checks_fame_mail_idx",
+            ),
+        ),
+        migrations.AddIndex(
+            model_name="fame",
+            index=models.Index(
+                models.OrderBy(
+                    Greatest("site_report_timestamp", "mail_report_timestamp"),
+                    descending=True,
+                ),
+                "domain",
+                Case(
+                    models.When(
+                        GreaterThan(F("site_report_timestamp"), F("mail_report_timestamp")),
+                        then=Value("s"),
+                    ),
+                    default=Value("m"),
+                    output_field=models.CharField(max_length=1),
+                ),
+                Case(
+                    When(
+                        GreaterThan(F("site_report_timestamp"), F("mail_report_timestamp")),
+                        then="site_report_id",
+                    ),
+                    default="mail_report_id",
+                ),
+                condition=Q(site_report_id__isnull=False) & Q(mail_report_id__isnull=False),
+                name="checks_fame_champions_idx",
+            ),
+        ),
+        pgtrigger.migrations.AddTrigger(
+            model_name="domaintestreport",
+            trigger=pgtrigger.compiler.Trigger(
+                name="update_fame_on_site_report",
+                sql=pgtrigger.compiler.UpsertTriggerSql(
+                    func="""
+IF NEW.score IS NULL THEN
+    -- DO NOTHING
+ELSIF NEW.score = 100 THEN
+    INSERT INTO checks_fame (domain, site_report_id, site_report_timestamp, mail_report_id, mail_report_timestamp)
+    VALUES (NEW.domain, NEW.id, NEW.timestamp, NULL, NULL)
+    ON CONFLICT (domain)
+    DO UPDATE SET site_report_id = NEW.id, site_report_timestamp = NEW.timestamp;
+ELSE
+    MERGE INTO ONLY checks_fame c1
+    USING checks_fame c2 ON c1.domain = c2.domain AND c1.domain = NEW.domain
+    WHEN NOT MATCHED THEN
+        DO NOTHING
+    WHEN MATCHED AND
c1.mail_report_id IS NOT NULL THEN + UPDATE SET site_report_id = NULL, site_report_timestamp = NULL + WHEN MATCHED AND c1.mail_report_id IS NULL THEN + DELETE; + END IF; +RETURN NEW; +""", + hash="b4f792b06123914de71b57669c202a19b04e9e9c", + operation='INSERT OR UPDATE OF "score"', + pgid="pgtrigger_update_fame_on_site_report_e4fdc", + table="checks_domaintestreport", + when="AFTER", + ), + ), + ), + pgtrigger.migrations.AddTrigger( + model_name="mailtestreport", + trigger=pgtrigger.compiler.Trigger( + name="update_fame_on_mail_report", + sql=pgtrigger.compiler.UpsertTriggerSql( + func=""" +IF NEW.score IS NULL THEN + -- DO NOTHING +ELSIF NEW.score = 100 THEN + INSERT INTO checks_fame (domain, site_report_id, site_report_timestamp, mail_report_id, mail_report_timestamp) + VALUES (NEW.domain, NULL, NULL, NEW.id, NEW.timestamp) + ON CONFLICT (domain) + DO UPDATE SET mail_report_id = NEW.id, mail_report_timestamp = NEW.timestamp; +ELSE + MERGE INTO ONLY checks_fame c1 + USING checks_fame c2 ON c1.domain = c2.domain AND c1.domain = NEW.domain + WHEN NOT MATCHED THEN + DO NOTHING + WHEN MATCHED AND c1.site_report_id IS NOT NULL THEN + UPDATE SET mail_report_id = NULL, mail_report_timestamp = NULL + WHEN MATCHED AND c1.site_report_id IS NULL THEN + DELETE; + END IF; +RETURN NEW; +""", + hash="707aefc7a83dd041dd815511f1d1cf7e8f84f944", + operation='INSERT OR UPDATE OF "score"', + pgid="pgtrigger_update_fame_on_mail_report_b3a27", + table="checks_mailtestreport", + when="AFTER", + ), + ), + ), + migrations.RunSQL( + sql=[ + 'ALTER TABLE "checks_fame" ALTER COLUMN "id" ADD GENERATED BY DEFAULT AS IDENTITY;', + """ +WITH + site_fame AS ( + SELECT domain, id AS site_report_id, timestamp AS site_report_timestamp FROM ( + SELECT domain, score, id, timestamp, rank() OVER (PARTITION BY domain ORDER BY id DESC) FROM checks_domaintestreport + ) alias WHERE rank = 1 AND score = 100), + mail_fame AS ( + SELECT domain, id AS mail_report_id, timestamp AS mail_report_timestamp FROM ( + SELECT domain, score, id, timestamp, rank() OVER (PARTITION BY domain ORDER BY id DESC) FROM checks_mailtestreport + ) alias WHERE rank = 1 AND score = 100) +INSERT INTO checks_fame (domain, site_report_id, site_report_timestamp, mail_report_id, mail_report_timestamp) + SELECT * FROM site_fame FULL OUTER JOIN mail_fame USING (domain); +""", + ], + reverse_sql=[ + 'DELETE FROM "checks_fame";', + 'ALTER TABLE "checks_fame" ALTER COLUMN "id" DROP IDENTITY;', + ], + ), + ] diff --git a/checks/models.py b/checks/models.py index 20f8a0304..c3048c3f3 100644 --- a/checks/models.py +++ b/checks/models.py @@ -7,9 +7,13 @@ from django.core.exceptions import SuspiciousFileOperation from django.db import models, transaction +from django.db.models import Case, F, Q, Value, When +from django.db.models.functions import Greatest +from django.db.models.lookups import GreaterThan from django.utils import timezone from enumfields import Enum as LabelEnum from enumfields import EnumField, EnumIntegerField +import pgtrigger class ListField(models.TextField): @@ -947,6 +951,34 @@ def __dir__(self): class Meta: app_label = "checks" + triggers = [ + pgtrigger.Trigger( + name="update_fame_on_site_report", + when=pgtrigger.After, + operation=pgtrigger.Insert | pgtrigger.UpdateOf("score"), + func=""" +IF NEW.score IS NULL THEN + -- DO NOTHING +ELSIF NEW.score = 100 THEN + INSERT INTO checks_fame (domain, site_report_id, site_report_timestamp, mail_report_id, mail_report_timestamp) + VALUES (NEW.domain, NEW.id, NEW.timestamp, NULL, NULL) + ON CONFLICT 
(domain) + DO UPDATE SET site_report_id = NEW.id, site_report_timestamp = NEW.timestamp; +ELSE + MERGE INTO ONLY checks_fame c1 + USING checks_fame c2 ON c1.domain = c2.domain AND c1.domain = NEW.domain + WHEN NOT MATCHED THEN + DO NOTHING + WHEN MATCHED AND c1.mail_report_id IS NOT NULL THEN + UPDATE SET site_report_id = NULL, site_report_timestamp = NULL + WHEN MATCHED AND c1.mail_report_id IS NULL THEN + DELETE; + END IF; +RETURN NEW; +""", + ), + ] + ### # Mail test @@ -1093,6 +1125,79 @@ def __dir__(self): class Meta: app_label = "checks" + triggers = [ + pgtrigger.Trigger( + name="update_fame_on_mail_report", + when=pgtrigger.After, + operation=pgtrigger.Insert | pgtrigger.UpdateOf("score"), + func=""" +IF NEW.score IS NULL THEN + -- DO NOTHING +ELSIF NEW.score = 100 THEN + INSERT INTO checks_fame (domain, site_report_id, site_report_timestamp, mail_report_id, mail_report_timestamp) + VALUES (NEW.domain, NULL, NULL, NEW.id, NEW.timestamp) + ON CONFLICT (domain) + DO UPDATE SET mail_report_id = NEW.id, mail_report_timestamp = NEW.timestamp; +ELSE + MERGE INTO ONLY checks_fame c1 + USING checks_fame c2 ON c1.domain = c2.domain AND c1.domain = NEW.domain + WHEN NOT MATCHED THEN + DO NOTHING + WHEN MATCHED AND c1.site_report_id IS NOT NULL THEN + UPDATE SET mail_report_id = NULL, mail_report_timestamp = NULL + WHEN MATCHED AND c1.site_report_id IS NULL THEN + DELETE; + END IF; +RETURN NEW; +""", + ), + ] + + +class Fame(models.Model): + id = models.IntegerField(serialize=False, verbose_name="ID") + domain = models.CharField(max_length=255, primary_key=True, serialize=False) + site_report = models.ForeignKey(DomainTestReport, null=True, on_delete=models.CASCADE, db_index=False) + site_report_timestamp = models.DateTimeField(null=True) + mail_report = models.ForeignKey(MailTestReport, null=True, on_delete=models.CASCADE, db_index=False) + mail_report_timestamp = models.DateTimeField(null=True) + + def __dir__(self): + return ["domain", "site_report", "site_report_timestamp", "mail_report", "mail_report_timestamp"] + + class Meta: + app_label = "checks" + + indexes = [ + models.Index( + condition=Q(site_report_id__isnull=False), + fields=["-site_report_timestamp", "domain", "site_report_id"], + name="checks_fame_sites_idx", + ), + models.Index( + condition=Q(mail_report_id__isnull=False), + fields=["-mail_report_timestamp", "domain", "mail_report_id"], + name="checks_fame_mail_idx", + ), + # TODO: is there a way to alias/annotate the expressions? 
+ # (so psql `\d checks_fame_champions_idx` looks nice) + models.Index( + Greatest("site_report_timestamp", "mail_report_timestamp").desc(), + "domain", + Case( + When(GreaterThan(F("site_report_timestamp"), F("mail_report_timestamp")), then=Value("s")), + default=Value("m"), + output_field=models.CharField(max_length=1), + ), + Case( + When(GreaterThan(F("site_report_timestamp"), F("mail_report_timestamp")), then="site_report_id"), + default="mail_report_id", + ), + condition=Q(site_report_id__isnull=False) & Q(mail_report_id__isnull=False), + name="checks_fame_champions_idx", + ), + ] + class BatchUser(models.Model): """ diff --git a/checks/tasks/update.py b/checks/tasks/update.py deleted file mode 100644 index d287d9e89..000000000 --- a/checks/tasks/update.py +++ /dev/null @@ -1,154 +0,0 @@ -# Copyright: 2022, ECP, NLnet Labs and the Internet.nl contributors -# SPDX-License-Identifier: Apache-2.0 -from celery import shared_task -from celery.utils.log import get_task_logger -from django.core.cache import cache -from django.db import transaction - -from checks.models import DomainTestReport, MailTestReport -from interface import redis_id -from interface.batch import util - -logger = get_task_logger(__name__) - - -class HOFEntry: - def __init__(self, domain): - self.domain = domain - self.web_timestamp = None - self.web_permalink = None - self.mail_timestamp = None - self.mail_permalink = None - self.mail_nomx = None - - def __str__(self): - return f"""------- {self.domain} - web_timestamp: {self.web_timestamp} - web_permalink: {self.web_permalink} - mail_timestamp: {self.mail_timestamp} - mail_permalink: {self.mail_permalink} - mail_nomx: {self.mail_nomx} - """ - - -def _create_hof_entry(hof, domain_name): - """ - Create an entry in the Hall of Fame. - - """ - if domain_name in hof: - return hof[domain_name] - hof[domain_name] = HOFEntry(domain_name) - return hof[domain_name] - - -def _update_web_entry(hof, domain_name, report_id, timestamp): - """ - Update a web entry in the Hall of Fame. - - """ - entry = _create_hof_entry(hof, domain_name) - entry.web_timestamp = timestamp - entry.web_permalink = f"/site/{domain_name}/{report_id}/" - - -def _update_mail_entry(hof, domain_name, report_id, timestamp): - """ - Update a mail entry in the Hall of Fame. - - """ - entry = _create_hof_entry(hof, domain_name) - entry.mail_timestamp = timestamp - entry.mail_permalink = f"/mail/{domain_name}/{report_id}/" - report = MailTestReport.objects.get(id=report_id) - ipv6_report = report.ipv6.report - if not isinstance(ipv6_report, dict): - return - entry.mail_nomx = ipv6_report["mx_aaaa"]["verdict"] == "detail mail ipv6 mx-AAAA verdict other" - - -def _populate_HOF(hof, model, entry_creation): - """ - Find entries that qualify for the Hall of Fame. - - """ - previousname = None - previousscore = 0 - previoustimestamp = None - previousreportid = None - for report in model.objects.all().order_by("domain", "timestamp"): - if previousname != report.domain and previousname is not None: - if previousscore >= 100: - entry_creation(hof, previousname, previousreportid, previoustimestamp) - previousname = report.domain - previousscore = report.score or 0 - previoustimestamp = report.timestamp - previousreportid = report.id - - else: - report_score = report.score or 0 - if report_score != previousscore: - previoustimestamp = report.timestamp - previousname = report.domain - previousreportid = report.id - previousscore = report_score - - # Last domain name. 
- if previousscore >= 100: - entry_creation(hof, previousname, previousreportid, previoustimestamp) - - -@transaction.atomic -def _update_hof(): - """ - Populate the Hall of Fame with domains that scored 100% in the website - and/or the mail test. - - .. note:: Domains that are part of the HoF are domains that their *latest* - test scored 100%. - - """ - hof = dict() - for model, entry_creation in ((DomainTestReport, _update_web_entry), (MailTestReport, _update_mail_entry)): - _populate_HOF(hof, model, entry_creation) - - champions = [] - web = [] - mail = [] - for entry in hof.values(): - is_web = False - is_mail = False - if entry.web_permalink: - web.append({"permalink": entry.web_permalink, "domain": entry.domain, "timestamp": entry.web_timestamp}) - is_web = True - if entry.mail_permalink: - mail.append({"permalink": entry.mail_permalink, "domain": entry.domain, "timestamp": entry.mail_timestamp}) - is_mail = True - if is_web and is_mail: - timestamp = entry.mail_timestamp - permalink = entry.mail_permalink - if entry.web_timestamp > entry.mail_timestamp: - timestamp = entry.web_timestamp - permalink = entry.web_permalink - champions.append({"permalink": permalink, "domain": entry.domain, "timestamp": timestamp}) - champions = sorted(champions, key=lambda x: x["timestamp"], reverse=True) - web = sorted(web, key=lambda x: x["timestamp"], reverse=True) - mail = sorted(mail, key=lambda x: x["timestamp"], reverse=True) - - for data, red_id in ((champions, redis_id.hof_champions), (web, redis_id.hof_web), (mail, redis_id.hof_mail)): - cached_data = {"date": None, "count": 0, "data": data} - if cached_data["data"]: - cached_data["date"] = cached_data["data"][0]["timestamp"] - cached_data["count"] = len(cached_data["data"]) - cache_id = red_id.id - cache_ttl = red_id.ttl - cache.set(cache_id, cached_data, cache_ttl) - - -@shared_task -def update_hof(): - lock_id = redis_id.hof_lock.id - lock_ttl = redis_id.hof_lock.ttl - with util.memcache_lock(lock_id, lock_ttl) as acquired: - if acquired: - _update_hof() diff --git a/docker/compose.yaml b/docker/compose.yaml index 9a2def1a4..6e7d8186a 100644 --- a/docker/compose.yaml +++ b/docker/compose.yaml @@ -115,7 +115,6 @@ services: - INTERNET_NL_CHECK_SUPPORT_RPKI - PUBLIC_SUFFIX_LIST_URL - ENABLE_BATCH - - ENABLE_HOF - RABBIT_HOST=rabbitmq:15672 - SECRET_KEY - GENERATE_SECRET_KEY @@ -191,7 +190,6 @@ services: - CACHE_LOCATION= # disable batch checks - ENABLE_BATCH=False - - ENABLE_HOF=False - DEBUG - DEBUG_LOG - DEBUG_LOG_UNBOUND @@ -258,7 +256,6 @@ services: - INTERNET_NL_CHECK_SUPPORT_RPKI - PUBLIC_SUFFIX_LIST_URL - ENABLE_BATCH - - ENABLE_HOF - RABBIT_HOST=rabbitmq:15672 - SECRET_KEY - GENERATE_SECRET_KEY @@ -369,7 +366,6 @@ services: - INTERNET_NL_CHECK_SUPPORT_APPSECPRIV - INTERNET_NL_CHECK_SUPPORT_RPKI - ENABLE_BATCH - - ENABLE_HOF - RABBIT_HOST=rabbitmq:15672 - SECRET_KEY - GENERATE_SECRET_KEY diff --git a/docker/defaults.env b/docker/defaults.env index 8031010a7..f4a6d72c2 100644 --- a/docker/defaults.env +++ b/docker/defaults.env @@ -74,9 +74,6 @@ SENTRY_SERVER_NAME=unknown # enable batch API ENABLE_BATCH=False -# enable hall of fame -ENABLE_HOF=True - # URL of yaml file with manual hosters Hall of Fame HOSTERS_HOF_URL= # manual HoF pages to include diff --git a/docker/test.env b/docker/test.env index 12156a729..628b78367 100644 --- a/docker/test.env +++ b/docker/test.env @@ -8,9 +8,6 @@ INTERNETNL_DOMAINNAME=internet.test # use unique name to not conflict with integration tests COMPOSE_PROJECT_NAME=internetnl-test -# disable as it messes 
with batch jobs -ENABLE_HOF=True - # enable manual hof entryB HOSTERS_HOF_URL=http://static/static/hosters.yaml MANUAL_HOF_PAGES=hosters diff --git a/documentation/Docker-deployment-batch.md b/documentation/Docker-deployment-batch.md index 5c2fca461..9b8cc9aea 100644 --- a/documentation/Docker-deployment-batch.md +++ b/documentation/Docker-deployment-batch.md @@ -94,7 +94,6 @@ For instance specific configuration use the `docker/local.env` file. Please refe Batch installations require the following settings: - `ENABLE_BATCH`: Must be set to `True`, to enable batch API -- `ENABLE_HOF`: Must be set to `False`, to disable Hall of Fame processing And optionally: @@ -105,7 +104,6 @@ For example: cat >> docker/local.env < # allowed IP's to visit web interface without password diff --git a/integration_tests/integration/test_hof.py b/integration_tests/integration/test_hof.py index 82f750b2d..6f609a1d6 100644 --- a/integration_tests/integration/test_hof.py +++ b/integration_tests/integration/test_hof.py @@ -30,9 +30,6 @@ def test_hof_update(page, app_url, trigger_scheduled_task, unique_id, docker_com ), ) - # generate hof - trigger_scheduled_task("generate_HoF") - page.goto(app_url) page.get_by_role("link", name="Hall of Fame", exact=True).click() page.get_by_text("Websites").click() diff --git a/interface/redis_id.py b/interface/redis_id.py index f67b0b7fe..66b450f99 100644 --- a/interface/redis_id.py +++ b/interface/redis_id.py @@ -29,26 +29,12 @@ # Request limit per address req_limit = REDIS_RECORD("dom:req_limit:{}", 2 * 60 * 60) -# Lock for HoF updater -hof_lock = REDIS_RECORD("hof:updater:lock", 60 * 5) - -# HoF data -hof_champions = REDIS_RECORD("hof:champions", None) -hof_web = REDIS_RECORD("hof:web", None) -hof_mail = REDIS_RECORD("hof:mail", None) - # Public suffix list data psl_data = REDIS_RECORD("public:suffix:list", settings.PUBLIC_SUFFIX_LIST_RENEWAL) # Public suffix list loading flag psl_loading = REDIS_RECORD("public:suffix:list:loading", 60) -# Home page stats data -home_stats_data = REDIS_RECORD("home:stats:{}", None) - -# Home page stats lock -home_stats_lock = REDIS_RECORD("home:stats:lock", 60 * 2) - # Started connection test conn_test = REDIS_RECORD("conn:{}", settings.CACHE_TTL) diff --git a/interface/views/__init__.py b/interface/views/__init__.py index 09d18f9ac..0450e873e 100644 --- a/interface/views/__init__.py +++ b/interface/views/__init__.py @@ -6,22 +6,44 @@ from django.conf import settings from django.core.cache import cache from django.core.exceptions import DisallowedRedirect +from django.db import models, transaction +from django.db.models import Case, Count, F, Q, Value, When +from django.db.models.functions import Coalesce, Greatest +from django.db.models.lookups import GreaterThan from django.http import HttpResponse, HttpResponseRedirect from django.shortcuts import redirect, render from django.utils import translation from django.utils.translation import gettext as _ +from checks.models import ConnectionTest, DomainTestReport, Fame, MailTestReport from interface import redis_id, simple_cache_page -from interface.views.shared import ( - get_hof_champions, - get_hof_mail, - get_hof_manual, - get_hof_web, - update_base_stats, - SafeHttpResponseRedirect, +from interface.views.shared import get_hof_manual, SafeHttpResponseRedirect + +hof_champion = ( + Fame.objects.alias( + timestamp=Greatest("site_report_timestamp", "mail_report_timestamp"), + report_type=Case( + When(GreaterThan(F("site_report_timestamp"), F("mail_report_timestamp")), then=Value("s")), + 
default=Value("m"), + output_field=models.CharField(max_length=1), + ), + report_id=Case( + When(GreaterThan(F("site_report_timestamp"), F("mail_report_timestamp")), then="site_report_id"), + default="mail_report_id", + ), + ) + .annotate(timestamp=F("timestamp"), report_type=F("report_type"), report_id=F("report_id")) + .filter(Q(site_report_id__isnull=False) & Q(mail_report_id__isnull=False)) + .order_by("-timestamp") ) +def make_hof_champion_permalink(entry): + return "/{report_type}/{domain}/{report_id}/".format( + report_type="site" if entry.report_type == "s" else "mail", domain=entry.domain, report_id=entry.report_id + ) + + def page404(request, exception): return render( request, "404.html", dict(pageclass="error404", pagetitle=_("page404 title"), pagemenu="home"), status=404 @@ -29,25 +51,39 @@ def page404(request, exception): @simple_cache_page +@transaction.atomic def indexpage(request): if settings.INTERNETNL_BRANDING: articles = _("article .index").split() else: articles = _("article custom .index").split() articles = articles[0:6] - cache_id = redis_id.home_stats_data.id - novalue = "…" - statswebsite = cache.get(cache_id.format("statswebsite"), novalue) - statswebsitegood = cache.get(cache_id.format("statswebsitegood"), novalue) - statswebsitebad = cache.get(cache_id.format("statswebsitebad"), novalue) - statsmail = cache.get(cache_id.format("statsmail"), novalue) - statsmailgood = cache.get(cache_id.format("statsmailgood"), novalue) - statsmailbad = cache.get(cache_id.format("statsmailbad"), novalue) - statsconnection = cache.get(cache_id.format("statsconnection"), novalue) - statsconnectiongood = cache.get(cache_id.format("statsconnectiongood"), novalue) - statsconnectionbad = cache.get(cache_id.format("statsconnectionbad"), novalue) - update_base_stats() - hof_date, hof_count, hof_entries = get_hof_champions(10) + statswebsite = DomainTestReport.objects.aggregate(n=Count("domain", distinct=True))["n"] + statswebsitegood = Fame.objects.all().filter(Q(site_report_id__isnull=False)).count() + statswebsitebad = statswebsite - statswebsitegood + statsmail = MailTestReport.objects.aggregate(n=Count("domain", distinct=True))["n"] + statsmailgood = Fame.objects.all().filter(Q(mail_report_id__isnull=False)).count() + statsmailbad = statsmail - statsmailgood + statsconnection = ( + ConnectionTest.objects.all() + .filter(finished=True) + .aggregate(n=Count(Coalesce("ipv4_addr", "ipv6_addr"), distinct=True))["n"] + ) + statsconnectiongood = ( + ConnectionTest.objects.all() + .filter(finished=True, score_dnssec=100, score_ipv6=100) + .aggregate(n=Count(Coalesce("ipv4_addr", "ipv6_addr"), distinct=True))["n"] + ) + statsconnectionbad = statsconnection - statsconnectiongood + + hof_count = Fame.objects.filter(Q(site_report_id__isnull=False) & Q(mail_report_id__isnull=False)).count() + hof_entries = [] + hof_date = None + for entry in hof_champion.only("domain").all()[:10]: + if hof_date is None: + hof_date = entry.timestamp + hof_entries.append({"domain": entry.domain, "permalink": make_hof_champion_permalink(entry)}) + return render( request, "index.html", @@ -261,7 +297,10 @@ def _update_hof_with_manual(template_dict, current=None): @simple_cache_page def hofchampionspage(request): - hof_date, hof_count, hof_entries = get_hof_champions() + hof_entries = [] + for entry in hof_champion.only("domain").iterator(): + hof_entries.append({"domain": entry.domain, "permalink": make_hof_champion_permalink(entry)}) + template_dict = dict( pageclass="hall-of-fame", pagetitle=_("base halloffame 
champions"), @@ -270,8 +309,7 @@ def hofchampionspage(request): cpage="champions", hof_text="halloffame champions text", hof_subtitle="halloffame champions subtitle", - latest=hof_date, - count=hof_count, + count=len(hof_entries), halloffame=hof_entries, ) _update_hof_with_manual(template_dict) @@ -280,7 +318,11 @@ def hofchampionspage(request): @simple_cache_page def hofwebpage(request): - hof_date, hof_count, hof_entries = get_hof_web() + hof_entries = [] + hof_site = Fame.objects.alias().filter(Q(site_report_id__isnull=False)).order_by("-site_report_timestamp") + for entry in hof_site.only("domain", "site_report_id").iterator(): + hof_entries.append({"domain": entry.domain, "permalink": f"/site/{entry.domain}/{entry.site_report_id}/"}) + template_dict = dict( pageclass="hall-of-fame", pagetitle=_("base halloffame web"), @@ -289,8 +331,7 @@ def hofwebpage(request): cpage="web", hof_text="halloffame web text", hof_subtitle="halloffame web subtitle", - latest=hof_date, - count=hof_count, + count=len(hof_entries), halloffame=hof_entries, ) _update_hof_with_manual(template_dict) @@ -299,7 +340,11 @@ def hofwebpage(request): @simple_cache_page def hofmailpage(request): - hof_date, hof_count, hof_entries = get_hof_mail() + hof_entries = [] + hof_mail = Fame.objects.alias().filter(Q(mail_report_id__isnull=False)).order_by("-mail_report_timestamp") + for entry in hof_mail.only("domain", "mail_report_id").iterator(): + hof_entries.append({"domain": entry.domain, "permalink": f"/mail/{entry.domain}/{entry.mail_report_id}/"}) + template_dict = dict( pageclass="hall-of-fame", pagetitle=_("base halloffame mail"), @@ -308,8 +353,7 @@ def hofmailpage(request): cpage="mail", hof_text="halloffame mail text", hof_subtitle="halloffame mail subtitle", - latest=hof_date, - count=hof_count, + count=len(hof_entries), halloffame=hof_entries, ) _update_hof_with_manual(template_dict) diff --git a/interface/views/shared.py b/interface/views/shared.py index 79a0db942..3732904fa 100644 --- a/interface/views/shared.py +++ b/interface/views/shared.py @@ -8,11 +8,9 @@ import dns import idna import yaml -from celery import shared_task from django.conf import settings from django.core.cache import cache from django.core.exceptions import DisallowedRedirect -from django.db import connection from django.http import HttpResponseRedirect from django.shortcuts import render from django.utils import timezone @@ -24,7 +22,6 @@ from checks.resolver import dns_resolve, dns_resolve_soa from checks.tasks.dispatcher import ProbeTaskResult -from interface import redis_id from internetnl import log @@ -37,20 +34,6 @@ # ASCII label must consist entirely of letters (alphabetic characters a-z)". regex_dname = r"^([a-zA-Z0-9]([a-zA-Z0-9\-]{0,61}[a-zA-Z0-9])?\.)+" "([a-zA-Z]{2,63}|xn--[a-zA-Z0-9]+)$" -HOME_STATS_LOCK_ID = redis_id.home_stats_lock.id -HOME_STATS_LOCK_TTL = redis_id.home_stats_lock.ttl - - -def execsql(sql): - """ - Execute raw SQL query. 
- - """ - with connection.cursor() as cursor: - cursor.execute(sql, []) - row = cursor.fetchone() - return row[0] - def validate_dname(dname): """ @@ -236,18 +219,6 @@ def get_hof_cache(cache_id, count): return (cached_data["date"], cached_data["count"], cached_data["data"][:count]) -def get_hof_champions(count=1000): - return get_hof_cache(redis_id.hof_champions.id, count) - - -def get_hof_web(count=1000): - return get_hof_cache(redis_id.hof_web.id, count) - - -def get_hof_mail(count=1000): - return get_hof_cache(redis_id.hof_mail.id, count) - - def get_hof_manual(manual): hof_entries = [] try: @@ -305,164 +276,6 @@ def redirect_invalid_domain(request, domain_type): return HttpResponseRedirect("/") -@shared_task( - soft_time_limit=settings.SHARED_TASK_SOFT_TIME_LIMIT_LOW, - time_limit=settings.SHARED_TASK_TIME_LIMIT_LOW, - ignore_result=True, -) -def run_stats_queries(): - """ - Run the queries for the home page statistics and save the results in redis. - """ - - query = """ - select - count(distinct r.domain) as count - from - checks_domaintestreport as r - inner join - ( - select - domain, - max(timestamp) as timestamp - from - checks_domaintestreport - group by - domain - ) as rmax - on r.domain = rmax.domain - and r.timestamp = rmax.timestamp - """ - statswebsite = execsql(query) - statswebsitegood = get_hof_web(count=1)[1] - statswebsitebad = max(statswebsite - statswebsitegood, 0) - - query = """ - select - count(distinct r.domain) as count - from - checks_mailtestreport as r - inner join - ( - select - domain, - max(timestamp) as timestamp - from - checks_mailtestreport - group by - domain - ) as rmax - on r.domain = rmax.domain - and r.timestamp = rmax.timestamp - """ - statsmail = execsql(query) - statsmailgood = get_hof_mail(count=1)[1] - statsmailbad = max(statsmail - statsmailgood, 0) - - query = """ - select - count(distinct coalesce(ipv4_addr, - ipv6_addr)) as count - from - checks_connectiontest as r - inner join - ( - select - coalesce(ipv4_addr, - ipv6_addr) as source, - max(timestamp) as timestamp - from - checks_connectiontest - where - finished = true - group by - coalesce(ipv4_addr, - ipv6_addr) - ) as rmax - on coalesce(r.ipv4_addr, - r.ipv6_addr) = rmax.source - where - finished = true - """ - statsconnection = execsql(query) - - query = """ - select - count(distinct coalesce(ipv4_addr, - ipv6_addr)) as count - from - checks_connectiontest as r - inner join - ( - select - coalesce(ipv4_addr, - ipv6_addr) as source, - max(timestamp) as timestamp - from - checks_connectiontest - where - finished = true - group by - coalesce(ipv4_addr, - ipv6_addr) - ) as rmax - on coalesce(r.ipv4_addr, - r.ipv6_addr) = rmax.source - where - finished = true - and score_dnssec = 100 - and score_ipv6 = 100 - """ - statsconnectiongood = execsql(query) - statsconnectionbad = max(statsconnection - statsconnectiongood, 0) - - cache_id = redis_id.home_stats_data.id - cache_ttl = redis_id.home_stats_data.ttl - cache.set(cache_id.format("statswebsite"), statswebsite, cache_ttl) - cache.set(cache_id.format("statswebsitegood"), statswebsitegood, cache_ttl) - cache.set(cache_id.format("statswebsitebad"), statswebsitebad, cache_ttl) - cache.set(cache_id.format("statsmail"), statsmail, cache_ttl) - cache.set(cache_id.format("statsmailgood"), statsmailgood, cache_ttl) - cache.set(cache_id.format("statsmailbad"), statsmailbad, cache_ttl) - cache.set(cache_id.format("statsconnection"), statsconnection, cache_ttl) - cache.set(cache_id.format("statsconnectiongood"), statsconnectiongood, cache_ttl) - 
cache.set(cache_id.format("statsconnectionbad"), statsconnectionbad, cache_ttl) - - -@shared_task( - soft_time_limit=settings.SHARED_TASK_SOFT_TIME_LIMIT_LOW, - time_limit=settings.SHARED_TASK_TIME_LIMIT_LOW, - ignore_result=True, -) -def update_running_status(results): - """ - Signal that the queries for the home page statistics finished running. - - """ - cache_id = HOME_STATS_LOCK_ID - cache_ttl = HOME_STATS_LOCK_TTL - if cache.get(cache_id): - cache.set(cache_id, False, cache_ttl) - - -def update_base_stats(): - """ - If the queries for the home page statistics are not already running, - run them. - - This is done to: - - Not having to run the queries for every visit; - - Avoid queueing unnecessary tasks. - - """ - cache_id = HOME_STATS_LOCK_ID - cache_ttl = HOME_STATS_LOCK_TTL - if not cache.get(cache_id): - cache.set(cache_id, True, cache_ttl) - task_set = run_stats_queries.s() | update_running_status.s() - task_set() - - class SafeHttpResponseRedirect(HttpResponseRedirect): """ This light wrapper around HttpResponseRedirect refuses redirects to diff --git a/internetnl/celery.py b/internetnl/celery.py index b5bdd9a6a..3242f105e 100644 --- a/internetnl/celery.py +++ b/internetnl/celery.py @@ -43,13 +43,6 @@ def dummy_task(number: int = 0): "schedule": app.conf.BATCH_SCHEDULER_INTERVAL, } -if app.conf.ENABLE_HOF: - # Disable HoF when on batch mode, too much DB activity. - app.conf.beat_schedule["generate_HoF"] = { - "task": "checks.tasks.update.update_hof", - "schedule": app.conf.HOF_UPDATE_INTERVAL, - } - @receiver(autoreload_started) def restart_worker_on_autorestart(sender, **kwargs): diff --git a/internetnl/settings.py b/internetnl/settings.py index a1647f40a..a33868102 100644 --- a/internetnl/settings.py +++ b/internetnl/settings.py @@ -117,8 +117,6 @@ RABBIT_USER = getenv("RABBIT_USER", "guest") RABBIT_PASS = getenv("RABBIT_PASS", "guest") -ENABLE_HOF = get_boolean_env("ENABLE_HOF", True) - AUTORELOAD = get_boolean_env("INTERNETNL_AUTORELOAD", False) # -- End of manual configuration @@ -144,6 +142,7 @@ "checks", "django_hosts", "django_statsd", + "pgtrigger", ] if AUTORELOAD: INSTALLED_APPS += ["django_browser_reload"] @@ -297,7 +296,6 @@ CELERY_TASK_SEND_SENT_EVENT = True CELERY_IMPORTS = ( - "checks.tasks.update", "interface.batch.scheduler", "interface.batch.util", ) @@ -321,9 +319,6 @@ "checks.tasks.appsecpriv.web_callback": {"queue": "db_worker"}, "checks.tasks.rpki.web_callback": {"queue": "db_worker"}, "checks.tasks.rpki.mail_callback": {"queue": "db_worker"}, - "interface.views.shared.run_stats_queries": {"queue": "slow_db_worker"}, - "interface.views.shared.update_running_status": {"queue": "slow_db_worker"}, - "checks.tasks.update.update_hof": {"queue": "slow_db_worker"}, "checks.tasks.tls.web_cert": {"queue": "nassl_worker"}, "checks.tasks.tls.web_conn": {"queue": "nassl_worker"}, "checks.tasks.tls.mail_smtp_starttls": {"queue": "nassl_worker"}, @@ -538,8 +533,6 @@ "icon_file": "embed-badge-hosters-v3.svg", } -HOF_UPDATE_INTERVAL = 600 # seconds - LOGGING = { "version": 1, "disable_existing_loggers": False, diff --git a/requirements.in b/requirements.in index 9b1e00413..d21e2cdc1 100644 --- a/requirements.in +++ b/requirements.in @@ -17,6 +17,7 @@ django-bleach django-enumfields django-hosts django-markdown_deux +django-pgtrigger eventlet gevent libsass diff --git a/requirements.txt b/requirements.txt index cd772587d..8e2b53252 100644 --- a/requirements.txt +++ b/requirements.txt @@ -25,7 +25,9 @@ beautifulsoup4==4.12.3 billiard==4.2.1 # via celery bleach[css]==5.0.1 - 
# via django-bleach + # via + # bleach + # django-bleach cached-property==2.0.1 # via -r requirements.in celery==5.4.0 @@ -67,6 +69,7 @@ django==4.2.20 # -r requirements.in # django-bleach # django-browser-reload + # django-pgtrigger # django-redis django-bleach==3.1.0 # via -r requirements.in @@ -78,6 +81,8 @@ django-hosts==6.0 # via -r requirements.in django-markdown-deux==1.0.6 # via -r requirements.in +django-pgtrigger==4.15.2 + # via -r requirements.in django-redis==4.12.1 # via -r requirements.in django-statsd-mozilla==0.4.0
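
Note, not part of the patch itself: the sketch below illustrates the two paths through the new update_fame_on_site_report trigger, i.e. the ON CONFLICT upsert for a 100% score and the MERGE cleanup/delete for a later regression. It is a hypothetical Django test: the class name FameTriggerSketch and the domain "example.nl" are made up, and it assumes a DomainTestReport can be created with only domain, score and timestamp set and that migration 0019 (including the triggers) has been applied.

# Hypothetical sanity check, assuming DomainTestReport needs no other fields.
from django.test import TestCase
from django.utils import timezone

from checks.models import DomainTestReport, Fame


class FameTriggerSketch(TestCase):
    def test_perfect_site_score_enters_hof(self):
        # The AFTER INSERT trigger takes the ON CONFLICT upsert branch.
        report = DomainTestReport.objects.create(domain="example.nl", score=100, timestamp=timezone.now())
        fame = Fame.objects.get(domain="example.nl")
        self.assertEqual(fame.site_report_id, report.id)
        self.assertIsNone(fame.mail_report_id)

    def test_regression_drops_domain_from_hof(self):
        DomainTestReport.objects.create(domain="example.nl", score=100, timestamp=timezone.now())
        # A later, imperfect score takes the MERGE branch; with no mail
        # entry to keep, the trigger deletes the Fame row outright.
        DomainTestReport.objects.create(domain="example.nl", score=90, timestamp=timezone.now())
        self.assertFalse(Fame.objects.filter(domain="example.nl").exists())

The same reasoning applies to MailTestReport via update_fame_on_mail_report, with the site/mail columns swapped; the HoF views then read the trigger-maintained rows directly through the hof_champion queryset instead of the removed Redis caches.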