From b128d64563ed69890799eba7915abac88cdf8e59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Casaj=C3=BAs?= Date: Thu, 7 Apr 2022 19:17:37 +0200 Subject: [PATCH] Moved spamd check to a custom file and cached the result --- app/email_utils.py | 32 -------- app/handler/__init__.py | 0 app/handler/spamd_result.py | 125 +++++++++++++++++++++++++++++ app/models.py | 71 +--------------- email_handler.py | 20 +++-- tests/handler/__init__.py | 0 tests/handler/test_spamd_result.py | 34 ++++++++ tests/test_email_utils.py | 32 -------- 8 files changed, 172 insertions(+), 142 deletions(-) create mode 100644 app/handler/__init__.py create mode 100644 app/handler/spamd_result.py create mode 100644 tests/handler/__init__.py create mode 100644 tests/handler/test_spamd_result.py diff --git a/app/email_utils.py b/app/email_utils.py index 207e113d..6ab133c2 100644 --- a/app/email_utils.py +++ b/app/email_utils.py @@ -70,10 +70,6 @@ from app.models import ( TransactionalEmail, IgnoreBounceSender, InvalidMailboxDomain, - DmarcCheckResult, - SpamdResult, - SPFCheckResult, - Phase, ) from app.utils import ( random_string, @@ -1442,31 +1438,3 @@ def save_email_for_debugging(msg: Message, file_name_prefix=None) -> str: return file_name return "" - - -def get_spamd_result( - msg: Message, send_event: bool = True, phase: Phase = Phase.unknown -) -> Optional[SpamdResult]: - spam_result_header = msg.get_all(headers.SPAMD_RESULT) - if not spam_result_header: - newrelic.agent.record_custom_event("SpamdCheck", {"header": "missing"}) - return None - - spam_entries = [entry.strip() for entry in str(spam_result_header[-1]).split("\n")] - for entry_pos in range(len(spam_entries)): - sep = spam_entries[entry_pos].find("(") - if sep > -1: - spam_entries[entry_pos] = spam_entries[entry_pos][:sep] - - spamd_result = SpamdResult(phase) - - for header_value, dmarc_result in DmarcCheckResult.get_string_dict().items(): - if header_value in spam_entries: - spamd_result.set_dmarc_result(dmarc_result) - for header_value, spf_result in SPFCheckResult.get_string_dict().items(): - if header_value in spam_entries: - spamd_result.set_spf_result(spf_result) - - if send_event: - newrelic.agent.record_custom_event("SpamdCheck", spamd_result.event_data()) - return spamd_result diff --git a/app/handler/__init__.py b/app/handler/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/app/handler/spamd_result.py b/app/handler/spamd_result.py new file mode 100644 index 00000000..cd5c8ce6 --- /dev/null +++ b/app/handler/spamd_result.py @@ -0,0 +1,125 @@ +from __future__ import annotations +from typing import Dict, Optional + +import newrelic + +from app.email import headers +from app.models import EnumE +from email.message import Message + + +class Phase(EnumE): + unknown = 0 + forward = 1 + reply = 2 + + +class DmarcCheckResult(EnumE): + allow = 0 + soft_fail = 1 + quarantine = 2 + reject = 3 + not_available = 4 + bad_policy = 5 + + @staticmethod + def get_string_dict(): + return { + "DMARC_POLICY_ALLOW": DmarcCheckResult.allow, + "DMARC_POLICY_SOFTFAIL": DmarcCheckResult.soft_fail, + "DMARC_POLICY_QUARANTINE": DmarcCheckResult.quarantine, + "DMARC_POLICY_REJECT": DmarcCheckResult.reject, + "DMARC_NA": DmarcCheckResult.not_available, + "DMARC_BAD_POLICY": DmarcCheckResult.bad_policy, + } + + +class SPFCheckResult(EnumE): + allow = 0 + fail = 1 + soft_fail = 1 + neutral = 2 + temp_error = 3 + not_available = 4 + perm_error = 5 + + @staticmethod + def get_string_dict(): + return { + "R_SPF_ALLOW": SPFCheckResult.allow, + "R_SPF_FAIL": SPFCheckResult.fail, + "R_SPF_SOFTFAIL": SPFCheckResult.soft_fail, + "R_SPF_NEUTRAL": SPFCheckResult.neutral, + "R_SPF_DNSFAIL": SPFCheckResult.temp_error, + "R_SPF_NA": SPFCheckResult.not_available, + "R_SPF_PERMFAIL": SPFCheckResult.perm_error, + } + + +class SpamdResult: + def __init__(self, phase: Phase = Phase.unknown): + self.phase: Phase = phase + self.dmarc: DmarcCheckResult = DmarcCheckResult.not_available + self.spf: SPFCheckResult = SPFCheckResult.not_available + + def set_dmarc_result(self, dmarc_result: DmarcCheckResult): + self.dmarc = dmarc_result + + def set_spf_result(self, spf_result: SPFCheckResult): + self.spf = spf_result + + def event_data(self) -> Dict: + return { + "header": "present", + "dmarc": self.dmarc, + "spf": self.spf, + "phase": self.phase, + } + + @classmethod + def extract_from_headers( + cls, msg: Message, phase: Phase = Phase.unknown + ) -> Optional[SpamdResult]: + cached = cls._get_from_message(msg) + if cached: + return cached + + spam_result_header = msg.get_all(headers.SPAMD_RESULT) + if not spam_result_header: + return None + + spam_entries = [ + entry.strip() for entry in str(spam_result_header[-1]).split("\n") + ] + for entry_pos in range(len(spam_entries)): + sep = spam_entries[entry_pos].find("(") + if sep > -1: + spam_entries[entry_pos] = spam_entries[entry_pos][:sep] + + spamd_result = SpamdResult(phase) + + for header_value, dmarc_result in DmarcCheckResult.get_string_dict().items(): + if header_value in spam_entries: + spamd_result.set_dmarc_result(dmarc_result) + for header_value, spf_result in SPFCheckResult.get_string_dict().items(): + if header_value in spam_entries: + spamd_result.set_spf_result(spf_result) + + cls._store_in_message(spamd_result, msg) + return spamd_result + + @classmethod + def _store_in_message(cls, check: SpamdResult, msg: Message): + msg.spamd_check = check + + @classmethod + def _get_from_message(cls, msg: Message) -> Optional[SpamdResult]: + return getattr(msg, "spamd_check", None) + + @classmethod + def send_to_new_relic(cls, msg: Message): + check = cls._get_from_message(msg) + if check: + newrelic.agent.record_custom_event("SpamdCheck", check.event_data()) + else: + newrelic.agent.record_custom_event("SpamdCheck", {"header": "missing"}) diff --git a/app/models.py b/app/models.py index 3e13d422..3f77b506 100644 --- a/app/models.py +++ b/app/models.py @@ -3,7 +3,7 @@ import os import random import uuid from email.utils import formataddr -from typing import List, Tuple, Optional, Dict +from typing import List, Tuple, Optional import arrow import sqlalchemy as sa @@ -237,75 +237,6 @@ class AuditLogActionEnum(EnumE): extend_subscription = 7 -class Phase(EnumE): - unknown = 0 - forward = 1 - reply = 2 - - -class DmarcCheckResult(EnumE): - allow = 0 - soft_fail = 1 - quarantine = 2 - reject = 3 - not_available = 4 - bad_policy = 5 - - @staticmethod - def get_string_dict(): - return { - "DMARC_POLICY_ALLOW": DmarcCheckResult.allow, - "DMARC_POLICY_SOFTFAIL": DmarcCheckResult.soft_fail, - "DMARC_POLICY_QUARANTINE": DmarcCheckResult.quarantine, - "DMARC_POLICY_REJECT": DmarcCheckResult.reject, - "DMARC_NA": DmarcCheckResult.not_available, - "DMARC_BAD_POLICY": DmarcCheckResult.bad_policy, - } - - -class SPFCheckResult(EnumE): - allow = 0 - fail = 1 - soft_fail = 1 - neutral = 2 - temp_error = 3 - not_available = 4 - perm_error = 5 - - @staticmethod - def get_string_dict(): - return { - "R_SPF_ALLOW": SPFCheckResult.allow, - "R_SPF_FAIL": SPFCheckResult.fail, - "R_SPF_SOFTFAIL": SPFCheckResult.soft_fail, - "R_SPF_NEUTRAL": SPFCheckResult.neutral, - "R_SPF_DNSFAIL": SPFCheckResult.temp_error, - "R_SPF_NA": SPFCheckResult.not_available, - "R_SPF_PERMFAIL": SPFCheckResult.perm_error, - } - - -class SpamdResult: - def __init__(self, phase: Phase = Phase.unknown): - self.phase: Phase = phase - self.dmarc: DmarcCheckResult = DmarcCheckResult.not_available - self.spf: SPFCheckResult = SPFCheckResult.not_available - - def set_dmarc_result(self, dmarc_result: DmarcCheckResult): - self.dmarc = dmarc_result - - def set_spf_result(self, spf_result: SPFCheckResult): - self.spf = spf_result - - def event_data(self) -> Dict: - return { - "header": "present", - "dmarc": self.dmarc, - "spf": self.spf, - "phase": self.phase, - } - - class Hibp(Base, ModelMixin): __tablename__ = "hibp" name = sa.Column(sa.String(), nullable=False, unique=True, index=True) diff --git a/email_handler.py b/email_handler.py index 3734ffd6..a8fe37d6 100644 --- a/email_handler.py +++ b/email_handler.py @@ -92,6 +92,12 @@ from app.config import ( ALERT_DMARC_FAILED_REPLY_PHASE, ) from app.db import Session +from app.handler.spamd_result import ( + SpamdResult, + Phase, + DmarcCheckResult, + SPFCheckResult, +) from app.email import status, headers from app.email.rate_limit import rate_limited from app.email.spam import get_spam_score @@ -131,7 +137,6 @@ from app.email_utils import ( get_orig_message_from_yahoo_complaint, get_mailbox_bounce_info, save_email_for_debugging, - get_spamd_result, ) from app.errors import ( NonReverseAliasInReplyPhase, @@ -157,9 +162,6 @@ from app.models import ( DeletedAlias, DomainDeletedAlias, Notification, - DmarcCheckResult, - SPFCheckResult, - Phase, ) from app.pgp_utils import PGPException, sign_data_with_pgpy, sign_data from app.utils import sanitize_email @@ -547,7 +549,7 @@ def handle_email_sent_to_ourself(alias, from_addr: str, msg: Message, user): def apply_dmarc_policy_for_forward_phase( alias: Alias, contact: Contact, envelope: Envelope, msg: Message ) -> Optional[str]: - spam_result = get_spamd_result(msg, Phase.forward) + spam_result = SpamdResult.extract_from_headers(msg, Phase.forward) if not DMARC_CHECK_ENABLED or not spam_result: return None @@ -640,7 +642,7 @@ def quarantine_dmarc_failed_forward_email(alias, contact, envelope, msg) -> Emai def apply_dmarc_policy_for_reply_phase( alias_from: Alias, contact_recipient: Contact, envelope: Envelope, msg: Message ) -> Optional[str]: - spam_result = get_spamd_result(msg, Phase.reply) + spam_result = SpamdResult.extract_from_headers(msg, Phase.reply) if not DMARC_CHECK_ENABLED or not spam_result: return None @@ -2657,9 +2659,9 @@ class MailHandler: return_status = handle(envelope, msg) elapsed = time.time() - start # Only bounce messages if the return-path passes the spf check. Otherwise black-hole it. + spamd_result = SpamdResult.extract_from_headers(msg) if return_status[0] == "5": - spamd_result = get_spamd_result(msg, send_event=False) - if spamd_result and get_spamd_result(msg).spf in ( + if spamd_result and spamd_result.spf in ( SPFCheckResult.fail, SPFCheckResult.soft_fail, ): @@ -2675,6 +2677,8 @@ class MailHandler: elapsed, return_status, ) + + SpamdResult.send_to_new_relic(msg) newrelic.agent.record_custom_metric("Custom/email_handler_time", elapsed) newrelic.agent.record_custom_metric("Custom/number_incoming_email", 1) return return_status diff --git a/tests/handler/__init__.py b/tests/handler/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/handler/test_spamd_result.py b/tests/handler/test_spamd_result.py new file mode 100644 index 00000000..641a8fab --- /dev/null +++ b/tests/handler/test_spamd_result.py @@ -0,0 +1,34 @@ +from app.handler.spamd_result import DmarcCheckResult, SpamdResult +from tests.utils import load_eml_file + + +def test_dmarc_result_softfail(): + msg = load_eml_file("dmarc_gmail_softfail.eml") + assert DmarcCheckResult.soft_fail == SpamdResult.extract_from_headers(msg).dmarc + + +def test_dmarc_result_quarantine(): + msg = load_eml_file("dmarc_quarantine.eml") + assert DmarcCheckResult.quarantine == SpamdResult.extract_from_headers(msg).dmarc + + +def test_dmarc_result_reject(): + msg = load_eml_file("dmarc_reject.eml") + assert DmarcCheckResult.reject == SpamdResult.extract_from_headers(msg).dmarc + + +def test_dmarc_result_allow(): + msg = load_eml_file("dmarc_allow.eml") + assert DmarcCheckResult.allow == SpamdResult.extract_from_headers(msg).dmarc + + +def test_dmarc_result_na(): + msg = load_eml_file("dmarc_na.eml") + assert DmarcCheckResult.not_available == SpamdResult.extract_from_headers(msg).dmarc + + +def test_dmarc_result_bad_policy(): + msg = load_eml_file("dmarc_bad_policy.eml") + assert SpamdResult._get_from_message(msg) is None + assert DmarcCheckResult.bad_policy == SpamdResult.extract_from_headers(msg).dmarc + assert SpamdResult._get_from_message(msg) is not None diff --git a/tests/test_email_utils.py b/tests/test_email_utils.py index e5d16744..91f439f1 100644 --- a/tests/test_email_utils.py +++ b/tests/test_email_utils.py @@ -36,7 +36,6 @@ from app.email_utils import ( get_orig_message_from_bounce, get_mailbox_bounce_info, is_invalid_mailbox_domain, - get_spamd_result, ) from app.models import ( User, @@ -46,7 +45,6 @@ from app.models import ( EmailLog, IgnoreBounceSender, InvalidMailboxDomain, - DmarcCheckResult, ) # flake8: noqa: E101, W191 @@ -793,33 +791,3 @@ def test_is_invalid_mailbox_domain(flask_client): assert is_invalid_mailbox_domain("sub1.sub2.ab.cd") assert not is_invalid_mailbox_domain("xy.zt") - - -def test_dmarc_result_softfail(): - msg = load_eml_file("dmarc_gmail_softfail.eml") - assert DmarcCheckResult.soft_fail == get_spamd_result(msg).dmarc - - -def test_dmarc_result_quarantine(): - msg = load_eml_file("dmarc_quarantine.eml") - assert DmarcCheckResult.quarantine == get_spamd_result(msg).dmarc - - -def test_dmarc_result_reject(): - msg = load_eml_file("dmarc_reject.eml") - assert DmarcCheckResult.reject == get_spamd_result(msg).dmarc - - -def test_dmarc_result_allow(): - msg = load_eml_file("dmarc_allow.eml") - assert DmarcCheckResult.allow == get_spamd_result(msg).dmarc - - -def test_dmarc_result_na(): - msg = load_eml_file("dmarc_na.eml") - assert DmarcCheckResult.not_available == get_spamd_result(msg).dmarc - - -def test_dmarc_result_bad_policy(): - msg = load_eml_file("dmarc_bad_policy.eml") - assert DmarcCheckResult.bad_policy == get_spamd_result(msg).dmarc