Moved spamd check to a custom file and cached the result

This commit is contained in:
Adrià Casajús 2022-04-07 19:17:37 +02:00
parent 44c77439c1
commit b128d64563
No known key found for this signature in database
GPG Key ID: F0033226A5AFC9B9
8 changed files with 172 additions and 142 deletions

View File

@ -70,10 +70,6 @@ from app.models import (
TransactionalEmail, TransactionalEmail,
IgnoreBounceSender, IgnoreBounceSender,
InvalidMailboxDomain, InvalidMailboxDomain,
DmarcCheckResult,
SpamdResult,
SPFCheckResult,
Phase,
) )
from app.utils import ( from app.utils import (
random_string, random_string,
@ -1442,31 +1438,3 @@ def save_email_for_debugging(msg: Message, file_name_prefix=None) -> str:
return file_name return file_name
return "" return ""
def get_spamd_result(
    msg: Message, send_event: bool = True, phase: Phase = Phase.unknown
) -> Optional[SpamdResult]:
    """Parse the spamd result header of ``msg`` into a ``SpamdResult``.

    Only the last occurrence of the header is inspected. Each line of its
    value is stripped and cut at the first "(" before being compared with
    the known DMARC and SPF symbols.

    Args:
        msg: message whose headers are read.
        send_event: when true, the parsed result is recorded as a
            "SpamdCheck" custom event.
        phase: phase stored on the returned result.

    Returns:
        The parsed result, or ``None`` when the header is absent.

    NOTE(review): the "missing header" event is recorded even when
    ``send_event`` is false -- confirm that this is intended.
    """
    raw_headers = msg.get_all(headers.SPAMD_RESULT)
    if not raw_headers:
        newrelic.agent.record_custom_event("SpamdCheck", {"header": "missing"})
        return None

    symbols = []
    for line in str(raw_headers[-1]).split("\n"):
        symbol = line.strip()
        cut = symbol.find("(")
        # Keep only the text before the first "(", if there is one.
        symbols.append(symbol if cut == -1 else symbol[:cut])

    result = SpamdResult(phase)
    # When several known symbols are present, the last one in mapping order
    # is the one that sticks.
    for symbol_name, dmarc_value in DmarcCheckResult.get_string_dict().items():
        if symbol_name in symbols:
            result.set_dmarc_result(dmarc_value)
    for symbol_name, spf_value in SPFCheckResult.get_string_dict().items():
        if symbol_name in symbols:
            result.set_spf_result(spf_value)

    if send_event:
        newrelic.agent.record_custom_event("SpamdCheck", result.event_data())
    return result

0
app/handler/__init__.py Normal file
View File

125
app/handler/spamd_result.py Normal file
View File

@ -0,0 +1,125 @@
from __future__ import annotations
from typing import Dict, Optional
import newrelic
from app.email import headers
from app.models import EnumE
from email.message import Message
class Phase(EnumE):
    """Phase that a spamd result is tagged with."""

    unknown = 0  # default when the caller does not specify a phase
    forward = 1
    reply = 2
class DmarcCheckResult(EnumE):
    """DMARC verdicts recognised in the spamd result header."""

    allow = 0
    soft_fail = 1
    quarantine = 2
    reject = 3
    not_available = 4
    bad_policy = 5

    @staticmethod
    def get_string_dict():
        """Return a fresh dict mapping each DMARC header symbol to its member."""
        symbol_pairs = (
            ("DMARC_POLICY_ALLOW", DmarcCheckResult.allow),
            ("DMARC_POLICY_SOFTFAIL", DmarcCheckResult.soft_fail),
            ("DMARC_POLICY_QUARANTINE", DmarcCheckResult.quarantine),
            ("DMARC_POLICY_REJECT", DmarcCheckResult.reject),
            ("DMARC_NA", DmarcCheckResult.not_available),
            ("DMARC_BAD_POLICY", DmarcCheckResult.bad_policy),
        )
        # Pair order is the iteration order callers see.
        return dict(symbol_pairs)
class SPFCheckResult(EnumE):
    """SPF verdicts recognised in the spamd result header."""

    allow = 0
    fail = 1
    # Deliberately not 1: sharing a value with ``fail`` would make
    # ``soft_fail`` a mere alias of it (standard Enum behaviour, assuming
    # EnumE follows it), so R_SPF_SOFTFAIL could not be told apart from
    # R_SPF_FAIL. 6 is the first unused value, which leaves every other
    # member's number unchanged.
    soft_fail = 6
    neutral = 2
    temp_error = 3
    not_available = 4
    perm_error = 5

    @staticmethod
    def get_string_dict():
        """Return a fresh dict mapping each SPF header symbol to its member."""
        return {
            "R_SPF_ALLOW": SPFCheckResult.allow,
            "R_SPF_FAIL": SPFCheckResult.fail,
            "R_SPF_SOFTFAIL": SPFCheckResult.soft_fail,
            "R_SPF_NEUTRAL": SPFCheckResult.neutral,
            "R_SPF_DNSFAIL": SPFCheckResult.temp_error,
            "R_SPF_NA": SPFCheckResult.not_available,
            "R_SPF_PERMFAIL": SPFCheckResult.perm_error,
        }
class SpamdResult:
    """DMARC and SPF verdicts parsed from a message's spamd result header.

    The parsed result is cached on the message object itself (attribute
    ``spamd_check``) so the header is only parsed once per message.
    """

    def __init__(self, phase: Phase = Phase.unknown):
        self.phase: Phase = phase
        # Both verdicts stay "not available" until a matching symbol is seen.
        self.dmarc: DmarcCheckResult = DmarcCheckResult.not_available
        self.spf: SPFCheckResult = SPFCheckResult.not_available

    def set_dmarc_result(self, dmarc_result: DmarcCheckResult):
        """Store the DMARC verdict."""
        self.dmarc = dmarc_result

    def set_spf_result(self, spf_result: SPFCheckResult):
        """Store the SPF verdict."""
        self.spf = spf_result

    def event_data(self) -> Dict:
        """Return the payload recorded for a "SpamdCheck" custom event."""
        return {
            "header": "present",
            "dmarc": self.dmarc,
            "spf": self.spf,
            "phase": self.phase,
        }

    @classmethod
    def extract_from_headers(
        cls, msg: Message, phase: Phase = Phase.unknown
    ) -> Optional[SpamdResult]:
        """Return the spamd result for ``msg``, parsing its header if needed.

        A result already cached on ``msg`` is returned as-is; in that case
        ``phase`` is ignored and the cached result keeps its original phase.

        Args:
            msg: message whose headers are read and on which the result is
                cached.
            phase: phase stored on a newly parsed result.

        Returns:
            The cached or newly parsed result, or ``None`` when the spamd
            result header is absent (nothing is cached in that case).
        """
        cached = cls._get_from_message(msg)
        if cached is not None:
            return cached

        spam_result_header = msg.get_all(headers.SPAMD_RESULT)
        if not spam_result_header:
            return None

        # Only the last header occurrence is used. Each line is stripped and
        # cut at the first "(" so that just the leading symbol remains.
        spam_entries = {
            entry.strip().partition("(")[0]
            for entry in str(spam_result_header[-1]).split("\n")
        }

        spamd_result = cls(phase)
        # If several known symbols are present, the last one in mapping
        # order is the one that sticks.
        for header_value, dmarc_result in DmarcCheckResult.get_string_dict().items():
            if header_value in spam_entries:
                spamd_result.set_dmarc_result(dmarc_result)
        for header_value, spf_result in SPFCheckResult.get_string_dict().items():
            if header_value in spam_entries:
                spamd_result.set_spf_result(spf_result)

        cls._store_in_message(spamd_result, msg)
        return spamd_result

    @classmethod
    def _store_in_message(cls, check: SpamdResult, msg: Message):
        """Cache ``check`` on ``msg`` as the ``spamd_check`` attribute."""
        msg.spamd_check = check

    @classmethod
    def _get_from_message(cls, msg: Message) -> Optional[SpamdResult]:
        """Return the result cached on ``msg``, or ``None`` if there is none."""
        return getattr(msg, "spamd_check", None)

    @classmethod
    def send_to_new_relic(cls, msg: Message):
        """Record a "SpamdCheck" custom event for the result cached on ``msg``.

        A "missing" event is recorded when nothing is cached, which includes
        the case where ``extract_from_headers`` was never called for ``msg``.

        NOTE(review): this module only does ``import newrelic``; presumably
        ``newrelic.agent`` is imported elsewhere before this runs -- confirm.
        """
        check = cls._get_from_message(msg)
        if check:
            newrelic.agent.record_custom_event("SpamdCheck", check.event_data())
        else:
            newrelic.agent.record_custom_event("SpamdCheck", {"header": "missing"})

View File

@ -3,7 +3,7 @@ import os
import random import random
import uuid import uuid
from email.utils import formataddr from email.utils import formataddr
from typing import List, Tuple, Optional, Dict from typing import List, Tuple, Optional
import arrow import arrow
import sqlalchemy as sa import sqlalchemy as sa
@ -237,75 +237,6 @@ class AuditLogActionEnum(EnumE):
extend_subscription = 7 extend_subscription = 7
class Phase(EnumE):
    """Phase that a spamd result is tagged with."""

    unknown = 0  # default used by SpamdResult when no phase is given
    forward = 1
    reply = 2
class DmarcCheckResult(EnumE):
    """DMARC verdicts, with a lookup from header symbol to member."""

    allow = 0
    soft_fail = 1
    quarantine = 2
    reject = 3
    not_available = 4
    bad_policy = 5

    @staticmethod
    def get_string_dict():
        """Build and return the header-symbol -> member mapping."""
        mapping = {}
        # Insertion order is preserved and is the order callers iterate in.
        mapping["DMARC_POLICY_ALLOW"] = DmarcCheckResult.allow
        mapping["DMARC_POLICY_SOFTFAIL"] = DmarcCheckResult.soft_fail
        mapping["DMARC_POLICY_QUARANTINE"] = DmarcCheckResult.quarantine
        mapping["DMARC_POLICY_REJECT"] = DmarcCheckResult.reject
        mapping["DMARC_NA"] = DmarcCheckResult.not_available
        mapping["DMARC_BAD_POLICY"] = DmarcCheckResult.bad_policy
        return mapping
class SPFCheckResult(EnumE):
    """SPF verdicts, with a lookup from header symbol to member."""

    allow = 0
    fail = 1
    # Must differ from ``fail``: with a shared value ``soft_fail`` becomes an
    # alias of ``fail`` (standard Enum behaviour, assuming EnumE follows it)
    # and the two verdicts cannot be distinguished. 6 is the first unused
    # value, so the other members keep their numbers.
    soft_fail = 6
    neutral = 2
    temp_error = 3
    not_available = 4
    perm_error = 5

    @staticmethod
    def get_string_dict():
        """Build and return the header-symbol -> member mapping."""
        return {
            "R_SPF_ALLOW": SPFCheckResult.allow,
            "R_SPF_FAIL": SPFCheckResult.fail,
            "R_SPF_SOFTFAIL": SPFCheckResult.soft_fail,
            "R_SPF_NEUTRAL": SPFCheckResult.neutral,
            "R_SPF_DNSFAIL": SPFCheckResult.temp_error,
            "R_SPF_NA": SPFCheckResult.not_available,
            "R_SPF_PERMFAIL": SPFCheckResult.perm_error,
        }
class SpamdResult:
    """Holder for the DMARC and SPF verdicts of one message and its phase."""

    def __init__(self, phase: Phase = Phase.unknown):
        # Verdicts default to "not available" until explicitly set.
        self.spf: SPFCheckResult = SPFCheckResult.not_available
        self.dmarc: DmarcCheckResult = DmarcCheckResult.not_available
        self.phase: Phase = phase

    def set_dmarc_result(self, dmarc_result: DmarcCheckResult):
        """Store the DMARC verdict."""
        self.dmarc = dmarc_result

    def set_spf_result(self, spf_result: SPFCheckResult):
        """Store the SPF verdict."""
        self.spf = spf_result

    def event_data(self) -> Dict:
        """Return the payload describing this result for a custom event."""
        payload = {"header": "present"}
        payload["dmarc"] = self.dmarc
        payload["spf"] = self.spf
        payload["phase"] = self.phase
        return payload
class Hibp(Base, ModelMixin): class Hibp(Base, ModelMixin):
__tablename__ = "hibp" __tablename__ = "hibp"
name = sa.Column(sa.String(), nullable=False, unique=True, index=True) name = sa.Column(sa.String(), nullable=False, unique=True, index=True)

View File

@ -92,6 +92,12 @@ from app.config import (
ALERT_DMARC_FAILED_REPLY_PHASE, ALERT_DMARC_FAILED_REPLY_PHASE,
) )
from app.db import Session from app.db import Session
from app.handler.spamd_result import (
SpamdResult,
Phase,
DmarcCheckResult,
SPFCheckResult,
)
from app.email import status, headers from app.email import status, headers
from app.email.rate_limit import rate_limited from app.email.rate_limit import rate_limited
from app.email.spam import get_spam_score from app.email.spam import get_spam_score
@ -131,7 +137,6 @@ from app.email_utils import (
get_orig_message_from_yahoo_complaint, get_orig_message_from_yahoo_complaint,
get_mailbox_bounce_info, get_mailbox_bounce_info,
save_email_for_debugging, save_email_for_debugging,
get_spamd_result,
) )
from app.errors import ( from app.errors import (
NonReverseAliasInReplyPhase, NonReverseAliasInReplyPhase,
@ -157,9 +162,6 @@ from app.models import (
DeletedAlias, DeletedAlias,
DomainDeletedAlias, DomainDeletedAlias,
Notification, Notification,
DmarcCheckResult,
SPFCheckResult,
Phase,
) )
from app.pgp_utils import PGPException, sign_data_with_pgpy, sign_data from app.pgp_utils import PGPException, sign_data_with_pgpy, sign_data
from app.utils import sanitize_email from app.utils import sanitize_email
@ -547,7 +549,7 @@ def handle_email_sent_to_ourself(alias, from_addr: str, msg: Message, user):
def apply_dmarc_policy_for_forward_phase( def apply_dmarc_policy_for_forward_phase(
alias: Alias, contact: Contact, envelope: Envelope, msg: Message alias: Alias, contact: Contact, envelope: Envelope, msg: Message
) -> Optional[str]: ) -> Optional[str]:
spam_result = get_spamd_result(msg, Phase.forward) spam_result = SpamdResult.extract_from_headers(msg, Phase.forward)
if not DMARC_CHECK_ENABLED or not spam_result: if not DMARC_CHECK_ENABLED or not spam_result:
return None return None
@ -640,7 +642,7 @@ def quarantine_dmarc_failed_forward_email(alias, contact, envelope, msg) -> Emai
def apply_dmarc_policy_for_reply_phase( def apply_dmarc_policy_for_reply_phase(
alias_from: Alias, contact_recipient: Contact, envelope: Envelope, msg: Message alias_from: Alias, contact_recipient: Contact, envelope: Envelope, msg: Message
) -> Optional[str]: ) -> Optional[str]:
spam_result = get_spamd_result(msg, Phase.reply) spam_result = SpamdResult.extract_from_headers(msg, Phase.reply)
if not DMARC_CHECK_ENABLED or not spam_result: if not DMARC_CHECK_ENABLED or not spam_result:
return None return None
@ -2657,9 +2659,9 @@ class MailHandler:
return_status = handle(envelope, msg) return_status = handle(envelope, msg)
elapsed = time.time() - start elapsed = time.time() - start
# Only bounce messages if the return-path passes the spf check. Otherwise black-hole it. # Only bounce messages if the return-path passes the spf check. Otherwise black-hole it.
spamd_result = SpamdResult.extract_from_headers(msg)
if return_status[0] == "5": if return_status[0] == "5":
spamd_result = get_spamd_result(msg, send_event=False) if spamd_result and spamd_result.spf in (
if spamd_result and get_spamd_result(msg).spf in (
SPFCheckResult.fail, SPFCheckResult.fail,
SPFCheckResult.soft_fail, SPFCheckResult.soft_fail,
): ):
@ -2675,6 +2677,8 @@ class MailHandler:
elapsed, elapsed,
return_status, return_status,
) )
SpamdResult.send_to_new_relic(msg)
newrelic.agent.record_custom_metric("Custom/email_handler_time", elapsed) newrelic.agent.record_custom_metric("Custom/email_handler_time", elapsed)
newrelic.agent.record_custom_metric("Custom/number_incoming_email", 1) newrelic.agent.record_custom_metric("Custom/number_incoming_email", 1)
return return_status return return_status

View File

View File

@ -0,0 +1,34 @@
from app.handler.spamd_result import DmarcCheckResult, SpamdResult
from tests.utils import load_eml_file
def test_dmarc_result_softfail():
    """dmarc_gmail_softfail.eml is parsed as a DMARC soft fail."""
    parsed = SpamdResult.extract_from_headers(load_eml_file("dmarc_gmail_softfail.eml"))
    assert parsed.dmarc == DmarcCheckResult.soft_fail
def test_dmarc_result_quarantine():
    """dmarc_quarantine.eml is parsed as a DMARC quarantine verdict."""
    parsed = SpamdResult.extract_from_headers(load_eml_file("dmarc_quarantine.eml"))
    assert parsed.dmarc == DmarcCheckResult.quarantine
def test_dmarc_result_reject():
    """dmarc_reject.eml is parsed as a DMARC reject verdict."""
    parsed = SpamdResult.extract_from_headers(load_eml_file("dmarc_reject.eml"))
    assert parsed.dmarc == DmarcCheckResult.reject
def test_dmarc_result_allow():
    """dmarc_allow.eml is parsed as a DMARC allow verdict."""
    parsed = SpamdResult.extract_from_headers(load_eml_file("dmarc_allow.eml"))
    assert parsed.dmarc == DmarcCheckResult.allow
def test_dmarc_result_na():
    """dmarc_na.eml is parsed as DMARC not available."""
    parsed = SpamdResult.extract_from_headers(load_eml_file("dmarc_na.eml"))
    assert parsed.dmarc == DmarcCheckResult.not_available
def test_dmarc_result_bad_policy():
    """dmarc_bad_policy.eml parses as bad policy and the result gets cached."""
    msg = load_eml_file("dmarc_bad_policy.eml")
    # Nothing is cached on the message before the first extraction.
    assert SpamdResult._get_from_message(msg) is None
    parsed = SpamdResult.extract_from_headers(msg)
    assert parsed.dmarc == DmarcCheckResult.bad_policy
    # Extraction must have stored the result on the message.
    assert SpamdResult._get_from_message(msg) is not None

View File

@ -36,7 +36,6 @@ from app.email_utils import (
get_orig_message_from_bounce, get_orig_message_from_bounce,
get_mailbox_bounce_info, get_mailbox_bounce_info,
is_invalid_mailbox_domain, is_invalid_mailbox_domain,
get_spamd_result,
) )
from app.models import ( from app.models import (
User, User,
@ -46,7 +45,6 @@ from app.models import (
EmailLog, EmailLog,
IgnoreBounceSender, IgnoreBounceSender,
InvalidMailboxDomain, InvalidMailboxDomain,
DmarcCheckResult,
) )
# flake8: noqa: E101, W191 # flake8: noqa: E101, W191
@ -793,33 +791,3 @@ def test_is_invalid_mailbox_domain(flask_client):
assert is_invalid_mailbox_domain("sub1.sub2.ab.cd") assert is_invalid_mailbox_domain("sub1.sub2.ab.cd")
assert not is_invalid_mailbox_domain("xy.zt") assert not is_invalid_mailbox_domain("xy.zt")
def test_dmarc_result_softfail():
    """dmarc_gmail_softfail.eml is parsed as a DMARC soft fail."""
    parsed = get_spamd_result(load_eml_file("dmarc_gmail_softfail.eml"))
    assert parsed.dmarc == DmarcCheckResult.soft_fail
def test_dmarc_result_quarantine():
    """dmarc_quarantine.eml is parsed as a DMARC quarantine verdict."""
    parsed = get_spamd_result(load_eml_file("dmarc_quarantine.eml"))
    assert parsed.dmarc == DmarcCheckResult.quarantine
def test_dmarc_result_reject():
    """dmarc_reject.eml is parsed as a DMARC reject verdict."""
    parsed = get_spamd_result(load_eml_file("dmarc_reject.eml"))
    assert parsed.dmarc == DmarcCheckResult.reject
def test_dmarc_result_allow():
    """dmarc_allow.eml is parsed as a DMARC allow verdict."""
    parsed = get_spamd_result(load_eml_file("dmarc_allow.eml"))
    assert parsed.dmarc == DmarcCheckResult.allow
def test_dmarc_result_na():
    """dmarc_na.eml is parsed as DMARC not available."""
    parsed = get_spamd_result(load_eml_file("dmarc_na.eml"))
    assert parsed.dmarc == DmarcCheckResult.not_available
def test_dmarc_result_bad_policy():
    """dmarc_bad_policy.eml is parsed as a DMARC bad policy verdict."""
    parsed = get_spamd_result(load_eml_file("dmarc_bad_policy.eml"))
    assert parsed.dmarc == DmarcCheckResult.bad_policy