Moved spamd check to a custom file and cached the result

This commit is contained in:
Adrià Casajús 2022-04-07 19:17:37 +02:00
parent 44c77439c1
commit b128d64563
No known key found for this signature in database
GPG Key ID: F0033226A5AFC9B9
8 changed files with 172 additions and 142 deletions

View File

@ -70,10 +70,6 @@ from app.models import (
TransactionalEmail,
IgnoreBounceSender,
InvalidMailboxDomain,
DmarcCheckResult,
SpamdResult,
SPFCheckResult,
Phase,
)
from app.utils import (
random_string,
@ -1442,31 +1438,3 @@ def save_email_for_debugging(msg: Message, file_name_prefix=None) -> str:
return file_name
return ""
def get_spamd_result(
msg: Message, send_event: bool = True, phase: Phase = Phase.unknown
) -> Optional[SpamdResult]:
spam_result_header = msg.get_all(headers.SPAMD_RESULT)
if not spam_result_header:
newrelic.agent.record_custom_event("SpamdCheck", {"header": "missing"})
return None
spam_entries = [entry.strip() for entry in str(spam_result_header[-1]).split("\n")]
for entry_pos in range(len(spam_entries)):
sep = spam_entries[entry_pos].find("(")
if sep > -1:
spam_entries[entry_pos] = spam_entries[entry_pos][:sep]
spamd_result = SpamdResult(phase)
for header_value, dmarc_result in DmarcCheckResult.get_string_dict().items():
if header_value in spam_entries:
spamd_result.set_dmarc_result(dmarc_result)
for header_value, spf_result in SPFCheckResult.get_string_dict().items():
if header_value in spam_entries:
spamd_result.set_spf_result(spf_result)
if send_event:
newrelic.agent.record_custom_event("SpamdCheck", spamd_result.event_data())
return spamd_result

0
app/handler/__init__.py Normal file
View File

125
app/handler/spamd_result.py Normal file
View File

@ -0,0 +1,125 @@
from __future__ import annotations
from typing import Dict, Optional
import newrelic
from app.email import headers
from app.models import EnumE
from email.message import Message
class Phase(EnumE):
unknown = 0
forward = 1
reply = 2
class DmarcCheckResult(EnumE):
allow = 0
soft_fail = 1
quarantine = 2
reject = 3
not_available = 4
bad_policy = 5
@staticmethod
def get_string_dict():
return {
"DMARC_POLICY_ALLOW": DmarcCheckResult.allow,
"DMARC_POLICY_SOFTFAIL": DmarcCheckResult.soft_fail,
"DMARC_POLICY_QUARANTINE": DmarcCheckResult.quarantine,
"DMARC_POLICY_REJECT": DmarcCheckResult.reject,
"DMARC_NA": DmarcCheckResult.not_available,
"DMARC_BAD_POLICY": DmarcCheckResult.bad_policy,
}
class SPFCheckResult(EnumE):
allow = 0
fail = 1
soft_fail = 1
neutral = 2
temp_error = 3
not_available = 4
perm_error = 5
@staticmethod
def get_string_dict():
return {
"R_SPF_ALLOW": SPFCheckResult.allow,
"R_SPF_FAIL": SPFCheckResult.fail,
"R_SPF_SOFTFAIL": SPFCheckResult.soft_fail,
"R_SPF_NEUTRAL": SPFCheckResult.neutral,
"R_SPF_DNSFAIL": SPFCheckResult.temp_error,
"R_SPF_NA": SPFCheckResult.not_available,
"R_SPF_PERMFAIL": SPFCheckResult.perm_error,
}
class SpamdResult:
def __init__(self, phase: Phase = Phase.unknown):
self.phase: Phase = phase
self.dmarc: DmarcCheckResult = DmarcCheckResult.not_available
self.spf: SPFCheckResult = SPFCheckResult.not_available
def set_dmarc_result(self, dmarc_result: DmarcCheckResult):
self.dmarc = dmarc_result
def set_spf_result(self, spf_result: SPFCheckResult):
self.spf = spf_result
def event_data(self) -> Dict:
return {
"header": "present",
"dmarc": self.dmarc,
"spf": self.spf,
"phase": self.phase,
}
@classmethod
def extract_from_headers(
cls, msg: Message, phase: Phase = Phase.unknown
) -> Optional[SpamdResult]:
cached = cls._get_from_message(msg)
if cached:
return cached
spam_result_header = msg.get_all(headers.SPAMD_RESULT)
if not spam_result_header:
return None
spam_entries = [
entry.strip() for entry in str(spam_result_header[-1]).split("\n")
]
for entry_pos in range(len(spam_entries)):
sep = spam_entries[entry_pos].find("(")
if sep > -1:
spam_entries[entry_pos] = spam_entries[entry_pos][:sep]
spamd_result = SpamdResult(phase)
for header_value, dmarc_result in DmarcCheckResult.get_string_dict().items():
if header_value in spam_entries:
spamd_result.set_dmarc_result(dmarc_result)
for header_value, spf_result in SPFCheckResult.get_string_dict().items():
if header_value in spam_entries:
spamd_result.set_spf_result(spf_result)
cls._store_in_message(spamd_result, msg)
return spamd_result
@classmethod
def _store_in_message(cls, check: SpamdResult, msg: Message):
msg.spamd_check = check
@classmethod
def _get_from_message(cls, msg: Message) -> Optional[SpamdResult]:
return getattr(msg, "spamd_check", None)
@classmethod
def send_to_new_relic(cls, msg: Message):
check = cls._get_from_message(msg)
if check:
newrelic.agent.record_custom_event("SpamdCheck", check.event_data())
else:
newrelic.agent.record_custom_event("SpamdCheck", {"header": "missing"})

View File

@ -3,7 +3,7 @@ import os
import random
import uuid
from email.utils import formataddr
from typing import List, Tuple, Optional, Dict
from typing import List, Tuple, Optional
import arrow
import sqlalchemy as sa
@ -237,75 +237,6 @@ class AuditLogActionEnum(EnumE):
extend_subscription = 7
class Phase(EnumE):
unknown = 0
forward = 1
reply = 2
class DmarcCheckResult(EnumE):
allow = 0
soft_fail = 1
quarantine = 2
reject = 3
not_available = 4
bad_policy = 5
@staticmethod
def get_string_dict():
return {
"DMARC_POLICY_ALLOW": DmarcCheckResult.allow,
"DMARC_POLICY_SOFTFAIL": DmarcCheckResult.soft_fail,
"DMARC_POLICY_QUARANTINE": DmarcCheckResult.quarantine,
"DMARC_POLICY_REJECT": DmarcCheckResult.reject,
"DMARC_NA": DmarcCheckResult.not_available,
"DMARC_BAD_POLICY": DmarcCheckResult.bad_policy,
}
class SPFCheckResult(EnumE):
allow = 0
fail = 1
soft_fail = 1
neutral = 2
temp_error = 3
not_available = 4
perm_error = 5
@staticmethod
def get_string_dict():
return {
"R_SPF_ALLOW": SPFCheckResult.allow,
"R_SPF_FAIL": SPFCheckResult.fail,
"R_SPF_SOFTFAIL": SPFCheckResult.soft_fail,
"R_SPF_NEUTRAL": SPFCheckResult.neutral,
"R_SPF_DNSFAIL": SPFCheckResult.temp_error,
"R_SPF_NA": SPFCheckResult.not_available,
"R_SPF_PERMFAIL": SPFCheckResult.perm_error,
}
class SpamdResult:
def __init__(self, phase: Phase = Phase.unknown):
self.phase: Phase = phase
self.dmarc: DmarcCheckResult = DmarcCheckResult.not_available
self.spf: SPFCheckResult = SPFCheckResult.not_available
def set_dmarc_result(self, dmarc_result: DmarcCheckResult):
self.dmarc = dmarc_result
def set_spf_result(self, spf_result: SPFCheckResult):
self.spf = spf_result
def event_data(self) -> Dict:
return {
"header": "present",
"dmarc": self.dmarc,
"spf": self.spf,
"phase": self.phase,
}
class Hibp(Base, ModelMixin):
__tablename__ = "hibp"
name = sa.Column(sa.String(), nullable=False, unique=True, index=True)

View File

@ -92,6 +92,12 @@ from app.config import (
ALERT_DMARC_FAILED_REPLY_PHASE,
)
from app.db import Session
from app.handler.spamd_result import (
SpamdResult,
Phase,
DmarcCheckResult,
SPFCheckResult,
)
from app.email import status, headers
from app.email.rate_limit import rate_limited
from app.email.spam import get_spam_score
@ -131,7 +137,6 @@ from app.email_utils import (
get_orig_message_from_yahoo_complaint,
get_mailbox_bounce_info,
save_email_for_debugging,
get_spamd_result,
)
from app.errors import (
NonReverseAliasInReplyPhase,
@ -157,9 +162,6 @@ from app.models import (
DeletedAlias,
DomainDeletedAlias,
Notification,
DmarcCheckResult,
SPFCheckResult,
Phase,
)
from app.pgp_utils import PGPException, sign_data_with_pgpy, sign_data
from app.utils import sanitize_email
@ -547,7 +549,7 @@ def handle_email_sent_to_ourself(alias, from_addr: str, msg: Message, user):
def apply_dmarc_policy_for_forward_phase(
alias: Alias, contact: Contact, envelope: Envelope, msg: Message
) -> Optional[str]:
spam_result = get_spamd_result(msg, Phase.forward)
spam_result = SpamdResult.extract_from_headers(msg, Phase.forward)
if not DMARC_CHECK_ENABLED or not spam_result:
return None
@ -640,7 +642,7 @@ def quarantine_dmarc_failed_forward_email(alias, contact, envelope, msg) -> Emai
def apply_dmarc_policy_for_reply_phase(
alias_from: Alias, contact_recipient: Contact, envelope: Envelope, msg: Message
) -> Optional[str]:
spam_result = get_spamd_result(msg, Phase.reply)
spam_result = SpamdResult.extract_from_headers(msg, Phase.reply)
if not DMARC_CHECK_ENABLED or not spam_result:
return None
@ -2657,9 +2659,9 @@ class MailHandler:
return_status = handle(envelope, msg)
elapsed = time.time() - start
# Only bounce messages if the return-path passes the spf check. Otherwise black-hole it.
spamd_result = SpamdResult.extract_from_headers(msg)
if return_status[0] == "5":
spamd_result = get_spamd_result(msg, send_event=False)
if spamd_result and get_spamd_result(msg).spf in (
if spamd_result and spamd_result.spf in (
SPFCheckResult.fail,
SPFCheckResult.soft_fail,
):
@ -2675,6 +2677,8 @@ class MailHandler:
elapsed,
return_status,
)
SpamdResult.send_to_new_relic(msg)
newrelic.agent.record_custom_metric("Custom/email_handler_time", elapsed)
newrelic.agent.record_custom_metric("Custom/number_incoming_email", 1)
return return_status

View File

View File

@ -0,0 +1,34 @@
from app.handler.spamd_result import DmarcCheckResult, SpamdResult
from tests.utils import load_eml_file
def test_dmarc_result_softfail():
msg = load_eml_file("dmarc_gmail_softfail.eml")
assert DmarcCheckResult.soft_fail == SpamdResult.extract_from_headers(msg).dmarc
def test_dmarc_result_quarantine():
msg = load_eml_file("dmarc_quarantine.eml")
assert DmarcCheckResult.quarantine == SpamdResult.extract_from_headers(msg).dmarc
def test_dmarc_result_reject():
msg = load_eml_file("dmarc_reject.eml")
assert DmarcCheckResult.reject == SpamdResult.extract_from_headers(msg).dmarc
def test_dmarc_result_allow():
msg = load_eml_file("dmarc_allow.eml")
assert DmarcCheckResult.allow == SpamdResult.extract_from_headers(msg).dmarc
def test_dmarc_result_na():
msg = load_eml_file("dmarc_na.eml")
assert DmarcCheckResult.not_available == SpamdResult.extract_from_headers(msg).dmarc
def test_dmarc_result_bad_policy():
msg = load_eml_file("dmarc_bad_policy.eml")
assert SpamdResult._get_from_message(msg) is None
assert DmarcCheckResult.bad_policy == SpamdResult.extract_from_headers(msg).dmarc
assert SpamdResult._get_from_message(msg) is not None

View File

@ -36,7 +36,6 @@ from app.email_utils import (
get_orig_message_from_bounce,
get_mailbox_bounce_info,
is_invalid_mailbox_domain,
get_spamd_result,
)
from app.models import (
User,
@ -46,7 +45,6 @@ from app.models import (
EmailLog,
IgnoreBounceSender,
InvalidMailboxDomain,
DmarcCheckResult,
)
# flake8: noqa: E101, W191
@ -793,33 +791,3 @@ def test_is_invalid_mailbox_domain(flask_client):
assert is_invalid_mailbox_domain("sub1.sub2.ab.cd")
assert not is_invalid_mailbox_domain("xy.zt")
def test_dmarc_result_softfail():
msg = load_eml_file("dmarc_gmail_softfail.eml")
assert DmarcCheckResult.soft_fail == get_spamd_result(msg).dmarc
def test_dmarc_result_quarantine():
msg = load_eml_file("dmarc_quarantine.eml")
assert DmarcCheckResult.quarantine == get_spamd_result(msg).dmarc
def test_dmarc_result_reject():
msg = load_eml_file("dmarc_reject.eml")
assert DmarcCheckResult.reject == get_spamd_result(msg).dmarc
def test_dmarc_result_allow():
msg = load_eml_file("dmarc_allow.eml")
assert DmarcCheckResult.allow == get_spamd_result(msg).dmarc
def test_dmarc_result_na():
msg = load_eml_file("dmarc_na.eml")
assert DmarcCheckResult.not_available == get_spamd_result(msg).dmarc
def test_dmarc_result_bad_policy():
msg = load_eml_file("dmarc_bad_policy.eml")
assert DmarcCheckResult.bad_policy == get_spamd_result(msg).dmarc