1
0
mirror of https://blitiri.com.ar/repos/chasquid synced 2025-12-18 14:47:03 +00:00

smtp: Try all entries in MX, not just the first one

Currently, we pick the first host in the MX list, and attempt delivery
there. If it fails, we just report the failure to the queue, which will
wait for some time and then try again.

This is not ideal: we should fall back to the other MXs in the list, as
the first host could be having issues for a long time, and not
attempting with the rest just delays delivery.

This patch implements the fallback, so we try all MXs before deciding to
report a failed delivery (unless, of course, an MX returned a permanent
failure).
This commit is contained in:
Alberto Bertogli
2017-02-26 03:26:07 +00:00
parent c2ea8a8ef0
commit b8551729db
2 changed files with 111 additions and 59 deletions

View File

@@ -27,7 +27,7 @@ var (
"port to use for outgoing SMTP connections, ONLY FOR TESTING")
// Fake MX records, used for testing only.
fakeMX = map[string]string{}
fakeMX = map[string][]string{}
)
// Exported variables.
@@ -42,31 +42,72 @@ type SMTP struct {
}
func (s *SMTP) Deliver(from string, to string, data []byte) (error, bool) {
tr := trace.New("Courier.SMTP", to)
defer tr.Finish()
tr.Debugf("%s -> %s", from, to)
a := &attempt{
courier: s,
from: from,
to: to,
data: data,
tr: trace.New("Courier.SMTP", to),
}
defer a.tr.Finish()
a.tr.Debugf("%s -> %s", from, to)
// smtp.Client.Mail will add the <> for us when the address is empty.
if a.from == "<>" {
a.from = ""
}
toDomain := envelope.DomainOf(to)
mx, err := lookupMX(tr, toDomain)
mxs, err := lookupMXs(a.tr, toDomain)
if err != nil {
// Note this is considered a permanent error.
// This is in line with what other servers (Exim) do. However, the
// downside is that temporary DNS issues can affect delivery, so we
// have to make sure we try hard enough on the lookup above.
return tr.Errorf("Could not find mail server: %v", err), true
return a.tr.Errorf("Could not find mail server: %v", err), true
}
// Issue an EHLO with a valid domain; otherwise, some servers like postfix
// will complain.
helloDomain, err := idna.ToASCII(envelope.DomainOf(from))
a.helloDomain, err = idna.ToASCII(envelope.DomainOf(from))
if err != nil {
return tr.Errorf("Sender domain not IDNA compliant: %v", err), true
return a.tr.Errorf("Sender domain not IDNA compliant: %v", err), true
}
if helloDomain == "" {
if a.helloDomain == "" {
// This can happen when sending bounces. Last resort.
helloDomain, _ = os.Hostname()
a.helloDomain, _ = os.Hostname()
}
for _, mx := range mxs {
var permanent bool
err, permanent = a.deliver(mx)
if err == nil {
return nil, false
}
if permanent {
return err, true
}
a.tr.Errorf("%q returned transient error: %v", mx, err)
}
// We exhausted all MXs and failed to deliver; try again later.
return a.tr.Errorf("all MXs returned transient failures (last: %v)", err), false
}
// attempt holds the state of a single Deliver call. It is created once per
// call and shared by every per-MX try made through its deliver method.
type attempt struct {
// courier is the SMTP courier that started this attempt; deliver reads its
// Dinfo to run the outgoing security level check.
courier *SMTP
// from is the envelope sender. Deliver rewrites "<>" to "" so that the SMTP
// client adds the brackets itself.
from string
// to is the envelope recipient, passed to MAIL+RCPT.
to string
// data is the message content written during the DATA phase.
data []byte
// toDomain is the recipient domain used for the security level check, which
// is skipped when this field is empty.
// NOTE(review): the visible Deliver code computes a local toDomain but does
// not appear to assign it to this field — confirm it gets populated,
// otherwise the security level check never runs.
toDomain string
// helloDomain is the domain announced in EHLO: the IDNA-encoded sender
// domain, or the local hostname when the sender has no domain.
helloDomain string
// tr is the trace used for debug and error reporting; Deliver finishes it
// when it returns.
tr *trace.Trace
}
func (a *attempt) deliver(mx string) (error, bool) {
// Do we use insecure TLS?
// Set as fallback when retrying.
insecure := false
@@ -75,18 +116,18 @@ func (s *SMTP) Deliver(from string, to string, data []byte) (error, bool) {
retry:
conn, err := net.DialTimeout("tcp", mx+":"+*smtpPort, smtpDialTimeout)
if err != nil {
return tr.Errorf("Could not dial: %v", err), false
return a.tr.Errorf("Could not dial: %v", err), false
}
defer conn.Close()
conn.SetDeadline(time.Now().Add(smtpTotalTimeout))
c, err := smtp.NewClient(conn, mx)
if err != nil {
return tr.Errorf("Error creating client: %v", err), false
return a.tr.Errorf("Error creating client: %v", err), false
}
if err = c.Hello(helloDomain); err != nil {
return tr.Errorf("Error saying hello: %v", err), false
if err = c.Hello(a.helloDomain); err != nil {
return a.tr.Errorf("Error saying hello: %v", err), false
}
if ok, _ := c.Extension("STARTTLS"); ok {
@@ -100,101 +141,110 @@ retry:
// fail verification we just try again without validating.
if insecure {
tlsCount.Add("tls:failed", 1)
return tr.Errorf("TLS error: %v", err), false
return a.tr.Errorf("TLS error: %v", err), false
}
insecure = true
tr.Debugf("TLS error, retrying insecurely")
a.tr.Debugf("TLS error, retrying insecurely")
goto retry
}
if config.InsecureSkipVerify {
tr.Debugf("Insecure - using TLS, but cert does not match %s", mx)
a.tr.Debugf("Insecure - using TLS, but cert does not match %s", mx)
tlsCount.Add("tls:insecure", 1)
secLevel = domaininfo.SecLevel_TLS_INSECURE
} else {
tlsCount.Add("tls:secure", 1)
tr.Debugf("Secure - using TLS")
a.tr.Debugf("Secure - using TLS")
secLevel = domaininfo.SecLevel_TLS_SECURE
}
} else {
tlsCount.Add("plain", 1)
tr.Debugf("Insecure - NOT using TLS")
a.tr.Debugf("Insecure - NOT using TLS")
}
if toDomain != "" && !s.Dinfo.OutgoingSecLevel(toDomain, secLevel) {
if a.toDomain != "" && !a.courier.Dinfo.OutgoingSecLevel(a.toDomain, secLevel) {
// We consider the failure transient, so transient misconfigurations
// do not affect deliveries.
slcResults.Add("fail", 1)
return tr.Errorf("Security level check failed (level:%s)", secLevel), false
return a.tr.Errorf("Security level check failed (level:%s)", secLevel), false
}
slcResults.Add("pass", 1)
// c.Mail will add the <> for us when the address is empty.
if from == "<>" {
from = ""
}
if err = c.MailAndRcpt(from, to); err != nil {
return tr.Errorf("MAIL+RCPT %v", err), smtp.IsPermanent(err)
if err = c.MailAndRcpt(a.from, a.to); err != nil {
return a.tr.Errorf("MAIL+RCPT %v", err), smtp.IsPermanent(err)
}
w, err := c.Data()
if err != nil {
return tr.Errorf("DATA %v", err), smtp.IsPermanent(err)
return a.tr.Errorf("DATA %v", err), smtp.IsPermanent(err)
}
_, err = w.Write(data)
_, err = w.Write(a.data)
if err != nil {
return tr.Errorf("DATA writing: %v", err), smtp.IsPermanent(err)
return a.tr.Errorf("DATA writing: %v", err), smtp.IsPermanent(err)
}
err = w.Close()
if err != nil {
return tr.Errorf("DATA closing %v", err), smtp.IsPermanent(err)
return a.tr.Errorf("DATA closing %v", err), smtp.IsPermanent(err)
}
c.Quit()
tr.Debugf("done")
a.tr.Debugf("done")
return nil, false
}
func lookupMX(tr *trace.Trace, domain string) (string, error) {
func lookupMXs(tr *trace.Trace, domain string) ([]string, error) {
if v, ok := fakeMX[domain]; ok {
return v, nil
}
domain, err := idna.ToASCII(domain)
if err != nil {
return "", err
return nil, err
}
mxs, err := net.LookupMX(domain)
if err == nil {
if len(mxs) == 0 {
tr.Debugf("domain %q has no MX, falling back to A", domain)
return domain, nil
}
tr.Debugf("MX %s", mxs[0].Host)
return mxs[0].Host, nil
}
mxs := []string{}
mxRecords, err := net.LookupMX(domain)
if err != nil {
// There was an error. It could be that the domain has no MX, in which
// case we have to fall back to A, or a bigger problem.
// Unfortunately, go's API doesn't let us easily distinguish between them.
// For now, if the error is permanent, we assume it's because there was no
// MX and fall back, otherwise we return.
// Unfortunately, go's API doesn't let us easily distinguish between
// them. For now, if the error is permanent, we assume it's because
// there was no MX and fall back, otherwise we return.
// TODO: Find a better way to do this.
dnsErr, ok := err.(*net.DNSError)
if !ok {
tr.Debugf("MX lookup error: %v", err)
return "", err
return nil, err
} else if dnsErr.Temporary() {
tr.Debugf("temporary DNS error: %v", dnsErr)
return "", err
return nil, err
}
// Permanent error, we assume MX does not exist and fall back to A.
tr.Debugf("failed to resolve MX for %s, falling back to A", domain)
return domain, nil
mxs = []string{domain}
} else {
// Convert the DNS records to a plain string slice. They're already
// sorted by priority.
for _, r := range mxRecords {
mxs = append(mxs, r.Host)
}
}
// Note that mxs could be empty; in that case we do NOT fall back to A.
// This case is explicitly covered by the SMTP RFC.
// https://tools.ietf.org/html/rfc5321#section-5.1
// Cap the list of MXs to 5 hosts, to keep delivery attempt times sane
// and prevent abuse.
if len(mxs) > 5 {
mxs = mxs[:5]
}
tr.Debugf("MXs: %v", mxs)
return mxs, nil
}

View File

@@ -87,7 +87,9 @@ func TestSMTP(t *testing.T) {
addr := fakeServer(t, responses)
host, port, _ := net.SplitHostPort(addr)
fakeMX["to"] = host
// Put a non-existing host first, so we check that if the first host
// doesn't work, we try with the rest.
fakeMX["to"] = []string{"nonexistinghost", host}
*smtpPort = port
s, tmpDir := newSMTP(t)
@@ -148,7 +150,7 @@ func TestSMTPErrors(t *testing.T) {
addr := fakeServer(t, rs)
host, port, _ := net.SplitHostPort(addr)
fakeMX["to"] = host
fakeMX["to"] = []string{host}
*smtpPort = port
s, tmpDir := newSMTP(t)