mirror of
https://blitiri.com.ar/repos/chasquid
synced 2025-12-21 15:17:01 +00:00
When using STARTTLS, the SMTP courier needs to determine whether the server certificates are valid or not. Today, that's implemented via connecting once with full certificate verification, and if that fails, reconnecting with verification disabled. This works okay in practice, but it is slower on insecure servers (due to the reconnection), and some of them even complain because we connect too frequently, causing delivery problems. The latter has only been observed once, on the drv-berlin-brandenburg.de MX servers. To improve on that situation, this patch makes the courier do the TLS connection only once, and uses the verification results directly. The behaviour of the server is otherwise unchanged. The only difference is that when delivering mail to servers that have invalid certificates, we now connect once instead of twice. The tests are expanded to increase coverage for this particular case.
299 lines
8.0 KiB
Go
299 lines
8.0 KiB
Go
package courier
|
|
|
|
import (
|
|
"context"
|
|
"crypto/tls"
|
|
"crypto/x509"
|
|
"flag"
|
|
"net"
|
|
"time"
|
|
|
|
"golang.org/x/net/idna"
|
|
|
|
"blitiri.com.ar/go/chasquid/internal/domaininfo"
|
|
"blitiri.com.ar/go/chasquid/internal/envelope"
|
|
"blitiri.com.ar/go/chasquid/internal/expvarom"
|
|
"blitiri.com.ar/go/chasquid/internal/smtp"
|
|
"blitiri.com.ar/go/chasquid/internal/sts"
|
|
"blitiri.com.ar/go/chasquid/internal/trace"
|
|
)
|
|
|
|
var (
|
|
// Timeouts for SMTP delivery.
|
|
smtpDialTimeout = 1 * time.Minute
|
|
smtpTotalTimeout = 10 * time.Minute
|
|
|
|
// Port for outgoing SMTP.
|
|
// Tests can override this.
|
|
smtpPort = flag.String("testing__outgoing_smtp_port", "25",
|
|
"port to use for outgoing SMTP connections, ONLY FOR TESTING")
|
|
|
|
// Allow overriding of net.LookupMX for testing purposes.
|
|
// TODO: replace this with proper lookup interception once it is supported
|
|
// by Go.
|
|
netLookupMX = net.LookupMX
|
|
)
|
|
|
|
// Exported variables.
|
|
var (
|
|
tlsCount = expvarom.NewMap("chasquid/smtpOut/tlsCount",
|
|
"result", "count of TLS status on outgoing connections")
|
|
slcResults = expvarom.NewMap("chasquid/smtpOut/securityLevelChecks",
|
|
"result", "count of security level checks on outgoing connections")
|
|
|
|
stsSecurityModes = expvarom.NewMap("chasquid/smtpOut/sts/mode",
|
|
"mode", "count of STS checks on outgoing connections")
|
|
stsSecurityResults = expvarom.NewMap("chasquid/smtpOut/sts/security",
|
|
"result", "count of STS security checks on outgoing connections")
|
|
)
|
|
|
|
// SMTP delivers remote mail via outgoing SMTP.
|
|
type SMTP struct {
|
|
HelloDomain string
|
|
Dinfo *domaininfo.DB
|
|
STSCache *sts.PolicyCache
|
|
}
|
|
|
|
// Deliver an email. On failures, returns an error, and whether or not it is
|
|
// permanent.
|
|
func (s *SMTP) Deliver(from string, to string, data []byte) (error, bool) {
|
|
a := &attempt{
|
|
courier: s,
|
|
from: from,
|
|
to: to,
|
|
toDomain: envelope.DomainOf(to),
|
|
data: data,
|
|
tr: trace.New("Courier.SMTP", to),
|
|
}
|
|
defer a.tr.Finish()
|
|
a.tr.Debugf("%s -> %s", from, to)
|
|
|
|
// smtp.Client.Mail will add the <> for us when the address is empty.
|
|
if a.from == "<>" {
|
|
a.from = ""
|
|
}
|
|
|
|
mxs, err, perm := lookupMXs(a.tr, a.toDomain)
|
|
if err != nil || len(mxs) == 0 {
|
|
// Note this is considered a permanent error.
|
|
// This is in line with what other servers (Exim) do. However, the
|
|
// downside is that temporary DNS issues can affect delivery, so we
|
|
// have to make sure we try hard enough on the lookup above.
|
|
return a.tr.Errorf("Could not find mail server: %v", err), perm
|
|
}
|
|
|
|
a.stsPolicy = s.fetchSTSPolicy(a.tr, a.toDomain)
|
|
|
|
for _, mx := range mxs {
|
|
if a.stsPolicy != nil && !a.stsPolicy.MXIsAllowed(mx) {
|
|
a.tr.Printf("%q skipped as per MTA-STA policy", mx)
|
|
continue
|
|
}
|
|
|
|
var permanent bool
|
|
err, permanent = a.deliver(mx)
|
|
if err == nil {
|
|
return nil, false
|
|
}
|
|
if permanent {
|
|
return err, true
|
|
}
|
|
a.tr.Errorf("%q returned transient error: %v", mx, err)
|
|
}
|
|
|
|
// We exhausted all MXs failed to deliver, try again later.
|
|
return a.tr.Errorf("all MXs returned transient failures (last: %v)", err), false
|
|
}
|
|
|
|
type attempt struct {
|
|
courier *SMTP
|
|
|
|
from string
|
|
to string
|
|
data []byte
|
|
|
|
toDomain string
|
|
|
|
stsPolicy *sts.Policy
|
|
|
|
tr *trace.Trace
|
|
}
|
|
|
|
func (a *attempt) deliver(mx string) (error, bool) {
|
|
conn, err := net.DialTimeout("tcp", mx+":"+*smtpPort, smtpDialTimeout)
|
|
if err != nil {
|
|
return a.tr.Errorf("Could not dial: %v", err), false
|
|
}
|
|
defer conn.Close()
|
|
conn.SetDeadline(time.Now().Add(smtpTotalTimeout))
|
|
|
|
c, err := smtp.NewClient(conn, mx)
|
|
if err != nil {
|
|
return a.tr.Errorf("Error creating client: %v", err), false
|
|
}
|
|
|
|
if err = c.Hello(a.courier.HelloDomain); err != nil {
|
|
return a.tr.Errorf("Error saying hello: %v", err), false
|
|
}
|
|
|
|
secLevel := domaininfo.SecLevel_PLAIN
|
|
if ok, _ := c.Extension("STARTTLS"); ok {
|
|
config := &tls.Config{
|
|
ServerName: mx,
|
|
|
|
// Unfortunately, many servers use self-signed and invalid
|
|
// certificates. So we use a custom verification (identical to
|
|
// Go's) to distinguish between invalid and valid certificates.
|
|
// That information is used to track the security level, to
|
|
// prevent downgrade attacks.
|
|
InsecureSkipVerify: true,
|
|
VerifyConnection: func(cs tls.ConnectionState) error {
|
|
secLevel = a.verifyConnection(cs)
|
|
return nil
|
|
},
|
|
}
|
|
err = c.StartTLS(config)
|
|
if err != nil {
|
|
tlsCount.Add("tls:failed", 1)
|
|
return a.tr.Errorf("TLS error: %v", err), false
|
|
}
|
|
} else {
|
|
tlsCount.Add("plain", 1)
|
|
a.tr.Debugf("Insecure - NOT using TLS")
|
|
}
|
|
|
|
if !a.courier.Dinfo.OutgoingSecLevel(a.toDomain, secLevel) {
|
|
// We consider the failure transient, so transient misconfigurations
|
|
// do not affect deliveries.
|
|
slcResults.Add("fail", 1)
|
|
return a.tr.Errorf("Security level check failed (level:%s)", secLevel), false
|
|
}
|
|
slcResults.Add("pass", 1)
|
|
|
|
if a.stsPolicy != nil && a.stsPolicy.Mode == sts.Enforce {
|
|
// The connection MUST be validated by TLS.
|
|
// https://tools.ietf.org/html/rfc8461#section-4.2
|
|
if secLevel != domaininfo.SecLevel_TLS_SECURE {
|
|
stsSecurityResults.Add("fail", 1)
|
|
return a.tr.Errorf("invalid security level (%v) for STS policy",
|
|
secLevel), false
|
|
}
|
|
stsSecurityResults.Add("pass", 1)
|
|
a.tr.Debugf("STS policy: connection is using valid TLS")
|
|
}
|
|
|
|
if err = c.MailAndRcpt(a.from, a.to); err != nil {
|
|
return a.tr.Errorf("MAIL+RCPT %v", err), smtp.IsPermanent(err)
|
|
}
|
|
|
|
w, err := c.Data()
|
|
if err != nil {
|
|
return a.tr.Errorf("DATA %v", err), smtp.IsPermanent(err)
|
|
}
|
|
_, err = w.Write(a.data)
|
|
if err != nil {
|
|
return a.tr.Errorf("DATA writing: %v", err), smtp.IsPermanent(err)
|
|
}
|
|
|
|
err = w.Close()
|
|
if err != nil {
|
|
return a.tr.Errorf("DATA closing %v", err), smtp.IsPermanent(err)
|
|
}
|
|
|
|
_ = c.Quit()
|
|
a.tr.Debugf("done")
|
|
|
|
return nil, false
|
|
}
|
|
|
|
func (a *attempt) verifyConnection(cs tls.ConnectionState) domaininfo.SecLevel {
|
|
// Validate certificates, using the same logic Go does, and following the
|
|
// official example at
|
|
// https://pkg.go.dev/crypto/tls#example-Config-VerifyConnection.
|
|
opts := x509.VerifyOptions{
|
|
DNSName: cs.ServerName,
|
|
Intermediates: x509.NewCertPool(),
|
|
}
|
|
for _, cert := range cs.PeerCertificates[1:] {
|
|
opts.Intermediates.AddCert(cert)
|
|
}
|
|
_, err := cs.PeerCertificates[0].Verify(opts)
|
|
|
|
if err != nil {
|
|
// Invalid TLS cert, since it could not be verified.
|
|
a.tr.Debugf("Insecure - using TLS, but with an invalid cert")
|
|
tlsCount.Add("tls:insecure", 1)
|
|
return domaininfo.SecLevel_TLS_INSECURE
|
|
} else {
|
|
tlsCount.Add("tls:secure", 1)
|
|
a.tr.Debugf("Secure - using TLS")
|
|
return domaininfo.SecLevel_TLS_SECURE
|
|
}
|
|
}
|
|
|
|
func (s *SMTP) fetchSTSPolicy(tr *trace.Trace, domain string) *sts.Policy {
|
|
if s.STSCache == nil {
|
|
return nil
|
|
}
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
|
|
defer cancel()
|
|
|
|
policy, err := s.STSCache.Fetch(ctx, domain)
|
|
if err != nil {
|
|
return nil
|
|
}
|
|
|
|
tr.Debugf("got STS policy")
|
|
stsSecurityModes.Add(string(policy.Mode), 1)
|
|
|
|
return policy
|
|
}
|
|
|
|
func lookupMXs(tr *trace.Trace, domain string) ([]string, error, bool) {
|
|
domain, err := idna.ToASCII(domain)
|
|
if err != nil {
|
|
return nil, err, true
|
|
}
|
|
|
|
mxs := []string{}
|
|
|
|
mxRecords, err := netLookupMX(domain)
|
|
if err != nil {
|
|
// There was an error. It could be that the domain has no MX, in which
|
|
// case we have to fall back to A, or a bigger problem.
|
|
dnsErr, ok := err.(*net.DNSError)
|
|
if !ok {
|
|
tr.Debugf("Error resolving MX on %q: %v", domain, err)
|
|
return nil, err, false
|
|
} else if dnsErr.IsNotFound {
|
|
// MX not found, fall back to A.
|
|
tr.Debugf("MX for %s not found, falling back to A", domain)
|
|
mxs = []string{domain}
|
|
} else {
|
|
tr.Debugf("MX lookup error on %q: %v", domain, dnsErr)
|
|
return nil, err, !dnsErr.Temporary()
|
|
}
|
|
|
|
} else {
|
|
// Convert the DNS records to a plain string slice. They're already
|
|
// sorted by priority.
|
|
for _, r := range mxRecords {
|
|
mxs = append(mxs, r.Host)
|
|
}
|
|
}
|
|
|
|
// Note that mxs could be empty; in that case we do NOT fall back to A.
|
|
// This case is explicitly covered by the SMTP RFC.
|
|
// https://tools.ietf.org/html/rfc5321#section-5.1
|
|
|
|
// Cap the list of MXs to 5 hosts, to keep delivery attempt times
|
|
// sane and prevent abuse.
|
|
if len(mxs) > 5 {
|
|
mxs = mxs[:5]
|
|
}
|
|
|
|
tr.Debugf("MXs: %v", mxs)
|
|
return mxs, nil, true
|
|
}
|