1
0
mirror of https://blitiri.com.ar/repos/chasquid synced 2025-12-18 14:47:03 +00:00

courier/smtp: Retry over plaintext on STARTTLS errors

When the SMTP courier gets an error on STARTTLS (either because the
command itself failed, or because there was a low-level TLS negotiation
error), today we just fail that attempt.

This can cause messages to never be delivered if the underlying reason
is a server misconfiguration (e.g. a server certificate that Go cannot
parse). This is quite rare in practice, but it can happen.

To prevent this situation, this patch adds logic so that the SMTP
courier retries over plaintext when STARTTLS fails.

This is still subject to security level checks, so this type of failure
cannot be used to downgrade connections to domains with which we have
previously established a TLS connection successfully.

Note that certificate validation issues are NOT included in this
type of failure, so they will not trigger a retry. The certificate
validation handling is unchanged by this patch.
This commit is contained in:
Alberto Bertogli
2023-03-03 09:51:48 +00:00
parent 1927e15ea2
commit fd9c6a748b
3 changed files with 61 additions and 22 deletions

View File

@@ -20,14 +20,16 @@ type FakeServer struct {
responses map[string]string responses map[string]string
wg *sync.WaitGroup wg *sync.WaitGroup
addr string addr string
conns int
tlsConfig *tls.Config tlsConfig *tls.Config
} }
func newFakeServer(t *testing.T, responses map[string]string) *FakeServer { func newFakeServer(t *testing.T, responses map[string]string, conns int) *FakeServer {
s := &FakeServer{ s := &FakeServer{
t: t, t: t,
tmpDir: testlib.MustTempDir(t), tmpDir: testlib.MustTempDir(t),
responses: responses, responses: responses,
conns: conns,
wg: &sync.WaitGroup{}, wg: &sync.WaitGroup{},
} }
s.start() s.start()
@@ -82,11 +84,10 @@ func (s *FakeServer) start() string {
s.initTLS() s.initTLS()
s.wg.Add(1) s.wg.Add(s.conns)
go func() { accept := func() {
defer s.wg.Done() defer s.wg.Done()
defer l.Close()
c, err := l.Accept() c, err := l.Accept()
if err != nil { if err != nil {
@@ -134,7 +135,11 @@ func (s *FakeServer) start() string {
c.Write([]byte(s.responses["_DATA"])) c.Write([]byte(s.responses["_DATA"]))
} }
} }
}() }
for i := 0; i < s.conns; i++ {
go accept()
}
return s.addr return s.addr
} }

View File

@@ -120,6 +120,8 @@ type attempt struct {
} }
func (a *attempt) deliver(mx string) (error, bool) { func (a *attempt) deliver(mx string) (error, bool) {
skipTLS := false
retry:
conn, err := net.DialTimeout("tcp", mx+":"+*smtpPort, smtpDialTimeout) conn, err := net.DialTimeout("tcp", mx+":"+*smtpPort, smtpDialTimeout)
if err != nil { if err != nil {
return a.tr.Errorf("Could not dial: %v", err), false return a.tr.Errorf("Could not dial: %v", err), false
@@ -137,7 +139,7 @@ func (a *attempt) deliver(mx string) (error, bool) {
} }
secLevel := domaininfo.SecLevel_PLAIN secLevel := domaininfo.SecLevel_PLAIN
if ok, _ := c.Extension("STARTTLS"); ok { if ok, _ := c.Extension("STARTTLS"); ok && !skipTLS {
config := &tls.Config{ config := &tls.Config{
ServerName: mx, ServerName: mx,
@@ -155,8 +157,21 @@ func (a *attempt) deliver(mx string) (error, bool) {
err = c.StartTLS(config) err = c.StartTLS(config)
if err != nil { if err != nil {
// If we could not complete a jump to TLS (either because the
// STARTTLS command itself failed server-side, or because we got a
// TLS negotiation error), retry but without trying to use TLS.
// This should be quite rare, but it can happen if the server
// certificate is not parseable by the Go library, or if it has a
// broken TLS stack.
// Note that invalid and self-signed certs do NOT fall in this
// category, those are handled by the VerifyConnection function
// above, and don't need a retry. This is only needed for lower
// level errors.
tlsCount.Add("tls:failed", 1) tlsCount.Add("tls:failed", 1)
return a.tr.Errorf("TLS error: %v", err), false a.tr.Errorf("TLS error, retrying without TLS: %v", err)
skipTLS = true
conn.Close()
goto retry
} }
} else { } else {
tlsCount.Add("plain", 1) tlsCount.Add("plain", 1)

View File

@@ -51,7 +51,7 @@ func TestSMTP(t *testing.T) {
"_DATA": "250 data ok\n", "_DATA": "250 data ok\n",
"QUIT": "250 quit ok\n", "QUIT": "250 quit ok\n",
} }
srv := newFakeServer(t, responses) srv := newFakeServer(t, responses, 1)
defer srv.Cleanup() defer srv.Cleanup()
host, port := srv.HostPort() host, port := srv.HostPort()
@@ -124,7 +124,7 @@ func TestSMTPErrors(t *testing.T) {
} }
for _, rs := range responses { for _, rs := range responses {
srv := newFakeServer(t, rs) srv := newFakeServer(t, rs, 1)
defer srv.Cleanup() defer srv.Cleanup()
host, port := srv.HostPort() host, port := srv.HostPort()
@@ -257,7 +257,7 @@ var tlsResponses = map[string]string{
func TestTLS(t *testing.T) { func TestTLS(t *testing.T) {
smtpTotalTimeout = 5 * time.Second smtpTotalTimeout = 5 * time.Second
srv := newFakeServer(t, tlsResponses) srv := newFakeServer(t, tlsResponses, 1)
defer srv.Cleanup() defer srv.Cleanup()
_, *smtpPort = srv.HostPort() _, *smtpPort = srv.HostPort()
@@ -285,7 +285,7 @@ func TestTLS(t *testing.T) {
"_DATA": "250 data ok\n", "_DATA": "250 data ok\n",
"QUIT": "250 quit ok\n", "QUIT": "250 quit ok\n",
} }
srv = newFakeServer(t, responses) srv = newFakeServer(t, responses, 1)
defer srv.Cleanup() defer srv.Cleanup()
_, *smtpPort = srv.HostPort() _, *smtpPort = srv.HostPort()
@@ -305,12 +305,27 @@ func TestTLSError(t *testing.T) {
smtpTotalTimeout = 5 * time.Second smtpTotalTimeout = 5 * time.Second
responses := map[string]string{ responses := map[string]string{
"_welcome": "220 welcome\n", "_welcome": "220 welcome\n",
// STARTTLS should be advertised so we try to initiate it.
"EHLO hello": "250-ehlo ok\n250 STARTTLS\n", "EHLO hello": "250-ehlo ok\n250 STARTTLS\n",
"STARTTLS": "500 starttls err\n",
"_STARTTLS": "no", // Error in STARTTLS request. Note that a TLS-layer error also falls
// under this code path, so both situations are covered by this test.
"STARTTLS": "500 starttls err\n",
"_STARTTLS": "no",
// Rest of the transaction is normal and straightforward.
"MAIL FROM:<me@me>": "250 mail ok\n",
"RCPT TO:<to@to>": "250 rcpt ok\n",
"DATA": "354 send data\n",
"_DATA": "250 data ok\n",
"QUIT": "250 quit ok\n",
} }
srv := newFakeServer(t, responses) // Note we expect 2 connections to the fake server (because of the retry
// after the failed STARTTLS). Note this also checks that we correctly
// close the errored connection, instead of leaving it lingering.
srv := newFakeServer(t, responses, 2)
defer srv.Cleanup() defer srv.Cleanup()
_, *smtpPort = srv.HostPort() _, *smtpPort = srv.HostPort()
@@ -320,12 +335,16 @@ func TestTLSError(t *testing.T) {
s, tmpDir := newSMTP(t) s, tmpDir := newSMTP(t)
defer testlib.RemoveIfOk(t, tmpDir) defer testlib.RemoveIfOk(t, tmpDir)
err, permanent := s.Deliver("me@me", "to@to", []byte("data")) err, _ := s.Deliver("me@me", "to@to", []byte("data"))
if !strings.Contains(err.Error(), "TLS error:") { if err != nil {
t.Errorf("expected TLS error, got: %v", err) t.Errorf("deliver failed: %v", err)
} }
if permanent != false {
t.Errorf("expected transient failure, got permanent") // Double check that we delivered over a plaintext connection.
tr := trace.New("test", "test")
defer tr.Finish()
if !s.Dinfo.OutgoingSecLevel(tr, "to", domaininfo.SecLevel_PLAIN) {
t.Errorf("delivery did not took place over plaintext as expected")
} }
srv.Wait() srv.Wait()
@@ -333,7 +352,7 @@ func TestTLSError(t *testing.T) {
func TestSTSPolicyEnforcement(t *testing.T) { func TestSTSPolicyEnforcement(t *testing.T) {
smtpTotalTimeout = 5 * time.Second smtpTotalTimeout = 5 * time.Second
srv := newFakeServer(t, tlsResponses) srv := newFakeServer(t, tlsResponses, 1)
defer srv.Cleanup() defer srv.Cleanup()
_, *smtpPort = srv.HostPort() _, *smtpPort = srv.HostPort()
@@ -372,7 +391,7 @@ func TestSTSPolicyEnforcement(t *testing.T) {
// Do another delivery attempt, but this time we trust the server cert. // Do another delivery attempt, but this time we trust the server cert.
// This time it should be successful, because the connection level should // This time it should be successful, because the connection level should
// be TLS_SECURE which is required by the STS policy. // be TLS_SECURE which is required by the STS policy.
srv = newFakeServer(t, tlsResponses) srv = newFakeServer(t, tlsResponses, 1)
_, *smtpPort = srv.HostPort() _, *smtpPort = srv.HostPort()
defer srv.Cleanup() defer srv.Cleanup()