smtpsrv: Strict CRLF enforcement in DATA contents

The RFCs are very clear that in DATA contents: > CR and LF MUST only occur together as CRLF; they MUST NOT appear > independently in the body. https://www.rfc-editor.org/rfc/rfc5322#section-2.3 https://www.rfc-editor.org/rfc/rfc5321#section-2.3.8 Allowing "independent" CR and LF can cause a number of problems. In particular, there is a new "SMTP smuggling attack" published recently that involves the server incorrectly parsing the end of DATA marker `\r\n.\r\n`, which an attacker can exploit to impersonate a server when email is transmitted server-to-server. https://www.postfix.org/smtp-smuggling.html https://sec-consult.com/blog/detail/smtp-smuggling-spoofing-e-mails-worldwide/ Currently, chasquid is vulnerable to this attack, because Go's standard libraries net/textproto and net/mail do not enforce CRLF strictly. This patch fixes the problem by introducing a new "dot reader" function that strictly enforces CRLF when reading dot-terminated data, used in the DATA input processing. When an invalid newline terminator is found, the connection is aborted immediately because we cannot safely recover from that state. We still keep the internal representation as LF-terminated for convenience and simplicity. However, the MDA courier is changed to pass CRLF-terminated lines, since that is an external program which could be strict when receiving email messages. See https://github.com/albertito/chasquid/issues/47 for more details and discussion.
2026-02-07 22:25:57 +00:00 · 2023-12-23 02:38:07 +00:00
parent e03594a2c7
commit a996106eee
15 changed files with 431 additions and 86 deletions
--- a/internal/courier/mda.go
+++ b/internal/courier/mda.go
@@ -11,6 +11,7 @@ import (
 	"unicode"
 	"blitiri.com.ar/go/chasquid/internal/envelope"
 	"blitiri.com.ar/go/chasquid/internal/normalize"
 	"blitiri.com.ar/go/chasquid/internal/trace"
 )
@@ -60,7 +61,12 @@ func (p *MDA) Deliver(from string, to string, data []byte) (error, bool) {
 	ctx, cancel := context.WithTimeout(context.Background(), p.Timeout)
 	defer cancel()
 	cmd := exec.CommandContext(ctx, p.Binary, args...)
-	cmd.Stdin = bytes.NewReader(data)
+
 	// Pass the email data via stdin. Normalize it to CRLF which is what the
 	// RFC-compliant representation require. By doing this at this end, we can
 	// keep a simpler internal representation and ensure there won't be any
 	// inconsistencies in newlines within the message (e.g. added headers).
 	cmd.Stdin = bytes.NewReader(normalize.ToCRLF(data))
 	output, err := cmd.CombinedOutput()
 	if ctx.Err() == context.DeadlineExceeded {
--- a/internal/normalize/normalize.go
+++ b/internal/normalize/normalize.go
@@ -3,6 +3,7 @@
 package normalize
 import (
 	"bytes"
 	"strings"
 	"blitiri.com.ar/go/chasquid/internal/envelope"
@@ -72,3 +73,23 @@ func DomainToUnicode(addr string) (string, error) {
 	domain, err := Domain(domain)
 	return user + "@" + domain, err
 }
 // ToCRLF converts the given buffer to CRLF line endings. If a line has a
 // preexisting CRLF, it leaves it be. It assumes that CR is never used on its
 // own.
 func ToCRLF(in []byte) []byte {
 	b := bytes.NewBuffer(nil)
 	b.Grow(len(in))
 	for _, c := range in {
 		switch c {
 		case '\r':
 			// Ignore CR, we'll add it back later. It should never appear
 			// alone in the contexts where this function is used.
 		case '\n':
 			b.Write([]byte("\r\n"))
 		default:
 			b.WriteByte(c)
 		}
 	}
 	return b.Bytes()
 }
--- a/internal/normalize/normalize_test.go
+++ b/internal/normalize/normalize_test.go
@@ -129,6 +129,22 @@ func TestDomainToUnicode(t *testing.T) {
 	}
 }
 func TestToCRLF(t *testing.T) {
 	cases := []struct {
 		in, out string
 	}{
 		{"", ""},
 		{"a\nb", "a\r\nb"},
 		{"a\r\nb", "a\r\nb"},
 	}
 	for _, c := range cases {
 		got := string(ToCRLF([]byte(c.in)))
 		if got != c.out {
 			t.Errorf("ToCRLF(%q) = %q, expected %q", c.in, got, c.out)
 		}
 	}
 }
 func FuzzUser(f *testing.F) {
 	f.Fuzz(func(t *testing.T, user string) {
 		User(user)
--- a/internal/smtpsrv/conn.go
+++ b/internal/smtpsrv/conn.go
@@ -11,7 +11,6 @@ import (
 	"math/rand"
 	"net"
 	"net/mail"
 	"net/textproto"
 	"os"
 	"os/exec"
 	"strconv"
@@ -312,6 +311,12 @@ loop:
 			if err != nil {
 				break
 			}
 		} else if code < 0 {
 			// Negative code means that we have to break the connection.
 			// TODO: This is hacky, it's probably worth it at this point to
 			// refactor this into using a custom response type.
 			c.tr.Errorf("%s closed the connection: %s", cmd, msg)
 			break
 		}
 	}
@@ -638,19 +643,19 @@ func (c *Conn) DATA(params string) (code int, msg string) {
 	// one, we don't want the command timeout to interfere.
 	c.conn.SetDeadline(c.deadline)
-	// Create a dot reader, limited to the maximum size.
+	// Read the data. Enforce CRLF correctness, and maximum size.
-	dotr := textproto.NewReader(bufio.NewReader(
+	c.data, err = readUntilDot(c.reader, c.maxDataSize)
 		io.LimitReader(c.reader, c.maxDataSize))).DotReader()
 	c.data, err = io.ReadAll(dotr)
 	if err != nil {
-		if err == io.ErrUnexpectedEOF {
+		if err == errMessageTooLarge {
-			// Message is too big already. But we need to keep reading until we see
+			// Message is too big; excess data has already been discarded.
 			// the "\r\n.\r\n", otherwise we will treat the remanent data that
 			// the user keeps sending as commands, and that's a security
 			// issue.
 			readUntilDot(c.reader)
 			return 552, "5.3.4 Message too big"
 		}
 		if err == errInvalidLineEnding {
 			// We can't properly recover from this, so we have to drop the
 			// connection.
 			c.writeResponse(521, "5.5.2 Error reading DATA: invalid line ending")
 			return -1, "Invalid line ending, closing connection"
 		}
 		return 554, fmt.Sprintf("5.4.0 Error reading DATA: %v", err)
 	}
@@ -952,24 +957,6 @@ func boolToStr(b bool) string {
 	return "0"
 }
 func readUntilDot(r *bufio.Reader) {
 	prevMore := false
 	for {
 		// The reader will not read more than the size of the buffer,
 		// so this doesn't cause increased memory consumption.
 		// The reader's data deadline will prevent this from continuing
 		// forever.
 		l, more, err := r.ReadLine()
 		if err != nil {
 			break
 		}
 		if !more && !prevMore && string(l) == "." {
 			break
 		}
 		prevMore = more
 	}
 }
 // STARTTLS SMTP command handler.
 func (c *Conn) STARTTLS(params string) (code int, msg string) {
 	if c.onTLS {
--- a/internal/smtpsrv/conn_test.go
+++ b/internal/smtpsrv/conn_test.go
@@ -1,10 +1,8 @@
 package smtpsrv
 import (
 	"bufio"
 	"net"
 	"os"
 	"strings"
 	"testing"
 	"blitiri.com.ar/go/chasquid/internal/domaininfo"
@@ -87,57 +85,6 @@ func TestIsHeader(t *testing.T) {
 	}
 }
 func TestReadUntilDot(t *testing.T) {
 	// This must be > than the minimum buffer size for bufio.Reader, which
 	// unfortunately is not available to us. The current value is 16, these
 	// tests will break if it gets increased, and the nonfinal cases will need
 	// to be adjusted.
 	size := 20
 	xs := "12345678901234567890"
 	final := []string{
 		"", ".", "..",
 		".\r\n", "\r\n.", "\r\n.\r\n",
 		".\n", "\n.", "\n.\n",
 		".\r", "\r.", "\r.\r",
 		xs + "\r\n.\r\n",
 		xs + "1234\r\n.\r\n",
 		xs + xs + "\r\n.\r\n",
 		xs + xs + xs + "\r\n.\r\n",
 		xs + "." + xs + "\n.",
 		xs + ".\n" + xs + "\n.",
 	}
 	for _, s := range final {
 		t.Logf("testing %q", s)
 		buf := bufio.NewReaderSize(strings.NewReader(s), size)
 		readUntilDot(buf)
 		if r := buf.Buffered(); r != 0 {
 			t.Errorf("%q: there are %d remaining bytes", s, r)
 		}
 	}
 	nonfinal := []struct {
 		s string
 		r int
 	}{
 		{".\na", 1},
 		{"\n.\na", 1},
 		{"\n.\nabc", 3},
 		{"\n.\n12345678", 8},
 		{"\n.\n" + xs, size - 3},
 		{"\n.\n" + xs + xs, size - 3},
 		{"\n.\n.\n", 2},
 	}
 	for _, c := range nonfinal {
 		t.Logf("testing %q", c.s)
 		buf := bufio.NewReaderSize(strings.NewReader(c.s), size)
 		readUntilDot(buf)
 		if r := buf.Buffered(); r != c.r {
 			t.Errorf("%q: expected %d remaining bytes, got %d", c.s, c.r, r)
 		}
 	}
 }
 func TestAddrLiteral(t *testing.T) {
 	// TCP addresses.
 	casesTCP := []struct {
--- a/internal/smtpsrv/dotreader.go
+++ b/internal/smtpsrv/dotreader.go
@@ -0,0 +1,111 @@
 package smtpsrv
 import (
 	"bufio"
 	"bytes"
 	"errors"
 	"io"
 )
 var (
 	// TODO: Include the line number and specific error, and have the
 	// caller add them to the trace.
 	errMessageTooLarge   = errors.New("message too large")
 	errInvalidLineEnding = errors.New("invalid line ending")
 )
 // readUntilDot reads from r until it encounters a dot-terminated line, or we
 // read max bytes. It enforces that input lines are terminated by "\r\n", and
 // that there are not "lonely" "\r" or "\n"s in the input.
 // It returns \n-terminated lines, which is what we use for our internal
 // representation for convenience (same as textproto DotReader does).
 func readUntilDot(r *bufio.Reader, max int64) ([]byte, error) {
 	buf := make([]byte, 0, 1024)
 	n := int64(0)
 	// Little state machine.
 	const (
 		prevOther = iota
 		prevCR
 		prevCRLF
 	)
 	// Start as if we just came from a '\r\n'; that way we avoid the need
 	// for special-casing the dot-stuffing at the very beginning.
 	prev := prevCRLF
 	last4 := make([]byte, 4)
 	skip := false
 loop:
 	for {
 		b, err := r.ReadByte()
 		if err == io.EOF {
 			return buf, io.ErrUnexpectedEOF
 		} else if err != nil {
 			return buf, err
 		}
 		n++
 		switch b {
 		case '\r':
 			if prev == prevCR {
 				return buf, errInvalidLineEnding
 			}
 			prev = prevCR
 			// We return a LF-terminated line, so skip the CR. This simplifies
 			// internal representation and makes it easier/less error prone to
 			// work with. It is converted back to CRLF on endpoints (e.g. in
 			// the couriers).
 			skip = true
 		case '\n':
 			if prev != prevCR {
 				return buf, errInvalidLineEnding
 			}
 			// If we come from a '\r\n.\r', we're done.
 			if bytes.Equal(last4, []byte("\r\n.\r")) {
 				break loop
 			}
 			// If we are only starting and see ".\r\n", we're also done; in
 			// that case the message is empty.
 			if n == 3 && bytes.Equal(last4, []byte("\x00\x00.\r")) {
 				return []byte{}, nil
 			}
 			prev = prevCRLF
 		default:
 			if prev == prevCR {
 				return buf, errInvalidLineEnding
 			}
 			if b == '.' && prev == prevCRLF {
 				// We come from "\r\n" and got a "."; as per dot-stuffing
 				// rules, we should skip that '.' in the output.
 				// https://www.rfc-editor.org/rfc/rfc5321#section-4.5.2
 				skip = true
 			}
 			prev = prevOther
 		}
 		// Keep the last 4 bytes separately, because they may not be in buf on
 		// messages that are too large.
 		copy(last4, last4[1:])
 		last4[3] = b
 		if int64(len(buf)) < max && !skip {
 			buf = append(buf, b)
 		}
 		skip = false
 	}
 	// Return an error if the message is too large. It is important to do this
 	// _outside_ the loop, because we need to keep reading until we get to the
 	// final "." before we return an error, so the SMTP dialog can continue
 	// properly after that.
 	// If we return too early, the remainder of the email is interpreted as
 	// part of the SMTP dialog (and exposing ourselves to smuggling attacks).
 	if n > max {
 		return buf, errMessageTooLarge
 	}
 	// If we made it this far, buf naturally ends in "\n" because we skipped
 	// the '.' due to dot-stuffing, and skip "\r"s.
 	return buf, nil
 }
--- a/internal/smtpsrv/dotreader_test.go
+++ b/internal/smtpsrv/dotreader_test.go
@@ -0,0 +1,89 @@
 package smtpsrv
 import (
 	"bufio"
 	"bytes"
 	"io"
 	"strings"
 	"testing"
 )
 func TestReadUntilDot(t *testing.T) {
 	cases := []struct {
 		input   string
 		max     int64
 		want    string
 		wantErr error
 	}{
 		// EOF before any input -> unexpected EOF.
 		{"", 0, "", io.ErrUnexpectedEOF},
 		{"", 1, "", io.ErrUnexpectedEOF},
 		// EOF after exceeding max -> unexpected EOF.
 		{"abcdef", 2, "ab", io.ErrUnexpectedEOF},
 		// \n at the beginning of the buffer are just as invalid, and the
 		// error takes precedence over the unexpected EOF.
 		{"\n", 0, "", errInvalidLineEnding},
 		{"\n", 1, "", errInvalidLineEnding},
 		{"\n", 2, "", errInvalidLineEnding},
 		{"\n\r\n.\r\n", 10, "", errInvalidLineEnding},
 		// \r and then EOF -> unexpected EOF, because we never had a chance to
 		// assess if the line ending is valid or not.
 		{"\r", 2, "", io.ErrUnexpectedEOF},
 		// Lonely \r -> invalid line ending.
 		{"abc\rdef", 10, "abc", errInvalidLineEnding},
 		{"abc\r\rdef", 10, "abc", errInvalidLineEnding},
 		// Lonely \n -> invalid line ending.
 		{"abc\ndef", 10, "abc", errInvalidLineEnding},
 		// Various valid cases.
 		{"abc\r\n.\r\n", 10, "abc\n", nil},
 		{"\r\n.\r\n", 10, "\n", nil},
 		// Start with the final dot - the smallest "message" (empty).
 		{".\r\n", 10, "", nil},
 		// Max bytes reached -> message too large.
 		{"abc\r\n.\r\n", 5, "abc\n", errMessageTooLarge},
 		{"abcdefg\r\n.\r\n", 5, "abcde", errMessageTooLarge},
 		{"ab\r\ncdefg\r\n.\r\n", 5, "ab\ncd", errMessageTooLarge},
 		// Dot-stuffing.
 		// https://www.rfc-editor.org/rfc/rfc5321#section-4.5.2
 		{"abc\r\n.def\r\n.\r\n", 20, "abc\ndef\n", nil},
 		{"abc\r\n..def\r\n.\r\n", 20, "abc\n.def\n", nil},
 		{"abc\r\n..\r\n.\r\n", 20, "abc\n.\n", nil},
 		{".x\r\n.\r\n", 20, "x\n", nil},
 		{"..\r\n.\r\n", 20, ".\n", nil},
 	}
 	for i, c := range cases {
 		r := bufio.NewReader(strings.NewReader(c.input))
 		got, err := readUntilDot(r, c.max)
 		if err != c.wantErr {
 			t.Errorf("case %d %q: got error %v, want %v", i, c.input, err, c.wantErr)
 		}
 		if !bytes.Equal(got, []byte(c.want)) {
 			t.Errorf("case %d %q: got %q, want %q", i, c.input, got, c.want)
 		}
 	}
 }
 type badBuffer bytes.Buffer
 func (b *badBuffer) Read(p []byte) (int, error) {
 	// Return an arbitrary non-EOF error for testing.
 	return 0, io.ErrNoProgress
 }
 func TestReadUntilDotReadError(t *testing.T) {
 	r := bufio.NewReader(&badBuffer{})
 	_, err := readUntilDot(r, 10)
 	if err != io.ErrNoProgress {
 		t.Errorf("got error %v, want %v", err, io.ErrNoProgress)
 	}
 }
--- a/test/t-12-minor_dialogs/bad_data_dot.cmy
+++ b/test/t-12-minor_dialogs/bad_data_dot.cmy
@@ -0,0 +1,29 @@
 c tcp_connect localhost:1025
 c <~ 220
 c -> EHLO localhost
 c <... 250 HELP
 c -> MAIL FROM: <>
 c <~ 250
 c -> RCPT TO: user@testserver
 c <~ 250
 c -> DATA
 c <~ 354
 c -> From: Mailer daemon <somewhere@horns.com>
 c -> Subject: I've come to haunt you
 c -> 
 c -> Muahahahaha
 c -> 
 # An MTA must not accept isolated line breaks, otherwise it may fall victim to
 # an SMTP smuggling attack. See readUntilDot for more details.
 # This test triggers that condition with an invalid dot-ending, so we verify
 # the server returns an error in this case.
 c ~> '.\n'
 c -> That was a bad line ending, this is a good one.
 c ~> 'xxx\r\n.\r\n'
 c <- 521 5.5.2 Error reading DATA: invalid line ending
--- a/test/t-12-minor_dialogs/bad_data_dot_2.cmy
+++ b/test/t-12-minor_dialogs/bad_data_dot_2.cmy
@@ -0,0 +1,29 @@
 c tcp_connect localhost:1025
 c <~ 220
 c -> EHLO localhost
 c <... 250 HELP
 c -> MAIL FROM: <>
 c <~ 250
 c -> RCPT TO: user@testserver
 c <~ 250
 c -> DATA
 c <~ 354
 c -> From: Mailer daemon <somewhere@horns.com>
 c -> Subject: I've come to haunt you
 c -> 
 c -> Muahahahaha
 c -> 
 # An MTA must not accept isolated line breaks, otherwise it may fall victim to
 # an SMTP smuggling attack. See readUntilDot for more details.
 # This test triggers that condition with an invalid dot-ending, so we verify
 # the server returns an error in this case.
 c ~> 'xxx\n.\n'
 c -> That was a bad line ending, this is a good one.
 c ~> '\r\n.\r\n'
 c <- 521 5.5.2 Error reading DATA: invalid line ending
--- a/test/t-12-minor_dialogs/bad_data_dot_on_message_too_big.cmy
+++ b/test/t-12-minor_dialogs/bad_data_dot_on_message_too_big.cmy
@@ -0,0 +1,37 @@
 c tcp_connect localhost:1025
 c <~ 220
 c -> EHLO localhost
 c <... 250 HELP
 c -> MAIL FROM: <>
 c <~ 250
 c -> RCPT TO: user@testserver
 c <~ 250
 c -> DATA
 c <~ 354
 c -> Subject: Message too big
 c -> 
 # Max message size is 1 MiB. Note this includes line endings but converted to
 # \n (as per textproto.DotReader), and excluding the final ".".
 # We already sent (in the header) 26.
 # Send lines of len 900 to stay under the limit.
 # (1024 * 1024 - 26) - (900 * 1165) = 50
 c ~> ('a' * 899 + '\r\n') * 1165
 # We have 50 characters left before the message is too big.
 c ~> 'b' * 55 + '\r\n'
 # At this point the message is too big. The remainder data should be
 # discarded.
 # We use a "bad ." to try to do an SMTP smuggling attack.
 c ~> '.\n'
 c -> HELP
 c -> HELP
 # And now the "good .".
 c -> .
 c <- 521 5.5.2 Error reading DATA: invalid line ending
--- a/test/t-12-minor_dialogs/config/chasquid.conf
+++ b/test/t-12-minor_dialogs/config/chasquid.conf
@@ -13,3 +13,6 @@ mail_log_path: "../.logs/mail_log"
 suffix_separators: "+-"
 drop_characters: "._"
 # Small max data size so we can reach it more easily in the tests.
 max_data_size_mb: 1
--- a/test/t-12-minor_dialogs/data_dot_stuffing.cmy
+++ b/test/t-12-minor_dialogs/data_dot_stuffing.cmy
@@ -0,0 +1,28 @@
 c tcp_connect localhost:1025
 c <~ 220
 c -> EHLO localhost
 c <... 250 HELP
 c -> MAIL FROM: <>
 c <~ 250
 c -> RCPT TO: user@testserver
 c <~ 250
 c -> DATA
 c <~ 354
 c -> .From: Mailer daemon <somewhere@horns.com>
 c -> Subject: I've come to haunt you
 c -> 
 c -> .Muahahahaha
 c -> 
 c -> ..x
 c -> 
 c -> ..
 c ->
 c -> .This is stuffy.
 c -> 
 c -> .
 c <~ 250
 c -> QUIT
 c <~ 221
--- a/test/t-12-minor_dialogs/data_dot_stuffing.cmy.verify
+++ b/test/t-12-minor_dialogs/data_dot_stuffing.cmy.verify
@@ -0,0 +1,11 @@
 From: Mailer daemon <somewhere@horns.com>
 Subject: I've come to haunt you
 Muahahahaha
 .x
 .
 This is stuffy.
--- a/test/t-12-minor_dialogs/message_too_big.cmy
+++ b/test/t-12-minor_dialogs/message_too_big.cmy
@@ -0,0 +1,28 @@
 c tcp_connect localhost:1025
 c <~ 220
 c -> EHLO localhost
 c <... 250 HELP
 c -> MAIL FROM: <>
 c <~ 250
 c -> RCPT TO: user@testserver
 c <~ 250
 c -> DATA
 c <~ 354
 c -> Subject: Message too big
 c -> 
 # Max message size is 1 MiB. Note this includes line endings but converted to
 # \n (as per textproto.DotReader), and excluding the final ".".
 # We already sent (in the header) 26.
 # Send lines of len 900 to stay under the limit.
 # (1024 * 1024 - 26) - (900 * 1166) = -850
 c ~> ('a' * 899 + '\r\n') * 1166
 c -> .
 c <~ 552 5.3.4 Message too big
 c -> QUIT
 c <~ 221
--- a/test/util/chamuyero
+++ b/test/util/chamuyero
@@ -204,12 +204,15 @@ class Interpreter (object):
                sock.connect()
                self.procs[proc] = sock
-            # ->   Send to a process stdin, with a \n at the end.
+            # ->   Send to a process stdin, with a \r\n at the end.
-            # .>   Send to a process stdin, no \n at the end.
+            # .>   Send to a process stdin, no \r\n at the end.
            # ~>   Send to a process stdin, string is python-evaluated.
            elif op == "->":
-                self.procs[proc].write(params + "\n")
+                self.procs[proc].write(params + "\r\n")
            elif op == ".>":
                self.procs[proc].write(params)
            elif op == "~>":
                self.procs[proc].write(eval(params))
            # <-   Read from the process, expect matching input.
            # <~   Read from the process, match input using regexp.