normalize: Improve ToCRLF/StringToCRLF performance

The ToCRLF/StringToCRLF functions are not very performance critical, but we call it for each mail, and the current implementation is very inefficient (mainly because it goes one byte at a time). This patch replaces it with a better implementation that goes line by line. The new implementation of ToCRLF is ~40% faster, and StringToCRLF is ~60% faster. ``` $ benchstat old.txt new.txt goos: linux goarch: amd64 pkg: blitiri.com.ar/go/chasquid/internal/normalize cpu: 13th Gen Intel(R) Core(TM) i9-13900T │ old.txt │ new.txt │ │ sec/op │ sec/op vs base │ ToCRLF-32 162.96µ ± 6% 95.42µ ± 12% -41.44% (p=0.000 n=10) StringToCRLF-32 190.70µ ± 14% 76.51µ ± 6% -59.88% (p=0.000 n=10) geomean 176.3µ 85.44µ -51.53% ```
2026-02-09 22:46:04 +00:00 · 2024-10-31 13:05:57 +00:00
parent 723c47d352
commit 41bb7b6f5e
2 changed files with 95 additions and 12 deletions
--- a/internal/normalize/normalize.go
+++ b/internal/normalize/normalize.go
@@ -78,23 +78,63 @@ func DomainToUnicode(addr string) (string, error) {
 // preexisting CRLF, it leaves it be. It assumes that CR is never used on its
 // own.
 func ToCRLF(in []byte) []byte {
-	b := bytes.NewBuffer(nil)
+	b := bytes.Buffer{}
 	b.Grow(len(in))
-	for _, c := range in {
+
-		switch c {
+	// We go line by line, but beware:
-		case '\r':
+	//   Split("a\nb", "\n") -> ["a", "b"]
-			// Ignore CR, we'll add it back later. It should never appear
+	//   Split("a\nb\n", "\n") -> ["a", "b", ""]
-			// alone in the contexts where this function is used.
+	// So we handle the last line separately.
-		case '\n':
+	lines := bytes.Split(in, []byte("\n"))
-			b.Write([]byte("\r\n"))
+	for i, line := range lines {
-		default:
+		b.Write(line)
-			b.WriteByte(c)
+		if i == len(lines)-1 {
 			// Do not add newline to the last line:
 			//  - If the string ends with a newline, we already added it in
 			//    the previous-to-last line, and this line is "".
 			//  - If the string does NOT end with a newline, this preserves
 			//    that property.
 			break
 		}
 		if !bytes.HasSuffix(line, []byte("\r")) {
 			// Missing the CR.
 			b.WriteByte('\r')
 		}
 		b.WriteByte('\n')
 	}
 	return b.Bytes()
 }
 // StringToCRLF is like ToCRLF, but operates on strings.
 func StringToCRLF(in string) string {
-	return string(ToCRLF([]byte(in)))
+	// We implement it the same way as ToCRLF, but with string versions.
 	// This is significantly faster than converting the string to a byte
 	// slice, calling ToCRLF, and converting it back.
 	b := strings.Builder{}
 	b.Grow(len(in))
 	// We go line by line, but beware:
 	//   Split("a\nb", "\n") -> ["a", "b"]
 	//   Split("a\nb\n", "\n") -> ["a", "b", ""]
 	// So we handle the last line separately.
 	lines := strings.Split(in, "\n")
 	for i, line := range lines {
 		b.WriteString(line)
 		if i == len(lines)-1 {
 			// Do not add newline to the last line:
 			//  - If the string ends with a newline, we already added it in
 			//    the previous-to-last line, and this line is "".
 			//  - If the string does NOT end with a newline, this preserves
 			//    that property.
 			break
 		}
 		if !strings.HasSuffix(line, "\r") {
 			// Missing the CR.
 			b.WriteByte('\r')
 		}
 		b.WriteByte('\n')
 	}
 	return b.String()
 }
--- a/internal/normalize/normalize_test.go
+++ b/internal/normalize/normalize_test.go
@@ -1,6 +1,10 @@
 package normalize
-import "testing"
+import (
 	"bytes"
 	"strings"
 	"testing"
 )
 func TestUser(t *testing.T) {
 	valid := []struct{ user, norm string }{
@@ -134,8 +138,19 @@ func TestToCRLF(t *testing.T) {
 		in, out string
 	}{
 		{"", ""},
 		{"a", "a"},
 		// Does not end in newline.
 		{"a\n", "a\r\n"},
 		{"a\nb", "a\r\nb"},
 		{"a\r\nb", "a\r\nb"},
 		// Ends in newline.
 		{"a\nb\n", "a\r\nb\r\n"},
 		{"a\r\nb\n", "a\r\nb\r\n"},
 		{"a\r\nb\r\n", "a\r\nb\r\n"},
 		{"a\r\nb\n\n", "a\r\nb\r\n\r\n"},
 		{"a\r\nb\r\n\r\n", "a\r\nb\r\n\r\n"},
 	}
 	for _, c := range cases {
 		got := string(ToCRLF([]byte(c.in)))
@@ -173,3 +188,31 @@ func FuzzDomainToUnicode(f *testing.F) {
 		DomainToUnicode(addr)
 	})
 }
 func BenchmarkToCRLF(b *testing.B) {
 	// Generate a 1000-line message.
 	bb := bytes.Buffer{}
 	for i := 0; i < 1000; i++ {
 		bb.WriteString("this is a very pretty line 🐅\n")
 	}
 	buf := bb.Bytes()
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
 		ToCRLF(buf)
 	}
 }
 func BenchmarkStringToCRLF(b *testing.B) {
 	// Generate a 1000-line message.
 	sb := strings.Builder{}
 	for i := 0; i < 1000; i++ {
 		sb.WriteString("this is a very pretty line 🐅\n")
 	}
 	s := sb.String()
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
 		StringToCRLF(s)
 	}
 }