From 6d828e813c3cf10b45d58b7759f7cd17492238a5 Mon Sep 17 00:00:00 2001 From: Lucas Hinderberger Date: Tue, 9 Jul 2024 16:21:38 +0200 Subject: [PATCH] SMTP: Adding multi-header search --- internal/smtp/http.go | 46 ++++----- internal/smtp/search.go | 30 ++++-- internal/smtp/smtp_test.go | 160 ++++++++++++++++++++++++++++- internal/smtp/smtp_testsession.txt | 14 ++- 4 files changed, 213 insertions(+), 37 deletions(-) diff --git a/internal/smtp/http.go b/internal/smtp/http.go index e727237..a11dfd5 100644 --- a/internal/smtp/http.go +++ b/internal/smtp/http.go @@ -242,15 +242,15 @@ func (h *smtpHTTPHandler) handleGUIMessage(w http.ResponseWriter, r *http.Reques } func (h *smtpHTTPHandler) handleMessageIndex(w http.ResponseWriter, r *http.Request) { - headerSearchRgx, err := extractSearchRegex(w, r.URL.Query(), "header") + headerSearchRxs, err := extractSearchRegexes(w, r.URL.Query(), "header") if err != nil { handlerutil.RespondWithErr(w, http.StatusBadRequest, err) return } receivedMessages := h.server.ReceivedMessages() - if headerSearchRgx != nil { - receivedMessages = SearchByHeader(receivedMessages, headerSearchRgx) + if len(headerSearchRxs) > 0 { + receivedMessages = SearchByHeader(receivedMessages, headerSearchRxs...) } messagesOut := make([]any, 0) @@ -276,15 +276,15 @@ func (h *smtpHTTPHandler) handleMessageRaw(w http.ResponseWriter, r *http.Reques } func (h *smtpHTTPHandler) handleMultipartIndex(w http.ResponseWriter, r *http.Request, c *ReceivedContent) { - headerSearchRgx, err := extractSearchRegex(w, r.URL.Query(), "header") + headerSearchRxs, err := extractSearchRegexes(w, r.URL.Query(), "header") if err != nil { handlerutil.RespondWithErr(w, http.StatusBadRequest, err) return } multiparts := c.Multiparts() - if headerSearchRgx != nil { - multiparts = SearchByHeader(multiparts, headerSearchRgx) + if len(headerSearchRxs) > 0 { + multiparts = SearchByHeader(multiparts, headerSearchRxs...) 
} handlerutil.RespondWithJSON(w, http.StatusOK, buildMultipartIndex(multiparts)) @@ -475,28 +475,28 @@ func buildMultipartMeta(part *ReceivedPart) map[string]any { return out } -// extractSearchRegex tries to extract a regular expression from the referenced -// query parameter. If no query parameter is given and otherwise no error has -// occurred, this function returns (nil, nil). -func extractSearchRegex( +// extractSearchRegexes tries to extract the regular expression(s) from the +// referenced query parameter. If no query parameter is given and otherwise +// no error has occurred, this function returns (nil, nil). +func extractSearchRegexes( w http.ResponseWriter, queryParams map[string][]string, paramName string, -) (*regexp.Regexp, error) { - searchParam, ok := queryParams[paramName] +) ([]*regexp.Regexp, error) { + searchParams, ok := queryParams[paramName] if ok { - if len(searchParam) != 1 { - return nil, fmt.Errorf( - "Encountered multiple %q params", paramName, - ) - } + out := make([]*regexp.Regexp, len(searchParams)) + + for i, p := range searchParams { + re, err := regexp.Compile(p) + if err != nil { + return nil, fmt.Errorf( + "could not compile %q regex %q: %w", paramName, p, err, + ) + } - re, err := regexp.Compile(searchParam[0]) - if err != nil { - return nil, fmt.Errorf( - "could not compile %q regex: %w", paramName, err, - ) + out[i] = re } - return re, nil + return out, nil } return nil, nil diff --git a/internal/smtp/search.go b/internal/smtp/search.go index 7446e42..8fa38e4 100644 --- a/internal/smtp/search.go +++ b/internal/smtp/search.go @@ -5,19 +5,21 @@ import ( "regexp" ) -// SearchByHeader returns the list of all given ContentHavers that -// have at least one header matching the given regular expression. +// SearchByHeader returns the list of all given ContentHavers that, +// for each of the given regular expressions, have at least one header +// matching it (different regexes can be matched by different headers or +// the same header).
// -// Note that the regex is performed for each header value individually, -// including for multi-value headers. The header value is first serialized -// by concatenating it after the header name, colon and space. It is not -// being encoded as if for transport (e.g. quoted-printable), -// but concatenated as-is. -func SearchByHeader[T ContentHaver](haystack []T, re *regexp.Regexp) []T { +// Note that in the context of this function, each regex is matched against +// each header value individually, including for multi-value headers. The +// header value is first serialized by concatenating it after the header +// name, colon and space. It is not encoded as if for transport (e.g. +// quoted-printable), but concatenated as-is. +func SearchByHeader[T ContentHaver](haystack []T, rxs ...*regexp.Regexp) []T { out := make([]T, 0, len(haystack)) for _, c := range haystack { - if anyHeaderMatches(c.Content().Headers(), re) { + if allRegexesMatchAnyHeader(c.Content().Headers(), rxs) { out = append(out, c) } } @@ -25,6 +27,16 @@ func SearchByHeader[T ContentHaver](haystack []T, re *regexp.Regexp) []T { return out } +func allRegexesMatchAnyHeader(headers map[string][]string, rxs []*regexp.Regexp) bool { + for _, re := range rxs { + if !anyHeaderMatches(headers, re) { + return false + } + } + + return true +} + func anyHeaderMatches(headers map[string][]string, re *regexp.Regexp) bool { for k, vs := range headers { for _, v := range vs { diff --git a/internal/smtp/smtp_test.go b/internal/smtp/smtp_test.go index 01731ec..00df991 100644 --- a/internal/smtp/smtp_test.go +++ b/internal/smtp/smtp_test.go @@ -5,6 +5,7 @@ import ( _ "embed" "net" "regexp" + "strconv" "strings" "testing" "time" @@ -103,6 +104,54 @@ func TestMessageSearch(t *testing.T) { } } +func TestMessageSearchAND(t *testing.T) { + testCases := []struct { + queries []string + expectedIndices []int + }{ + { + queries: []string{ + "Subject: Example Message", + "To: testreceiver6\\@.*", + }, + expectedIndices: 
[]int{5}, + }, + { + queries: []string{ + "From: testsender5\\@.*", + "Content-Transfer-Encoding: .*", + }, + expectedIndices: []int{4}, + }, + { + queries: []string{ + "Subject: Example Message", + "Content-Type: text/plain", + }, + expectedIndices: []int{2, 4}, + }, + } + + for i := range testCases { + testCase := testCases[i] + + t.Run(strconv.Itoa(i), func(t *testing.T) { + rxs := make([]*regexp.Regexp, len(testCase.queries)) + for j, query := range testCase.queries { + rxs[j] = regexp.MustCompile(query) + } + + actual := SearchByHeader(server.ReceivedMessages(), rxs...) + + actualIndices := make([]int, len(actual)) + for ai, av := range actual { + actualIndices[ai] = av.index + } + assert.ElementsMatch(t, testCase.expectedIndices, actualIndices) + }) + } +} + func TestMultipartSearch(t *testing.T) { // This test uses message #8 for all of its tests. @@ -119,11 +168,16 @@ func TestMultipartSearch(t *testing.T) { }, { queries: []string{ - "X-Funky-Header", "Content-Transfer-Encoding", }, expectedIndices: []int{0, 1}, }, + { + queries: []string{ + "X-Funky-Header", + }, + expectedIndices: []int{0, 1, 2, 3}, + }, { queries: []string{ "X-Funky-Header: Käse", @@ -170,6 +224,52 @@ func TestMultipartSearch(t *testing.T) { } } +func TestMultipartSearchAND(t *testing.T) { + // This test uses message #8 for all of its tests. 
+ + testCases := []struct { + queries []string + expectedIndices []int + }{ + { + queries: []string{ + "X-Funky-Header: .*se$", + "Content-Type: text/plain", + }, + expectedIndices: []int{1, 3}, + }, + { + queries: []string{ + "X-Funky-Header: ..se$", + "Content-Type: text/plain", + }, + expectedIndices: []int{1}, + }, + } + + for i := range testCases { + testCase := testCases[i] + + t.Run(strconv.Itoa(i), func(t *testing.T) { + rxs := make([]*regexp.Regexp, len(testCase.queries)) + for j, query := range testCase.queries { + rxs[j] = regexp.MustCompile(query) + } + + msg, err := server.ReceivedMessage(8) + require.NoError(t, err) + + actual := SearchByHeader(msg.Content().Multiparts(), rxs...) + + actualIndices := make([]int, len(actual)) + for ai, av := range actual { + actualIndices[ai] = av.index + } + assert.ElementsMatch(t, testCase.expectedIndices, actualIndices) + }) + } +} + func assertHeadersEqual(t *testing.T, expected, actual map[string][]string) { assert.Equal(t, len(expected), len(actual)) @@ -351,6 +451,7 @@ Trailing text is ignored.`), smtpRcptTo: []string{"testreceiver3@programmfabrik.de"}, rawMessageData: []byte(`From: testsender3@programmfabrik.de To: testreceiver3@programmfabrik.de +Subject: Example Message Content-Type: text/plain; charset=utf-8 Noch eine Testmail. Diesmal mit nicht-ASCII-Zeichen: äöüß`), @@ -359,6 +460,7 @@ Noch eine Testmail. Diesmal mit nicht-ASCII-Zeichen: äöüß`), headers: map[string][]string{ "From": {"testsender3@programmfabrik.de"}, "To": {"testreceiver3@programmfabrik.de"}, + "Subject": {"Example Message"}, "Content-Type": {"text/plain; charset=utf-8"}, }, body: []byte(`Noch eine Testmail. 
Diesmal mit nicht-ASCII-Zeichen: äöüß`), @@ -401,6 +503,7 @@ w6TDtsO8w58K`), smtpRcptTo: []string{"testreceiver5@programmfabrik.de"}, rawMessageData: []byte(`From: testsender5@programmfabrik.de To: testreceiver5@programmfabrik.de +Subject: Example Message Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: quoted-printable @@ -411,6 +514,7 @@ d-printable.`), headers: map[string][]string{ "From": {"testsender5@programmfabrik.de"}, "To": {"testreceiver5@programmfabrik.de"}, + "Subject": {"Example Message"}, "Content-Type": {"text/plain; charset=utf-8"}, "Content-Transfer-Encoding": {"quoted-printable"}, }, @@ -573,6 +677,16 @@ X-Funky-Header: Käse Noch eine Testmail mit =C3=A4=C3=B6=C3=BC=C3=9F, diesmal enkodiert in quote= d-printable. +--d36c3118be4745f9a1cb4556d11fe92d +Content-Type: text/html; charset=utf-8 +X-Funky-Header: Nase + +Foo +--d36c3118be4745f9a1cb4556d11fe92d +Content-Type: text/plain; charset=utf-8 +X-Funky-Header: Phase + +Foobar. --d36c3118be4745f9a1cb4556d11fe92d--`), receivedAt: testTime, content: &ReceivedContent{ @@ -598,6 +712,16 @@ X-Funky-Header: Käse Noch eine Testmail mit =C3=A4=C3=B6=C3=BC=C3=9F, diesmal enkodiert in quote= d-printable. +--d36c3118be4745f9a1cb4556d11fe92d +Content-Type: text/html; charset=utf-8 +X-Funky-Header: Nase + +Foo +--d36c3118be4745f9a1cb4556d11fe92d +Content-Type: text/plain; charset=utf-8 +X-Funky-Header: Phase + +Foobar. --d36c3118be4745f9a1cb4556d11fe92d--`), contentType: "multipart/mixed", contentTypeParams: map[string]string{ @@ -636,6 +760,34 @@ d-printable. 
}, }, }, + { + index: 2, + content: &ReceivedContent{ + headers: map[string][]string{ + "Content-Type": {"text/html; charset=utf-8"}, + "X-Funky-Header": {"Nase"}, + }, + body: []byte(`Foo`), + contentType: "text/html", + contentTypeParams: map[string]string{ + "charset": "utf-8", + }, + }, + }, + { + index: 3, + content: &ReceivedContent{ + headers: map[string][]string{ + "Content-Type": {"text/plain; charset=utf-8"}, + "X-Funky-Header": {"Phase"}, + }, + body: []byte(`Foobar.`), + contentType: "text/plain", + contentTypeParams: map[string]string{ + "charset": "utf-8", + }, + }, + }, }, }, }, @@ -653,7 +805,7 @@ Content-type: multipart/alternative; boundary="d36c3118be4745f9a1cb4556d11fe92d" --d36c3118be4745f9a1cb4556d11fe92d Content-Type: text/plain; charset=utf-8 -Some plain text for clients that don't support multipart. +Some plain text for clients that don't support nested multipart. --d36c3118be4745f9a1cb4556d11fe92d Content-Type: multipart/mixed; boundary="710d3e95c17247d4bb35d621f25e094e" @@ -680,7 +832,7 @@ This is the second subpart. body: []byte(`--d36c3118be4745f9a1cb4556d11fe92d Content-Type: text/plain; charset=utf-8 -Some plain text for clients that don't support multipart. +Some plain text for clients that don't support nested multipart. --d36c3118be4745f9a1cb4556d11fe92d Content-Type: multipart/mixed; boundary="710d3e95c17247d4bb35d621f25e094e" @@ -706,7 +858,7 @@ This is the second subpart. 
headers: map[string][]string{ "Content-Type": {"text/plain; charset=utf-8"}, }, - body: []byte(`Some plain text for clients that don't support multipart.`), + body: []byte(`Some plain text for clients that don't support nested multipart.`), contentType: "text/plain", contentTypeParams: map[string]string{ diff --git a/internal/smtp/smtp_testsession.txt b/internal/smtp/smtp_testsession.txt index 4a40665..567082c 100644 --- a/internal/smtp/smtp_testsession.txt +++ b/internal/smtp/smtp_testsession.txt @@ -37,6 +37,7 @@ RCPT TO: testreceiver3@programmfabrik.de DATA From: testsender3@programmfabrik.de To: testreceiver3@programmfabrik.de +Subject: Example Message Content-Type: text/plain; charset=utf-8 Noch eine Testmail. Diesmal mit nicht-ASCII-Zeichen: äöüß @@ -57,6 +58,7 @@ RCPT TO: testreceiver5@programmfabrik.de DATA From: testsender5@programmfabrik.de To: testreceiver5@programmfabrik.de +Subject: Example Message Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: quoted-printable @@ -131,6 +133,16 @@ X-Funky-Header: Käse Noch eine Testmail mit =C3=A4=C3=B6=C3=BC=C3=9F, diesmal enkodiert in quote= d-printable. +--d36c3118be4745f9a1cb4556d11fe92d +Content-Type: text/html; charset=utf-8 +X-Funky-Header: Nase + +Foo +--d36c3118be4745f9a1cb4556d11fe92d +Content-Type: text/plain; charset=utf-8 +X-Funky-Header: Phase + +Foobar. --d36c3118be4745f9a1cb4556d11fe92d-- . MAIL FROM: testsender10@programmfabrik.de @@ -146,7 +158,7 @@ Content-type: multipart/alternative; boundary="d36c3118be4745f9a1cb4556d11fe92d" --d36c3118be4745f9a1cb4556d11fe92d Content-Type: text/plain; charset=utf-8 -Some plain text for clients that don't support multipart. +Some plain text for clients that don't support nested multipart. --d36c3118be4745f9a1cb4556d11fe92d Content-Type: multipart/mixed; boundary="710d3e95c17247d4bb35d621f25e094e"