From 4b1f3d1ecad71e61bdabef071def9d62e9e07060 Mon Sep 17 00:00:00 2001 From: Lucas Hinderberger Date: Mon, 1 Jul 2024 18:39:57 +0200 Subject: [PATCH] SMTP: ReceivedContent refactoring part 4 - Rewriting HTTP endpoints to enable traversing nested Multipart --- README.md | 25 ++- internal/smtp/http.go | 275 +++++++++++------------------ internal/smtp/smtp_test.go | 136 +++++++++++++- internal/smtp/smtp_testsession.txt | 28 +++ 4 files changed, 285 insertions(+), 179 deletions(-) diff --git a/README.md b/README.md index bb63f32..b5ba1f9 100644 --- a/README.md +++ b/README.md @@ -2675,7 +2675,7 @@ the corresponding index is made available as JSON: ```json { - "body_size": 306, + "bodySize": 306, "from": [ "testsender2@programmfabrik.de" ], @@ -2732,46 +2732,50 @@ following schema: "count": 2, "multiparts": [ { - "body_size": 15, + "bodySize": 15, "headers": { "Content-Type": [ "text/plain; charset=utf-8" ] }, - "idx": 0 + "isMultipart": false }, { - "body_size": 39, + "bodySize": 39, "headers": { "Content-Type": [ "text/html; charset=utf-8" ] }, - "idx": 1 + "isMultipart": false } ] } ``` -#### /smtp/$idx/multipart/$partIdx +#### /smtp/$idx[/multipart/$partIdx]+ On the `/smtp/$idx/multipart/$partIdx` endpoint (e.g. `/smtp/1/multipart/0`), metadata about the multipart with the corresponding index is made available: ```json { - "body_size": 15, + "bodySize": 15, "headers": { "Content-Type": [ "text/plain; charset=utf-8" ] }, - "idx": 0 + "idx": 0, + "isMultipart": false } ``` Headers that were encoded according to RFC2047 are decoded first. -#### /smtp/$idx/multipart/$partIdx/body +The endpoint can be called recursively for nested multipart messages, e.g. +`/smtp/1/multipart/0/multipart/1`. + +#### /smtp/$idx[/multipart/$partIdx]+/body On the `/smtp/$idx/multipart/$partIdx/body` endpoint (e.g. `/smtp/1/multipart/0/body`), the body of the multipart (excluding headers) is made available. @@ -2781,3 +2785,6 @@ or `quoted-printable`, the endpoint returns the decoded body. If the message was sent with a `Content-Type` header, it will be passed through to the HTTP response. + +The endpoint can be called recursively for nested multipart messages, e.g. +`/smtp/1/multipart/0/multipart/1/body`. diff --git a/internal/smtp/http.go b/internal/smtp/http.go index 3907014..7d81499 100644 --- a/internal/smtp/http.go +++ b/internal/smtp/http.go @@ -51,7 +51,7 @@ func (h *smtpHTTPHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { // We now know that pathParts must have at least length 1, since empty path // was already handled above. - if pathParts[0] == "gui" { + if pathParts[0] == "gui" && len(pathParts) == 1 { h.handleGUI(w, r) return } @@ -65,47 +65,101 @@ func (h *smtpHTTPHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { return } - switch len(pathParts) { - case 1: - h.handleMessageMeta(w, r, idx) + msg, err := h.server.ReceivedMessage(idx) + if err != nil { + handlerutil.RespondWithErr(w, http.StatusNotFound, err) return - case 2: - switch pathParts[1] { + } + + if len(pathParts) == 1 { + h.handleMessageMeta(w, r, msg) + return + } + if len(pathParts) == 2 && pathParts[1] == "raw" { + h.handleMessageRaw(w, r, msg) + return + } + + h.routeContentEndpoint(w, r, msg.Content(), pathParts[1:]) +} + +// routeContentEndpoint recursively finds a route for the remaining path parts +// based on the given ReceivedContent. +func (h *smtpHTTPHandler) routeContentEndpoint( + w http.ResponseWriter, r *http.Request, c *ReceivedContent, remainingPathParts []string, +) { + ensureIsMultipart := func() bool { + if !c.IsMultipart() { + handlerutil.RespondWithErr(w, http.StatusNotFound, fmt.Errorf( + "multipart endpoint was requested for non-multipart content", + )) + return false + } + + return true + } + + if len(remainingPathParts) == 1 { + switch remainingPathParts[0] { case "body": - h.handleMessageBody(w, r, idx) + h.handleContentBody(w, r, c) return case "multipart": - h.handleMultipartIndex(w, r, idx) + if !ensureIsMultipart() { + return + } + + h.handleMultipartIndex(w, r, c) return - case "raw": - h.handleRawMessageData(w, r, idx) + } + } + + if len(remainingPathParts) > 1 && remainingPathParts[0] == "multipart" { + if !ensureIsMultipart() { return } - case 3, 4: - if pathParts[1] == "multipart" { - partIdx, err := strconv.Atoi(pathParts[2]) - if err != nil { - handlerutil.RespondWithErr( - w, http.StatusBadRequest, - fmt.Errorf("could not parse multipart index: %w", err), - ) - return - } - if len(pathParts) == 3 { - h.handleMultipartMeta(w, r, idx, partIdx) - return - } else if pathParts[3] == "body" { - h.handleMultipartBody(w, r, idx, partIdx) - return - } + multiparts := c.Multiparts() + + partIdx, err := strconv.Atoi(remainingPathParts[1]) + if err != nil { + handlerutil.RespondWithErr( + w, http.StatusBadRequest, + fmt.Errorf("could not parse multipart index: %w", err), + ) + return } + + if partIdx >= len(multiparts) { + handlerutil.RespondWithErr(w, http.StatusNotFound, fmt.Errorf( + "ReceivedContent does not contain multipart with index %d", partIdx, + )) + return + } + + part := multiparts[partIdx] + + if len(remainingPathParts) == 2 { + h.handleMultipartMeta(w, r, part) + return + } + + h.routeContentEndpoint(w, r, part.Content(), remainingPathParts[2:]) } // If routing failed, return status 404. w.WriteHeader(http.StatusNotFound) } +func (h *smtpHTTPHandler) handleContentBody(w http.ResponseWriter, r *http.Request, c *ReceivedContent) { + contentType, ok := c.Headers()["Content-Type"] + if ok { + w.Header()["Content-Type"] = contentType + } + + w.Write(c.Body()) +} + func (h *smtpHTTPHandler) handleGUI(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "text/html") @@ -140,59 +194,30 @@ func (h *smtpHTTPHandler) handleMessageIndex(w http.ResponseWriter, r *http.Requ handlerutil.RespondWithJSON(w, http.StatusOK, out) } -func (h *smtpHTTPHandler) handleMessageMeta(w http.ResponseWriter, r *http.Request, idx int) { - msg := h.retrieveMessage(w, idx) - if msg == nil { - return - } - - content := msg.Content() - +func (h *smtpHTTPHandler) handleMessageMeta(w http.ResponseWriter, r *http.Request, msg *ReceivedMessage) { out := buildMessageBasicMeta(msg) + contentMeta := buildContentMeta(msg.Content()) - out["body_size"] = len(content.Body()) - - headers := make(map[string]any) - for k, v := range content.Headers() { - headers[k] = v + for k, v := range contentMeta { + out[k] = v } - out["headers"] = headers handlerutil.RespondWithJSON(w, http.StatusOK, out) } -func (h *smtpHTTPHandler) handleMessageBody(w http.ResponseWriter, r *http.Request, idx int) { - msg := h.retrieveMessage(w, idx) - if msg == nil { - return - } - - content := msg.Content() - - contentType, ok := content.Headers()["Content-Type"] - if ok { - w.Header()["Content-Type"] = contentType - } - - w.Write(content.Body()) +func (h *smtpHTTPHandler) handleMessageRaw(w http.ResponseWriter, r *http.Request, msg *ReceivedMessage) { + w.Header().Set("Content-Type", "message/rfc822") + w.Write(msg.RawMessageData()) } -func (h *smtpHTTPHandler) handleMultipartIndex(w http.ResponseWriter, r *http.Request, idx int) { - msg := h.retrieveMessage(w, idx) - if msg == nil { - return - } - if !ensureIsMultipart(w, msg) { - return - } - +func (h *smtpHTTPHandler) handleMultipartIndex(w http.ResponseWriter, r *http.Request, c *ReceivedContent) { headerSearchRgx, err := extractSearchRegex(w, r.URL.Query(), "header") if err != nil { handlerutil.RespondWithErr(w, http.StatusBadRequest, err) return } - multiparts := msg.Content().Multiparts() + multiparts := c.Multiparts() if headerSearchRgx != nil { multiparts = SearchByHeader(multiparts, headerSearchRgx) } @@ -200,7 +225,7 @@ func (h *smtpHTTPHandler) handleMultipartIndex(w http.ResponseWriter, r *http.Re multipartsOut := make([]any, 0) for _, part := range multiparts { - multipartsOut = append(multipartsOut, buildMultipartMeta(part)) + multipartsOut = append(multipartsOut, buildContentMeta(part.Content())) } out := make(map[string]any) @@ -211,88 +236,34 @@ func (h *smtpHTTPHandler) handleMultipartIndex(w http.ResponseWriter, r *http.Re } func (h *smtpHTTPHandler) handleMultipartMeta( - w http.ResponseWriter, r *http.Request, idx, partIdx int, -) { - msg := h.retrieveMessage(w, idx) - if msg == nil { - return - } - if !ensureIsMultipart(w, msg) { - return - } - part := retrievePart(w, msg, partIdx) - if part == nil { - return - } - - handlerutil.RespondWithJSON(w, http.StatusOK, buildMultipartMeta(part)) -} - -func (h *smtpHTTPHandler) handleMultipartBody( - w http.ResponseWriter, r *http.Request, idx, partIdx int, + w http.ResponseWriter, r *http.Request, part *ReceivedPart, ) { - msg := h.retrieveMessage(w, idx) - if msg == nil { - return - } - - if !ensureIsMultipart(w, msg) { - return - } - - part := retrievePart(w, msg, partIdx) - if part == nil { - return - } - - content := part.Content() - - contentType, ok := content.Headers()["Content-Type"] - if ok { - w.Header()["Content-Type"] = contentType + out := map[string]any{ + "idx": part.Index(), } - w.Write(content.Body()) -} + contentMeta := buildContentMeta(part.Content()) -func (h *smtpHTTPHandler) handleRawMessageData(w http.ResponseWriter, r *http.Request, idx int) { - msg := h.retrieveMessage(w, idx) - if msg == nil { - return + for k, v := range contentMeta { + out[k] = v } - w.Header().Set("Content-Type", "message/rfc822") - - w.Write(msg.RawMessageData()) + handlerutil.RespondWithJSON(w, http.StatusOK, out) } -// retrieveMessage tries to retrieve the ReceivedMessage with the given index. -// If found, returns the message. If not found, responds with Status 404 -// and returns nil. -func (h *smtpHTTPHandler) retrieveMessage(w http.ResponseWriter, idx int) *ReceivedMessage { - msg, err := h.server.ReceivedMessage(idx) - if err != nil { - handlerutil.RespondWithErr(w, http.StatusNotFound, err) - return nil +func buildContentMeta(c *ReceivedContent) map[string]any { + out := map[string]any{ + "bodySize": len(c.Body()), + "isMultipart": c.IsMultipart(), } - return msg -} - -// retrievePart tries to retrieve the ReceivedPart with the given index. -// If found, returns the part. If not found, responds with Status 404 -// and returns nil. -func retrievePart(w http.ResponseWriter, msg *ReceivedMessage, partIdx int) *ReceivedPart { - multiparts := msg.Content().Multiparts() - - if partIdx >= len(multiparts) { - handlerutil.RespondWithErr(w, http.StatusNotFound, fmt.Errorf( - "ReceivedMessage does not contain multipart with index %d", partIdx, - )) - return nil + headers := make(map[string]any) + for k, v := range c.Headers() { + headers[k] = v } + out["headers"] = headers - return multiparts[partIdx] + return out } func buildMessageBasicMeta(msg *ReceivedMessage) map[string]any { @@ -322,38 +293,6 @@ func buildMessageBasicMeta(msg *ReceivedMessage) map[string]any { return out } -func buildMultipartMeta(part *ReceivedPart) map[string]any { - content := part.Content() - - out := map[string]any{ - "idx": part.Index(), - "body_size": len(content.Body()), - } - - headers := make(map[string]any) - for k, v := range content.Headers() { - headers[k] = v - } - out["headers"] = headers - - return out -} - -// ensureIsMultipart checks whether the referenced message is a multipart -// message, returns true and does nothing further if so, returns false after -// replying with Status 404 if not. -func ensureIsMultipart(w http.ResponseWriter, msg *ReceivedMessage) bool { - if msg.Content().IsMultipart() { - return true - } - - handlerutil.RespondWithErr(w, http.StatusNotFound, fmt.Errorf( - "multipart information was requested for non-multipart message", - )) - - return false -} - // extractSearchRegex tries to extract a regular expression from the referenced // query parameter. If no query parameter is given and otherwise no error has // occurred, this function returns (nil, nil). diff --git a/internal/smtp/smtp_test.go b/internal/smtp/smtp_test.go index 2e4c66e..e4639b3 100644 --- a/internal/smtp/smtp_test.go +++ b/internal/smtp/smtp_test.go @@ -36,7 +36,7 @@ func TestMessageSearch(t *testing.T) { }{ { queries: []string{``}, - expectedIndices: []int{0, 1, 2, 3, 4, 5, 6, 7, 8}, + expectedIndices: []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, }, { queries: []string{ @@ -47,7 +47,7 @@ func TestMessageSearch(t *testing.T) { `Content-Type:.*`, `^Content-Type:.*$`, }, - expectedIndices: []int{1, 2, 3, 4, 5, 8}, + expectedIndices: []int{1, 2, 3, 4, 5, 8, 9}, }, { queries: []string{ @@ -639,6 +639,138 @@ d-printable. }, }, }, + { + index: 9, + smtpFrom: "testsender10@programmfabrik.de", + smtpRcptTo: []string{"testreceiver10@programmfabrik.de"}, + rawMessageData: []byte(`MIME-Version: 1.0 +From: testsender10@programmfabrik.de +To: testreceiver10@programmfabrik.de +Date: Tue, 25 Jun 2024 11:15:57 +0200 +Subject: Example Nested Message +Content-type: multipart/alternative; boundary="d36c3118be4745f9a1cb4556d11fe92d" + +--d36c3118be4745f9a1cb4556d11fe92d +Content-Type: text/plain; charset=utf-8 + +Some plain text for clients that don't support multipart. +--d36c3118be4745f9a1cb4556d11fe92d +Content-Type: multipart/mixed; boundary="710d3e95c17247d4bb35d621f25e094e" + +--710d3e95c17247d4bb35d621f25e094e +Content-Type: text/plain; charset=ascii + +This is the first subpart. +--710d3e95c17247d4bb35d621f25e094e +Content-Type: text/html; charset=utf-8 + +This is the second subpart. +--710d3e95c17247d4bb35d621f25e094e-- +--d36c3118be4745f9a1cb4556d11fe92d--`), + receivedAt: testTime, + content: &ReceivedContent{ + headers: map[string][]string{ + "Mime-Version": {"1.0"}, + "From": {"testsender10@programmfabrik.de"}, + "To": {"testreceiver10@programmfabrik.de"}, + "Date": {"Tue, 25 Jun 2024 11:15:57 +0200"}, + "Subject": {"Example Nested Message"}, + "Content-Type": {`multipart/alternative; boundary="d36c3118be4745f9a1cb4556d11fe92d"`}, + }, + body: []byte(`--d36c3118be4745f9a1cb4556d11fe92d +Content-Type: text/plain; charset=utf-8 + +Some plain text for clients that don't support multipart. +--d36c3118be4745f9a1cb4556d11fe92d +Content-Type: multipart/mixed; boundary="710d3e95c17247d4bb35d621f25e094e" + +--710d3e95c17247d4bb35d621f25e094e +Content-Type: text/plain; charset=ascii + +This is the first subpart. +--710d3e95c17247d4bb35d621f25e094e +Content-Type: text/html; charset=utf-8 + +This is the second subpart. +--710d3e95c17247d4bb35d621f25e094e-- +--d36c3118be4745f9a1cb4556d11fe92d--`), + contentType: "multipart/alternative", + contentTypeParams: map[string]string{ + "boundary": "d36c3118be4745f9a1cb4556d11fe92d", + }, + isMultipart: true, + multiparts: []*ReceivedPart{ + { + index: 0, + content: &ReceivedContent{ + headers: map[string][]string{ + "Content-Type": {"text/plain; charset=utf-8"}, + }, + body: []byte(`Some plain text for clients that don't support multipart.`), + + contentType: "text/plain", + contentTypeParams: map[string]string{ + "charset": "utf-8", + }, + }, + }, + { + index: 1, + content: &ReceivedContent{ + headers: map[string][]string{ + "Content-Type": {`multipart/mixed; boundary="710d3e95c17247d4bb35d621f25e094e"`}, + }, + body: []byte(`--710d3e95c17247d4bb35d621f25e094e +Content-Type: text/plain; charset=ascii + +This is the first subpart. +--710d3e95c17247d4bb35d621f25e094e +Content-Type: text/html; charset=utf-8 + +This is the second subpart. +--710d3e95c17247d4bb35d621f25e094e--`), + + contentType: "multipart/mixed", + contentTypeParams: map[string]string{ + "boundary": "710d3e95c17247d4bb35d621f25e094e", + }, + + isMultipart: true, + multiparts: []*ReceivedPart{ + { + index: 0, + content: &ReceivedContent{ + headers: map[string][]string{ + "Content-Type": {"text/plain; charset=ascii"}, + }, + body: []byte(`This is the first subpart.`), + + contentType: "text/plain", + contentTypeParams: map[string]string{ + "charset": "ascii", + }, + }, + }, + { + index: 1, + content: &ReceivedContent{ + headers: map[string][]string{ + "Content-Type": {"text/html; charset=utf-8"}, + }, + body: []byte(`This is the second subpart.`), + + contentType: "text/html", + contentTypeParams: map[string]string{ + "charset": "utf-8", + }, + }, + }, + }, + }, + }, + }, + }, + }, } // the following calls pre-format the test data defined above to match diff --git a/internal/smtp/smtp_testsession.txt b/internal/smtp/smtp_testsession.txt index 5a6b39f..4a40665 100644 --- a/internal/smtp/smtp_testsession.txt +++ b/internal/smtp/smtp_testsession.txt @@ -133,5 +133,33 @@ Noch eine Testmail mit =C3=A4=C3=B6=C3=BC=C3=9F, diesmal enkodiert in quote= d-printable. --d36c3118be4745f9a1cb4556d11fe92d-- . +MAIL FROM: testsender10@programmfabrik.de +RCPT TO: testreceiver10@programmfabrik.de +DATA +MIME-Version: 1.0 +From: testsender10@programmfabrik.de +To: testreceiver10@programmfabrik.de +Date: Tue, 25 Jun 2024 11:15:57 +0200 +Subject: Example Nested Message +Content-type: multipart/alternative; boundary="d36c3118be4745f9a1cb4556d11fe92d" + +--d36c3118be4745f9a1cb4556d11fe92d +Content-Type: text/plain; charset=utf-8 + +Some plain text for clients that don't support multipart. +--d36c3118be4745f9a1cb4556d11fe92d +Content-Type: multipart/mixed; boundary="710d3e95c17247d4bb35d621f25e094e" + +--710d3e95c17247d4bb35d621f25e094e +Content-Type: text/plain; charset=ascii + +This is the first subpart. +--710d3e95c17247d4bb35d621f25e094e +Content-Type: text/html; charset=utf-8 + +This is the second subpart. +--710d3e95c17247d4bb35d621f25e094e-- +--d36c3118be4745f9a1cb4556d11fe92d-- +. QUIT