From 3a9827ce29c220aa849289643bfb2e9d732f3d0f Mon Sep 17 00:00:00 2001
From: Matt Brown
Date: Mon, 9 Dec 2024 17:41:04 +0000
Subject: [PATCH] Fix email name parser

---
Review notes (free-form commentary after the separator; not part of the
commit message):

* Parse is moved from below trySingleName to just above tryDelimitedFormat.
  Its body is unchanged except for the order of the fallback matchers:
  trySingleName is now tried first instead of last.
* tryNameWithInitial now reports no match for usernames shorter than
  5 bytes, before it starts scanning for a known first name.
* New test case "firstname only #3" expects alex@acme.com to parse as
  FirstName "Alex" with an empty LastName and PatternFirstName.
  Presumably this is the input the reordering and the length guard are
  meant to fix; confirm against the failing behaviour.
* NOTE(review): the test file is named name_parses_test.go, which looks
  like a typo for name_parser_test.go. It is an existing path, so a rename
  would belong in a separate change. Confirm.
* NOTE(review): line breaks and leading tabs in the hunks below were
  restored from a whitespace-collapsed copy assuming gofmt layout; spacing
  inside lines was left as found. Verify context against the tree, or
  apply with whitespace-insensitive context matching.

 emailparser/name_parser.go      | 79 +++++++++++++++++----------------
 emailparser/name_parses_test.go | 10 +++++
 2 files changed, 51 insertions(+), 38 deletions(-)

diff --git a/emailparser/name_parser.go b/emailparser/name_parser.go
index 96be998..024bbf7 100644
--- a/emailparser/name_parser.go
+++ b/emailparser/name_parser.go
@@ -41,6 +41,44 @@ var (
 	surnames map[string]bool
 )
 
+func Parse(email string) (ParsedEmail, error) {
+	ok, cleanEmail, username, _ := syntax.NormalizeEmailAddress(email)
+	if !ok {
+		return ParsedEmail{}, ErrInvalidEmail
+	}
+
+	username = strings.ToLower(username)
+
+	// Try delimited format first if it contains a dot
+	if strings.Contains(username, ".") {
+		if result, ok := tryDelimitedFormat(username); ok {
+			result.Email = cleanEmail
+			return result, nil
+		}
+
+		// If delimited format didn't match, remove dots before trying other patterns
+		username = strings.ReplaceAll(username, ".", "")
+	}
+
+	// Try other patterns with cleaned username
+	for _, tryPattern := range []func(string) (ParsedEmail, bool){
+		trySingleName,
+		tryCombinedName,
+		tryNameWithInitial,
+		tryInitialSurname,
+	} {
+		if result, ok := tryPattern(username); ok {
+			result.Email = cleanEmail
+			return result, nil
+		}
+	}
+
+	return ParsedEmail{
+		Email: cleanEmail,
+		Pattern: string(PatternUnknown),
+	}, nil
+}
+
 // tryDelimitedFormat handles all patterns with dots
 func tryDelimitedFormat(username string) (ParsedEmail, bool) {
 	parts := strings.Split(username, ".")
@@ -97,6 +135,9 @@ func tryDelimitedFormat(username string) (ParsedEmail, bool) {
 
 // tryNameWithInitial handles trailing initials without dots
 func tryNameWithInitial(username string) (ParsedEmail, bool) {
+	if len(username) < 5 {
+		return ParsedEmail{}, false
+	}
 	// Try to find a known first name
 	for i := 2; i < len(username); i++ {
 		possibleName := username[:i]
@@ -177,44 +218,6 @@ func trySingleName(username string) (ParsedEmail, bool) {
 	return ParsedEmail{}, false
 }
 
-func Parse(email string) (ParsedEmail, error) {
-	ok, cleanEmail, username, _ := syntax.NormalizeEmailAddress(email)
-	if !ok {
-		return ParsedEmail{}, ErrInvalidEmail
-	}
-
-	username = strings.ToLower(username)
-
-	// Try delimited format first if it contains a dot
-	if strings.Contains(username, ".") {
-		if result, ok := tryDelimitedFormat(username); ok {
-			result.Email = cleanEmail
-			return result, nil
-		}
-
-		// If delimited format didn't match, remove dots before trying other patterns
-		username = strings.ReplaceAll(username, ".", "")
-	}
-
-	// Try other patterns with cleaned username
-	for _, tryPattern := range []func(string) (ParsedEmail, bool){
-		tryCombinedName,
-		tryNameWithInitial,
-		tryInitialSurname,
-		trySingleName,
-	} {
-		if result, ok := tryPattern(username); ok {
-			result.Email = cleanEmail
-			return result, nil
-		}
-	}
-
-	return ParsedEmail{
-		Email: cleanEmail,
-		Pattern: string(PatternUnknown),
-	}, nil
-}
-
 func init() {
 	var err error
 	firstNames, err = names.LoadFirstNames()
diff --git a/emailparser/name_parses_test.go b/emailparser/name_parses_test.go
index 05631dd..34b882c 100644
--- a/emailparser/name_parses_test.go
+++ b/emailparser/name_parses_test.go
@@ -129,6 +129,16 @@ func TestParse(t *testing.T) {
 				Pattern: string(PatternFirstName),
 			},
 		},
+		{
+			name: "firstname only #3",
+			email: "alex@acme.com",
+			want: ParsedEmail{
+				Email: "alex@acme.com",
+				FirstName: "Alex",
+				LastName: "",
+				Pattern: string(PatternFirstName),
+			},
+		},
 	}
 
 	for _, tt := range tests {