Skip to content

Commit

Permalink
Fix email name parser
Browse files Browse the repository at this point in the history
  • Loading branch information
mattbr0wn committed Dec 9, 2024
1 parent 1f5677c commit 3a9827c
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 38 deletions.
79 changes: 41 additions & 38 deletions emailparser/name_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,44 @@ var (
surnames map[string]bool
)

// Parse normalizes email and attempts to recognize a naming pattern in the
// part that syntax.NormalizeEmailAddress returns as the username.
//
// It returns ErrInvalidEmail (with a zero ParsedEmail) when normalization
// reports the address as not ok. Otherwise the error is always nil: the
// result is either the first successful match, or a ParsedEmail carrying
// only the normalized address and PatternUnknown.
func Parse(email string) (ParsedEmail, error) {
	// The fourth value returned by the normalizer is not needed here.
	valid, normalized, local, _ := syntax.NormalizeEmailAddress(email)
	if !valid {
		return ParsedEmail{}, ErrInvalidEmail
	}

	local = strings.ToLower(local)

	// A dotted username gets one attempt at the delimited format. If that
	// fails, the dots are stripped so the remaining matchers see a single
	// undelimited token.
	if strings.Contains(local, ".") {
		parsed, matched := tryDelimitedFormat(local)
		if matched {
			parsed.Email = normalized
			return parsed, nil
		}
		local = strings.ReplaceAll(local, ".", "")
	}

	// Order matters: the first matcher that succeeds wins, and the
	// single-name check runs ahead of the combined/initial heuristics.
	matchers := []func(string) (ParsedEmail, bool){
		trySingleName,
		tryCombinedName,
		tryNameWithInitial,
		tryInitialSurname,
	}
	for i := range matchers {
		parsed, matched := matchers[i](local)
		if !matched {
			continue
		}
		parsed.Email = normalized
		return parsed, nil
	}

	// Nothing matched: report the address with an unknown pattern.
	fallback := ParsedEmail{Pattern: string(PatternUnknown)}
	fallback.Email = normalized
	return fallback, nil
}

// tryDelimitedFormat handles all patterns with dots
func tryDelimitedFormat(username string) (ParsedEmail, bool) {
parts := strings.Split(username, ".")
Expand Down Expand Up @@ -97,6 +135,9 @@ func tryDelimitedFormat(username string) (ParsedEmail, bool) {

// tryNameWithInitial handles trailing initials without dots
func tryNameWithInitial(username string) (ParsedEmail, bool) {
if len(username) < 5 {
return ParsedEmail{}, false
}
// Try to find a known first name
for i := 2; i < len(username); i++ {
possibleName := username[:i]
Expand Down Expand Up @@ -177,44 +218,6 @@ func trySingleName(username string) (ParsedEmail, bool) {
return ParsedEmail{}, false
}

// Parse normalizes email and attempts to recognize a naming pattern in the
// part that syntax.NormalizeEmailAddress returns as the username.
//
// It returns ErrInvalidEmail (with a zero ParsedEmail) when normalization
// reports the address as not ok. In every other case the error is nil and
// the result is either the first successful match or a ParsedEmail holding
// only the normalized address and PatternUnknown.
func Parse(email string) (ParsedEmail, error) {
	// The fourth value returned by the normalizer is not needed here.
	accepted, address, name, _ := syntax.NormalizeEmailAddress(email)
	if !accepted {
		return ParsedEmail{}, ErrInvalidEmail
	}

	// done stamps the normalized address onto a match and returns it.
	done := func(match ParsedEmail) (ParsedEmail, error) {
		match.Email = address
		return match, nil
	}

	name = strings.ToLower(name)

	// Dotted usernames are tried as a delimited format first; when that
	// fails the dots are removed before the remaining matchers run.
	if strings.Contains(name, ".") {
		if match, hit := tryDelimitedFormat(name); hit {
			return done(match)
		}
		name = strings.ReplaceAll(name, ".", "")
	}

	// Order matters: the first matcher that succeeds wins, and the
	// single-name check is the last resort in this sequence.
	fallbacks := [...]func(string) (ParsedEmail, bool){
		tryCombinedName,
		tryNameWithInitial,
		tryInitialSurname,
		trySingleName,
	}
	for _, attempt := range fallbacks {
		if match, hit := attempt(name); hit {
			return done(match)
		}
	}

	// Nothing matched: report the address with an unknown pattern.
	return ParsedEmail{
		Email:   address,
		Pattern: string(PatternUnknown),
	}, nil
}

func init() {
var err error
firstNames, err = names.LoadFirstNames()
Expand Down
10 changes: 10 additions & 0 deletions emailparser/name_parses_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,16 @@ func TestParse(t *testing.T) {
Pattern: string(PatternFirstName),
},
},
{
name: "firstname only #3",
email: "alex@acme.com",
want: ParsedEmail{
Email: "alex@acme.com",
FirstName: "Alex",
LastName: "",
Pattern: string(PatternFirstName),
},
},
}

for _, tt := range tests {
Expand Down

0 comments on commit 3a9827c

Please sign in to comment.