Skip to content

Commit

Permalink
Merge pull request #4 from Clarilab/update-parsing-logic
Browse files Browse the repository at this point in the history
feat: add short month date parsing logic
  • Loading branch information
kevincali authored Jul 18, 2023
2 parents 079c468 + dcfc69c commit 226c8d1
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 1 deletion.
34 changes: 34 additions & 0 deletions formatparser.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@ package claridate
import (
"regexp"
"strings"
"time"
)

// Precompiled patterns used to recognise the supported date notations.
var (
	// dashedDateYearFirstRegex matches a four-digit year optionally followed by
	// up to two dash-prefixed groups of one or two digits, e.g. "1983-07-20".
	dashedDateYearFirstRegex = regexp.MustCompile(`^\d{4}(-\d{1,2}){0,2}$`)

	// dashedDateYearLastRegex matches up to two dash-suffixed groups of one or
	// two digits followed by a four-digit year, e.g. "20-07-1983".
	dashedDateYearLastRegex = regexp.MustCompile(`^(\d{1,2}-){0,2}\d{4}$`)

	// dottedDateRegex matches up to two dot-suffixed groups of one or two
	// digits followed by a four-digit year, e.g. "30.07.1957".
	dottedDateRegex = regexp.MustCompile(`^(\d{1,2}\.){0,2}\d{4}$`)

	// slashedDateYearLastRegex matches up to two slash-suffixed groups of one
	// or two digits followed by a four-digit year, e.g. "20/07/1983".
	slashedDateYearLastRegex = regexp.MustCompile(`^(\d{1,2}/){0,2}\d{4}$`)

	// slashedDateYearFirstRegex matches a four-digit year optionally followed
	// by up to two slash-prefixed groups of one or two digits, e.g. "1983/07/20".
	slashedDateYearFirstRegex = regexp.MustCompile(`^\d{4}(/\d{1,2}){0,2}$`)

	// shortMonthRegex matches a three-letter English month abbreviation as a
	// whole word anywhere in the input; it is not anchored.
	shortMonthRegex = regexp.MustCompile(`\b(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\b`)
)

// DetermineDateFormat receives a date string and returns the format the date string is in.
// It returns an empty string and no error if the input is an empty string.
Expand Down Expand Up @@ -62,6 +64,14 @@ func TransformToDashedDate(date string) (string, error) {
return "", nil
}

if strings.Contains(date, ";") {
date, _, _ = strings.Cut(date, ";")
}

if strings.Contains(date, "ca.") {
_, date, _ = strings.Cut(date, "ca. ")
}

if dashedDateYearFirstRegex.MatchString(date) {
return date, nil
}
Expand All @@ -71,6 +81,10 @@ func TransformToDashedDate(date string) (string, error) {
return strings.Join(reverse(strings.Split(date, "-")), "-"), nil
}

if shortMonthRegex.MatchString(date) {
return parseShortMonthDate(date)
}

isDotted := dottedDateRegex.MatchString(date)
isSlashedYearLast := slashedDateYearLastRegex.MatchString(date)
isSlashedYearFirst := slashedDateYearFirstRegex.MatchString(date)
Expand Down Expand Up @@ -109,6 +123,26 @@ func TransformToDashedDate(date string) (string, error) {
}
}

// parseShortMonthDate converts a date written with an abbreviated English
// month name into its dashed, year-first form.
//
// The input is split on whitespace and handled by word count:
//   - two words, e.g. "Jul 1957", yield "1957-07"
//   - three words, e.g. "30 Jul 1957" or "5 Jul 1957", yield "1957-07-30" / "1957-07-05"
//
// Any other word count, or a value that time.Parse rejects for the chosen
// layout, returns an empty string and ErrUnsupportedDateFormat.
func parseShortMonthDate(date string) (string, error) {
	// Count and parse the same normalized text, so that leading, trailing or
	// repeated whitespace cannot pass the word count and then fail to parse.
	words := strings.Fields(date)
	normalized := strings.Join(words, " ")

	var inputLayout, outputLayout string

	switch len(words) {
	case 2: // example: Jul 1957
		inputLayout, outputLayout = "Jan 2006", "2006-01"
	case 3: // example: 30 Jul 1957
		// The "2" day layout accepts one- and two-digit days, whereas "02"
		// would reject an unpadded day such as "5 Jul 1957".
		inputLayout, outputLayout = "2 Jan 2006", "2006-01-02"
	default:
		return "", ErrUnsupportedDateFormat
	}

	parsedDate, err := time.Parse(inputLayout, normalized)
	if err != nil {
		return "", ErrUnsupportedDateFormat
	}

	return parsedDate.Format(outputLayout), nil
}

func reverse(strSlice []string) []string {
for i, j := 0, len(strSlice)-1; i < j; i, j = i+1, j-1 {
strSlice[i], strSlice[j] = strSlice[j], strSlice[i]
Expand Down
9 changes: 8 additions & 1 deletion formatparser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,14 @@ func Test_TransformToDashedDate(t *testing.T) {
"can handle forward slashes 2": {"1983/07/20", "1983-07-20", nil},
"can handle dashed date with year last": {"20-07-1983", "1983-07-20", nil},
"can handle dashed date with year last 2": {"20-7-1983", "1983-7-20", nil},
"leading/trailing space": {" 30.07.1957 ", "1957-07-30", nil},
"ca. DD.MM.YYYY": {"ca. 30.07.1984", "1984-07-30", nil},
"ca. MM.YYYY": {"ca. 07.1984", "1984-07", nil},
"ca. YYYY": {"ca. 1984", "1984", nil},
"DD Mon YYYY with ;": {"30 Jul 1957; 1958", "1957-07-30", nil},
"Mon YYYY": {"Jul 1867", "1867-07", nil},
"DD.MM.YYYY with ;": {"30.07.1957; 1958", "1957-07-30", nil},
"ca. gibberish": {"ca. foobar", "", ErrUnsupportedDateFormat},
}

for name, tc := range testCases {
Expand All @@ -92,5 +100,4 @@ func Test_TransformToDashedDate(t *testing.T) {
assert.Equal(t, tc.expectedOutput, parsedFormat)
})
}

}

0 comments on commit 226c8d1

Please sign in to comment.