HW03 is completed

DimVlas · May 3, 2024 · 35ec0c6 · 35ec0c6
1 parent 5b4595d
commit 35ec0c6
Show file tree

Hide file tree

Showing 3 changed files with 140 additions and 79 deletions.
diff --git a/.golangci.yml b/.golangci.yml
@@ -8,6 +8,21 @@ linters-settings:
   funlen:
     lines: 150
     statements: 80
+  depguard:
+    rules:
+      main:
+        list-mode: lax
+        files:
+        - $all
+        - "!$test"
+        allow:
+        - $gostd
+      test:
+        files:
+        - "$test"
+        allow:
+        - $gostd
+        - "github.com/stretchr/testify/require"
 
 issues:
   exclude-rules:

diff --git a/hw03_frequency_analysis/top.go b/hw03_frequency_analysis/top.go
@@ -2,11 +2,17 @@ package hw03frequencyanalysis
 
 import (
 	"regexp"
-	"slices"
 	"sort"
 	"strings"
 )
 
+var (
+	// Bonus ("asterisk") task: ignore letter case and punctuation at the edges of a word.
+	isAsteriksTask = true
+	patt           = `[a-zA-Zа-яА-Я0-9]+[\.\-\,]*[a-zA-Zа-яА-Я0-9]+|\-{2,}|[a-zA-Zа-яА-Я0-9]+`
+	pattern        = regexp.MustCompile(patt)
+)
+
 // Разбивает текст на слова, возвращает срез слов.
 func splitWords(text string, pattern *regexp.Regexp) []string {
 	if len(text) < 1 {
@@ -15,19 +21,16 @@ func splitWords(text string, pattern *regexp.Regexp) []string {
 
 	if pattern == nil {
 		return strings.Fields(text)
-	} else {
-		//res := pattern.Split(text, -1)
-		return slices.DeleteFunc(pattern.Split(text, -1), func(s string) bool {
-			return len(s) == 0
-		})
 	}
+
+	return pattern.FindAllString(text, -1)
 }
 
 // Считает частоту элементов в строковом срезе.
 // Возвращает slice структур wordWidth - слова с их весами.
-// Slice отсортирован в зависимости от веса
-// isNotCaseSens - true, не учитывать регистр
-func wordsWidthsSort(s []string, isNotCaseSens bool) []wordWidth {
+// Slice отсортирован в зависимости от веса.
+// isIgnoreCase - true, не учитывать регистр.
+func wordsWidthsSort(s []string, isIgnoreCase bool) []wordWidth {
 	if len(s) < 1 {
 		return []wordWidth{}
 	}
@@ -36,15 +39,15 @@ func wordsWidthsSort(s []string, isNotCaseSens bool) []wordWidth {
 
 	for _, w := range s {
 		word := w
-		if isNotCaseSens {
-			word = strings.ToUpper(word)
+		if isIgnoreCase {
+			word = strings.ToLower(word)
 		}
 
 		if width, ok := m[word]; ok {
 			width.Width++
 			m[word] = width
 		} else {
-			m[word] = wordWidth{Word: w, Width: 1}
+			m[word] = wordWidth{Word: word, Width: 1}
 		}
 	}
 
@@ -61,29 +64,28 @@ func wordsWidthsSort(s []string, isNotCaseSens bool) []wordWidth {
 	return ww
 }
 
-// Слово с его весом
+// wordWidth is a word together with its weight.
 type wordWidth struct {
 	Word  string
 	Width int
 }
 
-// func (w wordWidth) GetKey() string {
-// 	return fmt.Sprintf("%04d%s", w.Width, w.Word)
-// }
-
-var pattern *regexp.Regexp = regexp.MustCompile(`[\s,.]+`) //"(?U)\\W+".
-
 func Top10(text string) []string {
 	if len(text) < 1 {
 		return []string{}
 	}
 
-	words := splitWords(text, pattern)
+	var words []string
+	if isAsteriksTask {
+		words = splitWords(text, pattern)
+	} else {
+		words = splitWords(text, nil)
+	}
 	if len(words) < 1 {
 		return []string{}
 	}
 
-	widths := wordsWidthsSort(words, false)
+	widths := wordsWidthsSort(words, isAsteriksTask)
 
 	res := make([]string, 0, 10)
 

diff --git a/hw03_frequency_analysis/top_test.go b/hw03_frequency_analysis/top_test.go
@@ -7,7 +7,7 @@ import (
 )
 
 // Change to true if needed.
-var taskWithAsteriskIsCompleted = true
+var taskWithAsteriskIsCompleted = isAsteriksTask
 
 var text = `Как видите, он  спускается  по  лестнице  вслед  за  своим
 	другом   Кристофером   Робином,   головой   вниз,  пересчитывая
@@ -83,79 +83,113 @@ func TestTop10(t *testing.T) {
 
 func TestSplitWords(t *testing.T) {
 	tests := []struct {
-		name     string
-		input    string
-		expected []string
+		name        string
+		input       string
+		expected    []string
+		expectedReg []string
 	}{
-		{name: "empty", input: "", expected: []string{}},
-		{name: "en", input: "alfa, beta   gamma    ", expected: []string{"alfa", "beta", "gamma"}},
-		// {
-		// 	name: "ru",
-		// 	input: `Предложения  	складываются в абзацы -
-		// 	и вы наслаждетесь очередным бредошедевром.`,
-		// 	expected: []string{
-		// 		"Предложения",
-		// 		"складываются",
-		// 		"в",
-		// 		"абзацы",
-		// 		"-",
-		// 		"и",
-		// 		"вы",
-		// 		"наслаждетесь",
-		// 		"очередным",
-		// 		"бредошедевром.",
-		// 	},
-		// },
+		{name: "empty", input: "", expected: []string{}, expectedReg: []string{}},
+		{
+			name:  "en.dog_cat",
+			input: "dog,    cat; dog,,,cat	dog...cat ,dog - cat",
+			expected: []string{
+				"dog,",
+				"cat;",
+				"dog,,,cat",
+				"dog...cat",
+				",dog",
+				"-",
+				"cat",
+			},
+			expectedReg: []string{
+				"dog",
+				"cat",
+				"dog,,,cat",
+				"dog...cat",
+				"dog",
+				"cat",
+			},
+		},
+		{
+			name: "ru.text",
+			input: `Предложения  	складываются в абзацы -
+			и вы...мы наслаждаетесь	каким-то	очередным ------ бредошедевром?`,
+			expected: []string{
+				"Предложения",
+				"складываются",
+				"в",
+				"абзацы",
+				"-", "и",
+				"вы...мы",
+				"наслаждаетесь",
+				"каким-то",
+				"очередным",
+				"------",
+				"бредошедевром?",
+			},
+			expectedReg: []string{
+				"Предложения",
+				"складываются",
+				"в",
+				"абзацы",
+				"и",
+				"вы...мы",
+				"наслаждаетесь",
+				"каким-то",
+				"очередным",
+				"------",
+				"бредошедевром",
+			},
+		},
 	}
 
 	for _, tc := range tests {
 		tc := tc
 		t.Run(tc.name, func(t *testing.T) {
-			result := splitWords(tc.input, pattern)
-			require.Equal(t, tc.expected, result)
+			var result []string
+			if taskWithAsteriskIsCompleted {
+				result = splitWords(tc.input, pattern)
+				require.Equal(t, tc.expectedReg, result)
+			} else {
+				result = splitWords(tc.input, nil)
+				require.Equal(t, tc.expected, result)
+			}
 		})
 	}
 }
 
 func TestWordsWidthsSort(t *testing.T) {
 	tests := []struct {
-		name          string
-		isNotCaseSens bool
-		input         []string
-		expected      []wordWidth
+		name               string
+		input              []string
+		expected           []wordWidth
+		expectedIgnoreCase []wordWidth
 	}{
 		{
-			name:          "empty",
-			isNotCaseSens: false,
-			input:         []string{},
-			expected:      []wordWidth{},
-		},
-		{
-			name:          "en. two_two_one",
-			isNotCaseSens: true,
-			input:         []string{"alfa", "beta", "gamma", "Beta", "Alfa"},
-			expected: []wordWidth{
-				{Word: "alfa", Width: 2},
-				{Word: "beta", Width: 2},
-				{Word: "gamma", Width: 1},
-			},
+			name:               "empty",
+			input:              []string{},
+			expected:           []wordWidth{},
+			expectedIgnoreCase: []wordWidth{},
 		},
 		{
-			name:          "en. all_one",
-			isNotCaseSens: false,
-			input:         []string{"alfa", "beta", "gamma", "Beta", "Alfa"},
+			name:  "en",
+			input: []string{"alfa", "beta", "gamma", "Beta", "Alfa"},
 			expected: []wordWidth{
 				{Word: "Alfa", Width: 1},
 				{Word: "Beta", Width: 1},
 				{Word: "alfa", Width: 1},
 				{Word: "beta", Width: 1},
 				{Word: "gamma", Width: 1},
 			},
+			expectedIgnoreCase: []wordWidth{
+				{Word: "alfa", Width: 2},
+				{Word: "beta", Width: 2},
+				{Word: "gamma", Width: 1},
+			},
 		},
 		{
-			name:          "ru: two_all_one",
-			isNotCaseSens: false,
-			input:         []string{"Мама", "мыла", "раму,", "раму", "мыла", "мама", "Мыла", "Раму", "мамА"},
+			name:  "ru",
+			input: []string{"Мама", "мыла", "раму,", "раму", "мыла", "мама", "Мыла", "Раму", "мамА"},
 			expected: []wordWidth{
 				{"мыла", 2},
 				{"Мама", 1},
@@ -166,25 +200,35 @@ func TestWordsWidthsSort(t *testing.T) {
 				{"раму", 1},
 				{"раму,", 1},
 			},
-		},
-		{
-			name:          "ru: three_three_two_one",
-			isNotCaseSens: true,
-			input:         []string{"Мама", "мыла", "раму,", "раму", "мыла", "мама", "Мыла", "Раму", "мамА"},
-			expected: []wordWidth{
-				{"Мама", 3},
+			expectedIgnoreCase: []wordWidth{
+				{"мама", 3},
 				{"мыла", 3},
 				{"раму", 2},
 				{"раму,", 1},
 			},
 		},
+		// {
+		// 	name:          "ru: three_three_two_one",
+		// 	isNotCaseSens: true,
+		// 	input:         []string{"Мама", "мыла", "раму,", "раму", "мыла", "мама", "Мыла", "Раму", "мамА"},
+		// 	expected: []wordWidth{
+		// 		{"Мама", 3},
+		// 		{"мыла", 3},
+		// 		{"раму", 2},
+		// 		{"раму,", 1},
+		// 	},
+		// },
 	}
 
 	for _, tc := range tests {
 		tc := tc
 		t.Run(tc.name, func(t *testing.T) {
-			result := wordsWidthsSort(tc.input, tc.isNotCaseSens)
-			require.Equal(t, tc.expected, result)
+			result := wordsWidthsSort(tc.input, taskWithAsteriskIsCompleted)
+			if taskWithAsteriskIsCompleted {
+				require.Equal(t, tc.expectedIgnoreCase, result)
+			} else {
+				require.Equal(t, tc.expected, result)
+			}
 		})
 	}
 }