Skip to content

Commit

Permalink
первая попытка написать через регулярку
Browse files Browse the repository at this point in the history
  • Loading branch information
DimVlas committed Apr 29, 2024
1 parent 71830a6 commit 5b4595d
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 37 deletions.
25 changes: 18 additions & 7 deletions hw03_frequency_analysis/top.go
Original file line number Diff line number Diff line change
@@ -1,23 +1,32 @@
package hw03frequencyanalysis

import (
"fmt"
"regexp"
"slices"
"sort"
"strings"
)

// splitWords splits text into words and returns them as a slice.
//
// When pattern is nil the text is split with strings.Fields, i.e. on runs of
// white space. Otherwise every match of pattern is treated as a separator and
// the empty strings that Split produces (for example when the text begins or
// ends with a separator) are dropped from the result.
//
// An empty text yields an empty, non-nil slice.
func splitWords(text string, pattern *regexp.Regexp) []string {
	if text == "" {
		return []string{}
	}

	if pattern == nil {
		return strings.Fields(text)
	}

	// Split emits "" around leading/trailing separators; those are not words.
	return slices.DeleteFunc(pattern.Split(text, -1), func(s string) bool {
		return s == ""
	})
}

// wordsWidthsSort counts how often each element occurs in the string slice.
// It returns a slice of wordWidth structs: the words together with their weights.
// The slice is sorted according to weight.
// isNotCaseSens: when true, letter case is ignored.
func wordsWidthsSort(s []string, isNotCaseSens bool) []wordWidth {
if len(s) < 1 {
return []wordWidth{}
Expand Down Expand Up @@ -58,16 +67,18 @@ type wordWidth struct {
Width int
}

// GetKey builds a string key for the entry: Width formatted as a decimal
// number zero-padded to at least four digits, immediately followed by Word.
func (w wordWidth) GetKey() string {
	key := fmt.Sprintf("%04d%s", w.Width, w.Word)
	return key
}
// func (w wordWidth) GetKey() string {
// return fmt.Sprintf("%04d%s", w.Width, w.Word)
// }

// pattern matches one or more consecutive separator characters (white space,
// commas or periods). Top10 passes it to splitWords to break text into words.
//
// NOTE(review): the original trailing comment recorded "(?U)\\W+", presumably
// an alternative expression that was tried — confirm before removing this note.
var pattern = regexp.MustCompile(`[\s,.]+`)

func Top10(text string) []string {
if len(text) < 1 {
return []string{}
}

words := splitWords(text)
words := splitWords(text, pattern)
if len(words) < 1 {
return []string{}
}
Expand Down
90 changes: 60 additions & 30 deletions hw03_frequency_analysis/top_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,72 +88,102 @@ func TestSplitWords(t *testing.T) {
expected []string
}{
{name: "empty", input: "", expected: []string{}},
{name: "en", input: "alfa beta gamma ", expected: []string{"alfa", "beta", "gamma"}},
{
name: "ru",
input: `Предложения складываются в абзацы -
и вы наслаждетесь очередным бредошедевром.`,
expected: []string{
"Предложения",
"складываются",
"в",
"абзацы",
"-",
"и",
"вы",
"наслаждетесь",
"очередным",
"бредошедевром.",
},
},
{name: "en", input: "alfa, beta gamma ", expected: []string{"alfa", "beta", "gamma"}},
// {
// name: "ru",
// input: `Предложения складываются в абзацы -
// и вы наслаждетесь очередным бредошедевром.`,
// expected: []string{
// "Предложения",
// "складываются",
// "в",
// "абзацы",
// "-",
// "и",
// "вы",
// "наслаждетесь",
// "очередным",
// "бредошедевром.",
// },
// },
}

for _, tc := range tests {
tc := tc
t.Run(tc.name, func(t *testing.T) {
result := splitWords(tc.input)
result := splitWords(tc.input, pattern)
require.Equal(t, tc.expected, result)
})
}
}

func TestWordsWidthsSort(t *testing.T) {
tests := []struct {
name string
input []string
expected []wordWidth
name string
isNotCaseSens bool
input []string
expected []wordWidth
}{
{
name: "empty",
input: []string{},
expected: []wordWidth{},
name: "empty",
isNotCaseSens: false,
input: []string{},
expected: []wordWidth{},
},
{
name: "en. two_two_one",
isNotCaseSens: true,
input: []string{"alfa", "beta", "gamma", "Beta", "Alfa"},
expected: []wordWidth{
{Word: "alfa", Width: 2},
{Word: "beta", Width: 2},
{Word: "gamma", Width: 1},
},
},
{
name: "en. all_one",
input: []string{"alfa", "beta", "gamma"},
name: "en. all_one",
isNotCaseSens: false,
input: []string{"alfa", "beta", "gamma", "Beta", "Alfa"},
expected: []wordWidth{
{Word: "Alfa", Width: 1},
{Word: "Beta", Width: 1},
{Word: "alfa", Width: 1},
{Word: "beta", Width: 1},
{Word: "gamma", Width: 1},
},
},
{
name: "second_two",
input: []string{"Мама", "мыла", "раму,", "раму", "мыла", "мама"},
name: "ru: two_all_one",
isNotCaseSens: false,
input: []string{"Мама", "мыла", "раму,", "раму", "мыла", "мама", "Мыла", "Раму", "мамА"},
expected: []wordWidth{
{"мыла", 2},
{"Мама", 1},
{"Мыла", 1},
{"Раму", 1},
{"мамА", 1},
{"мама", 1},
{"раму", 1},
{"раму,", 1},
},
},
{
name: "ru: three_three_two_one",
isNotCaseSens: true,
input: []string{"Мама", "мыла", "раму,", "раму", "мыла", "мама", "Мыла", "Раму", "мамА"},
expected: []wordWidth{
{"Мама", 3},
{"мыла", 3},
{"раму", 2},
{"раму,", 1},
},
},
}

for _, tc := range tests {
tc := tc
t.Run(tc.name, func(t *testing.T) {
result := wordsWidthsSort(tc.input, !taskWithAsteriskIsCompleted)
result := wordsWidthsSort(tc.input, tc.isNotCaseSens)
require.Equal(t, tc.expected, result)
})
}
Expand Down

0 comments on commit 5b4595d

Please sign in to comment.