Skip to content

Commit

Permalink
HW03 is completed
Browse files Browse the repository at this point in the history
  • Loading branch information
DimVlas committed May 3, 2024
1 parent 5b4595d commit 35ec0c6
Show file tree
Hide file tree
Showing 3 changed files with 140 additions and 79 deletions.
15 changes: 15 additions & 0 deletions .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,21 @@ linters-settings:
funlen:
lines: 150
statements: 80
depguard:
rules:
main:
list-mode: lax
files:
- $all
- "!$test"
allow:
- $gostd
test:
files:
- "$test"
allow:
- $gostd
- "github.com/stretchr/testify/require"

issues:
exclude-rules:
Expand Down
44 changes: 23 additions & 21 deletions hw03_frequency_analysis/top.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,17 @@ package hw03frequencyanalysis

import (
"regexp"
"slices"
"sort"
"strings"
)

var (
	// isAsteriksTask enables the optional ("asterisk") part of the task:
	// letter case is ignored and punctuation at word edges is not counted.
	isAsteriksTask = true

	// patt is the word-matching expression. Alternatives, in priority order:
	//  1. a letter/digit run, optional '.', '-' or ',' separators, and another
	//     letter/digit run (keeps "каким-то" or "dog,,,cat" as a single word);
	//  2. a run of two or more dashes;
	//  3. a single letter/digit run (covers one-character words).
	//
	// 'ё' and 'Ё' are listed explicitly because they lie outside the
	// а-я / А-Я code point ranges; without them such words get split.
	patt = `[a-zA-Zа-яА-ЯёЁ0-9]+[\.\-\,]*[a-zA-Zа-яА-ЯёЁ0-9]+|\-{2,}|[a-zA-Zа-яА-ЯёЁ0-9]+`

	// pattern is patt compiled once at package initialisation.
	pattern = regexp.MustCompile(patt)
)

// Разбивает текст на слова, возвращает срез слов.
func splitWords(text string, pattern *regexp.Regexp) []string {
if len(text) < 1 {
Expand All @@ -15,19 +21,16 @@ func splitWords(text string, pattern *regexp.Regexp) []string {

if pattern == nil {
return strings.Fields(text)
} else {
//res := pattern.Split(text, -1)
return slices.DeleteFunc(pattern.Split(text, -1), func(s string) bool {
return len(s) == 0
})
}

return pattern.FindAllString(text, -1)
}

// Считает частоту элементов в строковом срезе.
// Возвращает slice структур wordWidth - слова с их весами.
// Slice отсортирован в зависимости от веса
// isNotCaseSens - true, не учитывать регистр
func wordsWidthsSort(s []string, isNotCaseSens bool) []wordWidth {
// Slice отсортирован в зависимости от веса.
// isIgnoreCase - true, не учитывать регистр.
func wordsWidthsSort(s []string, isIgnoreCase bool) []wordWidth {
if len(s) < 1 {
return []wordWidth{}
}
Expand All @@ -36,15 +39,15 @@ func wordsWidthsSort(s []string, isNotCaseSens bool) []wordWidth {

for _, w := range s {
word := w
if isNotCaseSens {
word = strings.ToUpper(word)
if isIgnoreCase {
word = strings.ToLower(word)
}

if width, ok := m[word]; ok {
width.Width++
m[word] = width
} else {
m[word] = wordWidth{Word: w, Width: 1}
m[word] = wordWidth{Word: word, Width: 1}
}
}

Expand All @@ -61,29 +64,28 @@ func wordsWidthsSort(s []string, isNotCaseSens bool) []wordWidth {
return ww
}

// Слово с его весом
// wordWidth pairs a word with its weight.
type wordWidth struct {
// Word is the counted word (lower-cased when case is ignored).
Word string
// Width is the weight of the word: the number of times it was seen.
Width int
}

// func (w wordWidth) GetKey() string {
// return fmt.Sprintf("%04d%s", w.Width, w.Word)
// }

var pattern *regexp.Regexp = regexp.MustCompile(`[\s,.]+`) //"(?U)\\W+".

func Top10(text string) []string {
if len(text) < 1 {
return []string{}
}

words := splitWords(text, pattern)
var words []string
if isAsteriksTask {
words = splitWords(text, pattern)
} else {
words = splitWords(text, nil)
}
if len(words) < 1 {
return []string{}
}

widths := wordsWidthsSort(words, false)
widths := wordsWidthsSort(words, isAsteriksTask)

res := make([]string, 0, 10)

Expand Down
160 changes: 102 additions & 58 deletions hw03_frequency_analysis/top_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import (
)

// Change to true if needed.
var taskWithAsteriskIsCompleted = true
var taskWithAsteriskIsCompleted = isAsteriksTask

var text = `Как видите, он спускается по лестнице вслед за своим
другом Кристофером Робином, головой вниз, пересчитывая
Expand Down Expand Up @@ -83,79 +83,113 @@ func TestTop10(t *testing.T) {

func TestSplitWords(t *testing.T) {
tests := []struct {
name string
input string
expected []string
name string
input string
expected []string
expectedReg []string
}{
{name: "empty", input: "", expected: []string{}},
{name: "en", input: "alfa, beta gamma ", expected: []string{"alfa", "beta", "gamma"}},
// {
// name: "ru",
// input: `Предложения складываются в абзацы -
// и вы наслаждетесь очередным бредошедевром.`,
// expected: []string{
// "Предложения",
// "складываются",
// "в",
// "абзацы",
// "-",
// "и",
// "вы",
// "наслаждетесь",
// "очередным",
// "бредошедевром.",
// },
// },
{name: "empty", input: "", expected: []string{}, expectedReg: []string{}},
{
name: "en.dog_cat",
input: "dog, cat; dog,,,cat dog...cat ,dog - cat",
expected: []string{
"dog,",
"cat;",
"dog,,,cat",
"dog...cat",
",dog",
"-",
"cat",
},
expectedReg: []string{
"dog",
"cat",
"dog,,,cat",
"dog...cat",
"dog",
"cat",
},
},
{
name: "ru.text",
input: `Предложения складываются в абзацы -
и вы...мы наслаждаетесь каким-то очередным ------ бредошедевром?`,
expected: []string{
"Предложения",
"складываются",
"в",
"абзацы",
"-", "и",
"вы...мы",
"наслаждаетесь",
"каким-то",
"очередным",
"------",
"бредошедевром?",
},
expectedReg: []string{
"Предложения",
"складываются",
"в",
"абзацы",
"и",
"вы...мы",
"наслаждаетесь",
"каким-то",
"очередным",
"------",
"бредошедевром",
},
},
}

for _, tc := range tests {
tc := tc
t.Run(tc.name, func(t *testing.T) {
result := splitWords(tc.input, pattern)
require.Equal(t, tc.expected, result)
var result []string
if taskWithAsteriskIsCompleted {
result = splitWords(tc.input, pattern)
require.Equal(t, tc.expectedReg, result)
} else {
result = splitWords(tc.input, nil)
require.Equal(t, tc.expected, result)
}
})
}
}

func TestWordsWidthsSort(t *testing.T) {
tests := []struct {
name string
isNotCaseSens bool
input []string
expected []wordWidth
name string
input []string
expected []wordWidth
expectedIgnoreCase []wordWidth
}{
{
name: "empty",
isNotCaseSens: false,
input: []string{},
expected: []wordWidth{},
},
{
name: "en. two_two_one",
isNotCaseSens: true,
input: []string{"alfa", "beta", "gamma", "Beta", "Alfa"},
expected: []wordWidth{
{Word: "alfa", Width: 2},
{Word: "beta", Width: 2},
{Word: "gamma", Width: 1},
},
name: "empty",
input: []string{},
expected: []wordWidth{},
expectedIgnoreCase: []wordWidth{},
},
{
name: "en. all_one",
isNotCaseSens: false,
input: []string{"alfa", "beta", "gamma", "Beta", "Alfa"},
name: "en",
input: []string{"alfa", "beta", "gamma", "Beta", "Alfa"},
expected: []wordWidth{
{Word: "Alfa", Width: 1},
{Word: "Beta", Width: 1},
{Word: "alfa", Width: 1},
{Word: "beta", Width: 1},
{Word: "gamma", Width: 1},
},
expectedIgnoreCase: []wordWidth{
{Word: "alfa", Width: 2},
{Word: "beta", Width: 2},
{Word: "gamma", Width: 1},
},
},
{
name: "ru: two_all_one",
isNotCaseSens: false,
input: []string{"Мама", "мыла", "раму,", "раму", "мыла", "мама", "Мыла", "Раму", "мамА"},
name: "ru",
input: []string{"Мама", "мыла", "раму,", "раму", "мыла", "мама", "Мыла", "Раму", "мамА"},
expected: []wordWidth{
{"мыла", 2},
{"Мама", 1},
Expand All @@ -166,25 +200,35 @@ func TestWordsWidthsSort(t *testing.T) {
{"раму", 1},
{"раму,", 1},
},
},
{
name: "ru: three_three_two_one",
isNotCaseSens: true,
input: []string{"Мама", "мыла", "раму,", "раму", "мыла", "мама", "Мыла", "Раму", "мамА"},
expected: []wordWidth{
{"Мама", 3},
expectedIgnoreCase: []wordWidth{
{"мама", 3},
{"мыла", 3},
{"раму", 2},
{"раму,", 1},
},
},
// {
// name: "ru: three_three_two_one",
// isNotCaseSens: true,
// input: []string{"Мама", "мыла", "раму,", "раму", "мыла", "мама", "Мыла", "Раму", "мамА"},
// expected: []wordWidth{
// {"Мама", 3},
// {"мыла", 3},
// {"раму", 2},
// {"раму,", 1},
// },
// },
}

for _, tc := range tests {
tc := tc
t.Run(tc.name, func(t *testing.T) {
result := wordsWidthsSort(tc.input, tc.isNotCaseSens)
require.Equal(t, tc.expected, result)
result := wordsWidthsSort(tc.input, taskWithAsteriskIsCompleted)
if taskWithAsteriskIsCompleted {
require.Equal(t, tc.expectedIgnoreCase, result)
} else {
require.Equal(t, tc.expected, result)
}
})
}
}

0 comments on commit 35ec0c6

Please sign in to comment.