-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathspellcheck.go
215 lines (173 loc) · 4.87 KB
/
spellcheck.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
package spellcheck
import (
"bufio"
"io"
"net/http"
"sync"
)
const (
// DEFAULT_ALPHABET is the default alphabet used for generating variations
DEFAULT_ALPHABET = "abcdefghijklmnopqrstuvwxyz"
// DEFAULT_REMOTE_WORDS_URL is the default url containing the words
DEFAULT_REMOTE_WORDS_URL = "https://raw.githubusercontent.com/makifdb/spellcheck/main/words.txt"
// DEFAULT_DEPTH is the default depth used for generating variations
DEFAULT_DEPTH = 1
)
type LetterNode struct {
// children is a map of the children nodes
children map[rune]*LetterNode
// isWord is true if the node is the end of a word
isWord bool
}
type Trie struct {
// root is the root node of the trie
root *LetterNode
// depth is the depth used for generating variations
depth int
// mutex is used for locking the trie
sync.RWMutex
}
// Insert adds a word to the trie
func (t *Trie) Insert(word string) {
t.Lock()
defer t.Unlock()
// create the root node if it doesn't exist
if t.root == nil {
t.root = &LetterNode{make(map[rune]*LetterNode), false}
}
// check if the word is already in the trie
node := t.root
for _, char := range word {
if _, ok := node.children[char]; !ok {
node.children[char] = &LetterNode{make(map[rune]*LetterNode), false}
}
node = node.children[char]
}
// check if the word is already in the trie
if node.isWord {
return
}
// mark the end of the word
node.isWord = true
}
// InsertReader adds words from a reader to the trie
func (t *Trie) InsertReader(r io.Reader) *bufio.Scanner {
// create a scanner to read the file line by line
scanner := bufio.NewScanner(r)
// read the file line by line
for scanner.Scan() {
word := scanner.Text()
t.Insert(word)
}
// check for errors
if err := scanner.Err(); err != nil {
return nil
}
return scanner
}
// Search checks if a word is in the trie and returns with a list of suggestions
func (t *Trie) Search(word string) (bool, []string) {
t.RLock()
defer t.RUnlock()
node := t.root
// check the tire for the full word
for _, char := range word {
if _, ok := node.children[char]; !ok {
// word not found, generate suggestions
// generate variations
suggestions := generateVariations(word, t.depth)
var validSuggestions []string
// check if the suggestions are in the trie
for _, suggestion := range suggestions {
if found := t.SearchDirect(suggestion); found {
// add the suggestion to the list of valid suggestions
validSuggestions = append(validSuggestions, suggestion)
}
}
// return false and the list of suggestions
return false, validSuggestions
}
// move to the next node
node = node.children[char]
}
// word found
return node.isWord, nil
}
// SearchDirect checks if a word is in the trie
func (t *Trie) SearchDirect(word string) bool {
t.RLock()
defer t.RUnlock()
// check the tire for the full word
node := t.root
for _, char := range word {
if _, ok := node.children[char]; !ok {
return false
}
node = node.children[char]
}
return node.isWord
}
// generateVariations generates variations of a word with a given depth
func generateVariations(word string, depth int) []string {
// check if the depth is 0 or the word is empty
if depth == 0 || len(word) == 0 {
return []string{}
}
var result []string
// Deletes
// word -> ord, wod, wrd, wor
for i := 0; i < len(word); i++ {
result = append(result, word[:i]+word[i+1:])
}
// Transposes
// word -> owrd, wrod, wodr, word
for i := 0; i < len(word)-1; i++ {
result = append(result, word[:i]+string(word[i+1])+string(word[i])+word[i+2:])
}
// Replaces
// word -> aord, bord, cord, ... zord
for i := 0; i < len(word); i++ {
for _, c := range DEFAULT_ALPHABET {
result = append(result, word[:i]+string(c)+word[i+1:])
}
}
// Inserts
// word -> aword, bword, cword, ... zword
for i := 0; i < len(word)+1; i++ {
for _, c := range DEFAULT_ALPHABET {
result = append(result, word[:i]+string(c)+word[i:])
}
}
// Recursive call
// generate variations for each variation
for _, variation := range result {
result = append(result, generateVariations(variation, depth-1)...)
}
return clearSuggestions(result)
}
// clearSuggestions removes duplicates from a list of suggestions
func clearSuggestions(suggestions []string) []string {
encountered := map[string]bool{}
result := []string{}
for _, suggestion := range suggestions {
if encountered[suggestion] {
continue
}
encountered[suggestion] = true
result = append(result, suggestion)
}
return result
}
// New creates a new trie and populates it with the words
func New() (*Trie, error) {
// create a new trie
t := &Trie{&LetterNode{make(map[rune]*LetterNode), false}, DEFAULT_DEPTH, sync.RWMutex{}}
resp, err := http.Get(DEFAULT_REMOTE_WORDS_URL)
if err != nil {
return nil, err
}
defer resp.Body.Close()
// create a scanner to read the file line by line and insert to trie
t.InsertReader(resp.Body)
return t, nil
}