-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcorpus.go
518 lines (516 loc) · 24.3 KB
/
corpus.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
package toyspellingcorrector
// corpus bundles two independent sets of spelling test cases.
//
// Each set maps a word to a list of alternative spellings of that word.
// NOTE(review): judging by the entries, the key is the expected correction and
// the values are the misspelled inputs; confirm against the code that
// consumes these maps.
type corpus struct {
	// tests1 is the first set of word -> spellings cases.
	tests1 map[string][]string
	// tests2 is the second set of word -> spellings cases.
	tests2 map[string][]string
}
// developmentCorpus returns a freshly allocated corpus populated with two
// hard-coded sets of spelling test cases (tests1 and tests2).
//
// Every call builds new maps, so callers may mutate the result freely.
//
// The data is reproduced verbatim and must stay that way, because any edit
// changes the number and outcome of the test cases:
//   - some spelling lists contain the same entry more than once
//     (for example "chalenges" or "occurence");
//   - a few keys are themselves non-standard spellings
//     (for example "arrangeing", "simular", "refered", "summarys").
//
// NOTE(review): the entries look like word -> misspellings pairs (key is the
// expected correction); confirm against the code that consumes the corpus.
func developmentCorpus() *corpus {
	// First test set.
	setOne := map[string][]string{
		"access": {"acess"},
		"accessing": {"accesing"},
		"accommodation": {"accomodation", "acommodation", "acomodation"},
		"account": {"acount"},
		"address": {"adress", "adres"},
		"addressable": {"addresable"},
		"arranged": {"aranged", "arrainged"},
		"arrangeing": {"aranging"},
		"arrangement": {"arragment"},
		"articles": {"articals"},
		"aunt": {"annt", "anut", "arnt"},
		"auxiliary": {"auxillary"},
		"available": {"avaible"},
		"awful": {"awfall", "afful"},
		"basically": {"basicaly"},
		"beginning": {"begining"},
		"benefit": {"benifit"},
		"benefits": {"benifits"},
		"between": {"beetween"},
		"bicycle": {"bicycal", "bycicle", "bycycle"},
		"biscuits": {"biscits", "biscutes", "biscuts", "bisquits", "buiscits", "buiscuts"},
		"built": {"biult"},
		"cake": {"cak"},
		"career": {"carrer"},
		"cemetery": {"cemetary", "semetary"},
		"centrally": {"centraly"},
		"certain": {"cirtain"},
		"challenges": {"chalenges", "chalenges"},
		"chapter": {"chaper", "chaphter", "chaptur"},
		"choice": {"choise"},
		"choosing": {"chosing"},
		"clerical": {"clearical"},
		"committee": {"comittee"},
		"compare": {"compair"},
		"completely": {"completly"},
		"consider": {"concider"},
		"considerable": {"conciderable"},
		"contented": {"contenpted", "contende", "contended", "contentid"},
		"curtains": {"cartains", "certans", "courtens", "cuaritains", "curtans", "curtians", "curtions"},
		"decide": {"descide"},
		"decided": {"descided"},
		"definitely": {"definately", "difinately"},
		"definition": {"defenition"},
		"definitions": {"defenitions"},
		"description": {"discription"},
		"desiccate": {"desicate", "dessicate", "dessiccate"},
		"diagrammatically": {"diagrammaticaally"},
		"different": {"diffrent"},
		"driven": {"dirven"},
		"ecstasy": {"exstacy", "ecstacy"},
		"embarrass": {"embaras", "embarass"},
		"establishing": {"astablishing", "establising"},
		"experience": {"experance", "experiance"},
		"experiences": {"experances"},
		"extended": {"extented"},
		"extremely": {"extreamly"},
		"fails": {"failes"},
		"families": {"familes"},
		"february": {"febuary"},
		"further": {"futher"},
		"gallery": {"galery", "gallary", "gallerry", "gallrey"},
		"hierarchal": {"hierachial"},
		"hierarchy": {"hierchy"},
		"inconvenient": {"inconvienient", "inconvient", "inconvinient"},
		"independent": {"independant", "independant"},
		"initial": {"intial"},
		"initials": {"inetials", "inistals", "initails", "initals", "intials"},
		"juice": {"guic", "juce", "jucie", "juise", "juse"},
		"latest": {"lates", "latets", "latiest", "latist"},
		"laugh": {"lagh", "lauf", "laught", "lugh"},
		"level": {"leval"},
		"levels": {"levals"},
		"liaison": {"liaision", "liason"},
		"lieu": {"liew"},
		"literature": {"litriture"},
		"loans": {"lones"},
		"locally": {"localy"},
		"magnificent": {"magnificnet", "magificent", "magnifcent", "magnifecent", "magnifiscant", "magnifisent", "magnificant"},
		"management": {"managment"},
		"meant": {"ment"},
		"minuscule": {"miniscule"},
		"minutes": {"muinets"},
		"monitoring": {"monitering"},
		"necessary": {"neccesary", "necesary", "neccesary", "necassary", "necassery", "neccasary"},
		"occurrence": {"occurence", "occurence"},
		"often": {"ofen", "offen", "offten", "ofton"},
		"opposite": {"opisite", "oppasite", "oppesite", "oppisit", "oppisite", "opposit", "oppossite", "oppossitte"},
		"parallel": {"paralel", "paralell", "parrallel", "parralell", "parrallell"},
		"particular": {"particulaur"},
		"perhaps": {"perhapse"},
		"personnel": {"personnell"},
		"planned": {"planed"},
		"poem": {"poame"},
		"poems": {"poims", "pomes"},
		"poetry": {"poartry", "poertry", "poetre", "poety", "powetry"},
		"position": {"possition"},
		"possible": {"possable"},
		"pretend": {"pertend", "protend", "prtend", "pritend"},
		"problem": {"problam", "proble", "promblem", "proplen"},
		"pronunciation": {"pronounciation"},
		"purple": {"perple", "perpul", "poarple"},
		"questionnaire": {"questionaire"},
		"really": {"realy", "relley", "relly"},
		"receipt": {"receit", "receite", "reciet", "recipt"},
		"receive": {"recieve"},
		"refreshment": {"reafreshment", "refreshmant", "refresment", "refressmunt"},
		"remember": {"rember", "remeber", "rememmer", "rermember"},
		"remind": {"remine", "remined"},
		"scarcely": {"scarcly", "scarecly", "scarely", "scarsely"},
		"scissors": {"scisors", "sissors"},
		"separate": {"seperate"},
		"singular": {"singulaur"},
		"someone": {"somone"},
		"sources": {"sorces"},
		"southern": {"southen"},
		"special": {"speaical", "specail", "specal", "speical"},
		"splendid": {"spledid", "splended", "splened", "splended"},
		"standardizing": {"stanerdizing"},
		"stomach": {"stomac", "stomache", "stomec", "stumache"},
		"supersede": {"supercede", "superceed"},
		"there": {"ther"},
		"totally": {"totaly"},
		"transferred": {"transfred"},
		"transportability": {"transportibility"},
		"triangular": {"triangulaur"},
		"understand": {"undersand", "undistand"},
		"unexpected": {"unexpcted", "unexpeted", "unexspected"},
		"unfortunately": {"unfortunatly"},
		"unique": {"uneque"},
		"useful": {"usefull"},
		"valuable": {"valubale", "valuble"},
		"variable": {"varable"},
		"variant": {"vairiant"},
		"various": {"vairious"},
		"visited": {"fisited", "viseted", "vistid", "vistied"},
		"visitors": {"vistors"},
		"voluntary": {"volantry"},
		"voting": {"voteing"},
		"wanted": {"wantid", "wonted"},
		"whether": {"wether"},
		"wrote": {"rote", "wote"},
	}

	// Second test set.
	setTwo := map[string][]string{
		"forbidden": {"forbiden"},
		"decisions": {"deciscions", "descisions"},
		"supposedly": {"supposidly"},
		"embellishing": {"embelishing"},
		"technique": {"tecnique"},
		"permanently": {"perminantly"},
		"confirmation": {"confermation"},
		"appointment": {"appoitment"},
		"progression": {"progresion"},
		"accompanying": {"acompaning"},
		"applicable": {"aplicable"},
		"regained": {"regined"},
		"guidelines": {"guidlines"},
		"surrounding": {"serounding"},
		"titles": {"tittles"},
		"unavailable": {"unavailble"},
		"advantageous": {"advantageos"},
		"brief": {"brif"},
		"appeal": {"apeal"},
		"consisting": {"consisiting"},
		"clerk": {"cleark", "clerck"},
		"component": {"componant"},
		"favourable": {"faverable"},
		"separation": {"seperation"},
		"search": {"serch"},
		"receive": {"recieve"},
		"employees": {"emploies"},
		"prior": {"piror"},
		"resulting": {"reulting"},
		"suggestion": {"sugestion"},
		"opinion": {"oppinion"},
		"cancellation": {"cancelation"},
		"criticism": {"citisum"},
		"useful": {"usful"},
		"humour": {"humor"},
		"anomalies": {"anomolies"},
		"would": {"whould"},
		"doubt": {"doupt"},
		"examination": {"eximination"},
		"therefore": {"therefoe"},
		"recommend": {"recomend"},
		"separated": {"seperated"},
		"successful": {"sucssuful", "succesful"},
		"apparent": {"apparant"},
		"occurred": {"occureed"},
		"particular": {"paerticulaur"},
		"pivoting": {"pivting"},
		"announcing": {"anouncing"},
		"challenge": {"chalange"},
		"arrangements": {"araingements"},
		"proportions": {"proprtions"},
		"organized": {"oranised"},
		"accept": {"acept"},
		"dependence": {"dependance"},
		"unequalled": {"unequaled"},
		"numbers": {"numbuers"},
		"sense": {"sence"},
		"conversely": {"conversly"},
		"provide": {"provid"},
		"arrangement": {"arrangment"},
		"responsibilities": {"responsiblities"},
		"fourth": {"forth"},
		"ordinary": {"ordenary"},
		"description": {"desription", "descvription", "desacription"},
		"inconceivable": {"inconcievable"},
		"data": {"dsata"},
		"register": {"rgister"},
		"supervision": {"supervison"},
		"encompassing": {"encompasing"},
		"negligible": {"negligable"},
		"allow": {"alow"},
		"operations": {"operatins"},
		"executed": {"executted"},
		"interpretation": {"interpritation"},
		"hierarchy": {"heiarky"},
		"indeed": {"indead"},
		"years": {"yesars"},
		"through": {"throut"},
		"committee": {"committe"},
		"inquiries": {"equiries"},
		"before": {"befor"},
		"continued": {"contuned"},
		"permanent": {"perminant"},
		"choose": {"chose"},
		"virtually": {"vertually"},
		"correspondence": {"correspondance"},
		"eventually": {"eventully"},
		"lonely": {"lonley"},
		"profession": {"preffeson"},
		"they": {"thay"},
		"now": {"noe"},
		"desperately": {"despratly"},
		"university": {"unversity"},
		"adjournment": {"adjurnment"},
		"possibilities": {"possablities"},
		"stopped": {"stoped"},
		"mean": {"meen"},
		"weighted": {"wagted"},
		"adequately": {"adequattly"},
		"shown": {"hown"},
		"matrix": {"matriiix"},
		"profit": {"proffit"},
		"encourage": {"encorage"},
		"collate": {"colate"},
		"disaggregate": {"disaggreagte", "disaggreaget"},
		"receiving": {"recieving", "reciving"},
		"proviso": {"provisoe"},
		"umbrella": {"umberalla"},
		"approached": {"aproached"},
		"pleasant": {"plesent"},
		"difficulty": {"dificulty"},
		"appointments": {"apointments"},
		"base": {"basse"},
		"conditioning": {"conditining"},
		"earliest": {"earlyest"},
		"beginning": {"begining"},
		"universally": {"universaly"},
		"unresolved": {"unresloved"},
		"length": {"lengh"},
		"exponentially": {"exponentualy"},
		"utilized": {"utalised"},
		"set": {"et"},
		"surveys": {"servays"},
		"families": {"familys"},
		"system": {"sysem"},
		"approximately": {"aproximatly"},
		"their": {"ther"},
		"scheme": {"scheem"},
		"speaking": {"speeking"},
		"repetitive": {"repetative"},
		"inefficient": {"ineffiect"},
		"geneva": {"geniva"},
		"exactly": {"exsactly"},
		"immediate": {"imediate"},
		"appreciation": {"apreciation"},
		"luckily": {"luckeley"},
		"eliminated": {"elimiated"},
		"believe": {"belive"},
		"appreciated": {"apreciated"},
		"readjusted": {"reajusted"},
		"were": {"wer", "where"},
		"feeling": {"fealing"},
		"and": {"anf"},
		"false": {"faulse"},
		"seen": {"seeen"},
		"interrogating": {"interogationg"},
		"academically": {"academicly"},
		"relatively": {"relativly", "relitivly"},
		"traditionally": {"traditionaly"},
		"studying": {"studing"},
		"majority": {"majorty"},
		"build": {"biuld"},
		"aggravating": {"agravating"},
		"transactions": {"trasactions"},
		"arguing": {"aurguing"},
		"sheets": {"sheertes"},
		"successive": {"sucsesive", "sucessive"},
		"segment": {"segemnt"},
		"especially": {"especaily"},
		"later": {"latter"},
		"senior": {"sienior"},
		"dragged": {"draged"},
		"atmosphere": {"atmospher"},
		"drastically": {"drasticaly"},
		"particularly": {"particulary"},
		"visitor": {"vistor"},
		"session": {"sesion"},
		"continually": {"contually"},
		"availability": {"avaiblity"},
		"busy": {"buisy"},
		"parameters": {"perametres"},
		"surroundings": {"suroundings", "seroundings"},
		"employed": {"emploied"},
		"adequate": {"adiquate"},
		"handle": {"handel"},
		"means": {"meens"},
		"familiar": {"familer"},
		"between": {"beeteen"},
		"overall": {"overal"},
		"timing": {"timeing"},
		"committees": {"comittees", "commitees"},
		"queries": {"quies"},
		"econometric": {"economtric"},
		"erroneous": {"errounous"},
		"decides": {"descides"},
		"reference": {"refereence", "refference"},
		"intelligence": {"inteligence"},
		"edition": {"ediion", "ediition"},
		"are": {"arte"},
		"apologies": {"appologies"},
		"thermawear": {"thermawere", "thermawhere"},
		"techniques": {"tecniques"},
		"voluntary": {"volantary"},
		"subsequent": {"subsequant", "subsiquent"},
		"currently": {"curruntly"},
		"forecast": {"forcast"},
		"weapons": {"wepons"},
		"routine": {"rouint"},
		"neither": {"niether"},
		"approach": {"aproach"},
		"available": {"availble"},
		"recently": {"reciently"},
		"ability": {"ablity"},
		"nature": {"natior"},
		"commercial": {"comersial"},
		"agencies": {"agences"},
		"however": {"howeverr"},
		"suggested": {"sugested"},
		"career": {"carear"},
		"many": {"mony"},
		"annual": {"anual"},
		"according": {"acording"},
		"receives": {"recives", "recieves"},
		"interesting": {"intresting"},
		"expense": {"expence"},
		"relevant": {"relavent", "relevaant"},
		"table": {"tasble"},
		"throughout": {"throuout"},
		"conference": {"conferance"},
		"sensible": {"sensable"},
		"described": {"discribed", "describd"},
		"union": {"unioun"},
		"interest": {"intrest"},
		"flexible": {"flexable"},
		"refered": {"reffered"},
		"controlled": {"controled"},
		"sufficient": {"suficient"},
		"dissension": {"desention"},
		"adaptable": {"adabtable"},
		"representative": {"representitive"},
		"irrelevant": {"irrelavent"},
		"unnecessarily": {"unessasarily"},
		"applied": {"upplied"},
		"apologised": {"appologised"},
		"these": {"thees", "thess"},
		"choices": {"choises"},
		"will": {"wil"},
		"procedure": {"proceduer"},
		"shortened": {"shortend"},
		"manually": {"manualy"},
		"disappointing": {"dissapoiting"},
		"excessively": {"exessively"},
		"comments": {"coments"},
		"containing": {"containg"},
		"develop": {"develope"},
		"credit": {"creadit"},
		"government": {"goverment"},
		"acquaintances": {"aquantences"},
		"orientated": {"orentated"},
		"widely": {"widly"},
		"advise": {"advice"},
		"difficult": {"dificult"},
		"investigated": {"investegated"},
		"bonus": {"bonas"},
		"conceived": {"concieved"},
		"nationally": {"nationaly"},
		"compared": {"comppared", "compased"},
		"moving": {"moveing"},
		"necessity": {"nessesity"},
		"opportunity": {"oppertunity", "oppotunity", "opperttunity"},
		"thoughts": {"thorts"},
		"equalled": {"equaled"},
		"variety": {"variatry"},
		"analysis": {"analiss", "analsis", "analisis"},
		"patterns": {"pattarns"},
		"qualities": {"quaties"},
		"easily": {"easyly"},
		"organization": {"oranisation", "oragnisation"},
		"the": {"thw", "hte", "thi"},
		"corporate": {"corparate"},
		"composed": {"compossed"},
		"enormously": {"enomosly"},
		"financially": {"financialy"},
		"functionally": {"functionaly"},
		"discipline": {"disiplin"},
		"announcement": {"anouncement"},
		"progresses": {"progressess"},
		"except": {"excxept"},
		"recommending": {"recomending"},
		"mathematically": {"mathematicaly"},
		"source": {"sorce"},
		"combine": {"comibine"},
		"input": {"inut"},
		"careers": {"currers", "carrers"},
		"resolved": {"resoved"},
		"demands": {"diemands"},
		"unequivocally": {"unequivocaly"},
		"suffering": {"suufering"},
		"immediately": {"imidatly", "imediatly"},
		"accepted": {"acepted"},
		"projects": {"projeccts"},
		"necessary": {"necasery", "nessasary", "nessisary", "neccassary"},
		"journalism": {"journaism"},
		"unnecessary": {"unessessay"},
		"night": {"nite"},
		"output": {"oputput"},
		"security": {"seurity"},
		"essential": {"esential"},
		"beneficial": {"benificial", "benficial"},
		"explaining": {"explaning"},
		"supplementary": {"suplementary"},
		"questionnaire": {"questionare"},
		"employment": {"empolyment"},
		"proceeding": {"proceding"},
		"decision": {"descisions", "descision"},
		"per": {"pere"},
		"discretion": {"discresion"},
		"reaching": {"reching"},
		"analysed": {"analised"},
		"expansion": {"expanion"},
		"although": {"athough"},
		"subtract": {"subtrcat"},
		"analysing": {"aalysing"},
		"comparison": {"comparrison"},
		"months": {"monthes"},
		"hierarchal": {"hierachial"},
		"misleading": {"missleading"},
		"commit": {"comit"},
		"auguments": {"aurgument"},
		"within": {"withing"},
		"obtaining": {"optaning"},
		"accounts": {"acounts"},
		"primarily": {"pimarily"},
		"operator": {"opertor"},
		"accumulated": {"acumulated"},
		"extremely": {"extreemly"},
		"there": {"thear"},
		"summarys": {"sumarys"},
		"analyse": {"analiss"},
		"understandable": {"understadable"},
		"safeguard": {"safegaurd"},
		"consist": {"consisit"},
		"declarations": {"declaratrions"},
		"minutes": {"muinutes", "muiuets"},
		"associated": {"assosiated"},
		"accessibility": {"accessability"},
		"examine": {"examin"},
		"surveying": {"servaying"},
		"politics": {"polatics"},
		"annoying": {"anoying"},
		"again": {"agiin"},
		"assessing": {"accesing"},
		"ideally": {"idealy"},
		"scrutinized": {"scrutiniesed"},
		"simular": {"similar"},
		"personnel": {"personel"},
		"whereas": {"wheras"},
		"when": {"whn"},
		"geographically": {"goegraphicaly"},
		"gaining": {"ganing"},
		"requested": {"rquested"},
		"separate": {"seporate"},
		"students": {"studens"},
		"prepared": {"prepaired"},
		"generated": {"generataed"},
		"graphically": {"graphicaly"},
		"suited": {"suted"},
		"variable": {"varible", "vaiable"},
		"building": {"biulding"},
		"required": {"reequired"},
		"necessitates": {"nessisitates"},
		"together": {"togehter"},
		"profits": {"proffits"},
	}

	return &corpus{tests1: setOne, tests2: setTwo}
}