Skip to content

Commit

Permalink
Repaired issue #634
Browse files Browse the repository at this point in the history
  • Loading branch information
Hugo-ter-Doest committed Jul 2, 2024
1 parent d2dcb3c commit da68353
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 1 deletion.
2 changes: 1 addition & 1 deletion lib/natural/tfidf/tfidf.js
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ class TfIdf {
term,
tf: TfIdf.tf(term, _this.documents[d]),
idf: _this.idf(term),
tfidf: _this.tfidf(term, d)
tfidf: _this.tfidf([term], d)
})
}
}
Expand Down
11 changes: 11 additions & 0 deletions spec/tfidf_spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,17 @@ describe('tfidf', function () {
})
})

// Issue #634 - prevent tfidf from applying a tokenizer to terms that are already tokenized
describe('tfidf with tokenized terms', function () {
  // Regression spec: a document added as an array of tokens must keep each
  // token intact when its terms are listed. A token such as 'google.com'
  // would be split if it were tokenized a second time, which would make the
  // lookup miss and the reported tfidf drop to 0.
  it('should list important terms correctly without tokenizing again', function () {
    // Build the fixture inside the spec (with a local instance) so that any
    // exception is reported as a failure of this spec instead of breaking
    // suite declaration, and so no state shared with other specs is mutated.
    const tokenizedTfidf = new TfIdf()
    tokenizedTfidf.addDocument(['domain', 'google.com'])
    const terms: TfIdfTerm[] = tokenizedTfidf.listTerms(0)

    // Assert the shape first: indexing a shorter list would otherwise fail
    // with an unhelpful TypeError rather than a clear expectation message.
    expect(terms.length).toBe(2)
    for (const listed of terms) {
      expect(listed.tfidf).toBeGreaterThan(0)
    }
  })
})

describe('special cases', function () {
// In response to
it('should handle reserved function names correctly in documents', function () {
Expand Down

0 comments on commit da68353

Please sign in to comment.