forked from ghmagazine/rustbook
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
86 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,18 +1,98 @@ | ||
//! `bicycle_book_wordcount` はシンプルな文字、単語、行の出現頻度の計数機能を提供します。 | ||
//! 詳しくは[`count`](fn.count.html)関数のドキュメントを見て下さい。 | ||
use regex::Regex; | ||
use std::collections::HashMap; | ||
use std::io::BufRead; | ||
|
||
pub fn count(input: impl BufRead) -> HashMap<String, usize> { | ||
/// `input` から1行ずつUTF-8文字列を読み込み、頻度を数える。 | ||
/// | ||
/// 頻度を数える対象はオプションによって制御される。 | ||
/// * [`CountOption::Char`](enum.CountOption.html#variant.Char): Unicodeの1文字毎に頻度を数える | ||
/// * [`CountOption::Word`](enum.CountOption.html#variant.Word): 正規表現 `\w+` にマッチする単語毎に頻度を数える | ||
/// * [`CountOption::Line`](enum.CountOption.html#variant.Line): `\n`または`\r\n` で区切られた1行毎に頻度を数える | ||
/// | ||
/// # Examples | ||
/// 入力中の単語の出現頻度を数える例 | ||
/// | ||
/// ``` | ||
/// use std::io::Cursor; | ||
/// use wcount::{count, CountOption}; | ||
/// | ||
/// let mut input = Cursor::new("aa bb cc bb"); | ||
/// let freq = count(input, CountOption::Word); | ||
/// | ||
/// assert_eq!(freq["aa"], 1); | ||
/// assert_eq!(freq["bb"], 2); | ||
/// assert_eq!(freq["cc"], 1); | ||
/// ``` | ||
/// | ||
/// # Panics | ||
/// | ||
/// 入力がUTF-8でフォーマットされていない場合にパニックする。 | ||
pub fn count(input: impl BufRead, option: CountOption) -> HashMap<String, usize> { | ||
let re = Regex::new(r"\w+").unwrap(); | ||
let mut freqs = HashMap::new(); // HashMap<String, usize>型 | ||
|
||
for line in input.lines() { | ||
let line = line.unwrap(); | ||
for m in re.find_iter(&line) { | ||
let word = m.as_str().to_string(); | ||
// 5. 出現した単語の出現頻度を数える | ||
*freqs.entry(word).or_insert(0) += 1; | ||
use crate::CountOption::*; | ||
match option { | ||
Char => { | ||
for c in line.chars() { | ||
*freqs.entry(c.to_string()).or_insert(0) += 1; | ||
} | ||
} | ||
Word => { | ||
for m in re.find_iter(&line) { | ||
let word = m.as_str().to_string(); | ||
// 5. 出現した単語の出現頻度を数える | ||
*freqs.entry(word).or_insert(0) += 1; | ||
} | ||
} | ||
Line => *freqs.entry(line.to_string()).or_insert(0) += 1, | ||
} | ||
} | ||
freqs | ||
} | ||
|
||
/// Option that selects what [`count`](fn.count.html) tallies.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CountOption {
    /// Count the frequency of each character.
    Char,
    /// Count the frequency of each word.
    Word,
    /// Count the frequency of each line.
    Line,
}

/// The default option is [`Word`](enum.CountOption.html#variant.Word).
impl Default for CountOption {
    fn default() -> Self {
        CountOption::Word
    }
}
|
||
// Word mode counts repeated words: "bb" appears twice, the others once.
#[test]
fn word_count_works() {
    use std::io::Cursor;

    let exp: HashMap<String, usize> = [("aa", 1), ("bb", 2), ("cc", 1)]
        .iter()
        .map(|&(word, n)| (word.to_string(), n))
        .collect();

    assert_eq!(count(Cursor::new("aa bb cc bb"), CountOption::Word), exp);
}
// Word mode with all-distinct words: every word is counted exactly once.
#[test]
fn word_count_works2() {
    use std::io::Cursor;

    let exp: HashMap<String, usize> = [("aa", 1), ("cc", 1), ("dd", 1)]
        .iter()
        .map(|&(word, n)| (word.to_string(), n))
        .collect();

    assert_eq!(count(Cursor::new("aa cc dd"), CountOption::Word), exp);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters