# HG changeset patch
# User unc0rr
# Date 1546033637 -3600
# Node ID ba29aa03db87f5450840f02f5a548a839eff5ab1
# Parent 6cc0fce249f9684099017aa8ded974ed798cf23a
Implement letter repeat check

diff -r 6cc0fce249f9 -r ba29aa03db87 rust/chat_sanitizer/Cargo.toml
--- a/rust/chat_sanitizer/Cargo.toml Fri Dec 28 03:10:05 2018 +0300
+++ b/rust/chat_sanitizer/Cargo.toml Fri Dec 28 22:47:17 2018 +0100
@@ -6,3 +6,4 @@
 
 [dependencies]
 unicode_skeleton = "0.1"
+itertools = "0.8.0"
diff -r 6cc0fce249f9 -r ba29aa03db87 rust/chat_sanitizer/src/bad_words.rs
--- a/rust/chat_sanitizer/src/bad_words.rs Fri Dec 28 03:10:05 2018 +0300
+++ b/rust/chat_sanitizer/src/bad_words.rs Fri Dec 28 22:47:17 2018 +0100
@@ -19,13 +19,17 @@
 }
 
 impl<T> MessageChecker<T> for BadWordsChecker<T> {
-    fn check(&self, player_id: T, message: &str) -> Severity {
+    fn check(&self, _player_id: T, message: &str) -> Severity {
         let msg = normalized_message(message);
 
         // silly implementation, allows bad messages with a single good word
-        for badword in &self.blacklist {
-            if msg.contains(badword) {
-                if !self.whitelist.iter().any(|goodword| msg.contains(goodword)) {
+        for bad_word in &self.blacklist {
+            if msg.contains(bad_word) {
+                if !self
+                    .whitelist
+                    .iter()
+                    .any(|good_word| msg.contains(good_word))
+                {
                     return Severity::Warn;
                 }
             }
@@ -48,7 +52,7 @@
         // this one fails
         //assert_eq!(checker.check(0, "poop 'fsck -y' poop"), Severity::Warn);
 
-        // ideally this one shouldn't fail
+        // ideally this one shouldn't fail, need a better confusables check
         // assert_eq!(checker.check(0, "P00P"), Severity::Warn);
     }
 }
diff -r 6cc0fce249f9 -r ba29aa03db87 rust/chat_sanitizer/src/letter_repeat.rs
--- a/rust/chat_sanitizer/src/letter_repeat.rs Fri Dec 28 03:10:05 2018 +0300
+++ b/rust/chat_sanitizer/src/letter_repeat.rs Fri Dec 28 22:47:17 2018 +0100
@@ -1,9 +1,45 @@
 use crate::{MessageChecker, Severity};
 
-struct LetterRepeatChecker {}
+use itertools::Itertools;
+use std::marker::PhantomData;
+
+struct LetterRepeatChecker<T> {
+    threshold: usize,
+    player_id_type: PhantomData<T>,
+}
 
-impl<T> MessageChecker<T> for LetterRepeatChecker {
-    fn check(&self, player_id: T, message: &str) -> Severity {
+impl<T> LetterRepeatChecker<T> {
+    pub fn new(threshold: usize) -> Self {
+        Self {
+            threshold,
+            player_id_type: PhantomData,
+        }
+    }
+}
+
+impl<T> MessageChecker<T> for LetterRepeatChecker<T> {
+    fn check(&self, _player_id: T, message: &str) -> Severity {
+        for (_key, group) in &message.chars().into_iter().group_by(|c| *c) {
+            if group.count() >= self.threshold {
+                return Severity::Warn;
+            }
+        }
+
         Severity::Pass
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    #[test]
+    fn it_works() {
+        let checker = LetterRepeatChecker::new(3);
+        assert_eq!(checker.check(0, "Hello world!"), Severity::Pass);
+        assert_eq!(checker.check(0, "ooops"), Severity::Warn);
+        assert_eq!(
+            checker.check(0, "жираф - длинношеее животное"),
+            Severity::Warn
+        );
+    }
+}
diff -r 6cc0fce249f9 -r ba29aa03db87 rust/chat_sanitizer/src/lib.rs
--- a/rust/chat_sanitizer/src/lib.rs Fri Dec 28 03:10:05 2018 +0300
+++ b/rust/chat_sanitizer/src/lib.rs Fri Dec 28 22:47:17 2018 +0100
@@ -1,4 +1,5 @@
 pub mod bad_words;
+pub mod letter_repeat;
 
 use unicode_skeleton::UnicodeSkeleton;
 