Implement letter repeat check
author unc0rr
Fri, 28 Dec 2018 22:47:17 +0100
changeset 14510 ba29aa03db87
parent 14509 6cc0fce249f9
child 14511 a4d560aeda96
Implement letter repeat check
rust/chat_sanitizer/Cargo.toml
rust/chat_sanitizer/src/bad_words.rs
rust/chat_sanitizer/src/letter_repeat.rs
rust/chat_sanitizer/src/lib.rs
--- a/rust/chat_sanitizer/Cargo.toml	Fri Dec 28 03:10:05 2018 +0300
+++ b/rust/chat_sanitizer/Cargo.toml	Fri Dec 28 22:47:17 2018 +0100
@@ -6,3 +6,4 @@
 
 [dependencies]
 unicode_skeleton = "0.1"
+itertools = "0.8.0"
--- a/rust/chat_sanitizer/src/bad_words.rs	Fri Dec 28 03:10:05 2018 +0300
+++ b/rust/chat_sanitizer/src/bad_words.rs	Fri Dec 28 22:47:17 2018 +0100
@@ -19,13 +19,17 @@
 }
 
 impl<T> MessageChecker<T> for BadWordsChecker<T> {
-    fn check(&self, player_id: T, message: &str) -> Severity {
+    fn check(&self, _player_id: T, message: &str) -> Severity {
         let msg = normalized_message(message);
 
         // silly implementation, allows bad messages with a single good word
-        for badword in &self.blacklist {
-            if msg.contains(badword) {
-                if !self.whitelist.iter().any(|goodword| msg.contains(goodword)) {
+        for bad_word in &self.blacklist {
+            if msg.contains(bad_word) {
+                if !self
+                    .whitelist
+                    .iter()
+                    .any(|good_word| msg.contains(good_word))
+                {
                     return Severity::Warn;
                 }
             }
@@ -48,7 +52,7 @@
         // this one fails
         //assert_eq!(checker.check(0, "poop 'fsck -y' poop"), Severity::Warn);
 
-        // ideally this one shouldn't fail
+        // ideally this one shouldn't fail, need a better confusables check
         // assert_eq!(checker.check(0, "P00P"), Severity::Warn);
     }
 }
--- a/rust/chat_sanitizer/src/letter_repeat.rs	Fri Dec 28 03:10:05 2018 +0300
+++ b/rust/chat_sanitizer/src/letter_repeat.rs	Fri Dec 28 22:47:17 2018 +0100
@@ -1,9 +1,56 @@
 use crate::{MessageChecker, Severity};
 
-struct LetterRepeatChecker {}
+use itertools::Itertools;
+use std::marker::PhantomData;
+
+/// Flags chat messages containing a long run of one repeated character.
+///
+/// `T` is the player id type taken by `MessageChecker::check`. No value of
+/// that type is stored, so it is only recorded through `PhantomData`.
+pub struct LetterRepeatChecker<T> {
+    /// Run length (in `char`s) at which a message is reported.
+    threshold: usize,
+    player_id_type: PhantomData<T>,
+}
 
-impl<T> MessageChecker<T> for LetterRepeatChecker {
-    fn check(&self, player_id: T, message: &str) -> Severity {
+impl<T> LetterRepeatChecker<T> {
+    /// Creates a checker that warns when any character appears `threshold`
+    /// or more times in a row.
+    pub fn new(threshold: usize) -> Self {
+        Self {
+            threshold,
+            player_id_type: PhantomData,
+        }
+    }
+}
+
+impl<T> MessageChecker<T> for LetterRepeatChecker<T> {
+    /// Returns `Severity::Warn` if `message` contains a run of at least
+    /// `self.threshold` identical consecutive `char`s, `Severity::Pass`
+    /// otherwise. The player id is ignored.
+    fn check(&self, _player_id: T, message: &str) -> Severity {
+        // `chars()` already yields an iterator, so it is grouped directly.
+        for (_key, group) in &message.chars().group_by(|c| *c) {
+            if group.count() >= self.threshold {
+                return Severity::Warn;
+            }
+        }
+
         Severity::Pass
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    #[test]
+    fn it_works() {
+        let checker = LetterRepeatChecker::new(3);
+        assert_eq!(checker.check(0, "Hello world!"), Severity::Pass);
+        assert_eq!(checker.check(0, "ooops"), Severity::Warn);
+        assert_eq!(
+            checker.check(0, "жираф - длинношеее животное"),
+            Severity::Warn
+        );
+    }
+}
--- a/rust/chat_sanitizer/src/lib.rs	Fri Dec 28 03:10:05 2018 +0300
+++ b/rust/chat_sanitizer/src/lib.rs	Fri Dec 28 22:47:17 2018 +0100
@@ -1,4 +1,5 @@
 pub mod bad_words;
+pub mod letter_repeat;
 
 use unicode_skeleton::UnicodeSkeleton;