tools/confuse.hs
branchhedgeroid
changeset 15515 7030706266df
parent 10075 dbaf90a0fbe0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/confuse.hs	Fri Dec 06 22:20:53 2019 +0100
@@ -0,0 +1,41 @@
+{-# LANGUAGE OverloadedStrings #-}
+module Confuse where
+
+import Numeric
+import Data.Char
+import Control.Monad
+import qualified Data.ByteString as B
+import qualified Data.ByteString.UTF8 as UTF8
+import qualified Data.Map as Map
+
+hx :: [Char] -> String
+hx cs = let ch = (chr . fst . last . readHex $ cs) in
+            case ch of
+                 '\'' -> "''"
+                 '\\' -> "\\\\"
+                 c -> c : []
+
+conv :: String -> B.ByteString
+conv s = B.concat ["('", UTF8.fromString i, "', '", UTF8.fromString r, "')"]
+    where
+        i :: String
+        i = hx s
+        r :: String
+        r = concatMap hx . words . takeWhile ((/=) ';') . tail $ dropWhile ((/=) '\t') s
+
+convRules :: (B.ByteString, [B.ByteString]) -> B.ByteString
+convRules (a, b) = B.concat ["<reset>", u a, "</reset>\n<s>", B.concat $ map u b, "</s>"]
+    where
+        u a = B.concat ["\\","u",a]
+
+toPair :: String -> (B.ByteString, [B.ByteString])
+toPair s = (UTF8.fromString $ takeWhile isHexDigit s, map UTF8.fromString . words . takeWhile ((/=) ';') . tail $ dropWhile ((/=) '\t') s)
+
+
+main = do
+    ll <- liftM (filter (isHexDigit . head) . filter (not . null) . lines) $ readFile "confusables.txt"
+    B.writeFile "rules.txt" . B.intercalate "\n" . map convRules . Map.toList . Map.fromList . filter notTooLong . filter fits16bit . map toPair $ ll
+    where
+        notTooLong = (>) 6 . length . snd
+        fits16bit (a, b) = let f = (>) 5 . B.length in all f $ a:b
+