tools/confuse.hs
changeset 10073 865a4089278d
parent 10064 bf1a5ef4ef14
child 10075 dbaf90a0fbe0
--- a/tools/confuse.hs	Fri Jan 24 13:19:35 2014 +0100
+++ b/tools/confuse.hs	Fri Jan 24 22:38:15 2014 +0400
@@ -6,6 +6,7 @@
 import Control.Monad
 import qualified Data.ByteString as B
 import qualified Data.ByteString.UTF8 as UTF8
+import qualified Data.Map as Map
 
 hx :: [Char] -> String
 hx cs = let ch = (chr . fst . last . readHex $ cs) in
@@ -22,6 +23,15 @@
         r :: String
         r = concatMap hx . words . takeWhile ((/=) ';') . tail $ dropWhile ((/=) '\t') s
 
+-- | Render one (source, targets) pair: a <reset> element with the source, a newline, then an <s> element with all targets; every value gets a literal backslash-u prefix.
+convRules :: (B.ByteString, [B.ByteString]) -> B.ByteString
+convRules (src, targets) = B.concat $ ["<reset>", esc src, "</reset>\n<s>"] ++ map esc targets ++ ["</s>"]
+    where esc = B.append "\\u"
+
+-- | Parse one data line: the leading hex digits are the source; the words between the first tab and the next ';' are the targets. A line with no tab yields an empty target list.
+toPair :: String -> (B.ByteString, [B.ByteString])
+toPair s = (UTF8.fromString $ takeWhile isHexDigit s, map UTF8.fromString . words . takeWhile (/= ';') . drop 1 $ dropWhile (/= '\t') s)
+
 main = do
     ll <- liftM (filter (isHexDigit . head) . filter (not . null) . lines) $ readFile "confusables.txt"
-    B.writeFile "insert.sql" . B.intercalate ",\n" . map conv $ ll
+    B.writeFile "rules.txt" . B.intercalate "\n" . map convRules . Map.toList . Map.fromList . filter (\(_, b) -> length b < 6). map toPair $ ll -- keeps pairs with fewer than 6 targets; the Map round-trip leaves one entry per source, in ascending key order