10064
+ − 1
{-# LANGUAGE OverloadedStrings #-}
+ − 2
module Confuse where
+ − 3
+ − 4
import Numeric
+ − 5
import Data.Char
+ − 6
import Control.Monad
+ − 7
import qualified Data.ByteString as B
+ − 8
import qualified Data.ByteString.UTF8 as UTF8
10073
+ − 9
import qualified Data.Map as Map
10064
+ − 10
+ − 11
hx :: [Char] -> String
+ − 12
hx cs = let ch = (chr . fst . last . readHex $ cs) in
+ − 13
case ch of
+ − 14
'\'' -> "''"
+ − 15
'\\' -> "\\\\"
+ − 16
c -> c : []
+ − 17
+ − 18
conv :: String -> B.ByteString
+ − 19
conv s = B.concat ["('", UTF8.fromString i, "', '", UTF8.fromString r, "')"]
+ − 20
where
+ − 21
i :: String
+ − 22
i = hx s
+ − 23
r :: String
+ − 24
r = concatMap hx . words . takeWhile ((/=) ';') . tail $ dropWhile ((/=) '\t') s
+ − 25
10073
+ − 26
convRules :: (B.ByteString, [B.ByteString]) -> B.ByteString
+ − 27
convRules (a, b) = B.concat ["<reset>", u a, "</reset>\n<s>", B.concat $ map u b, "</s>"]
+ − 28
where
+ − 29
u a = B.concat ["\\","u",a]
+ − 30
+ − 31
toPair :: String -> (B.ByteString, [B.ByteString])
+ − 32
toPair s = (UTF8.fromString $ takeWhile isHexDigit s, map UTF8.fromString . words . takeWhile ((/=) ';') . tail $ dropWhile ((/=) '\t') s)
+ − 33
+ − 34
10064
+ − 35
main = do
+ − 36
ll <- liftM (filter (isHexDigit . head) . filter (not . null) . lines) $ readFile "confusables.txt"
10075
+ − 37
B.writeFile "rules.txt" . B.intercalate "\n" . map convRules . Map.toList . Map.fromList . filter notTooLong . filter fits16bit . map toPair $ ll
+ − 38
where
+ − 39
notTooLong = (>) 6 . length . snd
+ − 40
fits16bit (a, b) = let f = (>) 5 . B.length in all f $ a:b
+ − 41