tools/confuse.hs
branchsdl2transition
changeset 11362 ed5a6478e710
parent 10075 dbaf90a0fbe0
equal deleted inserted replaced
11361:31570b766315 11362:ed5a6478e710
       
     1 {-# LANGUAGE OverloadedStrings #-}
       
     2 module Confuse where
       
     3 
       
     4 import Numeric
       
     5 import Data.Char
       
     6 import Control.Monad
       
     7 import qualified Data.ByteString as B
       
     8 import qualified Data.ByteString.UTF8 as UTF8
       
     9 import qualified Data.Map as Map
       
    10 
       
    11 hx :: [Char] -> String
       
    12 hx cs = let ch = (chr . fst . last . readHex $ cs) in
       
    13             case ch of
       
    14                  '\'' -> "''"
       
    15                  '\\' -> "\\\\"
       
    16                  c -> c : []
       
    17 
       
    18 conv :: String -> B.ByteString
       
    19 conv s = B.concat ["('", UTF8.fromString i, "', '", UTF8.fromString r, "')"]
       
    20     where
       
    21         i :: String
       
    22         i = hx s
       
    23         r :: String
       
    24         r = concatMap hx . words . takeWhile ((/=) ';') . tail $ dropWhile ((/=) '\t') s
       
    25 
       
    26 convRules :: (B.ByteString, [B.ByteString]) -> B.ByteString
       
    27 convRules (a, b) = B.concat ["<reset>", u a, "</reset>\n<s>", B.concat $ map u b, "</s>"]
       
    28     where
       
    29         u a = B.concat ["\\","u",a]
       
    30 
       
    31 toPair :: String -> (B.ByteString, [B.ByteString])
       
    32 toPair s = (UTF8.fromString $ takeWhile isHexDigit s, map UTF8.fromString . words . takeWhile ((/=) ';') . tail $ dropWhile ((/=) '\t') s)
       
    33 
       
    34 
       
    35 main = do
       
    36     ll <- liftM (filter (isHexDigit . head) . filter (not . null) . lines) $ readFile "confusables.txt"
       
    37     B.writeFile "rules.txt" . B.intercalate "\n" . map convRules . Map.toList . Map.fromList . filter notTooLong . filter fits16bit . map toPair $ ll
       
    38     where
       
    39         notTooLong = (>) 6 . length . snd
       
    40         fits16bit (a, b) = let f = (>) 5 . B.length in all f $ a:b
       
    41