equal
deleted
inserted
replaced
|
1 {-# LANGUAGE OverloadedStrings #-} |
|
2 module Confuse where |
|
3 |
|
4 import Numeric |
|
5 import Data.Char |
|
6 import Control.Monad |
|
7 import qualified Data.ByteString as B |
|
8 import qualified Data.ByteString.UTF8 as UTF8 |
|
9 import qualified Data.Map as Map |
|
10 |
|
11 hx :: [Char] -> String |
|
12 hx cs = let ch = (chr . fst . last . readHex $ cs) in |
|
13 case ch of |
|
14 '\'' -> "''" |
|
15 '\\' -> "\\\\" |
|
16 c -> c : [] |
|
17 |
|
18 conv :: String -> B.ByteString |
|
19 conv s = B.concat ["('", UTF8.fromString i, "', '", UTF8.fromString r, "')"] |
|
20 where |
|
21 i :: String |
|
22 i = hx s |
|
23 r :: String |
|
24 r = concatMap hx . words . takeWhile ((/=) ';') . tail $ dropWhile ((/=) '\t') s |
|
25 |
|
26 convRules :: (B.ByteString, [B.ByteString]) -> B.ByteString |
|
27 convRules (a, b) = B.concat ["<reset>", u a, "</reset>\n<s>", B.concat $ map u b, "</s>"] |
|
28 where |
|
29 u a = B.concat ["\\","u",a] |
|
30 |
|
31 toPair :: String -> (B.ByteString, [B.ByteString]) |
|
32 toPair s = (UTF8.fromString $ takeWhile isHexDigit s, map UTF8.fromString . words . takeWhile ((/=) ';') . tail $ dropWhile ((/=) '\t') s) |
|
33 |
|
34 |
|
35 main = do |
|
36 ll <- liftM (filter (isHexDigit . head) . filter (not . null) . lines) $ readFile "confusables.txt" |
|
37 B.writeFile "rules.txt" . B.intercalate "\n" . map convRules . Map.toList . Map.fromList . filter notTooLong . filter fits16bit . map toPair $ ll |
|
38 where |
|
39 notTooLong = (>) 6 . length . snd |
|
40 fits16bit (a, b) = let f = (>) 5 . B.length in all f $ a:b |
|
41 |