10064
|
1 |
{-# LANGUAGE OverloadedStrings #-}
|
|
2 |
module Confuse where
|
|
3 |
|
|
4 |
import Numeric
|
|
5 |
import Data.Char
|
|
6 |
import Control.Monad
|
|
7 |
import qualified Data.ByteString as B
|
|
8 |
import qualified Data.ByteString.UTF8 as UTF8
|
10073
|
9 |
import qualified Data.Map as Map
|
10064
|
10 |
|
|
11 |
hx :: [Char] -> String
|
|
12 |
hx cs = let ch = (chr . fst . last . readHex $ cs) in
|
|
13 |
case ch of
|
|
14 |
'\'' -> "''"
|
|
15 |
'\\' -> "\\\\"
|
|
16 |
c -> c : []
|
|
17 |
|
|
18 |
conv :: String -> B.ByteString
|
|
19 |
conv s = B.concat ["('", UTF8.fromString i, "', '", UTF8.fromString r, "')"]
|
|
20 |
where
|
|
21 |
i :: String
|
|
22 |
i = hx s
|
|
23 |
r :: String
|
|
24 |
r = concatMap hx . words . takeWhile ((/=) ';') . tail $ dropWhile ((/=) '\t') s
|
|
25 |
|
10073
|
26 |
convRules :: (B.ByteString, [B.ByteString]) -> B.ByteString
|
|
27 |
convRules (a, b) = B.concat ["<reset>", u a, "</reset>\n<s>", B.concat $ map u b, "</s>"]
|
|
28 |
where
|
|
29 |
u a = B.concat ["\\","u",a]
|
|
30 |
|
|
31 |
toPair :: String -> (B.ByteString, [B.ByteString])
|
|
32 |
toPair s = (UTF8.fromString $ takeWhile isHexDigit s, map UTF8.fromString . words . takeWhile ((/=) ';') . tail $ dropWhile ((/=) '\t') s)
|
|
33 |
|
|
34 |
|
10064
|
35 |
main = do
|
|
36 |
ll <- liftM (filter (isHexDigit . head) . filter (not . null) . lines) $ readFile "confusables.txt"
|
10075
|
37 |
B.writeFile "rules.txt" . B.intercalate "\n" . map convRules . Map.toList . Map.fromList . filter notTooLong . filter fits16bit . map toPair $ ll
|
|
38 |
where
|
|
39 |
notTooLong = (>) 6 . length . snd
|
|
40 |
fits16bit (a, b) = let f = (>) 5 . B.length in all f $ a:b
|
|
41 |
|