tools/confuse.hs
author Wuzzy <Wuzzy2@mail.ru>
Tue, 24 Jul 2018 20:07:58 +0200
branch0.9.24
changeset 13550 d42237d16acf
parent 10075 dbaf90a0fbe0
permissions -rw-r--r--
Limit max droplet count to 50 (fix for 0.9.24 branch only) This fixes the issue with insane amounts of droplets in 0.9.24. It's temporary, the real fix is in default branch, but would be desyncing.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
10064
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
     1
{-# LANGUAGE OverloadedStrings #-}
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
     2
module Confuse where
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
     3
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
     4
import Numeric
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
     5
import Data.Char
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
     6
import Control.Monad
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
     7
import qualified Data.ByteString as B
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
     8
import qualified Data.ByteString.UTF8 as UTF8
10073
865a4089278d Now convert to xml rules
unc0rr
parents: 10064
diff changeset
     9
import qualified Data.Map as Map
10064
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    10
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    11
hx :: [Char] -> String
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    12
hx cs = let ch = (chr . fst . last . readHex $ cs) in
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    13
            case ch of
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    14
                 '\'' -> "''"
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    15
                 '\\' -> "\\\\"
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    16
                 c -> c : []
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    17
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    18
conv :: String -> B.ByteString
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    19
conv s = B.concat ["('", UTF8.fromString i, "', '", UTF8.fromString r, "')"]
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    20
    where
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    21
        i :: String
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    22
        i = hx s
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    23
        r :: String
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    24
        r = concatMap hx . words . takeWhile ((/=) ';') . tail $ dropWhile ((/=) '\t') s
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    25
10073
865a4089278d Now convert to xml rules
unc0rr
parents: 10064
diff changeset
    26
convRules :: (B.ByteString, [B.ByteString]) -> B.ByteString
865a4089278d Now convert to xml rules
unc0rr
parents: 10064
diff changeset
    27
convRules (a, b) = B.concat ["<reset>", u a, "</reset>\n<s>", B.concat $ map u b, "</s>"]
865a4089278d Now convert to xml rules
unc0rr
parents: 10064
diff changeset
    28
    where
865a4089278d Now convert to xml rules
unc0rr
parents: 10064
diff changeset
    29
        u a = B.concat ["\\","u",a]
865a4089278d Now convert to xml rules
unc0rr
parents: 10064
diff changeset
    30
865a4089278d Now convert to xml rules
unc0rr
parents: 10064
diff changeset
    31
toPair :: String -> (B.ByteString, [B.ByteString])
865a4089278d Now convert to xml rules
unc0rr
parents: 10064
diff changeset
    32
toPair s = (UTF8.fromString $ takeWhile isHexDigit s, map UTF8.fromString . words . takeWhile ((/=) ';') . tail $ dropWhile ((/=) '\t') s)
865a4089278d Now convert to xml rules
unc0rr
parents: 10064
diff changeset
    33
865a4089278d Now convert to xml rules
unc0rr
parents: 10064
diff changeset
    34
10064
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    35
main = do
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    36
    ll <- liftM (filter (isHexDigit . head) . filter (not . null) . lines) $ readFile "confusables.txt"
10075
dbaf90a0fbe0 Filter confusables to some limits
unc0rr
parents: 10073
diff changeset
    37
    B.writeFile "rules.txt" . B.intercalate "\n" . map convRules . Map.toList . Map.fromList . filter notTooLong . filter fits16bit . map toPair $ ll
dbaf90a0fbe0 Filter confusables to some limits
unc0rr
parents: 10073
diff changeset
    38
    where
dbaf90a0fbe0 Filter confusables to some limits
unc0rr
parents: 10073
diff changeset
    39
        notTooLong = (>) 6 . length . snd
dbaf90a0fbe0 Filter confusables to some limits
unc0rr
parents: 10073
diff changeset
    40
        fits16bit (a, b) = let f = (>) 5 . B.length in all f $ a:b
dbaf90a0fbe0 Filter confusables to some limits
unc0rr
parents: 10073
diff changeset
    41