tools/confuse.hs
author sheepluva
Mon, 05 Aug 2019 00:20:45 +0200
changeset 15300 f382ec6dba11
parent 10075 dbaf90a0fbe0
permissions -rw-r--r--
In hindsight my emscripten-ifdef (70d416a8f63f) is nonsense. As fpcrtl_glShaderSource() would not be defined and lead to compiling issues. So either it's 3 ifdefs (in pas2cRedo, pas2cSystem and misc.c), in order to toggle between fpcrtl_ and the native function, or alternatively have no ifdef for it at all. I'm going with none at all, which means emscripten will compile with the original (const) function prototype, being wrapped by the fpcrtl_ function, same as non-emscripten builds.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
10064
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
     1
{-# LANGUAGE OverloadedStrings #-}
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
     2
module Confuse where
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
     3
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
     4
import Numeric
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
     5
import Data.Char
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
     6
import Control.Monad
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
     7
import qualified Data.ByteString as B
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
     8
import qualified Data.ByteString.UTF8 as UTF8
10073
865a4089278d Now convert to xml rules
unc0rr
parents: 10064
diff changeset
     9
import qualified Data.Map as Map
10064
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    10
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    11
hx :: [Char] -> String
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    12
hx cs = let ch = (chr . fst . last . readHex $ cs) in
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    13
            case ch of
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    14
                 '\'' -> "''"
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    15
                 '\\' -> "\\\\"
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    16
                 c -> c : []
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    17
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    18
conv :: String -> B.ByteString
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    19
conv s = B.concat ["('", UTF8.fromString i, "', '", UTF8.fromString r, "')"]
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    20
    where
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    21
        i :: String
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    22
        i = hx s
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    23
        r :: String
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    24
        r = concatMap hx . words . takeWhile ((/=) ';') . tail $ dropWhile ((/=) '\t') s
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    25
10073
865a4089278d Now convert to xml rules
unc0rr
parents: 10064
diff changeset
    26
convRules :: (B.ByteString, [B.ByteString]) -> B.ByteString
865a4089278d Now convert to xml rules
unc0rr
parents: 10064
diff changeset
    27
convRules (a, b) = B.concat ["<reset>", u a, "</reset>\n<s>", B.concat $ map u b, "</s>"]
865a4089278d Now convert to xml rules
unc0rr
parents: 10064
diff changeset
    28
    where
865a4089278d Now convert to xml rules
unc0rr
parents: 10064
diff changeset
    29
        u a = B.concat ["\\","u",a]
865a4089278d Now convert to xml rules
unc0rr
parents: 10064
diff changeset
    30
865a4089278d Now convert to xml rules
unc0rr
parents: 10064
diff changeset
    31
toPair :: String -> (B.ByteString, [B.ByteString])
865a4089278d Now convert to xml rules
unc0rr
parents: 10064
diff changeset
    32
toPair s = (UTF8.fromString $ takeWhile isHexDigit s, map UTF8.fromString . words . takeWhile ((/=) ';') . tail $ dropWhile ((/=) '\t') s)
865a4089278d Now convert to xml rules
unc0rr
parents: 10064
diff changeset
    33
865a4089278d Now convert to xml rules
unc0rr
parents: 10064
diff changeset
    34
10064
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    35
main = do
bf1a5ef4ef14 confusables.txt to insert query converter
unc0rr
parents:
diff changeset
    36
    ll <- liftM (filter (isHexDigit . head) . filter (not . null) . lines) $ readFile "confusables.txt"
10075
dbaf90a0fbe0 Filter confusables to some limits
unc0rr
parents: 10073
diff changeset
    37
    B.writeFile "rules.txt" . B.intercalate "\n" . map convRules . Map.toList . Map.fromList . filter notTooLong . filter fits16bit . map toPair $ ll
dbaf90a0fbe0 Filter confusables to some limits
unc0rr
parents: 10073
diff changeset
    38
    where
dbaf90a0fbe0 Filter confusables to some limits
unc0rr
parents: 10073
diff changeset
    39
        notTooLong = (>) 6 . length . snd
dbaf90a0fbe0 Filter confusables to some limits
unc0rr
parents: 10073
diff changeset
    40
        fits16bit (a, b) = let f = (>) 5 . B.length in all f $ a:b
dbaf90a0fbe0 Filter confusables to some limits
unc0rr
parents: 10073
diff changeset
    41