tools/pas2c/PascalParser.hs
author nemo
Mon, 10 Apr 2017 12:06:43 -0400
changeset 12213 bb5522e88ab2
parent 11398 c3a535886806
child 14235 3a9e4f35aa68
permissions -rw-r--r--
bulk copy of latest physfs to our misc/libphysfs since this seems to fix an off-by-1 error reliably hit in readln read of 1 byte probably introduced in the addition of the buffered read. Whether this is excessive or whether libphysfs should even be maintained by us is another matter. But at least we shouldn't crash

module PascalParser (
    pascalUnit,
    mainResultInit
    )
    where

import Text.Parsec
import Text.Parsec.Token
import Text.Parsec.Expr
import Control.Monad
import Data.Maybe
import Data.Char

import PascalBasics
import PascalUnitSyntaxTree


mainResultInit :: Phrase
mainResultInit = (\(Right a) -> a) $ parse phrase "<built-in>" "main:= 0;"

knownTypes :: [String]
knownTypes = ["shortstring", "ansistring", "char", "byte"]

pascalUnit :: Parsec String u PascalUnit
pascalUnit = do
    comments
    u <- choice [program, unit, systemUnit, redoUnit]
    comments
    return u

iD :: Parsec String u Identifier
iD = do
    i <- identifier pas
    comments
    when (i == "not") $ unexpected "'not' used as an identifier"
    return $ Identifier i BTUnknown

unit :: Parsec String u PascalUnit
unit = do
    string' "unit" >> comments
    name <- iD
    void $ semi pas
    comments
    int <- interface
    impl <- implementation
    comments
    return $ Unit name int impl Nothing Nothing


reference :: Parsec String u Reference
reference = term <?> "reference"
    where
    term = comments >> choice [
        parens pas (liftM RefExpression expression >>= postfixes) >>= postfixes
        , try $ typeCast >>= postfixes
        , char' '@' >> liftM Address reference >>= postfixes
        , liftM SimpleReference iD >>= postfixes
        ] <?> "simple reference"

    postfixes r = many postfix >>= return . foldl (flip ($)) r
    postfix = choice [
            parens pas (option [] parameters) >>= return . FunCall
          , char' '^' >> return Dereference
          , (brackets pas) (commaSep1 pas $ expression) >>= return . ArrayElement
          , (char' '.' >> notFollowedBy (char' '.')) >> liftM (flip RecordField) reference
        ]

    typeCast = do
        t <- choice $ map (\s -> try $ caseInsensitiveString s >>= \i -> notFollowedBy alphaNum >> return i) knownTypes
        e <- parens pas expression
        comments
        return $ TypeCast (Identifier t BTUnknown) e

varsDecl1, varsDecl :: Bool -> Parsec String u [TypeVarDeclaration]
varsDecl1 = varsParser sepEndBy1
varsDecl = varsParser sepEndBy

varsParser ::
    (Parsec String u TypeVarDeclaration
        -> Parsec String u String
        -> Parsec
            String u [TypeVarDeclaration])
    -> Bool
    -> Parsec
            String u [TypeVarDeclaration]
varsParser m endsWithSemi = do
    vs <- m (aVarDecl endsWithSemi) (semi pas)
    return vs

aVarDecl :: Bool -> Parsec String u TypeVarDeclaration
aVarDecl endsWithSemi = do
    isVar <- liftM (\i -> i == Just "var" || i == Just "out") $
        if not endsWithSemi then
            optionMaybe $ choice [
                try $ string "var"
                , try $ string "const"
                , try $ string "out"
                ]
            else
                return Nothing
    comments
    ids <- do
        i <- (commaSep1 pas) $ (try iD <?> "variable declaration")
        char' ':'
        return i
    comments
    t <- typeDecl <?> "variable type declaration"
    comments
    initialization <- option Nothing $ do
        char' '='
        comments
        e <- initExpression
        comments
        return (Just e)
    return $ VarDeclaration isVar False (ids, t) initialization

constsDecl :: Parsec String u [TypeVarDeclaration]
constsDecl = do
    vs <- many1 (try (aConstDecl >>= \i -> semi pas >> return i) >>= \i -> comments >> return i)
    comments
    return vs
    where
    aConstDecl = do
        comments
        i <- iD
        t <- optionMaybe $ do
            char' ':'
            comments
            t <- typeDecl
            comments
            return t
        char' '='
        comments
        e <- initExpression
        comments
        return $ VarDeclaration False (isNothing t) ([i], fromMaybe (DeriveType e) t) (Just e)

typeDecl :: Parsec String u TypeDecl
typeDecl = choice [
    char' '^' >> typeDecl >>= return . PointerTo
    , try (string' "shortstring") >> return String
    , try (string' "string") >> optionMaybe (brackets pas $ integer pas) >> return String
    , try (string' "ansistring") >> optionMaybe (brackets pas $ integer pas) >> return AString
    , arrayDecl
    , recordDecl
    , setDecl
    , functionType
    , sequenceDecl >>= return . Sequence
    , try iD >>= return . SimpleType
    , rangeDecl >>= return . RangeType
    ] <?> "type declaration"
    where
    arrayDecl = do
        try $ do
            optional $ (try $ string' "packed") >> comments
            string' "array"
        comments
        r <- option [] $ do
            char' '['
            r <- commaSep pas rangeDecl
            char' ']'
            comments
            return r
        string' "of"
        comments
        t <- typeDecl
        if null r then
            return $ ArrayDecl Nothing t
            else
            return $ foldr (\a b -> ArrayDecl (Just a) b) (ArrayDecl (Just $ head r) t) (tail r)
    recordDecl = do
        try $ do
            optional $ (try $ string' "packed") >> comments
            string' "record"
        comments
        vs <- varsDecl True
        union <- optionMaybe $ do
            string' "case"
            comments
            void $ iD
            comments
            string' "of"
            comments
            many unionCase
        string' "end"
        return $ RecordType vs union
    setDecl = do
        try $ string' "set" >> void space
        comments
        string' "of"
        comments
        liftM Set typeDecl
    unionCase = do
        void $ try $ commaSep pas $ (void $ iD) <|> (void $ integer pas)
        char' ':'
        comments
        u <- parens pas $ varsDecl True
        char' ';'
        comments
        return u
    sequenceDecl = (parens pas) $ (commaSep pas) (iD >>= \i -> optional (spaces >> char' '=' >> spaces >> integer pas) >> return i)
    functionType = do
        fp <- try (string "function") <|> try (string "procedure")
        comments
        vs <- option [] $ parens pas $ varsDecl False
        comments
        ret <- if (fp == "function") then do
            char' ':'
            comments
            ret <- typeDecl
            comments
            return ret
            else
            return VoidType
        optional $ try $ char' ';' >> comments >> string' "cdecl"
        comments
        return $ FunctionType ret vs

typesDecl :: Parsec String u [TypeVarDeclaration]
typesDecl = many (aTypeDecl >>= \t -> comments >> return t)
    where
    aTypeDecl = do
        i <- try $ do
            i <- iD <?> "type declaration"
            comments
            char' '='
            return i
        comments
        t <- typeDecl
        comments
        void $ semi pas
        comments
        return $ TypeDeclaration i t

rangeDecl :: Parsec String u Range
rangeDecl = choice [
    try $ rangeft
    , iD >>= return . Range
    ] <?> "range declaration"
    where
    rangeft = do
        e1 <- initExpression
        string' ".."
        e2 <- initExpression
        return $ RangeFromTo e1 e2

typeVarDeclaration :: Bool -> Parsec String u [TypeVarDeclaration]
typeVarDeclaration isImpl = (liftM concat . many . choice) [
    varSection,
    constSection,
    typeSection,
    funcDecl,
    operatorDecl
    ]
    where

    fixInit v = concat $ map (\x -> case x of
                    VarDeclaration a b (ids, t) c ->
                        let typeId = (Identifier ((\(Identifier i _) -> i) (head ids) ++ "_tt") BTUnknown) in
                        let res =  [TypeDeclaration typeId t, VarDeclaration a b (ids, (SimpleType typeId)) c] in
                        case t of
                            RecordType _ _ -> res -- create a separated type declaration
                            ArrayDecl _ _ -> res
                            _ -> [x]
                    _ -> error ("checkInit:\n" ++ (show v))) v

    varSection = do
        try $ string' "var"
        comments
        v <- varsDecl1 True <?> "variable declaration"
        comments
        return $ fixInit v

    constSection = do
        try $ string' "const"
        comments
        c <- constsDecl <?> "const declaration"
        comments
        return $ fixInit c

    typeSection = do
        try $ string' "type"
        comments
        t <- typesDecl <?> "type declaration"
        comments
        return t

    operatorDecl = do
        try $ string' "operator"
        comments
        i <- manyTill anyChar space
        comments
        vs <- parens pas $ varsDecl False
        comments
        rid <- iD
        comments
        char' ':'
        comments
        ret <- typeDecl
        comments
        -- return ret
        -- ^^^^^^^^^^ wth was this???
        char' ';'
        comments
        forward <- liftM isJust $ optionMaybe (try (string' "forward;") >> comments)
        inline <- liftM (any (== "inline;")) $ many functionDecorator
        b <- if isImpl && (not forward) then
                liftM Just functionBody
                else
                return Nothing
        return $ [OperatorDeclaration i rid inline ret vs b]


    funcDecl = do
        fp <- try (string "function") <|> try (string "procedure")
        comments
        i <- iD
        vs <- option [] $ parens pas $ varsDecl False
        comments
        ret <- if (fp == "function") then do
            char' ':'
            comments
            ret <- typeDecl
            comments
            return ret
            else
            return VoidType
        char' ';'
        comments
        forward <- liftM isJust $ optionMaybe (try (string "forward;") >> comments)
        decorators <- many functionDecorator
        let inline = any (== "inline;") decorators
            overload = any (== "overload;") decorators
            external = any (== "external;") decorators
        -- TODO: don't mangle external functions names (and remove fpcrtl.h defines hacks)
        b <- if isImpl && (not forward) && (not external) then
                liftM Just functionBody
                else
                return Nothing
        return $ [FunctionDeclaration i inline overload external ret vs b]

    functionDecorator = do
        d <- choice [
            try $ string "inline;"
            , try $ caseInsensitiveString "cdecl;"
            , try $ string "overload;"
            , try $ string "export;"
            , try $ string "varargs;"
            , try (string' "external") >> comments >> iD >> comments >>
                optional (string' "name" >> comments >> stringLiteral pas) >> string' ";" >> return "external;"
            ]
        comments
        return d


program :: Parsec String u PascalUnit
program = do
    string' "program"
    comments
    name <- iD
    (char' ';')
    comments
    comments
    u <- uses
    comments
    tv <- typeVarDeclaration True
    comments
    p <- phrase
    comments
    char' '.'
    comments
    return $ Program name (Implementation u (TypesAndVars tv)) p

interface :: Parsec String u Interface
interface = do
    string' "interface"
    comments
    u <- uses
    comments
    tv <- typeVarDeclaration False
    comments
    return $ Interface u (TypesAndVars tv)

implementation :: Parsec String u Implementation
implementation = do
    string' "implementation"
    comments
    u <- uses
    comments
    tv <- typeVarDeclaration True
    string' "end."
    comments
    return $ Implementation u (TypesAndVars tv)

expression :: Parsec String u Expression
expression = do
    buildExpressionParser table term <?> "expression"
    where
    term = comments >> choice [
        builtInFunction expression >>= \(n, e) -> return $ BuiltInFunCall e (SimpleReference (Identifier n BTUnknown))
        , try (parens pas expression >>= \e -> notFollowedBy (comments >> char' '.') >> return e)
        , brackets pas (commaSep pas iD) >>= return . SetExpression
        , try $ integer pas >>= \i -> notFollowedBy (char' '.') >> (return . NumberLiteral . show) i
        , float pas >>= return . FloatLiteral . show
        , try $ integer pas >>= return . NumberLiteral . show
        , try (string' "_S" >> stringLiteral pas) >>= return . StringLiteral
        , try (string' "_P" >> stringLiteral pas) >>= return . PCharLiteral
        , stringLiteral pas >>= return . strOrChar
        , try (string' "#$") >> many hexDigit >>= \c -> comments >> return (HexCharCode c)
        , char' '#' >> many digit >>= \c -> comments >> return (CharCode c)
        , char' '$' >> many hexDigit >>=  \h -> comments >> return (HexNumber h)
        --, char' '-' >> expression >>= return . PrefixOp "-"
        , char' '-' >> reference >>= return . PrefixOp "-" . Reference
        , (try $ string' "not" >> notFollowedBy comments) >> unexpected "'not'"
        , try $ string' "nil" >> return Null
        , reference >>= return . Reference
        ] <?> "simple expression"

    table = [
          [  Prefix (reservedOp pas "not">> return (PrefixOp "not"))
           , Prefix (try (char' '-') >> return (PrefixOp "-"))]
           ,
          [  Infix (char' '*' >> return (BinOp "*")) AssocLeft
           , Infix (char' '/' >> return (BinOp "/")) AssocLeft
           , Infix (try (string' "div") >> return (BinOp "div")) AssocLeft
           , Infix (try (string' "mod") >> return (BinOp "mod")) AssocLeft
           , Infix (try (string' "in") >> return (BinOp "in")) AssocNone
           , Infix (try $ string' "and" >> return (BinOp "and")) AssocLeft
           , Infix (try $ string' "shl" >> return (BinOp "shl")) AssocLeft
           , Infix (try $ string' "shr" >> return (BinOp "shr")) AssocLeft
          ]
        , [  Infix (char' '+' >> return (BinOp "+")) AssocLeft
           , Infix (char' '-' >> return (BinOp "-")) AssocLeft
           , Infix (try $ string' "or" >> return (BinOp "or")) AssocLeft
           , Infix (try $ string' "xor" >> return (BinOp "xor")) AssocLeft
          ]
        , [  Infix (try (string' "<>") >> return (BinOp "<>")) AssocNone
           , Infix (try (string' "<=") >> return (BinOp "<=")) AssocNone
           , Infix (try (string' ">=") >> return (BinOp ">=")) AssocNone
           , Infix (char' '<' >> return (BinOp "<")) AssocNone
           , Infix (char' '>' >> return (BinOp ">")) AssocNone
          ]
        {-, [  Infix (try $ string' "shl" >> return (BinOp "shl")) AssocNone
             , Infix (try $ string' "shr" >> return (BinOp "shr")) AssocNone
          ]
        , [
             Infix (try $ string' "or" >> return (BinOp "or")) AssocLeft
           , Infix (try $ string' "xor" >> return (BinOp "xor")) AssocLeft
          ]-}
        , [
             Infix (char' '=' >> return (BinOp "=")) AssocNone
          ]
        ]
    strOrChar [a] = CharCode . show . ord $ a
    strOrChar a = StringLiteral a

phrasesBlock :: Parsec String u Phrase
phrasesBlock = do
    try $ string' "begin"
    comments
    p <- manyTill phrase (try $ string' "end" >> notFollowedBy alphaNum)
    comments
    return $ Phrases p

phrase :: Parsec String u Phrase
phrase = do
    o <- choice [
        phrasesBlock
        , ifBlock
        , whileCycle
        , repeatCycle
        , switchCase
        , withBlock
        , forCycle
        , (try $ reference >>= \r -> string' ":=" >> return r) >>= \r -> comments >> expression >>= return . Assignment r
        , builtInFunction expression >>= \(n, e) -> return $ BuiltInFunctionCall e (SimpleReference (Identifier n BTUnknown))
        , procCall
        , char' ';' >> comments >> return NOP
        ]
    optional $ char' ';'
    comments
    return o

ifBlock :: Parsec String u Phrase
ifBlock = do
    try $ string "if" >> notFollowedBy (alphaNum <|> char '_')
    comments
    e <- expression
    comments
    string' "then"
    comments
    o1 <- phrase
    comments
    o2 <- optionMaybe $ do
        try $ string' "else" >> void space
        comments
        o <- option NOP phrase
        comments
        return o
    return $ IfThenElse e o1 o2

whileCycle :: Parsec String u Phrase
whileCycle = do
    try $ string' "while"
    comments
    e <- expression
    comments
    string' "do"
    comments
    o <- phrase
    return $ WhileCycle e o

withBlock :: Parsec String u Phrase
withBlock = do
    try $ string' "with" >> void space
    comments
    rs <- (commaSep1 pas) reference
    comments
    string' "do"
    comments
    o <- phrase
    return $ foldr WithBlock o rs

repeatCycle :: Parsec String u Phrase
repeatCycle = do
    try $ string' "repeat" >> void space
    comments
    o <- many phrase
    string' "until"
    comments
    e <- expression
    comments
    return $ RepeatCycle e o

forCycle :: Parsec String u Phrase
forCycle = do
    try $ string' "for" >> void space
    comments
    i <- iD
    comments
    string' ":="
    comments
    e1 <- expression
    comments
    up <- liftM (== Just "to") $
            optionMaybe $ choice [
                try $ string "to"
                , try $ string "downto"
                ]
    --choice [string' "to", string' "downto"]
    comments
    e2 <- expression
    comments
    string' "do"
    comments
    p <- phrase
    comments
    return $ ForCycle i e1 e2 p up

switchCase :: Parsec String u Phrase
switchCase = do
    try $ string' "case"
    comments
    e <- expression
    comments
    string' "of"
    comments
    cs <- many1 aCase
    o2 <- optionMaybe $ do
        try $ string' "else" >> notFollowedBy alphaNum
        comments
        o <- many phrase
        comments
        return o
    string' "end"
    comments
    return $ SwitchCase e cs o2
    where
    aCase = do
        e <- (commaSep pas) $ (liftM InitRange rangeDecl <|> initExpression)
        comments
        char' ':'
        comments
        p <- phrase
        comments
        return (e, p)

procCall :: Parsec String u Phrase
procCall = do
    r <- reference
    p <- option [] $ (parens pas) parameters
    return $ ProcCall r p

parameters :: Parsec String u [Expression]
parameters = (commaSep pas) expression <?> "parameters"

functionBody :: Parsec String u (TypesAndVars, Phrase)
functionBody = do
    tv <- typeVarDeclaration True
    comments
    p <- phrasesBlock
    char' ';'
    comments
    return (TypesAndVars tv, p)

uses :: Parsec String u Uses
uses = liftM Uses (option [] u)
    where
        u = do
            string' "uses"
            comments
            ulist <- (iD >>= \i -> comments >> return i) `sepBy1` (char' ',' >> comments)
            char' ';'
            comments
            return ulist

initExpression :: Parsec String u InitExpression
initExpression = buildExpressionParser table term <?> "initialization expression"
    where
    term = comments >> choice [
        liftM (uncurry BuiltInFunction) $ builtInFunction initExpression
        , try $ brackets pas (commaSep pas $ initExpression) >>= return . InitSet
        , try $ parens pas (commaSep pas $ initExpression) >>= \ia -> when ((notRecord $ head ia) && (null $ tail ia)) mzero >> return (InitArray ia)
        , try $ parens pas (sepEndBy recField (char' ';' >> comments)) >>= return . InitRecord
        , parens pas initExpression
        , try $ integer pas >>= \i -> notFollowedBy (char' '.') >> (return . InitNumber . show) i
        , try $ float pas >>= return . InitFloat . show
        , try $ integer pas >>= return . InitNumber . show
        , try (string' "_S" >> stringLiteral pas) >>= return . InitString
        , try (string' "_P" >> stringLiteral pas) >>= return . InitPChar
        , stringLiteral pas >>= return . InitString
        , char' '#' >> many digit >>= \c -> comments >> return (InitChar c)
        , char' '$' >> many hexDigit >>= \h -> comments >> return (InitHexNumber h)
        , char' '@' >> initExpression >>= \c -> comments >> return (InitAddress c)
        , try $ string' "nil" >> return InitNull
        , try itypeCast
        , iD >>= return . InitReference
        ]

    notRecord (InitRecord _) = False
    notRecord _ = True

    recField = do
        i <- iD
        spaces
        char' ':'
        spaces
        e <- initExpression
        spaces
        return (i ,e)

    table = [
          [
             Prefix (char' '-' >> return (InitPrefixOp "-"))
            ,Prefix (try (string' "not") >> return (InitPrefixOp "not"))
          ]
        , [  Infix (char' '*' >> return (InitBinOp "*")) AssocLeft
           , Infix (char' '/' >> return (InitBinOp "/")) AssocLeft
           , Infix (try (string' "div") >> return (InitBinOp "div")) AssocLeft
           , Infix (try (string' "mod") >> return (InitBinOp "mod")) AssocLeft
           , Infix (try $ string' "and" >> return (InitBinOp "and")) AssocLeft
           , Infix (try $ string' "shl" >> return (InitBinOp "shl")) AssocNone
           , Infix (try $ string' "shr" >> return (InitBinOp "shr")) AssocNone
          ]
        , [  Infix (char' '+' >> return (InitBinOp "+")) AssocLeft
           , Infix (char' '-' >> return (InitBinOp "-")) AssocLeft
           , Infix (try $ string' "or" >> return (InitBinOp "or")) AssocLeft
           , Infix (try $ string' "xor" >> return (InitBinOp "xor")) AssocLeft
          ]
        , [  Infix (try (string' "<>") >> return (InitBinOp "<>")) AssocNone
           , Infix (try (string' "<=") >> return (InitBinOp "<=")) AssocNone
           , Infix (try (string' ">=") >> return (InitBinOp ">=")) AssocNone
           , Infix (char' '<' >> return (InitBinOp "<")) AssocNone
           , Infix (char' '>' >> return (InitBinOp ">")) AssocNone
           , Infix (char' '=' >> return (InitBinOp "=")) AssocNone
          ]
        {--, [  Infix (try $ string' "and" >> return (InitBinOp "and")) AssocLeft
           , Infix (try $ string' "or" >> return (InitBinOp "or")) AssocLeft
           , Infix (try $ string' "xor" >> return (InitBinOp "xor")) AssocLeft
          ]
        , [  Infix (try $ string' "shl" >> return (InitBinOp "shl")) AssocNone
           , Infix (try $ string' "shr" >> return (InitBinOp "shr")) AssocNone
          ]--}
        --, [Prefix (try (string' "not") >> return (InitPrefixOp "not"))]
        ]

    itypeCast = do
        --t <- choice $ map (\s -> try $ caseInsensitiveString s >>= \i -> notFollowedBy alphaNum >> return i) knownTypes
        t <- iD
        i <- parens pas initExpression
        comments
        return $ InitTypeCast t i

builtInFunction :: Parsec String u a -> Parsec String u (String, [a])
builtInFunction e = do
    name <- choice $ map (\s -> try $ caseInsensitiveString s >>= \i -> notFollowedBy alphaNum >> return i) builtin
    spaces
    exprs <- option [] $ parens pas $ option [] $ commaSep1 pas $ e
    spaces
    return (name, exprs)

systemUnit :: Parsec String u PascalUnit
systemUnit = do
    string' "system;"
    comments
    string' "type"
    comments
    t <- typesDecl
    string' "var"
    v <- varsDecl True
    return $ System (t ++ v)

redoUnit :: Parsec String u PascalUnit
redoUnit = do
    string' "redo;"
    comments
    string' "type"
    comments
    t <- typesDecl
    string' "var"
    v <- varsDecl True
    return $ Redo (t ++ v)