-- Parsec parser of tokenized Edoc XML (tokenization made by Tagsoup). module Language.Edoc.Xml2Hs.Parser where import Text.HTML.TagSoup import Text.HTML.TagSoup.Parser import Text.HTML.TagSoup.Match import Text.HTML.TagSoup.Type import Text.ParserCombinators.Parsec import Text.ParserCombinators.Parsec.Pos import Language.Edoc.Xml2Hs.Type data PState = PState { counter :: Int -- unique numbers producer } -- Token primitives to distinguish between opening and closing tags. -- Match an opening tag. openTag s = token show (\_ -> newPos "" 0 0) (\tag -> case tag of TagOpen ts _ -> if ts == s then Just tag else Nothing _ -> Nothing) expTag s = openTag s s -- Match a closing tag. closeTag s = token show (\_ -> newPos "" 0 0) (\tag -> case tag of TagClose ts -> if ts == s then Just tag else Nothing _ -> Nothing) -- Get a text element in case it is not ignored. getText = token show (\_ -> newPos "" 0 0) (\tag -> case tag of TagText txt -> Just txt _ -> Nothing) -- Ignore these tags. Anything else (like TagText) is also ignored. -- This parser succeeds when a token HAS to be ignored. ignore = token show (\_ -> newPos "" 0 0) (\tag -> let ignored = ["?xml", "package", "modules", "description", "briefDescription", "fullDescription", "copyright", "version", "since", "deprecated", "see", "reference", "todo", "behaviour", "callbacks", "callback", "expr", "author", "p"] in case tag of TagOpen s _ | s `elem` ignored -> Just () TagOpen _ _ -> Nothing TagClose s | s `elem` ignored -> Just () TagClose _ -> Nothing _ -> Just ()) -- Parse a whole module definition. pDocMod :: GenParser Tag PState EDocMod pDocMod = do skipMany ignore edm <- expTag "module" ags <- many pArgs skipMany ignore tds <- many pTypeDecls fds <- pFunctions closeTag "module" return $ EDocMod { em_name = fromAttrib "name" edm ,em_args = concat ags ,em_tdefs = concat tds ,em_funcs = fds} -- Parse arguments. pArgs = do expTag "args" as <- many pArg closeTag "args" return as pArg = do expTag "arg" an <- pArgName manyTill anyToken (closeTag "arg") return an pArgName = do expTag "argName" tx <- getText closeTag "argName" return tx -- Parse function declarations. pFunctions = do expTag "functions" fds <- many1 pFunDecl closeTag "functions" return fds pFunDecl = do fn <- expTag "function" ats <- pArgs tss <- many pTypeSpec manyTill anyToken (closeTag "function") return $ EFunDecl { ef_name = fromAttrib "name" fn ,ef_arity = read $ fromAttrib "arity" fn ,ef_args = ats ,ef_tspec = tss} -- Parse type declarations. pTypeDecls = do expTag "typedecls" tds <- many1 pTypeDecl closeTag "typedecls" return tds -- Parse a single toplevel type declaration which is a TypeDef wrapped into -- a TypeDecl. pTypeDecl = do expTag "typedecl" td <- pTypeDef skipMany ignore closeTag "typedecl" return td -- Parse a local definition. pLocDef = do expTag "localdef" tta <- pTypevar <|> pAbstype t <- pErlType closeTag "localdef" return $ ELocDef tta t -- Parse a type definition. pTypeDef = do expTag "typedef" en <- pErlName ats <- pArgTypes t1 <- many pErlType lds <- many pLocDef closeTag "typedef" return $ ETypeDef { td_ename = en ,td_argtypes = ats ,td_type = if null t1 then ENothing else head t1 ,td_ldef = lds} -- Parse a type specification pTypeSpec = do expTag "typespec" en <- pErlName t1 <- many pErlType lds <- many pLocDef closeTag "typespec" return $ ETypeSpec { ts_ename = en ,ts_type = if null t1 then ENothing else head t1 ,ts_ldef = lds} -- Parse an Erlang Name. pErlName = do enm <- expTag "erlangName" let app = fromAttrib "app" enm mod = fromAttrib "module" enm nam = fromAttrib "name" enm closeTag "erlangName" return $ ErlName {en_app = app, en_mod = mod, en_name = nam} -- Parse argtypes. pArgTypes = try $ do expTag "argtypes" ats <- many pErlType closeTag "argtypes" return ats -- Parse a single type token. It may be either a constant or a type. pErlType = pType <|> pConst pConst = try $ do expTag "const" c <- pInteger <|> pFloat <|> pAtom closeTag "const" return c pType = try $ do expTag "type" t <- simples <|> pTypevar <|> pAtom <|> pInteger <|> pFloat <|> pRange <|> pList <|> pTuple <|> pFun <|> pRecord <|> pRecRef <|> pAbstype <|> pUnion closeTag "type" return t -- Possible variants of Erlang types. pSimple (t, c) = try $ do expTag t closeTag t return c abstype s = EAbsType (ErlName "" "" s) [] simples = foldr1 (<|>) $ map pSimple [ ("any", EAny), ("nil", ENil), ("pid", abstype "pid"), ("port", abstype "port"), ("maybe_improper_list", ENil), ("nonempty_maybe_improper_list", ENil), ("string", abstype "string"), ("nonempty_string", abstype "string"), ("no_return", ENothing), ("number", ENum), ("bool", abstype "bool"), ("integer", abstype "integer"), ("float", abstype "float"), ("pos_integer", abstype "integer"), ("non_neg_integer", abstype "integer")] pUnion = try $ do expTag "union" uns <- many1 pErlType closeTag "union" return $ EUnion uns pTypevar = try $ do v <- expTag "typevar" let n = fromAttrib "name" v closeTag "typevar" return $ ETypeVar n pAtom = try $ do a <- expTag "atom" let v = fromAttrib "value" a closeTag "atom" return $ EAtom v pInteger = try $ do i <- expTag "integer" let n = read $ fromAttrib "value" i closeTag "integer" return $ EInteger n pFloat = try $ do f <- expTag "float" let v = read $ fromAttrib "value" f closeTag "float" return $ EFloat v -- Several tags open a list. It should close with the same tag. pList = try $ do TagOpen tag _ <- foldr1 (<|>) $ map expTag ["list", "nonempty_list"] t <- pErlType closeTag tag return $ EList t -- Range type indeed corresponds an integer type; values -- themselves are ignored. pRange = try $ do expTag "range" i1 <- pConst i2 <- pConst closeTag "range" return $ EInteger 0 pTuple = try $ do expTag "tuple" ts <- many pErlType closeTag "tuple" return $ ETuple ts pFun = try $ do expTag "fun" as <- pArgTypes t <- pErlType closeTag "fun" return $ EFun as t pRecRef = try $ do expTag "recref" EAtom a <- pConst closeTag "recref" return $ ERecord a [] pRecord = try $ do expTag "record" EAtom a <- pAtom fs <- many pField closeTag "record" return $ ERecord a fs pField = try $ do expTag "field" EAtom a <- pAtom t <- pErlType closeTag "field" return (a, t) pAbstype = try $ do expTag "abstype" en <- pErlName ts <- many pErlType closeTag "abstype" return $ EAbsType en ts