module BoundsParser ( -- * Text selections Bounds(..), Range, -- * Symbols Symbol(..), collapse, -- * Parsing P, satisfy, pToken, getPos, mkBounded, unit, chainr, chainl ) where import Annotations import qualified Text.Parsec as P import qualified Text.Parsec.Pos as P import Data.Function import Control.Applicative -- | A structural selection expressed as a textual selection. The margins indicate the whitespace directly around the selected structure. data Bounds = Bounds { leftMargin :: Range, rightMargin :: Range } deriving (Eq, Show) -- | A simple textual selection: starting offset and ending offset, respectively. Offsets are 0-based. type Range = (Int, Int) -- left, right -- | Symbols form input for parsers. Minimal complete definition: 'unparse'. class Symbol s where -- | Unparses a symbol, converting it back to text. unparse :: s -> String -- | Yields the size of a symbol. Default implementation is @length . unparse@. symbolSize :: s -> Int symbolSize = length . unparse instance Symbol s => Symbol [s] where unparse = concatMap unparse symbolSize = sum . fmap symbolSize -- | Given a predicate that tells what tokens to discard, keeps only the meaningful tokens and couples them with position information. collapse :: Symbol s => (s -> Bool) -> [s] -> [(s, Bounds)] collapse space ts = collapse' (0, symbolSize lefts) space rest where (lefts, rest) = span space ts collapse' :: Symbol s => Range -> (s -> Bool) -> [s] -> [(s, Bounds)] collapse' _ _ [] = [] collapse' left space (t:ts) = new : collapse' right space rest where (_, leftInner) = left rightInner = leftInner + symbolSize t rightOuter = rightInner + symbolSize rights right = (rightInner, rightOuter) (rights, rest) = span space ts new = (t, Bounds left right) -- | A parser that works on symbols coupled with token information. The state maintains the current position in the stream. This position is between two tokens; it is a range because there might be whitespace between these two tokens. type P s = P.ParsecT [(s, Bounds)] Range -- | Yield the current position in the input. getPos :: Monad m => P s m Range getPos = P.getState -- | Recognise a symbol matching a predicate. satisfy :: (Monad m, Symbol s) => (s -> Bool) -> P s m s satisfy ok = do let pos _ (_, bounds) _ = P.newPos "" 0 (fst (rightMargin bounds) + 1) let match x@(tok, _) | ok tok = Just x | otherwise = Nothing (tok, bounds) <- P.tokenPrim (unparse . fst) pos match P.setState (rightMargin bounds) return tok -- | Recognise a specific symbol. pToken :: (Monad m, Symbol s, Eq s) => s -> P s m s pToken = satisfy . (==) -- | Given the left margin of a structure, asks the parser for the right margin and wraps the position information around the root of the tree. mkBounded :: Monad m => Range -> AnnFix1 Bounds f -> P s m (AnnFix Bounds f) mkBounded left x = do -- (\right -> mkAnnFix (Bounds left right) x) <$> getPos right <- getPos return (mkAnnFix (Bounds left right) x) -- | Wrap an unnotated tree with position information from the parse state. unit :: Monad m => P s m (AnnFix1 Bounds f) -> P s m (AnnFix Bounds f) unit p = do left <- getPos x <- p mkBounded left x -- | Parse right-recursive structures. chainr :: Monad m => P s m (AnnFix Bounds f) -> P s m (AnnFix Bounds f -> AnnFix Bounds f -> AnnFix1 Bounds f) -> P s m (AnnFix Bounds f) chainr px pf = fix $ \loop -> do left <- getPos x <- px P.option x $ do f <- pf y <- loop mkBounded left (f x y) -- | Parse left-recursive structures. chainl :: Monad m => P s m (AnnFix Bounds f) -> P s m (AnnFix Bounds f -> AnnFix Bounds f -> AnnFix1 Bounds f) -> P s m (AnnFix Bounds f) chainl px pf = do left <- getPos px >>= rest left where rest left = fix $ \loop x -> P.option x $ do f <- pf y <- px mkBounded left (f x y) >>= loop