{-# LANGUAGE TypeFamilies #-} module Generics.Annotations.Token ( Symbol(..), unparse, Token(..), unparseTokenS, collapse, tSatisfy, tSymbol ) where import Generics.Annotations.Satisfy import Control.Applicative -- | A symbol in the input stream of a parser. Sizes are used to determine positions within the stream. class Symbol a where -- | Yields the size of this symbol measured in number of characters. symbolSize :: a -> Int -- | Yields the size of the whitespace preceding this symbol. spaceBeforeSize :: a -> Int -- | Yields the size of the whitespace succeeding this token. spaceAfterSize :: a -> Int -- | Unparses a list of symbols back to a string. unparseS :: [a] -> ShowS -- | Variant of 'unparseS' that directly yields a 'String'. unparse :: Symbol a => [a] -> String unparse = ($ "") . unparseS instance Symbol Char where symbolSize = const 1 spaceBeforeSize = const 0 spaceAfterSize = const 0 unparseS = showString -- | A token is a symbol in an input stream accompanied by whitespace on both sides. data Token s = Token { tSpaceBefore :: [s] -- ^ Yields the whitespace preceding this token. , tImage :: s -- ^ Yields the meaningful symbol of this token. , tSpaceAfter :: [s] -- ^ Yields the whitespace succeeding this token. } deriving (Eq, Show) instance (Symbol s) => Symbol (Token s) where symbolSize (Token _ im _) = symbolSize im spaceBeforeSize (Token sb _ _) = sum (map symbolSize sb) spaceAfterSize (Token _ _ sa) = sum (map symbolSize sa) unparseS = unparseTokenS -- | Unparses a list of tokens, handling whitespace in a clever way. unparseTokenS :: Symbol s => [Token s] -> ShowS unparseTokenS [] = id unparseTokenS [Token sb im sa] = unparseS sb . unparseS [im] . unparseS sa unparseTokenS ((Token sb im _):ts) = unparseS sb . unparseS [im] . unparseS ts -- | Given a predicate that tells when a symbol is whitespace, converts a list of symbols to a list of tokens. collapse :: (s -> Bool) -> [s] -> [Token s] collapse space input = collapse' space xs yzs where (xs, yzs) = span space input collapse' :: (s -> Bool) -> [s] -> [s] -> [Token s] collapse' space spaceBefore input = output where output = if null input then [] else token : rest token = Token { tImage = head input , tSpaceBefore = spaceBefore , tSpaceAfter = spaceAfter } (spaceAfter, toProcess) = span space (tail input) rest = collapse' space spaceAfter toProcess -- | Recognise a specific tokenised symbol. tSatisfy :: (t -> Bool) -> SatisfyA (Token t) t tSatisfy p = tImage <$> satisfy (p . tImage) -- | Recognise a tokenised symbol matching a predicate. tSymbol :: Eq t => t -> SatisfyA (Token t) t -- tSymbol t = t <$ satisfy ((== t) . tImage) -- tSymbol = tSatisfy . (==) tSymbol t = tSatisfy (== t)