> module Week6 where
> import Data.Char
> import Data.List hiding (transpose)
> import Control.Monad
> import Control.Applicative
> import Test.QuickCheck

* Parsing

> data Expr = ETrue | ENot Expr | EOr Expr Expr

A parser maps a list of tokens to a result.

> type ParserV1 token result = [token] -> result
> type ExprParser = ParserV1 Char Expr

A parser may not consume the entire input.

> type ParserV2 token result = [token] -> (result, [token])

A parser may be non-deterministic:
Consider the grammar

A -> aA
A -> aB
B -> b

Represent using "list of successes" technique

> type Parser token result = [token] -> [(result, [token])]

parser recognizes ts if
parser ts   contains   (r, [])
==> all input ts has been consumed and the result is r

* Combinator parser
** Primitive parsers

pempty recognizes the empty language

> pempty :: Parser t r
> pempty ts = []

succeed recognizes the empty word

> succeed :: r -> Parser t r
> succeed r ts = [(r, ts)]

satisfy p   recognizes   [t]  if  (p t)  holds

> satisfy :: (t -> Bool) -> Parser t t
> satisfy p [] = []
> satisfy p (t:ts) | p t       = [(t, ts)]
>                  | otherwise = []

lit t     recognizes   [t] 

> lit :: Eq t => t -> Parser t t
> lit t = satisfy (== t)

** Combination of parsers

Alternative <|> recognizes the union of two languages

> palt :: Parser t r -> Parser t r -> Parser t r
> palt p1 p2 = \ts -> p1 ts ++ p2 ts

Sequence <*> recognizes the concatenation of two languages

> pseq :: Parser t (s -> r) -> Parser t s -> Parser t r
> pseq p1 p2 ts = [ (f s, ts'') | (f, ts') <- p1 ts, (s, ts'') <- p2 ts' ]

Semantic: pmap f p  recognizes  the same language as p,
          but changes the result

> pmap :: (s -> r) -> Parser t s -> Parser t r
> pmap f p ts = [ (f s, ts') | (s, ts') <- p ts]

** the example grammar

A -> aA
A -> aB
B -> b

> parseB = pmap (\x->[x]) $ lit 'b'
> parsea = pmap (:) $ lit 'a'  -- ('a':)
> parseA = palt (pseq parsea parseA) (pseq parsea parseB)

A -> aA
A -> aB
B -> bB
B -> 

> parseb = pmap (:) $ lit 'b' 
> parseB' = palt (pseq parseb parseB') (succeed "")
> parseA' = palt (pseq parsea parseA') (pseq parsea parseB')


* 20131128: An application to parsing expressions

Consider the grammar

E -> E "+" T
E -> T

E -> T E'
E' -> 
E' -> "+" T E'
T -> "let" Ident "=" E "in" E
T -> Ident
T -> Number
T -> "(" E ")"

x + 5 - z + 3
((x + 5) - z) + 3

parseE = palt (pseq parseE ...) parseT
defines a parser that never terminates on any input

Wanted: parseT "let x=5 in x+x"

A datatype to represent the result of parsing

> data E = V String | N Integer | A E E | L String E E
>   deriving (Eq, Show)

L :: String -> E -> E -> E

Transform the grammar; eliminate left recursion

The start of, we need a lexer that partitions the incoming list of
characters into a list of tokens. A token is either a single symbol,
an identifier, or a number. Whitespace characters are removed.

> data Token = Symbol Char | Ident String | Number Integer
>   deriving (Eq, Show)

> lexer :: String -> [Token]
> lexer [] = []
> lexer (c:cs) 
>   | isSpace c = lexer cs
>   | isAlpha c = Ident (c:acs) : lexer acs_rest
>   | isDigit c = Number (read $ c:ncs) : lexer ncs_rest
>   | otherwise = Symbol c : lexer cs
>   where acs = takeWhile isAlphaNum cs
>         acs_rest = dropWhile isAlphaNum cs
>         ncs = takeWhile isDigit cs
>         ncs_rest = dropWhile isDigit cs

> numberToken n (Number m:_) = m == n
> numberToken n _ = False

> prop_lex_num n = n >= 0 ==> numberToken n $ lexer (show n)

> isNumberToken (Number _) = True
> isNumberToken _ = False
> isIdentToken (Ident _) = True
> isIdentToken _ = False

> parserE :: Parser Token E
> parserE' :: Parser Token [E]
> parserT :: Parser Token E

> pIdent :: Parser Token String
> pIdent = pmap (\(Ident x) -> x) $ satisfy isIdentToken

> parserE  = pseq (pmap build_term parserT) parserE'
> parserE' = palt (pseq (pmap (const id) (lit $ Symbol '+'))
>                       (pseq (pmap (:) parserT) parserE')) (succeed [])
> parserT  = palt (pseq (pseq (pseq (pmap (const L) $ lit $ Ident "let")
>                       pIdent)
>                       (pseq (pmap (const id) $ lit $ Symbol '=')
>                       parserE))
>                       (pseq (pmap (const id) $ lit $ Ident "in")
>                       parserE)) $
>            palt (pmap (\(Ident x) -> V x) $ satisfy isIdentToken) $
>            palt (pmap (\(Number n) -> N n) $ satisfy isNumberToken) $
>            pseq (pmap (const id) $ lit $ Symbol '(')
>                 (pseq (pmap const $ parserE) (lit $ Symbol ')'))

> build_term :: E -> [E] -> E
> build_term t [] = t
> build_term t (t1:ts) = build_term (A t t1) ts

* 20131203: Parsers have rich structure ...
- many concepts from category theory can be mapped to programming concepts
- parsing fits many of these concepts

* Parsing is a functor

instance Functor (Parser t) where
  fmap f m = undefined

fmap :: Functor f => (a -> b) -> (f a -> f b)
map               :: (a -> b) -> ([a] -> [b])

> mapMaybe          :: (a -> b) -> Maybe a -> Maybe b
> mapMaybe f Nothing = Nothing
> mapMaybe f (Just a) = Just (f a)

- functorial laws

** fmap id_a == id_f_a
** fmap (f . g) == fmap f . fmap g


- for parsing: fmap = pmap

> newtype Parser' token result =
>   Parser' { exParser' :: Parser token result } 

> -- exParser' (Parser' p) = p

> instance Functor (Parser' token) where
>   fmap f (Parser' p) = Parser' (pmap f p)


* Parsing is a monad

instance Monad (Parser t) where
  return x = undefined
  ma >>= f = undefined

- monad laws
** return x >>= f  ==  f x
** ma >>= return   ==  ma        --  lhs:  ma >>= \x -> return x
** (ma >>= \x -> mb) >>= f  == ma >>= \x -> (mb >>= f)    --  x not in free(f)

- list monad:
instance Monad [] where
  return x = [x]
  ma >>= f = concat $ map f ma

-- ma       :: [a]
-- f        :: a -> [b]
-- ma >>= f :: [b]

Monad laws for the list monad:
(1) return x >>= f  ==
    [x] >>= f ==
    concat $ map f [x] ==
    concat $ [f x] ==
    f x
(2) [x1, x2, ...] >>= return   ==
    concat $ map (\x -> [x]) [x1, x2, ...] ==
    concat $ [[x1], [x2], ...] ==
    [x1, x2, ...]
(3) ...

> instance Monad (Parser' token) where
>   return x = Parser' (succeed x)
>   (Parser' p) >>= f = Parser' $
>               \ts -> [(b, ts'') | (a, ts') <- p ts, (b, ts'') <- exParser' (f a) ts']

using the list monad:  do { (a, ts') <- p ts; exParser' (f a) ts' }

ma :: Parser' t a
f  :: a -> Parser' t b
.. :: Parser' t b

fulfills the monad laws ...

can use the do notation for parsers:
to parse
 T -> "let" Ident "=" E "in" E 

do _ <- lit $ Ident "let"
   x <- pIdent
   _ <- lit $ Symbol '='
   e1 <- parserE
   _ <- lit $ Ident "in"
   e2 <- parserE
   return $ L x e1 e2

... readability is much improved ...


* Parsing is a monad with summation

instance MonadPlus (Parser t) where
  mzero = undefined
  mplus m1 m2 = undefined

for lists:

instance MonadPlus [] where
  mzero = []
  mplus = (++)

> instance MonadPlus (Parser' t) where
>   mzero = Parser' pempty
>   mplus (Parser' p1) (Parser' p2) = Parser' (palt p1 p2)


* Parsing is applicative

** Origin: applicative programming with effects (McBride, Paterson)

- example 1 : sequencing computation

sequence :: [IO a] -> IO [a]
sequence []       = return []
sequence (io:ios) = do x <- io
                       xs <- sequence ios
                       return (x:xs)

sequence (io:ios) = liftM2 (:) io (sequence ios)

(:) :: a -> [a] -> [a]
|
V
liftM2 (:) :: IO a -> IO [a] -> IO [a]

sequence []       = return []
sequence (io:ios) = return (:) `ap` io `ap` sequence ios

return (:)                  :: IO (a -> [a] -> [a])
return (:) `ap` io          :: IO ([a] -> [a])
return (:) `ap` io `ap` ios :: IO [a]

return :: Monad m => a -> m a
ap     :: Monad m => m (a -> b) -> m a -> m b

ma >>= \x -> if x then mb1 else mb2 


- example 2 : list of list transposition

> transpose :: [[a]] -> [[a]]
> transpose [] = repeat []
> transpose (xs:xss) = zipWith (:) xs (transpose xss)

zipWith :: (a -> b -> c) -> [a] -> [b] -> [c]

repeat :: a -> [a]

> zapp :: [a -> b] -> [a] -> [b]
> zapp fs xs = zipWith ($) fs xs

zipWith_n :: (a1 -> a2 -> ... -> an -> b) -> [a1] -> [a2] -> ... -> [an] -> [b]
zipWith_n f xs1 xs2 ... xsn = repeat f `zapp` xs1 `zapp` xs2 ...

transpose []       = repeat []
transpose (xs:xss) = repeat (:) `zapp` xs `zapp` transpose xss


- example 3 : interpretation

> data Exp v
>   = Var v
>   | Val Int
>   | Add (Exp v) (Exp v)

> eval :: Exp v -> Env v -> Int
> eval (Var v) env = fetch v env
> eval (Val i) env = i
> eval (Add e1 e2) env = eval e1 env + eval e2 env

> eval' :: Exp v -> Env v -> Int
> eval' (Var v) = fetch v
> eval' (Val i) = const i
> eval' (Add e1 e2) = const (+) `ess` (eval' e1) `ess` (eval' e2)

> ess a b c = (a c) (b c)

> type Env v = v -> Int
> fetch :: v -> Env v -> Int
> fetch v env = env v

- examples share a common structure

class Functor f => Applicative f where
  pure  :: a -> f a
  (<*>) :: f (a -> b) -> f a -> f b

** applicative laws
- Identity
pure id <*> v == v
- composition
pure (.) <*> u <*> v <*> w = u <*> (v <*> w)
- homomorphism
pure f <*> pure x = pure (f x)
- interchange
u <*> pure y = pure ($ y) <*> u

** further derived members of Applicative
(*>) :: f a -> f b -> f b
fa *> fb = fmap (const id) fa <*> fb

(<*) :: f a -> f b -> f a
fa <* fb = fmap const fa <*> fb

> instance Applicative (Parser' token) where
>   pure = return
>   (<*>) = ap

> instance Alternative (Parser' token) where
>   empty = mzero
>   (<|>) = mplus

can use the applicative notation for parsers:
to parse
   T -> "let" Ident "=" E "in" E 
   T -> Ident
   T -> Number
   T -> '(' E ')'

parserT = 
   pure L <* lIdent "let" <*> pIdent <* lSymbol '=' <*> parserE <* lIdent "in" <*> parserE
   <|> pure Ident <*> satisfy isIdentToken
   <|> pure Number <*> satisfy isNumberToken
   <|> lSymbol '(' *> parserE <* lSymbol ')'