module Data.ANN.Perceptron where

import Control.Monad
import Control.Monad.Error

import Data.List

-- For the examples
import Control.Monad.Random
import Control.Applicative

-- Conal Elliots automatic differentiation
import Dif

-- ### Types ###

type Weights = [Double]
type Bias = Double

type Activation = Dif Double -> Dif Double
{--
The activation function is required to be differentiable.
Why? Because backpropagation works by calculating the local error plane and
then moving downhill. If we use squared difference as the error measure then
the error plane depends nicely on the derivative of the activation function.
That only sounds complicated because there arent any pictures.

We could use an arbitrary differentiable error function as well but thats
kind of overkill.

Apparently backpropogation is a form of automatic differentiation applied to
the function that the neural network represents. It would be pretty cool to
figure out how to make this clear in the code...
--}

data Neuron = Neuron Weights Bias Activation
weights (Neuron ws _ _) = ws

instance Show Neuron where
    show (Neuron ws bias _) = "Neuron " ++ show ws ++ " " ++ show bias

data Perceptron =
      Base [Neuron]
    | Layer [Neuron] Perceptron
    deriving (Show)

type Input = [Double]
type Output = Double

-- ### Lots of error checking. Nothing exciting here ###

neuronInputs (Neuron ws _ _) = length ws
perceptronInputs (Base ns) = neuronInputs (head ns)
perceptronInputs (Layer ns p) = neuronInputs (head ns)
perceptronOutputs (Base ns) = length ns
perceptronOutputs (Layer ns p) = perceptronOutputs p

-- Check that all neurons in the list have the same number of inputs
checkNeurons [] = throwError "Empty neuron list"
checkNeurons (n:ns) = do
    when (any (/= neuronInputs n) (map neuronInputs ns)) (throwError "Nonmatching input lengths in neuron list")
    return (neuronInputs n)

neuron ws bias act =
    return (Neuron ws bias act)

base ns = do
    checkNeurons ns
    return (Base ns)

layer ns p = do
    nin <- checkNeurons ns
    when (nin /= perceptronInputs p) (throwError "Args do not match in Perceptron.layer")
    return (Layer ns p)

-- ### Running ###

runNeuron :: Input -> Neuron -> Either String (Dif Output)
runNeuron input n@(Neuron ws bias activate) = do
    when (length input /= neuronInputs n) (throwError "Wrong number of inputs to Perceptron.runNeuron")
    return $ activate $ dId $ bias + sum (zipWith (*) ws input)

runPerceptron :: Input -> Perceptron -> Either String [Dif Output]
runPerceptron input p = do
    when (length input /= perceptronInputs p) (throwError "Wrong number of inputs to Perceptron.runPerceptron")
    case p of
        (Base ns) -> mapM (runNeuron input) ns
        (Layer ns p) -> do
            input' <- mapM (runNeuron input) ns
            runPerceptron (map dVal input') p

-- ### Training ###

type TrainRate = Double
type Delta = Double

-- Where did deepSeq go?
seqList [] b = b
seqList (a:as) b = seq a (seqList as b)

adjustNeuron :: TrainRate -> Input -> Delta -> Neuron -> Neuron
adjustNeuron trainRate input delta n@(Neuron ws bias activate) =
    let ws' = [w + trainRate*delta*i | w <- ws | i <- input]
        bias' = bias + trainRate*delta
    in  seqList ws' $ seq bias' $ Neuron ws' bias' activate

-- Backpropagation against square error gradient
trainOnce :: TrainRate -> Input -> [Output] -> Perceptron -> Either String Perceptron
trainOnce trainRate input expectedOutputs p = do
    when (length input /= perceptronInputs p) (throwError "Wrong number of inputs to Perceptron.trainOnce")
    when (length expectedOutputs /= perceptronOutputs p) (throwError "Wrong number of expected outputs to Perceptron.trainOnce")
    (_,_,p') <- trainInner input p
    return p'

    -- Compare to backprop.png in src file
    where   trainInner :: Input -> Perceptron -> Either String ([Delta],[Weights],Perceptron)
            trainInner is (Base ns) = do
                outputs <- mapM (runNeuron is) ns
                let deltas = [(dVal $ deriv o) * (eo - dVal o) | o <- outputs | eo <- expectedOutputs]
                let ns' = [adjustNeuron trainRate is d n | d <- deltas | n <- ns]
                return (deltas, map weights ns, Base ns')
            trainInner is (Layer ns p) = do
                outputs <- mapM (runNeuron is) ns
                (deltas,wss,p') <- trainInner (map dVal outputs) p
                let deltas' = [(dVal $ deriv o) * sum (zipWith (*) ws deltas) | o <- outputs | ws <- transpose wss]
                let ns' = [adjustNeuron trainRate is d n | d <- deltas' | n <- ns]
                return (deltas',map weights ns, Layer ns' p')

train :: TrainRate -> [(Input,[Output])] -> Perceptron -> [Either String Perceptron]
train trainRate examples p = scanl (\ mp (i,eo) -> trainOnce trainRate i eo =<< mp) (return p) examples

-- ### Examples and tests ###

totalSquareError :: [(Input,Output)] -> Perceptron -> Either String Double
totalSquareError examples p = do
    let squareError (i,eo) = do
            [o] <- runPerceptron i p
            return ((eo - dVal o)**2)
    errs <- mapM squareError examples
    return $ (sqrt (sum errs / genericLength errs))

-- Traditional activation function
sigmoid x = 1 / (1 + (exp (-x)))

boolish :: (Bool -> Bool -> Bool) -> [Double] -> Double
boolish f [d1,d2] =
    if f (d1 >= 0.5) (d2 >= 0.5)
    then 1.0
    else 0.0

xor b1 b2 = (b1 || b2) && (not (b1 && b2))

inputs = do
    b1 <- map (/10) [1..10]
    b2 <- map (/10) [1..10]
    return [b2,b1]

every n ps = (head ps):(every n $ drop n ps)

quickTest f p = do
    (Right p') <- every 50 $ train 1.0 (cycle [(i,[f i]) | i <- inputs]) p
    let (Right err) = totalSquareError [(i,f i) | i <- inputs] p'
    return err

instance Applicative (Rand g) where
    pure = return
    rf <*> ra = do
        f <- rf
        a <- ra
        return (f a)

weight :: Rand StdGen Double
weight = getRandomR ((-1),1)

nn :: Int -> Int -> Rand StdGen [Neuron]
nn i j = replicateM j (Neuron <$> (replicateM i weight) <*> weight <*> return (sigmoid.(/ fromIntegral i)))

network :: [Int] -> Rand StdGen Perceptron
network [i,j] = Base <$> (nn i j)
network (i:j:rest) = Layer <$> (nn i j) <*> (network (j:rest))

example1 = network [2,1]
example2 = network [2,10,1]
example3 = network [2,20,20,1]