module Data.ANN.Perceptron where import Control.Monad import Control.Monad.Error import Data.List -- For the examples import Control.Monad.Random import Control.Applicative -- Conal Elliots automatic differentiation import Dif -- ### Types ### type Weights = [Double] type Bias = Double type Activation = Dif Double -> Dif Double {-- The activation function is required to be differentiable. Why? Because backpropagation works by calculating the local error plane and then moving downhill. If we use squared difference as the error measure then the error plane depends nicely on the derivative of the activation function. That only sounds complicated because there arent any pictures. We could use an arbitrary differentiable error function as well but thats kind of overkill. Apparently backpropogation is a form of automatic differentiation applied to the function that the neural network represents. It would be pretty cool to figure out how to make this clear in the code... --} data Neuron = Neuron Weights Bias Activation weights (Neuron ws _ _) = ws instance Show Neuron where show (Neuron ws bias _) = "Neuron " ++ show ws ++ " " ++ show bias data Perceptron = Base [Neuron] | Layer [Neuron] Perceptron deriving (Show) type Input = [Double] type Output = Double -- ### Lots of error checking. Nothing exciting here ### neuronInputs (Neuron ws _ _) = length ws perceptronInputs (Base ns) = neuronInputs (head ns) perceptronInputs (Layer ns p) = neuronInputs (head ns) perceptronOutputs (Base ns) = length ns perceptronOutputs (Layer ns p) = perceptronOutputs p -- Check that all neurons in the list have the same number of inputs checkNeurons [] = throwError "Empty neuron list" checkNeurons (n:ns) = do when (any (/= neuronInputs n) (map neuronInputs ns)) (throwError "Nonmatching input lengths in neuron list") return (neuronInputs n) neuron ws bias act = return (Neuron ws bias act) base ns = do checkNeurons ns return (Base ns) layer ns p = do nin <- checkNeurons ns when (nin /= perceptronInputs p) (throwError "Args do not match in Perceptron.layer") return (Layer ns p) -- ### Running ### runNeuron :: Input -> Neuron -> Either String (Dif Output) runNeuron input n@(Neuron ws bias activate) = do when (length input /= neuronInputs n) (throwError "Wrong number of inputs to Perceptron.runNeuron") return $ activate $ dId $ bias + sum (zipWith (*) ws input) runPerceptron :: Input -> Perceptron -> Either String [Dif Output] runPerceptron input p = do when (length input /= perceptronInputs p) (throwError "Wrong number of inputs to Perceptron.runPerceptron") case p of (Base ns) -> mapM (runNeuron input) ns (Layer ns p) -> do input' <- mapM (runNeuron input) ns runPerceptron (map dVal input') p -- ### Training ### type TrainRate = Double type Delta = Double -- Where did deepSeq go? seqList [] b = b seqList (a:as) b = seq a (seqList as b) adjustNeuron :: TrainRate -> Input -> Delta -> Neuron -> Neuron adjustNeuron trainRate input delta n@(Neuron ws bias activate) = let ws' = [w + trainRate*delta*i | w <- ws | i <- input] bias' = bias + trainRate*delta in seqList ws' $ seq bias' $ Neuron ws' bias' activate -- Backpropagation against square error gradient trainOnce :: TrainRate -> Input -> [Output] -> Perceptron -> Either String Perceptron trainOnce trainRate input expectedOutputs p = do when (length input /= perceptronInputs p) (throwError "Wrong number of inputs to Perceptron.trainOnce") when (length expectedOutputs /= perceptronOutputs p) (throwError "Wrong number of expected outputs to Perceptron.trainOnce") (_,_,p') <- trainInner input p return p' -- Compare to backprop.png in src file where trainInner :: Input -> Perceptron -> Either String ([Delta],[Weights],Perceptron) trainInner is (Base ns) = do outputs <- mapM (runNeuron is) ns let deltas = [(dVal $ deriv o) * (eo - dVal o) | o <- outputs | eo <- expectedOutputs] let ns' = [adjustNeuron trainRate is d n | d <- deltas | n <- ns] return (deltas, map weights ns, Base ns') trainInner is (Layer ns p) = do outputs <- mapM (runNeuron is) ns (deltas,wss,p') <- trainInner (map dVal outputs) p let deltas' = [(dVal $ deriv o) * sum (zipWith (*) ws deltas) | o <- outputs | ws <- transpose wss] let ns' = [adjustNeuron trainRate is d n | d <- deltas' | n <- ns] return (deltas',map weights ns, Layer ns' p') train :: TrainRate -> [(Input,[Output])] -> Perceptron -> [Either String Perceptron] train trainRate examples p = scanl (\ mp (i,eo) -> trainOnce trainRate i eo =<< mp) (return p) examples -- ### Examples and tests ### totalSquareError :: [(Input,Output)] -> Perceptron -> Either String Double totalSquareError examples p = do let squareError (i,eo) = do [o] <- runPerceptron i p return ((eo - dVal o)**2) errs <- mapM squareError examples return $ (sqrt (sum errs / genericLength errs)) -- Traditional activation function sigmoid x = 1 / (1 + (exp (-x))) boolish :: (Bool -> Bool -> Bool) -> [Double] -> Double boolish f [d1,d2] = if f (d1 >= 0.5) (d2 >= 0.5) then 1.0 else 0.0 xor b1 b2 = (b1 || b2) && (not (b1 && b2)) inputs = do b1 <- map (/10) [1..10] b2 <- map (/10) [1..10] return [b2,b1] every n ps = (head ps):(every n $ drop n ps) quickTest f p = do (Right p') <- every 50 $ train 1.0 (cycle [(i,[f i]) | i <- inputs]) p let (Right err) = totalSquareError [(i,f i) | i <- inputs] p' return err instance Applicative (Rand g) where pure = return rf <*> ra = do f <- rf a <- ra return (f a) weight :: Rand StdGen Double weight = getRandomR ((-1),1) nn :: Int -> Int -> Rand StdGen [Neuron] nn i j = replicateM j (Neuron <$> (replicateM i weight) <*> weight <*> return (sigmoid.(/ fromIntegral i))) network :: [Int] -> Rand StdGen Perceptron network [i,j] = Base <$> (nn i j) network (i:j:rest) = Layer <$> (nn i j) <*> (network (j:rest)) example1 = network [2,1] example2 = network [2,10,1] example3 = network [2,20,20,1]