module Data.Cluster.KCentres where import Data.Cluster.Types import Control.Monad (replicateM) import Control.Monad.Random (Rand,evalRand,getRandomR) import System.Random import Data.List (concatMap,sort,minimumBy,foldl1,groupBy) import Data.Ord (comparing) import Data.Either (partitionEithers) import qualified Data.Map as Map -- Use Centre to decide centre of cluster so we can get kMeans, kMediods etc -- Empty clusters, non-empty clusters, original points data Clusters c a = Clusters [c a] [c a] [c a] deriving (Eq,Ord,Show) getClusters (Clusters emptys nonemptys _) = emptys ++ nonemptys kCentresStart :: (Cluster c a, RandomGen g, Ord a) => Rand g a -> Int -> [a] -> Rand g (Clusters c a) kCentresStart gen k as = do startPoints <- replicateM k gen return (Clusters (map one startPoints) [] (map one as)) -- This is a bit fiddly because we need to check for clusters with zero elements -- or pairs of equal centres. To do this we assign an int to each centre in cs -- and track the elements moved to that centre. kCentresOne :: (Cluster c a, Ord a, Ord b) => Metric (c a) b -> Clusters c a -> Clusters c a kCentresOne metric (Clusters es cs as) = Clusters es' cs' as where (es',cs') = partitionEithers [fromListDefault c elems | (_,elems) <- Map.toAscList groups | (_,c) <- is] -- fromListDefault :: (c a,[a]) -> Either (c a) (c a) fromListDefault c as = if null as then Left c else Right (foldl1 pair as) -- groups :: Map.Map Int ([a]) groups = Map.map ($[]) $ Map.fromListWith (.) ([(z,id) | (z,_)<-is] ++ [(closest a,(a:)) | a<-as]) -- closest :: c a -> Int closest a = fst $ minimumBy (comparing (metric a . snd)) is is = zip [1..] (es++cs) untilStable :: (Cluster c a, Ord a) => [Clusters c a] -> Clusters c a untilStable cs = u [] cs where u as [] = error "Empty list input to Data.Cluster.KCentres.untilStable" u as [c] = c u as (c:cs) = if as' == as then c else u as' cs where as' = map (sort . toList) (getClusters c) kCentres :: (Cluster c a, RandomGen g, Ord a, Ord b) => Metric (c a) b -> Rand g a -> Int -> Int -> [a] -> Rand g [[a]] kCentres metric gen k limit as = do start <- kCentresStart gen k as let (Clusters es cs _) = untilStable $ take limit $ iterate (kCentresOne metric) start return ((map toList cs) ++ (map (const []) es)) {-- Simple example nos = [2,3,4,10,11,12,20,25,30] metric :: Metric (Medroid [] Double) Double metric = centreLink (\ a b -> abs (a-b)) result3 :: IO [[Double]] result3 = evalRand (kCentres metric (getRandomR (0,10)) 3 100 nos) `fmap` newStdGen --}