module Data.Cluster.HAC -- ( -- runHAC -- ) where import Data.Cluster.Types import Data.List (minimumBy,group,sort,sortBy,tails,insertBy,delete) import Data.Maybe (fromJust) import Data.Ord (comparing) import Control.Monad(guard) type PriorityList a b = [((a,a),b)] sortPL as = sortBy (comparing snd) as mergePL [] bs = bs mergePL as [] = as mergePL as@((a,pa):moreas) bs@((b,pb):morebs) = if pa < pb then (a,pa):(mergePL moreas bs) else (b,pb):(mergePL as morebs) filterPairPL (a1,a2) = filter (\((b1,b2),_) -> not ((a1==b1) || (a1==b2) || (a2==b1) || (a2==b2))) insertPL ((a1,a2),p) = insertBy (comparing snd) ((a1,a2),p) -- HAC: Hierarchichal Agglomerative Clustering -- We assume that every key is unique - duplicates are discarded data HAC c a b = HAC (PriorityList (c a) b) (Metric (c a) b) [c a] instance (Show (c a), Show b) => Show (HAC c a b) where show (HAC cache _ _) = "HAC: " ++ show cache hacFromList :: (Cluster c a, Ord a, Ord b) => Metric (c a) b -> [a] -> HAC c a b hacFromList metric [] = error "Cant cluster an empty list" hacFromList metric as = HAC cache metric cs where cache = sortPL [((c1,c2),(metric c1 c2)) | (c1:rest)<-tails cs, c2<-rest] cs = map (one . head) $ group $ sort as -- Only safe if length keys >= 2 mergeMinOne :: (Cluster c a, Eq (c a), Ord b) => HAC c a b -> HAC c a b mergeMinOne hac@(HAC cache metric keys) = HAC cache' metric (p:keys') where ((a1,a2),_) = head cache p = pair a1 a2 keys' = delete a1 $ delete a2 keys cache' = mergePL (sortPL [((p,a),metric p a) | a<-keys']) (filterPairPL (a1,a2) cache) mergeMinAll :: (Cluster c a, Eq (c a), Ord b) => HAC c a b -> c a mergeMinAll hac@(HAC cache _ keys) = c where (HAC _ _ [c]) = (iterate mergeMinOne hac) !! (length keys - 1) runHAC :: (Cluster c a, Eq (c a), Ord a, Ord b) => Metric (c a) b -> [a] -> c a runHAC metric = mergeMinAll . hacFromList metric