{-# LANGUAGE Rank2Types #-} {- | Build a lazy storable vector by incrementally adding an element. This is analogous to Data.Binary.Builder for Data.ByteString.Lazy. Attention: We have still a serious memory leak. -} module Data.StorableVector.Lazy.Builder ( Builder, toLazyStorableVector, put, flush, ) where import qualified Data.StorableVector as SV import qualified Data.StorableVector.Lazy as SVL import qualified Data.StorableVector.ST.Lazy as STV import qualified Control.Monad.Trans.State.Lazy as State import Data.StorableVector.Lazy (ChunkSize(ChunkSize), ) import Control.Monad.Trans.State.Lazy (StateT, runStateT, ) import Control.Monad.Trans (lift, ) import Control.Monad (liftM2, ) import Control.Monad.ST.Lazy (ST, runST, ) import Data.Monoid (Monoid(mempty, mappend), ) import Foreign.Storable (Storable, ) {- A reader, writer, state monoid wrapping an ST monad. Reader context: Chunk size State: current chunk and offset Writer: list of all complete chunks -} newtype Builder a = Builder {run :: forall s. ChunkSize -> StateT (STV.Vector s a, Int) (ST s) [SV.Vector a]} -- instance Monoid (Builder a) where {- Storable constraint not need in the current implementation, but who knows what will be in future ... -} instance Storable a => Monoid (Builder a) where {-# INLINE mempty #-} {-# INLINE mappend #-} mempty = Builder (\_ -> return []) mappend x y = Builder (\cs -> liftM2 (++) (run x cs) (run y cs)) {- | > SVL.unpack $ toLazyStorableVector (ChunkSize 7) $ Data.Monoid.mconcat $ map put ['a'..'z'] -} {-# INLINE toLazyStorableVector #-} toLazyStorableVector :: Storable a => ChunkSize -> Builder a -> SVL.Vector a toLazyStorableVector cs bld = runST (do v0 <- newChunk cs ~(chunks,vi1) <- runStateT (run bld cs) v0 {- If we ignore the last chunk, the memory leak disappears, but the vector construction is still very slow, certainly due to ST.Lazy's inefficiency. -} lastChunk <- return SV.empty -- lastChunk <- fixVector vi1 return $ SVL.fromChunks $ chunks ++ [lastChunk]) {- This looks nice, but has a memory leak as well - why? ~(_,_,chunks) <- runRWST (do run bld vi1 <- State.get lastChunk <- lift (fixVector vi1) State.tell [lastChunk]) cs (v0,0) return $ SVL.fromChunks chunks) -} {-# INLINE put #-} put :: Storable a => a -> Builder a put a = Builder (\cs -> do vi0@(v0,i0) <- State.get ((v1,i1), chunks) <- if i0 < STV.length v0 then return (vi0, []) else -- we could call 'flush' here, but this requires an extra 'SV.take' do chunk <- lift $ STV.unsafeFreeze v0 vi1 <- lift (newChunk cs) return (vi1,[chunk]) lift $ STV.write v1 i1 a State.put (v1, succ i1) return chunks ) {- | Set a laziness break. -} {-# INLINE flush #-} flush :: Storable a => Builder a flush = Builder (\cs -> do chunk <- lift . fixVector =<< State.get State.put =<< lift (newChunk cs) return [chunk] ) {-# INLINE newChunk #-} newChunk :: (Storable a) => ChunkSize -> ST s (STV.Vector s a, Int) newChunk (SVL.ChunkSize size) = fmap (flip (,) 0) $ STV.new_ size {-# INLINE fixVector #-} fixVector :: (Storable a) => (STV.Vector s a, Int) -> ST s (SV.Vector a) fixVector ~(v1,i1) = fmap (SV.take i1) $ STV.unsafeFreeze v1