import Data.List.Lazy (intercalate_, tails_) import Data.List.MultiValuedGrouping (GroupResult, isGroup, memberGroupingStrategy) import Data.List.Replace (replace, substAllM) import Network.Curl.Easy (curl_global_init, initialize) import Network.LongURL (CurlInstance(..), domains, longURL, supportedSites) import Control.Applicative ((<$>), (<*>)) import Control.Monad (guard, MonadPlus(..)) import Control.Monad.Writer.Lazy (runWriterT) import Data.ByteString.Lazy.Char8 (ByteString, pack, split, unpack) import qualified Data.ByteString.Lazy.Char8 as B import Data.Maybe (mapMaybe) import Data.Monoid (Monoid(..)) import Data.Set (fromList, Set, toList) import Text.Regex.Base.Context () import Text.Regex.Base.RegexLike (getAllMatches, makeRegexM, match) import Text.Regex.Posix.ByteString.Lazy (Regex) import Text.Regex.Posix.String () guarded :: MonadPlus m => (a -> Bool) -> a -> m a guarded p x = guard (p x) >> return x bsLines :: ByteString -> [ByteString] bsLines = mapMaybe (\t -> do { (bs:rest) <- return t; guarded (not . (null rest &&) . B.null) bs }) . tails_ . split '\n' -- | Split input into lines, in order to do incremental processing, and to try to limit memory usage mapMLines :: Monad m => (ByteString -> m a) -> ByteString -> m [a] mapMLines f = mapM f . bsLines type CmdLineOpts = Bool swap :: (a, b) -> (b, a) swap (x, y) = (y, x) format :: GroupResult ByteString ByteString -> [ByteString] format gr@(keySet, firstValue, subsequentValues) | isGroup gr = ( mempty : pack "*** Lines mentioning " `mappend` head (toList keySet) : firstValue : subsequentValues ) ++ [mempty] | otherwise = [firstValue] postProcess :: CmdLineOpts -> [(ByteString, Set ByteString)] -> [ByteString] postProcess True = (format =<<) . memberGroupingStrategy . (swap <$>) postProcess False = (fst <$>) main :: IO () main = do curl_global_init 2 -- Initialise just win32sock (if applicable) curl' <- initialize let ci = CurlInstance { curl = curl', userAgent = "hlongurl 0.9.3; Haskell" } supportedDomains <- (domains =<<) <$> supportedSites ci urlRegex <- (makeRegexM $ "http://(" ++ intercalate_ "|" (replace "." "\\." <$> supportedDomains) ++ ")/([-a-zA-Z0-9_\\'/\\\\\\+&%\\$#\\=~])*") :: IO Regex let substURLs :: ByteString -> IO (ByteString, Set ByteString) substURLs = ((fromList <$>) <$>) . runWriterT . (substAllM (((pack . show) <$>) . longURL ci . unpack) <*> getAllMatches . match urlRegex) groupOption :: Bool groupOption = True ((mapM_ B.putStrLn =<<) . (postProcess groupOption <$>) . mapMLines substURLs) =<< B.getContents