module NGram ( nGram ) where import List (sort) nGram :: Ord a => [a] -> Int -> [([a], Int)] nGram str = f where table = mkTable str f n = g table where g [] = [] g ((p,_):xs) = case takeN n p of Nothing -> g xs Just x -> case break (\(_,q) -> q < n) xs of (ys,zs) -> (x, 1 + length ys) : g zs mkTable :: Ord a => [a] -> [([a], Int)] mkTable str = zip p q where p = sort (suffixes str) q = 0 : zipWith phi p (tail p) phi (a:as) (b:bs) | a==b = 1 + phi as bs phi _ _ = 0 suffixes :: [a] -> [[a]] suffixes [] = [] suffixes a@(_:xs) = a : suffixes xs takeN :: Int -> [a] -> Maybe [a] takeN 0 _ = Just [] takeN (n+1) (x:xs) = do ys <- takeN n xs return (x:ys) takeN _ _ = Nothing