module NGram
( nGram
) where
import List (sort)
nGram :: Ord a => [a] -> Int -> [([a], Int)]
nGram str = f
where
table = mkTable str
f n = g table
where
g [] = []
g ((p,_):xs) =
case takeN n p of
Nothing -> g xs
Just x ->
case break (\(_,q) -> q < n) xs of
(ys,zs) -> (x, 1 + length ys) : g zs
mkTable :: Ord a => [a] -> [([a], Int)]
mkTable str = zip p q
where
p = sort (suffixes str)
q = 0 : zipWith phi p (tail p)
phi (a:as) (b:bs) | a==b = 1 + phi as bs
phi _ _ = 0
suffixes :: [a] -> [[a]]
suffixes [] = []
suffixes a@(_:xs) = a : suffixes xs
takeN :: Int -> [a] -> Maybe [a]
takeN 0 _ = Just []
takeN (n+1) (x:xs) = do
ys <- takeN n xs
return (x:ys)
takeN _ _ = Nothing