2007-12-30 [長年日記]
λ. HaskellでCYK法
文脈自由文法のパーサーの最も効率の良いアルゴリズムの一つとしてCYK法(CYK algorithm)があるのだけど、これまでどういうアルゴリズムか知らなかったので、ちょっと調べてHaskellで素朴に書いてみた。 手続き型言語で書くとテーブルをボトムアップに作る手順が肝になるが、Haskellだと順序を気にせず再帰的にテーブルを定義するだけで良い。
import Data.Array
type S = N
type N = Int
data Prod t = T t | !N :++ !N
type Rules t = Array N [Prod t]
type Table = Array Int (Array Int (Array N Bool))
parse :: Eq t => Rules t -> S -> [t] -> Bool
parse rules s xs = table ! 0 ! (n-1) ! s
where
n = length xs
table = mkTable rules (listArray (0, n-1) xs)
mkTable :: Eq t => Rules t -> Array Int t -> Table
mkTable rules ary = seq rules $ table
where
table =
mkA lb ub $ \i ->
mkA i ub $ \j ->
flip fmap rules $ \ps -> or $ do
p <- ps
case p of
T t ->
[i==j && t==ary!i]
n1 :++ n2 ->
[lookup i k n1 && lookup (k+1) j n2 | k <- [i..j-1]]
(lb, ub) = bounds ary
lookup i j n = table ! i ! j ! n
mkA i j f = listArray (i,j) [f x | x <- range (i,j)]
で、適当な例で試す。
plus = 0
mult = 1
lpar = 2
rpar = 3
expr = 4
factor = 5
plus_expr = 6
mult_factor = 7
expr_rpar = 8
rules :: Rules String
rules = array (0, 8)
[ (expr, num ++ [lpar :++ expr_rpar, factor :++ mult_factor, expr :++ plus_expr])
, (factor, num ++ [lpar :++ expr_rpar, factor :++ mult_factor])
, (plus_expr, [plus :++ expr])
, (mult_factor, [mult :++ factor])
, (expr_rpar, [expr :++ rpar])
, (plus, [T "+"])
, (mult, [T "*"])
, (lpar, [T "("])
, (rpar, [T ")"])
]
where
num = [T (show i) | i <- [0..10]]
test = parse rules expr ["1", "*", "(", "2", "+", "3", ")"]
[ツッコミを入れる]
