# # sregexp.rb - A simple regular expression library # # Copyright (C) 2002 Masahiro Sakai # All rights reserved. # This is free software with ABSOLUTELY NO WARRANTY. # # You can redistribute it and/or modify it under the terms of # the Ruby's licence. # # # This is a proto-type program which synthesizes a deterministic finite # automaton from a given regular expression. The method is based on the # one explained in the famous text book "The design and analysis of # computer algorithms" by A.V.Aho, J.E. Hopcroft and J.D. Ullman from # Addison Wesley (translated into Japanese from SAIENSU-SHA, 1977). # # A regular expression is composed by # SRegexp.epsilon (null string) # SRegexp.empty (emptyset) # SRegexp.union(*patterns) (union) # SRegexp.concat(*patterns) (concatenation) # SRegexp.star(pattern) (Kleene's star). # # For the user, SRegexp.new and SRegexp#accepted? are main interface routines. # SRegexp.new synthesizes automata from a given regular expression. # The SRegexp.accept? tests whether input string of symbols is in the specified # regular set. See the following example use of them. # # Example usage: # re = SRegexp.new(SRegexp.star(SRegexp.concat(0,1))) # re.accept? [0,1,0,1] #=> true # # Remark: SRegexp.new(SRegexp.empty) synthesizes the automaton which does not # accept any string including the empty string. class SRegexp # FIXME: XXX Arrow = Struct.new(:from, :input, :to) class Arrow def ==(rhs) to_a == rhs.to_a end alias eql? == def hash to_a.hash end end AM = Struct.new(:initial, :final, :arrows, :ep) class AM Ep = Struct.new(:from, :to) Syntax = Class.new Epsilon = Syntax.new def Epsilon.to_am(sf) s = sf.new f = sf.new AM.new(s, f, [], [Ep.new(s, f)]) end Empty = Syntax.new def Empty.to_am(sf) AM.new(sf.new, sf.new, [], []) end class Union < Syntax def initialize(patterns) super() @patterns = patterns end def to_am(sf) ams = @patterns.collect{|item| AM.compile(item, sf)} s = sf.new f = sf.new ep = Array.new ar = Array.new ams.each{|item| ar.concat(item.arrows) ep.concat(item.ep) ep.push(Ep.new(s, item.initial)) ep.push(Ep.new(item.final, f)) } AM.new(s, f, ar, ep) end end class Concatenation < Syntax def initialize(patterns) super() @patterns = patterns end def to_am(sf) ams = @patterns.collect{|item| AM.compile(item, sf)} s = sf.new f = sf.new ep = Array.new ar = Array.new ams.each{|item| ep.concat(item.ep) ar.concat(item.arrows) } ep.push(Ep.new(s, ams.first.initial)) ep.push(Ep.new(ams.last.final, f)) 0.upto(ams.size-2){|i| ep.push(Ep.new(ams[i].final, ams[i+1].initial)) } AM.new(s, f, ar, ep) end end class KleenesStar < Syntax def initialize(pattern) super() @pattern = pattern end def to_am(sf) am = AM.compile(@pattern, sf) s = sf.new f = sf.new ep = am.ep ep.push(Ep.new(s, am.initial)) ep.push(Ep.new(am.final, f)) ep.push(Ep.new(am.final, am.initial)) ep.push(Ep.new(s, f)) AM.new(s, f, am.arrows, ep) end end def self.compile(obj, sf) if obj.is_a? Syntax obj.to_am(sf) else s = sf.new f = sf.new AM.new(s, f, [Arrow.new(s, obj, f)], []) end end def ep_reach(a, &block) ep.each{|b| if b.from == a yield(b.to) ep_reach(b.to, &block) end } end end class DFAM attr_reader :initial def initialize(am) @am = am @inputs = @am.arrows.collect{|ar| ar.input} @inputs.uniq! dfam() numbering() minimize() @table = @states.collect{|s| @arrows.find_all{|ar| ar.from == s} } @am = @inputs = @arrows = @states = nil end def transit(state, input) if t = @table[state] t.each{|ar| return ar.to if ar.input == input } end nil end def final?(state) @finals.include? state end private def numbering m = Hash.new @states.each_with_index{|item, index| m[item] = index } @states.collect!{|item| m[item]} @initial = m[@initial] @finals.collect!{|item| m[item]} # @finals.sort! @arrows.each{|ar| ar.from = m[ar.from] ar.to = m[ar.to] } end def minimize m = Hash.new s = Array.new t = Array.new for i in 0...(@states.size) do for j in (i+1)...(@states.size) s.clear t.clear @arrows.each{|ar| s.push([ar.input, ar.to]) if ar.from == @states[i] t.push([ar.input, ar.to]) if ar.from == @states[j] } s.sort!{|a,b| a[1]<=>b[1]} t.sort!{|a,b| a[1]<=>b[1]} if s == t and @finals.include?(s) == @finals.include?(t) x = @states[i] while m.has_key? x x = m[@states[i]] end m[@states[j]] = x end end end @states.each{|s| m[s] ||= s} @states.collect!{|a| m[a]} @states.uniq! @initial = m[@initial] @finals.collect!{|item| m[item]} @finals.uniq! @arrows.each{|ar| ar.from = m[ar.from] ar.to = m[ar.to] } @arrows.uniq! end def dfam @states = Array.new @arrows = Array.new @finals = Array.new @initial = expand([@am.initial]) q = [@initial] while s = q.pop unless @states.include?(s) @states.push(s) delta(s){|i, n| q.push(n) @arrows.push(Arrow.new(s, i, n)) } end end @states.each{|state| state.each{|a| if a == @am.final @finals.push(state) break end } } end def delta(state) @inputs.each{|i| d = Array.new @am.arrows.each{|ar| d.push(ar.to) if ar.input == i and state.include? ar.from } yield i, expand(d) unless d.empty? } end def expand(state) e = state.dup state.each{|a| @am.ep_reach(a){|b| e.push(b) } } e.uniq! e.sort! e end end class StateFactory def initialize @i = -1 end def new @i = @i.succ end end def initialize(pattern) am = AM.compile(pattern, StateFactory.new) @dfam = DFAM.new(am) end def accept?(stream) s = @dfam.initial stream.each{|item| s = @dfam.transit(s, item) return false if s.nil? } @dfam.final?(s) end alias === accept? def self.union(*patterns) AM::Union.new(patterns) end def self.concat(*patterns) AM::Concatenation.new(patterns) end def self.star(pattern) AM::KleenesStar.new(pattern) end def self.empty AM::Empty end def self.epsilon AM::Epsilon end end if __FILE__ == $0 p SRegexp.new(SRegexp.star(SRegexp.union(1,2,3))).accept? 1..3 a = Object.new p SRegexp.new(SRegexp.concat(SRegexp.star(SRegexp.concat(0, 1)),a)).accept? [0, 1, 0, 1, a] p SRegexp.new(SRegexp.epsilon).accept? [] p SRegexp.new(SRegexp.empty).accept?([]) == false end