Automata

Scala’s standard library has pattern matching automata built into it. There’s not much documented, so here’s some code that calls it:

package regex;
 
import Console.println
import scala.util.automata._
import scala.util.regexp._
 
/**
This code is in the public domain.
@author Ross Judson
 
*/
object RegTest {
  def main(args: Array[String]): Unit = {
    ints
    strings
  }
  def ints: Unit = {
    import IntBerriSethi.lang._
    import IntBerriSethi._
    println("Regular Pattern Demonstration")
    val samplePattern = Sequ(4, Star(9), Alt(8, Sequ(9,5,6)))
    val detAutom = compile(samplePattern)
    def check(s: Seq[Int]*) = 
      for (val seq <- s) 
        println(seq.toString() + ": " + matchDet(detAutom, seq))
    check(Array(4,9,0,5,0), Array(4,9,9,9,5,6))
    println("Inline check: " + 
      matchPattern(Sequ(1,2,3,4), Array(1,2,3,4)))
  }
  def strings: Unit = {
    import StringBerriSethi.lang._
    import StringBerriSethi._
    val sample = Sequ(Alt("John", "Terri"), Wild, "Judson")
    val detAutom = compile(sample)
    def check(s: Seq[String]*) = 
      for (val seq <- s) 
        println(seq.toString() + ": " + matchDet(detAutom, seq))
    check(Array("John", "Buchanan", "Judson"), 
          Array("Terri", "Lynn", "Judson"),
          Array("John", "Lynn", "Sweat"))
  }
}
 
// Match on sequences of Ints
object IntBerriSethi extends GenericBerriSethi[Int] {
  implicit def intToExp(i: Int): GenericWordExp#Letter = lang.tToLetter(i)
}
// Match on sequences of Strings
object StringBerriSethi extends GenericBerriSethi[String] {
  implicit def stringToExp(i: String): GenericWordExp#Letter = lang.tToLetter(i)
}
 
class GenericBerriSethi[T] extends WordBerrySethi {
  val lang = Lang
  class GenericWordExp extends WordExp {
    type _labelT = GenericLabel
    type _regexpT = RegExp
    abstract class GenericLabel extends Label
    case class GLabel(val v: T) extends GenericLabel
    object Wild extends GenericLabel
  }
  object Lang extends GenericWordExp {
    implicit def wildToLetter(w: Wild.type): Letter = Letter(w)
    implicit def tToLetter(t: T): Letter = Letter(GLabel(t))
    implicit def tToLabel(t: T) = GLabel(t)
  }
  def matchDet(pat: DetWordAutom[Lang.GenericLabel], seq: Seq[T]): Boolean =
    !pat.isSink((0 /: seq) ((state, t) => pat.next(state, Lang.tToLabel(t))))
  def matchNonDet(pat: NondetWordAutom[Lang.GenericLabel], seq: Seq[T]): Boolean = 
    matchDet(new SubsetConstruction(pat).determinize, seq);
  def matchPattern(pat: Lang._regexpT, seq: Seq[T]) = 
    matchNonDet(automatonFrom(pat, 100000), seq)
  def compile(pat: Lang._regexpT) = 
    new SubsetConstruction(automatonFrom(pat, 100000)).determinize
  
  // The next two methods are overrides to
  // put back in some wildcarding behavior.
  // Doesn't work for Sequ(Wild), though...not
  // sure why.
  override protected def seenLabel( r:Lang._regexpT, i:Int, label: _labelT ): Unit = {
    this.labelAt = this.labelAt.update( i, label );
    if( label != Lang.Wild ) {
      this.labels += label ;
    }
  }
  override protected def makeTransition(src: Int, dest:Int, label: _labelT ):Unit = {
    if( label == Lang.Wild )
      defaultq.update(src, dest::defaultq( src ))
    else
      super.makeTransition(src, dest, label)
  }                  
 
}
 
 
 
code/automata.txt · Last modified: 2010/02/11 09:10
 
Recent changes RSS feed Valid XHTML 1.0 Driven by DokuWiki