Scala’s standard library has pattern matching automata built into it. There’s not much documented, so here’s some code that calls it:
package regex; import Console.println import scala.util.automata._ import scala.util.regexp._ /** This code is in the public domain. @author Ross Judson */ object RegTest { def main(args: Array[String]): Unit = { ints strings } def ints: Unit = { import IntBerriSethi.lang._ import IntBerriSethi._ println("Regular Pattern Demonstration") val samplePattern = Sequ(4, Star(9), Alt(8, Sequ(9,5,6))) val detAutom = compile(samplePattern) def check(s: Seq[Int]*) = for (val seq <- s) println(seq.toString() + ": " + matchDet(detAutom, seq)) check(Array(4,9,0,5,0), Array(4,9,9,9,5,6)) println("Inline check: " + matchPattern(Sequ(1,2,3,4), Array(1,2,3,4))) } def strings: Unit = { import StringBerriSethi.lang._ import StringBerriSethi._ val sample = Sequ(Alt("John", "Terri"), Wild, "Judson") val detAutom = compile(sample) def check(s: Seq[String]*) = for (val seq <- s) println(seq.toString() + ": " + matchDet(detAutom, seq)) check(Array("John", "Buchanan", "Judson"), Array("Terri", "Lynn", "Judson"), Array("John", "Lynn", "Sweat")) } } // Match on sequences of Ints object IntBerriSethi extends GenericBerriSethi[Int] { implicit def intToExp(i: Int): GenericWordExp#Letter = lang.tToLetter(i) } // Match on sequences of Strings object StringBerriSethi extends GenericBerriSethi[String] { implicit def stringToExp(i: String): GenericWordExp#Letter = lang.tToLetter(i) } class GenericBerriSethi[T] extends WordBerrySethi { val lang = Lang class GenericWordExp extends WordExp { type _labelT = GenericLabel type _regexpT = RegExp abstract class GenericLabel extends Label case class GLabel(val v: T) extends GenericLabel object Wild extends GenericLabel } object Lang extends GenericWordExp { implicit def wildToLetter(w: Wild.type): Letter = Letter(w) implicit def tToLetter(t: T): Letter = Letter(GLabel(t)) implicit def tToLabel(t: T) = GLabel(t) } def matchDet(pat: DetWordAutom[Lang.GenericLabel], seq: Seq[T]): Boolean = !pat.isSink((0 /: seq) ((state, t) => pat.next(state, Lang.tToLabel(t)))) def matchNonDet(pat: NondetWordAutom[Lang.GenericLabel], seq: Seq[T]): Boolean = matchDet(new SubsetConstruction(pat).determinize, seq); def matchPattern(pat: Lang._regexpT, seq: Seq[T]) = matchNonDet(automatonFrom(pat, 100000), seq) def compile(pat: Lang._regexpT) = new SubsetConstruction(automatonFrom(pat, 100000)).determinize // The next two methods are overrides to // put back in some wildcarding behavior. // Doesn't work for Sequ(Wild), though...not // sure why. override protected def seenLabel( r:Lang._regexpT, i:Int, label: _labelT ): Unit = { this.labelAt = this.labelAt.update( i, label ); if( label != Lang.Wild ) { this.labels += label ; } } override protected def makeTransition(src: Int, dest:Int, label: _labelT ):Unit = { if( label == Lang.Wild ) defaultq.update(src, dest::defaultq( src )) else super.makeTransition(src, dest, label) } }