4 people like it.

A Staged Regular Expression Matcher

A staged a regular expression interpreter is a compiler!!!

  1: 
  2: 
  3: 
  4: 
  5: 
  6: 
  7: 
  8: 
  9: 
 10: 
 11: 
 12: 
 13: 
 14: 
 15: 
 16: 
 17: 
 18: 
 19: 
 20: 
 21: 
 22: 
 23: 
 24: 
 25: 
 26: 
 27: 
 28: 
 29: 
 30: 
 31: 
 32: 
 33: 
 34: 
 35: 
 36: 
 37: 
 38: 
 39: 
 40: 
 41: 
 42: 
 43: 
 44: 
 45: 
 46: 
 47: 
 48: 
 49: 
 50: 
 51: 
 52: 
 53: 
 54: 
 55: 
 56: 
 57: 
 58: 
 59: 
 60: 
 61: 
 62: 
 63: 
 64: 
 65: 
 66: 
 67: 
 68: 
 69: 
 70: 
 71: 
 72: 
 73: 
 74: 
 75: 
 76: 
 77: 
 78: 
 79: 
 80: 
 81: 
 82: 
 83: 
 84: 
 85: 
 86: 
 87: 
 88: 
 89: 
 90: 
 91: 
 92: 
 93: 
 94: 
 95: 
 96: 
 97: 
 98: 
 99: 
100: 
101: 
102: 
103: 
104: 
105: 
// http://www.cs.princeton.edu/courses/archive/spr09/cos333/beautiful.html
// http://scala-lms.github.io/tutorials/regex.html


#r "../packages/FSharp.Compiler.Service.1.3.1.0/lib/net45/FSharp.Compiler.Service.dll"
#r "../packages/QuotationCompiler.0.0.7-alpha/lib/net45/QuotationCompiler.dll"


open QuotationCompiler

open Microsoft.FSharp.Quotations

// <@ fun x -> (% <@ x @> ) @> ~ lambda (fun x -> x)
let lambda (f : Expr<'T> -> Expr<'R>) : Expr<'T -> 'R> =
    let var = new Var("__temp__", typeof<'T>)
    Expr.Cast<_>(Expr.Lambda(var,  f (Expr.Cast<_>(Expr.Var var))))

// <@ fun x y -> (% <@ x @> ... <@ y @> ) @> ~ lambda (fun x y -> x ... y )
let lambda2 (f : Expr<'T> -> Expr<'S> -> Expr<'R>) : Expr<'T -> 'S -> 'R> =
    let var = new Var("__temp__", typeof<'T>)
    let var' = new Var("__temp'__", typeof<'S>)
    Expr.Cast<_>(Expr.Lambda(var, Expr.Lambda(var',  f (Expr.Cast<_>(Expr.Var var)) (Expr.Cast<_>(Expr.Var var')))))

let rec matchsearch (regexp : string) (text : Expr<string>) : Expr<bool> = 
    if regexp.[0] = '^' then
        matchhere regexp 1 text <@ 0 @>
    else
    <@
        let text = %text
        let mutable start = -1
        let mutable found = false
        while not found && start < text.Length do
            start <- start + 1
            found <- (% lambda2(fun text start -> matchhere regexp 0 text start) ) text start
        found
    @>

and matchhere (regexp : string) (restart : int) 
              (text : Expr<string>) (start : Expr<int>) : Expr<bool> = 
    if restart = regexp.Length then
        <@ true @>
    else if regexp.[restart] = '$' && restart + 1 = regexp.Length then
        <@ %start = String.length %text @>
    else if restart + 1 < regexp.Length && regexp.[restart + 1] = '*' then
        matchstar regexp.[restart] regexp (restart + 2) text start
    else
    <@ 
        if %start < (%text).Length && (% matchchar regexp.[restart] <@ (%text).[%start] @> ) then
            (% matchhere regexp (restart + 1) text <@ %start + 1 @> )
        else false
    @>

and matchstar (c : char) (regexp : string) (restart : int) (text : Expr<string>) (start: Expr<int>) : Expr<bool> =
    <@
        let text = %text
        let mutable sstart = %start
        let mutable found = (% lambda2(fun text sstart -> matchhere regexp restart text sstart) ) text sstart
        let mutable failed = false
        while not failed && not found && sstart < text.Length do
          failed <- not ((% lambda2(fun (text : Expr<string>) (sstart : Expr<int>) -> matchchar c <@ (%text).[%sstart] @>) ) text sstart)
          sstart <- sstart + 1
          found <- (% lambda2(fun text sstart -> matchhere regexp restart text sstart) ) text sstart
    
        not failed && found
    @>

and matchchar (c: char) (t : Expr<char>) : Expr<bool> = 
    if c = '.' then <@ true @> 
    else <@ c = %t @>

let compileRegEx (pattern : string) : string -> bool = 
    let f = QuotationCompiler.ToFunc(lambda (fun text -> matchsearch pattern text))
    f ()

let testmatch (f : string -> bool) (text : string) (expected : bool) = 
    if f text <> expected then
        failwith "oups"

 // Examples

let ``^hello$`` = compileRegEx "^hello$"
let ``hell`` = compileRegEx "hell"
let ``hel*`` = compileRegEx "hel*"
let ``hel*$`` = compileRegEx "hel*$"
let ``ab`` = compileRegEx "ab"
let ``^ab`` = compileRegEx "^ab"
let ``a*b`` = compileRegEx "a*b"
let ``^ab*`` = compileRegEx "^ab*"
let ``^ab*$`` = compileRegEx "^ab*$"

testmatch ``^hello$`` "hello" true
testmatch ``^hello$`` "hell" false
testmatch ``hell`` "hello" true
testmatch ``hell`` "hell" true
testmatch ``hel*`` "he" true
testmatch ``hel*$`` "hello" false
testmatch ``hel*`` "yo hello" true
testmatch ``ab`` "hello ab hello" true
testmatch ``^ab``  "hello ab hello" false
testmatch ``a*b`` "hello aab hello" true
testmatch ``^ab*`` "abcd"  true
testmatch ``^ab*``  "a"  true
testmatch ``^ab*`` "ac" true
testmatch ``^ab*`` "bac" false
testmatch ``^ab*$`` "ac" false
namespace Microsoft
namespace Microsoft.FSharp
namespace Microsoft.FSharp.Quotations
val lambda : f:(Expr<'T> -> Expr<'R>) -> Expr<('T -> 'R)>

Full name: Script.lambda
val f : (Expr<'T> -> Expr<'R>)
Multiple items
type Expr =
  override Equals : obj:obj -> bool
  member GetFreeVars : unit -> seq<Var>
  member Substitute : substitution:(Var -> Expr option) -> Expr
  member ToString : full:bool -> string
  member CustomAttributes : Expr list
  member Type : Type
  static member AddressOf : target:Expr -> Expr
  static member AddressSet : target:Expr * value:Expr -> Expr
  static member Application : functionExpr:Expr * argument:Expr -> Expr
  static member Applications : functionExpr:Expr * arguments:Expr list list -> Expr
  ...

Full name: Microsoft.FSharp.Quotations.Expr

--------------------
type Expr<'T> =
  inherit Expr
  member Raw : Expr

Full name: Microsoft.FSharp.Quotations.Expr<_>
val var : Var
Multiple items
type Var =
  interface IComparable
  new : name:string * typ:Type * ?isMutable:bool -> Var
  member IsMutable : bool
  member Name : string
  member Type : Type
  static member Global : name:string * typ:Type -> Var

Full name: Microsoft.FSharp.Quotations.Var

--------------------
new : name:string * typ:System.Type * ?isMutable:bool -> Var
val typeof<'T> : System.Type

Full name: Microsoft.FSharp.Core.Operators.typeof
static member Expr.Cast : source:Expr -> Expr<'T>
static member Expr.Lambda : parameter:Var * body:Expr -> Expr
static member Expr.Var : variable:Var -> Expr
val lambda2 : f:(Expr<'T> -> Expr<'S> -> Expr<'R>) -> Expr<('T -> 'S -> 'R)>

Full name: Script.lambda2
val f : (Expr<'T> -> Expr<'S> -> Expr<'R>)
val var' : Var
val matchsearch : regexp:string -> text:Expr<string> -> Expr<bool>

Full name: Script.matchsearch
val regexp : string
Multiple items
val string : value:'T -> string

Full name: Microsoft.FSharp.Core.Operators.string

--------------------
type string = System.String

Full name: Microsoft.FSharp.Core.string
val text : Expr<string>
type bool = System.Boolean

Full name: Microsoft.FSharp.Core.bool
val matchhere : regexp:string -> restart:int -> text:Expr<string> -> start:Expr<int> -> Expr<bool>

Full name: Script.matchhere
val text : string
val mutable start : int
val mutable found : bool
val not : value:bool -> bool

Full name: Microsoft.FSharp.Core.Operators.not
property System.String.Length: int
val start : Expr<int>
val restart : int
Multiple items
val int : value:'T -> int (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.int

--------------------
type int = int32

Full name: Microsoft.FSharp.Core.int

--------------------
type int<'Measure> = int

Full name: Microsoft.FSharp.Core.int<_>
module String

from Microsoft.FSharp.Core
val length : str:string -> int

Full name: Microsoft.FSharp.Core.String.length
val matchstar : c:char -> regexp:string -> restart:int -> text:Expr<string> -> start:Expr<int> -> Expr<bool>

Full name: Script.matchstar
val matchchar : c:char -> t:Expr<char> -> Expr<bool>

Full name: Script.matchchar
val c : char
Multiple items
val char : value:'T -> char (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.char

--------------------
type char = System.Char

Full name: Microsoft.FSharp.Core.char
val mutable sstart : int
val sstart : Expr<int>
val mutable failed : bool
val t : Expr<char>
val compileRegEx : pattern:string -> (string -> bool)

Full name: Script.compileRegEx
val pattern : string
val f : (unit -> string -> bool)
val testmatch : f:(string -> bool) -> text:string -> expected:bool -> unit

Full name: Script.testmatch
val f : (string -> bool)
val expected : bool
val failwith : message:string -> 'T

Full name: Microsoft.FSharp.Core.Operators.failwith
val ( ^hello$ ) : (string -> bool)

Full name: Script.( ^hello$ )
val ( hel* ) : (string -> bool)

Full name: Script.( hel* )
val ( hel*$ ) : (string -> bool)

Full name: Script.( hel*$ )
val ( ^ab ) : (string -> bool)

Full name: Script.( ^ab )
val ( a*b ) : (string -> bool)

Full name: Script.( a*b )
val ( ^ab* ) : (string -> bool)

Full name: Script.( ^ab* )
val ( ^ab*$ ) : (string -> bool)

Full name: Script.( ^ab*$ )
Raw view Test code New version

More information

Link:http://fssnip.net/tE
Posted:8 years ago
Author:Nick Palladinos
Tags: regex , staging