8 people like it.

Recursive descent parser using active patterns

Parsing without external dependencies. Uses recursive active patterns and regular expressions.

  1: 
  2: 
  3: 
  4: 
  5: 
  6: 
  7: 
  8: 
  9: 
 10: 
 11: 
 12: 
 13: 
 14: 
 15: 
 16: 
 17: 
 18: 
 19: 
 20: 
 21: 
 22: 
 23: 
 24: 
 25: 
 26: 
 27: 
 28: 
 29: 
 30: 
 31: 
 32: 
 33: 
 34: 
 35: 
 36: 
 37: 
 38: 
 39: 
 40: 
 41: 
 42: 
 43: 
 44: 
 45: 
 46: 
 47: 
 48: 
 49: 
 50: 
 51: 
 52: 
 53: 
 54: 
 55: 
 56: 
 57: 
 58: 
 59: 
 60: 
 61: 
 62: 
 63: 
 64: 
 65: 
 66: 
 67: 
 68: 
 69: 
 70: 
 71: 
 72: 
 73: 
 74: 
 75: 
 76: 
 77: 
 78: 
 79: 
 80: 
 81: 
 82: 
 83: 
 84: 
 85: 
 86: 
 87: 
 88: 
 89: 
 90: 
 91: 
 92: 
 93: 
 94: 
 95: 
 96: 
 97: 
 98: 
 99: 
100: 
101: 
102: 
103: 
104: 
105: 
106: 
107: 
108: 
109: 
110: 
111: 
112: 
113: 
114: 
115: 
module Parsing =
    open System.Text.RegularExpressions

    let (|QuotedId|_|) (s : string) =
        let s = s.TrimStart()
        if s.StartsWith("\"") then
            match s.IndexOf('"', 1) with
            | -1 -> None
            | x -> Some(s.[1 .. x - 1], s.Substring(x + 1))
        else
            None

    let idRegex = Regex("^[a-zA-Z_][a-zA-Z_0-9]*")

    let (|UnquotedId|_|) (s : string) =
        let s = s.TrimStart()
        let m = idRegex.Match(s)
        if m.Success then
            Some(m.Value, s.Substring(m.Value.Length))
        else
            None

    let (|Id|_|) s =
        match s with
        | QuotedId(id, rest)
        | UnquotedId(id, rest) -> Some(id, rest)
        | _ -> None

    let (|TL|_|) prefix (s : string) =
        let s = s.TrimStart()
        if s.StartsWith(prefix) then
            Some (s.Substring(prefix.Length))
        else
            None

    let rec (|Params|_|) s =
        match s with
        | Id(x, TL ")" rest) -> Some ([x], ")" + rest)
        | Id(x, TL "," (Params(pars, rest))) -> Some (x :: pars, rest)
        | _ -> None

    type Expr =
        | AllExpr of string * Expr
        | SomeExpr of string * Expr
        | FuncExpr of string * string list
        | AndExpr of Expr * Expr
        | OrExpr of Expr * Expr
        | ImplExpr of Expr * Expr
        | EquivExpr of Expr * Expr
        | NegExpr of Expr
        | IdExpr of string
        | TrueExpr
        | FalseExpr

    let rec (|BaseE|_|) s =
        match s with
        | TL "true" rest -> Some (TrueExpr, rest)
        | TL "false" rest -> Some (FalseExpr, rest)
        | TL "~" (Id(id, rest)) -> Some (NegExpr (IdExpr id), rest)
        | TL "~" (BaseE(e, rest)) -> Some (NegExpr e, rest)
        | TL "ALL" (Id(id, BaseE(e, rest))) -> Some (AllExpr(id, e), rest)
        | TL "SOME" (Id(id, BaseE(e, rest))) -> Some (SomeExpr(id, e), rest)
        | TL "(" (Expr(e, TL ")" rest)) -> Some (e, rest)
        | Id(id, TL "(" (Params(pars, TL ")" rest))) -> Some (FuncExpr(id, pars), rest)
        | Id(id, rest) -> Some (IdExpr id, rest)
        | _ -> None

    and (|AndE|_|) s =
        match s with
        | BaseE(e1, TL "&" (AndE(e2, rest))) -> Some (AndExpr(e1, e2), rest)
        | BaseE(e, rest) -> Some(e, rest)
        | _ -> None

    and (|OrE|_|) s =
        match s with
        | AndE(e1, TL "#" (OrE(e2, rest))) -> Some (OrExpr(e1, e2), rest)
        | AndE(e, rest) -> Some(e, rest)
        | _ -> None

    and (|ImplE|_|) s =
        match s with
        | OrE(e1, TL "->" (ImplE(e2, rest))) -> Some (ImplExpr(e1, e2), rest)
        | OrE(e, rest) -> Some(e, rest)
        | _ -> None

    and (|EquivE|_|) s =
        match s with
        | ImplE(e1, TL "<->" (ImplE(e2, rest))) -> Some (EquivExpr(e1, e2), rest)
        | _ -> None

    and (|Expr|_|) s =
        match s with
        | EquivE(e, rest)
        | ImplE(e, rest) -> Some(e, rest)
        | _ -> None


module ParsingTests =
    open Parsing

    match "ALL x (SOME y (pred(x, y) -> (f(x) <-> g(x, y))))" with
    | Expr(e, "") -> printfn "%A" e
    | _ -> printfn "Failed"

    match "A & ~(B # C) & D # E" with
    | Expr(e, "") -> printfn "%A" e
    | _ -> printfn "Failed"

    match "ALL x A & B" with
    | Expr(e, "") -> printfn "%A" e
    | _ -> printfn "Failed"

    match "ALL x (A & B)" with
    | Expr(e, "") -> printfn "%A" e
    | _ -> printfn "Failed"
namespace System
namespace System.Text
namespace System.Text.RegularExpressions
val s : string
Multiple items
val string : value:'T -> string

Full name: Microsoft.FSharp.Core.Operators.string

--------------------
type string = System.String

Full name: Microsoft.FSharp.Core.string
System.String.TrimStart([<System.ParamArray>] trimChars: char []) : string
System.String.StartsWith(value: string) : bool
System.String.StartsWith(value: string, comparisonType: System.StringComparison) : bool
System.String.StartsWith(value: string, ignoreCase: bool, culture: System.Globalization.CultureInfo) : bool
System.String.IndexOf(value: string) : int
System.String.IndexOf(value: char) : int
System.String.IndexOf(value: string, comparisonType: System.StringComparison) : int
System.String.IndexOf(value: string, startIndex: int) : int
System.String.IndexOf(value: char, startIndex: int) : int
System.String.IndexOf(value: string, startIndex: int, comparisonType: System.StringComparison) : int
System.String.IndexOf(value: string, startIndex: int, count: int) : int
System.String.IndexOf(value: char, startIndex: int, count: int) : int
System.String.IndexOf(value: string, startIndex: int, count: int, comparisonType: System.StringComparison) : int
union case Option.None: Option<'T>
val x : int
union case Option.Some: Value: 'T -> Option<'T>
System.String.Substring(startIndex: int) : string
System.String.Substring(startIndex: int, length: int) : string
val idRegex : Regex

Full name: Script.Parsing.idRegex
Multiple items
type Regex =
  new : pattern:string -> Regex + 1 overload
  member GetGroupNames : unit -> string[]
  member GetGroupNumbers : unit -> int[]
  member GroupNameFromNumber : i:int -> string
  member GroupNumberFromName : name:string -> int
  member IsMatch : input:string -> bool + 1 overload
  member Match : input:string -> Match + 2 overloads
  member Matches : input:string -> MatchCollection + 1 overload
  member Options : RegexOptions
  member Replace : input:string * replacement:string -> string + 5 overloads
  ...

Full name: System.Text.RegularExpressions.Regex

--------------------
Regex(pattern: string) : unit
Regex(pattern: string, options: RegexOptions) : unit
val m : Match
Regex.Match(input: string) : Match
Regex.Match(input: string, startat: int) : Match
Regex.Match(input: string, beginning: int, length: int) : Match
property Group.Success: bool
property Capture.Value: string
property System.String.Length: int
active recognizer QuotedId: string -> (string * string) option

Full name: Script.Parsing.( |QuotedId|_| )
val id : string
val rest : string
active recognizer UnquotedId: string -> (string * string) option

Full name: Script.Parsing.( |UnquotedId|_| )
val id : x:'T -> 'T

Full name: Microsoft.FSharp.Core.Operators.id
val prefix : string
active recognizer Id: string -> (string * string) option

Full name: Script.Parsing.( |Id|_| )
val x : string
active recognizer TL: string -> string -> string option

Full name: Script.Parsing.( |TL|_| )
active recognizer Params: string -> (string list * string) option

Full name: Script.Parsing.( |Params|_| )
val pars : string list
type Expr =
  | AllExpr of string * Expr
  | SomeExpr of string * Expr
  | FuncExpr of string * string list
  | AndExpr of Expr * Expr
  | OrExpr of Expr * Expr
  | ImplExpr of Expr * Expr
  | EquivExpr of Expr * Expr
  | NegExpr of Expr
  | IdExpr of string
  | TrueExpr
  ...

Full name: Script.Parsing.Expr
union case Expr.AllExpr: string * Expr -> Expr
union case Expr.SomeExpr: string * Expr -> Expr
union case Expr.FuncExpr: string * string list -> Expr
type 'T list = List<'T>

Full name: Microsoft.FSharp.Collections.list<_>
union case Expr.AndExpr: Expr * Expr -> Expr
union case Expr.OrExpr: Expr * Expr -> Expr
union case Expr.ImplExpr: Expr * Expr -> Expr
union case Expr.EquivExpr: Expr * Expr -> Expr
union case Expr.NegExpr: Expr -> Expr
union case Expr.IdExpr: string -> Expr
union case Expr.TrueExpr: Expr
union case Expr.FalseExpr: Expr
active recognizer BaseE: string -> (Expr * string) option

Full name: Script.Parsing.( |BaseE|_| )
val e : Expr
Multiple items
active recognizer Expr: string -> (Expr * string) option

Full name: Script.Parsing.( |Expr|_| )

--------------------
type Expr =
  | AllExpr of string * Expr
  | SomeExpr of string * Expr
  | FuncExpr of string * string list
  | AndExpr of Expr * Expr
  | OrExpr of Expr * Expr
  | ImplExpr of Expr * Expr
  | EquivExpr of Expr * Expr
  | NegExpr of Expr
  | IdExpr of string
  | TrueExpr
  ...

Full name: Script.Parsing.Expr
val e1 : Expr
active recognizer AndE: string -> (Expr * string) option

Full name: Script.Parsing.( |AndE|_| )
val e2 : Expr
active recognizer OrE: string -> (Expr * string) option

Full name: Script.Parsing.( |OrE|_| )
active recognizer ImplE: string -> (Expr * string) option

Full name: Script.Parsing.( |ImplE|_| )
active recognizer EquivE: string -> (Expr * string) option

Full name: Script.Parsing.( |EquivE|_| )
module Parsing

from Script
val printfn : format:Printf.TextWriterFormat<'T> -> 'T

Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.printfn

More information

Link:http://fssnip.net/bM
Posted:12 years ago
Author:
Tags: parsing , active patterns