26 people like it.

Uri Parser

A Uri parser using the Cashel library [1]. This implementation is using ArraySegment as the underlying state, as I'm using it within a server, but it would be trivial to switch it to using a list. Also, note that I am not parsing the Uri into any specific structure, though that, too, would be trivial. For my current purposes, I just needed to validate the Uri. [1] https://github.com/panesofglass/cashel

Monadic parser combinators for parsing a Uri.

  1: 
  2: 
  3: 
  4: 
  5: 
  6: 
  7: 
  8: 
  9: 
 10: 
 11: 
 12: 
 13: 
 14: 
 15: 
 16: 
 17: 
 18: 
 19: 
 20: 
 21: 
 22: 
 23: 
 24: 
 25: 
 26: 
 27: 
 28: 
 29: 
 30: 
 31: 
 32: 
 33: 
 34: 
 35: 
 36: 
 37: 
 38: 
 39: 
 40: 
 41: 
 42: 
 43: 
 44: 
 45: 
 46: 
 47: 
 48: 
 49: 
 50: 
 51: 
 52: 
 53: 
 54: 
 55: 
 56: 
 57: 
 58: 
 59: 
 60: 
 61: 
 62: 
 63: 
 64: 
 65: 
 66: 
 67: 
 68: 
 69: 
 70: 
 71: 
 72: 
 73: 
 74: 
 75: 
 76: 
 77: 
 78: 
 79: 
 80: 
 81: 
 82: 
 83: 
 84: 
 85: 
 86: 
 87: 
 88: 
 89: 
 90: 
 91: 
 92: 
 93: 
 94: 
 95: 
 96: 
 97: 
 98: 
 99: 
100: 
101: 
102: 
103: 
104: 
105: 
106: 
107: 
108: 
109: 
110: 
111: 
112: 
113: 
114: 
115: 
116: 
117: 
118: 
119: 
120: 
121: 
122: 
123: 
124: 
125: 
126: 
127: 
128: 
129: 
130: 
131: 
132: 
133: 
134: 
135: 
136: 
137: 
138: 
139: 
140: 
141: 
142: 
143: 
144: 
145: 
146: 
147: 
148: 
149: 
150: 
151: 
152: 
153: 
154: 
155: 
156: 
157: 
158: 
159: 
160: 
161: 
162: 
163: 
164: 
165: 
// NOTE: Requires Cashel: https://github.com/panesofglass/cashel
open System
open Cashel.Parser
open Cashel.ArraySegmentPrimitives

module Primitives =
  open System.Globalization

  let ch = matchToken
  let str = matchTokens

  let token = any [ 0uy..127uy ]
  let upper = any [ 'A'B..'Z'B ]
  let lower = any [ 'a'B..'z'B ]
  let alpha = upper +++ lower
  let digit = any [ '0'B..'9'B ]
  let digitval = parse {
    let! d = digit
    return int d - int 48uy }
  let alphanum = alpha +++ digit
  let control = any [ 0uy..31uy ] +++ ch 127uy
  let tab = ch '\t'B
  let lf = ch '\n'B
  let cr = ch '\r'B
  let crlf = str (List.ofSeq "\r\n"B)
  let space = ch ' 'B
  let dquote = ch '"'B
  let hash = ch '#'B
  let percent = ch '%'B
  let plus = ch '+'B
  let hyphen = ch '-'B
  let dot = ch '.'B
  let colon = ch ':'B
  let slash = ch '/'B
  let qmark = ch '?'B
  let xupper = any [ 'A'B..'F'B ]
  let xlower = any [ 'a'B..'f'B ]
  let xchar = xupper +++ xlower
  let xdigit = digit +++ xchar
  let escaped = parse {
    do! forget percent
    let! d1 = xdigit
    let! d2 = xdigit
    // NOTE: There is likely a more efficient way to calculate the hex byte.
    let hex = UInt16.Parse(String([|char d1; char d2|]), NumberStyles.HexNumber)
    return byte hex }

module UriParser =
  open Primitives

  let scheme = parse {
    let! init = alpha
    let! more = alpha +++ digit +++ plus +++ hyphen +++ dot |> repeat1
    return init::more }
  let mark = any [ '-'B;'_'B;'.'B;'!'B;'~'B;'*'B;'''B;'('B;')'B ]
  let reserved = any [ ';'B;'/'B;'?'B;':'B;'@'B;'&'B;'='B;'+'B;'$'B;','B ]
  let unreserved = alpha +++ mark
  let pchar = unreserved +++ escaped +++ any [ ':'B;'@'B;'&'B;'='B;'+'B;'$'B;'.'B ]
  let uric = reserved +++ unreserved +++ escaped
  let uricNoSlash = unreserved +++ escaped +++ any [ ';'B;'?'B;':'B;'@'B;'&'B;'='B;'+'B;'$'B;','B ]
  let relSegment = unreserved +++ escaped +++ any [ ';'B;'@'B;'&'B;'='B;'+'B;'$'B;','B ] |> repeat1
  let regName = unreserved +++ escaped +++ any [ '$'B;','B;';'B;':'B;'@'B;'&'B;'='B;'+'B ] |> repeat1
  let userInfo = unreserved +++ escaped +++ any [ ';'B;':'B;'&'B;'='B;'+'B;'$'B;','B ] |> repeat

  /// param returns a series of pchar.
  let param = pchar |> repeat
  /// segment returns a list of ;-separated params.
  let segment = parse {
    let! hd = param
    let! tl = ch ';'B >>= (fun c1 -> param >>= (fun c2 -> result (c1::c2))) |> repeat
    return (hd::tl |> List.concat) }
  /// pathSegments returns a list of /-separated segments.
  let pathSegments = parse {
    let! hd = segment
    let! tl = ch '/'B >>= (fun s1 -> segment >>= (fun s2 -> result (s1::s2))) |> repeat
    return (hd::tl |> List.concat) }

  let uriAbsPath = parse {
    let! p1 = ch '/'B
    let! p2 = pathSegments
    return p1::p2 }
  let relPath = parse {
    let! hd = relSegment
    let! tl = !? uriAbsPath
    match tl with | Some(t) -> return hd @ t | _ -> return hd }

  let uriQuery = uric |> repeat
  let uriFragment = uric |> repeat

  let ipv4Address = parse {
    let d = '.'B
    let ``digit 1-3`` = digit |> repeat1While (fun ds -> ds.Length < 3)
    let! d1 = ``digit 1-3`` .>> dot
    let! d2 = ``digit 1-3`` .>> dot
    let! d3 = ``digit 1-3`` .>> dot
    let! d4 = ``digit 1-3`` 
    return d1 @ d::d2 @ d::d3 @ d::d4 }

  let private _label first = parse {
    let! a1 = first
    let! a2 = alphanum +++ hyphen |> repeat
    return a1::a2 }
// TODO: Ensure the last item is not a hyphen.
//    match a1::a2 with
//    | hd::[] as res -> return res
//    | res ->
//        let! a3 = alphanum
//        return res @ [a3] }
  let topLabel = _label alpha
  let domainLabel = _label alphanum
  let hostname = parse {
    let! dl = !?(domainLabel >>= (fun d -> dot >>= (fun dt -> result (d @ [dt]))))
    let! tl = topLabel .>> !?dot
    match dl with Some(sub) -> return sub @ tl | _ -> return tl }

  let host = hostname +++ ipv4Address
  let port = digit |> repeat
  
  let hostport = parse {
    let! h = host
    let! p = !?(colon >>= (fun c -> port >>= (fun p -> result (c::p))))
    match p with Some(v) -> return h @ v | _ -> return h }
  let server = parse {
    let! ui = !?(userInfo >>= (fun ui -> ch '@'B >>= (fun a -> result (ui @ [a]))))
    let! hp = hostport
    match ui with Some(info) -> return info @ hp | _ -> return hp }
  let uriAuthority = server +++ regName
  let netPath = parse {
    let! slash = str (List.ofSeq "//"B)
    let! authority = uriAuthority
    let! absPath = !?uriAbsPath
    let domain = slash @ authority
    match absPath with Some(path) -> return domain @ path | _ -> return domain }

  let opaquePart = parse {
    let! u1 = uricNoSlash
    let! u2 = uric |> repeat
    return u1::u2 }
  let hierPart = parse {
    let! path = netPath +++ uriAbsPath
    let! query = !?(qmark >>= (fun m -> uriQuery >>= (fun q -> result (m::q))))
    match query with Some(qry) -> return path @ qry | _ -> return path }

  let absoluteUri = parse {
    let! s = scheme
    let! c = colon
    let! rest = hierPart +++ opaquePart
    return s @ c::rest }
  let relativeUri = parse {
    let! path = uriAbsPath +++ relPath
    let! query = !?(qmark >>= (fun m -> uriQuery >>= (fun q -> result (m::q))))
    match query with Some(qry) -> return path @ qry | _ -> return path }

  let uriReference = parse {
    let! uri = !?(absoluteUri +++ relativeUri)
    let! frag = !?(hash >>= (fun h -> uriFragment >>= (fun f -> result (h::f))))
    match uri with
    | None ->
        match frag with
        | None -> return []
        | Some(f) -> return f
    | Some(u) ->
        match frag with
        | None -> return u
        | Some(f) -> return u @ f }

Usage

1: 
2: 
3: 
4: 
5: 
let v = uriReference (ArraySegment<_>("http://ryan:riley@wizardsofsmart.net:8090?query=this#mark"B));;
new System.String(v |> Option.get |> fst |> List.map char |> List.toArray);;

let v = uriReference (ArraySegment<_>("#mark"B));;
new System.String(v |> Option.get |> fst |> List.map char |> List.toArray);;
namespace System
namespace System.Globalization
val ch : (byte -> obj)

Full name: Script.Primitives.ch
val str : (byte list -> obj)

Full name: Script.Primitives.str
val token : obj

Full name: Script.Primitives.token
val upper : obj

Full name: Script.Primitives.upper
val lower : obj

Full name: Script.Primitives.lower
val alpha : obj

Full name: Script.Primitives.alpha
val digit : 'a (requires member ( +++ ) and member ( +++ ))

Full name: Script.Primitives.digit
val digitval : 'a

Full name: Script.Primitives.digitval
Multiple items
val int : value:'T -> int (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.int

--------------------
type int = int32

Full name: Microsoft.FSharp.Core.int

--------------------
type int<'Measure> = int

Full name: Microsoft.FSharp.Core.int<_>
val alphanum : '_arg3

Full name: Script.Primitives.alphanum
val control : obj

Full name: Script.Primitives.control
val tab : obj

Full name: Script.Primitives.tab
val lf : obj

Full name: Script.Primitives.lf
val cr : obj

Full name: Script.Primitives.cr
val crlf : obj

Full name: Script.Primitives.crlf
Multiple items
module List

from Microsoft.FSharp.Collections

--------------------
type List<'T> =
  | ( [] )
  | ( :: ) of Head: 'T * Tail: 'T list
  interface IEnumerable
  interface IEnumerable<'T>
  member Head : 'T
  member IsEmpty : bool
  member Item : index:int -> 'T with get
  member Length : int
  member Tail : 'T list
  static member Cons : head:'T * tail:'T list -> 'T list
  static member Empty : 'T list

Full name: Microsoft.FSharp.Collections.List<_>
val ofSeq : source:seq<'T> -> 'T list

Full name: Microsoft.FSharp.Collections.List.ofSeq
val space : obj

Full name: Script.Primitives.space
val dquote : obj

Full name: Script.Primitives.dquote
val hash : obj

Full name: Script.Primitives.hash
val percent : obj

Full name: Script.Primitives.percent
val plus : obj

Full name: Script.Primitives.plus
val hyphen : obj

Full name: Script.Primitives.hyphen
val dot : obj

Full name: Script.Primitives.dot
val colon : obj

Full name: Script.Primitives.colon
val slash : obj

Full name: Script.Primitives.slash
val qmark : obj

Full name: Script.Primitives.qmark
val xupper : 'a (requires member ( +++ ) and member ( +++ ) and member ( +++ ))

Full name: Script.Primitives.xupper
val xlower : 'a (requires member ( +++ ) and member ( +++ ) and member ( +++ ))

Full name: Script.Primitives.xlower
val xchar : '_arg3 (requires member ( +++ ) and member ( +++ ))

Full name: Script.Primitives.xchar
val xdigit : '_arg3

Full name: Script.Primitives.xdigit
val escaped : 'a (requires member ( +++ ) and member ( +++ ) and member ( +++ ) and member ( +++ ) and member ( +++ ) and member ( +++ ) and member ( +++ ) and member ( +++ ))

Full name: Script.Primitives.escaped
type UInt16 =
  struct
    member CompareTo : value:obj -> int + 1 overload
    member Equals : obj:obj -> bool + 1 overload
    member GetHashCode : unit -> int
    member GetTypeCode : unit -> TypeCode
    member ToString : unit -> string + 3 overloads
    static val MaxValue : uint16
    static val MinValue : uint16
    static member Parse : s:string -> uint16 + 3 overloads
    static member TryParse : s:string * result:uint16 -> bool + 1 overload
  end

Full name: System.UInt16
UInt16.Parse(s: string) : uint16
UInt16.Parse(s: string, provider: IFormatProvider) : uint16
UInt16.Parse(s: string, style: NumberStyles) : uint16
UInt16.Parse(s: string, style: NumberStyles, provider: IFormatProvider) : uint16
Multiple items
type String =
  new : value:char -> string + 7 overloads
  member Chars : int -> char
  member Clone : unit -> obj
  member CompareTo : value:obj -> int + 1 overload
  member Contains : value:string -> bool
  member CopyTo : sourceIndex:int * destination:char[] * destinationIndex:int * count:int -> unit
  member EndsWith : value:string -> bool + 2 overloads
  member Equals : obj:obj -> bool + 2 overloads
  member GetEnumerator : unit -> CharEnumerator
  member GetHashCode : unit -> int
  ...

Full name: System.String

--------------------
String(value: nativeptr<char>) : unit
String(value: nativeptr<sbyte>) : unit
String(value: char []) : unit
String(c: char, count: int) : unit
String(value: nativeptr<char>, startIndex: int, length: int) : unit
String(value: nativeptr<sbyte>, startIndex: int, length: int) : unit
String(value: char [], startIndex: int, length: int) : unit
String(value: nativeptr<sbyte>, startIndex: int, length: int, enc: Text.Encoding) : unit
Multiple items
val char : value:'T -> char (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.char

--------------------
type char = Char

Full name: Microsoft.FSharp.Core.char
type NumberStyles =
  | None = 0
  | AllowLeadingWhite = 1
  | AllowTrailingWhite = 2
  | AllowLeadingSign = 4
  | AllowTrailingSign = 8
  | AllowParentheses = 16
  | AllowDecimalPoint = 32
  | AllowThousands = 64
  | AllowExponent = 128
  | AllowCurrencySymbol = 256
  ...

Full name: System.Globalization.NumberStyles
field NumberStyles.HexNumber = 515
Multiple items
val byte : value:'T -> byte (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.byte

--------------------
type byte = Byte

Full name: Microsoft.FSharp.Core.byte
type UriParser =
  static member IsKnownScheme : schemeName:string -> bool
  static member Register : uriParser:UriParser * schemeName:string * defaultPort:int -> unit

Full name: System.UriParser
module Primitives

from Script
val scheme : 'a

Full name: Script.UriParser.scheme
val mark : 'a (requires member ( +++ ) and member ( +++ ) and member ( +++ ) and member ( +++ ) and member ( +++ ) and member ( +++ ) and member ( +++ ) and member ( +++ ) and member ( +++ ))

Full name: Script.UriParser.mark
val reserved : 'a (requires member ( +++ ) and member ( +++ ) and member ( +++ ) and member ( +++ ) and member ( +++ ) and member ( +++ ) and member ( +++ ) and member ( +++ ))

Full name: Script.UriParser.reserved
val unreserved : '_arg3 (requires member ( +++ ) and member ( +++ ) and member ( +++ ) and member ( +++ ) and member ( +++ ) and member ( +++ ) and member ( +++ ) and member ( +++ ))

Full name: Script.UriParser.unreserved
val pchar : '_arg3

Full name: Script.UriParser.pchar
val uric : '_arg3

Full name: Script.UriParser.uric
val uricNoSlash : '_arg3

Full name: Script.UriParser.uricNoSlash
val relSegment : 'a

Full name: Script.UriParser.relSegment
val regName : 'a (requires member ( +++ ))

Full name: Script.UriParser.regName
val userInfo : 'a

Full name: Script.UriParser.userInfo
val param : 'a

Full name: Script.UriParser.param


 param returns a series of pchar.
val segment : 'a

Full name: Script.UriParser.segment


 segment returns a list of ;-separated params.
val concat : lists:seq<'T list> -> 'T list

Full name: Microsoft.FSharp.Collections.List.concat
val pathSegments : 'a

Full name: Script.UriParser.pathSegments


 pathSegments returns a list of /-separated segments.
val uriAbsPath : 'a

Full name: Script.UriParser.uriAbsPath
val relPath : 'a

Full name: Script.UriParser.relPath
union case Option.Some: Value: 'T -> Option<'T>
val uriQuery : 'a

Full name: Script.UriParser.uriQuery
val uriFragment : 'a

Full name: Script.UriParser.uriFragment
val ipv4Address : 'a (requires member ( +++ ))

Full name: Script.UriParser.ipv4Address
val first : 'a
val topLabel : 'a

Full name: Script.UriParser.topLabel
val private _label : first:'a -> 'b

Full name: Script.UriParser._label
val domainLabel : 'a

Full name: Script.UriParser.domainLabel
val hostname : 'a (requires member ( +++ ))

Full name: Script.UriParser.hostname
val host : '_arg3

Full name: Script.UriParser.host
val port : 'a

Full name: Script.UriParser.port
val hostport : 'a

Full name: Script.UriParser.hostport
val server : 'a (requires member ( +++ ))

Full name: Script.UriParser.server
val uriAuthority : '_arg3

Full name: Script.UriParser.uriAuthority
val netPath : 'a

Full name: Script.UriParser.netPath
val opaquePart : 'a

Full name: Script.UriParser.opaquePart
val hierPart : 'a

Full name: Script.UriParser.hierPart
val query : Linq.QueryBuilder

Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.query
val absoluteUri : 'a

Full name: Script.UriParser.absoluteUri
val relativeUri : 'a

Full name: Script.UriParser.relativeUri
val uriReference : 'a

Full name: Script.UriParser.uriReference
union case Option.None: Option<'T>
val v : (int list * 'a) option

Full name: Script.v
Multiple items
type ArraySegment<'T> =
  struct
    new : array:'T[] -> ArraySegment<'T> + 1 overload
    member Array : 'T[]
    member Count : int
    member Equals : obj:obj -> bool + 1 overload
    member GetHashCode : unit -> int
    member Offset : int
  end

Full name: System.ArraySegment<_>

--------------------
ArraySegment()
ArraySegment(array: 'T []) : unit
ArraySegment(array: 'T [], offset: int, count: int) : unit
module Option

from Microsoft.FSharp.Core
val get : option:'T option -> 'T

Full name: Microsoft.FSharp.Core.Option.get
val fst : tuple:('T1 * 'T2) -> 'T1

Full name: Microsoft.FSharp.Core.Operators.fst
val map : mapping:('T -> 'U) -> list:'T list -> 'U list

Full name: Microsoft.FSharp.Collections.List.map
val toArray : list:'T list -> 'T []

Full name: Microsoft.FSharp.Collections.List.toArray

More information

Link:http://fssnip.net/2e
Posted:13 years ago
Author:Ryan Riley
Tags: parsing , uri , monad