Web Crawler

This snippet features an F# Web crawler that I'm already using in two applications (slightly modified). It's based on a scalable network of communicating agents that follow URLs extracted from HTML pages until the specified limit is reached.

Copy Source
Copy Link
Tools:
  1: open System
  2: open System.Collections.Concurrent
  3: open System.Collections.Generic
  4: open System.IO
  5: open System.Net
  6: open System.Text.RegularExpressions
  7: 
  8: module Helpers =
  9: 
 10:     type Message = // Messages exchanged by the supervisor, URL collector and crawler agents.
 11:         | Done // Sent by a finished crawler to the URL collector, and by the collector to the supervisor.
 12:         | Mailbox of MailboxProcessor<Message> // A crawler's own inbox, posted to the supervisor to ask for work.
 13:         | Stop // Tells a crawler to finish, or tells the supervisor to stop handing out URLs.
 14:         | Url of string option // Some url: a URL to crawl or a discovered link; None: nothing to crawl.
 15: 
 16:     // Number of crawling agents spawned by `crawl`. Declared as a literal so it can be used as a match pattern.
 17:     [<Literal>]
 18:     let Gate = 5
 19: 
 20:     // Returns, in match order, the href values found in `html` that start with http or https (case-insensitive).
 21:     let extractLinks html =
 22:         let pattern1 = "(?i)href\\s*=\\s*(\"|\')/?((?!#.*|/\B|mailto:|location\.|javascript:)[^\"\']+)(\"|\')"
 23:         let pattern2 = "(?i)^https?"
 24:         // Build the scheme filter once instead of once per candidate link.
 25:         let isAbsolute = Regex(pattern2)
 26:         [
 27:             for x in Regex(pattern1).Matches(html) do
 28:                 let link = x.Groups.[2].Value
 29:                 if isAbsolute.IsMatch(link) then
 30:                     yield link
 31:         ]
 32:     
 33:     // Downloads `url`: Some body when the response content type contains "html", None otherwise or on any error.
 34:     let fetch (url : string) =
 35:         try
 36:             let request = WebRequest.Create(url) :?> HttpWebRequest
 37:             request.UserAgent <- "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)"
 38:             request.Timeout <- 5000
 39:             use response = request.GetResponse()
 40:             // Non-HTML responses are skipped without reading their body.
 41:             if Regex("html").IsMatch(response.ContentType) then
 42:                 use stream = response.GetResponseStream()
 43:                 use reader = new StreamReader(stream)
 44:                 Some (reader.ReadToEnd())
 45:             else
 46:                 None
 47:         // Best-effort: every exception raised above is turned into None.
 48:         with
 49:         | _ -> None
 50:     
 51:     // Fetches `url` and returns the links extracted from it; an empty list when nothing was fetched.
 52:     let collectLinks url =
 53:         match fetch url with
 54:         | Some page -> extractLinks page
 55:         | None -> []
 56: 
 57: open Helpers
 58: 
 59: // Crawls the web starting from `url`. Crawler agents fetch pages, the URL collector queues the links
 60: // they report, and the supervisor hands queued, not-yet-seen URLs to whichever crawler asks for work.
 61: // Hand-out stops once `limit` URLs (seed included) have been dispatched, or after the collector's
 62: // receive timeout triggers a Stop. Returns unit once the first messages are posted.
 63: let crawl url limit =
 64:     // Links reported by the crawlers, waiting to be handed out.
 65:     let q = ConcurrentQueue<string>()
 66: 
 67:     // URLs already dispatched. Seeded with the start URL so a link back to it is not crawled again.
 68:     let set = HashSet<string>()
 69:     set.Add url |> ignore
 70: 
 71:     // Answers every crawler request (Mailbox) with the next URL, Url None, or Stop.
 72:     let supervisor =
 73:         MailboxProcessor.Start(fun x ->
 74:             // `run` becomes false once a Stop message arrives; later requests are answered with Stop.
 75:             let rec loop run =
 76:                 async {
 77:                     let! msg = x.Receive()
 78:                     match msg with
 79:                     | Mailbox(mailbox) ->
 80:                         if set.Count < limit && run then
 81:                             // HashSet.Add returns false for a URL seen before, so duplicates yield Url None.
 82:                             let next =
 83:                                 match q.TryDequeue() with
 84:                                 | true, str -> if set.Add str then Some str else None
 85:                                 | _ -> None
 86:                             mailbox.Post <| Url next
 87:                         else
 88:                             mailbox.Post Stop
 89:                         return! loop run
 90:                     | Stop -> return! loop false
 91:                     // Any other message (Done from the URL collector) ends the supervisor.
 92:                     | _ -> printfn "Supervisor is done."
 93:                            (x :> IDisposable).Dispose()
 94:                 }
 95:             loop true)
 96: 
 97:     // Queues the links reported by the crawlers and counts the other messages they send (Done).
 98:     // `count` starts at 1, so reaching `Gate` means every crawler has finished.
 99:     let urlCollector =
100:         MailboxProcessor.Start(fun y ->
101:             let rec loop count =
102:                 async {
103:                     let! msg = y.TryReceive(6000)
104:                     match msg with
105:                     | Some (Url (Some url)) ->
106:                         q.Enqueue url
107:                         return! loop count
108:                     | Some (Url None) -> return! loop count
109:                     | Some _ ->
110:                         match count with
111:                         | Gate -> supervisor.Post Done
112:                                   (y :> IDisposable).Dispose()
113:                                   printfn "URL collector is done."
114:                         | _ -> return! loop (count + 1)
115:                     // No message within the 6000 timeout: tell the supervisor to stop handing out URLs.
116:                     | None -> supervisor.Post Stop
117:                               return! loop count
118:                 }
119:             loop 1)
120: 
121:     /// Initializes a crawling agent: it crawls each URL it receives, forwards the links found to the
122:     /// URL collector, and then posts its own inbox to the supervisor to ask for more work.
123:     let crawler id =
124:         MailboxProcessor.Start(fun inbox ->
125:             let rec loop() =
126:                 async {
127:                     let! msg = inbox.Receive()
128:                     match msg with
129:                     | Url (Some url) ->
130:                         let links = collectLinks url
131:                         printfn "%s crawled by agent %d." url id
132:                         for link in links do
133:                             urlCollector.Post <| Url (Some link)
134:                         supervisor.Post(Mailbox(inbox))
135:                         return! loop()
136:                     // Nothing to crawl yet: ask the supervisor again.
137:                     | Url None ->
138:                         supervisor.Post(Mailbox(inbox))
139:                         return! loop()
140:                     // Any other message (the supervisor's Stop) retires this agent.
141:                     | _ -> urlCollector.Post Done
142:                            printfn "Agent %d is done." id
143:                            (inbox :> IDisposable).Dispose()
144:                     }
145:             loop())
146: 
147:     // Spawn the crawlers.
148:     let crawlers =
149:         [ for i in 1 .. Gate -> crawler i ]
150: 
151:     // Post the first messages: the first agent gets the seed URL, the others get Url None so
152:     // that they start asking the supervisor for work.
153:     crawlers.Head.Post <| Url (Some url)
154:     crawlers.Tail |> List.iter (fun ag -> ag.Post <| Url None)
155: 
156: // Example: crawl starting from news.google.com with a limit of 25.
157: crawl "http://news.google.com" 25
158: 
159: // Output:
160: // http://news.google.com crawled by agent 1.
161: // http://www.gstatic.com/news/img/favicon.ico crawled by agent 2.
162: // http://www.google.com/webhp?hl=en&tab=nw crawled by agent 5.
163: // http://www.google.com/imghp?hl=en&tab=ni crawled by agent 3.
164: // http://video.google.com/?hl=en&tab=nv crawled by agent 4.
165: // http://www.google.com/prdhp?hl=en&tab=nf crawled by agent 5.
166: // http://news.google.com/news?pz=1&amp;cf=all&amp;ned=us&amp;hl=en
167: // &amp;topic=h&amp;num=3&amp;output=rss crawled by agent 1.
168: // http://www.google.com/intl/en/options/ crawled by agent 4.
169: // http://maps.google.com/maps?hl=en&tab=nl crawled by agent 2.
170: // http://www.google.com/finance?hl=en&tab=ne crawled by agent 1.
171: // http://scholar.google.com/schhp?hl=en&tab=ns crawled by agent 2.
172: // http://www.google.com/realtime?hl=en&tab=nY crawled by agent 2.
173: // http://mail.google.com/mail/?tab=nm crawled by agent 3.
174: // http://books.google.com/bkshp?hl=en&tab=np crawled by agent 5.
175: // http://translate.google.com/?hl=en&tab=nT crawled by agent 4.
176: // http://blogsearch.google.com/?hl=en&tab=nb crawled by agent 1.
177: // http://www.google.com/calendar?hl=en&tab=nc crawled by agent 3.
178: // http://picasaweb.google.com/home?hl=en&tab=nq crawled by agent 5.
179: // http://www.google.com/reader/?tab=ny crawled by agent 1.
180: // http://docs.google.com/?tab=no crawled by agent 4.
181: // https://www.google.com/accounts/ServiceLogin?service=news&amp;pas
182: // sive=1209600&amp;continue=http://news.google.com/&amp;followup=htt
183: // p://news.google.com/ crawled by agent 1.
184: // Agent 1 is done.
185: // http://www.google.com/preferences?hl=en&prev=http://news.google.com/ crawled by agent 4.
186: // Agent 4 is done.
187: // http://sites.google.com/?tab=n3 crawled by agent 3.
188: // Agent 3 is done.
189: // http://www.youtube.com/?hl=en&tab=n1 crawled by agent 2.
190: // Agent 2 is done.
191: // http://groups.google.com/grphp?hl=en&tab=ng crawled by agent 5.
192: // Agent 5 is done.
193: // URL collector is done.
194: // Supervisor is done.
namespace System
namespace System.Collections
namespace System.Collections.Concurrent
namespace System.Collections.Generic
namespace System.IO
namespace System.Net
namespace System.Text
namespace System.Text.RegularExpressions
type Message =
  | Done
  | Mailbox of MailboxProcessor<Message>
  | Stop
  | Url of string option

Full name: Snippet.Helpers.Message

  type: Message
  implements: IEquatable<Message>
  implements: Collections.IStructuralEquatable
union case Message.Done: Message
union case Message.Mailbox: MailboxProcessor<Message> -> Message
type MailboxProcessor<'Msg> =
  class
    interface IDisposable
    new : body:(MailboxProcessor<'Msg> -> Async<unit>) * ?cancellationToken:Threading.CancellationToken -> MailboxProcessor<'Msg>
    member Post : message:'Msg -> unit
    member PostAndAsyncReply : buildMessage:(AsyncReplyChannel<'Reply> -> 'Msg) * ?timeout:int -> Async<'Reply>
    member PostAndReply : buildMessage:(AsyncReplyChannel<'Reply> -> 'Msg) * ?timeout:int -> 'Reply
    member PostAndTryAsyncReply : buildMessage:(AsyncReplyChannel<'Reply> -> 'Msg) * ?timeout:int -> Async<'Reply option>
    member Receive : ?timeout:int -> Async<'Msg>
    member Scan : scanner:('Msg -> Async<'T> option) * ?timeout:int -> Async<'T>
    member Start : unit -> unit
    member TryPostAndReply : buildMessage:(AsyncReplyChannel<'Reply> -> 'Msg) * ?timeout:int -> 'Reply option
    member TryReceive : ?timeout:int -> Async<'Msg option>
    member TryScan : scanner:('Msg -> Async<'T> option) * ?timeout:int -> Async<'T option>
    member add_Error : Handler<Exception> -> unit
    member CurrentQueueLength : int
    member DefaultTimeout : int
    member Error : IEvent<Exception>
    member remove_Error : Handler<Exception> -> unit
    member DefaultTimeout : int with set
    static member Start : body:(MailboxProcessor<'Msg> -> Async<unit>) * ?cancellationToken:Threading.CancellationToken -> MailboxProcessor<'Msg>
  end

Full name: Microsoft.FSharp.Control.MailboxProcessor<_>

  type: MailboxProcessor<'Msg>
  implements: IDisposable
union case Message.Stop: Message
union case Message.Url: string option -> Message
Multiple items
val string : 'T -> string

Full name: Microsoft.FSharp.Core.Operators.string

--------------------

type string = String

Full name: Microsoft.FSharp.Core.string

  type: string
  implements: IComparable
  implements: ICloneable
  implements: IConvertible
  implements: IComparable<string>
  implements: seq<char>
  implements: Collections.IEnumerable
  implements: IEquatable<string>
type 'T option = Option<'T>

Full name: Microsoft.FSharp.Core.option<_>

  type: 'T option
  implements: Collections.IStructuralEquatable
  implements: IComparable<Option<'T>>
  implements: IComparable
  implements: Collections.IStructuralComparable
type LiteralAttribute =
  class
    inherit Attribute
    new : unit -> LiteralAttribute
  end

Full name: Microsoft.FSharp.Core.LiteralAttribute

  type: LiteralAttribute
  implements: Runtime.InteropServices._Attribute
  inherits: Attribute
val Gate : int

Full name: Snippet.Helpers.Gate

  type: int
  implements: IComparable
  implements: IFormattable
  implements: IConvertible
  implements: IComparable<int>
  implements: IEquatable<int>
  inherits: ValueType
val extractLinks : string -> string list

Full name: Snippet.Helpers.extractLinks
val html : string

  type: string
  implements: IComparable
  implements: ICloneable
  implements: IConvertible
  implements: IComparable<string>
  implements: seq<char>
  implements: Collections.IEnumerable
  implements: IEquatable<string>
val pattern1 : string

  type: string
  implements: IComparable
  implements: ICloneable
  implements: IConvertible
  implements: IComparable<string>
  implements: seq<char>
  implements: Collections.IEnumerable
  implements: IEquatable<string>
val pattern2 : string

  type: string
  implements: IComparable
  implements: ICloneable
  implements: IConvertible
  implements: IComparable<string>
  implements: seq<char>
  implements: Collections.IEnumerable
  implements: IEquatable<string>
val links : string list

  type: string list
  implements: Collections.IStructuralEquatable
  implements: IComparable<List<string>>
  implements: IComparable
  implements: Collections.IStructuralComparable
  implements: IEnumerable<string>
  implements: Collections.IEnumerable
val x : Match

  type: Match
  inherits: Group
  inherits: Capture
type Regex =
  class
    new : string -> System.Text.RegularExpressions.Regex
    new : string * System.Text.RegularExpressions.RegexOptions -> System.Text.RegularExpressions.Regex
    member GetGroupNames : unit -> string []
    member GetGroupNumbers : unit -> int []
    member GroupNameFromNumber : int -> string
    member GroupNumberFromName : string -> int
    member IsMatch : string -> bool
    member IsMatch : string * int -> bool
    member Match : string -> System.Text.RegularExpressions.Match
    member Match : string * int -> System.Text.RegularExpressions.Match
    member Match : string * int * int -> System.Text.RegularExpressions.Match
    member Matches : string -> System.Text.RegularExpressions.MatchCollection
    member Matches : string * int -> System.Text.RegularExpressions.MatchCollection
    member Options : System.Text.RegularExpressions.RegexOptions
    member Replace : string * string -> string
    member Replace : string * System.Text.RegularExpressions.MatchEvaluator -> string
    member Replace : string * string * int -> string
    member Replace : string * System.Text.RegularExpressions.MatchEvaluator * int -> string
    member Replace : string * string * int * int -> string
    member Replace : string * System.Text.RegularExpressions.MatchEvaluator * int * int -> string
    member RightToLeft : bool
    member Split : string -> string []
    member Split : string * int -> string []
    member Split : string * int * int -> string []
    member ToString : unit -> string
    static member CacheSize : int with get, set
    static member CompileToAssembly : System.Text.RegularExpressions.RegexCompilationInfo [] * System.Reflection.AssemblyName -> unit
    static member CompileToAssembly : System.Text.RegularExpressions.RegexCompilationInfo [] * System.Reflection.AssemblyName * System.Reflection.Emit.CustomAttributeBuilder [] -> unit
    static member CompileToAssembly : System.Text.RegularExpressions.RegexCompilationInfo [] * System.Reflection.AssemblyName * System.Reflection.Emit.CustomAttributeBuilder [] * string -> unit
    static member Escape : string -> string
    static member IsMatch : string * string -> bool
    static member IsMatch : string * string * System.Text.RegularExpressions.RegexOptions -> bool
    static member Match : string * string -> System.Text.RegularExpressions.Match
    static member Match : string * string * System.Text.RegularExpressions.RegexOptions -> System.Text.RegularExpressions.Match
    static member Matches : string * string -> System.Text.RegularExpressions.MatchCollection
    static member Matches : string * string * System.Text.RegularExpressions.RegexOptions -> System.Text.RegularExpressions.MatchCollection
    static member Replace : string * string * string -> string
    static member Replace : string * string * System.Text.RegularExpressions.MatchEvaluator -> string
    static member Replace : string * string * string * System.Text.RegularExpressions.RegexOptions -> string
    static member Replace : string * string * System.Text.RegularExpressions.MatchEvaluator * System.Text.RegularExpressions.RegexOptions -> string
    static member Split : string * string -> string []
    static member Split : string * string * System.Text.RegularExpressions.RegexOptions -> string []
    static member Unescape : string -> string
  end

Full name: System.Text.RegularExpressions.Regex

  type: Regex
  implements: Runtime.Serialization.ISerializable
property Match.Groups: GroupCollection
type List<'T> =
  class
    new : unit -> System.Collections.Generic.List<'T>
    new : int -> System.Collections.Generic.List<'T>
    new : System.Collections.Generic.IEnumerable<'T> -> System.Collections.Generic.List<'T>
    member Add : 'T -> unit
    member AddRange : System.Collections.Generic.IEnumerable<'T> -> unit
    member AsReadOnly : unit -> System.Collections.ObjectModel.ReadOnlyCollection<'T>
    member BinarySearch : 'T -> int
    member BinarySearch : 'T * System.Collections.Generic.IComparer<'T> -> int
    member BinarySearch : int * int * 'T * System.Collections.Generic.IComparer<'T> -> int
    member Capacity : int with get, set
    member Clear : unit -> unit
    member Contains : 'T -> bool
    member ConvertAll<'TOutput> : System.Converter<'T,'TOutput> -> System.Collections.Generic.List<'TOutput>
    member CopyTo : 'T [] -> unit
    member CopyTo : 'T [] * int -> unit
    member CopyTo : int * 'T [] * int * int -> unit
    member Count : int
    member Exists : System.Predicate<'T> -> bool
    member Find : System.Predicate<'T> -> 'T
    member FindAll : System.Predicate<'T> -> System.Collections.Generic.List<'T>
    member FindIndex : System.Predicate<'T> -> int
    member FindIndex : int * System.Predicate<'T> -> int
    member FindIndex : int * int * System.Predicate<'T> -> int
    member FindLast : System.Predicate<'T> -> 'T
    member FindLastIndex : System.Predicate<'T> -> int
    member FindLastIndex : int * System.Predicate<'T> -> int
    member FindLastIndex : int * int * System.Predicate<'T> -> int
    member ForEach : System.Action<'T> -> unit
    member GetEnumerator : unit -> Enumerator<'T>
    member GetRange : int * int -> System.Collections.Generic.List<'T>
    member IndexOf : 'T -> int
    member IndexOf : 'T * int -> int
    member IndexOf : 'T * int * int -> int
    member Insert : int * 'T -> unit
    member InsertRange : int * System.Collections.Generic.IEnumerable<'T> -> unit
    member Item : int -> 'T with get, set
    member LastIndexOf : 'T -> int
    member LastIndexOf : 'T * int -> int
    member LastIndexOf : 'T * int * int -> int
    member Remove : 'T -> bool
    member RemoveAll : System.Predicate<'T> -> int
    member RemoveAt : int -> unit
    member RemoveRange : int * int -> unit
    member Reverse : unit -> unit
    member Reverse : int * int -> unit
    member Sort : unit -> unit
    member Sort : System.Collections.Generic.IComparer<'T> -> unit
    member Sort : System.Comparison<'T> -> unit
    member Sort : int * int * System.Collections.Generic.IComparer<'T> -> unit
    member ToArray : unit -> 'T []
    member TrimExcess : unit -> unit
    member TrueForAll : System.Predicate<'T> -> bool
    type Enumerator =
      struct
        member Current : 'T
        member Dispose : unit -> unit
        member MoveNext : unit -> bool
      end
  end

Full name: System.Collections.Generic.List<_>

  type: List<'T>
  implements: IList<'T>
  implements: ICollection<'T>
  implements: seq<'T>
  implements: Collections.IList
  implements: Collections.ICollection
  implements: Collections.IEnumerable
val filter : ('T -> bool) -> 'T list -> 'T list

Full name: Microsoft.FSharp.Collections.List.filter
val x : string

  type: string
  implements: IComparable
  implements: ICloneable
  implements: IConvertible
  implements: IComparable<string>
  implements: seq<char>
  implements: Collections.IEnumerable
  implements: IEquatable<string>
val fetch : string -> string option

Full name: Snippet.Helpers.fetch
val url : string

  type: string
  implements: IComparable
  implements: ICloneable
  implements: IConvertible
  implements: IComparable<string>
  implements: seq<char>
  implements: Collections.IEnumerable
  implements: IEquatable<string>
val req : HttpWebRequest

  type: HttpWebRequest
  implements: Runtime.Serialization.ISerializable
  inherits: WebRequest
  inherits: MarshalByRefObject
type WebRequest =
  class
    inherit System.MarshalByRefObject
    member Abort : unit -> unit
    member AuthenticationLevel : System.Net.Security.AuthenticationLevel with get, set
    member BeginGetRequestStream : System.AsyncCallback * obj -> System.IAsyncResult
    member BeginGetResponse : System.AsyncCallback * obj -> System.IAsyncResult
    member CachePolicy : System.Net.Cache.RequestCachePolicy with get, set
    member ConnectionGroupName : string with get, set
    member ContentLength : int64 with get, set
    member ContentType : string with get, set
    member Credentials : System.Net.ICredentials with get, set
    member EndGetRequestStream : System.IAsyncResult -> System.IO.Stream
    member EndGetResponse : System.IAsyncResult -> System.Net.WebResponse
    member GetRequestStream : unit -> System.IO.Stream
    member GetResponse : unit -> System.Net.WebResponse
    member Headers : System.Net.WebHeaderCollection with get, set
    member ImpersonationLevel : System.Security.Principal.TokenImpersonationLevel with get, set
    member Method : string with get, set
    member PreAuthenticate : bool with get, set
    member Proxy : System.Net.IWebProxy with get, set
    member RequestUri : System.Uri
    member Timeout : int with get, set
    member UseDefaultCredentials : bool with get, set
    static member Create : string -> System.Net.WebRequest
    static member Create : System.Uri -> System.Net.WebRequest
    static member CreateDefault : System.Uri -> System.Net.WebRequest
    static member DefaultCachePolicy : System.Net.Cache.RequestCachePolicy with get, set
    static member DefaultWebProxy : System.Net.IWebProxy with get, set
    static member GetSystemWebProxy : unit -> System.Net.IWebProxy
    static member RegisterPrefix : string * System.Net.IWebRequestCreate -> bool
  end

Full name: System.Net.WebRequest

  type: WebRequest
  implements: Runtime.Serialization.ISerializable
  inherits: MarshalByRefObject
Multiple overloads
WebRequest.Create(requestUri: Uri) : WebRequest
WebRequest.Create(requestUriString: string) : WebRequest
type HttpWebRequest =
  class
    inherit System.Net.WebRequest
    member Abort : unit -> unit
    member Accept : string with get, set
    member AddRange : int -> unit
    member AddRange : int64 -> unit
    member AddRange : int * int -> unit
    member AddRange : int64 * int64 -> unit
    member AddRange : string * int -> unit
    member AddRange : string * int64 -> unit
    member AddRange : string * int * int -> unit
    member AddRange : string * int64 * int64 -> unit
    member Address : System.Uri
    member AllowAutoRedirect : bool with get, set
    member AllowWriteStreamBuffering : bool with get, set
    member AutomaticDecompression : System.Net.DecompressionMethods with get, set
    member BeginGetRequestStream : System.AsyncCallback * obj -> System.IAsyncResult
    member BeginGetResponse : System.AsyncCallback * obj -> System.IAsyncResult
    member ClientCertificates : System.Security.Cryptography.X509Certificates.X509CertificateCollection with get, set
    member Connection : string with get, set
    member ConnectionGroupName : string with get, set
    member ContentLength : int64 with get, set
    member ContentType : string with get, set
    member ContinueDelegate : System.Net.HttpContinueDelegate with get, set
    member CookieContainer : System.Net.CookieContainer with get, set
    member Credentials : System.Net.ICredentials with get, set
    member Date : System.DateTime with get, set
    member EndGetRequestStream : System.IAsyncResult -> System.IO.Stream
    member EndGetRequestStream : System.IAsyncResult * System.Net.TransportContext -> System.IO.Stream
    member EndGetResponse : System.IAsyncResult -> System.Net.WebResponse
    member Expect : string with get, set
    member GetRequestStream : unit -> System.IO.Stream
    member GetRequestStream : System.Net.TransportContext -> System.IO.Stream
    member GetResponse : unit -> System.Net.WebResponse
    member HaveResponse : bool
    member Headers : System.Net.WebHeaderCollection with get, set
    member Host : string with get, set
    member IfModifiedSince : System.DateTime with get, set
    member KeepAlive : bool with get, set
    member MaximumAutomaticRedirections : int with get, set
    member MaximumResponseHeadersLength : int with get, set
    member MediaType : string with get, set
    member Method : string with get, set
    member Pipelined : bool with get, set
    member PreAuthenticate : bool with get, set
    member ProtocolVersion : System.Version with get, set
    member Proxy : System.Net.IWebProxy with get, set
    member ReadWriteTimeout : int with get, set
    member Referer : string with get, set
    member RequestUri : System.Uri
    member SendChunked : bool with get, set
    member ServicePoint : System.Net.ServicePoint
    member Timeout : int with get, set
    member TransferEncoding : string with get, set
    member UnsafeAuthenticatedConnectionSharing : bool with get, set
    member UseDefaultCredentials : bool with get, set
    member UserAgent : string with get, set
    static member DefaultCachePolicy : System.Net.Cache.RequestCachePolicy with get, set
    static member DefaultMaximumErrorResponseLength : int with get, set
    static member DefaultMaximumResponseHeadersLength : int with get, set
  end

Full name: System.Net.HttpWebRequest

  type: HttpWebRequest
  implements: Runtime.Serialization.ISerializable
  inherits: WebRequest
  inherits: MarshalByRefObject
property HttpWebRequest.UserAgent: string
property WebRequest.Timeout: int
val resp : WebResponse

  type: WebResponse
  implements: Runtime.Serialization.ISerializable
  implements: IDisposable
  inherits: MarshalByRefObject
WebRequest.GetResponse() : WebResponse
val content : string

  type: string
  implements: IComparable
  implements: ICloneable
  implements: IConvertible
  implements: IComparable<string>
  implements: seq<char>
  implements: Collections.IEnumerable
  implements: IEquatable<string>
property WebResponse.ContentType: string
val isHtml : bool

  type: bool
  implements: IComparable
  implements: IConvertible
  implements: IComparable<bool>
  implements: IEquatable<bool>
  inherits: ValueType
val stream : Stream

  type: Stream
  implements: IDisposable
  inherits: MarshalByRefObject
WebResponse.GetResponseStream() : Stream
val reader : StreamReader

  type: StreamReader
  implements: IDisposable
  inherits: TextReader
  inherits: MarshalByRefObject
type StreamReader =
  class
    inherit System.IO.TextReader
    new : System.IO.Stream -> System.IO.StreamReader
    new : System.IO.Stream * bool -> System.IO.StreamReader
    new : System.IO.Stream * System.Text.Encoding -> System.IO.StreamReader
    new : System.IO.Stream * System.Text.Encoding * bool -> System.IO.StreamReader
    new : System.IO.Stream * System.Text.Encoding * bool * int -> System.IO.StreamReader
    new : string -> System.IO.StreamReader
    new : string * bool -> System.IO.StreamReader
    new : string * System.Text.Encoding -> System.IO.StreamReader
    new : string * System.Text.Encoding * bool -> System.IO.StreamReader
    new : string * System.Text.Encoding * bool * int -> System.IO.StreamReader
    member BaseStream : System.IO.Stream
    member Close : unit -> unit
    member CurrentEncoding : System.Text.Encoding
    member DiscardBufferedData : unit -> unit
    member EndOfStream : bool
    member Peek : unit -> int
    member Read : unit -> int
    member Read : char [] * int * int -> int
    member ReadLine : unit -> string
    member ReadToEnd : unit -> string
    static val Null : System.IO.StreamReader
  end

Full name: System.IO.StreamReader

  type: StreamReader
  implements: IDisposable
  inherits: TextReader
  inherits: MarshalByRefObject
TextReader.ReadToEnd() : string
union case Option.Some: 'T -> Option<'T>
union case Option.None: Option<'T>
val collectLinks : string -> string list

Full name: Snippet.Helpers.collectLinks
val html : string option

  type: string option
  implements: Collections.IStructuralEquatable
  implements: IComparable<Option<string>>
  implements: IComparable
  implements: Collections.IStructuralComparable
module Helpers

from Snippet
val crawl : string -> int -> unit

Full name: Snippet.crawl
val limit : int

  type: int
  implements: IComparable
  implements: IFormattable
  implements: IConvertible
  implements: IComparable<int>
  implements: IEquatable<int>
  inherits: ValueType
val q : ConcurrentQueue<string>

  type: ConcurrentQueue<string>
  implements: IProducerConsumerCollection<string>
  implements: seq<string>
  implements: Collections.ICollection
  implements: Collections.IEnumerable
type ConcurrentQueue<'T> =
  class
    new : unit -> System.Collections.Concurrent.ConcurrentQueue<'T>
    new : System.Collections.Generic.IEnumerable<'T> -> System.Collections.Concurrent.ConcurrentQueue<'T>
    member CopyTo : 'T [] * int -> unit
    member Count : int
    member Enqueue : 'T -> unit
    member GetEnumerator : unit -> System.Collections.Generic.IEnumerator<'T>
    member IsEmpty : bool
    member ToArray : unit -> 'T []
    member TryDequeue : 'T -> bool
    member TryPeek : 'T -> bool
  end

Full name: System.Collections.Concurrent.ConcurrentQueue<_>

  type: ConcurrentQueue<'T>
  implements: IProducerConsumerCollection<'T>
  implements: seq<'T>
  implements: Collections.ICollection
  implements: Collections.IEnumerable
val set : HashSet<string>

  type: HashSet<string>
  implements: Runtime.Serialization.ISerializable
  implements: Runtime.Serialization.IDeserializationCallback
  implements: ISet<string>
  implements: ICollection<string>
  implements: seq<string>
  implements: Collections.IEnumerable
type HashSet<'T> =
  class
    new : unit -> System.Collections.Generic.HashSet<'T>
    new : System.Collections.Generic.IEqualityComparer<'T> -> System.Collections.Generic.HashSet<'T>
    new : System.Collections.Generic.IEnumerable<'T> -> System.Collections.Generic.HashSet<'T>
    new : System.Collections.Generic.IEnumerable<'T> * System.Collections.Generic.IEqualityComparer<'T> -> System.Collections.Generic.HashSet<'T>
    member Add : 'T -> bool
    member Clear : unit -> unit
    member Comparer : System.Collections.Generic.IEqualityComparer<'T>
    member Contains : 'T -> bool
    member CopyTo : 'T [] -> unit
    member CopyTo : 'T [] * int -> unit
    member CopyTo : 'T [] * int * int -> unit
    member Count : int
    member ExceptWith : System.Collections.Generic.IEnumerable<'T> -> unit
    member GetEnumerator : unit -> Enumerator<'T>
    member GetObjectData : System.Runtime.Serialization.SerializationInfo * System.Runtime.Serialization.StreamingContext -> unit
    member IntersectWith : System.Collections.Generic.IEnumerable<'T> -> unit
    member IsProperSubsetOf : System.Collections.Generic.IEnumerable<'T> -> bool
    member IsProperSupersetOf : System.Collections.Generic.IEnumerable<'T> -> bool
    member IsSubsetOf : System.Collections.Generic.IEnumerable<'T> -> bool
    member IsSupersetOf : System.Collections.Generic.IEnumerable<'T> -> bool
    member OnDeserialization : obj -> unit
    member Overlaps : System.Collections.Generic.IEnumerable<'T> -> bool
    member Remove : 'T -> bool
    member RemoveWhere : System.Predicate<'T> -> int
    member SetEquals : System.Collections.Generic.IEnumerable<'T> -> bool
    member SymmetricExceptWith : System.Collections.Generic.IEnumerable<'T> -> unit
    member TrimExcess : unit -> unit
    member UnionWith : System.Collections.Generic.IEnumerable<'T> -> unit
    static member CreateSetComparer : unit -> System.Collections.Generic.IEqualityComparer<System.Collections.Generic.HashSet<'T>>
    type Enumerator =
      struct
        member Current : 'T
        member Dispose : unit -> unit
        member MoveNext : unit -> bool
      end
  end

Full name: System.Collections.Generic.HashSet<_>

  type: HashSet<'T>
  implements: Runtime.Serialization.ISerializable
  implements: Runtime.Serialization.IDeserializationCallback
  implements: ISet<'T>
  implements: ICollection<'T>
  implements: seq<'T>
  implements: Collections.IEnumerable
val supervisor : MailboxProcessor<Message>

  type: MailboxProcessor<Message>
  implements: IDisposable
static member MailboxProcessor.Start : body:(MailboxProcessor<'Msg> -> Async<unit>) * ?cancellationToken:Threading.CancellationToken -> MailboxProcessor<'Msg>
val x : MailboxProcessor<Message>

  type: MailboxProcessor<Message>
  implements: IDisposable
val loop : (bool -> Async<unit>)
val run : bool

  type: bool
  implements: IComparable
  implements: IConvertible
  implements: IComparable<bool>
  implements: IEquatable<bool>
  inherits: ValueType
val async : AsyncBuilder

Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.async
val msg : Message

  type: Message
  implements: IEquatable<Message>
  implements: Collections.IStructuralEquatable
member MailboxProcessor.Receive : ?timeout:int -> Async<'Msg>
val mailbox : MailboxProcessor<Message>

  type: MailboxProcessor<Message>
  implements: IDisposable
val count : int

  type: int
  implements: IComparable
  implements: IFormattable
  implements: IConvertible
  implements: IComparable<int>
  implements: IEquatable<int>
  inherits: ValueType
property HashSet.Count: int
val url : bool * string
ConcurrentQueue.TryDequeue(result: byref<string>) : bool
val str : string

  type: string
  implements: IComparable
  implements: ICloneable
  implements: IConvertible
  implements: IComparable<string>
  implements: seq<char>
  implements: Collections.IEnumerable
  implements: IEquatable<string>
val not : bool -> bool

Full name: Microsoft.FSharp.Core.Operators.not
HashSet.Contains(item: string) : bool
val set' : bool

  type: bool
  implements: IComparable
  implements: IConvertible
  implements: IComparable<bool>
  implements: IEquatable<bool>
  inherits: ValueType
HashSet.Add(item: string) : bool
member MailboxProcessor.Post : message:'Msg -> unit
val printfn : Printf.TextWriterFormat<'T> -> 'T

Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.printfn
Multiple items
type IDisposable =
  interface
    member Dispose : unit -> unit
  end

Full name: System.IDisposable

--------------------

IDisposable
val urlCollector : MailboxProcessor<Message>

  type: MailboxProcessor<Message>
  implements: IDisposable
val y : MailboxProcessor<Message>

  type: MailboxProcessor<Message>
  implements: IDisposable
val loop : (int -> Async<unit>)
val msg : Message option

  type: Message option
  implements: Collections.IStructuralEquatable
  implements: IComparable<Option<Message>>
  implements: IComparable
  implements: Collections.IStructuralComparable
member MailboxProcessor.TryReceive : ?timeout:int -> Async<'Msg option>
val message : Message

  type: Message
  implements: IEquatable<Message>
  implements: Collections.IStructuralEquatable
val u : string option

  type: string option
  implements: Collections.IStructuralEquatable
  implements: IComparable<Option<string>>
  implements: IComparable
  implements: Collections.IStructuralComparable
ConcurrentQueue.Enqueue(item: string) : unit
val crawler : (int -> MailboxProcessor<Message>)

Initializes a crawling agent.
val id : int

  type: int
  implements: IComparable
  implements: IFormattable
  implements: IConvertible
  implements: IComparable<int>
  implements: IEquatable<int>
  inherits: ValueType
val inbox : MailboxProcessor<Message>

  type: MailboxProcessor<Message>
  implements: IDisposable
val loop : (unit -> Async<unit>)
val x : string option

  type: string option
  implements: Collections.IStructuralEquatable
  implements: IComparable<Option<string>>
  implements: IComparable
  implements: Collections.IStructuralComparable
val link : string

  type: string
  implements: IComparable
  implements: ICloneable
  implements: IConvertible
  implements: IComparable<string>
  implements: seq<char>
  implements: Collections.IEnumerable
  implements: IEquatable<string>
val crawlers : MailboxProcessor<Message> list

  type: MailboxProcessor<Message> list
  implements: Collections.IStructuralEquatable
  implements: IComparable<List<MailboxProcessor<Message>>>
  implements: IComparable
  implements: Collections.IStructuralComparable
  implements: IEnumerable<MailboxProcessor<Message>>
  implements: Collections.IEnumerable
val i : int

  type: int
  implements: IComparable
  implements: IFormattable
  implements: IConvertible
  implements: IComparable<int>
  implements: IEquatable<int>
  inherits: ValueType
property List.Head: MailboxProcessor<Message>
property List.Tail: MailboxProcessor<Message> list
val iter : ('T -> unit) -> 'T list -> unit

Full name: Microsoft.FSharp.Collections.List.iter
val ag : MailboxProcessor<Message>

  type: MailboxProcessor<Message>
  implements: IDisposable

More information

Link: http://fssnip.net/3K
Posted: 3 years ago
Author: Taha Hachana (website)
Tags: Web, Crawler, Agent, MailboxProcessor, Regex, HTML