1 person likes it.

Kernel Density estimation and visualization with MathNet.Numerics and FsPlotly

Sample that estimates and visualizes the kernel densities of several distributions so they can be compared visually. Here, the distributions of NY taxi fares are compared by payment type (e.g. cash or credit card).

 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
12: 
13: 
14: 
15: 
16: 
17: 
18: 
19: 
20: 
21: 
22: 
23: 
24: 
25: 
26: 
27: 
28: 
29: 
30: 
31: 
32: 
33: 
34: 
35: 
36: 
37: 
38: 
39: 
40: 
41: 
42: 
43: 
44: 
45: 
46: 
47: 
48: 
49: 
50: 
51: 
52: 
53: 
open FSharp.Data
open MathNet.Numerics
open FSharp.Plotly

(*
Sample based on NY Taxi data 

https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page
*)


/// CSV type provider bound to the taxi-fare sample file; the path is a
/// machine-specific absolute path and must exist for this script to compile.
type Trip = CsvProvider< @"C:\s\AutoMLDemo\taxi-fare-test.csv" >

/// The data set obtained from the provider's sample file.
let trips = Trip.GetSample()

/// All rows of `trips`, materialised into an array so they can be
/// traversed repeatedly by the analyses below.
let trows = Array.ofSeq trips.Rows

/// Fare amounts (converted to float) grouped by the trip's payment type:
/// one `(paymentType, fares)` pair per distinct `Payment_type` in `trows`.
let fareByPaymentType =
    let tripsByPayment = trows |> Array.groupBy (fun trip -> trip.Payment_type)
    [| for (paymentType, group) in tripsByPayment ->
        (paymentType, [| for trip in group -> float trip.Fare_amount |]) |]

/// Shows one fare histogram per payment type, each titled with its payment
/// type. Returns the array of `Chart.Show` results, one per group.
let histograms() =
    [| for (paymentType, fares) in fareByPaymentType do
        let chart = Chart.Histogram fares |> Chart.withTitle paymentType
        yield Chart.Show chart |]

open MathNet.Numerics.Statistics

/// Estimates a Gaussian kernel density of the fare amounts for each payment
/// type in `fareByPaymentType` and shows all of them overlaid as area traces
/// in a single chart titled "Fare Density by Payment Type".
let densityByPaymentType() =
    // Evaluation grid shared by every payment type: 0.0 to 100.0 in steps of 0.1.
    // It does not depend on the group, so it is built once rather than per group.
    let xs = [|for i in 0.0 .. 0.1 .. 100.0 -> i|]

    let dsByV =
        fareByPaymentType
        |> Array.map(fun (v,fares) ->
            // Sorted copy of the fares.
            // NOTE(review): EstimateGaussian may not require sorted input — confirm before removing.
            let sfrs = Array.sort fares
            // Density at each grid point, using a fixed bandwidth of 1.0.
            let ds = xs |> Array.map (fun x -> KernelDensity.EstimateGaussian(x,1.0,sfrs))
            v,ds)

    // Area trace with low opacity so overlapping traces remain distinguishable.
    let area  pts = Chart.Area(pts, Opacity=0.1)

    dsByV
    |> Array.map (fun (v,ds) ->
        Array.zip xs ds
        |> area
        |> Chart.withTraceName v)
    |> Chart.Combine
    |> Chart.withTitle "Fare Density by Payment Type"
    |> Chart.Show
namespace Microsoft.FSharp
namespace Microsoft.FSharp.Data
type Trip = obj
val trips : obj
val trows : obj []
module Seq

from Microsoft.FSharp.Collections
val toArray : source:seq<'T> -> 'T []
val fareByPaymentType : (obj * float []) []
module Array

from Microsoft.FSharp.Collections
val groupBy : projection:('T -> 'Key) -> array:'T [] -> ('Key * 'T []) [] (requires equality)
val x : obj
val map : mapping:('T -> 'U) -> array:'T [] -> 'U []
val p : obj
val xs : obj []
Multiple items
val float : value:'T -> float (requires member op_Explicit)

--------------------
type float = System.Double

--------------------
type float<'Measure> = float
val histograms : unit -> 'a []
val v : obj
val fs : float []
val densityByPaymentType : unit -> 'a
val dsByV : (obj * float [] * obj []) []
val fares : float []
val frs : float []
val sfrs : float []
val sort : array:'T [] -> 'T [] (requires comparison)
val xs : float []
val i : float
val ds : obj []
val x : float
val area : ('b -> 'c)
val xs : 'b
val colors : string []
val mapi : mapping:(int -> 'T -> 'U) -> array:'T [] -> 'U []
val i : int
val zip : array1:'T1 [] -> array2:'T2 [] -> ('T1 * 'T2) []

More information

Link:http://fssnip.net/7X9
Posted:11 months ago
Author:Faisal Waris
Tags: data science , visualization , kernel density