8 people like it.

Using the machine learning library Accord.NET from F#

This example uses the same data and methods as http://accord-framework.net/docs/html/T_Accord_Statistics_Models_Regression_LogisticRegression.htm

  1: 
  2: 
  3: 
  4: 
  5: 
  6: 
  7: 
  8: 
  9: 
 10: 
 11: 
 12: 
 13: 
 14: 
 15: 
 16: 
 17: 
 18: 
 19: 
 20: 
 21: 
 22: 
 23: 
 24: 
 25: 
 26: 
 27: 
 28: 
 29: 
 30: 
 31: 
 32: 
 33: 
 34: 
 35: 
 36: 
 37: 
 38: 
 39: 
 40: 
 41: 
 42: 
 43: 
 44: 
 45: 
 46: 
 47: 
 48: 
 49: 
 50: 
 51: 
 52: 
 53: 
 54: 
 55: 
 56: 
 57: 
 58: 
 59: 
 60: 
 61: 
 62: 
 63: 
 64: 
 65: 
 66: 
 67: 
 68: 
 69: 
 70: 
 71: 
 72: 
 73: 
 74: 
 75: 
 76: 
 77: 
 78: 
 79: 
 80: 
 81: 
 82: 
 83: 
 84: 
 85: 
 86: 
 87: 
 88: 
 89: 
 90: 
 91: 
 92: 
 93: 
 94: 
 95: 
 96: 
 97: 
 98: 
 99: 
100: 
101: 
102: 
#I @"./packages"
#r @"FSharp.Data.2.3.2/lib/net40/FSharp.Data.dll"
#r @"Accord.3.4.0/lib/net45/Accord.dll"
#r @"Accord.MachineLearning.3.4.0/lib/net45/Accord.MachineLearning.dll"
#r @"Accord.Math.3.4.0/lib/net45/Accord.Math.Core.dll"
#r @"Accord.Math.3.4.0/lib/net45/Accord.Math.dll"
#r @"Accord.Statistics.3.4.0/lib/net45/Accord.Statistics.dll"

open System
open FSharp.Data

// We have some sample data for which we already know the results,
// and we use it to teach the machine:

(* sample.csv data content:

Age,Smokes,Had cancer
55,0,false
28,0,false
65,1,false
46,0,true
86,1,true
56,1,true
85,0,false
33,0,false
21,1,false
42,1,true

*)
#time
open Accord.Statistics.Models.Regression
open Accord.Statistics.Models.Regression.Fitting 

/// Typed access to `sample.csv` through the FSharp.Data CSV type provider.
/// The column types are inferred from the sample file (up to 2000 rows).
type People = CsvProvider<"sample.csv", ",", InferRows = 2000>

/// Training data read from `sample.csv`:
/// `inputs` holds one `[| age; smokes |]` feature vector per row and
/// `output` holds the matching "Had cancer" value of each row.
let inputs, output =
    // Materialise the rows once, then project the two parallel arrays from them.
    let rows = People.Load(@"sample.csv").Rows |> Seq.toArray
    let featureVectors =
        rows |> Array.map (fun r -> [| float r.Age; Convert.ToDouble(r.Smokes) |])
    let labels = rows |> Array.map (fun r -> r.``Had cancer``)
    featureVectors, labels

/// A single row of the sample CSV, as exposed by the type provider.
type Observation = People.Row

/// A named feature: its display name paired with a function that extracts
/// the feature's integer value from an observation.
// NOTE: no StructuredFormatDisplay attribute here. This is an abbreviation of
// a tuple type, which has no `AsString` member for the attribute to refer to.
type Feature = string * (Observation -> int)

/// The features used by the model, listed in the same column order as the
/// vectors in `inputs` (Age first, then Smokes). The position in this array
/// is used later to look up the matching model coefficient.
let features : Feature[] = [|
    "Age", (fun obs -> obs.Age)
    "Smokes", (fun obs -> obs.Smokes |> Convert.ToInt32)
    |]

/// Cancellation source for the training run. It lives at the top level so
/// that `source.Cancel()` can be called from outside `learner`.
let source = new System.Threading.CancellationTokenSource()

/// Learning algorithm that fits a `LogisticRegression` model using
/// iteratively reweighted least squares. It observes `source.Token`.
let learner =
    // There are multiple algorithms available.
    // For example:
    IterativeReweightedLeastSquares<LogisticRegression>(
        Tolerance = 1e-4,
        Iterations = 1000,
        Regularization = 0.0,
        Token = source.Token
    )
    // Another one would be:
//    let alg = LogisticRegression(NumberOfInputs = (features |> Seq.length))
//    LogisticGradientDescent(alg, 
//        Tolerance = 0.001,
//        Iterations = 100000,
//        Token=cancellationToken)

/// Trains the model on a background thread and exposes the result as an
/// `Async`. Training is queued via `Task.Run` as soon as this binding is
/// evaluated, and it may take some time to finish.
let modelTask =
    let training =
        System.Threading.Tasks.Task.Run(fun () -> learner.Learn(inputs, output))
    Async.AwaitTask training

// When running background, you could cancel the task:
//source.Cancel()

// For now, simply block the current thread until training has finished:
let model = Async.RunSynchronously modelTask

// Print the odds ratio of every feature, smallest first.
// The coefficient index is shifted by one; per the Accord.NET documentation
// index 0 of the regression is the intercept.
features
|> Seq.mapi (fun position (label, _) -> label, model.GetOddsRatio(position + 1))
|> Seq.sortBy snd
|> Seq.iter (fun pair -> printfn "%A" pair)
// Output:
//("Age", 1.020859703)
//("Smokes", 5.858474898)

// Textual form of the linear part of the fitted model:
let formula =
    let linearPart = model.Linear
    linearPart.ToString()
// Output: 
// "y(x0, x1) = 0.0206451183100222*x0 + 1.76788931343272*x1 + -2.45774643623285"

// Run the model against the very items it was trained on. This proves little:
// you should split your sample data into two sets, use one of them to train
// the model, and use the other to test the accuracy of its predictions.
let items =
    let decisions = model.Decide(inputs)
    decisions |> Array.map Convert.ToDouble
// [|0.0; 0.0; 1.0; 0.0; 1.0; 1.0; 0.0; 0.0; 0.0; 1.0|]
namespace System
Multiple items
namespace FSharp

--------------------
namespace Microsoft.FSharp
Multiple items
namespace FSharp.Data

--------------------
namespace Microsoft.FSharp.Data
namespace Accord
namespace Accord.Statistics
namespace Accord.Statistics.Models
namespace Accord.Statistics.Models.Regression
namespace Accord.Statistics.Models.Regression.Fitting
type People = CsvProvider<...>

Full name: Script.People
type CsvProvider

Full name: FSharp.Data.CsvProvider


<summary>Typed representation of a CSV file.</summary>
       <param name='Sample'>Location of a CSV sample file or a string containing a sample CSV document.</param>
       <param name='Separators'>Column delimiter(s). Defaults to `,`.</param>
       <param name='InferRows'>Number of rows to use for inference. Defaults to `1000`. If this is zero, all rows are used.</param>
       <param name='Schema'>Optional column types, in a comma separated list. Valid types are `int`, `int64`, `bool`, `float`, `decimal`, `date`, `guid`, `string`, `int?`, `int64?`, `bool?`, `float?`, `decimal?`, `date?`, `guid?`, `int option`, `int64 option`, `bool option`, `float option`, `decimal option`, `date option`, `guid option` and `string option`.
       You can also specify a unit and the name of the column like this: `Name (type&lt;unit&gt;)`, or you can override only the name. If you don't want to specify all the columns, you can reference the columns by name like this: `ColumnName=type`.</param>
       <param name='HasHeaders'>Whether the sample contains the names of the columns as its first line.</param>
       <param name='IgnoreErrors'>Whether to ignore rows that have the wrong number of columns or which can't be parsed using the inferred or specified schema. Otherwise an exception is thrown when these rows are encountered.</param>
       <param name='SkipRows'>Skips the first n rows of the CSV file.</param>
       <param name='AssumeMissingValues'>When set to true, the type provider will assume all columns can have missing values, even if in the provided sample all values are present. Defaults to false.</param>
       <param name='PreferOptionals'>When set to true, inference will prefer to use the option type instead of nullable types, `double.NaN` or `""` for missing values. Defaults to false.</param>
       <param name='Quote'>The quotation mark (for surrounding values containing the delimiter). Defaults to `"`.</param>
       <param name='MissingValues'>The set of strings recognized as missing values. Defaults to `NaN,NA,N/A,#N/A,:,-,TBA,TBD`.</param>
       <param name='CacheRows'>Whether the rows should be cached so they can be iterated multiple times. Defaults to true. Disable for large datasets.</param>
       <param name='Culture'>The culture used for parsing numbers and dates. Defaults to the invariant culture.</param>
       <param name='Encoding'>The encoding used to read the sample. You can specify either the character set name or the codepage number. Defaults to UTF8 for files, and to ISO-8859-1 for HTTP requests, unless `charset` is specified in the `Content-Type` response header.</param>
       <param name='ResolutionFolder'>A directory that is used when resolving relative file references (at design time and in hosted execution).</param>
       <param name='EmbeddedResource'>When specified, the type provider first attempts to load the sample from the specified resource
          (e.g. 'MyCompany.MyAssembly, resource_name.csv'). This is useful when exposing types generated by the type provider.</param>
val inputs : float [] []

Full name: Script.inputs
val output : obj []

Full name: Script.output
CsvProvider<...>.Load(uri: string) : CsvProvider<...>


Loads CSV from the specified uri

CsvProvider<...>.Load(reader: IO.TextReader) : CsvProvider<...>


Loads CSV from the specified reader

CsvProvider<...>.Load(stream: IO.Stream) : CsvProvider<...>


Loads CSV from the specified stream
module Seq

from Microsoft.FSharp.Collections
val map : mapping:('T -> 'U) -> source:seq<'T> -> seq<'U>

Full name: Microsoft.FSharp.Collections.Seq.map
val row : CsvProvider<...>.Row
Multiple items
val float : value:'T -> float (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.float

--------------------
type float = Double

Full name: Microsoft.FSharp.Core.float

--------------------
type float<'Measure> = float

Full name: Microsoft.FSharp.Core.float<_>
type Convert =
  static val DBNull : obj
  static member ChangeType : value:obj * typeCode:TypeCode -> obj + 3 overloads
  static member FromBase64CharArray : inArray:char[] * offset:int * length:int -> byte[]
  static member FromBase64String : s:string -> byte[]
  static member GetTypeCode : value:obj -> TypeCode
  static member IsDBNull : value:obj -> bool
  static member ToBase64CharArray : inArray:byte[] * offsetIn:int * length:int * outArray:char[] * offsetOut:int -> int + 1 overload
  static member ToBase64String : inArray:byte[] -> string + 3 overloads
  static member ToBoolean : value:obj -> bool + 17 overloads
  static member ToByte : value:obj -> byte + 18 overloads
  ...

Full name: System.Convert
Convert.ToDouble(value: DateTime) : float
   (+0 other overloads)
Convert.ToDouble(value: bool) : float
   (+0 other overloads)
Convert.ToDouble(value: string) : float
   (+0 other overloads)
Convert.ToDouble(value: decimal) : float
   (+0 other overloads)
Convert.ToDouble(value: float) : float
   (+0 other overloads)
Convert.ToDouble(value: float32) : float
   (+0 other overloads)
Convert.ToDouble(value: uint64) : float
   (+0 other overloads)
Convert.ToDouble(value: int64) : float
   (+0 other overloads)
Convert.ToDouble(value: uint32) : float
   (+0 other overloads)
Convert.ToDouble(value: int) : float
   (+0 other overloads)
val toArray : source:seq<'T> -> 'T []

Full name: Microsoft.FSharp.Collections.Seq.toArray
type Array =
  member Clone : unit -> obj
  member CopyTo : array:Array * index:int -> unit + 1 overload
  member GetEnumerator : unit -> IEnumerator
  member GetLength : dimension:int -> int
  member GetLongLength : dimension:int -> int64
  member GetLowerBound : dimension:int -> int
  member GetUpperBound : dimension:int -> int
  member GetValue : [<ParamArray>] indices:int[] -> obj + 7 overloads
  member Initialize : unit -> unit
  member IsFixedSize : bool
  ...

Full name: System.Array
val unzip : array:('T1 * 'T2) [] -> 'T1 [] * 'T2 []

Full name: Microsoft.FSharp.Collections.Array.unzip
type Observation = CsvProvider<...>.Row

Full name: Script.Observation
type Row =
  inherit Tuple<string>
  new : sampleCsv: string -> Row
  member Item1 : string
  member ``Sample.csv`` : string
  member ``System.ITuple.Size`` : int

Full name: FSharp.Data.CsvProvider,Sample="sample.csv",Separators=",",InferRows="2000".Row
Multiple items
type StructuredFormatDisplayAttribute =
  inherit Attribute
  new : value:string -> StructuredFormatDisplayAttribute
  member Value : string

Full name: Microsoft.FSharp.Core.StructuredFormatDisplayAttribute

--------------------
new : value:string -> StructuredFormatDisplayAttribute
type Feature = string * (Observation -> int)

Full name: Script.Feature
Multiple items
val string : value:'T -> string

Full name: Microsoft.FSharp.Core.Operators.string

--------------------
type string = String

Full name: Microsoft.FSharp.Core.string
Multiple items
val int : value:'T -> int (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.int

--------------------
type int = int32

Full name: Microsoft.FSharp.Core.int

--------------------
type int<'Measure> = int

Full name: Microsoft.FSharp.Core.int<_>
val features : Feature []

Full name: Script.features
val obs : Observation
Convert.ToInt32(value: DateTime) : int
   (+0 other overloads)
Convert.ToInt32(value: string) : int
   (+0 other overloads)
Convert.ToInt32(value: decimal) : int
   (+0 other overloads)
Convert.ToInt32(value: float) : int
   (+0 other overloads)
Convert.ToInt32(value: float32) : int
   (+0 other overloads)
Convert.ToInt32(value: uint64) : int
   (+0 other overloads)
Convert.ToInt32(value: int64) : int
   (+0 other overloads)
Convert.ToInt32(value: int) : int
   (+0 other overloads)
Convert.ToInt32(value: uint32) : int
   (+0 other overloads)
Convert.ToInt32(value: uint16) : int
   (+0 other overloads)
val learner : IterativeReweightedLeastSquares<LogisticRegression>

Full name: Script.learner
val cancellationToken : Threading.CancellationToken
val source : Threading.CancellationTokenSource
val s : Threading.CancellationTokenSource
namespace System.Threading
Multiple items
type CancellationTokenSource =
  new : unit -> CancellationTokenSource
  member Cancel : unit -> unit + 1 overload
  member Dispose : unit -> unit
  member IsCancellationRequested : bool
  member Token : CancellationToken
  static member CreateLinkedTokenSource : [<ParamArray>] tokens:CancellationToken[] -> CancellationTokenSource + 1 overload

Full name: System.Threading.CancellationTokenSource

--------------------
Threading.CancellationTokenSource() : unit
property Threading.CancellationTokenSource.Token: Threading.CancellationToken
Multiple items
type IterativeReweightedLeastSquares =
  inherit IterativeReweightedLeastSquares<GeneralizedLinearRegression>
  new : regression:LogisticRegression -> IterativeReweightedLeastSquares + 1 overload
  member ComputeError : inputs:float[][] * outputs:float[] -> float
  member Run : inputs:float[][] * outputs:int[] -> float + 6 overloads

Full name: Accord.Statistics.Models.Regression.Fitting.IterativeReweightedLeastSquares

--------------------
type IterativeReweightedLeastSquares<'TModel (requires default constructor and 'TModel :> GeneralizedLinearRegression)> =
  new : unit -> IterativeReweightedLeastSquares<'TModel>
  member ComputeStandardErrors : bool with get, set
  member GetInformationMatrix : unit -> float[][]
  member Gradient : float[]
  member Hessian : float[][]
  member Iterations : int with get, set
  member Learn : x:float[][] * y:int[] * ?weights:float[] -> 'TModel + 2 overloads
  member Model : 'TModel with get, set
  member Parameters : int
  member Previous : float[]
  ...

Full name: Accord.Statistics.Models.Regression.Fitting.IterativeReweightedLeastSquares<_>

--------------------
IterativeReweightedLeastSquares(regression: LogisticRegression) : unit
IterativeReweightedLeastSquares(regression: GeneralizedLinearRegression) : unit

--------------------
IterativeReweightedLeastSquares() : unit
Multiple items
type LogisticRegression =
  inherit GeneralizedLinearRegression
  new : unit -> LogisticRegression + 2 overloads
  member GetConfidenceInterval : index:int -> DoubleRange
  member GetOddsRatio : index:int -> float
  static member FromWeights : weights:float[] -> LogisticRegression + 1 overload

Full name: Accord.Statistics.Models.Regression.LogisticRegression

--------------------
LogisticRegression() : unit
val modelTask : Async<obj>

Full name: Script.modelTask


 There are multiple algorithms available.
 For example:
 Teach the model in background thread. This may take some time.
namespace System.Threading.Tasks
Multiple items
type Task<'TResult> =
  inherit Task
  new : function:Func<'TResult> -> Task<'TResult> + 7 overloads
  member ContinueWith : continuationAction:Action<Task<'TResult>> -> Task + 9 overloads
  member Result : 'TResult with get, set
  static member Factory : TaskFactory<'TResult>

Full name: System.Threading.Tasks.Task<_>

--------------------
type Task =
  new : action:Action -> Task + 7 overloads
  member AsyncState : obj
  member ContinueWith : continuationAction:Action<Task> -> Task + 9 overloads
  member CreationOptions : TaskCreationOptions
  member Dispose : unit -> unit
  member Exception : AggregateException
  member Id : int
  member IsCanceled : bool
  member IsCompleted : bool
  member IsFaulted : bool
  ...

Full name: System.Threading.Tasks.Task

--------------------
Threading.Tasks.Task(function: Func<'TResult>) : unit
Threading.Tasks.Task(function: Func<'TResult>, cancellationToken: Threading.CancellationToken) : unit
Threading.Tasks.Task(function: Func<'TResult>, creationOptions: Threading.Tasks.TaskCreationOptions) : unit
Threading.Tasks.Task(function: Func<obj,'TResult>, state: obj) : unit
Threading.Tasks.Task(function: Func<'TResult>, cancellationToken: Threading.CancellationToken, creationOptions: Threading.Tasks.TaskCreationOptions) : unit
Threading.Tasks.Task(function: Func<obj,'TResult>, state: obj, cancellationToken: Threading.CancellationToken) : unit
Threading.Tasks.Task(function: Func<obj,'TResult>, state: obj, creationOptions: Threading.Tasks.TaskCreationOptions) : unit
Threading.Tasks.Task(function: Func<obj,'TResult>, state: obj, cancellationToken: Threading.CancellationToken, creationOptions: Threading.Tasks.TaskCreationOptions) : unit

--------------------
Threading.Tasks.Task(action: Action) : unit
Threading.Tasks.Task(action: Action, cancellationToken: Threading.CancellationToken) : unit
Threading.Tasks.Task(action: Action, creationOptions: Threading.Tasks.TaskCreationOptions) : unit
Threading.Tasks.Task(action: Action<obj>, state: obj) : unit
Threading.Tasks.Task(action: Action, cancellationToken: Threading.CancellationToken, creationOptions: Threading.Tasks.TaskCreationOptions) : unit
Threading.Tasks.Task(action: Action<obj>, state: obj, cancellationToken: Threading.CancellationToken) : unit
Threading.Tasks.Task(action: Action<obj>, state: obj, creationOptions: Threading.Tasks.TaskCreationOptions) : unit
Threading.Tasks.Task(action: Action<obj>, state: obj, cancellationToken: Threading.CancellationToken, creationOptions: Threading.Tasks.TaskCreationOptions) : unit
IterativeReweightedLeastSquares.Learn(x: float [] [], y: float [], ?weights: float []) : LogisticRegression
IterativeReweightedLeastSquares.Learn(x: float [] [], y: bool [], ?weights: float []) : LogisticRegression
IterativeReweightedLeastSquares.Learn(x: float [] [], y: int [], ?weights: float []) : LogisticRegression
Multiple items
type Async
static member AsBeginEnd : computation:('Arg -> Async<'T>) -> ('Arg * AsyncCallback * obj -> IAsyncResult) * (IAsyncResult -> 'T) * (IAsyncResult -> unit)
static member AwaitEvent : event:IEvent<'Del,'T> * ?cancelAction:(unit -> unit) -> Async<'T> (requires delegate and 'Del :> Delegate)
static member AwaitIAsyncResult : iar:IAsyncResult * ?millisecondsTimeout:int -> Async<bool>
static member AwaitTask : task:Task -> Async<unit>
static member AwaitTask : task:Task<'T> -> Async<'T>
static member AwaitWaitHandle : waitHandle:WaitHandle * ?millisecondsTimeout:int -> Async<bool>
static member CancelDefaultToken : unit -> unit
static member Catch : computation:Async<'T> -> Async<Choice<'T,exn>>
static member FromBeginEnd : beginAction:(AsyncCallback * obj -> IAsyncResult) * endAction:(IAsyncResult -> 'T) * ?cancelAction:(unit -> unit) -> Async<'T>
static member FromBeginEnd : arg:'Arg1 * beginAction:('Arg1 * AsyncCallback * obj -> IAsyncResult) * endAction:(IAsyncResult -> 'T) * ?cancelAction:(unit -> unit) -> Async<'T>
static member FromBeginEnd : arg1:'Arg1 * arg2:'Arg2 * beginAction:('Arg1 * 'Arg2 * AsyncCallback * obj -> IAsyncResult) * endAction:(IAsyncResult -> 'T) * ?cancelAction:(unit -> unit) -> Async<'T>
static member FromBeginEnd : arg1:'Arg1 * arg2:'Arg2 * arg3:'Arg3 * beginAction:('Arg1 * 'Arg2 * 'Arg3 * AsyncCallback * obj -> IAsyncResult) * endAction:(IAsyncResult -> 'T) * ?cancelAction:(unit -> unit) -> Async<'T>
static member FromContinuations : callback:(('T -> unit) * (exn -> unit) * (OperationCanceledException -> unit) -> unit) -> Async<'T>
static member Ignore : computation:Async<'T> -> Async<unit>
static member OnCancel : interruption:(unit -> unit) -> Async<IDisposable>
static member Parallel : computations:seq<Async<'T>> -> Async<'T []>
static member RunSynchronously : computation:Async<'T> * ?timeout:int * ?cancellationToken:CancellationToken -> 'T
static member Sleep : millisecondsDueTime:int -> Async<unit>
static member Start : computation:Async<unit> * ?cancellationToken:CancellationToken -> unit
static member StartAsTask : computation:Async<'T> * ?taskCreationOptions:TaskCreationOptions * ?cancellationToken:CancellationToken -> Task<'T>
static member StartChild : computation:Async<'T> * ?millisecondsTimeout:int -> Async<Async<'T>>
static member StartChildAsTask : computation:Async<'T> * ?taskCreationOptions:TaskCreationOptions -> Async<Task<'T>>
static member StartImmediate : computation:Async<unit> * ?cancellationToken:CancellationToken -> unit
static member StartWithContinuations : computation:Async<'T> * continuation:('T -> unit) * exceptionContinuation:(exn -> unit) * cancellationContinuation:(OperationCanceledException -> unit) * ?cancellationToken:CancellationToken -> unit
static member SwitchToContext : syncContext:SynchronizationContext -> Async<unit>
static member SwitchToNewThread : unit -> Async<unit>
static member SwitchToThreadPool : unit -> Async<unit>
static member TryCancelled : computation:Async<'T> * compensation:(OperationCanceledException -> unit) -> Async<'T>
static member CancellationToken : Async<CancellationToken>
static member DefaultCancellationToken : CancellationToken

Full name: Microsoft.FSharp.Control.Async

--------------------
type Async<'T>

Full name: Microsoft.FSharp.Control.Async<_>
static member Async.AwaitTask : task:Threading.Tasks.Task -> Async<unit>
static member Async.AwaitTask : task:Threading.Tasks.Task<'T> -> Async<'T>
val model : obj

Full name: Script.model
static member Async.RunSynchronously : computation:Async<'T> * ?timeout:int * ?cancellationToken:Threading.CancellationToken -> 'T
val mapi : mapping:(int -> 'T -> 'U) -> source:seq<'T> -> seq<'U>

Full name: Microsoft.FSharp.Collections.Seq.mapi
val idx : int
val f : Feature
val name : string
val fst : tuple:('T1 * 'T2) -> 'T1

Full name: Microsoft.FSharp.Core.Operators.fst
val odds : IComparable
val sortBy : projection:('T -> 'Key) -> source:seq<'T> -> seq<'T> (requires comparison)

Full name: Microsoft.FSharp.Collections.Seq.sortBy
val snd : tuple:('T1 * 'T2) -> 'T2

Full name: Microsoft.FSharp.Core.Operators.snd
val iter : action:('T -> unit) -> source:seq<'T> -> unit

Full name: Microsoft.FSharp.Collections.Seq.iter
val printfn : format:Printf.TextWriterFormat<'T> -> 'T

Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.printfn
val formula : obj

Full name: Script.formula
namespace Accord.Statistics.Models.Regression.Linear
val items : obj []

Full name: Script.items
val map : mapping:('T -> 'U) -> array:'T [] -> 'U []

Full name: Microsoft.FSharp.Collections.Array.map
Raw view Test code New version

More information

Link:http://fssnip.net/7Sz
Posted:7 years ago
Author:Tuomas Hietanen
Tags: machine learning