1 person likes it.
Like the snippet!
JaroWinkler in F#, and more
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
|
/// Compute the Jaro-Winkler score of s1 and s2: the Jaro score (from `jaro`,
/// defined outside this block) boosted in proportion to the length of the
/// prefix the two inputs share, with that prefix capped at four characters.
/// Returns 0.0 if the computation produces NaN.
let jaroWinkler s1 s2 =
    let baseScore = jaro s1 s2
    // Highest index that exists in both inputs
    let lastShared = (min s1.Length s2.Length) - 1
    // Count leading positions at which the two inputs hold the same character
    let rec prefixLength idx count =
        if idx <= lastShared && s1.[idx] = s2.[idx] then
            prefixLength (idx + 1) (count + 1.0)
        else
            count
    // Only the first four matching characters contribute to the boost
    let prefix = min (prefixLength 0 0.0) 4.0
    // Scaling factor applied per matching prefix character
    let scaling = 0.1
    let boosted = baseScore + (prefix * scaling * (1.0 - baseScore))
    // Defensive guard: never hand a NaN back to the caller
    if Double.IsNaN boosted then 0.0 else boosted
/// Comparing "RICK" with itself is expected to give a score of 1.000.
[<Fact>]
let ``Jaro-Winkler identity test`` () =
    let result = jaroWinkler "RICK" "RICK"
    // Format with the invariant culture so the decimal separator is always '.',
    // independent of the regional settings of the machine running the test.
    let formatted =
        String.Format(System.Globalization.CultureInfo.InvariantCulture, "{0:0.000}", result)
    Assert.Equal("1.000", formatted)
/// "MARTHA" vs "MARHTA" is expected to give 0.961 at three decimal places.
[<Fact>]
let ``Jaro-Winkler martha test`` () =
    let result = jaroWinkler "MARTHA" "MARHTA"
    // Format with the invariant culture so the decimal separator is always '.',
    // independent of the regional settings of the machine running the test.
    let formatted =
        String.Format(System.Globalization.CultureInfo.InvariantCulture, "{0:0.000}", result)
    Assert.Equal("0.961", formatted)
/// "DWAYNE" vs "DUANE" is expected to give 0.840 at three decimal places.
[<Fact>]
let ``Jaro-Winkler dwayne test`` () =
    let result = jaroWinkler "DWAYNE" "DUANE"
    // Format with the invariant culture so the decimal separator is always '.',
    // independent of the regional settings of the machine running the test.
    let formatted =
        String.Format(System.Globalization.CultureInfo.InvariantCulture, "{0:0.000}", result)
    Assert.Equal("0.840", formatted)
/// "DIXON" vs "DICKSONX" is expected to give 0.813 at three decimal places.
[<Fact>]
let ``Jaro-Winkler dixon test`` () =
    let result = jaroWinkler "DIXON" "DICKSONX"
    // Format with the invariant culture so the decimal separator is always '.',
    // independent of the regional settings of the machine running the test.
    let formatted =
        String.Format(System.Globalization.CultureInfo.InvariantCulture, "{0:0.000}", result)
    Assert.Equal("0.813", formatted)
|
val jaroWinkler : s1:'a -> s2:'b -> float
Full name: Script.jaroWinkler
Calculate the Jaro-Winkler distance of s1 and s2
val s1 : 'a
val s2 : 'b
val jaroScore : float
val maxLength : int
val min : e1:'T -> e2:'T -> 'T (requires comparison)
Full name: Microsoft.FSharp.Core.Operators.min
val calcL : (int -> float -> float)
val i : int
val acc : float
val l : float
val p : float
val result : float
val ( Jaro-Winkler identity test ) : unit -> 'a
Full name: Script.( Jaro-Winkler identity test )
module String
from Microsoft.FSharp.Core
Multiple items
type Format<'Printer,'State,'Residue,'Result> = PrintfFormat<'Printer,'State,'Residue,'Result>
Full name: Microsoft.FSharp.Core.Format<_,_,_,_>
--------------------
type Format<'Printer,'State,'Residue,'Result,'Tuple> = PrintfFormat<'Printer,'State,'Residue,'Result,'Tuple>
Full name: Microsoft.FSharp.Core.Format<_,_,_,_,_>
val ( Jaro-Winkler martha test ) : unit -> 'a
Full name: Script.( Jaro-Winkler martha test )
val ( Jaro-Winkler dwayne test ) : unit -> 'a
Full name: Script.( Jaro-Winkler dwayne test )
val ( Jaro-Winkler dixon test ) : unit -> 'a
Full name: Script.( Jaro-Winkler dixon test )
More information