1 person likes it.

JaroWinkler in F#, and more

 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
12: 
13: 
14: 
15: 
16: 
17: 
18: 
19: 
20: 
21: 
22: 
23: 
24: 
25: 
26: 
27: 
28: 
29: 
30: 
31: 
32: 
33: 
34: 
35: 
/// Calculate the Jaro-Winkler score of s1 and s2.
/// The score is the Jaro score of the two strings (as computed by `jaro`),
/// boosted in proportion to the length of their common prefix, where at most
/// the first four characters of the prefix are taken into account.
/// Note: identical strings are expected to score 1.0, i.e. this is a
/// similarity measure rather than a distance in the strict sense.
let jaroWinkler (s1: string) (s2: string) = 
    let jaroScore = jaro s1 s2
    // Only the first four characters of the common prefix contribute to the
    // boost, so there is no point in scanning any further than that.
    let maxPrefixLength = 4
    let scanLimit = min maxPrefixLength (min s1.Length s2.Length)
    // Count the matching leading characters, stopping at the first mismatch
    // or at the scan limit, whichever comes first.
    let rec commonPrefixLength i =
        if i < scanLimit && s1.[i] = s2.[i] then commonPrefixLength (i + 1)
        else i
    let l = float (commonPrefixLength 0)
    // Scaling factor: how strongly a shared prefix pulls the score upwards
    let p = 0.1
    let result = jaroScore + (l * p * (1.0 - jaroScore))
    // Defensive guard: nothing in this function can produce NaN by itself,
    // but a NaN coming out of `jaro` would propagate into the result
    // (presumably possible for degenerate input such as empty strings -
    // TODO confirm against `jaro`). Map it to 0.0 in that case.
    if Double.IsNaN result then 0.0 else result

[<Fact>]
let ``Jaro-Winkler identity test`` () = 
    // Comparing a string with itself is expected to give the maximum score.
    let result = jaroWinkler "RICK" "RICK"
    // Format with the invariant culture so that the decimal separator is
    // always '.', whatever the current culture of the test machine is.
    let formatted = result.ToString("0.000", System.Globalization.CultureInfo.InvariantCulture)
    Assert.Equal("1.000", formatted)

[<Fact>]
let ``Jaro-Winkler martha test`` () = 
    // Two strings that differ by a transposition after a shared "MAR" prefix.
    let result = jaroWinkler "MARTHA" "MARHTA"
    // Format with the invariant culture so that the decimal separator is
    // always '.', whatever the current culture of the test machine is.
    let formatted = result.ToString("0.000", System.Globalization.CultureInfo.InvariantCulture)
    Assert.Equal("0.961", formatted)

[<Fact>]
let ``Jaro-Winkler dwayne test`` () = 
    // Strings of different lengths that share only a one-character prefix.
    let result = jaroWinkler "DWAYNE" "DUANE"
    // Format with the invariant culture so that the decimal separator is
    // always '.', whatever the current culture of the test machine is.
    let formatted = result.ToString("0.000", System.Globalization.CultureInfo.InvariantCulture)
    Assert.Equal("0.840", formatted)

[<Fact>]
let ``Jaro-Winkler dixon test`` () =
    // Strings of different lengths that share a two-character prefix.
    let result = jaroWinkler "DIXON" "DICKSONX"
    // Format with the invariant culture so that the decimal separator is
    // always '.', whatever the current culture of the test machine is.
    let formatted = result.ToString("0.000", System.Globalization.CultureInfo.InvariantCulture)
    Assert.Equal("0.813", formatted)
val jaroWinkler : s1:'a -> s2:'b -> float

Full name: Script.jaroWinkler


 Calculate the Jaro-Winkler distance of s1 and s2
val s1 : 'a
val s2 : 'b
val jaroScore : float
val maxLength : int
val min : e1:'T -> e2:'T -> 'T (requires comparison)

Full name: Microsoft.FSharp.Core.Operators.min
val calcL : (int -> float -> float)
val i : int
val acc : float
val l : float
val p : float
val result : float
val ( Jaro-Winkler identity test ) : unit -> 'a

Full name: Script.( Jaro-Winkler identity test )
module String

from Microsoft.FSharp.Core
Multiple items
type Format<'Printer,'State,'Residue,'Result> = PrintfFormat<'Printer,'State,'Residue,'Result>

Full name: Microsoft.FSharp.Core.Format<_,_,_,_>

--------------------
type Format<'Printer,'State,'Residue,'Result,'Tuple> = PrintfFormat<'Printer,'State,'Residue,'Result,'Tuple>

Full name: Microsoft.FSharp.Core.Format<_,_,_,_,_>
val ( Jaro-Winkler martha test ) : unit -> 'a

Full name: Script.( Jaro-Winkler martha test )
val ( Jaro-Winkler dwayne test ) : unit -> 'a

Full name: Script.( Jaro-Winkler dwayne test )
val ( Jaro-Winkler dixon test ) : unit -> 'a

Full name: Script.( Jaro-Winkler dixon test )
Raw view Test code New version

More information

Link:http://fssnip.net/7I
Posted:14 years ago
Author:
Tags: