29 people like it.

# Jaro-Winkler in F#

Jaro-Winkler is a fast and effective name matching algorithm. For more info see "A Comparison of String Distance Metrics for Name-Matching Tasks" http://www.isi.edu/info-agents/workshops/ijcai03/papers/Cohen-p.pdf or the Wikipedia article http://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance

 ``` 1: 2: 3: 4: 5: 6: 7: 8: 9: 10: 11: 12: 13: 14: 15: 16: 17: 18: 19: 20: 21: 22: 23: 24: 25: 26: 27: 28: 29: 30: 31: 32: 33: 34: 35: 36: 37: 38: 39: 40: 41: 42: 43: 44: 45: 46: 47: 48: ``` ``````let windowAt str offset radius = let startAt = max 0 (offset - radius) let endAt = min (offset + radius) (String.length str - 1) [ for i in startAt .. endAt -> str.[i] ] let jaro s1 s2 = let matchRadius = let s1_l, s2_l = String.length s1, String.length s2 in (min s1_l s2_l) / 2 + (min s1_l s2_l) % 2 let commonChars chars1 chars2 = [ for i = 0 to String.length chars1 - 1 do let matchChar = chars1.[i] let windowChars = windowAt chars2 i matchRadius if windowChars |> List.exists (fun c -> c = matchChar) then yield matchChar ] let common1 = commonChars s1 s2 let common2 = commonChars s2 s1 let transpositions = List.zip common1 common2 |> List.fold (fun s (c1,c2) -> if c1 <> c2 then s + 1.0 else s) 0.0 |> (fun x -> x / 2.0) let s1length = float (String.length s1) let s2length = float (String.length s2) let c1length = float (List.length common1) let c2length = float (List.length common2) ((c1length / s1length) + (c2length / s2length) + ((c1length - transpositions) / c1length)) / 3.0 let jaroWinkler s1 s2 = let jaroScore = jaro s1 s2 let p = 0.1 let maxLength = (min s1.Length s2.Length) - 1 let rec calcL i acc = if i > maxLength || s1.[i] <> s2.[i] then acc else calcL (i + 1) (acc + 1.0) let l = max (calcL 0 0.0) 4.0 jaroScore + (l * p * (1.0 - jaroScore)) ``````
val windowAt : str:string -> offset:int -> radius:int -> char list

Full name: Script.windowAt
val str : string
val offset : int
val startAt : int
val max : e1:'T -> e2:'T -> 'T (requires comparison)

Full name: Microsoft.FSharp.Core.Operators.max
val endAt : int
val min : e1:'T -> e2:'T -> 'T (requires comparison)

Full name: Microsoft.FSharp.Core.Operators.min
module String

from Microsoft.FSharp.Core
val length : str:string -> int

Full name: Microsoft.FSharp.Core.String.length
val i : int
val jaro : s1:string -> s2:string -> float

Full name: Script.jaro
val s1 : string
val s2 : string
val s1_l : int
val s2_l : int
val commonChars : (string -> string -> char list)
val chars1 : string
val chars2 : string
val matchChar : char
val windowChars : char list
Multiple items
module List

from Microsoft.FSharp.Collections

--------------------
type List<'T> =
| ( [] )
| ( :: ) of Head: 'T * Tail: 'T list
interface IEnumerable
interface IEnumerable<'T>
member IsEmpty : bool
member Item : index:int -> 'T with get
member Length : int
member Tail : 'T list
static member Cons : head:'T * tail:'T list -> 'T list
static member Empty : 'T list

Full name: Microsoft.FSharp.Collections.List<_>
val exists : predicate:('T -> bool) -> list:'T list -> bool

Full name: Microsoft.FSharp.Collections.List.exists
val c : char
val common1 : char list
val common2 : char list
val transpositions : float
val zip : list1:'T1 list -> list2:'T2 list -> ('T1 * 'T2) list

Full name: Microsoft.FSharp.Collections.List.zip
val fold : folder:('State -> 'T -> 'State) -> state:'State -> list:'T list -> 'State

Full name: Microsoft.FSharp.Collections.List.fold
val s : float
val c1 : char
val c2 : char
val x : float
val s1length : float
Multiple items
val float : value:'T -> float (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.float

--------------------
type float = System.Double

Full name: Microsoft.FSharp.Core.float

--------------------
type float<'Measure> = float

Full name: Microsoft.FSharp.Core.float<_>
val s2length : float
val c1length : float
val length : list:'T list -> int

Full name: Microsoft.FSharp.Collections.List.length
val c2length : float
val jaroWinkler : s1:string -> s2:string -> float

Full name: Script.jaroWinkler
val jaroScore : float
val p : float
val maxLength : int
property System.String.Length: int
val calcL : (int -> float -> float)
val acc : float
val l : float