3 people like it.

EditDistance

Computes the Minimum Edit Distance or the Levenshtein Distance between two words

 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
12: 
13: 
14: 
15: 
16: 
17: 
18: 
19: 
20: 
21: 
22: 
23: 
24: 
25: 
26: 
27: 
28: 
29: 
30: 
31: 
32: 
33: 
34: 
35: 
36: 
37: 
38: 
39: 
40: 
41: 
42: 
43: 
44: 
45: 
46: 
47: 
48: 
type EditType = Deletion | Insertion | Substitution

type DistanceType = MinimumEditDistance | LevenshteinDistance

let getEditDistance distanceType (X:string) (Y:string) =
    let m = X.Length
    let n = Y.Length
    let d = Array2D.init (m + 1) (n + 1) (fun i j -> if j = 0 then i elif i = 0 then j else 0)
    let ptr = Array2D.init (m + 1) (n + 1) (fun i j -> if j = 0 then Deletion elif i = 0 then Insertion else Substitution)
    let penalizationForSubstitution = 
        match distanceType with
        | MinimumEditDistance -> 1
        | LevenshteinDistance -> 2
    for i in 1..m do
        for j in 1..n do
            let a, b = Seq.minBy fst [d.[i-1, j] + 1, Deletion
                                      d.[i, j-1] + 1, Insertion
                                      d.[i-1, j-1] + (if X.[i-1] <> Y.[j-1] then penalizationForSubstitution else 0), Substitution]
            d.[i, j] <- a
            ptr.[i, j] <- b
    let alignment = 
        (m, n) 
        |> Seq.unfold (fun (i, j) -> 
            if i = 0 && j = 0 then
                None
            else
                match ptr.[i, j] with
                | Deletion -> Some((X.[i-1], '*'), (i-1, j))
                | Insertion -> Some(('*', Y.[j-1]), (i, j-1))
                | Substitution -> Some((X.[i-1], Y.[j-1]), (i-1, j-1)))
        |> Array.ofSeq
        |> Array.rev
    d.[m, n], alignment

let printAlignment alignment = 
    let toString (chars : char array) = new string(chars)
    alignment |> Array.map fst |> toString |> printfn "%s"
    alignment |> Array.map snd |> toString |> printfn "%s"

let distanceM, alignmentM = getEditDistance MinimumEditDistance "intention" "execution"

printfn "Minimum Edit Distance: %d" distanceM
printAlignment alignmentM

let distanceL, alignmentL = getEditDistance LevenshteinDistance "intention" "execution"

printfn "Levenshtein Distance: %d" distanceL
printAlignment alignmentL
union case EditType.Deletion: EditType
union case EditType.Insertion: EditType
union case EditType.Substitution: EditType
type DistanceType =
  | MinimumEditDistance
  | LevenshteinDistance

Full name: Script.DistanceType
union case DistanceType.MinimumEditDistance: DistanceType
union case DistanceType.LevenshteinDistance: DistanceType
val getEditDistance : distanceType:DistanceType -> X:string -> Y:string -> int * (char * char) []

Full name: Script.getEditDistance
val distanceType : DistanceType
val X : string
Multiple items
val string : value:'T -> string

Full name: Microsoft.FSharp.Core.Operators.string

--------------------
type string = System.String

Full name: Microsoft.FSharp.Core.string
val Y : string
val m : int
property System.String.Length: int
val n : int
val d : int [,]
module Array2D

from Microsoft.FSharp.Collections
val init : length1:int -> length2:int -> initializer:(int -> int -> 'T) -> 'T [,]

Full name: Microsoft.FSharp.Collections.Array2D.init
val i : int
val j : int
val ptr : EditType [,]
val penalizationForSubstitution : int
val i : int32
val j : int32
val a : int
val b : EditType
module Seq

from Microsoft.FSharp.Collections
val minBy : projection:('T -> 'U) -> source:seq<'T> -> 'T (requires comparison)

Full name: Microsoft.FSharp.Collections.Seq.minBy
val fst : tuple:('T1 * 'T2) -> 'T1

Full name: Microsoft.FSharp.Core.Operators.fst
val alignment : (char * char) []
val unfold : generator:('State -> ('T * 'State) option) -> state:'State -> seq<'T>

Full name: Microsoft.FSharp.Collections.Seq.unfold
union case Option.None: Option<'T>
union case Option.Some: Value: 'T -> Option<'T>
module Array

from Microsoft.FSharp.Collections
val ofSeq : source:seq<'T> -> 'T []

Full name: Microsoft.FSharp.Collections.Array.ofSeq
val rev : array:'T [] -> 'T []

Full name: Microsoft.FSharp.Collections.Array.rev
val printAlignment : alignment:(char * char) [] -> unit

Full name: Script.printAlignment
val toString : (char array -> string)
val chars : char array
Multiple items
val char : value:'T -> char (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.char

--------------------
type char = System.Char

Full name: Microsoft.FSharp.Core.char
type 'T array = 'T []

Full name: Microsoft.FSharp.Core.array<_>
val map : mapping:('T -> 'U) -> array:'T [] -> 'U []

Full name: Microsoft.FSharp.Collections.Array.map
val printfn : format:Printf.TextWriterFormat<'T> -> 'T

Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.printfn
val snd : tuple:('T1 * 'T2) -> 'T2

Full name: Microsoft.FSharp.Core.Operators.snd
val distanceM : int

Full name: Script.distanceM
val alignmentM : (char * char) []

Full name: Script.alignmentM
val distanceL : int

Full name: Script.distanceL
val alignmentL : (char * char) []

Full name: Script.alignmentL
Raw view Test code New version

More information

Link:http://fssnip.net/bc
Posted:12 years ago
Author:Gustavo Guerra
Tags: algorithms , nlp