5 people like it.

Soundex phonetic algorithm

Soundex is a phonetic algorithm for indexing names by sound, as pronounced in English implemented in F#. https://en.wikipedia.org/wiki/Soundex

 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
12: 
13: 
14: 
15: 
16: 
17: 
18: 
19: 
20: 
21: 
22: 
23: 
24: 
25: 
26: 
27: 
28: 
29: 
30: 
31: 
32: 
33: 
34: 
35: 
36: 
37: 
38: 
39: 
40: 
41: 
42: 
43: 
44: 
45: 
46: 
47: 
48: 
49: 
50: 
51: 
52: 
53: 
54: 
55: 
56: 
57: 
58: 
59: 
60: 
61: 
62: 
63: 
64: 
65: 
66: 
67: 
68: 
69: 
70: 
71: 
72: 
73: 
74: 
75: 
76: 
77: 
let americanSoundex (x : string) = 
    let toString (xs : char list) = new System.String(xs |> Array.ofList)
    let _americanSoundex =
        let toUpper (x : string) = x.ToUpper()
        let toArray (x : string) = x.ToCharArray()

        let f1 ch = 
            match ch with
            | 'H' | 'W' -> false
            | _ -> true

        let f2 ch =
            match ch with
            | 'B' | 'F' | 'P' | 'V' -> '1'
            | 'C' | 'G' | 'J' | 'K' | 'Q' | 'S' | 'X' | 'Z' -> '2'
            | 'D' | 'T' -> '3'
            | 'L' -> '4'        
            | 'M' | 'N' -> '5'
            | 'R' -> '6'
            | _ -> ch

        let rec f3 xs =
            match xs with
            | h0 :: h1 :: t -> h0 :: f3 (if (h0 = h1) then t else (h1 :: t))
            | h :: _ -> [h]
            | _ -> []

        let f4 ch = 
            match ch with
            | 'A' | 'E' | 'I' | 'O' | 'U' | 'Y' -> false
            | _ -> true

        let f5 ch first =
            if ('0' <= ch && ch <= '9') then first
            else ch

        let f6 xs =
            let len = List.length xs
            seq{for i = 0 to 3 - len do yield '0'} 
                |> Seq.append (xs |> Seq.take (System.Math.Min(4, len)))
                |> Seq.toList

        let a = x |> toUpper |> toArray |> Array.toList
        let b = a |> List.filter f1 //1
        let c = b |> List.map f2 //2
        let d = c |> f3 //3
        let e = d |> List.tail |> List.filter f4 //4
        let f = f5 (d |> List.head) (a |> List.head) :: e //5
        f6 f //6
    
    if (x.Length > 0) then toString(_americanSoundex) 
    else "0000"

["Robert"; "Rupert"; "Robbert"; "Rubin"; 
 "Beer"; "Bear"; "Bearer"; 
 "Smith"; "Smyth";
 "Ashcraft"; "Ashcroft";
  "Tymczak"; "Pfister"] 
 |> List.map (fun x -> (x, americanSoundex x)) |> List.iter (fun (x, y) -> printfn "%-8s = %s" x y)

(*

Robert   = R163
Rupert   = R163
Robbert  = R163
Rubin    = R150
Beer     = B600
Bear     = B600
Bearer   = B660
Smith    = S530
Smyth    = S530
Ashcraft = A261
Ashcroft = A261
Tymczak  = T522
Pfister  = P236

*)
val americanSoundex : x:string -> System.String

Full name: Script.americanSoundex
val x : string
Multiple items
val string : value:'T -> string

Full name: Microsoft.FSharp.Core.Operators.string

--------------------
type string = System.String

Full name: Microsoft.FSharp.Core.string
val toString : (char list -> System.String)
val xs : char list
Multiple items
val char : value:'T -> char (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.char

--------------------
type char = System.Char

Full name: Microsoft.FSharp.Core.char
type 'T list = List<'T>

Full name: Microsoft.FSharp.Collections.list<_>
namespace System
Multiple items
type String =
  new : value:char -> string + 7 overloads
  member Chars : int -> char
  member Clone : unit -> obj
  member CompareTo : value:obj -> int + 1 overload
  member Contains : value:string -> bool
  member CopyTo : sourceIndex:int * destination:char[] * destinationIndex:int * count:int -> unit
  member EndsWith : value:string -> bool + 2 overloads
  member Equals : obj:obj -> bool + 2 overloads
  member GetEnumerator : unit -> CharEnumerator
  member GetHashCode : unit -> int
  ...

Full name: System.String

--------------------
System.String(value: nativeptr<char>) : unit
System.String(value: nativeptr<sbyte>) : unit
System.String(value: char []) : unit
System.String(c: char, count: int) : unit
System.String(value: nativeptr<char>, startIndex: int, length: int) : unit
System.String(value: nativeptr<sbyte>, startIndex: int, length: int) : unit
System.String(value: char [], startIndex: int, length: int) : unit
System.String(value: nativeptr<sbyte>, startIndex: int, length: int, enc: System.Text.Encoding) : unit
module Array

from Microsoft.FSharp.Collections
val ofList : list:'T list -> 'T []

Full name: Microsoft.FSharp.Collections.Array.ofList
val toUpper : (string -> string)
System.String.ToUpper() : string
System.String.ToUpper(culture: System.Globalization.CultureInfo) : string
val toArray : (string -> char [])
System.String.ToCharArray() : char []
System.String.ToCharArray(startIndex: int, length: int) : char []
val f1 : (char -> bool)
val ch : char
val f2 : (char -> char)
val f3 : ('a list -> 'a list) (requires equality)
val xs : 'a list (requires equality)
val h0 : 'a (requires equality)
val h1 : 'a (requires equality)
val t : 'a list (requires equality)
val h : 'a (requires equality)
val f4 : (char -> bool)
val f5 : (char -> char -> char)
val first : char
val f6 : (char list -> char list)
val len : int
Multiple items
module List

from Microsoft.FSharp.Collections

--------------------
type List<'T> =
  | ( [] )
  | ( :: ) of Head: 'T * Tail: 'T list
  interface IEnumerable
  interface IEnumerable<'T>
  member Head : 'T
  member IsEmpty : bool
  member Item : index:int -> 'T with get
  member Length : int
  member Tail : 'T list
  static member Cons : head:'T * tail:'T list -> 'T list
  static member Empty : 'T list

Full name: Microsoft.FSharp.Collections.List<_>
val length : list:'T list -> int

Full name: Microsoft.FSharp.Collections.List.length
Multiple items
val seq : sequence:seq<'T> -> seq<'T>

Full name: Microsoft.FSharp.Core.Operators.seq

--------------------
type seq<'T> = System.Collections.Generic.IEnumerable<'T>

Full name: Microsoft.FSharp.Collections.seq<_>
val i : int
module Seq

from Microsoft.FSharp.Collections
val append : source1:seq<'T> -> source2:seq<'T> -> seq<'T>

Full name: Microsoft.FSharp.Collections.Seq.append
val take : count:int -> source:seq<'T> -> seq<'T>

Full name: Microsoft.FSharp.Collections.Seq.take
type Math =
  static val PI : float
  static val E : float
  static member Abs : value:sbyte -> sbyte + 6 overloads
  static member Acos : d:float -> float
  static member Asin : d:float -> float
  static member Atan : d:float -> float
  static member Atan2 : y:float * x:float -> float
  static member BigMul : a:int * b:int -> int64
  static member Ceiling : d:decimal -> decimal + 1 overload
  static member Cos : d:float -> float
  ...

Full name: System.Math
System.Math.Min(val1: decimal, val2: decimal) : decimal
   (+0 other overloads)
System.Math.Min(val1: float, val2: float) : float
   (+0 other overloads)
System.Math.Min(val1: float32, val2: float32) : float32
   (+0 other overloads)
System.Math.Min(val1: uint64, val2: uint64) : uint64
   (+0 other overloads)
System.Math.Min(val1: int64, val2: int64) : int64
   (+0 other overloads)
System.Math.Min(val1: uint32, val2: uint32) : uint32
   (+0 other overloads)
System.Math.Min(val1: int, val2: int) : int
   (+0 other overloads)
System.Math.Min(val1: uint16, val2: uint16) : uint16
   (+0 other overloads)
System.Math.Min(val1: int16, val2: int16) : int16
   (+0 other overloads)
System.Math.Min(val1: byte, val2: byte) : byte
   (+0 other overloads)
val toList : source:seq<'T> -> 'T list

Full name: Microsoft.FSharp.Collections.Seq.toList
val a : char list
val toList : array:'T [] -> 'T list

Full name: Microsoft.FSharp.Collections.Array.toList
val b : char list
val filter : predicate:('T -> bool) -> list:'T list -> 'T list

Full name: Microsoft.FSharp.Collections.List.filter
val c : char list
val map : mapping:('T -> 'U) -> list:'T list -> 'U list

Full name: Microsoft.FSharp.Collections.List.map
val d : char list
val e : char list
val tail : list:'T list -> 'T list

Full name: Microsoft.FSharp.Collections.List.tail
val f : char list
val head : list:'T list -> 'T

Full name: Microsoft.FSharp.Collections.List.head
property System.String.Length: int
val _americanSoundex : char list
val iter : action:('T -> unit) -> list:'T list -> unit

Full name: Microsoft.FSharp.Collections.List.iter
val y : System.String
val printfn : format:Printf.TextWriterFormat<'T> -> 'T

Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.printfn
Raw view Test code New version

More information

Link:http://fssnip.net/tI
Posted:8 years ago
Author:Fabio Galuppo
Tags: soundex , phonetic algorithm , algorithm