1 people like it.

DBSCAN

Density-based spatial clustering of applications with noise (DBSCAN) is a data clustering algorithm. For more information see http://en.wikipedia.org/wiki/DBSCAN. The implementation is based on the pseudocode in the article and the following C# code http://www.c-sharpcorner.com/uploadfile/b942f9/implementing-the-dbscan-algorithm-using-C-Sharp/ The implementation is not very functional but does the job. Added pwd by ignorance, the password is "fssnip" (without quotes)

 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
12: 
13: 
14: 
15: 
16: 
17: 
18: 
19: 
20: 
21: 
22: 
23: 
24: 
25: 
26: 
27: 
28: 
29: 
30: 
31: 
32: 
33: 
34: 
35: 
36: 
37: 
38: 
39: 
40: 
41: 
42: 
43: 
44: 
45: 
46: 
47: 
48: 
49: 
50: 
51: 
52: 
53: 
54: 
55: 
56: 
57: 
58: 
59: 
60: 
61: 
62: 
63: 
64: 
65: 
66: 
67: 
68: 
69: 
70: 
71: 
72: 
73: 
74: 
75: 
76: 
77: 
78: 
79: 
80: 
81: 
82: 
83: 
84: 
85: 
86: 
87: 
88: 
89: 
90: 
open System.Collections.Generic

module Outliers =

    type Point(lon,lat) = 
        [<DefaultValue>] val mutable InCluster : bool
        [<DefaultValue>] val mutable Visited : bool
        [<DefaultValue>] val mutable Noise : bool
        member this.Lon = lon
        member this.Lat = lat

        // or
//        member val InCluster = false with get, set
//        member val Visited = false with get, set
//        member val Noise = false with get, set
//        member val Lon = lon with get
//        member val Lat = lat with get

    let getDistance (p1:Point) (p2:Point) =
        let diffX = p2.Lon - p1.Lon
        let diffY = p2.Lat - p1.Lat
        let d = diffX * diffX + diffY * diffY
        d

    let private getRegion P points eps =
        let region = points |> List.filter (fun p -> (getDistance P p) <= eps)
        new System.Collections.Generic.List<Point>(region)
    
    let private expandCluster points P (neighborPts:List<Point>) (C:List<Point>) eps minPts = 
        
        C.Add(P)
        P.InCluster <- true

        while neighborPts.Count > 0 do
            let last = neighborPts.Count-1
            let P' = neighborPts.[last]
            neighborPts.RemoveAt(last)

            if not P'.Visited then
                P'.Visited <- true
                let neighborPts' = getRegion P' points eps
                if neighborPts'.Count >= minPts then
                    neighborPts.AddRange(neighborPts')
            if not P'.InCluster then
                C.Add(P')
                P'.InCluster <- true

    
    let DBSCAN points eps minPts = 
        let eps = eps*eps
        let clusters = new List<List<Point>>()
        for (p:Point) in points do
            if not p.Visited then
                p.Visited <- true            
                let neighborPts = getRegion p points eps
    
                if neighborPts.Count < minPts then
                    p.Noise <- true
                else
                    let C = new List<Point>()
                    expandCluster points p neighborPts C eps minPts
                    if C.Count > 0 then clusters.Add(C)
        clusters

    let test = 
        let points = [new Point(0, 100);
                      new Point(0, 200);
                      (new Point(0, 275));
                      (new Point(100, 150));
                      (new Point(200, 100));
                      (new Point(250, 200));        
                      (new Point(0, 300));        
                      (new Point(100, 200));        
                      (new Point(600, 700));        
                      (new Point(650, 700));        
                      (new Point(675, 700));        
                      (new Point(675, 710));        
                      (new Point(675, 720));        
                      (new Point(50, 400))]
        let clusters = DBSCAN points 100 3
        for point in points do
            if point.Noise && not point.InCluster then
                printfn "noise %d %d" point.Lon point.Lat
        let mutable counter = 0
        for cluster in clusters do
            counter <- counter + 1
            printfn "Cluster %d consists of the following %d point(s)" counter cluster.Count
            for point in cluster do
                printf "(%d, %d) " point.Lon point.Lat
            printfn ""
namespace System
namespace System.Collections
namespace System.Collections.Generic
module Outliers

from Script
Multiple items
type Point =
  new : lon:int * lat:int -> Point
  val mutable InCluster: bool
  val mutable Visited: bool
  val mutable Noise: bool
  member Lat : int
  member Lon : int

Full name: Script.Outliers.Point

--------------------
new : lon:int * lat:int -> Point
val lon : int
val lat : int
Multiple items
type DefaultValueAttribute =
  inherit Attribute
  new : unit -> DefaultValueAttribute
  new : check:bool -> DefaultValueAttribute
  member Check : bool

Full name: Microsoft.FSharp.Core.DefaultValueAttribute

--------------------
new : unit -> DefaultValueAttribute
new : check:bool -> DefaultValueAttribute
Point.InCluster: bool
type bool = System.Boolean

Full name: Microsoft.FSharp.Core.bool
Point.Visited: bool
Point.Noise: bool
val this : Point
member Point.Lon : int

Full name: Script.Outliers.Point.Lon
member Point.Lat : int

Full name: Script.Outliers.Point.Lat
val getDistance : p1:Point -> p2:Point -> int

Full name: Script.Outliers.getDistance
val p1 : Point
val p2 : Point
val diffX : int
property Point.Lon: int
val diffY : int
property Point.Lat: int
val d : int
val private getRegion : P:Point -> points:Point list -> eps:int -> List<Point>

Full name: Script.Outliers.getRegion
val P : Point
val points : Point list
val eps : int
val region : Point list
Multiple items
type List<'T> =
  new : unit -> List<'T> + 2 overloads
  member Add : item:'T -> unit
  member AddRange : collection:IEnumerable<'T> -> unit
  member AsReadOnly : unit -> ReadOnlyCollection<'T>
  member BinarySearch : item:'T -> int + 2 overloads
  member Capacity : int with get, set
  member Clear : unit -> unit
  member Contains : item:'T -> bool
  member ConvertAll<'TOutput> : converter:Converter<'T, 'TOutput> -> List<'TOutput>
  member CopyTo : array:'T[] -> unit + 2 overloads
  ...
  nested type Enumerator

Full name: System.Collections.Generic.List<_>

--------------------
List() : unit
List(capacity: int) : unit
List(collection: IEnumerable<'T>) : unit
val filter : predicate:('T -> bool) -> list:'T list -> 'T list

Full name: Microsoft.FSharp.Collections.List.filter
val p : Point
val private expandCluster : points:Point list -> P:Point -> neighborPts:List<Point> -> C:List<Point> -> eps:int -> minPts:int -> unit

Full name: Script.Outliers.expandCluster
val neighborPts : List<Point>
val C : List<Point>
val minPts : int
List.Add(item: Point) : unit
property List.Count: int
val last : int
val P' : Point
List.RemoveAt(index: int) : unit
val not : value:bool -> bool

Full name: Microsoft.FSharp.Core.Operators.not
val neighborPts' : List<Point>
List.AddRange(collection: IEnumerable<Point>) : unit
val DBSCAN : points:Point list -> eps:int -> minPts:int -> List<List<Point>>

Full name: Script.Outliers.DBSCAN
val clusters : List<List<Point>>
List.Add(item: List<Point>) : unit
val test : unit

Full name: Script.Outliers.test
val point : Point
val printfn : format:Printf.TextWriterFormat<'T> -> 'T

Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.printfn
val mutable counter : int
val cluster : List<Point>
val printf : format:Printf.TextWriterFormat<'T> -> 'T

Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.printf

More information

Link:http://fssnip.net/jo
Posted:10 years ago
Author:Samuel Bosch
Tags: dbscan , cluster , math , spatial , statistics