[Java]数据分析--聚类

距离度量

需求：计算两点间的欧几里得距离、曼哈顿距离、切比雪夫距离、堪培拉距离
实现：利用commons.math3库相应函数

 1 import org.apache.commons.math3.ml.distance.*;

 2

 3 public class TestMetrics {

 4     public static void main(String[] args) {

 5         double[] x = {1, 3}, y = {5, 6};

 6

 7         EuclideanDistance eD = new EuclideanDistance();

 8         System.out.printf("Euclidean distance = %.2f%n", eD.compute(x,y));

 9

10         ManhattanDistance mD = new ManhattanDistance();

11         System.out.printf("Manhattan distance = %.2f%n", mD.compute(x,y));

12

13         ChebyshevDistance cD = new ChebyshevDistance();

14         System.out.printf("Chebyshev distance = %.2f%n", cD.compute(x,y));

15

16         CanberraDistance caD = new CanberraDistance();

17         System.out.printf("Canberra distance =  %.2f%n", caD.compute(x,y));

18     }

19 }

Euclidean distance = 5.00
Manhattan distance = 7.00
Chebyshev distance = 4.00
Canberra distance = 1.00

层次聚类

需求：将13个样本点分为3类
实现：m点划分为k类，先令m点的每个点为一类，然后找到中心最近的两个类，用一个新的聚类替换，重复m-k次

HierarchicalClustering.java

 1 import java.util.HashSet;

 2

 3 public class HierarchicalClustering {

 4     private static final double[][] DATA = {{1,1}, {1,3}, {1,5}, {2,6}, {3,2},

 5         {3,4}, {4,3}, {5,6}, {6,3}, {6,4}, {7,1}, {7,5}, {7,6}};

 6     private static final int M = DATA.length;  // number of points

 7     private static final int K = 3;            // number of clusters

 8

 9     public static void main(String[] args) {

10         HashSet<Cluster> clusters = load(DATA);

11         for (int i = 0; i < M - K; i++) {

12             System.out.printf("%n%2d clusters:%n", M-i-1);

13             coalesce(clusters);

14             System.out.println(clusters);

15         }

16     }

17

18     private static HashSet<Cluster> load(double[][] data) {

19         HashSet<Cluster> clusters = new HashSet();

20         for (double[] datum : DATA) {

21             clusters.add(new Cluster(datum[0], datum[1]));

22         }

23         return clusters;

24     }

25

26     private static void coalesce(HashSet<Cluster> clusters) {

27         Cluster cluster1=null, cluster2=null;

28         double minDist = Double.POSITIVE_INFINITY;

29         for (Cluster c1 : clusters) {

30             for (Cluster c2 : clusters) {

31                 if (!c1.equals(c2) && Cluster.distance(c1, c2) < minDist) {

32                     cluster1 = c1;

33                     cluster2 = c2;

34                     minDist = Cluster.distance(c1, c2);

35                 }

36             }

37         }

38         clusters.remove(cluster1);

39         clusters.remove(cluster2);

40         clusters.add(Cluster.union(cluster1, cluster2));

41     }

42 }

Point.java

 /**
  * Immutable point in the plane. Equality and hashing are based on the exact
  * bit patterns of the two coordinates, as given by
  * {@link Double#doubleToLongBits(double)}.
  */
 public class Point {
     private final double x, y;

     /**
      * @param x the x coordinate
      * @param y the y coordinate
      */
     public Point(double x, double y) {
         this.x = x;
         this.y = y;
     }

     /** @return the x coordinate */
     public double getX() {
         return x;
     }

     /** @return the y coordinate */
     public double getY() {
         return y;
     }

     /**
      * Combines the per-coordinate hashes as {@code hash(x) + 79*hash(y)}.
      * Produces the same values as hashing boxed {@code Double}s, without
      * allocating wrapper objects.
      */
     @Override
     public int hashCode() {
         return hash(x) + 79 * hash(y);
     }

     /**
      * Two points are equal when both coordinates have identical bit patterns,
      * so NaN equals NaN while 0.0 and -0.0 are different.
      */
     @Override
     public boolean equals(Object object) {
         if (object == this) {
             return true;
         }
         if (!(object instanceof Point)) {
             return false;  // also covers null
         }
         Point that = (Point) object;
         return bits(that.x) == bits(this.x) && bits(that.y) == bits(this.y);
     }

     /** Hash of one coordinate, using the formula documented for Double.hashCode(). */
     private static int hash(double d) {
         long b = bits(d);
         return (int) (b ^ (b >>> 32));
     }

     private static long bits(double d) {
         return Double.doubleToLongBits(d);
     }

     /** Formats the point as {@code (x,y)} with two decimals per coordinate. */
     @Override
     public String toString() {
         return String.format("(%.2f,%.2f)", x, y);
     }
 }

Cluster.java

 1 import java.util.HashSet;

 2

 3 public class Cluster {

 4     private final HashSet<Point> points;

 5     private Point centroid;

 6

 7     public Cluster(HashSet points, Point centroid) {

 8         this.points = points;

 9         this.centroid = centroid;

10     }

11

12     public Cluster(Point point) {

13         this.points = new HashSet();

14         this.points.add(point);

15         this.centroid = point;

16     }

17

18     public Cluster(double x, double y) {

19         this(new Point(x,y));

20     }

21

22     public Point getCentroid() {

23         return centroid;

24     }

25

26     public void add(Point point) {

27         points.add(point);

28         recomputeCentroid();

29     }

30

31     public void recomputeCentroid() {

32         double xSum=0.0, ySum=0.0;

33         for (Point point : points) {

34             xSum += point.getX();

35             ySum += point.getY();

36         }

37         centroid = new Point(xSum/points.size(), ySum/points.size());

38     }

39

40     public static double distance(Cluster c1, Cluster c2) {

41         double dx = c1.centroid.getX() - c2.centroid.getX();

42         double dy = c1.centroid.getY() - c2.centroid.getY();

43         return Math.sqrt(dx*dx + dy*dy);

44     }

45

46     public static Cluster union(Cluster c1, Cluster c2) {

47         Cluster cluster = new Cluster(c1.points, c1.centroid);

48         cluster.points.addAll(c2.points);

49         cluster.recomputeCentroid();

50         return cluster;

51     }

52

53     @Override

54     public int hashCode() {

55         return points.hashCode();

56     }

57

58     @Override

59     public boolean equals(Object object) {

60         if (object == null) {

61             return false;

62         } else if (object == this) {

63             return true;

64         } else if (!(object instanceof Cluster)) {

65             return false;

66         }

67         final Cluster that = (Cluster)object;

68         return that.points.equals(this.points);

69     }

70

71     @Override

72     public String toString() {

73         return String.format("%n{%s,%s}", centroid, points);

74     }

75 }

结果-->

  1 12 clusters:

  2 [

  3 {(1.00,1.00),[(1.00,1.00)]},

  4 {(1.00,3.00),[(1.00,3.00)]},

  5 {(2.00,6.00),[(2.00,6.00)]},

  6 {(3.00,2.00),[(3.00,2.00)]},

  7 {(4.00,3.00),[(4.00,3.00)]},

  8 {(6.00,4.00),[(6.00,4.00)]},

  9 {(7.00,1.00),[(7.00,1.00)]},

 10 {(7.00,5.50),[(7.00,6.00), (7.00,5.00)]},

 11 {(6.00,3.00),[(6.00,3.00)]},

 12 {(3.00,4.00),[(3.00,4.00)]},

 13 {(1.00,5.00),[(1.00,5.00)]},

 14 {(5.00,6.00),[(5.00,6.00)]}]

 15

 16 11 clusters:

 17 [

 18 {(1.00,1.00),[(1.00,1.00)]},

 19 {(1.00,3.00),[(1.00,3.00)]},

 20 {(2.00,6.00),[(2.00,6.00)]},

 21 {(3.00,2.00),[(3.00,2.00)]},

 22 {(4.00,3.00),[(4.00,3.00)]},

 23 {(7.00,1.00),[(7.00,1.00)]},

 24 {(7.00,5.50),[(7.00,6.00), (7.00,5.00)]},

 25 {(3.00,4.00),[(3.00,4.00)]},

 26 {(6.00,3.50),[(6.00,3.00), (6.00,4.00)]},

 27 {(1.00,5.00),[(1.00,5.00)]},

 28 {(5.00,6.00),[(5.00,6.00)]}]

 29

 30 10 clusters:

 31 [

 32 {(1.00,1.00),[(1.00,1.00)]},

 33 {(1.50,5.50),[(2.00,6.00), (1.00,5.00)]},

 34 {(1.00,3.00),[(1.00,3.00)]},

 35 {(3.00,2.00),[(3.00,2.00)]},

 36 {(4.00,3.00),[(4.00,3.00)]},

 37 {(7.00,1.00),[(7.00,1.00)]},

 38 {(7.00,5.50),[(7.00,6.00), (7.00,5.00)]},

 39 {(3.00,4.00),[(3.00,4.00)]},

 40 {(6.00,3.50),[(6.00,3.00), (6.00,4.00)]},

 41 {(5.00,6.00),[(5.00,6.00)]}]

 42

 43  9 clusters:

 44 [

 45 {(1.00,1.00),[(1.00,1.00)]},

 46 {(1.50,5.50),[(2.00,6.00), (1.00,5.00)]},

 47 {(1.00,3.00),[(1.00,3.00)]},

 48 {(7.00,1.00),[(7.00,1.00)]},

 49 {(7.00,5.50),[(7.00,6.00), (7.00,5.00)]},

 50 {(3.50,2.50),[(3.00,2.00), (4.00,3.00)]},

 51 {(3.00,4.00),[(3.00,4.00)]},

 52 {(6.00,3.50),[(6.00,3.00), (6.00,4.00)]},

 53 {(5.00,6.00),[(5.00,6.00)]}]

 54

 55  8 clusters:

 56 [

 57 {(1.00,1.00),[(1.00,1.00)]},

 58 {(1.50,5.50),[(2.00,6.00), (1.00,5.00)]},

 59 {(1.00,3.00),[(1.00,3.00)]},

 60 {(3.33,3.00),[(3.00,2.00), (4.00,3.00), (3.00,4.00)]},

 61 {(7.00,1.00),[(7.00,1.00)]},

 62 {(7.00,5.50),[(7.00,6.00), (7.00,5.00)]},

 63 {(6.00,3.50),[(6.00,3.00), (6.00,4.00)]},

 64 {(5.00,6.00),[(5.00,6.00)]}]

 65

 66  7 clusters:

 67 [

 68 {(1.50,5.50),[(2.00,6.00), (1.00,5.00)]},

 69 {(3.33,3.00),[(3.00,2.00), (4.00,3.00), (3.00,4.00)]},

 70 {(1.00,2.00),[(1.00,1.00), (1.00,3.00)]},

 71 {(7.00,1.00),[(7.00,1.00)]},

 72 {(7.00,5.50),[(7.00,6.00), (7.00,5.00)]},

 73 {(6.00,3.50),[(6.00,3.00), (6.00,4.00)]},

 74 {(5.00,6.00),[(5.00,6.00)]}]

 75

 76  6 clusters:

 77 [

 78 {(1.50,5.50),[(2.00,6.00), (1.00,5.00)]},

 79 {(3.33,3.00),[(3.00,2.00), (4.00,3.00), (3.00,4.00)]},

 80 {(1.00,2.00),[(1.00,1.00), (1.00,3.00)]},

 81 {(6.33,5.67),[(7.00,6.00), (7.00,5.00), (5.00,6.00)]},

 82 {(7.00,1.00),[(7.00,1.00)]},

 83 {(6.00,3.50),[(6.00,3.00), (6.00,4.00)]}]

 84

 85  5 clusters:

 86 [

 87 {(6.20,4.80),[(6.00,3.00), (7.00,6.00), (7.00,5.00), (6.00,4.00), (5.00,6.00)]},

 88 {(1.50,5.50),[(2.00,6.00), (1.00,5.00)]},

 89 {(3.33,3.00),[(3.00,2.00), (4.00,3.00), (3.00,4.00)]},

 90 {(1.00,2.00),[(1.00,1.00), (1.00,3.00)]},

 91 {(7.00,1.00),[(7.00,1.00)]}]

 92

 93  4 clusters:

 94 [

 95 {(6.20,4.80),[(6.00,3.00), (7.00,6.00), (7.00,5.00), (6.00,4.00), (5.00,6.00)]},

 96 {(1.50,5.50),[(2.00,6.00), (1.00,5.00)]},

 97 {(7.00,1.00),[(7.00,1.00)]},

 98 {(2.40,2.60),[(1.00,1.00), (3.00,2.00), (4.00,3.00), (3.00,4.00), (1.00,3.00)]}]

 99

100  3 clusters:

101 [

102 {(6.20,4.80),[(6.00,3.00), (7.00,6.00), (7.00,5.00), (6.00,4.00), (5.00,6.00)]},

103 {(7.00,1.00),[(7.00,1.00)]},

104 {(2.14,3.43),[(1.00,1.00), (2.00,6.00), (3.00,2.00), (4.00,3.00), (3.00,4.00), (1.00,3.00), (1.00,5.00)]}]

weka实现

 1 import java.util.ArrayList;

 2 import weka.clusterers.HierarchicalClusterer;

 3 import static weka.clusterers.HierarchicalClusterer.TAGS_LINK_TYPE;

 4 import weka.core.Attribute;

 5 import weka.core.Instance;

 6 import weka.core.Instances;

 7 import weka.core.SelectedTag;

 8 import weka.core.SparseInstance;

 9

10 public class WekaHierarchicalClustering {

11     private static final double[][] DATA = {{1,1}, {1,3}, {1,5}, {2,6}, {3,2},

12         {3,4}, {4,3}, {5,6}, {6,3}, {6,4}, {7,1}, {7,5}, {7,6}};

13     private static final int M = DATA.length;  // number of points

14     private static final int K = 3;            // number of clusters

15

16     public static void main(String[] args) {

17         Instances dataset = load(DATA);

18         HierarchicalClusterer hc = new HierarchicalClusterer();

19         hc.setLinkType(new SelectedTag(4, TAGS_LINK_TYPE));  // CENTROID

20         hc.setNumClusters(3);

21         try {

22             hc.buildClusterer(dataset);

23             for (Instance instance : dataset) {

24                 System.out.printf("(%.0f,%.0f): %s%n",

25                         instance.value(0), instance.value(1),

26                         hc.clusterInstance(instance));

27             }

28         } catch (Exception e) {

29             System.err.println(e);

30         }

31     }

32

33     private static Instances load(double[][] data) {

34         ArrayList<Attribute> attributes = new ArrayList<Attribute>();

35         attributes.add(new Attribute("X"));

36         attributes.add(new Attribute("Y"));

37         Instances dataset = new Instances("Dataset", attributes, M);

38         for (double[] datum : data) {

39             Instance instance = new SparseInstance(2);

40             instance.setValue(0, datum[0]);

41             instance.setValue(1, datum[1]);

42             dataset.add(instance);

43         }

44         return dataset;

45     }

46 }

结果-->

(1,1): 0

(1,3): 0

(1,5): 0

(2,6): 0

(3,2): 0

(3,4): 0

(4,3): 0

(5,6): 1

(6,3): 1

(6,4): 1

(7,1): 2

(7,5): 1

(7,6): 1

weka画图

 1 import java.awt.BorderLayout;

 2 import java.awt.Container;

 3 import java.util.ArrayList;

 4 import javax.swing.JFrame;

 5 import weka.clusterers.HierarchicalClusterer;

 6 import static weka.clusterers.HierarchicalClusterer.TAGS_LINK_TYPE;

 7 import weka.core.Attribute;

 8 import weka.core.Instance;

 9 import weka.core.Instances;

10 import weka.core.SelectedTag;

11 import weka.core.SparseInstance;

12 import weka.gui.hierarchyvisualizer.HierarchyVisualizer;

13

14 public class WekaHierarchicalClustering2 {

15     private static final double[][] DATA = {{1,1}, {1,3}, {1,5}, {2,6}, {3,2},

16         {3,4}, {4,3}, {5,6}, {6,3}, {6,4}, {7,1}, {7,5}, {7,6}};

17     private static final int M = DATA.length;  // number of points

18     private static final int K = 3;            // number of clusters

19

20     public static void main(String[] args) {

21         Instances dataset = load(DATA);

22         HierarchicalClusterer hc = new HierarchicalClusterer();

23         hc.setLinkType(new SelectedTag(4, TAGS_LINK_TYPE));  // CENTROID

24         hc.setNumClusters(1);

25         try {

26             hc.buildClusterer(dataset);

27             for (Instance instance : dataset) {

28                 System.out.printf("(%.0f,%.0f): %s%n",

29                         instance.value(0), instance.value(1),

30                         hc.clusterInstance(instance));

31             }

32             displayDendrogram(hc.graph());

33         } catch (Exception e) {

34             System.err.println(e);

35         }

36     }

37

38     private static Instances load(double[][] data) {

39         ArrayList<Attribute> attributes = new ArrayList<Attribute>();

40         attributes.add(new Attribute("X"));

41         attributes.add(new Attribute("Y"));

42         Instances dataset = new Instances("Dataset", attributes, M);

43         for (double[] datum : data) {

44             Instance instance = new SparseInstance(2);

45             instance.setValue(0, datum[0]);

46             instance.setValue(1, datum[1]);

47             dataset.add(instance);

48         }

49         return dataset;

50     }

51

52     public static void displayDendrogram(String graph) {

53         JFrame frame = new JFrame("Dendrogram");

54         frame.setSize(500, 400);

55         frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);

56         Container pane = frame.getContentPane();

57         pane.setLayout(new BorderLayout());

58         pane.add(new HierarchyVisualizer(graph));

59         frame.setVisible(true);

60     }

61 }

K-均值聚类

需求：同上
实现：从数据集中选k个点创建k个聚类，其余点添加到最近的聚类中，重新计算中心

KMeans.java（普通实现）

 1 import java.util.HashSet;

 2 import java.util.Random;

 3 import java.util.Set;

 4

 5 public class KMeans {

 6     private static final double[][] DATA = {{1,1}, {1,3}, {1,5}, {2,6}, {3,2},

 7             {3,4}, {4,3}, {5,6}, {6,3}, {6,4}, {7,1}, {7,5}, {7,6}};

 8     private static final int M = DATA.length;

 9     private static final int K = 3;

10     private static HashSet<Point> points;

11     private static HashSet<Cluster> clusters = new HashSet();

12     private static Random RANDOM = new Random();

13

14     public static void main(String[] args){

15         points = load(DATA);

16

17         int i0 = RANDOM.nextInt(M);

18         Point p = new Point(DATA[i0][0],DATA[i0][1]);

19         points.remove(p);

20

21         HashSet<Point> initSet = new HashSet();

22         initSet.add(p);

23

24         for(int i = 1; i < K; i ++){

25             p = farthestFrom(initSet);

26             initSet.add(p);

27             points.remove(p);

28         }

29

30         for(Point point:initSet){

31             Cluster cluster = new Cluster(point);

32             clusters.add(cluster);

33         }

34

35         for(Point point:points){

36             Cluster cluster = closestTo(point);

37             cluster.add(point);

38             cluster.recomputeCentroid();

39         }

40         System.out.println(clusters);

41     }

42

43     private static HashSet<Point> load(double[][] data) {

44         HashSet<Point> points = new HashSet();

45         for (double[] datum : DATA) {

46             points.add(new Point(datum[0], datum[1]));

47         }

48         return points;

49     }

50

51     // return the cluster whose centroid is closet to the specified point

52     private static Cluster closestTo(Point point){

53         double minDist = Double.POSITIVE_INFINITY;

54         Cluster c = null;

55         for(Cluster cluster:clusters){

56             double d = distance2(cluster.getCentroid(),point);

57             if(d < minDist){

58                 minDist = d;

59                 c = cluster;

60             }

61         }

62         return c;

63     }

64

65     // return the point that is farthest from the specified set

66     private static Point farthestFrom(Set<Point> set){

67         Point p = null;

68         double maxDist = 0.0;

69         for(Point point:points){

70             if(set.contains(point)){

71                 continue;

72             }

73             double d = dist(point,set);

74             if(d > maxDist){

75                 p = point;

76                 maxDist = d;

77             }

78         }

79         return p;

80     }

81

82     // return the distance from p to the nearest point in the set

83     public static double dist(Point p, Set<Point> set){

84         double minDist = Double.POSITIVE_INFINITY;

85         for(Point point:set){

86             double d = distance2(p,point);

87             minDist = (d < minDist ? d : minDist);

88         }

89         return minDist;

90     }

91

92     public static double distance2(Point p, Point q){

93         double dx = p.getX() - q.getX();

94         double dy = p.getY() - q.getY();

95         return dx*dx + dy*dy;

96     }

97 }

[{(2.40,2.60),[(1.00,1.00), (1.00,3.00), (3.00,2.00), (4.00,3.00), (3.00,4.00)]},
{(6.33,4.17),[(6.00,3.00), (7.00,6.00), (6.00,4.00), (7.00,5.00), (7.00,1.00), (5.00,6.00)]},
{(1.50,5.50),[(2.00,6.00), (1.00,5.00)]}]

KMeans.java（Weka 实现）

 1 import java.util.ArrayList;

 2 import weka.clusterers.SimpleKMeans;

 3 import weka.core.Attribute;

 4 import weka.core.Instance;

 5 import weka.core.Instances;

 6 import weka.core.SparseInstance;

 7

 8 public class KMeans {

 9     private static final double[][] DATA = {{1,1}, {1,3}, {1,5}, {2,6}, {3,2},

10         {3,4}, {4,3}, {5,6}, {6,3}, {6,4}, {7,1}, {7,5}, {7,6}};

11     private static final int M = DATA.length;  // number of points

12     private static final int K = 3;            // number of clusters

13

14     public static void main(String[] args) {

15         Instances dataset = load(DATA);

16         SimpleKMeans skm = new SimpleKMeans();

17         System.out.printf("%d clusters:%n", K);

18         try {

19             skm.setNumClusters(K);

20             skm.buildClusterer(dataset);

21             for (Instance instance : dataset) {

22                 System.out.printf("(%.0f,%.0f): %s%n",

23                         instance.value(0), instance.value(1),

24                         skm.clusterInstance(instance));

25             }

26         } catch (Exception e) {

27             System.err.println(e);

28         }

29     }

30

31     private static Instances load(double[][] data) {

32         ArrayList<Attribute> attributes = new ArrayList<Attribute>();

33         attributes.add(new Attribute("X"));

34         attributes.add(new Attribute("Y"));

35         Instances dataset = new Instances("Dataset", attributes, M);

36         for (double[] datum : data) {

37             Instance instance = new SparseInstance(2);

38             instance.setValue(0, datum[0]);

39             instance.setValue(1, datum[1]);

40             dataset.add(instance);

41         }

42         return dataset;

43     }

44 }

结果-->

(1,1): 1

(1,3): 1

(1,5): 0

(2,6): 0

(3,2): 1

(3,4): 0

(4,3): 0

(5,6): 0

(6,3): 2

(6,4): 2

(7,1): 2

(7,5): 2

(7,6): 2

KMeansPlusPlus.java（Apache Commons Math 实现）

 1 import java.util.ArrayList;

 2 import java.util.List;

 3 import org.apache.commons.math3.ml.clustering.CentroidCluster;

 4 import org.apache.commons.math3.ml.clustering.DoublePoint;

 5 import org.apache.commons.math3.ml.clustering.KMeansPlusPlusClusterer;

 6 import org.apache.commons.math3.ml.distance.EuclideanDistance;

 7

 8 public class KMeansPlusPlus {

 9     private static final double[][] DATA = {{1,1}, {1,3}, {1,5}, {2,6}, {3,2},

10         {3,4}, {4,3}, {5,6}, {6,3}, {6,4}, {7,1}, {7,5}, {7,6}};

11     private static final int M = DATA.length;  // number of points

12     private static final int K = 3;  // number of clusters

13     private static final int MAX = 100;  // maximum number of iterations

14     private static final EuclideanDistance ED = new EuclideanDistance();

15

16     public static void main(String[] args) {

17         List<DoublePoint> points = load(DATA);

18         KMeansPlusPlusClusterer<DoublePoint> clusterer;

19         clusterer = new KMeansPlusPlusClusterer(K, MAX, ED);

20         List<CentroidCluster<DoublePoint>> clusters = clusterer.cluster(points);

21

22         for (CentroidCluster<DoublePoint> cluster : clusters) {

23             System.out.println(cluster.getPoints());

24         }

25     }

26

27     private static List<DoublePoint> load(double[][] data) {

28         List<DoublePoint> points = new ArrayList(M);

29         for (double[] pair : data) {

30             points.add(new DoublePoint(pair));

31         }

32         return points;

33     }

34 }

[[5.0, 6.0], [6.0, 3.0], [6.0, 4.0], [7.0, 5.0], [7.0, 6.0]]
[[1.0, 1.0], [1.0, 3.0], [1.0, 5.0], [2.0, 6.0], [3.0, 2.0], [3.0, 4.0], [4.0, 3.0]]
[[7.0, 1.0]]

仿射传播聚类

需求：同上
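实现：在各数据点之间反复传递两类消息——责任度（responsibility，r）与可用度（availability，a），每轮更新时按阻尼系数与旧值加权；迭代结束后，使 a[i][k] + r[i][k] 最大的点 k 即为点 i 的代表点（exemplar）。
特点：不同于KMeans，聚类个数k不需事先确定。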

 /**
  * Affinity propagation demo on five fixed 2-D points. Similarity,
  * responsibility and availability matrices are kept in static arrays;
  * after a fixed number of damped update rounds, each point's exemplar is
  * printed.
  */
 public class AffinityPropagation {
     private static double[][] x = {{1,2}, {2,3}, {4,1}, {4,4}, {5,3}};
     private static int n = x.length;                 // number of points
     private static double[][] s = new double[n][n];  // similarities
     private static double[][] r = new double[n][n];  // responsibilities
     private static double[][] a = new double[n][n];  // availabilities
     private static final int ITERATIONS = 10;
     private static final double DAMPER = 0.5;

     /**
      * Initializes the similarities, runs ITERATIONS rounds of updates
      * (responsibilities first, then availabilities) and prints the exemplars.
      *
      * @param args unused
      */
     public static void main(String[] args) {
         initSimilarities();
         int round = 0;
         while (round < ITERATIONS) {
             updateResponsibilities();
             updateAvailabilities();
             round++;
         }
         printResults();
     }

     /*  Fills s symmetrically with negative squared distances and sets every
         diagonal entry to the average of the off-diagonal entries.
     */
     private static void initSimilarities() {
         double total = 0;
         for (int row = 0; row < n; row++) {
             for (int col = 0; col < row; col++) {
                 double similarity = negSqEuclidDist(x[row], x[col]);
                 s[col][row] = similarity;
                 s[row][col] = similarity;
                 total += similarity;
             }
         }
         double preference = 2*total/(n*n - n);  // average of s[i][j] for j < i
         for (int d = 0; d < n; d++) {
             s[d][d] = preference;
         }
     }

     /*  Moves each r[i][k] toward s[i][k] minus the largest a[i][j] + s[i][j]
         over j != k, blending old and new values with DAMPER.
     */
     private static void updateResponsibilities() {
         for (int i = 0; i < n; i++) {
             for (int k = 0; k < n; k++) {
                 double best = Double.NEGATIVE_INFINITY;
                 for (int j = 0; j < n; j++) {
                     if (j == k) {
                         continue;
                     }
                     best = Math.max(best, a[i][j] + s[i][j]);
                 }
                 double target = s[i][k] - best;
                 r[i][k] = DAMPER*r[i][k] + (1 - DAMPER)*target;
             }
         }
     }

     /*  Moves each a[i][k] toward its new value, blending with DAMPER. The
         diagonal uses the plain sum of positive responsibilities; other
         entries add r[k][k] and are capped at 0.
     */
     private static void updateAvailabilities() {
         for (int i = 0; i < n; i++) {
             for (int k = 0; k < n; k++) {
                 double target = (k == i)
                         ? sumOfPos(k, k)
                         : Math.min(0, r[k][k] + sumOfPos(i, k));
                 a[i][k] = DAMPER*a[i][k] + (1 - DAMPER)*target;
             }
         }
     }

     /*  Returns the negative square of the Euclidean distance from p to q.
     */
     private static double negSqEuclidDist(double[] p, double[] q) {
         double dx = p[0] - q[0];
         double dy = p[1] - q[1];
         return -(dx*dx + dy*dy);
     }

     /*  Returns the sum of the positive r[j][k] excluding r[i][k] and r[k][k].
     */
     private static double sumOfPos(int i, int k) {
         double total = 0;
         for (int j = 0; j < n; j++) {
             boolean excluded = (j == i || j == k);
             if (!excluded) {
                 total += Math.max(0, r[j][k]);
             }
         }
         return total;
     }

     /*  Prints, for every point i, the index k with the largest
         a[i][k] + r[i][k]; the lowest index wins ties.
     */
     private static void printResults() {
         for (int i = 0; i < n; i++) {
             int exemplar = 0;
             double best = a[i][0] + r[i][0];
             for (int j = 1; j < n; j++) {
                 double score = a[i][j] + r[i][j];
                 if (score > best) {
                     best = score;
                     exemplar = j;
                 }
             }
             System.out.printf("point %d has exemplar point %d%n", i, exemplar);
         }
     }
 }

point 0 has exemplar point 1
point 1 has exemplar point 1
point 2 has exemplar point 4
point 3 has exemplar point 4
point 4 has exemplar point 4

参考

https://blog.csdn.net/xzfreewind/article/details/73770327

巴特西

[Java]数据分析--聚类

最新文章

热门文章