iris:

# -*- coding: utf-8 -*-
# K-means with TensorFlow
#----------------------------------
#
# This script shows how to do k-means with TensorFlow import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn import datasets
from scipy.spatial import cKDTree
from sklearn.decomposition import PCA
from sklearn.preprocessing import scale
from tensorflow.python.framework import ops
ops.reset_default_graph() sess = tf.Session() iris = datasets.load_iris() num_pts = len(iris.data)
num_feats = len(iris.data[0]) # Set k-means parameters
# There are 3 types of iris flowers, see if we can predict them
k=3
generations = 25 data_points = tf.Variable(iris.data)
cluster_labels = tf.Variable(tf.zeros([num_pts], dtype=tf.int64)) # Randomly choose starting points
rand_starts = np.array([iris.data[np.random.choice(len(iris.data))] for _ in range(k)]) centroids = tf.Variable(rand_starts) # In order to calculate the distance between every data point and every centroid, we
# repeat the centroids into a (num_points) by k matrix.
centroid_matrix = tf.reshape(tf.tile(centroids, [num_pts, 1]), [num_pts, k, num_feats])
# Then we reshape the data points into k (3) repeats
point_matrix = tf.reshape(tf.tile(data_points, [1, k]), [num_pts, k, num_feats])
distances = tf.reduce_sum(tf.square(point_matrix - centroid_matrix), axis=2) #Find the group it belongs to with tf.argmin()
centroid_group = tf.argmin(distances, 1) # Find the group average
def data_group_avg(group_ids, data):
# Sum each group
sum_total = tf.unsorted_segment_sum(data, group_ids, 3)
# Count each group
num_total = tf.unsorted_segment_sum(tf.ones_like(data), group_ids, 3)
# Calculate average
avg_by_group = sum_total/num_total
return(avg_by_group) means = data_group_avg(centroid_group, data_points) update = tf.group(centroids.assign(means), cluster_labels.assign(centroid_group)) init = tf.global_variables_initializer() sess.run(init) for i in range(generations):
print('Calculating gen {}, out of {}.'.format(i, generations))
_, centroid_group_count = sess.run([update, centroid_group])
group_count = []
for ix in range(k):
group_count.append(np.sum(centroid_group_count==ix))
print('Group counts: {}'.format(group_count)) [centers, assignments] = sess.run([centroids, cluster_labels]) # Find which group assignments correspond to which group labels
# First, need a most common element function
def most_common(my_list):
return(max(set(my_list), key=my_list.count)) label0 = most_common(list(assignments[0:50]))
label1 = most_common(list(assignments[50:100]))
label2 = most_common(list(assignments[100:150])) group0_count = np.sum(assignments[0:50]==label0)
group1_count = np.sum(assignments[50:100]==label1)
group2_count = np.sum(assignments[100:150]==label2) accuracy = (group0_count + group1_count + group2_count)/150. print('Accuracy: {:.2}'.format(accuracy)) # Also plot the output
# First use PCA to transform the 4-dimensional data into 2-dimensions
pca_model = PCA(n_components=2)
reduced_data = pca_model.fit_transform(iris.data)
# Transform centers
reduced_centers = pca_model.transform(centers) # Step size of mesh for plotting
h = .02 # Plot the decision boundary. For that, we will assign a color to each
x_min, x_max = reduced_data[:, 0].min() - 1, reduced_data[:, 0].max() + 1
y_min, y_max = reduced_data[:, 1].min() - 1, reduced_data[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h)) # Get k-means classifications for the grid points
xx_pt = list(xx.ravel())
yy_pt = list(yy.ravel())
xy_pts = np.array([[x,y] for x,y in zip(xx_pt, yy_pt)])
mytree = cKDTree(reduced_centers)
dist, indexes = mytree.query(xy_pts) # Put the result into a color plot
indexes = indexes.reshape(xx.shape)
plt.figure(1)
plt.clf()
plt.imshow(indexes, interpolation='nearest',
extent=(xx.min(), xx.max(), yy.min(), yy.max()),
cmap=plt.cm.Paired,
aspect='auto', origin='lower') # Plot each of the true iris data groups
symbols = ['o', '^', 'D']
label_name = ['Setosa', 'Versicolour', 'Virginica']
for i in range(3):
temp_group = reduced_data[(i*50):(50)*(i+1)]
plt.plot(temp_group[:, 0], temp_group[:, 1], symbols[i], markersize=10, label=label_name[i])
# Plot the centroids as a white X
plt.scatter(reduced_centers[:, 0], reduced_centers[:, 1],
marker='x', s=169, linewidths=3,
color='w', zorder=10)
plt.title('K-means clustering on Iris Dataset\n'
'Centroids are marked with white cross')
plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)
plt.legend(loc='lower right')
plt.show()

最新文章

  1. 《Ansible权威指南》笔记(1)——安装,ssh密钥登陆,命令
  2. 【BZOJ-2476】战场的数目 矩阵乘法 + 递推
  3. SSH与Webservice整合记录
  4. 如何 在远程虚拟机 里 破解 最新版 SQL Prompt
  5. Ubuntu14.04安装redis和简单配置
  6. String系列
  7. Codeforces Round #100(140~~)
  8. JSP导出Excel文件
  9. XJOI1559树转二叉树
  10. java编写之jpg图片与base64编码之间的转换
  11. [Luogu4705] 玩游戏
  12. debian-pve-ceph
  13. C#导入c++ dll报找不到dll文件 masm32调用c++类库
  14. php使用curl模拟登录带验证码的网站[开发篇]
  15. linq使用Take和Skip实现分页
  16. spoon 更新数据
  17. MySQL主从数据库的安装
  18. LG3812 【模板】线性基
  19. Day 9 作业题(完成)
  20. BZOJ5312: 冒险【线段树】【位运算】

热门文章

  1. 将Cocos2d-x游戏打包成Android应用程序
  2. CentOS6 图形界面(gnome)安装(转)
  3. windows平台下为Nginx反向代理(负载均衡)使用openssl增加HTTPS/SSL功能。
  4. Maven自动生成web.xml配置文件
  5. IOS研究之网络编程(二)-Cocoa Streams使用具体解释
  6. 时间控件(DateTime Picker)
  7. zabbix-agent active 配置自动探测
  8. 多媒体开发之---h264 高度和宽度获取
  9. Grunt 学习笔记【1】----基础知识
  10. 【题解】CF45G Prime Problem