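# DBSCAN determines the number of clusters k adaptively from the data;
# k is not a parameter supplied by the user. (No further write-up yet.)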
from sklearn import datasets
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN
# Seed shared by both generators so that every run produces (and therefore
# clusters) exactly the same points.
RANDOM_STATE = 9

# Two noisy concentric circles; the inner circle is scaled by `factor`
# relative to the outer one.
X1, y1 = datasets.make_circles(
    n_samples=5000, factor=0.6, noise=0.05, random_state=RANDOM_STATE
)
# One tight Gaussian blob centred at (1.2, 1.2). `cluster_std` is given as a
# plain float, which applies the same standard deviation to every centre.
X2, y2 = datasets.make_blobs(
    n_samples=1000,
    n_features=2,
    centers=[[1.2, 1.2]],
    cluster_std=0.1,
    random_state=RANDOM_STATE,
)
# Stack both point sets row-wise into the single data matrix that gets
# clustered; the generator labels y1/y2 are not used further here.
X = np.concatenate((X1, X2))
print(X.shape)
# Plot the raw data distribution (disabled) -----------------------------
# plt.scatter(X[:, 0], X[:, 1], c="red", marker='o', label='see')
# plt.xlabel('petal length')
# plt.ylabel('petal width')
# plt.legend(loc=2)
# plt.show()
#---------------------------------------------
# Density-based clustering: `eps` is the neighbourhood radius and
# `min_samples` the number of neighbours required for a core point.
DB = DBSCAN(eps=0.1, min_samples=6).fit(X)
# One label per sample: 0..k-1 for the discovered clusters, -1 for noise.
label_pred = DB.labels_
color = ['red', 'blue', 'green', 'grey', 'black']
marker = ['o', '*', '+']

# Draw every cluster with its own colour/marker pair. Both palettes are
# cycled by their actual length, so any number of clusters can be drawn.
n_clusters = int(label_pred.max()) + 1
for i in range(n_clusters):
    cluster_i = X[label_pred == i]
    print(len(cluster_i))
    plt.scatter(
        cluster_i[:, 0],
        cluster_i[:, 1],
        c=color[i % len(color)],
        marker=marker[i % len(marker)],
        label="class{}-{}".format(i + 1, len(cluster_i)),
    )

# Samples DBSCAN could not assign to any cluster carry the label -1; show
# them too, so the plot accounts for every input point.
noise_points = X[label_pred == -1]
if len(noise_points):
    plt.scatter(
        noise_points[:, 0],
        noise_points[:, 1],
        c='orange',
        marker='x',
        label="noise-{}".format(len(noise_points)),
    )

plt.xlabel("XX")
plt.ylabel("YY")
plt.legend(loc=2)
plt.show()