KNN(k-nearest neighbor的縮寫)最近鄰算法

KNN 可以這樣理解：已有一批帶標籤的數據；當新數據進入時，計算它與所有已知數據的距離，找出距離最近的 k 個點，再根據這 k 個點中出現次數最多的標籤，推測新數據是什麼類型

常用算法

歐氏距離（歐幾里得距離，Euclidean distance；原文作「歐拉算法」）

計算兩個點在每個維度上的差的平方，求和後再開平方根

$$d(a, b) = \sqrt{(x^{(a)} - x^{(b)})^2 + (y^{(a)} - y^{(b)})^2}$$

多維度情況下（以三維為例）

$$d(a, b) = \sqrt{(x^{(a)} - x^{(b)})^2 + (y^{(a)} - y^{(b)})^2 + (z^{(a)} - z^{(b)})^2}$$

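可以表示為（X 的右下標表示維度）

$$d(a, b) = \sqrt{(X_1^{(a)} - X_1^{(b)})^2 + (X_2^{(a)} - X_2^{(b)})^2 + \cdots + (X_n^{(a)} - X_n^{(b)})^2}$$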

進而

$$d(a, b) = \sqrt{\sum_{i=1}^{n} (X_i^{(a)} - X_i^{(b)})^2}$$
曼哈頓距離（Manhattan distance；原文作「曼哈頓算法」）

$$d(a, b) = \sum_{i=1}^{n} \left| X_i^{(a)} - X_i^{(b)} \right|$$
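明可夫斯基距離（Minkowski distance；原文作「明可夫斯基算法」）

$$d(a, b) = \left( \sum_{i=1}^{n} \left| X_i^{(a)} - X_i^{(b)} \right|^{p} \right)^{1/p}$$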

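由曼哈頓距離和歐氏距離（原文作「歐拉算法」）推導而來，上面為曼哈頓距離，下面為歐氏距離

$$\sum_{i=1}^{n} \left| X_i^{(a)} - X_i^{(b)} \right|$$

$$\sqrt{\sum_{i=1}^{n} \left( X_i^{(a)} - X_i^{(b)} \right)^2}$$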

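進而（把兩者寫成相同的形式）

$$\left( \sum_{i=1}^{n} \left| X_i^{(a)} - X_i^{(b)} \right|^{1} \right)^{1/1}$$

$$\left( \sum_{i=1}^{n} \left| X_i^{(a)} - X_i^{(b)} \right|^{2} \right)^{1/2}$$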

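最終推導出最底部的明可夫斯基距離（p = 1 時即曼哈頓距離，p = 2 時即歐氏距離）

$$\left( \sum_{i=1}^{n} \left| X_i^{(a)} - X_i^{(b)} \right|^{p} \right)^{1/p}$$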

代碼實現

#這里使用歐拉算法來實(shí)現(xiàn)
import numpy as np
from math import sqrt
from collections import Counter#用來統(tǒng)計(jì)數(shù)組中元素出現(xiàn)次數(shù)的類庫

def kNN_classify(k, X_train, y_train, x):
    """Classify one sample by majority vote among its k nearest training points.

    Distances are Euclidean. The inputs may be NumPy arrays or any array-like
    (for example nested lists); they are converted with ``np.asarray``.

    Args:
        k: Number of neighbours that vote; must satisfy
            ``1 <= k <= number of training samples``.
        X_train: Training features, shape ``(n_samples, n_features)``.
        y_train: Training labels, length ``n_samples``.
        x: The sample to classify, shape ``(n_features,)``.

    Returns:
        The most frequent label among the k nearest training samples. When
        several labels are equally frequent, the one met first while walking
        the neighbours from nearest to farthest is returned.

    Raises:
        AssertionError: If ``k`` is out of range or the shapes are
            inconsistent.
    """
    # Accept plain Python sequences as well as ndarrays.
    X_train = np.asarray(X_train)
    y_train = np.asarray(y_train)
    x = np.asarray(x)

    assert 1 <= k <= X_train.shape[0], "k must be valid"
    assert X_train.shape[0] == y_train.shape[0], \
        "the size of X_train must equal to the size of y_train"
    assert X_train.shape[1] == x.shape[0], \
        "the feature number of x must be equal to X_train"

    # Euclidean distance from x to every training row in one vectorized step
    # (x broadcasts across the rows of X_train).
    distances = np.sqrt(np.sum((X_train - x) ** 2, axis=1))
    # A stable sort keeps equally distant points in their original order, so
    # the set of chosen neighbours is deterministic.
    nearest = np.argsort(distances, kind="stable")
    # Count the labels of the k closest points and return the most common one.
    votes = Counter(y_train[nearest[:k]])
    return votes.most_common(1)[0][0]

使用scikit-learn 中的 kNN

# Import scikit-learn's k-nearest-neighbours classifier.
from sklearn.neighbors import KNeighborsClassifier

# NOTE(review): X_train, y_train and x are not defined in this snippet; they
# are assumed to come from earlier code (e.g. a previous notebook cell).

# Create a kNN classifier that votes among the 6 nearest neighbours.
kNN_classifier = KNeighborsClassifier(n_neighbors=6)
# Fit the classifier on the training data.
kNN_classifier.fit(X_train, y_train)
# predict() takes a 2-D array with one row per sample, so reshape x to
# (n, n_features). The feature count is taken from X_train instead of being
# hard-coded to 2, so the snippet keeps working for other feature counts.
X_predict = x.reshape(-1, X_train.shape[1])

# Predict; the result is an array holding one label per row of X_predict.
y_predict = kNN_classifier.predict(X_predict)
# Label predicted for the first sample.
y_predict[0]

案例：鳶尾花識別

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets 
# Load the iris dataset that ships with scikit-learn.
# (iris.keys() lists the fields available on the returned object.)
iris = datasets.load_iris()

# Feature matrix and the matching label vector.
X, y = iris.data, iris.target

# --- manual train/test split: start ---

# Random ordering of all row indexes.
shuffled_indexes = np.random.permutation(len(X))

# Hold out 20% of the samples for testing.
test_ratio = 0.2
test_size = int(len(X) * test_ratio)

# The first test_size shuffled indexes form the test set; the rest form the
# training set.
test_indexes, train_indexes = np.split(shuffled_indexes, [test_size])

# Index features and labels with the same index arrays so rows stay aligned.
X_train, y_train = X[train_indexes], y[train_indexes]
# Test features and test labels.
X_test, y_test = X[test_indexes], y[test_indexes]

# --- manual train/test split: end ---

# The same split, done with scikit-learn's helper.
from sklearn.model_selection import train_test_split

# test_size is the fraction held out for testing; random_state fixes the
# shuffle seed so the split is reproducible (leave it unset to get a
# different split on every run).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=666)

# Train and evaluate a classifier on the split data.
from sklearn.neighbors import KNeighborsClassifier

# Create a kNN classifier that votes among the 6 nearest neighbours.
kNN_classifier = KNeighborsClassifier(n_neighbors=6)
# Fit the classifier on the training data.
kNN_classifier.fit(X_train, y_train)

# X_test already has one row per sample, so it is passed to predict() as-is.
# The reshape of an undefined `x` to 2 columns that used to sit here was
# removed: its result was never used and iris samples have 4 features.
y_predict = kNN_classifier.predict(X_test)
# Simple accuracy: the fraction of test samples predicted correctly.
np.mean(y_predict == y_test)

案例2：手寫數字識別

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets

# scikit-learn pieces used by this example.
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Load the handwritten-digits dataset and take its features and labels.
digits = datasets.load_digits()
X, y = digits.data, digits.target

# Hold out 20% of the samples for testing, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=666
)

# Create a 3-nearest-neighbours model and fit it (fit() returns the
# estimator itself, so the fitted model is bound directly).
knn_clf = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

# Predict labels for the held-out samples.
y_predict = knn_clf.predict(X_test)

# Accuracy computed from the predictions made above.
accuracy_score(y_test, y_predict)
# Accuracy computed by the classifier's own score() method.
knn_clf.score(X_test, y_test)

色偷偷精品伊人,欧洲久久精品,欧美综合婷婷骚逼,国产AV主播,国产最新探花在线,九色在线视频一区,伊人大交九欧美,1769亚洲,黄色成人av

KNN最近鄰算法

KNN最近鄰算法

KNN(k-nearest neighbor的縮寫)最近鄰算法

常用算法

代碼實(shí)現(xiàn)

使用scikit-learn 中的 kNN

案例鳶尾花識(shí)別

案例2手寫數(shù)字識(shí)別

相關(guān)閱讀更多精彩內(nèi)容

友情鏈接更多精彩內(nèi)容

色偷偷精品伊人,欧洲久久精品,欧美综合婷婷骚逼,国产AV主播,国产最新探花在线,九色在线视频一区,伊人大交九 欧美,1769亚洲,黄色成人av

KNN最近鄰算法

KNN(k-nearest neighbor的縮寫)最近鄰算法

常用算法

代碼實(shí)現(xiàn)

使用scikit-learn 中的 kNN

案例鳶尾花識(shí)別

案例2手寫數(shù)字識(shí)別

相關(guān)閱讀更多精彩內(nèi)容

友情鏈接更多精彩內(nèi)容

色偷偷精品伊人,欧洲久久精品,欧美综合婷婷骚逼,国产AV主播,国产最新探花在线,九色在线视频一区,伊人大交九欧美,1769亚洲,黄色成人av