#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Train a k-nearest-neighbours digit classifier and evaluate it.

The script loads a training and a test set from CSV files, fits a
``KNeighborsClassifier`` on the training set, prints the true and the
predicted test labels, the fraction of correctly classified test samples
and the confusion matrix, and finally classifies one image file.
"""
from imageio import imread
from numpy import asarray, genfromtxt
from sklearn.metrics import confusion_matrix
from sklearn.neighbors import KNeighborsClassifier

# Input locations (hard-coded; adjust to the local data directory).
TRAINING_CSV = 'C:/temp/digits/digitsdata.csv'
TEST_CSV = 'C:/temp/digits/digitsdatatest.csv'
SAMPLE_IMAGE = "C:/temp/digits/test/testimg_181710187299_0_.gif"


# Equivalent of the function getPixelListFromFilePath.
def getPixelArrayFromFilePath(filepath):
    """Read an image file and return it as a single feature row.

    The pixel values are flattened, divided by 255, then mapped linearly
    via ``* 2 - 1`` and reshaped to one row so the result can be passed
    directly to ``knn.predict``.

    NOTE(review): the scaling yields [-1, 1] only if the image holds 8-bit
    values (0..255), and the row length matches the training features only
    if the image has the same number of pixels/channels -- confirm for the
    images used.

    :param filepath: path of the image file to read.
    :return: array of shape ``(1, n)`` with the rescaled pixel values.
    """
    img = imread(filepath)
    return asarray((img.flatten() / float(255) * 2 - 1).reshape(1, -1))


# Read the training data.
trainingdata = genfromtxt(TRAINING_CSV, delimiter=',')

# Read the test data.
testdata = genfromtxt(TEST_CSV, delimiter=',')

# Separate training features and classes: every column except the last is a
# feature, the last column is the class (per the original note: columns
# 0..255 are features, column 256 is the class).
# For an explanation of the indexing syntax see:
# https://docs.scipy.org/doc/numpy-1.13.0/reference/arrays.indexing.html
train_digits_features = trainingdata[:, :-1]
train_digits_class = trainingdata[:, -1]

# Separate test features and classes the same way.
test_digits_features = testdata[:, :-1]
test_digits_class = testdata[:, -1]

# Initialise kNN with 5 neighbours.
knn = KNeighborsClassifier(n_neighbors=5)

# Train kNN.
knn.fit(train_digits_features, train_digits_class)

# Predict the classes of the test samples.
predicted_class = knn.predict(test_digits_features)

# Print the true test digits.
print(test_digits_class)

# Print the predicted test digits.
print(predicted_class)

# Compare and compute the classification accuracy. The difference is 0 where
# a sample was classified correctly; boolean values can be summed.
# print(...) with a single argument works on both Python 2 and Python 3,
# unlike the former ``print expr`` statement.
print(sum(predicted_class - test_digits_class == 0) / float(len(predicted_class)))

# Compute the confusion matrix (true classes first, predictions second).
mat_knn = confusion_matrix(test_digits_class, predicted_class)
print(mat_knn)

# Classify a single image; it is read once and the result reused.
sample_pixels = getPixelArrayFromFilePath(SAMPLE_IMAGE)
print(knn.predict(sample_pixels))