Implementation of the KNN Algorithm
For a detailed description of the algorithm, see Machine Learning in Action.
knn.h
/* knn.h */
#ifndef KNN_H
#define KNN_H

struct KNN;  /* opaque handle; the definition lives in knn.c */

struct KNN *KNNInit(int nvectors, int dim, int ngroup, int *ierr);
void KNNFree(struct KNN *knn, int *ierr);
double *KNNGetdata(struct KNN *knn, int ivec, int *ierr);
double KNNGetCoord(struct KNN *knn, int ivec, int icoord, int *ierr);
void KNNSetGroup(struct KNN *knn, int ivec, int group, int *ierr);
void KNNSetDataGroup(struct KNN *knn, int ivec, double *data, int group, int *ierr);
void KNNCalcScale(struct KNN *knn, int *ierr);
double KNNCalcDistance(struct KNN *knn, int ivec, double *mydata, int *ierr);
int KNNCalcGroup(struct KNN *knn, double *mydata, int k, int *ierr);

#endif /* KNN_H */
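The header implies a simple life cycle: allocate with KNNInit, fill each training vector with KNNSetDataGroup, compute per-feature scaling with KNNCalcScale, then classify query points with KNNCalcGroup and release everything with KNNFree. Below is a minimal sketch of that flow on a made-up 2D, two-group toy set (the six points and the query are invented purely for illustration):

/* toy.c -- minimal usage sketch of the knn.h API (toy data is made up) */
#include <stdio.h>
#include "knn.h"

int main(void)
{
    /* six 2D training points: group 0 near the origin, group 1 near (10,10) */
    double pts[6][2] = { {0,0}, {1,0}, {0,1}, {10,10}, {9,10}, {10,9} };
    int grp[6] = { 0, 0, 0, 1, 1, 1 };
    double query[2] = { 8.5, 9.0 };
    int i, ierr, ans;

    struct KNN *knn = KNNInit(6, 2, 2, &ierr);
    if (ierr) return 1;
    for (i = 0; i < 6; i++)
        KNNSetDataGroup(knn, i, pts[i], grp[i], &ierr);
    KNNCalcScale(knn, &ierr);                  /* per-feature min-max scaling */
    ans = KNNCalcGroup(knn, query, 3, &ierr);  /* 3 nearest neighbours vote */
    printf("query classified as group %d\n", ans);  /* expect group 1 */
    KNNFree(knn, &ierr);
    return 0;
}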
knn.c
/* knn.c */
#include <stdio.h>
#include <stdlib.h>
#include <math.h>

struct KNN {
    int     nvec;
    int     dim;
    double *data;
    double *scale;
    int     ngroups;
    int    *group;   /* group ID (0, 1, ..., ngroups-1) for each vector */
};

static int KNNGetDataOffset(struct KNN *knn, int ivec)
{
    return ivec * knn->dim;
}

static int KNNCheckIvec(struct KNN *knn, int ivec)
{
    return (ivec < 0 || ivec >= knn->nvec) ? 1 : 0;
}

struct KNN *KNNInit(int nvectors, int dim, int ngroup, int *ierr)
{
    double *data, *scale;
    int *group, i;
    struct KNN *knn;

    if (nvectors < 1) { *ierr = 1; puts("KNNInit, error, nvectors<1"); return NULL; }
    if (dim < 1)      { *ierr = 2; puts("KNNInit, error, dim<1");      return NULL; }
    if (ngroup < 2)   { *ierr = 4; puts("KNNInit, error, ngroup<2");   return NULL; }

    data = (double *) calloc(nvectors * dim, sizeof(double));
    if (data == NULL) { *ierr = 5; puts("KNNInit, error, calloc knn->data fail"); return NULL; }

    group = (int *) calloc(nvectors, sizeof(int));
    if (group == NULL) { *ierr = 6; puts("KNNInit, error, calloc knn->group fail"); free(data); return NULL; }
    for (i = 0; i < nvectors; i++) group[i] = -1;   /* -1 marks an unfilled slot */

    scale = (double *) calloc(dim, sizeof(double));
    if (scale == NULL) { *ierr = 7; puts("KNNInit, error, calloc knn->scale fail"); free(data); free(group); return NULL; }

    knn = (struct KNN *) calloc(1, sizeof(struct KNN));
    if (knn == NULL) { *ierr = 999; puts("KNNInit, error, calloc KNN fail"); free(data); free(group); free(scale); return NULL; }

    knn->data    = data;
    knn->group   = group;
    knn->scale   = scale;
    knn->nvec    = nvectors;
    knn->dim     = dim;
    knn->ngroups = ngroup;
    *ierr = 0;
    return knn;
}

void KNNFree(struct KNN *knn, int *ierr)
{
    *ierr = 0;
    if (knn == NULL) return;
    free(knn->data);
    free(knn->group);
    free(knn->scale);
    free(knn);
}

double *KNNGetdata(struct KNN *knn, int ivec, int *ierr)
{
    *ierr = KNNCheckIvec(knn, ivec);
    if (*ierr) { puts("KNNGetdata, error, wrong ivec"); return NULL; }
    return knn->data + KNNGetDataOffset(knn, ivec);
}

double KNNGetCoord(struct KNN *knn, int ivec, int icoord, int *ierr)
{
    double *data = KNNGetdata(knn, ivec, ierr);
    if (*ierr) return 0;
    if (icoord < 0 || icoord >= knn->dim) {
        puts("KNNGetCoord, error, wrong icoord");
        *ierr = 2;
        return 0;
    }
    *ierr = 0;
    return data[icoord];
}

void KNNSetGroup(struct KNN *knn, int ivec, int group, int *ierr)
{
    *ierr = KNNCheckIvec(knn, ivec);
    if (*ierr) { puts("KNNSetGroup, error, wrong ivec"); return; }
    *ierr = (group < 0 || group >= knn->ngroups);
    if (*ierr) { puts("KNNSetGroup, error, wrong group ID (should be in [0, ngroups-1])"); return; }
    knn->group[ivec] = group;
    *ierr = 0;
}

void KNNSetDataGroup(struct KNN *knn, int ivec, double *data, int group, int *ierr)
{
    int i;
    double *knndata = KNNGetdata(knn, ivec, ierr);
    if (*ierr) return;
    for (i = 0; i < knn->dim; i++) knndata[i] = data[i];
    KNNSetGroup(knn, ivec, group, ierr);
}

static int KNNIsDataFilled(struct KNN *knn)
{
    int i;
    for (i = 0; i < knn->nvec; i++)
        if (knn->group[i] == -1) return 0;
    return 1;
}

void KNNCalcScale(struct KNN *knn, int *ierr)
{
    int i, d;
    if (!KNNIsDataFilled(knn)) {
        puts("KNNCalcScale, error, data is not filled");
        *ierr = 1;
        return;
    }
    for (d = 0; d < knn->dim; d++) {
        double xmax, xmin;
        xmin = KNNGetCoord(knn, 0, d, ierr);
        xmax = xmin;
        for (i = 1; i < knn->nvec; i++) {
            double f = KNNGetCoord(knn, i, d, ierr);
            if (xmax < f) xmax = f;
            if (xmin > f) xmin = f;
        }
        /* scale[d] = 1/range^2, so scaled squared differences are range-normalized */
        if (xmax != xmin) knn->scale[d] = 1. / ((xmax - xmin) * (xmax - xmin));
        else              knn->scale[d] = 1.0;
    }
    *ierr = 0;
}

double KNNCalcDistance(struct KNN *knn, int ivec, double *mydata, int *ierr)
{
    double *data, dist = 0.0;
    int i;
    data = KNNGetdata(knn, ivec, ierr);
    if (*ierr) { puts("KNNCalcDistance, error, KNNGetdata fail"); return 0; }
    for (i = 0; i < knn->dim; i++)
        dist += (mydata[i] - data[i]) * (mydata[i] - data[i]) * knn->scale[i];
    *ierr = 0;
    return sqrt(dist);
}

/* Replace the largest entry of the current k-best list if cdist beats it. */
static void make_list(double *dlist, int *idlist, int n, int cid, double cdist)
{
    double dmax = dlist[0];
    int imax = 0, i;
    for (i = 1; i < n; i++)
        if (dlist[i] > dmax) { dmax = dlist[i]; imax = i; }
    if (cdist > dmax) return;
    dlist[imax]  = cdist;
    idlist[imax] = cid;
}

/* Majority vote among the groups of the k nearest neighbours. */
static int vote(struct KNN *knn, int *groups, int k)
{
    int *votes, i, max = 0;
    votes = (int *) calloc(knn->ngroups, sizeof(int));
    if (votes == NULL) return -1;
    for (i = 0; i < k; i++) votes[groups[i]]++;
    for (i = 1; i < knn->ngroups; i++)
        if (votes[i] > votes[max]) max = i;
    free(votes);
    return max;
}

int KNNCalcGroup(struct KNN *knn, double *mydata, int k, int *ierr)
{
    double *dists;
    int *vecid, n;

    dists = (double *) calloc(k, sizeof(double));
    if (dists == NULL) { *ierr = -1; puts("KNNCalcGroup, error, cannot allocate space for dists"); return -1; }
    vecid = (int *) calloc(k, sizeof(int));
    if (vecid == NULL) { *ierr = -1; puts("KNNCalcGroup, error, cannot allocate space for vecid"); free(dists); return -1; }

    for (n = 0; n < knn->nvec; n++) {
        double cdist = KNNCalcDistance(knn, n, mydata, ierr);
        if (n == 0) {
            int l;
            for (l = 0; l < k; l++) { dists[l] = cdist; vecid[l] = n; }  /* seed the k-best list */
        } else {
            make_list(dists, vecid, k, n, cdist);
        }
    }
    free(dists);

    for (n = 0; n < k; n++) vecid[n] = knn->group[vecid[n]];  /* map vector IDs to group IDs */
    n = vote(knn, vecid, k);
    free(vecid);
    *ierr = 0;
    return n;
}
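Two details of knn.c worth spelling out (my reading of the code above, not a claim from the book): KNNCalcScale stores scale[d] = 1/(max_d - min_d)^2 for each feature, so KNNCalcDistance effectively computes the min-max-normalized Euclidean distance sqrt( sum_d ((x_d - y_d)/(max_d - min_d))^2 ), which lets every feature contribute on an equal footing regardless of its raw range. KNNCalcGroup then maintains a running list of the k smallest distances (make_list evicts the current worst entry whenever a closer vector appears) and vote returns the majority group among those k neighbours, which is exactly the classic kNN classification rule.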
Test program for handwritten digit recognition: digits.c
/* digits.c */
#include <stdio.h>
#include "knn.h"

#define NVEC   1934
#define DIM    1024
#define NGROUP 10
#define K      3

/* Load the training set: files trainingDigits/<digit>_<n>.txt, each a 32x32
 * grid of 0/1 characters; ndata[g] holds the largest index n for digit g. */
void InputData(struct KNN *knn)
{
    FILE *fp;
    int id = 0, i, ndata[10] = {188, 197, 194, 198, 185, 186, 194, 200, 179, 203};
    double mydata[DIM];
    int g, n, ierr;
    char fname[32];

    for (g = 0; g < 10; g++)
        for (n = 0; n <= ndata[g]; n++) {
            sprintf(fname, "trainingDigits/%d_%d.txt", g, n);
            fp = fopen(fname, "r");
            if (fp == NULL) { printf("cannot open %s\n", fname); continue; }
            for (i = 0; i < DIM; i++) {
                int d;
                fscanf(fp, "%1d", &d);   /* read one 0/1 digit at a time */
                mydata[i] = d;
            }
            fclose(fp);
            KNNSetDataGroup(knn, id, mydata, g, &ierr);
            if (ierr != 0) puts("ERROR!!");
            id++;
        }
    printf("Read %d training data\n", id);
}

/* Classify every file in testDigits/ and report the per-digit success rate. */
void Test(struct KNN *knn)
{
    FILE *fp;
    int i, ndata[10] = {86, 96, 91, 84, 99, 99, 86, 95, 90, 88};
    double mydata[DIM];
    int g, n, ierr, score;
    char fname[32];

    for (g = 0; g < 10; g++) {
        score = 0;
        for (n = 0; n <= ndata[g]; n++) {
            int ans;
            sprintf(fname, "testDigits/%d_%d.txt", g, n);
            fp = fopen(fname, "r");
            if (fp == NULL) { printf("cannot open %s\n", fname); continue; }
            for (i = 0; i < DIM; i++) {
                int d;
                fscanf(fp, "%1d", &d);
                mydata[i] = d;
            }
            fclose(fp);
            ans = KNNCalcGroup(knn, mydata, K, &ierr);
            if (ans == g) score++;
            else          printf(" fail for %s\n", fname);
        }
        printf("Digit %1d score=%3d/%3d = %6.2f%%\n",
               g, score, ndata[g] + 1, 100. * score / (ndata[g] + 1.));
    }
}

int main(void)
{
    int ierr;
    struct KNN *knn;

    knn = KNNInit(NVEC, DIM, NGROUP, &ierr);
    if (ierr) { puts("cannot KNNInit"); return 1; }
    InputData(knn);
    KNNCalcScale(knn, &ierr);
    Test(knn);
    KNNFree(knn, &ierr);
    return 0;
}
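Assuming knn.h, knn.c and digits.c sit in one directory next to the trainingDigits/ and testDigits/ folders from the Machine Learning in Action dataset, the test should build and run with something like gcc -o digits digits.c knn.c -lm followed by ./digits (the -lm is needed because knn.c calls sqrt from the math library).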