KNN算法的实现 - 悲催的科学匠人 - 冷水's blog

KNN算法的实现

冷水 posted @ 2014年7月06日 19:22 in 未分类 with tags 机器学习 knn , 2307 阅读

具体描述见machine learning in action

knn.h

/* knn.c */
struct KNN *KNNInit(int nvectors, int dim, int ngroup, int *ierr);
void KNNFree(struct KNN *knn, int *ierr);
double *KNNGetdata(struct KNN *knn, int ivec, int *ierr);
double KNNGetCoord(struct KNN *knn, int ivec, int icoord, int *ierr);
void KNNSetGroup(struct KNN *knn, int ivec, int group, int *ierr);
void KNNSetDataGroup(struct KNN *knn, int ivec, double *data, int group, int *ierr);
void KNNCalcScale(struct KNN *knn, int *ierr);
double KNNCalcDistance(struct KNN *knn, int ivec, double *mydata, int *ierr);
int KNNCalcGroup(struct KNN *knn, double *mydata, int k, int *ierr);

knn.c

#include <stdlib.h>
#include <math.h>
#include <stdlib.h>

struct KNN{
  int nvec;
  int dim;
  double *data;
  double *scale;
  int ngroups;
  int *group;   /* group ID (0,1,..., ngroups-1) for each vec*/
};



inline int KNNGetDataOffset(struct KNN *knn, int ivec)
{
  return ivec*knn->dim;
}

inline int KNNCheckIvec(struct KNN* knn, int ivec)
{
    if( ivec<0 || 1+ivec>knn->nvec) 
      return 1;
    else
      return 0;
}


struct KNN*  KNNInit(int nvectors, int dim, int ngroup, int *ierr)
{
  double  *data, *scale;
  int *group,  i;

  if(nvectors<1) {*ierr = 1; puts("KNNInit, error, nvectors<1");  return NULL; }
  if(dim<1) {*ierr= 2; puts("KNNInit, error, dim<1"); return NULL; }
  if(ngroup<2) {*ierr=4; puts("KNNInit, error, ngroup<2"); return;  } 

  data = (double*) calloc(nvectors*dim, sizeof(double));
  if (data==NULL) {*ierr = 5; puts("KNNInit, error, calloc knn->data fail"); return; }

  group = (int*) calloc(nvectors, sizeof(int));
  if(group==NULL) {*ierr=6; puts("KNNInit, error, calloc knn->group fail"); return;}
  for(i=0;i<nvectors;i++) group[i] = -1;

  scale = (double*) calloc(dim, sizeof(double));
  if(scale==NULL) {*ierr=7; puts("KNNInit, error, calloc knn->scale fail"); return;}
  
  
  struct KNN* knn = (struct KNN*)calloc(1,sizeof(struct KNN));
  if(knn==NULL) {*ierr=999; puts("KNNInit, error, calloc KNN fail"); return NULL;}

  knn->data = data;
  knn->group = group;
  knn->scale = scale;

  knn->nvec= nvectors;
  knn->dim = dim;
  knn->ngroups = ngroup;

  ierr = 0;
  return knn;
}

void KNNFree(struct KNN* knn, int* ierr)
{
  if(knn==NULL) return;
  if(knn->data) free(knn->data);
  if(knn->group) free(knn->group);
  free(knn);
}



double* KNNGetdata(struct KNN *knn, int ivec, int *ierr)
{
   *ierr = KNNCheckIvec(knn,ivec);
   if(*ierr) {puts("KNNGetData, irror, Wrong ivec");  return;}

   return knn->data + KNNGetDataOffset(knn, ivec);
}


double KNNGetCoord(struct KNN* knn, int ivec, int icoord, int* ierr)
{
  double *data;
  data = KNNGetdata(knn, ivec, ierr);
  if(*ierr) return 0;
  if(icoord<0 || icoord>=knn->dim) {
     puts("KNNGetCoord, error, wrong icoord"); *ierr= 2; return 0;
  }
  *ierr = 0;
  return data[icoord];
}



void KNNSetGroup(struct KNN* knn, int ivec, int group, int* ierr)
{
  *ierr =  KNNCheckIvec(knn,ivec);  if(*ierr) {puts("KNNSetGroup, error, Wrong ivec");  return;}
  *ierr = group<0 || group>=knn->ngroups;   if(*ierr) {puts("KNNSetGroup, error, Wrong group ID (should be [0,..,ngroups-1])");  return;}

  knn->group[ivec] = group;
  *ierr = 0;
  return; 
}


void KNNSetDataGroup(struct KNN *knn,int ivec, double *data, int group, int* ierr)
{
   int i;
   double *knndata = KNNGetdata(knn, ivec, ierr);
   if(*ierr) return;

   for(i=0; i<knn->dim; i++)  knndata[i] = data[i];

   KNNSetGroup(knn, ivec, group, ierr);
   if(*ierr) return;

   *ierr = 0;
   return;
}

static int KNNIsDataFilled(struct KNN* knn)
{
   int i;
   for(i=0; i<knn->nvec; i++) if(knn->group[i]==-1)  return 0;
   return 1;
}

void KNNCalcScale(struct KNN* knn, int *ierr)
{
   int i,d;
   if(!KNNIsDataFilled(knn)) {  puts("KNNCalcScale, error, data is not filled");  return; }

   for(d=0;d<knn->dim;d++){
     double xmax,xmin;
     xmin = KNNGetCoord(knn, 0, d, ierr);
     xmax = xmin;
     for(i=1; i<knn->nvec; i++){
       double f;
       f = KNNGetCoord(knn, i, d, ierr);
       if(xmax<f) xmax = f;
       if(xmin>f) xmin = f;
     }
     if(xmax!=xmin) 
        knn->scale[d] = 1./((xmax-xmin)*(xmax-xmin));
     else
        knn->scale[d] = 1.0;
   }
}


double KNNCalcDistance(struct KNN *knn, int ivec, double *mydata, int *ierr)
{
  double *data=NULL, dist=0.0;
  int i;

  data = KNNGetdata(knn, ivec, ierr);  if(*ierr) {puts("KNNCalcDistance, error, KNNGetData fail"); return;}
  
  for(i=0;i<knn->dim;i++){
    dist += (mydata[i]-data[i]) * (mydata[i]-data[i]) * knn->scale[i];
  }
  *ierr = 0;
  return sqrt(dist);
}


static void make_list(double *dlist, int* idlist, int n,  int cid, double cdist)
{
   double dmax;
   int imax, i;
   dmax = dlist[0]; imax=0;

   for(i=1;i<n;i++){
     if(dlist[i]>dmax) {dmax=dlist[i]; imax=i;}
   }
   if(cdist>dmax) 
     return;
   else{
     dlist[imax] = cdist;
     idlist[imax]= cid;
   }
}


static int vote(struct KNN* knn, int *groups, int k)
{
   int *votes;
   int i, max=0;
   votes = (int*) calloc(knn->ngroups, sizeof(int));
   for(i=0;i<knn->ngroups;i++)  votes[i]=0;

   for(i=0; i<k; i++)
     votes[ groups[i] ] ++;
     
   for(i=1; i<knn->ngroups; i++) {
     if(votes[i]>votes[max]) max=i;
   }

   return max;
}


int KNNCalcGroup(struct KNN* knn, double *mydata, int k, int* ierr)
{
  double* dists;
  int*    vecid;
  int n;
  double dmax,dmin;
  int    imax,imin;
  dists = (double*) calloc(k, sizeof(double));
  if(dists==NULL) {*ierr=-1; puts("KNNCalcGroup, error, cannot allocate space for dists"); return;}
  
  vecid = (int*) calloc(k, sizeof(int));
  if(vecid==NULL) {*ierr=-1; puts("KNNCalcGroup, error, cannot allocate space for vecid"); return;}
  for(n=0;n<k;n++){
    dists[n] = -1.;
    vecid[n] = -1;
  }
  for(n=0;n<knn->nvec;n++){
    double cdist;
    cdist = KNNCalcDistance(knn, n, mydata, ierr);
    
    if(n==0){
      int l;
      for(l=0;l<k;l++) {
         dists[l] = cdist; dmax=cdist;dmin=cdist;
         vecid[l] = n;     imax=n;    imin=n;
      }
    }
    else{ 
      make_list(dists, vecid, k,  n, cdist);
    }
  }
  free(dists);
  
  for(n=0;n<k;n++) vecid[n] = knn->group[vecid[n]];
  n = vote(knn, vecid,k);
  free(vecid);
  return n;
}

测试手写数字识别的程序 digits.c

#include <stdio.h>

#include "knn.h"

#define NVEC 1934
#define DIM  1024
#define NGROUP 10
#define K 3

void InputData(struct KNN *knn)
{
  FILE* fp;
  int id=0, i, ndata[10]={188, 197, 194, 198, 185, 186, 194, 200, 179, 203};
  double mydata[DIM];
  int g, n, ierr;
  char fname[32];
  
  for(g=0;g<10;g++)
  for(n=0;n<=ndata[g];n++){
  
    double mydata[1024];
    sprintf(fname,"trainingDigits/%d_%d.txt", g,n);
    //puts(fname);
    fp = fopen(fname,"r"); 
    for(i=0;i<1024;i++){
      int d;
      fscanf(fp,"%1d",&d);
      mydata[i] = d;
    }
    
    KNNSetDataGroup(knn, id, mydata, g, &ierr);
    if(ierr!=0) puts("ERROR!!");
    id ++;
    fclose(fp);
  }
  printf("Read %d training data\n",id);
  
}


void Test(struct KNN *knn)
{

  FILE* fp;
  int id=0, i, ndata[10]={86, 96, 91, 84, 99, 99, 86, 95, 90, 88};
  double mydata[DIM];
  int g, n, ierr,score;
  char fname[32];
  
  for(g=0;g<10;g++)
  {
    score = 0;
    for(n=0;n<=ndata[g];n++){
  
      double mydata[1024];
      int ans;
      
      sprintf(fname,"testDigits/%d_%d.txt", g,n);
      fp = fopen(fname,"r");       
      for(i=0;i<1024;i++){
        int d;
        fscanf(fp,"%1d",&d);
        mydata[i] = d;
      }          
      fclose(fp);
    
      ans = KNNCalcGroup(knn, mydata, K, &ierr);
      if(ans==g) score ++;
      else{
         printf("                                 fail for %s\n",fname);
      }
    }
    
    printf("Digit %1d  score=%3d/%3d = %6.2f%%\n",
       g, score, ndata[g]+1,  100.*score/(ndata[g]+1.));
  }
   
}



int main(void)
{
   int ierr;
   struct KNN* knn;
   knn = KNNInit(NVEC,DIM,NGROUP, &ierr);
   if(ierr) {puts("can not KNNInit"); return; }

   InputData(knn);
   KNNCalcScale(knn, &ierr);

   Test(knn);
   KNNFree(knn, &ierr);
}

 

Avatar_small
mega888 apk 说:
2021年8月29日 05:39

Hi there, I discovered your blog per Google bit searching for such kinda educational advise moreover your inform beholds very remarkable for me.

Avatar_small
Satta king 说:
2021年9月24日 10:28

Excellent read, Positive site, where did u come up with the information on this posting?I have read a few of the articles on your website now, and I really like your style. Thanks a million and please keep up the effective work.

Avatar_small
AP 10th computer Que 说:
2022年9月10日 07:21

Since the Government of AP also provided Computer labs for students in all Government schools also.Computer Knowledge is very much essential nowadays. Even Checking the Results of various examinations, AP 10th computer Question Paper Downloading hall tickets and study materials for multiple exams and booking tickets etc ..need minimum Technical Knowledge is much more important for everyone. Since the Government of AP also provided Computer labs for students in all Government schools also.

Avatar_small
Emma 说:
2023年1月18日 23:57

The k-nearest neighbor (KNN) algorithm is a simple technique used for both classification and regression. In classification, the KNN algorithm is used to classify a new data point based on its CBD Edibles distance from existing data points. In regression, the KNN algorithm is used to predict a new data point based on its distance from existing data points.

Avatar_small
seo service london 说:
2024年1月13日 03:29

The best article I came across a number of years, write something about it on this page


登录 *


loading captcha image...
(输入验证码)
or Ctrl+Enter
Host by is-Programmer.com | Power by Chito 1.3.3 beta | © 2007 LinuxGem | Design by Matthew "Agent Spork" McGee