悲催的科学匠人 - 冷水's blog

KNN算法的实现

具体描述见machine learning in action

knn.h

/* knn.c */
struct KNN *KNNInit(int nvectors, int dim, int ngroup, int *ierr);
void KNNFree(struct KNN *knn, int *ierr);
double *KNNGetdata(struct KNN *knn, int ivec, int *ierr);
double KNNGetCoord(struct KNN *knn, int ivec, int icoord, int *ierr);
void KNNSetGroup(struct KNN *knn, int ivec, int group, int *ierr);
void KNNSetDataGroup(struct KNN *knn, int ivec, double *data, int group, int *ierr);
void KNNCalcScale(struct KNN *knn, int *ierr);
double KNNCalcDistance(struct KNN *knn, int ivec, double *mydata, int *ierr);
int KNNCalcGroup(struct KNN *knn, double *mydata, int k, int *ierr);

knn.c

#include <stdlib.h>
#include <math.h>
#include <stdlib.h>

struct KNN{
  int nvec;
  int dim;
  double *data;
  double *scale;
  int ngroups;
  int *group;   /* group ID (0,1,..., ngroups-1) for each vec*/
};



inline int KNNGetDataOffset(struct KNN *knn, int ivec)
{
  return ivec*knn->dim;
}

inline int KNNCheckIvec(struct KNN* knn, int ivec)
{
    if( ivec<0 || 1+ivec>knn->nvec) 
      return 1;
    else
      return 0;
}


struct KNN*  KNNInit(int nvectors, int dim, int ngroup, int *ierr)
{
  double  *data, *scale;
  int *group,  i;

  if(nvectors<1) {*ierr = 1; puts("KNNInit, error, nvectors<1");  return NULL; }
  if(dim<1) {*ierr= 2; puts("KNNInit, error, dim<1"); return NULL; }
  if(ngroup<2) {*ierr=4; puts("KNNInit, error, ngroup<2"); return;  } 

  data = (double*) calloc(nvectors*dim, sizeof(double));
  if (data==NULL) {*ierr = 5; puts("KNNInit, error, calloc knn->data fail"); return; }

  group = (int*) calloc(nvectors, sizeof(int));
  if(group==NULL) {*ierr=6; puts("KNNInit, error, calloc knn->group fail"); return;}
  for(i=0;i<nvectors;i++) group[i] = -1;

  scale = (double*) calloc(dim, sizeof(double));
  if(scale==NULL) {*ierr=7; puts("KNNInit, error, calloc knn->scale fail"); return;}
  
  
  struct KNN* knn = (struct KNN*)calloc(1,sizeof(struct KNN));
  if(knn==NULL) {*ierr=999; puts("KNNInit, error, calloc KNN fail"); return NULL;}

  knn->data = data;
  knn->group = group;
  knn->scale = scale;

  knn->nvec= nvectors;
  knn->dim = dim;
  knn->ngroups = ngroup;

  ierr = 0;
  return knn;
}

void KNNFree(struct KNN* knn, int* ierr)
{
  if(knn==NULL) return;
  if(knn->data) free(knn->data);
  if(knn->group) free(knn->group);
  free(knn);
}



double* KNNGetdata(struct KNN *knn, int ivec, int *ierr)
{
   *ierr = KNNCheckIvec(knn,ivec);
   if(*ierr) {puts("KNNGetData, irror, Wrong ivec");  return;}

   return knn->data + KNNGetDataOffset(knn, ivec);
}


double KNNGetCoord(struct KNN* knn, int ivec, int icoord, int* ierr)
{
  double *data;
  data = KNNGetdata(knn, ivec, ierr);
  if(*ierr) return 0;
  if(icoord<0 || icoord>=knn->dim) {
     puts("KNNGetCoord, error, wrong icoord"); *ierr= 2; return 0;
  }
  *ierr = 0;
  return data[icoord];
}



void KNNSetGroup(struct KNN* knn, int ivec, int group, int* ierr)
{
  *ierr =  KNNCheckIvec(knn,ivec);  if(*ierr) {puts("KNNSetGroup, error, Wrong ivec");  return;}
  *ierr = group<0 || group>=knn->ngroups;   if(*ierr) {puts("KNNSetGroup, error, Wrong group ID (should be [0,..,ngroups-1])");  return;}

  knn->group[ivec] = group;
  *ierr = 0;
  return; 
}


void KNNSetDataGroup(struct KNN *knn,int ivec, double *data, int group, int* ierr)
{
   int i;
   double *knndata = KNNGetdata(knn, ivec, ierr);
   if(*ierr) return;

   for(i=0; i<knn->dim; i++)  knndata[i] = data[i];

   KNNSetGroup(knn, ivec, group, ierr);
   if(*ierr) return;

   *ierr = 0;
   return;
}

static int KNNIsDataFilled(struct KNN* knn)
{
   int i;
   for(i=0; i<knn->nvec; i++) if(knn->group[i]==-1)  return 0;
   return 1;
}

void KNNCalcScale(struct KNN* knn, int *ierr)
{
   int i,d;
   if(!KNNIsDataFilled(knn)) {  puts("KNNCalcScale, error, data is not filled");  return; }

   for(d=0;d<knn->dim;d++){
     double xmax,xmin;
     xmin = KNNGetCoord(knn, 0, d, ierr);
     xmax = xmin;
     for(i=1; i<knn->nvec; i++){
       double f;
       f = KNNGetCoord(knn, i, d, ierr);
       if(xmax<f) xmax = f;
       if(xmin>f) xmin = f;
     }
     if(xmax!=xmin) 
        knn->scale[d] = 1./((xmax-xmin)*(xmax-xmin));
     else
        knn->scale[d] = 1.0;
   }
}


double KNNCalcDistance(struct KNN *knn, int ivec, double *mydata, int *ierr)
{
  double *data=NULL, dist=0.0;
  int i;

  data = KNNGetdata(knn, ivec, ierr);  if(*ierr) {puts("KNNCalcDistance, error, KNNGetData fail"); return;}
  
  for(i=0;i<knn->dim;i++){
    dist += (mydata[i]-data[i]) * (mydata[i]-data[i]) * knn->scale[i];
  }
  *ierr = 0;
  return sqrt(dist);
}


static void make_list(double *dlist, int* idlist, int n,  int cid, double cdist)
{
   double dmax;
   int imax, i;
   dmax = dlist[0]; imax=0;

   for(i=1;i<n;i++){
     if(dlist[i]>dmax) {dmax=dlist[i]; imax=i;}
   }
   if(cdist>dmax) 
     return;
   else{
     dlist[imax] = cdist;
     idlist[imax]= cid;
   }
}


static int vote(struct KNN* knn, int *groups, int k)
{
   int *votes;
   int i, max=0;
   votes = (int*) calloc(knn->ngroups, sizeof(int));
   for(i=0;i<knn->ngroups;i++)  votes[i]=0;

   for(i=0; i<k; i++)
     votes[ groups[i] ] ++;
     
   for(i=1; i<knn->ngroups; i++) {
     if(votes[i]>votes[max]) max=i;
   }

   return max;
}


int KNNCalcGroup(struct KNN* knn, double *mydata, int k, int* ierr)
{
  double* dists;
  int*    vecid;
  int n;
  double dmax,dmin;
  int    imax,imin;
  dists = (double*) calloc(k, sizeof(double));
  if(dists==NULL) {*ierr=-1; puts("KNNCalcGroup, error, cannot allocate space for dists"); return;}
  
  vecid = (int*) calloc(k, sizeof(int));
  if(vecid==NULL) {*ierr=-1; puts("KNNCalcGroup, error, cannot allocate space for vecid"); return;}
  for(n=0;n<k;n++){
    dists[n] = -1.;
    vecid[n] = -1;
  }
  for(n=0;n<knn->nvec;n++){
    double cdist;
    cdist = KNNCalcDistance(knn, n, mydata, ierr);
    
    if(n==0){
      int l;
      for(l=0;l<k;l++) {
         dists[l] = cdist; dmax=cdist;dmin=cdist;
         vecid[l] = n;     imax=n;    imin=n;
      }
    }
    else{ 
      make_list(dists, vecid, k,  n, cdist);
    }
  }
  free(dists);
  
  for(n=0;n<k;n++) vecid[n] = knn->group[vecid[n]];
  n = vote(knn, vecid,k);
  free(vecid);
  return n;
}

测试手写数字识别的程序 digits.c

#include <stdio.h>

#include "knn.h"

#define NVEC 1934
#define DIM  1024
#define NGROUP 10
#define K 3

void InputData(struct KNN *knn)
{
  FILE* fp;
  int id=0, i, ndata[10]={188, 197, 194, 198, 185, 186, 194, 200, 179, 203};
  double mydata[DIM];
  int g, n, ierr;
  char fname[32];
  
  for(g=0;g<10;g++)
  for(n=0;n<=ndata[g];n++){
  
    double mydata[1024];
    sprintf(fname,"trainingDigits/%d_%d.txt", g,n);
    //puts(fname);
    fp = fopen(fname,"r"); 
    for(i=0;i<1024;i++){
      int d;
      fscanf(fp,"%1d",&d);
      mydata[i] = d;
    }
    
    KNNSetDataGroup(knn, id, mydata, g, &ierr);
    if(ierr!=0) puts("ERROR!!");
    id ++;
    fclose(fp);
  }
  printf("Read %d training data\n",id);
  
}


void Test(struct KNN *knn)
{

  FILE* fp;
  int id=0, i, ndata[10]={86, 96, 91, 84, 99, 99, 86, 95, 90, 88};
  double mydata[DIM];
  int g, n, ierr,score;
  char fname[32];
  
  for(g=0;g<10;g++)
  {
    score = 0;
    for(n=0;n<=ndata[g];n++){
  
      double mydata[1024];
      int ans;
      
      sprintf(fname,"testDigits/%d_%d.txt", g,n);
      fp = fopen(fname,"r");       
      for(i=0;i<1024;i++){
        int d;
        fscanf(fp,"%1d",&d);
        mydata[i] = d;
      }          
      fclose(fp);
    
      ans = KNNCalcGroup(knn, mydata, K, &ierr);
      if(ans==g) score ++;
      else{
         printf("                                 fail for %s\n",fname);
      }
    }
    
    printf("Digit %1d  score=%3d/%3d = %6.2f%%\n",
       g, score, ndata[g]+1,  100.*score/(ndata[g]+1.));
  }
   
}



int main(void)
{
   int ierr;
   struct KNN* knn;
   knn = KNNInit(NVEC,DIM,NGROUP, &ierr);
   if(ierr) {puts("can not KNNInit"); return; }

   InputData(knn);
   KNNCalcScale(knn, &ierr);

   Test(knn);
   KNNFree(knn, &ierr);
}

 




Host by is-Programmer.com | Power by Chito 1.3.3 beta | © 2007 LinuxGem | Design by Matthew "Agent Spork" McGee