C++ - 悲催的科学匠人 - 冷水's blog
转一个网文:C++的反思
转一个网文:C++的反思
http://www.skywind.me/blog/archives/1398
----------------------------- 节选 ---------------------------------
关于 C++的笑话数都数不清:
笑话:C++是一门不吉祥的语言,据说波音公司之前用ADA为飞机硬件编程,一直用的好好的,后来招聘了一伙大学生,学生们说我靠还在用这么落后的语言,然后换成C++重构后飞机就坠毁了。
笑话:什么是C++程序员呢?就是本来10行写得完的程序,他非要用30行来完成,并自称“封装”,但每每到第二个项目的时候却将80%打破重写,并美其名曰 “重构”。
笑话:C容易擦枪走火打到自己的脚,用C++虽然不容易,但一旦走火,就会把你整条腿给炸飞了。
笑话:同时学习两年 Java的程序员在一起讨论的是面向对象和设计模式,而同时学习两年 C++的程序员,在一起讨论的是 template和各种语言规范到底怎么回事情。
笑话:教别人学 C++的人都挣大钱了,而很多真正用 C++的人,都死的很惨。
笑话:C++有太多地方可以让一个人表现自己“很聪明”,所以使用C++越久的人,约觉得自己“很聪明”结果步入陷阱都不知道,掉坑里了还觉得估计是自己没学好 C++。
笑话:好多写了十多年 C++程序的人,至今说不清楚 C++到底有多少规范,至今仍然时不时的落入某些坑中。
笑话:很多认为 C++方便跨平台的人,实际编写跨平台代码时,都会发现自己难找到两个支持相同标准的 C++编译器。
----------------------------- 节选 ---------------------------------
其中,作者提到“其实C++的矛盾在于一方面承认作为系统级语言内存管理应该交给用户决定,一方面自己却又定义很多不受用户控制的内存操作行为”。确实,看c++书籍的时候,对于其中编译器背后干的一些复杂的事情,感觉真是“你tmd还背着我干了啥?”
c++确实强大,但是好的c++程序员很少,科学计算领域的好c++程序员更少。我刚工作时,在一个小公司主管一个项目,技术方案上我说了算,因此我推c++开发。结果是组里5个人,能用c++的也就一个半(包括我自己),于是大多数c++编程的活都是我干了。所以后来管一个更大的cfd项目,果断用fortran 9x/2003。
后来碰到有人鄙视fortran,吹捧c++,我就心里“呵呵”了。
kmean算法实现
具体描述见machine learning in action
main函数是一个二维点cluster测试
#include <math.h> #include <stdlib.h> #include <stdio.h> struct KMEAN{ int nvec,dim,ngroup; double *data, /* [nvec][dim] */ *mu, /* [ngroup][dim] */ *scale, *min; /* [dim] */ int *groups, /* nvec */ *count; /* ngroup] */ }; struct KMEAN* KMEAN_init(int nvec, int dim, int ngroup) { struct KMEAN* kmean = (struct KMEAN*) calloc(1,sizeof(struct KMEAN)); kmean->nvec = nvec; kmean->dim = dim; kmean->ngroup = ngroup; kmean->data = (double*) calloc(nvec*dim, sizeof(double)); kmean->mu = (double*) calloc(ngroup*dim, sizeof(double)); kmean->scale = (double*) calloc(dim, sizeof(double)); kmean->min = (double*) calloc(dim, sizeof(double)); //kmean->max = (double*) calloc(dim, sizeof(double)); kmean->groups = (int*) calloc(nvec, sizeof(int)); //kmean->mark = (int*) calloc(nvec, sizeof(int)); kmean->count = (int*) calloc(ngroup, sizeof(int)); } void KMEAN_free(struct KMEAN* kmean) { free(kmean->data); free(kmean->mu); free(kmean->min); //free(kmean->max); free(kmean->scale); free(kmean->groups); //free(kmean->mark); free(kmean->count); } inline static double dataij(struct KMEAN* kmean, int i, int j) { return kmean->data[i*kmean->dim + j]; } inline static double muij(struct KMEAN *kmean, int i, int j) { return kmean->mu[i*kmean->dim + j]; } inline static void setdataij(struct KMEAN* kmean, int i, int j, double v) { kmean->data[i*kmean->dim + j] = v; } inline static void setmuij(struct KMEAN *kmean, int i, int j, double v) { kmean->mu[i*kmean->dim + j] = v; } inline static void addmuij(struct KMEAN *kmean, int i, int j, double v) { kmean->mu[i*kmean->dim + j] += v; } void KMEAN_OutputPlt(struct KMEAN* kmean, const char* fname) { FILE *fp = fopen(fname,"w"); int n,d; if(fp==NULL) {puts("Can not open file for outputing tecplot"); return;} fprintf(fp,"ZONE T=\"DATA-%s\"\n", fname); for(n=0;n<kmean->nvec;n++) { for(d=0;d<kmean->dim;d++) fprintf(fp,"%11.4E ", dataij(kmean,n,d)); fprintf(fp,"%d\n", kmean->groups[n] ); } fprintf(fp,"ZONE T=\"MU-%s\"\n", fname); for(n=0;n<kmean->ngroup;n++) { for(d=0;d<kmean->dim;d++) fprintf(fp,"%11.4E ", muij(kmean,n,d)); fprintf(fp,"%d\n", n ); } fclose(fp); } void KMEAN_CalcScale(struct KMEAN *kmean) { int d,n; double dmax,dmin; // compute range of each dim for(d=0;d<kmean->dim;d++){ dmax = dmin = dataij(kmean,0,d); for(n=1;n<kmean->nvec;n++){ double x; x = dataij(kmean,n,d); if( dmax<x ) dmax = x; if( dmin>x ) dmin = x; } kmean->scale[d] = dmax - dmin; kmean->min[d] = dmin; //kmean->max[d] = dmax; //printf("Scaling: DIM %4d min=%11.4E max=%11.4E\n",d,dmin,dmax); } // randomly init mu for(n=0;n<kmean->ngroup;n++){ //printf("init mu Group %3d ",n); for(d=0;d<kmean->dim;d++){ setmuij(kmean, n,d, kmean->min[d] + 0.8*(rand()/(double)(RAND_MAX)) * kmean->scale[d] ); //printf("x_%3d=%11.4E ",d,muij(kmean,n,d)); } //puts(""); } for(n=0;n<kmean->nvec;n++) kmean->groups[n] = 0; for(d=0;d<kmean->dim;d++) kmean->scale[d] = 1.0/ (kmean->scale[d] * kmean->scale[d]); } static int WhichGroup(struct KMEAN* kmean, int i) { int g; double mindist; int mingroup; for(g=0;g<kmean->ngroup;g++){ double dist=0; int d; for(d=0;d<kmean->dim;d++){ double xid,mgd; xid = dataij(kmean,i,d); mgd = muij(kmean,g,d); dist += (xid-mgd) * (xid-mgd) * kmean->scale[d]; } dist = sqrt(dist); if(g==0) { mindist = dist; mingroup = 0; }else if(mindist>dist){ mindist = dist; mingroup = g; } } return mingroup; } static double KMEAN_error(struct KMEAN* kmean) { double err=0.0; int n; for(n=0;n<kmean->nvec;n++) { int d,g; g = kmean->groups[n]; for(d=0;d<kmean->dim;d++) { double dd = dataij(kmean,n,d) - muij(kmean,g,d); err += dd*dd*kmean->scale[d]; } } return err; } static void UpdateMu(struct KMEAN* kmean) { int n,d; for(n=0;n<kmean->ngroup;n++) for(d=0;d<kmean->dim;d++) setmuij(kmean,n,d,0.0); for(n=0;n<kmean->nvec;n++) for(d=0;d<kmean->dim;d++){ addmuij(kmean, kmean->groups[n], d, dataij(kmean,n,d) ); } for(n=0;n<kmean->ngroup;n++) for(d=0;d<kmean->dim;d++) setmuij(kmean,n,d, muij(kmean,n,d)/kmean->count[n] ); } static void KMEAN_sweep(struct KMEAN* kmean, double *err, int *changed) { int n; *changed = 0; for(n=0;n<kmean->ngroup;n++) kmean->count[n] = 0; for(n=0;n<kmean->nvec;n++){ int g; g = WhichGroup(kmean,n); if(kmean->groups[n] != g) (*changed)++; kmean->groups[n] = g; kmean->count[ kmean->groups[n] ] ++; //printf("Vec %3d is Group %3d\n",n,g); } UpdateMu(kmean); *err = KMEAN_error(kmean); } double KMEAN_cluster(struct KMEAN *kmean) { int nchanged,it=0; double err; KMEAN_CalcScale(kmean); do{ KMEAN_sweep(kmean, &err, &nchanged); //printf("%6d changed %6d err=%11.4E\n",it++,nchanged, err); }while(nchanged>0); } void KMEAN_bisect(struct KMEAN *kmean, double *err) { int g, worstg; double err0,minderr; char fname[1024]; err0 = *err; minderr = err0; sprintf(fname,"skmean-g%d.plt",kmean->ngroup); KMEAN_OutputPlt(kmean, fname); // 找到最烂的分组 for(g=0;g<kmean->ngroup;g++) { struct KMEAN* subset = KMEAN_init(kmean->count[g], kmean->dim, 2); int i,si=0; double cerr; // fill group[g] into subset for(i=0;i<kmean->nvec;i++){ if(kmean->groups[i]==g){ int d; for(d=0;d<kmean->dim;d++) setdataij(subset,si,d, dataij(kmean,i,d)); //subset->mark[si] = kmean->mark[i]; si++; } } cerr = KMEAN_cluster(subset); KMEAN_free(subset); if(minderr > err0-cerr) {minderr = err0-cerr; worstg = g;} } g = worstg; // 最烂的分组 { struct KMEAN* subset = KMEAN_init(kmean->count[g], kmean->dim, 2); int i,si=0,d; double cerr; // fill group[g] into subset for(i=0;i<kmean->nvec;i++){ if(kmean->groups[i]==g){ for(d=0;d<kmean->dim;d++) setdataij(subset,si,d, dataij(kmean,i,d)); //subset->mark[si] = kmean->mark[i]; si++; } } // 更新mu for(d=0;d<kmean->dim;d++) { setmuij(kmean, g, d, muij(subset, 0, d) ); setmuij(kmean, kmean->ngroup, d, muij(subset, 1, d) ); } kmean->ngroup++; KMEAN_free(subset); // 重新对kmean做一次分组 *err = KMEAN_cluster(kmean); } return; } void KMEAN_BISECT(struct KMEAN* kmean) { int it=0,dest_ngroup = kmean->ngroup; double err; if(dest_ngroup<2) return; kmean->ngroup = 2; err = KMEAN_cluster(kmean); do{ KMEAN_bisect(kmean, &err); printf("Step %6d err=%11.4E\n",++it, err); }while(kmean->ngroup<dest_ngroup); } int main(void) { struct KMEAN *kmean = KMEAN_init(80, 2, 4); double err; { FILE* fp = fopen("testSet.txt","r"); int n=0; for(n=0;n<80;n++){ double x,y; fscanf(fp,"%lf%lf", &x,&y); setdataij(kmean,n,0, x); setdataij(kmean,n,1, y); } fclose(fp); } /* 下面这一行是采用普通kmean */ //err = KMEAN_cluster(kmean); printf("err=%11.4E\n", err); /* 下面这一行是采用二分kmean */ KMEAN_BISECT(kmean); KMEAN_OutputPlt(kmean, "skmean-g4.plt"); KMEAN_free(kmean); }
C++中实现动态多维数组模板的分析-06
这里先给出arrayindex类的源代码。
#ifndef ARRAYINDEX_HPP #define ARRAYINDEX_HPP #include <cstring> #ifdef debug0 #include <cstdio> #endif class ArrayIndex { public: ArrayIndex(size_t ndim, int ranges[][2]); ~ArrayIndex(void); size_t Getsidx(int a_midx[], int& o_stat); void Getmidx(size_t a_sidx, int o_midx[]); size_t Getlength(void); size_t Getndim(void); size_t Getdim(size_t a_n); void Getrange(size_t a_dim, int o_range[2], int& o_stat); public: enum{NORMAL, WRONG_DIM, WRONG_RANGE}; private: size_t length_; size_t ndim_; size_t maxdim_; size_t *dims_; size_t *size_; size_t **shift_; int **range_; }; inline size_t ArrayIndex::Getlength(void) { return length_; } inline size_t ArrayIndex::Getndim(void) { return ndim_; } inline size_t ArrayIndex::Getdim(size_t a_n) { return dims_[a_n]; } inline void ArrayIndex::Getrange(size_t a_dim, int o_range[2], int& o_stat) { if(a_dim>=ndim_) {o_stat = ArrayIndex::WRONG_DIM; return;} o_range[0] = range_[a_dim][0]; o_range[1] = range_[a_dim][1]; o_stat = ArrayIndex::NORMAL; return; } size_t ArrayIndex::Getsidx(int a_midx[], int& o_stat) { size_t sidx=a_midx[0]-range_[0][0]; for(size_t n=1;n<ndim_;n++) { sidx += shift_[n][ a_midx[n]-range_[n][0] ]; } return sidx; } void ArrayIndex::Getmidx(size_t a_sidx, int o_midx[]) { size_t loc = a_sidx; for(size_t n=ndim_-1; n>0; n--) { for(size_t m=0;m<dims_[n];m++) if( loc >= shift_[n][m] ) o_midx[n] = m; loc -= shift_[n][ o_midx[n] ]; o_midx[n] += range_[n][0]; } o_midx[0] = loc + range_[0][0]; } ArrayIndex::ArrayIndex(size_t ndim, int ranges[][2]): ndim_(ndim), range_(NULL) { range_ = new int* [ndim_]; range_[0] = new int [ndim_ * 2]; for(size_t n=1;n<ndim_;n++) range_[n] = range_[n-1] + 2; dims_ = new size_t[ndim_]; size_ = new size_t[ndim_]; length_ = 1; maxdim_ = 0; for(size_t n=0;n<ndim_;n++) { dims_[n] = ranges[n][1] - ranges[n][0] + 1; range_[n][0] = ranges[n][0]; range_[n][1] = ranges[n][1]; size_[n] = length_; length_ *= dims_[n]; if(dims_[n]>maxdim_) maxdim_ = dims_[n]; } shift_ = new size_t* [ndim_]; shift_[0] = new size_t[maxdim_*ndim_]; for(size_t n=1;n<ndim_;n++) shift_[n] = shift_[n-1] + maxdim_; for(size_t n=1;n<ndim_;n++) { shift_[n][0] = 0; for(size_t m=1;m<dims_[n];m++) { shift_[n][m] = shift_[n][m-1] + size_[n]; } } return; } ArrayIndex::~ArrayIndex(void) { delete [] shift_[0]; delete [] shift_; delete [] range_[0]; delete [] range_; delete [] size_; delete [] dims_; } #endif
还有一个测试代码
#include "qkarray.hpp" int main(void) { int stat; size_t ndim=5; int range[5][2] = {{1,2},{5,7},{1,5},{9,10},{1,1}}, midx[20]; ArrayIndex index(ndim,range); index.Getrange(4, range[0], stat); if(ndim==5) { size_t count = 0; for(size_t n=0;n<ndim;n++) { midx[n] = 0; index.Getrange(n,range[n],stat); printf(" Range[%5ld] = %5ld:%5ld\n", n, range[n][0],range[n][1]); } printf(" Array size = %d\n",index.Getlength() ); count = 0; for(int m=range[4][0];m<=range[4][1];m++) for(int l=range[3][0];l<=range[3][1];l++) for(int k=range[2][0];k<=range[2][1];k++) for(int j=range[1][0];j<=range[1][1];j++) for(int i=range[0][0];i<=range[0][1];i++) { midx[0] = i; midx[1] = j; midx[2] = k; midx[3] = l;midx[4] = m; index.Getmidx(count, midx); count ++; printf("[%5ld] [%5ld] [%5ld] [%5ld] [%5ld] == %5ld \n", midx[0], midx[1], midx[2], midx[3],midx[4], index.Getsidx(midx, stat)); } } return 1; }
C++中实现动态多维数组模板的分析-05
在分析04中,我写出了多维编号到一维编号的转换关系。不过那时假设多维编号采用C风格,都是从0开始的。实际我需要支持非0开始的编号,即可以从任一一个整数a开始以1递增到另外一个不小于a的整数b。在这种情况下,要将非0开始的多维编号转换为内部以0开始的编号,就可以使用前述的公式了。
这次,开始进行设计。
首先命名这个类为 arrayindex
它必须具备的最重要的方法有
1 根据给定的维数和每个维的范围创建一个实例
ArrayIndex(size_t ndim, int ranges[][2]);
ndim是一个非负数,表示维数。range是每个维的范围range[i][0]是起始编号,
range[i][1]是终止编号。为了方便,令i>=1
2 从一个多维编号转换到一维编号
size_t Getsidx(int a_midx[], int& o_stat);
a_midx[]存储多维编号,为方便起见,从a_mix[1]开始有效。
3 从一个一维编号转换到多维编号
void Getmidx(size_t a_sidx, int o_midx[]);
这个类的关键成员应该包含
size_t length_; // 数组数据总个数
size_t ndim_; // 维数
size_t maxdim_; // 最大维数
size_t *dims_; // 每个维的尺寸
size_t *size_; // 每个维度上切片的尺寸。比如,数组[5][3][2],第一维是一维的串,它的切片就是单个元素,尺寸为1;第二维是一个二维的片,其切片是低一维的串,每个串的尺寸是5;第三维是一个三维块,其切片是低一维片,尺寸是5X3。如此类推到更高维。
size_t **shift_; // 每个维上所有标号的偏移。比如数组[5][3][2],按照我们先前的分析,必须存储第一维的5个偏移,第二维的3个偏移和第三维的2个偏移。因此shift_是个二维数组。
int **range_; // 每个维的范围