@CrazyHenry
2018-04-23T11:30:08.000000Z
字数 3067
阅读 2011
hhhhfaiss
- Author:李英民 | Henry
- E-mail: li_yingmin@outlookdotcom
- Home: https://liyingmin.wixsite.com/henry
int nlist = 100;int k = 4;int m = 8; // bytes per vectorfaiss::IndexFlatL2 quantizer(d); // the other indexfaiss::IndexIVFPQ index(&quantizer, d, nlist, m, 8);//what is 8? faiss::METRIC_L2?// here we specify METRIC_L2, by default it performs inner-product searchindex.train(nb, xb);index.add(nb, xb);index.nprobe = 10;index.search(nq, xq, k, D, I);//default时,nprobe = 1
The vectors are still stored in Voronoi cells, but their size is reduced to a configurable number of bytes m (d must be a multiple of m). (PQ编码:是将 yi 全部这样编码,还是 k-means 保留原始结构、PQ 再进一步编码?即 q1 和 q2 具体的计算是怎样的?) In this case, since the vectors are not stored exactly, the distances that are returned by the search method are also approximations. (近似的计算) Faiss offers variants that compress the stored vectors with a lossy compression based on product quantizers. The vectors are still stored in Voronoi cells, but their size is reduced to a configurable number of bytes m (d must be a multiple of m). (感觉像是所有 yi 都进行 PQ 编码)
/** Author: yingmin.li** Date: 2018/4/23** Detail: add a timespan part for demo3**/#include <iostream>#include <string>#include <vector>#include <algorithm>#include <cstdio>#include <cstdlib>#include <chrono>#include <faiss/IndexFlat.h>#include <faiss/IndexIVFPQ.h>//using std::using std::endl;using std::cout;//note: header file do not use usingint main() {int d = 256; // dimensionint nb = 100000; // database sizeint nq = 1; // nb of queriesfloat *xb = new float[d * nb];float *xq = new float[d * nq];for(int i = 0; i < nb; i++) {for(int j = 0; j < d; j++)xb[d * i + j] = drand48();xb[d * i] += i / 1000.;}for(int i = 0; i < nq; i++) {for(int j = 0; j < d; j++)xq[d * i + j] = drand48();xq[d * i] += i / 1000.;}int nlist = 100;int k = 4;int m = 8; // bytes per vectorfaiss::IndexFlatL2 quantizer(d); // the other indexfaiss::IndexIVFPQ index(&quantizer, d, nlist, m, 8);//what is 8? faiss::METRIC_L2?// here we specify METRIC_L2, by default it performs inner-product searchindex.train(nb, xb);index.add(nb, xb);// { // sanity check// long *I = new long[k * 5];// float *D = new float[k * 5];// index.search(5, xb, k, D, I);// printf("I=\n");// for(int i = 0; i < 5; i++) {// for(int j = 0; j < k; j++)// printf("%5ld ", I[i * k + j]);// printf("\n");// }// printf("D=\n");// for(int i = 0; i < 5; i++) {// for(int j = 0; j < k; j++)// printf("%7g ", D[i * k + j]);// printf("\n");// }// delete [] I;// delete [] D;// }{ // search xqlong *I = new long[k * nq];float *D = new float[k * nq];for(int loopi = 1; loopi <= 40; loopi += 10){index.nprobe = loopi;auto begin_time = std::chrono::steady_clock::now();for(int i = 0; i < 10; ++i){index.search(nq, xq, k, D, I);//default时,nprobe = 1}auto end_time = std::chrono::steady_clock::now();auto time_span = std::chrono::duration_cast<std::chrono::duration<long,std::ratio<1,1000>>>(end_time - begin_time);cout << endl << endl << "IVFFlat(use quantization, CPU, nlist == 100, dismethod == L2):" <<endl<< "when the nprobe = "<<loopi<<" ,"<< "avg_time(10 
times avg) for:" << endl<< " -vec dimension d = " << d << endl<< " -database vec numbers nb = " << nb <<endl<< " -query vec batchings nq = " << nq << endl<< " -KNN algorithm's k = " << k << endl<< " is " << time_span.count() / 10 << " milliseconds" << endl;}// printf("I=\n");// for(int i = nq - 5; i < nq; i++) {// for(int j = 0; j < k; j++)// printf("%5ld ", I[i * k + j]);// printf("\n");// }delete [] I;delete [] D;}delete [] xb;delete [] xq;return 0;}