@CrazyHenry
2018-04-23T19:30:08.000000Z
字数 3067
阅读 1783
hhhhfaiss
- Author: 李英民 | Henry
- E-mail: li_yingmin@outlook dot com
- Home: https://liyingmin.wixsite.com/henry
// Fragment: builds an IVFPQ index over the database vectors and searches it.
// d, nb, xb, nq, xq, D and I are defined outside this snippet.
int nlist = 100;
int k = 4;
int m = 8; // bytes per vector
faiss::IndexFlatL2 quantizer(d); // the other index
// NOTE(review): the trailing 8 is not a metric. Given "m bytes per vector" above,
// it is presumably the number of bits per sub-quantizer code (m codes x 8 bits =
// m bytes) -- confirm against the faiss::IndexIVFPQ constructor documentation.
faiss::IndexIVFPQ index(&quantizer, d, nlist, m, 8);
// NOTE(review): no metric argument is passed in the call above, so the index uses
// whatever metric the constructor defaults to -- verify it is L2 in your Faiss version.
index.train(nb, xb);
index.add(nb, xb);
index.nprobe = 10;
// nprobe would be 1 if left at its default; it is set to 10 above.
index.search(nq, xq, k, D, I);
The vectors are still stored in Voronoi cells, but their size is reduced to a configurable number of bytes m (d must be a multiple of m).(PQ编码,是将yi全部这样编码,还是k-means保留原始结构,PQ再进一步编码。即q1和q2具体的计算是怎样的???)
In this case, since the vectors are not stored exactly, the distances that are returned by the search method are also approximations.(近似的计算)
Faiss offers variants that compress the stored vectors with a lossy compression based on product quantizers. The vectors are still stored in Voronoi cells, but their size is reduced to a configurable number of bytes m (d must be a multiple of m).(感觉像是所有yi都进行PQ编码)
/*
* Author: yingmin.li
*
* Date: 2018/4/23
*
* Detail: add a timespan part for demo3
*
*/
#include <iostream>
#include <string>
#include <vector>
#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <chrono>
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFPQ.h>
//using std::
using std::endl;
using std::cout;
// note: do not put using-declarations or using-directives in header files
// Benchmark for faiss::IndexIVFPQ: builds an index over random vectors and
// reports the average wall-clock time of one search call for several nprobe
// settings (1, 11, 21, 31).
//
// Returns 0 on success. Exceptions thrown by Faiss are not caught here; all
// buffers are owned by std::vector, so they are released on that path too.
int main() {
    const int d = 256;      // dimension
    const int nb = 100000;  // database size
    const int nq = 1;       // nb of queries

    // Owning buffers replace the former new[]/delete[] pairs.
    std::vector<float> xb(static_cast<std::size_t>(d) * nb);
    std::vector<float> xq(static_cast<std::size_t>(d) * nq);

    // Fills `count` rows of `d` uniform random values and shifts the first
    // component of row i by i / 1000, exactly as the original loops did
    // (same drand48() call order, so the generated data is unchanged).
    const auto fill_random = [&](std::vector<float>& vecs, int count) {
        for (int i = 0; i < count; ++i) {
            float* row = vecs.data() + static_cast<std::size_t>(d) * i;
            for (int j = 0; j < d; ++j)
                row[j] = static_cast<float>(drand48());
            row[0] += i / 1000.;
        }
    };
    fill_random(xb, nb);
    fill_random(xq, nq);

    const int nlist = 100;
    const int k = 4;
    const int m = 8;  // bytes per vector

    faiss::IndexFlatL2 quantizer(d);  // the other index
    // The trailing 8 is the number of bits per sub-quantizer code
    // (m codes x 8 bits = m bytes per vector); it is not a metric. No metric
    // argument is passed, so the constructor's default applies.
    faiss::IndexIVFPQ index(&quantizer, d, nlist, m, 8);
    index.train(nb, xb.data());
    index.add(nb, xb.data());

    // Optional sanity check: search the first 5 database vectors.
    // {
    //     std::vector<long> I(k * 5);
    //     std::vector<float> D(k * 5);
    //     index.search(5, xb.data(), k, D.data(), I.data());
    //     printf("I=\n");
    //     for (int i = 0; i < 5; i++) {
    //         for (int j = 0; j < k; j++)
    //             printf("%5ld ", I[i * k + j]);
    //         printf("\n");
    //     }
    //     printf("D=\n");
    //     for (int i = 0; i < 5; i++) {
    //         for (int j = 0; j < k; j++)
    //             printf("%7g ", D[i * k + j]);
    //         printf("\n");
    //     }
    // }

    {  // search xq
        std::vector<long> I(static_cast<std::size_t>(k) * nq);
        std::vector<float> D(static_cast<std::size_t>(k) * nq);

        const int kRepeats = 10;  // searches averaged per nprobe setting

        for (int nprobe = 1; nprobe <= 40; nprobe += 10) {
            index.nprobe = nprobe;  // nprobe would be 1 if left at its default

            const auto begin_time = std::chrono::steady_clock::now();
            for (int rep = 0; rep < kRepeats; ++rep)
                index.search(nq, xq.data(), k, D.data(), I.data());
            const auto end_time = std::chrono::steady_clock::now();

            // Fractional milliseconds: the previous integer-millisecond
            // duration divided by 10 truncated sub-millisecond averages to 0.
            const std::chrono::duration<double, std::milli> time_span =
                end_time - begin_time;

            std::cout << "\n\n"
                      << "IVFPQ(use quantization, CPU, nlist == 100, dismethod == L2):" << '\n'
                      << "when the nprobe = " << nprobe << " ,"
                      << "avg_time(" << kRepeats << " times avg) for:" << '\n'
                      << " -vec dimension d = " << d << '\n'
                      << " -database vec numbers nb = " << nb << '\n'
                      << " -query vec batchings nq = " << nq << '\n'
                      << " -KNN algorithm's k = " << k << '\n'
                      << " is " << time_span.count() / kRepeats << " milliseconds"
                      << std::endl;
        }

        // Optional: print the labels of the last (up to) 5 queries.
        // printf("I=\n");
        // for (int i = std::max(0, nq - 5); i < nq; i++) {
        //     for (int j = 0; j < k; j++)
        //         printf("%5ld ", I[i * k + j]);
        //     printf("\n");
        // }
    }

    return 0;
}