[关闭]
@CrazyHenry 2018-04-23T19:30:08.000000Z 字数 3067 阅读 1783

3-IndexIVFPQ(k-means,有PQ)

hhhhfaiss


接口

  1. int nlist = 100;
  2. int k = 4;
  3. int m = 8; // bytes per vector
  4. faiss::IndexFlatL2 quantizer(d); // the other index
  5. faiss::IndexIVFPQ index(&quantizer, d, nlist, m, 8);//what is 8? faiss::METRIC_L2?
  6. // here we specify METRIC_L2, by default it performs inner-product search
  7. index.train(nb, xb);
  8. index.add(nb, xb);
  9. index.nprobe = 10;
  10. index.search(nq, xq, k, D, I);//default时,nprobe = 1

原理

  1. The vectors are still stored in Voronoi cells, but their size is reduced to a configurable number of bytes m (d must be a multiple of m).(PQ编码,是将yi全部这样编码,还是k-means保留原始结构,PQ再进一步编码。即q1q2具体的计算是怎样的???)
  2. In this case, since the vectors are not stored exactly, the distances that are returned by the search method are also approximations.(近似的计算)
  3. Faiss offers variants that compress the stored vectors with a lossy compression based on product quantizers. The vectors are still stored in Voronoi cells, but their size is reduced to a configurable number of bytes m (d must be a multiple of m).(感觉像是所有yi都进行PQ编码)

代码

  1. /*
  2. * Author: yingmin.li
  3. *
  4. * Date: 2018/4/23
  5. *
  6. * Detail: add a timespan part for demo3
  7. *
  8. */
  9. #include <iostream>
  10. #include <string>
  11. #include <vector>
  12. #include <algorithm>
  13. #include <cstdio>
  14. #include <cstdlib>
  15. #include <chrono>
  16. #include <faiss/IndexFlat.h>
  17. #include <faiss/IndexIVFPQ.h>
  18. //using std::
  19. using std::endl;
  20. using std::cout;
  21. //note: header file do not use using
  22. int main() {
  23. int d = 256; // dimension
  24. int nb = 100000; // database size
  25. int nq = 1; // nb of queries
  26. float *xb = new float[d * nb];
  27. float *xq = new float[d * nq];
  28. for(int i = 0; i < nb; i++) {
  29. for(int j = 0; j < d; j++)
  30. xb[d * i + j] = drand48();
  31. xb[d * i] += i / 1000.;
  32. }
  33. for(int i = 0; i < nq; i++) {
  34. for(int j = 0; j < d; j++)
  35. xq[d * i + j] = drand48();
  36. xq[d * i] += i / 1000.;
  37. }
  38. int nlist = 100;
  39. int k = 4;
  40. int m = 8; // bytes per vector
  41. faiss::IndexFlatL2 quantizer(d); // the other index
  42. faiss::IndexIVFPQ index(&quantizer, d, nlist, m, 8);//what is 8? faiss::METRIC_L2?
  43. // here we specify METRIC_L2, by default it performs inner-product search
  44. index.train(nb, xb);
  45. index.add(nb, xb);
  46. // { // sanity check
  47. // long *I = new long[k * 5];
  48. // float *D = new float[k * 5];
  49. // index.search(5, xb, k, D, I);
  50. // printf("I=\n");
  51. // for(int i = 0; i < 5; i++) {
  52. // for(int j = 0; j < k; j++)
  53. // printf("%5ld ", I[i * k + j]);
  54. // printf("\n");
  55. // }
  56. // printf("D=\n");
  57. // for(int i = 0; i < 5; i++) {
  58. // for(int j = 0; j < k; j++)
  59. // printf("%7g ", D[i * k + j]);
  60. // printf("\n");
  61. // }
  62. // delete [] I;
  63. // delete [] D;
  64. // }
  65. { // search xq
  66. long *I = new long[k * nq];
  67. float *D = new float[k * nq];
  68. for(int loopi = 1; loopi <= 40; loopi += 10)
  69. {
  70. index.nprobe = loopi;
  71. auto begin_time = std::chrono::steady_clock::now();
  72. for(int i = 0; i < 10; ++i)
  73. {
  74. index.search(nq, xq, k, D, I);//default时,nprobe = 1
  75. }
  76. auto end_time = std::chrono::steady_clock::now();
  77. auto time_span = std::chrono::duration_cast<std::chrono::duration<long,std::ratio<1,1000>>>(end_time - begin_time);
  78. cout << endl << endl << "IVFFlat(use quantization, CPU, nlist == 100, dismethod == L2):" <<endl
  79. << "when the nprobe = "<<loopi<<" ,"
  80. << "avg_time(10 times avg) for:" << endl
  81. << " -vec dimension d = " << d << endl
  82. << " -database vec numbers nb = " << nb <<endl
  83. << " -query vec batchings nq = " << nq << endl
  84. << " -KNN algorithm's k = " << k << endl
  85. << " is " << time_span.count() / 10 << " milliseconds" << endl;
  86. }
  87. // printf("I=\n");
  88. // for(int i = nq - 5; i < nq; i++) {
  89. // for(int j = 0; j < k; j++)
  90. // printf("%5ld ", I[i * k + j]);
  91. // printf("\n");
  92. // }
  93. delete [] I;
  94. delete [] D;
  95. }
  96. delete [] xb;
  97. delete [] xq;
  98. return 0;
  99. }
添加新批注
在作者公开此批注前,只有你和作者可见。
回复批注