[关闭]
@CrazyHenry 2018-04-21T18:40:12.000000Z 字数 3065 阅读 1384

2-IVFFlat.cpp

hhhhfaiss


接口:

  1. nlist, the number of Voronoi cells
  2. nprobe, the number of cells (out of nlist)
  3. index.train(xb)
  4. The search time roughly increases linearly with the number of probes plus some constant due to the quantization.(nprobe越大,搜索时间线性增加)
  5. The nprobe parameter is always a way of adjusting the tradeoff between speed and accuracy of the result. Setting nprobe = nlist gives the same result as the brute-force search (but slower).
  6. nprobe参数越大,结果越精确(同时也越慢),当nlist == nprobe时,结果与暴力搜索一样,而且更慢)
  7. 那么nlist怎么确定呢?

原理:

  1. At search time, only the database vectors y contained in the cell the query x falls in and a few neighboring ones are compared against the query vector.(只需要把和query x落到同一个Voronoi celldatabase y以及这个y周围的一些y query x做比较)
  1. The IndexIVFFlat also requires another index, the quantizer, that assigns vectors to Voronoi cells(把原始vector assignVoronoi cells). Each Voronoicell is defined by a centroid(质心), and finding the Voronoi cell a vector falls in(找到被原始vectorfall in Voronoi cell consists in finding the nearest neighbor of the vector in the set of centroids(相当于在set of centroids中做IndexFlatL2KNN). This is the task of the other index, which is typically an IndexFlatL2.

代码适配

  1. /*
  2. * Author: yingmin.li
  3. *
  4. * Date: 2018/4/21
  5. *
  6. * Detail: add a timespan part for demo2
  7. *
  8. */
  9. #include <iostream>
  10. #include <string>
  11. #include <vector>
  12. #include <algorithm>
  13. #include <cstdio>
  14. #include <cstdlib>
  15. #include <chrono>
  16. #include <faiss/IndexFlat.h>
  17. #include <faiss/IndexIVFFlat.h>
  18. //using std::
  19. using std::endl;
  20. using std::cout;
  21. //note: header file do not use using
  22. int main() {
  23. int d = 256; // dimension
  24. int nb = 100000; // database size
  25. int nq = 10000; // nb of queries
  26. float *xb = new float[d * nb];
  27. float *xq = new float[d * nq];
  28. for(int i = 0; i < nb; i++) {
  29. for(int j = 0; j < d; j++)
  30. xb[d * i + j] = drand48();
  31. xb[d * i] += i / 1000.;
  32. }
  33. for(int i = 0; i < nq; i++) {
  34. for(int j = 0; j < d; j++)
  35. xq[d * i + j] = drand48();
  36. xq[d * i] += i / 1000.;
  37. }
  38. int nlist = 100;
  39. int k = 4;
  40. faiss::IndexFlatL2 quantizer(d); // the other index
  41. faiss::IndexIVFFlat index(&quantizer, d, nlist, faiss::METRIC_L2);
  42. // here we specify METRIC_L2, by default it performs inner-product search,内积cosin
  43. assert(!index.is_trained);
  44. index.train(nb, xb);
  45. assert(index.is_trained);
  46. index.add(nb, xb);
  47. { // search xq
  48. long *I = new long[k * nq];
  49. float *D = new float[k * nq];
  50. for(int loopi = 1; loopi <= 40; loopi += 10)
  51. {
  52. index.nprobe = loopi;
  53. auto begin_time = std::chrono::steady_clock::now();
  54. for(int i = 0; i < 10; ++i)
  55. {
  56. index.search(nq, xq, k, D, I);//default时,nprobe = 1
  57. }
  58. auto end_time = std::chrono::steady_clock::now();
  59. auto time_span = std::chrono::duration_cast<std::chrono::duration<long,std::ratio<1,1000>>>(end_time - begin_time);
  60. cout << endl << endl << "IVFFlat(use quantization, CPU, nlist == 100):" <<endl
  61. << "when the nprobe = "<<loopi<<" ,"
  62. << "avg_time(10 times avg) for:" << endl
  63. << " -vec dimension d = " << d << endl
  64. << " -database vec numbers nb = " << nb <<endl
  65. << " -query vec batchings nq = " << nq << endl
  66. << " -KNN algorithm's k = " << k << endl
  67. << " is " << time_span.count() / 10 << " milliseconds" << endl;
  68. }
  69. // printf("I=\n");
  70. // for(int i = nq - 5; i < nq; i++) {
  71. // for(int j = 0; j < k; j++)
  72. // printf("%5ld ", I[i * k + j]);
  73. // printf("\n");
  74. // }
  75. // index.nprobe = 10;
  76. // index.search(nq, xq, k, D, I);
  77. // printf("I=\n");
  78. // for(int i = nq - 5; i < nq; i++) {
  79. // for(int j = 0; j < k; j++)
  80. // printf("%5ld ", I[i * k + j]);
  81. // printf("\n");
  82. // }
  83. delete [] I;
  84. delete [] D;
  85. }
  86. delete [] xb;
  87. delete [] xq;
  88. return 0;
  89. }

image_1cbjrl6c412h2588nrh1rimjd39.png-49.5kB

image_1cbjrm0tmjco19cli01lqpf36m.png-79.3kB

image_1cbjrmn0d12qt13hku5f1n2g1sg013.png-34.9kB

添加新批注
在作者公开此批注前,只有你和作者可见。
回复批注