@snuffles 2015-10-20T07:56:13.000000Z 字数 8385 阅读 2179

整像素运动估计代码分析

大作业

作业三
1. 找出ITM软件中整像素运动估计模块代码，并对搜索过程做简要分析
2. 分析openCV全局运动代码，描述其过程

reference
UMHexagonS算法代码有自己的注释

整像素运动估计的函数的定义在fastme.c中
其中fastme.c包括整像素估计和分像素估计算法（Fast integer pel motion estimation and fractional pel motion estimation）。主要包括
1. get_mem_FME() and free_mem_FME() are functions for allocation and release
  of memories about motion estimation
2. FME_BlockMotionSearch() is the function for fast integer pel motion
  estimation and fractional pel motion estimation
3. DefineThreshold() defined thresholds for early termination
其中整像素估计的算法是UMHexagonS算法。分为4个步骤
1. 步骤一：起始搜索点的预测，五种预测模式求预测运动矢量
（1）中值预测:利用空间相关性，取已求出的、当前帧的左、上、右上邻块的运动矢量的中间值；
（2）原点预测：取原点处；
（3）上层预测：采用从模式1（16*16）到模式7（4*4）的分级搜索顺序，将已求出的当前位置的上一级模式的运动向量作为当前模式运动向量的预测值；
（4）对应块预测：将前一帧相应位置的运动向量作为当前块的运动向量的预测值;
（5）相邻参考帧预测：利用时间相关性，将前面参考帧的运动向量按时间进行缩放。
2. 步骤二：不甚满意区的块搜索（对应代码里面first_step）
（1）以最佳起始搜索点为中心，用非对称十字型搜索模板进行搜索，获得当前最佳点，判断此处是否属于满意或者很满意区，跳到相应的步骤三或步骤四，或继续搜索；
（2）以当前最佳点为中心，在[-2,2]的方形区域（5*5）中进行全搜索，获得当前最佳点，判断是否属于满意或很满意区，跳到相应的步骤三或步骤四，或继续搜索；
（3）用大六边形模板（16点）进行搜索，直至搜索到能符合相应阈值而进入步骤三或步骤四的点为止，否则也结束步骤二的搜索而进入步骤三。
3. 步骤三：满意区的块搜索（对应代码里面sec_step）
以当前最佳点为中心进行六边形搜索，直至最佳点为六边形中心。
4. 步骤四：很满意区的搜索（对应代码里面third_step）
以当前最佳点为中心进行菱形搜索，直至最佳点为菱形中心。
代码如下

/*
*************************************************************************
* Function: FastIntegerPelBlockMotionSearch: fast pixel block motion search
this algorithm is called UMHexagonS which includes
four steps with different kinds of search patterns
* Input:
pel_t**   orig_pic,     // <--  original picture
int       ref,          // <--  reference frame (0... or -1 (backward))
int       pic_pix_x,    // <--  absolute x-coordinate of regarded AxB block
int       pic_pix_y,    // <--  absolute y-coordinate of regarded AxB block
int       blocktype,    // <--  block type (1-16x16 ... 7-4x4)
int       pred_mv_x,    // <--  motion vector predictor (x) in sub-pel units
int       pred_mv_y,    // <--  motion vector predictor (y) in sub-pel units
int*      mv_x,         //  --> motion vector (x) - in pel units
int*      mv_y,         //  --> motion vector (y) - in pel units
int       search_range, // <--  1-d search range in pel units                         
int       min_mcost,    // <--  minimum motion cost (cost for center or huge value)
double    lambda        // <--  lagrangian parameter for determining motion cost
* Output:
* Return: 
* Attention: in this function, three macro definitions are used,
EARLY_TERMINATION: early termination check between search steps
SEARCH_ONE_PIXEL: search one pixel in search range
SEARCH_ONE_PIXEL1(value_iAbort): search one pixel in search range,
but give a parameter to show if mincost refreshed
*************************************************************************
*/

// UMHexagonS fast integer-pel block motion search.
//
// Starting from the caller-supplied vector (*mv_x, *mv_y), the function
// evaluates candidates around several predictors and then refines the best
// one with progressively smaller patterns: an unsymmetrical cross, a spiral
// window, a scaled 16-point hexagon, a 6-point hexagon and a 4-point diamond.
// On return *mv_x / *mv_y hold the best vector in pel units relative to
// (pic_pix_x, pic_pix_y) and the return value is the minimum motion cost.
//
// NOTE(review): SEARCH_ONE_PIXEL, SEARCH_ONE_PIXEL1 and EARLY_TERMINATION are
// macros defined outside this view. They are invoked without a trailing
// semicolon and presumably read/update the locals declared below (cand_x,
// cand_y, mcost, min_mcost, best_x, best_y, iAbort, ...) and may jump to the
// sec_step / third_step labels -- confirm against their definitions before
// restructuring. For that reason no local is removed here, even those that
// look unused in the visible code.
int                                     //  ==> minimum motion cost after search
FastIntegerPelBlockMotionSearch  (pel_t**   orig_pic,     // <--  original picture (forwarded to PartCalMad)
                                  int       ref,          // <--  reference frame (0... or -1 (backward))
                                  int       pic_pix_x,    // <--  absolute x-coordinate of regarded AxB block
                                  int       pic_pix_y,    // <--  absolute y-coordinate of regarded AxB block
                                  int       blocktype,    // <--  block type (1-16x16 ... 7-4x4)
                                  int       pred_mv_x,    // <--  motion vector predictor (x) in sub-pel units
                                  int       pred_mv_y,    // <--  motion vector predictor (y) in sub-pel units
                                  int*      mv_x,         // <--> in: start vector (x), out: best motion vector (x) - in pel units
                                  int*      mv_y,         // <--> in: start vector (y), out: best motion vector (y) - in pel units
                                  int       search_range, // <--  1-d search range in pel units
                                  int       min_mcost,    // <--  minimum motion cost (cost for center or huge value)
                                  double    lambda)       // <--  lagrangian parameter for determining motion cost
{
  // Offsets of the search patterns, relative to the current best position.
  static int Diamond_x[4] = {-1, 0, 1, 0};
  static int Diamond_y[4] = {0, 1, 0, -1};
  static int Hexagon_x[6] = {2, 1, -1, -2, -1, 1};
  static int Hexagon_y[6] = {0, -2, -2, 0,  2, 2};
  static int Big_Hexagon_x[16] = {0,-2, -4,-4,-4, -4, -4, -2,  0,  2,  4,  4, 4, 4, 4, 2};
  static int Big_Hexagon_y[16] = {4, 3, 2,  1, 0, -1, -2, -3, -4, -3, -2, -1, 0, 1, 2, 3};
  int   pos, cand_x, cand_y,  mcost;
  pel_t *(*get_ref_line)(int, pel_t*, int, int);
  pel_t*  ref_pic       = img->type==B_IMG? Refbuf11 [ref+1] : Refbuf11[ref];
  int   best_pos      = 0;                                        // position with minimum motion cost
  int   max_pos       = (2*search_range+1)*(2*search_range+1);    // number of search positions
  int   lambda_factor = LAMBDA_FACTOR (lambda);                   // factor for determining lagrangian motion cost
  int   mvshift       = 2;                  // motion vector shift for getting sub-pel units
  int   blocksize_y   = input->blc_size[blocktype][1];            // vertical block size
  int   blocksize_x   = input->blc_size[blocktype][0];            // horizontal block size
  int   blocksize_x4  = blocksize_x >> 2;                         // horizontal block size in 4-pel units
  int   pred_x        = (pic_pix_x << mvshift) + pred_mv_x;       // predicted position x (in sub-pel units)
  int   pred_y        = (pic_pix_y << mvshift) + pred_mv_y;       // predicted position y (in sub-pel units)
  int   center_x      = pic_pix_x + *mv_x;                        // center position x (in pel units)
  int   center_y      = pic_pix_y + *mv_y;                        // center position y (in pel units)
  // The best position starts at the search center so that it is defined even
  // when the center candidate does not beat the incoming min_mcost. It used
  // to be read uninitialised in that case.
  int   best_x        = center_x;
  int   best_y        = center_y;
  int   check_for_00  = (blocktype==1 && !input->rdopt && img->type!=B_IMG && ref==0);
  int   search_step,iYMinNow, iXMinNow;
  int   i,m, iSADLayer;
  int   iAbort;
  float betaSec,betaThird;
  int   height        = img->height;
  //===== set function for getting reference picture lines =====
  // FastLineX is chosen only when the whole search window plus the block lies
  // strictly inside the picture; otherwise UMVLineX is used.
  if ((center_x > search_range) && (center_x < img->width -1-search_range-blocksize_x) &&
    (center_y > search_range) && (center_y < height-1-search_range-blocksize_y)   )
  {
    get_ref_line = FastLineX;
  }
  else
  {
    get_ref_line = UMVLineX;
  }
  // Clear the per-position cost cache for the (2R+1)x(2R+1) window.
  // Assumes the rows of McostState form one contiguous allocation starting at
  // McostState[0] -- TODO confirm against get_mem_FME().
  memset(McostState[0],0,(2*search_range+1)*(2*search_range+1)*sizeof(McostState[0][0]));
  ///////////////////////////////////////////////////////////////
  // Derive the two thresholds (betaSec, betaThird) from the predicted SAD that
  // matches the current case: reference-frame, spatial (blocktype 1) or
  // upper-layer prediction. A zero predicted SAD yields zero thresholds and
  // avoids the division by zero.
#ifdef MRF_HYU
#ifdef MRF_RAND_P
  if(img->type == B_IMG && ref > 0)
#else
  if((input->low_delay && img->type != INTER_IMG && ref>0)||(input->low_delay == 0 && ref > 0))
#endif
#else
  if(ref>0)
#endif
  {
    if(pred_SAD_ref!=0)
    {
      betaSec = Bsize[blocktype]/(pred_SAD_ref*pred_SAD_ref)-AlphaSec[blocktype];
      betaThird = Bsize[blocktype]/(pred_SAD_ref*pred_SAD_ref)-AlphaThird[blocktype];
    }
    else
    {
      betaSec = 0;
      betaThird = 0;
    }
  }
  else
  {
    if(blocktype==1)
    {
      if(pred_SAD_space !=0)
      {
        betaSec = Bsize[blocktype]/(pred_SAD_space*pred_SAD_space)-AlphaSec[blocktype];
        betaThird = Bsize[blocktype]/(pred_SAD_space*pred_SAD_space)-AlphaThird[blocktype];
      }
      else
      {
        betaSec = 0;
        betaThird = 0;
      }
    }
    else
    {
      if(pred_SAD_uplayer !=0)
      {
        betaSec = Bsize[blocktype]/(pred_SAD_uplayer*pred_SAD_uplayer)-AlphaSec[blocktype];
        betaThird = Bsize[blocktype]/(pred_SAD_uplayer*pred_SAD_uplayer)-AlphaThird[blocktype];
      }
      else
      {
        betaSec = 0;
        betaThird = 0;
      }
    }
  }
  //-----------------------------------------------------------
  ////////////search around the predictor and (0,0)
  //check the center median predictor
  cand_x = center_x ;
  cand_y = center_y ;
  mcost = MV_COST (lambda_factor, mvshift, cand_x, cand_y, pred_x, pred_y);
  mcost = PartCalMad(ref_pic, orig_pic, get_ref_line,blocksize_y,blocksize_x,blocksize_x4,mcost,min_mcost,cand_x,cand_y);
  McostState[search_range][search_range] = mcost;
  if (mcost < min_mcost)
  {
    min_mcost = mcost;
    best_x = cand_x;
    best_y = cand_y;
  }
  // small diamond around the current best position
  iXMinNow = best_x;
  iYMinNow = best_y;
  for (m = 0; m < 4; m++)
  {
    cand_x = iXMinNow + Diamond_x[m];
    cand_y = iYMinNow + Diamond_y[m];
    SEARCH_ONE_PIXEL
  }
  // zero-vector candidate (the block's own position), only if it differs
  // from the center, followed by another small diamond
  if(center_x != pic_pix_x || center_y != pic_pix_y)
  {
    cand_x = pic_pix_x ;
    cand_y = pic_pix_y ;
    SEARCH_ONE_PIXEL
    iXMinNow = best_x;
    iYMinNow = best_y;
    for (m = 0; m < 4; m++)
    {
      cand_x = iXMinNow + Diamond_x[m];
      cand_y = iYMinNow + Diamond_y[m];
      SEARCH_ONE_PIXEL
    }
  }
  // upper-layer predictor (block types below 16x16); the /4 presumably
  // converts the predictor from sub-pel to pel units -- TODO confirm
  if(blocktype>1)
  {
    cand_x = pic_pix_x + (pred_MV_uplayer[0]/4);
    cand_y = pic_pix_y + (pred_MV_uplayer[1]/4);
    SEARCH_ONE_PIXEL
    // early exit straight to one of the final refinement steps
    if ((min_mcost-pred_SAD_uplayer)<pred_SAD_uplayer*betaThird)
      goto third_step;
    else if((min_mcost-pred_SAD_uplayer)<pred_SAD_uplayer*betaSec)
      goto sec_step;
  }
  //coordinate position prediction (candidate taken from pred_MV_time)
  if ((img->number > 1 + ref && ref!=-1) || (ref == -1 && Bframe_ctr > 1))
  {
    cand_x = pic_pix_x + pred_MV_time[0]/4;
    cand_y = pic_pix_y + pred_MV_time[1]/4;
    SEARCH_ONE_PIXEL
  }
  //prediction using the motion vector of the last reference (pred_MV_ref)
  if ((ref > 0) || (img->type == B_IMG && ref == 0))
  {
    cand_x = pic_pix_x + pred_MV_ref[0]/4;
    cand_y = pic_pix_y + pred_MV_ref[1]/4;
    SEARCH_ONE_PIXEL
  }
  //strengthen local search
  iXMinNow = best_x;
  iYMinNow = best_y;
  for (m = 0; m < 4; m++)
  {
    cand_x = iXMinNow + Diamond_x[m];
    cand_y = iYMinNow + Diamond_y[m];
    SEARCH_ONE_PIXEL
  }
  EARLY_TERMINATION
  // the smallest block type (7) skips the wide-range first step
  if(blocktype>6)
    goto sec_step;
  else
    goto first_step;
first_step: //Unsymmetrical-cross search
  iXMinNow = best_x;
  iYMinNow = best_y;
  // horizontal arm: odd offsets, search_range/2 steps to each side
  for(i=1;i<=search_range/2;i++)
  {
    search_step = 2*i - 1;
    cand_x = iXMinNow + search_step;
    cand_y = iYMinNow ;
    SEARCH_ONE_PIXEL
    cand_x = iXMinNow - search_step;
    cand_y = iYMinNow ;
    SEARCH_ONE_PIXEL
  }
  // vertical arm: odd offsets, search_range/4 steps to each side
  for(i=1;i<=search_range/4;i++)
  {
    search_step = 2*i - 1;
    cand_x = iXMinNow ;
    cand_y = iYMinNow + search_step;
    SEARCH_ONE_PIXEL
    cand_x = iXMinNow ;
    cand_y = iYMinNow - search_step;
    SEARCH_ONE_PIXEL
  }
  EARLY_TERMINATION
  iXMinNow = best_x;
  iYMinNow = best_y;
  // Local search over spiral positions 1..24 around the current best point
  // (presumably the non-center points of a 5x5 window -- the spiral tables
  // are defined outside this view).
  for(pos=1;pos<25;pos++)
  {
    cand_x = iXMinNow + spiral_search_x[pos];
    cand_y = iYMinNow + spiral_search_y[pos];
    SEARCH_ONE_PIXEL
  }
  EARLY_TERMINATION
  // Uneven Multi-Hexagon-grid Search: the 16-point hexagon is scaled by
  // i = 1..search_range/4 around the same center (iXMinNow/iYMinNow are not
  // updated inside this loop).
  for(i=1;i<=search_range/4; i++)
  {
    iAbort = 0;
    for (m = 0; m < 16; m++)
    {
      cand_x = iXMinNow + Big_Hexagon_x[m]*i;
      cand_y = iYMinNow + Big_Hexagon_y[m]*i;
      SEARCH_ONE_PIXEL1(1)
    }
    // iAbort is expected to be set by SEARCH_ONE_PIXEL1(1) when the minimum
    // cost was refreshed; only then is the termination test repeated.
    if (iAbort)
    {
      EARLY_TERMINATION
    }
  }
sec_step:  //Extended Hexagon-based Search
  iXMinNow = best_x;
  iYMinNow = best_y;
  // repeat the 6-point hexagon until iAbort is still 1 after a full pass
  // (no candidate cleared it) or search_range passes have been made
  for(i=0;i<search_range;i++)
  {
    iAbort = 1;
    for (m = 0; m < 6; m++)
    {
      cand_x = iXMinNow + Hexagon_x[m];
      cand_y = iYMinNow + Hexagon_y[m];
      SEARCH_ONE_PIXEL1(0)
    }
    if(iAbort)
      break;
    iXMinNow = best_x;
    iYMinNow = best_y;
  }
third_step: // the third step with a small search pattern
  iXMinNow = best_x;
  iYMinNow = best_y;
  // same scheme as sec_step, using the 4-point diamond
  for(i=0;i<search_range;i++)
  {
    iSADLayer = 65536;   // assigned but not read in the visible code
    iAbort = 1;
    for (m = 0; m < 4; m++)
    {
      cand_x = iXMinNow + Diamond_x[m];
      cand_y = iYMinNow + Diamond_y[m];
      SEARCH_ONE_PIXEL1(0)
    }
    if(iAbort)
      break;
    iXMinNow = best_x;
    iYMinNow = best_y;
  }
  // convert the best absolute position back to a vector relative to the block
  *mv_x = best_x - pic_pix_x;
  *mv_y = best_y - pic_pix_y;
  return min_mcost;
}

整像素运动估计代码分析

内容目录