@Spongcer
2015-03-13T12:32:08.000000Z
字数 3595
阅读 1724
Code
// Find the first (or the last) bit that is set to 1 within a DWORD.
#ifdef USE_NEW_BITMAP
static INLINE Count32_t bm_find_one_in_dword(Count32_t dwWord, Bool8_t bDir)
{
Count32_t dwPos = 0, dwShift = 0;
if (0 == dwWord) return BM_NOT_FOUND;
if (bDir)
{
dwShift = ((0 == (dwWord & 0x0000FFFF)) << 4);
dwPos += dwShift;
dwWord >>= dwShift;
dwShift = ((0 == (dwWord & 0x00FF)) << 3);
dwPos += dwShift;
dwWord >>= dwShift;
dwShift = ((0 == (dwWord & 0x0F)) << 2);
dwPos += dwShift;
dwWord >>= dwShift;
dwShift = ((0 == (dwWord & 0x03)) << 1);
dwPos += dwShift;
dwWord >>= dwShift;
dwShift = (0 == (dwWord & 0x01));
dwPos += dwShift;
dwWord >>= dwShift;
}
else
{
dwShift = ((0 == (dwWord & 0xFFFF0000)) << 4);
dwPos += dwShift;
dwWord <<= dwShift;
dwShift = ((0 == (dwWord & 0xFF000000)) << 3);
dwPos += dwShift;
dwWord <<= dwShift;
dwShift = ((0 == (dwWord & 0xF0000000)) << 2);
dwPos += dwShift;
dwWord <<= dwShift;
dwShift = ((0 == (dwWord & 0xC0000000)) << 1);
dwPos += dwShift;
dwWord <<= dwShift;
dwShift = (0 == (dwWord & 0x80000000));
dwPos += dwShift;
dwWord <<= dwShift;
dwPos += ((dwWord & 0x80000000) != 0);
dwPos = 32 - dwPos;
}
return dwPos;
}
#else
static Count32_t bm_find_one_in_dword (Count32_t dwWord, Bool8_t bDir )
{
Count32_t i=0;
Count32_t dwWord2;
if (0 == dwWord) return BM_NOT_FOUND;
if (bDir) {
while(1) {
dwWord2 = ((dwWord >> 1) << 1);
if (dwWord == dwWord2) {
dwWord >>= 1;
i++;
continue;
}
break;
}
} else {
while(dwWord>1) {
dwWord >>= 1;
i++;
}
}
return i;
}
#endif
// Zero the memory using 4-way SIMD stores.
#ifdef USE_SIMD_OPT
#define VectorBATZeroNullBitmap(pvbVector) \
{ \
Count32_t i = 0, j = 0, dwBlock = 0, dwSize = VECBAT_BITMAP_SIZE; \
Count32P_t pdwNullBitmap = (pvbVector)->dwNullBitmap; \
__m128i __mZero = _mm_setzero_si128(); \
__m128i *__m_aNullBitmap = (__m128i *)pdwNullBitmap; \
\
dwBlock = dwSize >> 4; \
\
for (j = 0; j < dwBlock; j++) \
{ \
_mm_storeu_si128(__m_aNullBitmap + i, __mZero); \
_mm_storeu_si128(__m_aNullBitmap + i + 1, __mZero); \
_mm_storeu_si128(__m_aNullBitmap + i + 2, __mZero); \
_mm_storeu_si128(__m_aNullBitmap + i + 3, __mZero); \
i += 4; \
} \
\
i <<= 2; \
\
for (; i < dwSize; i++) \
{ \
pdwNullBitmap[i] = 0; \
} \
}
#else
#define VectorBATZeroNullBitmap(pvbVector) MEMSET((pvbVector)->dwNullBitmap, 0, VECBAT_BITMAP_SIZE * sizeof(Count32_t))
#endif
// Sum floating-point values using 8-way SIMD.
static INLINE Datum
X_sum_vec_Double_t(X_AGG_VEC_ARGS)
{
Count_t i = 0;
Double_t dtdst = 0;
if (!bExistNull)
{
#if (defined(OSC_64BIT_ARCH) && defined(USE_SIMD_OPT))
__m128d __mSum0 = _mm_setzero_pd();
__m128d __mSum1 = _mm_setzero_pd();
__m128d __mSum2 = _mm_setzero_pd();
__m128d __mSum3 = _mm_setzero_pd();
__m128d __mSum4 = _mm_setzero_pd();
__m128d __mSum5 = _mm_setzero_pd();
__m128d __mSum6 = _mm_setzero_pd();
__m128d __mSum7 = _mm_setzero_pd();
__m128d __mLoad0, __mLoad1, __mLoad2, __mLoad3;
__m128d __mLoad4, __mLoad5, __mLoad6, __mLoad7;
Double_t *pDatum = (Double_t *)dtlft;
Count_t dwBlock = (dwCount >> 4);
Count_t j = 0;
for (j = 0; j < dwBlock; j++)
{
__mLoad0 = _mm_loadu_pd(pDatum + i);
__mLoad1 = _mm_loadu_pd(pDatum + i + 2);
__mLoad2 = _mm_loadu_pd(pDatum + i + 4);
__mLoad3 = _mm_loadu_pd(pDatum + i + 6);
__mLoad4 = _mm_loadu_pd(pDatum + i + 8);
__mLoad5 = _mm_loadu_pd(pDatum + i + 10);
__mLoad6 = _mm_loadu_pd(pDatum + i + 12);
__mLoad7 = _mm_loadu_pd(pDatum + i + 14);
__mSum0 = _mm_add_pd(__mSum0, __mLoad0);
__mSum1 = _mm_add_pd(__mSum1, __mLoad1);
__mSum2 = _mm_add_pd(__mSum2, __mLoad2);
__mSum3 = _mm_add_pd(__mSum3, __mLoad3);
__mSum4 = _mm_add_pd(__mSum4, __mLoad4);
__mSum5 = _mm_add_pd(__mSum5, __mLoad5);
__mSum6 = _mm_add_pd(__mSum6, __mLoad6);
__mSum7 = _mm_add_pd(__mSum7, __mLoad7);
i += 16;
}
__mSum0 = _mm_add_pd(__mSum0, __mSum1);
__mSum2 = _mm_add_pd(__mSum2, __mSum3);
__mSum4 = _mm_add_pd(__mSum4, __mSum5);
__mSum6 = _mm_add_pd(__mSum6, __mSum7);
__mSum0 = _mm_add_pd(__mSum0, __mSum2);
__mSum4 = _mm_add_pd(__mSum4, __mSum6);
__mSum0 = _mm_add_pd(__mSum0, __mSum4);
pDatum = (Double_t *)(&__mSum0);
dtdst = pDatum[0] + pDatum[1];
#endif
for(; i < dwCount; i++)
{
dtdst += (Double_t)DatumGetDouble_t(dtlft[i]);
}
}
else
{
for(i = 0; i < dwCount; i++)
{
if (BMTestZero_HY(dwNull, i))
{
dtdst += (Double_t)DatumGetDouble_t(dtlft[i]);
}
}
}
return Double_tGetDatum(dtdst);
}