diff --git a/src/detect-engine-siggroup.c b/src/detect-engine-siggroup.c index bd867dc0b7..ff7fcb2d4a 100644 --- a/src/detect-engine-siggroup.c +++ b/src/detect-engine-siggroup.c @@ -185,14 +185,6 @@ void SigGroupHeadFree(SigGroupHead *sgh) PatternMatchDestroyGroup(sgh); -#if defined(__SSE3__) || defined(__tile__) - if (sgh->mask_array != NULL) { - /* mask is aligned */ - SCFreeAligned(sgh->mask_array); - sgh->mask_array = NULL; - } -#endif - if (sgh->head_array != NULL) { SCFree(sgh->head_array); sgh->head_array = NULL; @@ -1760,28 +1752,6 @@ int SigGroupHeadBuildHeadArray(DetectEngineCtx *de_ctx, SigGroupHead *sgh) return 0; BUG_ON(sgh->head_array != NULL); -#if defined(__SSE3__) || defined(__tile__) - BUG_ON(sgh->mask_array != NULL); - - /* mask array is 16 byte aligned for SIMD checking, also we always - * alloc a multiple of 32/64 bytes */ - int cnt = sgh->sig_cnt; -#if __WORDSIZE == 32 - if (cnt % 32 != 0) { - cnt += (32 - (cnt % 32)); - } -#elif __WORDSIZE == 64 - if (cnt % 64 != 0) { - cnt += (64 - (cnt % 64)); - } -#endif /* __WORDSIZE */ - - sgh->mask_array = (SignatureMask *)SCMallocAligned((cnt * sizeof(SignatureMask)), 16); - if (sgh->mask_array == NULL) - return -1; - - memset(sgh->mask_array, 0, (cnt * sizeof(SignatureMask))); -#endif sgh->head_array = SCMalloc(sgh->sig_cnt * sizeof(SignatureHeader)); if (sgh->head_array == NULL) @@ -1802,9 +1772,6 @@ int SigGroupHeadBuildHeadArray(DetectEngineCtx *de_ctx, SigGroupHead *sgh) sgh->head_array[idx].hdr_copy3 = s->hdr_copy3; sgh->head_array[idx].full_sig = s; -#if defined(__SSE3__) || defined(__tile__) - sgh->mask_array[idx] = s->mask; -#endif idx++; } diff --git a/src/detect-simd.c b/src/detect-simd.c index 0773bc964c..114ee589a1 100644 --- a/src/detect-simd.c +++ b/src/detect-simd.c @@ -32,216 +32,6 @@ /* Included into detect.c */ -#if defined(__SSE3__) - -/** - * \brief SIMD implementation of mask prefiltering. - * - * Mass mask matching is done creating a bitmap of signatures that need - * futher inspection. - * - * On 32 bit systems we inspect in 32 sig batches, creating a u32 with flags. - * On 64 bit systems we inspect in 64 sig batches, creating a u64 with flags. - * The size of a register is leading here. - */ -void SigMatchSignaturesBuildMatchArray(DetectEngineThreadCtx *det_ctx, - Packet *p, SignatureMask mask, AppProto alproto) -{ - uint32_t u; - SigIntId x; - int bitno = 0; -#if __WORDSIZE == 32 - register uint32_t bm; /* bit mask, 32 bits used */ - - Vector pm, sm, r1, r2; - /* load the packet mask into each byte of the vector */ - pm.v = _mm_set1_epi8(mask); - - /* reset previous run */ - det_ctx->match_array_cnt = 0; - - for (u = 0; u < det_ctx->sgh->sig_cnt; u += 32) { - /* load a batch of masks */ - sm.v = _mm_load_si128((const __m128i *)&det_ctx->sgh->mask_array[u]); - /* logical AND them with the packet's mask */ - r1.v = _mm_and_si128(pm.v, sm.v); - /* compare the result with the original mask */ - r2.v = _mm_cmpeq_epi8(sm.v, r1.v); - /* convert into a bitarray */ - bm = ((uint32_t) _mm_movemask_epi8(r2.v)); - - SCLogDebug("bm1 %08x", bm); - - /* load a batch of masks */ - sm.v = _mm_load_si128((const __m128i *)&det_ctx->sgh->mask_array[u+16]); - /* logical AND them with the packet's mask */ - r1.v = _mm_and_si128(pm.v, sm.v); - /* compare the result with the original mask */ - r2.v = _mm_cmpeq_epi8(sm.v, r1.v); - /* convert into a bitarray */ - bm |= ((uint32_t) _mm_movemask_epi8(r2.v) << 16); - - SCLogDebug("bm2 %08x", bm); - - if (bm == 0) { - continue; - } - - /* Check each bit in the bit map. Little endian is assumed (SSE is x86), - * so the bits are in memory backwards, 0 is on the right edge, - * 31 on the left edge. This is why above we store the output of the - * _mm_movemask_epi8 in this order as well */ - bitno = 0; - for (x = u; x < det_ctx->sgh->sig_cnt && bitno < 32; x++, bitno++) { - if (bm & (1 << bitno)) { - SignatureHeader *s = &det_ctx->sgh->head_array[x]; - - if (SigMatchSignaturesBuildMatchArrayAddSignature(det_ctx, p, s, alproto) == 1) { - /* okay, store it */ - det_ctx->match_array[det_ctx->match_array_cnt] = s->full_sig; - det_ctx->match_array_cnt++; - } - } - } - } -#elif __WORDSIZE == 64 - register uint64_t bm; /* bit mask, 64 bits used */ - - Vector pm, sm, r1, r2; - /* load the packet mask into each byte of the vector */ - pm.v = _mm_set1_epi8(mask); - - /* reset previous run */ - det_ctx->match_array_cnt = 0; - - for (u = 0; u < det_ctx->sgh->sig_cnt; u += 64) { - /* load a batch of masks */ - sm.v = _mm_load_si128((const __m128i *)&det_ctx->sgh->mask_array[u]); - /* logical AND them with the packet's mask */ - r1.v = _mm_and_si128(pm.v, sm.v); - /* compare the result with the original mask */ - r2.v = _mm_cmpeq_epi8(sm.v, r1.v); - /* convert into a bitarray */ - bm = ((uint64_t) _mm_movemask_epi8(r2.v)); - - SCLogDebug("bm1 %08"PRIx64, bm); - - /* load a batch of masks */ - sm.v = _mm_load_si128((const __m128i *)&det_ctx->sgh->mask_array[u+16]); - /* logical AND them with the packet's mask */ - r1.v = _mm_and_si128(pm.v, sm.v); - /* compare the result with the original mask */ - r2.v = _mm_cmpeq_epi8(sm.v, r1.v); - /* convert into a bitarray */ - bm |= ((uint64_t) _mm_movemask_epi8(r2.v)) << 16; - - /* load a batch of masks */ - sm.v = _mm_load_si128((const __m128i *)&det_ctx->sgh->mask_array[u+32]); - /* logical AND them with the packet's mask */ - r1.v = _mm_and_si128(pm.v, sm.v); - /* compare the result with the original mask */ - r2.v = _mm_cmpeq_epi8(sm.v, r1.v); - /* convert into a bitarray */ - bm |= ((uint64_t) _mm_movemask_epi8(r2.v)) << 32; - - /* load a batch of masks */ - sm.v = _mm_load_si128((const __m128i *)&det_ctx->sgh->mask_array[u+48]); - /* logical AND them with the packet's mask */ - r1.v = _mm_and_si128(pm.v, sm.v); - /* compare the result with the original mask */ - r2.v = _mm_cmpeq_epi8(sm.v, r1.v); - /* convert into a bitarray */ - bm |= ((uint64_t) _mm_movemask_epi8(r2.v)) << 48; - - SCLogDebug("bm2 %08"PRIx64, bm); - - if (bm == 0) { - continue; - } - - /* Check each bit in the bit map. Little endian is assumed (SSE is x86-64), - * so the bits are in memory backwards, 0 is on the right edge, - * 63 on the left edge. This is why above we store the output of the - * _mm_movemask_epi8 in this order as well */ - bitno = 0; - for (x = u; x < det_ctx->sgh->sig_cnt && bitno < 64; x++, bitno++) { - if (bm & ((uint64_t)1 << bitno)) { - SignatureHeader *s = &det_ctx->sgh->head_array[x]; - - if (SigMatchSignaturesBuildMatchArrayAddSignature(det_ctx, p, s, alproto) == 1) { - /* okay, store it */ - det_ctx->match_array[det_ctx->match_array_cnt] = s->full_sig; - det_ctx->match_array_cnt++; - } - } - } - } -#else -#error Wordsize (__WORDSIZE) neither 32 or 64. -#endif -} - /* end defined(__SSE3__) */ -#elif defined(__tile__) - -/** - * \brief SIMD implementation of mask prefiltering for TILE-Gx - * - * Mass mask matching is done creating a bitmap of signatures that need - * futher inspection. - */ -void SigMatchSignaturesBuildMatchArray(DetectEngineThreadCtx *det_ctx, - Packet *p, SignatureMask mask, AppProto alproto) -{ - uint32_t u; - register uint64_t bm; /* bit mask, 64 bits used */ - - /* Keep local copies of variables that don't change during this function. */ - uint64_t *mask_vector = (uint64_t*)det_ctx->sgh->mask_array; - uint32_t sig_cnt = det_ctx->sgh->sig_cnt; - SignatureHeader *head_array = det_ctx->sgh->head_array; - - Signature **match_array = det_ctx->match_array; - uint32_t match_count = 0; - - /* Replicate the packet mask into each byte of the vector. */ - uint64_t pm = __insn_shufflebytes(mask, 0, 0); - - /* u is the signature index. */ - for (u = 0; u < sig_cnt; u += 8) { - /* Load 8 masks */ - uint64_t sm = *mask_vector++; - /* Binary AND 8 masks with the packet's mask */ - uint64_t r1 = pm & sm; - /* Compare the result with the original mask - * Result if equal puts a 1 in LSB of bytes that match. - */ - bm = __insn_v1cmpeq(sm, r1); - - /* Check the LSB bit of each byte in the bit map. Little endian is assumed, - * so the LSB byte is index 0. Uses count trailing zeros to find least - * significant bit that is set. */ - while (bm) { - /* Find first bit set starting from LSB. */ - unsigned int first_bit = __insn_ctz(bm); - unsigned int first_byte = first_bit >> 3; - unsigned int x = u + first_byte; - if (x >= sig_cnt) - break; - SignatureHeader *s = &head_array[x]; - - /* Clear the first bit set, so it is not found again. */ - bm -= (1UL << first_bit); - - if (SigMatchSignaturesBuildMatchArrayAddSignature(det_ctx, p, s, alproto) == 1) { - /* okay, store it */ - *match_array++ = s->full_sig; - match_count++; - } - } - } - det_ctx->match_array_cnt = match_count; -} -#endif /* defined(__tile__) */ #ifdef UNITTESTS