Use Tilera SIMD for Signature matching ala SSE3

Makes use of 8-wide byte compare instructions in signature matching.

For allocating aligned memory, _mm_malloc() is SSE-only, so a check for
__tile__ was added to use memalign() instead.

Shows a 13% speed up.
pull/514/merge
Ken Steele 13 years ago committed by Victor Julien
parent 22225a7e99
commit 784843b146

@ -1,4 +1,4 @@
/* Copyright (C) 2007-2010 Open Information Security Foundation
/* Copyright (C) 2007-2013 Open Information Security Foundation
*
* You can copy, redistribute or modify this Program under the terms of
* the GNU General Public License version 2 as published by the Free
@ -175,7 +175,7 @@ void SigGroupHeadFree(SigGroupHead *sgh)
PatternMatchDestroyGroup(sgh);
#if defined(__SSE3__)
#if defined(__SSE3__) || defined(__tile__)
if (sgh->mask_array != NULL) {
/* mask is aligned */
SCFreeAligned(sgh->mask_array);
@ -1690,7 +1690,7 @@ int SigGroupHeadBuildHeadArray(DetectEngineCtx *de_ctx, SigGroupHead *sgh)
return 0;
BUG_ON(sgh->head_array != NULL);
#if defined(__SSE3__)
#if defined(__SSE3__) || defined(__tile__)
BUG_ON(sgh->mask_array != NULL);
/* mask array is 16 byte aligned for SIMD checking, also we always
@ -1706,7 +1706,7 @@ int SigGroupHeadBuildHeadArray(DetectEngineCtx *de_ctx, SigGroupHead *sgh)
}
#endif /* __WORDSIZE */
sgh->mask_array = SCMallocAligned((cnt * sizeof(SignatureMask)), 16);
sgh->mask_array = (SignatureMask *)SCMallocAligned((cnt * sizeof(SignatureMask)), 16);
if (sgh->mask_array == NULL)
return -1;
@ -1732,7 +1732,7 @@ int SigGroupHeadBuildHeadArray(DetectEngineCtx *de_ctx, SigGroupHead *sgh)
sgh->head_array[idx].hdr_copy3 = s->hdr_copy3;
sgh->head_array[idx].full_sig = s;
#if defined(__SSE3__)
#if defined(__SSE3__) || defined(__tile__)
sgh->mask_array[idx] = s->mask;
#endif
idx++;

@ -1,4 +1,4 @@
/* Copyright (C) 2007-2011 Open Information Security Foundation
/* Copyright (C) 2007-2013 Open Information Security Foundation
*
* You can copy, redistribute or modify this Program under the terms of
* the GNU General Public License version 2 as published by the Free
@ -576,7 +576,7 @@ static inline int SigMatchSignaturesBuildMatchArrayAddSignature(DetectEngineThre
* On 64 bit systems we inspect in 64 sig batches, creating a u64 with flags.
* The size of a register is leading here.
*/
static inline void SigMatchSignaturesBuildMatchArraySIMD(DetectEngineThreadCtx *det_ctx,
static inline void SigMatchSignaturesBuildMatchArray(DetectEngineThreadCtx *det_ctx,
Packet *p, SignatureMask mask, uint16_t alproto)
{
uint32_t u;
@ -712,28 +712,70 @@ static inline void SigMatchSignaturesBuildMatchArraySIMD(DetectEngineThreadCtx *
#error Wordsize (__WORDSIZE) neither 32 or 64.
#endif
}
#endif /* defined(__SSE3__) */
/* end defined(__SSE3__) */
#elif defined(__tile__)
static inline void SigMatchSignaturesBuildMatchArrayNoSIMD(DetectEngineThreadCtx *det_ctx,
/**
* \brief SIMD implementation of mask prefiltering for TILE-Gx
*
* Mass mask matching is done creating a bitmap of signatures that need
* further inspection.
*/
static inline void SigMatchSignaturesBuildMatchArray(DetectEngineThreadCtx *det_ctx,
Packet *p, SignatureMask mask, uint16_t alproto)
{
uint32_t u;
register uint64_t bm; /* bit mask, 64 bits used */
/* reset previous run */
det_ctx->match_array_cnt = 0;
/* Keep local copies of variables that don't change during this function. */
uint64_t *mask_vector = (uint64_t*)det_ctx->sgh->mask_array;
uint32_t sig_cnt = det_ctx->sgh->sig_cnt;
SignatureHeader *head_array = det_ctx->sgh->head_array;
Signature **match_array = det_ctx->match_array;
uint32_t match_count = 0;
/* Replicate the packet mask into each byte of the vector. */
uint64_t pm = __insn_shufflebytes(mask, 0, 0);
/* u is the signature index. */
for (u = 0; u < sig_cnt; u += 8) {
/* Load 8 masks */
uint64_t sm = *mask_vector++;
/* Binary AND 8 masks with the packet's mask */
uint64_t r1 = pm & sm;
/* Compare the result with the original mask
* Result if equal puts a 1 in LSB of bytes that match.
*/
bm = __insn_v1cmpeq(sm, r1);
/* Check the LSB bit of each byte in the bit map. Little endian is assumed,
* so the LSB byte is index 0. Uses count trailing zeros to find least
* significant bit that is set. */
while (bm) {
/* Find first bit set starting from LSB. */
unsigned int first_bit = __insn_ctz(bm);
unsigned int first_byte = first_bit >> 3;
unsigned int x = u + first_byte;
if (x >= sig_cnt)
break;
SignatureHeader *s = &head_array[x];
/* Clear the first bit set, so it is not found again. */
bm -= (1UL << first_bit);
for (u = 0; u < det_ctx->sgh->sig_cnt; u++) {
SignatureHeader *s = &det_ctx->sgh->head_array[u];
if ((mask & s->mask) == s->mask) {
if (SigMatchSignaturesBuildMatchArrayAddSignature(det_ctx, p, s, alproto) == 1) {
/* okay, store it */
det_ctx->match_array[det_ctx->match_array_cnt] = s->full_sig;
det_ctx->match_array_cnt++;
*match_array++ = s->full_sig;
match_count++;
}
}
}
det_ctx->match_array_cnt = match_count;
}
/* end defined(__tile__) */
#else
/* No SIMD implementation */
/**
* \brief build an array of signatures that will be inspected
*
@ -745,15 +787,27 @@ static inline void SigMatchSignaturesBuildMatchArrayNoSIMD(DetectEngineThreadCtx
* \param mask Packets mask
* \param alproto application layer protocol
*/
static void SigMatchSignaturesBuildMatchArray(DetectEngineThreadCtx *det_ctx,
Packet *p, SignatureMask mask, uint16_t alproto)
static inline void SigMatchSignaturesBuildMatchArray(DetectEngineThreadCtx *det_ctx,
Packet *p, SignatureMask mask,
uint16_t alproto)
{
#if defined(__SSE3__)
SigMatchSignaturesBuildMatchArraySIMD(det_ctx, p, mask, alproto);
#else
SigMatchSignaturesBuildMatchArrayNoSIMD(det_ctx, p, mask, alproto);
#endif
uint32_t u;
/* reset previous run */
det_ctx->match_array_cnt = 0;
for (u = 0; u < det_ctx->sgh->sig_cnt; u++) {
SignatureHeader *s = &det_ctx->sgh->head_array[u];
if ((mask & s->mask) == s->mask) {
if (SigMatchSignaturesBuildMatchArrayAddSignature(det_ctx, p, s, alproto) == 1) {
/* okay, store it */
det_ctx->match_array[det_ctx->match_array_cnt] = s->full_sig;
det_ctx->match_array_cnt++;
}
}
}
}
#endif /* No SIMD implementation */
int SigMatchSignaturesRunPostMatch(ThreadVars *tv,
DetectEngineCtx *de_ctx, DetectEngineThreadCtx *det_ctx, Packet *p,

@ -1,4 +1,4 @@
/* Copyright (C) 2007-2011 Open Information Security Foundation
/* Copyright (C) 2007-2013 Open Information Security Foundation
*
* You can copy, redistribute or modify this Program under the terms of
* the GNU General Public License version 2 as published by the Free
@ -949,7 +949,7 @@ typedef struct SigGroupHead_ {
/** array of masks, used to check multiple masks against
* a packet using SIMD. */
#if defined(__SSE3__)
#if defined(__SSE3__) || defined(__tile__)
SignatureMask *mask_array;
#endif
/** chunk of memory containing the "header" part of each

@ -1,4 +1,4 @@
/* Copyright (C) 2007-2010 Open Information Security Foundation
/* Copyright (C) 2007-2013 Open Information Security Foundation
*
* You can copy, redistribute or modify this Program under the terms of
* the GNU General Public License version 2 as published by the Free
@ -36,6 +36,14 @@
#include "mm_malloc.h"
#endif
#if defined(__tile__)
/* _mm_malloc() and _mm_free() are SSE only, so provide alternatives with
 * the same names for Tilera builds, backed by memalign() and free().
 *
 * Note the argument order is swapped in the expansion: _mm_malloc(a,b)
 * becomes memalign(b,a), because memalign() takes the alignment first
 * and the size second.
*/
#include <malloc.h>
#define _mm_malloc(a,b) memalign((b),(a))
#define _mm_free(a) free((a))
#endif /* defined(__tile__) */
SC_ATOMIC_EXTERN(unsigned int, engine_stage);
/* Use this only if you want to debug memory allocation and free()

Loading…
Cancel
Save