mpm: introduce ac-ks

Introduce 'ac-ks', the Kenneth Steele AC implementation. It is
actually 'ac-tile', written by Ken for the Tilera platform; this
patch adds support for it on other architectures as well.

Enable ac-tile for other archs as 'ac-ks'.

Fix several out-of-bounds (OOB) reads in the search loops that ASAN flagged.
pull/1978/head
Victor Julien 10 years ago
parent 3781b00dbc
commit 887ddf1ed8

@ -45,12 +45,12 @@ uint32_t FUNC_NAME(SCACTileSearchCtx *ctx, MpmThreadCtx *mpm_thread_ctx,
STYPE state = 0; STYPE state = 0;
int c = xlate[buf[0]]; int c = xlate[buf[0]];
/* If buflen at least 4 bytes and buf 4-byte aligned. */ /* If buflen at least 4 bytes and buf 4-byte aligned. */
if (buflen >= 4 && ((uint64_t)buf & 0x3) == 0) { if (buflen >= (4 + EXTRA) && ((uint64_t)buf & 0x3) == 0) {
BTYPE data = *(BTYPE* restrict)(&buf[0]); BTYPE data = *(BTYPE* restrict)(&buf[0]);
uint64_t index = 0; uint64_t index = 0;
/* Process 4*floor(buflen/4) bytes. */ /* Process 4*floor(buflen/4) bytes. */
i = 0; i = 0;
while (i < (buflen & ~0x3)) { while ((i + EXTRA) < (buflen & ~0x3)) {
BTYPE data1 = *(BTYPE* restrict)(&buf[i + 4]); BTYPE data1 = *(BTYPE* restrict)(&buf[i + 4]);
index = SINDEX(index, state); index = SINDEX(index, state);
state = SLOAD(state_table + index + c); state = SLOAD(state_table + index + c);
@ -88,7 +88,10 @@ uint32_t FUNC_NAME(SCACTileSearchCtx *ctx, MpmThreadCtx *mpm_thread_ctx,
uint64_t index = 0 ; uint64_t index = 0 ;
index = SINDEX(index, state); index = SINDEX(index, state);
state = SLOAD(state_table + index + c); state = SLOAD(state_table + index + c);
c = xlate[buf[i+1]]; #ifndef __tile__
if (likely(i+1 < buflen))
#endif
c = xlate[buf[i+1]];
if (unlikely(SCHECK(state))) { if (unlikely(SCHECK(state))) {
matches = CheckMatch(ctx, pmq, buf, buflen, state, i, matches, mpm_bitarray); matches = CheckMatch(ctx, pmq, buf, buflen, state, i, matches, mpm_bitarray);
} }

@ -80,15 +80,6 @@
#include "util-memcpy.h" #include "util-memcpy.h"
#include "util-mpm-ac-tile.h" #include "util-mpm-ac-tile.h"
#ifndef __tile__
void MpmACTileRegister(void)
{
}
#endif
/* There are Tilera Tile-Gx specific optimizations in this code. */
#ifdef __tile__
void SCACTileInitCtx(MpmCtx *); void SCACTileInitCtx(MpmCtx *);
void SCACTileInitThreadCtx(MpmCtx *, MpmThreadCtx *, uint32_t); void SCACTileInitThreadCtx(MpmCtx *, MpmThreadCtx *, uint32_t);
void SCACTileDestroyCtx(MpmCtx *); void SCACTileDestroyCtx(MpmCtx *);
@ -1050,7 +1041,7 @@ static void SCACTileClubOutputStatePresenceWithDeltaTable(MpmCtx *mpm_ctx)
mpm_ctx->memory_cnt++; mpm_ctx->memory_cnt++;
mpm_ctx->memory_size += size; mpm_ctx->memory_size += size;
SCLogInfo("Delta Table size %d, alphabet: %d, %d-byte states: %d", SCLogDebug("Delta Table size %d, alphabet: %d, %d-byte states: %d",
size, ctx->alphabet_size, ctx->bytes_per_state, ctx->state_count); size, ctx->alphabet_size, ctx->bytes_per_state, ctx->state_count);
/* Copy next state from Goto table, which is 32 bits and encode it into the next /* Copy next state from Goto table, which is 32 bits and encode it into the next
@ -1460,10 +1451,19 @@ void SCACTileDestroyCtx(MpmCtx *mpm_ctx)
#define SCHECK(x) ((x) > 0) #define SCHECK(x) ((x) > 0)
#define BTYPE int32_t #define BTYPE int32_t
// Extract byte N=0,1,2,3 from x // Extract byte N=0,1,2,3 from x
#ifdef __tile__
#define BYTE0(x) __insn_bfextu(x, 0, 7) #define BYTE0(x) __insn_bfextu(x, 0, 7)
#define BYTE1(x) __insn_bfextu(x, 8, 15) #define BYTE1(x) __insn_bfextu(x, 8, 15)
#define BYTE2(x) __insn_bfextu(x, 16, 23) #define BYTE2(x) __insn_bfextu(x, 16, 23)
#define BYTE3(x) __insn_bfextu(x, 24, 31) #define BYTE3(x) __insn_bfextu(x, 24, 31)
#define EXTRA 0
#else /* fallback */
#define BYTE0(x) (((x) & 0x000000ff) >> 0)
#define BYTE1(x) (((x) & 0x0000ff00) >> 8)
#define BYTE2(x) (((x) & 0x00ff0000) >> 16)
#define BYTE3(x) (((x) & 0xff000000) >> 24)
#define EXTRA 4 // need 4 extra bytes to avoid OOB reads
#endif
int CheckMatch(SCACTileSearchCtx *ctx, PatternMatcherQueue *pmq, int CheckMatch(SCACTileSearchCtx *ctx, PatternMatcherQueue *pmq,
uint8_t *buf, uint16_t buflen, uint8_t *buf, uint16_t buflen,
@ -1494,7 +1494,11 @@ int CheckMatch(SCACTileSearchCtx *ctx, PatternMatcherQueue *pmq,
/* Double check case-sensitve match now. */ /* Double check case-sensitve match now. */
if (patterns[k] >> 31) { if (patterns[k] >> 31) {
uint16_t patlen = pattern_list[pindex].patlen; uint16_t patlen = pattern_list[pindex].patlen;
#ifdef __tile__
if (SCMemcmpNZ(pattern_list[pindex].cs, buf_offset - patlen, patlen) != 0) { if (SCMemcmpNZ(pattern_list[pindex].cs, buf_offset - patlen, patlen) != 0) {
#else
if (SCMemcmp(pattern_list[pindex].cs, buf_offset - patlen, patlen) != 0) {
#endif
/* Case-sensitive match failed. */ /* Case-sensitive match failed. */
continue; continue;
} }
@ -1571,13 +1575,22 @@ uint32_t SCACTileSearchLarge(SCACTileSearchCtx *ctx, MpmThreadCtx *mpm_thread_ct
* Next state entry has MSB as "match" and 15 LSB bits as next-state index. * Next state entry has MSB as "match" and 15 LSB bits as next-state index.
*/ */
// y = 1<<log_mult * (x & (1<<width -1)) // y = 1<<log_mult * (x & (1<<width -1))
#ifdef __tile__
#define SINDEX_INTERNAL(y, x, log_mult, width) \ #define SINDEX_INTERNAL(y, x, log_mult, width) \
__insn_bfins(y, x, log_mult, log_mult + (width - 1)) __insn_bfins(y, x, log_mult, log_mult + (width - 1))
#else
#define SINDEX_INTERNAL(y, x, log_mult, width) \
((1<<log_mult) * (x & ((1<<width) - 1)))
#endif
/* Type of next_state */ /* Type of next_state */
#define STYPE int16_t #define STYPE int16_t
#ifdef __tile__
// Hint to compiler to expect L2 hit latency for Load int16_t // Hint to compiler to expect L2 hit latency for Load int16_t
#define SLOAD(x) __insn_ld2s_L2((STYPE* restrict)(x)) #define SLOAD(x) __insn_ld2s_L2((STYPE* restrict)(x))
#else
#define SLOAD(x) *(STYPE * restrict)(x)
#endif
#define FUNC_NAME SCACTileSearchSmall256 #define FUNC_NAME SCACTileSearchSmall256
// y = 256 * (x & 0x7FFF) // y = 256 * (x & 0x7FFF)
@ -1631,8 +1644,12 @@ uint32_t SCACTileSearchLarge(SCACTileSearchCtx *ctx, MpmThreadCtx *mpm_thread_ct
#undef STYPE #undef STYPE
#define STYPE int8_t #define STYPE int8_t
// Hint to compiler to expect L2 hit latency for Load int8_t // Hint to compiler to expect L2 hit latency for Load int8_t
#ifdef __tile__
#undef SLOAD #undef SLOAD
#define SLOAD(x) __insn_ld1s_L2((STYPE* restrict)(x)) #define SLOAD(x) __insn_ld1s_L2((STYPE* restrict)(x))
#else
/* no op for !__tile__ case */
#endif
#undef FUNC_NAME #undef FUNC_NAME
#undef SINDEX #undef SINDEX
@ -1770,7 +1787,11 @@ void SCACTilePrintInfo(MpmCtx *mpm_ctx)
*/ */
void MpmACTileRegister(void) void MpmACTileRegister(void)
{ {
#ifdef __tile__
mpm_table[MPM_AC_TILE].name = "ac-tile"; mpm_table[MPM_AC_TILE].name = "ac-tile";
#else
mpm_table[MPM_AC_TILE].name = "ac-ks";
#endif
mpm_table[MPM_AC_TILE].max_pattern_length = 0; mpm_table[MPM_AC_TILE].max_pattern_length = 0;
mpm_table[MPM_AC_TILE].InitCtx = SCACTileInitCtx; mpm_table[MPM_AC_TILE].InitCtx = SCACTileInitCtx;
@ -2852,4 +2873,3 @@ void SCACTileRegisterTests(void)
#endif #endif
} }
#endif /* __tile__ */

Loading…
Cancel
Save