add comments and todos for ac and ac-gfbs

remotes/origin/master-1.1.x
Anoop Saldanha 15 years ago committed by Victor Julien
parent 658ff5753d
commit b9a770740a

@ -24,6 +24,19 @@
* *
* Efficient String Matching: An Aid to Bibliographic Search * Efficient String Matching: An Aid to Bibliographic Search
* Alfred V. Aho and Margaret J. Corasick * Alfred V. Aho and Margaret J. Corasick
*
* - We use the goto-failure table to calculate transitions.
* - If we cross 2 ** 16 states, we use 4 bytes in the transition table
* to hold each state, otherwise we use 2 bytes.
* - To reduce memory consumption, we throw all the failure transitions
* out and use binary search to pick out the right transition in
* the modified goto table.
*
* \todo - Do a proper analysis of our existing MPMs and suggest a good one based
* on the pattern distribution and the expected traffic (say http).
* - Tried out loop unrolling without any perf increase. Need to dig deeper.
* - Try out holding whether there are any output strings from a particular
* state in one of the bytes of a state var. Will be useful in cuda esp.
*/ */
#include "suricata-common.h" #include "suricata-common.h"
@ -507,7 +520,7 @@ static inline void SCACGfbsCreateGotoTable(MpmCtx *mpm_ctx)
/* add each pattern to create the goto table */ /* add each pattern to create the goto table */
for (i = 0; i < mpm_ctx->pattern_cnt; i++) { for (i = 0; i < mpm_ctx->pattern_cnt; i++) {
SCACGfbsEnter(ctx->parray[i]->cs, ctx->parray[i]->len, SCACGfbsEnter(ctx->parray[i]->cs, ctx->parray[i]->len,
ctx->parray[i]->id, mpm_ctx); ctx->parray[i]->id, mpm_ctx);
} }
int ascii_code = 0; int ascii_code = 0;
@ -694,7 +707,7 @@ static inline void SCACGfbsCreateFailureTable(MpmCtx *mpm_ctx)
state = ctx->failure_table[state]; state = ctx->failure_table[state];
ctx->failure_table[temp_state] = ctx->goto_table[state][ascii_code]; ctx->failure_table[temp_state] = ctx->goto_table[state][ascii_code];
SCACGfbsClubOutputStates(temp_state, ctx->failure_table[temp_state], SCACGfbsClubOutputStates(temp_state, ctx->failure_table[temp_state],
mpm_ctx); mpm_ctx);
} }
} }

@ -20,19 +20,24 @@
* *
* \author Anoop Saldanha <poonaatsoc@gmail.com> * \author Anoop Saldanha <poonaatsoc@gmail.com>
* *
* Implementation of aho-corasick MPM from - * First iteration of aho-corasick MPM from -
* *
* Efficient String Matching: An Aid to Bibliographic Search * Efficient String Matching: An Aid to Bibliographic Search
* Alfred V. Aho and Margaret J. Corasick * Alfred V. Aho and Margaret J. Corasick
* *
* - Uses the delta table for calculating transitions, instead of having * - Uses the delta table for calculating transitions, instead of having
* separate goto and failure transitions. * separate goto and failure transitions.
* - Have currently set the state table, state size to 2bytes, limiting * - If we cross 2 ** 16 states, we use 4 bytes in the transition table
* the no of states to 65536. We will need to modify this later, so * to hold each state, otherwise we use 2 bytes.
* that we can use something like 24 bits for holding state, and the * - This version of the MPM is heavy on memory, but it performs well.
* last byte to indicate if we have any output entries for the state. * If you can fit the ruleset with this mpm on your box without hitting
* That way we save on the extra read from the output table, since * swap, this is the MPM to go for.
* most states won't have any output entries. *
* \todo - Do a proper analysis of our existing MPMs and suggest a good one based
* on the pattern distribution and the expected traffic (say http).
* - Tried out loop unrolling without any perf increase. Need to dig deeper.
* - Try out holding whether there are any output strings from a particular
* state in one of the bytes of a state var. Will be useful in cuda esp.
*/ */
#include "suricata-common.h" #include "suricata-common.h"

Loading…
Cancel
Save