From 967f7aefde878746316ce2c2b995d4215d4179ff Mon Sep 17 00:00:00 2001 From: Ken Steele Date: Fri, 11 Jul 2014 11:17:13 -0400 Subject: [PATCH] Store Boyer Moore no case strings in lower case. Rather than converting the search string to lower case while searching, convert it to lowercase during initialization. Changes the Boyer Moore search API to take BmCtx Change the API for BoyerMoore to take a BmCtx rather than the two parts that are stored in the context, which is how it is mostly used. This enforces always calling BoyerMooreCtxToNocase() to convert to no-case. Use CtxInit and CtxDeinit functions to create and destroy the context, even in unit tests. --- src/app-layer-detect-proto.c | 4 +- src/detect-engine-content-inspection.c | 4 +- src/detect-filemagic.c | 3 +- src/detect-filename.c | 3 +- src/util-spm-bm.c | 42 +++++++++--- src/util-spm-bm.h | 10 +-- src/util-spm.c | 95 +++++++++++--------------- 7 files changed, 80 insertions(+), 81 deletions(-) diff --git a/src/app-layer-detect-proto.c b/src/app-layer-detect-proto.c index 9404bec7f0..61263adf95 100644 --- a/src/app-layer-detect-proto.c +++ b/src/app-layer-detect-proto.c @@ -190,9 +190,9 @@ static AppProto AppLayerProtoDetectPMMatchSignature(const AppLayerProtoDetectPMS s->cd->offset, s->cd->depth); if (s->cd->flags & DETECT_CONTENT_NOCASE) - found = BoyerMooreNocase(s->cd->content, s->cd->content_len, sbuf, sbuflen, s->cd->bm_ctx->bmGs, s->cd->bm_ctx->bmBc); + found = BoyerMooreNocase(s->cd->content, s->cd->content_len, sbuf, sbuflen, s->cd->bm_ctx); else - found = BoyerMoore(s->cd->content, s->cd->content_len, sbuf, sbuflen, s->cd->bm_ctx->bmGs, s->cd->bm_ctx->bmBc); + found = BoyerMoore(s->cd->content, s->cd->content_len, sbuf, sbuflen, s->cd->bm_ctx); if (found != NULL) proto = s->alproto; diff --git a/src/detect-engine-content-inspection.c b/src/detect-engine-content-inspection.c index 69d8b0192b..0feb0ff8f2 100644 --- a/src/detect-engine-content-inspection.c +++ 
b/src/detect-engine-content-inspection.c @@ -274,9 +274,9 @@ int DetectEngineContentInspection(DetectEngineCtx *de_ctx, DetectEngineThreadCtx /* do the actual search */ if (cd->flags & DETECT_CONTENT_NOCASE) - found = BoyerMooreNocase(cd->content, cd->content_len, sbuffer, sbuffer_len, cd->bm_ctx->bmGs, cd->bm_ctx->bmBc); + found = BoyerMooreNocase(cd->content, cd->content_len, sbuffer, sbuffer_len, cd->bm_ctx); else - found = BoyerMoore(cd->content, cd->content_len, sbuffer, sbuffer_len, cd->bm_ctx->bmGs, cd->bm_ctx->bmBc); + found = BoyerMoore(cd->content, cd->content_len, sbuffer, sbuffer_len, cd->bm_ctx); /* next we evaluate the result in combination with the * negation flag. */ diff --git a/src/detect-filemagic.c b/src/detect-filemagic.c index f838133b01..cd00d38671 100644 --- a/src/detect-filemagic.c +++ b/src/detect-filemagic.c @@ -220,8 +220,7 @@ static int DetectFilemagicMatch (ThreadVars *t, DetectEngineThreadCtx *det_ctx, /* we include the \0 in the inspection, so patterns can match on the * end of the string. 
*/ if (BoyerMooreNocase(filemagic->name, filemagic->len, (uint8_t *)file->magic, - strlen(file->magic) + 1, filemagic->bm_ctx->bmGs, - filemagic->bm_ctx->bmBc) != NULL) + strlen(file->magic) + 1, filemagic->bm_ctx) != NULL) { #ifdef DEBUG if (SCLogDebugEnabled()) { diff --git a/src/detect-filename.c b/src/detect-filename.c index d41dab8ad3..a40da3ba92 100644 --- a/src/detect-filename.c +++ b/src/detect-filename.c @@ -105,8 +105,7 @@ static int DetectFilenameMatch (ThreadVars *t, DetectEngineThreadCtx *det_ctx, SCReturnInt(0); if (BoyerMooreNocase(filename->name, filename->len, file->name, - file->name_len, filename->bm_ctx->bmGs, - filename->bm_ctx->bmBc) != NULL) + file->name_len, filename->bm_ctx) != NULL) { #ifdef DEBUG if (SCLogDebugEnabled()) { diff --git a/src/util-spm-bm.c b/src/util-spm-bm.c index bdb9e97b71..c978e19219 100644 --- a/src/util-spm-bm.c +++ b/src/util-spm-bm.c @@ -36,6 +36,14 @@ #include "util-spm-bm.h" #include "util-debug.h" #include "util-error.h" +#include "util-memcpy.h" + +static int PreBmGs(const uint8_t *x, uint16_t m, uint16_t *bmGs); +static void PreBmBc(const uint8_t *x, uint16_t m, uint16_t *bmBc); +static void PreBmBcNocase(const uint8_t *x, uint16_t m, uint16_t *bmBc); +static void BoyerMooreSuffixesNocase(const uint8_t *x, uint16_t m, + uint16_t *suff); +static void PreBmGsNocase(const uint8_t *x, uint16_t m, uint16_t *bmGs); /** * \brief Given a BmCtx structure, recreate the pre/suffixes for @@ -46,6 +54,8 @@ * \param size length of the string */ void BoyerMooreCtxToNocase(BmCtx *bm_ctx, uint8_t *needle, uint16_t needle_len) { + /* Store the content as lower case to make searching faster */ + memcpy_tolower(needle, needle, needle_len); /* Prepare bad chars with nocase chars */ PreBmBcNocase(needle, needle_len, bm_ctx->bmBc); @@ -113,7 +123,8 @@ void BoyerMooreCtxDeInit(BmCtx *bmctx) * \param size length of the string * \param result pointer to an empty array that will hold the badchars */ -void PreBmBc(const uint8_t *x, 
uint16_t m, uint16_t *bmBc) { +static void PreBmBc(const uint8_t *x, uint16_t m, uint16_t *bmBc) +{ int32_t i; for (i = 0; i < 256; ++i) { @@ -131,7 +142,7 @@ void PreBmBc(const uint8_t *x, uint16_t m, uint16_t *bmBc) { * \param m length of the string * \param suff pointer to an empty array that will hold the prefixes (shifts) */ -void BoyerMooreSuffixes(const uint8_t *x, uint16_t m, uint16_t *suff) { +static void BoyerMooreSuffixes(const uint8_t *x, uint16_t m, uint16_t *suff) { int32_t f = 0, g, i; suff[m - 1] = m; g = m - 1; @@ -157,7 +168,8 @@ void BoyerMooreSuffixes(const uint8_t *x, uint16_t m, uint16_t *suff) { * \param bmGs pointer to an empty array that will hold the prefixes (shifts) * \retval 0 ok, -1 failed */ -int PreBmGs(const uint8_t *x, uint16_t m, uint16_t *bmGs) { +static int PreBmGs(const uint8_t *x, uint16_t m, uint16_t *bmGs) +{ int32_t i, j; uint16_t suff[m + 1]; @@ -187,7 +199,8 @@ int PreBmGs(const uint8_t *x, uint16_t m, uint16_t *bmGs) { * \param size length of the string * \param result pointer to an empty array that will hold the badchars */ -void PreBmBcNocase(const uint8_t *x, uint16_t m, uint16_t *bmBc) { +static void PreBmBcNocase(const uint8_t *x, uint16_t m, uint16_t *bmBc) +{ int32_t i; for (i = 0; i < 256; ++i) { @@ -198,7 +211,9 @@ void PreBmBcNocase(const uint8_t *x, uint16_t m, uint16_t *bmBc) { } } -void BoyerMooreSuffixesNocase(const uint8_t *x, uint16_t m, uint16_t *suff) { +static void BoyerMooreSuffixesNocase(const uint8_t *x, uint16_t m, + uint16_t *suff) +{ int32_t f = 0, g, i; suff[m - 1] = m; @@ -227,7 +242,8 @@ void BoyerMooreSuffixesNocase(const uint8_t *x, uint16_t m, uint16_t *suff) { * \param m length of the string * \param bmGs pointer to an empty array that will hold the prefixes (shifts) */ -void PreBmGsNocase(const uint8_t *x, uint16_t m, uint16_t *bmGs) { +static void PreBmGsNocase(const uint8_t *x, uint16_t m, uint16_t *bmGs) +{ int32_t i, j; uint16_t suff[m + 1]; @@ -266,7 +282,11 @@ void 
PreBmGsNocase(const uint8_t *x, uint16_t m, uint16_t *bmGs) { * * \retval ptr to start of the match; NULL if no match */ -uint8_t *BoyerMoore(uint8_t *x, uint16_t m, uint8_t *y, int32_t n, uint16_t *bmGs, uint16_t *bmBc) { +uint8_t *BoyerMoore(uint8_t *x, uint16_t m, uint8_t *y, int32_t n, BmCtx *bm_ctx) +{ + uint16_t *bmGs = bm_ctx->bmGs; + uint16_t *bmBc = bm_ctx->bmBc; + int i, j, m1, m2; #if 0 printf("\nBad:\n"); @@ -311,7 +331,10 @@ uint8_t *BoyerMoore(uint8_t *x, uint16_t m, uint8_t *y, int32_t n, uint16_t *bmG * * \retval ptr to start of the match; NULL if no match */ -uint8_t *BoyerMooreNocase(uint8_t *x, uint16_t m, uint8_t *y, int32_t n, uint16_t *bmGs, uint16_t *bmBc) { +uint8_t *BoyerMooreNocase(uint8_t *x, uint16_t m, uint8_t *y, int32_t n, BmCtx *bm_ctx) +{ + uint16_t *bmGs = bm_ctx->bmGs; + uint16_t *bmBc = bm_ctx->bmBc; int i, j, m1, m2; #if 0 printf("\nBad:\n"); @@ -325,7 +348,8 @@ uint8_t *BoyerMooreNocase(uint8_t *x, uint16_t m, uint8_t *y, int32_t n, uint16_ #endif j = 0; while (j <= n - m ) { - for (i = m - 1; i >= 0 && u8_tolower(x[i]) == u8_tolower(y[i + j]); --i); + /* x is stored in lowercase. 
*/ + for (i = m - 1; i >= 0 && x[i] == u8_tolower(y[i + j]); --i); if (i < 0) { return y + j; diff --git a/src/util-spm-bm.h b/src/util-spm-bm.h index d78a03f9bb..2ee2d945da 100644 --- a/src/util-spm-bm.h +++ b/src/util-spm-bm.h @@ -40,14 +40,8 @@ typedef struct BmCtx_ { BmCtx *BoyerMooreCtxInit(uint8_t *needle, uint16_t needle_len); void BoyerMooreCtxToNocase(BmCtx *, uint8_t *, uint16_t); -void PreBmBc(const uint8_t *x, uint16_t m, uint16_t *bmBc); -void BoyerMooreSuffixes(const uint8_t *x, uint16_t m, uint16_t *suff); -int PreBmGs(const uint8_t *, uint16_t, uint16_t *); -uint8_t *BoyerMoore(uint8_t *x, uint16_t m, uint8_t *y, int32_t n, uint16_t *bmGs, uint16_t *bmBc); -void PreBmBcNocase(const uint8_t *x, uint16_t m, uint16_t *bmBc); -void BoyerMooreSuffixesNocase(const uint8_t *x, uint16_t m, uint16_t *suff); -void PreBmGsNocase(const uint8_t *x, uint16_t m, uint16_t *bmGs); -uint8_t *BoyerMooreNocase(uint8_t *x, uint16_t m, uint8_t *y, int32_t n, uint16_t *bmGs, uint16_t *bmBc); +uint8_t *BoyerMoore(uint8_t *x, uint16_t m, uint8_t *y, int32_t n, BmCtx *bm_ctx); +uint8_t *BoyerMooreNocase(uint8_t *x, uint16_t m, uint8_t *y, int32_t n, BmCtx *bm_ctx); void BoyerMooreCtxDeInit(BmCtx *); #endif /* __UTIL_SPM_BM__ */ diff --git a/src/util-spm.c b/src/util-spm.c index d097366a18..95eba9d974 100644 --- a/src/util-spm.c +++ b/src/util-spm.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2007-2013 Open Information Security Foundation +/* Copyright (C) 2007-2014 Open Information Security Foundation * * You can copy, redistribute or modify this Program under the terms of * the GNU General Public License version 2 as published by the Free @@ -103,16 +103,10 @@ uint8_t *Bs2bmNocaseSearch(uint8_t *text, uint32_t textlen, uint8_t *needle, uin */ uint8_t *BoyerMooreSearch(uint8_t *text, uint32_t textlen, uint8_t *needle, uint16_t needlelen) { - uint16_t bmBc[ALPHABET_SIZE]; - uint16_t *bmGs = SCMalloc(sizeof(uint16_t)*(needlelen + 1)); - if (unlikely(bmGs == NULL)) - return NULL; - - 
PreBmGs(needle, needlelen, bmGs); - PreBmBc(needle, needlelen, bmBc); + BmCtx *bm_ctx = BoyerMooreCtxInit(needle, needlelen); - uint8_t *ret = BoyerMoore(needle, needlelen, text, textlen, bmGs, bmBc); - SCFree(bmGs); + uint8_t *ret = BoyerMoore(needle, needlelen, text, textlen, bm_ctx); + BoyerMooreCtxDeInit(bm_ctx); return ret; } @@ -128,16 +122,11 @@ uint8_t *BoyerMooreSearch(uint8_t *text, uint32_t textlen, uint8_t *needle, uint */ uint8_t *BoyerMooreNocaseSearch(uint8_t *text, uint32_t textlen, uint8_t *needle, uint16_t needlelen) { - uint16_t bmBc[ALPHABET_SIZE]; - uint16_t *bmGs = SCMalloc(sizeof(uint16_t)*(needlelen + 1)); - if (unlikely(bmGs == NULL)) - return NULL; - - PreBmGsNocase(needle, needlelen, bmGs); - PreBmBcNocase(needle, needlelen, bmBc); + BmCtx *bm_ctx = BoyerMooreCtxInit(needle, needlelen); + BoyerMooreCtxToNocase(bm_ctx, needle, needlelen); - uint8_t *ret = BoyerMooreNocase(needle, needlelen, text, textlen, bmGs, bmBc); - SCFree(bmGs); + uint8_t *ret = BoyerMooreNocase(needle, needlelen, text, textlen, bm_ctx); + BoyerMooreCtxDeInit(bm_ctx); return ret; } @@ -241,50 +230,46 @@ uint8_t *BoyerMooreWrapper(uint8_t *text, uint8_t *needle, int times) uint32_t textlen = strlen((char *)text); uint16_t needlelen = strlen((char *)needle); - uint16_t bmBc[ALPHABET_SIZE]; - uint16_t *bmGs = SCMalloc(sizeof(uint16_t)*(needlelen + 1)); - if (unlikely(bmGs == NULL)) - return NULL; + BmCtx *bm_ctx = BoyerMooreCtxInit(needle, needlelen); uint8_t *ret = NULL; int i = 0; - PreBmGs(needle, needlelen, bmGs); - PreBmBc(needle, needlelen, bmBc); - CLOCK_INIT; if (times > 1) CLOCK_START; for (i = 0; i < times; i++) { - ret = BoyerMoore(needle, needlelen, text, textlen, bmGs, bmBc); + ret = BoyerMoore(needle, needlelen, text, textlen, bm_ctx); } if (times > 1) { CLOCK_END; CLOCK_PRINT_SEC; }; - SCFree(bmGs); + BoyerMooreCtxDeInit(bm_ctx); return ret; } -uint8_t *BoyerMooreNocaseWrapper(uint8_t *text, uint8_t *needle, int times) +uint8_t 
*BoyerMooreNocaseWrapper(uint8_t *text, uint8_t *in_needle, int times) { uint32_t textlen = strlen((char *)text); - uint16_t needlelen = strlen((char *)needle); + uint16_t needlelen = strlen((char *)in_needle); - uint16_t bmBc[ALPHABET_SIZE]; - uint16_t *bmGs = SCMalloc(sizeof(uint16_t)*(needlelen + 1)); - if (unlikely(bmGs == NULL)) + /* Make a copy of in_needle to be able to convert it to lowercase. */ + uint8_t *needle = SCMalloc(needlelen); + if (needle == NULL) return NULL; + memcpy(needle, in_needle, needlelen); + + BmCtx *bm_ctx = BoyerMooreCtxInit(needle, needlelen); + BoyerMooreCtxToNocase(bm_ctx, needle, needlelen); uint8_t *ret = NULL; int i = 0; - PreBmGsNocase(needle, needlelen, bmGs); - PreBmBcNocase(needle, needlelen, bmBc); - CLOCK_INIT; if (times > 1) CLOCK_START; for (i = 0; i < times; i++) { - ret = BoyerMooreNocase(needle, needlelen, text, textlen, bmGs, bmBc); + ret = BoyerMooreNocase(needle, needlelen, text, textlen, bm_ctx); } if (times > 1) { CLOCK_END; CLOCK_PRINT_SEC; }; - SCFree(bmGs); + BoyerMooreCtxDeInit(bm_ctx); + free(needle); return ret; } @@ -379,10 +364,7 @@ uint8_t *BoyerMooreCtxWrapper(uint8_t *text, uint8_t *needle, int times) uint32_t textlen = strlen((char *)text); uint16_t needlelen = strlen((char *)needle); - uint16_t bmBc[ALPHABET_SIZE]; - uint16_t *bmGs = SCMalloc(sizeof(uint16_t)*(needlelen + 1)); - if (unlikely(bmGs == NULL)) - return NULL; + BmCtx *bm_ctx = BoyerMooreCtxInit(needle, needlelen); uint8_t *ret = NULL; int i = 0; @@ -391,13 +373,11 @@ uint8_t *BoyerMooreCtxWrapper(uint8_t *text, uint8_t *needle, int times) if (times > 1) CLOCK_START; for (i = 0; i < times; i++) { /* Stats including context building */ - PreBmGs(needle, needlelen, bmGs); - PreBmBc(needle, needlelen, bmBc); - - ret = BoyerMoore(needle, needlelen, text, textlen, bmGs, bmBc); + ret = BoyerMoore(needle, needlelen, text, textlen, bm_ctx); } if (times > 1) { CLOCK_END; CLOCK_PRINT_SEC; }; - SCFree(bmGs); + BoyerMooreCtxDeInit(bm_ctx); + return 
ret; } @@ -418,15 +398,18 @@ uint8_t *RawCtxWrapper(uint8_t *text, uint8_t *needle, int times) return ret; } -uint8_t *BoyerMooreNocaseCtxWrapper(uint8_t *text, uint8_t *needle, int times) +uint8_t *BoyerMooreNocaseCtxWrapper(uint8_t *text, uint8_t *in_needle, int times) { uint32_t textlen = strlen((char *)text); - uint16_t needlelen = strlen((char *)needle); + uint16_t needlelen = strlen((char *)in_needle); - uint16_t bmBc[ALPHABET_SIZE]; - uint16_t *bmGs = SCMalloc(sizeof(uint16_t)*(needlelen + 1)); - if (unlikely(bmGs == NULL)) + /* Make a copy of in_needle to be able to convert it to lowercase. */ + uint8_t *needle = SCMalloc(needlelen); + if (needle == NULL) return NULL; + memcpy(needle, in_needle, needlelen); + + BmCtx *bm_ctx = BoyerMooreCtxInit(needle, needlelen); uint8_t *ret = NULL; int i = 0; @@ -435,12 +418,12 @@ uint8_t *BoyerMooreNocaseCtxWrapper(uint8_t *text, uint8_t *needle, int times) if (times > 1) CLOCK_START; for (i = 0; i < times; i++) { /* Stats including context building */ - PreBmGsNocase(needle, needlelen, bmGs); - PreBmBcNocase(needle, needlelen, bmBc); - ret = BoyerMooreNocase(needle, needlelen, text, textlen, bmGs, bmBc); + BoyerMooreCtxToNocase(bm_ctx, needle, needlelen); + ret = BoyerMooreNocase(needle, needlelen, text, textlen, bm_ctx); } if (times > 1) { CLOCK_END; CLOCK_PRINT_SEC; }; - SCFree(bmGs); + BoyerMooreCtxDeInit(bm_ctx); + free(needle); return ret; }