Store Boyer Moore no case strings in lower case.

Rather than converting the search string to lower case while searching,
convert it to lowercase during initialization.

Change the Boyer Moore search API to take a BmCtx

Change the API for BoyerMoore to take a BmCtx rather than the two parts that
are stored in the context, which is how it is mostly used. This enforces
always calling BoyerMooreCtxToNocase() to convert to no-case.

Use CtxInit and CtxDeinit functions to create and destroy the context,
even in unit tests.
pull/1047/head
Ken Steele 11 years ago
parent 54214d1251
commit 967f7aefde

@ -190,9 +190,9 @@ static AppProto AppLayerProtoDetectPMMatchSignature(const AppLayerProtoDetectPMS
s->cd->offset, s->cd->depth); s->cd->offset, s->cd->depth);
if (s->cd->flags & DETECT_CONTENT_NOCASE) if (s->cd->flags & DETECT_CONTENT_NOCASE)
found = BoyerMooreNocase(s->cd->content, s->cd->content_len, sbuf, sbuflen, s->cd->bm_ctx->bmGs, s->cd->bm_ctx->bmBc); found = BoyerMooreNocase(s->cd->content, s->cd->content_len, sbuf, sbuflen, s->cd->bm_ctx);
else else
found = BoyerMoore(s->cd->content, s->cd->content_len, sbuf, sbuflen, s->cd->bm_ctx->bmGs, s->cd->bm_ctx->bmBc); found = BoyerMoore(s->cd->content, s->cd->content_len, sbuf, sbuflen, s->cd->bm_ctx);
if (found != NULL) if (found != NULL)
proto = s->alproto; proto = s->alproto;

@ -274,9 +274,9 @@ int DetectEngineContentInspection(DetectEngineCtx *de_ctx, DetectEngineThreadCtx
/* do the actual search */ /* do the actual search */
if (cd->flags & DETECT_CONTENT_NOCASE) if (cd->flags & DETECT_CONTENT_NOCASE)
found = BoyerMooreNocase(cd->content, cd->content_len, sbuffer, sbuffer_len, cd->bm_ctx->bmGs, cd->bm_ctx->bmBc); found = BoyerMooreNocase(cd->content, cd->content_len, sbuffer, sbuffer_len, cd->bm_ctx);
else else
found = BoyerMoore(cd->content, cd->content_len, sbuffer, sbuffer_len, cd->bm_ctx->bmGs, cd->bm_ctx->bmBc); found = BoyerMoore(cd->content, cd->content_len, sbuffer, sbuffer_len, cd->bm_ctx);
/* next we evaluate the result in combination with the /* next we evaluate the result in combination with the
* negation flag. */ * negation flag. */

@ -220,8 +220,7 @@ static int DetectFilemagicMatch (ThreadVars *t, DetectEngineThreadCtx *det_ctx,
/* we include the \0 in the inspection, so patterns can match on the /* we include the \0 in the inspection, so patterns can match on the
* end of the string. */ * end of the string. */
if (BoyerMooreNocase(filemagic->name, filemagic->len, (uint8_t *)file->magic, if (BoyerMooreNocase(filemagic->name, filemagic->len, (uint8_t *)file->magic,
strlen(file->magic) + 1, filemagic->bm_ctx->bmGs, strlen(file->magic) + 1, filemagic->bm_ctx) != NULL)
filemagic->bm_ctx->bmBc) != NULL)
{ {
#ifdef DEBUG #ifdef DEBUG
if (SCLogDebugEnabled()) { if (SCLogDebugEnabled()) {

@ -105,8 +105,7 @@ static int DetectFilenameMatch (ThreadVars *t, DetectEngineThreadCtx *det_ctx,
SCReturnInt(0); SCReturnInt(0);
if (BoyerMooreNocase(filename->name, filename->len, file->name, if (BoyerMooreNocase(filename->name, filename->len, file->name,
file->name_len, filename->bm_ctx->bmGs, file->name_len, filename->bm_ctx) != NULL)
filename->bm_ctx->bmBc) != NULL)
{ {
#ifdef DEBUG #ifdef DEBUG
if (SCLogDebugEnabled()) { if (SCLogDebugEnabled()) {

@ -36,6 +36,14 @@
#include "util-spm-bm.h" #include "util-spm-bm.h"
#include "util-debug.h" #include "util-debug.h"
#include "util-error.h" #include "util-error.h"
#include "util-memcpy.h"
static int PreBmGs(const uint8_t *x, uint16_t m, uint16_t *bmGs);
static void PreBmBc(const uint8_t *x, uint16_t m, uint16_t *bmBc);
static void PreBmBcNocase(const uint8_t *x, uint16_t m, uint16_t *bmBc);
static void BoyerMooreSuffixesNocase(const uint8_t *x, uint16_t m,
uint16_t *suff);
static void PreBmGsNocase(const uint8_t *x, uint16_t m, uint16_t *bmGs);
/** /**
* \brief Given a BmCtx structure, recreate the pre/suffixes for * \brief Given a BmCtx structure, recreate the pre/suffixes for
@ -46,6 +54,8 @@
* \param size length of the string * \param size length of the string
*/ */
void BoyerMooreCtxToNocase(BmCtx *bm_ctx, uint8_t *needle, uint16_t needle_len) { void BoyerMooreCtxToNocase(BmCtx *bm_ctx, uint8_t *needle, uint16_t needle_len) {
/* Store the content as lower case to make searching faster */
memcpy_tolower(needle, needle, needle_len);
/* Prepare bad chars with nocase chars */ /* Prepare bad chars with nocase chars */
PreBmBcNocase(needle, needle_len, bm_ctx->bmBc); PreBmBcNocase(needle, needle_len, bm_ctx->bmBc);
@ -113,7 +123,8 @@ void BoyerMooreCtxDeInit(BmCtx *bmctx)
* \param size length of the string * \param size length of the string
* \param result pointer to an empty array that will hold the badchars * \param result pointer to an empty array that will hold the badchars
*/ */
void PreBmBc(const uint8_t *x, uint16_t m, uint16_t *bmBc) { static void PreBmBc(const uint8_t *x, uint16_t m, uint16_t *bmBc)
{
int32_t i; int32_t i;
for (i = 0; i < 256; ++i) { for (i = 0; i < 256; ++i) {
@ -131,7 +142,7 @@ void PreBmBc(const uint8_t *x, uint16_t m, uint16_t *bmBc) {
* \param m length of the string * \param m length of the string
* \param suff pointer to an empty array that will hold the prefixes (shifts) * \param suff pointer to an empty array that will hold the prefixes (shifts)
*/ */
void BoyerMooreSuffixes(const uint8_t *x, uint16_t m, uint16_t *suff) { static void BoyerMooreSuffixes(const uint8_t *x, uint16_t m, uint16_t *suff) {
int32_t f = 0, g, i; int32_t f = 0, g, i;
suff[m - 1] = m; suff[m - 1] = m;
g = m - 1; g = m - 1;
@ -157,7 +168,8 @@ void BoyerMooreSuffixes(const uint8_t *x, uint16_t m, uint16_t *suff) {
* \param bmGs pointer to an empty array that will hold the prefixes (shifts) * \param bmGs pointer to an empty array that will hold the prefixes (shifts)
* \retval 0 ok, -1 failed * \retval 0 ok, -1 failed
*/ */
int PreBmGs(const uint8_t *x, uint16_t m, uint16_t *bmGs) { static int PreBmGs(const uint8_t *x, uint16_t m, uint16_t *bmGs)
{
int32_t i, j; int32_t i, j;
uint16_t suff[m + 1]; uint16_t suff[m + 1];
@ -187,7 +199,8 @@ int PreBmGs(const uint8_t *x, uint16_t m, uint16_t *bmGs) {
* \param size length of the string * \param size length of the string
* \param result pointer to an empty array that will hold the badchars * \param result pointer to an empty array that will hold the badchars
*/ */
void PreBmBcNocase(const uint8_t *x, uint16_t m, uint16_t *bmBc) { static void PreBmBcNocase(const uint8_t *x, uint16_t m, uint16_t *bmBc)
{
int32_t i; int32_t i;
for (i = 0; i < 256; ++i) { for (i = 0; i < 256; ++i) {
@ -198,7 +211,9 @@ void PreBmBcNocase(const uint8_t *x, uint16_t m, uint16_t *bmBc) {
} }
} }
void BoyerMooreSuffixesNocase(const uint8_t *x, uint16_t m, uint16_t *suff) { static void BoyerMooreSuffixesNocase(const uint8_t *x, uint16_t m,
uint16_t *suff)
{
int32_t f = 0, g, i; int32_t f = 0, g, i;
suff[m - 1] = m; suff[m - 1] = m;
@ -227,7 +242,8 @@ void BoyerMooreSuffixesNocase(const uint8_t *x, uint16_t m, uint16_t *suff) {
* \param m length of the string * \param m length of the string
* \param bmGs pointer to an empty array that will hold the prefixes (shifts) * \param bmGs pointer to an empty array that will hold the prefixes (shifts)
*/ */
void PreBmGsNocase(const uint8_t *x, uint16_t m, uint16_t *bmGs) { static void PreBmGsNocase(const uint8_t *x, uint16_t m, uint16_t *bmGs)
{
int32_t i, j; int32_t i, j;
uint16_t suff[m + 1]; uint16_t suff[m + 1];
@ -266,7 +282,11 @@ void PreBmGsNocase(const uint8_t *x, uint16_t m, uint16_t *bmGs) {
* *
* \retval ptr to start of the match; NULL if no match * \retval ptr to start of the match; NULL if no match
*/ */
uint8_t *BoyerMoore(uint8_t *x, uint16_t m, uint8_t *y, int32_t n, uint16_t *bmGs, uint16_t *bmBc) { uint8_t *BoyerMoore(uint8_t *x, uint16_t m, uint8_t *y, int32_t n, BmCtx *bm_ctx)
{
uint16_t *bmGs = bm_ctx->bmGs;
uint16_t *bmBc = bm_ctx->bmBc;
int i, j, m1, m2; int i, j, m1, m2;
#if 0 #if 0
printf("\nBad:\n"); printf("\nBad:\n");
@ -311,7 +331,10 @@ uint8_t *BoyerMoore(uint8_t *x, uint16_t m, uint8_t *y, int32_t n, uint16_t *bmG
* *
* \retval ptr to start of the match; NULL if no match * \retval ptr to start of the match; NULL if no match
*/ */
uint8_t *BoyerMooreNocase(uint8_t *x, uint16_t m, uint8_t *y, int32_t n, uint16_t *bmGs, uint16_t *bmBc) { uint8_t *BoyerMooreNocase(uint8_t *x, uint16_t m, uint8_t *y, int32_t n, BmCtx *bm_ctx)
{
uint16_t *bmGs = bm_ctx->bmGs;
uint16_t *bmBc = bm_ctx->bmBc;
int i, j, m1, m2; int i, j, m1, m2;
#if 0 #if 0
printf("\nBad:\n"); printf("\nBad:\n");
@ -325,7 +348,8 @@ uint8_t *BoyerMooreNocase(uint8_t *x, uint16_t m, uint8_t *y, int32_t n, uint16_
#endif #endif
j = 0; j = 0;
while (j <= n - m ) { while (j <= n - m ) {
for (i = m - 1; i >= 0 && u8_tolower(x[i]) == u8_tolower(y[i + j]); --i); /* x is stored in lowercase. */
for (i = m - 1; i >= 0 && x[i] == u8_tolower(y[i + j]); --i);
if (i < 0) { if (i < 0) {
return y + j; return y + j;

@ -40,14 +40,8 @@ typedef struct BmCtx_ {
BmCtx *BoyerMooreCtxInit(uint8_t *needle, uint16_t needle_len); BmCtx *BoyerMooreCtxInit(uint8_t *needle, uint16_t needle_len);
void BoyerMooreCtxToNocase(BmCtx *, uint8_t *, uint16_t); void BoyerMooreCtxToNocase(BmCtx *, uint8_t *, uint16_t);
void PreBmBc(const uint8_t *x, uint16_t m, uint16_t *bmBc); uint8_t *BoyerMoore(uint8_t *x, uint16_t m, uint8_t *y, int32_t n, BmCtx *bm_ctx);
void BoyerMooreSuffixes(const uint8_t *x, uint16_t m, uint16_t *suff); uint8_t *BoyerMooreNocase(uint8_t *x, uint16_t m, uint8_t *y, int32_t n, BmCtx *bm_ctx);
int PreBmGs(const uint8_t *, uint16_t, uint16_t *);
uint8_t *BoyerMoore(uint8_t *x, uint16_t m, uint8_t *y, int32_t n, uint16_t *bmGs, uint16_t *bmBc);
void PreBmBcNocase(const uint8_t *x, uint16_t m, uint16_t *bmBc);
void BoyerMooreSuffixesNocase(const uint8_t *x, uint16_t m, uint16_t *suff);
void PreBmGsNocase(const uint8_t *x, uint16_t m, uint16_t *bmGs);
uint8_t *BoyerMooreNocase(uint8_t *x, uint16_t m, uint8_t *y, int32_t n, uint16_t *bmGs, uint16_t *bmBc);
void BoyerMooreCtxDeInit(BmCtx *); void BoyerMooreCtxDeInit(BmCtx *);
#endif /* __UTIL_SPM_BM__ */ #endif /* __UTIL_SPM_BM__ */

@ -1,4 +1,4 @@
/* Copyright (C) 2007-2013 Open Information Security Foundation /* Copyright (C) 2007-2014 Open Information Security Foundation
* *
* You can copy, redistribute or modify this Program under the terms of * You can copy, redistribute or modify this Program under the terms of
* the GNU General Public License version 2 as published by the Free * the GNU General Public License version 2 as published by the Free
@ -103,16 +103,10 @@ uint8_t *Bs2bmNocaseSearch(uint8_t *text, uint32_t textlen, uint8_t *needle, uin
*/ */
uint8_t *BoyerMooreSearch(uint8_t *text, uint32_t textlen, uint8_t *needle, uint16_t needlelen) uint8_t *BoyerMooreSearch(uint8_t *text, uint32_t textlen, uint8_t *needle, uint16_t needlelen)
{ {
uint16_t bmBc[ALPHABET_SIZE]; BmCtx *bm_ctx = BoyerMooreCtxInit(needle, needlelen);
uint16_t *bmGs = SCMalloc(sizeof(uint16_t)*(needlelen + 1));
if (unlikely(bmGs == NULL))
return NULL;
PreBmGs(needle, needlelen, bmGs);
PreBmBc(needle, needlelen, bmBc);
uint8_t *ret = BoyerMoore(needle, needlelen, text, textlen, bmGs, bmBc); uint8_t *ret = BoyerMoore(needle, needlelen, text, textlen, bm_ctx);
SCFree(bmGs); BoyerMooreCtxDeInit(bm_ctx);
return ret; return ret;
} }
@ -128,16 +122,11 @@ uint8_t *BoyerMooreSearch(uint8_t *text, uint32_t textlen, uint8_t *needle, uint
*/ */
uint8_t *BoyerMooreNocaseSearch(uint8_t *text, uint32_t textlen, uint8_t *needle, uint16_t needlelen) uint8_t *BoyerMooreNocaseSearch(uint8_t *text, uint32_t textlen, uint8_t *needle, uint16_t needlelen)
{ {
uint16_t bmBc[ALPHABET_SIZE]; BmCtx *bm_ctx = BoyerMooreCtxInit(needle, needlelen);
uint16_t *bmGs = SCMalloc(sizeof(uint16_t)*(needlelen + 1)); BoyerMooreCtxToNocase(bm_ctx, needle, needlelen);
if (unlikely(bmGs == NULL))
return NULL;
PreBmGsNocase(needle, needlelen, bmGs);
PreBmBcNocase(needle, needlelen, bmBc);
uint8_t *ret = BoyerMooreNocase(needle, needlelen, text, textlen, bmGs, bmBc); uint8_t *ret = BoyerMooreNocase(needle, needlelen, text, textlen, bm_ctx);
SCFree(bmGs); BoyerMooreCtxDeInit(bm_ctx);
return ret; return ret;
} }
@ -241,50 +230,46 @@ uint8_t *BoyerMooreWrapper(uint8_t *text, uint8_t *needle, int times)
uint32_t textlen = strlen((char *)text); uint32_t textlen = strlen((char *)text);
uint16_t needlelen = strlen((char *)needle); uint16_t needlelen = strlen((char *)needle);
uint16_t bmBc[ALPHABET_SIZE]; BmCtx *bm_ctx = BoyerMooreCtxInit(needle, needlelen);
uint16_t *bmGs = SCMalloc(sizeof(uint16_t)*(needlelen + 1));
if (unlikely(bmGs == NULL))
return NULL;
uint8_t *ret = NULL; uint8_t *ret = NULL;
int i = 0; int i = 0;
PreBmGs(needle, needlelen, bmGs);
PreBmBc(needle, needlelen, bmBc);
CLOCK_INIT; CLOCK_INIT;
if (times > 1) CLOCK_START; if (times > 1) CLOCK_START;
for (i = 0; i < times; i++) { for (i = 0; i < times; i++) {
ret = BoyerMoore(needle, needlelen, text, textlen, bmGs, bmBc); ret = BoyerMoore(needle, needlelen, text, textlen, bm_ctx);
} }
if (times > 1) { CLOCK_END; CLOCK_PRINT_SEC; }; if (times > 1) { CLOCK_END; CLOCK_PRINT_SEC; };
SCFree(bmGs); BoyerMooreCtxDeInit(bm_ctx);
return ret; return ret;
} }
uint8_t *BoyerMooreNocaseWrapper(uint8_t *text, uint8_t *needle, int times) uint8_t *BoyerMooreNocaseWrapper(uint8_t *text, uint8_t *in_needle, int times)
{ {
uint32_t textlen = strlen((char *)text); uint32_t textlen = strlen((char *)text);
uint16_t needlelen = strlen((char *)needle); uint16_t needlelen = strlen((char *)in_needle);
uint16_t bmBc[ALPHABET_SIZE]; /* Make a copy of in_needle to be able to convert it to lowercase. */
uint16_t *bmGs = SCMalloc(sizeof(uint16_t)*(needlelen + 1)); uint8_t *needle = SCMalloc(needlelen);
if (unlikely(bmGs == NULL)) if (needle == NULL)
return NULL; return NULL;
memcpy(needle, in_needle, needlelen);
BmCtx *bm_ctx = BoyerMooreCtxInit(needle, needlelen);
BoyerMooreCtxToNocase(bm_ctx, needle, needlelen);
uint8_t *ret = NULL; uint8_t *ret = NULL;
int i = 0; int i = 0;
PreBmGsNocase(needle, needlelen, bmGs);
PreBmBcNocase(needle, needlelen, bmBc);
CLOCK_INIT; CLOCK_INIT;
if (times > 1) CLOCK_START; if (times > 1) CLOCK_START;
for (i = 0; i < times; i++) { for (i = 0; i < times; i++) {
ret = BoyerMooreNocase(needle, needlelen, text, textlen, bmGs, bmBc); ret = BoyerMooreNocase(needle, needlelen, text, textlen, bm_ctx);
} }
if (times > 1) { CLOCK_END; CLOCK_PRINT_SEC; }; if (times > 1) { CLOCK_END; CLOCK_PRINT_SEC; };
SCFree(bmGs); BoyerMooreCtxDeInit(bm_ctx);
free(needle);
return ret; return ret;
} }
@ -379,10 +364,7 @@ uint8_t *BoyerMooreCtxWrapper(uint8_t *text, uint8_t *needle, int times)
uint32_t textlen = strlen((char *)text); uint32_t textlen = strlen((char *)text);
uint16_t needlelen = strlen((char *)needle); uint16_t needlelen = strlen((char *)needle);
uint16_t bmBc[ALPHABET_SIZE]; BmCtx *bm_ctx = BoyerMooreCtxInit(needle, needlelen);
uint16_t *bmGs = SCMalloc(sizeof(uint16_t)*(needlelen + 1));
if (unlikely(bmGs == NULL))
return NULL;
uint8_t *ret = NULL; uint8_t *ret = NULL;
int i = 0; int i = 0;
@ -391,13 +373,11 @@ uint8_t *BoyerMooreCtxWrapper(uint8_t *text, uint8_t *needle, int times)
if (times > 1) CLOCK_START; if (times > 1) CLOCK_START;
for (i = 0; i < times; i++) { for (i = 0; i < times; i++) {
/* Stats including context building */ /* Stats including context building */
PreBmGs(needle, needlelen, bmGs); ret = BoyerMoore(needle, needlelen, text, textlen, bm_ctx);
PreBmBc(needle, needlelen, bmBc);
ret = BoyerMoore(needle, needlelen, text, textlen, bmGs, bmBc);
} }
if (times > 1) { CLOCK_END; CLOCK_PRINT_SEC; }; if (times > 1) { CLOCK_END; CLOCK_PRINT_SEC; };
SCFree(bmGs); BoyerMooreCtxDeInit(bm_ctx);
return ret; return ret;
} }
@ -418,15 +398,18 @@ uint8_t *RawCtxWrapper(uint8_t *text, uint8_t *needle, int times)
return ret; return ret;
} }
uint8_t *BoyerMooreNocaseCtxWrapper(uint8_t *text, uint8_t *needle, int times) uint8_t *BoyerMooreNocaseCtxWrapper(uint8_t *text, uint8_t *in_needle, int times)
{ {
uint32_t textlen = strlen((char *)text); uint32_t textlen = strlen((char *)text);
uint16_t needlelen = strlen((char *)needle); uint16_t needlelen = strlen((char *)in_needle);
uint16_t bmBc[ALPHABET_SIZE]; /* Make a copy of in_needle to be able to convert it to lowercase. */
uint16_t *bmGs = SCMalloc(sizeof(uint16_t)*(needlelen + 1)); uint8_t *needle = SCMalloc(needlelen);
if (unlikely(bmGs == NULL)) if (needle == NULL)
return NULL; return NULL;
memcpy(needle, in_needle, needlelen);
BmCtx *bm_ctx = BoyerMooreCtxInit(needle, needlelen);
uint8_t *ret = NULL; uint8_t *ret = NULL;
int i = 0; int i = 0;
@ -435,12 +418,12 @@ uint8_t *BoyerMooreNocaseCtxWrapper(uint8_t *text, uint8_t *needle, int times)
if (times > 1) CLOCK_START; if (times > 1) CLOCK_START;
for (i = 0; i < times; i++) { for (i = 0; i < times; i++) {
/* Stats including context building */ /* Stats including context building */
PreBmGsNocase(needle, needlelen, bmGs); BoyerMooreCtxToNocase(bm_ctx, needle, needlelen);
PreBmBcNocase(needle, needlelen, bmBc); ret = BoyerMooreNocase(needle, needlelen, text, textlen, bm_ctx);
ret = BoyerMooreNocase(needle, needlelen, text, textlen, bmGs, bmBc);
} }
if (times > 1) { CLOCK_END; CLOCK_PRINT_SEC; }; if (times > 1) { CLOCK_END; CLOCK_PRINT_SEC; };
SCFree(bmGs); BoyerMooreCtxDeInit(bm_ctx);
free(needle);
return ret; return ret;
} }

Loading…
Cancel
Save