Store Boyer Moore no case strings in lower case.

Rather than converting the search string to lower case while searching,
convert it to lowercase during initialization.

Change the Boyer Moore search API to take a BmCtx

Change the API for BoyerMoore to take a BmCtx rather than the two arrays
(bmGs and bmBc) that are stored in the context, since callers mostly already
hold a BmCtx and pass its members. This enforces always calling
BoyerMooreCtxToNocase() to convert the context to no-case.

Use the BoyerMooreCtxInit() and BoyerMooreCtxDeInit() functions to create and
destroy the context, even in unit tests.
pull/1047/head
Ken Steele 11 years ago
parent 54214d1251
commit 967f7aefde

@ -190,9 +190,9 @@ static AppProto AppLayerProtoDetectPMMatchSignature(const AppLayerProtoDetectPMS
s->cd->offset, s->cd->depth);
if (s->cd->flags & DETECT_CONTENT_NOCASE)
found = BoyerMooreNocase(s->cd->content, s->cd->content_len, sbuf, sbuflen, s->cd->bm_ctx->bmGs, s->cd->bm_ctx->bmBc);
found = BoyerMooreNocase(s->cd->content, s->cd->content_len, sbuf, sbuflen, s->cd->bm_ctx);
else
found = BoyerMoore(s->cd->content, s->cd->content_len, sbuf, sbuflen, s->cd->bm_ctx->bmGs, s->cd->bm_ctx->bmBc);
found = BoyerMoore(s->cd->content, s->cd->content_len, sbuf, sbuflen, s->cd->bm_ctx);
if (found != NULL)
proto = s->alproto;

@ -274,9 +274,9 @@ int DetectEngineContentInspection(DetectEngineCtx *de_ctx, DetectEngineThreadCtx
/* do the actual search */
if (cd->flags & DETECT_CONTENT_NOCASE)
found = BoyerMooreNocase(cd->content, cd->content_len, sbuffer, sbuffer_len, cd->bm_ctx->bmGs, cd->bm_ctx->bmBc);
found = BoyerMooreNocase(cd->content, cd->content_len, sbuffer, sbuffer_len, cd->bm_ctx);
else
found = BoyerMoore(cd->content, cd->content_len, sbuffer, sbuffer_len, cd->bm_ctx->bmGs, cd->bm_ctx->bmBc);
found = BoyerMoore(cd->content, cd->content_len, sbuffer, sbuffer_len, cd->bm_ctx);
/* next we evaluate the result in combination with the
* negation flag. */

@ -220,8 +220,7 @@ static int DetectFilemagicMatch (ThreadVars *t, DetectEngineThreadCtx *det_ctx,
/* we include the \0 in the inspection, so patterns can match on the
* end of the string. */
if (BoyerMooreNocase(filemagic->name, filemagic->len, (uint8_t *)file->magic,
strlen(file->magic) + 1, filemagic->bm_ctx->bmGs,
filemagic->bm_ctx->bmBc) != NULL)
strlen(file->magic) + 1, filemagic->bm_ctx) != NULL)
{
#ifdef DEBUG
if (SCLogDebugEnabled()) {

@ -105,8 +105,7 @@ static int DetectFilenameMatch (ThreadVars *t, DetectEngineThreadCtx *det_ctx,
SCReturnInt(0);
if (BoyerMooreNocase(filename->name, filename->len, file->name,
file->name_len, filename->bm_ctx->bmGs,
filename->bm_ctx->bmBc) != NULL)
file->name_len, filename->bm_ctx) != NULL)
{
#ifdef DEBUG
if (SCLogDebugEnabled()) {

@ -36,6 +36,14 @@
#include "util-spm-bm.h"
#include "util-debug.h"
#include "util-error.h"
#include "util-memcpy.h"
static int PreBmGs(const uint8_t *x, uint16_t m, uint16_t *bmGs);
static void PreBmBc(const uint8_t *x, uint16_t m, uint16_t *bmBc);
static void PreBmBcNocase(const uint8_t *x, uint16_t m, uint16_t *bmBc);
static void BoyerMooreSuffixesNocase(const uint8_t *x, uint16_t m,
uint16_t *suff);
static void PreBmGsNocase(const uint8_t *x, uint16_t m, uint16_t *bmGs);
/**
* \brief Given a BmCtx structure, recreate the pre/suffixes for
@ -46,6 +54,8 @@
* \param size length of the string
*/
void BoyerMooreCtxToNocase(BmCtx *bm_ctx, uint8_t *needle, uint16_t needle_len) {
/* Store the content as lower case to make searching faster */
memcpy_tolower(needle, needle, needle_len);
/* Prepare bad chars with nocase chars */
PreBmBcNocase(needle, needle_len, bm_ctx->bmBc);
@ -113,7 +123,8 @@ void BoyerMooreCtxDeInit(BmCtx *bmctx)
* \param size length of the string
* \param result pointer to an empty array that will hold the badchars
*/
void PreBmBc(const uint8_t *x, uint16_t m, uint16_t *bmBc) {
static void PreBmBc(const uint8_t *x, uint16_t m, uint16_t *bmBc)
{
int32_t i;
for (i = 0; i < 256; ++i) {
@ -131,7 +142,7 @@ void PreBmBc(const uint8_t *x, uint16_t m, uint16_t *bmBc) {
* \param m length of the string
* \param suff pointer to an empty array that will hold the prefixes (shifts)
*/
void BoyerMooreSuffixes(const uint8_t *x, uint16_t m, uint16_t *suff) {
static void BoyerMooreSuffixes(const uint8_t *x, uint16_t m, uint16_t *suff) {
int32_t f = 0, g, i;
suff[m - 1] = m;
g = m - 1;
@ -157,7 +168,8 @@ void BoyerMooreSuffixes(const uint8_t *x, uint16_t m, uint16_t *suff) {
* \param bmGs pointer to an empty array that will hold the prefixes (shifts)
* \retval 0 ok, -1 failed
*/
int PreBmGs(const uint8_t *x, uint16_t m, uint16_t *bmGs) {
static int PreBmGs(const uint8_t *x, uint16_t m, uint16_t *bmGs)
{
int32_t i, j;
uint16_t suff[m + 1];
@ -187,7 +199,8 @@ int PreBmGs(const uint8_t *x, uint16_t m, uint16_t *bmGs) {
* \param size length of the string
* \param result pointer to an empty array that will hold the badchars
*/
void PreBmBcNocase(const uint8_t *x, uint16_t m, uint16_t *bmBc) {
static void PreBmBcNocase(const uint8_t *x, uint16_t m, uint16_t *bmBc)
{
int32_t i;
for (i = 0; i < 256; ++i) {
@ -198,7 +211,9 @@ void PreBmBcNocase(const uint8_t *x, uint16_t m, uint16_t *bmBc) {
}
}
void BoyerMooreSuffixesNocase(const uint8_t *x, uint16_t m, uint16_t *suff) {
static void BoyerMooreSuffixesNocase(const uint8_t *x, uint16_t m,
uint16_t *suff)
{
int32_t f = 0, g, i;
suff[m - 1] = m;
@ -227,7 +242,8 @@ void BoyerMooreSuffixesNocase(const uint8_t *x, uint16_t m, uint16_t *suff) {
* \param m length of the string
* \param bmGs pointer to an empty array that will hold the prefixes (shifts)
*/
void PreBmGsNocase(const uint8_t *x, uint16_t m, uint16_t *bmGs) {
static void PreBmGsNocase(const uint8_t *x, uint16_t m, uint16_t *bmGs)
{
int32_t i, j;
uint16_t suff[m + 1];
@ -266,7 +282,11 @@ void PreBmGsNocase(const uint8_t *x, uint16_t m, uint16_t *bmGs) {
*
* \retval ptr to start of the match; NULL if no match
*/
uint8_t *BoyerMoore(uint8_t *x, uint16_t m, uint8_t *y, int32_t n, uint16_t *bmGs, uint16_t *bmBc) {
uint8_t *BoyerMoore(uint8_t *x, uint16_t m, uint8_t *y, int32_t n, BmCtx *bm_ctx)
{
uint16_t *bmGs = bm_ctx->bmGs;
uint16_t *bmBc = bm_ctx->bmBc;
int i, j, m1, m2;
#if 0
printf("\nBad:\n");
@ -311,7 +331,10 @@ uint8_t *BoyerMoore(uint8_t *x, uint16_t m, uint8_t *y, int32_t n, uint16_t *bmG
*
* \retval ptr to start of the match; NULL if no match
*/
uint8_t *BoyerMooreNocase(uint8_t *x, uint16_t m, uint8_t *y, int32_t n, uint16_t *bmGs, uint16_t *bmBc) {
uint8_t *BoyerMooreNocase(uint8_t *x, uint16_t m, uint8_t *y, int32_t n, BmCtx *bm_ctx)
{
uint16_t *bmGs = bm_ctx->bmGs;
uint16_t *bmBc = bm_ctx->bmBc;
int i, j, m1, m2;
#if 0
printf("\nBad:\n");
@ -325,7 +348,8 @@ uint8_t *BoyerMooreNocase(uint8_t *x, uint16_t m, uint8_t *y, int32_t n, uint16_
#endif
j = 0;
while (j <= n - m ) {
for (i = m - 1; i >= 0 && u8_tolower(x[i]) == u8_tolower(y[i + j]); --i);
/* x is stored in lowercase. */
for (i = m - 1; i >= 0 && x[i] == u8_tolower(y[i + j]); --i);
if (i < 0) {
return y + j;

@ -40,14 +40,8 @@ typedef struct BmCtx_ {
BmCtx *BoyerMooreCtxInit(uint8_t *needle, uint16_t needle_len);
void BoyerMooreCtxToNocase(BmCtx *, uint8_t *, uint16_t);
void PreBmBc(const uint8_t *x, uint16_t m, uint16_t *bmBc);
void BoyerMooreSuffixes(const uint8_t *x, uint16_t m, uint16_t *suff);
int PreBmGs(const uint8_t *, uint16_t, uint16_t *);
uint8_t *BoyerMoore(uint8_t *x, uint16_t m, uint8_t *y, int32_t n, uint16_t *bmGs, uint16_t *bmBc);
void PreBmBcNocase(const uint8_t *x, uint16_t m, uint16_t *bmBc);
void BoyerMooreSuffixesNocase(const uint8_t *x, uint16_t m, uint16_t *suff);
void PreBmGsNocase(const uint8_t *x, uint16_t m, uint16_t *bmGs);
uint8_t *BoyerMooreNocase(uint8_t *x, uint16_t m, uint8_t *y, int32_t n, uint16_t *bmGs, uint16_t *bmBc);
uint8_t *BoyerMoore(uint8_t *x, uint16_t m, uint8_t *y, int32_t n, BmCtx *bm_ctx);
uint8_t *BoyerMooreNocase(uint8_t *x, uint16_t m, uint8_t *y, int32_t n, BmCtx *bm_ctx);
void BoyerMooreCtxDeInit(BmCtx *);
#endif /* __UTIL_SPM_BM__ */

@ -1,4 +1,4 @@
/* Copyright (C) 2007-2013 Open Information Security Foundation
/* Copyright (C) 2007-2014 Open Information Security Foundation
*
* You can copy, redistribute or modify this Program under the terms of
* the GNU General Public License version 2 as published by the Free
@ -103,16 +103,10 @@ uint8_t *Bs2bmNocaseSearch(uint8_t *text, uint32_t textlen, uint8_t *needle, uin
*/
uint8_t *BoyerMooreSearch(uint8_t *text, uint32_t textlen, uint8_t *needle, uint16_t needlelen)
{
uint16_t bmBc[ALPHABET_SIZE];
uint16_t *bmGs = SCMalloc(sizeof(uint16_t)*(needlelen + 1));
if (unlikely(bmGs == NULL))
return NULL;
PreBmGs(needle, needlelen, bmGs);
PreBmBc(needle, needlelen, bmBc);
BmCtx *bm_ctx = BoyerMooreCtxInit(needle, needlelen);
uint8_t *ret = BoyerMoore(needle, needlelen, text, textlen, bmGs, bmBc);
SCFree(bmGs);
uint8_t *ret = BoyerMoore(needle, needlelen, text, textlen, bm_ctx);
BoyerMooreCtxDeInit(bm_ctx);
return ret;
}
@ -128,16 +122,11 @@ uint8_t *BoyerMooreSearch(uint8_t *text, uint32_t textlen, uint8_t *needle, uint
*/
uint8_t *BoyerMooreNocaseSearch(uint8_t *text, uint32_t textlen, uint8_t *needle, uint16_t needlelen)
{
uint16_t bmBc[ALPHABET_SIZE];
uint16_t *bmGs = SCMalloc(sizeof(uint16_t)*(needlelen + 1));
if (unlikely(bmGs == NULL))
return NULL;
PreBmGsNocase(needle, needlelen, bmGs);
PreBmBcNocase(needle, needlelen, bmBc);
BmCtx *bm_ctx = BoyerMooreCtxInit(needle, needlelen);
BoyerMooreCtxToNocase(bm_ctx, needle, needlelen);
uint8_t *ret = BoyerMooreNocase(needle, needlelen, text, textlen, bmGs, bmBc);
SCFree(bmGs);
uint8_t *ret = BoyerMooreNocase(needle, needlelen, text, textlen, bm_ctx);
BoyerMooreCtxDeInit(bm_ctx);
return ret;
}
@ -241,50 +230,46 @@ uint8_t *BoyerMooreWrapper(uint8_t *text, uint8_t *needle, int times)
uint32_t textlen = strlen((char *)text);
uint16_t needlelen = strlen((char *)needle);
uint16_t bmBc[ALPHABET_SIZE];
uint16_t *bmGs = SCMalloc(sizeof(uint16_t)*(needlelen + 1));
if (unlikely(bmGs == NULL))
return NULL;
BmCtx *bm_ctx = BoyerMooreCtxInit(needle, needlelen);
uint8_t *ret = NULL;
int i = 0;
PreBmGs(needle, needlelen, bmGs);
PreBmBc(needle, needlelen, bmBc);
CLOCK_INIT;
if (times > 1) CLOCK_START;
for (i = 0; i < times; i++) {
ret = BoyerMoore(needle, needlelen, text, textlen, bmGs, bmBc);
ret = BoyerMoore(needle, needlelen, text, textlen, bm_ctx);
}
if (times > 1) { CLOCK_END; CLOCK_PRINT_SEC; };
SCFree(bmGs);
BoyerMooreCtxDeInit(bm_ctx);
return ret;
}
uint8_t *BoyerMooreNocaseWrapper(uint8_t *text, uint8_t *needle, int times)
uint8_t *BoyerMooreNocaseWrapper(uint8_t *text, uint8_t *in_needle, int times)
{
uint32_t textlen = strlen((char *)text);
uint16_t needlelen = strlen((char *)needle);
uint16_t needlelen = strlen((char *)in_needle);
uint16_t bmBc[ALPHABET_SIZE];
uint16_t *bmGs = SCMalloc(sizeof(uint16_t)*(needlelen + 1));
if (unlikely(bmGs == NULL))
/* Make a copy of in_needle to be able to convert it to lowercase. */
uint8_t *needle = SCMalloc(needlelen);
if (needle == NULL)
return NULL;
memcpy(needle, in_needle, needlelen);
BmCtx *bm_ctx = BoyerMooreCtxInit(needle, needlelen);
BoyerMooreCtxToNocase(bm_ctx, needle, needlelen);
uint8_t *ret = NULL;
int i = 0;
PreBmGsNocase(needle, needlelen, bmGs);
PreBmBcNocase(needle, needlelen, bmBc);
CLOCK_INIT;
if (times > 1) CLOCK_START;
for (i = 0; i < times; i++) {
ret = BoyerMooreNocase(needle, needlelen, text, textlen, bmGs, bmBc);
ret = BoyerMooreNocase(needle, needlelen, text, textlen, bm_ctx);
}
if (times > 1) { CLOCK_END; CLOCK_PRINT_SEC; };
SCFree(bmGs);
BoyerMooreCtxDeInit(bm_ctx);
free(needle);
return ret;
}
@ -379,10 +364,7 @@ uint8_t *BoyerMooreCtxWrapper(uint8_t *text, uint8_t *needle, int times)
uint32_t textlen = strlen((char *)text);
uint16_t needlelen = strlen((char *)needle);
uint16_t bmBc[ALPHABET_SIZE];
uint16_t *bmGs = SCMalloc(sizeof(uint16_t)*(needlelen + 1));
if (unlikely(bmGs == NULL))
return NULL;
BmCtx *bm_ctx = BoyerMooreCtxInit(needle, needlelen);
uint8_t *ret = NULL;
int i = 0;
@ -391,13 +373,11 @@ uint8_t *BoyerMooreCtxWrapper(uint8_t *text, uint8_t *needle, int times)
if (times > 1) CLOCK_START;
for (i = 0; i < times; i++) {
/* Stats including context building */
PreBmGs(needle, needlelen, bmGs);
PreBmBc(needle, needlelen, bmBc);
ret = BoyerMoore(needle, needlelen, text, textlen, bmGs, bmBc);
ret = BoyerMoore(needle, needlelen, text, textlen, bm_ctx);
}
if (times > 1) { CLOCK_END; CLOCK_PRINT_SEC; };
SCFree(bmGs);
BoyerMooreCtxDeInit(bm_ctx);
return ret;
}
@ -418,15 +398,18 @@ uint8_t *RawCtxWrapper(uint8_t *text, uint8_t *needle, int times)
return ret;
}
uint8_t *BoyerMooreNocaseCtxWrapper(uint8_t *text, uint8_t *needle, int times)
uint8_t *BoyerMooreNocaseCtxWrapper(uint8_t *text, uint8_t *in_needle, int times)
{
uint32_t textlen = strlen((char *)text);
uint16_t needlelen = strlen((char *)needle);
uint16_t needlelen = strlen((char *)in_needle);
uint16_t bmBc[ALPHABET_SIZE];
uint16_t *bmGs = SCMalloc(sizeof(uint16_t)*(needlelen + 1));
if (unlikely(bmGs == NULL))
/* Make a copy of in_needle to be able to convert it to lowercase. */
uint8_t *needle = SCMalloc(needlelen);
if (needle == NULL)
return NULL;
memcpy(needle, in_needle, needlelen);
BmCtx *bm_ctx = BoyerMooreCtxInit(needle, needlelen);
uint8_t *ret = NULL;
int i = 0;
@ -435,12 +418,12 @@ uint8_t *BoyerMooreNocaseCtxWrapper(uint8_t *text, uint8_t *needle, int times)
if (times > 1) CLOCK_START;
for (i = 0; i < times; i++) {
/* Stats including context building */
PreBmGsNocase(needle, needlelen, bmGs);
PreBmBcNocase(needle, needlelen, bmBc);
ret = BoyerMooreNocase(needle, needlelen, text, textlen, bmGs, bmBc);
BoyerMooreCtxToNocase(bm_ctx, needle, needlelen);
ret = BoyerMooreNocase(needle, needlelen, text, textlen, bm_ctx);
}
if (times > 1) { CLOCK_END; CLOCK_PRINT_SEC; };
SCFree(bmGs);
BoyerMooreCtxDeInit(bm_ctx);
free(needle);
return ret;
}

Loading…
Cancel
Save