Handle the CUDA cleanup at shutdown. This should get rid of any errors from the call to SigGroupCleanup().

remotes/origin/master-1.0.x
Anoop Saldanha 16 years ago committed by Victor Julien
parent d281a6b8ac
commit c26e92733d

@ -45,6 +45,10 @@
#define INSPECT_BYTES 32
#define ALP_DETECT_MAX 256
/* undef __SC_CUDA_SUPPORT__. We will get back to this later. Need to
* analyze the performance of cuda support for app layer */
#undef __SC_CUDA_SUPPORT__
typedef struct AlpProtoDetectDirection_ {
MpmCtx mpm_ctx;
uint32_t id;
@ -69,8 +73,8 @@ void AlpProtoInit(AlpProtoDetectCtx *ctx) {
memset(ctx, 0x00, sizeof(AlpProtoDetectCtx));
#ifndef __SC_CUDA_SUPPORT__
MpmInitCtx(&ctx->toserver.mpm_ctx, PatternMatchDefaultMatcher(), -1);
MpmInitCtx(&ctx->toclient.mpm_ctx, PatternMatchDefaultMatcher(), -1);
MpmInitCtx(&ctx->toserver.mpm_ctx, MPM_B2G, -1);
MpmInitCtx(&ctx->toclient.mpm_ctx, MPM_B2G, -1);
#else
ctx->alp_content_module_handle = SCCudaHlRegisterModule("SC_ALP_CONTENT_B2G_CUDA");
MpmInitCtx(&ctx->toserver.mpm_ctx, MPM_B2G_CUDA, ctx->alp_content_module_handle);

@ -325,12 +325,23 @@ typedef struct Packet_
PatternMatcherQueue *cuda_pmq;
MpmCtx *cuda_mpm_ctx;
MpmThreadCtx *cuda_mtc;
/* this mutex corresponds to the condition variable defined below it */
SCMutex cuda_mutex_q;
/* this mutex corresponds to the condition variable defined below it.
* this mutex would be used for the search phase of the mpm */
SCMutex cuda_search_mutex_q;
/* we need this condition variable so that the cuda dispatcher thread
* can inform the client threads, when they are done with the pattern
* matching */
SCCondT cuda_cond_q;
* matching for the search phase */
SCCondT cuda_search_cond_q;
/* this mutex corresponds to the condition variable defined below it.
* this mutex would be used for the scan phase of the mpm */
SCMutex cuda_scan_mutex_q;
/* we need this condition variable so that the cuda dispatcher thread
* can inform the client threads, when they are done with the pattern
* matching for the scan phase */
SCCondT cuda_scan_cond_q;
/* used to hold the match results. We could use a void *result here
* instead. That way it could hold any kind of result. *todo* */
uint16_t cuda_matches;

@ -19,6 +19,7 @@
#include "detect-uricontent.h"
#include "util-mpm-b2g-cuda.h"
#include "util-enum.h"
#include "util-debug.h"
/** \todo make it possible to use multiple pattern matcher algorithms next to
@ -32,22 +33,33 @@
#endif
//#define PM MPM_B3G
/* String-to-enum table for the MPM algorithm identifiers defined in
 * util-mpm.h. PatternMatchDefaultMatcher() hands this table to
 * SCMapEnumNameToValue() to translate the "mpm-algo" configuration value
 * into the matching MPM_* constant.
 * NOTE(review): the table is passed without a length and contains no
 * terminating entry -- confirm how SCMapEnumNameToValue() detects the end
 * of the table when the supplied name matches none of the entries. */
SCEnumCharMap sc_mpm_algo_map[] = {
{ "b2g", MPM_B2G },
{ "b3g", MPM_B3G },
{ "wumanber", MPM_WUMANBER },
#ifdef __SC_CUDA_SUPPORT__
/* this entry is compiled in only when __SC_CUDA_SUPPORT__ is defined */
{ "b2g_cuda", MPM_B2G_CUDA },
#endif
};
/** \brief Function to return the default multi pattern matcher algorithm to be
* used by the engine
* \retval mpm algo value
*/
uint16_t PatternMatchDefaultMatcher(void) {
char *mpm_algo;
uint16_t mpm_algo_val = PM;
int mpm_algo_val = PM;
/* Get the mpm algo defined in config file by the user */
if ((ConfGet("mpm-algo", &mpm_algo)) == 1) {
if(strncmp(mpm_algo, "b2g", 3) == 0) {
mpm_algo_val = MPM_B2G;
} else if (strncmp(mpm_algo, "b3g", 3) == 0) {
mpm_algo_val = MPM_B3G;
} else if (strncmp(mpm_algo, "wumanber", 7) == 0) {
mpm_algo_val = MPM_WUMANBER;
mpm_algo_val = SCMapEnumNameToValue(mpm_algo, sc_mpm_algo_map);
if (mpm_algo_val == -1) {
SCLogError(SC_ERR_INVALID_YAML_CONF_ENTRY, "Invalid mpm algo supplied "
"in the yaml conf file: \"%s\"", mpm_algo);
exit(EXIT_FAILURE);
}
}
@ -66,6 +78,7 @@ uint32_t PacketPatternScan(ThreadVars *tv, DetectEngineThreadCtx *det_ctx,
SCEnter();
det_ctx->pmq.mode = PMQ_MODE_SCAN;
#ifndef __SC_CUDA_SUPPORT__
uint32_t ret;
ret = mpm_table[det_ctx->sgh->mpm_ctx->mpm_type].Scan(det_ctx->sgh->mpm_ctx,
@ -82,10 +95,10 @@ uint32_t PacketPatternScan(ThreadVars *tv, DetectEngineThreadCtx *det_ctx,
p->cuda_mtc = &det_ctx->mtc;
p->cuda_pmq = &det_ctx->pmq;
B2gCudaPushPacketTo_tv_CMB2_RC(p);
SCMutexLock(&p->cuda_mutex_q);
SCondWait(&p->cuda_cond_q, &p->cuda_mutex_q);
SCMutexLock(&p->cuda_scan_mutex_q);
SCondWait(&p->cuda_scan_cond_q, &p->cuda_scan_mutex_q);
p->cuda_done = 1;
SCMutexUnlock(&p->cuda_mutex_q);
SCMutexUnlock(&p->cuda_scan_mutex_q);
SCReturnInt(p->cuda_matches);
#endif
@ -103,29 +116,39 @@ uint32_t UriPatternScan(ThreadVars *tv, DetectEngineThreadCtx *det_ctx,
SCEnter();
det_ctx->pmq.mode = PMQ_MODE_SCAN;
#ifndef __SC_CUDA_SUPPORT__
uint32_t ret;
#ifndef __SC_CUDA_SUPPORT__
ret = mpm_table[det_ctx->sgh->mpm_uri_ctx->mpm_type].Scan
(det_ctx->sgh->mpm_uri_ctx, &det_ctx->mtcu, &det_ctx->pmq,
uri, uri_len);
SCReturnInt(ret);
#else
Packet *p = malloc(sizeof(Packet));
if (p == NULL) {
SCLogError(SC_ERR_MEM_ALLOC, "Error allocating memory");
exit(EXIT_FAILURE);
}
memset(p, 0, sizeof(Packet));
p->cuda_done = 0;
SCMutexInit(&p->cuda_scan_mutex_q, NULL);
SCCondInit(&p->cuda_scan_cond_q, NULL);
//p->cuda_done = 0;
p->cuda_free_packet = 1;
p->cuda_search = 0;
//p->cuda_search = 0;
p->cuda_mpm_ctx = det_ctx->sgh->mpm_uri_ctx;
p->cuda_mtc = &det_ctx->mtcu;
p->cuda_pmq = &det_ctx->pmq;
p->payload = uri;
p->payload_len = uri_len;
B2gCudaPushPacketTo_tv_CMB2_RC(p);
SCMutexLock(&p->cuda_mutex_q);
SCondWait(&p->cuda_cond_q, &p->cuda_mutex_q);
SCMutexLock(&p->cuda_scan_mutex_q);
SCondWait(&p->cuda_scan_cond_q, &p->cuda_scan_mutex_q);
SCMutexUnlock(&p->cuda_scan_mutex_q);
ret = p->cuda_matches;
p->cuda_done = 1;
SCMutexUnlock(&p->cuda_mutex_q);
SCReturnInt(p->cuda_matches);
SCReturnInt(ret);
#endif
}
@ -140,6 +163,7 @@ uint32_t PacketPatternMatch(ThreadVars *tv, DetectEngineThreadCtx *det_ctx,
SCEnter();
det_ctx->pmq.mode = PMQ_MODE_SEARCH;
#ifndef __SC_CUDA_SUPPORT__
uint32_t ret;
ret = mpm_table[det_ctx->sgh->mpm_ctx->mpm_type].Search(det_ctx->sgh->mpm_ctx,
@ -149,17 +173,16 @@ uint32_t PacketPatternMatch(ThreadVars *tv, DetectEngineThreadCtx *det_ctx,
p->payload_len);
SCReturnInt(ret);
#else
p->cuda_done = 0;
p->cuda_search = 1;
p->cuda_free_packet = 0;
p->cuda_mpm_ctx = det_ctx->sgh->mpm_ctx;
p->cuda_mtc = &det_ctx->mtc;
p->cuda_pmq = &det_ctx->pmq;
SCMutexLock(&p->cuda_search_mutex_q);
B2gCudaPushPacketTo_tv_CMB2_RC(p);
SCMutexLock(&p->cuda_mutex_q);
SCondWait(&p->cuda_cond_q, &p->cuda_mutex_q);
SCondWait(&p->cuda_search_cond_q, &p->cuda_search_mutex_q);
p->cuda_done = 1;
SCMutexUnlock(&p->cuda_mutex_q);
SCMutexUnlock(&p->cuda_search_mutex_q);
SCReturnInt(p->cuda_matches);
#endif
}
@ -175,27 +198,39 @@ uint32_t UriPatternMatch(ThreadVars *tv, DetectEngineThreadCtx *det_ctx,
SCEnter();
det_ctx->pmq.mode = PMQ_MODE_SEARCH;
#ifndef __SC_CUDA_SUPPORT__
uint32_t ret;
#ifndef __SC_CUDA_SUPPORT__
ret = mpm_table[det_ctx->sgh->mpm_uri_ctx->mpm_type].Search
(det_ctx->sgh->mpm_uri_ctx, &det_ctx->mtcu, &det_ctx->pmq, uri,
uri_len);
SCReturnInt(ret);
#else
Packet *p = malloc(sizeof(Packet));
if (p == NULL) {
SCLogError(SC_ERR_MEM_ALLOC, "Error allocating memory");
exit(EXIT_FAILURE);
}
memset(p, 0, sizeof(Packet));
p->cuda_done = 0;
SCMutexInit(&p->cuda_search_mutex_q, NULL);
SCCondInit(&p->cuda_search_cond_q, NULL);
//p->cuda_done = 0;
p->cuda_free_packet = 1;
p->cuda_search = 1;
p->cuda_mpm_ctx = det_ctx->sgh->mpm_uri_ctx;
p->cuda_mtc = &det_ctx->mtcu;
p->cuda_pmq = &det_ctx->pmq;
p->payload = uri;
p->payload_len = uri_len;
B2gCudaPushPacketTo_tv_CMB2_RC(p);
SCMutexLock(&p->cuda_mutex_q);
SCondWait(&p->cuda_cond_q, &p->cuda_mutex_q);
SCMutexLock(&p->cuda_search_mutex_q);
SCondWait(&p->cuda_search_cond_q, &p->cuda_search_mutex_q);
SCMutexUnlock(&p->cuda_search_mutex_q);
ret = p->cuda_matches;
p->cuda_done = 1;
SCMutexUnlock(&p->cuda_mutex_q);
SCReturnInt(p->cuda_matches);
SCReturnInt(ret);
#endif
//printf("PacketPatternMatch: ret %" PRIu32 "\n", ret);

@ -2837,17 +2837,20 @@ int SigGroupBuild (DetectEngineCtx *de_ctx) {
SigAddressPrepareStage3(de_ctx);
#ifdef __SC_CUDA_SUPPORT__
/* the AddressPrepareStage3 actually handles the creation of device pointers
* on the gpu. The cuda context that stage3 used would still be attached to
* this host thread. We need to pop this cuda context so that the dispatcher
* thread that we are going to create for the above module we registered
* can attach to this cuda context */
CUcontext context;
if (SCCudaCtxPopCurrent(&context) == -1)
exit(EXIT_FAILURE);
/* start the dispatcher thread for this module */
if (B2gCudaStartDispatcherThreadRC("SC_RULES_CONTENT_B2G_CUDA") == -1)
exit(EXIT_FAILURE);
/* if the user has selected an mpm algo other than b2g_cuda, in spite of
* enabling cuda support, then no cuda contexts or cuda vars would be created.
* Pop the cuda context only after confirming that the MPM algo selected is the
* CUDA mpm algo */
if (de_ctx->mpm_matcher == MPM_B2G_CUDA) {
/* the AddressPrepareStage3 actually handles the creation of device
* pointers on the gpu. The cuda context that stage3 used would still be
* attached to this host thread. We need to pop this cuda context so that
* the dispatcher thread that we are going to create for the above module
* we registered can attach to this cuda context */
CUcontext context;
if (SCCudaCtxPopCurrent(&context) == -1)
exit(EXIT_FAILURE);
}
#endif
// SigAddressPrepareStage5(de_ctx);

@ -194,6 +194,13 @@ Packet *SetupPkt (void)
r = SCMutexInit(&p->mutex_rtv_cnt, NULL);
#ifdef __SC_CUDA_SUPPORT__
SCMutexInit(&p->cuda_scan_mutex_q, NULL);
SCCondInit(&p->cuda_scan_cond_q, NULL);
SCMutexInit(&p->cuda_search_mutex_q, NULL);
SCCondInit(&p->cuda_search_cond_q, NULL);
#endif
SCLogDebug("allocated a new packet...");
}
@ -773,6 +780,13 @@ int main(int argc, char **argv)
}
memset(p, 0, sizeof(Packet));
SCMutexInit(&p->mutex_rtv_cnt, NULL);
#ifdef __SC_CUDA_SUPPORT__
SCMutexInit(&p->cuda_scan_mutex_q, NULL);
SCCondInit(&p->cuda_scan_cond_q, NULL);
SCMutexInit(&p->cuda_search_mutex_q, NULL);
SCCondInit(&p->cuda_search_cond_q, NULL);
#endif
PacketEnqueue(&packet_q,p);
}
@ -834,6 +848,12 @@ int main(int argc, char **argv)
TmThreadPrioSummary("Suricata main()");
#ifdef __SC_CUDA_SUPPORT__
/* start the dispatcher thread for this module */
if (B2gCudaStartDispatcherThreadRC("SC_RULES_CONTENT_B2G_CUDA") == -1)
exit(EXIT_FAILURE);
#endif
/* Spawn the flow manager thread */
FlowManagerThreadSpawn();
@ -916,7 +936,29 @@ int main(int argc, char **argv)
HTPAtExitPrintStats();
/** \todo review whats needed here */
#ifdef __SC_CUDA_SUPPORT__
if (PatternMatchDefaultMatcher() == MPM_B2G_CUDA) {
/* all threadvars related to cuda should have been freed by now, which means
* the cuda contexts would be floating */
if (SCCudaHlPushCudaContextFromModule("SC_RULES_CONTENT_B2G_CUDA") == -1) {
SCLogError(SC_ERR_CUDA_HANDLER_ERROR, "Call to "
"SCCudaHlPushCudaContextForModule() failed during the "
"shutdown phase just before the call to SigGroupCleanup()");
}
}
#endif
SigGroupCleanup(de_ctx);
#ifdef __SC_CUDA_SUPPORT__
if (PatternMatchDefaultMatcher() == MPM_B2G_CUDA) {
/* pop the cuda context we just pushed before the call to SigGroupCleanup() */
if (SCCudaCtxPopCurrent(NULL) == -1) {
SCLogError(SC_ERR_CUDA_HANDLER_ERROR, "Call to SCCudaCtxPopCurrent() "
"during the shutdown phase just before the call to "
"SigGroupCleanup()");
return 0;
}
}
#endif
SigCleanSignatures(de_ctx);
DetectEngineCtxFree(de_ctx);
AlpProtoDestroy();

@ -111,6 +111,7 @@ const char * SCErrorToString(SCError err)
CASE_CODE (SC_ERR_TM_THREADS_ERROR);
CASE_CODE (SC_ERR_TM_MODULES_ERROR);
CASE_CODE (SC_ERR_B2G_CUDA_ERROR);
CASE_CODE (SC_ERR_INVALID_YAML_CONF_ENTRY);
default:
return "UNKNOWN_ERROR";
}

@ -128,6 +128,7 @@ typedef enum {
SC_ERR_TM_THREADS_ERROR,
SC_ERR_TM_MODULES_ERROR,
SC_ERR_B2G_CUDA_ERROR,
SC_ERR_INVALID_YAML_CONF_ENTRY,
} SCError;
const char *SCErrorToString(SCError);

@ -23,6 +23,7 @@
#include "util-debug.h"
#include "util-unittest.h"
#include "detect-engine-mpm.h"
#include "app-layer-detect-proto.h"
#include "util-cuda-handlers.h"
#include "util-cuda.h"
@ -2482,6 +2483,9 @@ TmEcode B2gCudaMpmDispThreadInit(ThreadVars *tv, void *initdata, void **data)
{
SCCudaHlModuleData *module_data = (SCCudaHlModuleData *)initdata;
if (PatternMatchDefaultMatcher() != MPM_B2G_CUDA)
return TM_ECODE_OK;
if (SCCudaCtxPushCurrent(module_data->cuda_context) == -1) {
SCLogError(SC_ERR_B2G_CUDA_ERROR, "Error pushing cuda context");
}
@ -2500,6 +2504,9 @@ TmEcode B2gCudaMpmDispThreadInit(ThreadVars *tv, void *initdata, void **data)
*/
TmEcode B2gCudaMpmDispThreadDeInit(ThreadVars *tv, void *data)
{
if (PatternMatchDefaultMatcher() != MPM_B2G_CUDA)
return TM_ECODE_OK;
if (SCCudaCtxPopCurrent(NULL) == -1) {
SCLogError(SC_ERR_B2G_CUDA_ERROR, "Error popping cuda context");
}
@ -2533,23 +2540,33 @@ TmEcode B2gCudaMpmDispatcher(ThreadVars *tv, Packet *p, void *data,
p->cuda_pmq,
p->payload,
p->payload_len);
/* signal the client that the result is ready */
SCCondSignal(&p->cuda_search_cond_q);
/* wait for the client indication that it has read the results. If the
* client still hasn't sent the indication, signal it again and do so
* every 50 microseconds */
while (p->cuda_done == 0) {
SCCondSignal(&p->cuda_search_cond_q);
usleep(50);
}
} else {
p->cuda_matches = mpm_table[p->cuda_mpm_ctx->mpm_type].Scan(p->cuda_mpm_ctx,
p->cuda_mtc,
p->cuda_pmq,
p->payload,
p->payload_len);
/* signal the client that the result is ready */
SCCondSignal(&p->cuda_scan_cond_q);
/* wait for the client indication that it has read the results. If the
* client still hasn't sent the indication, signal it again and do so
* every 50 microseconds */
while (p->cuda_done == 0) {
SCCondSignal(&p->cuda_scan_cond_q);
usleep(50);
}
}
/* signal the client that the result is ready */
SCCondSignal(&p->cuda_cond_q);
/* wait for the client indication that it has read the results. If the
* client still hasn't sent the indication, signal it again and do so
* every 50 microseconds */
while (p->cuda_done == 0) {
SCCondSignal(&p->cuda_cond_q);
usleep(50);
}
p->cuda_done = 0;
if (p->cuda_free_packet == 1) {
free(p);
@ -2573,8 +2590,6 @@ void TmModuleCudaMpmB2gRegister(void)
/***************************Code_Specific_To_Mpm_B2g***************************/
#ifdef UNITTESTS
int B2gCudaStartDispatcherThreadRC(const char *name)
{
SCCudaHlModuleData *data = NULL;
@ -2747,6 +2762,8 @@ void B2gCudaPushPacketTo_tv_CMB2_APC(Packet *p)
/*********************************Unittests************************************/
#ifdef UNITTESTS
static int B2gCudaTestInitTestEnv(void)
{
SCCudaHlRegisterModule("B2G_CUDA_TEST");

Loading…
Cancel
Save