Minor cosmetic changes to the cuda code.

Moved a couple of functions to more cuda-relevant files.
Restructured some data types.
pull/400/head
Anoop Saldanha 11 years ago committed by Victor Julien
parent c9f076def3
commit 602c91ed41

@ -258,6 +258,7 @@ util-crypt.c util-crypt.h \
util-cuda.c util-cuda.h \
util-cuda-buffer.c util-cuda-buffer.h \
util-cuda-handlers.c util-cuda-handlers.h \
util-cuda-vars.c util-cuda-vars.h \
util-daemon.c util-daemon.h \
util-debug.c util-debug.h \
util-debug-filters.c util-debug-filters.h \

@ -32,6 +32,7 @@
#ifdef __SC_CUDA_SUPPORT__
#include "util-cuda-buffer.h"
#include "util-cuda-vars.h"
#endif /* __SC_CUDA_SUPPORT__ */
typedef enum {
@ -491,12 +492,7 @@ typedef struct Packet_
PktProfiling profile;
#endif
#ifdef __SC_CUDA_SUPPORT__
uint8_t cuda_mpm_enabled;
uint8_t cuda_done;
uint16_t cuda_gpu_matches;
SCMutex cuda_mutex;
SCCondT cuda_cond;
uint32_t cuda_results[(UTIL_MPM_CUDA_DATA_BUFFER_SIZE_MAX_LIMIT_DEFAULT * 2) + 1];
CudaPacketVars cuda_pkt_vars;
#endif
} Packet;
@ -583,21 +579,7 @@ typedef struct DecodeThreadVars_
uint16_t counter_defrag_max_hit;
#ifdef __SC_CUDA_SUPPORT__
/* cb - CudaBuffer */
CudaBufferData *cuda_ac_cb;
MpmCtx *mpm_proto_other_ctx;
MpmCtx *mpm_proto_tcp_ctx_ts;
MpmCtx *mpm_proto_udp_ctx_ts;
MpmCtx *mpm_proto_tcp_ctx_tc;
MpmCtx *mpm_proto_udp_ctx_tc;
uint16_t data_buffer_size_max_limit;
uint16_t data_buffer_size_min_limit;
uint8_t mpm_is_cuda;
CudaThreadVars cuda_vars;
#endif
} DecodeThreadVars;
@ -625,8 +607,8 @@ typedef struct DecodeThreadVars_
PACKET_RESET_CHECKSUMS((p)); \
(p)->pkt = ((uint8_t *)(p)) + sizeof(Packet); \
(p)->livedev = NULL; \
SCMutexInit(&(p)->cuda_mutex, NULL); \
SCCondInit(&(p)->cuda_cond, NULL); \
SCMutexInit(&(p)->cuda_pkt_vars.cuda_mutex, NULL); \
SCCondInit(&(p)->cuda_pkt_vars.cuda_cond, NULL); \
} while (0)
#else
#define PACKET_INITIALIZE(p) { \

@ -225,7 +225,7 @@ uint32_t PacketPatternSearch(DetectEngineThreadCtx *det_ctx, Packet *p)
SCReturnInt(0);
#ifdef __SC_CUDA_SUPPORT__
if (p->cuda_mpm_enabled && p->pkt_src == PKT_SRC_WIRE) {
if (p->cuda_pkt_vars.cuda_mpm_enabled && p->pkt_src == PKT_SRC_WIRE) {
ret = SCACCudaPacketResultsProcessing(p, mpm_ctx, &det_ctx->pmq);
} else {
ret = mpm_table[mpm_ctx->mpm_type].Search(mpm_ctx,

@ -4388,255 +4388,6 @@ int SigAddressPrepareStage5(DetectEngineCtx *de_ctx) {
return 0;
}
#ifdef __SC_CUDA_SUPPORT__
/**
 * \brief Tally how many of the engine's mpm contexts fall into the 16 bit
 *        and the 32 bit ac state table category, and request construction
 *        of both table flavours when the two categories coexist.
 *
 *        Only contexts whose mpm_type is MPM_AC_CUDA are counted.  A context
 *        with a state_count below 32767 is tallied as a 16 bit table, any
 *        other one as a 32 bit table.
 *
 * \param de_ctx Detection engine context whose sgh_mpm_context_* profiles
 *               are inspected.
 */
static void DetermineCudaStateTableSize(DetectEngineCtx *de_ctx)
{
    /* every profile that is inspected; direction 0 and 1 where both exist */
    MpmCtx *mpm_ctxs[] = {
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_proto_tcp_packet, 0),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_proto_tcp_packet, 1),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_proto_udp_packet, 0),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_proto_udp_packet, 1),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_proto_other_packet, 0),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_uri, 0),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_uri, 1),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hcbd, 0),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hcbd, 1),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hhd, 0),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hhd, 1),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hrhd, 0),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hrhd, 1),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hmd, 0),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hmd, 1),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hcd, 0),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hcd, 1),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hrud, 0),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hrud, 1),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_stream, 0),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_stream, 1),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hsmd, 0),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hsmd, 1),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hscd, 0),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hscd, 1),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_huad, 0),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_huad, 1),
    };
    const size_t mpm_ctxs_cnt = sizeof(mpm_ctxs) / sizeof(mpm_ctxs[0]);
    int ac_16_tables = 0;
    int ac_32_tables = 0;
    size_t i;

    for (i = 0; i < mpm_ctxs_cnt; i++) {
        MpmCtx *mpm_ctx = mpm_ctxs[i];

        /* a profile without a context has no state table to account for */
        if (mpm_ctx == NULL || mpm_ctx->mpm_type != MPM_AC_CUDA)
            continue;

        SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
        if (ctx->state_count < 32767)
            ac_16_tables++;
        else
            ac_32_tables++;
    }

    /* both flavours are only built when both are actually in use */
    if (ac_16_tables > 0 && ac_32_tables > 0)
        SCACConstructBoth16and32StateTables();

    SCLogDebug("Total mpm ac 16 bit state tables - %d\n", ac_16_tables);
    SCLogDebug("Total mpm ac 32 bit state tables - %d\n", ac_32_tables);
}
#endif
/**
* \brief Convert the signature list into the runtime match structure.
*

@ -301,6 +301,15 @@ void RunModeDispatch(int runmode, const char *custom_mode, DetectEngineCtx *de_c
}
}
#ifdef __SC_CUDA_SUPPORT__
if (PatternMatchDefaultMatcher() == MPM_AC_CUDA &&
strcasecmp(custom_mode, "autofp") != 0) {
SCLogError(SC_ERR_RUNMODE, "When using a cuda mpm, the only runmode we "
"support is autofp.");
exit(EXIT_FAILURE);
}
#endif
RunMode *mode = RunModeGetCustomMode(runmode, custom_mode);
if (mode == NULL) {
SCLogError(SC_ERR_RUNMODE, "The custom type \"%s\" doesn't exist "

@ -51,8 +51,7 @@
#include "util-cuda-handlers.h"
#include "detect-engine.h"
#include "detect-engine-mpm.h"
static DetectEngineCtx *cuda_de_ctx = NULL;
#include "util-cuda-vars.h"
#endif /* __SC_CUDA_SUPPORT__ */
@ -124,15 +123,6 @@ void TmModuleDecodePcapFileRegister (void) {
tmm_modules[TMM_DECODEPCAPFILE].flags = TM_FLAG_DECODE_TM;
}
#ifdef __SC_CUDA_SUPPORT__
/**
 * \brief Remember the detection engine context in the file-local
 *        cuda_de_ctx pointer.
 *
 * \param de_ctx Detection engine context to store.
 */
void DecodePcapFileSetCudaDeCtx(DetectEngineCtx *de_ctx)
{
    cuda_de_ctx = de_ctx;
}
#endif
void PcapFileCallbackLoop(char *user, struct pcap_pkthdr *h, u_char *pkt) {
SCEnter();
@ -342,90 +332,6 @@ TmEcode ReceivePcapFileThreadDeinit(ThreadVars *tv, void *data) {
SCReturnInt(TM_ECODE_OK);
}
#ifdef __SC_CUDA_SUPPORT__
/**
 * \brief Copy a decoded packet's payload into the cuda ac buffer.
 *
 *        If the packet is still flagged as cuda enabled, the function first
 *        waits until cuda_done has been raised for it.  Packets that have no
 *        payload, are flagged for no payload/packet inspection, carry
 *        PKT_ALLOC, or fall outside the configured payload size limits are
 *        not buffered; cuda_mpm_enabled is cleared for them instead.
 *
 *        Layout written into the slice:
 *        [payload_len, 64 or 32 bit wide depending on __WORDSIZE]
 *        [CUdeviceptr of the context's state_table_u32_cuda]
 *        [payload bytes]
 *
 * \param dtv Decode thread vars holding the cuda buffer, the mpm contexts
 *            and the payload size limits.
 * \param p   Packet whose payload is to be buffered.
 */
static inline void DecodePcapFileBufferPacket(DecodeThreadVars *dtv, Packet *p)
{
    /* the length header in front of the payload follows __WORDSIZE */
#if __WORDSIZE==64
    typedef uint64_t CudaPayloadLenHdr;
#else
    typedef uint32_t CudaPayloadLenHdr;
#endif

    if (p->cuda_mpm_enabled) {
        while (!p->cuda_done) {
            SCMutexLock(&p->cuda_mutex);
            /* re-check under the lock before going to sleep */
            if (!p->cuda_done)
                SCCondWait(&p->cuda_cond, &p->cuda_mutex);
            SCMutexUnlock(&p->cuda_mutex);
        }
    }
    p->cuda_done = 0;

    /* The two "no inspection" flags have to be OR-ed into one mask.  AND-ing
     * them, as was done before, yields an empty mask for distinct flag bits
     * and the check could never fire. */
    if (p->payload_len == 0 ||
        (p->flags & (PKT_NOPAYLOAD_INSPECTION | PKT_NOPACKET_INSPECTION)) ||
        (p->flags & PKT_ALLOC) ||
        (dtv->data_buffer_size_min_limit != 0 && p->payload_len < dtv->data_buffer_size_min_limit) ||
        (dtv->data_buffer_size_max_limit != 0 && p->payload_len > dtv->data_buffer_size_max_limit)) {
        p->cuda_mpm_enabled = 0;
        return;
    }

    /* pick the mpm context matching protocol and flow direction */
    MpmCtx *mpm_ctx = NULL;
    if (p->proto == IPPROTO_TCP) {
        mpm_ctx = (p->flowflags & FLOW_PKT_TOSERVER) ?
            dtv->mpm_proto_tcp_ctx_ts : dtv->mpm_proto_tcp_ctx_tc;
    } else if (p->proto == IPPROTO_UDP) {
        mpm_ctx = (p->flowflags & FLOW_PKT_TOSERVER) ?
            dtv->mpm_proto_udp_ctx_ts : dtv->mpm_proto_udp_ctx_tc;
    } else {
        mpm_ctx = dtv->mpm_proto_other_ctx;
    }
    if (mpm_ctx == NULL || mpm_ctx->pattern_cnt == 0) {
        p->cuda_mpm_enabled = 0;
        return;
    }

    CudaBufferSlice *slice = CudaBufferGetSlice(dtv->cuda_ac_cb,
            p->payload_len + sizeof(CudaPayloadLenHdr) + sizeof(CUdeviceptr),
            (void *)p);
    if (slice == NULL) {
        SCLogError(SC_ERR_FATAL, "Error retrieving slice.  Please report "
                   "this to dev.");
        p->cuda_mpm_enabled = 0;
        return;
    }

    *((CudaPayloadLenHdr *)(slice->buffer + slice->start_offset)) = p->payload_len;
    *((CUdeviceptr *)(slice->buffer + slice->start_offset + sizeof(CudaPayloadLenHdr))) =
        ((SCACCtx *)(mpm_ctx->ctx))->state_table_u32_cuda;
    memcpy(slice->buffer + slice->start_offset + sizeof(CudaPayloadLenHdr) + sizeof(CUdeviceptr),
           p->payload, p->payload_len);

    /* flag the packet before the slice is published as done */
    p->cuda_mpm_enabled = 1;
    SC_ATOMIC_SET(slice->done, 1);

    /* the casts make the arguments match %p and PRIu64 on every platform */
    SCLogDebug("cuda ac buffering packet %p, payload_len - %"PRIu16" and deviceptr - %"PRIu64"\n",
               (void *)p, p->payload_len,
               (uint64_t)((SCACCtx *)(mpm_ctx->ctx))->state_table_u32_cuda);
}
#endif /* __SC_CUDA_SUPPORT__ */
double prev_signaled_ts = 0;
TmEcode DecodePcapFile(ThreadVars *tv, Packet *p, void *data, PacketQueue *pq, PacketQueue *postpq)
@ -457,7 +363,6 @@ TmEcode DecodePcapFile(ThreadVars *tv, Packet *p, void *data, PacketQueue *pq, P
TimeSet(&p->ts);
/* call the decoder */
pcap_g.Decoder(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
#ifdef DEBUG
@ -465,41 +370,13 @@ TmEcode DecodePcapFile(ThreadVars *tv, Packet *p, void *data, PacketQueue *pq, P
#endif
#ifdef __SC_CUDA_SUPPORT__
if (dtv->mpm_is_cuda)
DecodePcapFileBufferPacket(dtv, p);
if (dtv->cuda_vars.mpm_is_cuda)
CudaBufferPacket(&dtv->cuda_vars, p);
#endif
SCReturnInt(TM_ECODE_OK);
}
#ifdef __SC_CUDA_SUPPORT__
/**
 * \brief Fill the cuda related members of the decode thread vars.
 *
 *        Does nothing when the default pattern matcher is not MPM_AC_CUDA.
 *        Otherwise the "mpm" cuda profile, the ac cuda buffer and the
 *        per-protocol/per-direction packet mpm contexts are looked up and
 *        stored in \a dtv.
 *
 * \param dtv Decode thread vars to initialize.
 *
 * \retval  0 On success, or when the cuda mpm is not in use.
 * \retval -1 If the detection engine context has not been set, the cuda
 *            profile is not available, or the cuda buffer can't be obtained.
 */
static int DecodePcapFileThreadInitCuda(DecodeThreadVars *dtv)
{
    if (PatternMatchDefaultMatcher() != MPM_AC_CUDA)
        return 0;

    /* the lookups below dereference cuda_de_ctx */
    if (cuda_de_ctx == NULL) {
        SCLogError(SC_ERR_AC_CUDA_ERROR, "Detection engine context not set.  "
                   "DecodePcapFileSetCudaDeCtx() has to be called before the "
                   "decode thread is initialized.");
        return -1;
    }

    MpmCudaConf *conf = CudaHandlerGetCudaProfile("mpm");
    if (conf == NULL) {
        SCLogError(SC_ERR_AC_CUDA_ERROR, "Error obtaining cuda mpm profile.");
        return -1;
    }

    CudaBufferData *cb = CudaHandlerModuleGetData(MPM_AC_CUDA_MODULE_NAME,
                                                  MPM_AC_CUDA_MODULE_CUDA_BUFFER_NAME);
    if (cb == NULL) {
        SCLogError(SC_ERR_AC_CUDA_ERROR, "Error obtaining cuda ac buffer.");
        return -1;
    }

    dtv->cuda_ac_cb = cb;
    dtv->data_buffer_size_max_limit = conf->data_buffer_size_max_limit;
    dtv->data_buffer_size_min_limit = conf->data_buffer_size_min_limit;
    dtv->mpm_proto_tcp_ctx_ts = MpmFactoryGetMpmCtxForProfile(cuda_de_ctx, cuda_de_ctx->sgh_mpm_context_proto_tcp_packet, 0);
    dtv->mpm_proto_tcp_ctx_tc = MpmFactoryGetMpmCtxForProfile(cuda_de_ctx, cuda_de_ctx->sgh_mpm_context_proto_tcp_packet, 1);
    dtv->mpm_proto_udp_ctx_ts = MpmFactoryGetMpmCtxForProfile(cuda_de_ctx, cuda_de_ctx->sgh_mpm_context_proto_udp_packet, 0);
    dtv->mpm_proto_udp_ctx_tc = MpmFactoryGetMpmCtxForProfile(cuda_de_ctx, cuda_de_ctx->sgh_mpm_context_proto_udp_packet, 1);
    dtv->mpm_proto_other_ctx = MpmFactoryGetMpmCtxForProfile(cuda_de_ctx, cuda_de_ctx->sgh_mpm_context_proto_other_packet, 0);

    /* only advertise cuda once every lookup above has succeeded */
    dtv->mpm_is_cuda = 1;

    return 0;
}
#endif /* __SC_CUDA_SUPPORT__ */
TmEcode DecodePcapFileThreadInit(ThreadVars *tv, void *initdata, void **data)
{
SCEnter();
@ -512,7 +389,7 @@ TmEcode DecodePcapFileThreadInit(ThreadVars *tv, void *initdata, void **data)
DecodeRegisterPerfCounters(dtv, tv);
#ifdef __SC_CUDA_SUPPORT__
if (DecodePcapFileThreadInitCuda(dtv) < 0)
if (CudaThreadVarsInit(&dtv->cuda_vars) < 0)
SCReturnInt(TM_ECODE_FAILED);
#endif

@ -26,9 +26,6 @@
void TmModuleReceivePcapFileRegister (void);
void TmModuleDecodePcapFileRegister (void);
#ifdef __SC_CUDA_SUPPORT__
void DecodePcapFileSetCudaDeCtx(DetectEngineCtx *de_ctx);
#endif
#endif /* __SOURCE_PCAP_FILE_H__ */

@ -1829,7 +1829,7 @@ int main(int argc, char **argv)
}
#ifdef __SC_CUDA_SUPPORT__
if (PatternMatchDefaultMatcher() == MPM_AC_CUDA)
DecodePcapFileSetCudaDeCtx(de_ctx);
CudaVarsSetDeCtx(de_ctx);
#endif /* __SC_CUDA_SUPPORT__ */
SCClassConfLoadClassficationConfigFile(de_ctx);

@ -272,7 +272,7 @@ CudaBufferSlice *CudaBufferGetSlice(CudaBufferData *cb_data, uint32_t len, void
if (cb_data->d_buffer_write < cb_data->d_buffer_read) {
if (cb_data->d_buffer_write + len >= cb_data->d_buffer_read) {
SCLogInfo("d_buffer full");
SCLogDebug("d_buffer full");
SCMutexUnlock(&cb_data->m);
SCMutexLock(&slice_pool_mutex);
@ -282,7 +282,7 @@ CudaBufferSlice *CudaBufferGetSlice(CudaBufferData *cb_data, uint32_t len, void
}
} else {
if (cb_data->d_buffer_write + len > cb_data->d_buffer_len) {
SCLogInfo("d_buffer limit hit - buffer_len - %"PRIu32,
SCLogDebug("d_buffer limit hit - buffer_len - %"PRIu32,
cb_data->d_buffer_len);
SCMutexUnlock(&cb_data->m);
@ -295,7 +295,7 @@ CudaBufferSlice *CudaBufferGetSlice(CudaBufferData *cb_data, uint32_t len, void
if (cb_data->op_buffer_write < cb_data->op_buffer_read) {
if (cb_data->op_buffer_write + 1 >= cb_data->op_buffer_read) {
SCLogInfo("op_buffer full");
SCLogDebug("op_buffer full");
SCMutexUnlock(&cb_data->m);
SCMutexLock(&slice_pool_mutex);
@ -305,7 +305,7 @@ CudaBufferSlice *CudaBufferGetSlice(CudaBufferData *cb_data, uint32_t len, void
}
} else {
if (cb_data->op_buffer_write + 1 > cb_data->op_buffer_len) {
SCLogInfo("op_buffer limit hit - buffer_len - %"PRIu32,
SCLogDebug("op_buffer limit hit - buffer_len - %"PRIu32,
cb_data->op_buffer_len);
SCMutexUnlock(&cb_data->m);
@ -866,7 +866,7 @@ int CudaBufferTest02(void)
int CudaBufferTest03(void)
{
CudaBufferSlice *slice1, *slice2, *slice3, *slice_temp;
CudaBufferSlice *slice, *slice_temp;
int result = 0;
uint8_t *d_buffer = SCMalloc(sizeof(uint8_t) * 64);
@ -886,9 +886,9 @@ int CudaBufferTest03(void)
goto end;
}
slice1 = CudaBufferGetSlice(data, 16, NULL);
slice2 = CudaBufferGetSlice(data, 16, NULL);
slice3 = CudaBufferGetSlice(data, 24, NULL);
slice = CudaBufferGetSlice(data, 16, NULL);
slice = CudaBufferGetSlice(data, 16, NULL);
slice = CudaBufferGetSlice(data, 24, NULL);
/* culling */
CudaBufferCulledInfo culled_info;

@ -0,0 +1,74 @@
/* Copyright (C) 2007-2010 Open Information Security Foundation
*
* You can copy, redistribute or modify this Program under the terms of
* the GNU General Public License version 2 as published by the Free
* Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
/**
* \file
*
* \author Anoop Saldanha <anoopsaldanha@gmail.com>
*/
#ifdef __SC_CUDA_SUPPORT__
#include "suricata.h"
#include "util-mpm.h"
#include "util-cuda-handlers.h"
#include "util-cuda-vars.h"
#include "detect-engine-mpm.h"
#include "util-debug.h"
#include "util-mpm-ac.h"
static DetectEngineCtx *cuda_de_ctx = NULL;
/**
 * \brief Register the detection engine context with the cuda vars code.
 *
 *        The context is kept in the file-local cuda_de_ctx pointer.  Calling
 *        this function while a context is already registered logs an error
 *        and terminates the process.
 *
 * \param de_ctx Detection engine context to register.
 */
void CudaVarsSetDeCtx(DetectEngineCtx *de_ctx)
{
    if (cuda_de_ctx == NULL) {
        cuda_de_ctx = de_ctx;
        return;
    }

    /* a context has been registered before - treat it as fatal */
    SCLogError(SC_ERR_FATAL, "CudaVarsSetDeCtx() called more than once.  "
               "This function should be called only once during the "
               "lifetime of the engine.");
    exit(EXIT_FAILURE);
}
/**
 * \brief Initialize a thread's cuda vars.
 *
 *        Does nothing when the default pattern matcher is not MPM_AC_CUDA.
 *        Otherwise the "mpm" cuda profile, the ac cuda buffer and the
 *        per-protocol/per-direction packet mpm contexts are looked up and
 *        stored in \a ctv.
 *
 * \param ctv Cuda thread vars to initialize.
 *
 * \retval  0 On success, or when the cuda mpm is not in use.
 * \retval -1 If the detection engine context has not been set, the cuda
 *            profile is not available, or the cuda buffer can't be obtained.
 */
int CudaThreadVarsInit(CudaThreadVars *ctv)
{
    if (PatternMatchDefaultMatcher() != MPM_AC_CUDA)
        return 0;

    /* the lookups below dereference cuda_de_ctx */
    if (cuda_de_ctx == NULL) {
        SCLogError(SC_ERR_AC_CUDA_ERROR, "Detection engine context not set.  "
                   "CudaVarsSetDeCtx() has to be called before the cuda "
                   "thread vars are initialized.");
        return -1;
    }

    MpmCudaConf *conf = CudaHandlerGetCudaProfile("mpm");
    if (conf == NULL) {
        SCLogError(SC_ERR_AC_CUDA_ERROR, "Error obtaining cuda mpm profile.");
        return -1;
    }

    CudaBufferData *cb = CudaHandlerModuleGetData(MPM_AC_CUDA_MODULE_NAME,
                                                  MPM_AC_CUDA_MODULE_CUDA_BUFFER_NAME);
    if (cb == NULL) {
        SCLogError(SC_ERR_AC_CUDA_ERROR, "Error obtaining cuda ac buffer.");
        return -1;
    }

    ctv->cuda_ac_cb = cb;
    ctv->data_buffer_size_max_limit = conf->data_buffer_size_max_limit;
    ctv->data_buffer_size_min_limit = conf->data_buffer_size_min_limit;
    ctv->mpm_proto_tcp_ctx_ts = MpmFactoryGetMpmCtxForProfile(cuda_de_ctx, cuda_de_ctx->sgh_mpm_context_proto_tcp_packet, 0);
    ctv->mpm_proto_tcp_ctx_tc = MpmFactoryGetMpmCtxForProfile(cuda_de_ctx, cuda_de_ctx->sgh_mpm_context_proto_tcp_packet, 1);
    ctv->mpm_proto_udp_ctx_ts = MpmFactoryGetMpmCtxForProfile(cuda_de_ctx, cuda_de_ctx->sgh_mpm_context_proto_udp_packet, 0);
    ctv->mpm_proto_udp_ctx_tc = MpmFactoryGetMpmCtxForProfile(cuda_de_ctx, cuda_de_ctx->sgh_mpm_context_proto_udp_packet, 1);
    ctv->mpm_proto_other_ctx = MpmFactoryGetMpmCtxForProfile(cuda_de_ctx, cuda_de_ctx->sgh_mpm_context_proto_other_packet, 0);

    /* only advertise cuda once every lookup above has succeeded */
    ctv->mpm_is_cuda = 1;

    return 0;
}
#endif

@ -0,0 +1,65 @@
/* Copyright (C) 2007-2010 Open Information Security Foundation
*
* You can copy, redistribute or modify this Program under the terms of
* the GNU General Public License version 2 as published by the Free
* Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
/**
* \file
*
* \author Anoop Saldanha <anoopsaldanha@gmail.com>
*/
#ifdef __SC_CUDA_SUPPORT__
#ifndef __UTIL_CUDA_VARS__H__
#define __UTIL_CUDA_VARS__H__
#include "util-cuda-buffer.h"
#include "util-mpm.h"
#include "threads.h"
/**
 * \brief Per thread cuda state, filled in by CudaThreadVarsInit().
 */
typedef struct CudaThreadVars_ {
/* cb - CudaBuffer.  Buffer the packet payloads are written into; obtained
 * from the cuda handler under the ac cuda module/buffer names. */
CudaBufferData *cuda_ac_cb;
/* packet mpm context used for packets that are neither tcp nor udp */
MpmCtx *mpm_proto_other_ctx;
/* packet mpm contexts for tcp/udp.  The _ts variant is picked when the
 * packet has FLOW_PKT_TOSERVER set, the _tc variant otherwise. */
MpmCtx *mpm_proto_tcp_ctx_ts;
MpmCtx *mpm_proto_udp_ctx_ts;
MpmCtx *mpm_proto_tcp_ctx_tc;
MpmCtx *mpm_proto_udp_ctx_tc;
/* payload length limits taken from the cuda "mpm" profile.  Payloads
 * outside of [min, max] are not buffered; a limit of 0 is not enforced. */
uint16_t data_buffer_size_max_limit;
uint16_t data_buffer_size_min_limit;
/* set to 1 by the init code when the default matcher is MPM_AC_CUDA */
uint8_t mpm_is_cuda;
} CudaThreadVars;
/**
 * \brief Per packet cuda state, embedded in Packet as cuda_pkt_vars.
 */
typedef struct CudaPacketVars_ {
/* 1 while the packet's payload has been handed to the cuda buffer; cleared
 * when the packet is not buffered and again once its results are processed */
uint8_t cuda_mpm_enabled;
/* raised by the cuda dispatcher (under cuda_mutex) once the results for
 * this packet are in place; reset to 0 by the code that waited for it */
uint8_t cuda_done;
/* match count the dispatcher read from the results buffer for this packet */
uint16_t cuda_gpu_matches;
/* mutex and condition used to wait for/signal cuda_done */
SCMutex cuda_mutex;
SCCondT cuda_cond;
/* results copied over by the dispatcher.  Element 0 is the match count,
 * the result processing code starts reading at element 1. */
uint32_t cuda_results[(UTIL_MPM_CUDA_DATA_BUFFER_SIZE_MAX_LIMIT_DEFAULT * 2) + 1];
} CudaPacketVars;
void CudaVarsSetDeCtx(struct DetectEngineCtx_ *de_ctx);
int CudaThreadVarsInit(CudaThreadVars *ctv);
#endif /* __UTIL_CUDA_VARS__H__ */
#endif /* __SC_CUDA_SUPPORT__ */

@ -1408,6 +1408,257 @@ void SCACPrintInfo(MpmCtx *mpm_ctx)
/****************************Cuda side of things****************************/
#ifdef __SC_CUDA_SUPPORT__
/* \todo Technically it's generic to all mpms, but since we use ac only, the
* code internally directly references ac and hence it has found its
* home in this file, instead of util-mpm.c
*/
/**
 * \brief Tally how many of the engine's mpm contexts fall into the 16 bit
 *        and the 32 bit ac state table category, and request construction
 *        of both table flavours when the two categories coexist.
 *
 *        Only contexts whose mpm_type is MPM_AC_CUDA are counted.  A context
 *        with a state_count below 32767 is tallied as a 16 bit table, any
 *        other one as a 32 bit table.
 *
 * \param de_ctx Detection engine context whose sgh_mpm_context_* profiles
 *               are inspected.
 */
void DetermineCudaStateTableSize(DetectEngineCtx *de_ctx)
{
    /* every profile that is inspected; direction 0 and 1 where both exist */
    MpmCtx *profile_ctxs[] = {
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_proto_tcp_packet, 0),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_proto_tcp_packet, 1),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_proto_udp_packet, 0),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_proto_udp_packet, 1),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_proto_other_packet, 0),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_uri, 0),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_uri, 1),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hcbd, 0),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hcbd, 1),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hhd, 0),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hhd, 1),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hrhd, 0),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hrhd, 1),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hmd, 0),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hmd, 1),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hcd, 0),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hcd, 1),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hrud, 0),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hrud, 1),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_stream, 0),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_stream, 1),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hsmd, 0),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hsmd, 1),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hscd, 0),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hscd, 1),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_huad, 0),
        MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_huad, 1),
    };
    const size_t profile_ctxs_cnt = sizeof(profile_ctxs) / sizeof(profile_ctxs[0]);
    int ac_16_tables = 0;
    int ac_32_tables = 0;
    size_t i;

    for (i = 0; i < profile_ctxs_cnt; i++) {
        MpmCtx *mpm_ctx = profile_ctxs[i];

        /* a profile without a context has no state table to account for */
        if (mpm_ctx == NULL || mpm_ctx->mpm_type != MPM_AC_CUDA)
            continue;

        SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
        if (ctx->state_count < 32767)
            ac_16_tables++;
        else
            ac_32_tables++;
    }

    /* both flavours are only built when both are actually in use */
    if (ac_16_tables > 0 && ac_32_tables > 0)
        SCACConstructBoth16and32StateTables();

    SCLogDebug("Total mpm ac 16 bit state tables - %d\n", ac_16_tables);
    SCLogDebug("Total mpm ac 32 bit state tables - %d\n", ac_32_tables);
}
/* \todos
* - Use texture memory - Can we fit all the arrays into a 3d texture.
* Texture memory definitely offers slightly better performance even
@ -1607,20 +1858,20 @@ static void *SCACCudaDispatcher(void *arg)
for (uint32_t i = 0; i < no_of_items; i++, i_op_start_offset++) {
Packet *p = (Packet *)cb_data->p_buffer[i_op_start_offset];
p->cuda_gpu_matches =
p->cuda_pkt_vars.cuda_gpu_matches =
cuda_results_buffer_h[((o_buffer[i_op_start_offset] - d_buffer_start_offset) * 2)];
if (p->cuda_gpu_matches != 0) {
memcpy(p->cuda_results,
if (p->cuda_pkt_vars.cuda_gpu_matches != 0) {
memcpy(p->cuda_pkt_vars.cuda_results,
cuda_results_buffer_h +
((o_buffer[i_op_start_offset] - d_buffer_start_offset) * 2),
(cuda_results_buffer_h[((o_buffer[i_op_start_offset] -
d_buffer_start_offset) * 2)] * sizeof(uint32_t)) + 4);
}
SCMutexLock(&p->cuda_mutex);
p->cuda_done = 1;
SCMutexUnlock(&p->cuda_mutex);
SCCondSignal(&p->cuda_cond);
SCMutexLock(&p->cuda_pkt_vars.cuda_mutex);
p->cuda_pkt_vars.cuda_done = 1;
SCMutexUnlock(&p->cuda_pkt_vars.cuda_mutex);
SCCondSignal(&p->cuda_pkt_vars.cuda_cond);
}
if (no_of_items != 0)
CudaBufferReportCulledConsumption(cb_data, &cb_culled_info);
@ -1666,25 +1917,25 @@ uint32_t SCACCudaPacketResultsProcessing(Packet *p, MpmCtx *mpm_ctx,
{
uint32_t u = 0;
while (!p->cuda_done) {
SCMutexLock(&p->cuda_mutex);
if (p->cuda_done) {
SCMutexUnlock(&p->cuda_mutex);
while (!p->cuda_pkt_vars.cuda_done) {
SCMutexLock(&p->cuda_pkt_vars.cuda_mutex);
if (p->cuda_pkt_vars.cuda_done) {
SCMutexUnlock(&p->cuda_pkt_vars.cuda_mutex);
break;
} else {
SCCondWait(&p->cuda_cond, &p->cuda_mutex);
SCMutexUnlock(&p->cuda_mutex);
SCCondWait(&p->cuda_pkt_vars.cuda_cond, &p->cuda_pkt_vars.cuda_mutex);
SCMutexUnlock(&p->cuda_pkt_vars.cuda_mutex);
}
} /* while */
p->cuda_done = 0;
p->cuda_mpm_enabled = 0;
p->cuda_pkt_vars.cuda_done = 0;
p->cuda_pkt_vars.cuda_mpm_enabled = 0;
uint32_t cuda_matches = p->cuda_gpu_matches;
uint32_t cuda_matches = p->cuda_pkt_vars.cuda_gpu_matches;
if (cuda_matches == 0)
return 0;
uint32_t matches = 0;
uint32_t *results = p->cuda_results + 1;
uint32_t *results = p->cuda_pkt_vars.cuda_results + 1;
uint8_t *buf = p->payload;
SCACCtx *ctx = mpm_ctx->ctx;
SCACOutputTable *output_table = ctx->output_table;

@ -30,6 +30,8 @@
#ifdef __SC_CUDA_SUPPORT__
#include "util-cuda.h"
#include "util-cuda-vars.h"
#include "decode.h"
#endif /* __SC_CUDA_SUPPORT__ */
typedef struct SCACPattern_ {
@ -108,6 +110,85 @@ void MpmACRegister(void);
#define MPM_AC_CUDA_MODULE_NAME "ac_cuda"
#define MPM_AC_CUDA_MODULE_CUDA_BUFFER_NAME "ac_cuda_cb"
/**
 * \brief Copy a packet's payload into the cuda ac buffer.
 *
 *        If the packet is still flagged as cuda enabled, the function first
 *        waits until cuda_done has been raised for it.  Packets that have no
 *        payload, are flagged for no payload/packet inspection, carry
 *        PKT_ALLOC, or fall outside the configured payload size limits are
 *        not buffered; cuda_mpm_enabled is cleared for them instead.
 *
 *        Layout written into the slice:
 *        [payload_len, 64 or 32 bit wide depending on __WORDSIZE]
 *        [CUdeviceptr of the context's state_table_u32_cuda]
 *        [payload bytes]
 *
 * \param ctv Cuda thread vars holding the cuda buffer, the mpm contexts and
 *            the payload size limits.
 * \param p   Packet whose payload is to be buffered.
 */
static inline void CudaBufferPacket(CudaThreadVars *ctv, Packet *p)
{
    /* the length header in front of the payload follows __WORDSIZE */
#if __WORDSIZE==64
    typedef uint64_t CudaPayloadLenHdr;
#else
    typedef uint32_t CudaPayloadLenHdr;
#endif

    if (p->cuda_pkt_vars.cuda_mpm_enabled) {
        while (!p->cuda_pkt_vars.cuda_done) {
            SCMutexLock(&p->cuda_pkt_vars.cuda_mutex);
            /* re-check under the lock before going to sleep */
            if (!p->cuda_pkt_vars.cuda_done)
                SCCondWait(&p->cuda_pkt_vars.cuda_cond, &p->cuda_pkt_vars.cuda_mutex);
            SCMutexUnlock(&p->cuda_pkt_vars.cuda_mutex);
        }
    }
    p->cuda_pkt_vars.cuda_done = 0;

    /* The two "no inspection" flags have to be OR-ed into one mask.  AND-ing
     * them, as was done before, yields an empty mask for distinct flag bits
     * and the check could never fire. */
    if (p->payload_len == 0 ||
        (p->flags & (PKT_NOPAYLOAD_INSPECTION | PKT_NOPACKET_INSPECTION)) ||
        (p->flags & PKT_ALLOC) ||
        (ctv->data_buffer_size_min_limit != 0 && p->payload_len < ctv->data_buffer_size_min_limit) ||
        (ctv->data_buffer_size_max_limit != 0 && p->payload_len > ctv->data_buffer_size_max_limit)) {
        p->cuda_pkt_vars.cuda_mpm_enabled = 0;
        return;
    }

    /* pick the mpm context matching protocol and flow direction */
    MpmCtx *mpm_ctx = NULL;
    if (p->proto == IPPROTO_TCP) {
        mpm_ctx = (p->flowflags & FLOW_PKT_TOSERVER) ?
            ctv->mpm_proto_tcp_ctx_ts : ctv->mpm_proto_tcp_ctx_tc;
    } else if (p->proto == IPPROTO_UDP) {
        mpm_ctx = (p->flowflags & FLOW_PKT_TOSERVER) ?
            ctv->mpm_proto_udp_ctx_ts : ctv->mpm_proto_udp_ctx_tc;
    } else {
        mpm_ctx = ctv->mpm_proto_other_ctx;
    }
    if (mpm_ctx == NULL || mpm_ctx->pattern_cnt == 0) {
        p->cuda_pkt_vars.cuda_mpm_enabled = 0;
        return;
    }

    CudaBufferSlice *slice = CudaBufferGetSlice(ctv->cuda_ac_cb,
            p->payload_len + sizeof(CudaPayloadLenHdr) + sizeof(CUdeviceptr),
            (void *)p);
    if (slice == NULL) {
        SCLogError(SC_ERR_FATAL, "Error retrieving slice.  Please report "
                   "this to dev.");
        p->cuda_pkt_vars.cuda_mpm_enabled = 0;
        return;
    }

    *((CudaPayloadLenHdr *)(slice->buffer + slice->start_offset)) = p->payload_len;
    *((CUdeviceptr *)(slice->buffer + slice->start_offset + sizeof(CudaPayloadLenHdr))) =
        ((SCACCtx *)(mpm_ctx->ctx))->state_table_u32_cuda;
    memcpy(slice->buffer + slice->start_offset + sizeof(CudaPayloadLenHdr) + sizeof(CUdeviceptr),
           p->payload, p->payload_len);

    /* flag the packet before the slice is published as done */
    p->cuda_pkt_vars.cuda_mpm_enabled = 1;
    SC_ATOMIC_SET(slice->done, 1);

    /* the casts make the arguments match %p and PRIu64 on every platform */
    SCLogDebug("cuda ac buffering packet %p, payload_len - %"PRIu16" and deviceptr - %"PRIu64"\n",
               (void *)p, p->payload_len,
               (uint64_t)((SCACCtx *)(mpm_ctx->ctx))->state_table_u32_cuda);
}
void MpmACCudaRegister(void);
void SCACConstructBoth16and32StateTables(void);
@ -117,6 +198,7 @@ void SCACCudaStartDispatcher(void);
void SCACCudaKillDispatcher(void);
uint32_t SCACCudaPacketResultsProcessing(Packet *p, MpmCtx *mpm_ctx,
PatternMatcherQueue *pmq);
void DetermineCudaStateTableSize(DetectEngineCtx *de_ctx);
#endif /* __SC_CUDA_SUPPORT__ */

@ -10,9 +10,9 @@
# conservative 1024. A higher number will make sure CPU's/CPU cores will be
# more easily kept busy, but may negatively impact caching.
#
# If you are using the CUDA pattern matcher (b2g_cuda below), different rules
# apply. In that case try something like 4000 or more. This is because the CUDA
# pattern matcher scans many packets in parallel.
# If you are using the CUDA pattern matcher (mpm-algo: ac-cuda), different rules
# apply. In that case try something like 60000 or more. This is because the CUDA
# pattern matcher buffers and scans as many packets as possible in parallel.
#max-pending-packets: 1024
# Runmode the engine should use. Please check --list-runmodes to get the available

Loading…
Cancel
Save