mirror of https://github.com/OISF/suricata
batching of packets support for cuda b2g mpm. Supported for both 32 and 64 bit platforms
parent
b3c22cd512
commit
33f4beb0bc
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,139 @@
|
|||||||
|
/**
|
||||||
|
* Copyright (c) 2010 Open Information Security Foundation.
|
||||||
|
*
|
||||||
|
* \author Anoop Saldanha <poonaatsoc@gmail.com>
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __CUDA_PACKET_BATCHER_H__
|
||||||
|
#define __CUDA_PACKET_BATCHER_H__
|
||||||
|
|
||||||
|
#include "suricata-common.h"
|
||||||
|
|
||||||
|
/* compile in, only if we have a CUDA enabled on this machine */
|
||||||
|
#ifdef __SC_CUDA_SUPPORT__
|
||||||
|
|
||||||
|
#include "util-cuda.h"
|
||||||
|
|
||||||
|
/* The minimum number of packets that we allot the buffer for.  We will make
 * this user configurable (yaml) based on the traffic the user expects.  Either
 * way, for a low/medium traffic network with only occasional sgh matches, cuda
 * should not be enabled; it would only end up hurting performance. */
|
||||||
|
#define SC_CUDA_PB_MIN_NO_OF_PACKETS 4000
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Implement the template SCDQGenericQData to transfer the cuda
|
||||||
|
* packet buffer from the cuda batcher thread to the dispatcher
|
||||||
|
* thread using the queue SCDQDataQueue.
|
||||||
|
*/
|
||||||
|
typedef struct SCCudaPBPacketsBuffer_ {
|
||||||
|
/* these members from the template SCDQGenericQData that have to be
|
||||||
|
* compulsarily implemented */
|
||||||
|
struct SCDQGenericQData_ *next;
|
||||||
|
struct SCDQGenericQData_ *prev;
|
||||||
|
/* if we want to consider this pointer as the head of a list, this var
|
||||||
|
* holds the no of elements in the list */
|
||||||
|
//uint16_t len;
|
||||||
|
/* in case this data instance is the head of a list, we can refer the
|
||||||
|
* bottomost instance directly using this var */
|
||||||
|
//struct SCDQGenericaQData *bot;
|
||||||
|
|
||||||
|
/* our own members from here on*/
|
||||||
|
|
||||||
|
/* current count of packets held in packets_buffer. nop = no of packets */
|
||||||
|
uint32_t nop_in_buffer;
|
||||||
|
/* the packets buffer. We will assign buffer for SC_CUDA_PB_MIN_NO_OF_PACKETS
|
||||||
|
* packets. Basically the size of this buffer would be
|
||||||
|
* SC_CUDA_PB_MIN_NO_OF_PACKETS * sizeof(SCCudaPBPacketDataForGPU), so that
|
||||||
|
* we can hold mininum SC_CUDA_PB_MIN_NO_OF_PACKETS */
|
||||||
|
uint8_t *packets_buffer;
|
||||||
|
/* length of data buffered so far in packets_buffer, which would be sent
|
||||||
|
* to the GPU. We will need this to copy the buffered data from the
|
||||||
|
* packets_buffer here on the host, to the buffer on the GPU */
|
||||||
|
uint32_t packets_buffer_len;
|
||||||
|
/* packet offset within the packets_buffer. Each packet would be stored in
|
||||||
|
* packets buffer at a particular offset. This buffer would indicate the
|
||||||
|
* offset of a packet inside the packet buffer. We will allot space to hold
|
||||||
|
* offsets for SC_CUDA_PB_MIN_NO_OF_PACKETS packets
|
||||||
|
* \todo change it to holds offsets for more than SC_CUDA_PB_MIN_NO_OF_PACKETS
|
||||||
|
* when we use the buffer to hold packets based on the remaining size in the
|
||||||
|
* buffer rather than on a fixed limit like SC_CUDA_PB_MIN_NO_OF_PACKETS */
|
||||||
|
uint32_t *packets_offset_buffer;
|
||||||
|
|
||||||
|
/* the total packet payload lengths buffered so far. We will need this to
|
||||||
|
* transfer the total length of the results buffer that has to be transferred
|
||||||
|
* back from the gpu */
|
||||||
|
uint32_t packets_total_payload_len;
|
||||||
|
/* the payload offsets for the different payload lengths buffered in. For
|
||||||
|
* example if we buffer 4 packets of lengths 3, 4, 5, 6, we will store four
|
||||||
|
* offsets in the buffer {0, 3, 7, 12, 18} */
|
||||||
|
uint32_t *packets_payload_offset_buffer;
|
||||||
|
|
||||||
|
/* packet addresses for all the packets buffered in the packets_buffer. We
|
||||||
|
* will allot space to hold packet addresses for SC_CUDA_PB_MIN_NO_OF_PACKETS.
|
||||||
|
* We will need this, so that the cuda mpm b2g dispatcher thread can inform
|
||||||
|
* and store the b2g cuda mpm results for the packet*/
|
||||||
|
Packet **packets_address_buffer;
|
||||||
|
} SCCudaPBPacketsBuffer;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Structure for each packet that is being batched to the GPU.
|
||||||
|
*/
|
||||||
|
typedef struct SCCudaPBPacketDataForGPU_ {
|
||||||
|
/* holds B2gCudaCtx->m */
|
||||||
|
unsigned int m;
|
||||||
|
/* holds B2gCudaCtx->cuda_B2g */
|
||||||
|
CUdeviceptr table;
|
||||||
|
/* holds the length of the payload */
|
||||||
|
unsigned int payload_len;
|
||||||
|
/* holds the payload. While we actually store the payload in the buffer,
|
||||||
|
* we may not end up using the entire 1480 bytes if the payload is smaller */
|
||||||
|
uint8_t payload[1480];
|
||||||
|
} SCCudaPBPacketDataForGPU;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Same as struct SCCudaPBPacketDataForGPU_ except for the payload part.
|
||||||
|
* We will need this for calculating the size of the non-payload part
|
||||||
|
* of the packet data to be buffered.
|
||||||
|
*/
|
||||||
|
typedef struct SCCudaPBPacketDataForGPUNonPayload_ {
|
||||||
|
/* holds B2gCudaCtx->m */
|
||||||
|
unsigned int m;
|
||||||
|
/* holds B2gCudaCtx->cuda_B2g */
|
||||||
|
CUdeviceptr table;
|
||||||
|
/* holds the length of the payload */
|
||||||
|
unsigned int payload_len;
|
||||||
|
} SCCudaPBPacketDataForGPUNonPayload;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief The cuda packet batcher threading context.
|
||||||
|
*/
|
||||||
|
typedef struct SCCudaPBThreadCtx_ {
|
||||||
|
/* we need the detection engine context to retrieve the sgh while we start
|
||||||
|
* receiving and batching the packets */
|
||||||
|
DetectEngineCtx *de_ctx;
|
||||||
|
|
||||||
|
/* packets buffer currently in use inside the cuda batcher thread */
|
||||||
|
SCCudaPBPacketsBuffer *curr_pb;
|
||||||
|
} SCCudaPBThreadCtx;
|
||||||
|
|
||||||
|
SCCudaPBPacketsBuffer *SCCudaPBAllocSCCudaPBPacketsBuffer(void);
|
||||||
|
void SCCudaPBDeAllocSCCudaPBPacketsBuffer(SCCudaPBPacketsBuffer *);
|
||||||
|
|
||||||
|
void SCCudaPBSetBufferPacketThreshhold(uint32_t);
|
||||||
|
void SCCudaPBCleanUpQueuesAndBuffers(void);
|
||||||
|
void SCCudaPBSetUpQueuesAndBuffers(void);
|
||||||
|
void SCCudaPBKillBatchingPackets(void);
|
||||||
|
|
||||||
|
TmEcode SCCudaPBBatchPackets(ThreadVars *, Packet *, void *, PacketQueue *);
|
||||||
|
TmEcode SCCudaPBThreadInit(ThreadVars *, void *, void **);
|
||||||
|
TmEcode SCCudaPBThreadDeInit(ThreadVars *, void *);
|
||||||
|
void SCCudaPBThreadExitStats(ThreadVars *, void *);
|
||||||
|
void SCCudaPBRegisterTests(void);
|
||||||
|
|
||||||
|
void TmModuleCudaPacketBatcherRegister(void);
|
||||||
|
|
||||||
|
void *SCCudaPBTmThreadsSlot1(void *);
|
||||||
|
|
||||||
|
#endif /* __SC_CUDA_SUPPORT__ */
|
||||||
|
|
||||||
|
#endif /* __CUDA_PACKET_BATCHER_H__ */
|
||||||
@ -0,0 +1,93 @@
|
|||||||
|
/**
|
||||||
|
* Copyright (c) 2009, 2010 Open Information Security Foundation.
|
||||||
|
*
|
||||||
|
* \author Anoop Saldanha <poonaatsoc@gmail.com>
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "suricata-common.h"
|
||||||
|
#include "data-queue.h"
|
||||||
|
#include "threads.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Enqueues data on the queue.
|
||||||
|
*
|
||||||
|
* \param q Pointer to the data queue.
|
||||||
|
* \param data Pointer to the data to be queued. It should be a pointer to a
|
||||||
|
* structure instance that implements the template structure
|
||||||
|
* struct SCDQGenericQData_ defined in data-queue.h.
|
||||||
|
*/
|
||||||
|
void SCDQDataEnqueue(SCDQDataQueue *q, SCDQGenericQData *data)
|
||||||
|
{
|
||||||
|
/* we already have some data in queue */
|
||||||
|
if (q->top != NULL) {
|
||||||
|
data->next = q->top;
|
||||||
|
q->top->prev = data;
|
||||||
|
q->top = data;
|
||||||
|
|
||||||
|
/* the queue is empty */
|
||||||
|
} else {
|
||||||
|
q->top = data;
|
||||||
|
q->bot = data;
|
||||||
|
}
|
||||||
|
|
||||||
|
q->len++;
|
||||||
|
|
||||||
|
#ifdef DBG_PERF
|
||||||
|
if (q->len > q->dbg_maxlen)
|
||||||
|
q->dbg_maxlen = q->len;
|
||||||
|
#endif /* DBG_PERF */
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Dequeues and returns an entry from the queue.
|
||||||
|
*
|
||||||
|
* \param q Pointer to the data queue.
|
||||||
|
* \param retval Pointer to the data that has been enqueued. The instance
|
||||||
|
* returned is/should be a pointer to a structure instance that
|
||||||
|
* implements the template structure struct SCDQGenericQData_
|
||||||
|
* defined in data-queue.h.
|
||||||
|
*/
|
||||||
|
SCDQGenericQData *SCDQDataDequeue(SCDQDataQueue *q)
|
||||||
|
{
|
||||||
|
SCDQGenericQData *data = NULL;
|
||||||
|
|
||||||
|
/* if the queue is empty there are is no data left and we return NULL */
|
||||||
|
if (q->len == 0) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If we are going to get the last packet, set len to 0
|
||||||
|
* before doing anything else (to make the threads to follow
|
||||||
|
* the SCondWait as soon as possible) */
|
||||||
|
q->len--;
|
||||||
|
|
||||||
|
/* pull the bottom packet from the queue */
|
||||||
|
data = q->bot;
|
||||||
|
|
||||||
|
#ifdef OS_DARWIN
|
||||||
|
/* Weird issue in OS_DARWIN
|
||||||
|
* Sometimes it looks that two thread arrive here at the same time
|
||||||
|
* so the bot ptr is NULL */
|
||||||
|
if (data == NULL) {
|
||||||
|
printf("No data to dequeue!\n");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
#endif /* OS_DARWIN */
|
||||||
|
|
||||||
|
/* more data in queue */
|
||||||
|
if (q->bot->prev != NULL) {
|
||||||
|
q->bot = q->bot->prev;
|
||||||
|
q->bot->next = NULL;
|
||||||
|
/* just the one we remove, so now empty */
|
||||||
|
} else {
|
||||||
|
q->top = NULL;
|
||||||
|
q->bot = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
data->next = NULL;
|
||||||
|
data->prev = NULL;
|
||||||
|
|
||||||
|
return data;
|
||||||
|
}
|
||||||
@ -0,0 +1,64 @@
|
|||||||
|
/**
|
||||||
|
* Copyright (c) 2009, 2010 Open Information Security Foundation.
|
||||||
|
*
|
||||||
|
* \author Anoop Saldanha <poonaatsoc@gmail.com>
|
||||||
|
*
|
||||||
|
* \file Generic queues. Any instance that wants to get itself on the generic
|
||||||
|
* queue, would have to implement the template struct SCDQGenericQData_
|
||||||
|
* defined below.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __DATA_QUEUE_H__
|
||||||
|
#define __DATA_QUEUE_H__
|
||||||
|
|
||||||
|
#include "threads.h"
|
||||||
|
|
||||||
|
/**
 * \brief Generic template for any data structure that wants to be on the
 *        queue.
 *
 *        A structure that wants to be queued starts its definition with
 *        these two link members - next first, prev second - and declares its
 *        own members after them.  Instances of it are then handed to the
 *        queue functions as SCDQGenericQData pointers.
 */
typedef struct SCDQGenericQData_ {
    /* queue links; these also make it possible to supply a list of data
     * items.  SCDQDataEnqueue() points next at the item that was on top
     * before this one, and prev of that item back at this one */
    struct SCDQGenericQData_ *next;
    struct SCDQGenericQData_ *prev;

    /* members that are specific to the implementing structure go after the
     * two links above, in the implementing structure's own definition */
} SCDQGenericQData;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief The data queue to hold instances that implement the template
|
||||||
|
* SCDQGenericQData.
|
||||||
|
*/
|
||||||
|
typedef struct SCDQDataQueue_ {
|
||||||
|
/* holds the item at the top of the queue */
|
||||||
|
SCDQGenericQData *top;
|
||||||
|
/* holds the item at the bottom of the queue */
|
||||||
|
SCDQGenericQData *bot;
|
||||||
|
/* no of items currently in the queue */
|
||||||
|
uint16_t len;
|
||||||
|
|
||||||
|
SCMutex mutex_q;
|
||||||
|
SCCondT cond_q;
|
||||||
|
|
||||||
|
#ifdef DBG_PERF
|
||||||
|
uint16_t dbg_maxlen;
|
||||||
|
#endif /* DBG_PERF */
|
||||||
|
} SCDQDataQueue;
|
||||||
|
|
||||||
|
/* put an item on top of the queue / take the item off the bottom of it */
void SCDQDataEnqueue(SCDQDataQueue *q, SCDQGenericQData *data);
SCDQGenericQData *SCDQDataDequeue(SCDQDataQueue *q);
|
||||||
|
|
||||||
|
#endif /* __DATA_QUEUE_H__ */
|
||||||
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue