use of pagelocked memory for CUDA

remotes/origin/master-1.1.x
Martin Beyer 15 years ago committed by Victor Julien
parent 0e8e8e3728
commit 8adff3c63c

@ -43,6 +43,7 @@
#include "util-unittest.h"
#include "util-mpm-b2g-cuda.h"
#include "util-cuda-handlers.h"
#include "detect-engine-address.h"
#include "detect-engine-port.h"
#include "detect-engine.h"
@ -353,12 +354,21 @@ void SCCudaPBDeAllocSCCudaPBPacketsBuffer(SCCudaPBPacketsBuffer *pb)
if (pb == NULL)
return;
if (pb->packets_buffer != NULL)
free(pb->packets_buffer);
if (pb->packets_offset_buffer != NULL)
free(pb->packets_offset_buffer);
if (pb->packets_payload_offset_buffer != NULL)
free(pb->packets_payload_offset_buffer);
if (pb->packets_buffer != NULL){
if (SCCudaMemFreeHost(pb->packets_buffer) == -1)
SCLogError(SC_ERR_CUDA_ERROR, "Error deallocating pagelocked memory: "
"packets_buffer\n");
}
if (pb->packets_offset_buffer != NULL){
if (SCCudaMemFreeHost(pb->packets_offset_buffer) == -1)
SCLogError(SC_ERR_CUDA_ERROR, "Error deallocating pagelocked memory: "
"packets_offset_buffer\n");
}
if (pb->packets_payload_offset_buffer != NULL){
if (SCCudaMemFreeHost(pb->packets_payload_offset_buffer) == -1)
SCLogError(SC_ERR_CUDA_ERROR, "Error deallocating pagelocked memory "
"packets_payload_offset_buffer\n");
}
if (pb->packets_address_buffer != NULL)
free(pb->packets_address_buffer);
@ -381,38 +391,65 @@ SCCudaPBPacketsBuffer *SCCudaPBAllocSCCudaPBPacketsBuffer(void)
}
memset(pb, 0, sizeof(SCCudaPBPacketsBuffer));
/* Register new module, needed for some unit tests */
if (SCCudaHlGetModuleHandle("SC_CUDA_PACKET_BATCHER") == -1) {
SCCudaHlRegisterModule("SC_CUDA_PACKET_BATCHER");
}
/* the buffer for the packets to be sent over to the gpu. It is sized to hold
* at least SC_CUDA_PB_MIN_NO_OF_PACKETS packets, even if every buffered
* packet is full to the brim */
pb->packets_buffer = malloc(sizeof(SCCudaPBPacketDataForGPU) *
SC_CUDA_PB_MIN_NO_OF_PACKETS);
if (pb->packets_buffer == NULL) {
SCLogError(SC_ERR_MEM_ALLOC, "Error allocating memory");
SCCudaHlModuleData *data = NULL;
data = SCCudaHlGetModuleData(SCCudaHlGetModuleHandle("SC_CUDA_PACKET_BATCHER"));
if (data == NULL) {
SCLogDebug("Module not registered. To avail the benefits of this "
"registration facility, first register a module using "
"context using SCCudaHlRegisterModule(), after which you "
"can call this function");
return NULL;
}
if (SCCudaHlGetCudaContext(&data->cuda_context, data->handle) == -1){
SCLogError(SC_ERR_CUDA_HANDLER_ERROR, "Error getting cuda context\n");
return NULL;
}
if (SCCudaCtxPushCurrent(data->cuda_context) == -1){
SCLogError(SC_ERR_CUDA_HANDLER_ERROR,
"Error pushing cuda context to allocate memory\n");
}
if (SCCudaMemHostAlloc((void**)&pb->packets_buffer, sizeof(SCCudaPBPacketDataForGPU) *
SC_CUDA_PB_MIN_NO_OF_PACKETS, CU_MEMHOSTALLOC_PORTABLE) == -1){
SCLogError(SC_ERR_CUDA_ERROR, "Error allocating page-locked memory\n");
exit(EXIT_FAILURE);
}
memset(pb->packets_buffer, 0, sizeof(SCCudaPBPacketDataForGPU) *
SC_CUDA_PB_MIN_NO_OF_PACKETS);
/* used to hold the offsets of the buffered packets in the packets_buffer */
pb->packets_offset_buffer = malloc(sizeof(uint32_t) *
SC_CUDA_PB_MIN_NO_OF_PACKETS);
if (pb->packets_offset_buffer == NULL) {
SCLogError(SC_ERR_MEM_ALLOC, "Error allocating memory");
if (SCCudaMemHostAlloc((void**)&pb->packets_offset_buffer, sizeof(uint32_t) *
SC_CUDA_PB_MIN_NO_OF_PACKETS, CU_MEMHOSTALLOC_PORTABLE) == -1){
SCLogError(SC_ERR_CUDA_ERROR, "Error allocating page-locked memory\n");
exit(EXIT_FAILURE);
}
memset(pb->packets_offset_buffer, 0, sizeof(uint32_t) *
SC_CUDA_PB_MIN_NO_OF_PACKETS);
/* used to hold the offsets of the packets payload */
pb->packets_payload_offset_buffer = malloc(sizeof(uint32_t) *
SC_CUDA_PB_MIN_NO_OF_PACKETS);
if (pb->packets_payload_offset_buffer == NULL) {
SCLogError(SC_ERR_MEM_ALLOC, "Error allocating memory");
if (SCCudaMemHostAlloc((void**)&pb->packets_payload_offset_buffer, sizeof(uint32_t) *
SC_CUDA_PB_MIN_NO_OF_PACKETS, CU_MEMHOSTALLOC_PORTABLE) == -1){
SCLogError(SC_ERR_CUDA_ERROR, "Error allocating page-locked memory\n");
exit(EXIT_FAILURE);
}
memset(pb->packets_payload_offset_buffer, 0, sizeof(uint32_t) *
SC_CUDA_PB_MIN_NO_OF_PACKETS);
SCLogDebug("Allocated pagelocked CUDA memory\n");
if (SCCudaCtxPopCurrent(NULL) == -1){
SCLogError(SC_ERR_CUDA_HANDLER_ERROR, "Could not pop cuda context\n");
}
/* used to hold the packet addresses for all the packets buffered inside
* packets_buffer */
pb->packets_address_buffer = malloc(sizeof(Packet *) *
@ -477,6 +514,9 @@ TmEcode SCCudaPBThreadInit(ThreadVars *tv, void *initdata, void **data)
/* the first packet buffer from the queue */
tctx->curr_pb = (SCCudaPBPacketsBuffer *)SCDQDataDequeue(&data_queues[tmq_inq->id]);
/* register new module */
SCCudaHlRegisterModule("SC_CUDA_PACKET_BATCHER");
*data = tctx;
/* we will need the cuda packet batcher TM's inq for further use later. Read
@ -715,8 +755,21 @@ TmEcode SCCudaPBThreadDeInit(ThreadVars *tv, void *data)
if (tctx != NULL) {
if (tctx->curr_pb != NULL) {
if (SCCudaHlPushCudaContextFromModule("SC_CUDA_PACKET_BATCHER") == -1){
SCLogError(SC_ERR_CUDA_HANDLER_ERROR,
"Failed to push cuda context from module\n");
}
SCCudaPBDeAllocSCCudaPBPacketsBuffer(tctx->curr_pb);
tctx->curr_pb = NULL;
if (SCCudaCtxPopCurrent(NULL) == -1){
SCLogError(SC_ERR_CUDA_ERROR, "Failed to pop cuda context\n");
}
if (SCCudaHlDeRegisterModule("SC_CUDA_PACKET_BATCHER") == -1){
SCLogError(SC_ERR_CUDA_HANDLER_ERROR, "Failed to deregister module\n");
}
}
free(tctx);
}
@ -759,6 +812,10 @@ void SCCudaPBSetUpQueuesAndBuffers(void)
tmq_outq->reader_cnt++;
tmq_outq->writer_cnt++;
/* Register a new module to be used by the packet batcher to allocate
* page-locked memory */
SCCudaHlRegisterModule("SC_CUDA_PACKET_BATCHER");
/* allocate the packet buffer */
/* \todo need to work out the right no of packet buffers that we need to
* queue. I doubt we will need more than 4(as long as we don't run it on
@ -794,17 +851,26 @@ void SCCudaPBCleanUpQueuesAndBuffers(void)
"tmq_outq NULL");
return;
}
if (SCCudaHlPushCudaContextFromModule("SC_CUDA_PACKET_BATCHER") == -1){
SCLogError(SC_ERR_CUDA_HANDLER_ERROR, "Could not push cuda context from module\n");
}
/* clean all the buffers present in the inq */
dq = &data_queues[tmq_inq->id];
SCMutexLock(&dq->mutex_q);
while ( (pb = (SCCudaPBPacketsBuffer *)SCDQDataDequeue(dq)) != NULL) {
if (pb->packets_buffer != NULL)
free(pb->packets_buffer);
if (SCCudaMemFreeHost(pb->packets_buffer) == -1)
SCLogError(SC_ERR_CUDA_ERROR, "Error deallocating pagelocked memory: "
"packets_buffer\n");
if (pb->packets_offset_buffer != NULL)
free(pb->packets_offset_buffer);
if (SCCudaMemFreeHost(pb->packets_offset_buffer) == -1)
SCLogError(SC_ERR_CUDA_ERROR, "Error deallocating pagelocked memory: "
"packets_offset_buffer\n");
if (pb->packets_payload_offset_buffer != NULL)
free(pb->packets_payload_offset_buffer);
if (SCCudaMemFreeHost(pb->packets_payload_offset_buffer) == -1)
SCLogError(SC_ERR_CUDA_ERROR, "Error deallocating pagelocked memory: "
"packets_payload_offset_buffer\n");
free(pb);
}
@ -816,14 +882,23 @@ void SCCudaPBCleanUpQueuesAndBuffers(void)
SCMutexLock(&dq->mutex_q);
while ( (pb = (SCCudaPBPacketsBuffer *)SCDQDataDequeue(dq)) != NULL) {
if (pb->packets_buffer != NULL)
free(pb->packets_buffer);
if (SCCudaMemFreeHost(pb->packets_buffer) == -1)
SCLogError(SC_ERR_CUDA_ERROR, "Error deallocating pagelocked memory: "
"packets_buffer\n");
if (pb->packets_offset_buffer != NULL)
free(pb->packets_offset_buffer);
if (SCCudaMemFreeHost(pb->packets_offset_buffer) == -1)
SCLogError(SC_ERR_CUDA_ERROR, "Error deallocating pagelocked memory: "
"packets_offset_buffer\n");
if (pb->packets_payload_offset_buffer != NULL)
free(pb->packets_payload_offset_buffer);
if (SCCudaMemFreeHost(pb->packets_payload_offset_buffer) == -1)
SCLogError(SC_ERR_CUDA_ERROR, "Error deallocating pagelocked memory: "
"packets_payload_offset_buffer\n");
free(pb);
}
if (SCCudaCtxPopCurrent(NULL) == -1){
SCLogError(SC_ERR_CUDA_ERROR, "Could not pop cuda context\n");
}
SCMutexUnlock(&dq->mutex_q);
SCCondSignal(&dq->cond_q);

@ -1781,9 +1781,9 @@ TmEcode B2gCudaMpmDispThreadInit(ThreadVars *tv, void *initdata, void **data)
* extra 2 bytes(the 1 in 1481 instead of 1480) is to hold the no of
* matches for the payload. The remaining 1480 positions in the buffer
* is to hold the match offsets */
tctx->results_buffer = malloc(sizeof(uint16_t) * 1481 * SC_CUDA_PB_MIN_NO_OF_PACKETS);
if (tctx->results_buffer == NULL) {
SCLogError(SC_ERR_MEM_ALLOC, "Error allocating memory");
if (SCCudaMemHostAlloc((void**)&tctx->results_buffer, sizeof(uint16_t) * 1481 *
SC_CUDA_PB_MIN_NO_OF_PACKETS, CU_MEMHOSTALLOC_PORTABLE) == -1){
SCLogError(SC_ERR_CUDA_ERROR, "Error allocating page-locked memory\n");
exit(EXIT_FAILURE);
}
@ -1905,7 +1905,9 @@ TmEcode B2gCudaMpmDispThreadDeInit(ThreadVars *tv, void *data)
}
SCCudaCtxPushCurrent(dummy_context);
free(tctx->results_buffer);
if (SCCudaMemFreeHost(tctx->results_buffer) == -1)
SCLogError(SC_ERR_CUDA_ERROR, "Error deallocating pagelocked memory: "
"results_buffer\n");
SCCudaHlFreeCudaDevicePtr("MPM_B2G_RESULTS", tctx->b2g_cuda_module_handle);
SCCudaHlFreeCudaDevicePtr("MPM_B2G_PACKETS_BUFFER", tctx->b2g_cuda_module_handle);
SCCudaHlFreeCudaDevicePtr("MPM_B2G_PACKETS_BUFFER_OFFSETS",

Loading…
Cancel
Save