mirror of https://github.com/OISF/suricata
Version 1 of AC Cuda.
parent
2de59fc235
commit
17c763f855
@ -0,0 +1,357 @@
|
||||
/* Copyright (C) 2007-2012 Open Information Security Foundation
|
||||
*
|
||||
* You can copy, redistribute or modify this Program under the terms of
|
||||
* the GNU General Public License version 2 as published by the Free
|
||||
* Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* version 2 along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
* 02110-1301, USA.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
*
|
||||
* \author Anoop Saldanha <anoopsaldanha@gmail.com>
|
||||
*/
|
||||
|
||||
/* compile in, only if we have a CUDA enabled device on the machine, with the
|
||||
* toolkit and the driver installed */
|
||||
#ifdef __SC_CUDA_SUPPORT__
|
||||
|
||||
#include "suricata-common.h"
|
||||
|
||||
#include "util-error.h"
|
||||
#include "util-debug.h"
|
||||
#include "conf.h"
|
||||
#include "util-cuda.h"
|
||||
#include "util-cuda-handlers.h"
|
||||
|
||||
/* file only exists if cuda is enabled */
|
||||
#include "cuda-ptxdump.h"
|
||||
|
||||
/************************conf file profile section**********************/
|
||||
|
||||
typedef struct CudaHandlerConfProfile_ {
|
||||
char *name;
|
||||
void *ctx;
|
||||
void (*Free)(void *);
|
||||
|
||||
struct CudaHandlerConfProfile_ *next;
|
||||
} CudaHandlerConfProfile;
|
||||
|
||||
static CudaHandlerConfProfile *conf_profiles = NULL;
|
||||
/* protects above var */
|
||||
static SCMutex mutex = PTHREAD_MUTEX_INITIALIZER;
|
||||
|
||||
void CudaHandlerAddCudaProfileFromConf(const char *name,
|
||||
void *(*Callback)(ConfNode *node),
|
||||
void (*Free)(void *))
|
||||
{
|
||||
/* we don't do data validation */
|
||||
SCMutexLock(&mutex);
|
||||
|
||||
CudaHandlerConfProfile *tmp_cp = conf_profiles;
|
||||
while (tmp_cp != NULL && strcasecmp(name, tmp_cp->name) != 0)
|
||||
tmp_cp = tmp_cp->next;
|
||||
|
||||
if (tmp_cp != NULL) {
|
||||
SCLogError(SC_ERR_INVALID_ARGUMENT, "We already have a cuda conf "
|
||||
"profile by the name \"%s\" registered.", name);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
char tmp[200];
|
||||
int r = snprintf(tmp, sizeof(tmp), "%s%s", "cuda.", name);
|
||||
if (r < 0) {
|
||||
SCLogError(SC_ERR_FATAL, "snprintf failure.");
|
||||
exit(EXIT_FAILURE);
|
||||
} else if (r > (int)sizeof(tmp)) {
|
||||
SCLogError(SC_ERR_FATAL, "buffer not big enough to write param.");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
void *ctx = Callback(ConfGetNode(tmp));
|
||||
if (ctx == NULL) {
|
||||
SCMutexUnlock(&mutex);
|
||||
return;
|
||||
}
|
||||
|
||||
CudaHandlerConfProfile *new_cp = SCMalloc(sizeof(CudaHandlerConfProfile));
|
||||
if (new_cp == NULL)
|
||||
exit(EXIT_FAILURE);
|
||||
memset(new_cp, 0, sizeof(CudaHandlerConfProfile));
|
||||
new_cp->name = SCStrdup(name);
|
||||
if (new_cp->name == NULL)
|
||||
exit(EXIT_FAILURE);
|
||||
new_cp->ctx = ctx;
|
||||
new_cp->Free = Free;
|
||||
|
||||
if (conf_profiles == NULL) {
|
||||
conf_profiles = new_cp;
|
||||
} else {
|
||||
new_cp->next = conf_profiles;
|
||||
conf_profiles = new_cp;
|
||||
}
|
||||
|
||||
SCMutexUnlock(&mutex);
|
||||
return;
|
||||
}
|
||||
|
||||
void *CudaHandlerGetCudaProfile(const char *name)
|
||||
{
|
||||
SCMutexLock(&mutex);
|
||||
|
||||
CudaHandlerConfProfile *tmp_cp = conf_profiles;
|
||||
while (tmp_cp != NULL && strcasecmp(name, tmp_cp->name) != 0)
|
||||
tmp_cp = tmp_cp->next;
|
||||
|
||||
if (tmp_cp == NULL) {
|
||||
SCMutexUnlock(&mutex);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
SCMutexUnlock(&mutex);
|
||||
return tmp_cp->ctx;
|
||||
}
|
||||
|
||||
void CudaHandlerFreeProfiles(void)
|
||||
{
|
||||
SCMutexLock(&mutex);
|
||||
|
||||
CudaHandlerConfProfile *tmp = conf_profiles;
|
||||
while (tmp != NULL) {
|
||||
CudaHandlerConfProfile *curr = tmp;
|
||||
tmp = tmp->next;
|
||||
SCFree(curr->name);
|
||||
if (curr->Free != NULL)
|
||||
curr->Free(curr->ctx);
|
||||
SCFree(curr);
|
||||
}
|
||||
|
||||
SCMutexUnlock(&mutex);
|
||||
return;
|
||||
}
|
||||
|
||||
/*******************cuda context related data section*******************/
|
||||
|
||||
/* we use a concept where every device on the gpu has only 1 context. If
|
||||
* a section in the engine wants to use a device and tries to open a context
|
||||
* on it, we first check if a context is already created for the device and if
|
||||
* so we return it. If not we create a new one and update with the entry */
|
||||
|
||||
static CUcontext *cuda_contexts = NULL;
|
||||
static int no_of_cuda_contexts = 0;
|
||||
|
||||
typedef struct CudaHandlerModuleData_ {
|
||||
char *name;
|
||||
void *data;
|
||||
|
||||
struct CudaHandlerModuleData_ *next;
|
||||
} CudaHandlerModuleData;
|
||||
|
||||
typedef struct CudaHandlerModule_ {
|
||||
char *name;
|
||||
|
||||
/* the context used by this module */
|
||||
CUcontext context;
|
||||
/* the device on which the above context was created */
|
||||
int device_id;
|
||||
CudaHandlerModuleData *module_data;
|
||||
|
||||
struct CudaHandlerModule_ *next;
|
||||
} CudaHandlerModule;
|
||||
|
||||
static CudaHandlerModule *cudahl_modules = NULL;
|
||||
|
||||
CUcontext CudaHandlerModuleGetContext(const char *name, int device_id)
|
||||
{
|
||||
SCMutexLock(&mutex);
|
||||
|
||||
CudaHandlerModule *module = cudahl_modules;
|
||||
while (module != NULL && strcasecmp(module->name, name) != 0)
|
||||
module = module->next;
|
||||
if (module != NULL) {
|
||||
if (module->device_id != device_id) {
|
||||
SCLogError(SC_ERR_CUDA_HANDLER_ERROR, "Module already "
|
||||
"registered, but the new device_id is different "
|
||||
"from the already registered device_id.");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
SCMutexUnlock(&mutex);
|
||||
return module->context;
|
||||
}
|
||||
|
||||
CudaHandlerModule *new_module = SCMalloc(sizeof(CudaHandlerModule));
|
||||
if (new_module == NULL)
|
||||
exit(EXIT_FAILURE);
|
||||
memset(new_module, 0, sizeof(CudaHandlerModule));
|
||||
new_module->device_id = device_id;
|
||||
new_module->name = SCStrdup(name);
|
||||
if (new_module->name == NULL)
|
||||
exit(EXIT_FAILURE);
|
||||
if (cudahl_modules == NULL) {
|
||||
cudahl_modules = new_module;
|
||||
} else {
|
||||
new_module->next = cudahl_modules;
|
||||
cudahl_modules = new_module;
|
||||
}
|
||||
|
||||
if (no_of_cuda_contexts <= device_id) {
|
||||
cuda_contexts = SCRealloc(cuda_contexts, sizeof(CUcontext) * (device_id + 1));
|
||||
if (cuda_contexts == NULL)
|
||||
exit(EXIT_FAILURE);
|
||||
memset(cuda_contexts + no_of_cuda_contexts, 0,
|
||||
sizeof(CUcontext) * ((device_id + 1) - no_of_cuda_contexts));
|
||||
no_of_cuda_contexts = device_id + 1;
|
||||
}
|
||||
|
||||
if (cuda_contexts[device_id] == 0) {
|
||||
SCCudaDevices *devices = SCCudaGetDeviceList();
|
||||
if (SCCudaCtxCreate(&cuda_contexts[device_id], CU_CTX_SCHED_BLOCKING_SYNC,
|
||||
devices->devices[device_id]->device) == -1) {
|
||||
SCLogDebug("ctxcreate failure.");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
new_module->context = cuda_contexts[device_id];
|
||||
|
||||
SCMutexUnlock(&mutex);
|
||||
return cuda_contexts[device_id];
|
||||
}
|
||||
|
||||
void CudaHandlerModuleStoreData(const char *module_name,
|
||||
const char *data_name, void *data_ptr)
|
||||
{
|
||||
SCMutexLock(&mutex);
|
||||
|
||||
CudaHandlerModule *module = cudahl_modules;
|
||||
while (module != NULL && strcasecmp(module->name, module_name) != 0)
|
||||
module = module->next;
|
||||
if (module == NULL) {
|
||||
SCLogError(SC_ERR_CUDA_HANDLER_ERROR, "Trying to retrieve data "
|
||||
"\"%s\" from module \"%s\" that hasn't been registered "
|
||||
"yet.", module_name, data_name);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
CudaHandlerModuleData *data = module->module_data;
|
||||
while (data != NULL && (strcasecmp(data_name, data->name) != 0)) {
|
||||
data = data->next;
|
||||
}
|
||||
if (data != NULL) {
|
||||
SCLogWarning(SC_ERR_CUDA_HANDLER_ERROR, "Data \"%s\" already "
|
||||
"registered for this module \"%s\".", data_name,
|
||||
module_name);
|
||||
SCMutexUnlock(&mutex);
|
||||
goto end;
|
||||
}
|
||||
|
||||
CudaHandlerModuleData *new_data = SCMalloc(sizeof(CudaHandlerModuleData));
|
||||
if (new_data == NULL)
|
||||
exit(EXIT_FAILURE);
|
||||
memset(new_data, 0, sizeof(CudaHandlerModuleData));
|
||||
new_data->name = SCStrdup(data_name);
|
||||
if (new_data->name == NULL)
|
||||
exit(EXIT_FAILURE);
|
||||
new_data->data = data_ptr;
|
||||
|
||||
if (module->module_data == NULL) {
|
||||
module->module_data = new_data;
|
||||
} else {
|
||||
new_data->next = module->module_data;
|
||||
module->module_data = new_data;
|
||||
}
|
||||
|
||||
SCMutexUnlock(&mutex);
|
||||
|
||||
end:
|
||||
return;
|
||||
}
|
||||
|
||||
void *CudaHandlerModuleGetData(const char *module_name, const char *data_name)
|
||||
{
|
||||
SCMutexLock(&mutex);
|
||||
|
||||
CudaHandlerModule *module = cudahl_modules;
|
||||
while (module != NULL && strcasecmp(module->name, module_name) != 0)
|
||||
module = module->next;
|
||||
if (module == NULL) {
|
||||
SCLogError(SC_ERR_CUDA_HANDLER_ERROR, "Trying to retrieve data "
|
||||
"\"%s\" from module \"%s\" that hasn't been registered "
|
||||
"yet.", module_name, data_name);
|
||||
SCMutexUnlock(&mutex);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
CudaHandlerModuleData *data = module->module_data;
|
||||
while (data != NULL && (strcasecmp(data_name, data->name) != 0)) {
|
||||
data = data->next;
|
||||
}
|
||||
if (data == NULL) {
|
||||
SCLogInfo("Data \"%s\" already registered for this module \"%s\". "
|
||||
"Returning it.", data_name, module_name);
|
||||
SCMutexUnlock(&mutex);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
SCMutexUnlock(&mutex);
|
||||
return data->data;
|
||||
}
|
||||
|
||||
/**
 * \brief Loads the cuda module for a ptx image.
 *
 *        The image is selected by name: "<ptx_image>_sm_<major><minor>",
 *        where major/minor is the lowest compute capability found among
 *        the devices in the cuda device list, i.e. the one supported by
 *        all of them.
 *
 * \param p_module  Receives the loaded cuda module.
 * \param ptx_image Base name of the ptx image.
 *
 * \retval  0 on success.
 * \retval -1 if there are no devices, the image name can't be built or
 *            the module fails to load.
 */
int CudaHandlerGetCudaModule(CUmodule *p_module, const char *ptx_image)
{
#define CUDA_HANDLER_GET_CUDA_MODULE_BUFFER_EXTRA_SPACE 15

    int i = 0;

    /* without at least one device there is no compute capability to pick,
     * and major/minor below would keep their INT_MAX placeholders */
    SCCudaDevices *devices = SCCudaGetDeviceList();
    if (devices == NULL || devices->count < 1) {
        SCLogError(SC_ERR_CUDA_HANDLER_ERROR, "No cuda devices available "
                   "to select a ptx image for \"%s\".", ptx_image);
        return -1;
    }

    /* select the ptx image based on the compute capability supported by all
     * devices (i.e. the lowest) */
    size_t image_size = strlen(ptx_image) + CUDA_HANDLER_GET_CUDA_MODULE_BUFFER_EXTRA_SPACE;
    char *image = SCMalloc(image_size);
    if (unlikely(image == NULL)) {
        exit(EXIT_FAILURE);
    }
    memset(image, 0x00, image_size);

    int major = INT_MAX;
    int minor = INT_MAX;
    for (i = 0; i < devices->count; i++){
        if (devices->devices[i]->major_rev < major){
            major = devices->devices[i]->major_rev;
            minor = devices->devices[i]->minor_rev;
        }
        if (devices->devices[i]->major_rev == major &&
            devices->devices[i]->minor_rev < minor){
            minor = devices->devices[i]->minor_rev;
        }
    }

    /* major and minor are ints, so print them as such */
    int r = snprintf(image, image_size, "%s_sm_%d%d", ptx_image, major, minor);
    if (r < 0 || (size_t)r >= image_size) {
        SCLogError(SC_ERR_CUDA_HANDLER_ERROR, "Unable to build the ptx "
                   "image name for \"%s\".", ptx_image);
        goto error;
    }

    /* we don't have a cuda module associated with this module.  Create a
     * cuda module, update the module with this cuda module reference and
     * then return the module reference back to the calling function using
     * the argument */
    SCLogDebug("Loading kernel module: %s\n",image);
    if (SCCudaModuleLoadData(p_module, (void *)SCCudaPtxDumpGetModule(image)) == -1)
        goto error;
    SCFree(image);

    return 0;
 error:
    SCFree(image);
    return -1;

#undef CUDA_HANDLER_GET_CUDA_MODULE_BUFFER_EXTRA_SPACE
}
|
||||
|
||||
|
||||
#endif /* __SC_CUDA_SUPPORT__ */
|
@ -0,0 +1,50 @@
|
||||
/* Copyright (C) 2007-2012 Open Information Security Foundation
|
||||
*
|
||||
* You can copy, redistribute or modify this Program under the terms of
|
||||
* the GNU General Public License version 2 as published by the Free
|
||||
* Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* version 2 along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
* 02110-1301, USA.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
*
|
||||
* \author Anoop Saldanha <anoopsaldanha@gmail.com>
|
||||
*/
|
||||
|
||||
#ifndef __UTIL_CUDA_HANDLERS__H__
#define __UTIL_CUDA_HANDLERS__H__

#include "conf.h"
#include "util-cuda.h"

/************************conf file profile section**********************/

/* Registers the conf profile "name".  Callback receives the profile's conf
 * node and returns the context to store; Free, when not NULL, is used to
 * release that context. */
void CudaHandlerAddCudaProfileFromConf(const char *name,
                                       void *(*Callback)(ConfNode *node),
                                       void (*Free)(void *));
/* Returns the context stored for the profile "name", or NULL if there is
 * no such profile. */
void *CudaHandlerGetCudaProfile(const char *name);
/* Releases all registered conf profiles. */
void CudaHandlerFreeProfiles(void);

/*******************cuda context related data section*******************/

/* module data type tags */
#define CUDA_HANDLER_MODULE_DATA_TYPE_MEMORY_HOST   0
#define CUDA_HANDLER_MODULE_DATA_TYPE_MEMORY_DEVICE 1
#define CUDA_HANDLER_MODULE_DATA_TYPE_CUDA_BUFFER   2

/* Returns the cuda context for the module on the given device, registering
 * the module if it isn't known yet. */
CUcontext CudaHandlerModuleGetContext(const char *module_name, int device_id);
/* Stores data_ptr under data_name for an already registered module. */
void CudaHandlerModuleStoreData(const char *module_name,
                                const char *data_name, void *data_ptr);
/* Returns the pointer stored under data_name for the module, or NULL. */
void *CudaHandlerModuleGetData(const char *module_name, const char *data_name);
/* Loads the cuda module for ptx_image into *p_module; 0 on success, -1 on
 * failure. */
int CudaHandlerGetCudaModule(CUmodule *p_module, const char *ptx_image);

#endif /* __UTIL_CUDA_HANDLERS__H__ */
|
@ -0,0 +1,96 @@
|
||||
/* Copyright (C) 2007-2012 Open Information Security Foundation
|
||||
*
|
||||
* You can copy, redistribute or modify this Program under the terms of
|
||||
* the GNU General Public License version 2 as published by the Free
|
||||
* Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* version 2 along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
* 02110-1301, USA.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
*
|
||||
* \author Anoop Saldanha <anoopsaldanha@gmail.com>
|
||||
*
|
||||
* The Cuda kernel for MPM AC.
|
||||
*
|
||||
* \todo - This is a basic version of the kernel.
|
||||
* - Support 16 bit state tables.
|
||||
* - Texture memory.
|
||||
* - Multiple threads per blocks of threads. Make use of
|
||||
* shared memory/texture memory.
|
||||
*/
|
||||
|
||||
/**
 * \brief AC search kernel for buffers whose per-packet header uses 64 bit
 *        fields.  One thread scans one packet.
 *
 *        Layout read by the kernel, with "off" being
 *        o_buffer[tid] - d_buffer_start_offset:
 *          d_buffer + off       : 8 bytes, payload length
 *          d_buffer + off + 8   : 8 bytes, address of the state table,
 *                                 256 unsigned ints per state
 *          d_buffer + off + 16  : payload bytes
 *
 *        Layout written by the kernel, in unsigned int units:
 *          results_buffer[off * 2]      : number of words written after it
 *                                         (two per match)
 *          results_buffer[off * 2 + 1…] : pairs of (payload offset, low 24
 *                                         bits of the matching state)
 *
 *        A state entry with any of its upper 8 bits set is reported as a
 *        match; only its low 24 bits select the next state.
 *
 * \param d_buffer              Packet buffer.
 * \param d_buffer_start_offset Subtracted from every o_buffer entry.
 * \param o_buffer              Per-packet offsets, one entry per thread.
 * \param results_buffer        Receives the match results.
 * \param nop                   Number of packets; threads with an id at or
 *                              beyond it return immediately.
 * \param tolower               256 entry table applied to every payload
 *                              byte before the state lookup.
 */
extern "C"
__global__ void SCACCudaSearch64(unsigned char *d_buffer,
                                 unsigned int d_buffer_start_offset,
                                 unsigned int *o_buffer,
                                 unsigned int *results_buffer,
                                 unsigned int nop,
                                 unsigned char *tolower)
{
    unsigned int u = 0;
    unsigned int tid = blockIdx.x * blockDim.x + threadIdx.x;
    if (tid >= nop)
        return;

    /* offset of this thread's packet, relative to d_buffer */
    const unsigned int pkt_offset = o_buffer[tid] - d_buffer_start_offset;
    unsigned char *pkt = d_buffer + pkt_offset;

    /* the header fields are 8 bytes wide.  unsigned long long is 64 bits
     * on every platform, whereas unsigned long is only 32 bits where the
     * host uses the LLP64 model */
    unsigned int buflen = (unsigned int)*((unsigned long long *)pkt);
    unsigned int (*state_table_u32)[256] =
        (unsigned int (*)[256])*((unsigned long long *)(pkt + 8));
    unsigned char *buf = pkt + 16;

    unsigned int state = 0;
    unsigned int matches = 0;
    /* slot 0 of this packet's result area is kept for the count */
    unsigned int *results = (results_buffer + (pkt_offset * 2) + 1);
    for (u = 0; u < buflen; u++) {
        state = state_table_u32[state & 0x00FFFFFF][tolower[buf[u]]];
        if (state & 0xFF000000) {
            results[matches++] = u;
            results[matches++] = state & 0x00FFFFFF;
        }
    }

    *(results - 1) = matches;
    return;
}
|
||||
|
||||
/**
 * \brief AC search kernel for buffers whose per-packet header uses 32 bit
 *        fields.  One thread scans one packet.
 *
 *        With "off" being o_buffer[tid] - d_buffer_start_offset, the
 *        kernel reads a 4 byte payload length at d_buffer + off, a 4 byte
 *        state table address at d_buffer + off + 4 and the payload from
 *        d_buffer + off + 8.  It writes the number of result words to
 *        results_buffer[off * 2], followed by pairs of (payload offset,
 *        low 24 bits of the matching state).
 *
 * \param d_buffer              Packet buffer.
 * \param d_buffer_start_offset Subtracted from every o_buffer entry.
 * \param o_buffer              Per-packet offsets, one entry per thread.
 * \param results_buffer        Receives the match results.
 * \param nop                   Number of packets; threads with an id at or
 *                              beyond it return immediately.
 * \param tolower               256 entry table applied to every payload
 *                              byte before the state lookup.
 */
extern "C"
__global__ void SCACCudaSearch32(unsigned char *d_buffer,
                                 unsigned int d_buffer_start_offset,
                                 unsigned int *o_buffer,
                                 unsigned int *results_buffer,
                                 unsigned int nop,
                                 unsigned char *tolower)
{
    const unsigned int tid = blockIdx.x * blockDim.x + threadIdx.x;
    if (tid >= nop)
        return;

    /* offset of this thread's packet, relative to d_buffer */
    const unsigned int pkt_offset = o_buffer[tid] - d_buffer_start_offset;
    unsigned char *pkt = d_buffer + pkt_offset;

    /* header: payload length, then the state table address */
    const unsigned int payload_len = *((unsigned int *)pkt);
    unsigned int (*transitions)[256] =
        (unsigned int (*)[256])*((unsigned int *)(pkt + 4));
    unsigned char *payload = pkt + 8;

    /* out[-1] is kept for the count, results start at out[0] */
    unsigned int *out = results_buffer + (pkt_offset * 2) + 1;

    unsigned int cur = 0;
    unsigned int written = 0;
    unsigned int pos;
    for (pos = 0; pos < payload_len; pos++) {
        cur = transitions[cur & 0x00FFFFFF][tolower[payload[pos]]];
        /* the upper 8 bits flag a match */
        if ((cur & 0xFF000000) == 0)
            continue;
        out[written++] = pos;
        out[written++] = cur & 0x00FFFFFF;
    }

    out[-1] = written;
}
|
@ -1,112 +0,0 @@
|
||||
/* Copyright (C) 2007-2010 Open Information Security Foundation
|
||||
*
|
||||
* You can copy, redistribute or modify this Program under the terms of
|
||||
* the GNU General Public License version 2 as published by the Free
|
||||
* Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* version 2 along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
* 02110-1301, USA.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
*
|
||||
* \author Anoop Saldanha <anoopsaldanha@gmail.com>
|
||||
*
|
||||
* The Cuda kernel for MPM B2G.
|
||||
*
|
||||
* \todo This is a basic version of the kernel. Modify it to support multiple
|
||||
* blocks of threads. Make use of shared memory/texture memory.
|
||||
*/
|
||||
|
||||
/* q-gram size used by the search */
#define B2G_CUDA_Q 2
#define CUDA_THREADS 4000
/* shift applied to the first byte when hashing a 2 byte gram */
#define B2G_CUDA_HASHSHIFT 4
/* type of a B2G table entry */
#define B2G_CUDA_TYPE unsigned int
/* hash of the 2 byte gram (a, b) */
#define B2G_CUDA_HASH16(a, b) (((a) << B2G_CUDA_HASHSHIFT) | (b))
/* lowercases c through the table named g_u8_lowercasetable, which has to
 * be in scope wherever the macro is used */
#define u8_tolower(c) g_u8_lowercasetable[(c)]

/**
 * \brief Per-packet data as laid out in the packets buffer handed to the
 *        gpu.
 */
typedef struct SCCudaPBPacketDataForGPU_ {
    /* holds the value B2gCtx->m */
    unsigned int m;
    /* holds B2gCtx->B2g */
    unsigned int table;
    /* holds the length of the payload */
    unsigned int payload_len;
    /* first byte of the payload; the kernel takes its address and indexes
     * the rest of the payload from there */
    unsigned char payload;
} SCCudaPBPacketDataForGPU;
|
||||
|
||||
/**
 * \brief B2G BNDMq search kernel.  One thread scans one packet.
 *
 *        The packet for thread "tid" is the SCCudaPBPacketDataForGPU
 *        found at packets_buffer + packets_offset_buffer[tid].  Results
 *        for the packet start at
 *        results_buffer + packets_payload_offset_buffer[tid] + tid:
 *        the first unsigned short receives the number of matches and the
 *        following ones the payload offsets of the matches.
 *
 *        A packet whose m is smaller than B2G_CUDA_Q, or whose payload is
 *        empty, is not scanned and gets a match count of 0.
 *
 * \note  The thread id is derived with a fixed block width of 32, so the
 *        kernel presumably has to be launched with 32 threads per block -
 *        confirm against the launch code.
 *
 * \param results_buffer                Receives the match results.
 * \param packets_buffer                Buffer holding the packets.
 * \param packets_offset_buffer         Offset of every packet inside
 *                                      packets_buffer.
 * \param packets_payload_offset_buffer Per-packet offsets used to place
 *                                      the results.
 * \param nop                           Number of packets; threads with an
 *                                      id at or beyond it return at once.
 * \param g_u8_lowercasetable           Lowercase table used by
 *                                      u8_tolower().
 */
extern "C"
__global__ void B2gCudaSearchBNDMq(unsigned short *results_buffer,
                                   unsigned char *packets_buffer,
                                   unsigned int *packets_offset_buffer,
                                   unsigned int *packets_payload_offset_buffer,
                                   unsigned int nop,
                                   unsigned char *g_u8_lowercasetable)
{
    unsigned int tid = blockIdx.x * 32 + threadIdx.x;
    /* if the thread id is greater than the no of packets sent in the packets
     * buffer, terminate the thread */
    if (tid >= nop)
        return;

    SCCudaPBPacketDataForGPU *packet = (SCCudaPBPacketDataForGPU *)(packets_buffer + packets_offset_buffer[tid]);
    unsigned int m = packet->m;
    unsigned char *buf = &packet->payload;
    unsigned int buflen = packet->payload_len;
    unsigned int *B2G = (unsigned int *)packet->table;
    unsigned int pos = m - B2G_CUDA_Q + 1;
    B2G_CUDA_TYPE d;
    unsigned short h;
    unsigned int first;
    unsigned int j = 0;

    unsigned short *matches_count = results_buffer + packets_payload_offset_buffer[tid] + tid;
    unsigned short *offsets = matches_count + 1;
    /* temporarily hold the results here, before we shift it to matches_count
     * before returning */
    unsigned short matches = 0;

    /* Both operands below are unsigned.  With an empty payload
     * "buflen - B2G_CUDA_Q + 1" wraps around and the scan would run far
     * past the payload; with m < B2G_CUDA_Q the start position is 0 (or
     * wraps), buf[pos - 1] is read before the payload and pos may never
     * advance.  Skip the scan in both cases. */
    if (m >= B2G_CUDA_Q && buflen >= (B2G_CUDA_Q - 1)) {
        while (pos <= (buflen - B2G_CUDA_Q + 1)) {
            h = B2G_CUDA_HASH16(u8_tolower(buf[pos - 1]), u8_tolower(buf[pos]));
            d = B2G[h];

            if (d != 0) {
                j = pos;
                first = pos - (m - B2G_CUDA_Q + 1);

                do {
                    j = j - 1;
                    /* unsigned shift: d is unsigned and m - 1 may be 31 */
                    if (d >= (1u << (m - 1))) {
                        if (j > first) {
                            pos = j;
                        } else {
                            offsets[matches++] = j;
                        }
                    }

                    if (j == 0)
                        break;

                    h = B2G_CUDA_HASH16(u8_tolower(buf[j - 1]), u8_tolower(buf[j]));
                    d = (d << 1) & B2G[h];
                } while (d != 0);
            }
            pos = pos + m - B2G_CUDA_Q + 1;
        }
    }

    matches_count[0] = matches;

    return;
}
|
Loading…
Reference in New Issue