|
|
|
|
@ -98,328 +98,6 @@ void B2gCudaPrintInfo(MpmCtx *);
|
|
|
|
|
void B2gCudaPrintSearchStats(MpmThreadCtx *);
|
|
|
|
|
void B2gCudaRegisterTests(void);
|
|
|
|
|
|
|
|
|
|
#if defined(__x86_64__) || defined(__ia64__)
|
|
|
|
|
/**
 * PTX image (".version 1.4", ".target sm_10") of the B2gCudaSearchBNDMq
 * kernel in which every buffer parameter is declared .u64 and all address
 * arithmetic is done in 64-bit %rd registers.
 *
 * Kernel parameters, in declaration order: results_buffer, packets_buffer,
 * packets_offset_buffer, packets_payload_offset_buffer, nop (.u32) and
 * g_u8_lowercasetable.
 *
 * What the PTX text shows:
 *  - the per-thread index is %ctaid.x * 32 + %tid.x; a thread whose index
 *    is not below nop branches straight to "exit";
 *  - the thread's packet record is at
 *    packets_buffer + packets_offset_buffer[index]; three u32 fields are
 *    read from record offsets 0, 8 and 4, and payload bytes are read from
 *    record offset 11 onwards through g_u8_lowercasetable;
 *  - the thread's result slot is at
 *    results_buffer + 2 * (packets_payload_offset_buffer[index] + index);
 *    u16 entries are stored from byte 2 of the slot and the u16 entry count
 *    is stored at byte 0 just before "exit".
 *
 * NOTE(review): the block-size multiplier 32 is baked into the image, so the
 * launch presumably has to use 32 threads per block - confirm at the launch
 * site.
 * NOTE(review): the ".loc" directives and "//<loop>" annotations suggest
 * this is compiler output rather than hand-written PTX - regenerate instead
 * of hand-editing if that is confirmed.
 *
 * The image is one string built from adjacent literals; nothing but
 * whitespace may sit between them.
 */
const char *b2g_cuda_ptx_image_64_bit =
    " .version 1.4\n"
    " .target sm_10, map_f64_to_f32\n"
    " .entry B2gCudaSearchBNDMq (\n"
    " .param .u64 __cudaparm_B2gCudaSearchBNDMq_results_buffer,\n"
    " .param .u64 __cudaparm_B2gCudaSearchBNDMq_packets_buffer,\n"
    " .param .u64 __cudaparm_B2gCudaSearchBNDMq_packets_offset_buffer,\n"
    " .param .u64 __cudaparm_B2gCudaSearchBNDMq_packets_payload_offset_buffer,\n"
    " .param .u32 __cudaparm_B2gCudaSearchBNDMq_nop,\n"
    " .param .u64 __cudaparm_B2gCudaSearchBNDMq_g_u8_lowercasetable)\n"
    " {\n"
    " .reg .u16 %rh<7>;\n"
    " .reg .u32 %r<38>;\n"
    " .reg .u64 %rd<41>;\n"
    " .reg .pred %p<10>;\n"
    " .loc 3 36 0\n"
    "$LBB1_B2gCudaSearchBNDMq:\n"
    " mov.u16 %rh1, %ctaid.x;\n"
    " mul.wide.u16 %r1, %rh1, 32;\n"
    " cvt.u32.u16 %r2, %tid.x;\n"
    " add.u32 %r3, %r2, %r1;\n"
    " ld.param.u32 %r4, [__cudaparm_B2gCudaSearchBNDMq_nop];\n"
    " setp.gt.u32 %p1, %r4, %r3;\n"
    " @%p1 bra $Lt_0_5634;\n"
    " bra.uni $LBB17_B2gCudaSearchBNDMq;\n"
    "$Lt_0_5634:\n"
    " .loc 3 45 0\n"
    " cvt.u64.u32 %rd1, %r3;\n"
    " mul.lo.u64 %rd2, %rd1, 4;\n"
    " ld.param.u64 %rd3, [__cudaparm_B2gCudaSearchBNDMq_packets_offset_buffer];\n"
    " add.u64 %rd4, %rd3, %rd2;\n"
    " ld.global.u32 %r5, [%rd4+0];\n"
    " cvt.u64.u32 %rd5, %r5;\n"
    " ld.param.u64 %rd6, [__cudaparm_B2gCudaSearchBNDMq_packets_buffer];\n"
    " add.u64 %rd7, %rd5, %rd6;\n"
    " .loc 3 46 0\n"
    " ld.global.u32 %r6, [%rd7+0];\n"
    " .loc 3 48 0\n"
    " ld.global.u32 %r7, [%rd7+8];\n"
    " .loc 3 49 0\n"
    " ld.global.u32 %r8, [%rd7+4];\n"
    " cvt.u64.u32 %rd8, %r8;\n"
    " .loc 3 50 0\n"
    " sub.u32 %r9, %r6, 1;\n"
    " mov.s32 %r10, %r9;\n"
    " .loc 3 56 0\n"
    " ld.param.u64 %rd9, [__cudaparm_B2gCudaSearchBNDMq_results_buffer];\n"
    " ld.param.u64 %rd10, [__cudaparm_B2gCudaSearchBNDMq_packets_payload_offset_buffer];\n"
    " add.u64 %rd11, %rd10, %rd2;\n"
    " ld.global.u32 %r11, [%rd11+0];\n"
    " cvt.u64.u32 %rd12, %r11;\n"
    " add.u64 %rd13, %rd12, %rd1;\n"
    " mul.lo.u64 %rd14, %rd13, 2;\n"
    " add.u64 %rd15, %rd9, %rd14;\n"
    " sub.u32 %r12, %r7, 1;\n"
    " setp.gt.u32 %p2, %r9, %r12;\n"
    " mov.u32 %r13, 0;\n"
    " @%p2 bra $Lt_0_9474;\n"
    " add.u64 %rd16, %rd7, 12;\n"
    " add.u64 %rd17, %rd15, 2;\n"
    " ld.param.u64 %rd18, [__cudaparm_B2gCudaSearchBNDMq_g_u8_lowercasetable];\n"
    "$Lt_0_6658:\n"
    " //<loop> Loop body line 66\n"
    " .loc 3 66 0\n"
    " cvt.u64.u32 %rd19, %r10;\n"
    " add.u64 %rd20, %rd19, %rd7;\n"
    " ld.global.u8 %rh2, [%rd20+12];\n"
    " cvt.u64.u8 %rd21, %rh2;\n"
    " add.u64 %rd22, %rd21, %rd18;\n"
    " ld.global.u8 %r14, [%rd22+0];\n"
    " ld.global.u8 %rh3, [%rd20+11];\n"
    " cvt.u64.u8 %rd23, %rh3;\n"
    " add.u64 %rd24, %rd23, %rd18;\n"
    " ld.global.u8 %r15, [%rd24+0];\n"
    " shl.b32 %r16, %r15, 4;\n"
    " or.b32 %r17, %r14, %r16;\n"
    " cvt.u64.u32 %rd25, %r17;\n"
    " mul.lo.u64 %rd26, %rd25, 4;\n"
    " add.u64 %rd27, %rd8, %rd26;\n"
    " ld.global.u32 %r18, [%rd27+0];\n"
    " mov.u32 %r19, 0;\n"
    " setp.eq.u32 %p3, %r18, %r19;\n"
    " @%p3 bra $Lt_0_258;\n"
    " //<loop> Part of loop body line 66, head labeled $Lt_0_6658\n"
    " .loc 3 69 0\n"
    " mov.s32 %r20, %r10;\n"
    " .loc 3 70 0\n"
    " sub.u32 %r21, %r10, %r6;\n"
    " add.u32 %r22, %r21, 1;\n"
    " sub.s32 %r23, %r6, 1;\n"
    "$Lt_0_7682:\n"
    " //<loop> Loop body line 73\n"
    " .loc 3 73 0\n"
    " sub.u32 %r20, %r20, 1;\n"
    " shr.u32 %r24, %r18, %r23;\n"
    " mov.u32 %r25, 0;\n"
    " setp.eq.u32 %p4, %r24, %r25;\n"
    " @%p4 bra $Lt_0_8450;\n"
    " //<loop> Part of loop body line 73, head labeled $Lt_0_7682\n"
    " setp.le.u32 %p5, %r20, %r22;\n"
    " @%p5 bra $Lt_0_8706;\n"
    " //<loop> Part of loop body line 73, head labeled $Lt_0_7682\n"
    " .loc 3 76 0\n"
    " mov.s32 %r10, %r20;\n"
    " bra.uni $Lt_0_8450;\n"
    "$Lt_0_8706:\n"
    " //<loop> Part of loop body line 73, head labeled $Lt_0_7682\n"
    " .loc 3 78 0\n"
    " mov.s32 %r26, %r13;\n"
    " add.u32 %r27, %r26, 1;\n"
    " cvt.u16.u32 %r13, %r27;\n"
    " cvt.u64.u32 %rd28, %r26;\n"
    " mul.lo.u64 %rd29, %rd28, 2;\n"
    " add.u64 %rd30, %rd15, %rd29;\n"
    " st.global.u16 [%rd30+2], %r20;\n"
    "$Lt_0_8450:\n"
    "$Lt_0_7938:\n"
    " //<loop> Part of loop body line 73, head labeled $Lt_0_7682\n"
    " .loc 3 83 0\n"
    " mov.u32 %r28, 0;\n"
    " setp.eq.u32 %p6, %r20, %r28;\n"
    " @%p6 bra $Lt_0_258;\n"
    " //<loop> Part of loop body line 73, head labeled $Lt_0_7682\n"
    " .loc 3 86 0\n"
    " cvt.u64.u32 %rd31, %r20;\n"
    " add.u64 %rd32, %rd31, %rd7;\n"
    " ld.global.u8 %rh4, [%rd32+12];\n"
    " cvt.u64.u8 %rd33, %rh4;\n"
    " add.u64 %rd34, %rd33, %rd18;\n"
    " ld.global.u8 %r29, [%rd34+0];\n"
    " ld.global.u8 %rh5, [%rd32+11];\n"
    " cvt.u64.u8 %rd35, %rh5;\n"
    " add.u64 %rd36, %rd35, %rd18;\n"
    " ld.global.u8 %r30, [%rd36+0];\n"
    " shl.b32 %r31, %r30, 4;\n"
    " or.b32 %r32, %r29, %r31;\n"
    " cvt.u64.u32 %rd37, %r32;\n"
    " mul.lo.u64 %rd38, %rd37, 4;\n"
    " add.u64 %rd39, %rd8, %rd38;\n"
    " ld.global.u32 %r33, [%rd39+0];\n"
    " shl.b32 %r34, %r18, 1;\n"
    " and.b32 %r18, %r33, %r34;\n"
    " mov.u32 %r35, 0;\n"
    " setp.ne.u32 %p7, %r18, %r35;\n"
    " @%p7 bra $Lt_0_7682;\n"
    "$Lt_0_258:\n"
    "$Lt_0_6914:\n"
    " //<loop> Part of loop body line 66, head labeled $Lt_0_6658\n"
    " .loc 3 89 0\n"
    " add.u32 %r36, %r6, %r10;\n"
    " sub.u32 %r10, %r36, 1;\n"
    " setp.ge.u32 %p8, %r12, %r10;\n"
    " @%p8 bra $Lt_0_6658;\n"
    " bra.uni $Lt_0_6146;\n"
    "$Lt_0_9474:\n"
    "$Lt_0_6146:\n"
    " .loc 3 92 0\n"
    " st.global.u16 [%rd15+0], %r13;\n"
    "$LBB17_B2gCudaSearchBNDMq:\n"
    " .loc 3 94 0\n"
    " exit;\n"
    "$LDWend_B2gCudaSearchBNDMq:\n"
    " } // B2gCudaSearchBNDMq\n"
    "";
|
|
|
|
|
#else
|
|
|
|
|
/**
 * PTX image (".version 1.4", ".target sm_10") of the B2gCudaSearchBNDMq
 * kernel in which every parameter, including the buffer addresses, is
 * declared .u32 and all address arithmetic is done in 32-bit %r registers.
 *
 * Kernel parameters, in declaration order: results_buffer, packets_buffer,
 * packets_offset_buffer, packets_payload_offset_buffer, nop and
 * g_u8_lowercasetable.
 *
 * What the PTX text shows:
 *  - the per-thread index is %ctaid.x * 32 + %tid.x; a thread whose index
 *    is not below nop branches straight to "exit";
 *  - the thread's packet record is at
 *    packets_buffer + packets_offset_buffer[index]; three u32 fields are
 *    read from record offsets 0, 8 and 4, and payload bytes are read from
 *    record offset 11 onwards through g_u8_lowercasetable;
 *  - the thread's result slot is at
 *    results_buffer + 2 * (packets_payload_offset_buffer[index] + index);
 *    u16 entries are stored from byte 2 of the slot and the u16 entry count
 *    is stored at byte 0 just before "exit".
 *
 * NOTE(review): the block-size multiplier 32 is baked into the image, so the
 * launch presumably has to use 32 threads per block - confirm at the launch
 * site.
 *
 * The image is one string built from adjacent literals; nothing but
 * whitespace may sit between them.
 *
 * \todo Optimize the kernel.  Also explore the options for compiling the
 *       *.cu file at compile/runtime.
 */
const char *b2g_cuda_ptx_image_32_bit =
    " .version 1.4\n"
    " .target sm_10, map_f64_to_f32\n"
    " .entry B2gCudaSearchBNDMq (\n"
    " .param .u32 __cudaparm_B2gCudaSearchBNDMq_results_buffer,\n"
    " .param .u32 __cudaparm_B2gCudaSearchBNDMq_packets_buffer,\n"
    " .param .u32 __cudaparm_B2gCudaSearchBNDMq_packets_offset_buffer,\n"
    " .param .u32 __cudaparm_B2gCudaSearchBNDMq_packets_payload_offset_buffer,\n"
    " .param .u32 __cudaparm_B2gCudaSearchBNDMq_nop,\n"
    " .param .u32 __cudaparm_B2gCudaSearchBNDMq_g_u8_lowercasetable)\n"
    " {\n"
    " .reg .u16 %rh<6>;\n"
    " .reg .u32 %r<65>;\n"
    " .reg .pred %p<10>;\n"
    " .loc 3 36 0\n"
    "$LBB1_B2gCudaSearchBNDMq:\n"
    " mov.u16 %rh1, %ctaid.x;\n"
    " mul.wide.u16 %r1, %rh1, 32;\n"
    " cvt.u32.u16 %r2, %tid.x;\n"
    " add.u32 %r3, %r2, %r1;\n"
    " ld.param.u32 %r4, [__cudaparm_B2gCudaSearchBNDMq_nop];\n"
    " setp.gt.u32 %p1, %r4, %r3;\n"
    " @%p1 bra $Lt_0_5634;\n"
    " bra.uni $LBB17_B2gCudaSearchBNDMq;\n"
    "$Lt_0_5634:\n"
    " .loc 3 45 0\n"
    " mul.lo.u32 %r5, %r3, 4;\n"
    " ld.param.u32 %r6, [__cudaparm_B2gCudaSearchBNDMq_packets_offset_buffer];\n"
    " add.u32 %r7, %r6, %r5;\n"
    " ld.global.u32 %r8, [%r7+0];\n"
    " ld.param.u32 %r9, [__cudaparm_B2gCudaSearchBNDMq_packets_buffer];\n"
    " add.u32 %r10, %r8, %r9;\n"
    " .loc 3 46 0\n"
    " ld.global.u32 %r11, [%r10+0];\n"
    " .loc 3 48 0\n"
    " ld.global.u32 %r12, [%r10+8];\n"
    " .loc 3 49 0\n"
    " ld.global.u32 %r13, [%r10+4];\n"
    " .loc 3 50 0\n"
    " sub.u32 %r14, %r11, 1;\n"
    " mov.s32 %r15, %r14;\n"
    " .loc 3 56 0\n"
    " ld.param.u32 %r16, [__cudaparm_B2gCudaSearchBNDMq_results_buffer];\n"
    " ld.param.u32 %r17, [__cudaparm_B2gCudaSearchBNDMq_packets_payload_offset_buffer];\n"
    " add.u32 %r18, %r17, %r5;\n"
    " ld.global.u32 %r19, [%r18+0];\n"
    " add.u32 %r20, %r19, %r3;\n"
    " mul.lo.u32 %r21, %r20, 2;\n"
    " add.u32 %r22, %r16, %r21;\n"
    " sub.u32 %r23, %r12, 1;\n"
    " setp.gt.u32 %p2, %r14, %r23;\n"
    " mov.u16 %rh2, 0;\n"
    " @%p2 bra $Lt_0_9474;\n"
    " add.u32 %r24, %r10, 12;\n"
    " add.u32 %r25, %r22, 2;\n"
    " ld.param.u32 %r26, [__cudaparm_B2gCudaSearchBNDMq_g_u8_lowercasetable];\n"
    "$Lt_0_6658:\n"
    " //<loop> Loop body line 66\n"
    " .loc 3 66 0\n"
    " add.u32 %r27, %r10, %r15;\n"
    " ld.global.u8 %r28, [%r27+12];\n"
    " add.u32 %r29, %r28, %r26;\n"
    " ld.global.u8 %r30, [%r29+0];\n"
    " ld.global.u8 %r31, [%r27+11];\n"
    " add.u32 %r32, %r31, %r26;\n"
    " ld.global.u8 %r33, [%r32+0];\n"
    " shl.b32 %r34, %r33, 4;\n"
    " or.b32 %r35, %r30, %r34;\n"
    " mul.lo.u32 %r36, %r35, 4;\n"
    " add.u32 %r37, %r13, %r36;\n"
    " ld.global.u32 %r38, [%r37+0];\n"
    " mov.u32 %r39, 0;\n"
    " setp.eq.u32 %p3, %r38, %r39;\n"
    " @%p3 bra $Lt_0_258;\n"
    " //<loop> Part of loop body line 66, head labeled $Lt_0_6658\n"
    " .loc 3 69 0\n"
    " mov.s32 %r40, %r15;\n"
    " .loc 3 70 0\n"
    " sub.u32 %r41, %r15, %r11;\n"
    " add.u32 %r42, %r41, 1;\n"
    " sub.s32 %r43, %r11, 1;\n"
    "$Lt_0_7682:\n"
    " //<loop> Loop body line 73\n"
    " .loc 3 73 0\n"
    " sub.u32 %r40, %r40, 1;\n"
    " shr.u32 %r44, %r38, %r43;\n"
    " mov.u32 %r45, 0;\n"
    " setp.eq.u32 %p4, %r44, %r45;\n"
    " @%p4 bra $Lt_0_8450;\n"
    " //<loop> Part of loop body line 73, head labeled $Lt_0_7682\n"
    " setp.le.u32 %p5, %r40, %r42;\n"
    " @%p5 bra $Lt_0_8706;\n"
    " //<loop> Part of loop body line 73, head labeled $Lt_0_7682\n"
    " .loc 3 76 0\n"
    " mov.s32 %r15, %r40;\n"
    " bra.uni $Lt_0_8450;\n"
    "$Lt_0_8706:\n"
    " //<loop> Part of loop body line 73, head labeled $Lt_0_7682\n"
    " .loc 3 78 0\n"
    " mov.s16 %rh3, %rh2;\n"
    " add.u16 %rh4, %rh3, 1;\n"
    " mov.u16 %rh2, %rh4;\n"
    " mul.wide.u16 %r46, %rh3, 2;\n"
    " add.u32 %r47, %r22, %r46;\n"
    " st.global.u16 [%r47+2], %r40;\n"
    "$Lt_0_8450:\n"
    "$Lt_0_7938:\n"
    " //<loop> Part of loop body line 73, head labeled $Lt_0_7682\n"
    " .loc 3 83 0\n"
    " mov.u32 %r48, 0;\n"
    " setp.eq.u32 %p6, %r40, %r48;\n"
    " @%p6 bra $Lt_0_258;\n"
    " //<loop> Part of loop body line 73, head labeled $Lt_0_7682\n"
    " .loc 3 86 0\n"
    " add.u32 %r49, %r10, %r40;\n"
    " ld.global.u8 %r50, [%r49+12];\n"
    " add.u32 %r51, %r50, %r26;\n"
    " ld.global.u8 %r52, [%r51+0];\n"
    " ld.global.u8 %r53, [%r49+11];\n"
    " add.u32 %r54, %r53, %r26;\n"
    " ld.global.u8 %r55, [%r54+0];\n"
    " shl.b32 %r56, %r55, 4;\n"
    " or.b32 %r57, %r52, %r56;\n"
    " mul.lo.u32 %r58, %r57, 4;\n"
    " add.u32 %r59, %r13, %r58;\n"
    " ld.global.u32 %r60, [%r59+0];\n"
    " shl.b32 %r61, %r38, 1;\n"
    " and.b32 %r38, %r60, %r61;\n"
    " mov.u32 %r62, 0;\n"
    " setp.ne.u32 %p7, %r38, %r62;\n"
    " @%p7 bra $Lt_0_7682;\n"
    "$Lt_0_258:\n"
    "$Lt_0_6914:\n"
    " //<loop> Part of loop body line 66, head labeled $Lt_0_6658\n"
    " .loc 3 89 0\n"
    " add.u32 %r63, %r11, %r15;\n"
    " sub.u32 %r15, %r63, 1;\n"
    " setp.ge.u32 %p8, %r23, %r15;\n"
    " @%p8 bra $Lt_0_6658;\n"
    " bra.uni $Lt_0_6146;\n"
    "$Lt_0_9474:\n"
    "$Lt_0_6146:\n"
    " .loc 3 92 0\n"
    " st.global.u16 [%r22+0], %rh2;\n"
    "$LBB17_B2gCudaSearchBNDMq:\n"
    " .loc 3 94 0\n"
    " exit;\n"
    "$LDWend_B2gCudaSearchBNDMq:\n"
    " } // B2gCudaSearchBNDMq\n"
    "";
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* \brief Register the CUDA B2g Mpm.
|
|
|
|
|
*/
|
|
|
|
|
@ -1752,15 +1430,9 @@ static int B2gCudaMpmStreamDataInit(B2gCudaMpmThreadCtxData *tctx, MpmCudaConf *
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Load the CUmodule */
|
|
|
|
|
#if defined(__x86_64__) || defined(__ia64__)
|
|
|
|
|
sd->b2g_cuda_cumodule_handle = SCCudaHlGetCudaModule(&sd->b2g_cuda_cumodule,
|
|
|
|
|
b2g_cuda_ptx_image_64_bit,
|
|
|
|
|
"util-mpm-b2g-cuda-kernel",
|
|
|
|
|
module_data->handle);
|
|
|
|
|
#else
|
|
|
|
|
sd->b2g_cuda_cumodule_handle = SCCudaHlGetCudaModule(&sd->b2g_cuda_cumodule,
|
|
|
|
|
b2g_cuda_ptx_image_32_bit,
|
|
|
|
|
module_data->handle);
|
|
|
|
|
#endif
|
|
|
|
|
if (sd->b2g_cuda_cumodule_handle == -1) {
|
|
|
|
|
SCLogError(SC_ERR_B2G_CUDA_ERROR, "Error getting a cuda module");
|
|
|
|
|
goto error;
|
|
|
|
|
|