From 13d491f57749d1546fcd8cbc87a091b0ba4b001a Mon Sep 17 00:00:00 2001 From: Victor Julien Date: Sat, 1 Feb 2014 12:57:01 +0100 Subject: [PATCH] profiling: lower overhead when disabled Instead of a large (6k+) structure in the Packet, make the profiling storage dynamic. To do this the Packet->profile is now a pointer. Initial support for selective sampling, e.g. only profile every 1000th packet. --- src/decode.h | 2 +- src/util-profiling.c | 43 +++++++++++++++++--------- src/util-profiling.h | 73 +++++++++++++++++++++++--------------------- 3 files changed, 68 insertions(+), 50 deletions(-) diff --git a/src/decode.h b/src/decode.h index 49a1a69a06..4f3e1e0577 100644 --- a/src/decode.h +++ b/src/decode.h @@ -526,7 +526,7 @@ typedef struct Packet_ #ifdef PROFILING - PktProfiling profile; + PktProfiling *profile; #endif #ifdef __SC_CUDA_SUPPORT__ CudaPacketVars cuda_pkt_vars; diff --git a/src/util-profiling.c b/src/util-profiling.c index 89eceecd59..fdd927eca7 100644 --- a/src/util-profiling.c +++ b/src/util-profiling.c @@ -560,11 +560,11 @@ void SCProfilingDumpPacketStats(void) { } void SCProfilingPrintPacketProfile(Packet *p) { - if (profiling_packets_csv_enabled == 0 || p == NULL || packet_profile_csv_fp == NULL) { + if (profiling_packets_csv_enabled == 0 || p == NULL || packet_profile_csv_fp == NULL || p->profile == NULL) { return; } - uint64_t delta = p->profile.ticks_end - p->profile.ticks_start; + uint64_t delta = p->profile->ticks_end - p->profile->ticks_start; fprintf(packet_profile_csv_fp, "%"PRIu64",%c,%"PRIu8",%"PRIu64",", p->pcap_cnt, PKT_IS_IPV4(p) ? '4' : (PKT_IS_IPV6(p) ? '6' : '?'), p->proto, @@ -575,7 +575,7 @@ void SCProfilingPrintPacketProfile(Packet *p) { uint64_t tmm_streamtcp_tcp = 0; for (i = 0; i < TMM_SIZE; i++) { - PktProfilingTmmData *pdt = &p->profile.tmm[i]; + PktProfilingTmmData *pdt = &p->profile->tmm[i]; uint64_t tmm_delta = pdt->ticks_end - pdt->ticks_start; fprintf(packet_profile_csv_fp, "%"PRIu64",", tmm_delta); @@ -590,7 +590,7 @@ void SCProfilingPrintPacketProfile(Packet *p) { uint64_t app_total = 0; for (i = 0; i < ALPROTO_MAX; i++) { - PktProfilingAppData *pdt = &p->profile.app[i]; + PktProfilingAppData *pdt = &p->profile->app[i]; fprintf(packet_profile_csv_fp,"%"PRIu64",", pdt->ticks_spent); @@ -604,10 +604,10 @@ void SCProfilingPrintPacketProfile(Packet *p) { real_tcp = tmm_streamtcp_tcp - app_total; fprintf(packet_profile_csv_fp, "%"PRIu64",", real_tcp); - fprintf(packet_profile_csv_fp, "%"PRIu64",", p->profile.proto_detect); + fprintf(packet_profile_csv_fp, "%"PRIu64",", p->profile->proto_detect); for (i = 0; i < PROF_DETECT_SIZE; i++) { - PktProfilingDetectData *pdt = &p->profile.detect[i]; + PktProfilingDetectData *pdt = &p->profile->detect[i]; fprintf(packet_profile_csv_fp,"%"PRIu64",", pdt->ticks_spent); } @@ -639,7 +639,7 @@ static void SCProfilingUpdatePacketDetectRecord(PacketProfileDetectId id, uint8_ void SCProfilingUpdatePacketDetectRecords(Packet *p) { PacketProfileDetectId i; for (i = 0; i < PROF_DETECT_SIZE; i++) { - PktProfilingDetectData *pdt = &p->profile.detect[i]; + PktProfilingDetectData *pdt = &p->profile->detect[i]; if (pdt->ticks_spent > 0) { if (PKT_IS_IPV4(p)) { @@ -694,7 +694,7 @@ static void SCProfilingUpdatePacketAppRecord(int alproto, uint8_t ipproto, PktPr void SCProfilingUpdatePacketAppRecords(Packet *p) { int i; for (i = 0; i < ALPROTO_MAX; i++) { - PktProfilingAppData *pdt = &p->profile.app[i]; + PktProfilingAppData *pdt = &p->profile->app[i]; if (pdt->ticks_spent > 0) { if (PKT_IS_IPV4(p)) { @@ -705,11 +705,11 @@ void SCProfilingUpdatePacketAppRecords(Packet *p) { } } - if (p->profile.proto_detect > 0) { + if (p->profile->proto_detect > 0) { if (PKT_IS_IPV4(p)) { - SCProfilingUpdatePacketAppPdRecord(p->proto, p->profile.proto_detect, 4); + SCProfilingUpdatePacketAppPdRecord(p->proto, p->profile->proto_detect, 4); } else { - SCProfilingUpdatePacketAppPdRecord(p->proto, p->profile.proto_detect, 6); + SCProfilingUpdatePacketAppPdRecord(p->proto, p->profile->proto_detect, 6); } } } @@ -749,7 +749,7 @@ void SCProfilingUpdatePacketTmmRecord(int module, uint8_t proto, PktProfilingTmm void SCProfilingUpdatePacketTmmRecords(Packet *p) { int i; for (i = 0; i < TMM_SIZE; i++) { - PktProfilingTmmData *pdt = &p->profile.tmm[i]; + PktProfilingTmmData *pdt = &p->profile->tmm[i]; if (pdt->ticks_start == 0 || pdt->ticks_end == 0 || pdt->ticks_start > pdt->ticks_end) { continue; @@ -764,7 +764,9 @@ void SCProfilingUpdatePacketTmmRecords(Packet *p) { } void SCProfilingAddPacket(Packet *p) { - if (p->profile.ticks_start == 0 || p->profile.ticks_end == 0 || p->profile.ticks_start > p->profile.ticks_end) + if (p == NULL || p->profile == NULL || + p->profile->ticks_start == 0 || p->profile->ticks_end == 0 || + p->profile->ticks_start > p->profile->ticks_end) return; pthread_mutex_lock(&packet_profile_lock); @@ -776,7 +778,7 @@ void SCProfilingAddPacket(Packet *p) { if (PKT_IS_IPV4(p)) { SCProfilePacketData *pd = &packet_profile_data4[p->proto]; - uint64_t delta = p->profile.ticks_end - p->profile.ticks_start; + uint64_t delta = p->profile->ticks_end - p->profile->ticks_start; if (pd->min == 0 || delta < pd->min) { pd->min = delta; } @@ -808,7 +810,7 @@ void SCProfilingAddPacket(Packet *p) { } else if (PKT_IS_IPV6(p)) { SCProfilePacketData *pd = &packet_profile_data6[p->proto]; - uint64_t delta = p->profile.ticks_end - p->profile.ticks_start; + uint64_t delta = p->profile->ticks_end - p->profile->ticks_start; if (pd->min == 0 || delta < pd->min) { pd->min = delta; } @@ -841,6 +843,17 @@ void SCProfilingAddPacket(Packet *p) { pthread_mutex_unlock(&packet_profile_lock); } +static int rate = 1; +static SC_ATOMIC_DECL_AND_INIT(uint64_t, samples); + +PktProfiling *SCProfilePacketStart(void) { + uint64_t sample = SC_ATOMIC_ADD(samples, 1); + if (sample % rate == 0) + return SCCalloc(1, sizeof(PktProfiling)); + else + return NULL; +} + #define CASE_CODE(E) case E: return #E /** diff --git a/src/util-profiling.h b/src/util-profiling.h index 08bee49c6a..bfd5f57f81 100644 --- a/src/util-profiling.h +++ b/src/util-profiling.h @@ -85,14 +85,18 @@ extern __thread int profiling_keyword_entered; profiling_keyword_entered--; \ } +PktProfiling *SCProfilePacketStart(void); + #define PACKET_PROFILING_START(p) \ if (profiling_packets_enabled) { \ - (p)->profile.ticks_start = UtilCpuGetTicks(); \ + (p)->profile = SCProfilePacketStart(); \ + if ((p)->profile != NULL) \ + (p)->profile->ticks_start = UtilCpuGetTicks(); \ } #define PACKET_PROFILING_END(p) \ - if (profiling_packets_enabled) { \ - (p)->profile.ticks_end = UtilCpuGetTicks(); \ + if (profiling_packets_enabled && (p)->profile != NULL) { \ + (p)->profile->ticks_end = UtilCpuGetTicks(); \ SCProfilingAddPacket((p)); \ } @@ -115,20 +119,20 @@ extern __thread int profiling_keyword_entered; } while (0) #define PACKET_PROFILING_COPY_LOCKS(p, id) do { \ - (p)->profile.tmm[(id)].mutex_lock_cnt = mutex_lock_cnt; \ - (p)->profile.tmm[(id)].mutex_lock_wait_ticks = mutex_lock_wait_ticks; \ - (p)->profile.tmm[(id)].mutex_lock_contention = mutex_lock_contention; \ - (p)->profile.tmm[(id)].spin_lock_cnt = spin_lock_cnt; \ - (p)->profile.tmm[(id)].spin_lock_wait_ticks = spin_lock_wait_ticks; \ - (p)->profile.tmm[(id)].spin_lock_contention = spin_lock_contention; \ - (p)->profile.tmm[(id)].rww_lock_cnt = rww_lock_cnt; \ - (p)->profile.tmm[(id)].rww_lock_wait_ticks = rww_lock_wait_ticks; \ - (p)->profile.tmm[(id)].rww_lock_contention = rww_lock_contention; \ - (p)->profile.tmm[(id)].rwr_lock_cnt = rwr_lock_cnt; \ - (p)->profile.tmm[(id)].rwr_lock_wait_ticks = rwr_lock_wait_ticks; \ - (p)->profile.tmm[(id)].rwr_lock_contention = rwr_lock_contention; \ - record_locks = 0;\ - SCProfilingAddPacketLocks((p)); \ + (p)->profile->tmm[(id)].mutex_lock_cnt = mutex_lock_cnt; \ + (p)->profile->tmm[(id)].mutex_lock_wait_ticks = mutex_lock_wait_ticks; \ + (p)->profile->tmm[(id)].mutex_lock_contention = mutex_lock_contention; \ + (p)->profile->tmm[(id)].spin_lock_cnt = spin_lock_cnt; \ + (p)->profile->tmm[(id)].spin_lock_wait_ticks = spin_lock_wait_ticks; \ + (p)->profile->tmm[(id)].spin_lock_contention = spin_lock_contention; \ + (p)->profile->tmm[(id)].rww_lock_cnt = rww_lock_cnt; \ + (p)->profile->tmm[(id)].rww_lock_wait_ticks = rww_lock_wait_ticks; \ + (p)->profile->tmm[(id)].rww_lock_contention = rww_lock_contention; \ + (p)->profile->tmm[(id)].rwr_lock_cnt = rwr_lock_cnt; \ + (p)->profile->tmm[(id)].rwr_lock_wait_ticks = rwr_lock_wait_ticks; \ + (p)->profile->tmm[(id)].rwr_lock_contention = rwr_lock_contention; \ + record_locks = 0; \ + SCProfilingAddPacketLocks((p)); \ } while(0) #else #define PACKET_PROFILING_RESET_LOCKS @@ -136,24 +140,25 @@ extern __thread int profiling_keyword_entered; #endif #define PACKET_PROFILING_TMM_START(p, id) \ - if (profiling_packets_enabled) { \ + if (profiling_packets_enabled && (p)->profile != NULL) { \ if ((id) < TMM_SIZE) { \ - (p)->profile.tmm[(id)].ticks_start = UtilCpuGetTicks(); \ + (p)->profile->tmm[(id)].ticks_start = UtilCpuGetTicks();\ PACKET_PROFILING_RESET_LOCKS; \ } \ } #define PACKET_PROFILING_TMM_END(p, id) \ - if (profiling_packets_enabled) { \ + if (profiling_packets_enabled && (p)->profile != NULL) { \ if ((id) < TMM_SIZE) { \ PACKET_PROFILING_COPY_LOCKS((p), (id)); \ - (p)->profile.tmm[(id)].ticks_end = UtilCpuGetTicks(); \ + (p)->profile->tmm[(id)].ticks_end = UtilCpuGetTicks(); \ } \ } #define PACKET_PROFILING_RESET(p) \ - if (profiling_packets_enabled) { \ - memset(&(p)->profile, 0x00, sizeof(PktProfiling)); \ + if (profiling_packets_enabled && (p)->profile != NULL) { \ + SCFree((p)->profile); \ + (p)->profile = NULL; \ } #define PACKET_PROFILING_APP_START(dp, id) \ @@ -197,28 +202,28 @@ extern __thread int profiling_keyword_entered; } #define PACKET_PROFILING_APP_STORE(dp, p) \ - if (profiling_packets_enabled) { \ + if (profiling_packets_enabled && (p)->profile != NULL) { \ if ((dp)->alproto < ALPROTO_MAX) { \ - (p)->profile.app[(dp)->alproto].ticks_spent += (dp)->ticks_spent; \ - (p)->profile.proto_detect += (dp)->proto_detect_ticks_spent; \ + (p)->profile->app[(dp)->alproto].ticks_spent += (dp)->ticks_spent; \ + (p)->profile->proto_detect += (dp)->proto_detect_ticks_spent; \ } \ } #define PACKET_PROFILING_DETECT_START(p, id) \ - if (profiling_packets_enabled) { \ + if (profiling_packets_enabled && (p)->profile != NULL) { \ if ((id) < PROF_DETECT_SIZE) { \ - (p)->profile.detect[(id)].ticks_start = UtilCpuGetTicks(); \ + (p)->profile->detect[(id)].ticks_start = UtilCpuGetTicks(); \ } \ } #define PACKET_PROFILING_DETECT_END(p, id) \ - if (profiling_packets_enabled) { \ + if (profiling_packets_enabled && (p)->profile != NULL) { \ if ((id) < PROF_DETECT_SIZE) { \ - (p)->profile.detect[(id)].ticks_end = UtilCpuGetTicks();\ - if ((p)->profile.detect[(id)].ticks_start != 0 && \ - (p)->profile.detect[(id)].ticks_start < (p)->profile.detect[(id)].ticks_end) { \ - (p)->profile.detect[(id)].ticks_spent += \ - ((p)->profile.detect[(id)].ticks_end - (p)->profile.detect[(id)].ticks_start); \ + (p)->profile->detect[(id)].ticks_end = UtilCpuGetTicks();\ + if ((p)->profile->detect[(id)].ticks_start != 0 && \ + (p)->profile->detect[(id)].ticks_start < (p)->profile->detect[(id)].ticks_end) { \ + (p)->profile->detect[(id)].ticks_spent += \ + ((p)->profile->detect[(id)].ticks_end - (p)->profile->detect[(id)].ticks_start); \ } \ } \ }