mirror of https://github.com/OISF/suricata
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
415 lines
14 KiB
C
415 lines
14 KiB
C
/* Copyright (C) 2023 Open Information Security Foundation
 *
 * You can copy, redistribute or modify this Program under the terms of
 * the GNU General Public License version 2 as published by the Free
 * Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 */
|
|
|
|
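/**
 * \file
 *
 * Snapshots of hugepage allocation per NUMA node and hugepage size, and
 * evaluation of the difference between two such snapshots.
 *
 * \author Lukas Sismis <lsismis@oisf.net>
 */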
|
|
|
|
#include "suricata.h"
|
|
#include "util-debug.h"
|
|
#include "util-hugepages.h"
|
|
#include "util-path.h"
|
|
|
|
static uint16_t SystemHugepageSizesCntPerNodeGet(uint16_t node_index);
|
|
static uint16_t SystemNodeCountGet(void);
|
|
static void SystemHugepagePerNodeGetHugepageSizes(
|
|
uint16_t node_index, uint16_t hp_sizes_cnt, uint32_t *hp_sizes);
|
|
static HugepageInfo *SystemHugepageHugepageInfoCreate(uint16_t hp_size_cnt);
|
|
static int16_t SystemHugepagePerNodeGetHugepageInfo(uint16_t node_index, NodeInfo *node);
|
|
static void SystemHugepageHugepageInfoDestroy(HugepageInfo *h);
|
|
static void SystemHugepageNodeInfoDestroy(NodeInfo *n);
|
|
static void SystemHugepageNodeInfoDump(NodeInfo *n);
|
|
static void SystemHugepageSnapshotDump(SystemHugepageSnapshot *s);
|
|
|
|
/** \brief Identifies which OS-specific way of reading hugepage information applies */
typedef enum OSHugepageAction_ {
    OS_UNKNOWN = 0,           /**< unknown/unsupported OS */
    OS_LINUX_SYS_DEVICES = 1, /**< Linux, /sys/devices/system/node/ is present */
} OSHugepageAction;
|
|
|
|
static OSHugepageAction SystemHugepageDetermineOS(void)
|
|
{
|
|
// try Linux
|
|
if (SCPathExists("/sys/devices/system/node/")) {
|
|
return OS_LINUX_SYS_DEVICES;
|
|
}
|
|
|
|
return OS_UNKNOWN;
|
|
}
|
|
|
|
static bool SystemHugepageSupported(void)
|
|
{
|
|
if (SystemHugepageDetermineOS() != OS_UNKNOWN)
|
|
return true;
|
|
return false;
|
|
}
|
|
|
|
/**
 * \brief Linux-specific function to detect number of NUMA nodes on the system
 *
 * Counts the directories in /sys/devices/system/node/ whose name starts
 * with "node".
 *
 * \returns number of NUMA nodes, 0 on error (the directory cannot be opened)
 */
static uint16_t SystemNodeCountGetLinux(void)
{
    static const char dir_path[] = "/sys/devices/system/node/";
    static const char node_prefix[] = "node";

    DIR *dir = opendir(dir_path);
    if (dir == NULL) {
        // Report the failure through the documented return value instead of
        // aborting; the count of 0 is handled by the callers.
        SCLogInfo("unable to open %s", dir_path);
        return 0;
    }

    uint16_t count = 0;
    struct dirent *entry;
    while ((entry = readdir(dir)) != NULL) {
        if (SCIsRegularDirectory(entry) &&
                strncmp(entry->d_name, node_prefix, sizeof(node_prefix) - 1) == 0)
            count++;
    }
    closedir(dir);
    return count;
}
|
|
|
|
/**
 * \brief Linux-specific function to count the hugepage sizes of a NUMA node
 *
 * Counts the directories in /sys/devices/system/node/node<N>/hugepages/
 * whose name starts with "hugepages-".
 *
 * \param[in] node_index index of the NUMA node
 * \returns number of hugepage sizes, 0 when the directory cannot be opened
 */
static uint16_t SystemHugepageSizesCntPerNodeGetLinux(uint16_t node_index)
{
    static const char hp_prefix[] = "hugepages-";
    const size_t hp_prefix_len = sizeof(hp_prefix) - 1;

    char hp_dir_path[256];
    snprintf(hp_dir_path, sizeof(hp_dir_path), "/sys/devices/system/node/node%d/hugepages/",
            node_index);

    DIR *hp_dir = opendir(hp_dir_path);
    if (hp_dir == NULL) {
        SCLogInfo("unable to open %s", hp_dir_path);
        return 0;
    }

    uint16_t sizes_cnt = 0;
    for (struct dirent *de = readdir(hp_dir); de != NULL; de = readdir(hp_dir)) {
        if (!SCIsRegularDirectory(de))
            continue;
        if (strncmp(de->d_name, hp_prefix, hp_prefix_len) != 0)
            continue;
        sizes_cnt++;
    }
    closedir(hp_dir);
    return sizes_cnt;
}
|
|
|
|
/**
 * \brief Linux-specific function to detect unique hugepage sizes
 *
 * Scans /sys/devices/system/node/node<N>/hugepages/ and stores the size
 * (in kB) parsed from every "hugepages-<size>kB" directory name.
 *
 * \note At most `hp_sizes_cnt` entries are written. Directory entries whose
 *       size cannot be parsed are skipped, so fewer than `hp_sizes_cnt`
 *       entries may be filled in; the remaining elements are left untouched.
 * \note Failure to open the directory is fatal.
 *
 * \param[in] node_index index of the NUMA node
 * \param[in] hp_sizes_cnt capacity of the `hp_sizes` array
 * \param[out] hp_sizes a pointer to the array of hugepage sizes
 */
static void SystemHugepagePerNodeGetHugepageSizesLinux(
        uint16_t node_index, uint16_t hp_sizes_cnt, uint32_t *hp_sizes)
{
    char dir_path[256];
    snprintf(dir_path, sizeof(dir_path), "/sys/devices/system/node/node%d/hugepages/", node_index);
    DIR *dir = opendir(dir_path);
    if (dir == NULL) {
        FatalError("unable to open %s", dir_path);
    }

    uint16_t index = 0;
    struct dirent *entry;
    // The capacity check guards against the directory gaining entries since
    // the caller counted them - never write past the end of hp_sizes.
    while (index < hp_sizes_cnt && (entry = readdir(dir)) != NULL) {
        if (!SCIsRegularDirectory(entry) || strncmp(entry->d_name, "hugepages-", 10) != 0)
            continue;

        // scan into a plain unsigned int so the object matches the %u conversion
        unsigned int size_kb = 0;
        if (sscanf(entry->d_name, "hugepages-%ukB", &size_kb) != 1) {
            SCLogInfo("unable to parse hugepage size from %s%s", dir_path, entry->d_name);
            continue;
        }
        hp_sizes[index] = size_kb;
        index++;
    }
    closedir(dir);
}
|
|
|
|
/**
|
|
* \brief Linux-specific function to detect number of unique hugepage sizes
|
|
* \note Arrays `hugepages` and `hp_sizes` are expected to have the same size
|
|
* \param[out] hugepages a pointer to the array of hugepage info structures
|
|
* \param[in] hp_sizes a pointer to the array of hugepage sizes
|
|
* \param[in] hp_sizes_cnt number of hugepage sizes
|
|
* \param[in] node_index index of the NUMA node
|
|
* \returns 0 on success, negative number on error
|
|
*/
|
|
static int16_t SystemHugepagePerNodeGetHugepageInfoLinux(
|
|
HugepageInfo *hugepages, uint32_t *hp_sizes, uint16_t hp_sizes_cnt, uint16_t node_index)
|
|
{
|
|
for (int16_t i = 0; i < hp_sizes_cnt; i++) {
|
|
hugepages[i].size_kb = hp_sizes[i];
|
|
char path[256];
|
|
snprintf(path, sizeof(path),
|
|
"/sys/devices/system/node/node%hu/hugepages/hugepages-%ukB/nr_hugepages",
|
|
node_index, hp_sizes[i]);
|
|
FILE *f = fopen(path, "r");
|
|
if (!f) {
|
|
SCLogInfo("unable to open %s", path);
|
|
return -SC_ENOENT;
|
|
}
|
|
if (fscanf(f, "%hu", &hugepages[i].allocated) != 1) {
|
|
SCLogInfo("failed to read the total number of allocated hugepages (%ukB) on node %hu",
|
|
hp_sizes[i], node_index);
|
|
fclose(f);
|
|
return -SC_EINVAL;
|
|
}
|
|
fclose(f);
|
|
|
|
snprintf(path, sizeof(path),
|
|
"/sys/devices/system/node/node%hu/hugepages/hugepages-%ukB/free_hugepages",
|
|
node_index, hp_sizes[i]);
|
|
f = fopen(path, "r");
|
|
if (!f) {
|
|
SCLogInfo("unable to open %s", path);
|
|
return -SC_ENOENT;
|
|
}
|
|
if (fscanf(f, "%hu", &hugepages[i].free) != 1) {
|
|
SCLogInfo("failed to read the total number of free hugepages (%ukB) on node %hu",
|
|
hp_sizes[i], node_index);
|
|
fclose(f);
|
|
return -SC_EINVAL;
|
|
}
|
|
fclose(f);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* \brief The function gathers information about hugepages on a given node
|
|
* \param[in] node_index index of the NUMA node
|
|
* \param[out] node a pointer to the structure to hold hugepage info
|
|
* \returns 0 on success, negative number on error
|
|
*/
|
|
static int16_t SystemHugepagePerNodeGetHugepageInfo(uint16_t node_index, NodeInfo *node)
|
|
{
|
|
uint16_t hp_sizes_cnt = SystemHugepageSizesCntPerNodeGet(node_index);
|
|
if (hp_sizes_cnt == 0) {
|
|
SCLogInfo("hugepages not found for node %d", node_index);
|
|
return -SC_ENOENT;
|
|
}
|
|
uint32_t *hp_sizes = SCCalloc(hp_sizes_cnt, sizeof(*hp_sizes));
|
|
if (hp_sizes == NULL) {
|
|
FatalError("failed to allocate memory for hugepage info");
|
|
}
|
|
SystemHugepagePerNodeGetHugepageSizes(node_index, hp_sizes_cnt, hp_sizes);
|
|
|
|
node->hugepages = SystemHugepageHugepageInfoCreate(hp_sizes_cnt);
|
|
node->num_hugepage_sizes = hp_sizes_cnt;
|
|
|
|
int16_t ret = 0;
|
|
if (SystemHugepageDetermineOS() == OS_LINUX_SYS_DEVICES)
|
|
ret = SystemHugepagePerNodeGetHugepageInfoLinux(
|
|
node->hugepages, hp_sizes, node->num_hugepage_sizes, node_index);
|
|
|
|
SCFree(hp_sizes);
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* \brief The function detects number of NUMA nodes on the system
|
|
* \returns 0 if detection is unsuccessful, otherwise number of detected nodes
|
|
*/
|
|
static uint16_t SystemNodeCountGet(void)
|
|
{
|
|
if (SystemHugepageDetermineOS() == OS_LINUX_SYS_DEVICES)
|
|
return SystemNodeCountGetLinux();
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* \brief The function detects the number of unique hugepage sizes
|
|
* \returns 0 if detection is unsuccessful, otherwise number of hugepage sizes
|
|
*/
|
|
static uint16_t SystemHugepageSizesCntPerNodeGet(uint16_t node_index)
|
|
{
|
|
if (SystemHugepageDetermineOS() == OS_LINUX_SYS_DEVICES)
|
|
return SystemHugepageSizesCntPerNodeGetLinux(node_index);
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* \brief The function fills an array with unique hugepage sizes
|
|
* \note Arrays `hugepages` and `hp_sizes` are expected to have the same size
|
|
* \param[in] node_index index of the NUMA node
|
|
* \param[in] hp_sizes_cnt number of hugepage sizes
|
|
* \param[out] hp_sizes a pointer to the array of hugepage sizes
|
|
*/
|
|
static void SystemHugepagePerNodeGetHugepageSizes(
|
|
uint16_t node_index, uint16_t hp_sizes_cnt, uint32_t *hp_sizes)
|
|
{
|
|
if (SystemHugepageDetermineOS() == OS_LINUX_SYS_DEVICES)
|
|
SystemHugepagePerNodeGetHugepageSizesLinux(node_index, hp_sizes_cnt, hp_sizes);
|
|
}
|
|
|
|
static HugepageInfo *SystemHugepageHugepageInfoCreate(uint16_t hp_size_cnt)
|
|
{
|
|
HugepageInfo *h = SCCalloc(hp_size_cnt, sizeof(*h));
|
|
if (h == NULL) {
|
|
FatalError("failed to allocate hugepage info array");
|
|
}
|
|
return h;
|
|
}
|
|
|
|
/**
 * \brief Release an array of hugepage info structures
 * \param[in] h a pointer to the array, NULL is accepted and ignored
 */
static void SystemHugepageHugepageInfoDestroy(HugepageInfo *h)
{
    if (h == NULL) {
        return;
    }

    SCFree(h);
}
|
|
|
|
/**
 * \brief Release the hugepage info array owned by a NUMA node record
 * \note The node structure itself is not freed
 * \param[in] n a pointer to the node record, NULL is accepted and ignored
 */
static void SystemHugepageNodeInfoDestroy(NodeInfo *n)
{
    if (n != NULL) {
        SystemHugepageHugepageInfoDestroy(n->hugepages);
    }
}
|
|
|
|
/**
 * \brief Print the hugepage records of a NUMA node through SCLogDebug
 * \param[in] n a pointer to the node record, NULL is accepted and ignored
 */
static void SystemHugepageNodeInfoDump(NodeInfo *n)
{
    if (n != NULL) {
        uint16_t idx = 0;
        while (idx < n->num_hugepage_sizes) {
            SCLogDebug("Hugepage size - %dkB - allocated: %d free: %d", n->hugepages[idx].size_kb,
                    n->hugepages[idx].allocated, n->hugepages[idx].free);
            idx++;
        }
    }
}
|
|
|
|
/**
|
|
* \brief The function prints out the hugepage snapshot
|
|
* \param[in] s a pointer to the snapshot
|
|
*/
|
|
static void SystemHugepageSnapshotDump(SystemHugepageSnapshot *s)
|
|
{
|
|
if (s == NULL)
|
|
return;
|
|
|
|
for (uint16_t i = 0; i < s->num_nodes; i++) {
|
|
SCLogDebug("NUMA Node %d", i);
|
|
SystemHugepageNodeInfoDump(&(s->nodes[i]));
|
|
}
|
|
}
|
|
|
|
/**
 * \brief Release a hugepage snapshot together with its node records
 * \param[in] s a pointer to the snapshot, NULL is accepted and ignored
 */
void SystemHugepageSnapshotDestroy(SystemHugepageSnapshot *s)
{
    if (s == NULL) {
        return;
    }

    // first the per-node hugepage arrays, then the node array and the snapshot
    for (uint16_t node_idx = 0; node_idx != s->num_nodes; ++node_idx) {
        NodeInfo *node = &s->nodes[node_idx];
        SystemHugepageNodeInfoDestroy(node);
    }
    SCFree(s->nodes);
    SCFree(s);
}
|
|
|
|
/**
|
|
* \brief The function creates a snapshot of the system's hugepage usage
|
|
* per NUMA node and per hugepage size.
|
|
* The snapshot is used to evaluate the system's hugepage usage after
|
|
* initialization of Suricata.
|
|
* \returns a pointer to the snapshot, NULL on error
|
|
*/
|
|
SystemHugepageSnapshot *SystemHugepageSnapshotCreate(void)
|
|
{
|
|
if (!SystemHugepageSupported())
|
|
return NULL;
|
|
|
|
uint16_t node_cnt = SystemNodeCountGet();
|
|
if (node_cnt == 0) {
|
|
SCLogInfo("hugepage snapshot failed - cannot obtain number of NUMA nodes in the system");
|
|
return NULL;
|
|
}
|
|
NodeInfo *nodes = SCCalloc(node_cnt, sizeof(*nodes));
|
|
if (nodes == NULL) {
|
|
FatalError("failed to allocate memory for NUMA node info");
|
|
}
|
|
|
|
SystemHugepageSnapshot *s = SCCalloc(1, sizeof(*s));
|
|
if (s == NULL) {
|
|
SCFree(nodes);
|
|
FatalError("failed to allocate memory for NUMA node snapshot");
|
|
}
|
|
s->num_nodes = node_cnt;
|
|
s->nodes = nodes;
|
|
|
|
for (uint16_t i = 0; i < s->num_nodes; i++) {
|
|
int16_t ret = SystemHugepagePerNodeGetHugepageInfo(i, &s->nodes[i]);
|
|
if (ret != 0) {
|
|
SystemHugepageSnapshotDestroy(s);
|
|
return NULL;
|
|
}
|
|
}
|
|
|
|
return s;
|
|
}
|
|
|
|
/**
|
|
* \brief The function compares two hugepage snapshots and prints out
|
|
* recommendations for hugepage configuration
|
|
* \param[in] pre_s a pointer to the snapshot taken before Suricata initialization
|
|
* \param[in] post_s a pointer to the snapshot taken after Suricata initialization
|
|
*/
|
|
void SystemHugepageEvaluateHugepages(SystemHugepageSnapshot *pre_s, SystemHugepageSnapshot *post_s)
|
|
{
|
|
if (!SystemHugepageSupported() || pre_s == NULL || post_s == NULL)
|
|
return;
|
|
|
|
SCLogDebug("Hugepages before initialization");
|
|
SystemHugepageSnapshotDump(pre_s);
|
|
|
|
SCLogDebug("Hugepages after initialization");
|
|
SystemHugepageSnapshotDump(post_s);
|
|
|
|
if (pre_s->num_nodes != post_s->num_nodes)
|
|
FatalError("Number of NUMA nodes changed during hugepage evaluation");
|
|
|
|
for (int32_t i = 0; i < post_s->num_nodes; i++) {
|
|
if (pre_s->nodes[i].num_hugepage_sizes != post_s->nodes[i].num_hugepage_sizes)
|
|
FatalError("Number of NUMA node hugepage sizes changed during hugepage evaluation");
|
|
|
|
for (int32_t j = 0; j < post_s->nodes->num_hugepage_sizes; j++) {
|
|
HugepageInfo *prerun_hp = &pre_s->nodes[i].hugepages[j];
|
|
HugepageInfo *postrun_hp = &post_s->nodes[i].hugepages[j];
|
|
|
|
if (prerun_hp->free == 0) {
|
|
continue; // this HP size on this node has no HPs allocated
|
|
} else if (prerun_hp->free < postrun_hp->free) {
|
|
SCLogWarning(
|
|
"Hugepage usage decreased while it should only increase/stay the same");
|
|
} else if (prerun_hp->free > 0 && prerun_hp->free == postrun_hp->free) {
|
|
SCLogPerf("%ukB hugepages on NUMA node %u are unused and can be deallocated",
|
|
postrun_hp->size_kb, i);
|
|
} else { // assumes this is an active NUMA node because at least some hugepages were
|
|
// used
|
|
// speculative hint only for 2048kB pages as e.g. 1 GB pages can leave a lot of room
|
|
// for additional allocations
|
|
if (postrun_hp->size_kb == 2048 && postrun_hp->free == 0) {
|
|
SCLogPerf("all %ukB hugepages used on NUMA node %d - consider increasing to "
|
|
"prevent memory allocation from other NUMA nodes",
|
|
postrun_hp->size_kb, i);
|
|
}
|
|
|
|
float free_hugepages_ratio = (float)postrun_hp->free / (float)prerun_hp->free;
|
|
if (free_hugepages_ratio > 0.5) {
|
|
int32_t used_hps = prerun_hp->free - postrun_hp->free;
|
|
SCLogPerf("Hugepages on NUMA node %u can be set to %.0lf (only using %u/%u "
|
|
"%ukB hugepages)",
|
|
i, ceil((prerun_hp->free - postrun_hp->free) * 1.15), used_hps,
|
|
prerun_hp->free, postrun_hp->size_kb);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|