You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
suricata/src/util-hugepages.c

415 lines
14 KiB
C

/* Copyright (C) 2023 Open Information Security Foundation
*
* You can copy, redistribute or modify this Program under the terms of
* the GNU General Public License version 2 as published by the Free
* Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
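/**
* \file
*
* \author Lukas Sismis <lsismis@oisf.net>
*
* Snapshots of the system's hugepage usage per NUMA node and per hugepage
* size, and their evaluation after Suricata's initialization.
*/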
#include "suricata.h"
#include "util-debug.h"
#include "util-hugepages.h"
#include "util-path.h"
/* Forward declarations of the file-local helpers defined further below. */

/* NUMA node and hugepage size discovery */
static uint16_t SystemNodeCountGet(void);
static uint16_t SystemHugepageSizesCntPerNodeGet(uint16_t node_index);
static void SystemHugepagePerNodeGetHugepageSizes(
        uint16_t node_index, uint16_t hp_sizes_cnt, uint32_t *hp_sizes);
static int16_t SystemHugepagePerNodeGetHugepageInfo(uint16_t node_index, NodeInfo *node);

/* allocation and release of the per-node bookkeeping */
static HugepageInfo *SystemHugepageHugepageInfoCreate(uint16_t hp_size_cnt);
static void SystemHugepageHugepageInfoDestroy(HugepageInfo *h);
static void SystemHugepageNodeInfoDestroy(NodeInfo *n);

/* debug output */
static void SystemHugepageNodeInfoDump(NodeInfo *n);
static void SystemHugepageSnapshotDump(SystemHugepageSnapshot *s);
/**
 * \brief Identifies the OS-specific source of hugepage information
 */
typedef enum OSHugepageAction_ {
    /** unknown/unsupported OS */
    OS_UNKNOWN,
    /** Linux - selected when /sys/devices/system/node/ exists */
    OS_LINUX_SYS_DEVICES,
} OSHugepageAction;
/**
 * \brief Determines which OS-specific backend can provide hugepage information
 * \returns OS_LINUX_SYS_DEVICES when /sys/devices/system/node/ exists,
 *          OS_UNKNOWN otherwise
 */
static OSHugepageAction SystemHugepageDetermineOS(void)
{
    // try Linux - the only backend probed so far
    const bool linux_sysfs_present = SCPathExists("/sys/devices/system/node/");
    return linux_sysfs_present ? OS_LINUX_SYS_DEVICES : OS_UNKNOWN;
}
/**
 * \brief Tells whether hugepage information can be gathered on this system
 * \returns true when an OS backend was detected, false otherwise
 */
static bool SystemHugepageSupported(void)
{
    return SystemHugepageDetermineOS() != OS_UNKNOWN;
}
/**
 * \brief Linux-specific function to detect number of NUMA nodes on the system
 *
 * Counts the directories in /sys/devices/system/node/ whose name starts
 * with "node".
 *
 * \returns number of NUMA nodes, 0 on error
 */
static uint16_t SystemNodeCountGetLinux(void)
{
    static const char dir_path[] = "/sys/devices/system/node/";
    static const char prefix[] = "node";
    const size_t prefix_len = sizeof(prefix) - 1;

    DIR *dir = opendir(dir_path);
    if (dir == NULL) {
        // honor the documented contract: signal the failure with 0 instead of aborting
        SCLogInfo("unable to open %s", dir_path);
        return 0;
    }

    uint16_t count = 0;
    struct dirent *entry;
    while ((entry = readdir(dir)) != NULL) {
        if (SCIsRegularDirectory(entry) && strncmp(entry->d_name, prefix, prefix_len) == 0)
            count++;
    }
    closedir(dir);
    return count;
}
/**
 * \brief Linux-specific function to count the hugepage sizes offered by a NUMA node
 *
 * Every directory named "hugepages-*" in the node's sysfs hugepages
 * directory counts as one hugepage size.
 *
 * \param[in] node_index index of the NUMA node
 * \returns number of hugepage sizes, 0 when the directory cannot be opened
 */
static uint16_t SystemHugepageSizesCntPerNodeGetLinux(uint16_t node_index)
{
    char dir_path[256];
    snprintf(dir_path, sizeof(dir_path), "/sys/devices/system/node/node%d/hugepages/", node_index);

    DIR *hp_dir = opendir(dir_path);
    if (hp_dir == NULL) {
        SCLogInfo("unable to open %s", dir_path);
        return 0;
    }

    static const char prefix[] = "hugepages-";
    uint16_t sizes_cnt = 0;
    for (struct dirent *de = readdir(hp_dir); de != NULL; de = readdir(hp_dir)) {
        if (!SCIsRegularDirectory(de))
            continue;
        if (strncmp(de->d_name, prefix, strlen(prefix)) != 0)
            continue;
        sizes_cnt++;
    }
    closedir(hp_dir);
    return sizes_cnt;
}
/**
 * \brief Linux-specific function to detect unique hugepage sizes
 * \note At most `hp_sizes_cnt` entries are written to `hp_sizes`; slots that
 *       are not filled are left untouched
 * \param[in] node_index index of the NUMA node
 * \param[in] hp_sizes_cnt number of elements available in `hp_sizes`
 * \param[out] hp_sizes a pointer to the array of hugepage sizes (in kB)
 */
static void SystemHugepagePerNodeGetHugepageSizesLinux(
        uint16_t node_index, uint16_t hp_sizes_cnt, uint32_t *hp_sizes)
{
    char dir_path[256];
    snprintf(dir_path, sizeof(dir_path), "/sys/devices/system/node/node%d/hugepages/", node_index);
    DIR *dir = opendir(dir_path);
    if (dir == NULL)
        FatalError("unable to open %s", dir_path);

    uint16_t index = 0;
    struct dirent *entry;
    // the directory content may differ from what the caller counted earlier,
    // so never write past the end of the array
    while (index < hp_sizes_cnt && (entry = readdir(dir)) != NULL) {
        if (!SCIsRegularDirectory(entry) || strncmp(entry->d_name, "hugepages-", 10) != 0)
            continue;

        uint32_t size_kb = 0;
        if (sscanf(entry->d_name, "hugepages-%ukB", &size_kb) != 1) {
            // the name carries no parsable size, do not consume an array slot for it
            SCLogInfo("unable to parse hugepage size from %s%s", dir_path, entry->d_name);
            continue;
        }
        hp_sizes[index++] = size_kb;
    }
    closedir(dir);
}
/**
 * \brief Linux-specific function to read the number of allocated and free
 *        hugepages of every hugepage size on a NUMA node
 *
 * For each size the values are read from the `nr_hugepages` and
 * `free_hugepages` files of the node's sysfs hugepages directory.
 *
 * \note Arrays `hugepages` and `hp_sizes` are expected to have the same size
 * \param[out] hugepages a pointer to the array of hugepage info structures
 * \param[in] hp_sizes a pointer to the array of hugepage sizes
 * \param[in] hp_sizes_cnt number of hugepage sizes
 * \param[in] node_index index of the NUMA node
 * \returns 0 on success, -SC_ENOENT when a file cannot be opened,
 *          -SC_EINVAL when a value cannot be parsed
 */
static int16_t SystemHugepagePerNodeGetHugepageInfoLinux(
        HugepageInfo *hugepages, uint32_t *hp_sizes, uint16_t hp_sizes_cnt, uint16_t node_index)
{
    // the index has the same unsigned type as the count it is compared against
    for (uint16_t i = 0; i < hp_sizes_cnt; i++) {
        hugepages[i].size_kb = hp_sizes[i];

        char path[256];
        snprintf(path, sizeof(path),
                "/sys/devices/system/node/node%hu/hugepages/hugepages-%ukB/nr_hugepages",
                node_index, hp_sizes[i]);
        FILE *f = fopen(path, "r");
        if (!f) {
            SCLogInfo("unable to open %s", path);
            return -SC_ENOENT;
        }
        // NOTE(review): %hu cannot represent counts above 65535 - confirm the
        // field is wide enough for systems with that many hugepages
        if (fscanf(f, "%hu", &hugepages[i].allocated) != 1) {
            SCLogInfo("failed to read the total number of allocated hugepages (%ukB) on node %hu",
                    hp_sizes[i], node_index);
            fclose(f);
            return -SC_EINVAL;
        }
        fclose(f);

        snprintf(path, sizeof(path),
                "/sys/devices/system/node/node%hu/hugepages/hugepages-%ukB/free_hugepages",
                node_index, hp_sizes[i]);
        f = fopen(path, "r");
        if (!f) {
            SCLogInfo("unable to open %s", path);
            return -SC_ENOENT;
        }
        if (fscanf(f, "%hu", &hugepages[i].free) != 1) {
            SCLogInfo("failed to read the total number of free hugepages (%ukB) on node %hu",
                    hp_sizes[i], node_index);
            fclose(f);
            return -SC_EINVAL;
        }
        fclose(f);
    }

    return 0;
}
/**
 * \brief Collects the hugepage information of a single NUMA node
 *
 * On return `node->hugepages` holds a newly allocated array with
 * `node->num_hugepage_sizes` elements, also when the OS-specific readout
 * reports an error.
 *
 * \param[in] node_index index of the NUMA node
 * \param[out] node a pointer to the structure to hold hugepage info
 * \returns 0 on success, negative number on error
 */
static int16_t SystemHugepagePerNodeGetHugepageInfo(uint16_t node_index, NodeInfo *node)
{
    const uint16_t sizes_cnt = SystemHugepageSizesCntPerNodeGet(node_index);
    if (sizes_cnt == 0) {
        SCLogInfo("hugepages not found for node %d", node_index);
        return -SC_ENOENT;
    }

    // temporary list of the hugepage sizes found on this node
    uint32_t *sizes = SCCalloc(sizes_cnt, sizeof(*sizes));
    if (sizes == NULL) {
        FatalError("failed to allocate memory for hugepage info");
    }
    SystemHugepagePerNodeGetHugepageSizes(node_index, sizes_cnt, sizes);

    node->hugepages = SystemHugepageHugepageInfoCreate(sizes_cnt);
    node->num_hugepage_sizes = sizes_cnt;

    int16_t ret = 0;
    const bool on_linux = (SystemHugepageDetermineOS() == OS_LINUX_SYS_DEVICES);
    if (on_linux) {
        ret = SystemHugepagePerNodeGetHugepageInfoLinux(
                node->hugepages, sizes, node->num_hugepage_sizes, node_index);
    }

    SCFree(sizes);
    return ret;
}
/**
* \brief The function detects number of NUMA nodes on the system
* \returns 0 if detection is unsuccessful, otherwise number of detected nodes
*/
static uint16_t SystemNodeCountGet(void)
{
if (SystemHugepageDetermineOS() == OS_LINUX_SYS_DEVICES)
return SystemNodeCountGetLinux();
return 0;
}
/**
* \brief The function detects the number of unique hugepage sizes
* \returns 0 if detection is unsuccessful, otherwise number of hugepage sizes
*/
static uint16_t SystemHugepageSizesCntPerNodeGet(uint16_t node_index)
{
if (SystemHugepageDetermineOS() == OS_LINUX_SYS_DEVICES)
return SystemHugepageSizesCntPerNodeGetLinux(node_index);
return 0;
}
/**
* \brief The function fills an array with unique hugepage sizes
* \note Arrays `hugepages` and `hp_sizes` are expected to have the same size
* \param[in] node_index index of the NUMA node
* \param[in] hp_sizes_cnt number of hugepage sizes
* \param[out] hp_sizes a pointer to the array of hugepage sizes
*/
static void SystemHugepagePerNodeGetHugepageSizes(
uint16_t node_index, uint16_t hp_sizes_cnt, uint32_t *hp_sizes)
{
if (SystemHugepageDetermineOS() == OS_LINUX_SYS_DEVICES)
SystemHugepagePerNodeGetHugepageSizesLinux(node_index, hp_sizes_cnt, hp_sizes);
}
static HugepageInfo *SystemHugepageHugepageInfoCreate(uint16_t hp_size_cnt)
{
HugepageInfo *h = SCCalloc(hp_size_cnt, sizeof(*h));
if (h == NULL) {
FatalError("failed to allocate hugepage info array");
}
return h;
}
/**
 * \brief Releases an array of hugepage info structures
 * \param[in] h a pointer to the array, NULL is accepted and ignored
 */
static void SystemHugepageHugepageInfoDestroy(HugepageInfo *h)
{
    if (h == NULL)
        return;

    SCFree(h);
}
/**
 * \brief Releases the hugepage info array owned by a NUMA node structure
 * \note The node structure itself is not freed here
 * \param[in] n a pointer to the node, NULL is accepted and ignored
 */
static void SystemHugepageNodeInfoDestroy(NodeInfo *n)
{
    if (n != NULL) {
        SystemHugepageHugepageInfoDestroy(n->hugepages);
    }
}
/**
 * \brief Prints the hugepage counters of a NUMA node to the debug log
 * \param[in] n a pointer to the node, NULL is accepted and ignored
 */
static void SystemHugepageNodeInfoDump(NodeInfo *n)
{
    if (n != NULL) {
        // one debug line per hugepage size of the node
        for (uint16_t hp_idx = 0; hp_idx < n->num_hugepage_sizes; hp_idx++) {
            SCLogDebug("Hugepage size - %dkB - allocated: %d free: %d",
                    n->hugepages[hp_idx].size_kb, n->hugepages[hp_idx].allocated,
                    n->hugepages[hp_idx].free);
        }
    }
}
/**
 * \brief Prints every NUMA node of a hugepage snapshot to the debug log
 * \param[in] s a pointer to the snapshot, NULL is accepted and ignored
 */
static void SystemHugepageSnapshotDump(SystemHugepageSnapshot *s)
{
    if (s != NULL) {
        for (uint16_t node_idx = 0; node_idx < s->num_nodes; node_idx++) {
            SCLogDebug("NUMA Node %d", node_idx);
            SystemHugepageNodeInfoDump(&s->nodes[node_idx]);
        }
    }
}
/**
 * \brief Releases a hugepage snapshot including all of its NUMA node data
 * \param[in] s a pointer to the snapshot, NULL is accepted and ignored
 */
void SystemHugepageSnapshotDestroy(SystemHugepageSnapshot *s)
{
    if (s == NULL)
        return;

    // release the per-node arrays first, then the node array and the snapshot itself
    for (uint16_t node_idx = 0; node_idx < s->num_nodes; node_idx++) {
        NodeInfo *node = &s->nodes[node_idx];
        SystemHugepageNodeInfoDestroy(node);
    }
    SCFree(s->nodes);
    SCFree(s);
}
/**
* \brief The function creates a snapshot of the system's hugepage usage
* per NUMA node and per hugepage size.
* The snapshot is used to evaluate the system's hugepage usage after
* initialization of Suricata.
* \returns a pointer to the snapshot, NULL on error
*/
SystemHugepageSnapshot *SystemHugepageSnapshotCreate(void)
{
if (!SystemHugepageSupported())
return NULL;
uint16_t node_cnt = SystemNodeCountGet();
if (node_cnt == 0) {
SCLogInfo("hugepage snapshot failed - cannot obtain number of NUMA nodes in the system");
return NULL;
}
NodeInfo *nodes = SCCalloc(node_cnt, sizeof(*nodes));
if (nodes == NULL) {
FatalError("failed to allocate memory for NUMA node info");
}
SystemHugepageSnapshot *s = SCCalloc(1, sizeof(*s));
if (s == NULL) {
SCFree(nodes);
FatalError("failed to allocate memory for NUMA node snapshot");
}
s->num_nodes = node_cnt;
s->nodes = nodes;
for (uint16_t i = 0; i < s->num_nodes; i++) {
int16_t ret = SystemHugepagePerNodeGetHugepageInfo(i, &s->nodes[i]);
if (ret != 0) {
SystemHugepageSnapshotDestroy(s);
return NULL;
}
}
return s;
}
/**
 * \brief The function compares two hugepage snapshots and prints out
 *        recommendations for hugepage configuration
 *
 * Nothing is evaluated when hugepage detection is not supported or when
 * either snapshot is NULL. A change in the number of NUMA nodes or in the
 * number of hugepage sizes of a node between the snapshots is reported
 * through FatalError.
 *
 * \param[in] pre_s a pointer to the snapshot taken before Suricata initialization
 * \param[in] post_s a pointer to the snapshot taken after Suricata initialization
 */
void SystemHugepageEvaluateHugepages(SystemHugepageSnapshot *pre_s, SystemHugepageSnapshot *post_s)
{
    if (!SystemHugepageSupported() || pre_s == NULL || post_s == NULL)
        return;

    SCLogDebug("Hugepages before initialization");
    SystemHugepageSnapshotDump(pre_s);

    SCLogDebug("Hugepages after initialization");
    SystemHugepageSnapshotDump(post_s);

    if (pre_s->num_nodes != post_s->num_nodes)
        FatalError("Number of NUMA nodes changed during hugepage evaluation");

    for (int32_t i = 0; i < post_s->num_nodes; i++) {
        if (pre_s->nodes[i].num_hugepage_sizes != post_s->nodes[i].num_hugepage_sizes)
            FatalError("Number of NUMA node hugepage sizes changed during hugepage evaluation");

        // bound the loop by the sizes of the node being evaluated - the number
        // of hugepage sizes is stored per node and may differ between nodes
        for (int32_t j = 0; j < post_s->nodes[i].num_hugepage_sizes; j++) {
            HugepageInfo *prerun_hp = &pre_s->nodes[i].hugepages[j];
            HugepageInfo *postrun_hp = &post_s->nodes[i].hugepages[j];

            if (prerun_hp->free == 0) {
                continue; // no hugepage of this size was free on this node before the start
            } else if (prerun_hp->free < postrun_hp->free) {
                SCLogWarning(
                        "Hugepage usage decreased while it should only increase/stay the same");
            } else if (prerun_hp->free == postrun_hp->free) {
                SCLogPerf("%ukB hugepages on NUMA node %u are unused and can be deallocated",
                        postrun_hp->size_kb, i);
            } else { // assumes this is an active NUMA node because at least some hugepages were
                     // used
                // speculative hint only for 2048kB pages as e.g. 1 GB pages can leave a lot of room
                // for additional allocations
                if (postrun_hp->size_kb == 2048 && postrun_hp->free == 0) {
                    SCLogPerf("all %ukB hugepages used on NUMA node %d - consider increasing to "
                              "prevent memory allocation from other NUMA nodes",
                            postrun_hp->size_kb, i);
                }

                // more than half of the initially free hugepages stayed free, suggest a
                // lower count that keeps a 15 % reserve on top of the pages in use
                float free_hugepages_ratio = (float)postrun_hp->free / (float)prerun_hp->free;
                if (free_hugepages_ratio > 0.5) {
                    int32_t used_hps = prerun_hp->free - postrun_hp->free;
                    SCLogPerf("Hugepages on NUMA node %u can be set to %.0lf (only using %u/%u "
                              "%ukB hugepages)",
                            i, ceil(used_hps * 1.15), used_hps, prerun_hp->free,
                            postrun_hp->size_kb);
                }
            }
        }
    }
}