dpdk: check for link up before full startup

ICE card (Intel E810) was not receiving packets immediately
after startup; Suricata workers would appear to be processing
packets while they were not. This eliminates the problem by only
continuing the initialization if the link is already up.

The setting can be turned off manually from the configuration
file.

Ticket: 7381
pull/12888/head
Lukas Sismis 9 months ago committed by Victor Julien
parent 31fbfc322c
commit 640d0985c2

@ -214,3 +214,20 @@ Individual properties can still be set manually if needed.
``tx-descriptors`` in the TAP/IPS modes. Instead it can be set to
a fixed value (e.g. 16384).
.. _dpdk-link-state-change-timeout:
Link State Change timeout
-------------------------
The `linkup-timeout` YAML configuration option allows the user to set a timeout
period (in seconds) to wait until the interface's link is detected. This ensures that
Suricata does not start processing packets until the link is up. This option is
particularly useful for Intel E810 (Ice) NICs, which begin receiving packets
only after a few seconds have passed since the interface started. In such cases,
if this check is disabled, Suricata reports as started but only begins
processing packets after a few seconds. This issue has not been observed with
other cards.
Setting the value to 0 causes Suricata to skip the link check.
If the interface's link remains down after the timeout period, Suricata warns
the user but continues with the engine initialization.

@ -2183,6 +2183,7 @@ The whole DPDK configuration resides in the `dpdk:` node. This node encapsulates
multicast: true
checksum-checks: true
checksum-checks-offload: true
linkup-timeout: 10
mtu: 1500
mempool-size: auto
mempool-cache-size: auto

@ -134,6 +134,13 @@ Major changes
- DPDK interface mempools are now allocated per thread instead of per port. This
change improves performance and should not be visible from the user
configuration perspective.
- DPDK supports link state check, allowing Suricata to start only when the link
is up. This is especially useful for Intel E810 (ice) NICs as they need
a few seconds before they are ready to receive packets. With this check
disabled, Suricata reports as started but only begins processing packets
after the previously mentioned interval. Other cards were not observed to have
this issue. This feature is disabled by default.
See :ref:`dpdk-link-state-change-timeout`.
Removals
~~~~~~~~

@ -119,6 +119,7 @@ static void DPDKDerefConfig(void *conf);
#define DPDK_CONFIG_DEFAULT_CHECKSUM_VALIDATION 1
#define DPDK_CONFIG_DEFAULT_CHECKSUM_VALIDATION_OFFLOAD 1
#define DPDK_CONFIG_DEFAULT_VLAN_STRIP 0
#define DPDK_CONFIG_DEFAULT_LINKUP_TIMEOUT 0
#define DPDK_CONFIG_DEFAULT_COPY_MODE "none"
#define DPDK_CONFIG_DEFAULT_COPY_INTERFACE "none"
@ -132,6 +133,7 @@ DPDKIfaceConfigAttributes dpdk_yaml = {
.mtu = "mtu",
.vlan_strip_offload = "vlan-strip-offload",
.rss_hf = "rss-hash-functions",
.linkup_timeout = "linkup-timeout",
.mempool_size = "mempool-size",
.mempool_cache_size = "mempool-cache-size",
.rx_descriptors = "rx-descriptors",
@ -690,6 +692,19 @@ static int ConfigSetMtu(DPDKIfaceConfig *iconf, intmax_t entry_int)
SCReturnInt(0);
}
/**
 * \brief Validate and store the link-up waiting timeout of an interface.
 *
 * \param iconf interface configuration that receives the timeout
 * \param entry_int timeout in seconds as read from the configuration,
 *                  0 disables the link state check
 * \retval 0 on success
 * \retval -ERANGE when the value is negative or does not fit into the
 *                 uint16_t linkup_timeout field
 */
static int ConfigSetLinkupTimeout(DPDKIfaceConfig *iconf, intmax_t entry_int)
{
    SCEnter();
    /* linkup_timeout is a uint16_t: reject values that would be silently
     * truncated on assignment (e.g. 65536 would wrap to 0 and disable the check) */
    if (entry_int < 0 || entry_int > UINT16_MAX) {
        SCLogError("%s: Link-up waiting timeout needs to be a positive number (up to %d) or 0 to "
                   "disable",
                iconf->iface, UINT16_MAX);
        SCReturnInt(-ERANGE);
    }

    iconf->linkup_timeout = (uint16_t)entry_int;
    SCReturnInt(0);
}
static bool ConfigSetPromiscuousMode(DPDKIfaceConfig *iconf, int entry_bool)
{
SCEnter();
@ -946,6 +961,13 @@ static int ConfigLoad(DPDKIfaceConfig *iconf, const char *iface)
ConfigSetVlanStrip(iconf, entry_bool);
}
retval = ConfGetChildValueIntWithDefault(
if_root, if_default, dpdk_yaml.linkup_timeout, &entry_int) != 1
? ConfigSetLinkupTimeout(iconf, DPDK_CONFIG_DEFAULT_LINKUP_TIMEOUT)
: ConfigSetLinkupTimeout(iconf, entry_int);
if (retval < 0)
SCReturnInt(retval);
retval = ConfGetChildValueWithDefault(if_root, if_default, dpdk_yaml.copy_mode, &copy_mode_str);
if (retval != 1) {
copy_mode_str = DPDK_CONFIG_DEFAULT_COPY_MODE;

@ -33,6 +33,7 @@ typedef struct DPDKIfaceConfigAttributes_ {
const char *mtu;
const char *vlan_strip_offload;
const char *rss_hf;
const char *linkup_timeout;
const char *mempool_size;
const char *mempool_cache_size;
const char *rx_descriptors;

@ -638,6 +638,45 @@ static TmEcode ReceiveDPDKThreadInit(ThreadVars *tv, const void *initdata, void
goto fail;
}
uint32_t timeout = dpdk_config->linkup_timeout * 10;
while (timeout > 0) {
struct rte_eth_link link = { 0 };
retval = rte_eth_link_get_nowait(ptv->port_id, &link);
if (retval != 0) {
if (retval == -ENOTSUP) {
SCLogInfo("%s: link status not supported, skipping", dpdk_config->iface);
} else {
SCLogInfo("%s: error (%s) when getting link status, skipping",
dpdk_config->iface, rte_strerror(-retval));
}
break;
}
if (link.link_status) {
char link_status_str[RTE_ETH_LINK_MAX_STR_LEN];
#if RTE_VERSION >= RTE_VERSION_NUM(20, 11, 0, 0)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
rte_eth_link_to_str(link_status_str, sizeof(link_status_str), &link);
#pragma GCC diagnostic pop
#else
snprintf(link_status_str, sizeof(link_status_str),
"Link Up, speed %u Mbps, %s", // 22 chars + 10 for digits + 11 for duplex
link.link_speed,
(link.link_duplex == ETH_LINK_FULL_DUPLEX) ? "full-duplex" : "half-duplex");
#endif
SCLogInfo("%s: %s", dpdk_config->iface, link_status_str);
break;
}
rte_delay_ms(100);
timeout--;
}
if (dpdk_config->linkup_timeout && timeout == 0) {
SCLogWarning("%s: link is down, trying to continue anyway", dpdk_config->iface);
}
// some PMDs requires additional actions only after the device has started
DevicePostStartPMDSpecificActions(ptv, dev_info.driver_name);

@ -75,6 +75,7 @@ typedef struct DPDKIfaceConfig_ {
uint32_t mempool_size;
uint32_t mempool_cache_size;
DPDKDeviceResources *pkt_mempools;
uint16_t linkup_timeout; // in seconds how long to wait for link to come up
SC_ATOMIC_DECLARE(unsigned int, ref);
/* threads bind queue id one by one */
SC_ATOMIC_DECLARE(uint16_t, queue_id);

@ -113,6 +113,10 @@
#define RTE_MBUF_F_RX_L4_CKSUM_BAD PKT_RX_L4_CKSUM_BAD
#endif
#if RTE_VERSION < RTE_VERSION_NUM(20, 11, 0, 0)
#define RTE_ETH_LINK_MAX_STR_LEN 40
#endif
typedef struct {
struct rte_mempool **pkt_mp;
uint16_t pkt_mp_cnt;

@ -798,6 +798,7 @@ dpdk:
# rss-hash-functions: 0x0 # advanced configuration option, use only if you use untested NIC card and experience RSS warnings,
# For `rss-hash-functions` use hexadecimal 0x01ab format to specify RSS hash function flags - DumpRssFlags can help (you can see output if you use -vvv option during Suri startup)
# setting auto to rss_hf sets the default RSS hash functions (based on IP addresses)
# linkup-timeout: 0 # how many seconds to wait for the link to come up before giving up, 0 to disable link state checking
# To approximately calculate required amount of space (in bytes) for interface's mempool: mempool-size * mtu
# Make sure you have enough allocated hugepages.
@ -822,6 +823,7 @@ dpdk:
mtu: 1500
vlan-strip-offload: false
rss-hash-functions: auto
linkup-timeout: 0
mempool-size: auto
mempool-cache-size: auto
rx-descriptors: auto

Loading…
Cancel
Save