diff --git a/doc/userguide/capture-hardware/dpdk.rst b/doc/userguide/capture-hardware/dpdk.rst index 68b989c561..757484d048 100644 --- a/doc/userguide/capture-hardware/dpdk.rst +++ b/doc/userguide/capture-hardware/dpdk.rst @@ -214,3 +214,20 @@ Individual properties can still be set manually if needed. ``tx-descriptors`` in the TAP/IPS modes. Instead it can be set to a fixed value (e.g. 16384). +.. _dpdk-link-state-change-timeout: + +Link State Change timeout +------------------------- + +The ``linkup-timeout`` YAML configuration option allows the user to set a timeout +period (in seconds) to wait until the interface's link is detected. This ensures that +Suricata does not start processing packets until the link is up. This option is +particularly useful for Intel E810 (Ice) NICs, which begin receiving packets +only after a few seconds have passed since the interface started. In such cases, +if this check is disabled, Suricata reports as started but only begins +processing packets after a few seconds. This issue has not been observed with +other cards. + +Setting the value to 0 causes Suricata to skip the link check. +If the interface's link remains down after the timeout period, Suricata warns +the user but continues with the engine initialization. diff --git a/doc/userguide/configuration/suricata-yaml.rst b/doc/userguide/configuration/suricata-yaml.rst index 3ff415777c..5b83480e62 100644 --- a/doc/userguide/configuration/suricata-yaml.rst +++ b/doc/userguide/configuration/suricata-yaml.rst @@ -2183,6 +2183,7 @@ The whole DPDK configuration resides in the `dpdk:` node. 
This node encapsulates multicast: true checksum-checks: true checksum-checks-offload: true + linkup-timeout: 10 mtu: 1500 mempool-size: auto mempool-cache-size: auto diff --git a/doc/userguide/upgrade.rst b/doc/userguide/upgrade.rst index a88bc2e9ea..08a437e21e 100644 --- a/doc/userguide/upgrade.rst +++ b/doc/userguide/upgrade.rst @@ -134,6 +134,13 @@ Major changes - DPDK interface mempools are now allocated per thread instead of per port. This change improves performance and should not be visible from the user configuration perspective. +- DPDK now supports a link state check, allowing Suricata to start only when the link + is up. This is especially useful for Intel E810 (ice) NICs as they need + a few seconds before they are ready to receive packets. With this check + disabled, Suricata reports as started but only begins processing packets + after the previously mentioned interval. Other cards were not observed to have + this issue. This feature is disabled by default (``linkup-timeout: 0``). + See :ref:`dpdk-link-state-change-timeout`. 
Removals ~~~~~~~~ diff --git a/src/runmode-dpdk.c b/src/runmode-dpdk.c index d9fb0c25ab..e1b071721a 100644 --- a/src/runmode-dpdk.c +++ b/src/runmode-dpdk.c @@ -119,6 +119,7 @@ static void DPDKDerefConfig(void *conf); #define DPDK_CONFIG_DEFAULT_CHECKSUM_VALIDATION 1 #define DPDK_CONFIG_DEFAULT_CHECKSUM_VALIDATION_OFFLOAD 1 #define DPDK_CONFIG_DEFAULT_VLAN_STRIP 0 +#define DPDK_CONFIG_DEFAULT_LINKUP_TIMEOUT 0 #define DPDK_CONFIG_DEFAULT_COPY_MODE "none" #define DPDK_CONFIG_DEFAULT_COPY_INTERFACE "none" @@ -132,6 +133,7 @@ DPDKIfaceConfigAttributes dpdk_yaml = { .mtu = "mtu", .vlan_strip_offload = "vlan-strip-offload", .rss_hf = "rss-hash-functions", + .linkup_timeout = "linkup-timeout", .mempool_size = "mempool-size", .mempool_cache_size = "mempool-cache-size", .rx_descriptors = "rx-descriptors", @@ -690,6 +692,24 @@ static int ConfigSetMtu(DPDKIfaceConfig *iconf, intmax_t entry_int) SCReturnInt(0); } +/** + * Validates the link-up timeout (in seconds, 0 disables the check) and stores + * it in iconf. Returns 0 on success or -ERANGE if the value is negative or + * does not fit into the uint16_t linkup_timeout field. + */ +static int ConfigSetLinkupTimeout(DPDKIfaceConfig *iconf, intmax_t entry_int) +{ + SCEnter(); + if (entry_int < 0 || entry_int > UINT16_MAX) { + SCLogError("%s: Link-up waiting timeout needs to be a positive number (up to %u) or 0 to disable", + iconf->iface, (unsigned int)UINT16_MAX); + SCReturnInt(-ERANGE); + } + + iconf->linkup_timeout = (uint16_t)entry_int; + SCReturnInt(0); +} + static bool ConfigSetPromiscuousMode(DPDKIfaceConfig *iconf, int entry_bool) { SCEnter(); @@ -946,6 +966,13 @@ static int ConfigLoad(DPDKIfaceConfig *iconf, const char *iface) ConfigSetVlanStrip(iconf, entry_bool); } + retval = ConfGetChildValueIntWithDefault( + if_root, if_default, dpdk_yaml.linkup_timeout, &entry_int) != 1 + ? 
ConfigSetLinkupTimeout(iconf, DPDK_CONFIG_DEFAULT_LINKUP_TIMEOUT) + : ConfigSetLinkupTimeout(iconf, entry_int); + if (retval < 0) + SCReturnInt(retval); + retval = ConfGetChildValueWithDefault(if_root, if_default, dpdk_yaml.copy_mode, &copy_mode_str); if (retval != 1) { copy_mode_str = DPDK_CONFIG_DEFAULT_COPY_MODE; diff --git a/src/runmode-dpdk.h b/src/runmode-dpdk.h index 392874446d..b91838915c 100644 --- a/src/runmode-dpdk.h +++ b/src/runmode-dpdk.h @@ -33,6 +33,7 @@ typedef struct DPDKIfaceConfigAttributes_ { const char *mtu; const char *vlan_strip_offload; const char *rss_hf; + const char *linkup_timeout; const char *mempool_size; const char *mempool_cache_size; const char *rx_descriptors; diff --git a/src/source-dpdk.c b/src/source-dpdk.c index 8307b215ec..be93a4120e 100644 --- a/src/source-dpdk.c +++ b/src/source-dpdk.c @@ -638,6 +638,45 @@ static TmEcode ReceiveDPDKThreadInit(ThreadVars *tv, const void *initdata, void goto fail; } + uint32_t timeout = dpdk_config->linkup_timeout * 10; + while (timeout > 0) { + struct rte_eth_link link = { 0 }; + retval = rte_eth_link_get_nowait(ptv->port_id, &link); + if (retval != 0) { + if (retval == -ENOTSUP) { + SCLogInfo("%s: link status not supported, skipping", dpdk_config->iface); + } else { + SCLogInfo("%s: error (%s) when getting link status, skipping", + dpdk_config->iface, rte_strerror(-retval)); + } + break; + } + if (link.link_status) { + char link_status_str[RTE_ETH_LINK_MAX_STR_LEN]; +#if RTE_VERSION >= RTE_VERSION_NUM(20, 11, 0, 0) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + rte_eth_link_to_str(link_status_str, sizeof(link_status_str), &link); +#pragma GCC diagnostic pop +#else + snprintf(link_status_str, sizeof(link_status_str), + "Link Up, speed %u Mbps, %s", // 22 chars + 10 for digits + 11 for duplex + link.link_speed, + (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 
"full-duplex" : "half-duplex"); +#endif + + SCLogInfo("%s: %s", dpdk_config->iface, link_status_str); + break; + } + + rte_delay_ms(100); + timeout--; + } + + if (dpdk_config->linkup_timeout && timeout == 0) { + SCLogWarning("%s: link is down, trying to continue anyway", dpdk_config->iface); + } + // some PMDs requires additional actions only after the device has started DevicePostStartPMDSpecificActions(ptv, dev_info.driver_name); diff --git a/src/source-dpdk.h b/src/source-dpdk.h index 28341c1704..350a5a164a 100644 --- a/src/source-dpdk.h +++ b/src/source-dpdk.h @@ -75,6 +75,7 @@ typedef struct DPDKIfaceConfig_ { uint32_t mempool_size; uint32_t mempool_cache_size; DPDKDeviceResources *pkt_mempools; + uint16_t linkup_timeout; // in seconds how long to wait for link to come up SC_ATOMIC_DECLARE(unsigned int, ref); /* threads bind queue id one by one */ SC_ATOMIC_DECLARE(uint16_t, queue_id); diff --git a/src/util-dpdk-common.h b/src/util-dpdk-common.h index 02125ff7c2..9a2779dc7c 100644 --- a/src/util-dpdk-common.h +++ b/src/util-dpdk-common.h @@ -113,6 +113,10 @@ #define RTE_MBUF_F_RX_L4_CKSUM_BAD PKT_RX_L4_CKSUM_BAD #endif +#if RTE_VERSION < RTE_VERSION_NUM(20, 11, 0, 0) +#define RTE_ETH_LINK_MAX_STR_LEN 40 +#endif + typedef struct { struct rte_mempool **pkt_mp; uint16_t pkt_mp_cnt; diff --git a/suricata.yaml.in b/suricata.yaml.in index 032a6d1550..e83ec1bfa6 100644 --- a/suricata.yaml.in +++ b/suricata.yaml.in @@ -798,6 +798,7 @@ dpdk: # rss-hash-functions: 0x0 # advanced configuration option, use only if you use untested NIC card and experience RSS warnings, # For `rss-hash-functions` use hexadecimal 0x01ab format to specify RSS hash function flags - DumpRssFlags can help (you can see output if you use -vvv option during Suri startup) # setting auto to rss_hf sets the default RSS hash functions (based on IP addresses) + # linkup-timeout: 0 # how many seconds to wait before giving up, 0 to disable link state checking # To approximately calculate required amount 
of space (in bytes) for interface's mempool: mempool-size * mtu # Make sure you have enough allocated hugepages. @@ -822,6 +823,7 @@ dpdk: mtu: 1500 vlan-strip-offload: false rss-hash-functions: auto + linkup-timeout: 0 mempool-size: auto mempool-cache-size: auto rx-descriptors: auto