diff mbox series

[iwl-next,v8,14/14] iavf: add support for Rx timestamps to hotpath

Message ID 20240730091509.18846-15-mateusz.polchlopek@intel.com
State Changes Requested
Headers show
Series Add support for Rx timestamping for both ice and iavf drivers. | expand

Commit Message

Mateusz Polchlopek July 30, 2024, 9:15 a.m. UTC
From: Jacob Keller <jacob.e.keller@intel.com>

Add support for receive timestamps to the Rx hotpath. This support only
works when using the flexible descriptor format, so make sure that we
request this format by default if we have receive timestamp support
available in the PTP capabilities.

In order to report the timestamps to userspace, we need to perform
timestamp extension. The Rx descriptor does actually contain the "40
bit" timestamp. However, upper 32 bits which contain nanoseconds are
conveniently stored separately in the descriptor. We could extract the
32bits and lower 8 bits, then perform a bitwise OR to calculate the
40bit value. This makes no sense, because the timestamp extension
algorithm would simply discard the lower 8 bits anyways.

Thus, implement timestamp extension as iavf_ptp_extend_32b_timestamp(),
and extract and forward only the 32bits of nominal nanoseconds.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Reviewed-by: Wojciech Drewek <wojciech.drewek@intel.com>
Reviewed-by: Rahul Rameshbabu <rrameshbabu@nvidia.com>
Reviewed-by: Sunil Goutham <sgoutham@marvell.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Mateusz Polchlopek <mateusz.polchlopek@intel.com>
---
 drivers/net/ethernet/intel/iavf/iavf_main.c |  9 +++
 drivers/net/ethernet/intel/iavf/iavf_ptp.c  | 69 +++++++++++++++++++++
 drivers/net/ethernet/intel/iavf/iavf_ptp.h  |  4 ++
 drivers/net/ethernet/intel/iavf/iavf_txrx.c | 47 ++++++++++++++
 drivers/net/ethernet/intel/iavf/iavf_type.h |  3 +
 5 files changed, 132 insertions(+)

Comments

Alexander Lobakin July 30, 2024, 3:29 p.m. UTC | #1
From: Mateusz Polchlopek <mateusz.polchlopek@intel.com>
Date: Tue, 30 Jul 2024 05:15:09 -0400

> From: Jacob Keller <jacob.e.keller@intel.com>
> 
> Add support for receive timestamps to the Rx hotpath. This support only
> works when using the flexible descriptor format, so make sure that we
> request this format by default if we have receive timestamp support
> available in the PTP capabilities.
> 
> In order to report the timestamps to userspace, we need to perform
> timestamp extension. The Rx descriptor does actually contain the "40
> bit" timestamp. However, upper 32 bits which contain nanoseconds are
> conveniently stored separately in the descriptor. We could extract the
> 32bits and lower 8 bits, then perform a bitwise OR to calculate the
> 40bit value. This makes no sense, because the timestamp extension
> algorithm would simply discard the lower 8 bits anyways.
> 
> Thus, implement timestamp extension as iavf_ptp_extend_32b_timestamp(),
> and extract and forward only the 32bits of nominal nanoseconds.
> 
> Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
> Reviewed-by: Wojciech Drewek <wojciech.drewek@intel.com>
> Reviewed-by: Rahul Rameshbabu <rrameshbabu@nvidia.com>
> Reviewed-by: Sunil Goutham <sgoutham@marvell.com>
> Reviewed-by: Simon Horman <horms@kernel.org>
> Signed-off-by: Mateusz Polchlopek <mateusz.polchlopek@intel.com>
> ---
>  drivers/net/ethernet/intel/iavf/iavf_main.c |  9 +++
>  drivers/net/ethernet/intel/iavf/iavf_ptp.c  | 69 +++++++++++++++++++++
>  drivers/net/ethernet/intel/iavf/iavf_ptp.h  |  4 ++
>  drivers/net/ethernet/intel/iavf/iavf_txrx.c | 47 ++++++++++++++
>  drivers/net/ethernet/intel/iavf/iavf_type.h |  3 +
>  5 files changed, 132 insertions(+)
> 
> diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
> index 61720b27c8f1..03deb3e02279 100644
> --- a/drivers/net/ethernet/intel/iavf/iavf_main.c
> +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
> @@ -729,6 +729,15 @@ static u8 iavf_select_rx_desc_format(struct iavf_adapter *adapter)
>  	if (!RXDID_ALLOWED(adapter))
>  		return VIRTCHNL_RXDID_1_32B_BASE;
>  
> +	/* Rx timestamping requires the use of flexible NIC descriptors */
> +	if (iavf_ptp_cap_supported(adapter, VIRTCHNL_1588_PTP_CAP_RX_TSTAMP)) {
> +		if (supported_rxdids & BIT(VIRTCHNL_RXDID_2_FLEX_SQ_NIC))
> +			return VIRTCHNL_RXDID_2_FLEX_SQ_NIC;
> +
> +		dev_dbg(&adapter->pdev->dev,
> +			"Unable to negotiate flexible descriptor format.\n");

1. Remember about pci_*().
2. We usually don't put '.' to the end of kernel messages.

> +	}
> +
>  	/* Warn if the PF does not list support for the default legacy
>  	 * descriptor format. This shouldn't happen, as this is the format
>  	 * used if VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC is not supported. It is
> diff --git a/drivers/net/ethernet/intel/iavf/iavf_ptp.c b/drivers/net/ethernet/intel/iavf/iavf_ptp.c
> index 7754f4f24052..5fd17f8d1f36 100644
> --- a/drivers/net/ethernet/intel/iavf/iavf_ptp.c
> +++ b/drivers/net/ethernet/intel/iavf/iavf_ptp.c
> @@ -440,6 +440,9 @@ void iavf_ptp_release(struct iavf_adapter *adapter)
>  	}
>  	adapter->aq_required &= ~IAVF_FLAG_AQ_SEND_PTP_CMD;
>  	mutex_unlock(&adapter->ptp.aq_cmd_lock);
> +
> +	adapter->ptp.hwtstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
> +	iavf_ptp_disable_rx_tstamp(adapter);
>  }
>  
>  /**
> @@ -473,3 +476,69 @@ void iavf_ptp_process_caps(struct iavf_adapter *adapter)
>  		iavf_ptp_disable_rx_tstamp(adapter);
>  	}
>  }
> +
> +/**
> + * iavf_ptp_extend_32b_timestamp - Convert a 32b nanoseconds timestamp to 64b
> + * nanoseconds
> + * @cached_phc_time: recently cached copy of PHC time
> + * @in_tstamp: Ingress/egress 32b nanoseconds timestamp value
> + *
> + * Hardware captures timestamps which contain only 32 bits of nominal
> + * nanoseconds, as opposed to the 64bit timestamps that the stack expects.
> + *
> + * Extend the 32bit nanosecond timestamp using the following algorithm and
> + * assumptions:
> + *
> + * 1) have a recently cached copy of the PHC time
> + * 2) assume that the in_tstamp was captured 2^31 nanoseconds (~2.1
> + *    seconds) before or after the PHC time was captured.
> + * 3) calculate the delta between the cached time and the timestamp
> + * 4) if the delta is smaller than 2^31 nanoseconds, then the timestamp was
> + *    captured after the PHC time. In this case, the full timestamp is just
> + *    the cached PHC time plus the delta.
> + * 5) otherwise, if the delta is larger than 2^31 nanoseconds, then the
> + *    timestamp was captured *before* the PHC time, i.e. because the PHC
> + *    cache was updated after the timestamp was captured by hardware. In this
> + *    case, the full timestamp is the cached time minus the inverse delta.
> + *
> + * This algorithm works even if the PHC time was updated after a Tx timestamp
> + * was requested, but before the Tx timestamp event was reported from
> + * hardware.
> + *
> + * This calculation primarily relies on keeping the cached PHC time up to
> + * date. If the timestamp was captured more than 2^31 nanoseconds after the
> + * PHC time, it is possible that the lower 32bits of PHC time have
> + * overflowed more than once, and we might generate an incorrect timestamp.
> + *
> + * This is prevented by (a) periodically updating the cached PHC time once
> + * a second, and (b) discarding any Tx timestamp packet if it has waited for
> + * a timestamp for more than one second.
> + *
> + * Return: extended timestamp (to 64b)

...but here you actually need a period :D

> + */
> +u64 iavf_ptp_extend_32b_timestamp(u64 cached_phc_time, u32 in_tstamp)
> +{
> +	const u64 mask = GENMASK_ULL(31, 0);
> +	u32 delta;
> +	u64 ns;
> +
> +	/* Calculate the delta between the lower 32bits of the cached PHC
> +	 * time and the in_tstamp value
> +	 */
> +	delta = (in_tstamp - (u32)(cached_phc_time & mask));

`cached_phc_time & GENMASK_ULL(31, 0)` == lower_32_bits(cached_phc_time)

> +
> +	/* Do not assume that the in_tstamp is always more recent than the
> +	 * cached PHC time. If the delta is large, it indicates that the
> +	 * in_tstamp was taken in the past, and should be converted
> +	 * forward.
> +	 */
> +	if (delta > (mask / 2)) {

1. `mask / 2` == S32_MAX.
2. Parenthesis are redundant here.

> +		/* reverse the delta calculation here */
> +		delta = ((u32)(cached_phc_time & mask) - in_tstamp);

Here as well.

> +		ns = cached_phc_time - delta;
> +	} else {
> +		ns = cached_phc_time + delta;
> +	}

1. (u32) casts are not needed since your mask ends with bit 31.
2. (cached_phc_time & mask) is used two times. Perhaps you needed

	u32 low = lower_32_bits(cached_phc_time);

	delta = in_tstamp - low;
	if (delta > S32_MAX)
		ns = cached_phc_time - (low - in_tstamp);
	else
		ns = cached_phc_time + delta;


> +
> +	return ns;
> +}

[...]

> diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
> index 997fd0d520a9..8d74549c3535 100644
> --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c
> +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
> @@ -1085,6 +1085,52 @@ static void iavf_flex_rx_hash(const struct iavf_ring *ring,
>  	}
>  }
>  
> +/**
> + * iavf_flex_rx_tstamp - Capture Rx timestamp from the descriptor
> + * @rx_ring: descriptor ring
> + * @rx_desc: specific descriptor
> + * @skb: skb currently being received
> + *
> + * Read the Rx timestamp value from the descriptor and pass it to the stack.
> + *
> + * This function only operates on the VIRTCHNL_RXDID_2_FLEX_SQ_NIC flexible
> + * descriptor writeback format.
> + */
> +static void iavf_flex_rx_tstamp(const struct iavf_ring *rx_ring,
> +				const struct iavf_rx_desc *rx_desc,
> +				struct sk_buff *skb)
> +{
> +	struct skb_shared_hwtstamps *skb_tstamps;
> +	struct iavf_adapter *adapter;
> +	__le64 qw2 = rx_desc->qw2;
> +	__le64 qw3 = rx_desc->qw3;
> +	bool tstamp_valid;
> +	u32 tstamp;
> +	u64 ns;
> +
> +	/* Skip processing if timestamps aren't enabled */
> +	if (!(rx_ring->flags & IAVF_TXRX_FLAGS_HW_TSTAMP))
> +		return;
> +
> +	/* Check if this Rx descriptor has a valid timestamp */
> +	tstamp_valid = le64_get_bits(qw2, IAVF_PTP_40B_TSTAMP_VALID);
> +	if (!tstamp_valid)
> +		return;

Read/convert descriptor qwords when you actually need them.

	if (!(rx_ring->flags & HW_TSTAMP))
		return;

	if (!le64_get_bits(rx_desc->qw2, TSTAMP_VALID))
		return;

	tstamp = le64_get_bits(rx_desc->qw3, ...

> +
> +	adapter = netdev_priv(rx_ring->netdev);
> +
> +	/* the ts_low field only contains the valid bit and sub-nanosecond
> +	 * precision, so we don't need to extract it.
> +	 */
> +	tstamp = le64_get_bits(qw3, IAVF_RXD_FLEX_QW3_TSTAMP_HIGH_M);
> +	ns = iavf_ptp_extend_32b_timestamp(adapter->ptp.cached_phc_time,
> +					   tstamp);
> +
> +	skb_tstamps = skb_hwtstamps(skb);
> +	memset(skb_tstamps, 0, sizeof(*skb_tstamps));
> +	skb_tstamps->hwtstamp = ns_to_ktime(ns);

Ah, so many times I've seen this pattern :D

&skb_shared_hwtstamps is 2 fields. One field you always initialize
above, the other one needs to be cleared. IOW, just do

	*skb_hwtstamps(skb) = (struct skb_shared_hwtstamps){
		.hwtstamp	= ns_to_ktime(ns),
	};

That's it!

Thanks,
Olek
diff mbox series

Patch

diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index 61720b27c8f1..03deb3e02279 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -729,6 +729,15 @@  static u8 iavf_select_rx_desc_format(struct iavf_adapter *adapter)
 	if (!RXDID_ALLOWED(adapter))
 		return VIRTCHNL_RXDID_1_32B_BASE;
 
+	/* Rx timestamping requires the use of flexible NIC descriptors */
+	if (iavf_ptp_cap_supported(adapter, VIRTCHNL_1588_PTP_CAP_RX_TSTAMP)) {
+		if (supported_rxdids & BIT(VIRTCHNL_RXDID_2_FLEX_SQ_NIC))
+			return VIRTCHNL_RXDID_2_FLEX_SQ_NIC;
+
+		dev_dbg(&adapter->pdev->dev,
+			"Unable to negotiate flexible descriptor format.\n");
+	}
+
 	/* Warn if the PF does not list support for the default legacy
 	 * descriptor format. This shouldn't happen, as this is the format
 	 * used if VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC is not supported. It is
diff --git a/drivers/net/ethernet/intel/iavf/iavf_ptp.c b/drivers/net/ethernet/intel/iavf/iavf_ptp.c
index 7754f4f24052..5fd17f8d1f36 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_ptp.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_ptp.c
@@ -440,6 +440,9 @@  void iavf_ptp_release(struct iavf_adapter *adapter)
 	}
 	adapter->aq_required &= ~IAVF_FLAG_AQ_SEND_PTP_CMD;
 	mutex_unlock(&adapter->ptp.aq_cmd_lock);
+
+	adapter->ptp.hwtstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
+	iavf_ptp_disable_rx_tstamp(adapter);
 }
 
 /**
@@ -473,3 +476,69 @@  void iavf_ptp_process_caps(struct iavf_adapter *adapter)
 		iavf_ptp_disable_rx_tstamp(adapter);
 	}
 }
+
+/**
+ * iavf_ptp_extend_32b_timestamp - Convert a 32b nanoseconds timestamp to 64b
+ * nanoseconds
+ * @cached_phc_time: recently cached copy of PHC time
+ * @in_tstamp: Ingress/egress 32b nanoseconds timestamp value
+ *
+ * Hardware captures timestamps which contain only 32 bits of nominal
+ * nanoseconds, as opposed to the 64bit timestamps that the stack expects.
+ *
+ * Extend the 32bit nanosecond timestamp using the following algorithm and
+ * assumptions:
+ *
+ * 1) have a recently cached copy of the PHC time
+ * 2) assume that the in_tstamp was captured 2^31 nanoseconds (~2.1
+ *    seconds) before or after the PHC time was captured.
+ * 3) calculate the delta between the cached time and the timestamp
+ * 4) if the delta is smaller than 2^31 nanoseconds, then the timestamp was
+ *    captured after the PHC time. In this case, the full timestamp is just
+ *    the cached PHC time plus the delta.
+ * 5) otherwise, if the delta is larger than 2^31 nanoseconds, then the
+ *    timestamp was captured *before* the PHC time, i.e. because the PHC
+ *    cache was updated after the timestamp was captured by hardware. In this
+ *    case, the full timestamp is the cached time minus the inverse delta.
+ *
+ * This algorithm works even if the PHC time was updated after a Tx timestamp
+ * was requested, but before the Tx timestamp event was reported from
+ * hardware.
+ *
+ * This calculation primarily relies on keeping the cached PHC time up to
+ * date. If the timestamp was captured more than 2^31 nanoseconds after the
+ * PHC time, it is possible that the lower 32bits of PHC time have
+ * overflowed more than once, and we might generate an incorrect timestamp.
+ *
+ * This is prevented by (a) periodically updating the cached PHC time once
+ * a second, and (b) discarding any Tx timestamp packet if it has waited for
+ * a timestamp for more than one second.
+ *
+ * Return: extended timestamp (to 64b)
+ */
+u64 iavf_ptp_extend_32b_timestamp(u64 cached_phc_time, u32 in_tstamp)
+{
+	const u64 mask = GENMASK_ULL(31, 0);
+	u32 delta;
+	u64 ns;
+
+	/* Calculate the delta between the lower 32bits of the cached PHC
+	 * time and the in_tstamp value
+	 */
+	delta = (in_tstamp - (u32)(cached_phc_time & mask));
+
+	/* Do not assume that the in_tstamp is always more recent than the
+	 * cached PHC time. If the delta is large, it indicates that the
+	 * in_tstamp was taken in the past, and should be converted
+	 * forward.
+	 */
+	if (delta > (mask / 2)) {
+		/* reverse the delta calculation here */
+		delta = ((u32)(cached_phc_time & mask) - in_tstamp);
+		ns = cached_phc_time - delta;
+	} else {
+		ns = cached_phc_time + delta;
+	}
+
+	return ns;
+}
diff --git a/drivers/net/ethernet/intel/iavf/iavf_ptp.h b/drivers/net/ethernet/intel/iavf/iavf_ptp.h
index 656d360d2bb4..43033dfd8801 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_ptp.h
+++ b/drivers/net/ethernet/intel/iavf/iavf_ptp.h
@@ -6,6 +6,9 @@ 
 
 #include <linux/ptp_clock_kernel.h>
 
+/* bit indicating whether a 40bit timestamp is valid */
+#define IAVF_PTP_40B_TSTAMP_VALID	BIT(24)
+
 /* structure used to queue PTP commands for processing */
 struct iavf_ptp_aq_cmd {
 	struct list_head list;
@@ -41,5 +44,6 @@  int iavf_ptp_get_ts_config(struct iavf_adapter *adapter,
 int iavf_ptp_set_ts_config(struct iavf_adapter *adapter,
 			   struct kernel_hwtstamp_config *config,
 			   struct netlink_ext_ack *extack);
+u64 iavf_ptp_extend_32b_timestamp(u64 cached_phc_time, u32 in_tstamp);
 
 #endif /* _IAVF_PTP_H_ */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
index 997fd0d520a9..8d74549c3535 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
@@ -1085,6 +1085,52 @@  static void iavf_flex_rx_hash(const struct iavf_ring *ring,
 	}
 }
 
+/**
+ * iavf_flex_rx_tstamp - Capture Rx timestamp from the descriptor
+ * @rx_ring: descriptor ring
+ * @rx_desc: specific descriptor
+ * @skb: skb currently being received
+ *
+ * Read the Rx timestamp value from the descriptor and pass it to the stack.
+ *
+ * This function only operates on the VIRTCHNL_RXDID_2_FLEX_SQ_NIC flexible
+ * descriptor writeback format.
+ */
+static void iavf_flex_rx_tstamp(const struct iavf_ring *rx_ring,
+				const struct iavf_rx_desc *rx_desc,
+				struct sk_buff *skb)
+{
+	struct skb_shared_hwtstamps *skb_tstamps;
+	struct iavf_adapter *adapter;
+	__le64 qw2 = rx_desc->qw2;
+	__le64 qw3 = rx_desc->qw3;
+	bool tstamp_valid;
+	u32 tstamp;
+	u64 ns;
+
+	/* Skip processing if timestamps aren't enabled */
+	if (!(rx_ring->flags & IAVF_TXRX_FLAGS_HW_TSTAMP))
+		return;
+
+	/* Check if this Rx descriptor has a valid timestamp */
+	tstamp_valid = le64_get_bits(qw2, IAVF_PTP_40B_TSTAMP_VALID);
+	if (!tstamp_valid)
+		return;
+
+	adapter = netdev_priv(rx_ring->netdev);
+
+	/* the ts_low field only contains the valid bit and sub-nanosecond
+	 * precision, so we don't need to extract it.
+	 */
+	tstamp = le64_get_bits(qw3, IAVF_RXD_FLEX_QW3_TSTAMP_HIGH_M);
+	ns = iavf_ptp_extend_32b_timestamp(adapter->ptp.cached_phc_time,
+					   tstamp);
+
+	skb_tstamps = skb_hwtstamps(skb);
+	memset(skb_tstamps, 0, sizeof(*skb_tstamps));
+	skb_tstamps->hwtstamp = ns_to_ktime(ns);
+}
+
 /**
  * iavf_process_skb_fields - Populate skb header fields from Rx descriptor
  * @rx_ring: rx descriptor ring packet is being transacted on
@@ -1110,6 +1156,7 @@  static void iavf_process_skb_fields(const struct iavf_ring *rx_ring,
 		csum_bits = iavf_legacy_rx_csum(rx_ring->vsi, rx_desc, decoded);
 	} else {
 		iavf_flex_rx_hash(rx_ring, rx_desc, skb, decoded);
+		iavf_flex_rx_tstamp(rx_ring, rx_desc, skb);
 		csum_bits = iavf_flex_rx_csum(rx_ring->vsi, rx_desc, decoded);
 	}
 	iavf_rx_csum(rx_ring->vsi, skb, decoded, csum_bits);
diff --git a/drivers/net/ethernet/intel/iavf/iavf_type.h b/drivers/net/ethernet/intel/iavf/iavf_type.h
index 498746a83d35..4e369a5a8506 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_type.h
+++ b/drivers/net/ethernet/intel/iavf/iavf_type.h
@@ -537,4 +537,7 @@  struct iavf_eth_stats {
 #define IAVF_RXD_FLEX_QW2_XTRMD4_VALID_M	BIT(14)
 #define IAVF_RXD_FLEX_QW2_XTRMD5_VALID_M	BIT(15)
 
+/* Quad Word 3 */
+#define IAVF_RXD_FLEX_QW3_TSTAMP_HIGH_M		GENMASK_ULL(63, 32)
+
 #endif /* _IAVF_TYPE_H_ */