diff mbox series

[net-next,RFC,v1,07/10] nvme-tcp : Recalculate crc in the end of the capsule

Message ID 20200930162010.21610-8-borisp@mellanox.com
State RFC
Delegated to: David Miller
Headers show
Series nvme-tcp receive offloads | expand

Commit Message

Boris Pismenny Sept. 30, 2020, 4:20 p.m. UTC
From: Yoray Zack <yorayz@mellanox.com>

crc offload of the nvme capsule. Check if all the skb bits
are on, and if not recalculate the crc in SW and check it.

This patch reworks the receive-side crc calculation to always
run at the end, so as to keep a single flow for both offload
and non-offload. This change simplifies the code, but it may degrade
performance for non-offload crc calculation.

Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: Ben Ben-Ishay <benishay@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Yoray Zack <yorayz@mellanox.com>
---
 drivers/nvme/host/tcp.c | 66 ++++++++++++++++++++++++++++++++++++-----
 1 file changed, 58 insertions(+), 8 deletions(-)

Comments

Sagi Grimberg Oct. 8, 2020, 10:44 p.m. UTC | #1
> crc offload of the nvme capsule. Check if all the skb bits
> are on, and if not recalculate the crc in SW and check it.

Can you clarify in the patch description that this is only
for pdu data digest and not header digest?

> 
> This patch reworks the receive-side crc calculation to always
> run at the end, so as to keep a single flow for both offload
> and non-offload. This change simplifies the code, but it may degrade
> performance for non-offload crc calculation.

??

 From my scan it doeesn't look like you do that.. Am I missing something?
Can you explain?

> 
> Signed-off-by: Boris Pismenny <borisp@mellanox.com>
> Signed-off-by: Ben Ben-Ishay <benishay@mellanox.com>
> Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
> Signed-off-by: Yoray Zack <yorayz@mellanox.com>
> ---
>   drivers/nvme/host/tcp.c | 66 ++++++++++++++++++++++++++++++++++++-----
>   1 file changed, 58 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
> index 7bd97f856677..9a620d1dacb4 100644
> --- a/drivers/nvme/host/tcp.c
> +++ b/drivers/nvme/host/tcp.c
> @@ -94,6 +94,7 @@ struct nvme_tcp_queue {
>   	size_t			data_remaining;
>   	size_t			ddgst_remaining;
>   	unsigned int		nr_cqe;
> +	bool			crc_valid;
>   
>   	/* send state */
>   	struct nvme_tcp_request *request;
> @@ -233,6 +234,41 @@ static inline size_t nvme_tcp_pdu_last_send(struct nvme_tcp_request *req,
>   	return nvme_tcp_pdu_data_left(req) <= len;
>   }
>   
> +static inline bool nvme_tcp_device_ddgst_ok(struct nvme_tcp_queue *queue)

Maybe call it nvme_tcp_ddp_ddgst_ok?

> +{
> +	return queue->crc_valid;
> +}
> +
> +static inline void nvme_tcp_device_ddgst_update(struct nvme_tcp_queue *queue,
> +						struct sk_buff *skb)

Maybe call it nvme_tcp_ddp_ddgst_update?

> +{
> +	if (queue->crc_valid)
> +#ifdef CONFIG_TCP_DDP_CRC
> +		queue->crc_valid = skb->ddp_crc;
> +#else
> +		queue->crc_valid = false;
> +#endif
> +}
> +
> +static void nvme_tcp_crc_recalculate(struct nvme_tcp_queue *queue,
> +				     struct nvme_tcp_data_pdu *pdu)

Maybe call it nvme_tcp_ddp_ddgst_recalc?

> +{
> +	struct nvme_tcp_request *req;
> +	struct request *rq;
> +
> +	rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
> +	if (!rq)
> +		return;
> +	req = blk_mq_rq_to_pdu(rq);
> +	crypto_ahash_init(queue->rcv_hash);
> +	req->ddp.sg_table.sgl = req->ddp.first_sgl;
> +	/* req->ddp.sg_table is allocated and filled in nvme_tcp_setup_ddp */
> +	ahash_request_set_crypt(queue->rcv_hash, req->ddp.sg_table.sgl, NULL,
> +				le32_to_cpu(pdu->data_length));
> +	crypto_ahash_update(queue->rcv_hash);
> +}
> +
> +
>   #ifdef CONFIG_TCP_DDP
>   
>   bool nvme_tcp_resync_request(struct sock *sk, u32 seq, u32 flags);
> @@ -706,6 +742,7 @@ static void nvme_tcp_init_recv_ctx(struct nvme_tcp_queue *queue)
>   	queue->pdu_offset = 0;
>   	queue->data_remaining = -1;
>   	queue->ddgst_remaining = 0;
> +	queue->crc_valid = true;
>   }
>   
>   static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl)
> @@ -955,6 +992,8 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
>   	struct nvme_tcp_request *req;
>   	struct request *rq;
>   
> +	if (test_bit(NVME_TCP_Q_OFFLOADS, &queue->flags))
> +		nvme_tcp_device_ddgst_update(queue, skb);

Is the queue->data_digest condition missing here?

>   	rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
>   	if (!rq) {
>   		dev_err(queue->ctrl->ctrl.device,
> @@ -992,7 +1031,7 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
>   		recv_len = min_t(size_t, recv_len,
>   				iov_iter_count(&req->iter));
>   
> -		if (queue->data_digest)
> +		if (queue->data_digest && !test_bit(NVME_TCP_Q_OFFLOADS, &queue->flags))
>   			ret = skb_copy_and_hash_datagram_iter(skb, *offset,
>   				&req->iter, recv_len, queue->rcv_hash);

This is the skb copy and hash, not clear why you say that you move this
to the end...

>   		else
> @@ -1012,7 +1051,6 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
>   
>   	if (!queue->data_remaining) {
>   		if (queue->data_digest) {
> -			nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst);

If I instead do:
			if (!test_bit(NVME_TCP_Q_OFFLOADS,
                                       &queue->flags))
				nvme_tcp_ddgst_final(queue->rcv_hash,
						     &queue->exp_ddgst);

Does that help the mess in nvme_tcp_recv_ddgst?

>   			queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH;
>   		} else {
>   			if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
> @@ -1033,8 +1071,11 @@ static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
>   	char *ddgst = (char *)&queue->recv_ddgst;
>   	size_t recv_len = min_t(size_t, *len, queue->ddgst_remaining);
>   	off_t off = NVME_TCP_DIGEST_LENGTH - queue->ddgst_remaining;
> +	bool ddgst_offload_fail;
>   	int ret;
>   
> +	if (test_bit(NVME_TCP_Q_OFFLOADS, &queue->flags))
> +		nvme_tcp_device_ddgst_update(queue, skb);
>   	ret = skb_copy_bits(skb, *offset, &ddgst[off], recv_len);
>   	if (unlikely(ret))
>   		return ret;
> @@ -1045,12 +1086,21 @@ static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
>   	if (queue->ddgst_remaining)
>   		return 0;
>   
> -	if (queue->recv_ddgst != queue->exp_ddgst) {
> -		dev_err(queue->ctrl->ctrl.device,
> -			"data digest error: recv %#x expected %#x\n",
> -			le32_to_cpu(queue->recv_ddgst),
> -			le32_to_cpu(queue->exp_ddgst));
> -		return -EIO;
> +	ddgst_offload_fail = !nvme_tcp_device_ddgst_ok(queue);
> +	if (!test_bit(NVME_TCP_Q_OFFLOADS, &queue->flags) ||
> +	    ddgst_offload_fail) {
> +		if (test_bit(NVME_TCP_Q_OFFLOADS, &queue->flags) &&
> +		    ddgst_offload_fail)
> +			nvme_tcp_crc_recalculate(queue, pdu);
> +
> +		nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst);
> +		if (queue->recv_ddgst != queue->exp_ddgst) {
> +			dev_err(queue->ctrl->ctrl.device,
> +				"data digest error: recv %#x expected %#x\n",
> +				le32_to_cpu(queue->recv_ddgst),
> +				le32_to_cpu(queue->exp_ddgst));
> +			return -EIO;

This gets convoluted here...

> +		}
>   	}
>   
>   	if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
>
Shai Malin Nov. 8, 2020, 6:59 a.m. UTC | #2
On 09/10/2020 1:44, Sagi Grimberg wrote:
> On 9/30/20 7:20 PM, Boris Pismenny wrote:
> 
> > crc offload of the nvme capsule. Check if all the skb bits are on, 
> > and if not recalculate the crc in SW and check it.
> 
> Can you clarify in the patch description that this is only for pdu 
> data digest and not header digest?
> 

Not a security expert, but according to my understanding, the NVMeTCP data digest is a layer 5 CRC,  and as such it is expected to be end-to-end, meaning it is computed by layer 5 on the transmitter and verified on layer 5 on the receiver.
Any data corruption which happens in any of the lower layers, including their software processing, should be protected by this CRC. For example, if the IP or TCP stack has a bug that corrupts the NVMeTCP payload data, the CRC should protect against it. It seems that may not be the case with this offload.


> >
> > This patch reworks the receive-side crc calculation to always run at 
> > the end, so as to keep a single flow for both offload and non-offload.
> > This change simplifies the code, but it may degrade performance for 
> > non-offload crc calculation.
> 
> ??
> 
>  From my scan it doeesn't look like you do that.. Am I missing something?
> Can you explain?
> 
> >
> > Signed-off-by: Boris Pismenny <borisp@mellanox.com>
> > Signed-off-by: Ben Ben-Ishay <benishay@mellanox.com>
> > Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
> > Signed-off-by: Yoray Zack <yorayz@mellanox.com>
> > ---
> >   drivers/nvme/host/tcp.c | 66
> ++++++++++++++++++++++++++++++++++++-----
> >   1 file changed, 58 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index
> > 7bd97f856677..9a620d1dacb4 100644
> > --- a/drivers/nvme/host/tcp.c
> > +++ b/drivers/nvme/host/tcp.c
> > @@ -94,6 +94,7 @@ struct nvme_tcp_queue {
> >   	size_t			data_remaining;
> >   	size_t			ddgst_remaining;
> >   	unsigned int		nr_cqe;
> > +	bool			crc_valid;

I suggest to rename it to ddgst_valid.
Boris Pismenny Nov. 8, 2020, 7:28 a.m. UTC | #3
On 08/11/2020 8:59, Shai Malin wrote:
> On 09/10/2020 1:44, Sagi Grimberg wrote:
>> On 9/30/20 7:20 PM, Boris Pismenny wrote:
>>
>>> crc offload of the nvme capsule. Check if all the skb bits are on, 
>>> and if not recalculate the crc in SW and check it.
>> Can you clarify in the patch description that this is only for pdu 
>> data digest and not header digest?
>>
> Not a security expert, but according to my understanding, the NVMeTCP data digest is a layer 5 CRC,  and as such it is expected to be end-to-end, meaning it is computed by layer 5 on the transmitter and verified on layer 5 on the receiver.
> Any data corruption which happens in any of the lower layers, including their software processing, should be protected by this CRC. For example, if the IP or TCP stack has a bug that corrupts the NVMeTCP payload data, the CRC should protect against it. It seems that may not be the case with this offload.

If the TCP/IP stack corrupts packet data, then likely many TCP/IP consumers will be effected, and it will be fixed promptly.
Unlike with TOE, these bugs can be easily fixed/hotpatched by the community.

>
>>> This patch reworks the receive-side crc calculation to always run at 
>>> the end, so as to keep a single flow for both offload and non-offload.
>>> This change simplifies the code, but it may degrade performance for 
>>> non-offload crc calculation.
>> ??
>>
>>  From my scan it doeesn't look like you do that.. Am I missing something?
>> Can you explain?
>>
>>> Signed-off-by: Boris Pismenny <borisp@mellanox.com>
>>> Signed-off-by: Ben Ben-Ishay <benishay@mellanox.com>
>>> Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
>>> Signed-off-by: Yoray Zack <yorayz@mellanox.com>
>>> ---
>>>   drivers/nvme/host/tcp.c | 66
>> ++++++++++++++++++++++++++++++++++++-----
>>>   1 file changed, 58 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index
>>> 7bd97f856677..9a620d1dacb4 100644
>>> --- a/drivers/nvme/host/tcp.c
>>> +++ b/drivers/nvme/host/tcp.c
>>> @@ -94,6 +94,7 @@ struct nvme_tcp_queue {
>>>   	size_t			data_remaining;
>>>   	size_t			ddgst_remaining;
>>>   	unsigned int		nr_cqe;
>>> +	bool			crc_valid;
> I suggest to rename it to ddgst_valid.
>
Sure
Boris Pismenny Nov. 8, 2020, 2:46 p.m. UTC | #4
On 09/10/2020 1:44, Sagi Grimberg wrote:
>> crc offload of the nvme capsule. Check if all the skb bits
>> are on, and if not recalculate the crc in SW and check it.
> Can you clarify in the patch description that this is only
> for pdu data digest and not header digest?

Will do

>
>> This patch reworks the receive-side crc calculation to always
>> run at the end, so as to keep a single flow for both offload
>> and non-offload. This change simplifies the code, but it may degrade
>> performance for non-offload crc calculation.
> ??
>
>  From my scan it doeesn't look like you do that.. Am I missing something?
> Can you explain?

The performance of CRC data digest in the offload's fallback path may be less good compared to CRC calculation with skb_copy_and_hash.
To be clear, the fallback path is occurs when `queue->data_digest && test_bit(NVME_TCP_Q_OFF_CRC_RX, &queue->flags)`, while we receive SKBs where `skb->ddp_crc = 0`

>
>>   	rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
>>   	if (!rq) {
>>   		dev_err(queue->ctrl->ctrl.device,
>> @@ -992,7 +1031,7 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
>>   		recv_len = min_t(size_t, recv_len,
>>   				iov_iter_count(&req->iter));
>>   
>> -		if (queue->data_digest)
>> +		if (queue->data_digest && !test_bit(NVME_TCP_Q_OFFLOADS, &queue->flags))
>>   			ret = skb_copy_and_hash_datagram_iter(skb, *offset,
>>   				&req->iter, recv_len, queue->rcv_hash);
> This is the skb copy and hash, not clear why you say that you move this
> to the end...

See the offload fallback path below

>
>>   		else
>> @@ -1012,7 +1051,6 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
>>   
>>   	if (!queue->data_remaining) {
>>   		if (queue->data_digest) {
>> -			nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst);
> If I instead do:
> 			if (!test_bit(NVME_TCP_Q_OFFLOADS,
>                                        &queue->flags))
> 				nvme_tcp_ddgst_final(queue->rcv_hash,
> 						     &queue->exp_ddgst);
>
> Does that help the mess in nvme_tcp_recv_ddgst?

Not really, as the code path there takes care of the fallback path, i.e. offloaded requested, but didn't succeed.

>
>>   			queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH;
>>   		} else {
>>   			if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
>> @@ -1033,8 +1071,11 @@ static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
>>   	char *ddgst = (char *)&queue->recv_ddgst;
>>   	size_t recv_len = min_t(size_t, *len, queue->ddgst_remaining);
>>   	off_t off = NVME_TCP_DIGEST_LENGTH - queue->ddgst_remaining;
>> +	bool ddgst_offload_fail;
>>   	int ret;
>>   
>> +	if (test_bit(NVME_TCP_Q_OFFLOADS, &queue->flags))
>> +		nvme_tcp_device_ddgst_update(queue, skb);
>>   	ret = skb_copy_bits(skb, *offset, &ddgst[off], recv_len);
>>   	if (unlikely(ret))
>>   		return ret;
>> @@ -1045,12 +1086,21 @@ static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
>>   	if (queue->ddgst_remaining)
>>   		return 0;
>>   
>> -	if (queue->recv_ddgst != queue->exp_ddgst) {
>> -		dev_err(queue->ctrl->ctrl.device,
>> -			"data digest error: recv %#x expected %#x\n",
>> -			le32_to_cpu(queue->recv_ddgst),
>> -			le32_to_cpu(queue->exp_ddgst));
>> -		return -EIO;
>> +	ddgst_offload_fail = !nvme_tcp_device_ddgst_ok(queue);
>> +	if (!test_bit(NVME_TCP_Q_OFFLOADS, &queue->flags) ||
>> +	    ddgst_offload_fail) {
>> +		if (test_bit(NVME_TCP_Q_OFFLOADS, &queue->flags) &&
>> +		    ddgst_offload_fail)
>> +			nvme_tcp_crc_recalculate(queue, pdu);
>> +
>> +		nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst);
>> +		if (queue->recv_ddgst != queue->exp_ddgst) {
>> +			dev_err(queue->ctrl->ctrl.device,
>> +				"data digest error: recv %#x expected %#x\n",
>> +				le32_to_cpu(queue->recv_ddgst),
>> +				le32_to_cpu(queue->exp_ddgst));
>> +			return -EIO;
> This gets convoluted here...

Will try to simplify, the general idea is that there are 3 paths with common code:
1. non-offload
2. offload failed
3. offload success
(1) and (2) share the code for finalizing checking the data digest, while (3) skips this entirely.

In other words, how about this:
```
          offload_fail = !nvme_tcp_ddp_ddgst_ok(queue);                                               
          offload = test_bit(NVME_TCP_Q_OFF_CRC_RX, &queue->flags);                                   
          if (!offload || offload_fail) {                                                             
                  if (offload && offload_fail) // software-fallback                                   
                          nvme_tcp_ddp_ddgst_recalc(queue, pdu);                                      
                                                                                                      
                  nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst);                           
                  if (queue->recv_ddgst != queue->exp_ddgst) {                                        
                          dev_err(queue->ctrl->ctrl.device,                                           
                                  "data digest error: recv %#x expected %#x\n",                       
                                  le32_to_cpu(queue->recv_ddgst),                                     
                                  le32_to_cpu(queue->exp_ddgst));                                     
                          return -EIO;                                                                
                  }                                                                                   
          } 
```

>
>> +		}
>>   	}
>>   
>>   	if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
>>
diff mbox series

Patch

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 7bd97f856677..9a620d1dacb4 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -94,6 +94,7 @@  struct nvme_tcp_queue {
 	size_t			data_remaining;
 	size_t			ddgst_remaining;
 	unsigned int		nr_cqe;
+	bool			crc_valid;
 
 	/* send state */
 	struct nvme_tcp_request *request;
@@ -233,6 +234,41 @@  static inline size_t nvme_tcp_pdu_last_send(struct nvme_tcp_request *req,
 	return nvme_tcp_pdu_data_left(req) <= len;
 }
 
+static inline bool nvme_tcp_device_ddgst_ok(struct nvme_tcp_queue *queue)
+{
+	return queue->crc_valid;
+}
+
+static inline void nvme_tcp_device_ddgst_update(struct nvme_tcp_queue *queue,
+						struct sk_buff *skb)
+{
+	if (queue->crc_valid)
+#ifdef CONFIG_TCP_DDP_CRC
+		queue->crc_valid = skb->ddp_crc;
+#else
+		queue->crc_valid = false;
+#endif
+}
+
+static void nvme_tcp_crc_recalculate(struct nvme_tcp_queue *queue,
+				     struct nvme_tcp_data_pdu *pdu)
+{
+	struct nvme_tcp_request *req;
+	struct request *rq;
+
+	rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
+	if (!rq)
+		return;
+	req = blk_mq_rq_to_pdu(rq);
+	crypto_ahash_init(queue->rcv_hash);
+	req->ddp.sg_table.sgl = req->ddp.first_sgl;
+	/* req->ddp.sg_table is allocated and filled in nvme_tcp_setup_ddp */
+	ahash_request_set_crypt(queue->rcv_hash, req->ddp.sg_table.sgl, NULL,
+				le32_to_cpu(pdu->data_length));
+	crypto_ahash_update(queue->rcv_hash);
+}
+
+
 #ifdef CONFIG_TCP_DDP
 
 bool nvme_tcp_resync_request(struct sock *sk, u32 seq, u32 flags);
@@ -706,6 +742,7 @@  static void nvme_tcp_init_recv_ctx(struct nvme_tcp_queue *queue)
 	queue->pdu_offset = 0;
 	queue->data_remaining = -1;
 	queue->ddgst_remaining = 0;
+	queue->crc_valid = true;
 }
 
 static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl)
@@ -955,6 +992,8 @@  static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
 	struct nvme_tcp_request *req;
 	struct request *rq;
 
+	if (test_bit(NVME_TCP_Q_OFFLOADS, &queue->flags))
+		nvme_tcp_device_ddgst_update(queue, skb);
 	rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
 	if (!rq) {
 		dev_err(queue->ctrl->ctrl.device,
@@ -992,7 +1031,7 @@  static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
 		recv_len = min_t(size_t, recv_len,
 				iov_iter_count(&req->iter));
 
-		if (queue->data_digest)
+		if (queue->data_digest && !test_bit(NVME_TCP_Q_OFFLOADS, &queue->flags))
 			ret = skb_copy_and_hash_datagram_iter(skb, *offset,
 				&req->iter, recv_len, queue->rcv_hash);
 		else
@@ -1012,7 +1051,6 @@  static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
 
 	if (!queue->data_remaining) {
 		if (queue->data_digest) {
-			nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst);
 			queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH;
 		} else {
 			if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
@@ -1033,8 +1071,11 @@  static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
 	char *ddgst = (char *)&queue->recv_ddgst;
 	size_t recv_len = min_t(size_t, *len, queue->ddgst_remaining);
 	off_t off = NVME_TCP_DIGEST_LENGTH - queue->ddgst_remaining;
+	bool ddgst_offload_fail;
 	int ret;
 
+	if (test_bit(NVME_TCP_Q_OFFLOADS, &queue->flags))
+		nvme_tcp_device_ddgst_update(queue, skb);
 	ret = skb_copy_bits(skb, *offset, &ddgst[off], recv_len);
 	if (unlikely(ret))
 		return ret;
@@ -1045,12 +1086,21 @@  static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
 	if (queue->ddgst_remaining)
 		return 0;
 
-	if (queue->recv_ddgst != queue->exp_ddgst) {
-		dev_err(queue->ctrl->ctrl.device,
-			"data digest error: recv %#x expected %#x\n",
-			le32_to_cpu(queue->recv_ddgst),
-			le32_to_cpu(queue->exp_ddgst));
-		return -EIO;
+	ddgst_offload_fail = !nvme_tcp_device_ddgst_ok(queue);
+	if (!test_bit(NVME_TCP_Q_OFFLOADS, &queue->flags) ||
+	    ddgst_offload_fail) {
+		if (test_bit(NVME_TCP_Q_OFFLOADS, &queue->flags) &&
+		    ddgst_offload_fail)
+			nvme_tcp_crc_recalculate(queue, pdu);
+
+		nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst);
+		if (queue->recv_ddgst != queue->exp_ddgst) {
+			dev_err(queue->ctrl->ctrl.device,
+				"data digest error: recv %#x expected %#x\n",
+				le32_to_cpu(queue->recv_ddgst),
+				le32_to_cpu(queue->exp_ddgst));
+			return -EIO;
+		}
 	}
 
 	if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {